Related
Maybe it`s already too late for working in my code.
I have to redo my figure in ggplot2, and I am unabable to do so.
My errorbars are not showing and I cannot understand why.
dev.new()
ggplot() +
geom_point(data = conf_intervals, aes(y = mean_ALA, x = mean_LIN, shape=trophic,
color=feeding_type_2,
size = 3,
alpha = 0.5)) + scale_shape_manual("Trophic level", values = c(15:19)) +
geom_errorbar(data = conf_intervals,aes(mean_ALA, ymin = mean_ALA - se_ALA,
ymax = mean_ALA + se_ALA,)) +
geom_errorbarh(data = conf_intervals,aes(mean_LIN, ymin = mean_LIN - se_LIN,
ymax = mean_LIN + se_LIN)) +
geom_point(data = CSIA_inverts_basal, aes(y=ALA.d13C, x=LIN.d13C, color=feeding_type_2))
labs (title="Biplot of compound stable isotopes- Centroids with 95 % CI", subtitle="LIN VS. ALA",
y=expression({delta}^13*C[ALA]~'\211'~VPDB),
x=expression({delta}^13*C[LIN]~'\211'~VPDB)) +
# guides(color = FALSE, shape = FALSE) +
theme_classic()
Error: geom_errorbarh requires the following missing aesthetics: y
Run rlang::last_error() to see where the error occurred.
In addition: Warning message:
Ignoring unknown aesthetics: x
Here is my example data:
dput(head(CSIA_inverts_basal))
structure(list(d13C.VPDB = c(-35.10487341, -34.85465717, -34.67216423,
-34.06032315, -33.68548439, -33.4811546), d15.NAIR = c(-6.321847159,
-5.384989361, -2.638749276, -4.986045928, -5.946279778, -6.648526348
), ALA.d13C = c(-43.2375195, -44.77813854, -42.1921855, -41.58363894,
-39.156857, -40.33135344), LIN.d13C = c(-40.864145, -42.32043061,
-41.4247005, -36.08156681, -39.45744387, -37.76516617), combi = structure(c(1L,
1L, 1L, 1L, 1L, 1L), levels = c("epilithon", "fresh.leaves",
"gammarus", "grazing.ephemeroptera", "predatory.plecoptera",
"salmonid.eyes", "shreddering.plecoptera", "submerged.leaves"
), class = "factor"), feeding_type = c("epilithon", "epilithon",
"epilithon", "epilithon", "epilithon", "epilithon"), sampling.time = c("summer",
"fall", "summer", "fall", "fall", "fall"), year = c(2018L, 2016L,
2018L, 2016L, 2016L, 2016L), split = structure(c(2L, 2L, 2L,
2L, 2L, 2L), levels = c("consumer", "resource"), class = "factor"),
split_2 = c("epilithon", "epilithon", "epilithon", "epilithon",
"epilithon", "epilithon"), split_3 = c("epilithon", "epilithon",
"epilithon", "epilithon", "epilithon", "epilithon"), feeding_type_2 = structure(c(1L,
1L, 1L, 1L, 1L, 1L), levels = c("Epilithon", "Fresh leaves",
"Grazer", "Salmonid (Eyes)", "Predator", "Submerged leaves",
"Shredder"), class = "factor"), trophic = structure(c(1L,
1L, 1L, 1L, 1L, 1L), levels = c("Base", "Non-predatory invertebrate",
"Predatory invertebrate", "Predator"), class = "factor")), row.names = 2:7, class = "data.frame")
dput(conf_intervals)
structure(list(trophic = structure(c(1L, 1L, 1L, 2L, 2L, 3L,
4L), levels = c("Base", "Non-predatory invertebrate", "Predatory invertebrate",
"Predator"), class = "factor"), feeding_type_2 = structure(c(1L,
2L, 6L, 3L, 7L, 5L, 4L), levels = c("Epilithon", "Fresh leaves",
"Grazer", "Salmonid (Eyes)", "Predator", "Submerged leaves",
"Shredder"), class = "factor"), mean_ALA = c(-42.1, -39.7, -38.7,
-45.7, -40.3, -42.8, -42.7), mean_LIN = c(-39.2, -40, -37.2,
-40.8, -35.9, -36.7, -37.9), se_ALA = c(1.1, 1.1, 1.1, 2.2, 1.2,
1.9, 0.4), se_LIN = c(1.1, 1.1, 0.6, 1.8, 0.9, 1.3, 0.6), N_ALA = c(12L,
14L, 10L, 9L, 14L, 7L, 17L), LIN_N = c(12L, 14L, 10L, 9L, 14L,
7L, 17L)), class = "data.frame", row.names = c(NA, -7L))
Can someone help me?
geom_errorbarh doesn't have an aesthetic called x. It has a y, an xmin and an xmax. I suspect you are mixing up the x and y variables in your errorbar calls too, so check these carefully. Also, you should move alpha and size outside of aes so they don't appear in the legend.
It's also a good idea to make sure your code is formatted in such a way that it is easier to read and debug. Limiting your line length and using inheritance of the data passed to your initial ggplot call helps to simplify things a bit too.
ggplot(conf_intervals) +
geom_point(aes(y = mean_ALA, x = mean_LIN, shape = trophic,
color = feeding_type_2), size = 3, alpha = 0.5) +
scale_shape_manual("Trophic level", values = c(15:19)) +
geom_errorbar(aes(mean_LIN, ymin = mean_ALA - se_ALA,
ymax = mean_ALA + se_ALA)) +
geom_errorbarh(aes(y = mean_ALA, xmin = mean_LIN - se_LIN,
xmax = mean_LIN + se_LIN)) +
geom_point(data = CSIA_inverts_basal,
aes(y = ALA.d13C, x = LIN.d13C, color = feeding_type_2)) +
labs(title = "Biplot of compound stable isotopes- Centroids with 95 % CI",
subtitle = "LIN VS. ALA",
y = expression({delta}^13*C[ALA]~'\211'~VPDB),
x = expression({delta}^13*C[LIN]~'\211'~VPDB)) +
theme_classic()
I have a plot that looks like below. I want to change the order so that the larger value comes first (so cyan would precede red). But I can't seem to do this. What am I doing wrong?
This is my current code block so far:
ggplot(df, aes(x = Gene.Set.Size, y = OR, label =P.value, color = Method, group = Method)) +
geom_point(position=position_dodge(width=0.5)) +
ggrepel::geom_text_repel(size = 6, box.padding = 1, segment.angle = 20, position=position_dodge(width=0.5))+
geom_pointrange(aes(ymax = UpperCI, ymin = LowerCI),position=position_dodge(width=0.5)) +
theme_bw() +
theme(text=element_text(size=25),axis.text.x = element_text(angle = 45, hjust = 1)) +
ylab("Odds ratio") +
xlab("Gene set size") +
theme(plot.margin = unit(c(2,2,2,2), "cm"))
> dput(df)
structure(list(Method = structure(c(1L, 1L, 1L, 2L, 2L, 2L), .Label = c("MAGMA",
"Pairwise"), class = "factor"), P.value = c(8.74e-28, 1.33e-56,
5.57e-92, 1.63e-44, 4.23e-71, 2.78e-95), OR = c(1.39, 1.424668,
1.4, 1.513, 1.478208, 1.409563), UpperCI = c(1.481491, 1.487065,
1.446039, 1.601557, 1.417117, 1.455425), LowerCI = c(1.316829,
1.364601, 1.356358, 1.42, 1.541768, 1.365056), Gene.Set.Size = structure(c(1L,
2L, 3L, 1L, 2L, 3L), .Label = c("500", "1000", "2000"), class = "factor")), row.names = c(NA,
-6L), class = "data.frame")
You must set the factor order.
library(ggplot2)
df <- structure(list(Method = structure(c(1L, 1L, 1L, 2L, 2L, 2L), .Label = c("MAGMA",
"Pairwise"), class = "factor"), P.value = c(8.74e-28, 1.33e-56,
5.57e-92, 1.63e-44, 4.23e-71, 2.78e-95), OR = c(1.39, 1.424668,
1.4, 1.513, 1.478208, 1.409563), UpperCI = c(1.481491, 1.487065,
1.446039, 1.601557, 1.417117, 1.455425), LowerCI = c(1.316829,
1.364601, 1.356358, 1.42, 1.541768, 1.365056), Gene.Set.Size = structure(c(1L,
2L, 3L, 1L, 2L, 3L), .Label = c("500", "1000", "2000"), class = "factor")), row.names = c(NA,
-6L), class = "data.frame")
#reorder Factor
df$Method = factor(df$Method, levels=c("Pairwise", "MAGMA"))
ggplot(df, aes(x=Gene.Set.Size, y=OR, label=P.value,
group= Method, color=Method)) +
geom_point(position=position_dodge(width=0.5)) +
ggrepel::geom_text_repel(size = 6, box.padding = 1, segment.angle = 20, position=position_dodge(width=0.5))+
geom_pointrange(aes(ymax = UpperCI, ymin = LowerCI),position=position_dodge(width=0.5)) +
theme_bw() +
theme(text=element_text(size=25),axis.text.x = element_text(angle = 45, hjust = 1)) +
ylab("Odds ratio") +
xlab("Gene set size") +
theme(plot.margin = unit(c(2,2,2,2), "cm"))
df %>% mutate(Method = fct_relevel(Method, 'Pairwise')) %>% <<your ggplot2 code>
should do the job, assuming you have imported the tidyverse pipe operator %>% and the forcats package, which you can do with require(tidyverse)
You can simply reverse the ordering of the Method factor with forcats::fct_rev.
df$Method <- fct_rev(df$Method)
Alternatively, you can specify the first level when you initially converted that column to a factor.
trying to establish individual bar data labels ONLY if the value is negative. I was able to do it fine for a variable that comprised simple integers, but for a variable that needs to be formatted as dollar with the thousands separator, I can't seem to get rid of the "NA" label.
DolSumPlot <- ggplot(data = DolSums, aes(x = Group.1, fill = Group.2)) +
geom_bar(aes(weight = x), position = position_stack(reverse = TRUE)) +
coord_flip() +
labs(title = "Dollars Billed by Technician and Shop, Between 02/01/2018 and 05/31/2018",
y = "Dollars Billed", x = "Technician", fill = "Shop") +
scale_y_continuous(limits= c(NA,NA),
labels = scales::dollar,
breaks = seq(0, 50000 + 10000, 5000*2),
minor_breaks = seq(0,50000 + 10000, by = 5000)) +
scale_fill_brewer(palette = "Set1") +
geom_label(aes(label=scales::dollar(ifelse(DolSums$x < 0, DolSums$x,NA)),
y = DolSums$x),
show.legend = FALSE, size = 2.6, colour = "white", fontface = "bold")
Data:
DolSums = structure(list(Group.1 = c((names)), Group.2 = structure(c(4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
5L, 5L, 5L, 5L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Radio",
"Video", "Engineering", "800Mhz", "PSSRP", "Other"), class = "factor"),
x = c(4646, 16008.5, 48793.1, 4040, 14468.25, 13332, 1565.5,
6060, 6549.85, 2929, 4444, 3257.25, 5904, 2029.5, 3321, 6767,
8105.25, 8105.25, 8130.5, 3131, 5075.25, 3383.5, 4418.75,
23381.5, 1363.5, -2323, 29133.45, 2550.25, 505, 26042.85,
35203.55, 35940.85, 1641.25, 45066.2, 37541.7, 606, 45439.9
)), .Names = c("Group.1", "Group.2", "x"), row.names = c(NA,
-37L), class = "data.frame")
You can do this by using the data argument in geom_label and subsetting only rows with negative x. Also note that since you already have DolSums as input, there is no need to write DolSums$x. Instead, use column name to refer to a specific column directly:
library(ggplot2)
ggplot(data = DolSums, aes(x = Group.1, fill = Group.2)) +
geom_bar(aes(weight = x), position = position_stack(reverse = TRUE)) +
coord_flip() +
labs(title = "Dollars Billed by Technician and Shop, Between 02/01/2018 and 05/31/2018",
y = "Dollars Billed", x = "Technician", fill = "Shop") +
scale_y_continuous(limits= c(NA,NA),
labels = scales::dollar,
breaks = seq(0, 50000 + 10000, 5000*2),
minor_breaks = seq(0,50000 + 10000, by = 5000)) +
scale_fill_brewer(palette = "Set1") +
geom_label(data = DolSums[DolSums$x < 0,],
aes(label=scales::dollar(x),
y = x),
show.legend = FALSE, size = 2.6, colour = "white", fontface = "bold")
I want to add empty lines between groups to make the graph look less dense.
The first 10 rows of data is attached here:
> dput(droplevels(head(hosp, 10)))
structure(list(mzipid = c("FL-3", "FL-3", "FL-3", "FL-4", "FL-4",
"FL-4", "FL-4", "FL-4", "FL-4", "FL-4"), region_oe = c(1.6459234,
1.6459234, 1.6459234, 1.3399296, 1.3399296, 1.3399296, 1.3399296,
1.3399296, 1.3399296, 1.3399296), region_low_ci = c(0.53041852,
0.53041852, 0.53041852, 0.9696098, 0.9696098, 0.9696098, 0.9696098,
0.9696098, 0.9696098, 0.9696098), region_high_ci = c(3.8410261,
3.8410261, 3.8410261, 1.8049299, 1.8049299, 1.8049299, 1.8049299,
1.8049299, 1.8049299, 1.8049299), hosp_id = structure(c(5L, 9L,
4L, 7L, 8L, 6L, 2L, 1L, 9L, 3L), .Label = c("100025", "100054",
"100093", "100106", "100113", "100122", "100124", "100223", "non_local_hosp"
), class = "factor"), outc_n = c(2L, 2L, 1L, 1L, 6L, 3L, 1L,
19L, 2L, 9L), pre_outc_n = c(0.339173126965761, 0.577540323603898,
0.838447939604521, 0.463049655780196, 2.99622658733279, 1.86155441217125,
0.626489417627454, 12.4346171403304, 1.37301584333181, 6.90619195345789
), oe = c(NA, NA, NA, NA, 2.0025189, 1.6115564, NA, 1.5279924,
NA, 1.3031784), low_ci = c(NA, NA, NA, NA, 0.73123336, 0.32389662,
NA, 0.91951913, NA, 0.59465784), high_ci = c(NA, NA, NA, NA,
4.3587809, 4.7086916, NA, 2.3862863, NA, 2.4740036), state = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "FL", class = "factor"),
id = 1:10, outlier = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = "black", class = "factor"), cv = c(" 66.88",
" 66.88", " 66.88", " 38.37", " 38.37", " 38.37", " 38.37",
" 38.37", " 38.37", " 38.37"), start_id = c(1L, 1L, 1L, 4L,
4L, 4L, 4L, 4L, 4L, 4L), end_id = c(3L, 3L, 3L, 12L, 12L,
12L, 12L, 12L, 12L, 12L)), .Names = c("mzipid", "region_oe",
"region_low_ci", "region_high_ci", "hosp_id", "outc_n", "pre_outc_n",
"oe", "low_ci", "high_ci", "state", "id", "outlier", "cv", "start_id",
"end_id"), row.names = 105:114, class = "data.frame")
The current code is like this:
## draw graph
ggplot(hosp, aes(x = id, y = oe, group = mzipid)) +
theme(panel.background = element_rect(fill = "white", colour = "grey50")) +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank()) +
geom_point(size=1, show.legend = FALSE, colour = hosp$outlier) +
geom_linerange(aes(ymin = low_ci, ymax = high_ci), alpha = 0.4, na.rm = TRUE, colour = hosp$outlier)) +
scale_linetype_manual(values = linetype, guide = FALSE) +
geom_segment(aes(x = start_id, xend = end_id, y = region_oe, yend = region_oe, linetype = "1",
size = 1), na.rm = TRUE) +
geom_ribbon(aes(ymin = region_low_ci, ymax = region_high_ci), alpha=0.2, linetype = "blank", show.legend = FALSE) +
geom_hline(aes(yintercept = 1, alpha = 0.2, colour = "red", size = 1), show.legend = FALSE) +
scale_size_identity() +
scale_x_continuous(name = "hospital id", breaks = seq(0,350, by = 20)) +
scale_y_continuous(name = "O:E ratio", breaks = seq(0,6, by = 0.5),
sec.axis = sec_axis(trans = ~., breaks = c(0.1),
labels = "coefficient of variation")) +
stat_summaryh(fun.x = mean, aes(label = mzipid, y = 5.3), geom = "text", size = 2, srt = 35,
check_overlap = F) +
stat_summaryh(fun.x = mean, aes(label = cv, y = 0.1), geom = "text", size = 2, srt = 35,
check_overlap = F) +
guides(colour = FALSE) +
ggtitle("O:E ratio:hospital level\nmedian clusters") +
theme(plot.title = element_text(hjust = 0.5))
And the current graph looks like this:
How can I add empty lines/space between each zip group (mzipid)? Given the data provided, if anyone can show how to add space between FL-3 group and FL-4 group would be great.
Thanks!!
I'm working with ggplot2 for the first time, and I'm having trouble making the colors of the labels I created with ggrepel change dynamically. Currently, my code looks like this:
ggplot(tstat) +
geom_point(aes(Mu, Sigma),size = 5, color = 'black') +
geom_label_repel(aes(Mu, Sigma, label = VarNames, fill = factor(Hemisphere)), fontface = 'bold', color = 'white',
box.padding = unit(0.25, 'lines'),point.padding = unit(0.5, 'lines')) +
geom_rangeframe() +
theme_tufte() +
xlab(expression(paste(mu, "*"))) +
ylab(expression(sigma)) +
theme(axis.title.x = element_text(vjust=-0.5), axis.title.y = element_text(vjust=1.5)) +
ggtitle("Model Sensitivity by Hemisphere")
In general, this works pretty well, except I strongly dislike the toothpaste green color it gives me for one of the two factors plotted. I want to dictate the specific colors of that fill = factor(Hemisphere)) line, but I don't know how.
I have already tried using the scale_colours_manual function, but when I include it within the geom_label_repel(.....) paratheses in line 3, the program complains that "ggplot2 doesn't know how to deal with data of class ScaleDiscrete/Scale/ggproto", and when I place the scale_colours_manual line outside of line 3, it has no effect at all, as in this example, which produced an identical plot to the one above:
ggplot(tstat) +
geom_point(aes(Mu, Sigma),size = 5, color = 'black') +
scale_colour_manual(values = c('blue', 'red')) +
geom_label_repel(aes(Mu, Sigma, label = VarNames, fill = factor(Hemisphere)), fontface = 'bold', color = 'white',
box.padding = unit(0.25, 'lines'),point.padding = unit(0.5, 'lines')) +
geom_rangeframe() +
theme_tufte() +
xlab(expression(paste(mu, "*"))) +
ylab(expression(sigma)) +
theme(axis.title.x = element_text(vjust=-0.5), axis.title.y = element_text(vjust=1.5)) +
ggtitle("Model Sensitivity by Hemisphere")
I know there has to be a way to do this, but I'm at a loss. Thanks for any help you've got!
EDIT: At request, I've attached a dput() of tstat. Not a big data frame.
structure(list(VarNames = structure(c(4L, 1L, 3L, 2L, 5L, 6L,
4L, 1L, 3L, 2L, 5L, 6L), .Label = c("Dry Deposition", "MEGAN Acetone",
"MEGAN Terpenes", "Monoterpene Yield", "Ocean", "Photolysis"), class = "factor"),
Mu = c(2703.09, 8066.01, 6566.6, 19741.7, 5809.6, 14231.8, 1493.56, 3067.54, 3631.32, 9951.06, 8748.95, 7967.93),
Sigma = c(3478.28, 8883.23, 7276.49, 18454.4, 6218.8, 14989.7, 1925.14, 3410.27, 4017.64, 9289.57, 9354.64, 8403.1),
Hemisphere = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L),
.Label = c("Northern", "Southern"), class = "factor")),
.Names = c("VarNames", "Mu", "Sigma", "Hemisphere"),
class = "data.frame", row.names = c(NA, -12L))
You can use scale_fill_manual:
tstat <- structure(list(VarNames = structure(c(4L, 1L, 3L, 2L, 5L, 6L,
4L, 1L, 3L, 2L, 5L, 6L), .Label = c("Dry Deposition", "MEGAN Acetone",
"MEGAN Terpenes", "Monoterpene Yield", "Ocean", "Photolysis"), class = "factor"),
Mu = c(2703.09, 8066.01, 6566.6, 19741.7, 5809.6, 14231.8, 1493.56, 3067.54, 3631.32, 9951.06, 8748.95, 7967.93),
Sigma = c(3478.28, 8883.23, 7276.49, 18454.4, 6218.8, 14989.7, 1925.14, 3410.27, 4017.64, 9289.57, 9354.64, 8403.1),
Hemisphere = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L),
.Label = c("Northern", "Southern"), class = "factor")),
.Names = c("VarNames", "Mu", "Sigma", "Hemisphere"),
class = "data.frame", row.names = c(NA, -12L))
library(ggplot2)
library(ggrepel)
library(ggthemes)
ggplot(tstat) +
geom_point(aes(Mu, Sigma),size = 5, color = 'black') +
geom_label_repel(aes(Mu, Sigma, label = VarNames, fill = factor(Hemisphere)), fontface = 'bold', color = 'white',
box.padding = unit(0.25, 'lines'),point.padding = unit(0.5, 'lines')) +
geom_rangeframe() +
theme_tufte() +
xlab(expression(paste(mu, "*"))) +
ylab(expression(sigma)) +
theme(axis.title.x = element_text(vjust=-0.5), axis.title.y = element_text(vjust=1.5)) +
ggtitle("Model Sensitivity by Hemisphere") +
scale_fill_manual(values = setNames(c("lightblue", "darkgreen"), levels(tstat$Hemisphere)))