I'm using the excellent ggrepel package to position text labels on bars with a bit of jitter. The only trouble is that the labels are appearing in reverse order. Here's some reproducible code:
library(tidyverse)
require(scales) # Used for adding percentages to bar charts
library(ggrepel)
# ingest some sample data
# Sample survey data (45 rows) in dput() form. It is assigned to
# `climate_experience_data` because the select() call that follows reads the
# data under that name; left unassigned, the script stops on a missing object.
climate_experience_data <- structure(list(Q52_bin = structure(c(3L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 1L, 2L,
2L, 1L, 3L, 2L, 3L, 3L, 1L), .Label = c("low", "medium", "high"
), class = "factor"), Q53_bin = structure(c(2L, 3L, 2L, 2L, 2L,
2L, 2L, 3L, 2L, 3L, 2L, 1L, 2L, 2L, 1L, 2L, 3L, 2L, 2L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 3L, 1L,
2L, 2L, 2L, 1L, 2L, 2L, 3L, 2L), .Label = c("low", "medium",
"high"), class = "factor"), Q57_bin = structure(c(2L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 3L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 1L,
1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 3L,
3L, 1L, 2L, 2L, 2L, 2L, 1L, 3L, 2L, 2L), .Label = c("low", "medium",
"high"), class = "factor"), Q4 = c(2, 3, 3, 5, 4, 3, 4, 5, 2,
4, 2, 3, 5, 4, 3, 3, 5, 5, 4, 5, 3, 2, 4, 1, 5, 4, 4, 4, 4, 4,
5, 3, 5, 1, 5, 5, 4, 5, 4, 1, 4, 2, 1, 5, 4)), row.names = c(NA,
-45L), class = c("tbl_df", "tbl", "data.frame"))
# Keep the three binned predictors plus Q4; Q4 is exposed as `response`.
df <- select(
  climate_experience_data,
  Q52_bin, Q53_bin, Q57_bin,
  response = Q4
)

# Display text for the facet strips, keyed by column name and by bin level.
facet_names <- c(
  Q52_bin = "Spirituality",
  Q53_bin = "Politics L/R",
  Q57_bin = "Religiosity",
  low = "low",
  medium = "medium",
  high = "high"
)

# Look up display text by `value`; `variable` is accepted but not used.
facet_labeller <- function(variable, value) {
  facet_names[value]
}

q4_levels <- c("Not at all", "A little", "Some", "A lot", "A great deal")

# Build an ordered factor whose levels run from 5 down to 1, then replace the
# numeric codes with their text labels (the level order is kept by the recode).
df$response <- fct_recode(
  factor(df$response, levels = c("5", "4", "3", "2", "1"), ordered = TRUE),
  "Not at all" = "1",
  "A little" = "2",
  "Some" = "3",
  "A lot" = "4",
  "A great deal" = "5"
)

caption <- "How much have you thought about climate change before today?"
# Reshape to long form, tabulate, and draw faceted 100% stacked bars with
# repelled percentage labels; the result is then written to disk.
df %>%
  # stack every column except `response`, keeping the source column name
  # (bin_name) and its bin value (b) so both can be used for faceting
  pivot_longer(!response, names_to = "bin_name", values_to = "b") %>%
  # number of rows per response within each bin_name / b combination
  count(response, bin_name, b) %>%
  group_by(bin_name, b) %>%
  # share of each response within its group, formatted as a text label
  mutate(perc = paste0(round(n * 100 / sum(n), 1), "%")) %>%
  ggplot(aes(x = n, y = "", fill = response, label = perc)) +
  geom_col(aes(fill = response), position = position_fill()) +
  coord_cartesian(clip = "off") +
  # NOTE(review): fill is set to a constant for this layer and no group
  # aesthetic is mapped, so the labels are presumably not stacked by
  # `response` the way the bars are -- confirm against the rendered plot
  geom_label_repel(
    position = position_fill(),
    fill = "white",
    size = 3,
    min.segment.length = 0,
    max.overlaps = Inf
  ) +
  scale_fill_brewer(palette = "YlOrBr") +
  scale_x_continuous(
    labels = scales::percent_format(),
    expand = c(0.05, 0.05)
  ) +
  facet_grid(vars(b), vars(bin_name), labeller = as_labeller(facet_names)) +
  labs(caption = caption, x = "", y = "") +
  guides(fill = guide_legend(title = NULL))

ggsave("figures/q4_faceted.png", width = 30, height = 10, units = "cm")
Here's the visual that I'm getting:
So what's going on here in terms of labels reversing? I'm concerned that there may be something about the plot that is inaccurate!
The issue is that the grouping variable used for your labels is different from the one used for the columns. To fix that you have to explicitly tell ggrepel to group the labels by response using the group aes:
library(tidyverse)
library(ggrepel)
# Long-format counts with a percentage label per response, where the
# percentage is taken within each bin_name / b combination.
df_long <- df %>%
  pivot_longer(!response, names_to = "bin_name", values_to = "b") %>%
  count(response, bin_name, b) %>%
  group_by(bin_name, b) %>%
  mutate(perc = paste0(round(n * 100 / sum(n), 1), "%")) %>%
  # drop the grouping so df_long does not carry it into later operations
  ungroup()
# Faceted 100% stacked bars; the label layer is grouped by `response` so the
# labels are stacked by the same variable that fills the bars.
ggplot(df_long, aes(x = n, y = "")) +
  geom_col(aes(fill = response), position = position_fill()) +
  coord_cartesian(clip = "off") +
  geom_label_repel(
    aes(label = perc, group = response),
    position = position_fill(),
    fill = "white",
    size = 3,
    min.segment.length = 0,
    max.overlaps = Inf
  ) +
  scale_fill_brewer(palette = "YlOrBr") +
  scale_x_continuous(
    labels = scales::percent_format(),
    expand = c(0.05, 0.05)
  ) +
  facet_grid(vars(b), vars(bin_name), labeller = as_labeller(facet_names)) +
  labs(caption = caption, x = "", y = "") +
  guides(fill = guide_legend(title = NULL))
Related
UPDATED:
Data has now been updated to full chemistry values as opposed to mean values.
I am attempting to create a box-and-whisker plot in R on a very small dataset. Either my data is not behaving itself or I am missing some glaringly obvious error.
This is the code I have for making the plot:
library(ggplot2)
# Load the rate measurements from CSV and print a quick overview.
# attach() is intentionally not used: boxplot() below receives the data frame
# through `data =`, so the columns never need to be on the search path.
Methanogenesis_Data <- read.csv("CO2-CH4 Rates.csv")
summary(Methanogenesis_Data)
str(Methanogenesis_Data)

# One box per Patch x Temperature combination.
boxplot(
  CH4rate ~ Patch + Temperature,
  data = Methanogenesis_Data,
  xlab = "Patch",
  ylab = "CH4 Production"
)
cols <- c("red", "blue")
From this small dataset.
structure(list(Patch = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Gravel", "Macrophytes",
"Marginal"), class = "factor"), Temperature = structure(c(2L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L), .Label = c("Cold",
"Warm"), class = "factor"), CH4rate = c(0.001262595, 0.00138508,
0.001675944, 0.001592354, 0.002169233, 0.001772964, 0.002156633,
0.002864403, 0.002301383, 0.002561042, 0.005189598, 0.004557227,
0.008484851, 0.006867866, 0.007438633, 0.005405327, 0.006381582,
0.008860084, 0.007615417, 0.007705906, 0.009198508, 0.00705233,
0.007943024, 0.008319768, 0.010362114, 0.007822153, 0.010339339,
0.009252302, 0.008249555, 0.008197657), CO2rate = c(0.002274825,
0.002484866, 0.003020209, 0.00289133, 0.003927232, 0.003219346,
0.003922613, 0.005217026, 0.00418674, 0.00466427, 0.009427322,
0.008236453, 0.015339532, 0.012494729, 0.013531303, 0.009839847,
0.011624428, 0.016136746, 0.0138831, 0.014051034, 0.016753211,
0.012780956, 0.01445912, 0.01515584, 0.01883252, 0.014249452,
0.018849478, 0.016863299, 0.015045964, 0.014941168)), .Names = c("Patch",
"Temperature", "CH4rate", "CO2rate"), class = "data.frame", row.names =
c(NA,
-30L))
The plot I get as output is good; however, I would like the x axis to simply display "Gravel", "Macrophytes" and "Marginal", rather than each of those patches combined with Warm and Cold. Thanks for any assistance.
THIS IS WHAT I AM TRYING TO ACHIEVE -----> Exact Boxplot I want to create
Following your update with an example graph :
I have also included the formatting for the legend position. If you want to edit the y-axis label to include a subscript, I would suggest you read over this. I have included a placeholder title for relabelling.
test <- structure(list(Patch = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Gravel", "Macrophytes",
"Marginal"), class = "factor"), Temperature = structure(c(2L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L), .Label = c("Cold",
"Warm"), class = "factor"), CH4rate = c(0.001262595, 0.00138508,
0.001675944, 0.001592354, 0.002169233, 0.001772964, 0.002156633,
0.002864403, 0.002301383, 0.002561042, 0.005189598, 0.004557227,
0.008484851, 0.006867866, 0.007438633, 0.005405327, 0.006381582,
0.008860084, 0.007615417, 0.007705906, 0.009198508, 0.00705233,
0.007943024, 0.008319768, 0.010362114, 0.007822153, 0.010339339,
0.009252302, 0.008249555, 0.008197657), CO2rate = c(0.002274825,
0.002484866, 0.003020209, 0.00289133, 0.003927232, 0.003219346,
0.003922613, 0.005217026, 0.00418674, 0.00466427, 0.009427322,
0.008236453, 0.015339532, 0.012494729, 0.013531303, 0.009839847,
0.011624428, 0.016136746, 0.0138831, 0.014051034, 0.016753211,
0.012780956, 0.01445912, 0.01515584, 0.01883252, 0.014249452,
0.018849478, 0.016863299, 0.015045964, 0.014941168)), .Names = c("Patch",
"Temperature", "CH4rate", "CO2rate"), class = "data.frame", row.names =
c(NA,
-30L))
Now I will create two data sets, one for each graph, just for simplicity. You could leave them combined and facet instead, but for formatting purposes this might be easier.
# Stack the two rate columns into id/value pairs, then keep one rate per data
# set. pivot_longer() replaces the superseded gather(); as.data.frame() keeps
# the result a plain data.frame, as gather() returned for this input.
CH4rate <- test %>%
  pivot_longer(
    cols = c("CH4rate", "CO2rate"),
    names_to = "id",
    values_to = "value"
  ) %>%
  filter(id == "CH4rate") %>%
  as.data.frame()

CO2rate <- test %>%
  pivot_longer(
    cols = c("CH4rate", "CO2rate"),
    names_to = "id",
    values_to = "value"
  ) %>%
  filter(id == "CO2rate") %>%
  as.data.frame()
First plot:
# CH4 rate by patch, with one box per incubation temperature.
ggplot(CH4rate) +
  geom_boxplot(
    mapping = aes(
      x = Patch,
      y = value,
      # "Warm" is listed first so it takes the first fill colour
      fill = factor(Temperature, levels = c("Warm", "Cold"))
    )
  ) +
  theme(
    legend.position = c(0.15, 0.9),
    panel.background = element_rect(fill = "white", colour = "grey50")
  ) +
  labs(title = "Title of graph", x = "Patch Type", y = "CH4rate") +
  scale_fill_manual(
    name = "",
    values = c("orange", "light blue"),
    # legend text uses a real degree sign (the original had U+02D9, dot above)
    labels = c("Cold" = "Incubated at 10°C", "Warm" = "Incubated at 26°C")
  )
Second plot:
# CO2 rate by patch, with one box per incubation temperature.
ggplot(CO2rate) +
  geom_boxplot(
    mapping = aes(
      x = Patch,
      y = value,
      # "Warm" is listed first so it takes the first fill colour
      fill = factor(Temperature, levels = c("Warm", "Cold"))
    )
  ) +
  theme(
    legend.position = c(0.15, 0.9),
    panel.background = element_rect(fill = "white", colour = "grey50")
  ) +
  labs(title = "Title of graph", x = "Patch Type", y = "CO2rate") +
  scale_fill_manual(
    name = "",
    values = c("orange", "light blue"),
    # legend text uses a real degree sign (the original had U+02D9, dot above)
    labels = c("Cold" = "Incubated at 10°C", "Warm" = "Incubated at 26°C")
  )
I have a dataset of >100 different samples. Samples are from different genotypes (e.g. X, Y, Z) and 4 different time points (T0,1,2,3) with 3 biological replicates (R1,2,3). I'm measuring values for 50 different genes (in rows; A,B..)
longdata <- structure(list(Gene = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L), .Label = c("A", "B"), class = "factor"), Genotype = structure(c(1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L), .Label = c("X", "Y", "Z"), class = "factor"),
Time = structure(c(1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L), .Label = c("T0",
"T1", "T2", "T3"), class = "factor"), Ave = c(1.32606106633333,
1.499956424, 1.118528738, 1.025082136, 0.424537206666667,
0.723243112666667, 0.335509156333333, 0.328275209, 0.788329993666667,
1.125292329, 2.357924224, 0.678921448, 0.222768019, 0.293117217,
0.548228048, 0.841192647333333, 3.144197864, 0.576764958333333,
1.32037215366667, 1.15039119233333, 1.03539976366667, 1.00032109266667,
0.740699933666667, 0.687992671666667), SE = c(0.119785209010494,
0.168580466330281, 0.264739468221289, 0.124588107424543,
0.194995686650518, 0.0392007703821249, 0.06203362889702,
0.0482287534807508, 0.396968455138007, 0.0903480171168777,
0.717823561374135, 0.164024037188693, 0.0078580995264886,
0.0980939303386436, 0.233081861930954, 0.0870744069976396,
0.324195222544884, 0.434640930315622, 0.0658409437053185,
0.135850334794207, 0.175517934316736, 0.123213160632528,
0.133598346586129, 0.203707785326976)), .Names = c("Gene",
"Genotype", "Time", "Ave", "SE"), class = c("tbl_df", "tbl",
"data.frame"), row.names = c(NA, -24L))
How can I modify this syntax to generate each graph separately and save them as JPG/PNG files?
# Dodged bars of Ave per time point and genotype, with +/- SE error bars,
# drawn as one panel per gene.
longdata %>%
  ggplot(aes(x = Time, y = Ave, fill = Genotype)) +
  geom_bar(stat = "identity", position = position_dodge()) +
  geom_errorbar(
    aes(ymin = Ave - SE, ymax = Ave + SE),
    position = position_dodge(0.9),
    width = 0.1
  ) +
  facet_wrap(~ Gene)
You can put ggplot and ggsave within a loop.
# Draw one bar chart per gene and save each to "<gene>.png".
lapply(sort(unique(longdata$Gene)), function(i) {
  gene_plot <- ggplot(
    longdata[longdata$Gene == i, ],
    aes(x = Time, y = Ave, fill = Genotype)
  ) +
    geom_bar(position = position_dodge(), stat = "identity") +
    geom_errorbar(
      aes(ymin = Ave - SE, ymax = Ave + SE),
      width = 0.1,
      position = position_dodge(0.9)
    )
  # hand the plot to ggsave() explicitly rather than relying on last_plot()
  ggsave(filename = paste0(i, ".png"), plot = gene_plot)
})
This loop gets the unique elements of Gene, sorts them, creates a plot for each one, and then saves the result.
I have the following data frame and plotting code:
d <- structure(list(a = structure(c(1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L,
1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L), .Label = c("-20", "20-", "40-",
"50-"), class = "factor"), tci = structure(c(1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L), .Label = c("A",
"B"), class = "factor"), Score = c(1, 2, 3, 4, 5,
6, 7, 8, 40.7, 51.9, 14.8, 3.7, 15, 75, 35, 20)), .Names = c("Foo",
"bar", "Score"), row.names = c(NA, -16L), class = "data.frame")
library(ggplot2)
# Dodged bar chart of Score per Foo category, filled by bar.
p <- ggplot(d, aes(x = Foo, y = Score, fill = bar)) +
  geom_bar(stat = "identity", position = "dodge")
What I want to do is to add the p-value bracket on the columns, which looks like this:
But why does this code fail?
> p + geom_path(x=c(1,1,2,2),y=c(42,45,45,42))
Error: Incompatible lengths for set aesthetics: x, y
One solution is to put x and y in the dataframe
# Draw the bracket from its own four-point data frame; inherit.aes = FALSE
# keeps this layer from picking up the aesthetics mapped in `p`.
p + geom_path(
  mapping = aes(x, y),
  data = data.frame(
    x = c(0.75, 0.75, 1.25, 1.25),
    y = c(42, 45, 45, 42)
  ),
  inherit.aes = FALSE
)
Another solution is to use annotate() instead of geom_path().
# Same bracket drawn as an annotation, with the coordinates passed directly.
p + annotate(
  "path",
  x = c(0.75, 0.75, 1.25, 1.25),
  y = c(42, 45, 45, 42)
)
Suppose I have this data.frame:
my.df <- structure(list(mean = c(0.045729661, 0.030416531, 0.043202944,
0.025600973, 0.040526913, 0.046167044, 0.029352414, 0.021477789,
0.027580529, 0.017614864, 0.020324659, 0.027547972, 0.0268722,
0.030804717, 0.021502093, 0.008342398, 0.02295506, 0.022386184,
0.030849534, 0.017291356, 0.030957321, 0.01871551, 0.016945678,
0.014143042, 0.026686185, 0.020877973, 0.028612298, 0.013227244,
0.010710895, 0.024460647, 0.03704981, 0.019832982, 0.031858501,
0.022194059, 0.030575241, 0.024632496, 0.040815748, 0.025595652,
0.023839083, 0.026474704, 0.033000706, 0.044125751, 0.02714219,
0.025724641, 0.020767752, 0.026480009, 0.016794441, 0.00709195
), std.dev = c(0.007455271, 0.006120299, 0.008243454, 0.005552582,
0.006871527, 0.008920899, 0.007137174, 0.00582671, 0.007439398,
0.005265133, 0.006180637, 0.008312494, 0.006628951, 0.005956211,
0.008532386, 0.00613411, 0.005741645, 0.005876588, 0.006640122,
0.005339993, 0.008842722, 0.006246828, 0.005532832, 0.005594483,
0.007268493, 0.006634795, 0.008287031, 0.00588119, 0.004479003,
0.006333063, 0.00803285, 0.006226441, 0.009681048, 0.006457784,
0.006045368, 0.006293256, 0.008062195, 0.00857954, 0.008160441,
0.006830088, 0.008095485, 0.006665062, 0.007437581, 0.008599525,
0.008242957, 0.006379928, 0.007168385, 0.004643819), parent.origin = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("maternal",
"paternal"), class = "factor"), group = structure(c(4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("F1i:F",
"F1i:M", "F1r:F", "F1r:M"), class = "factor"), replicate = c(1,
2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4,
5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1,
2, 3, 4, 5, 6)), .Names = c("mean", "std.dev", "parent.origin",
"group", "replicate"), row.names = c(NA, -48L), class = "data.frame")
Which I'm plotting this way:
library(ggplot2)
# Box glyphs built from precomputed statistics (stat = "identity"): the box
# spans mean +/- 1 std.dev and the whiskers mean +/- 3 std.dev. One panel per
# group, coloured by parental origin, with the legend hidden.
p1 <- ggplot(
  data = my.df,
  aes(factor(replicate), color = factor(parent.origin))
) +
  geom_boxplot(
    aes(
      fill = factor(parent.origin),
      ymin = mean - 3 * std.dev,
      lower = mean - std.dev,
      middle = mean,
      upper = mean + std.dev,
      ymax = mean + 3 * std.dev
    ),
    stat = "identity",
    position = position_dodge(width = 0),
    width = 0.5,
    alpha = 0.5
  ) +
  facet_wrap(~group, ncol = 4) +
  scale_fill_manual(
    values = c("red", "blue"),
    labels = c("maternal", "paternal"),
    name = "parental allele"
  ) +
  scale_colour_manual(
    values = c("red", "blue"),
    labels = c("maternal", "paternal"),
    name = "parental allele"
  ) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_rect(fill = "white", colour = "white"),
    legend.position = "none"
  ) +
  theme(strip.background = element_rect(fill = "white"))
Which produces:
What I'd like to do is add black vertical lines to the left of the left facet, to the right of the right facet (i.e., left and right y axis lines), and another one between the 2nd and 3rd facets - right in the middle.
I know that geom_vline is the function I should be using but I can only get it to add a line for each facet. But what I want is lines that, at least how I see it, are independent of the facets.
Is this possible?
It is possible. Add the following line:
# Add vertical lines from a small data frame that carries the facet variable
# `group`, so each line is drawn only in the panel named on its row. The
# third level of my.df$group is dropped, so that panel gets no line.
p1 <- p1 + geom_vline(
  mapping = aes(xintercept = x),
  data = data.frame(
    x = c(0, 7, 7),
    group = levels(my.df$group)[-3]
  ),
  size = 2
)
I have this data:
datat <- structure(list(Carga = structure(c(2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L), .Label = c("Outra", "88"), class = "factor"),
Categoria = structure(c(1L, 1L, 3L, 3L, 2L, 2L, 1L, 1L, 3L,
3L, 2L, 2L), .Label = c("A", "G", "B"), class = "factor"),
Vagas = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L), .Label = c("Ocupadas", "Autorizadas"), class = "factor"),
Cat.A.88 = c(26, 1, 30, 1, 18, 0, 57, 0, 39, 0, 0, 0)), .Names = c("Carga",
"Categoria", "Vagas", "Cat.A.88"), class = "data.frame", row.names = c(NA,
-12L))
and this plot:
# Dodged bars of Cat.A.88 per Carga, filled by Vagas, one panel per Categoria,
# with the axes flipped so the bars run horizontally.
ggplot(datat, aes(x = Carga, y = Cat.A.88, fill = Vagas)) +
  geom_bar(position = "dodge", stat = "identity") +
  ylab("Vagas") +
  xlab("Carga horária") +
  facet_grid(. ~ Categoria) +
  coord_flip()
The legend colours are in the inverse order compared with the plot colours (the plot has green before red, and the legend has red before green). I want them to appear in the same order. I tried adding the parameter order=-as.numeric(Vagas) in aes(), but it didn't change anything.
This should help:
# Same flipped, dodged bar chart, with the fill legend keys listed in reverse
# order.
ggplot(datat, aes(x = Carga, y = Cat.A.88, fill = Vagas)) +
  geom_bar(stat = "identity", position = "dodge") +
  ylab("Vagas") +
  xlab("Carga horária") +
  facet_grid(. ~ Categoria) +
  coord_flip() +
  # TRUE is spelled out because `T` is an ordinary variable that can be
  # reassigned
  guides(fill = guide_legend(reverse = TRUE))