Move position of text in ggplot - r

Hopefully a pretty simple problem to solve - I have produced the boxplot pictured using the following code
ggplot(minanalysis, aes(x=MinAnaType, y=Calcium)) +
geom_boxplot() +
geom_point(aes(colour=Ploidy)) +
stat_summary(fun.y=mean, colour="black", geom="point", size=2,show.legend = FALSE) +
geom_text(data = means, vjust = -0.25, hjust=-0.3, size = 3, aes(label = round(Calcium, 2),
y = Calcium + 0.08)) +
#add a sample size n =
geom_text(data = minanalysis %>% group_by(MinAnaType) %>%
summarize(Count = n(),
Calcium=max(Calcium) + 0.05 * diff(range(minanalysis$Calcium))),
aes(label = paste0("n = ", Count)),
position = position_dodge(0.85), size=3, show.legend = FALSE) +
# Add pairwise comparisons p-value and global p-value
stat_compare_means(comparisons = my_comparisons, label = "p.signif", label.y = 9500, size = 4) +
stat_compare_means(label.y = 10500, size = 4)
What I want is for the "n = 290" and "n = 50" labels to be located underneath the x-axis labels 'Eviscerated' and 'WholeBody'. Having them above the boxplots themselves is too messy. Could somebody help with the code for this? I'm new to RStudio.
Thanks

The easiest is probably to add a linebreak to your x labels. You can then add your count, either hardcoded, or programmatically.
(This would be within scale_x_...(labels = ...))
E.g.
library(tidyverse)
# Box plot of sepal length for two iris species, with the per-group sample
# size shown on a second line of each x-axis label.
# The counts are hardcoded: iris holds 50 rows per species. To derive them
# programmatically, use e.g. table(iris$Species) or count(iris, Species).
ggplot(filter(iris, Species %in% c("setosa", "virginica")), aes(Species, Sepal.Length)) +
  geom_boxplot() +
  # Named vector maps each factor level to its label; "\n" starts the second
  # label line (no stray space before the line break).
  scale_x_discrete(labels = c(setosa = "setosa\nn = 50",
                              virginica = "virginica\nn = 50"))

Related

How do you replace the points on a box plot with the point's corresponding row number index?

I have a data frame that looks like this:
Train_Table_Time_Power <- data.frame(
Skew = runif(250),
Crest = runif(250),
Kurt = runif(250),
Impulse = runif(250),
TI = sample(c("0.05", "0.10", "0.15", "0.20"), 10, replace = TRUE)
)
I then created a box plot with points using the code below:
Crest_BoxPlot <- ggplot(Train_Table_Time_Power, aes(x = TI, y = Crest, color = TI)) +
geom_boxplot(notch = T, id=TRUE) +
stat_summary(fun = mean, geom="point", shape=19, color="red", size=2) +
geom_jitter(shape=16, position = position_jitter(0.2), size = 0.3) +
labs(title = "Crest_Time", x = "TI", y = "Normalized Magnitude") +
theme_minimal() + theme_Publication()
I would like to somehow have the individual points of the boxplot be replaced with their row number index, however, I can't seem to figure out a way how. Could someone direct me on how to do this, if it is indeed possible?
Just use geom_text() instead of geom_jitter(), but be aware that readability is limited due to the overlapping labels.
# add the row number as column
library(tibble)
# Store each observation's row number in a `rowid` column so it can be mapped
# to the text label.
Train_Table_Time_Power <- rowid_to_column(Train_Table_Time_Power)
# Notched box plot of Crest per TI level; the jittered points are replaced by
# jittered row numbers (label = rowid).
ggplot(Train_Table_Time_Power, aes(x = TI, y = Crest, color = TI, label = rowid)) +
  # `id` is not a geom_boxplot() parameter (ggplot2 only warns and ignores
  # it), so it is dropped; TRUE is spelled out because T can be reassigned.
  geom_boxplot(notch = TRUE) +
  # Red dot marks the group mean.
  stat_summary(fun = mean, geom = "point", shape = 19, color = "red", size = 2) +
  geom_text(position = position_jitter(0.2)) +
  labs(title = "Crest_Time", x = "TI", y = "Normalized Magnitude") +
  theme_minimal()

Bar plot ggplot2 - Error: Aesthetics must be either length 1 or the same as the data (150): fill, x, y

Hey, I know there are lots of questions about this particular error, but I still can't find what is wrong. I'm pretty new to R and coding in general.
Here is a link to my data:
https://www.dropbox.com/s/qfo5rp7ywgsgxhy/CRERDATA.csv?dl=0
and here is my code to make the graph
not all used for graph obviously
library(car)
library(ggplot2)
library(Rmisc)
library(dunn.test)
library(FSA)
summarizes my data so i can get standard error bars
index_sum <- summarySE(CRERDATA, measurevar = "index", groupvars = c("site", "scenario"), na.rm = TRUE)
graph code
index_graph <- ggplot(CRERDATA, aes(x = index_sum$site, y = index_sum$index, fill = index_sum$scenario)) +
geom_bar(aes(fill = index_sum$scenario), position = position_dodge(), stat="identity") + ylab("Bleaching index") + xlab("Sites") +
labs(fill = "scenario") + scale_fill_grey() + theme_minimal() +
geom_errorbar(aes(ymin = index_sum$index-se, ymax = index_sum$index+se), width = .2, position = position_dodge(.9), color = "red")
You have specified CRERDATA in the ggplot as data, when you are actually using index_sum.
Load necessary packages:
library(ggplot2)
library(Rmisc)
Read data and summarise:
CRERDATA <- read.csv('CRERDATA.csv')
index_sum <- summarySE(CRERDATA, measurevar = "index", groupvars = c("site", "scenario"), na.rm = TRUE)
Instead of CRERDATA, use index_sum. You then don't need to use the dollar sign $ to access columns:
# Dodged bar chart of the mean bleaching index per site and scenario, with
# red +/- standard-error bars. All columns come from the summarised data
# frame, so they are referenced by bare name.
ggplot(data = index_sum, mapping = aes(x = site, y = index, fill = scenario)) +
  # stat = "identity": bar heights are the precomputed values in `index`;
  # the fill mapping is inherited from the main ggplot() call.
  geom_bar(stat = "identity", position = position_dodge()) +
  geom_errorbar(
    aes(ymin = index - se, ymax = index + se),
    position = position_dodge(.9), width = .2, color = "red"
  ) +
  scale_fill_grey() +
  labs(x = "Sites", y = "Bleaching index", fill = "scenario") +
  theme_minimal()
The result:

adding summary statistics to two factor boxplot

I would like to add summary statistics (e.g. the mean) to a boxplot which has two factors. I have tried this:
library(ggplot2)
ggplot(ToothGrowth, aes(x = factor(dose), y = len)) +
stat_boxplot(geom = "errorbar", aes(col = supp, fill=supp), position = position_dodge(width = 0.85)) +
geom_boxplot(aes(col = supp, fill=supp), notch=T, notchwidth = 0.5, outlier.size=2, position = position_dodge(width = 0.85)) +
stat_summary(fun.y=mean, aes(supp,dose), geom="point", shape=20, size=7, color="violet", fill="violet") +
scale_color_manual(name = "SUPP", values = c("blue", "darkgreen")) +
scale_fill_manual(name = "SUPP", values = c("lightblue", "green"))
I got this picture:
Is it possible to somehow add the sample size of each box (e.g. at the top of the whiskers)? I have tried this:
ggplot(ToothGrowth, aes(x = factor(dose), y = len)) +
stat_boxplot(geom = "errorbar", aes(col = supp, fill=supp), position = position_dodge(width = 0.85)) +
geom_boxplot(aes(col = supp, fill=supp), notch=T, notchwidth = 0.5, outlier.size=2, position = position_dodge(width = 0.85)) +
stat_summary(fun.y=mean,aes(supp,dose),geom="point", shape=20, size=7, color="violet", fill="violet") +
scale_color_manual(name = "SUPP", values = c("blue", "darkgreen")) +
scale_fill_manual(name = "SUPP", values = c("lightblue", "green")) +
geom_text(data = ToothGrowth,
group_by(dose, supp),
summarize(Count = n(),
q3 = quantile(ToothGrowth, 0.75),
iqr = IQR(ToothGrowth),
aes(x= dose, y = len,label = paste0("n = ",Count, "\n")), position = position_dodge(width = 0.75)))
You can state the aesthetics just once by putting them in the main ggplot call and then they will apply to all of the geom layers: ggplot(ToothGrowth, aes(x = factor(dose), y = len, color=supp, fill=supp))
For the count of observations: The data summary step in geom_text isn't coded properly. Also, to set len (the y-value) for the text placement, the summarize function needs to output values for len.
To add the mean values in the correct locations on the x-axis, use stat_summary with the exact same aesthetics as the other geoms and stats. I've overridden the color aesthetic by setting the color to yellow so that the point markers will be visible on top of the box plot fill colors.
The code to implement the plot is below:
library(tidyverse)
# Shared dodge so that whiskers, boxes, mean markers and count labels all line
# up within each dose group.
pd <- position_dodge(0.85)
ggplot(ToothGrowth, aes(x = factor(dose), y = len, color = supp, fill = supp)) +
  stat_boxplot(geom = "errorbar", position = pd) +
  geom_boxplot(notch = TRUE, notchwidth = 0.5, outlier.size = 2, position = pd) +
  # `fun` replaces the `fun.y` argument, which is deprecated since
  # ggplot2 3.3.0. The colour is overridden so the mean marker stays visible
  # on top of the box fill.
  stat_summary(fun = mean, geom = "point", shape = 3, size = 2, colour = "yellow",
               stroke = 1.5, position = pd, show.legend = FALSE) +
  scale_color_manual(name = "SUPP", values = c("blue", "darkgreen")) +
  scale_fill_manual(name = "SUPP", values = c("lightblue", "green")) +
  # One label per dose/supp box, placed 5% of the overall `len` range above
  # that box's maximum. The summary must output `len` because it is the
  # y aesthetic inherited from the main ggplot() call.
  geom_text(data = ToothGrowth %>%
              group_by(dose, supp) %>%
              summarize(Count = n(),
                        len = max(len) + 0.05 * diff(range(ToothGrowth$len))),
            aes(label = paste0("n = ", Count)),
            position = pd, size = 3, show.legend = FALSE) +
  theme_bw()
Note that the notch goes outside the hinges for all of the box plots. Also, having the sample size just above the maximum of each boxplot seems distracting and unnecessary to me. You could place all of the text annotations at the bottom of the plot like this:
geom_text(data = ToothGrowth %>% group_by(dose, supp) %>%
summarize(Count = n()) %>%
ungroup %>%
mutate(len=min(ToothGrowth$len) - 0.05 * diff(range(ToothGrowth$len))),
aes(label = paste0("n = ", Count)),
position = pd, size=3, show.legend = FALSE) +

Aesthetics must be either length 1 or the same as the data (1): x, y, label

I'm working on some data on party polarization (something like this) and used geom_dumbbell from ggalt and ggplot2. I keep getting the same aes error and other solutions in the forum did not address this as effectively. This is my sample data.
df <- data_frame(policy=c("Not enough restrictions on gun ownership", "Climate change is an immediate threat", "Abortion should be illegal"),
Democrats=c(0.54, 0.82, 0.30),
Republicans=c(0.23, 0.38, 0.40),
diff=sprintf("+%d", as.integer((Democrats-Republicans)*100)))
I wanted to keep order of the plot, so converted policy to factor and wanted % to be shown only on the first line.
df <- arrange(df, desc(diff))
df$policy <- factor(df$policy, levels=rev(df$policy))
# Format proportions as whole-number percentages, keeping the "%" sign only on
# the first element (e.g. c(0.5, 0.25) -> "50%", "25").
#   x: numeric vector of proportions (0.5 means 50%).
# Returns a character vector of the same length as x.
percent_first <- function(x) {
  x <- sprintf("%d%%", round(x*100))
  # Negative indexing selects "everything after the first element" and is
  # empty for inputs of length 0 or 1. 2:length(x) would count backwards
  # there, stripping the "%" from a single value and appending NA entries.
  x[-1] <- sub("%$", "", x[-1])
  x
}
Then I used ggplot that rendered something close to what I wanted.
gg2 <- ggplot()
gg2 <- gg + geom_segment(data = df, aes(y=country, yend=country, x=0, xend=1), color = "#b2b2b2", size = 0.15)
# making the dumbbell
gg2 <- gg + geom_dumbbell(data=df, aes(y=country, x=Democrats, xend=Republicans),
size=1.5, color = "#B2B2B2", point.size.l=3, point.size.r=3,
point.color.l = "#9FB059", point.color.r = "#EDAE52")
I then wanted the dumbbell to read Democrat and Republican on top to label the two points (like this). This is where I get the error.
gg2 <- gg + geom_text(data=filter(df, country=="Government will not control gun violence"),
aes(x=Democrats, y=country, label="Democrats"),
color="#9fb059", size=3, vjust=-2, fontface="bold", family="Calibri")
gg2 <- gg + geom_text(data=filter(df, country=="Government will not control gun violence"),
aes(x=Republicans, y=country, label="Republicans"),
color="#edae52", size=3, vjust=-2, fontface="bold", family="Calibri")
Any thoughts on what I might be doing wrong?
I think it would be easier to build your own "dumbbells" with geom_segment() and geom_point(). Working with your df and changing the variable references from "country" to "policy":
library(tidyverse)
# gather data into long form to make ggplot happy
df2 <- gather(df,"party", "value", Democrats:Republicans)
ggplot(data = df2, aes(y = policy, x = value, color = party)) +
# our dumbbell
geom_path(aes(group = policy), color = "#b2b2b2", size = 2) +
geom_point(size = 7, show.legend = FALSE) +
# the text labels
geom_text(aes(label = party), vjust = -1.5) + # use vjust to shift text up to no overlap
scale_color_manual(values = c("Democrats" = "blue", "Republicans" = "red")) + # named vector to map colors to values in df2
scale_x_continuous(limits = c(0,1), labels = scales::percent) # use library(scales) nice math instead of pasting
Produces this plot:
Which has some overlapping labels. I think you could avoid that if you use just the first letter of party like this:
ggplot(data = df2, aes(y = policy, x = value, color = party)) +
geom_path(aes(group = policy), color = "#b2b2b2", size = 2) +
geom_point(size = 7, show.legend = FALSE) +
geom_text(aes(label = gsub("^(\\D).*", "\\1", party)), vjust = -1.5) + # just the first letter instead
scale_color_manual(values = c("Democrats" = "blue", "Republicans" = "red"),
guide = "none") +
scale_x_continuous(limits = c(0,1), labels = scales::percent)
Only label the top issue with names:
ggplot(data = df2, aes(y = policy, x = value, color = party)) +
geom_path(aes(group = policy), color = "#b2b2b2", size = 2) +
geom_point(size = 7, show.legend = FALSE) +
geom_text(data = filter(df2, policy == "Not enough restrictions on gun ownership"),
aes(label = party), vjust = -1.5) +
scale_color_manual(values = c("Democrats" = "blue", "Republicans" = "red")) +
scale_x_continuous(limits = c(0,1), labels = scales::percent)

How can I add mean labels to a bar chart?

I would like to add the mean of each condition at the base of my bar chart in R. The final product looks something like this in excel (note the means are displayed at the base of each bar):
My current code is as follows:
pmrtBar <- ggplot(reslagdfClean,aes(x=Condition, y=PMMissProp*100)) + stat_summary(fun.y = mean, geom = "bar", fill = cbPalette) +
theme(axis.title=element_text(size=12)) +
stat_summary(fun.data = mean_cl_normal, geom = "errorbar", width=.1, size = .25) +
coord_cartesian(ylim=c(0,50)) +
labs(x = "Condition", y="Mean Mean Miss Proportion (%)") +
apatheme
pmrtBar
I am new to the R environment. Any feedback on the code above is also appreciated.
It's always good to add a reproducible example to your question.
Converting my comment to an answer with the use of some example data:
# example data
dat <- data.frame(id = c("ACT","Blank","None"),
mn = c(0.3833,0.38,0.4033),
se = c(0.1,0.15,0.12))
# creating the plot
ggplot(dat, aes(x=id, y=mn, fill=id)) +
geom_bar(stat="identity", width=0.7) +
geom_errorbar(aes(ymax = mn + se, ymin = mn - se), width=0.25) +
geom_text(aes(y = 0.2, label = paste(mn*100, "%"))) +
labs(x = "\nCondition", y = "Proportion (%)\n") +
scale_y_continuous(breaks = seq(0.15, 0.55, 0.05), labels = scales::percent) +
coord_cartesian(ylim = c(0.15,0.55)) +
theme_minimal(base_size = 14) +
theme(panel.grid.major.y = element_line(linetype = 2, color = "grey80", size = 1))
which results in:

Resources