to show mean value in ggplot box plot - r

I need to be able to show the mean value in ggplot box plot. Below works for a point but I need the white dashed lines? Any body help?
x
Team Value
A 10
B 5
C 29
D 35
ggplot(aes(x = Team , y = Value), data = x)
+ geom_boxplot (aes(fill=Team), alpha=.25, width=0.5, position = position_dodge(width = .9))
+ stat_summary(fun.y=mean, colour="red", geom="point")

Here's my way of adding mean to boxplots:
ggplot(RQA, aes(x = Type, y = engagementPercent)) +
geom_boxplot(aes(fill = Type),alpha = .6,size = 1) +
scale_fill_brewer(palette = "Set2") +
stat_summary(fun.y = "mean", geom = "text", label="----", size= 10, color= "white") +
ggtitle("Participation distribution by type") +
theme(axis.title.y=element_blank()) + theme(axis.title.x=element_blank())
ggplot(df, aes(x = Type, y = scorepercent)) +
geom_boxplot(aes(fill = Type),alpha = .6,size = 1) +
scale_fill_brewer(palette = "Set2") +
stat_summary(fun.y = "mean", geom = "point", shape= 23, size= 3, fill= "white") +
ggtitle("score distribution by type") +
theme(axis.title.y=element_blank()) + theme(axis.title.x=element_blank())
I would caution against using text to this and do geom_line instead as text is offset slightly and gives the wrong portrayal of the mean.
Hey user1471980, I think people are more inclined to help if you have a unique user name but then again you have a lot of points :)

this is a hack but does this help:
Value<-c(1,2,3,4,5,6)
Team<-c("a","a","a","b","b","b")
x<-data.frame(Team,Value) #note means for a=2, mean for b=5
ggplot(aes(x = Team , y = Value), data = x) + geom_boxplot (aes(fill=Team), alpha=.25, width=0.5, position = position_dodge(width = .9)) +
annotate(geom="text", x=1, y=2, label="----", colour="white", size=7, fontface="bold", angle=0) +
annotate(geom="text", x=2, y=5, label="----", colour="white", size=7, fontface="bold", angle=0)

Related

Merge legend in ggplot when the geoms are different

I have the following code which yields the figure below:
ggplot(data=data.frame(x=x, y=y, mass=mass)) +
geom_line(mapping = aes(x=x, y=y, linetype='Gompertz predicted mass', col='Gompertz predicted mass')) +
geom_point(mapping = aes(x=x, y=mass, shape='Actual mass',col='Actual mass')) +
theme_bw() +
ylab('Mass') +
xlab('t') +
scale_color_manual(name='',values = c("black",'red')) +
scale_linetype_manual(name='',values = c("solid")) +
scale_shape_manual(name='', values = c(19)) +
scale_x_continuous(breaks=seq(4,26,2)) +
ylim(c(0, 20000)) +
ggtitle('Problem 3: Plot of tumor mass with time')
Notice how the legend is separated. I'd like to merge it for shape and color. When the geoms are the same, the technique of using scale_something_manual works perfectly fine to merge the legends. However, I'm having trouble with it here since I have two different geoms.
The problem is similar to the one described in https://github.com/tidyverse/ggplot2/issues/3648. There is no elegant solution at the moment. Because you haven't included any data, I've presumed that your problem is conceptually similar to the plot below:
library(ggplot2)
ggplot(mpg, aes(displ, hwy)) +
geom_point(aes(shape = "Point", colour = "Point")) +
geom_smooth(aes(linetype = "Line", colour = "Line"),
formula = y ~ x, se = FALSE, method = "loess") +
scale_colour_manual(values = c("red", "black")) +
scale_linetype_manual(values = "solid") +
scale_shape_manual(values = 19)
The way to fix the problem is to get rid of the linetype and shape aesthetics and scales, and instead override aesthetics at the level of the legend.
ggplot(mpg, aes(displ, hwy)) +
geom_point(aes(colour = "Point")) +
geom_smooth(aes(colour = "Line"),
formula = y ~ x, se = FALSE, method = "loess") +
scale_colour_manual(
values = c("red", "black"),
guide = guide_legend(override.aes = list(shape = c(NA, 19),
linetype = c(1, NA)))
)
Created on 2021-09-04 by the reprex package (v2.0.1)

Removing points from geom_bar legend ggplot r

This is my data.
Mod <- as.factor(c(rep("GLM",5),rep("MLP",5),rep("RF",5),rep("SVML",5),rep("SVMR",5)))
Manifold <- as.factor(rep(c("LLE","Iso","PCA","MDS","kPCA"),5))
ROC <- runif(25,0,1)
Sens <- runif(25,0,1)
Spec <- runif(25,0,1)
df <- data.frame("Mod"= Mod, "Manifold"= Manifold, "ROC" = ROC, "Sens" = sens, "Spec" = spec)
And I am making this graph
resul3 <- ggplot(df, aes(x = Mod, y = ROC, fill= Manifold)) +
geom_bar(stat = "identity", position = "dodge", color = "black") +
ylab("ROC & Specificity") +
xlab("Classifiers") +
theme_bw() +
ggtitle("Classifiers' ROC per Feature Extraction Plasma") +
geom_point(aes(y=Spec), color = "black", position=position_dodge(.9)) +
scale_fill_manual(name = "Feature \nExtraction", values = c("#FFEFCA",
"#EDA16A" ,"#C83741", "#6C283D", "#62BF94"))
first graph
And what I want is another legend with tittle "Specificity" and a single black point. I dont want the point to be inside the Manifolds legend.
Something like this but without the points inside the manifold squares
Changing the geom_point line, adding a scale_color_manual and using the override as seen in #drmariod's answer will result in this plot:
ggplot(df, aes(x = Mod, y = ROC, fill= Manifold)) +
geom_bar(stat = "identity", position = "dodge", color = "black") +
ylab("ROC & Specificity") +
xlab("Classifiers") +
theme_bw() +
ggtitle("Classifiers' ROC per Feature Extraction Plasma") +
geom_point(aes(y=Spec, color = "Specificity"), position=position_dodge(.9)) +
scale_fill_manual(name = "Feature \nExtraction", values = c("#FFEFCA",
"#EDA16A" ,"#C83741", "#6C283D", "#62BF94")) +
scale_color_manual(name = NULL, values = c("Specificity" = "black")) +
guides(fill = guide_legend(override.aes = list(shape = NA)))
You can overwrite the aesthetics for shape and set it to NA like this
ggplot(df, aes(x = Mod, y = ROC, fill= Manifold)) +
geom_bar(stat = "identity", position = "dodge", color = "black") +
ylab("ROC & Specificity") +
xlab("Classifiers") +
theme_bw() +
ggtitle("Classifiers' ROC per Feature Extraction Plasma") +
geom_point(aes(y=Spec), color = "black", position=position_dodge(.9)) +
scale_fill_manual(name = "Feature \nExtraction", values = c("#FFEFCA",
"#EDA16A" ,"#C83741", "#6C283D", "#62BF94")) +
guides(fill = guide_legend(override.aes = list(shape = NA)))

how to combine 2 DVs in one graph using ggplot

I am trying to combine 2 dependent variables (or 2 graphs) in one graph using ggplot function. All the suggestions I could find online were not really helpful in my case.
Graph1 <- ggplot(mydata, aes(age, conf))
Graph1 + stat_summary(fun.y = mean, geom = "point") +
stat_summary(fun.y = mean, geom = "line", aes(group = 1)) +
stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.2) +
labs(x = "Age Group", y = "Accuracy (%)") + ylim(0, 1)
Graph2 <- ggplot(mydata, aes(age, acc))
Graph2 + stat_summary(fun.y = percent(1), geom = "point") +
stat_summary(fun.y = mean, geom = "line", aes(group = 1), linetype = "dashed") +
stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.2) +
labs(x = "Age Group", y = "Accuracy (%)") + ylim(0, 1)
In addition to this, I will need to have the means and error bars not overlapping. Any advice would be greatly appreciated.
After further investigation I have found the following suggestion which seems to be a great solution. However, I cannot install tidyr as it incompatible with the current R version. I have tried different options to download the package, without success.
library(tidyr)
home.land.byyear <- gather(housing.byyear, value = "value", key = "type",
Home.Value, Land.Value)
ggplot(home.land.byyear, aes(x=Date, y=value, color=type)) + geom_line()
see http://tutorials.iq.harvard.edu/R/Rgraphics/Rgraphics.html

changing the color palette in ggplot

I have created ggplot from my data (sample below):
I have created a violin plot of the NKV with the individual NKV data points plotted over it. I want to differentiate betweeen which PID my datapoints belong to. So far so good:
violin.murgang <- ggplot(nkv.murgang, aes(x = factor("Murgang"), nkv.murgang$NK)) +
geom_violin(color = "black", fill = "darkorange") +
ggtitle("NKV Murgang - Einfamilienhaus") +
labs(x = "Prozess", y = "Nutzen / Konsten \n Verhälhniss") +
stat_summary(geom = "text", fun.y = quantile,
aes(label=sprintf("%1.1f", ..y..)),
position=position_nudge(x=0.4), size=3) +
theme (legend.position = "none") +
stat_summary(fun.data = give.n, geom = "text", position=position_nudge(x=-0.4)) +
geom_jitter(aes(col = PID ), width = 0.35)
violin.murgang
The problem is that all the NKV data points are only visualized in different shade of blue. I would like to have different colours. I have tried adding this:
scale_colour_brewer(palette="Spectral")
which yields the error:
Error: Continuous value supplied to discrete scale
How can i achieve having different colour for the geom_jitter part?
What causes the error?
Thanks!
If you PID have more levels than colors of 'Spectral' palette, you could try scale_color_distiller, which extends brewer colors to continuous scale, see the manual of scale_color_distiller:
# Use distiller variant with continous data
v <- ggplot(faithfuld) +
geom_tile(aes(waiting, eruptions, fill = density))
v
v + scale_fill_distiller()
v + scale_fill_distiller(palette = "Spectral")
Therefore, we could try:
ggplot(nkv.murgang, aes(x = factor("Murgang"), nkv.murgang$NK)) +
geom_violin(color = "black", fill = "darkorange") +
ggtitle("NKV Murgang - Einfamilienhaus") +
labs(x = "Prozess", y = "Nutzen / Konsten \n Verhälhniss") +
stat_summary(geom = "text", fun.y = quantile,
aes(label=sprintf("%1.1f", ..y..)),
position=position_nudge(x=0.4), size=3) +
theme (legend.position = "none") +
geom_jitter(aes(color = PID), width = 0.35) +
scale_color_distiller(palette = "Spectral")
If you data has a few levels, we could use discrete scales. PID is integer, which does work with discrete scales. You should convert it to character or factor first:
ggplot(nkv.murgang, aes(x = factor("Murgang"), nkv.murgang$NK)) +
geom_violin(color = "black", fill = "darkorange") +
ggtitle("NKV Murgang - Einfamilienhaus") +
labs(x = "Prozess", y = "Nutzen / Konsten \n Verhälhniss") +
stat_summary(geom = "text", fun.y = quantile,
aes(label=sprintf("%1.1f", ..y..)),
position=position_nudge(x=0.4), size=3) +
theme (legend.position = "none") +
geom_jitter(aes(color = as.factor(PID) ), width = 0.35) +
scale_color_brewer(palette = "Spectral")

How can I add mean labels to a bar chart?

I would like to add the mean of each condition at the base of my bar chart in R. The final product looks something like this in excel (note the means are displayed at the base of each bar):
My current code is as follows:
pmrtBar <- ggplot(reslagdfClean,aes(x=Condition, y=PMMissProp*100)) + stat_summary(fun.y = mean, geom = "bar", fill = cbPalette) +
theme(axis.title=element_text(size=12)) +
stat_summary(fun.data = mean_cl_normal, geom = "errorbar", width=.1, size = .25) +
coord_cartesian(ylim=c(0,50)) +
labs(x = "Condition", y="Mean Mean Miss Proportion (%)") +
apatheme
pmrtBar
I am new the R environment. Any feedback on the code above is also appreciated.
It's always good to add a reproducible example to your question.
Converting my comment to an answer with the use of some example data:
# example data
dat <- data.frame(id = c("ACT","Blank","None"),
mn = c(0.3833,0.38,0.4033),
se = c(0.1,0.15,0.12))
# creating the plot
ggplot(dat, aes(x=id, y=mn, fill=id)) +
geom_bar(stat="identity", width=0.7) +
geom_errorbar(aes(ymax = mn + se, ymin = mn - se), width=0.25) +
geom_text(aes(y = 0.2, label = paste(mn*100, "%"))) +
labs(x = "\nCondition", y = "Proportion (%)\n") +
scale_y_continuous(breaks = seq(0.15, 0.55, 0.05), labels = scales::percent) +
coord_cartesian(ylim = c(0.15,0.55)) +
theme_minimal(base_size = 14) +
theme(panel.grid.major.y = element_line(linetype = 2, color = "grey80", size = 1))
which results in:

Resources