ggplot2 mean symbol in boxplots - r

I have the following data:
GENDER Addressee_gender_and_age likelihood
Female F20 4
Female F20 5
Male F20 3
Female F20 3
Female F20 4
Male F20 1
I am interested in getting the boxplot
# Boxplots of likelihood for each addressee group, filled by GENDER.
p <- ggplot(
  data = melteddata,
  aes(x = Addressee_gender_and_age, y = likelihood)
) +
  ggtitle("Distribution of the likelihood of complaining by gender") +
  theme(plot.title = element_text(hjust = 0.5)) +
  geom_boxplot(aes(fill = GENDER))

# One facet per addressee group, with the mean drawn as a dark-red diamond.
# The mean is summarised per x value (Addressee_gender_and_age), not per
# GENDER box. `show.legend` replaces the deprecated `show_guide` argument.
p +
  facet_wrap(~ Addressee_gender_and_age, scales = "free") +
  stat_summary(
    fun = mean, colour = "darkred", geom = "point",
    shape = 18, size = 3, show.legend = FALSE
  )
The problem is the mean symbol is given for the entire wrap as follows:

The problem is that you have the wrong variable mapped to the x axis. I first create a reproducible example (https://stackoverflow.com/help/minimal-reproducible-example), and then change aes(x=age, y=x) to aes(x=gender, y=x). In your example it would be GENDER instead of Addressee_gender_and_age.
# Toy data: 40 normal draws spread over five age values and two genders.
Test <- data.frame(
  x = rnorm(40),
  age = rep(c(10, 20, 30, 40, 50), 8),
  gender = rep(c("Male", "Female"), 20)
)
library(ggplot2)

# Reproduces the problem: x is the faceting variable, so stat_summary
# computes a single mean per facet.
ggplot(data = Test, aes(x = age, y = x)) +
  geom_boxplot(aes(fill = gender)) +
  facet_wrap(~ age, scales = "free") +
  stat_summary(
    fun = mean, colour = "darkred", geom = "point",
    shape = 18, size = 3, show.legend = FALSE
  )

# Fix: map gender to x so that each box gets its own mean symbol.
# `show.legend` replaces the deprecated `show_guide` argument.
ggplot(data = Test, aes(x = gender, y = x)) +
  geom_boxplot(aes(fill = gender)) +
  facet_wrap(~ age, scales = "free") +
  stat_summary(
    fun = mean, colour = "darkred", geom = "point",
    shape = 18, size = 3, show.legend = FALSE
  )

Related

how to add trendline for ggplot graph in R

I have a graph with 7 dots and I want to add a trendline.
Thanks for the help!
# Mean estimated unemployment for each AREA.
percentage.no.work <- cleanData %>%
  group_by(AREA) %>%
  summarise(percentage = mean(ESTIMATED.CITY.UNEMPLOYMENT))

# The data and aesthetics are declared in ggplot() so that every layer
# inherits them; previously only geom_point() received them, which left
# geom_smooth() with nothing to fit.
ggplot(percentage.no.work, aes(x = AREA, y = percentage)) +
  geom_point(alpha = 0.6, color = "purple", size = 2) +
  # NOTE(review): group = 1 assumes AREA may be discrete, in which case a
  # single group is needed to fit one line across all areas -- confirm.
  geom_smooth(aes(group = 1), method = "lm") +
  theme_minimal() +
  ggtitle("Percentage Estimated City Unemployment") +
  ylab("Percentage")

How to join plot layers from different dataset in one plot - ggplot2?

I have three datasets; they are three distinct bird assemblages, and I'm trying to merge the test results together in one plot. Each one looks like this:
Example:
library(ggplot2)

# Two example response vectors that share the same grouping (q0, q1, q2).
beta1 <- c(
  0.714286, 0.625, 0.72973, 0.5625, 0.733333, 1, 0.655172, 0.92, 0.769231,
  0.586207, 0.724138, 0.846154, 0.833333, 0.76, 1
)
group <- rep(c("q0", "q1", "q2"), each = 5)
beta2 <- c(
  1.714286, 1.625, 1.72973, 1.5625, 1.733333, 1, 1.655172, 1.92, 1.769231,
  1.586217, 1.724138, 1.846154, 1.833333, 1.76, 1
)
dados1 <- data.frame(beta1, group)
dados2 <- data.frame(beta2, group)

# Group means drawn as a line plus points. `fun` replaces the deprecated
# `fun.y` argument of stat_summary(); aes(group = 1) joins the discrete
# x positions into a single line.
p1 <- ggplot(data = dados1, aes(x = group, y = beta1)) +
  stat_summary(fun = mean, geom = "line", aes(group = 1)) +
  stat_summary(fun = mean, geom = "point") +
  ylim(0, 2)
p2 <- ggplot(data = dados2, aes(x = group, y = beta2)) +
  stat_summary(fun = mean, geom = "line", aes(group = 1)) +
  stat_summary(fun = mean, geom = "point") +
  ylim(0, 2)
the result that I need is like this:
plot_merged
I could do this:
# Attempt at overlaying both data sets in one plot. `fun` replaces the
# deprecated `fun.y`. No group aesthetic is set on the line layers, which
# is presumably why the lines are not drawn (see the answer below).
ggplot() +
  stat_summary(fun = mean, geom = "line", data = dados2, aes(x = group, y = beta2)) +
  stat_summary(fun = mean, geom = "point", data = dados2, aes(x = group, y = beta2)) +
  stat_summary(fun = mean, geom = "line", data = dados1, aes(x = group, y = beta1)) +
  stat_summary(fun = mean, geom = "point", data = dados1, aes(x = group, y = beta1)) +
  ylim(0, 2)
but it is still not enough, because the lines are not drawn...
So I think this will approximately give what you want. We just combine the beta1 and beta2 in 1 data.frame and plot that:
# Give both data sets the same column names and tag each row with its
# origin, then stack them into one data frame.
dados1 <- data.frame(beta = beta1, group, id = "beta1")
dados2 <- data.frame(beta = beta2, group, id = "beta2")
df <- rbind(dados1, dados2)

# group = id draws one line per data set; colour = id tells them apart.
# `fun` replaces the deprecated `fun.y` argument of stat_summary().
ggplot(df, aes(group, beta, colour = id, group = id)) +
  stat_summary(fun = mean, geom = "line") +
  stat_summary(fun = mean, geom = "point") +
  ylim(0, 2)
In addition, I solved another question of mine: "If I want to do the same but with boxplot, how can I do this?"
I tried this:
1.Step - by teunbrand (answer above).
# Stack both data sets into one long data frame; the id column records
# which vector each row came from.
dados1 <- data.frame(beta = beta1, group = group, id = "beta1")
dados2 <- data.frame(beta = beta2, group = group, id = "beta2")
df <- rbind(dados1, dados2)
2.step:
# One box per group and id, coloured by id. The former `facet_grid = id`
# entry was removed from aes(): it is not an aesthetic and had no effect
# on the plot.
ggplot(df, aes(group, beta, colour = id)) +
  geom_boxplot()
Plot
Thank you guys!

How do I color points separately when plotting multiple line graphs with a common x-axis?

I want to make 3 stacked line graphs, L, F & P, all with a common x axis depth, and the points colored by Depo. I have found multiple ways of stacking the graphs but I am struggling to integrate my desired color coding for the points.
Here is some example data - sorry it won't stay formatted as a table for some reason
depth L F P Depo
67.48 1.003 1.063 1.066 Turb
67.63 1.004 1.020 1.024 Dri
67.73 1.011 1.017 1.028 Dri
67.83 1.006 1.007 1.014 Turb
67.92 1.003 1.029 1.032 Pro
68.06 1.004 1.007 1.011 Pro
I can sort of get what I want by making the graphs and then using grid.draw to stack them. But this repeats the x axis values for each graph.
# One plot per measurement against depth; the points are coloured by Depo.
# The point layers inherit the data and x/y mapping from ggplot().
Lin <- ggplot(MyData, aes(x = depth, y = L)) +
  geom_line() +
  geom_point(aes(color = Depo))
Fab <- ggplot(MyData, aes(x = depth, y = P)) +
  geom_path() +
  geom_point(aes(color = Depo))
Fol <- ggplot(MyData, aes(x = depth, y = F)) +
  geom_path() +
  geom_point(aes(color = Depo))

# Stack the three plots vertically in the order Fol, Lin, Fab.
stacked <- rbind(ggplotGrob(Fol), ggplotGrob(Lin), ggplotGrob(Fab), size = "last")
grid.draw(stacked)
The following works to plot the graphs without the x axis being repeated but I can't figure out how to change the points by Depo.
# Reshape L, F and P to long format, keyed by depth. The argument is
# spelled out as `id.vars` instead of relying on partial matching of
# `id.var`.
mm <- melt(subset(MyData, select = c(depth, L, F, P)), id.vars = "depth")

# One facet row per variable, each with its own y scale; the legend is
# hidden.
ggplot(mm, aes(x = depth, y = value)) +
  geom_line(aes(color = variable)) +
  facet_grid(variable ~ ., scales = "free_y") +
  theme(legend.position = "none")
I'd suggest not using melt. In my experience, it's hard to use correctly and mistakes are made easily. I personally prefer the tidyr verbs gather() and spread().
Here's my approach to your graph:
# Gather columns L through P into variable/value pairs (depth and Depo are
# kept as they are), then draw one facet per variable in a single column.
MyData %>%
  gather(key = variable, value = value, L:P) %>%
  ggplot() +
  aes(x = depth, y = value) +
  geom_path() +
  geom_point(aes(color = Depo, shape = Depo), size = 3) +
  facet_wrap(~ variable, ncol = 1, scales = "free_y")
The first step is a gather operation on the L, F and P columns, keeping depth and Depo. My code for the graph is not very different from what you'd already tried.
You can suppress the x-axis for the upper plots:
# Lin and Fol drop their x-axis title, labels and ticks; Fab is drawn last
# (at the bottom of the stack) and keeps its x axis.
Lin <- ggplot(MyData, aes(x = depth, y = L)) +
  geom_line() +
  geom_point(aes(color = Depo)) +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )
Fab <- ggplot(MyData, aes(x = depth, y = P)) +
  geom_path() +
  geom_point(aes(color = Depo))
Fol <- ggplot(MyData, aes(x = depth, y = F)) +
  geom_path() +
  geom_point(aes(color = Depo)) +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )

# Stack the three plots vertically in the order Fol, Lin, Fab.
stacked <- rbind(ggplotGrob(Fol), ggplotGrob(Lin), ggplotGrob(Fab), size = "last")
grid.draw(stacked)

Annotating mean values on a frequency distribution

I want to make grouped frequency distribution plots and annotate the mean value. The most direct way is:
library(dplyr)
library(ggplot2)

# mtcars is a built-in data set: data() loads it, whereas load() expects
# the path of a saved .RData file.
data(mtcars)

# mtcars0 keeps every row and adds the per-cyl mean of mpg;
# mtcars1 holds one row per cyl with that mean.
mtcars0 <- mtcars %>%
  group_by(cyl) %>%
  mutate(MeanMpg = round(mean(mpg), 2))
mtcars1 <- mtcars %>%
  group_by(cyl) %>%
  summarize(MeanMpg = round(mean(mpg), 2))

# Density of mpg per cyl facet with a dashed line at the group mean.
# NOTE(review): `labels` is not defined in this snippet; it is presumably
# a vector of label text created elsewhere -- confirm.
p <- ggplot(mtcars0, aes(mpg, fill = cyl)) +
  facet_wrap(. ~ cyl) +
  geom_density(alpha = .2) +
  geom_vline(data = mtcars1, aes(xintercept = MeanMpg), linetype = "dashed", size = 1) +
  annotate("text", label = labels, size = 4, x = 15, y = 0.26)
p
The problem shows up when I want to put the mean values exactly at the middle of the mean line:
# Same plot, but with the annotation's x position taken from the vector of
# group means; printing this plot produces the error shown below.
p <- ggplot(mtcars0, aes(mpg, fill = cyl)) +
  facet_wrap(. ~ cyl) +
  geom_density(alpha = .2) +
  geom_vline(
    data = mtcars1,
    aes(xintercept = MeanMpg),
    linetype = "dashed",
    size = 1
  ) +
  annotate(
    "text",
    label = labels,
    size = 4,
    x = mtcars1$MeanMpg,
    y = 0.26
  )
> p
Error: Aesthetics must be either length 1 or the same as the data (9): label
In this case R repeats the text and I get an error.
How can I place the label at each mean position in the facets?
You can use your mtcars1 data to specify the position of the labels:
# mtcars1 has one row per cyl, so supplying it as the layer data gives each
# facet a single vertical line and a single label at its own mean.
ggplot(mtcars0, aes(mpg, fill = cyl)) +
  facet_wrap(. ~ cyl) +
  geom_density(alpha = .2) +
  geom_vline(
    data = mtcars1,
    aes(xintercept = MeanMpg),
    linetype = "dashed",
    size = 1
  ) +
  geom_text(
    data = mtcars1,
    aes(x = MeanMpg, y = 0.25, label = MeanMpg)
  )

Extrapolation of non-linear relationships in R (ggplot2)

Assuming this dataset (df):
# Example data: percentage of area by year.
Year <- c(1900, 1920, 1940, 1960, 1980, 2000, 2016)
# c() was missing here, which made the line a syntax error.
Percent <- c(0, 2, 4, 8, 10, 15, 18)
# Build the data frame directly rather than via cbind() + as.data.frame().
df <- data.frame(Year, Percent)
How would it be possible to extrapolate this plotted loess relationship to the years 2040, 2060, 2080, 2100. Using three different scenarios with different slopes to get to a y value (Percent) of 50%?
# Observed points with a loess smooth; the axes extend to 2100 and 60 so
# there is room for an extrapolation. Reference lines mark 50 percent and
# the year 2016.
ggplot(df, aes(x = Year, y = Percent)) +
  geom_smooth(method = "loess", color = "#bdc9e1") +
  geom_point(color = "#2b8cbe", size = 0.5) +
  geom_hline(aes(yintercept = 50)) +
  geom_vline(xintercept = 2016) +
  scale_x_continuous(name = "Year", limits = c(1900, 2100)) +
  scale_y_continuous(name = "Percent of Area", limits = c(0, 60)) +
  theme_bw()
This should work:
library(ggplot2)

# Base plot: observed points, loess smooth and reference lines.
p <- ggplot(data = df, aes(x = Year, y = Percent)) +
  geom_smooth(method = "loess", color = "#bdc9e1") +
  geom_point(color = "#2b8cbe", size = 0.5) +
  theme_bw() +
  scale_y_continuous(limits = c(0, 60), "Percent of Area") +
  scale_x_continuous(limits = c(1900, 2100), "Year") +
  geom_hline(aes(yintercept = 50)) +
  geom_vline(xintercept = 2016)
p

# surface = "direct" is what allows predict() to extrapolate beyond the
# observed range of Year.
model <- loess(Percent ~ Year, df, control = loess.control(surface = "direct"))
newdf <- data.frame(Year = seq(2017, 2100, 1))
predictions <- predict(model, newdata = newdf, se = TRUE)

# Point estimates and a 95% band. The standard errors are stored in
# `se.fit`; the name is spelled out instead of relying on `$se` partial
# matching.
newdf$fit <- predictions$fit
newdf$upper <- predictions$fit + qt(0.975, predictions$df) * predictions$se.fit
newdf$lower <- predictions$fit - qt(0.975, predictions$df) * predictions$se.fit
head(newdf)
# Year fit upper lower
#1 2017 18.42822 32.18557 4.6708718
#2 2018 18.67072 33.36952 3.9719107
#3 2019 18.91375 34.63008 3.1974295
#4 2020 19.15729 35.96444 2.3501436
#5 2021 19.40129 37.37006 1.4325124
#6 2022 19.64571 38.84471 0.4467122

# y = fit is kept in the ribbon mapping because it overrides the inherited
# y = Percent, a column that newdf does not have.
p +
  geom_ribbon(data = newdf, aes(x = Year, y = fit, ymax = upper, ymin = lower), fill = "grey90") +
  geom_line(data = newdf, aes(x = Year, y = fit), color = "steelblue", lwd = 1.2, lty = 2)
A colleague from work offered this solution: Thanks ADAM!
# Fit the loess model with surface = "direct" and predict for 2017-2100.
loess_mod <- loess(
  Perc_area ~ Estab_Yr,
  data = marine_sub,
  control = loess.control(surface = "direct")
)
prd <- data.frame(Estab_Yr = seq(2017, 2100, by = 1))
loess_df <- data.frame(
  Estab_Yr = prd$Estab_Yr,
  Perc_area = predict(loess_mod, newdata = prd)
)

# Then, we can use geom_line and geom_point, but we need to tweak the scale
# on the y-axis to allow for where the predictions in 2017 start (just
# above 60):
ggplot(marine_sub, aes(x = Estab_Yr, y = Perc_area)) +
  geom_smooth(method = "loess", color = "#bdc9e1") +
  geom_point(color = "#2b8cbe", size = 0.5) +
  theme_bw() +
  scale_y_continuous(name = "Percent of Protected Area", limits = c(0, 100)) +
  scale_x_continuous(name = "Year Protected", limits = c(1900, 2100)) +
  geom_hline(aes(yintercept = 50)) +
  geom_vline(xintercept = 2017) +
  # The predictions inherit the x/y mapping from ggplot().
  geom_line(data = loess_df, color = "orange", size = 1) +
  geom_point(data = loess_df, aes(x = Estab_Yr, y = Perc_area), size = .25)

Resources