how to put mean lines in ggplot? - r

I am try to do a plot whith means lines by group (in ggplot), and i have my code like this
ggplot(gama, aes(x = distancia, y= glipidoscmtejido, colour= estado)) +
geom_point(position=position_dodge(.5), alpha= 1, size=3) +
geom_crossbar(data=gama,aes(x=distancia,ymin=mean(glipidoscmtejido),
ymax=mean(glipidoscmtejido),y=mean(glipidoscmtejido),colour=estado), width = 0.5)
and I get this
but i need the mean line by distancia and by estado. how can i make it?
thanks.

How about this using stat_summary to plot the means per estado per distancia:
# Generate some sample data
set.seed(2017);
df <- cbind.data.frame(
x = rnorm(100),
estado = sample(c("sana", "lesionada"), 100, replace = T),
distancia = sample(c("0-1", "2.5-3.5", "5.6"), 100, replace = T));
require(ggplot2);
ggplot(df, aes(x = distancia, y = x, colour = estado)) +
geom_point(position = position_dodge(width = 0.3)) +
stat_summary(
fun.y = mean,
geom = "errorbar",
aes(ymax = ..y.., ymin = ..y..),
position = position_dodge(width = 0.3),
width = 0.25);

Related

stat_summary() and fun.data = mean_sdl not working

set.seed(1) # generate random data
day1 = rnorm(20,0,1)
day2 = rnorm(20,5,1)
Subject <- rep(paste0('S',seq(1:20)), 2)
Data <- data.frame(Value = matrix(c(day1,day2),ncol=1))
Day <- rep(c('Day 1', 'Day 2'), each = length(day1))
df <- cbind(Subject, Data, Day)
Using this random data, I'd like to plot individual points with unique color for each subject and a summary point (mean + standard deviation).
It seems that the plot is okay when all points are plotted with the same color because stat_summary(fun.data = mean_sdl) works properly.
ggplot(data = df, mapping = aes(x= Day, y =Value)) +
stat_summary(fun.data = mean_sdl, fun.args = list(mult = 2),
geom = 'pointrange', fatten = 3*1.2, size = 1.2,
color= 'black') +
geom_point(size = 2)
But not when all points have unique color (for each subject).
ggplot(data = df, mapping = aes(x = Day, y = Value,
fill = Subject)) +
stat_summary(fun.data = mean_sdl, fun.args = list(mult = 2),
geom = 'pointrange', fatten = 3*1.2, size = 1.2,
color = 'black') +
geom_point(shape = 21, color = 'white', size = 2)
In your example ggplot assumes that each color corresponds to an individual group, but you want the grouping and color to be separate. Therefore, you need to explicitly define the group to be "Day".
ggplot(data = df, mapping = aes(x = Day, y = Value,
fill = Subject, group = Day)) +
stat_summary(fun.data = mean_sdl, fun.args = list(mult = 2),
geom = 'pointrange', fatten = 3*1.2, size = 1.2,
color = 'black') +
geom_point(shape = 21, color = 'white', size = 2)
Try the following:
ggplot(data = df, mapping = aes(x= Day, y =Value)) +
stat_summary(fun.data = mean_sdl, fun.args = list(mult = 2),
geom = 'pointrange', fatten = 3*1.2, size = 1.2,
color= 'black') +
geom_point(size = 2, aes(color = Subject))
Instead of specifying fill in aes() in the first line (ggplot(...)), I've moved it to the geom_point() element instead. Otherwise, stat_summary() will be doing its calculations grouped using Subject!

plot group means using multiple columns in ggplot2 (R)

I have a dataset that has a group column and a parameter estimate column for that group. I want to plot the estimates and CIs on the yaxis, and have separate rows for each group and parameter estimate. I know that I can create a composite column from the group and paramter esimate column, but I am wondering if there is a way in ggplot2 to keep the columns separate.
Here is some example code.
group1 = c(rep("A",3),rep("B",3),rep("C",3))
group2 = c(rep(c("param1","param2","param3"),3))
est = rnorm(9,mean = 0, sd = 1)
lwr = est - sd(est)
upr = est + sd(est)
df = data.frame(group1,group2,est,lwr,upr)
figure.gg = ggplot(data = df, aes(x = group2, y = est, ymin = lwr, ymax = upr)) +
geom_point(position = position_dodge(width = 0.5)) +
geom_errorbar(position = position_dodge(width = 0.5), width = 0.1) +
coord_flip() +
ylab("estimate")
figure.gg
I hope I have understood this correctly please add a comment to clarify if this is not the case.
One solution would be to use facets to separate the plot the rows. To do this I have used the facet_wrap() function to separate the different group 2s and set the y axis to use the different group 1s.
library(tidyverse)
group1 = c(rep("A",3),rep("B",3),rep("C",3))
group2 = c(rep(c("param1","param2","param3"),3))
est = rnorm(9,mean = 0, sd = 1)
lwr = est - sd(est)
upr = est + sd(est)
df = data.frame(group1,group2,est,lwr,upr)
# Swapped the x axis to use group1
figure.gg = ggplot(data = df, aes(x = group1, y = est, ymin = lwr, ymax = upr)) +
geom_point(position = position_dodge(width = 0.5)) +
geom_errorbar(position = position_dodge(width = 0.5), width = 0.1) +
coord_flip() +
# Facet wrapped with one column using group 2s
facet_wrap(~group2, ncol = 1, strip.position = "right") +
ylab("estimate")
figure.gg
Created on 2021-04-05 by the reprex package (v2.0.0)
Alternatively we could use a secondary grouping to separate the different entries such as group = group1 or colour = group1 which are set within aes(). These are presented below
# Set group 1 as a group in aes
figure.gg = ggplot(data = df, aes(x = group2, group = group1, y = est, ymin = lwr, ymax = upr)) +
geom_point(position = position_dodge(width = 0.5)) +
geom_errorbar(position = position_dodge(width = 0.5), width = 0.1) +
coord_flip() +
ylab("estimate")
figure.gg
# Set group1 as the colour using aes
figure.gg = ggplot(data = df, aes(x = group2, colour = group1, y = est, ymin = lwr, ymax = upr)) +
geom_point(position = position_dodge(width = 0.5)) +
geom_errorbar(position = position_dodge(width = 0.5), width = 0.1) +
coord_flip() +
ylab("estimate")
figure.gg
Created on 2021-04-05 by the reprex package (v2.0.0)
I think this makes more sense:
ggplot(df1, aes(x=group1, y=est, colour=group2)) +
geom_errorbar(aes(ymin=lwr, ymax=upr), position = position_dodge(width = 0.5), width = 0.1) +
geom_point(position = position_dodge(width = 0.5)) +
theme_bw()

R ggpubr Boxplot adding summary stats label to dynamic Y axis

I would like to add summary statistics on a box plot at the max of a dynamic y axis.
In the real data the y axis is a dynamic dropdown, one value is between 0 - 6; and the other between 0 - 100. In the example below I have hard coded where I would like the labels to be, but I cannot hard code them in the real data.
Is there a way to either:
Set labels outside the graph above the y axis? So that the labels will not move even if the axis changes?
Or is there a way to set it to max of Y + n?
Example:
# library
library(ggplot2)
library(ggpubr)
# create a data frame
variety=rep(LETTERS[1:7], each=40)
treatment=rep(c("high","low"),each=20)
note=seq(1:280)+sample(1:150, 280, replace=T)
data=data.frame(variety, treatment , note)
# grouped boxplot
ggplot(data, aes(x = variety, y = note, fill = treatment)) +
geom_boxplot() +
scale_fill_manual(values = c("#79AAB9", "#467786")) +
stat_compare_means(aes(group = treatment), label = "p.format") +
stat_summary(
fun.data = function(x)
data.frame(y = 460, label = paste(round(median(
x
), 1))),
geom = "text",
aes(group = treatment),
hjust = 0.5,
position = position_dodge(0.9)
) +
stat_summary(
fun.data = function(x)
data.frame(y = 445, label = paste("n", length(x))),
geom = "text",
aes(group = treatment),
hjust = 0.5,
position = position_dodge(0.9)
) +
expand_limits(y = 100)
Thanks so much for any help in advance.
Managed to get the following working with suggestion from #MarkNeal
# library
library(ggplot2)
library(ggpubr)
# create a data frame
variety=rep(LETTERS[1:7], each=40)
treatment=rep(c("high","low"),each=20)
note=seq(1:280)+sample(1:150, 280, replace=T)
data=data.frame(variety, treatment , note)
# grouped boxplot
ggplot(data, aes(x = variety, y = note, fill = treatment)) +
geom_boxplot() +
scale_fill_manual(values = c("#79AAB9", "#467786")) +
stat_compare_means(aes(group = treatment), label = "p.format", vjust = 3) +
stat_summary(
fun.data = function(x)
data.frame(y= Inf, label = paste(round(median(
x
), 1))),
geom = "text",
aes(group = treatment),
hjust = 0.5, vjust = 1,
position = position_dodge(0.9)
) +
stat_summary(
fun.data = function(x)
data.frame(y = Inf, label = paste("n", length(x))),
geom = "text",
aes(group = treatment),
hjust = 0.5, vjust = 2,
position = position_dodge(0.9)
)

Adding a dotted line between two categorical points in R

I'm new to R and I've scripted the interaction plot below, for which I want two dotted lines connecting both "coral" and both "darkgoldenrod2" points respectively:
df <- tibble::tribble(~Proportion, ~Lower,~Upper, ~Area,~Time,
invlogit(-0.033886), invlogit(-0.517223067), invlogit(0.449451067), "SNP", "Day",
(invlogit(-0.9231219)+invlogit(-0.3786)), 0.5727 ,0.8087, "SNP", "Night",
invlogit(-0.9231219), invlogit(-1.406458967), invlogit(-0.439784833),"LGCA", "Day",
invlogit(-0.1604356), invlogit(-0.643772667) ,invlogit(0.322901467), "LGCA","Night")
df
dfnew <- df %>%
mutate(ymin = Proportion - Lower,
ymax = Proportion + Upper)
p <- ggplot(data = dfnew, aes(x = Time, y = Proportion, color=Area)) +
geom_point(size = 6, stroke = 0, shape = 16,
position = position_dodge(width = 0.1))+
geom_errorbar(aes(y=Proportion, ymin = Lower, ymax = Upper),width=0.1,size=1,
position = position_dodge(width = 0.1)) +
theme(axis.text=element_text(size=15),
axis.title=element_text(size=20)) +
scale_color_manual(values = c("SNP" = "coral",
"LGCA" = "darkgoldenrod2"))
p
Reading other posts here on SO, I've used the command line: +geom_line(aes(group = 1),size=2)
This however isn't producing the desired plot as you can see below:
Any help with this is truly appreciated!
You should add group=Area to your ggplot mapping and then you just need to call geom_line. You also don't need y=Position in geom_errorbar.
p <- ggplot(data = dfnew, aes(x = Time, y = Proportion, color=Area, group=Area)) +
geom_point(size = 6, stroke = 0, shape = 16,
position = position_dodge(width = 0.1))+
geom_errorbar(aes(ymin = Lower, ymax = Upper), width=0.1, size=1,
position = position_dodge(width = 0.1)) +
theme(axis.text=element_text(size=15),
axis.title=element_text(size=20)) +
scale_color_manual(values = c("SNP" = "coral",
"LGCA" = "darkgoldenrod2")) +
geom_line(size=2)
p

geom_errorbar() overlap and positioning issues

I am having trouble with geom_errorbars particularly in utilizing position_dodge() effectively in this script.
library(ggplot2)
library(plyr)
Dose <- rep(c(3,10,30,100), each = 6)
Visit <- rep(c(1,28), each = 3, times = 4)
Animal <- rep(1:3, times = 8)
Estimate <- runif(24)
Dose <- factor(Dose)
Visit <- factor(Visit)
df <- data.frame(Animal, Dose, Visit, Estimate)
e <- ddply(df, .(Dose, Visit), summarise, mean = mean(Estimate), sd = sd(Estimate), n = length(Estimate))
e$se = e$sd/sqrt(e$n)
trace.out <- ggplot(data = e, aes(x = Visit, y = mean, colour = Dose))
trace.out <- trace.out +
geom_point(data = e, aes(y = mean), size = 3, postion = position_dodge(width = 0.2)) +
geom_line(data = e, aes(y = mean, group = Dose), position = position_dodge(width = 0.2)) +
geom_errorbar(aes(ymin= mean - se, ymax = mean + se), postion = position_dodge(0.2), colour='black', width= 0.3) +
labs(y = 'Estimate') +
theme_bw()
print(trace.out)
The output for me looks like:
I would like for the points, lines and error bars to line up and to have the errorbars not overlap. Is there some way to do that? Additionally I get an error of:
ymax not defined: adjusting position using y instead
Would this have anything to do with it? Thanks in Advance!
Maybe facets are an option:
trace.out <- ggplot(data = e, aes(x = Visit, y = mean, colour = Dose, ymin= mean - se, ymax = mean + se, group = Dose))
trace.out <- trace.out +
geom_point(size = 3, postion = position_dodge(width = 0.2)) +
geom_line(position = position_dodge(width = 0.2), ) +
geom_errorbar(postion = position_dodge(0.2), colour='black', width= 0.3) +
labs(y = 'Estimate') +
theme_bw()
print(trace.out + facet_grid(~Dose) )

Resources