Related
I have barplots, but would like to run a Wilcox.test within each "grp1" comparing the bars to the control for that group, and then putting an asterix if it is significant.
I've seen "compare_means" to get the comparisons, but I'm trying to make it automated and not so manual. Would "geom_signif" or "stat_compare_means" do this? Can someone help with this? Thank you very much.
I need the comparison to be made using the full dataset, not just the means (which is only one value per bar). I added a line at the end of the code running one of the comparisons so you can see where I need the p-values from.
y <- c(runif(100,0,4.5),runif(100,3,6),runif(100,4,7))
grp1 <- sample(c("A","B","C","D"),size = 300, replace = TRUE)
grp2 <- rep(c("High","Med","Contrl"),each=100)
dataset <- data.frame(y,grp1,grp2)
means <- aggregate(y~grp1+grp2,data=dataset,mean)
sd <- aggregate(y~grp1+grp2,data=dataset,function(x){sd(x)})
means.all <- merge(sd,means,by=c("grp1","grp2"))
names(means.all)[3:4] <- c("sd","y.mean")
library(ggplot2)
p<- ggplot(means.all, aes(x=grp1, y=y.mean, fill=grp2))+
geom_bar(stat="identity", color="black",
position=position_dodge()) +
geom_errorbar(aes(ymin=y.mean-sd, ymax=y.mean+sd), width=.2,
position=position_dodge(.9))
p
compare_means(y~grp2,data = dataset[dataset$grp1=="A",],method="wilcox.test")
Maybe this is not the optimal way but you can create a list splitting the data and applying the stat_compare_means() function individually at each level of your data. After that you can arrange the plots in one using patchwork:
library(ggplot2)
library(ggpubr)
library(patchwork)
#Split data
List <- split(means.all,means.all$grp1)
#Function for plot
myfun <- function(x)
{
#Ref group
rg <- paste0(unique(x$grp1),'.','Contrl')
#Plot
G <- ggplot(x, aes(x=interaction(grp1,grp2), y=y.mean, fill=grp2))+
geom_bar(stat="identity", color="black",
position=position_dodge()) +
geom_errorbar(aes(ymin=y.mean-sd, ymax=y.mean+sd), width=.2,
position=position_dodge(.9))+
stat_compare_means(ref.group = rg,label = "p.signif",method = "wilcox.test",label.y = 7)+
theme(axis.text.x = element_blank())+
xlab(unique(x$grp1))
return(G)
}
#Apply
Lplot <- lapply(List, myfun)
#Wrap plots
wrap_plots(Lplot,nrow = 1)+plot_layout(guides = 'collect')
Output:
Consider this update that takes the values for asterisks stored in a new dataframe:
#Create p-vals dataset
List2 <- split(dataset,dataset$grp1)
#p-val function
mypval <- function(x)
{
y <- compare_means(y~grp2,data = x,method="wilcox.test")
y <- y[,c('group2', 'group1','p.signif')]
names(y)<-c('grp2','grp1','p.signif')
y <- y[y$grp2=='Contrl',]
y$grp2 <- y$grp1
y <- rbind(y,data.frame(grp2='Contrl',grp1='',p.signif=''))
y$grp1 <- unique(x$grp1)
y$y.mean=7
return(y)
}
#Apply
dfpvals <- lapply(List2, mypval)
df <- do.call(rbind,dfpvals)
#Plot
ggplot(means.all, aes(x=grp1, y=y.mean, fill=grp2,group=grp2))+
geom_bar(stat="identity", color="black",
position=position_dodge()) +
geom_errorbar(aes(ymin=y.mean-sd, ymax=y.mean+sd), width=.2,
position=position_dodge(.9))+
geom_text(data=df,aes(x=grp1, y=y.mean,group=grp2,label=p.signif),
position=position_dodge(0.9))
Output:
I want to make a plot of the Daily Streamflow in each Station and save it in png format. I want a separate png for each station, something like the image below:
I have a list with the data frame for each station, as shown in the figure below:
I am trying using the following code, but it is not working because R aborted, I am not sure if it is because of the quantity of data:
for (i in 1:length(listDF2))
{
df1 <- as.data.frame(listDF2[[i]])
df1[is.na(df1)] <- 0
temp_plot <- ggplot(df1, aes(x = day, y = DailyMeanStreamflow, colour=Station)) +
geom_line(size = 1) +
geom_point(size=1.5, shape=21, fill="white") +
facet_wrap(~ month, ncol = 3) +
labs(title = "Daily Mean Streamflow",
subtitle = "Data plotted by month",
y = "Daily Mean Streamflow [m3/s]", x="Days") +
scale_y_continuous (breaks=seq(0,max(df1$DailyMeanStreamflow, na.rm=TRUE),by=1500)) +
scale_x_continuous (breaks=seq(1,max(df1$day),by=1)) + theme(axis.text.x = element_text(size=9))
print(temp_plot)
name4<- paste("DailyStreamflow_byMonth","_", siteNumber[i], ".png", sep="")
ggsave(temp_plot,filename = name4,width=22,height=11,units="in",dpi=500)
#while (!is.null(dev.list()))
dev.off()
}
I have also a "big" data frame with the data for each station one after the other. This data frame is useful when I want to apply functions like data_frame %>% group_by(station) %>% summarise(...)
Any idea in how to make the plots for each station? Is it better to use the list or the "big" data frame for this purpose?
I am not sure where the problem in your workflow occures. It is quite hard to help you, as we have not minimal working example. Also I am not sure if you just want to produce your plots in a loop or if you (also) want to put them together in one visualization?
Anyways ... I tried to give you a starting point ... maybe this will help?
"%>%" <- magrittr::"%>%"
df_list <- list(
A=dplyr::tibble(ID=1:10,
x=rnorm(10),
y=rnorm(10)),
B=dplyr::tibble(ID=1:10,
x=rnorm(10),
y=rnorm(10)),
C=dplyr::tibble(ID=1:10,
x=rnorm(10),
y=rnorm(10)),
D=dplyr::tibble(ID=1:10,
x=rnorm(10),
y=rnorm(10)))
# Lapply approach
lapply(df_list, function(dat){
p <- dat %>%
ggplot2::ggplot(ggplot2::aes(x=x,y=y)) +
ggplot2::geom_point()
print(p)
})
# Loop approach
for (i in 1:length(df_list)){
p <- df_list[[i]] %>%
ggplot2::ggplot(ggplot2::aes(x=x,y=y)) +
ggplot2::geom_point()
print(p)
fname <- paste("test","_", i, ".png", sep="")
ggsave(p,
filename=fname,
width=22,
height=11,
units="in",
dpi=500)
}
say I want to plot two layers in ggplot, one containing points and another one containing lines if a certain criteria is fulfilled.
The code without the criteria could look like this:
library("ggplot2")
# Summarise number of movie ratings by year of movie
mry <- do.call(rbind, by(movies, round(movies$rating), function(df) {
nums <- tapply(df$length, df$year, length)
data.frame(rating=round(df$rating[1]), year = as.numeric(names(nums)), number=as.vector(nums))
}))
p <- ggplot(mry, aes(x=year, y=number, group=rating))
p +
geom_point()+
geom_line()
now the condition for plotting the points and not only the lines would be, that an object called tmp.data does not equal the expression "no value".
tmp.data<-c(1,2,3) # in this case the condition is fulfilled
# attempt to plot the two layers including the condition in the plotting function
p+
if(tmp.data[1]!="no value"){ geom_point()+}
geom_line()
fails....
Error: unexpected '}' in:
"p+
if(tmp.data[1]!="no value"){ geom_point()+}"
geom_line()
geom_line:
stat_identity:
position_identity: (width = NULL, height = NULL)
This was done using ggplot2 2.1.0. I think you can do exactly what the OP wished, just by switching the parenthesis so that they encompass the entire if statement.
Here is an example that add a horizontal line depending on if Swtich is T or F. First, where the condition is TRUE
library(ggplot2)
df<-data.frame(x=1:10,y=11:20)
Switch=T
ggplot(df,aes(x,y))+
{if(Switch)geom_hline(yintercept=15)}+
geom_point()
Now, the same thing but the condition is FALSE
df<-data.frame(x=1:10,y=11:20)
Switch=F
ggplot(df,aes(x,y))+
{if(Switch)geom_hline(yintercept=15)}+
geom_point()
What you are seeing is a syntax error. The most robust way I can think of is:
tmp.data<-c(1,2,3)
if(tmp.data[1]!="no value") {
p = p + geom_point()
}
p + geom_line()
So you compose the object p in a sequence, only adding geom_point() when the if statements yields TRUE.
Following the ggplot2 book, you can create a function which returns a list. Any NULL components will be ignored.
library(ggplot2)
library(ggplot2movies)
# Summarise number of movie ratings by year of movie
mry <- do.call(rbind, by(movies, round(movies$rating), function(df) {
nums <- tapply(df$length, df$year, length)
data.frame(rating=round(df$rating[1]), year = as.numeric(names(nums)), number=as.vector(nums))
}))
# create function to add points conditionally
# If the list contains any NULL elements, they’re ignored.
my_plot <- function(point = FALSE){
list(
geom_line(),
if (point)
geom_point()
)
}
p <- ggplot(mry, aes(x=year, y=number, group=rating))
p + my_plot()
p + my_plot(point = TRUE)
Created on 2020-02-25 by the reprex package (v0.3.0)
library(ggplot2)
# Summarise number of movie ratings by year of movie
mry <- do.call(rbind, by(movies, round(movies$rating), function(df) {
nums <- tapply(df$length, df$year, length)
data.frame(rating=round(df$rating[1]), year = as.numeric(names(nums)), number=as.vector(nums))
}))
tmp.data<-c(1,2,3) # in this case the condition is fulfilled
p <- ggplot(mry, aes(x=year, y=number, group=rating))
# this won't "loop through" the data points but it's what you asked for
if (tmp.data[1]!="no value") {
p <- p + geom_point() + geom_line()
} else {
p <- p + geom_line()
}
p
but perhaps this is more like what you really want?
mry$rating <- factor(mry$rating)
p <- ggplot(mry, aes(x=year, y=number, group=rating))
p <- p + geom_line()
p <- p + geom_point(data=mry[!(mry$rating %in% tmp.data),],
aes(x=year, y=number, group=rating, color=rating), size=2)
p <- p + scale_color_brewer()
p
An alternative example using iris dataset creating a plot with conditional components, some with more than one line, and with multiple switches.
library("ggplot2")
p <- ggplot(iris,
aes(x=Sepal.Length,
y=Sepal.Width,
colour=Species))
# set plot switches to turn on or off various components
include_points = TRUE
include_lines = FALSE
include_density = TRUE
include_facet = TRUE
p +
list(
if(include_points){geom_point()},
if(include_lines){geom_line()},
if(include_facet){
# multi-line conditional elements can be included as a list
list(
facet_grid(rows = "Species"),
labs(subtitle = "This plot has been faceted, and colour has been removed"),
aes(colour = NULL)
)
},
if(include_density){geom_density2d()}) +
# additional elements common to all possible conditional combinations
# can be added as well
theme_minimal() +
labs(title = "Plot with conditional elements")
With the conditions as follows:
include_points = TRUE
include_lines = FALSE
include_density = TRUE
include_facet = TRUE
The following plot is generated:
Faceted plot with density and points
Setting include_facet = FALSE the following plot is generated:
Coloured plot with density - no facets
say I want to plot two layers in ggplot, one containing points and another one containing lines if a certain criteria is fulfilled.
The code without the criteria could look like this:
library("ggplot2")
# Summarise number of movie ratings by year of movie
mry <- do.call(rbind, by(movies, round(movies$rating), function(df) {
nums <- tapply(df$length, df$year, length)
data.frame(rating=round(df$rating[1]), year = as.numeric(names(nums)), number=as.vector(nums))
}))
p <- ggplot(mry, aes(x=year, y=number, group=rating))
p +
geom_point()+
geom_line()
now the condition for plotting the points and not only the lines would be, that an object called tmp.data does not equal the expression "no value".
tmp.data<-c(1,2,3) # in this case the condition is fulfilled
# attempt to plot the two layers including the condition in the plotting function
p+
if(tmp.data[1]!="no value"){ geom_point()+}
geom_line()
fails....
Error: unexpected '}' in:
"p+
if(tmp.data[1]!="no value"){ geom_point()+}"
geom_line()
geom_line:
stat_identity:
position_identity: (width = NULL, height = NULL)
This was done using ggplot2 2.1.0. I think you can do exactly what the OP wished, just by switching the parenthesis so that they encompass the entire if statement.
Here is an example that add a horizontal line depending on if Swtich is T or F. First, where the condition is TRUE
library(ggplot2)
df<-data.frame(x=1:10,y=11:20)
Switch=T
ggplot(df,aes(x,y))+
{if(Switch)geom_hline(yintercept=15)}+
geom_point()
Now, the same thing but the condition is FALSE
df<-data.frame(x=1:10,y=11:20)
Switch=F
ggplot(df,aes(x,y))+
{if(Switch)geom_hline(yintercept=15)}+
geom_point()
What you are seeing is a syntax error. The most robust way I can think of is:
tmp.data<-c(1,2,3)
if(tmp.data[1]!="no value") {
p = p + geom_point()
}
p + geom_line()
So you compose the object p in a sequence, only adding geom_point() when the if statements yields TRUE.
Following the ggplot2 book, you can create a function which returns a list. Any NULL components will be ignored.
library(ggplot2)
library(ggplot2movies)
# Summarise number of movie ratings by year of movie
mry <- do.call(rbind, by(movies, round(movies$rating), function(df) {
nums <- tapply(df$length, df$year, length)
data.frame(rating=round(df$rating[1]), year = as.numeric(names(nums)), number=as.vector(nums))
}))
# create function to add points conditionally
# If the list contains any NULL elements, they’re ignored.
my_plot <- function(point = FALSE){
list(
geom_line(),
if (point)
geom_point()
)
}
p <- ggplot(mry, aes(x=year, y=number, group=rating))
p + my_plot()
p + my_plot(point = TRUE)
Created on 2020-02-25 by the reprex package (v0.3.0)
library(ggplot2)
# Summarise number of movie ratings by year of movie
mry <- do.call(rbind, by(movies, round(movies$rating), function(df) {
nums <- tapply(df$length, df$year, length)
data.frame(rating=round(df$rating[1]), year = as.numeric(names(nums)), number=as.vector(nums))
}))
tmp.data<-c(1,2,3) # in this case the condition is fulfilled
p <- ggplot(mry, aes(x=year, y=number, group=rating))
# this won't "loop through" the data points but it's what you asked for
if (tmp.data[1]!="no value") {
p <- p + geom_point() + geom_line()
} else {
p <- p + geom_line()
}
p
but perhaps this is more like what you really want?
mry$rating <- factor(mry$rating)
p <- ggplot(mry, aes(x=year, y=number, group=rating))
p <- p + geom_line()
p <- p + geom_point(data=mry[!(mry$rating %in% tmp.data),],
aes(x=year, y=number, group=rating, color=rating), size=2)
p <- p + scale_color_brewer()
p
An alternative example using iris dataset creating a plot with conditional components, some with more than one line, and with multiple switches.
library("ggplot2")
p <- ggplot(iris,
aes(x=Sepal.Length,
y=Sepal.Width,
colour=Species))
# set plot switches to turn on or off various components
include_points = TRUE
include_lines = FALSE
include_density = TRUE
include_facet = TRUE
p +
list(
if(include_points){geom_point()},
if(include_lines){geom_line()},
if(include_facet){
# multi-line conditional elements can be included as a list
list(
facet_grid(rows = "Species"),
labs(subtitle = "This plot has been faceted, and colour has been removed"),
aes(colour = NULL)
)
},
if(include_density){geom_density2d()}) +
# additional elements common to all possible conditional combinations
# can be added as well
theme_minimal() +
labs(title = "Plot with conditional elements")
With the conditions as follows:
include_points = TRUE
include_lines = FALSE
include_density = TRUE
include_facet = TRUE
The following plot is generated:
Faceted plot with density and points
Setting include_facet = FALSE the following plot is generated:
Coloured plot with density - no facets
say I want to plot two layers in ggplot, one containing points and another one containing lines if a certain criteria is fulfilled.
The code without the criteria could look like this:
library("ggplot2")
# Summarise number of movie ratings by year of movie
mry <- do.call(rbind, by(movies, round(movies$rating), function(df) {
nums <- tapply(df$length, df$year, length)
data.frame(rating=round(df$rating[1]), year = as.numeric(names(nums)), number=as.vector(nums))
}))
p <- ggplot(mry, aes(x=year, y=number, group=rating))
p +
geom_point()+
geom_line()
now the condition for plotting the points and not only the lines would be, that an object called tmp.data does not equal the expression "no value".
tmp.data<-c(1,2,3) # in this case the condition is fulfilled
# attempt to plot the two layers including the condition in the plotting function
p+
if(tmp.data[1]!="no value"){ geom_point()+}
geom_line()
fails....
Error: unexpected '}' in:
"p+
if(tmp.data[1]!="no value"){ geom_point()+}"
geom_line()
geom_line:
stat_identity:
position_identity: (width = NULL, height = NULL)
This was done using ggplot2 2.1.0. I think you can do exactly what the OP wished, just by switching the parenthesis so that they encompass the entire if statement.
Here is an example that add a horizontal line depending on if Swtich is T or F. First, where the condition is TRUE
library(ggplot2)
df<-data.frame(x=1:10,y=11:20)
Switch=T
ggplot(df,aes(x,y))+
{if(Switch)geom_hline(yintercept=15)}+
geom_point()
Now, the same thing but the condition is FALSE
df<-data.frame(x=1:10,y=11:20)
Switch=F
ggplot(df,aes(x,y))+
{if(Switch)geom_hline(yintercept=15)}+
geom_point()
What you are seeing is a syntax error. The most robust way I can think of is:
tmp.data<-c(1,2,3)
if(tmp.data[1]!="no value") {
p = p + geom_point()
}
p + geom_line()
So you compose the object p in a sequence, only adding geom_point() when the if statements yields TRUE.
Following the ggplot2 book, you can create a function which returns a list. Any NULL components will be ignored.
library(ggplot2)
library(ggplot2movies)
# Summarise number of movie ratings by year of movie
mry <- do.call(rbind, by(movies, round(movies$rating), function(df) {
nums <- tapply(df$length, df$year, length)
data.frame(rating=round(df$rating[1]), year = as.numeric(names(nums)), number=as.vector(nums))
}))
# create function to add points conditionally
# If the list contains any NULL elements, they’re ignored.
my_plot <- function(point = FALSE){
list(
geom_line(),
if (point)
geom_point()
)
}
p <- ggplot(mry, aes(x=year, y=number, group=rating))
p + my_plot()
p + my_plot(point = TRUE)
Created on 2020-02-25 by the reprex package (v0.3.0)
library(ggplot2)
# Summarise number of movie ratings by year of movie
mry <- do.call(rbind, by(movies, round(movies$rating), function(df) {
nums <- tapply(df$length, df$year, length)
data.frame(rating=round(df$rating[1]), year = as.numeric(names(nums)), number=as.vector(nums))
}))
tmp.data<-c(1,2,3) # in this case the condition is fulfilled
p <- ggplot(mry, aes(x=year, y=number, group=rating))
# this won't "loop through" the data points but it's what you asked for
if (tmp.data[1]!="no value") {
p <- p + geom_point() + geom_line()
} else {
p <- p + geom_line()
}
p
but perhaps this is more like what you really want?
mry$rating <- factor(mry$rating)
p <- ggplot(mry, aes(x=year, y=number, group=rating))
p <- p + geom_line()
p <- p + geom_point(data=mry[!(mry$rating %in% tmp.data),],
aes(x=year, y=number, group=rating, color=rating), size=2)
p <- p + scale_color_brewer()
p
An alternative example using iris dataset creating a plot with conditional components, some with more than one line, and with multiple switches.
library("ggplot2")
p <- ggplot(iris,
aes(x=Sepal.Length,
y=Sepal.Width,
colour=Species))
# set plot switches to turn on or off various components
include_points = TRUE
include_lines = FALSE
include_density = TRUE
include_facet = TRUE
p +
list(
if(include_points){geom_point()},
if(include_lines){geom_line()},
if(include_facet){
# multi-line conditional elements can be included as a list
list(
facet_grid(rows = "Species"),
labs(subtitle = "This plot has been faceted, and colour has been removed"),
aes(colour = NULL)
)
},
if(include_density){geom_density2d()}) +
# additional elements common to all possible conditional combinations
# can be added as well
theme_minimal() +
labs(title = "Plot with conditional elements")
With the conditions as follows:
include_points = TRUE
include_lines = FALSE
include_density = TRUE
include_facet = TRUE
The following plot is generated:
Faceted plot with density and points
Setting include_facet = FALSE the following plot is generated:
Coloured plot with density - no facets