ggplot error bar legend - r

I am having difficulties adding a legend to my error bar plot. I tried several command that I've seen in other subject, but unfortunately it doesn't work (I am sure I'm missing something but I can't figure out what)
library(ggplot2)
errors=matrix(c(-3.800904,-3.803444,-3.805985,-3.731204,-3.743969,
-3.756735,-3.742510,-3.764961,-3.787413,-3.731204,-3.743969,-3.756735,
-3.711420,-3.721589,-3.731758,-3.731204,-3.743969,-3.756735,-3.636346,
-3.675159,-3.713971,-3.731204,-3.743969,-3.756735),nrow=4,byrow=TRUE)
modelName=c("model 1","model 2","model 3","model 0")
boxdata=data.frame(errors,modelName)
colnames(boxdata)=c("icp","pred","icm","icp_obs","obs","icm_obs","model")
qplot(boxdata$model,boxdata$pred,
main = paste("confidance level 95% for age ", age_bp + start_age - 1,sep="")) +
geom_errorbar(aes(x=boxdata$model, ymin=boxdata$icm, ymax=boxdata$icp), width=0.20,col='deepskyblue') +
geom_point(aes(x=boxdata$model,y=boxdata$obs),shape=4,col="orange") +
geom_errorbar(aes(x=boxdata$model, ymin=boxdata$icm_obs, ymax=boxdata$icp_obs), width=0.20,col='red') +
scale_shape_manual(name="legend", values=c(19,4)) +
scale_color_manual(name="legend", values = c("black","orange")) +
xlab("models") +
ylab("confidence level")

The problem is that you are using wide form data rather than long form data. You need to convert the data from wide to long before plotting if you want to get a legend.
library(ggplot2)
errors=matrix(c(-3.800904,-3.803444,-3.805985,-3.731204,-3.743969,
-3.756735,-3.742510,-3.764961,-3.787413,-3.731204,-3.743969,-3.756735,
-3.711420,-3.721589,-3.731758,-3.731204,-3.743969,-3.756735,-3.636346,
-3.675159,-3.713971,-3.731204,-3.743969,-3.756735),nrow=4,byrow=TRUE)
errors = rbind(errors[, 1:3], errors[,4:6]) # manually reshaping the data
modelName=c("model 1","model 2","model 3","model 0")
type = rep(c("model", "obs"), each = 4)
boxdata=data.frame(errors,modelName, type)
colnames(boxdata)=c("icp","pred","icm","model", "type")
ggplot(boxdata, aes(x = model, y = pred, ymax = icp, ymin = icm,
group = type, colour = type, shape = type)) +
geom_errorbar(width=0.20) +
geom_point() +
scale_shape_manual(values=c(19, 4)) +
scale_color_manual(values = c("black","orange")) +
xlab("models") +
ylab("confidence level")
The output looks closer to your output can be generated by:
ggplot(boxdata, aes(x = model, y = pred, ymax = icp, ymin = icm,
group = type, colour = type, shape = type)) +
geom_errorbar(width=0.20) +
geom_point(colour = rep(c("black","orange"), each = 4)) +
scale_shape_manual(values=c(19, 4)) +
scale_color_manual(values = c("deepskyblue", "red")) +
xlab("models") +
ylab("confidence level")

Related

Bar plot ggplot2 - Error: Aesthetics must be either length 1 or the same as the data (150): fill, x, y

Hey I know there are lots of questions about this particular error but i still cant find what is wrong, pretty new to R and coding in general.
here is a link to may data
https://www.dropbox.com/s/qfo5rp7ywgsgxhy/CRERDATA.csv?dl=0
and here is my code to make the graph
not all used for graph obviously
library(car)
library(ggplot2)
library(Rmisc)
library(dunn.test)
library(FSA)
summarizes my data so i can get standard error bars
index_sum <- summarySE(CRERDATA, measurevar = "index", groupvars = c("site", "scenario"), na.rm = TRUE)
graph code
index_graph <- ggplot(CRERDATA, aes(x = index_sum$site, y = index_sum$index, fill = index_sum$scenario)) +
geom_bar(aes(fill = index_sum$scenario), position = position_dodge(), stat="identity") + ylab("Bleaching index") + xlab("Sites") +
labs(fill = "scenario") + scale_fill_grey() + theme_minimal() +
geom_errorbar(aes(ymin = index_sum$index-se, ymax = index_sum$index+se), width = .2, position = position_dodge(.9), color = "red")
You have specified CRERDATA in the ggplot as data, when you are actually using index_sum.
Load necessary packages:
library(ggplot2)
library(Rmisc)
Read data and summarise:
CRERDATA <- read.csv('CRERDATA.csv')
index_sum <- summarySE(CRERDATA, measurevar = "index", groupvars = c("site", "scenario"), na.rm = TRUE)
Instead of CRERDATA, use index_sum. You then don't need to use the dollar sign $ to access columns:
ggplot(index_sum, aes(x = site, y = index, fill = scenario)) +
geom_bar(aes(fill = scenario), position = position_dodge(), stat="identity") +
ylab("Bleaching index") + xlab("Sites") + labs(fill = "scenario") + scale_fill_grey() +
theme_minimal() +
geom_errorbar(aes(ymin = index-se, ymax = index+se), width = .2, position = position_dodge(.9), color = "red")
The result:

Plot standard deviation

I want to plot the standard deviation for 1 line (1 flow serie, the plot will have 2) in a plot with lines or smoth areas. I've seen and applied some code from sd representation and other examples... but it's not working for me.
My original data has several flow values for the same day, of which I've calculated the daily mean and sd. I'm stuck here, don't know if it is possible to represent the daily sd with lines from the column created "called sd" or should I use the original data.
The bellow code is a general example of what I'll apply to my data. The flow, flow1 and sd, are examples of the result calculation of daily mean and sd of the original data.
library(gridExtra)
library(ggplot2)
library(grid)
x <- data.frame(
date = seq(as.Date("2012-01-01"),as.Date("2012-12-31"), by="week"),
rain = sample(0:20,53,replace=T),
flow1 = sample(50:150,53,replace=T),
flow = sample(50:200,53,replace=T),
sd = sample (0:10,53, replace=T))
g.top <- ggplot(x, aes(x = date, y = rain, ymin=0, ymax=rain)) +
geom_linerange() +
scale_y_continuous(limits=c(22,0),expand=c(0,0), trans="reverse")+
theme_classic() +
theme(plot.margin = unit(c(5,5,-32,6),units="points"),
axis.title.y = element_text(vjust = 0.3))+
labs(y = "Rain (mm)")
g.bottom <- ggplot(x, aes(x = date)) +
geom_line(aes(y = flow, colour = "flow")) +
geom_line(aes(y = flow1, colour = "flow1")) +
stat_summary(geom="ribbon", fun.ymin="min", fun.ymax="max", aes(fill=sd), alpha=0.3) +
theme_classic() +
theme(plot.margin = unit(c(0,5,1,1),units="points"),legend.position="bottom") +
labs(x = "Date", y = "River flow (m/s)")
grid.arrange(g.top, g.bottom , heights = c(1/5, 4/5))
The above code gives Error: stat_summary requires the following missing aesthetics: y
Other option is geom_smooth, but as far as I could understand it requires some line equation (I can be wrong, I'm new in R).
Something like this maybe?
g.bottom <- x %>%
select(date, flow1, flow, sd) %>%
gather(key, value, c(flow, flow1)) %>%
mutate(min = value - sd, max = value + sd) %>%
ggplot(aes(x = date)) +
geom_ribbon(aes(ymin = min, ymax = max, fill = key)) +
geom_line(aes(y = value, colour = key)) +
scale_fill_manual(values = c("grey", "grey")) +
theme_classic() +
theme(plot.margin = unit(c(0,5,1,1),units="points"),legend.position="bottom") +
labs(x = "Date", y = "River flow (m/s)")

Make overlapping histogram in with geom_histogram

I am trying to make an overlapping histogram like this:
ggplot(histogram, aes = (x), mapping = aes(x = value)) +
geom_histogram(data = melt(tpm_18_L_SD), breaks = seq(1,10,by = 1),
aes(y = 100*(..count../sum(..count..))), alpha=0.2) +
geom_histogram(data = melt(tpm_18_S_SD), breaks = seq(1,10,by = 1),
aes(y = 100*(..count../sum(..count..))), alpha=0.2) +
geom_histogram(data = melt(tpm_18_N_SD), breaks = seq(1,10,by = 1),
aes(y = 100*(..count../sum(..count..))), alpha=0.2) +
facet_wrap(~variable, scales = 'free_x') +
ylim(0, 20) +
ylab("Percentage of Genes") +
xlab("Standard Deviation")
My code can only make them plot side by side and I would like to also make them overlap. Thank you! I based mine off of the original post where this came from but it did not work for me. It was originally 3 separate graphs which I combined with grid and ggarrange. It looks like this right now.
Here is the code of the three separate graphs.
SD_18_L <- ggplot(data = melt(tpm_18_L_SD), mapping = aes(x = value)) +
geom_histogram(aes(y = 100*(..count../sum(..count..))), breaks = seq(1, 10, by = 1)) +
facet_wrap(~variable, scales = 'free_x') +
ylim(0, 20) +
ylab("Percentage of Genes") +
xlab("Standard Deviation")
SD_18_S <- ggplot(data = melt(tpm_18_S_SD), mapping = aes(x = value)) +
geom_histogram(aes(y = 100*(..count../sum(..count..))), breaks = seq(1, 10, by = 1)) +
facet_wrap(~variable, scales = 'free_x') +
ylim(0, 20) +
ylab("Percentage of Genes") +
xlab("Standard Deviation")
SD_18_N <- ggplot(data = melt(tpm_18_N_SD), mapping = aes(x = value)) +
geom_histogram(aes(y = 100*(..count../sum(..count..))), breaks = seq(1, 10, by = 1)) +
facet_wrap(~variable, scales = 'free_x') +
ylim(0, 20) +
ylab("Percentage of Genes") +
xlab("Standard Deviation")
What my graphs look like now:
ggplot expects dataframes in a long format. I'm not sure what your data looks like, but you shouldn't have to call geom_histogram for each category. Instead, get all your data into a single dataframe (you can use rbind for this) in long format (what you're doing already with melt) first, then feed it into ggplot and map fill to whatever your categorical variable is.
Your call to facet_wrap is what puts them in 3 different plots. If you want them all on the same plot, take that line out.
An example using the iris data:
ggplot(iris, aes(x = Sepal.Length, fill = Species)) +
geom_histogram(alpha = 0.6, position = "identity")
I decreased alpha in geom_histogram so you can see where colors overlap, and added position = "identity" so observations aren't being stacked. Hope that helps!

Coloring the vertical lines by Class in ggplot

I want to plot the distribution of a variable by Class and add vertical lines denoting the means of the subsets defined by each Class and having them colored by Class. While I succeed to color the distributions by Class, the vertical lines appear gray. For a reproducible example see below:
library(data.table)
library(ggplot2)
library(ggthemes)
data(mtcars)
setDT(mtcars)
mtcars[, am := factor(am, levels = c(1, 0))]
mean_data <- mtcars[, .(mu = mean(hp)), by = am]
ggplot(mtcars, aes(x = hp, fill = am , color = am)) +
geom_histogram(aes(y=..density..), position="identity",alpha = 0.4) + guides(color = FALSE) +
geom_density (alpha = 0.5)+
geom_vline(data = mean_data, xintercept = mean_data$mu, aes(color = as.factor(mean_data$am)), size = 2, alpha = 0.5) +
ggtitle("Hp by am") + scale_fill_discrete(labels=c("am" , "no am")) +
labs(fill = "Transmission") + theme_economist()
This code renders the following plot:
Your advice will be appreciated.
You need to include the xintercept mapping in your aes call, so that ggplot properly maps all the aesthetics:
ggplot(mtcars, aes(x = hp, fill = am , color = am)) +
geom_histogram(aes(y=..density..), position="identity",alpha = 0.4) + guides(color = FALSE) +
geom_density (alpha = 0.5)+
geom_vline(data = mean_data, aes(xintercept = mu, color = as.factor(am)), size = 2, alpha = 0.5) +
ggtitle("Hp by am") + scale_fill_discrete(labels=c("am" , "no am")) +
labs(fill = "Transmission") + theme_economist()
Anything you put in a geom call that's not in aes gets treated as a one-off value, and doesn't get all the mapped aesthetics applied to it.

Plotting bar plot below xyplot with same x-axis?

I am trying to plot different types of plots (line plot and bar charts) beneath one another, they all have the same axis:
c1 <- ggplot(data, aes(date, TotalMutObs)) + stat_smooth(se = FALSE) +
geom_point() +
opts(axis.title.x = theme_blank()) +
ylab("Cumulative number of new mutations")
c2 <- ggplot(data, aes(date, distance)) + stat_smooth(se = FALSE) +
geom_point() +
opts(axis.title.x = theme_blank()) +
ylab("Cumulative mean pairwise distance")
c3 <- ggplot(data, aes(x = date, y = NbOfHorses)) +
geom_bar(stat = "identity") +
opts(axis.title.x = theme_blank()) +
ylab("Number of horses sampled")
grid.arrange(c1, c2,c3)
However, the dates on the x-axis are not lining up for the different plots.
Here is some data to try it out:
date<-c("2003-03-13","2003-03-25","2003-03-26","2003-03-27","2003-03-28","2003-03-31","2003-04-01","2003-04-02","2003-04-04","2003-04-08","2003-04-09","2003-04-10","2003-04-11","2003-04-14","2003-04-15","2003-04-17","2003-04-19","2003-04-21","2003-04-22","2003-04-28","2003-05-08");
NbOfHorses<-c("1","2","1","3","4","5","4","3","3","3","3","4","2","4","1","2","4","1","2","1","2");
TotalMutObs<-c("20","30","58","72","140","165","204","230","250","286","302","327","346","388","393","414","443","444","462","467","485");
distance<-c("0.000693202","0.00073544","0.000855432","0.000506876","0.000720193","0.000708047","0.000835468","0.000812401","0.000803149","0.000839117","0.000842048","0.000856393","0.000879973","0.000962382","0.000990666","0.001104861","0.001137515","0.001143838","0.00121874","0.001213737","0.001201379");
data<-as.data.frame(cbind(date,NbOfHorses,TotalMutObs,distance));
Cheers,
Joseph
The way to solve this problem is to work within ggplot2 and get creative about stacking copies of your data and then sending subsets to each geom that you need.
#A version of your data cleaned up
dat <- data.frame(date = as.Date(date),NbOfHorses = as.numeric(NbOfHorses),
TotalMutObs = as.numeric(TotalMutObs),distance = as.numeric(distance))
#Create three copies, one for each panel
# Use informative titles for grp to be panel titles
fullDat <- rbind(dat,dat,dat)
fullDat$grp <- rep(c('Cumulative number of new mutations',
'Cumulative mean pairwise distance',
'Number of horses sampled'),each = nrow(dat))
ggplot(fullDat,aes(x = date)) +
facet_wrap(~grp,nrow = 3,scale = "free_y") +
geom_point(data = subset(fullDat,grp == 'Cumulative number of new mutations'),
aes(y = TotalMutObs)) +
stat_smooth(data = subset(fullDat,grp == 'Cumulative number of new mutations'),
aes(y = TotalMutObs),se = FALSE) +
geom_point(data = subset(fullDat,grp == 'Cumulative mean pairwise distance'),
aes(y = distance)) +
stat_smooth(data = subset(fullDat,grp == 'Cumulative mean pairwise distance'),
aes(y = distance),se = FALSE) +
geom_bar(data = subset(fullDat,grp == 'Number of horses sampled'),
aes(y = NbOfHorses),stat = "identity") +
labs(x = NULL,y = NULL)

Resources