Using a separate dataframe to generate error bars in ggplot - r

I have a data frame that looks like this:
genotype DIV3 DIV4 ...
WT 12.4 15.2
WT 35.4 35.3
HET 1.3 1.2
HET 1.5 5.2
I calculate the means and sd by the following functions:
# Per-genotype mean of every measurement column in `dat`.
means <- aggregate(. ~ genotype, data = dat, FUN = mean)
# Per-genotype standard deviation of the same columns.
errors <- aggregate(. ~ genotype, data = dat, FUN = sd)
I am using ggplot2 to plot the means as a scatter plot. I want to use the errors dataframe for error bars, but I am having trouble calculating ymin and ymax since I have two dataframes.
Is there a better way to do this?
EDIT:
ggplot2 code:
# Reshape the per-genotype means to long format: one row per genotype and
# measurement column. The result must be stored under the name the plot
# reads (`x_melt`); assigning it to `x` left `x_melt` undefined.
x_melt <- melt(means)
# Line + point plot of the means, one line per genotype.
ggplot(x_melt, aes(x = variable, y = value, group = genotype,
                   col = genotype, shape = genotype)) +
  geom_line() +
  geom_point(size = 3) +
  theme(axis.text = element_text(size = 14),
        axis.title.x = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 0.8, hjust = .9, color = "black"),
        axis.text.y = element_text(color = "black"))

You can do this either by creating a single dataset in the aggregate step and then reshaping it before plotting:
# Compute mean and SD per genotype in a single aggregate() call.
# do.call(data.frame, ...) flattens the resulting matrix columns into
# ordinary columns named <measure>.Mean and <measure>.SD.
dat2 <- do.call(
  data.frame,
  aggregate(. ~ genotype, dat, FUN = function(x) c(Mean = mean(x), SD = sd(x)))
)
# Locate the Mean/SD columns by name rather than by fixed position, so the
# reshape works for any number of measurement columns (DIV3, DIV4, ...).
mean_cols <- grep("\\.Mean$", colnames(dat2), value = TRUE)
sd_cols <- grep("\\.SD$", colnames(dat2), value = TRUE)
# Measurement names (e.g. "DIV3"), in the same order as the Mean columns.
nm1 <- sub("\\.Mean$", "", mean_cols)
# Long format: one row per genotype and measurement, with columns
# genotype, time (the measurement name), Mean and SD.
datN <- reshape(dat2, direction = "long", idvar = "genotype",
                varying = list(mean_cols, sd_cols),
                v.names = c("Mean", "SD"),
                timevar = "time", times = nm1)
library(ggplot2)
# Mean per genotype across measurements, with error bars spanning
# Mean - SD to Mean + SD taken from the same data frame.
ggplot(
  datN,
  aes(x = time, y = Mean, group = genotype, col = genotype, shape = genotype)
) +
  geom_line() +
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = Mean - SD, ymax = Mean + SD), width = 0.1) +
  theme(
    axis.text = element_text(size = 14),
    axis.title.x = element_blank(),
    axis.text.x = element_text(angle = 45, vjust = 0.8, hjust = .9, color = "black"),
    axis.text.y = element_text(color = "black")
  )
Or you can merge the melted datasets means and errors
library(reshape2)
# Melt the means and the SDs separately, naming the value column after its
# content, then join them on the columns they share.
x_melt <- melt(means, value.name = "Mean")
y_melt <- melt(errors, value.name = "SD")
datN1 <- merge(x_melt, y_melt, by = c("genotype", "variable"))
# Same plot as before, now driven by the merged table.
ggplot(
  datN1,
  aes(x = variable, y = Mean, group = genotype, col = genotype, shape = genotype)
) +
  geom_line() +
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = Mean - SD, ymax = Mean + SD), width = 0.1) +
  theme(
    axis.text = element_text(size = 14),
    axis.title.x = element_blank(),
    axis.text.x = element_text(angle = 45, vjust = 0.8, hjust = .9, color = "black"),
    axis.text.y = element_text(color = "black")
  )
data
# Example data: two rows per genotype, with numeric measurement columns
# DIV3 and DIV4. genotype is kept as a plain character vector.
dat <- data.frame(
  genotype = c("WT", "WT", "HET", "HET"),
  DIV3 = c(12.4, 35.4, 1.3, 1.5),
  DIV4 = c(15.2, 35.3, 1.2, 5.2),
  stringsAsFactors = FALSE
)

Related

geom_text change factor order of facet plot

I am trying to annotate individual plots of a facet plot. I have set the order to 2008, 1999 using factor levels.
But when I add the geom_text to the ggplot, the order of the plots changes. See examples below. What am I doing wrong? How can I solve this?
library(tidyverse)
df <- mpg %>% mutate(year = factor(year, levels = c(2008,1999)))
anno <- data.frame(xstar = c(5, 2), ystar = c(100, 70),
lab = c("text1","text2"),
year = c("2008","1999"))
df %>% ggplot(aes(class, displ)) +
geom_col(aes(fill=drv)) +
facet_grid(~year) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
geom_text(data = anno, aes(x = xstar, y = ystar, label = lab))
Convert the year column in your annotation dataframe also to a factor with the same levels and order as in your main df:
library(ggplot2)
df <- mpg
df$year = factor(df$year, levels = c(2008, 1999))
anno <- data.frame(
xstar = c(5, 2), ystar = c(100, 70),
lab = c("text1", "text2"),
year = factor(c("2008", "1999"), levels = c(2008, 1999))
)
ggplot(df, aes(class, displ)) +
geom_col(aes(fill = drv)) +
geom_text(data = anno, aes(x = xstar, y = ystar, label = lab)) +
facet_grid(~year) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

Geom_bar_pattern not treating x-axis categories as different

take the following data
# Two columns (X1, X2) of 30 random 0/1 scores each. `replace` is spelled
# out in full instead of relying on partial matching of `rep`.
df <- data.frame(replicate(2, sample(0:1, 30, replace = TRUE)))
# Stack both columns into long format: `Type` holds the original column
# name and `Score` the value.
df <- reshape(
  data = df,
  varying = list(1:2),
  direction = "long",
  times = names(df),
  timevar = "Type",
  v.names = "Score"
)
plotted like this:
plot <- ggbarplot(df, x = "Type", y = "Score",
color = "black", fill = "Type", add = "mean_ci")
And I want to add stripes only to X1
plot +
geom_bar_pattern(stat = "summary", fun = "mean", position="dodge", color="black", width=1,pattern_angle = 45, pattern_density = 0.4,pattern_spacing = 0.025, pattern_key_scale_factor = 0.6) +
scale_pattern_manual(values = c(X1 = "stripe", X2 = "none"))
However stripes are added to both x-axis categories (scale_pattern_manual does not work?)
Any help is much appreciated.
You could build your error bars with stat_summary instead of using ggpubr::ggbarplot, then you would get this:
library(ggplot2)
library(ggpattern)
# Two columns (X1, X2) of 30 random 0/1 scores each. `replace` is spelled
# out in full instead of relying on partial matching of `rep`.
df <- data.frame(replicate(2, sample(0:1, 30, replace = TRUE)))
# Stack both columns into long format: `Type` holds the original column
# name and `Score` the value.
df <- reshape(
  data = df,
  varying = list(1:2),
  direction = "long",
  times = names(df),
  timevar = "Type",
  v.names = "Score"
)
# Mean bar per Type. `pattern` is mapped to Type so the manual pattern scale
# can treat the two bars differently. The outline colour is a constant and is
# therefore set on the layers, not mapped inside aes().
ggplot(df, aes(x = Type, y = Score, pattern = Type, fill = Type)) +
  geom_bar_pattern(
    stat = "summary",
    fun = "mean",
    position = "dodge",
    color = "black",
    width = 1,
    pattern_angle = 45,
    pattern_density = 0.4,
    pattern_spacing = 0.025,
    pattern_key_scale_factor = 0.6
  ) +
  # Named values tie each pattern to its Type level explicitly.
  scale_pattern_manual(values = c(X1 = "stripe", X2 = "none")) +
  # Error bars computed from the raw scores, replacing ggbarplot's add = "mean_ci".
  stat_summary(fun.data = mean_cl_normal, geom = "errorbar", col = "black", width = .1)
Created on 2021-05-19 by the reprex package (v2.0.0)
As far as I know scale_pattern_manual will not work in this setting.
To avoid stripes being added to both columns, add aes(pattern = Type) to geom_bar_pattern.
See Gallery of ggpattern package
# Base bar plot of the mean Score per Type, drawn with ggpubr.
plot <- ggbarplot(
  df,
  x = "Type", y = "Score",
  color = "black", fill = "Type",
  add = "mean_ci"
)
# Overlay patterned bars; mapping `pattern` to Type gives each bar its own
# pattern instead of one shared pattern.
plot +
  geom_bar_pattern(
    mapping = aes(pattern = Type),
    stat = "summary",
    fun = "mean",
    position = "dodge",
    color = "white",
    width = 0.7,
    pattern_angle = 45,
    pattern_density = 1,
    pattern_spacing = 0.025,
    pattern_key_scale_factor = 0.8
  )

manipulate scale_y_log in geom_bar ggplot2

I have the following df as example:
sites <- c('s1','s1','s2', "s2", "s3", "s3")
conc <- c(15, 12, 0.5, 0.05, 3, 0.005)
trop <- c("pp", "pt")
df <- data.frame(sites, conc, trop)
df$trop<- factor(df$trop, levels = c("pp", "pt"))
ggplot(df, aes(x= sites, y= conc))+
geom_bar(stat = "identity", colour="black")+
scale_y_log10()+
facet_grid(.~trop)+
theme_bw()
which gives as results the following figure, which is quite helpful for my data analysis since I want to highlight sites with values above 1.
However, under another assumption, I need to highlight sites above 1 and 0.1 using facet_grid, ending up with something like this (I edited this figure as desire output):
Do you know any option in scale_y_log10 in order to get the second figure under facet_grid?
One option is to reparameterise the bars as rectangles and plot that instead.
library(ggplot2)
#> Warning: package 'ggplot2' was built under R version 4.0.3
sites <- c('s1','s1','s2', "s2", "s3", "s3")
conc <- c(15, 12, 0.5, 0.05, 3, 0.005)
trop <- c("pp", "pt")
df <- data.frame(sites, conc, trop)
df$trop<- factor(df$trop, levels = c("pp", "pt"))
# Map each element of `x` to the rank of its value among the sorted unique
# values of `x` (1 for the smallest, 2 for the next, ...).
char2num <- function(x) {
  sorted_values <- sort(unique(x))
  match(x, sorted_values)
}
# Sorted site names; char2num() numbers sites in this same order, so these
# serve as the axis labels for positions 1, 2, ...
site_levels <- sort(unique(df$sites))
# Draw each bar as a rectangle so its lower edge can differ per facet
# (0.1 for "pt", 1 for every other trop value).
ggplot(df) +
  geom_rect(
    aes(
      xmin = char2num(sites) - 0.4,
      xmax = char2num(sites) + 0.4,
      ymin = ifelse(trop == "pt", 0.1, 1),
      ymax = conc
    ),
    colour = "black"
  ) +
  scale_y_log10() +
  # Fake discrete axis: one break per site instead of a hard-coded 1:3,
  # so labels and breaks always have the same length.
  scale_x_continuous(
    labels = site_levels,
    breaks = seq_along(site_levels)
  ) +
  facet_grid(. ~ trop) +
  theme_bw()
Created on 2021-02-26 by the reprex package (v1.0.0)

Add a second y-axis to ggplot

I have the following data set:
structure(list(Jahr = 2005:2019, Fahrrad = c(275208L, 296105L,
308336L, 313363L, 326017L, 311756L, 302193L, 295702L, 268773L,
268295L, 256726L, 248916L, 250242L, 233652L, 230464L), E.Bike = c(1792L,
3181L, 5825L, 12600L, 23886L, 39247L, 49615L, 52941L, 49362L,
57613L, 66332L, 75665L, 87987L, 111661L, 133032L), gesamt = c(277000L,
299286L, 314161L, 325963L, 349903L, 351003L, 351808L, 348643L,
318135L, 325908L, 323058L, 324581L, 338229L, 345313L, 363496L
)), class = "data.frame", row.names = c(NA, -15L))
My goal is to create a graph that shows the absolute purchases of the different bike types on the left y-axis. On the right y-axis I'd like to show the ratio of "E-Bike" purchases to "Fahrrad" purchases to emphasize the trend (if both sell equally, the value is 100; if fewer e-bikes are sold, it is below 100). Something like this:
Is that even possible? I know ggplot doesn't allow some second y-axis.
Here is the code to produce the plot below (without the MS paint edits)
# Long format: one row per year (Jahr) and series.
dfm <- melt(df, id.vars = "Jahr")
# Fix the series order and relabel "E.Bike" as "E-Bike" in one step.
dfm$variable <- factor(
  dfm$variable,
  levels = c("gesamt", "Fahrrad", "E.Bike"),
  labels = c("gesamt", "Fahrrad", "E-Bike")
)
# The plot is in thousands of units sold.
dfm$value <- dfm$value / 1000
ggplot(data = dfm) +
  # Refer to columns by bare name inside aes(); `dfm$...` bypasses the
  # layer data.
  geom_line(aes(x = Jahr, y = value, colour = variable), lineend = "round", lwd = 1.5) +
  scale_x_continuous(limits = c(2013, 2019), breaks = 2013:2019) +
  scale_y_continuous(limits = c(0, 400)) +
  labs(title = "Verkäufe in Tausend") +
  theme_minimal() +
  scale_colour_manual(
    name = "Mode",
    labels = c("Total", "Fahrrad", "E-Bike"),
    values = c("#8c8c8c", "#256bc2", "#7ea9de"),
    guide = "none"
  ) +
  # annotate() draws each label exactly once; a constant geom_text() layer
  # would repeat it for every row of the data.
  annotate("text", x = 2013.5, y = 350, label = "Total", hjust = "inward", size = 3) +
  annotate("text", x = 2013.5, y = 290, label = "Fahrrad", hjust = "inward", size = 3) +
  annotate("text", x = 2013.5, y = 80, label = "E-Bike", hjust = "inward", size = 3) +
  theme(
    legend.title = element_blank(),
    axis.title.y = element_blank(),
    axis.title.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.x = element_blank(),
    axis.text = element_text(size = 8.5),
    plot.title = element_text(hjust = -0.06, size = 9),
    plot.caption = element_text(hjust = -0.08, size = 6, color = "#BFBFBF")
  )
The way that secondary axes work in ggplot is as follows. At the position scale, add a sec.axis argument for a secondary axis that is a linear transformation of the first axis, specified in trans. Since both the primary and secondary axes can start at 0, this means a simple scaling factor will do. You need to manually transform the input data and specify the reverse transformation as the trans argument. Simplified example below (assuming df is your provided data):
library(ggplot2)
library(scales)
dfm <- reshape2::melt(df, id="Jahr")
# Scale factor for utilising whole y-axis range
scalef <- max(df$gesamt) / max(df$E.Bike / df$gesamt)
# Scale factor for using 0-100%
# scalef <- max(df$gesamt)
ggplot(dfm, aes(Jahr, value)) +
geom_line(aes(colour = variable)) +
geom_line(aes(y = E.Bike / gesamt * scalef),
data = df, linetype = 2) +
scale_y_continuous(
labels = number_format(scale = 1e-3),
sec.axis = sec_axis(trans = ~ .x / scalef,
labels = percent_format(),
name = "Percentage E-bike")
)
Created on 2021-01-04 by the reprex package (v0.3.0)

Plotting spectroscopic data using ggplot2

I am trying to plot spectroscopic data using ggplot2. I get my data in the following form:
My code so far is:
library(ggplot2)
library(reshape2)
# Long format: one row per sample and wavenumber.
melt_data <- melt(spectroscopic_data, id.vars = "sample_name", variable.name = "wavenumber", value.name = "intensity")
# Use the column names that melt() actually produced (`sample_name`,
# `wavenumber`); `probe` and `wellenzahl` do not exist in melt_data.
melt_data$sample_name <- factor(melt_data$sample_name)
# The wavenumber column is a factor built from the former column headers;
# convert it to numbers via its level labels, not its integer codes.
melt_data$wavenumber <- as.numeric(levels(melt_data$wavenumber))[melt_data$wavenumber]
ggplot(melt_data, aes(x=wavenumber, y=intensity, group=sample_name, color=sample_name)) + geom_line() +
scale_x_reverse(breaks=c(10000, 9500, 9000, 8500, 8000, 7500, 7000, 6500, 6000, 5500, 5000, 4500, 4000)) +
scale_color_manual(values=c("#FF0000", "#0000CC", "#00CC00", "#FF00FF", "#FF9900", "#000000", "#999900", "#33FFFF", "#FFCCFF", "#FFFF00", "#999999", "#9933FF", "#993300", "#99FF33")) +
theme_bw() +
theme(legend.position = "bottom") +
labs(x=expression(wavenumbers), y="intensity", colour = "") +
theme(legend.text=element_text(size=10), axis.text=element_text(size=12), axis.title=element_text(size=14)) +
guides(colour = guide_legend(ncol = 2, keywidth=1.5, keyheight=1, override.aes = list(size=1.8)))
I need the same color for aaa-samples, bbb-samples and so on (multiple measurements of one sample) but the plot does not work. I get a plot that looks like this when you zoom in:
It looks like ggplot2 connects two samples/lines of the same measurement instead of plotting them separately. Does anyone have an idea? I have been trying to fix this for hours...
Thank you!
Here is my result after Luke C's awesome support:
library(ggplot2)
library(reshape2)
melted_data <- melt(newtestdata, id.vars = c("sample_name",
"sample_id"), variable.name = "wavenumber", value.name = "intensity")
melted_data$wavenumber=as.numeric(levels(melted_data$wavenumber))[melted_data$wavenumber]
ggplot(melted_data, aes(x=wavenumber, y=intensity, group = sample_id, color = sample_name)) + geom_line() +
scale_x_reverse(breaks=c(1005, 1200, 1400), expand = c(0.01, 0.01)) +
scale_y_continuous(breaks=c(0, 0.5, 1.0, 1.5, 2.0), expand = c(0.01, 0.01)) +
scale_color_manual(values=c("#FF0000", "#0000CC", "#00CC00", "#FF00FF", "#FF9900", "#000000")) +
theme_bw() +
theme(legend.position = "bottom") +
theme(plot.margin=unit(c(1,1,0.5,1),"cm")) +
labs(x=expression(wavenumbers~"in"~cm^{"-1"}), y="absorbance in a.u.", colour = "") +
theme(legend.text=element_text(size=10), axis.text=element_text(size=12), axis.title=element_text(size=14)) +
guides(colour = guide_legend(ncol = 3, keywidth=1.5, keyheight=1, override.aes = list(size=1.2)))
ggsave("buechi-all.pdf", width = 11.69, height = 8.27)
One way is to add a sample id to your data frame before you reshape it. That will allow you to keep the names like "aaa" and "bbb" but assign a unique identifier to act as your grouping variable (since it cannot differentiate between two observations at the same x variable otherwise). For an example where I tried to mimic your input data:
# Build the example directly as a data frame so the intensities stay
# numeric. cbind() of strings and numbers yields a character matrix, which
# would turn every intensity into text and give a discrete y axis.
ex <- data.frame(
  sample_name = c("aaa", "aaa", "bbb", "bbb"),
  `4000` = c(0.426, 0.405, 0.409, 0.395),
  `4004` = c(0.430, 0.408, 0.411, 0.399),
  `4008` = c(0.432, 0.411, 0.413, 0.401),
  check.names = FALSE,
  stringsAsFactors = FALSE
)
# Unique id per measurement (row); used as the grouping variable so repeated
# measurements of one sample are drawn as separate lines.
ex$sample_id <- seq_len(nrow(ex))
# Stored as `ex_long` so the result does not mask reshape2's melt().
ex_long <- melt(ex, id.vars = c("sample_name", "sample_id"), variable.name = "wavenumber", value.name = "intensity")
ggplot(ex_long, aes(x = wavenumber, y = intensity, group = sample_id, color = sample_name)) +
  geom_line() +
  theme_classic()
This outputs separate lines for different measurements of samples grouped by sample id, but keeping the color according to the sample name:
Is that sort of what you're after?
Edits below
To show the same approach with a larger dataset:
# Ten sample names ("aaa", "bbb", ..., "jjj"), each measured twice.
alpha <- rep(strrep(letters[1:10], 3), each = 2)
n_samples <- length(alpha)
n_steps <- 1500

# Simulate each spectrum as a random walk: a starting intensity drawn from
# N(0.4, 0.1) followed by n_steps increments drawn from N(0.01, 0.01).
# The whole walk is built as one matrix instead of growing the data frame
# one cell at a time in a double loop.
start <- rnorm(n_samples, 0.4, 0.1)
steps <- matrix(rnorm(n_samples * n_steps, 0.01, 0.01), nrow = n_samples)
# apply(..., 1, cumsum) returns steps-by-samples, hence the transpose;
# `start` is recycled down the columns, i.e. added once per sample (row).
walk <- start + t(apply(steps, 1, cumsum))

adf <- data.frame(alpha, sample_id = seq_len(n_samples), start, walk)
adf[1:10, 1:10]
# Two id columns followed by one column per simulated wavenumber
# (1400, 1404, ...): the start value plus the n_steps walk columns.
anames <- c("sample_name", "sample_id", 1400 + 4 * seq(0, n_steps))
names(adf) <- anames
# Stored as `adf_long` so the result does not mask reshape2's melt().
adf_long <- melt(adf, id.vars = c("sample_name", "sample_id"), variable.name = "wavenumber", value.name = "intensity")
head(adf_long)
# Plot only the first 1500 rows of the long table to keep the figure small.
ggplot(adf_long[1:1500, ], aes(x = wavenumber, y = intensity, group = sample_id, color = sample_name)) +
  geom_line(lwd = 1.5) +
  theme_classic()
This will give a similar plot to the one above, where each sample has an individual line for each measurement that are both the same color.
If I'm still missing what you're actually after, I apologize!

Resources