I'm struggling to find a straightforward solution to fix my plot. The problem stems down to the discrete nature of the x-axis. I want to annotate the plot with text and segments in order to show statistical results.
1) I want to print the p-value between "Baby" and "Queen" as well as between "Queen" and "Worker", but ggplot only allows to annotate above each label, not between them.
2) Similarly, I want the first two geom_segments to be separated, but ggplot won't let me end the first one at something like "Queen"-0.1 and start the second one at "Queen"+0.1 as it is mixing factors and numbers.
Fully reproducible example below, with issues on line 12, 13 and 18:
data <- data.frame(Group.1 = rep(c("A","B"),3),Group.2 = c("Baby","Baby","Worker","Worker","Queen","Queen"),
value = c(0.18,0.30,0.09,0.25,-0.26,-0.55))
boxplot_candidates <- ggplot(aes(y=value,x=Group.2,fill=Group.2),data= data) + theme_bw() +
scale_fill_manual(values=c("lightgreen","darkgreen","goldenrod1"),name="") +
theme(plot.title = element_text(face="bold", size=18, hjust=0)) +
labs(x="",y="Transcript expression\n(log2-centered TMM-nornalised TPMs)") +
theme(plot.title=element_text(size=18, vjust=2),legend.position="", legend.text=element_text(size=14),
axis.text.x = element_text(size = 14, colour = "black"),
axis.text.y = element_text(size = 14, colour = "black"),
axis.title.y=element_text(size = 14, colour = "black",vjust=1),
axis.title.x=element_text(size = 14, colour = "black")) +
geom_segment(aes(x="Baby",xend="Queen",y=0.7,yend=0.7)) + ##### MAKE XEND SMALLER
geom_segment(aes(x="Queen",xend="Worker",y=0.7,yend=0.7)) + ##### MAKE XEND LARGER
geom_segment(aes(x="Baby",xend="Worker",y=1.2,yend=1.2)) +
ylim(-1.5,1.5) + stat_boxplot(geom ='errorbar') +
geom_boxplot(notch=F,outlier.shape=NA) +
geom_point(size=2,position = position_jitter(width = 0.2)) + stat_summary(fun.y=mean, colour = "white",geom="point", size=4) +
annotate("text", x = as.factor(unique(data$Group.2)),y=c(0.8,0.8,1.3),
label = c("p < 0.001","p < 0.001","p = 0.89"),family="",fontface = 3,size=4) ##### PRINT "p < 0.001" BETWEEN LABELS
print(boxplot_candidates)
Categorical variables are simply placed at locations 1, 2, 3, etc. If you want to reach locations between two categorical variables, you can use coordinates such as 1.2 or 1.5 etc.
Here is a reproducible example with all the irrelevant theme code stripped out:
data <- data.frame(Group.1 = rep(c("A", "B"), 3),
Group.2 = c("Baby", "Baby", "Worker", "Worker", "Queen", "Queen"),
value = c(0.18, 0.30, 0.09, 0.25, -0.26, -0.55))
ggplot(data, aes(y = value, x = Group.2, fill = Group.2)) +
stat_boxplot(geom = 'errorbar') +
geom_boxplot(notch = F, outlier.shape = NA) +
geom_segment(aes(x=1.1, xend=1.9, y=0.7, yend=0.7)) +
geom_segment(aes(x=2.1, xend=2.9, y=0.7, yend=0.7)) +
geom_segment(aes(x=1.1, xend=2.9, y=1.2, yend=1.2)) +
geom_point(size = 2, position = position_jitter(width = 0.2)) +
stat_summary(fun.y = mean, colour = "white", geom = "point", size = 4) +
annotate("text",
x = c(1.5, 2.5, 2),
y = c(0.8, 0.8, 1.3),
label = c("p < 0.001", "p < 0.001", "p = 0.89"),
family = "", fontface = 3, size=4) +
scale_fill_manual(values=c("lightgreen", "darkgreen", "goldenrod1"),
guide = "none") +
ylim(-1.5, 1.5) +
labs(x="", y="Transcript expression\n(log2-centered TMM-nornalised TPMs)") +
theme_bw()
Related
I have this data frame :
Raw.Score = c(0,1,2,3,4,5,6,7,8)
Severity = c(-3.56553994,-2.70296933,-1.63969850,-0.81321707,-0.04629182,
0.73721320,1.61278518,2.76647043,3.94804472)
x = data.frame(Raw.Score = Raw.Score, Severity = Severity)
Raw.score are raw numbers from 0 to 8 (let's consider them as the labels of the severity numbers)
Severity are relative numbres that represent the locations of the scores in the diagram
I want to graphically present the results as in the following example using ggplot (the example includes different numbers but I want something similar)
As a fun exercise in ggplot-ing here is one approach to achieve or come close to your desired result.
Raw.Score = c(0,1,2,3,4,5,6,7,8)
Severity = c(-3.56553994,-2.70296933,-1.63969850,-0.81321707,-0.04629182,
0.73721320,1.61278518,2.76647043,3.94804472)
dat <- data.frame(Raw.Score, Severity)
library(ggplot2)
dat_tile <- data.frame(
Severity = seq(-4.1, 4.1, .05)
)
dat_axis <- data.frame(
Severity = seq(-4, 4, 2)
)
tile_height = .15
ymax <- .5
ggplot(dat, aes(y = 0, x = Severity, fill = Severity)) +
# Axis line
geom_hline(yintercept = -tile_height / 2) +
# Colorbar
geom_tile(data = dat_tile, aes(color = Severity), height = tile_height) +
# Sgements connecting top and bottom labels
geom_segment(aes(xend = Severity, yend = -ymax, y = ymax), color = "orange") +
# Axis ticks aka dots
geom_point(data = dat_axis,
y = -tile_height / 2, shape = 21, stroke = 1, fill = "white") +
# ... and labels
geom_text(data = dat_axis, aes(label = Severity),
y = -tile_height / 2 - .1, vjust = 1, fontface = "bold") +
# Bottom labels
geom_label(aes(y = -ymax, label = scales::number(Severity, accuracy = .01))) +
# Top labels
geom_point(aes(y = ymax, color = Severity), size = 8) +
geom_text(aes(y = ymax, label = Raw.Score), fontface = "bold") +
# Colorbar annotations
annotate(geom = "text", fontface = "bold", label = "MILD", color = "black", x = -3.75, y = 0) +
annotate(geom = "text", fontface = "bold", label = "SEVERE", color = "white", x = 3.75, y = 0) +
# Fixing the scales
scale_x_continuous(expand = c(0, 0)) +
scale_y_continuous(limits = c(-ymax, ymax)) +
# Color gradient
scale_fill_gradient(low = "orange", high = "red", guide = "none") +
scale_color_gradient(low = "orange", high = "red", guide = "none") +
# Get rid of all non-data ink
theme_void() +
# Add some plot margin
theme(plot.margin = rep(unit(10, "pt"), 4)) +
coord_cartesian(clip = "off")
I created a plot that turned out mostly how I'd like it in ggplot but I need the lines to appear in a slightly different color arrangement.
Basically, I need all "mean" lines to appear in blue and all "odd" lines to appear in red. Pref 1 will appear in either the lighter or darker shade and vice versa. As you can see ggplot has not quite done that.
p2 <- ggplot(asd_pref_plot_groups, aes(x, pref_plot_groups$predicted, col = combined)) +
geom_line(size=1.5) +
scale_color_manual(values = c("blue","deepskyblue","red","pink")) +
geom_ribbon(aes(ymin=conf.low,ymax=conf.high, fill=combined),alpha=.2,colour=NA) +
scale_fill_manual(values = c("blue","deepskyblue","red","pink")) +
geom_point(data=summStats,aes(trial,mean,col = combined),size=2) +
scale_color_manual(values = c("blue","deepskyblue","red","pink")) +
theme_bw() +
xlab('Trial') +
ylab('Prediction Error') +
ggtitle('ASD learning about TD vs. ASD \n learning about ASD') +
theme(text=element_text(size=20),
plot.title = element_text(hjust = 0.5),
panel.border = element_blank())
Above is my code. I thought I could shift around scale_color_manual as needed but it doesn't seem to work? Is there an easy fix or does this extend to my data frames. Thank you
Your question didn't include any example data, so I have had to try to recreate your data set (see footnote)
To ensure we are on the right track, I will use exactly your plotting code to get a very similar plot:
ggplot(asd_pref_plot_groups, aes(x, pref_plot_groups$predicted, col = combined)) +
geom_line(size=1.5) +
scale_color_manual(values = c("blue","deepskyblue","red","pink")) +
geom_ribbon(aes(ymin = conf.low, ymax = conf.high, fill = combined),
alpha = 0.2, colour = NA) +
scale_fill_manual(values = c("blue","deepskyblue","red","pink")) +
geom_point(data = summStats, aes(trial, mean,col = combined), size = 2) +
scale_color_manual(values = c("blue","deepskyblue","red","pink")) +
theme_bw() +
xlab('Trial') +
ylab('Prediction Error') +
ggtitle('ASD learning about TD vs. ASD \n learning about ASD') +
theme(text=element_text(size=20),
plot.title = element_text(hjust = 0.5),
panel.border = element_blank())
All we need to do here is to remove one of your redundant scale_color_manual calls (you currently have 2), and change the ordering of the colors in both the fill and color scales:
ggplot(asd_pref_plot_groups, aes(x, pref_plot_groups$predicted,
col = combined, fill = combined)) +
geom_line(size = 1.5) +
geom_ribbon(aes(ymin = conf.low, ymax = conf.high),
alpha = 0.2, colour = NA) +
scale_fill_manual(values = c("blue","red", "deepskyblue", "pink")) +
scale_color_manual(values = c("blue","red","deepskyblue", "pink")) +
geom_point(data = summStats, aes(trial, mean,col = combined), size = 2) +
theme_bw() +
xlab('Trial') +
ylab('Prediction Error') +
ggtitle('ASD learning about TD vs. ASD \n learning about ASD') +
theme(text=element_text(size=20),
plot.title = element_text(hjust = 0.5),
panel.border = element_blank())
Footnote: Reproducible data to approximate data in question
set.seed(1)
asd_pref_plot_groups <- data.frame(x = rep(c(1, 60), 4),
combined = rep(c('pref1_mean', 'pref1_odd',
'pref2_mean', 'pref2_odd'),
each = 2),
predicted = c(1.3, 1.3, 1.45, 1.3,
2, 1.75, 2.05, 1.77),
conf.high = c(1.35, 1.35, 1.5, 1.35,
2.05, 1.8, 2.1, 1.82),
conf.low = c(1.25, 1.25, 1.4, 1.25,
1.95, 1.7, 2, 1.72))
pref_plot_groups <- asd_pref_plot_groups
summStats <- data.frame(trial = rep(1:60, 4),
combined = rep(c('pref1_mean', 'pref1_odd',
'pref2_mean', 'pref2_odd'),
each = 60),
mean = c(rnorm(60, seq(1.3, 1.3, length = 60), 0.05),
rnorm(60, seq(1.45, 1.3, length = 60), 0.05),
rnorm(60, seq(2, 1.75, length = 60), 0.05),
rnorm(60, seq(2.05, 1.77, length = 60), 0.05)))
I am trying to align significance asterisks (* or ** or ***) to the points of a geom point graph with position dodge to indicate the significance of a value using ggplot2. I wasn't able to find any similar questions and answers with similar issue.
Here is data frame 'df':
df<-data.frame(conc=c(1,10,100,1, 10,100,1, 10, 100),
mean=c( 0.9008428,0.8278645,0.7890388,0.9541905,
0.8537885,0.8212504,1.3828724,0.7165685, 0.7985398),
Treatment=c("A","A","A","B", "B", "B","C","C", "C"),
upper =c(1.0990144, 0.9505348, 0.8273494, 1.0389074, 0.9227461, 0.9657371, 1.6864420, 0.7401891, 0.9046951),
lower=c(0.7026713, 0.7051941, 0.7507282, 0.9528077, 0.7848309, 0.6767638, 1.0793029, 0.6929479, 0.6923846),
p.value=c(0.0003, 0.6500, 1,0.02,0.0400,
0.3301,0.100,0.023, 0.05))
I made a plot with an automatic asterisk, but it is not aligned how i want to, and i believe it's because of position_dodge, but i have too many points in one concentration, so i have to use it (given data frame is minimal).
legend_title <- "Treatment"
breaks_y =c(0, 0.25, 0.5, 0.75, 1, 1.25, 1.5)
breaks = c(1, 10, 100)
df$Label <- NA
df$Label[df$p.value<0.001]<-'***'
df$Label[df$p.value<0.01 & is.na(df$Label)]<-'**'
df$Label[df$p.value<0.05 & is.na(df$Label)]<-'*'
ggplot(df, aes(x = conc, y = mean, color = Treatment)) +
geom_errorbar(aes(ymax = upper, ymin = lower, width = 0),position = position_dodge(width=0.5)) +
geom_point(aes(shape = Treatment, fill = Treatment), size = 4, position = position_dodge(width=0.5)) +
geom_text(aes(label = Label),size = 4, position = position_dodge(width =0.5), color = "black") +
scale_shape_manual(values = c(22, 21, 23)) +
scale_color_manual(values=c('blue','coral1', 'darkgreen' )) +
scale_fill_manual(values=c('blue','coral1', 'darkgreen')) +
labs(x = "Concentration (\u03BCM)", y = "Abs", title = "Viability", fill = "Treatment") +
scale_x_continuous(trans="log10", limits = c(0.5, 170), breaks = breaks) +
scale_y_continuous(limits = c(0, 1.5), breaks = breaks_y) +
theme_light() +
ggpubr::rotate_x_text(angle = 70) +
theme(axis.text = element_text(size = 12, face = "bold"),
axis.title.y = element_text(size = 12, face ="bold"),
axis.title.x = element_text(size = 12, face ="bold"))
How can I align the asterisk automatically to be directly above the correct dot with position_dodge?
I am attempting to recreate some plots from a research article in R and am running into an issue with applying a log scale to y axis. The visualization I'm attempting to recreate is this:
reference plot with y log scale
I currently have a working version without the logarithmic scale applied to the y-axis:
Proportion_Mean_Plot <- ggplot(proportions, aes(days2,
proportion_mean, group = observation)) +
geom_point(aes(shape = observation)) +
geom_line() +
scale_x_continuous(breaks = seq(0,335,20)) +
scale_y_continuous(breaks = seq(0,6,.5)) +
theme_tufte() +
geom_rangeframe() +
theme(legend.position="none") +
theme(axis.line.x = element_line(colour = "black", size = 0.5, linetype = 1),
axis.line.y = element_line(colour = "black", size = 0.5, linetype = 1)) +
labs(title = "Proportion of Baseline Mean",
subtitle = "Daily steps within each intervention phase",
x = "DAYS",
y = "PROPORTION OF BASELINE \n(MEAN)") +
geom_vline(xintercept = 164.5) +
geom_hline(yintercept = 1) +
annotate("text", x = c(82, 246), y = 5,
label = c("Intervention 1", "Intervention 2")) +
geom_segment(aes(x = 0, y = mean, xend = end, yend = mean),
data = proportion_intervention1_data) +
geom_segment(aes(x = start, y = mean, xend = end, yend = mean),
data = proportion_intervention2_data, linetype = 4)
This produces a decent representation of the original:
normally scaled y-axis plot
I would like to try to apply that logarithmic scaling to more closely match it. Any help is appreciated.
As per Richard's suggestion, here is a quick example how you can use scale_y_log10:
suppressPackageStartupMessages(library(tidyverse))
set.seed(123)
# generate some data
proportions <- tibble(interv_1 = pmax(0.4, rnorm(160, mean = 1.3, sd = 0.2)),
interv_2 = pmax(0.01, rnorm(160, mean = 1.6, sd = 0.5)))
proportions <- proportions %>%
gather(key = observation, value = proportion_mean) %>%
mutate(days2 = 1:320)
# create the plot
ggplot(proportions, aes(days2, proportion_mean, group = observation)) +
geom_point(aes(shape = observation)) +
geom_line() +
scale_x_continuous(breaks = seq(0,335,20), expand = c(0, 0)) +
scale_y_log10(breaks = c( 0.1, 0.5, 1, 2, 3, 4, 5), limits = c(0.1, 5)) +
# theme_tufte() +
# geom_rangeframe() +
theme(legend.position="none") +
theme(axis.line.x = element_line(colour = "black", size = 0.5, linetype = 1),
axis.line.y = element_line(colour = "black", size = 0.5, linetype = 1)) +
labs(title = "Proportion of Baseline Mean",
subtitle = "Daily steps within each intervention phase",
x = "DAYS",
y = "PROPORTION OF BASELINE \n(MEAN)") +
geom_vline(xintercept = 164.5) +
geom_hline(yintercept = 1) +
annotate("text", x = c(82, 246), y = 5,
label = c("Intervention 1", "Intervention 2")) +
# plugged the values for the means of the two distributions
geom_segment(aes(x = 0, y = 1.3, xend = 164.5, yend = 1.3)) +
geom_segment(aes(x = 164.5, y = 1.6, xend = 320, yend = 1.6), linetype = 4)
I have a data set with 2 factors (MACH & YOU) Id like to produce a BoxPlot using ggplot2 and have the BoxPlot colour split by MACH whilst highlighting certain points (YOU) in a different shape and in Black..?
I can get the plot working but i can't make the (YOU) factor be bigger in terms of shape and make it black...without effecting all other points on the graph.
Ignore the commented lines - I was just playing around with those.
My dataframe x has the form
MEDIAN MACH YOU PROD
34.5 tool1 false ME
33.8 tool1 false ME
32.9 tool2 true ME
30.1 tool2 true ME
33.8 tool2 false.....etc
x<- data.frame(MEDIAN=c(34,32,56,34,45,34,45,33,23), MACH=c("t1","t1","t1","t2","t2","t2","t1","t1","t2"), YOU=c("false","false","false","false","true","true","true","false","false"), PROD="U","U","U","U","U","U","U","U","U")
ggplot(data=x,aes(MACH,MEDIAN ))+
geom_boxplot(fill = "white", colour = "blue")+
theme(panel.grid.minor = element_line(colour = "grey"), plot.title = element_text(size = rel(0.8)),axis.text.x = element_text(angle=90, vjust=1), strip.text.x = element_text(size = 8, colour = "black", face = "bold")) +
#geom_abline(colour = "grey80")+
#geom_point(shape = factor(YOURLOTS)), size = 3) +
#geom_hline(yintercept=x$TARG_AVG,colour = "green")+
#geom_hline(yintercept=x$TARG_MIN,colour = "red")+
#geom_hline(yintercept=x$TARG_MAX,colour = "red")+
geom_point(alpha = 0.6, position = position_jitter(w = 0.05, h = 0.0), aes(colour=factor(MACH),shape = factor(YOU)), size =3)+
facet_wrap(~PROD, scales = "free") +
ggtitle("MyTitle") +
scale_size_area() +
xlab("STAGE HIST EQUIPID")+
ylab("yaxis")
If you want to make the points for YOU of different size, depending on their value, you can add aes(size = factor(YOU)) inside geom_point().
You can choose the range of size of the points adding scale_size_discrete(range = c(3, 6)) to you plot. In this example, the minimum size would be 3 and the maximum value would be 6.
That would be
ggplot(data = x, aes(MACH, MEDIAN)) +
geom_boxplot(fill = "white", aes(color = MACH)) +
geom_point(aes(shape = factor(YOU), size = factor(YOU)), color = "black", alpha = 0.6, position = position_jitter(w = 0.05, h = 0.0)) +
labs(title = "My Title", x = "Stage Hist Equip ID", y = "y-axis") +
scale_size_discrete(range = c(3, 6))
I would solve this by using two subsets and two calls to geom_point():
library(ggplot2)
x <- data.frame(MEDIAN = c(34,32,56,34,45,34,45,33,23),
MACH = c("t1","t1","t1","t2","t2","t2","t1","t1","t2"),
YOU = c("false","false","false","false","true","true","true","false","false"),
PROD = c("U","U","U","U","U","U","U","U","U"))
ggplot(data = x, aes(MACH, MEDIAN)) +
geom_boxplot(fill = "white", colour = "blue") +
geom_point(data = subset(x, YOU != "true"), aes(color = MACH),
size = 8, alpha = 0.6,
position = position_jitter(w = 0.05, h = 0.0)) +
geom_point(data = subset(x, YOU == "true"), aes(shape = YOU),
color = "black", size = 8, alpha = 0.6,
position = position_jitter(w = -0.05, h = 0.0)) +
labs(title = "My Title", x = "Stage Hist Equip ID", y = "y-axis")