How do I change the geom plotting order in legend only? - r

I am making a plot in ggplot2 that contains a geom_pointrange and a geom_line. I see that when I change the order of the geoms, either the points are plotted on top of the line, or vice versa. The legend also changes which geom is plotted on top of the other based on the same ordering of the geoms. However, I would like for the line to plot first, then the pointrange on top, in the plot itself, with the opposite in the legend. Is this possible? I would greatly appreciate any input.
Here is the code I used to make the figure.
md.figd2 <- structure(list(date = c("2013-05-28", "2013-07-11", "2013-09-22",
"2013-05-28", "2013-07-11", "2013-09-22", "2013-05-28", "2013-07-11",
"2013-09-22"), trt = structure(c(3L, 3L, 3L, 1L, 1L, 1L, 2L,
2L, 2L), .Label = c("- Fescue", "- Random", "Control"), class = "factor"),
means = c(1, 0.921865257043089, 0.793438250521971, 1, 0.878305313846414,
0.85698797555687, 1, 0.840679145697309, 0.798547331410388
), mins = c(1, 0.87709562979756, 0.72278951032918, 1, 0.816185624483356,
0.763720265496049, 1, 0.780804129401513, 0.717089626439849
), maxes = c(1, 0.966634884288619, 0.864086990714762, 1,
0.940425003209472, 0.950255685617691, 1, 0.900554161993105,
0.880005036380927)), .Names = c("date", "trt", "means", "mins",
"maxes"), row.names = c(NA, 9L), class = "data.frame")
library(ggplot2)
dplot1.ysc <- scale_y_continuous(limits=c(0,1), breaks=seq(0,1,.2), name='Proportion mass lost')
dplot1.xsc <- scale_x_date(limits=as.Date(c('2013-05-23', '2013-10-03')), labels=c('May 28', 'July 11', 'Sep 22'), breaks=md.figdata$date, name='Date')
dplot1.csc <- scale_color_manual(values=c('grey20','grey50','grey80'))
dplot1.lsc <- scale_linetype_manual(values=c('solid','dotted','dashed'))
djitter <- rep(c(0,-1,1), each=3)
# This one produces the plot with the legend I want.
dplot1b <- ggplot(md.figd2, aes(x=date + djitter, y=means, group=trt)) + geom_pointrange(aes(ymin=mins, ymax=maxes, color=trt), size=2) + geom_line(aes(linetype=trt), size=1)
# This one produces the plot with the points on the main plot that I want.
dplot1b <- ggplot(md.figd2, aes(x=date + djitter, y=means, group=trt)) + geom_line(aes(linetype=trt), size=1) + geom_pointrange(aes(ymin=mins, ymax=maxes, color=trt), size=2)
dplot1b + dplot1.xsc + dplot1.ysc + dplot1.csc + dplot1.lsc

You can use gtable::gtable_filter to extract the legend from the plot you want, and then gridExtra::grid.arrange to recreate the plot you want
# the legend I want
plot1a <- ggplot(md.figd2, aes(x=date , y=means, group=trt)) +
geom_pointrange(aes(ymin=mins, ymax=maxes, color=trt), size=2,
position = position_dodge(width=1)) +
geom_line(aes(linetype=trt), size=1)
# This one produces the plot with the points on the main plot that I want.
dplot1b <- ggplot(md.figd2, aes(x=date, y=means, group=trt)) +
geom_line(aes(linetype=trt), size=1) +
geom_pointrange(aes(ymin=mins, ymax=maxes, color=trt), size=2)
w <- dplot1b + dplot1.xsc + dplot1.ysc + dplot1.csc + dplot1.lsc
# legend
l <- dplot1a + dplot1.xsc + dplot1.ysc + dplot1.csc + dplot1.lsc
library(gtable)
library(gridExtra)
# extract legend ("guide-box" element)
leg <- gtable_filter(ggplot_gtable(ggplot_build(l)), 'guide-box')
# plot the two components, adjusting the widths as you see fit.
grid.arrange(w + theme(legend.position='none'),leg,ncol=2, widths = c(3,1))
An alternative is to simply replace the legend in the plot you want with the legend you want that you have extracted (using gtable_filter)
# create ggplotGrob of plot you want
wGrob <- ggplotGrob(w)
# replace the legend
wGrob$grobs[wGrob$layout$name == "guide-box"][[1]] <- leg
grid.draw(wGrob)

Quick and dirty. To get the correct plotting order in both figure and legend, add the layers like this: (1) geom_pointrange, (2) geom_line, and then (3) a second geom_pointrange without legend (show.legend = FALSE).
ggplot(md.figd2, aes(x = date, y = means, group = trt)) +
geom_pointrange(aes(ymin = mins, ymax = maxes, color = trt),
position = position_dodge(width = 5), size = 2) +
geom_line(aes(linetype = trt), size = 1) +
geom_pointrange(aes(ymin = mins, ymax = maxes, color = trt),
position = position_dodge(width = 5), size = 2,
show.legend = FALSE) +
scale_y_continuous(limits = c(0,1), breaks = seq(0,1, 0.2), name = 'Proportion mass lost') +
scale_x_date(limits = as.Date(c('2013-05-23', '2013-10-03')), name = 'Date') +
scale_color_manual(values = c('grey20', 'grey50', 'grey80')) +
scale_linetype_manual(values = c('solid', 'dotted', 'dashed'))

Related

Add an additional X axis to the plot and some lines/annotations to show the percentage of data under it

I was trying to recreate this plot:
using the following code -
library(tidyverse)
set.seed(0); r <- rnorm(10000);
df <- as.data.frame(r)
avg <- round(mean(r),2)
SD <- round(sd(r),2)
x.scale <- seq(from = avg - 3*SD, to = avg + 3*SD, by = SD)
x.lab <- c("-3SD", "-2SD", "-1SD", "Mean", "1SD", "2SD", "3SD")
df %>% ggplot(aes(r)) +
geom_histogram(aes(y=..density..), bins = 20,
colour="black", fill="lightblue") +
geom_density(alpha=.2, fill="darkblue") +
scale_x_continuous(breaks = x.scale, labels = x.lab) +
labs(x = "")
Using the code I plotted this:
,
but this isn't near to the plot that I am trying to create. How do I make an additional axis with the X axis? How do I add the lines to automatically show the percentage of observations? Is there any way, that I can create the plot as nearly identical as possible using ggplot2?
Welcome to SO. Excellent first question!
It's actually quite tricky. You'd need to create a second plot (the second x axis) but it's not the most straight forward to align both perfectly.
I will be using Z.lin's amazing modification of the cowplot package.
I am not using the reprex package, because I think I'd need to define every single function (and I don't know how to use trace within reprex.)
library(tidyverse)
library(cowplot)
set.seed(0); r <- rnorm(10000);
foodf <- as.data.frame(r)
avg <- round(mean(r),2)
SD <- round(sd(r),2)
x.scale <- round(seq(from = avg - 3*SD, to = avg + 3*SD, by = SD), 1)
x.lab <- c("-3SD", "-2SD", "-1SD", "Mean", "1SD", "2SD", "3SD")
x2lab <- -3:3
# calculate the density manually
dens_r <- density(r)
# for each x value, calculate the closest x value in the density object and get the respective y values
y_dens <- dens_r$y[sapply(x.scale, function(x) which.min(abs(dens_r$x - x)))]
# added annotation for segments and labels.
# Arrow segments can be added in a similar way.
p1 <-
ggplot(foodf, aes(r)) +
geom_histogram(aes(y=..density..), bins = 20,
colour="black", fill="lightblue") +
geom_density(alpha=.2, fill="darkblue") +
scale_x_continuous(breaks = x.scale, labels = x.lab) +
labs(x = NULL) +# use NULL here
annotate(geom = "segment", x = x.scale, xend = x.scale,
yend = 1.1 * max(dens_r$y), y = y_dens, lty = 2 ) +
annotate(geom = "text", label = x.lab,
x = x.scale, y = 1.2 * max(dens_r$y))
p2 <-
ggplot(foodf, aes(r)) +
scale_x_continuous(breaks = x.scale, labels = x2lab) +
labs(x = NULL) +
theme_classic() +
theme(axis.line.y = element_blank())
# This is with the modified plot_grid() / align_plot() function!!!
plot_grid(p1, p2, ncol = 1, align = "v", rel_heights = c(1, 0.1))

How to add direct labels to a bar chart in ggplot for numeric x axis

I am trying to create a bar chart in ggplot where the widths of the bars are associated with a variable Cost$Sum.of.FS_P_Reduction_Kg. I am using the argument width=Sum.of.FS_P_Reduction_Kg to set the width of the bars according to a variable.
I want to add direct labels to the chart to label each bar, similar to the image documented below. I am also seeking to add in x axis labels corresponding to the argument width=Sum.of.FS_P_Reduction_Kg. Any help would be greatly appreciated. I am aware of ggrepel but haven't been able to get the desired effect so far.
I have used the following code:
# Plot the data
P1 <- ggplot(Cost,
aes(x = Row.Labels,
y = Average.of.Cost_Per_Kg_P_Removal.undiscounted..LOW_Oncost,
width = Average.of.FS_Annual_P_Reduction_Kg, label = Row.Labels)) +
geom_col(fill = "grey", colour = "black") +
geom_label_repel(
arrow = arrow(length = unit(0.03, "npc"), type = "closed", ends = "first"),
force = 10,
xlim = NA) +
facet_grid(~reorder(Row.Labels,
Average.of.Cost_Per_Kg_P_Removal.undiscounted..LOW_Oncost),
scales = "free_x", space = "free_x") +
labs(x = "Measure code and average P reduction (kg/P/yr)",
y = "Mean annual TOTEX (£/kg) of P removal (thousands)") +
coord_cartesian(expand = FALSE) + # remove spacing within each facet
theme_classic() +
theme(strip.text = element_blank(), # hide facet title (since it's same as x label anyway)
panel.spacing = unit(0, "pt"), # remove spacing between facets
plot.margin = unit(c(rep(5.5, 3), 10), "pt"), # more space on left for axis label
axis.title=element_text(size=14),
axis.text.y = element_text(size=12),
axis.text.x = element_text(size=12, angle=45, vjust=0.2, hjust=0.1)) +
scale_x_discrete(labels = function(x) str_wrap(x, width = 10))
P1 = P1 + scale_y_continuous(labels = function(x) format(x/1000))
P1
The example data table can be reproduced with the following code:
> dput(Cost)
structure(list(Row.Labels = structure(c(1L, 2L, 6L, 9L, 4L, 3L,
5L, 7L, 8L), .Label = c("Change the way P is applied", "Improve management of manure",
"In channel measures to slow flow", "Keep stock away from watercourses",
"No till trial ", "Reduce runoff from tracks and gateways", "Reversion to different vegetation",
"Using buffer strips to intercept pollutants", "Water features to intercept pollutants"
), class = "factor"), Average.of.FS_Annual_P_Reduction_Kg = c(0.11,
1.5425, 1.943, 3.560408144, 1.239230769, 18.49, 0.091238043,
1.117113762, 0.11033263), Average.of.FS_._Change = c(0.07, 0.975555556,
1.442, 1.071692763, 1.212307692, 8.82, 0.069972352, 0.545940711,
0.098636339), Average.of.Cost_Per_Kg_P_Removal.undiscounted..LOW_Oncost = c(2792.929621,
2550.611429, 964.061346, 9966.056875, 2087.021801, 57.77580744,
165099.0425, 20682.62962, 97764.80805), Sum.of.Total_._Cost = c(358.33,
114310.49, 19508.2, 84655, 47154.23, 7072, 21210, 106780.34,
17757.89), Average.of.STW_Treatment_Cost_BASIC = c(155.1394461,
155.1394461, 155.1394461, 155.1394461, 155.1394461, 155.1394461,
155.1394461, 155.1394461, 155.1394461), Average.of.STW_Treatment_Cost_HIGH = c(236.4912345,
236.4912345, 236.4912345, 236.4912345, 236.4912345, 236.4912345,
236.4912345, 236.4912345, 236.4912345), Average.of.STW_Treatment_Cost_INTENSIVE = c(1023.192673,
1023.192673, 1023.192673, 1023.192673, 1023.192673, 1023.192673,
1023.192673, 1023.192673, 1023.192673)), class = "data.frame", row.names = c(NA,
-9L))
I think it will be easier to do a bit of data prep so you can put all the boxes in one facet with a shared x-axis. For instance, we can calc the cumulative sum of reduction Kg, and use that to define the starting x for each box.
EDIT -- added ylim = c(0, NA), xlim = c(0, NA), to keep ggrepel::geom_text_repel text within positive range of plot.
library(ggplot2)
library(ggrepel)
library(stringr)
library(dplyr)
Cost %>%
arrange(desc(Average.of.Cost_Per_Kg_P_Removal.undiscounted..LOW_Oncost)) %>%
mutate(Row.Labels = forcats::fct_inorder(Row.Labels),
cuml_reduc = cumsum(Average.of.FS_Annual_P_Reduction_Kg),
bar_start = cuml_reduc - Average.of.FS_Annual_P_Reduction_Kg,
bar_center = cuml_reduc - 0.5*Average.of.FS_Annual_P_Reduction_Kg) %>%
ggplot(aes(xmin = bar_start, xmax = cuml_reduc,
ymin = 0, ymax = Average.of.Cost_Per_Kg_P_Removal.undiscounted..LOW_Oncost)) +
geom_rect(fill = "grey", colour = "black") +
geom_text_repel(aes(x = bar_center,
y = Average.of.Cost_Per_Kg_P_Removal.undiscounted..LOW_Oncost,
label = str_wrap(Row.Labels, 15)),
ylim = c(0, NA), xlim = c(0, NA), ## EDIT
size = 3, nudge_y = 1E4, nudge_x = 2, lineheight = 0.7,
segment.alpha = 0.3) +
scale_y_continuous(labels = scales::comma) +
labs(x = "Measure code and average P reduction (kg/P/yr)",
y = "Mean annual TOTEX (£/kg) of P removal (thousands)")
You could experiment with scaling the values a little bit, e.g. using logarithmization. Since I prefer baseplots over gglplot2 I show you a base solution using barplot accordingly.
First, we transform the firs column into rownames and delete it.
cost <- `rownames<-`(Cost[-1], Cost[,1])
Defining widths in barplot is quite straightforward, since it has an option width= where we put in the logarithmized values of the according variable. For the bar-labels we need to calculate the positions and use text; to achieve line-wraps we may use strwrap. A label can conveniently left out if it's a hardship case (as #6 in the example). Finally we use (headless) arrows .
# logarithmize values
w <- log(w1 <- cost$Average.of.Cost_Per_Kg_P_Removal.undiscounted..LOW_Oncost)
# define vector labels inside / outside, at best by hand
inside <- as.logical(c(0, 1, 0, 1, 1, 0, 1, 1, 1))
# calculate `x0` values of labels
x0 <- w / 2 + c(0, cumsum(w)[- length(w)])
# define y values o. labels
y0 <- ifelse(inside, colSums(t(cost)) / 2, 1.5e5)
# make labels using 'strwrap'
labs <- mapply(paste, strwrap(rownames(cost), 15, simplify=F), collapse="\n")
# define nine colors
colores <- hcl.colors(9, "Spectral", alpha=.7)
# the actual plot
b <- barplot(cs <- colSums(t(cost)), width=w, space=0, ylim=c(1, 2e5),
xlim=c(-1, 80), xaxt="n", xaxs="i", col=colores, border=NA,
xlab="Measure code and average P reduction (kg/P/yr)",
ylab="Mean annual TOTEX (£/kg) of P removal (thousands)")
# place lables, leave out # 6
text(x0[-6], y0[-6], labels=labs[-6], cex=.7)
# arrows
arrows(x0[c(1, 3)], 1.35e5, x0[c(1, 3)], cs[c(1, 3)], length=0)
# label # 6
text(40, 1e5, labs[6], cex=.7)
# arrow # 6
arrows(40, 8.4e4, x0[6], cs[6], length=0)
# make x axis
axis(1, c(0, cumsum(log(seq(0, 1e5, 1e4)[-1]))),
labels=format(c(0, cumsum(seq(0, 1e5, 1e4)[-1])), format="d"), tck=-.02)
# put it in a box
box()
Result
I hope I got the x axis values right.
You probably have to figure out a little how the probably new functions work, but it's quite easy using the help files, e.g. type ?barplot.

Karyogram and SNP with ggplot2 (geom_path, geom_bar, ggbio)

I want to plot a karyogram with SNP markers.
It works with function segments but I want to use ggplot2 package to display an elegant graphic.
ggbio:
I checked the package ggbio with the function layout_karyogram but the chromosomes are plotted in a vertical position. I didn't find a way to rotate the graph with the name below each chromosome and to write the name of my SNP next to their segment.
geom_bar:
Then I tried geom_bar from the package ggplot2:
data<-data.frame(chromosome=paste0("chr", 1:4),size=c(100,400,300,200),stringsAsFactors = FALSE)
dat$chromosome<-factor(dat$chromosome, levels = dat$chromosome)
SNP<-data.frame(chromosome=c(1,1,2,3,3,4),Position=c(50,70,250,20,290,110),Type=c("A","A","A","B","B","B"),labels=c("SNP1","SNP2","SNP3","SNP4","SNP5","SNP6"))
p <- ggplot(data=data, aes(x=chromosome, y=size)) + geom_bar( stat="identity", fill="grey70",width = .5) +theme_bw()
p + geom_segment(data=SNP, aes(x=SNP$chromosome-0.2, xend=SNP$chromosome+0.2, y=SNP$Position,yend=SNP$Position,colour=SNP$Type), size=1) +annotate("text", label =SNP$labels, x =SNP$chromosome-0.5, y = SNP$Position, size = 2, colour= "red")
The only problem here, it looks more like a barplot than a chromosome. I would like to have rounded extremities. I found someone who got the same problem as I am.
geom_path:
Instead of using geom_bar, I used geom_path with the option lineend = "round" to get rounded extremities.
ggplot() + geom_path(data=NULL, mapping=aes(x=c(1,1), y=c(1,100)),size=3, lineend="round")
The shape looks quite good. So I tried to run the code for severals chromosomes.
p <- ggplot()
data<-data.frame(chromosome=paste0("chr", 1:4),size=c(100,400,300,200),stringsAsFactors = FALSE)
for (i in 1:length(data[,1])){
p <- p + geom_path(data=NULL, mapping=aes(x=c(i,i), y=c(1,data[i,2])), size=3, lineend="round")
}
It doesn't work, I don't know why but p only save the last chromosome instead of plotting the four chromosomes in my karyogram.
Any suggestions for these problems ?
I would go for geom_segment. The x start/end of the SNP segments are hardcoded (as.integer(chr) -+ 0.05), but otherwise the code is fairly straightforward.
ggplot() +
geom_segment(data = data,
aes(x = chr, xend = chr, y = 0, yend = size),
lineend = "round", color = "lightgrey", size = 5) +
geom_segment(data = SNP,
aes(x = as.integer(chr) - 0.05, xend = as.integer(chr) + 0.05,
y = pos, yend = pos, color = type),
size = 1) +
theme_minimal()
data <- data.frame(chr = paste0("chr", 1:4),
size = c(100, 400, 300, 200))
SNP <- data.frame(chr = paste0("chr", c(1, 1, 2, 3, 3, 4)),
pos = c(50, 70, 250, 20, 290, 110),
type = c("A", "A", "A", "B", "B", "B"))

Put stars on ggplot barplots and boxplots - to indicate the level of significance (p-value)

It's common to put stars on barplots or boxplots to show the level of significance (p-value) of one or between two groups, below are several examples:
The number of stars are defined by p-value, for example one can put 3 stars for p-value < 0.001, two stars for p-value < 0.01, and so on (although this changes from one article to the other).
And my questions: How to generate similar charts? The methods that automatically put stars based on significance level are more than welcome.
I know that this is an old question and the answer by Jens Tierling already provides one solution for the problem. But I recently created a ggplot-extension that simplifies the whole process of adding significance bars: ggsignif
Instead of tediously adding the geom_line and geom_text to your plot you just add a single layer geom_signif:
library(ggplot2)
library(ggsignif)
ggplot(iris, aes(x=Species, y=Sepal.Length)) +
geom_boxplot() +
geom_signif(comparisons = list(c("versicolor", "virginica")),
map_signif_level=TRUE)
To create a more advanced plot similar to the one shown by Jens Tierling, you can do:
dat <- data.frame(Group = c("S1", "S1", "S2", "S2"),
Sub = c("A", "B", "A", "B"),
Value = c(3,5,7,8))
ggplot(dat, aes(Group, Value)) +
geom_bar(aes(fill = Sub), stat="identity", position="dodge", width=.5) +
geom_signif(stat="identity",
data=data.frame(x=c(0.875, 1.875), xend=c(1.125, 2.125),
y=c(5.8, 8.5), annotation=c("**", "NS")),
aes(x=x,xend=xend, y=y, yend=y, annotation=annotation)) +
geom_signif(comparisons=list(c("S1", "S2")), annotations="***",
y_position = 9.3, tip_length = 0, vjust=0.4) +
scale_fill_manual(values = c("grey80", "grey20"))
Full documentation of the package is available at CRAN.
Please find my attempt below.
First, I created some dummy data and a barplot which can be modified as we wish.
windows(4,4)
dat <- data.frame(Group = c("S1", "S1", "S2", "S2"),
Sub = c("A", "B", "A", "B"),
Value = c(3,5,7,8))
## Define base plot
p <-
ggplot(dat, aes(Group, Value)) +
theme_bw() + theme(panel.grid = element_blank()) +
coord_cartesian(ylim = c(0, 15)) +
scale_fill_manual(values = c("grey80", "grey20")) +
geom_bar(aes(fill = Sub), stat="identity", position="dodge", width=.5)
Adding asterisks above a column is easy, as baptiste already mentioned. Just create a data.frame with the coordinates.
label.df <- data.frame(Group = c("S1", "S2"),
Value = c(6, 9))
p + geom_text(data = label.df, label = "***")
To add the arcs that indicate a subgroup comparison, I computed parametric coordinates of a half circle and added them connected with geom_line. Asterisks need new coordinates, too.
label.df <- data.frame(Group = c(1,1,1, 2,2,2),
Value = c(6.5,6.8,7.1, 9.5,9.8,10.1))
# Define arc coordinates
r <- 0.15
t <- seq(0, 180, by = 1) * pi / 180
x <- r * cos(t)
y <- r*5 * sin(t)
arc.df <- data.frame(Group = x, Value = y)
p2 <-
p + geom_text(data = label.df, label = "*") +
geom_line(data = arc.df, aes(Group+1, Value+5.5), lty = 2) +
geom_line(data = arc.df, aes(Group+2, Value+8.5), lty = 2)
Lastly, to indicate comparison between groups, I built a larger circle and flattened it at the top.
r <- .5
x <- r * cos(t)
y <- r*4 * sin(t)
y[20:162] <- y[20] # Flattens the arc
arc.df <- data.frame(Group = x, Value = y)
p2 + geom_line(data = arc.df, aes(Group+1.5, Value+11), lty = 2) +
geom_text(x = 1.5, y = 12, label = "***")
There is also an extension of the ggsignif package called ggpubr that is more powerful when it comes to multi-group comparisons. It builds on top of ggsignif, but also handles anova and kruskal-wallis as well as pairwise comparisons against the gobal mean.
Example:
library(ggpubr)
my_comparisons = list( c("0.5", "1"), c("1", "2"), c("0.5", "2") )
ggboxplot(ToothGrowth, x = "dose", y = "len",
color = "dose", palette = "jco")+
stat_compare_means(comparisons = my_comparisons, label.y = c(29, 35, 40))+
stat_compare_means(label.y = 45)
I found this one is useful.
library(ggplot2)
library(ggpval)
data("PlantGrowth")
plt <- ggplot(PlantGrowth, aes(group, weight)) +
geom_boxplot()
add_pval(plt, pairs = list(c(1, 3)), test='wilcox.test')
Made my own function:
ts_test <- function(dataL,x,y,method="t.test",idCol=NULL,paired=F,label = "p.signif",p.adjust.method="none",alternative = c("two.sided", "less", "greater"),...) {
options(scipen = 999)
annoList <- list()
setDT(dataL)
if(paired) {
allSubs <- dataL[,.SD,.SDcols=idCol] %>% na.omit %>% unique
dataL <- dataL[,merge(.SD,allSubs,by=idCol,all=T),by=x] #idCol!!!
}
if(method =="t.test") {
dataA <- eval(parse(text=paste0(
"dataL[,.(",as.name(y),"=mean(get(y),na.rm=T),sd=sd(get(y),na.rm=T)),by=x] %>% setDF"
)))
res<-pairwise.t.test(x=dataL[[y]], g=dataL[[x]], p.adjust.method = p.adjust.method,
pool.sd = !paired, paired = paired,
alternative = alternative, ...)
}
if(method =="wilcox.test") {
dataA <- eval(parse(text=paste0(
"dataL[,.(",as.name(y),"=median(get(y),na.rm=T),sd=IQR(get(y),na.rm=T,type=6)),by=x] %>% setDF"
)))
res<-pairwise.wilcox.test(x=dataL[[y]], g=dataL[[x]], p.adjust.method = p.adjust.method,
paired = paired, ...)
}
#Output the groups
res$p.value %>% dimnames %>% {paste(.[[2]],.[[1]],sep="_")} %>% cat("Groups ",.)
#Make annotations ready
annoList[["label"]] <- res$p.value %>% diag %>% round(5)
if(!is.null(label)) {
if(label == "p.signif"){
annoList[["label"]] %<>% cut(.,breaks = c(-0.1, 0.0001, 0.001, 0.01, 0.05, 1),
labels = c("****", "***", "**", "*", "ns")) %>% as.character
}
}
annoList[["x"]] <- dataA[[x]] %>% {diff(.)/2 + .[-length(.)]}
annoList[["y"]] <- {dataA[[y]] + dataA[["sd"]]} %>% {pmax(lag(.), .)} %>% na.omit
#Make plot
coli="#0099ff";sizei=1.3
p <-ggplot(dataA, aes(x=get(x), y=get(y))) +
geom_errorbar(aes(ymin=len-sd, ymax=len+sd),width=.1,color=coli,size=sizei) +
geom_line(color=coli,size=sizei) + geom_point(color=coli,size=sizei) +
scale_color_brewer(palette="Paired") + theme_minimal() +
xlab(x) + ylab(y) + ggtitle("title","subtitle")
#Annotate significances
p <-p + annotate("text", x = annoList[["x"]], y = annoList[["y"]], label = annoList[["label"]])
return(p)
}
Data and call:
library(ggplot2);library(data.table);library(magrittr);
df_long <- rbind(ToothGrowth[,-2],data.frame(len=40:50,dose=3.0))
df_long$ID <- data.table::rowid(df_long$dose)
ts_test(dataL=df_long,x="dose",y="len",idCol="ID",method="wilcox.test",paired=T)
Result:

Color one point and add an annotation in ggplot2?

I have a dataframe a with three columns :
GeneName, Index1, Index2
I draw a scatterplot like this
ggplot(a, aes(log10(Index1+1), Index2)) +geom_point(alpha=1/5)
Then I want to color a point whose GeneName is "G1" and add a text box near that point, what might be the easiest way to do it?
You could create a subset containing just that point and then add it to the plot:
# create the subset
g1 <- subset(a, GeneName == "G1")
# plot the data
ggplot(a, aes(log10(Index1+1), Index2)) + geom_point(alpha=1/5) + # this is the base plot
geom_point(data=g1, colour="red") + # this adds a red point
geom_text(data=g1, label="G1", vjust=1) # this adds a label for the red point
NOTE: Since everyone keeps up-voting this question, I thought I would make it easier to read.
Something like this should work. You may need to mess around with the x and y arguments to geom_text().
library(ggplot2)
highlight.gene <- "G1"
set.seed(23456)
a <- data.frame(GeneName = paste("G", 1:10, sep = ""),
Index1 = runif(10, 100, 200),
Index2 = runif(10, 100, 150))
a$highlight <- ifelse(a$GeneName == highlight.gene, "highlight", "normal")
textdf <- a[a$GeneName == highlight.gene, ]
mycolours <- c("highlight" = "red", "normal" = "grey50")
a
textdf
ggplot(data = a, aes(x = Index1, y = Index2)) +
geom_point(size = 3, aes(colour = highlight)) +
scale_color_manual("Status", values = mycolours) +
geom_text(data = textdf, aes(x = Index1 * 1.05, y = Index2, label = "my label")) +
theme(legend.position = "none") +
theme()

Resources