Align Text to geom_vline with varying location - r

I have a function that creates a histogram with an overlying density plot. The function also displays a red dotted line indicating alpha. Users can indicate the alpha level. Moreover, the count in the histogram will differ as a function of the input data. I want a label indicating alpha = 0.05(for example) next to the red dotted line. The label should always be next to the alpha line and always be near the top of the graph (I did not solve that). I´m aware of Align geom_text to a geom_vline in ggplot2, but they do not provide what I´m looking for (and/or produce error messages, I tried to reduce the size of the label by text=element_text(size=11) as suggested there, but that does not work).
Find below some sample code:
multiverse.p.histogram <- function(dataframe, pvalues, alpha = 0.05){
hist <- ggplot(dataframe, aes(x = p.value)) + geom_histogram(binwidth = 0.01, color = "black",fill = "dodgerblue") + theme_bw() + xlim(0,1) + geom_density(alpha = 0.5, fill = "#FF6666") +xlab("p-value") + ggtitle("Histogram of Multiverse P-Values") + geom_vline(xintercept = alpha, color = "red", linetype = "dashed") +
geom_text(aes(x = alpha, y = 75, label = "Alpha"), color = "red") +
theme(
axis.text = element_text(color = "black"),
axis.line = element_line(colour = "black"),
legend.position = "none",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank())
return(hist)
}#close histogram function
#and some sample data
df_multiverse <- structure(list(transformation = c("normal", "normal", "normal",
"normal", "normal", "normal", "normal", "normal", "normal", "normal",
"normal", "normal", "normal", "normal", "normal", "normal", "normal",
"normal", "normal", "normal", "normal", "normal", "normal", "normal",
"normal", "normal", "normal", "normal", "normal", "normal", "normal",
"normal", "normal", "normal", "normal", "normal", "normal", "normal",
"normal", "normal", "normal", "normal", "normal", "normal", "normal",
"normal", "normal", "normal", "normal", "normal"), datatrimming = c("notrimming",
"notrimming", "notrimming", "notrimming", "notrimming", "notrimming",
"notrimming", "notrimming", "notrimming", "notrimming", "notrimming",
"mad", "mad", "mad", "mad", "mad", "mad", "mad", "mad", "mad",
"mad", "mad", "mad", "mad", "mad", "mad", "mad", "mad", "mad",
"mad", "mad", "mad", "mad", "mad", "mad", "mad", "mad", "mad",
"mad", "mad", "mad", "mad", "mad", "mad", "mad", "mad", "mad",
"mad", "mad", "mad"), fixedtrimming = c("min", "min", "min",
"min", "min", "minmax", "minmax", "minmax", "minmax", "minmax",
"nofixedtrimming", "min", "min", "min", "min", "min", "minmax",
"minmax", "minmax", "minmax", "minmax", "nofixedtrimming", "min",
"min", "min", "min", "min", "minmax", "minmax", "minmax", "minmax",
"minmax", "nofixedtrimming", "min", "min", "min", "min", "min",
"minmax", "minmax", "minmax", "minmax", "minmax", "nofixedtrimming",
"min", "min", "min", "min", "min", "minmax"), min = c("0.1",
"0.2", "0.3", "0.4", "0.5", "0.1", "0.2", "0.3", "0.4", "0.5",
NA, "0.1", "0.2", "0.3", "0.4", "0.5", "0.1", "0.2", "0.3", "0.4",
"0.5", NA, "0.1", "0.2", "0.3", "0.4", "0.5", "0.1", "0.2", "0.3",
"0.4", "0.5", NA, "0.1", "0.2", "0.3", "0.4", "0.5", "0.1", "0.2",
"0.3", "0.4", "0.5", NA, "0.1", "0.2", "0.3", "0.4", "0.5", "0.1"
), max = c("4.78103879314337", "4.78103879314337", "4.78103879314337",
"4.78103879314337", "4.78103879314337", "10", "10", "10", "10",
"10", NA, "1.50348972125673", "1.50348972125673", "1.50348972125673",
"1.50348972125673", "1.50348972125673", "10", "10", "10", "10",
"10", NA, "1.6673730851492", "1.6673730851492", "1.6673730851492",
"1.6673730851492", "1.6673730851492", "10", "10", "10", "10",
"10", NA, "1.82875939263309", "1.82875939263309", "1.82875939263309",
"1.82875939263309", "1.82875939263309", "10", "10", "10", "10",
"10", NA, "1.98682907108801", "1.98682907108801", "1.98682907108801",
"1.98682907108801", "1.98682907108801", "10"), DispersionMeasure = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "2", "2", "2", "2", "2",
"2", "2", "2", "2", "2", "2", "2.5", "2.5", "2.5", "2.5", "2.5",
"2.5", "2.5", "2.5", "2.5", "2.5", "2.5", "3", "3", "3", "3",
"3", "3", "3", "3", "3", "3", "3", "3.5", "3.5", "3.5", "3.5",
"3.5", "3.5"), df = c(23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
23, 23, 23, 23, 23, 23, 23, 23), t.value = c(-1.96240490816673,
-1.91062435558061, -1.88913858576971, -1.50889838134833, -0.584414818091524,
-1.96240490816673, -1.91062435558061, -1.88913858576971, -1.50889838134833,
-0.584414818091524, -2.01035512741752, -2.32446732021548, -2.32446732021548,
-2.25138730178018, -1.75805360848308, -0.671509667928522, -2.32446732021548,
-2.32446732021548, -2.25138730178018, -1.75805360848308, -0.671509667928522,
-2.32446732021548, -2.07781942947361, -2.04327207374561, -1.96398718960439,
-1.45016152484876, -0.43329653628318, -2.07781942947361, -2.04327207374561,
-1.96398718960439, -1.45016152484876, -0.43329653628318, -2.07781942947361,
-3.1795493150037, -3.14621983607465, -3.03987566457514, -2.35519486220697,
-1.34118074962509, -3.1795493150037, -3.14621983607465, -3.03987566457514,
-2.35519486220697, -1.34118074962509, -3.19618807311348, -3.37575126770368,
-3.33582114002809, -3.25737102188504, -2.65364122964845, -1.74520405186558,
-3.37575126770368), p.value = c(0.0619242560601778, 0.0685974542038329,
0.0715464534237802, 0.14494031195569, 0.564630276572904, 0.0619242560601778,
0.0685974542038329, 0.0715464534237802, 0.14494031195569, 0.564630276572904,
0.056262190757649, 0.0292871811194525, 0.0292871811194525, 0.0342153500184824,
0.0920408256371383, 0.508584931329577, 0.0292871811194525, 0.0292871811194525,
0.0342153500184824, 0.0920408256371383, 0.508584931329577, 0.0292871811194525,
0.049074641173751, 0.0526459198825374, 0.0617296734199745, 0.160514579425126,
0.668835951230964, 0.049074641173751, 0.0526459198825374, 0.0617296734199745,
0.160514579425126, 0.668835951230964, 0.049074641173751, 0.00417775230313281,
0.00452298394363368, 0.00581820793330847, 0.0274164539383892,
0.192956766873482, 0.00417775230313281, 0.00452298394363368,
0.00581820793330847, 0.0274164539383892, 0.192956766873482, 0.00401507276581307,
0.00260719926285416, 0.00287129534969705, 0.00346795018735445,
0.0141919615636613, 0.0942977424474807, 0.00260719926285416),
estimate = c(-0.797956867083461, -0.776801900236937, -0.7455698051489,
-0.444049984838546, -0.10530217843728, -0.797956867083461,
-0.776801900236937, -0.7455698051489, -0.444049984838546,
-0.10530217843728, -0.820469748450972, -0.251308805770323,
-0.251308805770323, -0.251096848307402, -0.226028966303428,
-0.134612249858047, -0.251308805770323, -0.251308805770323,
-0.251096848307402, -0.226028966303428, -0.134612249858047,
-0.251308805770323, -0.265907227757688, -0.261504591915461,
-0.260164781545852, -0.225524157517464, -0.10176195202019,
-0.265907227757688, -0.261504591915461, -0.260164781545852,
-0.225524157517464, -0.10176195202019, -0.265907227757688,
-0.409969137221152, -0.405618224033153, -0.409494543344045,
-0.387356945276789, -0.329354185640372, -0.409969137221152,
-0.405618224033153, -0.409494543344045, -0.387356945276789,
-0.329354185640372, -0.422572659021681, -0.506062313897924,
-0.501186805248218, -0.510763602114717, -0.498830153358464,
-0.447892133899374, -0.506062313897924)), row.names = c("df",
"df1", "df2", "df3", "df4", "df5", "df6", "df7", "df8", "df9",
"df10", "df11", "df12", "df13", "df14", "df15", "df16", "df17",
"df18", "df19", "df20", "df21", "df22", "df23", "df24", "df25",
"df26", "df27", "df28", "df29", "df30", "df31", "df32", "df33",
"df34", "df35", "df36", "df37", "df38", "df39", "df40", "df41",
"df42", "df43", "df44", "df45", "df46", "df47", "df48", "df49"
), class = "data.frame")
#execute function
multiverse.p.histogram(df_multiverse, df_multiverse$p.value)
There are two problems with the code:
The alpha does not display next to the line, but on the line and I had to specify y = 75 manually. Ideally, it should always be shortly underneath the upper border. Finally, I can´t get the text size of the alpha to decrease. I tried nudge_x, but that produces the following error: Warnmeldungen:
1: Removed 2 rows containing missing values (geom_bar).
2: Removed 264 rows containing missing values (geom_text).
Does anyone have suggestions?
Thanks already!
Edit:
Based on the answers, here is my updated code:
multiverse.p.histogram <- function(dataframe, pvalues, alpha = 0.05){
ggplot(dataframe, aes(x = p.value)) +
geom_histogram(binwidth = 0.01, color = "black", fill = "dodgerblue") + #plots the histogram
geom_density(alpha = 0.5, fill = "#FF6666") + #adds densityplot
geom_vline(xintercept = alpha, color = "red", linetype = "dashed") + #adds alpha line
geom_text(x = alpha, hjust = -0.5, #adds alpha symbol next to line
y = Inf,
label = expression(paste(alpha)),
color = "red", check_overlap = TRUE,
vjust = "inward") +
ggtitle("Histogram of Multiverse P-Values") +
xlab("p-value") +
theme_bw() +
theme(axis.text = element_text(color = "black"),
axis.line = element_line(colour = "black"),
legend.position = "none",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank())
}

Here's a few tweaks to your function that may help:
Find out where the approximate upper limit of your plot will be by using the base R hist function. Use this as the position for alpha, then set the upper y limit as a small multiple of that to ensure everything fits nicely.
You only need a single alpha label, so don't map the text to an aesthetic. You can use x and y positions directly.
Use hjust to adjust your text position.
It makes your code easier to read and debug if you arrange the plot code so it all fits neatly across a single screen and is in a predictable order (I like ggplot then geoms then scales then lims, then labels then themes, but whatever order works best for you, stick to a consistent scheme.
multiverse.p.histogram <- function(dataframe, pvalues, alpha = 0.05)
{
upper <- max(hist(dataframe$p.value, breaks = seq(0, 1, 0.01))$counts)
ggplot(dataframe, aes(x = p.value)) +
geom_histogram(binwidth = 0.01, color = "black", fill = "dodgerblue") +
geom_density(alpha = 0.5, fill = "#FF6666") +
geom_vline(xintercept = alpha, color = "red", linetype = "dashed") +
geom_text(x = alpha, hjust = -0.25,
y = upper,
label = "Alpha",
color = "red", check_overlap = TRUE) +
coord_cartesian(xlim = c(0, 1)) +
xlim(-0.01, 1) +
ylim(0, upper * 1.1) +
ggtitle("Histogram of Multiverse P-Values") +
xlab("p-value") +
theme_bw() +
theme(axis.text = element_text(color = "black"),
axis.line = element_line(colour = "black"),
legend.position = "none",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank())
}

Related

Change legend and shape in ggbiplot pca

can you please help me with my pca? I would like to change the shapes in that way that each species has a different color and all 2-3 organisms for each species have 2-3 different symbols. It should look like this: enter image description here
So far I tried the following code:
setwd("~/Schwarze Johannisbeeren/SJ Wein mit nicht Sc/PCA/stackoverflow frage")
results = read.csv("results.csv", sep = ";", encoding = "UTF-8", header=TRUE, check.names=FALSE)
results.pca <- prcomp(results[,c(3:7)],
center = TRUE,
scale. = TRUE)
#grouping by organism
results.organism <- results[, 1]
#by species
results.species <- results[, 2]
summary(results.pca)
library(ggplot2)
library(ggbiplot)
ggbiplot(results.pca, alpha=0, obs.scale = 1, var.scale = 1 ,ellipse = TRUE,ellipse.prob=0.68, circle = F, varname.size=0,
var.axes = F, groups=results$species) +
theme_bw()+
geom_point(aes( colour=factor(results.species)), size=2)+
scale_shape_manual(values= c("Mt1"= 1, "Mt2" =2, "Al1"= 1, "Al2" =2, "Bg1" =1, "Bg2"=2, "Bg3" =3, "Cs1"= 1, "Cs2" =2, "Cs3" =3, "Df1"= 1, "Df2" =2, "Df3" =3))+
#scale_color_brewer(name= "organism", type = "qual", palette = 2)+
#scale_x_continuous (limits = c (-1, 9))+
theme(axis.text.x = element_text(size = 12, colour = "black", vjust = 0.5, hjust = 1, face= "bold"),
axis.title.y = element_text(size = 12, face = "bold"),
axis.title.x = element_text(size = 12, face = "bold"),
axis.text.y = element_text(colour = "black", size = 12, face = "bold"))
and that´s my data
> results
organism species lactones cyanides alcohols ethers acids
1 Mt1 Mt 23435.167 166.4 137653.9 4040.1 1131.52
2 Mt1 Mt 23303.111 168.9 153511.0 4529.1 1148.52
3 Mt1 Mt 22340.556 176.6 150719.9 3255.8 1200.88
4 Mt2 Mt 51519.222 175.9 173401.1 3890.1 1196.12
5 Mt2 Mt 48824.500 166.5 171614.4 3694.1 1132.20
6 Mt2 Mt 50427.278 165.4 168865.1 3693.2 1124.72
7 Al1 Al 25260.222 162.0 211737.4 9563.9 1101.60
8 Al1 Al 23177.556 161.5 199886.7 10403.3 1098.20
9 Al1 Al 27903.000 156.2 240088.4 11897.1 1062.16
10 Al2 Al 5993.722 180.4 289334.9 6673.3 1226.72
11 Al2 Al 7307.389 169.7 275631.1 8333.4 1153.96
12 Al2 Al 9419.167 147.5 277924.5 9622.2 1003.00
13 Bg1 Bg 58216.944 132.4 92275.3 4099.5 900.32
14 Bg1 Bg 69860.222 147.4 105654.9 4080.6 1002.32
15 Bg1 Bg 72809.333 145.8 111731.3 4014.6 991.44
16 Bg2 Bg 51584.611 142.9 105548.2 6450.1 971.72
17 Bg2 Bg 57738.056 141.2 117728.9 6332.4 960.16
18 Bg2 Bg 53356.056 142.7 110260.2 6506.2 970.36
19 Bg3 Bg 41983.389 130.8 103799.4 4781.8 889.44
20 Bg3 Bg 46930.722 148.3 113944.6 5151.6 1008.44
21 Bg3 Bg 49487.611 139.4 121976.5 5318.3 947.92
22 Cs1 Cs 7155.056 161.6 221538.8 8356.0 1098.88
23 Cs1 Cs 8153.611 151.0 179823.0 7961.2 1026.80
24 Cs1 Cs 7445.722 168.6 176978.0 8196.5 1146.48
25 Cs2 Cs 10771.556 126.4 144314.1 8634.6 859.52
26 Cs2 Cs 12239.556 142.6 142913.7 9471.9 969.68
27 Cs2 Cs 13788.611 136.1 131506.7 9390.4 925.48
28 Cs3 Cs 12082.111 152.0 171730.0 6259.6 1033.60
29 Cs3 Cs 14331.556 143.3 141748.7 7532.8 974.44
30 Cs3 Cs 14123.056 158.2 150303.0 7755.8 1075.76
31 Df1 Df 26906.778 156.2 310203.9 5505.5 1062.16
32 Df1 Df 20689.111 163.5 214322.9 5315.6 1111.80
33 Df1 Df 22872.722 154.1 197572.9 4627.7 1047.88
34 Df2 Df 18838.222 159.2 125167.6 12372.9 1082.56
35 Df2 Df 18218.667 155.8 127077.2 11182.0 1059.44
36 Df2 Df 18545.389 156.2 154400.4 10543.6 1062.16
37 Df3 Df 19924.111 156.4 199472.6 4452.3 1063.52
38 Df3 Df 22504.056 158.0 196343.0 3994.1 1074.40
39 Df3 Df 16907.278 151.5 185052.9 4084.6 1030.20
>
By the way, is it possible to have only PC1( x %) instead of PC1(x % explained var.) for the axis labeling?
One approach to achieve your desired result would be to first create shape and color palettes which map organism names to shapes and colors. Second, inside your geom_point extend the data by adding a column with the organism for which I use dplyr::bind_cols. Doing so allows to map the organism on the shape and the color aes. Finally, get rid of the color legend for the groups using scale_color_discrete(guide = "none") and add a second color scale via ggnewscale::new_scale_color and a scale_color_manual:
Note: Easy fix for the axis titles would be to set them manually using +labs(x = ..., y = ...).
library(ggplot2)
library(ggbiplot)
pal_shape <- gsub("^.*?(.)$", "\\1", results$organism)
pal_shape <- scales::shape_pal()(3)[as.integer(pal_shape)]
names(pal_shape) <- results$organism
pal_color <- gsub("^(.*?).$", "\\1", results$organism)
pal_color <- setNames(scales::hue_pal()(5), sort(unique(results$species)))[pal_color]
names(pal_color) <- results$organism
ggbiplot(results.pca,
alpha = 0, obs.scale = 1, var.scale = 1, ellipse = TRUE, ellipse.prob = 0.68, circle = F, varname.size = 0,
var.axes = F, groups = results$species
) +
scale_color_discrete(guide = "none") +
ggnewscale::new_scale_color() +
geom_point(data = ~ dplyr::bind_cols(.x, organism = results$organism),
aes(shape = organism, colour = organism),
size = 2) +
scale_shape_manual(values = pal_shape) +
scale_color_manual(values = pal_color) +
theme_bw() +
theme(
axis.text.x = element_text(size = 12, colour = "black", vjust = 0.5, hjust = 1, face = "bold"),
axis.title.y = element_text(size = 12, face = "bold"),
axis.title.x = element_text(size = 12, face = "bold"),
axis.text.y = element_text(colour = "black", size = 12, face = "bold")
)
DATA
results <- structure(list(organism = c(
"Mt1", "Mt1", "Mt1", "Mt2", "Mt2",
"Mt2", "Al1", "Al1", "Al1", "Al2", "Al2", "Al2", "Bg1", "Bg1",
"Bg1", "Bg2", "Bg2", "Bg2", "Bg3", "Bg3", "Bg3", "Cs1", "Cs1",
"Cs1", "Cs2", "Cs2", "Cs2", "Cs3", "Cs3", "Cs3", "Df1", "Df1",
"Df1", "Df2", "Df2", "Df2", "Df3", "Df3", "Df3"
), species = c(
"Mt",
"Mt", "Mt", "Mt", "Mt", "Mt", "Al", "Al", "Al", "Al", "Al", "Al",
"Bg", "Bg", "Bg", "Bg", "Bg", "Bg", "Bg", "Bg", "Bg", "Cs", "Cs",
"Cs", "Cs", "Cs", "Cs", "Cs", "Cs", "Cs", "Df", "Df", "Df", "Df",
"Df", "Df", "Df", "Df", "Df"
), lactones = c(
23435.167, 23303.111,
22340.556, 51519.222, 48824.5, 50427.278, 25260.222, 23177.556,
27903, 5993.722, 7307.389, 9419.167, 58216.944, 69860.222, 72809.333,
51584.611, 57738.056, 53356.056, 41983.389, 46930.722, 49487.611,
7155.056, 8153.611, 7445.722, 10771.556, 12239.556, 13788.611,
12082.111, 14331.556, 14123.056, 26906.778, 20689.111, 22872.722,
18838.222, 18218.667, 18545.389, 19924.111, 22504.056, 16907.278
), cyanides = c(
166.4, 168.9, 176.6, 175.9, 166.5, 165.4, 162,
161.5, 156.2, 180.4, 169.7, 147.5, 132.4, 147.4, 145.8, 142.9,
141.2, 142.7, 130.8, 148.3, 139.4, 161.6, 151, 168.6, 126.4,
142.6, 136.1, 152, 143.3, 158.2, 156.2, 163.5, 154.1, 159.2,
155.8, 156.2, 156.4, 158, 151.5
), alcohols = c(
137653.9, 153511,
150719.9, 173401.1, 171614.4, 168865.1, 211737.4, 199886.7, 240088.4,
289334.9, 275631.1, 277924.5, 92275.3, 105654.9, 111731.3, 105548.2,
117728.9, 110260.2, 103799.4, 113944.6, 121976.5, 221538.8, 179823,
176978, 144314.1, 142913.7, 131506.7, 171730, 141748.7, 150303,
310203.9, 214322.9, 197572.9, 125167.6, 127077.2, 154400.4, 199472.6,
196343, 185052.9
), ethers = c(
4040.1, 4529.1, 3255.8, 3890.1,
3694.1, 3693.2, 9563.9, 10403.3, 11897.1, 6673.3, 8333.4, 9622.2,
4099.5, 4080.6, 4014.6, 6450.1, 6332.4, 6506.2, 4781.8, 5151.6,
5318.3, 8356, 7961.2, 8196.5, 8634.6, 9471.9, 9390.4, 6259.6,
7532.8, 7755.8, 5505.5, 5315.6, 4627.7, 12372.9, 11182, 10543.6,
4452.3, 3994.1, 4084.6
), acids = c(
1131.52, 1148.52, 1200.88,
1196.12, 1132.2, 1124.72, 1101.6, 1098.2, 1062.16, 1226.72, 1153.96,
1003, 900.32, 1002.32, 991.44, 971.72, 960.16, 970.36, 889.44,
1008.44, 947.92, 1098.88, 1026.8, 1146.48, 859.52, 969.68, 925.48,
1033.6, 974.44, 1075.76, 1062.16, 1111.8, 1047.88, 1082.56, 1059.44,
1062.16, 1063.52, 1074.4, 1030.2
)), class = "data.frame", row.names = c(
"1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13",
"14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24",
"25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35",
"36", "37", "38", "39"
))

how to group levels of each subgroup in a forest plot

> dput(fig2b_data)
structure(list(subgroup = c("sex", "sex", "ai_comorbid_bool",
"ai_comorbid_bool", "non_ai_comorbid_bool", "non_ai_comorbid_bool",
"age_70_plus", "age_70_plus", "ecog_combined", "ecog_combined",
"indication_combined", "indication_combined", "site", "site",
"site", "site", "site", "site", "site", "site"), level = c("Female",
"Male", "No", "Yes", "No", "Yes", "No", "Yes", "0", "1+", "Adjuvant",
"Metastatic / Unresectable", "Cambridge", "Belfast", "Cardiff",
"Liverpool", "Norwich", "Preston", "Southampton", "Taunton"),
subgroup_level = c("sex_Female", "sex_Male", "ai_comorbid_bool_No",
"ai_comorbid_bool_Yes", "non_ai_comorbid_bool_No", "non_ai_comorbid_bool_Yes",
"age_70_plus_No", "age_70_plus_Yes", "ecog_combined_0", "ecog_combined_1+",
"indication_combined_Adjuvant", "indication_combined_Metastatic / Unresectable",
"site_Cambridge", "site_Belfast", "site_Cardiff", "site_Liverpool",
"site_Norwich", "site_Preston", "site_Southampton", "site_Taunton"
), ref = c(TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE,
TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE), adj_or = c(1, 1.92697788983048, 1,
0.309313271153888, 1, 1.60176654927755, 1, 0.581067651194834,
1, 0.606677244239784, 1, 0.757510322046024, 1, 0.0671548910659019,
1.24115412701041, 0.111740502056371, 0.296334401152569, 0.407313416513578,
0.100703132319318, 0.0580853387590806), ci_low = c(NA, 1.08574689964253,
NA, 0.0933004210866726, NA, 0.813446935851162, NA, 0.300096568750007,
NA, 0.301300997438692, NA, 0.395638695943013, NA, 0.0184879397812241,
0.316512222510664, 0.0310182213975059, 0.0774035454553755,
0.0834303368267395, 0.0228743220824828, 0.011193138928203
), ci_high = c(NA, 3.4667621174982, NA, 0.929482385449043,
NA, 3.1938659749789, NA, 1.11325241104074, NA, 1.21374279615277,
NA, 1.44670881667103, NA, 0.205952672316014, 4.59055508109202,
0.342443550375257, 1.00710088916867, 2.04034216674928, 0.387728614421501,
0.257636420370032), p = c(NA, 0.0263295963311719, NA, 0.0432646112707497,
NA, 0.175314541854903, NA, 0.103298047943536, NA, 0.158264479732785,
NA, 0.399589361570504, NA, 8.78601713425597e-06, 0.747238599523183,
0.000291277241946869, 0.0597081504970594, 0.260985385401162,
0.00132018341690714, 0.000328378914869459), sig = c(NA, TRUE,
NA, TRUE, NA, FALSE, NA, FALSE, NA, FALSE, NA, FALSE, NA,
TRUE, FALSE, TRUE, FALSE, FALSE, TRUE, TRUE), col = c("REF",
"UP", "REF", "DOWN", "REF", "INSIG", "REF", "INSIG", "REF",
"INSIG", "REF", "INSIG", "REF", "DOWN", "INSIG", "DOWN",
"INSIG", "INSIG", "DOWN", "DOWN")), row.names = c(NA, -20L
), class = "data.frame")
I'd like to draw a forest plot, but where each level is grouped by subgroup. How can I do this?
I've tried this so far to get my plot, but struggling with grouping the levels:
........................................................................................................................................................................................................
# plot
ggplot(data = fig2b_data, aes(x = adj_or, y = subgroup_level)) +
geom_errorbarh(aes(xmax = ci_high, xmin = ci_low, color = col), size = .5, height = .2) +
geom_point(aes(color = col), size = 2) +
theme_bw()
I would like:
_________________________
Sex
Male (ref) x
Female |----x-----|
_______________________________________________________________
AI comorbid
No (ref) x
Yes |----x-----|
_______________________________________________________________
Site
Cambridge(ref) x
Preston |----x-----|
Southampton |----x-----|
Belfast |----x-----|
__________________________________1____________________________
That was quite of a workaround. The adjusted hight for the facet_wrap() height is borrowed from here
Code
library(dplyr)
library(ggplot2)
fig2b_data_cleared <- fig2b_data %>%
mutate(subgroup = fct_recode(subgroup, "Age >= 70" = "age_70_plus",
"AI Comorbidities" = "ai_comorbid_bool",
"ECOG" = "ecog_combined",
"Indication" = "indication_combined",
"Non-AI Comorbidities" = "non_ai_comorbid_bool",
"Sex" = "sex",
"Site" = "site"),
subgroup_level = fct_relevel(subgroup_level, "age_70_plus_Yes", "age_70_plus_No",
"ai_comorbid_bool_Yes", "ai_comorbid_bool_No",
"ecog_combined_1+", "ecog_combined_0",
"indication_combined_Metastatic / Unresectable", "indication_combined_Adjuvant",
"non_ai_comorbid_bool_Yes", "non_ai_comorbid_bool_No",
"sex_Male", "sex_Female",
"site_Belfast",
"site_Cardiff", "site_Liverpool",
"site_Norwich", "site_Preston",
"site_Southampton", "site_Taunton",
"site_Cambridge"))
p <- ggplot(data = fig2b_data_cleared, aes(x = adj_or, y = subgroup_level)) +
geom_vline(xintercept = 1, linetype = 2, color = "red") +
geom_point(aes(color = col), size = 3) +
xlab("Adjusted Odds Ratio") +
ylab("") +
geom_errorbar(aes(xmax = ci_high, xmin = ci_low, color = col), size = 0.8, width = 0.5) +
theme(plot.title.x = element_text(size = 16, face = "bold"),
axis.text.y = element_blank(),
axis.text.x = element_text(face = "bold"),
axis.title.y = element_blank(),
strip.text.y = element_text(hjust = 0, vjust = 1, angle = 180, face = "bold"),
legend.title = element_blank()) +
theme_bw() +
scale_y_discrete(breaks=c("age_70_plus_No", "age_70_plus_Yes",
"ai_comorbid_bool_No", "ai_comorbid_bool_Yes",
"ecog_combined_0", "ecog_combined_1+",
"indication_combined_Adjuvant", "indication_combined_Metastatic / Unresectable",
"non_ai_comorbid_bool_No", "non_ai_comorbid_bool_Yes",
"sex_Female", "sex_Male",
"site_Cambridge", "site_Belfast",
"site_Cardiff", "site_Liverpool",
"site_Norwich", "site_Preston",
"site_Southampton", "site_Taunton"),
labels=c("No (Ref)", "Yes",
"No (Ref)", "Yes",
"No (Ref)", "Yes",
"No (Ref)", "Yes",
"No (Ref)", "Yes",
"Female (Ref)", "Male",
"Cambridge (Ref)", "Belfast",
"Cardiff", "Liverpool",
"Norwich", "Preston",
"Southampton", "Tanton")) +
scale_color_discrete(limits = c("REF", "INSIG", "DOWN", "UP"),
name = "")
p.grid <- p + facet_grid(subgroup ~ ., scales = "free_y", space = "free_y")
p.wrap <- p + facet_wrap(~ subgroup, ncol = 1, scales = "free_y")
gp.grid <- ggplotGrob(p.grid)
gp.wrap <- ggplotGrob(p.wrap)
gp.wrap$heights[gp.wrap$layout[grep("panel", gp.wrap$layout$name), "t"]] <-
gp.grid$heights[gp.grid$layout[grep("panel", gp.grid$layout$name), "t"]]
grid::grid.draw(gp.wrap)
Output

Add back layers in ggpubr

I removed the boxplot from a ggpubr plot with the following line of code:
plot$layers <- plot$layers[-1]
I saw this solution on this Stack post: Boxplots with ggpaired() WITHOUT connecting lines
I am not as familiar with ggpubr but thought it would be fit for purpose for this particular set of plots. I was told to remove the boxplot and now to add it back in and now I am stuck. I am thinking I changed the environment settings and don't know how to revert it to the original way.
Any help is greatly appreciated! Plots provided with drug name removed.
Plot code:
ggpar(ggpaired(data, x = "treatment", y = "value",
color = "treatment", palette = c("#202960", "#8CC63E"),
add = "dotplot", title = "Jiang (TIDE) CTL Panel - Mock vs. Drug",
xlab = "Treatment", ylab = "CTL Score",
width = 0, point.size = 2,
linetype = "dashed", line.color = "gray") +
stat_compare_means(method = "t.test", paired = TRUE,
label.x = 1.4, label.y = 1.8) +
#geom_text_repel(data = data %>% filter(treatment == "Mock"),
# aes(x = "Mock", y = value, label = subject),
# nudge_x = -0.2, inherit.aes = FALSE, size = 3,
# color = "#6c706d") +
scale_y_continuous(breaks = c(0,0.25,0.5,0.75,1,1.25,1.5,1.75,2,2.25,2.5),
labels = c("0","","0.5","","1.0","","1.5","","2.0","","2.5")) +
coord_cartesian(ylim = c(0,2)), legend = "FALSE")
Current Plot Example:
Target Plot Example (minus geom_text_repel label):
Reprex:
data = structure(list(...1 = c("1", "2", "3", "4", "5", "6", "7", "1",
"2", "3", "4", "5", "6", "7"), subject = c("G5", "G6", "G7",
"G8", "G12", "G13", "G14", "G5", "G6", "G7", "G8", "G12", "G13",
"G14"), value = c(0.733377605384461, 1.65662450012502, 1.38787220722645,
1.37053666253192, 1.62679859595799, 0.541814589221617, 0.966081047005121,
0.892329475047538, 1.75954000718932, 1.36253416559681, 1.22104521047091,
1.61168414813187, 0.940537918248694, 1.00171611627718), treatment = c("Drug",
"Drug", "Drug", "Drug", "Drug", "Drug", "Drug", "Mock", "Mock",
"Mock", "Mock", "Mock", "Mock", "Mock")), row.names = c(NA, -14L
), class = "data.frame")
I ended up reinstalling the package and changing the width (width = 0.2 instead of width = 0), which gave me the target plot.
I believe the following line of code changes the global package settings, just FYI: plot$layers <- plot$layers[-1]
Edit: Nevermind... I'm just dumb... I think I had the width at 0, which is why it was looking funny. Please ignore me.

Can we color the different rows/covariates/studies in different colors in R forest plots?

Using the forestplot package in the programming language R, I would like to make a forest plot that has each row in a different color. By each row, I mean the boxes and the respective confidence intervals.
Taking an example from the vignette [https://cran.r-project.org/web/packages/forestplot/vignettes/forestplot.html],
library(forestplot)
# Cochrane data from the 'rmeta'-package
cochrane_from_rmeta <-
structure(list(
mean = c(NA, NA, 0.578, 0.165, 0.246, 0.700, 0.348, 0.139, 1.017, NA, 0.531),
lower = c(NA, NA, 0.372, 0.018, 0.072, 0.333, 0.083, 0.016, 0.365, NA, 0.386),
upper = c(NA, NA, 0.898, 1.517, 0.833, 1.474, 1.455, 1.209, 2.831, NA, 0.731)),
.Names = c("mean", "lower", "upper"),
row.names = c(NA, -11L),
class = "data.frame")
tabletext <- cbind(c("", "Study", "Auckland", "Block", "Doran", "Gamsu", "Morrison", "Papageorgiou", "Tauesch", NA, "Summary"),
c("Deaths", "(steroid)", "36", "1", "4", "14", "3", "1", "8", NA, NA),
c("Deaths", "(placebo)", "60", "5", "11", "20", "7", "7", "10", NA, NA),
c("", "OR", "0.58", "0.16", "0.25", "0.70", "0.35", "0.14", "1.02", NA, "0.53"))
forestplot(tabletext, cochrane_from_rmeta, new_page = TRUE,
is.summary = c(TRUE, TRUE, rep(FALSE, 8), TRUE),
clip = c(0.1,2.5), xlog = TRUE,
col = fpColors(box="royalblue",line="darkblue", summary="royalblue"))
I want each study to have its own color (Auckland can be colored blue, Block can be colored red, Doran can be colored green, and so on). I think that this might be accomplished by changing the argument to the fpColors() function.
Is there any way to do this?
Take a look at fpShapesGp. With this, it's possible to color the rows in different colors.
An simple example for your code:
styles <- fpShapesGp(
lines = list(
gpar(col = "black"),
gpar(col = "blue"),
gpar(col = "black"),
gpar(col = "blue"),
gpar(col = "black"),
gpar(col = "blue"),
gpar(col = "black"),
gpar(col = "blue"),
gpar(col = "black"),
gpar(col = "blue"),
gpar(col = "black")
),
box = list(
gpar(fill = "black"),
gpar(fill = "blue"),
gpar(fill = "black"),
gpar(fill = "blue"),
gpar(fill = "black"),
gpar(fill = "blue"),
gpar(fill = "black"),
gpar(fill = "blue"),
gpar(fill = "black"),
gpar(fill = "blue"),
gpar(fill = "black")
)
)
forestplot(tabletext, cochrane_from_rmeta, new_page = TRUE,
is.summary = c(TRUE, TRUE, rep(FALSE, 8), TRUE),
clip = c(0.1,2.5), xlog = TRUE,
shapes_gp = styles)
As a result you get this:

Adjust margin for forestplot, footnotes

I'm trying to use the forestplot and I want to include a lengthy footnote (3 lines of text). I can't get enough space at the bottom and I don't think the par() options are doing anything. Here's the example given in the vingette along with my attempt at making a footnote:
library(forestplot)
# Cochrane data from the 'rmeta'-package
cochrane_from_rmeta <-
structure(list(
mean = c(NA, NA, 0.578, 0.165, 0.246, 0.700, 0.348, 0.139, 1.017, NA, 0.531),
lower = c(NA, NA, 0.372, 0.018, 0.072, 0.333, 0.083, 0.016, 0.365, NA, 0.386),
upper = c(NA, NA, 0.898, 1.517, 0.833, 1.474, 1.455, 1.209, 2.831, NA, 0.731)),
.Names = c("mean", "lower", "upper"),
row.names = c(NA, -11L),
class = "data.frame")
tabletext<-cbind(
c("", "Study", "Auckland", "Block",
"Doran", "Gamsu", "Morrison", "Papageorgiou",
"Tauesch", NA, "Summary"),
c("Deaths", "(steroid)", "36", "1",
"4", "14", "3", "1",
"8", NA, NA),
c("Deaths", "(placebo)", "60", "5",
"11", "20", "7", "7",
"10", NA, NA),
c("", "OR", "0.58", "0.16",
"0.25", "0.70", "0.35", "0.14",
"1.02", NA, "0.53"))
dev.new()
par(mar = c(4, 1, 1, 1))
forestplot(tabletext,
cochrane_from_rmeta,new_page = TRUE,
is.summary=c(TRUE,TRUE,rep(FALSE,8),TRUE),
clip=c(0.1,2.5),
xlog=TRUE,
col=fpColors(box="royalblue",line="darkblue", summary="royalblue"))
grid.text('* Adjusted for demographic & baseline variables,blur blur blur',
x = unit(.43, 'npc'),
y = unit(1, 'lines'))
Any suggestions on how to create some more space?
Thanks!
I find a solution, first I though that I could change the margine thanks to the ggplot option of the theme :
+ theme(plot.margin = margin(2, 2, 2, 2, "cm"))
But it was not possible to have access to the plot object in forestplot, so I look closly the option of forestplot function and there is an argument mar which take units as parameter, the following solution works for me :
forestplot(tabletext,
cochrane_from_rmeta,new_page = TRUE,
is.summary=c(TRUE, TRUE, rep(FALSE,8), TRUE),
clip=c(0.1, 2.5),
xlog=TRUE,
mar = unit(rep(10, times = 4), "mm"),
col=fpColors(box="royalblue",
line="darkblue",
summary="royalblue"))
grid.text('* Adjusted for demographic & baseline variables,blur blur blur',
x = unit(.43, 'npc'),
y = unit(1, 'lines'))

Resources