how to group levels of each subgroup in a forest plot - r

> dput(fig2b_data)
structure(list(subgroup = c("sex", "sex", "ai_comorbid_bool",
"ai_comorbid_bool", "non_ai_comorbid_bool", "non_ai_comorbid_bool",
"age_70_plus", "age_70_plus", "ecog_combined", "ecog_combined",
"indication_combined", "indication_combined", "site", "site",
"site", "site", "site", "site", "site", "site"), level = c("Female",
"Male", "No", "Yes", "No", "Yes", "No", "Yes", "0", "1+", "Adjuvant",
"Metastatic / Unresectable", "Cambridge", "Belfast", "Cardiff",
"Liverpool", "Norwich", "Preston", "Southampton", "Taunton"),
subgroup_level = c("sex_Female", "sex_Male", "ai_comorbid_bool_No",
"ai_comorbid_bool_Yes", "non_ai_comorbid_bool_No", "non_ai_comorbid_bool_Yes",
"age_70_plus_No", "age_70_plus_Yes", "ecog_combined_0", "ecog_combined_1+",
"indication_combined_Adjuvant", "indication_combined_Metastatic / Unresectable",
"site_Cambridge", "site_Belfast", "site_Cardiff", "site_Liverpool",
"site_Norwich", "site_Preston", "site_Southampton", "site_Taunton"
), ref = c(TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE,
TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE), adj_or = c(1, 1.92697788983048, 1,
0.309313271153888, 1, 1.60176654927755, 1, 0.581067651194834,
1, 0.606677244239784, 1, 0.757510322046024, 1, 0.0671548910659019,
1.24115412701041, 0.111740502056371, 0.296334401152569, 0.407313416513578,
0.100703132319318, 0.0580853387590806), ci_low = c(NA, 1.08574689964253,
NA, 0.0933004210866726, NA, 0.813446935851162, NA, 0.300096568750007,
NA, 0.301300997438692, NA, 0.395638695943013, NA, 0.0184879397812241,
0.316512222510664, 0.0310182213975059, 0.0774035454553755,
0.0834303368267395, 0.0228743220824828, 0.011193138928203
), ci_high = c(NA, 3.4667621174982, NA, 0.929482385449043,
NA, 3.1938659749789, NA, 1.11325241104074, NA, 1.21374279615277,
NA, 1.44670881667103, NA, 0.205952672316014, 4.59055508109202,
0.342443550375257, 1.00710088916867, 2.04034216674928, 0.387728614421501,
0.257636420370032), p = c(NA, 0.0263295963311719, NA, 0.0432646112707497,
NA, 0.175314541854903, NA, 0.103298047943536, NA, 0.158264479732785,
NA, 0.399589361570504, NA, 8.78601713425597e-06, 0.747238599523183,
0.000291277241946869, 0.0597081504970594, 0.260985385401162,
0.00132018341690714, 0.000328378914869459), sig = c(NA, TRUE,
NA, TRUE, NA, FALSE, NA, FALSE, NA, FALSE, NA, FALSE, NA,
TRUE, FALSE, TRUE, FALSE, FALSE, TRUE, TRUE), col = c("REF",
"UP", "REF", "DOWN", "REF", "INSIG", "REF", "INSIG", "REF",
"INSIG", "REF", "INSIG", "REF", "DOWN", "INSIG", "DOWN",
"INSIG", "INSIG", "DOWN", "DOWN")), row.names = c(NA, -20L
), class = "data.frame")
I'd like to draw a forest plot, but where each level is grouped by subgroup. How can I do this?
I've tried this so far to get my plot, but struggling with grouping the levels:
........................................................................................................................................................................................................
# Forest plot attempt: one row per subgroup level, with the point estimate
# drawn on top of its horizontal confidence interval. Colour is mapped once
# at the top level so both layers share it.
ggplot(
  fig2b_data,
  aes(x = adj_or, y = subgroup_level, colour = col)
) +
  geom_errorbarh(
    aes(xmin = ci_low, xmax = ci_high),
    height = 0.2,
    size = 0.5
  ) +
  geom_point(size = 2) +
  theme_bw()
I would like:
_________________________
Sex
Male (ref) x
Female |----x-----|
_______________________________________________________________
AI comorbid
No (ref) x
Yes |----x-----|
_______________________________________________________________
Site
Cambridge(ref) x
Preston |----x-----|
Southampton |----x-----|
Belfast |----x-----|
__________________________________1____________________________

That was quite a workaround. The adjusted panel height for the facet_wrap() version is borrowed from here
Code
library(dplyr)
library(forcats) # fct_recode() and fct_relevel() come from forcats, not dplyr
library(ggplot2)

# Give every subgroup a display name (used as facet title) and fix the order
# of the rows. A discrete y axis is drawn bottom-to-top, so within each
# subgroup the reference level is listed last and ends up as the top row.
fig2b_data_cleared <- fig2b_data %>%
  mutate(
    subgroup = fct_recode(
      subgroup,
      "Age >= 70" = "age_70_plus",
      "AI Comorbidities" = "ai_comorbid_bool",
      "ECOG" = "ecog_combined",
      "Indication" = "indication_combined",
      "Non-AI Comorbidities" = "non_ai_comorbid_bool",
      "Sex" = "sex",
      "Site" = "site"
    ),
    subgroup_level = fct_relevel(
      subgroup_level,
      "age_70_plus_Yes", "age_70_plus_No",
      "ai_comorbid_bool_Yes", "ai_comorbid_bool_No",
      "ecog_combined_1+", "ecog_combined_0",
      "indication_combined_Metastatic / Unresectable",
      "indication_combined_Adjuvant",
      "non_ai_comorbid_bool_Yes", "non_ai_comorbid_bool_No",
      "sex_Male", "sex_Female",
      "site_Belfast", "site_Cardiff", "site_Liverpool", "site_Norwich",
      "site_Preston", "site_Southampton", "site_Taunton",
      "site_Cambridge"
    )
  )

# Row labels are derived from the data instead of being typed by hand: the
# plain level name, with "(Ref)" appended on reference rows. The vector is
# named by subgroup_level so the y scale can look labels up by break value.
# (A hand-written list previously labelled the ECOG and Indication rows
# "No (Ref)" / "Yes" although their levels are 0 / 1+ and Adjuvant /
# Metastatic / Unresectable.)
level_labels <- setNames(
  ifelse(fig2b_data$ref, paste(fig2b_data$level, "(Ref)"), fig2b_data$level),
  fig2b_data$subgroup_level
)

p <- ggplot(data = fig2b_data_cleared, aes(x = adj_or, y = subgroup_level)) +
  # Line of no effect (odds ratio = 1).
  geom_vline(xintercept = 1, linetype = 2, color = "red") +
  geom_point(aes(color = col), size = 3) +
  geom_errorbar(
    aes(xmin = ci_low, xmax = ci_high, color = col),
    size = 0.8,
    width = 0.5
  ) +
  scale_y_discrete(labels = level_labels) +
  scale_color_discrete(
    limits = c("REF", "INSIG", "DOWN", "UP"),
    name = ""
  ) +
  xlab("Adjusted Odds Ratio") +
  ylab("") +
  # theme_bw() is a complete theme and replaces everything set before it, so
  # it has to come first and the tweaks second.
  theme_bw() +
  theme(
    axis.title.x = element_text(size = 16, face = "bold"),
    axis.text.x = element_text(face = "bold"),
    # axis.text.y is intentionally left visible: it carries the row labels.
    axis.title.y = element_blank(),
    strip.text.y = element_text(hjust = 0, vjust = 1, angle = 180, face = "bold"),
    legend.title = element_blank()
  )

# facet_grid() can size panels by their number of rows (space = "free_y") but
# facet_wrap() cannot, so the panel heights of the grid version are copied
# onto the wrap version before drawing.
p.grid <- p + facet_grid(subgroup ~ ., scales = "free_y", space = "free_y")
p.wrap <- p + facet_wrap(~ subgroup, ncol = 1, scales = "free_y")
gp.grid <- ggplotGrob(p.grid)
gp.wrap <- ggplotGrob(p.wrap)

# Indices into `heights` of the gtable rows that hold the panels.
panel_rows <- function(gtable) {
  gtable$layout[grep("panel", gtable$layout$name), "t"]
}
gp.wrap$heights[panel_rows(gp.wrap)] <- gp.grid$heights[panel_rows(gp.grid)]

# Start from a clean page so the plot is not drawn over earlier output.
grid::grid.newpage()
grid::grid.draw(gp.wrap)
Output

Related

how to group levels into a subgroup in a ggplot forest plot

> dput(fig2b_data)
structure(list(subgroup = c("sex", "sex", "ai_comorbid_bool",
"ai_comorbid_bool", "non_ai_comorbid_bool", "non_ai_comorbid_bool",
"age_70_plus", "age_70_plus", "ecog_combined", "ecog_combined",
"indication_combined", "indication_combined", "site", "site",
"site", "site", "site", "site", "site", "site"), level = c("Female",
"Male", "No", "Yes", "No", "Yes", "No", "Yes", "0", "1+", "Adjuvant",
"Metastatic / Unresectable", "Cambridge", "Belfast", "Cardiff",
"Liverpool", "Norwich", "Preston", "Southampton", "Taunton"),
subgroup_level = c("sex_Female", "sex_Male", "ai_comorbid_bool_No",
"ai_comorbid_bool_Yes", "non_ai_comorbid_bool_No", "non_ai_comorbid_bool_Yes",
"age_70_plus_No", "age_70_plus_Yes", "ecog_combined_0", "ecog_combined_1+",
"indication_combined_Adjuvant", "indication_combined_Metastatic / Unresectable",
"site_Cambridge", "site_Belfast", "site_Cardiff", "site_Liverpool",
"site_Norwich", "site_Preston", "site_Southampton", "site_Taunton"
), ref = c(TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE,
TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE), adj_or = c(1, 1.92697788983048, 1,
0.309313271153888, 1, 1.60176654927755, 1, 0.581067651194834,
1, 0.606677244239784, 1, 0.757510322046024, 1, 0.0671548910659019,
1.24115412701041, 0.111740502056371, 0.296334401152569, 0.407313416513578,
0.100703132319318, 0.0580853387590806), ci_low = c(NA, 1.08574689964253,
NA, 0.0933004210866726, NA, 0.813446935851162, NA, 0.300096568750007,
NA, 0.301300997438692, NA, 0.395638695943013, NA, 0.0184879397812241,
0.316512222510664, 0.0310182213975059, 0.0774035454553755,
0.0834303368267395, 0.0228743220824828, 0.011193138928203
), ci_high = c(NA, 3.4667621174982, NA, 0.929482385449043,
NA, 3.1938659749789, NA, 1.11325241104074, NA, 1.21374279615277,
NA, 1.44670881667103, NA, 0.205952672316014, 4.59055508109202,
0.342443550375257, 1.00710088916867, 2.04034216674928, 0.387728614421501,
0.257636420370032), p = c(NA, 0.0263295963311719, NA, 0.0432646112707497,
NA, 0.175314541854903, NA, 0.103298047943536, NA, 0.158264479732785,
NA, 0.399589361570504, NA, 8.78601713425597e-06, 0.747238599523183,
0.000291277241946869, 0.0597081504970594, 0.260985385401162,
0.00132018341690714, 0.000328378914869459), sig = c(NA, TRUE,
NA, TRUE, NA, FALSE, NA, FALSE, NA, FALSE, NA, FALSE, NA,
TRUE, FALSE, TRUE, FALSE, FALSE, TRUE, TRUE), col = c("REF",
"UP", "REF", "DOWN", "REF", "INSIG", "REF", "INSIG", "REF",
"INSIG", "REF", "INSIG", "REF", "DOWN", "INSIG", "DOWN",
"INSIG", "INSIG", "DOWN", "DOWN")), row.names = c(NA, -20L
), class = "data.frame")
I'd like to draw a forest plot, but where each level is grouped by subgroup. How can I do this?
I've tried this so far to get my plot, but struggling with grouping the levels:
........................................................................................................................................................................................................
# First attempt at the forest plot: every subgroup level gets its own row on
# the y axis, showing the adjusted odds ratio and its confidence interval.
# The colour mapping is declared once and inherited by both layers.
ggplot(
  fig2b_data,
  aes(x = adj_or, y = subgroup_level, colour = col)
) +
  geom_errorbarh(
    aes(xmin = ci_low, xmax = ci_high),
    height = 0.2,
    size = 0.5
  ) +
  geom_point(size = 2) +
  theme_bw()
I would like:
_________________________
Sex
Male (ref) x
Female |----x-----|
_______________________________________________________________
AI comorbid
No (ref) x
Yes |----x-----|
_______________________________________________________________
Site
Cambridge(ref) x
Preston |----x-----|
Southampton |----x-----|
Belfast |----x-----|
__________________________________1____________________________
I think this can help. First, use the melt function to rearrange your data around the columns you want to keep, and then plot the result. However, since I don't know exactly what you want to show, my plot has many points for subgroup = site, and I don't think that conveys much graphical information.
library(ggplot2)
library(reshape2) # provides melt()

# Reshape the question's data to long format. The five columns the plot needs
# are kept as identifiers; every remaining column is stacked into
# variable/value pairs, so each original row appears once per stacked column.
df <- melt(
  fig2b_data,
  id.vars = c("subgroup", "adj_or", "ci_low", "ci_high", "col")
)

# One row per subgroup (not per level): all levels of a subgroup share a row.
# Colour is mapped to `col` in both layers; a top-level `color = variable`
# mapping would be overridden by them, so it is not set.
ggplot(df, aes(x = adj_or, y = subgroup)) +
  geom_errorbarh(
    aes(xmin = ci_low, xmax = ci_high, color = col),
    size = 0.5,
    height = 0.2
  ) +
  geom_point(aes(color = col), size = 2) +
  theme_bw()

Can we color the different rows/covariates/studies in different colors in R forest plots?

Using the forestplot package in the programming language R, I would like to make a forest plot that has each row in a different color. By each row, I mean the boxes and the respective confidence intervals.
Taking an example from the vignette [https://cran.r-project.org/web/packages/forestplot/vignettes/forestplot.html],
library(forestplot)

# Cochrane data from the 'rmeta'-package.
# One row per line of the table; rows without an estimate are NA.
cochrane_from_rmeta <- data.frame(
  mean  = c(NA, NA, 0.578, 0.165, 0.246, 0.700, 0.348, 0.139, 1.017, NA, 0.531),
  lower = c(NA, NA, 0.372, 0.018, 0.072, 0.333, 0.083, 0.016, 0.365, NA, 0.386),
  upper = c(NA, NA, 0.898, 1.517, 0.833, 1.474, 1.455, 1.209, 2.831, NA, 0.731)
)

# Text shown next to the plot, assembled column by column. The list is
# unnamed so the resulting character matrix has no column names.
tabletext <- do.call(
  cbind,
  list(
    # study name
    c("", "Study", "Auckland", "Block", "Doran", "Gamsu", "Morrison",
      "Papageorgiou", "Tauesch", NA, "Summary"),
    # deaths in the steroid arm
    c("Deaths", "(steroid)", "36", "1", "4", "14", "3", "1", "8", NA, NA),
    # deaths in the placebo arm
    c("Deaths", "(placebo)", "60", "5", "11", "20", "7", "7", "10", NA, NA),
    # odds ratio as text
    c("", "OR", "0.58", "0.16", "0.25", "0.70", "0.35", "0.14", "1.02", NA,
      "0.53")
  )
)

# Two header rows and the final row are flagged as summary rows.
forestplot(
  tabletext,
  cochrane_from_rmeta,
  new_page = TRUE,
  is.summary = rep(c(TRUE, FALSE, TRUE), times = c(2, 8, 1)),
  clip = c(0.1, 2.5),
  xlog = TRUE,
  col = fpColors(
    box = "royalblue",
    line = "darkblue",
    summary = "royalblue"
  )
)
I want each study to have its own color (Auckland can be colored blue, Block can be colored red, Doran can be colored green, and so on). I think that this might be accomplished by changing the argument to the fpColors() function.
Is there any way to do this?
Take a look at fpShapesGp. With this, it's possible to color the rows in different colors.
A simple example for your code:
# Per-row graphical parameters: rows alternate black / blue, starting and
# ending with black across the 11 rows. The same colour sequence is used for
# the confidence-interval lines (`col`) and for the boxes (`fill`).
styles <- local({
  row_colours <- rep_len(c("black", "blue"), 11L)
  fpShapesGp(
    lines = lapply(row_colours, function(colour) gpar(col = colour)),
    box = lapply(row_colours, function(colour) gpar(fill = colour))
  )
})

# Same call as in the question, with the per-row styles supplied through
# `shapes_gp` in place of the single fpColors() palette.
forestplot(
  tabletext,
  cochrane_from_rmeta,
  new_page = TRUE,
  is.summary = rep(c(TRUE, FALSE, TRUE), times = c(2, 8, 1)),
  clip = c(0.1, 2.5),
  xlog = TRUE,
  shapes_gp = styles
)
As a result you get this:

Align Text to geom_vline with varying location

I have a function that creates a histogram with an overlying density plot. The function also displays a red dotted line indicating alpha. Users can indicate the alpha level. Moreover, the count in the histogram will differ as a function of the input data. I want a label indicating alpha = 0.05 (for example) next to the red dotted line. The label should always be next to the alpha line and always be near the top of the graph (I did not solve that). I'm aware of "Align geom_text to a geom_vline in ggplot2", but the answers there do not provide what I'm looking for (and/or produce error messages; I tried to reduce the size of the label by text=element_text(size=11) as suggested there, but that does not work).
Find below some sample code:
#' Histogram of p-values with a density overlay and a dashed alpha line
#'
#' @param dataframe Data frame with a `p.value` column (mapped to x).
#' @param pvalues Not used in the body.
#' @param alpha x position of the dashed red line and of the "Alpha" label.
#' @return The ggplot object.
multiverse.p.histogram <- function(dataframe, pvalues, alpha = 0.05) {
  ggplot(dataframe, aes(x = p.value)) +
    geom_histogram(
      binwidth = 0.01,
      color = "black",
      fill = "dodgerblue"
    ) +
    theme_bw() +
    xlim(0, 1) +
    geom_density(alpha = 0.5, fill = "#FF6666") +
    xlab("p-value") +
    ggtitle("Histogram of Multiverse P-Values") +
    geom_vline(xintercept = alpha, color = "red", linetype = "dashed") +
    # Label sits on the line at a fixed height of y = 75.
    geom_text(aes(x = alpha, y = 75, label = "Alpha"), color = "red") +
    # Strip grid, border and background; keep black axis lines and text.
    theme(
      axis.text = element_text(color = "black"),
      axis.line = element_line(colour = "black"),
      legend.position = "none",
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      panel.border = element_blank(),
      panel.background = element_blank()
    )
}
#and some sample data
df_multiverse <- structure(list(transformation = c("normal", "normal", "normal",
"normal", "normal", "normal", "normal", "normal", "normal", "normal",
"normal", "normal", "normal", "normal", "normal", "normal", "normal",
"normal", "normal", "normal", "normal", "normal", "normal", "normal",
"normal", "normal", "normal", "normal", "normal", "normal", "normal",
"normal", "normal", "normal", "normal", "normal", "normal", "normal",
"normal", "normal", "normal", "normal", "normal", "normal", "normal",
"normal", "normal", "normal", "normal", "normal"), datatrimming = c("notrimming",
"notrimming", "notrimming", "notrimming", "notrimming", "notrimming",
"notrimming", "notrimming", "notrimming", "notrimming", "notrimming",
"mad", "mad", "mad", "mad", "mad", "mad", "mad", "mad", "mad",
"mad", "mad", "mad", "mad", "mad", "mad", "mad", "mad", "mad",
"mad", "mad", "mad", "mad", "mad", "mad", "mad", "mad", "mad",
"mad", "mad", "mad", "mad", "mad", "mad", "mad", "mad", "mad",
"mad", "mad", "mad"), fixedtrimming = c("min", "min", "min",
"min", "min", "minmax", "minmax", "minmax", "minmax", "minmax",
"nofixedtrimming", "min", "min", "min", "min", "min", "minmax",
"minmax", "minmax", "minmax", "minmax", "nofixedtrimming", "min",
"min", "min", "min", "min", "minmax", "minmax", "minmax", "minmax",
"minmax", "nofixedtrimming", "min", "min", "min", "min", "min",
"minmax", "minmax", "minmax", "minmax", "minmax", "nofixedtrimming",
"min", "min", "min", "min", "min", "minmax"), min = c("0.1",
"0.2", "0.3", "0.4", "0.5", "0.1", "0.2", "0.3", "0.4", "0.5",
NA, "0.1", "0.2", "0.3", "0.4", "0.5", "0.1", "0.2", "0.3", "0.4",
"0.5", NA, "0.1", "0.2", "0.3", "0.4", "0.5", "0.1", "0.2", "0.3",
"0.4", "0.5", NA, "0.1", "0.2", "0.3", "0.4", "0.5", "0.1", "0.2",
"0.3", "0.4", "0.5", NA, "0.1", "0.2", "0.3", "0.4", "0.5", "0.1"
), max = c("4.78103879314337", "4.78103879314337", "4.78103879314337",
"4.78103879314337", "4.78103879314337", "10", "10", "10", "10",
"10", NA, "1.50348972125673", "1.50348972125673", "1.50348972125673",
"1.50348972125673", "1.50348972125673", "10", "10", "10", "10",
"10", NA, "1.6673730851492", "1.6673730851492", "1.6673730851492",
"1.6673730851492", "1.6673730851492", "10", "10", "10", "10",
"10", NA, "1.82875939263309", "1.82875939263309", "1.82875939263309",
"1.82875939263309", "1.82875939263309", "10", "10", "10", "10",
"10", NA, "1.98682907108801", "1.98682907108801", "1.98682907108801",
"1.98682907108801", "1.98682907108801", "10"), DispersionMeasure = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "2", "2", "2", "2", "2",
"2", "2", "2", "2", "2", "2", "2.5", "2.5", "2.5", "2.5", "2.5",
"2.5", "2.5", "2.5", "2.5", "2.5", "2.5", "3", "3", "3", "3",
"3", "3", "3", "3", "3", "3", "3", "3.5", "3.5", "3.5", "3.5",
"3.5", "3.5"), df = c(23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
23, 23, 23, 23, 23, 23, 23, 23), t.value = c(-1.96240490816673,
-1.91062435558061, -1.88913858576971, -1.50889838134833, -0.584414818091524,
-1.96240490816673, -1.91062435558061, -1.88913858576971, -1.50889838134833,
-0.584414818091524, -2.01035512741752, -2.32446732021548, -2.32446732021548,
-2.25138730178018, -1.75805360848308, -0.671509667928522, -2.32446732021548,
-2.32446732021548, -2.25138730178018, -1.75805360848308, -0.671509667928522,
-2.32446732021548, -2.07781942947361, -2.04327207374561, -1.96398718960439,
-1.45016152484876, -0.43329653628318, -2.07781942947361, -2.04327207374561,
-1.96398718960439, -1.45016152484876, -0.43329653628318, -2.07781942947361,
-3.1795493150037, -3.14621983607465, -3.03987566457514, -2.35519486220697,
-1.34118074962509, -3.1795493150037, -3.14621983607465, -3.03987566457514,
-2.35519486220697, -1.34118074962509, -3.19618807311348, -3.37575126770368,
-3.33582114002809, -3.25737102188504, -2.65364122964845, -1.74520405186558,
-3.37575126770368), p.value = c(0.0619242560601778, 0.0685974542038329,
0.0715464534237802, 0.14494031195569, 0.564630276572904, 0.0619242560601778,
0.0685974542038329, 0.0715464534237802, 0.14494031195569, 0.564630276572904,
0.056262190757649, 0.0292871811194525, 0.0292871811194525, 0.0342153500184824,
0.0920408256371383, 0.508584931329577, 0.0292871811194525, 0.0292871811194525,
0.0342153500184824, 0.0920408256371383, 0.508584931329577, 0.0292871811194525,
0.049074641173751, 0.0526459198825374, 0.0617296734199745, 0.160514579425126,
0.668835951230964, 0.049074641173751, 0.0526459198825374, 0.0617296734199745,
0.160514579425126, 0.668835951230964, 0.049074641173751, 0.00417775230313281,
0.00452298394363368, 0.00581820793330847, 0.0274164539383892,
0.192956766873482, 0.00417775230313281, 0.00452298394363368,
0.00581820793330847, 0.0274164539383892, 0.192956766873482, 0.00401507276581307,
0.00260719926285416, 0.00287129534969705, 0.00346795018735445,
0.0141919615636613, 0.0942977424474807, 0.00260719926285416),
estimate = c(-0.797956867083461, -0.776801900236937, -0.7455698051489,
-0.444049984838546, -0.10530217843728, -0.797956867083461,
-0.776801900236937, -0.7455698051489, -0.444049984838546,
-0.10530217843728, -0.820469748450972, -0.251308805770323,
-0.251308805770323, -0.251096848307402, -0.226028966303428,
-0.134612249858047, -0.251308805770323, -0.251308805770323,
-0.251096848307402, -0.226028966303428, -0.134612249858047,
-0.251308805770323, -0.265907227757688, -0.261504591915461,
-0.260164781545852, -0.225524157517464, -0.10176195202019,
-0.265907227757688, -0.261504591915461, -0.260164781545852,
-0.225524157517464, -0.10176195202019, -0.265907227757688,
-0.409969137221152, -0.405618224033153, -0.409494543344045,
-0.387356945276789, -0.329354185640372, -0.409969137221152,
-0.405618224033153, -0.409494543344045, -0.387356945276789,
-0.329354185640372, -0.422572659021681, -0.506062313897924,
-0.501186805248218, -0.510763602114717, -0.498830153358464,
-0.447892133899374, -0.506062313897924)), row.names = c("df",
"df1", "df2", "df3", "df4", "df5", "df6", "df7", "df8", "df9",
"df10", "df11", "df12", "df13", "df14", "df15", "df16", "df17",
"df18", "df19", "df20", "df21", "df22", "df23", "df24", "df25",
"df26", "df27", "df28", "df29", "df30", "df31", "df32", "df33",
"df34", "df35", "df36", "df37", "df38", "df39", "df40", "df41",
"df42", "df43", "df44", "df45", "df46", "df47", "df48", "df49"
), class = "data.frame")
#execute function
multiverse.p.histogram(df_multiverse, df_multiverse$p.value)
There are two problems with the code:
The alpha does not display next to the line, but on the line and I had to specify y = 75 manually. Ideally, it should always be shortly underneath the upper border. Finally, I can´t get the text size of the alpha to decrease. I tried nudge_x, but that produces the following error: Warnmeldungen:
1: Removed 2 rows containing missing values (geom_bar).
2: Removed 264 rows containing missing values (geom_text).
Does anyone have suggestions?
Thanks already!
Edit:
Based on the answers, here is my updated code:
#' Histogram of p-values with density overlay and a labelled alpha line
#'
#' @param dataframe Data frame with a `p.value` column (mapped to x).
#' @param pvalues Not used in the body.
#' @param alpha x position of the dashed red line and of its label.
#' @return The ggplot object.
multiverse.p.histogram <- function(dataframe, pvalues, alpha = 0.05) {
  # Threshold marker: the dashed line plus its label. The label is pinned to
  # the top of the panel (y = Inf, pulled inward) and pushed to the right of
  # the line with a negative hjust. It is passed as a plotmath expression,
  # intended to display the alpha symbol.
  alpha_marker <- list(
    geom_vline(xintercept = alpha, color = "red", linetype = "dashed"),
    geom_text(
      x = alpha,
      y = Inf,
      label = expression(paste(alpha)),
      hjust = -0.5,
      vjust = "inward",
      color = "red",
      check_overlap = TRUE
    )
  )

  # Minimal look: no grid, border or background; black axis lines and text.
  bare_theme <- theme(
    axis.text = element_text(color = "black"),
    axis.line = element_line(colour = "black"),
    legend.position = "none",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank()
  )

  ggplot(dataframe, aes(x = p.value)) +
    geom_histogram(binwidth = 0.01, color = "black", fill = "dodgerblue") +
    geom_density(alpha = 0.5, fill = "#FF6666") +
    alpha_marker +
    ggtitle("Histogram of Multiverse P-Values") +
    xlab("p-value") +
    theme_bw() +
    bare_theme
}
Here's a few tweaks to your function that may help:
Find out where the approximate upper limit of your plot will be by using the base R hist function. Use this as the position for alpha, then set the upper y limit as a small multiple of that to ensure everything fits nicely.
You only need a single alpha label, so don't map the text to an aesthetic. You can use x and y positions directly.
Use hjust to adjust your text position.
It makes your code easier to read and debug if you arrange the plot code so it all fits neatly across a single screen and is in a predictable order (I like ggplot, then geoms, then scales, then lims, then labels, then themes; but whatever order works best for you, stick to a consistent scheme).
#' Histogram of p-values with density overlay and a labelled alpha line
#'
#' The label height is taken from the tallest bin of a base-R histogram of
#' the same data (bins of width 0.01 on [0, 1]); the y axis is then extended
#' to 110% of that height so the label fits inside the panel.
#'
#' @param dataframe Data frame with a numeric `p.value` column.
#' @param pvalues Not used in the body; kept so existing calls still work.
#' @param alpha x position of the dashed red line and of the "Alpha" label.
#' @return The ggplot object.
multiverse.p.histogram <- function(dataframe, pvalues, alpha = 0.05) {
  # plot = FALSE: only the bin counts are needed. Without it hist() would
  # also draw a base-graphics histogram every time the function is called.
  bin_counts <- hist(
    dataframe[["p.value"]],
    breaks = seq(0, 1, 0.01),
    plot = FALSE
  )$counts
  upper <- max(bin_counts)

  ggplot(dataframe, aes(x = p.value)) +
    geom_histogram(binwidth = 0.01, color = "black", fill = "dodgerblue") +
    geom_density(alpha = 0.5, fill = "#FF6666") +
    geom_vline(xintercept = alpha, color = "red", linetype = "dashed") +
    # annotate() draws the label exactly once; a geom_text() layer with
    # constant x/y/label would draw one copy per row of `dataframe`.
    # The negative hjust shifts the text to the right of the line.
    annotate(
      "text",
      x = alpha,
      y = upper,
      label = "Alpha",
      hjust = -0.25,
      color = "red"
    ) +
    coord_cartesian(xlim = c(0, 1)) +
    xlim(-0.01, 1) +
    ylim(0, upper * 1.1) +
    ggtitle("Histogram of Multiverse P-Values") +
    xlab("p-value") +
    theme_bw() +
    theme(
      axis.text = element_text(color = "black"),
      axis.line = element_line(colour = "black"),
      legend.position = "none",
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      panel.border = element_blank(),
      panel.background = element_blank()
    )
}

Not able to print forestplot in high resolution format in R

I need to create a forestplot of high resolution. I used the forestplot() function from library(forestplot) to create my plot, and then attempted to use the tiff() function to create a high resolution image for publication. However, my image turned blank.
It works if I export directly from R but not as high resolution as it was supposed to.
library(forestplot)
# Plot data: one row per line of the table; the first row has no estimate.
df <- structure(list(
mean = c(NA, 0.22, 0.20, 0.27),
lower = c(NA, 0.05, 0.04, 0.01),
upper = c(NA, 0.95, 1.08, 9.12)),
.Names = c("mean", "lower", "upper"),
row.names = c(NA, -4L),
class = "data.frame")
# Text columns displayed beside the plot (label, N, HR with CI, p-value);
# the first row holds the column headers.
tabletext <- cbind(
c("", "Pooled", "Group 1", "Group 2"),
c("N", "4334", "3354", "980"),
c("HR (95% CI)", "0.22 (0.05, 0.95)", "0.20 (0.04, 1.08)", "0.27 (0.01, 9.12)"),
c("p-value", "0.042", "0.061", "0.467")
)
# NOTE(review): forestplot() is called here, before any file device has been
# opened, and its return value is stored in `ggfp`.
ggfp <- forestplot(tabletext,
df,
new_page = TRUE,
is.summary = c(TRUE, rep(FALSE, 3)),
clip = c(0, 2),
colgap = unit(5, "mm"),
line.margin = unit(2, "mm"),
lineheight = unit(1, "in"),
txt_gp = fpTxtGp(label = gpar(cex = 1),
ticks = gpar(cex = 1)),
align = c("l", "c", "c", "c"),
boxsize = 0.2,
xticks = seq(0, 2.0, 0.5),
zero = 1,
col = fpColors(box = "royalblue",
line = "darkblue"),
mar = unit(c(-1, 0.5, -2, 0.5), "in"))
# Open a 9 x 7 inch TIFF device at 300 dpi, evaluate the stored object, then
# close the device.
tiff("forestplot.tiff", units = "in", width = 9, height = 7, res = 300)
# NOTE(review): whether this line draws anything into the TIFF depends on what
# forestplot() returned above. Presumably the drawing already happened at the
# forestplot() call, i.e. outside this device; confirm against the installed
# forestplot version.
ggfp
dev.off()
The file was created but it was a blank page
This works for me (output file is 17MB):
library(forestplot)

# Directory that should receive the image. The full file path is built from
# it, so the session's working directory is left untouched (no setwd()).
out_dir <- "/path/to/directory/for/plot"

# Plot data: one row per line of the table; the first row has no estimate.
df <- data.frame(
  mean  = c(NA, 0.22, 0.20, 0.27),
  lower = c(NA, 0.05, 0.04, 0.01),
  upper = c(NA, 0.95, 1.08, 9.12)
)

# Text columns displayed beside the plot; the first row holds the headers.
tabletext <- cbind(
  c("", "Pooled", "Group 1", "Group 2"),
  c("N", "4334", "3354", "980"),
  c("HR (95% CI)", "0.22 (0.05, 0.95)", "0.20 (0.04, 1.08)", "0.27 (0.01, 9.12)"),
  c("p-value", "0.042", "0.061", "0.467")
)

# Open the TIFF device first: the plot has to be produced while the device is
# open, so forestplot() is called between tiff() and dev.off().
# NOTE(review): if this is run via source() or from inside a function, the
# call may need to be wrapped in print(); confirm for the installed
# forestplot version.
tiff(
  file.path(out_dir, "forestplot.tiff"),
  units = "in",
  width = 9,
  height = 7,
  res = 300
)
forestplot(
  tabletext,
  df,
  new_page = TRUE,
  is.summary = c(TRUE, rep(FALSE, 3)),
  clip = c(0, 2),
  colgap = unit(5, "mm"),
  line.margin = unit(2, "mm"),
  lineheight = unit(1, "in"),
  txt_gp = fpTxtGp(
    label = gpar(cex = 1),
    ticks = gpar(cex = 1)
  ),
  align = c("l", "c", "c", "c"),
  boxsize = 0.2,
  xticks = seq(0, 2.0, 0.5),
  zero = 1,
  col = fpColors(
    box = "royalblue",
    line = "darkblue"
  ),
  mar = unit(c(-1, 0.5, -2, 0.5), "in")
)
dev.off()

Why ggplot2 geom_hlines plots more than intended?

Here is a sample of the dataframe I am working with.
> head(tbl[,c('logFC', 'CI_L', 'CI_R', "adj_P_Value","gene",'Group1','Group2', 'Study_ID')])
logFC CI_L CI_R adj_P_Value gene Group1 Group2 Study_ID
1 -0.09017596 -0.43955752 0.25920561 1 CD244 Male Female GSE2461
2 0.08704844 -0.26134341 0.43544028 1 CD244 ulcerative colitis irritable bowel syndrome GSE2461
3 -0.03501474 -0.12677636 0.05674688 1 CD244 nonlesional skin lesional skin GSE27887
4 0.01096914 -0.08064105 0.10257932 1 CD244 pretreatment posttreatment GSE27887
5 -0.03707265 -0.12407201 0.04992672 1 CD244 Infliximab Before treatment GSE42296
6 0.07644834 -0.02849309 0.18138977 1 CD244 Responder Nonresponder GSE42296
> dput(droplevels(head(tbl, 4)))
structure(list(Probe_gene = c("211828_s_at", "213107_at", "213109_at",
"211828_s_at"), logFC = c(0.299038590078202, 0.110797898105632,
0.183214738942169, -0.733505457149486), CI_L = c(-0.0332844208935414,
-0.246475718463096, -0.103358698007331, -1.06488707237429), CI_R = c(0.631361601049945,
0.46807151467436, 0.469788175891669, -0.402123841924678), AveExpr = c(7.38827278419383,
7.83576862202959, 6.68411901305011, 7.38827278419383), t = c(2.08930195860002,
0.720053829585981, 1.48442706763586, -5.13936340603241), P_Value = c(0.0714526369900392,
0.492771856681782, 0.177447421180599, 0.000998740960213292),
adj_P_Value = c(1, 1, 1, 1), B = c(-4.07430683864883, -5.56181503167371,
-4.83144498851773, -0.294306065125513), gene = c("TNIK",
"TNIK", "TNIK", "TNIK"), Study_ID = c("GSE2461", "GSE2461",
"GSE2461", "GSE2461"), Group1 = c("Male", "Male", "Male",
"ulcerative colitis"), Group2 = c("Female", "Female", "Female",
"irritable bowel syndrome"), Study_ID = c("GSE2461", "GSE2461",
"GSE2461", "GSE2461"), Disease = c("irritable bowel syndrome; ulcerative colitis",
"irritable bowel syndrome; ulcerative colitis", "irritable bowel syndrome; ulcerative colitis",
"irritable bowel syndrome; ulcerative colitis"), DOID = c(9778L,
9778L, 9778L, 9778L), Title = c("Control (IBS) & Ulcerative colitis (UC) subjects",
"Control (IBS) & Ulcerative colitis (UC) subjects", "Control (IBS) & Ulcerative colitis (UC) subjects",
"Control (IBS) & Ulcerative colitis (UC) subjects"), GEO_Platform_ID = c("GPL96",
"GPL96", "GPL96", "GPL96"), Platform = c("Affymetrix Human U133A Array",
"Affymetrix Human U133A Array", "Affymetrix Human U133A Array",
"Affymetrix Human U133A Array"), PMID = c(0L, 0L, 0L, 0L),
Organism = c("Homo sapiens", "Homo sapiens", "Homo sapiens",
"Homo sapiens"), Data_Type = c("RNA", "RNA", "RNA", "RNA"
), Biomaterial = c("Colonic Mucosal biopsy", "Colonic Mucosal biopsy",
"Colonic Mucosal biopsy", "Colonic Mucosal biopsy"), Study_Type = c("in vivo",
"in vivo", "in vivo", "in vivo"), Samples = c(8L, 8L, 8L,
8L), Time_Point = c("Baseline", "Baseline", "Baseline", "Baseline"
), Treatment = c("NA", "NA", "NA", "NA"), Treatment_Protocol = c("NA",
"NA", "NA", "NA"), Raw_Data = c(0L, 0L, 0L, 0L), Notes = c("controls are IBS, not healty",
"controls are IBS, not healty", "controls are IBS, not healty",
"controls are IBS, not healty"), ylab = c("Female → Male",
"Female → Male", "Female → Male", "irritable bowel syndrome → ulcerative colitis"
)), .Names = c("Probe_gene", "logFC", "CI_L", "CI_R", "AveExpr",
"t", "P_Value", "adj_P_Value", "B", "gene", "Study_ID", "Group1",
"Group2", "Study_ID", "Disease", "DOID", "Title", "GEO_Platform_ID",
"Platform", "PMID", "Organism", "Data_Type", "Biomaterial", "Study_Type",
"Samples", "Time_Point", "Treatment", "Treatment_Protocol", "Raw_Data",
"Notes", "ylab"), row.names = c(NA, 4L), class = "data.frame")
I am using this to construct a plot that has the GSE # (Study_ID), followed by the contrast (Group1 vs Group2) on the y-axis, and logFC as the x-axis. I want to plot a horizontal line between each of the different GSE #'s for visual clarity, but my code doesn't seem to be working.
datasetList <- tbl$Study_ID
# Row index of the first occurrence of every Study_ID, shifted down by half a
# row so a line at that position falls between two rows.
hLines <- which(!duplicated(datasetList)) - 0.5
tbl$ylab <- paste0(tbl$Group2, " \U2192 ", tbl$Group1)

# Significance bands for the adjusted p-value. The band labels are assigned
# inside cut() and reused as the names of the colour vector, so the data
# levels and the colour keys cannot drift apart. (Relying on cut()'s
# automatic interval labels requires matching strings such as "(0.01,0.05]"
# character for character.)
p_breaks <- c(-Inf, 0.01, 0.05, Inf)
p_labels <- c("<= 0.01", "0.01 < P Value <= 0.05", "> 0.05")
p_colours <- setNames(c("red", "orange", "black"), p_labels)

p <- ggplot(data = tbl, aes(x = logFC, y = Probe_gene, group = Study_ID)) +
  # Reference lines at fold changes of 0.5, 2/3, 1.5 and 2 on the log2 scale.
  geom_vline(xintercept = log(c(0.5, 2 / 3, 1.5, 2), base = 2), size = 0.2) +
  # NOTE(review): together with the facet_grid() below, these intercepts are
  # presumably drawn in every panel rather than once between studies, which
  # would explain the extra lines described in the question.
  geom_hline(yintercept = hLines) +
  labs(
    # One title string built from the distinct gene names, not the whole
    # column.
    title = paste(unique(tbl$gene), collapse = ", "),
    y = "Contrasts",
    x = bquote(~Log[2]~'(Fold Change)')
  ) +
  geom_errorbarh(aes(xmin = CI_L, xmax = CI_R), height = .1) +
  # A single coloured point layer draws every observation.
  geom_point(aes(colour = cut(adj_P_Value, p_breaks, labels = p_labels))) +
  # Explicit limits keep all three bands in the legend even when a band has
  # no observations.
  scale_color_manual(
    name = "P Value",
    values = p_colours,
    limits = p_labels
  ) +
  #theme_bw()+
  theme(
    axis.text.y = element_blank(),
    strip.text.y = element_text(angle = 180),
    panel.spacing.y = unit(0, 'lines'),
    axis.ticks.y = element_blank()
  ) +
  # One panel per study/contrast combination, strips switched to the left.
  facet_grid(Study_ID + ylab ~ ., scales = 'free', space = 'free', switch = 'both')
p
For some reason, with the code I have now, ggplot prints many more horizontal lines than I need. It is printing a line between every row, when I only need a line between the distinct GSE #'s. What am I doing wrong? hLines contains the y-intercepts of where the lines should go.
P.S. As a bit of a side question, if anyone knows of a way for me to specify the shapes that appear (similar to how I specify the colors), that would be very appreciated. In reference to the colors, I need red circles, orange squares, and black crosses for the same conditions that appear in the scale_color_manual() function.

Resources