I have a function that creates a histogram with an overlaid density plot. The function also draws a red dashed line indicating alpha, and users can specify the alpha level. Moreover, the counts in the histogram will differ depending on the input data. I want a label reading "alpha = 0.05" (for example) next to the red dashed line. The label should always sit next to the alpha line and always be near the top of the graph (I have not managed to solve that). I'm aware of "Align geom_text to a geom_vline in ggplot2", but the answers there do not provide what I'm looking for (and/or produce error messages; I tried to reduce the size of the label with text = element_text(size = 11) as suggested there, but that does not work).
Find below some sample code:
# Histogram of p-values (bin width 0.01) with a density overlay, a dashed red
# vertical line at `alpha`, and an "Alpha" text label.
#   dataframe: data frame that must contain a `p.value` column (plotted on x).
#   pvalues:   accepted, but never referenced inside the body.
#   alpha:     x position of the reference line and of the label (default 0.05).
# Returns the ggplot object.
multiverse.p.histogram <- function(dataframe, pvalues, alpha = 0.05) {
  # Theme overrides applied after theme_bw(): black axis text and lines,
  # no legend, no grid lines, no panel border or background.
  theme_overrides <- theme(
    axis.text = element_text(color = "black"),
    axis.line = element_line(colour = "black"),
    legend.position = "none",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank()
  )

  ggplot(dataframe, aes(x = p.value)) +
    geom_histogram(binwidth = 0.01, color = "black", fill = "dodgerblue") +
    theme_bw() +
    xlim(0, 1) +
    geom_density(alpha = 0.5, fill = "#FF6666") +
    xlab("p-value") +
    ggtitle("Histogram of Multiverse P-Values") +
    geom_vline(xintercept = alpha, color = "red", linetype = "dashed") +
    # The label is mapped through aes() at a fixed height of y = 75.
    geom_text(aes(x = alpha, y = 75, label = "Alpha"), color = "red") +
    theme_overrides
}
#and some sample data
df_multiverse <- structure(list(transformation = c("normal", "normal", "normal",
"normal", "normal", "normal", "normal", "normal", "normal", "normal",
"normal", "normal", "normal", "normal", "normal", "normal", "normal",
"normal", "normal", "normal", "normal", "normal", "normal", "normal",
"normal", "normal", "normal", "normal", "normal", "normal", "normal",
"normal", "normal", "normal", "normal", "normal", "normal", "normal",
"normal", "normal", "normal", "normal", "normal", "normal", "normal",
"normal", "normal", "normal", "normal", "normal"), datatrimming = c("notrimming",
"notrimming", "notrimming", "notrimming", "notrimming", "notrimming",
"notrimming", "notrimming", "notrimming", "notrimming", "notrimming",
"mad", "mad", "mad", "mad", "mad", "mad", "mad", "mad", "mad",
"mad", "mad", "mad", "mad", "mad", "mad", "mad", "mad", "mad",
"mad", "mad", "mad", "mad", "mad", "mad", "mad", "mad", "mad",
"mad", "mad", "mad", "mad", "mad", "mad", "mad", "mad", "mad",
"mad", "mad", "mad"), fixedtrimming = c("min", "min", "min",
"min", "min", "minmax", "minmax", "minmax", "minmax", "minmax",
"nofixedtrimming", "min", "min", "min", "min", "min", "minmax",
"minmax", "minmax", "minmax", "minmax", "nofixedtrimming", "min",
"min", "min", "min", "min", "minmax", "minmax", "minmax", "minmax",
"minmax", "nofixedtrimming", "min", "min", "min", "min", "min",
"minmax", "minmax", "minmax", "minmax", "minmax", "nofixedtrimming",
"min", "min", "min", "min", "min", "minmax"), min = c("0.1",
"0.2", "0.3", "0.4", "0.5", "0.1", "0.2", "0.3", "0.4", "0.5",
NA, "0.1", "0.2", "0.3", "0.4", "0.5", "0.1", "0.2", "0.3", "0.4",
"0.5", NA, "0.1", "0.2", "0.3", "0.4", "0.5", "0.1", "0.2", "0.3",
"0.4", "0.5", NA, "0.1", "0.2", "0.3", "0.4", "0.5", "0.1", "0.2",
"0.3", "0.4", "0.5", NA, "0.1", "0.2", "0.3", "0.4", "0.5", "0.1"
), max = c("4.78103879314337", "4.78103879314337", "4.78103879314337",
"4.78103879314337", "4.78103879314337", "10", "10", "10", "10",
"10", NA, "1.50348972125673", "1.50348972125673", "1.50348972125673",
"1.50348972125673", "1.50348972125673", "10", "10", "10", "10",
"10", NA, "1.6673730851492", "1.6673730851492", "1.6673730851492",
"1.6673730851492", "1.6673730851492", "10", "10", "10", "10",
"10", NA, "1.82875939263309", "1.82875939263309", "1.82875939263309",
"1.82875939263309", "1.82875939263309", "10", "10", "10", "10",
"10", NA, "1.98682907108801", "1.98682907108801", "1.98682907108801",
"1.98682907108801", "1.98682907108801", "10"), DispersionMeasure = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "2", "2", "2", "2", "2",
"2", "2", "2", "2", "2", "2", "2.5", "2.5", "2.5", "2.5", "2.5",
"2.5", "2.5", "2.5", "2.5", "2.5", "2.5", "3", "3", "3", "3",
"3", "3", "3", "3", "3", "3", "3", "3.5", "3.5", "3.5", "3.5",
"3.5", "3.5"), df = c(23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
23, 23, 23, 23, 23, 23, 23, 23), t.value = c(-1.96240490816673,
-1.91062435558061, -1.88913858576971, -1.50889838134833, -0.584414818091524,
-1.96240490816673, -1.91062435558061, -1.88913858576971, -1.50889838134833,
-0.584414818091524, -2.01035512741752, -2.32446732021548, -2.32446732021548,
-2.25138730178018, -1.75805360848308, -0.671509667928522, -2.32446732021548,
-2.32446732021548, -2.25138730178018, -1.75805360848308, -0.671509667928522,
-2.32446732021548, -2.07781942947361, -2.04327207374561, -1.96398718960439,
-1.45016152484876, -0.43329653628318, -2.07781942947361, -2.04327207374561,
-1.96398718960439, -1.45016152484876, -0.43329653628318, -2.07781942947361,
-3.1795493150037, -3.14621983607465, -3.03987566457514, -2.35519486220697,
-1.34118074962509, -3.1795493150037, -3.14621983607465, -3.03987566457514,
-2.35519486220697, -1.34118074962509, -3.19618807311348, -3.37575126770368,
-3.33582114002809, -3.25737102188504, -2.65364122964845, -1.74520405186558,
-3.37575126770368), p.value = c(0.0619242560601778, 0.0685974542038329,
0.0715464534237802, 0.14494031195569, 0.564630276572904, 0.0619242560601778,
0.0685974542038329, 0.0715464534237802, 0.14494031195569, 0.564630276572904,
0.056262190757649, 0.0292871811194525, 0.0292871811194525, 0.0342153500184824,
0.0920408256371383, 0.508584931329577, 0.0292871811194525, 0.0292871811194525,
0.0342153500184824, 0.0920408256371383, 0.508584931329577, 0.0292871811194525,
0.049074641173751, 0.0526459198825374, 0.0617296734199745, 0.160514579425126,
0.668835951230964, 0.049074641173751, 0.0526459198825374, 0.0617296734199745,
0.160514579425126, 0.668835951230964, 0.049074641173751, 0.00417775230313281,
0.00452298394363368, 0.00581820793330847, 0.0274164539383892,
0.192956766873482, 0.00417775230313281, 0.00452298394363368,
0.00581820793330847, 0.0274164539383892, 0.192956766873482, 0.00401507276581307,
0.00260719926285416, 0.00287129534969705, 0.00346795018735445,
0.0141919615636613, 0.0942977424474807, 0.00260719926285416),
estimate = c(-0.797956867083461, -0.776801900236937, -0.7455698051489,
-0.444049984838546, -0.10530217843728, -0.797956867083461,
-0.776801900236937, -0.7455698051489, -0.444049984838546,
-0.10530217843728, -0.820469748450972, -0.251308805770323,
-0.251308805770323, -0.251096848307402, -0.226028966303428,
-0.134612249858047, -0.251308805770323, -0.251308805770323,
-0.251096848307402, -0.226028966303428, -0.134612249858047,
-0.251308805770323, -0.265907227757688, -0.261504591915461,
-0.260164781545852, -0.225524157517464, -0.10176195202019,
-0.265907227757688, -0.261504591915461, -0.260164781545852,
-0.225524157517464, -0.10176195202019, -0.265907227757688,
-0.409969137221152, -0.405618224033153, -0.409494543344045,
-0.387356945276789, -0.329354185640372, -0.409969137221152,
-0.405618224033153, -0.409494543344045, -0.387356945276789,
-0.329354185640372, -0.422572659021681, -0.506062313897924,
-0.501186805248218, -0.510763602114717, -0.498830153358464,
-0.447892133899374, -0.506062313897924)), row.names = c("df",
"df1", "df2", "df3", "df4", "df5", "df6", "df7", "df8", "df9",
"df10", "df11", "df12", "df13", "df14", "df15", "df16", "df17",
"df18", "df19", "df20", "df21", "df22", "df23", "df24", "df25",
"df26", "df27", "df28", "df29", "df30", "df31", "df32", "df33",
"df34", "df35", "df36", "df37", "df38", "df39", "df40", "df41",
"df42", "df43", "df44", "df45", "df46", "df47", "df48", "df49"
), class = "data.frame")
# Run the function on the sample data. The second argument fills the `pvalues`
# parameter, which the function body never references; the plotted values come
# from the `p.value` column of the data frame.
multiverse.p.histogram(df_multiverse, df_multiverse$p.value)
There are two problems with the code:
The alpha label is not displayed next to the line but on top of it, and I had to specify y = 75 manually. Ideally, it should always sit just below the upper border of the plot. Finally, I can't get the text size of the alpha label to decrease. I tried nudge_x, but that produces the following warnings ("Warnmeldungen" is German for "warning messages"): Warnmeldungen:
1: Removed 2 rows containing missing values (geom_bar).
2: Removed 264 rows containing missing values (geom_text).
Does anyone have suggestions?
Thanks already!
Edit:
Based on the answers, here is my updated code:
# Histogram of multiverse p-values with a density overlay and a labelled
# alpha threshold.
#
#   dataframe: data frame containing a `p.value` column, which is plotted on x.
#   pvalues:   not used by the body; kept so existing calls keep working.
#   alpha:     significance level. Sets the x position of the dashed red line
#              and the value shown in its label (default 0.05).
#
# Returns a ggplot object.
multiverse.p.histogram <- function(dataframe, pvalues, alpha = 0.05) {
  ggplot(dataframe, aes(x = p.value)) +
    geom_histogram(binwidth = 0.01, color = "black", fill = "dodgerblue") + # histogram
    geom_density(alpha = 0.5, fill = "#FF6666") + # density overlay
    geom_vline(xintercept = alpha, color = "red", linetype = "dashed") + # alpha line
    # annotate() draws exactly one label. A geom_text() layer would inherit the
    # plot data and draw one copy of the label per row.
    # y = Inf together with vjust = "inward" keeps the label just below the top
    # of the panel regardless of the bin counts.
    # parse = TRUE renders the string as plotmath, i.e. the Greek letter
    # followed by "= <alpha>".
    annotate(
      "text",
      x = alpha, y = Inf,
      label = paste0("alpha == ", format(alpha)),
      parse = TRUE,
      hjust = -0.1, vjust = "inward",
      color = "red"
    ) +
    ggtitle("Histogram of Multiverse P-Values") +
    xlab("p-value") +
    theme_bw() +
    theme(
      axis.text = element_text(color = "black"),
      axis.line = element_line(colour = "black"),
      legend.position = "none",
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      panel.border = element_blank(),
      panel.background = element_blank()
    )
}
Here's a few tweaks to your function that may help:
Find out where the approximate upper limit of your plot will be by using the base R hist function. Use this as the position for alpha, then set the upper y limit as a small multiple of that to ensure everything fits nicely.
You only need a single alpha label, so don't map the text to an aesthetic. You can use x and y positions directly.
Use hjust to adjust your text position.
It makes your code easier to read and debug if you arrange the plot code so that it fits neatly on a single screen and follows a predictable order. I like ggplot, then geoms, then scales, then lims, then labels, then themes, but whatever order works best for you, stick to a consistent scheme.
# Histogram of multiverse p-values with a density overlay and an "Alpha" label
# placed beside the threshold line, at the height of the tallest bin.
#
#   dataframe: data frame containing a `p.value` column, which is plotted on x.
#   pvalues:   not used by the body; kept so existing calls keep working.
#   alpha:     x position of the dashed red line and its label (default 0.05).
#
# Returns a ggplot object.
multiverse.p.histogram <- function(dataframe, pvalues, alpha = 0.05) {
  # Approximate height of the tallest bar, taken from base R's binning.
  # plot = FALSE: only the counts are needed, so no base-graphics histogram
  # should be drawn as a side effect.
  bins <- hist(dataframe$p.value, breaks = seq(0, 1, 0.01), plot = FALSE)
  upper <- max(bins$counts)

  ggplot(dataframe, aes(x = p.value)) +
    geom_histogram(binwidth = 0.01, color = "black", fill = "dodgerblue") +
    geom_density(alpha = 0.5, fill = "#FF6666") +
    geom_vline(xintercept = alpha, color = "red", linetype = "dashed") +
    # annotate() draws a single label. A geom_text() layer would inherit the
    # plot data and draw one copy per row.
    annotate(
      "text",
      x = alpha, y = upper,
      label = "Alpha",
      hjust = -0.25,
      color = "red"
    ) +
    coord_cartesian(xlim = c(0, 1)) +
    xlim(-0.01, 1) +
    # 10% headroom above the tallest bar so the label fits inside the panel.
    ylim(0, upper * 1.1) +
    ggtitle("Histogram of Multiverse P-Values") +
    xlab("p-value") +
    theme_bw() +
    theme(
      axis.text = element_text(color = "black"),
      axis.line = element_line(colour = "black"),
      legend.position = "none",
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      panel.border = element_blank(),
      panel.background = element_blank()
    )
}
I am trying to plot parameter estimates from 2 different models on the same plot with confidence intervals. The values for each parameter are overlapping and I would like to have the values for each x value (parameter) side by side.
Data
ci_glm<- dput(head(ci_glm))
structure(list(parameter = c(-0.989960390831752, 0.23802371512626,
-0.0616305811832892, -1.19145279737722, -0.39565631764158, -2.70713419498971
), lower = c(-1.76343111098339, -0.0384145902617419, -0.338256948651047,
-2.41452042708909, -1.56076899496423, -3.8540190563328), upper = c(0.037528250100757,
0.514419550437131, 0.214814325315589, -0.0336965457336884, 0.639688082685478,
-1.70838885452134), par = c("Intercept", "Vessel 2", "Vessel 3",
"10", "11", "13")), .Names = c("parameter", "lower", "upper",
"par"), row.names = c("(Intercept)", "as.factor(CruiseID)201502",
"as.factor(CruiseID)201503", "as.factor(Stratum)10", "as.factor(Stratum)11",
"as.factor(Stratum)13"), class = "data.frame")
ci_boot<-dput(head(ci_boot2))
structure(list(parameter = c(-1.23409264614473, NA, NA, -0.434928403121171,
-2.74151010196932, -0.361626461606862), lower = c(-1.99928925205138,
NA, NA, -2.16613527555384, -1.38979210854727, -3.9529283095427
), upper = c(-0.118870916073164, NA, NA, 0.0238247660480798,
0.860847808652077, -1.44129237641604), par = c("Intercept", "Vessel 2",
"Vessel 3", "10", "11", "13"), diff = c(1.88041833597822, NA,
NA, 2.18996004160192, 2.25063991719935, 2.51163593312666), diff1 = c(0.76519660590665,
NA, NA, 1.73120687243267, -1.35171799342205, 3.59130184793584
), diff2 = c(1.11522173007157, NA, NA, 0.458753169169251, 3.6023579106214,
-1.07966591480918)), .Names = c("parameter", "lower", "upper",
"par", "diff", "diff1", "diff2"), row.names = c("(Intercept)",
"1", "11", "as.factor(Stratum)10", "as.factor(Stratum)11", "as.factor(Stratum)13"
), class = "data.frame")
plot
# Draw the parameter estimates and confidence intervals of two models
# (ci_glm in black, ci_boot in red) on one set of axes using plotrix::plotCI().
# library() stops with an error if plotrix is not installed.
library(plotrix)

# Empty frame for the first model; the axes are drawn manually below.
plot(ci_glm$parameter, type = "n", ylab = "Parameter Estimate", xlab = "Parameter",
     pch = 20, axes = FALSE, ylim = c(-4, 4))
axis(1, at = 1:6, labels = unique(ci_glm$par), las = 3, cex.axis = 0.8)
axis(2)
abline(h = 0, lty = 2, col = "light gray")
# Interval half-widths are passed as distances from the estimate (uiw / liw).
# NOTE(review): par("bg", col = "black") appears to also set the graphics
# parameter `col` as a side effect - confirm this is intended.
plotCI(ci_glm$parameter, y = NULL,
       uiw = abs(ci_glm$upper - ci_glm$parameter),
       liw = abs(ci_glm$parameter - ci_glm$lower),
       ui = NULL, li = NULL, err = "y", sfrac = 0.01, gap = 0, slty = par("lty"),
       add = TRUE, scol = "black", pch = 18, pt.bg = par("bg", col = "black"))

# Overlay the second model on the same device. The same ylim keeps the two
# y scales aligned.
par(new = TRUE)
plot(ci_boot$parameter, type = "n", ylab = "Parameter Estimate", xlab = "Parameter",
     pch = 20, axes = FALSE, ylim = c(-4, 4), col = "red")
axis(2)
abline(h = 0, lty = 2, col = "light gray")
plotCI(ci_boot$parameter, y = NULL,
       uiw = abs(ci_boot$upper - ci_boot$parameter),
       liw = abs(ci_boot$parameter - ci_boot$lower),
       ui = NULL, li = NULL, err = "y", sfrac = 0.01, gap = 0, slty = par("lty"),
       add = TRUE, scol = "red", pch = 18, pt.bg = par("bg", col = "red"))

leg.text <- c("GLM", "GLMM")
# pch = 18 matches the symbol used for the plotted estimates.
legend("bottomright", leg.text, lty = c(1, 1), pch = c(18, 18), text.col = "black",
       col = c("black", "red"), bty = "y", cex = .8)
R version 3.3.1 (2016-06-21)
Platform: i386-w64-mingw32/i386 (32-bit)
Running under: Windows 7 x64 (build 7601) Service Pack 1
If you are willing to switch to using ggplot2, this can be done easily with position_dodge(), as seen here. I am also using dplyr to combine the two model outputs into one plottable data.frame.
# Stack both model outputs into one data frame. The row names become a
# `Parameter` column, and `.id` records each row's source ("boot" or "glm")
# in a `Model` column.
toPlot <- bind_rows(
  boot = mutate(ci_boot, Parameter = row.names(ci_boot)),
  glm = mutate(ci_glm, Parameter = row.names(ci_glm)),
  .id = "Model"
)

# One point and interval per parameter and model. position_dodge() shifts the
# two models sideways so they sit next to each other instead of overlapping.
ggplot(
  toPlot,
  aes(
    x = Parameter,
    y = parameter,
    ymin = lower,
    ymax = upper,
    col = Model
  )
) +
  geom_linerange(position = position_dodge(0.2)) +
  geom_point(position = position_dodge(0.2)) +
  # Rotate the x labels so the long parameter names stay readable.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
Note that, for a couple of your bootstrap model parameters, the estimate is somehow outside the range of your confidence interval. You will likely want to clean up the labels etc. (and either remove or supply values for 1 and 11), but this addresses the overlap question.