Plotting normal distribution density plot for hazard ratio in R

Plotting normal distribution density plot for hazard ratio in R - r

I'm trying to plot 2 normal distribution density plots for null and alternative hazard ratios of 1 and 0.65, respectively, to replicate an example (plot attached). Here's my code so far but it doesn't makes sense to me to have negative values for hazard ratios, but when I don't have negative values, the distributions are cut off. So I know I'm doing something wrong here. Thanks!
x <- seq(-2, 2, length.out = 100000)
df <- do.call(rbind,
list(data.frame(x=x, y=dnorm(x, mean = log(1), sd = sqrt(1/60 + 1/60)), id="H0, HR = 1"),
data.frame(x=x, y=dnorm(x, mean = log(0.65), sd = sqrt(1/60 + 1/60)), id="H1, HR = 0.65")))
vline <- 0.65
p1 <- ggplot(df, aes(x, y, group = id, color = id)) +
geom_line() +
geom_area(aes(fill = id),
data = ~ subset(., (id == "H1, HR = 0.65" & x > (vline)) | (id == "H0, HR = 1" & x < (vline))),
alpha = 0.3) +
geom_vline(xintercept = vline, linetype = "dashed") +
labs(x = "log(Hazard Ratio)", y = 'Density') + xlim(-2, 2) +
guides(fill = "none", color = guide_legend(override.aes = list(fill = "white"))) +
theme_classic() +
theme(legend.title=element_text(size=10), legend.position = c(0.8, 0.4),
legend.text = element_text(size = 10),
axis.line.y = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank()
) +
scale_color_manual(name = '', values = c('red', 'blue')) +
scale_fill_manual(values = c('red', 'blue'))
The plot I'm trying to replicate

This gets reasonably close to the image that you have posted.
You should not use the log() of the means, but rather the means as is. Moreover if you use the normal distribution, you assume that parameters can take any value between -Inf and Inf, albeit with very small densities far from the mean. Therefore, you cannot expect all values to be positive. If you would like your values to be bounded by 0, then you should use a gamma distribution instead.
x <- seq(-2, 2, length.out = 1000)
df <- do.call(rbind,
list(data.frame(x=x, y=dnorm(x, mean = 1, sd = sqrt(1/50)), id="H0, HR = 1"),
data.frame(x=x, y=dnorm(x, mean = 0.65, sd = sqrt1/50)), id="H1, HR = 0.65")))
vline <- 0.65
ggplot(df, aes(x, y, group = id, color = id)) +
geom_line() +
geom_area(aes(fill = id),
data = ~ subset(., (id == "H1, HR = 0.65" & x > (vline)) | (id == "H0, HR = 1" & x < (vline))),
alpha = 0.3) +
geom_vline(xintercept = vline, linetype = "dashed") +
labs(x = "log(Hazard Ratio)", y = 'Density') + xlim(-2, 2) +
guides(fill = "none", color = guide_legend(override.aes = list(fill = "white"))) +
theme_classic() +
theme(legend.title=element_text(size=10), legend.position = c(0.8, 0.4),
legend.text = element_text(size = 10),
axis.line.y = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank()
) +
scale_color_manual(name = '', values = c('red', 'blue')) +
scale_fill_manual(values = c('red', 'blue')) +
scale_x_continuous(breaks = seq(-0.3, 2.1, 0.3),
limits = c(-0.3, 2.1))

Related

Setting the same bar/grid width for multiple barplots arranged together?

So I am trying to arrange four bar plots together so I can visually compare their values for a range of sites throughout different periods that I am analyzing. As the data comes from four different datasets (one for each period) I have tried to do this by making a separate plot for each period that I am looking at and then arranging them together with ggarrange, but one of them (Results4) has wider bars (and gridlines) than the rest of them.
I just can't seem to figure out how I can make sure that the bars are all the same width for each of the plots, so I hope anyone here has an idea!
This is what I have:
Results1 <- ggplot(DataPC, aes(SITE, PS_score, group = 1)) +
geom_col(colour = "white", fill = "#BC3F4A", width = 1.00) +
theme_bw() +
theme(axis.text=element_text(size = 10, angle = 75, hjust = 1), panel.grid = element_line(size = 0.75)) +
labs(y = "PS Score") +
ylim(0, 5)
Results2 <- ggplot(DataEC, aes(SITE, PS_score, group = 1)) +
geom_col(colour = "white", fill = "#BC3F4A", width = 1.00) +
theme_bw() +
theme(axis.text=element_text(size = 10, angle = 75, hjust = 1), panel.grid = element_line(size = 0.75)) +
labs(y = "PS Score") +
ylim(0, 5)
Results3 <- ggplot(DataLC, aes(SITE, PS_score, group = 1)) +
geom_col(colour = "white", fill = "#BC3F4A", width = 1.00) +
theme_bw() +
theme(axis.text=element_text(size = 10, angle = 75, hjust = 1), panel.grid = element_line(size = 0.75)) +
labs(y = "PS Score") +
ylim(0, 5)
Results4 <- ggplot(DataTC, aes(SITE, PS_score, group = 1)) +
geom_col(colour = "white", fill = "#BC3F4A", width = 1.00) +
theme_bw() +
theme(axis.text=element_text(size = 10, angle = 75, hjust = 1), panel.grid = element_line(size = 0.75)) +
labs(y = "PS Score") +
ylim(0, 5)
ggarrange(Results1, Results2, Results3, Results4,
labels = c("Preclassic", "Early Classic", "Late Classic", "Terminal Classic"), ncol = 2, nrow = 2)

As stefan suggested in the comments, it's probably easiest to just combine your datasets into a single data.frame and then plot using faceting. This allows you to (optionally) enforce a shared x axis.
library(tidyverse)
d1 <- tibble(site = letters[1:4], count = sample(25, 4))
d2 <- tibble(site = letters[3:6], count = sample(25, 4))
bind_rows(
d1 %>% mutate(category = "1"),
d2 %>% mutate(category = "2")
) %>%
ggplot(aes(site, count)) +
geom_col() +
facet_wrap(~category)
Created on 2022-10-21 by the reprex package (v2.0.1)

ggplot poinstrange with multiple categories - change fill color

I have the following plot:
ggplot() +
geom_pointrange(data=data_FA, mapping=aes(x=snr, y=median, ymin=p25, ymax=p75, colour=factor(method), group=method), position = pd) +
geom_hline(yintercept=FA_GT, linetype="dashed", color = "blue") +
theme(legend.title = element_blank(), legend.position = "none", panel.border = element_rect(colour = "gray", fill=NA, size=1),
plot.margin = unit( c(0,0.5,0,0) , units = "lines" )) +
labs( title = "", subtitle = "")
obtained from the following dataset:
For each group (red and blue) codified by the factor method, I want to see red/blue dots and lines with different transparency according to the factor subset. Does anyone know how to do that? In addition, how can I add more separation space between the two groups (red and blue)?
Thank you!

You can just map alpha to subset inside aes:
ggplot(data_FA) +
geom_pointrange(aes(snr, median, ymin = p25, ymax = p75,
colour = factor(method), group = method,
alpha = subset),
position = pd) +
geom_hline(yintercept = FA_GT, linetype = "dashed", color = "blue") +
scale_alpha_manual(values = c(0.3, 1)) +
theme_bw() +
theme(legend.position = 'none',
panel.border = element_rect(colour = "gray", fill = NA, size = 1),
plot.margin = unit( c(0,0.5,0,0), units = "lines" )) +
labs(title = "", subtitle = "")
Data
data_FA <- data.frame(X = c("X1", "X1.7", "X1.14", "X1.21"),
snr = "snr10",
subset = c("full", "full", "subset5", "subset5"),
method= c("sc", "trunc", "sc", "trunc"),
median = c(0.4883985, 0.4883985, 0.4923685, 0.4914260),
p25 = c(0.4170183, 0.4170183, 0.4180174, 0.4187472),
p75 = c(0.5617713, 0.5617713, 0.5654203, 0.5661565))
FA_GT <- 0.513
pd <- position_dodge2(width = 1)

Add the quantilies to each line, ggplot2

I am trying to shade the 0.025 and 0.975 quantiles on this graph that has three lines. I have tried geom_area, geom_ribbon, and I cannot highlight every quantile in every line.
Please note that "y" was ignored in this density graph.
example <-data.frame(source=c("Leaflitter","Leaflitter","Leaflitter","Leaflitter",
"Leaflitter","Leaflitter","Leaflitter","Leaflitter","Leaflitter","Leaflitter",
"Biofilm","Biofilm","Biofilm","Biofilm","Biofilm","Biofilm","Biofilm","Biofilm",
"Biofilm","Biofilm","Algae","Algae","Algae","Algae","Algae","Algae","Algae","Algae",
"Algae","Algae"), n=c(1,2,3,4,5,6,7,8,9,10,1,2,3,4,5,6,7,8,9,10,1,2,3,4,5,6,7,8,9,10),
density=c(0.554786934, 0.650578421, 0.039317168, 0.53537613,0.435081982,0.904056941,0.556284164,0.855319434,
0.399169622,0.570246304,0.076722032,0.257427999,0.172736928,0.447424473,0.520976948,0.011720494,0.311348655,
0.120698996,0.016336661,0.331741377, 0.368491034,0.09199358,0.787945904,0.017199397,0.04394107,
0.084222564,0.132367181,0.023981569,0.584493716,0.098012319))
example
One subgroup and quantiles
L <- filter(QPA_G_Feb17, source == "Leaflitter")
L <-as.data.frame(L)
Lq025 <- quantile(L$density, .025)
Lq975 <- quantile(L$density, .975)
ggplot(QPA_G_Feb17, aes(x=density, color=source)) +
labs(y="Density", x="Sorce contribution") +
geom_density(aes(linetype = source), size=1.2) +
scale_color_manual(values=c("#31a354", "#2c7fb8", "#d95f0e")) +
scale_linetype_manual(values = c("solid", "dotted", "longdash")) +
theme_classic()+
ylim(0, 5)+
theme(axis.text.y=element_text(angle=0, size=12, vjust=0.5, color="black")) +
theme(axis.text.x =element_text(angle=0, size=12, vjust=0.5, color="black")) +
theme(axis.title.x = element_text(color="black", size=14))+
theme(axis.title.y = element_text(color="black", size=14))
I would appreciate your help since I have looked in other forums, and there is information to highlight when there is only 1 line.

I think this data is a bit more representative of the data displayed in your plot:
set.seed(50)
QPA_G_Feb17 <- data.frame(density = c(rgamma(400, 2, 10),
rgamma(400, 2.25, 9),
rgamma(400, 5, 7)),
source = rep(c("Algae", "Biofilm", "Leaflitter"),
each = 400))
I find that when you are trying to do something complex or non-standard in ggplot, the best thing to do is calculate the data you wish to plot ahead of time. In this case, we can calculate the density curves and the cumulative densities, including their 0.025 and 0.975 quantiles, and putting them all in a data frame like this:
dens <- lapply(split(QPA_G_Feb17, QPA_G_Feb17$source),
function(x) density(x$density, from = 0, to = 1))
df <- do.call(rbind, mapply(function(x, y) {
data.frame(x = x$x, y = x$y, source = y)
}, dens, names(dens), SIMPLIFY = FALSE))
df <- df %>%
group_by(source) %>%
mutate(cdf = cumsum(y * mean(diff(x))),
lower = cdf < 0.025,
upper = cdf > 0.975)
Now it is easy to plot using geom_area:
ggplot(df, aes(x, y, color = source)) +
geom_area(data = df[df$lower,], aes(fill = source), alpha = 0.5,
position = "identity") +
geom_area(data = df[df$upper,], aes(fill = source), alpha = 0.5,
position = "identity") +
labs(y = "Density", x = "Source contribution") +
geom_line(aes(linetype = source), size = 1.2) +
scale_fill_manual(values = c("#31a354", "#2c7fb8", "#d95f0e")) +
scale_color_manual(values = c("#31a354", "#2c7fb8", "#d95f0e")) +
scale_linetype_manual(values = c("solid", "dotted", "longdash")) +
theme_classic() +
ylim(0, 5) +
xlim(0, 1) +
theme(axis.text.y = element_text(size = 12, vjust = 0.5),
axis.text.x = element_text(size = 12, vjust = 0.5),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14))
Here, the 2.5% and 97.5% extremeties of each density curve are shaded below each line. The exception is in the "Leaflitter` line, which clearly extends out of the 0-1 range that has been plotted in your example.

How to plot multiple time series with a reverse barplot on the secondary axis using ggplot2?

I have a data frame having four columns as shown below (here I just put header of my actual data frame):
df <- tibble(Date=c("2007-05-01", "2007-05-02","2007-05-03", "2007-05-04", "2007-05-05"), Obs = c(0.16,0.15,0.17,0.19,0.14), Sim = c(0.17, 0.11, 0.21, 0.15, 0.13), Rain = c(0.1, 0.11, 0.04,0.21,0.5))
How can I plot the data such that the variables Obs and Sim are plotted on the primary y-axis and Rain is plotted as bars on a reverse secondary axis?
Here is the code I have tried thus far:
ggplot(df, aes(x=as.Date(Date))) +
geom_line(aes(y=Obs, color="red")) +
geom_line(aes(y=Sim, color="green")) +
geom_bar(mapping = aes(y = Rain), stat = "identity") +
scale_y_continuous(name = expression('Soil moisture, m'^"3"*' m'^"-3"),
sec.axis = sec_axis(~ 3 - .*0.5, name = "Precipitation (inch)"))
Here is my expected output:
Edit: Additionally, how can I insert a legend that corresponds to each line (i.e. Obs, Sim, and Rain)?

You can also make two separate plots and stack them on top of each other. This would be useful for people (myself included) who prefer not to use dual-axis plots.
library(tidyverse)
library(lubridate)
library(scales)
df <- tibble(Date = c("2007-05-01", "2007-05-02", "2007-05-03", "2007-05-04", "2007-05-05"),
Obs = c(0.16, 0.15, 0.17, 0.19, 0.14),
Sim = c(0.17, 0.11, 0.21, 0.15, 0.13),
Rain = c(0.10, 0.11, 0.04, 0.21, 0.5))
# convert data to long format
df_long <- df %>%
mutate(Date = as.Date(Date)) %>%
pivot_longer(-Date,
names_to = 'key',
values_to = 'value')
Soil moisture plot
sm1 <- ggplot(data = df_long %>% filter(key != 'Rain'),
aes(x = Date, y = value,
group = key,
shape = key,
linetype = key,
col = key)) +
xlab("") +
ylab(expression('Soil moisture, m'^"3"*' m'^"-3")) +
geom_line(lwd = 0.5) +
geom_point(size = 3, alpha = 0.6) +
scale_color_brewer("", palette = 'Dark2') +
scale_linetype_manual("", values = c(NA, 'solid')) +
scale_shape_manual("", values = c(19, NA)) +
theme_bw(base_size = 16) +
theme(legend.position = "bottom") +
theme(panel.border = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line()) +
theme(axis.title.x = element_blank()) +
theme(legend.key.size = unit(3, 'lines')) +
guides(color = guide_legend(override.aes = list(linetype = c(NA, 1),
alpha = 1.0,
shape = c(19, NA)),
nrow = 1, byrow = TRUE))
Precipitation plot
prec_long <- df_long %>%
filter(key == 'Rain') %>%
rename(Precipitation = matches("Rain"))
maxPrec <- 1.1 * max(prec_long$value, na.rm = TRUE)
p1 <- ggplot(data = prec_long, aes(x = Date, y = value)) +
# use `geom_linerange` to mimic `type = h` in Base R plot
# https://stackoverflow.com/questions/26139878/needle-plot-in-ggplot2
geom_linerange(aes(x = Date,
ymin = 0,
ymax = value),
color = "#2c7fb8",
size = 10) +
xlab("") +
ylab(paste("Precipitation (mm)", sep = "")) +
scale_x_date(position = "top") +
scale_y_reverse(expand = c(0, 0), limits = c(maxPrec, 0)) +
theme_bw(base_size = 16) +
theme(panel.border = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line()) +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank()) +
theme(legend.position = "none")
Stack two plots on top of each other
### `cowplot` or `egg` package would work too
# install.packages("patchwork", dependencies = TRUE)
library(patchwork)
p1 / sm1 +
plot_layout(nrow = 2, heights = c(1, 2)) +
plot_annotation(title = "My plot",
subtitle = "Precipitation and Soil moisture")
Created on 2020-07-26 by the reprex package (v0.3.0)

Here's an approach using geom_rect.
Calculate the ratio between the maximum of the primary and secondary axes.
Store the maximum of the secondary reverse axis.
Plot the rectangles using the ymin as the maximum minus the value times the ratio.
Set the secondary axis ticks as the maximum minus the values divided by the ratio.
I added a BottomOffset parameter you could tweak if you want some extra space at the bottom on the secondary axis. I also went ahead and added the code to change the colors of the axes.
Edit: Now with a legend.
Ratio <- max(c(df$Obs, df$Sim), na.rm = TRUE) / max(df$Rain)
RainMax <- max(df$Rain,na.rm = TRUE)
BottomOffset <- 0.05
ggplot(df, aes(x=as.Date(Date))) +
geom_line(aes(y=Obs, color="1")) +
geom_line(aes(y=Sim, color="2")) +
geom_rect(aes(xmin=as.Date(Date) - 0.1,
xmax = as.Date(Date) + 0.1,
ymin = (BottomOffset + RainMax - Rain) * Ratio,
ymax = (BottomOffset + RainMax) * Ratio,
color = "3"),
fill = "red", show.legend=FALSE) +
geom_hline(yintercept = (BottomOffset + RainMax) * Ratio, color = "red") +
geom_hline(yintercept = 0, color = "black") +
labs(x = "Date", color = "Variable") +
scale_y_continuous(name = expression('Soil moisture, m'^"3"*' m'^"-3"),
sec.axis = sec_axis(~ BottomOffset + RainMax - . / Ratio, name = "Precipitation (inch)"),
expand = c(0,0)) +
scale_color_manual(values = c("1" = "blue", "2" = "green", "3" = "red"),
labels = c("1" = "Obs", "2" = "Sim", "3"= "Rain")) +
theme(axis.line.y.right = element_line(color = "red"),
axis.ticks.y.right = element_line(color = "red"),
axis.text.y.right = element_text(color = "red"),
axis.title.y.right = element_text(color = "red"),
axis.line.y.left = element_line(color = "blue"),
axis.ticks.y.left = element_line(color = "blue"),
axis.text.y.left = element_text(color = "blue"),
axis.title.y.left = element_text(color = "blue"),
legend.position = "bottom")

Adding an additional point to ggplot

This question is a follow-up to this post: previous post
I have 28 variables, M1, M2, ..., M28, for which I compute certain statistics x and y.
library(ggplot2)
df = data.frame(model = factor(paste("M", 1:28, sep = ""), levels=paste("M", 1:28, sep = "")), a = runif(28, 1, 1.05), b = runif(28, 1, 1.05))
levels = seq(0.8, 1.2, 0.05)
Here is the plot:
ggplot(data=df) +
geom_polygon(aes(x=model, y=a, group=1), color = "black", fill = NA) +
geom_polygon(aes(x=model, y=b, group=1), color = "blue", fill = NA) +
coord_polar() +
scale_y_continuous(limits=range(levels), breaks=levels, labels=levels) +
theme(axis.text.y = element_blank(), axis.ticks = element_blank(), axis.title.x = element_blank(), axis.title.y = element_blank())
I would like to add a point the the plot, with y-value = 1 for M1 (model1). I tried adding:
geom_point(aes(y = 1, x = "M1"), color = "red", cex = 0.5)
but it doesn't work. Any idea what I am doing wrong?
Thanks for your help!

cex is not an argument for geom_point. Try size, e.g.
geom_point(aes(y = 1, x = "M1"), color = "red", size = 10)

Develop Reference

r css asp.net wordpress firebase qt symfony nginx http apache-flex

Plotting normal distribution density plot for hazard ratio in R - r

Related

Setting the same bar/grid width for multiple barplots arranged together?

ggplot poinstrange with multiple categories - change fill color

Add the quantilies to each line, ggplot2

How to plot multiple time series with a reverse barplot on the secondary axis using ggplot2?

Adding an additional point to ggplot

Categories

Resources