Adding an additional point to ggplot - r

This question is a follow-up to this post: previous post
I have 28 variables, M1, M2, ..., M28, for which I compute certain statistics x and y.
library(ggplot2)
# Example data: one row per model (M1..M28) with two statistics, a and b,
# each drawn uniformly from [1, 1.05]. No seed is set, so values vary per run.
df <- data.frame(
  model = factor(paste0("M", 1:28), levels = paste0("M", 1:28)),
  a = runif(n = 28, min = 1, max = 1.05),
  b = runif(n = 28, min = 1, max = 1.05)
)
# Grid values from 0.8 to 1.2 in steps of 0.05.
levels <- seq(from = 0.8, to = 1.2, by = 0.05)
Here is the plot:
# Draw statistics a (black) and b (blue) as closed outlines around a polar
# axis, one vertex per model. group = 1 joins all models into a single polygon.
ggplot(df) +
  geom_polygon(aes(x = model, y = a, group = 1), fill = NA, color = "black") +
  geom_polygon(aes(x = model, y = b, group = 1), fill = NA, color = "blue") +
  coord_polar() +
  # The radial axis spans the full range of `levels`, with a break at each value.
  scale_y_continuous(breaks = levels, labels = levels, limits = range(levels)) +
  # Hide radial labels, all ticks and both axis titles.
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank()
  )
I would like to add a point to the plot, with y-value = 1 for M1 (model1). I tried adding:
geom_point(aes(y = 1, x = "M1"), color = "red", cex = 0.5)
but it doesn't work. Any idea what I am doing wrong?
Thanks for your help!

cex is not an argument for geom_point. Try size, e.g.
geom_point(aes(y = 1, x = "M1"), color = "red", size = 10)

Related

Plot quarterly data and specify quarters using ggplot in rstudio

I want to plot quarterly data, where the information is saved in the variable yq as YYYY Q (e.g. 2007 Q1), which I created by combining Year and Quarter (1, 2, 3, 4):
df$yq = as.yearqtr(paste(df$Year, df$Quarter), "%Y %q")
Looking around on the examples online, I wrote this code but the problem is that it does not let me specify the years I want to display on the x-axis - or rather I can't seem to find a way to apply the breaks for this type of data. I have 15 years of data and would like to only display the label every 3 years, so that the only labels are 2005.Q1, 2008.Q1, 2011.Q1, 2014.Q1, 2017.Q1.
Grateful for tips!
# Observed rates as points plus one fitted line per model. Each layer maps
# `color` to a literal colour name; scale_color_identity() uses those names
# as the actual colours and relabels them in the legend.
plot <- ggplot(df, aes(x = yq, y = rate, color = "red")) +
  geom_point() +
  geom_line(aes(y = lin, color = "green"), size = 1) +
  geom_line(aes(y = quad, color = "brown"), size = 1) +
  geom_line(aes(y = cube, color = "blue"), size = 1) +
  geom_line(aes(y = log, color = "pink"), size = 1) +
  geom_line(aes(y = power_corr, color = "black"), size = 1) +
  geom_line(aes(y = exp_corr, color = "yellow"), size = 1) +
  scale_color_identity(
    name = "Models",
    guide = "legend",
    breaks = c("red", "green", "brown", "blue", "pink", "black", "yellow"),
    labels = c(
      "Observed", "Linear", "Quadratic", "Cubic",
      "Log", "Power", "Exponential"
    )
  )
# Render `plot` with quarterly x labels, axis titles and a stripped-down theme:
# no grid or panel background, black axis lines, slanted x tick labels.
plot +
  scale_x_yearqtr(format = "%Y.Q%q") +
  labs(title = "", x = "Year/quarters", y = "Smoking prevalence") +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black"),
    axis.text.x = element_text(size = 8, angle = -45, hjust = 0.001),
    legend.key = element_rect(color = NA, fill = NA)
  )
The data was not provided so we will use a simple example of x and y vectors and use scale_x_yearqtr with the indicated breaks and format arguments.
library(ggplot2)
library(zoo)
# Inputs: 30 observations, one per year, starting at the first quarter of 2000
# (adding 1 to a yearqtr value advances it by one year).
x <- as.yearqtr(2000) + 0:29
y <- 1:30

# qplot() is deprecated as of ggplot2 3.4.0, so the scatter plot is built
# explicitly. The breaks place one x label every 3 years, formatted "2000.Q1".
ggplot(data.frame(x = x, y = y), aes(x = x, y = y)) +
  geom_point() +
  scale_x_yearqtr(breaks = seq(min(x), max(x), by = 3), format = "%Y.Q%q")

ggplot pointrange with multiple categories - change fill color

I have the following plot:
# Median with p25-p75 range per snr, coloured and grouped by method, plus a
# dashed blue horizontal reference line at FA_GT. The legend is hidden.
ggplot() +
  geom_pointrange(
    data = data_FA,
    mapping = aes(
      x = snr, y = median, ymin = p25, ymax = p75,
      group = method, colour = factor(method)
    ),
    position = pd
  ) +
  geom_hline(yintercept = FA_GT, color = "blue", linetype = "dashed") +
  labs(title = "", subtitle = "") +
  theme(
    legend.title = element_blank(),
    legend.position = "none",
    panel.border = element_rect(colour = "gray", fill = NA, size = 1),
    plot.margin = unit(c(0, 0.5, 0, 0), units = "lines")
  )
obtained from the following dataset:
For each group (red and blue) codified by the factor method, I want to see red/blue dots and lines with different transparency according to the factor subset. Does anyone know how to do that? In addition, how can I add more separation space between the two groups (red and blue)?
Thank you!
You can just map alpha to subset inside aes:
# Same pointrange plot, with transparency mapped to `subset` so the two
# subsets within each method are drawn at alpha 0.3 and 1.
ggplot(data_FA) +
  geom_pointrange(
    aes(
      x = snr, y = median, ymin = p25, ymax = p75,
      group = method, colour = factor(method), alpha = subset
    ),
    position = pd
  ) +
  geom_hline(yintercept = FA_GT, color = "blue", linetype = "dashed") +
  scale_alpha_manual(values = c(0.3, 1)) +
  labs(title = "", subtitle = "") +
  # theme_bw() must come before theme() so the overrides below take effect.
  theme_bw() +
  theme(
    legend.position = "none",
    panel.border = element_rect(colour = "gray", fill = NA, size = 1),
    plot.margin = unit(c(0, 0.5, 0, 0), units = "lines")
  )
Data
# Example data: four rows covering every method x subset combination at a
# single snr level, with the median and the 25th/75th percentiles per row.
data_FA <- data.frame(
  X = c("X1", "X1.7", "X1.14", "X1.21"),
  snr = "snr10",
  subset = rep(c("full", "subset5"), each = 2),
  method = rep(c("sc", "trunc"), times = 2),
  median = c(0.4883985, 0.4883985, 0.4923685, 0.4914260),
  p25 = c(0.4170183, 0.4170183, 0.4180174, 0.4187472),
  p75 = c(0.5617713, 0.5617713, 0.5654203, 0.5661565)
)
# y-position of the horizontal reference line.
FA_GT <- 0.513
pd <- position_dodge2(width = 1)

Plotting normal distribution density plot for hazard ratio in R

I'm trying to plot 2 normal distribution density plots for null and alternative hazard ratios of 1 and 0.65, respectively, to replicate an example (plot attached). Here's my code so far, but it doesn't make sense to me to have negative values for hazard ratios; yet when I don't have negative values, the distributions are cut off. So I know I'm doing something wrong here. Thanks!
# Evaluate two normal densities on a fine grid over [-2, 2]: one centred at
# log(1) and one at log(0.65), both with sd sqrt(1/60 + 1/60). The results are
# stacked in long format with an `id` column naming the hypothesis.
x <- seq(from = -2, to = 2, length.out = 100000)
df <- rbind(
  data.frame(
    x = x,
    y = dnorm(x, mean = log(1), sd = sqrt(1/60 + 1/60)),
    id = "H0, HR = 1"
  ),
  data.frame(
    x = x,
    y = dnorm(x, mean = log(0.65), sd = sqrt(1/60 + 1/60)),
    id = "H1, HR = 0.65"
  )
)
# x-position of the dashed vertical line and of the shading cut-off.
vline <- 0.65
# Two density curves coloured by hypothesis. The shaded areas are the part of
# the H1 curve to the right of `vline` and the part of the H0 curve to its
# left; a dashed vertical line marks `vline` itself.
p1 <- ggplot(df, aes(x = x, y = y, group = id, color = id)) +
  geom_line() +
  geom_area(
    aes(fill = id),
    # Keep only the rows that fall in the two regions to be shaded.
    data = ~ subset(
      .,
      (id == "H1, HR = 0.65" & x > vline) | (id == "H0, HR = 1" & x < vline)
    ),
    alpha = 0.3
  ) +
  geom_vline(xintercept = vline, linetype = "dashed") +
  scale_color_manual(name = "", values = c("red", "blue")) +
  scale_fill_manual(values = c("red", "blue")) +
  xlim(-2, 2) +
  labs(x = "log(Hazard Ratio)", y = "Density") +
  # Drop the fill legend; draw the colour legend keys on a white fill.
  guides(
    fill = "none",
    color = guide_legend(override.aes = list(fill = "white"))
  ) +
  theme_classic() +
  theme(
    legend.title = element_text(size = 10),
    legend.text = element_text(size = 10),
    legend.position = c(0.8, 0.4),
    axis.line.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  )
The plot I'm trying to replicate
This gets reasonably close to the image that you have posted.
You should not use the log() of the means, but rather the means as is. Moreover if you use the normal distribution, you assume that parameters can take any value between -Inf and Inf, albeit with very small densities far from the mean. Therefore, you cannot expect all values to be positive. If you would like your values to be bounded by 0, then you should use a gamma distribution instead.
# Evaluate two normal densities on a 1000-point grid over [-2, 2]: one centred
# at 1 and one at 0.65 (means used as-is, not log-transformed), both with
# sd sqrt(1/50). The results are stacked in long format with an `id` column.
x <- seq(-2, 2, length.out = 1000)
df <- rbind(
  data.frame(
    x = x,
    y = dnorm(x, mean = 1, sd = sqrt(1/50)),
    id = "H0, HR = 1"
  ),
  data.frame(
    x = x,
    # Fixed: this was written `sqrt1/50)`, which does not parse.
    y = dnorm(x, mean = 0.65, sd = sqrt(1/50)),
    id = "H1, HR = 0.65"
  )
)
# x-position of the dashed vertical line and of the shading cut-off.
vline <- 0.65
# Two density curves coloured by hypothesis. The shaded areas are the part of
# the H1 curve to the right of `vline` and the part of the H0 curve to its
# left; a dashed vertical line marks `vline` itself.
ggplot(df, aes(x, y, group = id, color = id)) +
  geom_line() +
  geom_area(
    aes(fill = id),
    # Keep only the rows that fall in the two regions to be shaded.
    data = ~ subset(
      .,
      (id == "H1, HR = 0.65" & x > vline) | (id == "H0, HR = 1" & x < vline)
    ),
    alpha = 0.3
  ) +
  geom_vline(xintercept = vline, linetype = "dashed") +
  # NOTE(review): the label still says log(Hazard Ratio) although the means
  # used for `df` are not log-transformed; confirm the intended wording.
  labs(x = "log(Hazard Ratio)", y = "Density") +
  # Drop the fill legend; draw the colour legend keys on a white fill.
  guides(
    fill = "none",
    color = guide_legend(override.aes = list(fill = "white"))
  ) +
  theme_classic() +
  theme(
    legend.title = element_text(size = 10),
    legend.position = c(0.8, 0.4),
    legend.text = element_text(size = 10),
    axis.line.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  ) +
  scale_color_manual(name = "", values = c("red", "blue")) +
  scale_fill_manual(values = c("red", "blue")) +
  # This is the only x scale. The earlier xlim(-2, 2) was removed because this
  # scale replaced it anyway and ggplot2 printed a "Scale for x is already
  # present" message.
  scale_x_continuous(
    breaks = seq(-0.3, 2.1, 0.3),
    limits = c(-0.3, 2.1)
  )

Missing Dots and Line Style in Figure Legend with ggplot2

After running the following commands:
# Example data: 2 populations x 2 study groups x 2 waves, with an outcome and
# a confidence-interval half-width for each of the 8 cells.
Population <- rep(c("A", "B"), each = 4)
Group <- rep(rep(c("Experimental", "Control"), each = 2), times = 2)
wave <- rep(c("Pretest", "Posttest"), times = 4)
outcome <- c(-.3, -.2, -.3, .4, -.6, -.5, -.6, .6)
ci <- rep(c(.13, .14), times = 4)
df <- data.frame(Population, Group, wave, outcome, ci)
# Order the waves chronologically instead of alphabetically.
df$wave <- factor(df$wave, levels = c("Pretest", "Posttest"))
library(ggplot2)
# Small horizontal dodge so overlapping groups stay distinguishable.
pd <- position_dodge(0.1)

# Outcome by wave with error bars, lines and points for each
# Population x Group combination. Only the error bars contribute to the legend,
# since the line and point layers set show.legend = FALSE.
ggplot(
  df,
  aes(
    x = wave, y = outcome,
    color = interaction(Population, Group),
    shape = Group,
    group = interaction(Population, Group)
  )
) +
  geom_errorbar(
    aes(ymin = outcome - ci, ymax = outcome + ci),
    width = .25, size = .5, position = pd
  ) +
  geom_line(aes(linetype = Group), size = 1, position = pd, show.legend = FALSE) +
  geom_point(
    size = 3.5, stroke = 1.25, fill = "white",
    position = pd, show.legend = FALSE
  ) +
  scale_color_manual(values = c("#000000", "#606060", "#000000", "#606060")) +
  scale_shape_manual(values = c(23, 21)) +
  coord_cartesian(xlim = c(1.4, 1.6), ylim = c(-.91, .91)) +
  labs(
    title = "Outcomes by Population and Study Group",
    x = "Time",
    y = "Outcome\nLower scores denote fewer instances",
    color = "Population and Study Group"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(color = "black"),
    axis.text.y = element_text(color = "black"),
    panel.background = element_rect(fill = "#F0F0F0")
  )
I generate a figure that does not have dots symbols or correct line styles in the legend:
How can I:
add the dots shown in the figure itself into the legend and
have the legend lines reflect that some of the lines in the figure are dotted?
TYIA.
The simplest way is to create another variable that would reflect the interaction instead of creating it on the fly. If we build the plot step by step, this below gives the dots and errorbars:
library(ggplot2)
# Small horizontal dodge so overlapping groups stay distinguishable.
pd <- position_dodge(0.1)

# One explicit grouping column ("<Population>.<Group>") drives both colour and
# shape, so the two aesthetics share a single legend.
df$grp <- paste(df$Population, df$Group, sep = ".")

g <- ggplot(df, aes(x = wave, y = outcome, color = grp, shape = grp)) +
  geom_errorbar(
    aes(ymin = outcome - ci, ymax = outcome + ci),
    width = .25, size = .5, position = pd
  ) +
  geom_point(size = 3.5, stroke = 1.25, fill = "white", position = pd) +
  scale_color_manual(values = c("#000000", "#000000", "#606060", "#606060")) +
  scale_shape_manual(values = c(23, 21, 23, 21)) +
  coord_cartesian(xlim = c(1.4, 1.6), ylim = c(-.91, .91)) +
  labs(
    title = "Outcomes by Population and Study Group",
    x = "Time",
    y = "Outcome\nLower scores denote fewer instances"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(color = "black"),
    axis.text.y = element_text(color = "black"),
    panel.background = element_rect(fill = "#F0F0F0")
  )
print(g)
Then add the line while specifying the legend:
# Add connecting lines with their own aesthetics (inherit.aes = FALSE keeps the
# plot-level colour/shape mappings out of this layer) and alternate solid and
# dashed line types across the four grp values.
g +
  geom_line(
    aes(x = wave, y = outcome, group = grp, linetype = grp),
    inherit.aes = FALSE
  ) +
  scale_linetype_manual(values = c("solid", "dashed", "solid", "dashed"))

ggplot2: Adjust legend symbols in overlayed plot

I need to create a plot, in which a histogram gets overlayed by a density. Here is my result so far using some example data:
library("ggplot2")
# Reproducible example data: `a` is a normal sample rounded to whole numbers,
# `b` is a second normal sample with a larger spread.
set.seed(1234)
a <- round(rnorm(n = 10000, mean = 5, sd = 5), digits = 0)
b <- rnorm(n = 10000, mean = 5, sd = 7)
df <- data.frame(a, b)
# Histogram of `a` (density scale) overlaid with a density line for `b`.
# Colour and linetype are mapped to constant labels so both layers appear in
# one legend.
ggplot(df) +
  geom_histogram(
    aes(x = a, y = ..density.., col = "histogram", linetype = "histogram"),
    fill = "blue"
  ) +
  stat_density(
    aes(x = b, y = ..density.., col = "density", linetype = "density"),
    geom = "line"
  ) +
  scale_color_manual(
    values = c("red", "white"),
    breaks = c("density", "histogram")
  ) +
  scale_linetype_manual(values = c("solid", "solid")) +
  theme(
    legend.title = element_blank(),
    legend.text = element_text(size = 15),
    legend.position = c(.75, .75)
  )
Unfortunately I can not figure out how I can change the symbols in the legend properly. The first symbol should be a relatively thick red line and the second symbol should be a blue box without the white line in the middle.
Based on some internet research, I tried to change different things in scale_linetype_manual and further I tried to use override.aes, but I could not figure out how I would have to use it in this specific case.
EDIT - Here is the best solution based on the very helpful answers below.
# Histogram of `a` (density scale) overlaid with a red density line for `b`.
# Only linetype is mapped, so it alone drives the legend; the legend keys are
# then restyled through override.aes.
ggplot(df) +
  geom_histogram(
    aes(x = a, y = ..density.., linetype = "histogram"),
    fill = "blue",
    # Keeps the white outline around the histogram bars.
    col = "white"
  ) +
  stat_density(
    aes(x = b, y = ..density.., linetype = "density"),
    geom = "line", color = "red"
  ) +
  scale_linetype_manual(values = c("solid", "solid")) +
  theme(
    legend.title = element_blank(),
    legend.text = element_text(size = 15),
    legend.position = c(.75, .75),
    legend.key = element_blank()
  ) +
  guides(
    linetype = guide_legend(
      override.aes = list(
        linetype = c(1, 0),
        fill = c("white", "blue"),
        size = c(1.5, 1.5)
      )
    )
  )
As you thought, most of the work can be done via override.aes for linetype.
Note I removed color from the aes of both layers to avoid some trouble I was having with the legend box outline. Doing this also avoids the need for the scale_*_* function calls. To set the color of the density line I used color outside of aes.
In override.aes I set the linetype to be solid or blank, the fill to be either white or blue, and the size to be 2 or 0 for the density box and histogram box, respectively.
# Histogram of `a` (density scale) overlaid with a red density line for `b`.
# Linetype is the only mapped aesthetic; the legend keys are restyled through
# override.aes (linetype 1/0, fill white/blue, size 2/0 for the two entries).
ggplot(df) +
  geom_histogram(
    aes(x = a, y = ..density.., linetype = "histogram"),
    fill = "blue"
  ) +
  stat_density(
    aes(x = b, y = ..density.., linetype = "density"),
    color = "red", geom = "line"
  ) +
  theme(
    legend.title = element_blank(),
    legend.text = element_text(size = 15),
    legend.position = c(.75, .75),
    legend.key = element_blank()
  ) +
  guides(
    linetype = guide_legend(
      override.aes = list(
        linetype = c(1, 0),
        fill = c("white", "blue"),
        size = c(2, 0)
      )
    )
  )
The fill and colour aesthetics are labelled by histogram and density respectively, and their values set using scale_*_manual. Doing so maps directly to the desired legend without needing any overrides.
# Histogram of `a` (density scale) overlaid with a density line for `b`.
# Fill is mapped to the label "histogram" and colour to the label "density";
# the manual scales then assign blue and red to those labels.
ggplot(df) +
  geom_histogram(aes(x = a, y = ..density.., fill = "histogram")) +
  stat_density(aes(x = b, y = ..density.., colour = "density"), geom = "line") +
  scale_colour_manual(values = c("red")) +
  scale_fill_manual(values = c("blue")) +
  labs(fill = "", colour = "") +
  theme(
    legend.title = element_blank(),
    legend.text = element_text(size = 15),
    legend.position = c(.75, .75),
    legend.box.just = "left",
    legend.background = element_rect(fill = NULL),
    legend.key = element_rect(fill = NULL)
  )

Resources