I would like to position labels close to the legend.
In the code below I have hardcoded (x,y) values in geom_label to get desired result for the current dataframe:
# Creating dataframe
library(ggplot2)
values <- c(rep(0,2), rep(2,3), rep(3,3), rep(4,3), 5, rep(6,2), 8, 9, rep(11,2) )
obs_number <- c(rep(18,18))
value_1 <- c(rep(4,18))
value_2 <- c(rep(7,18))
value_3 <- c(rep(3,18))
data_to_plot <- data.frame(values, obs_number, value_1, value_2, value_3)
# Calculate max frequency value for using in `geom_label`
frequency_count <- data_to_plot %>% group_by(values) %>% count()%>% arrange(n)
max_frequency <- max(frequency_count$n)
# Plot
ggplot(data_to_plot, aes(x = values)) +
geom_histogram(aes(y = ..count..), binwidth = 1, colour= "black", fill = "white") +
geom_density(aes(y=..count..), fill="blue", alpha = .25)+
geom_vline(aes(xintercept = value_1),
color="red", linetype = "dashed", size = 0.5, alpha = 1) +
geom_vline(aes(xintercept = value_1),
color="forestgreen", linetype="dashed", size = 0.5, alpha = 1) +
geom_vline(aes(xintercept = value_3),
color="purple", linetype = "dashed", size = 0.5, alpha = 1) +
geom_label(aes(label = obs_number, y = max_frequency*0.87, x = (max(values) - 2.2), color = 'blue'), size = 3.5, alpha = 1) +
geom_label(aes(label = value_1, y = max_frequency * 0.83, x = (max(values) - 2.2 ), color = 'forestgreen'), size = 3.5, alpha = 1) +
geom_label(aes(label = value_2, y = max_frequency * 0.79, x = (max(values) - 2.2) , color = 'purple'), size = 3.5, alpha = 1) +
geom_label(aes(label = value_3, y = max_frequency * 0.75, x = (max(values) - 2.2) , color = 'red'), size = 3.5, alpha = 1) +
scale_color_manual(name="Values",
labels = c("Observations number",
"value_1",
"value_2",
"value_3"
),
values = c( "blue",
"forestgreen",
"purple",
"red")) +
labs(title = "relevant_title", y = "Distribution fors DLT values", x = "DLT for the route: average values per batch") +
theme(plot.title = element_text(hjust = 0.5),
axis.title.x = element_text(colour = "darkblue"),
axis.text.x = element_text(face="plain", color="black",
size=10, angle=0),
axis.title.y = element_text(colour = "darkblue"),
axis.text.y = element_text(face="plain", color="black",
size=10, angle=0),
legend.position = c(.90, .80)
)+
labs(title="DLT values", y = "frequency", x = "days")+
scale_x_continuous(breaks = seq(0, max(data_to_plot$values), 1))
This is desired result:
But this will not work for all datasets.
Question:
How can I get cartesian coordinates of the plot area, so I would replace max_frequency and max(values) in geom_label and align labels with the legend, given that legend.position = c(.90, .80).
Other alternatives are also welcome.
Under the flag of 'alternatives are also welcome': why not use a text glyph for the geom_vline()s and override the actual labels?
I rearranged the code a bit for my own understanding, but here is an example:
library(tidyverse)
#> Warning: package 'tibble' was built under R version 4.0.3
#> Warning: package 'tidyr' was built under R version 4.0.3
#> Warning: package 'readr' was built under R version 4.0.3
#> Warning: package 'dplyr' was built under R version 4.0.3
values <- c(rep(0,2), rep(2,3), rep(3,3), rep(4,3), 5, rep(6,2), 8, 9, rep(11,2) )
obs_number <- c(rep(18,18))
value_1 <- c(rep(4,18))
value_2 <- c(rep(7,18))
value_3 <- c(rep(3,18))
data_to_plot <- data.frame(values, obs_number, value_1, value_2, value_3)
# Extra dataframe for storing the xintercepts and labels
vals <- data.frame(xintercept = c(18, 4, 7, 3),
label = c("Observations number", "value_1", "value_2", "value_3"))
frequency_count <- data_to_plot %>% group_by(values) %>% count()%>% arrange(n)
max_frequency <- max(frequency_count$n)
ggplot(data_to_plot, aes(x = values)) +
geom_histogram(aes(y = ..count..),
binwidth = 1, colour= "black", fill = "white") +
geom_density(aes(y=..count..),
fill="blue", alpha = .25)+
geom_vline(aes(xintercept = xintercept, color = label),
data = vals[2:nrow(vals), ],
linetype = "dashed", size = 0.5, alpha = 1,
# Give different legend glyph for vlines
key_glyph = draw_key_text) +
scale_color_manual(
name= "Values",
limits = vals$label,
values = c("blue", "forestgreen", "purple", "red"),
# Override the labels and set size to something sensible
guide = guide_legend(override.aes = list(label = vals$xintercept,
size = 3.88))
) +
labs(title = "relevant_title", y = "Distribution fors DLT values",
x = "DLT for the route: average values per batch") +
theme(plot.title = element_text(hjust = 0.5),
axis.title.x = element_text(colour = "darkblue"),
axis.text.x = element_text(face="plain", color="black",
size=10, angle=0),
axis.title.y = element_text(colour = "darkblue"),
axis.text.y = element_text(face="plain", color="black",
size=10, angle=0),
legend.position = c(.90, .80)
)+
labs(title="DLT values", y = "frequency", x = "days")+
scale_x_continuous(breaks = seq(0, max(data_to_plot$values), 1))
Created on 2021-01-08 by the reprex package (v0.3.0)
Related
I'm trying to plot 2 normal distribution density plots for null and alternative hazard ratios of 1 and 0.65, respectively, to replicate an example (plot attached). Here's my code so far but it doesn't makes sense to me to have negative values for hazard ratios, but when I don't have negative values, the distributions are cut off. So I know I'm doing something wrong here. Thanks!
x <- seq(-2, 2, length.out = 100000)
df <- do.call(rbind,
list(data.frame(x=x, y=dnorm(x, mean = log(1), sd = sqrt(1/60 + 1/60)), id="H0, HR = 1"),
data.frame(x=x, y=dnorm(x, mean = log(0.65), sd = sqrt(1/60 + 1/60)), id="H1, HR = 0.65")))
vline <- 0.65
p1 <- ggplot(df, aes(x, y, group = id, color = id)) +
geom_line() +
geom_area(aes(fill = id),
data = ~ subset(., (id == "H1, HR = 0.65" & x > (vline)) | (id == "H0, HR = 1" & x < (vline))),
alpha = 0.3) +
geom_vline(xintercept = vline, linetype = "dashed") +
labs(x = "log(Hazard Ratio)", y = 'Density') + xlim(-2, 2) +
guides(fill = "none", color = guide_legend(override.aes = list(fill = "white"))) +
theme_classic() +
theme(legend.title=element_text(size=10), legend.position = c(0.8, 0.4),
legend.text = element_text(size = 10),
axis.line.y = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank()
) +
scale_color_manual(name = '', values = c('red', 'blue')) +
scale_fill_manual(values = c('red', 'blue'))
The plot I'm trying to replicate
This gets reasonably close to the image that you have posted.
You should not use the log() of the means, but rather the means as is. Moreover if you use the normal distribution, you assume that parameters can take any value between -Inf and Inf, albeit with very small densities far from the mean. Therefore, you cannot expect all values to be positive. If you would like your values to be bounded by 0, then you should use a gamma distribution instead.
x <- seq(-2, 2, length.out = 1000)
df <- do.call(rbind,
list(data.frame(x=x, y=dnorm(x, mean = 1, sd = sqrt(1/50)), id="H0, HR = 1"),
data.frame(x=x, y=dnorm(x, mean = 0.65, sd = sqrt1/50)), id="H1, HR = 0.65")))
vline <- 0.65
ggplot(df, aes(x, y, group = id, color = id)) +
geom_line() +
geom_area(aes(fill = id),
data = ~ subset(., (id == "H1, HR = 0.65" & x > (vline)) | (id == "H0, HR = 1" & x < (vline))),
alpha = 0.3) +
geom_vline(xintercept = vline, linetype = "dashed") +
labs(x = "log(Hazard Ratio)", y = 'Density') + xlim(-2, 2) +
guides(fill = "none", color = guide_legend(override.aes = list(fill = "white"))) +
theme_classic() +
theme(legend.title=element_text(size=10), legend.position = c(0.8, 0.4),
legend.text = element_text(size = 10),
axis.line.y = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank()
) +
scale_color_manual(name = '', values = c('red', 'blue')) +
scale_fill_manual(values = c('red', 'blue')) +
scale_x_continuous(breaks = seq(-0.3, 2.1, 0.3),
limits = c(-0.3, 2.1))
I have a data frame having four columns as shown below (here I just put header of my actual data frame):
df <- tibble(Date=c("2007-05-01", "2007-05-02","2007-05-03", "2007-05-04", "2007-05-05"), Obs = c(0.16,0.15,0.17,0.19,0.14), Sim = c(0.17, 0.11, 0.21, 0.15, 0.13), Rain = c(0.1, 0.11, 0.04,0.21,0.5))
How can I plot the data such that the variables Obs and Sim are plotted on the primary y-axis and Rain is plotted as bars on a reverse secondary axis?
Here is the code I have tried thus far:
ggplot(df, aes(x=as.Date(Date))) +
geom_line(aes(y=Obs, color="red")) +
geom_line(aes(y=Sim, color="green")) +
geom_bar(mapping = aes(y = Rain), stat = "identity") +
scale_y_continuous(name = expression('Soil moisture, m'^"3"*' m'^"-3"),
sec.axis = sec_axis(~ 3 - .*0.5, name = "Precipitation (inch)"))
Here is my expected output:
Edit: Additionally, how can I insert a legend that corresponds to each line (i.e. Obs, Sim, and Rain)?
You can also make two separate plots and stack them on top of each other. This would be useful for people (myself included) who prefer not to use dual-axis plots.
library(tidyverse)
library(lubridate)
library(scales)
df <- tibble(Date = c("2007-05-01", "2007-05-02", "2007-05-03", "2007-05-04", "2007-05-05"),
Obs = c(0.16, 0.15, 0.17, 0.19, 0.14),
Sim = c(0.17, 0.11, 0.21, 0.15, 0.13),
Rain = c(0.10, 0.11, 0.04, 0.21, 0.5))
# convert data to long format
df_long <- df %>%
mutate(Date = as.Date(Date)) %>%
pivot_longer(-Date,
names_to = 'key',
values_to = 'value')
Soil moisture plot
sm1 <- ggplot(data = df_long %>% filter(key != 'Rain'),
aes(x = Date, y = value,
group = key,
shape = key,
linetype = key,
col = key)) +
xlab("") +
ylab(expression('Soil moisture, m'^"3"*' m'^"-3")) +
geom_line(lwd = 0.5) +
geom_point(size = 3, alpha = 0.6) +
scale_color_brewer("", palette = 'Dark2') +
scale_linetype_manual("", values = c(NA, 'solid')) +
scale_shape_manual("", values = c(19, NA)) +
theme_bw(base_size = 16) +
theme(legend.position = "bottom") +
theme(panel.border = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line()) +
theme(axis.title.x = element_blank()) +
theme(legend.key.size = unit(3, 'lines')) +
guides(color = guide_legend(override.aes = list(linetype = c(NA, 1),
alpha = 1.0,
shape = c(19, NA)),
nrow = 1, byrow = TRUE))
Precipitation plot
prec_long <- df_long %>%
filter(key == 'Rain') %>%
rename(Precipitation = matches("Rain"))
maxPrec <- 1.1 * max(prec_long$value, na.rm = TRUE)
p1 <- ggplot(data = prec_long, aes(x = Date, y = value)) +
# use `geom_linerange` to mimic `type = h` in Base R plot
# https://stackoverflow.com/questions/26139878/needle-plot-in-ggplot2
geom_linerange(aes(x = Date,
ymin = 0,
ymax = value),
color = "#2c7fb8",
size = 10) +
xlab("") +
ylab(paste("Precipitation (mm)", sep = "")) +
scale_x_date(position = "top") +
scale_y_reverse(expand = c(0, 0), limits = c(maxPrec, 0)) +
theme_bw(base_size = 16) +
theme(panel.border = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line()) +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank()) +
theme(legend.position = "none")
Stack two plots on top of each other
### `cowplot` or `egg` package would work too
# install.packages("patchwork", dependencies = TRUE)
library(patchwork)
p1 / sm1 +
plot_layout(nrow = 2, heights = c(1, 2)) +
plot_annotation(title = "My plot",
subtitle = "Precipitation and Soil moisture")
Created on 2020-07-26 by the reprex package (v0.3.0)
Here's an approach using geom_rect.
Calculate the ratio between the maximum of the primary and secondary axes.
Store the maximum of the secondary reverse axis.
Plot the rectangles using the ymin as the maximum minus the value times the ratio.
Set the secondary axis ticks as the maximum minus the values divided by the ratio.
I added a BottomOffset parameter you could tweak if you want some extra space at the bottom on the secondary axis. I also went ahead and added the code to change the colors of the axes.
Edit: Now with a legend.
Ratio <- max(c(df$Obs, df$Sim), na.rm = TRUE) / max(df$Rain)
RainMax <- max(df$Rain,na.rm = TRUE)
BottomOffset <- 0.05
ggplot(df, aes(x=as.Date(Date))) +
geom_line(aes(y=Obs, color="1")) +
geom_line(aes(y=Sim, color="2")) +
geom_rect(aes(xmin=as.Date(Date) - 0.1,
xmax = as.Date(Date) + 0.1,
ymin = (BottomOffset + RainMax - Rain) * Ratio,
ymax = (BottomOffset + RainMax) * Ratio,
color = "3"),
fill = "red", show.legend=FALSE) +
geom_hline(yintercept = (BottomOffset + RainMax) * Ratio, color = "red") +
geom_hline(yintercept = 0, color = "black") +
labs(x = "Date", color = "Variable") +
scale_y_continuous(name = expression('Soil moisture, m'^"3"*' m'^"-3"),
sec.axis = sec_axis(~ BottomOffset + RainMax - . / Ratio, name = "Precipitation (inch)"),
expand = c(0,0)) +
scale_color_manual(values = c("1" = "blue", "2" = "green", "3" = "red"),
labels = c("1" = "Obs", "2" = "Sim", "3"= "Rain")) +
theme(axis.line.y.right = element_line(color = "red"),
axis.ticks.y.right = element_line(color = "red"),
axis.text.y.right = element_text(color = "red"),
axis.title.y.right = element_text(color = "red"),
axis.line.y.left = element_line(color = "blue"),
axis.ticks.y.left = element_line(color = "blue"),
axis.text.y.left = element_text(color = "blue"),
axis.title.y.left = element_text(color = "blue"),
legend.position = "bottom")
After running the following commands:
Population <- c("A", "A", "A", "A", "B", "B", "B", "B")
Group <- rep(c("Experimental", "Experimental", "Control", "Control"), 2)
wave <- rep(c("Pretest", "Posttest"), 4)
outcome <- c(-.3, -.2, -.3, .4, -.6, -.5, -.6, .6)
ci <- rep(c(.13, .14), 4)
df <- data.frame(Population, Group, wave, outcome, ci)
df$wave <- factor(df$wave,levels = c('Pretest','Posttest'))
library(ggplot2)
pd <- position_dodge(0.1)
ggplot(df, aes(x = wave, y = outcome, color = interaction(Population, Group), shape = Group, group = interaction(Population, Group))) +
geom_errorbar(aes(ymin = outcome - ci, ymax = outcome + ci), width = .25, position = pd, size=.5) +
geom_line(aes(linetype = Group), position = pd, size=1, show.legend = FALSE) +
geom_point(position = pd, size = 3.5, fill = "white", stroke = 1.25, show.legend = FALSE) +
scale_color_manual(values = c("#000000", "#606060", "#000000", "#606060")) +
scale_shape_manual(values = c(23, 21)) +
coord_cartesian(xlim = c(1.4, 1.6), ylim = c(-.91, .91)) +
labs(title = "Outcomes by Population and Study Group", x = "Time", y = "Outcome\nLower scores denote fewer instances", color = "Population and Study Group") +
theme(plot.title = element_text(hjust = 0.5), axis.text.x = element_text(color = "black"), axis.text.y = element_text(color = "black"), panel.background = element_rect(fill = "#F0F0F0"))
I generate a figure that does not have dots symbols or correct line styles in the legend:
How can I:
add the dots shown in the figure itself into the legend and
have the legend lines reflect that some of dotted lines in the figure?
TYIA.
The simplest way is to create another variable that would reflect the interaction instead of creating it on the fly. If we build the plot step by step, this below gives the dots and errorbars:
library(ggplot2)
pd <- position_dodge(0.1)
df$grp = paste(df$Population,df$Group,sep=".")
g = ggplot(df, aes(x = wave, y = outcome, color = grp, shape = grp))+
geom_errorbar(aes(ymin = outcome - ci, ymax = outcome + ci), width = .25, position = pd, size=.5) +
geom_point(position = pd, size = 3.5, fill = "white", stroke = 1.25) +
scale_color_manual(values = c("#000000", "#000000","#606060", "#606060")) +
scale_shape_manual(values = c(23,21,23,21)) +
coord_cartesian(xlim = c(1.4, 1.6), ylim = c(-.91, .91)) +
labs(title = "Outcomes by Population and Study Group", x = "Time", y = "Outcome\nLower scores denote fewer instances") +
theme(plot.title = element_text(hjust = 0.5), axis.text.x = element_text(color = "black"),
axis.text.y = element_text(color = "black"), panel.background = element_rect(fill = "#F0F0F0"))
print(g)
Then add the line while specifying the legend:
g +
geom_line(inherit.aes=FALSE,aes(x = wave, y = outcome,group=grp,linetype=grp)) +
scale_linetype_manual(values=c("solid","dashed","solid","dashed"))
Based on the following data frame and plot, I would like to conditionally change the colour of data points to black when did.it=="y". The shape of the dots and the colour of the lines, however, should remain unchanged. How can I do that?
set.seed(4887)
Strain <- rep(c(rep("A", times = 2), rep("B", times = 4)), times = 2)
Sex_ID <- rep(c("M_1", "F_2", "M_3", "F_4", "M_5", "F_6"), times = 2)
State <- rep(c("virgin", "mated", "expecting", "parent"), each = 6)
Huddling <- runif(8, 1.5, 3.8)
did.it<-rep(c("y","n","n"), times=8)
d <- data.frame(Strain, Sex_ID, State, Huddling, did.it)
library(tidyr)
d <- d %>%
separate(Sex_ID, c('Sex', 'ID'), sep = '_')
ggplot(d, aes(x = factor(State), y = Huddling, color = Sex, group = ID, shape = ID))+
facet_grid(Strain ~ ., scales = 'free_y') +
geom_point(size = 3, position = position_dodge(width=0.3), show.legend = F) +
geom_line(size = 0.7, position = position_dodge(width=0.3)) +
scale_color_manual(values = c('red4', 'midnightblue')) +
scale_fill_manual(values = "white") +
scale_x_discrete(limits = c("virgin", "mated", "expecting", "parent"),
labels = c("Virgin", "Mated", "Expecting", "Parent")) +
labs(y = "Time huddling (s)", x = "Reproductive stage") +
theme_classic() +
theme(axis.line.x = element_line(color = "black", size = 1),
axis.line.y = element_line(color = "black", size = 1),
axis.text = element_text(size = 17),
axis.title = element_text(size = 19,face = "bold"),
legend.title = element_text(size = 17),
legend.text = element_text(size = 15),
plot.title = element_text(lineheight = .8, face = "bold",size = 22))
You get part way there by just doing:
geom_point(size = 3, aes(color = did.it) ...) +
...
scale_color_manual(values = c('red4', 'midnightblue', 'orange', 'black')) ...
But this doesn't leave the points' colours unchanged when did.it is FALSE. So:
d$point_col <- ifelse(d$did.it=='y', 'y', d$Sex)
ggplot(d, aes(x = factor(State), y = Huddling, color = Sex, group = ID, shape = ID))+
facet_grid(Strain ~ ., scales = 'free_y') +
geom_point(size = 3, aes(color = point_col), position = position_dodge(width=0.3),
show.legend = F) +
geom_line(size = 0.7, position = position_dodge(width=0.3)) +
scale_color_manual(values = c('red4', 'midnightblue', 'black')) +
scale_fill_manual(values = "white") +
scale_x_discrete(limits = c("virgin", "mated", "expecting", "parent"),
labels = c("Virgin", "Mated", "Expecting", "Parent")) +
labs(y = "Time huddling (s)", x = "Reproductive stage")
(Plus your extra theme statements.)
This question is a follow-up to this post: previous post
I have 28 variables, M1, M2, ..., M28, for which I compute certain statistics x and y.
library(ggplot2)
df = data.frame(model = factor(paste("M", 1:28, sep = ""), levels=paste("M", 1:28, sep = "")), a = runif(28, 1, 1.05), b = runif(28, 1, 1.05))
levels = seq(0.8, 1.2, 0.05)
Here is the plot:
ggplot(data=df) +
geom_polygon(aes(x=model, y=a, group=1), color = "black", fill = NA) +
geom_polygon(aes(x=model, y=b, group=1), color = "blue", fill = NA) +
coord_polar() +
scale_y_continuous(limits=range(levels), breaks=levels, labels=levels) +
theme(axis.text.y = element_blank(), axis.ticks = element_blank(), axis.title.x = element_blank(), axis.title.y = element_blank())
I would like to add a point the the plot, with y-value = 1 for M1 (model1). I tried adding:
geom_point(aes(y = 1, x = "M1"), color = "red", cex = 0.5)
but it doesn't work. Any idea what I am doing wrong?
Thanks for your help!
cex is not an argument for geom_point. Try size, e.g.
geom_point(aes(y = 1, x = "M1"), color = "red", size = 10)