add pvalue bars to facet plot with "fill" sub-group - r

I'm looking for a solution since too much time without finding it, so it's time to ask for some help...
I would like to add pValue to boxplots organized with facet_wrap (ggplot2). Similar to what you obtain with the script I add to this post (the first part of the script is the exemple of what I want and it's working well for 1 plot, the second part is related to facet and doesn't work).
I would like to add pvalue between all "dose" values of "OJ", same for "VC", but also between, for exemple "dose"=1 of OJ and VC (as in the plot). It's working well for 1 plot, but not in facet_wrap. The error message is:
Error: Assigned data value must be compatible with existing data.
x Existing data has 6 rows.
x Assigned data has 60 rows.
ℹ Only vectors of size 1 are recycled.
Thanks for your help (if only...)
The script:
################# DATAFRAME
data("ToothGrowth")
df <- ToothGrowth
vec <- c("A","B")
df$dose <- as.character(df$dose)
df$facet <- rep(sample(vec, 2),replace=T, nrow(df)/2)
view(df)
################### STAT
df_pval <- df %>%
rstatix::group_by(dose) %>%
rstatix::wilcox_test(len ~ supp) %>%
rstatix::add_xy_position()
df_pval2 <- df %>%
rstatix::group_by(supp) %>%
rstatix::wilcox_test(len ~ dose) %>%
rstatix::add_xy_position(x = "supp", dodge = 0.8)
################### PLOT
plotx <- ggplot(df, aes(x = supp, y = len)) +
geom_boxplot(aes(fill = dose)) +
stat_pvalue_manual(df_pval,
label = "{p}",
color = "dose",
fontface = "bold",
step.group.by = "dose",
step.increase = 0.1,
tip.length = 0,
bracket.colour = "black",
show.legend = FALSE) +
stat_pvalue_manual(df_pval2,
label = "{p}",
color = "black",
fontface = "bold",
step.group.by = "supp",
step.increase = 0.1,
tip.length = 0,
bracket.colour = "black",
show.legend = FALSE)
plot(plotx)
################### STAT FACET
df_pval3 <- df %>%
rstatix::group_by(dose, facet) %>%
rstatix::wilcox_test(len ~ supp) %>%
rstatix::add_xy_position()
df_pval4 <- df %>%
rstatix::group_by(supp, facet) %>%
rstatix::wilcox_test(len ~ dose) %>%
rstatix::add_xy_position(x = "supp", dodge = 0.8)
print(df_pval)
print(df_pval2)
###################### PLOT FACET
ploty <- ggplot(df, aes(x = supp, y = len)) +
geom_boxplot(aes(fill = dose)) +
facet_wrap(~df[,4]) + stat_pvalue_manual(df_pval3,
label = "{p}",
color = "dose",
fontface = "bold",
step.group.by = "dose",
step.increase = 0.1,
tip.length = 0,
bracket.colour = "black",
show.legend = FALSE) +
stat_pvalue_manual(df_pval4,
label = "{p}",
color = "black",
fontface = "bold",
step.group.by = "supp",
step.increase = 0.1,
tip.length = 0,
bracket.colour = "black",
show.legend = FALSE)
plot(ploty)

Related

plot density plots with confidence intervals of 95% in R

I'm trying draw multiple density plots in one plot for comparison porpuses. I wanted them to have their confidence interval of 95% like in the following figure. I'm working with ggplot2 and my df is a long df of observations for a certain location that I would like to compare for different time intervals.
I've done some experimentation following this example but I don't have the coding knowledge to achieve what I want.
What i managed to do so far:
library(magrittr)
library(ggplot2)
library(dplyr)
build_object <- ggplot_build(
ggplot(data=ex_long, aes(x=val)) + geom_density())
plot_credible_interval <- function(
gg_density, # ggplot object that has geom_density
bound_left,
bound_right
) {
build_object <- ggplot_build(gg_density)
x_dens <- build_object$data[[1]]$x
y_dens <- build_object$data[[1]]$y
index_left <- min(which(x_dens >= bound_left))
index_right <- max(which(x_dens <= bound_right))
gg_density + geom_area(
data=data.frame(
x=x_dens[index_left:index_right],
y=y_dens[index_left:index_right]),
aes(x=x,y=y),
fill="grey",
alpha=0.6)
}
gg_density <- ggplot(data=ex_long, aes(x=val)) +
geom_density()
gg_density %>% plot_credible_interval(tab$q2.5[[40]], tab$q97.5[[40]])
Help would be much apreaciated.
This is obviously on a different set of data, but this is roughly that plot with data from 2 t distributions. I've included the data generation in case it is of use.
library(tidyverse)
x1 <- seq(-5, 5, by = 0.1)
t_dist1 <- data.frame(x = x1,
y = dt(x1, df = 3),
dist = "dist1")
x2 <- seq(-5, 5, by = 0.1)
t_dist2 <- data.frame(x = x2,
y = dt(x2, df = 3),
dist = "dist2")
t_data = rbind(t_dist1, t_dist2) %>%
mutate(x = case_when(
dist == "dist2" ~ x + 1,
TRUE ~ x
))
p <- ggplot(data = t_data,
aes(x = x,
y = y )) +
geom_line(aes(color = dist))
plot_data <- as.data.frame(ggplot_build(p)$data)
bottom <- data.frame(plot_data) %>%
mutate(dist = case_when(
group == 1 ~ "dist1",
group == 2 ~ "dist2"
)) %>%
group_by(dist) %>%
slice_head(n = ceiling(nrow(.) * 0.1)) %>%
ungroup()
top <- data.frame(plot_data) %>%
mutate(dist = case_when(
group == 1 ~ "dist1",
group == 2 ~ "dist2"
)) %>%
group_by(dist) %>%
slice_tail(n = ceiling(nrow(.) * 0.1)) %>%
ungroup()
segments <- t_data %>%
group_by(dist) %>%
summarise(x = mean(x),
y = max(y))
p + geom_area(data = bottom,
aes(x = x,
y = y,
fill = dist),
alpha = 0.25,
position = "identity") +
geom_area(data = top,
aes(x = x,
y = y,
fill = dist),
alpha = 0.25,
position = "identity") +
geom_segment(data = segments,
aes(x = x,
y = 0,
xend = x,
yend = y,
color = dist,
linetype = dist)) +
scale_color_manual(values = c("red", "blue")) +
scale_linetype_manual(values = c("dashed", "dashed"),
labels = NULL) +
ylab("Density") +
xlab("\U03B2 for AQIv") +
guides(color = guide_legend(title = "p.d.f \U03B2",
title.position = "right",
labels = NULL),
linetype = guide_legend(title = "Mean \U03B2",
title.position = "right",
labels = NULL,
override.aes = list(color = c("red", "blue"))),
fill = guide_legend(title = "Rej. area \U03B1 = 0.05",
title.position = "right",
labels = NULL)) +
annotate(geom = "text",
x = c(-4.75, -4),
y = 0.35,
label = c("RK", "OK")) +
theme(panel.background = element_blank(),
panel.border = element_rect(fill = NA,
color = "black"),
legend.position = c(0.2, 0.7),
legend.key = element_blank(),
legend.direction = "horizontal",
legend.text = element_blank(),
legend.title = element_text(size = 8))

How to adjust overall plot width (X-axis size)?

I have been using the following R script, but the width of the X-axis is too wide. Could someone kindly help me to adjust the X-axis width? Thanks
library(ggpubr)
library(rstatix)
df <- ToothGrowth
df$dose <- as.factor(df$dose)
head(df, 6)
# Statistical test
stat.test <- df %>%
t_test(len ~ supp) %>%
add_significance()
stat.test
bxp <- ggboxplot(df, x = "supp", y = "len", fill = "supp",
palette = c("#00AFBB", "#E7B800"),width = 0.5)
stat.test <- stat.test %>% add_xy_position(x = "supp")
bxp + stat_pvalue_manual(
stat.test, label = "T-test, p = {p}",
vjust = -1, bracket.nudge.y = 1
) +
scale_y_continuous(expand = expansion(mult = c(0.05, 0.15)))+
scale_x_discrete(expand = c(2, 2))
It sounds like you may want to adjust the aspect ratio of the theme:
bxp + stat_pvalue_manual(
stat.test, label = "T-test, p = {p}",
vjust = -1, bracket.nudge.y = 1
) +
scale_y_continuous(expand = expansion(mult = c(0.05, 0.15)))+
scale_x_discrete(expand = c(2, 2)) +
theme(
aspect.ratio = 2
)

Text color with geom_label_repel

Not specific to any particular piece of code, is there a relatively straightforward way to change the color of the text in a geom_label_repel box?
Specifically, I have code that produces the below chart
The percentage in the label box is the percent change in 7-day moving average for the most recent week over the week prior. I'd simply like to color the text red when the value is positive and green when it is negative.
The dataframe for this chart can be copied from here.
The plot code is
#endpoint layer
BaseEndpoints <- smDailyBaseData %>% filter(Base %in% AFMCbases) %>%
group_by(Base) %>%
filter(DaysSince == max(DaysSince)) %>%
select(Base, abbv, DaysSince, newRate,label) %>%
ungroup()
ZoomEndpoints <- BaseEndpoints %>% filter(Base != 'Edwards') %>%
mutate(zoom = TRUE)
CAEndPoint <- BaseEndpoints %>% filter(Base == 'Edwards') %>%
mutate(zoom = FALSE)
ZoomEndpoints <- rbind(ZoomEndpoints, CAEndPoint)
BasePlot <- smDailyBaseData %>% filter(Base %in% AFMCbases) %>%
ggplot(mapping = aes(x = as.numeric(DaysSince), y = newRate)) +
geom_line(aes(color=abbv),show.legend = FALSE) +
scale_color_ucscgb() +
geom_point(data = BaseEndpoints,size = 1.5,shape = 21,
aes(color = abbv,fill = abbv), show.legend = FALSE) +
geom_label_repel(data=ZoomEndpoints, aes(label=label), show.legend = FALSE,
vjust = 0, xlim=c(105,200), size=3, direction='y') +
labs(x = "Days Since First Confirmed Case",
y = "% Local Population Infected Daily") +
theme(plot.title = element_text(size = rel(1), face = "bold"),
plot.subtitle = element_text(size = rel(0.7)),
plot.caption = element_text(size = rel(1))) +
facet_zoom(xlim = c(50,120), ylim=c(0,0.011),zoom.data=zoom)
print(BasePlot)
Yes, it's as simple as this:
library(ggplot2)
df <- data.frame(x = c(-1, -1, 1, 1), y = c(-1, 1, 1, -1), value = c(-2, -1, 1, 2))
ggplot(df, aes(x, y)) +
geom_point(size = 3) +
ggrepel::geom_label_repel(aes(label = value, colour = factor(sign(value)))) +
lims(x = c(-100, 100), y = c(-100, 100)) +
scale_colour_manual(values = c("red", "forestgreen"))
EDIT
Now we have a more concrete example, I can see the problem more clearly. There are workarounds such as using ggnewscale or a hand-crafted solution such as Ian Campbell's thorough example. Personally, I would just note that you haven't used the fill scale yet, and this looks pretty good to my eye:
Here's a bit of a hacky solution since you can't have two scale_color_*'s at the same time:
The approach centers on manually assigning the color outside of aes in the geom_label_repel call. Adding one to the grepl result that searches for the minus sign in the label allows you to subset the two colors. You need two colors for each label, I assume for the box and for the text, so I used rep.
smDailyBaseData %>%
ggplot(mapping = aes(x = as.numeric(DaysSince), y = newRate)) +
geom_line(aes(color=abbv),show.legend = FALSE) +
scale_color_ucscgb() +
geom_point(data = BaseEndpoints,size = 1.5,shape = 21,
aes(color = abbv,fill = abbv), show.legend = FALSE) +
geom_label_repel(data=ZoomEndpoints, aes(label=label),
color = rep(c("green","red")[1+grepl("\\-\\d",as.factor(ZoomEndpoints$label))],times = 2),
show.legend = FALSE, vjust = 0, xlim=c(105,200),
size=3, direction='y') +
labs(x = "Days Since First Confirmed Case",
y = "% Local Population Infected Daily") +
theme(plot.title = element_text(size = rel(1), face = "bold"),
plot.subtitle = element_text(size = rel(0.7)),
plot.caption = element_text(size = rel(1))) +
facet_zoom(xlim = c(50,120), ylim=c(0,0.011),zoom.data=zoom)
Data Setup
#source("https://pastebin.com/raw/Vn2abQ4a")
BaseEndpoints <- smDailyBaseData %>%
group_by(Base) %>%
dplyr::filter(DaysSince == max(DaysSince)) %>%
dplyr::select(Base, abbv, DaysSince, newRate,label) %>%
ungroup()
ZoomEndpoints <- BaseEndpoints %>% filter(Base != 'Edwards') %>%
mutate(zoom = TRUE)
CAEndPoint <- BaseEndpoints %>% filter(Base == 'Edwards') %>%
mutate(zoom = FALSE)
ZoomEndpoints <- rbind(ZoomEndpoints, CAEndPoint)

R - How can I add a bivariate legend to my ggplot2 chart?

I'm trying to add a bivariate legend to my ggplot2 chart but I don't know whether (a) this is possible through some guides options and (b) how to achieve it.
The only way I've managed to produce something close to the desired outcome was by specifically creating a new chart which resembles a legend (named p.legend below) and inserting it, via the cowplot package, somewhere in the original chart (named p.chart below). But surely there must be a better way than this, given that this approach requires creating the legend in the first place and fiddling with its size/location to fit it in the original chart.
Here's code for a dummy example of my approach:
library(tidyverse)
# Create Dummy Data #
set.seed(876)
n <- 2
df <- expand.grid(Area = LETTERS[1:n],
Period = c("Summer", "Winter"),
stringsAsFactors = FALSE) %>%
mutate(Objective = runif(2 * n, min = 0, max = 2),
Performance = runif(2 * n) * Objective) %>%
gather(Type, Value, Objective:Performance)
# Original chart without legend #
p.chart <- df %>%
ggplot(., aes(x = Area)) +
geom_col(data = . %>% filter(Type == "Objective"),
aes(y = Value, fill = Period),
position = "dodge", width = 0.7, alpha = 0.6) +
geom_col(data = . %>% filter(Type == "Performance"),
aes(y = Value, fill = Period),
position = "dodge", width = 0.7) +
scale_fill_manual(values = c("Summer" = "#ff7f00", "Winter" = "#1f78b4"), guide = FALSE) +
theme_minimal() +
theme(panel.grid.major.x = element_blank(),
panel.grid.minor.y = element_blank())
# Create a chart resembling a legend #
p.legend <- expand.grid(Period = c("Summer", "Winter"),
Type = c("Objective", "Performance"),
stringsAsFactors = FALSE) %>%
ggplot(., aes(x = Period, y = factor(Type, levels = c("Performance", "Objective")),
fill = Period, alpha = Type)) +
geom_tile() +
scale_fill_manual(values = c("Summer" = "#ff7f00", "Winter" = "#1f78b4"), guide = FALSE) +
scale_alpha_manual(values = c("Objective" = 0.7, "Performance" = 1), guide = FALSE) +
ggtitle("Legend") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5),
rect = element_rect(fill = "transparent"),
axis.title = element_blank(),
panel.grid.major = element_blank())
# Add legend to original chart #
p.final <- cowplot::ggdraw() +
cowplot::draw_plot(plot = p.chart) +
cowplot::draw_plot(plot = p.legend, x = 0.5, y = 0.65, width = 0.4, height = 0.28, scale = 0.7)
# Save chart #
cowplot::ggsave("Bivariate Legend.png", p.final, width = 8, height = 6, dpi = 500)
... and the resulting chart:
Is there an easier way of doing this?
This might work at some point, but right now the colorbox seems to ignore all breaks, names and labels (#ClausWilke?). Probably because the multiscales package is in really early stages.
Posting since it might work when future readers are here.
library(multiscales)
df %>%
mutate(
period = as.numeric(factor(Period)),
type = as.numeric(factor(Type))
) %>%
ggplot(., aes(x = Area, y = Value, fill = zip(period, type), group = interaction(Area, Period))) +
geom_col(width = 0.7, position = 'dodge') +
bivariate_scale(
"fill",
pal_hue_sat(c(0.07, 0.6), c(0.4, 0.8)),
guide = guide_colorbox(
nbin = 2,
name = c("Period", "Type"), #ignored
breaks = list(1:2, 1:2), #ignored
labels = list(levels(.$Period), levels(.$Type)) #ignored
)

Plotting two densities with vertical lines and correct legend

I want to draw two densities with two vertical lines for the averages.
The legend is once to denote the densities and once the vertical
lines.
I tried the code below. However, only one legend appears and the labeling is wrong.
Can anyone help me?
set.seed(1234)
data <- data.frame(value = rnorm(n = 10000, mean = 50, sd = 20),
type = sample(letters[1:2], size = 10000, replace = TRUE))
data$value[data$type == "b"] <- data$value[data$type == "b"] + 50
mean.a <- mean(data$value[data$type == "a"])
mean.b <- mean(data$value[data$type == "b"])
library(ggplot2)
gp <- ggplot(data = data, aes(x = value))
gp <- gp + geom_density(aes(fill = type), color = "black", alpha=0.3, lwd = 1.0, show.legend = TRUE)
gp <- gp + scale_fill_manual(breaks = 1:2, name = "Density", values = c("a" = "green", "b" = "blue"), labels = c("a" = "Density a", "b" = "Density b") )
gp <- gp + geom_vline(aes(color="mean.a", xintercept=mean.a), linetype="solid", size=1.0, show.legend = NA)
gp <- gp + geom_vline(aes(color="mean.b", xintercept=mean.b), linetype="dashed", size=1.0, show.legend = NA)
gp <- gp + scale_color_manual(name = "", values = c("mean.a" = "red", "mean.b" = "darkblue"), labels = c("mean.a" = "Mean.A", "mean.b" = "Mean.B"))
gp <- gp + theme(legend.position="top")
gp
Here are a couple ways to do it. I'm not sure, but I think some of the difficulty comes from having more than one geom_vline and trying to hard-code values in the aes. You're building three scales here: fill for the density curves, and color and linetype for the vertical lines. But you're aiming (correct me if I'm misreading) for two legends.
The easiest way to deal with getting the proper legends is to make a small data frame for the means, rather than individual values for each mean. You can do this easily with dplyr to calculate means for each type.
library(tidyverse)
set.seed(1234)
data <- data.frame(value = rnorm(n = 10000, mean = 50, sd = 20),
type = sample(letters[1:2], size = 10000, replace = TRUE))
data$value[data$type == "b"] <- data$value[data$type == "b"] + 50
means <- group_by(data, type) %>%
summarise(mean = mean(value))
means
#> # A tibble: 2 x 2
#> type mean
#> <fct> <dbl>
#> 1 a 50.3
#> 2 b 99.9
Then when you plot, you can make a single geom_vline call, assigning the means data frame and allowing the aesthetics you want—color and linetype—to be scaled based on this data. The trick then is reconciling the names and labels: if you don't set the same legend name and labels for both the color and linetype scales, you'll have two legends for the lines. Set them the same, and you get a single legend for the mean lines.
ggplot(data, aes(x = value)) +
geom_density(aes(fill = type), alpha = 0.3) +
geom_vline(aes(xintercept = mean, color = type, linetype = type), data = means) +
scale_color_manual(values = c("red", "darkblue"), labels = c("Mean.A", "Mean.B"), name = NULL) +
scale_linetype_discrete(labels = c("Mean.A", "Mean.B"), name = NULL) +
scale_fill_manual(values = c(a = "green", b = "blue"), name = "Density")
The second way is to just add a step to creating the means data frame where you label the types the way you want later, i.e. "Mean.A" instead of just "a". Then you don't need to adjust labels, and you can skip the linetype scale—unless you want to change linetypes manually—and then just remove the name for that legend for both color and linetype in your labs.
means2 <- group_by(data, type) %>%
summarise(mean = mean(value)) %>%
mutate(type = paste("Mean", str_to_upper(type), sep = "."))
means2
#> # A tibble: 2 x 2
#> type mean
#> <chr> <dbl>
#> 1 Mean.A 50.3
#> 2 Mean.B 99.9
ggplot(data, aes(x = value)) +
geom_density(aes(fill = type), alpha = 0.3) +
geom_vline(aes(xintercept = mean, color = type, linetype = type), data = means2) +
scale_color_manual(values = c(Mean.A = "red", Mean.B = "darkblue")) +
scale_fill_manual(values = c(a = "green", b = "blue"), name = "Density") +
labs(color = NULL, linetype = NULL)
Created on 2018-06-05 by the reprex package (v0.2.0).

Resources