shade block between two lines, values vary with facet_wrap - r

I'm plotting the relationships between speed and time for four different species (each in a different facet). For each species, I have a range of speeds I'm interested in, and would like to shade the area between the min and max values. However, these ranges are different for the 4th species compared to the first three.
# Simulated observations to plot as points: a random species label (a-d),
# a time drawn uniformly from [1, 100] and a speed drawn uniformly from [1, 20].
n_obs <- 40
species <- sample(c("a", "b", "c", "d"), size = n_obs, replace = TRUE)
time <- runif(n_obs, min = 1, max = 100)
speed <- runif(n_obs, min = 1, max = 20)
df <- data.frame(species = species, time = time, speed = speed)
# Lower and upper bound of the key speed range, one row per species.
sp <- c("a", "b", "c", "d")
minspeed <- c(5, 5, 5, 8)
maxspeed <- c(10, 10, 10, 13)
df.range <- data.frame(sp = sp, minspeed = minspeed, maxspeed = maxspeed)
# Speed against time, one panel per species, with the lower and upper key
# speeds drawn as red horizontal lines. df.range has no `species` column, so
# both hline layers are repeated in every panel.
ggplot() +
  geom_hline(
    mapping = aes(yintercept = minspeed),
    data = df.range,
    colour = "red"
  ) +
  geom_hline(
    mapping = aes(yintercept = maxspeed),
    data = df.range,
    colour = "red"
  ) +
  geom_point(
    mapping = aes(x = time, y = speed),
    data = df,
    shape = 1
  ) +
  theme_bw() +
  facet_wrap(~species)
How do I:
get geom_hline to only plot the max and min ranges for the correct species, and
shade the area between the two lines?
For the latter part, I've tried adding geom_ribbon to my plot, but I keep getting an error message that I'm unsure how to address.
# Attempted shading layer (this is the call that fails): the layer data is df
# (40 rows), but x is supplied as a length-2 vector and minspeed/maxspeed are
# not columns of df, so ggplot2 reports the aesthetics-length error shown below.
geom_ribbon(data = df,
aes(ymin = minspeed, ymax = maxspeed,
x = c(0.0001, 100)),
fill = "grey",
alpha = 0.5) +
Error: Aesthetics must be either length 1 or the same as the data
(40): x, ymin, ymax

As per my comment, the following should work. Perhaps there are other unobserved differences between your actual use case & the example in your question?
# The range table must carry the faceting variable under the same name as the
# point data ("species"); a layer whose data lacks the faceting variable is
# repeated in every panel.
names(df.range)[names(df.range) == "sp"] <- "species"
ggplot() +
  # Draw the shaded band first so it sits underneath the lines and points
  # instead of washing them out. -Inf/Inf stretch it across the panel width.
  geom_rect(data = df.range,
            aes(xmin = -Inf, xmax = Inf, ymin = minspeed, ymax = maxspeed),
            fill = "grey", alpha = 0.5) +
  # Lower and upper bound of the key speed range for each species.
  geom_hline(data = df.range, aes(yintercept = minspeed),
             colour = "red") +
  geom_hline(data = df.range, aes(yintercept = maxspeed),
             colour = "red") +
  geom_point(data = df, aes(time, speed),
             shape = 1) +
  facet_wrap(~species) +
  theme_bw()
Data used:
# Example data: 40 random observations spread over species a-d.
df <- data.frame(
  species = sample(c("a", "b", "c", "d"), size = 40, replace = TRUE),
  time = runif(n = 40, min = 1, max = 100),
  speed = runif(n = 40, min = 1, max = 20)
)
# Key speed range per species; the species column is still called `sp` here.
df.range <- data.frame(
  sp = c("a", "b", "c", "d"),
  minspeed = c(5, 5, 5, 8),
  maxspeed = c(10, 10, 10, 13)
)

Related

How to predefine legend colours based on value range using ggplot2 and RColorBrewer?

I have some data from a range of tests that I'm calculating STEN scores for. I'm aiming to visualise this data in the form of a circular bar plot and would like to set the colour gradient based on a STEN score range. For example, a score of 0-2 would be a very light colour, 2.1-4 light, 4.1-6 moderate, 6.1-8 dark and 8.1-10 very dark. My code below uses the RColorBrewer package and the "YlGn" palette, but I'm stuck on how I can predefine the colour scheme based on the example mentioned above and set this in the plot legend. The example below produces a circular bar plot containing a lowest STEN score of 4.8, so I would like this to be reflected as the moderate colour, where currently its the lightest. I essentially want the legend to show all five STEN score ranges irrespective of whether someone's data scores within each range. Hope this makes sense.
library(tidyverse)
library(RColorBrewer)
set.seed(50)
# Raw scores of 15 participants on five tests.
dat <- data.frame(
  par = paste("par", 1:15),
  test_1 = round(rnorm(15, mean = 30, sd = 5), 1),
  test_2 = round(rnorm(15, mean = 30, sd = 5), 1),
  # Round to the nearest 2.5 in base R: round_any() belongs to plyr, which
  # library(tidyverse) does not attach.
  test_3 = round(rnorm(15, mean = 90, sd = 5) / 2.5) * 2.5,
  test_4 = round(rnorm(15, mean = 5.4, sd = 0.3), 1),
  test_5 = round(rnorm(15, mean = 17, sd = 1.5), 1)
)
# STEN score = z-score * 2 + 5.5, rounded to one decimal. as.numeric() drops
# the one-column matrix that scale() returns so the columns stay plain vectors.
sten_dat <- dat %>%
  mutate(
    across(where(is.numeric), ~ round(as.numeric(scale(.x)) * 2 + 5.5, 1))
  ) %>%
  pivot_longer(!par, names_to = "test", values_to = "sten") %>%
  filter(par == "par 1")
# Circular bar plot of one participant's STEN scores. The fill gradient is
# still stretched over the observed scores only.
ggplot(sten_dat) +
  geom_col(aes(x = str_wrap(test), y = sten, fill = sten),
           position = "dodge2", alpha = 0.7, show.legend = TRUE) +
  coord_polar() +
  scale_y_continuous(limits = c(-1, 11), breaks = seq(0, 10, 2)) +
  scale_fill_gradientn(colours = brewer.pal(name = "YlGn", n = 5))
Simply add limits to your fill scale:
# Same circular bar plot, with the fill scale pinned to the 0-10 range so the
# colours do not depend on which scores are present in the data.
ggplot(data = sten_dat) +
  geom_col(
    mapping = aes(x = str_wrap(test), y = sten, fill = sten),
    position = "dodge2",
    alpha = 0.7,
    show.legend = TRUE
  ) +
  scale_fill_gradientn(
    colours = brewer.pal(n = 5, name = "YlGn"),
    limits = c(0, 10)
  ) +
  scale_y_continuous(breaks = seq(0, 10, by = 2), limits = c(-1, 11)) +
  coord_polar()
If you want the colors to be clearly "binned" in the way you describe, you can use scale_fill_stepsn instead of scale_fill_gradientn:
# Binned variant: five discrete colour steps with breaks every 2 units over
# the 0-10 range, drawn on a curved polar coordinate system.
ggplot(data = sten_dat) +
  geom_col(
    mapping = aes(x = str_wrap(test), y = sten, fill = sten),
    position = "dodge2",
    alpha = 0.7,
    show.legend = TRUE
  ) +
  scale_fill_stepsn(
    colours = brewer.pal(n = 5, name = "YlGn"),
    breaks = seq(0, 10, by = 2),
    limits = c(0, 10)
  ) +
  scale_y_continuous(breaks = seq(0, 10, by = 2), limits = c(-1, 11)) +
  geomtextpath::coord_curvedpolar() +
  theme_minimal() +
  theme(
    panel.grid.major.x = element_blank(),
    axis.text.x = element_text(size = 16, face = 2)
  )

make the width scale of y axis proper size in R

How do I correct the span of the y-axis? The first diagram has a bigger range, while the last diagram has a smaller range.
I tried using expand_limits(), but then I have to define the limits myself. What I want is to set the limits based on the mean value ± 10%.
Data
structure(list(height = structure(1:21, .Label = c("_150_5_",
"_150_4_", "01_150_3_", "01_150_2_", "_150_1_",
"01_130_5_", "01_130_4_", "01_130_3_", "01_130_2_",
"L01_130_1_", "01_100_5_", "01_100_4_", "01_100_3_",
"01_100_2_", "01_100_1_", "01_60_5_", "01_60_4_",
"01_60_3_", "01_60_2_", "01_60_1_", "01_30_5_"
), class = "factor"), max = c(153.502609757564, 153.803890640307,
154.030628562627, 153.502609757564, 153.577819267489, 133.497584806195,
133.440753139611, 133.896765965376, 134.068575331457, 133.725396384362,
102.872441458794, 103.347289523556, 103.279185873129, 101.048462000305,
102.035263387027, 60.852713866229, 60.8645299271739, 60.9236791302129,
60.8763505777715, 61.0542129187662, 30.8972231764362), mean = c(152.038047221229,
151.858031107105, 152.211206935181, 151.759867764584, 150.344389742043,
131.874101333396, 131.706179220053, 131.043612919162, 132.264362261993,
130.599623937693, 101.774080628225, 102.110144624754, 102.239940146821,
100.053415273797, 100.577556727676, 60.299452319695, 60.3004949199648,
60.3066081777292, 60.3048844335163, 60.3267015589117, 30.347932670538
), min = c(150.120847282062, 148.344689600069, 148.767123457497,
148.20441093378, 146.06352708525, 129.15217516479, 129.258692422658,
127.367870428665, 129.418798152331, 127.006616339119, 99.7938010585627,
100.401130405172, 101.081047766832, 98.2917306757434, 99.1623945349401,
59.7507299132569, 59.7507299132569, 59.8077330900488, 59.7507299132569,
59.8191467795698, 29.7732075536612), sd = c(0.384120348675233,
0.996143559832467, 0.892389162104352, 0.668245088780541, 1.26871400480022,
0.717796939735463, 0.841062860547558, 1.09283360068465, 0.801961749792679,
1.40866403449516, 0.370811042540416, 0.387499052903713, 0.273143219592094,
0.372612511324188, 0.448178158096896, 0.141781338201885, 0.143328065432486,
0.140326202644008, 0.141854728955873, 0.139981570704421, 0.155319872754675
)), class = "data.frame", row.names = c(NA, -21L))
the code i have tried
library("cowplot")
library(ggpubr)
# Build one plot per block of five consecutive rows of stat.std_w
# (rows 1-5, 6-10, 11-15 and 16-20).
# The original par(mfcol = c(5, 1)) call was dropped: par() only configures
# base graphics and has no effect on ggplot2 output.
plot_list_stat <- vector("list", 4)
for (ii in seq_len(4)) {
  k <- (ii - 1) * 5 + 1  # first row of the current block
  plot_list_stat[[ii]] <-
    ggplot(stat.std_w[k:(k + 4), ], aes(x = height, y = mean, group = 1)) +
    geom_ribbon(aes(x = height, ymax = max, ymin = min, color = "min-max"),
                alpha = 0.6, fill = "skyblue", show.legend = TRUE) +
    # guide = FALSE is deprecated; "none" is the supported spelling.
    scale_fill_manual("", values = "skyblue", guide = "none") +
    geom_line() +
    ylab("") +
    geom_point() +
    theme(axis.text.x = element_text(angle = 90)) +
    geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd, color = "mean±sd"),
                  width = .2, position = position_dodge(0.05)) +
    theme(legend.position = "none")
  print(ii)
}
# Put the four plots side by side and add the shared title and y-axis label.
pgrid <- plot_grid(plotlist = plot_list_stat, nrow = 1)
pgrid <- ggarrange(pgrid, common.legend = TRUE)
annotate_figure(pgrid,
                top = text_grob("statistic ", size = 14),
                left = text_grob(" mean", rot = 90))
By default the plot area will be expanded by a small amount from the extreme values in the data. There are a number of mechanisms for manually overriding this behavior depending on your needs. In general they are much harder to implement in faceted plots, but the visual you're after really calls for faceting, so below I'm showing a solution with geom_blank() which will work with this example.
library(tidyverse)
# parse text in 'height' variable to be sortable
# (remove = FALSE keeps the original height column, which is used later for
# the axis labels; spelled out because T/F can be reassigned)
df2 <- df %>%
  separate(col = height, into = c("sub_grp", "grp", "order"),
           remove = FALSE, fill = "left") %>%
  mutate(grp = fct_rev(factor(as.numeric(grp))),
         order = fct_rev(factor(as.numeric(order))))
#> Warning: Expected 3 pieces. Additional pieces discarded in 21 rows [1, 2, 3, 4,
#> 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
# set fraction to expand plot by
# these are based on the requested +/- 10% in the question
fract_up <- 1.1
fract_down <- 0.9
# extract table of limits to expand plot by:
# two rows per grp ("top" and "bot") holding the group mean scaled by
# fract_up / fract_down, plus the mean position of the group on the x axis
df3 <- df2 %>%
  group_by(grp) %>%
  summarise(grp_mean = mean(mean),
            grp_order = mean(as.numeric(order))) %>%
  mutate(grp_top = fract_up * grp_mean,
         grp_bot = fract_down * grp_mean,
         .keep = "unused") %>%
  pivot_longer(-c(grp, grp_order), names_to = "type", names_prefix = "grp_")
# plot it all together
# Faceted summary plot: one panel per grp showing the min-max ribbon, the mean
# as line and points, and mean +/- sd error bars.
df2 %>%
ggplot(aes(x = order, y = mean, group = grp)) +
# Band between the per-row min and max values.
geom_ribbon(
aes(
ymax = max,
ymin = min,
color = "min-max",
group = grp
),
alpha = 0.6,
fill = "skyblue"
) +
geom_line() +
geom_point() +
# Error bars spanning one standard deviation either side of the mean.
geom_errorbar(
aes(
ymin = mean - sd,
ymax = mean + sd,
color = "mean±sd"
),
width = .2,
position = position_dodge(0.05)
) +
# Draws nothing: the df3 rows (group mean scaled by fract_up / fract_down) are
# only there to stretch each panel's y range.
geom_blank(data = df3, aes(x = grp_order, y = value)) +
# scales = "free" gives every panel its own x and y axis.
facet_wrap(facets = vars(grp),
scales = "free",
nrow = 1) +
# No x-axis title; tick labels are taken from the original height strings.
scale_x_discrete(NULL, labels = df2$height) +
ggtitle("Standard statistic") +
ylab("Mean") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
plot.title = element_text(hjust = 0.5))
#> geom_path: Each group consists of only one observation. Do you need to adjust
#> the group aesthetic?
Created on 2021-03-15 by the reprex package (v1.0.0)
I'd use facets instead. With this for looping you're kind of making your own life really complicated.
library(tidyverse)
df %>%
  # makes new variables: splits height into x, height and index
  separate(height, into = c("x", "height", "index")) %>%
  ggplot(aes(x = index, y = mean, group = 1)) +
  geom_ribbon(aes(x = index, ymax = max, ymin = min, color = "min-max"),
              alpha = 0.6, fill = "skyblue", show.legend = TRUE) +
  geom_line() +
  geom_point() +
  geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd, color = "mean±sd"),
                width = .2, position = position_dodge(0.05)) +
  # guide = FALSE is deprecated; "none" is the supported way to drop a guide
  scale_fill_manual(values = "skyblue", guide = "none") +
  labs(y = NULL) + # use NULL, not "" !!
  # one panel per height, all panels sharing the same y scale
  facet_grid(~height) +
  theme(axis.text.x = element_text(angle = 90), legend.position = "none")
#> Warning: Expected 3 pieces. Additional pieces discarded in 21 rows [1, 2, 3, 4,
#> 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
#> geom_path: Each group consists of only one observation. Do you need to adjust
#> the group aesthetic?
Created on 2021-03-15 by the reprex package (v1.0.0)
I personally would prefer the same scale in this case, but if you want flexible y scales, use facet_wrap instead
df %>%
  # makes new variables: splits height into x, height and index
  separate(height, into = c("x", "height", "index")) %>%
  ggplot(aes(x = index, y = mean, group = 1)) +
  geom_ribbon(aes(x = index, ymax = max, ymin = min, color = "min-max"),
              alpha = 0.6, fill = "skyblue", show.legend = TRUE) +
  geom_line() +
  geom_point() +
  geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd, color = "mean±sd"),
                width = .2, position = position_dodge(0.05)) +
  # guide = FALSE is deprecated; "none" is the supported way to drop a guide
  scale_fill_manual(values = "skyblue", guide = "none") +
  labs(y = NULL) +
  # scales = "free_y" lets every panel pick its own y range
  facet_wrap(~height, scales = "free_y", nrow = 1) +
  theme(axis.text.x = element_text(angle = 90), legend.position = "none")

Remove points with 0 density (no data) in stat_density_2d(geom = 'point')

I have two dataframes, one which I want to make a stat_density_2d plot using a 'raster' geom and one in which I want to use a 'point' geom. For the point geom I want to remove any point where there is no data though, as measured by a point size of 0.
The following is my code:
library(tidyverse)
set.seed(1)
#tibble for raster density plot
df <- tibble(x = runif(1000000, min = -7, max = 5),
             y = runif(1000000, min = 0, max = 1000))
#tibble for point density plot
df2 <- tibble(x = runif(20000, min = -2, max = 2),
              y = runif(20000, min = 0, max = 500))
#create the density plot
# after_stat(density) replaces the deprecated stat(density) / ..density..
# notations for referring to a variable computed by the stat.
p1 <- ggplot(NULL, aes(x = x, y = y)) +
  # background: 2d density of df as a raster, transparent -> red
  stat_density_2d(data = df, aes(fill = after_stat(density)),
                  geom = "raster", contour = FALSE) +
  scale_fill_gradient(low = "transparent", high = "red") +
  # foreground: 2d density of df2 on a 40 x 40 grid, shown as point size
  stat_density_2d(data = df2, geom = "point", aes(size = after_stat(density)),
                  n = 40, contour = FALSE) +
  theme_bw() +
  theme(text = element_text(size = 18)) +
  ylim(0, 1000) + xlim(-7, 5)
p1
which returns:
But where the points are smallest (outside the bounds specified in the df2 tibble) I don't want any density points to be shown. Is there any way to remove these?
Here's a hack, though I don't know how robust it is to differences in data.
BLUF: add scale_radius(range=c(-1,6)).
I reduced your data a lot so that it doesn't take 5 minutes to render.
set.seed(1)
# Reduced samples: 1000 points for the raster layer, 20 for the point layer.
df <- tibble(
  x = runif(n = 1000, min = -7, max = 5),
  y = runif(n = 1000, min = 0, max = 1000)
)
df2 <- tibble(
  x = runif(n = 20, min = -2, max = 2),
  y = runif(n = 20, min = 0, max = 500)
)
Four plots:
Your code (my data), no other change;
scale_radius();
scale_radius(range = c(-0.332088004, 6)); and
scale_radius(range = c(-1, 6)).
This is surely a hack, and I don't know how to find a more precise way of filtering out specific levels.
The modified code:
# after_stat(density) replaces the deprecated stat(density) / ..density..
# notations for referring to a variable computed by the stat.
p1 <- ggplot(NULL, aes(x = x, y = y)) +
  stat_density_2d(data = df, aes(fill = after_stat(density)),
                  geom = "raster", contour = FALSE) +
  scale_fill_gradient(low = "transparent", high = "red") +
  stat_density_2d(data = df2, geom = "point", aes(size = after_stat(density)),
                  n = 40, contour = FALSE) +
  theme_bw() +
  # Alternatives that were compared; only the last one is active.
  # scale_radius() +
  # scale_radius(range = c(-0.332088004, 6)) +
  scale_radius(range = c(-1, 6)) +
  theme(text = element_text(size = 18)) +
  ylim(0, 1000) + xlim(-7, 5)

Why do geom_density and stat_density(geom = "line") give different results?

In the following illustration, why do geom_density and stat_density(geom = "line") give different results?
library(ggplot2)
# Four normal samples of 100 draws each (sd 1) centred on 1, 4, 7 and 10,
# stacked in one column and labelled with the mean they were drawn from.
group_means <- c(1, 4, 7, 10)
df <- data.frame(
  x.values = unlist(
    lapply(group_means, function(m) rnorm(100, mean = m, sd = 1))
  ),
  mean.values = rep(group_means, each = 100)
)
# Shared base plot: one density curve per mean.values group.
p <- ggplot(
  data = df,
  mapping = aes(x = x.values, colour = mean.values, group = mean.values)
)
# The two calls being compared.
p + geom_density()
p + stat_density(geom = "line")
It's a difference in the position argument. The default in stat_density is position = "stack", whilst with geom_density() it is position = "identity".
If you call p + stat_density(geom = "line", position = "identity") you get the same as geom_density():

Using position_dodge within stat_summary for means and confidence intervals?

I'm trying to show data of two groups. I am using the ggplot2 package to graph the data and using stat_summary() to obtain a point estimate (mean) and 90% CI within the plot of the data. What I'd like is for the mean and confidence interval be structured off to the right of the points representing the distribution of the data. Currently, stat_summary() will simply impose the mean and CI over top of the distribution.
Here is an example of data that I am working with:
set.seed(9909)
# 100 subjects: the first 50 are drawn from N(30, 5), the last 50 from N(35, 8).
Subjects <- seq_len(100)
values <- c(rnorm(n = 50, mean = 30, sd = 5), rnorm(n = 50, mean = 35, sd = 8))
# Group label for each half of the subjects.
group1 <- rep("group1", times = 50)
group2 <- rep("group2", times = 50)
group <- c(group1, group2)
# cbind() first, as before, so Subjects is stored as a double column.
data <- data.frame(cbind(Subjects, values), group)
print(data)
And this is what my current ggplot2 code looks like (distribution as points with the mean and 90% CI overlaid on top for each group):
# Raw values per group with the group mean (red point) and its 90% confidence
# interval (red bar) drawn on top of the points.
ggplot(data, aes(x = group, y = values, group = 1)) +
  geom_point() +
  # `fun` replaces the deprecated `fun.y` argument of stat_summary().
  stat_summary(fun = "mean", color = "red", size = 5, geom = "point") +
  stat_summary(fun.data = "mean_cl_normal", color = "red", size = 2,
               geom = "errorbar", width = 0,
               fun.args = list(conf.int = 0.9)) +
  theme_bw()
Is it possible to get the mean and confidence intervals to position_dodge to the right of their respective groups?
You can use position_nudge:
# Same plot, with both summary layers shifted 0.1 units to the right so the
# mean and its 90% confidence interval sit beside the raw points.
ggplot(data, aes(x = group, y = values, group = 1)) +
  geom_point() +
  # `fun` replaces the deprecated `fun.y` argument of stat_summary().
  stat_summary(fun = "mean", color = "red", size = 5, geom = "point",
               position = position_nudge(x = 0.1, y = 0)) +
  stat_summary(fun.data = "mean_cl_normal", color = "red", size = 2,
               geom = "errorbar", width = 0, fun.args = list(conf.int = 0.9),
               position = position_nudge(x = 0.1, y = 0)) +
  theme_bw()

Resources