Say I have the following data frame:
# Set seed for RNG
set.seed(33550336)
# Create toy data frame
loc_x <- c(a = 1, b = 2, c = 3)
loc_y <- c(a = 3, b = 2, c = 1)
scaling <- c(temp = 100, sal = 10, chl = 1)
df <- expand.grid(loc_name = letters[1:3],
variables = c("temp", "sal", "chl"),
season = c("spring", "autumn")) %>%
mutate(loc_x = loc_x[loc_name],
loc_y = loc_y[loc_name],
value = runif(nrow(.)),
value = value * scaling[variables])
which looks like,
# > head(df)
# loc_name variables season loc_x loc_y value
# 1 a temp spring 1 3 86.364697
# 2 b temp spring 2 2 35.222573
# 3 c temp spring 3 1 52.574082
# 4 a sal spring 1 3 0.667227
# 5 b sal spring 2 2 3.751383
# 6 c sal spring 3 1 9.197086
I want to plot these data in a facet grid using variables and season to define panels, like this:
g <- ggplot(df) + geom_point(aes(x = loc_name, y = value), size = 5)
g <- g + facet_grid(variables ~ season)
g
As you can see, different variables have very different scales. So, I use scales = "free" to account for this.
g <- ggplot(df) + geom_point(aes(x = loc_name, y = value), size = 5)
g <- g + facet_grid(variables ~ season, scales = "free")
g
Mucho convenient. Now, say I want to do this, but plot the points by loc_x and loc_y and have value represented by colour instead of y position:
g <- ggplot(df) + geom_point(aes(x = loc_x, y = loc_y, colour = value),
size = 5)
g <- g + facet_grid(variables ~ season, scales = "free")
g <- g + scale_colour_gradient2(low = "#3366CC",
mid = "white",
high = "#FF3300",
midpoint = 50)
g
Notice that the colour scales are not free and, like the first figure, values for sal and chl cannot be read easily.
My question: is it possible to do an equivalent of scales = "free" but for colour, so that each row (in this case) has a separate colour bar? Or, do I have to plot each variable (i.e., row in the figure) and patch them together using something like cowplot?
Using the development version of dplyr:
library(dplyr)
library(purrr)
library(ggplot2)
library(cowplot)
df %>%
group_split(variables, season) %>%
map(
~ggplot(., aes(loc_x, loc_y, color = value)) +
geom_point(size = 5) +
scale_colour_gradient2(
low = "#3366CC",
mid = "white",
high = "#FF3300",
midpoint = median(.$value)
) +
facet_grid(~ variables + season, labeller = function(x) label_value(x, multi_line = FALSE))
) %>%
plot_grid(plotlist = ., align = 'hv', ncol = 2)
Related
I have a time series DataFrame in R.
There are 4 groups and the data of each group (variable) is acquired at 3 different timepoints.
Group Timepoint Variable
A 1 1.4705745
B 1 4.6090900
C 1 2.2480962
D 1 1.6443650
E 1 4.4812444
A 2 0.8026552
B 2 4.7803944
C 2 1.3743527
D 2 4.0399467
E 2 3.5651057
A 3 4.7275369
B 3 2.4491532
C 3 3.9508347
D 3 3.4278974
E 3 0.6917490
I made a line plot using the following code,
plot_data <- ggplot(data, aes(x = Timepoint, y = Variable, color = Group, group = Group))+geom_line()+
scale_color_discrete("Group")+
scale_y_continuous(limits = c(0, 6))
plot_data
but also want to add significant asterisk, for instance, like that.
Is there any way to add asterisk to the plot manually?
You can use annotate like this:
library(ggplot2)
plot_data <- ggplot(data, aes(x = Timepoint, y = Variable, color = Group, group = Group))+geom_line()+
scale_color_discrete("Group")+
scale_y_continuous(limits = c(0, 6)) +
annotate('text', x = 1, y = c(5,5.2), label='"*"', parse=TRUE, color = c("pink", "yellow")) +
annotate('text', x = 3, y = c(5,5.2), label='"*"', parse=TRUE, color = c("red", "green"))
plot_data
Created on 2022-09-02 with reprex v2.0.2
To get the exact same colors as in standard ggplot you can check that by using hue_pal like this:
library(ggplot2)
library(scales)
show_col(hue_pal()(5))
plot_data <- ggplot(data, aes(x = Timepoint, y = Variable, color = Group, group = Group))+geom_line()+
scale_color_discrete("Group")+
scale_y_continuous(limits = c(0, 6)) +
annotate('text', x = 1, y = c(5,5.2), label='"*"', parse=TRUE, color = c("#A3A500", "#E76BF3")) +
annotate('text', x = 3, y = c(5,5.2), label='"*"', parse=TRUE, color = c("#F8766D", "#00BF7D"))
plot_data
Created on 2022-09-02 with reprex v2.0.2
I am trying to plot multiple paths in a gganimate plot. I want the lines to fade out over the last N frames (e.g. N=5 in this example).
The data look like this:
set.seed(27)
df <- data.frame(Frame = rep(1:10, 3),
id = factor(rep(1:3, each = 10)),
x = runif(30),
y = runif(30))
head(df)
Frame id x y
1 1 1 0.97175023 0.14257923
2 2 1 0.08375751 0.47864658
3 3 1 0.87386992 0.05182206
4 4 1 0.32923136 0.25514379
5 5 1 0.22227551 0.14262912
6 6 1 0.40164822 0.48288482
I tried to make the plot using shadow_mark, but this doesn't appear to have the lines fade out over time.
df %>%
ggplot(aes(x = x, y = y, group = id, color = id)) +
geom_path() +
geom_point()+
scale_color_manual(values=c("red","blue","green")) +
transition_reveal(along = Frame) +
shadow_mark(size = 0.75) +
theme_void()
This just produces the below:
Is there a way to make these lines fade. Ideally, I'm just plotting a rolling path of N frames.
Is this something like what you're looking for? Adapted from the post mentioned in the comments. You don't need to use transition_reveal() if you use geom_segment().
library(gganimate)
library(dplyr)
library(tidyr)
set.seed(27)
n <- 10
df <- data.frame(Frame = rep(1:n, 3),
id = factor(rep(1:3, each = n)),
x = runif(3*n),
y = runif(3*n))
newdf <- df %>%
uncount(n, .id = "newframe") %>%
filter(Frame <= newframe) %>%
arrange(newframe, Frame) %>%
group_by(newframe, id) %>%
mutate(x_lag = lag(x),
y_lag = lag(y),
tail = last(Frame) - Frame,
# Make the points solid for 1 frame then alpha 0
point_alpha = if_else(tail == 0, 1, 0),
# Make the lines fade out over 3 frames
segment_alpha = pmax(0, (3-tail)/3)) %>%
ungroup()
ggplot(newdf,
aes(x = y, y = x, xend = y_lag, yend = x_lag, group = Frame, color = id)) +
geom_segment(aes(alpha = segment_alpha)) +
geom_point(aes(alpha = point_alpha)) +
scale_alpha(range = c(0,1)) +
guides(alpha = F) +
transition_manual(newframe) +
theme_void() +
scale_color_manual(values = c("red","blue","green"))
# Create the Data Frame
library(tidyverse)
library(ggQC)
set.seed(5555)
Golden_Egg_df <- data.frame(month = 1:12,
egg_diameter = rnorm(n=12, mean=1.5, sd=0.2)) %>%
mutate(grp = c(rep("A", 3), rep("B", 9)))
Golden_Egg_df$egg_diameter[3] <- 5
# Determine the control limit values (red lines)
p <- ggplot(Golden_Egg_df, aes(x = month, y = egg_diameter)) +
geom_point() +
geom_line() +
stat_QC(method = "XmR")
pb <- ggplot_build(p)
thres <- range(pb$data[[3]]$yintercept)
# Circle anything outside the control limits (red lines)
p + geom_point(
data = subset(Golden_Egg_df,
egg_diameter > max(thres) | egg_diameter < min(thres)),
shape = 21,
size = 4,
col = "red"
)
The code chunk above determines the y-values of the control limits (red lines) from the ggplot_build() function. It then draws red circles around outliers. This works great until I facet the plot. It's because the logic of thres <- range(pb$data[[3]]$yintercept) isn't "smart" enough to wade through the different facet groupings.
# ONLY ONE 'Y-INTERCEPT' RANGE HERE TO WORRY ABOUT WITHOUT FACETING
#> $`data`[[3]]
#> yintercept y x label
#> 1 -0.2688471 -0.2688471 -Inf LCL
#> 2 3.7995203 3.7995203 -Inf UCL
#> 3 -0.2688471 -0.2688471 Inf -0.3
#> 4 3.7995203 3.7995203 Inf 3.8
# MULTIPLE 'Y-INTERCEPT' RANGES HERE TO WORRY ABOUT WITH FACETING
#> $`data`[[3]]
#> yintercept y x label
#> 1 -0.8759612 -0.8759612 -Inf LCL
#> 2 4.5303358 4.5303358 -Inf UCL
#> 3 -0.8759612 -0.8759612 Inf -0.9
#> 4 4.5303358 4.5303358 Inf 4.5
#> 5 1.2074161 1.2074161 -Inf LCL
#> 6 1.9521532 1.9521532 -Inf UCL
#> 7 1.2074161 1.2074161 Inf 1.2
#> 8 1.9521532 1.9521532 Inf 2
How do I get my code block below to work properly and circle the outliers? I obviously need a more sophisticated thres2, that can recognize there are different groupings of control limits (red lines) between the different facets.
# Determine the control limit values (red lines)
Golden_Egg_df$egg_diameter[11] <- 5
p2 <- ggplot(Golden_Egg_df, aes(x = month, y = egg_diameter)) +
geom_point() +
geom_line() +
stat_QC(method = "XmR") +
facet_grid(~ grp, scales = "free_x", space = "free_x") +
scale_x_continuous(breaks = 1:12, labels = month.abb)
pb2 <- ggplot_build(p2)
thres2 <- range(pb2$data[[3]]$yintercept)
thres2
#> [1] -2.274056 7.445141
# Circle anything outside the control limits (red lines)
p2 + geom_point(
data = subset(Golden_Egg_df,
egg_diameter > max(thres2) | egg_diameter < min(thres2)),
shape = 21,
size = 4,
col = "red"
)
I think the best way is to get the ranges in the same data.frame as your data. I'am not sure if this is the most elegant solution, but it works with your example:
library(tidyverse)
library(ggQC)
set.seed(5555)
Golden_Egg_df <- data.frame(month = 1:12,
egg_diameter = rnorm(n=12, mean=1.5, sd=0.2)) %>%
mutate(grp = c(rep("A", 3), rep("B", 9)))
Golden_Egg_df$egg_diameter[3] <- 5
Golden_Egg_df$egg_diameter[11] <- 5
# create the plot
p2 <- ggplot(Golden_Egg_df, aes(x = month,
y = egg_diameter)) +
geom_point() +
geom_line() +
stat_QC(method = "XmR") +
facet_grid(~ grp,
scales = "free_x",
space = "free_x") +
scale_x_continuous(breaks = 1:12,
labels = month.abb)
# get all the info about the plot
pb2 <- ggplot_build(p2)
# extract the UCL and LCL for each plot (facet)
Golden_Egg_df <- Golden_Egg_df %>%
mutate(min = ifelse(grp == "A",
min(pb2$data[[3]]$yintercept[1:4]), # LCL of 1st plot
min(pb2$data[[3]]$yintercept[5:8])), # LCL of 1st plot
max = ifelse(grp == "A",
max(pb2$data[[3]]$yintercept[1:4]), # UCL 2nd plot
max(pb2$data[[3]]$yintercept[5:8]))) # UCL 2nd plot
# add the circled outlier
p2 + geom_point(data = subset(Golden_Egg_df,
egg_diameter > max |
egg_diameter < min),
shape = 21,
size = 4,
col = "red")
Cheers, Rico
Having the following sample dataset:
set.seed(20)
N <- 20
df1 <- data.frame(x = rnorm(N),
y = rnorm(N),
grp = paste0('grp_', sample(1:500, N, T)),
lab = sample(letters, N, T))
# x y grp lab
# 1 1.163 0.237 grp_104 w
# 2 -0.586 -0.144 grp_448 y
# 3 1.785 0.722 grp_31 m
# 4 -1.333 0.370 grp_471 z
# 5 -0.447 -0.242 grp_356 o
I want to plot all points but label only subset of them (say, those df1$x>0). It works fine when I use the same color=grp aesthetics for both geom_point and geom_text:
ggplot(df1, aes(x=x,y=y,color=grp))+
geom_point(size=4) +
geom_text(aes(label=lab),data=df1[df1$x>1,],size=5,hjust=1,vjust=1)+
theme(legend.position="none")
But if I want to change points design to fill=grp, colors of labels do not match anymore:
ggplot(df1, aes(x=x,y=y))+
geom_point(aes(fill=grp),size=4,shape=21) +
geom_text(aes(label=lab,color=grp),data=df1[df1$x>1,],size=5,hjust=1,vjust=1)+
theme(legend.position="none")
I understand palette is different because levels of the subset are not the same as levels of the whole dataset. But what would be the simplest solution to enforce using the same palette?
The issue arises from different factor levels for the text and fill colours. We can avoid dropping unused factor levels by using drop = FALSE inside scale_*_discrete:
ggplot(df1, aes(x=x,y=y))+
geom_point(aes(fill=grp),size=4,shape=21) +
geom_text(aes(label=lab,color=grp),data=df1[df1$x>1,],size=5,hjust=1,vjust=1)+
theme(legend.position="none") +
scale_fill_discrete(drop = F) +
scale_colour_discrete(drop = F)
Update
With your real data we need to make sure that grp is in fact a factor.
# Load sample data
load("df1.Rdat")
# Make sure `grp` is a factor
library(tidyverse)
df1 <- df1 %>% mutate(grp = factor(grp))
# Or in base R
# df1$grp = factor(df1$grp)
# Same as before
ggplot(df1, aes(x=x,y=y))+
geom_point(aes(fill=grp),size=4,shape=21) +
geom_text(aes(label=lab,color=grp),data=df1[df1$x>1,],size=5,hjust=1,vjust=1)+
theme(legend.position="none") +
scale_fill_discrete(drop = F) +
scale_colour_discrete(drop = F)
One way is to leave the colour / fill palettes alone, & set all unwanted labels to be transparent instead:
ggplot(df1, aes(x = x, y = y)) +
geom_point(aes(fill = grp), size = 4, shape = 21) +
geom_text(aes(label = lab, color = grp,
alpha = x > 1),
size = 5, hjust = 1, vjust = 1) +
scale_alpha_manual(values = c("TRUE" = 1, "FALSE" = 0)) +
theme(legend.position = "none")
I am trying to add a line to my animation, but I couldn't make it work using the concept of frame. This is a reproducible example:
df <- read.table(header = TRUE, text = 'key value bins maxIntensity
A 4 0 1
A 1 1 1
A 0 2 1
B 3 0 2
B 2 1 2
B 5 2 2
D 2 0 1
D 3 1 1
D 0 2 1')
the animation can be created using gganimate package:
library('animation')
library('gganimate')
par(bg = "white")
g <- ggplot(df, aes(xmin = df$bins, xmax = df$bins + 1, ymin = 0, ymax = df$value, frame = df$key))
g <- g + geom_rect(fill=alpha("Orange", alpha = 1))
g <- g + labs(title = "Test Histogram")
g <- g + scale_y_continuous(labels = scales::comma)
gganimate(g, ani.width=400, ani.height=400, interval = .4, "test.gif")
Which works just fine.
Now I would like to add a line, at a different location for each frame. The location is specified in df$maxIntensity.
So, I think I should add this:
g <- g + geom_vline(xintercept = df$maxIntensity, lty=3, color = "black")
but that simply adds all the lines at once, at each frame. Any idea how add one line to each frame?
Making a reproducible example made me have a much faster code on which I could try a plenty of different options. (My original code would take ~10 minutes to show me any results.)
So, the key is to add frame again to geom_vline:
g <- g + geom_vline(aes(xintercept = df$maxIntensity, frame = df$key))
So, the code would look like:
par(bg = "white")
g <- ggplot(df, aes(xmin = df$bins, xmax = df$bins + 1, ymin = 0, ymax = df$value, frame = df$key))
g <- g + geom_rect(fill=alpha("Orange", alpha = 1))
g <- g + geom_vline(aes(xintercept = df$maxIntensity, frame = df$key), lty=2, size = 1, color = "black")
g <- g + labs(title = "Test Histogram")
g <- g + scale_y_continuous(labels = scales::comma)
gganimate(g, ani.width=400, ani.height=400, interval = .4, "test.gif")