add significance asterisk manually in ggplot2 - r

I have a time series DataFrame in R.
There are 5 groups (A–E) and the data of each group (Variable) is acquired at 3 different timepoints.
Group Timepoint Variable
A 1 1.4705745
B 1 4.6090900
C 1 2.2480962
D 1 1.6443650
E 1 4.4812444
A 2 0.8026552
B 2 4.7803944
C 2 1.3743527
D 2 4.0399467
E 2 3.5651057
A 3 4.7275369
B 3 2.4491532
C 3 3.9508347
D 3 3.4278974
E 3 0.6917490
I made a line plot using the following code,
# Line plot of Variable against Timepoint, one coloured line per Group,
# with the y axis fixed to the range 0-6.
plot_data <- ggplot(
  data,
  aes(x = Timepoint, y = Variable, color = Group, group = Group)
) +
  geom_line() +
  scale_color_discrete("Group") +
  scale_y_continuous(limits = c(0, 6))
plot_data
but I also want to add significance asterisks, for instance, like that.
Is there any way to add asterisk to the plot manually?

You can use annotate like this:
library(ggplot2)
# Same line plot as in the question, plus manually placed asterisks.
plot_data <- ggplot(
  data,
  aes(x = Timepoint, y = Variable, color = Group, group = Group)
) +
  geom_line() +
  scale_color_discrete("Group") +
  scale_y_continuous(limits = c(0, 6)) +
  # Each annotate() call draws two asterisks (y = 5 and y = 5.2) at one
  # timepoint; the colours are given in the same order as the y values.
  annotate(
    "text",
    x = 1, y = c(5, 5.2),
    label = '"*"', parse = TRUE,
    color = c("pink", "yellow")
  ) +
  annotate(
    "text",
    x = 3, y = c(5, 5.2),
    label = '"*"', parse = TRUE,
    color = c("red", "green")
  )
plot_data
Created on 2022-09-02 with reprex v2.0.2
To get the exact same colors as in standard ggplot you can check that by using hue_pal like this:
library(ggplot2)
library(scales)
# Display the five default ggplot2 hues so their hex codes can be read off.
show_col(hue_pal()(5))
# Line plot with asterisks coloured using hex codes from that palette.
plot_data <- ggplot(
  data,
  aes(x = Timepoint, y = Variable, color = Group, group = Group)
) +
  geom_line() +
  scale_color_discrete("Group") +
  scale_y_continuous(limits = c(0, 6)) +
  annotate(
    "text",
    x = 1, y = c(5, 5.2),
    label = '"*"', parse = TRUE,
    color = c("#A3A500", "#E76BF3")
  ) +
  annotate(
    "text",
    x = 3, y = c(5, 5.2),
    label = '"*"', parse = TRUE,
    color = c("#F8766D", "#00BF7D")
  )
plot_data
Created on 2022-09-02 with reprex v2.0.2

Related

How to plot several columns of a matrix in the same plot in R?

I have a matrix called XY with the following entries:
0 1 1 3
2 4 2 3
4 2 3 5
6 2 5 6
I want to plot (in the same graph) columns 1 and 2 as the x and y axes respectively and columns 3 and 4 being the x and y axes respectively. I am trying the following code:
# First series (columns 1 and 2) opens the plot as a red line ...
plot(x = XY[, 1], y = XY[, 2], type = "l", col = "red")
# ... the second series (columns 3 and 4) is overlaid as a green line ...
lines(x = XY[, 3], y = XY[, 4], col = "green")
# ... and the data points of both series are drawn on top.
points(x = XY[, 1], y = XY[, 2], type = "p", col = "red")
points(x = XY[, 3], y = XY[, 4], type = "p", col = "green")
is there a more general way to do this graph without having to repeat the same code?
Thanks
If you don't mind using ggplot rather than base plot (given the tags I think you don't), you can do:
library(ggplot2)
# ggplot() needs a data frame with named columns.
XY <- data.frame(XY)
colnames(XY)[1:4] <- paste0("var", 1:4)
# One point layer and one line layer per (x, y) column pair.
ggplot(data = XY) +
  geom_point(aes(x = var1, y = var2), color = "red") +
  geom_line(aes(x = var1, y = var2), color = "red") +
  geom_point(aes(x = var3, y = var4), color = "green") +
  geom_line(aes(x = var3, y = var4), color = "green")
The column name is a suggestion, maybe you have better variable names
Update
In order to have a legend, long formatted data are easier to handle. You can try something like that:
# Stack the two (x, y) column pairs into long format with a group label.
# Each piece is built with identical column names so that rbind() can
# combine them; this works whether XY is a matrix or a data frame.
df <- rbind(
  data.frame(x = XY[, 1], y = XY[, 2], group = "group1"),
  data.frame(x = XY[, 3], y = XY[, 4], group = "group2")
)
# Column names ("x", "y", "group") used to build the aesthetics.
cols <- colnames(df)
Change "group1" and "group2" to relevant category names.
And you can plot like that:
# Map x, y and colour from the column names stored in `cols`.
# aes_string() takes the names as character strings.
ggplot(
  data = df,
  aes_string(x = cols[1], y = cols[2], col = cols[3])
) +
  geom_point() +
  geom_line() +
  labs(color = "My colors")
I use aes_string because column names are quoted this time.
Try the base R graphics function matplot to avoid repeating code, which both the base R and the ggplot examples do in their own different ways.
# Columns 1 and 3 hold the x values, columns 2 and 4 the matching y values;
# matplot() pairs them column by column.
matplot(
  XY[, c(1, 3)], XY[, c(2, 4)],
  type = "l", lty = 1, col = c("red", "green"), pch = 1,
  xlab = "X label", ylab = "Y label"
)
# Overlay the data points in the same colours.
matpoints(
  XY[, c(1, 3)], XY[, c(2, 4)],
  type = "p", pch = 1, col = c("red", "green")
)
This gives you:

Free colour scales in facet_grid

Say I have the following data frame:
# Set seed for RNG
set.seed(33550336)
# Create toy data frame
# Named lookup vectors: coordinates per location, scale factor per variable.
loc_x <- c(a = 1, b = 2, c = 3)
loc_y <- c(a = 3, b = 2, c = 1)
scaling <- c(temp = 100, sal = 10, chl = 1)
df <- expand.grid(
  loc_name = letters[1:3],
  variables = c("temp", "sal", "chl"),
  season = c("spring", "autumn")
) %>%
  mutate(
    # expand.grid() returns factors. Indexing a named vector with a factor
    # uses its integer codes, not its labels, so convert to character to
    # look values up by name regardless of level order.
    loc_x = loc_x[as.character(loc_name)],
    loc_y = loc_y[as.character(loc_name)],
    # One uniform draw per row, then scaled by the row's variable.
    value = runif(n()),
    value = value * scaling[as.character(variables)]
  )
which looks like,
# > head(df)
# loc_name variables season loc_x loc_y value
# 1 a temp spring 1 3 86.364697
# 2 b temp spring 2 2 35.222573
# 3 c temp spring 3 1 52.574082
# 4 a sal spring 1 3 0.667227
# 5 b sal spring 2 2 3.751383
# 6 c sal spring 3 1 9.197086
I want to plot these data in a facet grid using variables and season to define panels, like this:
# value per location, one panel per variable (rows) and season (columns).
g <- ggplot(df) +
  geom_point(aes(x = loc_name, y = value), size = 5) +
  facet_grid(variables ~ season)
g
As you can see, different variables have very different scales. So, I use scales = "free" to account for this.
# Same panels, but with axis scales allowed to vary between panels.
g <- ggplot(df) +
  geom_point(aes(x = loc_name, y = value), size = 5) +
  facet_grid(variables ~ season, scales = "free")
g
Mucho convenient. Now, say I want to do this, but plot the points by loc_x and loc_y and have value represented by colour instead of y position:
# Points placed by location coordinates, with value shown as colour on a
# single diverging scale centred at 50.
g <- ggplot(df) +
  geom_point(aes(x = loc_x, y = loc_y, colour = value), size = 5) +
  facet_grid(variables ~ season, scales = "free") +
  scale_colour_gradient2(
    low = "#3366CC",
    mid = "white",
    high = "#FF3300",
    midpoint = 50
  )
g
Notice that the colour scales are not free and, like the first figure, values for sal and chl cannot be read easily.
My question: is it possible to do an equivalent of scales = "free" but for colour, so that each row (in this case) has a separate colour bar? Or, do I have to plot each variable (i.e., row in the figure) and patch them together using something like cowplot?
Using the development version of dplyr:
library(dplyr)
library(purrr)
library(ggplot2)
library(cowplot)
# Split the data into one piece per variables x season combination, draw
# each piece as its own plot (so each gets its own colour bar, centred on
# that piece's median value), then arrange the plots in a two-column grid.
df %>%
  group_split(variables, season) %>%
  map(function(panel_df) {
    ggplot(panel_df, aes(loc_x, loc_y, color = value)) +
      geom_point(size = 5) +
      scale_colour_gradient2(
        low = "#3366CC",
        mid = "white",
        high = "#FF3300",
        midpoint = median(panel_df$value)
      ) +
      # Single-line strip label combining variable and season.
      facet_grid(
        ~ variables + season,
        labeller = function(x) label_value(x, multi_line = FALSE)
      )
  }) %>%
  plot_grid(plotlist = ., align = "hv", ncol = 2)

Enforce same color palette for `color` and `fill` of a subset of data

Having the following sample dataset:
# Reproducible sample: N random points, each with a group id drawn from
# grp_1 .. grp_500 and a single-letter label (both sampled with replacement).
set.seed(20)
N <- 20
df1 <- data.frame(
  x = rnorm(N),
  y = rnorm(N),
  # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
  grp = paste0("grp_", sample(1:500, N, replace = TRUE)),
  lab = sample(letters, N, replace = TRUE)
)
# x y grp lab
# 1 1.163 0.237 grp_104 w
# 2 -0.586 -0.144 grp_448 y
# 3 1.785 0.722 grp_31 m
# 4 -1.333 0.370 grp_471 z
# 5 -0.447 -0.242 grp_356 o
I want to plot all points but label only a subset of them (say, those with df1$x > 1). It works fine when I use the same color=grp aesthetic for both geom_point and geom_text:
# All points coloured by group; labels only for the rows with x > 1,
# inheriting the same colour mapping.
ggplot(df1, aes(x = x, y = y, color = grp)) +
  geom_point(size = 4) +
  geom_text(
    aes(label = lab),
    data = df1[df1$x > 1, ],
    size = 5, hjust = 1, vjust = 1
  ) +
  theme(legend.position = "none")
But if I want to change points design to fill=grp, colors of labels do not match anymore:
# Points filled by group (shape 21); labels for the x > 1 subset are
# coloured by group through a separate colour scale.
ggplot(df1, aes(x = x, y = y)) +
  geom_point(aes(fill = grp), size = 4, shape = 21) +
  geom_text(
    aes(label = lab, color = grp),
    data = df1[df1$x > 1, ],
    size = 5, hjust = 1, vjust = 1
  ) +
  theme(legend.position = "none")
I understand palette is different because levels of the subset are not the same as levels of the whole dataset. But what would be the simplest solution to enforce using the same palette?
The issue arises from different factor levels for the text and fill colours. We can avoid dropping unused factor levels by using drop = FALSE inside scale_*_discrete:
# Filled points for all rows, coloured labels for the x > 1 subset.
ggplot(df1, aes(x = x, y = y)) +
  geom_point(aes(fill = grp), size = 4, shape = 21) +
  geom_text(
    aes(label = lab, color = grp),
    data = df1[df1$x > 1, ],
    size = 5, hjust = 1, vjust = 1
  ) +
  theme(legend.position = "none") +
  # Keep unused factor levels in both scales so fill and colour share the
  # same level-to-colour assignment. FALSE is spelled out because `F` is a
  # reassignable variable.
  scale_fill_discrete(drop = FALSE) +
  scale_colour_discrete(drop = FALSE)
Update
With your real data we need to make sure that grp is in fact a factor.
# Load sample data
load("df1.Rdat")
# Make sure `grp` is a factor
library(tidyverse)
df1 <- df1 %>% mutate(grp = factor(grp))
# Or in base R
# df1$grp = factor(df1$grp)
# Same as before
ggplot(df1, aes(x = x, y = y)) +
  geom_point(aes(fill = grp), size = 4, shape = 21) +
  geom_text(
    aes(label = lab, color = grp),
    data = df1[df1$x > 1, ],
    size = 5, hjust = 1, vjust = 1
  ) +
  theme(legend.position = "none") +
  # Keep unused factor levels in both scales. FALSE is spelled out because
  # `F` is a reassignable variable.
  scale_fill_discrete(drop = FALSE) +
  scale_colour_discrete(drop = FALSE)
One way is to leave the colour / fill palettes alone, & set all unwanted labels to be transparent instead:
# Label every point, but map alpha to the condition x > 1 so that labels
# for the remaining points are fully transparent.
ggplot(df1, aes(x = x, y = y)) +
  geom_point(aes(fill = grp), size = 4, shape = 21) +
  geom_text(
    aes(label = lab, color = grp, alpha = x > 1),
    size = 5, hjust = 1, vjust = 1
  ) +
  scale_alpha_manual(values = c("TRUE" = 1, "FALSE" = 0)) +
  theme(legend.position = "none")

ggplot2: Combining group, color and linetype

I have a data set with 3 factors (condition, measure and time) and would like to plot them using the x-axis, the color/group and the linetype.
As an example, my data looks like this:
# Example data: 2 conditions x 2 measures x 2 time points, scores 1 to 8.
DT <- data.frame(
  condition = rep(c("control", "experimental"), each = 4),
  measure = rep(c("A", "A", "B", "B"), 2),
  time = rep(c("pre-test", "post-test"), 4),
  score = 1:8
)
> DT
condition measure time score
1 control A pre-test 1
2 control A post-test 2
3 control B pre-test 3
4 control B post-test 4
5 experimental A pre-test 5
6 experimental A post-test 6
7 experimental B pre-test 7
8 experimental B post-test 8
My goal is to draw a graph like this:
I tried:
# Attempt from the question: lines are grouped by measure only, while
# linetype is mapped to condition, so linetype varies within a group.
# This call produces the geom_path error quoted in the question.
ggplot(DT, aes(time, score, group = measure, color = measure, linetype = condition)) +
geom_line() +
geom_point()
But it returns this error:
Error: geom_path: If you are using dotted or dashed lines, colour, size and linetype must be constant over the line
What am I missing?
You want to use
# Group by every measure x condition combination so that colour and
# linetype are constant along each line.
ggplot(
  DT,
  aes(
    time, score,
    group = interaction(measure, condition),
    color = measure,
    linetype = condition
  )
) +
  geom_line() +
  geom_point()
because the actual grouping is not only by measure but also by condition. When grouping by measure alone, I guess it's asking for kind of parallelograms rather than lines.
# Example data: 2 conditions x 2 measures x 2 time points, scores 1 to 8.
DT <- data.frame(
  condition = rep(c("control", "experimental"), each = 4),
  measure = rep(c("A", "A", "B", "B"), 2),
  time = rep(c("pre-test", "post-test"), 4),
  score = 1:8
)
# Wide layout: one row per condition x measure, with the scores in the
# columns `pre-test` and `post-test`. pivot_wider() replaces the
# superseded spread().
DT_wide <- tidyr::pivot_wider(DT, names_from = time, values_from = score)
ggplot() +
  # One segment per row of the wide data, from the pre-test score to the
  # post-test score.
  geom_segment(
    data = DT_wide,
    aes(
      x = "pre-test", xend = "post-test",
      y = `pre-test`, yend = `post-test`,
      color = measure,
      linetype = condition
    )
  ) +
  # Points are drawn from the original long data.
  geom_point(
    data = DT,
    aes(time, score, color = measure)
  )

How to add a moving vline to gganimate in addition to the animated histogram?

I am trying to add a line to my animation, but I couldn't make it work using the concept of frame. This is a reproducible example:
# Example data parsed from inline text: one row per key and bin, with the
# bar height in `value` and the per-key line position in `maxIntensity`.
df <- read.table(
  text = 'key value bins maxIntensity
A 4 0 1
A 1 1 1
A 0 2 1
B 3 0 2
B 2 1 2
B 5 2 2
D 2 0 1
D 3 1 1
D 0 2 1',
  header = TRUE
)
the animation can be created using gganimate package:
library('animation')
library('gganimate')
par(bg = "white")
# One rectangle per bin, spanning [bins, bins + 1] horizontally and
# [0, value] vertically; `key` selects the animation frame.
# Columns are referenced by bare name inside aes() so they are looked up
# in the plot data instead of being pulled from the global `df` with `$`.
g <- ggplot(
  df,
  aes(xmin = bins, xmax = bins + 1, ymin = 0, ymax = value, frame = key)
)
g <- g + geom_rect(fill = alpha("Orange", alpha = 1))
g <- g + labs(title = "Test Histogram")
g <- g + scale_y_continuous(labels = scales::comma)
# Render the frames and write them to test.gif.
gganimate(g, "test.gif", ani.width = 400, ani.height = 400, interval = .4)
Which works just fine.
Now I would like to add a line, at a different location for each frame. The location is specified in df$maxIntensity.
So, I think I should add this:
# Attempt from the question: xintercept is passed outside aes() and no
# frame is given for this layer, so every line shows up in every frame.
g <- g + geom_vline(xintercept = df$maxIntensity, lty=3, color = "black")
but that simply adds all the lines at once, at each frame. Any idea how to add one line to each frame?
Making a reproducible example gave me much faster code on which I could try plenty of different options. (My original code would take ~10 minutes to show me any results.)
So, the key is to add frame again to geom_vline:
# Map both the line position and the frame inside aes(), using bare column
# names so they are looked up in the plot data rather than via `df$`.
g <- g + geom_vline(aes(xintercept = maxIntensity, frame = key))
So, the code would look like:
par(bg = "white")
# One rectangle per bin, spanning [bins, bins + 1] horizontally and
# [0, value] vertically; `key` selects the animation frame.
# Columns are referenced by bare name inside aes() so they are looked up
# in the plot data instead of being pulled from the global `df` with `$`.
g <- ggplot(
  df,
  aes(xmin = bins, xmax = bins + 1, ymin = 0, ymax = value, frame = key)
)
g <- g + geom_rect(fill = alpha("Orange", alpha = 1))
# Dashed vertical line at maxIntensity, given the same frame mapping as
# the bars so that each frame shows only its own line.
g <- g + geom_vline(
  aes(xintercept = maxIntensity, frame = key),
  lty = 2, size = 1, color = "black"
)
g <- g + labs(title = "Test Histogram")
g <- g + scale_y_continuous(labels = scales::comma)
# Render the frames and write them to test.gif.
gganimate(g, "test.gif", ani.width = 400, ani.height = 400, interval = .4)

Resources