Replicating a color-coded spaghetti plot [duplicate] - r

This question already has answers here:
Create a split violin plot with paired points and proper orientation
(2 answers)
Closed 10 months ago.
This post was edited and submitted for review 10 months ago and failed to reopen the post:
Original close reason(s) were not resolved
In this article: https://www.nature.com/articles/s41591-022-01744-z.epdf
I noticed an interesting plot:
2
Is there a simple way to do this in R?
EDIT: I am aware there are similar questions but none deal with the color-coding scheme that marks the improved / worsened cases.

The see package has a half violin geom like that:
ggplot(data = data.frame(id = rep(1:10, 2),
time = rep(c("A", "B"), each = 10),
value = runif(20)),
aes(time, value)) +
see::geom_violinhalf(aes(group = time, fill = time),
trim = FALSE, flip = 1, alpha = 0.2) +
geom_point(aes(color = time)) +
geom_line(aes(group = id))

You can get arbitrarily close to a chosen chart using ggplot:
ggplot(df, aes(xval, modularity, color = group)) +
geom_polygon(data = densdf, aes( x = y, y = x, fill = group), colour = NA) +
scale_fill_manual(values = c('#c2c2c2', '#fbc5b4')) +
scale_color_manual(values = c('#676767', '#ef453e')) +
geom_path(data = densdf, aes(x = y, y = x), size = 2) +
geom_segment(color = '#c2c2c2', inherit.aes = FALSE, size = 1.5,
data = df2[df2$`Post-treatment` > df2$Baseline,], alpha = 0.8,
aes(x = 1, xend = 2, y = Baseline, yend = `Post-treatment`)) +
geom_segment(color = '#ef453e', inherit.aes = FALSE, size = 1.5, alpha = 0.8,
data = df2[df2$`Post-treatment` < df2$Baseline,],
aes(x = 1, xend = 2, y = Baseline, yend = `Post-treatment`)) +
geom_point(size = 3) +
theme_classic() +
scale_x_continuous(breaks = 1:2, labels = c('Baseline', 'Post-treatment'),
name = '', expand = c(0.3, 0)) +
theme(legend.position = 'none',
text = element_text(size = 18, face = 2),
panel.background = element_rect(fill = NA, color = 'black', size = 1.5))
As long as you are prepared to do some work getting your data into the right format:
set.seed(4)
mod <- c(rnorm(16, 2.5, 0.25))
df <- data.frame(modularity = c(mod, mod + rnorm(16, -0.25, 0.2)),
xval = rep(c(1, 2), each = 16),
group = rep(c('Baseline', 'Post-treatment'), each = 16),
id = factor(rep(1:16, 2)))
df2 <- df %>% tidyr::pivot_wider(id_cols = id, names_from = group,
values_from = modularity)
BLdens <- as.data.frame(density(df$modularity[1:16])[c('x', 'y')])
PTdens <- as.data.frame(density(df$modularity[17:32])[c('x', 'y')])
BLdens$y <- 1 - 0.25 * BLdens$y
PTdens$y <- 2 + 0.25 * PTdens$y
densdf <- rbind(BLdens, PTdens)
densdf$group <- rep(c('Baseline', 'Post-treatment'), each = nrow(BLdens))

Related

Geom_label_repel not properly referencing to the sec.axis

I am working with a ggplot that has two axis: one for the geom_bar component, and the other for the geom_linecomponent. And for this, I am using the sec.axis() command.
I wanted to insert a box to provide the last value of the geom_line component, but I am struggling because I believe that while using the commmand geom_label_repel, the aesthetic being used, is referent to the geom_barcomponent.
I'll provide a similar data to illustrate what I am saying.
df <- data.frame(day = as.character(seq(from = 1, to = 100, by = 1)),
total = rbinom(n=100,30,0.5),
prop = runif(100))
df <- df %>% arrange(df, by = day)
df$`percentage` <- label_percent(accuracy = 0.01)(df$prop)
ggplot(data = df,
aes(x = day, y = total)) +
geom_bar(stat = "identity", fill = "lightgreen", width = 0.35) +
geom_line(data = df,
aes(x = day, y = (prop)*15, group = 1),
color = "red", size = 1,inherit.aes = TRUE) +
scale_y_continuous(
labels = function(x) format(x, scientific = FALSE),
#breaks = seq(from = 0, to = 10000000,by = 100000),
sec.axis = sec_axis(trans = ~./15,
name = "Secondary axis",
breaks = seq(from = 0, to = 10, by = 0.1),
scales::percent))+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5))+
geom_label_repel(data=df[nrow(df),],
aes(x = day,
y = prop*1,
label = round(prop*100,2)),
color = 'red',
segment.alpha = 0.5) +
scale_x_discrete(expand = expansion(add = c(0, 7)))
Which outputs the following image:
As you can tell, it works well in regards to obtaining the last number of the prop column, which is intended, but it is not automatically placed beside the geom_line.
I have tried messing with the nudge_xand nudge_y commands but it didn't lead me to anywhere, given the fact that I want to have this "number placement" automatic.
Can anyone help?
The sec.axis is in some ways just decorative. ggplot is plotting everything by the main axis. To make the label follow the line, make the same transform as in your geom_line call (y = prop*15):
library(tidyverse)
library(ggrepel)
df <- data.frame(day = as.character(seq(from = 1, to = 100, by = 1)),
total = rbinom(n=100,30,0.5),
prop = runif(100))
df <- df %>% arrange(df, by = day)
df$`percentage` <- scales::label_percent(accuracy = 0.01)(df$prop)
ggplot(data = df,
aes(x = day, y = total)) +
geom_bar(stat = "identity", fill = "lightgreen", width = 0.35) +
geom_line(data = df,
aes(x = day, y = (prop)*15, group = 1),
color = "red", size = 1,inherit.aes = TRUE) +
scale_y_continuous(
labels = function(x) format(x, scientific = FALSE),
#breaks = seq(from = 0, to = 10000000,by = 100000),
sec.axis = sec_axis(trans = ~./15,
name = "Secondary axis",
breaks = seq(from = 0, to = 10, by = 0.1),
scales::percent))+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5))+
geom_label_repel(data=df[nrow(df),],
aes(x = day,
y = prop*15,
label = round(prop*100,2)),
color = 'red',
segment.alpha = 0.5) +
scale_x_discrete(expand = expansion(add = c(0, 7)))
#> Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
#> ℹ Please use `linewidth` instead.

How to predefine legend colours based on value range using ggplot2 and RColorBrewer?

I have some data from a range of tests that I'm calculating STEN scores for. I'm aiming to visualise this data in the form of a circular bar plot and would like to set the colour gradient based on a STEN score range. For example, a score of 0-2 would be a very light colour, 2.1-4 light, 4.1-6 moderate, 6.1-8 dark and 8.1-10 very dark. My code below uses the RColorBrewer package and the "YlGn" palette, but I'm stuck on how I can predefine the colour scheme based on the example mentioned above and set this in the plot legend. The example below produces a circular bar plot containing a lowest STEN score of 4.8, so I would like this to be reflected as the moderate colour, where currently its the lightest. I essentially want the legend to show all five STEN score ranges irrespective of whether someone's data scores within each range. Hope this makes sense.
library(tidyverse)
library(RColorBrewer)
set.seed(50)
dat <- data.frame(
par = paste("par", 1:15),
test_1 = round(rnorm(15, mean = 30, sd = 5), 1),
test_2 = round(rnorm(15, mean = 30, sd = 5), 1),
test_3 = round_any(rnorm(15, mean = 90, sd = 5), 2.5),
test_4 = round(rnorm(15, mean = 5.4, sd = 0.3), 1),
test_5 = round(rnorm(15, mean = 17, sd = 1.5), 1)
)
sten_dat <- dat %>%
mutate_if(is.numeric, scale) %>%
mutate(across(c(2:6), ~ . * 2 + 5.5)) %>%
mutate(across(where(is.numeric), round, 1)) %>%
pivot_longer(!par, names_to = "test", values_to = "sten") %>%
filter(par == "par 1")
ggplot(sten_dat) +
geom_col(aes(x = str_wrap(test), y = sten, fill = sten),
position = "dodge2", alpha = 0.7, show.legend = TRUE) +
coord_polar() +
scale_y_continuous(limits = c(-1, 11), breaks = seq(0, 10, 2)) +
scale_fill_gradientn(colours = brewer.pal(name = "YlGn", n = 5))`
Simply add limits to your fill scale:
ggplot(sten_dat) +
geom_col(aes(x = str_wrap(test), y = sten, fill = sten),
position = "dodge2", alpha = 0.7, show.legend = TRUE) +
coord_polar() +
scale_y_continuous(limits = c(-1, 11), breaks = seq(0, 10, 2)) +
scale_fill_gradientn(colours = brewer.pal(name = "YlGn", n = 5),
limits = c(0, 10))
If you want the colors to be clearly "binned" in the way you describe, you can use scale_fill_stepn instead of scale_fill_gradientn
ggplot(sten_dat) +
geom_col(aes(x = str_wrap(test), y = sten, fill = sten),
position = "dodge2", alpha = 0.7, show.legend = TRUE) +
scale_y_continuous(limits = c(-1, 11), breaks = seq(0, 10, 2)) +
scale_fill_stepsn(colours = brewer.pal(name = "YlGn", n = 5),
limits = c(0, 10), breaks = 0:5 * 2) +
geomtextpath::coord_curvedpolar() +
theme_minimal() +
theme(axis.text.x = element_text(size = 16, face = 2),
panel.grid.major.x = element_blank())

Adding different geom_segment to every facet

I have the code below, and it works fine. The problem is, I would like to add "k" and plot a straight line similar to "z", but "k" is a vector of different numbers. Each element in "k" should be plotted as a line on the 3 facets created. If k was a singular value, I would just repeat the geom_segment() command with different y limits. Is there an easy way to do this? The final output should look like attached, assuming I could draw straight lines.
x <- iris[-1:-3]
bw <- 1
nbin <- 100
y <- head(iris, 50)[2]
z <- 1
k <- c(2, 3, 4)
ggplot(x, aes(x = Petal.Width)) +
geom_density(aes(y = bw *..count.., fill = Species), size = 1, alpha = 0.4) +
geom_segment(aes(x = 5, y = 250, xend = z, yend = 250, color = "red")) +
facet_wrap(~Species)+
scale_x_continuous(labels = scales::math_format(10^.x), limits = c(0, 5), expand = c(0,0)) +
scale_y_continuous(expand = c(0,0), limits = c(0, NA)) +
annotation_logticks(sides = "b", short=unit(-1,"mm"), mid=unit(-2,"mm"), long=unit(-3,"mm")) +
coord_cartesian(clip='off') + theme(panel.background = element_blank(),
panel.border = element_rect(colour = "black", fill=NA))
you can try this. Assuming that your plot is saved as p1.
k_data = data.frame(k, Species = levels(x$Species))
p1 + geom_segment(data = k_data, aes(x =5, y = 200, xend = k, yend = 200),
color = "blue", inherit.aes = F)
The idea is to create a dataframe with the columns k and Species and use this data exclusivley in a geom by setting inherit.aes = F
In this solution, the value of k is made part of the data set being plotted through a pipe. It is a temporary modification of the data set, since it is not assigned back to it nor to any other data set.
library(ggplot2)
library(dplyr)
x <- iris[-1:-3]
str(x)
bw <- 1
nbin <- 100
y <- head(iris, 50)[2]
z <- 1
k <- c(2, 3, 4)
x %>%
mutate(k = rep(k, each = 50)) %>%
ggplot(aes(x = Petal.Width)) +
geom_density(aes(y = bw *..count.., fill = Species), size = 1, alpha = 0.4) +
geom_segment(aes(x = 5, y = 250, xend = z, yend = 250), color = "red") +
geom_segment(aes(x = 5, y = 200, xend = k, yend = 200), color = "blue") +
facet_wrap(~Species)+
scale_x_continuous(labels = scales::math_format(10^.x), limits = c(0, 5), expand = c(0,0)) +
scale_y_continuous(expand = c(0,0), limits = c(0, NA)) +
annotation_logticks(sides = "b", short=unit(-1,"mm"), mid=unit(-2,"mm"), long=unit(-3,"mm")) +
coord_cartesian(clip='off') +
theme(panel.background = element_blank(),
panel.border = element_rect(colour = "black", fill=NA))

how to ggplot with upper and lower bound as shaded using facet_wrap in R?

I am trying to automate the process of plotting data using ggplot and the facet_wrap functionality. I want a single y-axis label instead individual plot Ob (i.e., A_Ob, B_ob etc) and also a single X-axis not all the plots having label for x-axis such as below. Below is my sample code using gridextra package. However, i would like to do it through facet_wrap as i have many other plots to draw which i think will save me sometime.
graphics.off()
rm(list = ls())
library(tidyverse)
library(gridExtra)
G1 = data.frame(A_Ob = runif(1000, 5, 50), A_Sim = runif(1000, 3,60), A_upper = runif(1000, 10,70), A_lower = runif(1000, 0, 45 ),
B_Ob = runif(1000, 5, 50), B_Sim = runif(1000, 3,60), B_upper = runif(1000, 10,70), B_lower = runif(1000, 0, 45 ),
C_Ob = runif(1000, 5, 50), C_Sim = runif(1000, 3,60), C_upper = runif(1000, 10,70), C_lower = runif(1000, 0, 45 ),
D_Ob = runif(1000, 5, 50), D_Sim = runif(1000, 3,60), D_upper = runif(1000, 10,70), D_lower = runif(1000, 0, 45 ),
Pos = 1:1000)
A1 = ggplot(data = G1, aes(x = Pos))+
geom_line(aes(y = A_Ob), col = "black")+
geom_line(aes(y = A_Sim), col = "blue")+
geom_vline(xintercept = 750, color = "red", size=1.5)+
geom_ribbon(aes(ymin = A_upper, ymax = A_lower), fill = "grey70")
B1 = ggplot(data = G1, aes(x = Pos))+
geom_line(aes(y = B_Ob), col = "black")+
geom_line(aes(y = B_Sim), col = "blue")+
geom_vline(xintercept = 750, color = "red", size=1.5)+
geom_ribbon(aes(ymin = B_upper, ymax = B_lower), fill = "grey70")
C1 = ggplot(data = G1, aes(x = Pos))+
geom_line(aes(y = C_Ob), col = "black")+
geom_line(aes(y = C_Sim), col = "blue")+
geom_vline(xintercept = 750, color = "red", size=1.5)+
geom_ribbon(aes(ymin = C_upper, ymax = C_lower), fill = "grey70")
D1 = ggplot(data = G1, aes(x = Pos))+
geom_line(aes(y = D_Ob), col = "black")+
geom_line(aes(y = D_Sim), col = "blue")+
geom_vline(xintercept = 750, color = "red", size=1.5)+
geom_ribbon(aes(ymin = D_upper, ymax = D_lower), fill = "grey70")
grid.arrange(A1,B1,C1,D1, nrow = 4)
Here is the result of the code
You need to reshape your dataframe into a longer format and separate values for Ob, Sim, upper and lower.
Using the function melt from data.table package can help you to achieve this:
library(data.table)
setDT(G1)
Ob_cols = grep("_Ob",colnames(G1),value = TRUE)
Sim_cols = grep("_Sim",colnames(G1),value = TRUE)
Upper_cols = grep("_upper",colnames(G1), value = TRUE)
Lower_cols = grep("_lower", colnames(G1), value = TRUE)
g.m <- melt(G1, measure = list(Ob_cols,Sim_cols,Upper_cols,Lower_cols), value.name = c("OBS","SIM","UP","LOW"))
levels(g.m$variable) <- c("A","B","C","D")
Pos variable OBS SIM UP LOW
1: 1 A 5.965488 29.167666 26.66783 29.97259
2: 2 A 23.855719 8.570245 43.75830 30.65616
3: 3 A 16.947887 51.201047 15.20758 39.76122
4: 4 A 49.883306 3.715319 34.38066 20.73177
5: 5 A 5.021938 3.102880 30.05036 32.05123
6: 6 A 19.887176 15.400853 53.67156 28.54982
and now, you can plot it:
library(ggplot2)
ggplot(g.m, aes(x = Pos))+
geom_line(aes(y = OBS), color = "black")+
geom_line(aes(y = SIM), color = "blue")+
geom_vline(xintercept = 750,color = "red", size = 1.5)+
geom_ribbon(aes(ymin = UP, ymax = LOW), fill = "grey70")+
facet_grid(variable~.)
EDIT: Adding annotations & renaming labels
To rename and replace facet labels, you can re-define levels of variable and use facet_wrap instead of facet_grid using ncol = 1 as argument.
To add multiple annotations on a single panel, you need to define a dataframe that you will use in geom_text.
Altogether, you have to do:
# renaming names of each facets:
levels(g.m$variable) <- c("M1","M2","M3","M4")
# Defining annotations to add:
df_text <- data.frame(label = c("Calibration", "Validation"),
x = c(740,760),
y = c(65,65),
hjust = c(1,0),
variable = factor("M1", levels = c("M1","M2","M3","M4")))
# Plotting
ggplot(g.m, aes(x = Pos))+
geom_line(aes(y = OBS), color = "black")+
geom_line(aes(y = SIM), color = "blue")+
geom_vline(xintercept = 750,color = "red", size = 1.5)+
geom_ribbon(aes(ymin = UP, ymax = LOW), fill = "grey70")+
facet_wrap(variable~., ncol = 1)+
theme(strip.text.x = element_text(hjust = 0),
strip.background = element_rect(fill = "white"))+
geom_text(data = df_text, aes(x = x, y = y, label = label, hjust = hjust), color = "red")
Does it look what you are expecting ?

Drawing elements (arrows & circle) in ggplot (R) to show the difference between two bars

I am trying to create a plot in R using ggplot that shows the difference between my two bars in a nice way.
I found an example that did part of what I wanted, but I have two major problems:
It is based on comparing groups of bars, but I only have two, so I added one group with both of them.
I would like to draw the arrow in nicer shape. I attached an image.
Code:
transactions <- c(5000000, 1000000)
time <- c("Q1","Q2")
group <- c("A", "A")
data <- data.frame(transactions, time, group)
library(ggplot2)
fun.data <- function(x){
print(x)
return(data.frame(y = max(x) + 1,
label = paste0(round(diff(x), 2), "cm")))
}
ylab <- c(2.5, 5.0, 7.5, 10)
gg <- ggplot(data, aes(x = time, y = transactions, fill = colors_hc[1], label = round(transactions, 0))) +
geom_bar(stat = "identity", show.legend = FALSE) +
geom_text(position = position_dodge(width = 0.9),
vjust = 1.1) +
geom_line(aes(group = group), position = position_nudge(0.1),
arrow = arrow()) +
stat_summary(aes(x = group, y = transactions),
geom = "label",
fun.data = fun.data,
fontface = "bold", fill = "lightgrey",
inherit.aes = FALSE) +
expand_limits(x = c(0, NA), y = c(0, NA)) +
scale_y_continuous(labels = paste0(ylab, "M"),
breaks = 10 ^ 6 * ylab)
gg
The arrows I am aiming for:
Where I am (ignore the ugliness, didn't style it yet):
This works, but you still need to play around a bit with the axes (or rather beautify them)
library(dplyr)
library(ggplot2)
transactions <- c(5000000, 1000000)
time <- c("Q1","Q2")
group <- c("A", "A")
my_data <- data.frame(transactions, time, group)
fun.data <- function(x){
return(data.frame(y = max(x) + 1,
label = as.integer(diff(x))))
}
my_data %>%
ggplot(aes(x = group, y = transactions, fill = time)) +
geom_bar(stat = 'identity', position = 'dodge') +
geom_text(aes(label = as.integer(transactions)),
position = position_dodge(width = 0.9),
vjust = 1.5) +
geom_line(aes(group = group), position = position_nudge(0.1),
arrow = arrow()) +
stat_summary(aes(x = group, y = transactions),
geom = "label",
size = 5,
position = position_nudge(0.05),
fun.data = fun.data,
fontface = "bold", fill = "lightgrey",
inherit.aes = FALSE)
Edit2:
y_limit <- 6000000
my_data %>%
ggplot(aes(x = time, y = transactions)) +
geom_bar(stat = 'identity',
fill = 'steelblue') +
geom_text(aes(label = as.integer(transactions)),
vjust = 2) +
coord_cartesian(ylim = c(0, y_limit)) +
geom_segment(aes(x = 'Q1', y = max(my_data$transactions),
xend = 'Q1', yend = y_limit)) +
geom_segment(aes(x = 'Q2', y = y_limit,
xend = 'Q2', yend = min(my_data$transactions)),
arrow = arrow()) +
geom_segment(aes(x = 'Q1', y = y_limit,
xend = 'Q2', yend = y_limit)) +
geom_label(aes(x = 'Q2',
y = y_limit,
label = as.integer(min(my_data$transactions)- max(my_data$transactions))),
size = 10,
position = position_nudge(-0.5),
fontface = "bold", fill = "lightgrey")

Resources