keep line graphs after the frame ends in r ggplotly - r

I am trying to create an interactive plot. My lines span different numbers of frames (some end earlier than others). When I play the animation, each line disappears once its last frame has passed. I tried adding rows of fake data for the missing frames, but that didn't make a difference. I am not sure why this is happening.
Sample Data:
> dput(sample_n(Plot, 20))
structure(list(Strain = c(0.0114976650717703, 0.0283627375, 0.00173272727272728,
0.0150241, 0.0450557, 0.0113663, 3.97637e-05, 0.0434292, 0.00406928,
0.00207895693779905, 0.000215139, 0.00148572499999999, 0.0418233875,
0.000939926, 0.000255219, 0.0213334, 0.023816, 0.0145059, 0.0131467081339713,
0.0668966), Stress = c(75.9505644091148, 78.5302055162329, 48.6062116858257,
68.4024611345473, 85.2904610049016, 63.9140059100703, 1.43583171490823,
84.8256506176318, 64.1272033757161, 53.8795512181689, 14.442082799196,
64.3587637619208, 84.731707366194, 25.4200560130662, 13.8571035440045,
84.1313452985436, 85.4161843153761, 67.9158653588222, 77.7143709896995,
90.4239965485986), Instrumentation = structure(c(2L, 2L, 2L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L,
1L), .Label = c("DIC", "LVDT"), class = "factor"), Bar = structure(c(1L,
2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L,
2L, 1L, 2L), .Label = c("Spliced", "Unspliced"), class = "factor"),
ANIM = c(0.01, 0.03, 0, 0.02, 0.05, 0.01, 0, 0.04, 0, 0,
0, 0, 0.04, 0, 0, 0.02, 0.02, 0.01, 0.01, 0.07), frame = c(0.05,
0.05, 0.04, 0.13, 0.05, 0.1, 0.01, 0.11, 0.02, 0, 0.04, 0.11,
0.09, 0.01, 0.01, 0.04, 0.06, 0.09, 0.05, 0.08)), row.names = c(NA,
-20L), class = "data.frame")
The frames are created by the function:
# Build cumulative animation frames from `dat`.
# For the k-th level of `var`, every row whose value is among the first k
# levels is kept and tagged with that level in a new `frame` column; the
# per-level slices are then stacked into a single table.
#   dat: data frame to expand.
#   var: one-sided formula naming the column to accumulate over (e.g. ~ANIM).
# Returns the stacked data with the extra `frame` column.
accumulate_by <- function(dat, var) {
  values <- lazyeval::f_eval(var, dat)
  frame_levels <- plotly:::getLevels(values)
  frames <- vector("list", length(frame_levels))
  for (k in seq_along(frame_levels)) {
    in_frame <- values %in% frame_levels[seq_len(k)]
    frames[[k]] <- cbind(dat[in_frame, ], frame = frame_levels[[k]])
  }
  dplyr::bind_rows(frames)
}
I have the following for the plot:
plot <- ggplot(Plot) + aes(x = Strain, y = Stress, colour = Instrumentation, linetype = Bar, frame = frame) +
scale_color_manual(values = c("DIC"="red", "LVDT"="blue")) + geom_line(size = 0.8) + theme_classic() +
labs(x = "Strain (in/in)", y = "Stress (ksi)") +
theme_classic() + theme(axis.text = element_text(color = "black", size = 16),
axis.line = element_line(color = "black", size = 0.2), axis.ticks.y = element_line(color = "black", size = 0.2),
axis.title.y = element_text(color = "black", size = 20, margin = margin(0,40,0,0)),
axis.title.x = element_text(color = "black", size = 20, margin = margin(35,0,0,0)),
legend.title = element_blank(), legend.text = element_text(color = "black", size = 16))
ggplotly(
p = ggplot2::last_plot(),
width = NULL,
height = NULL,
tooltip = c("Strain","Stress","Instrumentation","Bar"),
dynamicTicks = FALSE,
layerData = 1,
originalData = TRUE,) %>% animation_opts(frame = 300, transition = 0, easing = "linear", redraw = TRUE) %>%
layout(yaxis = list(title = list(text = "Stress (ksi)", standoff = 30L), spikesides = TRUE,spikethickness = 1),
xaxis = list(title = list(text = "Strain (in/in)",standoff = 30L), spikesides = TRUE,spikethickness = 1),
legend = list(orientation = "v", x = 0.7, y = 0.13)) %>% animation_slider(hide = TRUE)
Also, the redraw doesn't seem to do what I want. I want the plot to start with the complete graph (last frame and all lines) and then also end with everything completed. I only want the animation when pressing the play button.
EDITED:
The initial data is comprised of multiple dataframes. I accumulated the frames for each dataframe independently and then collected them in a single dataframe with rbind.
> dput(sample_n(Plot.Spliced_DIC, 5))
structure(list(Strain = c(0.00635239, 0.00646411, 0.00304078,
0.00104394, 0.00138191), Stress = c(64.9199586945409, 65.0674673934705,
60.7649363239015, 26.5627079525764, 39.4159208404832), Instrumentation = structure(c(1L,
1L, 1L, 1L, 1L), .Label = "DIC", class = "factor"), Bar = structure(c(1L,
1L, 1L, 1L, 1L), .Label = "Spliced", class = "factor"), ANIM = c(0.01,
0.01, 0, 0, 0)), class = "data.frame", row.names = c(NA,
-5L))
> dput(sample_n(Plot.Unspliced_DIC, 5))
structure(list(Strain = c(7.66444e-05, 0.112481, 6.7294e-05,
0.00275991, 0.0600871), Stress = c(0.00344797841982851, 92.8897366602444,
0.522824377824084, 64.3513766859271, 88.2700702030104), Instrumentation = structure(c(1L,
1L, 1L, 1L, 1L), .Label = "DIC", class = "factor"), Bar = structure(c(1L,
1L, 1L, 1L, 1L), .Label = "Unspliced", class = "factor"), ANIM = c(0,
0.11, 0, 0, 0.06)), class = "data.frame", row.names = c(NA,
-5L))
> dput(sample_n(Plot.Unspliced_LVDT, 5))
structure(list(Strain = c(0.071858225, 0.0020442, 0.01736605,
0.0259565, 0.025441275), Stress = c(90.4766041042011, 60.701740404307,
70.0774864366534, 76.21702132901, 75.9110679987475), Instrumentation = structure(c(1L,
1L, 1L, 1L, 1L), .Label = "LVDT", class = "factor"), Bar = structure(c(1L,
1L, 1L, 1L, 1L), .Label = "Unspliced", class = "factor"), ANIM = c(0.07,
0, 0.02, 0.03, 0.03)), class = "data.frame", row.names = c(NA,
-5L))
> dput(sample_n(Plot.Spliced_LVDT, 5))
structure(list(Strain = c(0.0238954832535885, 0.00040463157894736,
0.0158640956937799, 0.0224346507177033, 0.00269568421052631),
Stress = c(85.3207044456991, 14.881393139828, 79.7864191978379,
84.4980682215488, 56.6941352672259), Instrumentation = structure(c(1L,
1L, 1L, 1L, 1L), .Label = "LVDT", class = "factor"), Bar = structure(c(1L,
1L, 1L, 1L, 1L), .Label = "Spliced", class = "factor"), ANIM = c(0.02,
0, 0.02, 0.02, 0)), class = "data.frame", row.names = c(NA, -5L))
Plot.Spliced_DIC <- Plot.Spliced_DIC %>% accumulate_by(~ANIM)
Plot.Unspliced_DIC <- Plot.Unspliced_DIC %>% accumulate_by(~ANIM)
Plot.Unspliced_LVDT <- Plot.Unspliced_LVDT %>% accumulate_by(~ANIM)
Plot.Spliced_LVDT <- Plot.Spliced_LVDT %>% accumulate_by(~ANIM)
Plot <- rbind(Plot.Spliced_DIC, Plot.Unspliced_DIC, Plot.Unspliced_LVDT, Plot.Spliced_LVDT)

There are two issues regarding your above approach:
you need to call accumulate_by after using rbind
There seems to be a bug when using the tooltip argument along with an animation (traces are disappearing - I therefore commented it out)
Please check the following:
library(plotly)
library(dplyr)
# Expand `dat` into cumulative animation frames.
# Each distinct level of `var` becomes one frame that contains all rows whose
# value belongs to that level or to any earlier one, so a trace drawn from the
# result grows frame by frame instead of being replaced.
#   dat: data frame to expand.
#   var: one-sided formula naming the column to accumulate over (e.g. ~ANIM).
# Returns the row-bound frames with an added `frame` column.
accumulate_by <- function(dat, var) {
  var <- lazyeval::f_eval(var, dat)
  # Frame order: declared levels for a factor, sorted unique values otherwise.
  # Done with base R rather than the unexported plotly:::getLevels(), which is
  # not part of plotly's public API and may change between releases.
  # NOTE(review): intended to mirror plotly:::getLevels() - confirm against the
  # installed plotly version.
  lvls <- if (is.factor(var)) levels(var) else sort(unique(var))
  dats <- lapply(seq_along(lvls), function(x) {
    cbind(dat[var %in% lvls[seq_len(x)], ], frame = lvls[[x]])
  })
  dplyr::bind_rows(dats)
}
Plot.Spliced_DIC <-
structure(list(Strain = c(0.00635239, 0.00646411, 0.00304078, 0.00104394, 0.00138191),
Stress = c(64.9199586945409, 65.0674673934705, 60.7649363239015, 26.5627079525764, 39.4159208404832),
Instrumentation = structure(c(1L, 1L, 1L, 1L, 1L), .Label = "DIC", class = "factor"),
Bar = structure(c(1L, 1L, 1L, 1L, 1L), .Label = "Spliced", class = "factor"),
ANIM = c(0.01, 0.01, 0, 0, 0)), class = "data.frame", row.names = c(NA, -5L))
Plot.Unspliced_DIC <-
structure(list(Strain = c(7.66444e-05, 0.112481, 6.7294e-05, 0.00275991, 0.0600871),
Stress = c(0.00344797841982851, 92.8897366602444, 0.522824377824084, 64.3513766859271, 88.2700702030104),
Instrumentation = structure(c(1L, 1L, 1L, 1L, 1L), .Label = "DIC", class = "factor"),
Bar = structure(c(1L, 1L, 1L, 1L, 1L), .Label = "Unspliced", class = "factor"),
ANIM = c(0, 0.11, 0, 0, 0.06)), class = "data.frame", row.names = c(NA, -5L))
Plot.Unspliced_LVDT <-
structure(list(Strain = c(0.071858225, 0.0020442, 0.01736605,0.0259565, 0.025441275),
Stress = c(90.4766041042011, 60.701740404307, 70.0774864366534, 76.21702132901, 75.9110679987475),
Instrumentation = structure(c(1L, 1L, 1L, 1L, 1L), .Label = "LVDT", class = "factor"),
Bar = structure(c(1L, 1L, 1L, 1L, 1L), .Label = "Unspliced", class = "factor"),
ANIM = c(0.07,0, 0.02, 0.03, 0.03)), class = "data.frame", row.names = c(NA, -5L))
Plot.Spliced_LVDT <-
structure(list(Strain = c(0.0238954832535885, 0.00040463157894736,
0.0158640956937799, 0.0224346507177033, 0.00269568421052631),
Stress = c(85.3207044456991, 14.881393139828, 79.7864191978379,
84.4980682215488, 56.6941352672259),
Instrumentation = structure(c(1L, 1L, 1L, 1L, 1L), .Label = "LVDT", class = "factor"),
Bar = structure(c(1L, 1L, 1L, 1L, 1L), .Label = "Spliced", class = "factor"),
ANIM = c(0.02, 0, 0.02, 0.02, 0)), class = "data.frame", row.names = c(NA, -5L))
# Combine the four series first and accumulate afterwards, so that all traces
# are expanded over the same set of frames.
plotData <- rbind(
  Plot.Spliced_DIC, Plot.Unspliced_DIC, Plot.Unspliced_LVDT, Plot.Spliced_LVDT
)
plotData <- plotData %>% accumulate_by(~ANIM)

# Prepend a copy of the last (complete) frame under frame id -1 so that the
# plot shows all complete traces in the beginning, before "play" is pressed.
lastFrame <- plotData[plotData$frame == max(plotData$frame), ]
lastFrame$frame <- -1
plotData <- rbind(lastFrame, plotData)

# theme_classic() is applied once; theme() then overrides individual elements.
plot <- ggplot(plotData) +
  aes(x = Strain, y = Stress, colour = Instrumentation, linetype = Bar,
      frame = frame) +
  geom_line(size = 0.8) +
  scale_color_manual(values = c("DIC" = "red", "LVDT" = "blue")) +
  labs(x = "Strain (in/in)", y = "Stress (ksi)") +
  theme_classic() +
  theme(
    axis.text = element_text(color = "black", size = 16),
    axis.line = element_line(color = "black", size = 0.2),
    axis.ticks.y = element_line(color = "black", size = 0.2),
    axis.title.y = element_text(color = "black", size = 20,
                                margin = margin(0, 40, 0, 0)),
    axis.title.x = element_text(color = "black", size = 20,
                                margin = margin(35, 0, 0, 0)),
    legend.title = element_blank(),
    legend.text = element_text(color = "black", size = 16)
  )

# Pass the plot object explicitly instead of relying on ggplot2::last_plot(),
# which would silently pick up any other plot created in between.
ggplotly(
  p = plot,
  width = NULL,
  height = NULL,
  # tooltip = c("Strain","Stress","Instrumentation","Bar"), # bug?
  # tooltip = c("x","y","colour","linetype"), # bug?
  dynamicTicks = FALSE,
  layerData = 1,
  originalData = TRUE
) %>%
  animation_opts(frame = 300, transition = 0, easing = "linear", redraw = TRUE) %>%
  layout(
    yaxis = list(title = list(text = "Stress (ksi)", standoff = 30L),
                 spikesides = TRUE, spikethickness = 1),
    xaxis = list(title = list(text = "Strain (in/in)", standoff = 30L),
                 spikesides = TRUE, spikethickness = 1),
    legend = list(orientation = "v", x = 0.7, y = 0.13)
  ) %>%
  animation_slider(hide = TRUE)
By the way, the approach of creating cumulative animations with accumulate_by becomes slower as the data grows, because every frame stores a copy of all rows from the earlier frames. Here you can find another approach that creates a cumulative animation by filtering the data. However, the R plotly package currently doesn't allow providing custom steps for animation sliders.
Please support my feature request (FR) if you are interested.

Related

following missing aesthetics in ggerrorbar

Maybe it's already too late for me to be working on my code.
I have to redo my figure in ggplot2, and I am unable to do so.
My errorbars are not showing and I cannot understand why.
dev.new()
ggplot() +
geom_point(data = conf_intervals, aes(y = mean_ALA, x = mean_LIN, shape=trophic,
color=feeding_type_2,
size = 3,
alpha = 0.5)) + scale_shape_manual("Trophic level", values = c(15:19)) +
geom_errorbar(data = conf_intervals,aes(mean_ALA, ymin = mean_ALA - se_ALA,
ymax = mean_ALA + se_ALA,)) +
geom_errorbarh(data = conf_intervals,aes(mean_LIN, ymin = mean_LIN - se_LIN,
ymax = mean_LIN + se_LIN)) +
geom_point(data = CSIA_inverts_basal, aes(y=ALA.d13C, x=LIN.d13C, color=feeding_type_2))
labs (title="Biplot of compound stable isotopes- Centroids with 95 % CI", subtitle="LIN VS. ALA",
y=expression({delta}^13*C[ALA]~'\211'~VPDB),
x=expression({delta}^13*C[LIN]~'\211'~VPDB)) +
# guides(color = FALSE, shape = FALSE) +
theme_classic()
Error: geom_errorbarh requires the following missing aesthetics: y
Run rlang::last_error() to see where the error occurred.
In addition: Warning message:
Ignoring unknown aesthetics: x
Here is my example data:
dput(head(CSIA_inverts_basal))
structure(list(d13C.VPDB = c(-35.10487341, -34.85465717, -34.67216423,
-34.06032315, -33.68548439, -33.4811546), d15.NAIR = c(-6.321847159,
-5.384989361, -2.638749276, -4.986045928, -5.946279778, -6.648526348
), ALA.d13C = c(-43.2375195, -44.77813854, -42.1921855, -41.58363894,
-39.156857, -40.33135344), LIN.d13C = c(-40.864145, -42.32043061,
-41.4247005, -36.08156681, -39.45744387, -37.76516617), combi = structure(c(1L,
1L, 1L, 1L, 1L, 1L), levels = c("epilithon", "fresh.leaves",
"gammarus", "grazing.ephemeroptera", "predatory.plecoptera",
"salmonid.eyes", "shreddering.plecoptera", "submerged.leaves"
), class = "factor"), feeding_type = c("epilithon", "epilithon",
"epilithon", "epilithon", "epilithon", "epilithon"), sampling.time = c("summer",
"fall", "summer", "fall", "fall", "fall"), year = c(2018L, 2016L,
2018L, 2016L, 2016L, 2016L), split = structure(c(2L, 2L, 2L,
2L, 2L, 2L), levels = c("consumer", "resource"), class = "factor"),
split_2 = c("epilithon", "epilithon", "epilithon", "epilithon",
"epilithon", "epilithon"), split_3 = c("epilithon", "epilithon",
"epilithon", "epilithon", "epilithon", "epilithon"), feeding_type_2 = structure(c(1L,
1L, 1L, 1L, 1L, 1L), levels = c("Epilithon", "Fresh leaves",
"Grazer", "Salmonid (Eyes)", "Predator", "Submerged leaves",
"Shredder"), class = "factor"), trophic = structure(c(1L,
1L, 1L, 1L, 1L, 1L), levels = c("Base", "Non-predatory invertebrate",
"Predatory invertebrate", "Predator"), class = "factor")), row.names = 2:7, class = "data.frame")
dput(conf_intervals)
structure(list(trophic = structure(c(1L, 1L, 1L, 2L, 2L, 3L,
4L), levels = c("Base", "Non-predatory invertebrate", "Predatory invertebrate",
"Predator"), class = "factor"), feeding_type_2 = structure(c(1L,
2L, 6L, 3L, 7L, 5L, 4L), levels = c("Epilithon", "Fresh leaves",
"Grazer", "Salmonid (Eyes)", "Predator", "Submerged leaves",
"Shredder"), class = "factor"), mean_ALA = c(-42.1, -39.7, -38.7,
-45.7, -40.3, -42.8, -42.7), mean_LIN = c(-39.2, -40, -37.2,
-40.8, -35.9, -36.7, -37.9), se_ALA = c(1.1, 1.1, 1.1, 2.2, 1.2,
1.9, 0.4), se_LIN = c(1.1, 1.1, 0.6, 1.8, 0.9, 1.3, 0.6), N_ALA = c(12L,
14L, 10L, 9L, 14L, 7L, 17L), LIN_N = c(12L, 14L, 10L, 9L, 14L,
7L, 17L)), class = "data.frame", row.names = c(NA, -7L))
Can someone help me?
geom_errorbarh doesn't have an aesthetic called x. It has a y, an xmin and an xmax. I suspect you are mixing up the x and y variables in your errorbar calls too, so check these carefully. Also, you should move alpha and size outside of aes so they don't appear in the legend.
It's also a good idea to make sure your code is formatted in such a way that it is easier to read and debug. Limiting your line length and using inheritance of the data passed to your initial ggplot call helps to simplify things a bit too.
# Centroid biplot: group means from `conf_intervals` with vertical (ALA) and
# horizontal (LIN) error bars, drawn over the raw points from
# `CSIA_inverts_basal`.
# NOTE(review): the bars span mean +/- se_*, while the title says "95 % CI" -
# confirm which one is intended.
ggplot(conf_intervals) +
  # size and alpha are constants, so they sit outside aes() and stay out of
  # the legend.
  geom_point(aes(y = mean_ALA, x = mean_LIN, shape = trophic,
                 color = feeding_type_2), size = 3, alpha = 0.5) +
  scale_shape_manual("Trophic level", values = 15:19) +
  # Vertical bars need x plus ymin/ymax ...
  geom_errorbar(aes(x = mean_LIN, ymin = mean_ALA - se_ALA,
                    ymax = mean_ALA + se_ALA)) +
  # ... horizontal bars need y plus xmin/xmax.
  geom_errorbarh(aes(y = mean_ALA, xmin = mean_LIN - se_LIN,
                     xmax = mean_LIN + se_LIN)) +
  geom_point(data = CSIA_inverts_basal,
             aes(y = ALA.d13C, x = LIN.d13C, color = feeding_type_2)) +
  # The per-mille sign is written as the Unicode escape \u2030. The previous
  # octal escape '\211' is a single raw byte that only renders as a per-mille
  # sign in a Windows-1252 locale and is invalid text in UTF-8 sessions.
  labs(title = "Biplot of compound stable isotopes- Centroids with 95 % CI",
       subtitle = "LIN VS. ALA",
       y = expression({delta}^13*C[ALA]~"\u2030"~VPDB),
       x = expression({delta}^13*C[LIN]~"\u2030"~VPDB)) +
  theme_classic()

Control order across factors in ggplot2

I have a plot that looks like below. I want to change the order so that the larger value comes first (so cyan would precede red). But I can't seem to do this. What am I doing wrong?
This is my current code block so far:
ggplot(df, aes(x = Gene.Set.Size, y = OR, label =P.value, color = Method, group = Method)) +
geom_point(position=position_dodge(width=0.5)) +
ggrepel::geom_text_repel(size = 6, box.padding = 1, segment.angle = 20, position=position_dodge(width=0.5))+
geom_pointrange(aes(ymax = UpperCI, ymin = LowerCI),position=position_dodge(width=0.5)) +
theme_bw() +
theme(text=element_text(size=25),axis.text.x = element_text(angle = 45, hjust = 1)) +
ylab("Odds ratio") +
xlab("Gene set size") +
theme(plot.margin = unit(c(2,2,2,2), "cm"))
> dput(df)
structure(list(Method = structure(c(1L, 1L, 1L, 2L, 2L, 2L), .Label = c("MAGMA",
"Pairwise"), class = "factor"), P.value = c(8.74e-28, 1.33e-56,
5.57e-92, 1.63e-44, 4.23e-71, 2.78e-95), OR = c(1.39, 1.424668,
1.4, 1.513, 1.478208, 1.409563), UpperCI = c(1.481491, 1.487065,
1.446039, 1.601557, 1.417117, 1.455425), LowerCI = c(1.316829,
1.364601, 1.356358, 1.42, 1.541768, 1.365056), Gene.Set.Size = structure(c(1L,
2L, 3L, 1L, 2L, 3L), .Label = c("500", "1000", "2000"), class = "factor")), row.names = c(NA,
-6L), class = "data.frame")
You must set the factor order.
library(ggplot2)
df <- structure(list(Method = structure(c(1L, 1L, 1L, 2L, 2L, 2L), .Label = c("MAGMA",
"Pairwise"), class = "factor"), P.value = c(8.74e-28, 1.33e-56,
5.57e-92, 1.63e-44, 4.23e-71, 2.78e-95), OR = c(1.39, 1.424668,
1.4, 1.513, 1.478208, 1.409563), UpperCI = c(1.481491, 1.487065,
1.446039, 1.601557, 1.417117, 1.455425), LowerCI = c(1.316829,
1.364601, 1.356358, 1.42, 1.541768, 1.365056), Gene.Set.Size = structure(c(1L,
2L, 3L, 1L, 2L, 3L), .Label = c("500", "1000", "2000"), class = "factor")), row.names = c(NA,
-6L), class = "data.frame")
# Reorder the factor: list "Pairwise" before "MAGMA" so that it is dodged and
# listed in the legend first.
df$Method <- factor(df$Method, levels = c("Pairwise", "MAGMA"))

# Odds ratio per gene-set size, one dodged point range per method, labelled
# with the p-value.
ggplot(
  df,
  aes(x = Gene.Set.Size, y = OR, label = P.value, group = Method, color = Method)
) +
  geom_point(position = position_dodge(width = 0.5)) +
  ggrepel::geom_text_repel(
    size = 6, box.padding = 1, segment.angle = 20,
    position = position_dodge(width = 0.5)
  ) +
  geom_pointrange(
    aes(ymax = UpperCI, ymin = LowerCI),
    position = position_dodge(width = 0.5)
  ) +
  theme_bw() +
  labs(x = "Gene set size", y = "Odds ratio") +
  theme(
    text = element_text(size = 25),
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.margin = unit(rep(2, 4), "cm")
  )
df %>% mutate(Method = fct_relevel(Method, 'Pairwise')) %>% <your ggplot2 code>
should do the job, assuming you have loaded the tidyverse pipe operator %>% and the forcats package, which you can do with library(tidyverse).
You can simply reverse the ordering of the Method factor with forcats::fct_rev.
df$Method <- fct_rev(df$Method)
Alternatively, you can specify the first level when you initially converted that column to a factor.

Color and fill geom_point by group and combine legend

I am having difficulty customising the ggplot (geom_point) output.
Points to have black outline but with different fill colors by group (white and black)
Is there a way to combine the legend into one?
Data
library(dplyr)
library(ggplot2)
dat <- structure(list(q = c(1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L),
is_female = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L), levels = c("Male", "Female"), class = "factor"), mean_exp = c(19.3095475534936,
19.2351713991988, 21.6718804471611, 21.69517120871, 23.4144749290445,
23.4191383190372, 25.2817658487443, 25.2772340355605, 28.3982168611512,
28.7869521340185), mean_fi = c(0.0915386254018914, 0.0912295567094683,
0.0771055282779973, 0.0790597510143077, 0.0859508568981647,
0.088489590940481, 0.109848283385112, 0.11358904634185, 0.128425331060705,
0.136830729164909), b_fi.frail = c(1, 1, 1, 1, 1, 1, 1, 1,
1, 1), pct = c(47.5830407777478, 52.4169592222522, 37.567084078712,
62.432915921288, 36.9897959183673, 63.0102040816327, 34.0960360941025,
65.9039639058975, 29.0891283055828, 70.9108716944172)), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -10L), groups = structure(list(
q = 1:5, .rows = structure(list(1:2, 3:4, 5:6, 7:8, 9:10), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -5L), .drop = TRUE))
My attempt:
ggplot(dat, aes(x=mean_exp, y=mean_fi, linetype=is_female)) +
geom_point(aes(color=is_female, fill=is_female), size=2, shape="square") +
geom_line() +
scale_x_continuous(limits = c(18, 30), breaks = seq(20, 30, 5), expand = c(0,0)) +
scale_y_continuous(limits = c(0.05, 0.15), breaks = seq(0.05, 0.15, 0.02), expand = c(0,0)) +
scale_color_manual(values = c("M" = "black", "F" = "black")) +
scale_fill_manual(values = c("M" = "black", "F" = "white")) +
labs(x= expression(Body ~ mass ~ index ~ (kg/m^2)), y= "Mean baseline FI score", title = "BMI") +
theme_classic() +
theme(plot.title = element_text(hjust=0.5),
legend.position = "bottom")
My code outputs a graph with grey points and lines for some reason. I would like black lines and outlines (but points to be filled with black or white depending on the group) and to combine the legend if possible.
I realise there are many similar examples out there but I can't seem to figure out why my code is not working... Thanks for your help in advance!
Use shape = 22 for a fillable square, i.e. one which can have a different outline.
Colour can be outside the aes if both groups require a black outline for the points.
library(dplyr)
library(ggplot2)
dat <- structure(list(
q = c(1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L),
is_female = structure(c(
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L
), levels = c("Male", "Female"), class = "factor"), mean_exp = c(
19.3095475534936,
19.2351713991988, 21.6718804471611, 21.69517120871, 23.4144749290445,
23.4191383190372, 25.2817658487443, 25.2772340355605, 28.3982168611512,
28.7869521340185
), mean_fi = c(
0.0915386254018914, 0.0912295567094683,
0.0771055282779973, 0.0790597510143077, 0.0859508568981647,
0.088489590940481, 0.109848283385112, 0.11358904634185, 0.128425331060705,
0.136830729164909
), b_fi.frail = c(
1, 1, 1, 1, 1, 1, 1, 1,
1, 1
), pct = c(
47.5830407777478, 52.4169592222522, 37.567084078712,
62.432915921288, 36.9897959183673, 63.0102040816327, 34.0960360941025,
65.9039639058975, 29.0891283055828, 70.9108716944172
)
), class = c(
"grouped_df",
"tbl_df", "tbl", "data.frame"
), row.names = c(NA, -10L), groups = structure(list(
q = 1:5, .rows = structure(list(1:2, 3:4, 5:6, 7:8, 9:10), ptype = integer(0), class = c(
"vctrs_list_of",
"vctrs_vctr", "list"
))
), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, -5L), .drop = TRUE))
# Plot mean_fi against mean_exp with one line per level of is_female; the
# linetype mapping is set globally, the point fill is set on the point layer.
ggplot(dat, aes(x = mean_exp, y = mean_fi, linetype = is_female)) +
# shape 22 is a fillable square: the outline colour is fixed to black for all
# points (outside aes), while the interior fill is mapped to is_female.
geom_point(aes(fill = is_female), size = 2, shape = 22, colour = "black") +
geom_line() +
scale_x_continuous(limits = c(18, 30), breaks = seq(20, 30, 5), expand = c(0, 0)) +
scale_y_continuous(limits = c(0.05, 0.15), breaks = seq(0.05, 0.15, 0.02), expand = c(0, 0)) +
# Unnamed values are assigned in factor-level order: Male = black, Female = white.
scale_fill_manual(values = c("black", "white")) +
# fill and linetype get the same title ("Sex") so that ggplot2 can merge the
# two guides into a single legend.
labs(x = expression(Body ~ mass ~ index ~ (kg / m^2)), y = "Mean baseline FI score",
title = "BMI", fill = "Sex", linetype = "Sex") +
theme_classic() +
# Centre the plot title and move the legend below the panel.
theme(
plot.title = element_text(hjust = 0.5),
legend.position = "bottom"
)
Created on 2022-07-07 by the reprex package (v2.0.1)

Split legend into two or multiple columns in a plot using ggnewscale::new_scale() with ggplot2 in R

After following Stefan's very helpful answer to this post, where he uses ggnewscale::new_scale(), I am now stuck with the following question:
"How to arrange the custom legends from ggnewscale into multiple vertical columns?"
like it is usually done with a command such as guides(scale_shape_manual=guide_legend(ncol=2)) in ggplot2.
Minimal reproducible example:
# fictional data in the scheme of https://stackoverflow.com/q/66804487/16642045
mutated <- list()
for(i in 1:10) {
mutated[[i]] <- data.frame(Biological.Replicate = rep(1,4),
Reagent.Conc = c(10000, 2500, 625, 156.3),
Reagent = rep(1,8),
Cell.type = rep(LETTERS[i],4),
Mean.Viable.Cells.1 = rep(runif(n = 10, min = 0, max = 1),4))
}
mutated <- do.call(rbind.data.frame, mutated)
The modified code after the answer of user "stefan" looks like this:
# from https://stackoverflow.com/a/66808609/16642045
library(ggplot2)
library(ggnewscale)
library(dplyr)
library(magrittr)
mutated <- mutated %>%
mutate(Cell.type = as.factor(Cell.type),
Reagent = factor(Reagent,
levels = c("0", "1", "2")))
mean_mutated <- mutated %>%
group_by(Reagent, Reagent.Conc, Cell.type) %>%
split(.$Cell.type)
# Build the point and line layers plus the matching shape and linetype scales
# for one cell type; the cell type is also used as the legend title.
#   Cell.type: the cell type to draw; must be a name of `mean_mutated`.
# `mean_mutated` (list of per-cell-type data frames) is read from the calling
# environment.
# Returns a list of ggplot2 components that can be added to a plot with `+`.
layer_geom_scale <- function(Cell.type) {
  # Callers pass elements of unique(mutated$Cell.type), i.e. a factor. Indexing
  # a list with a factor uses its integer codes rather than its labels, so
  # convert to the label first: the lookup is then by name and the legend
  # title is plain text.
  Cell.type <- as.character(Cell.type)
  cell_data <- mean_mutated[[Cell.type]]
  list(
    geom_point(data = cell_data, mapping = aes(shape = Reagent)),
    geom_line(data = cell_data,
              mapping = aes(group = Reagent, linetype = Reagent)),
    scale_linetype_manual(name = Cell.type,
                          values = c("solid", "dashed", "dotted"),
                          drop = FALSE),
    scale_shape_manual(name = Cell.type, values = c(15, 16, 4),
                       labels = c("0", "1", "2"), drop = FALSE)
  )
}
my_plot <-
ggplot(mapping = aes(
x = as.factor(Reagent.Conc),
y = Mean.Viable.Cells.1)) +
layer_geom_scale(unique(mutated$Cell.type)[1])
for(current_Cell.type_index in 2:length(unique(mutated$Cell.type))) {
my_plot <-
my_plot +
ggnewscale::new_scale("shape") +
ggnewscale::new_scale("linetype") +
layer_geom_scale(unique(mutated$Cell.type)[current_Cell.type_index])
}
my_plot
This results in:
Now, I want the legends to be displayed side-by-side, in two columns, and I tried this (without success):
my_plot +
guides(scale_shape_manual=guide_legend(ncol=2))
EDIT: A picture of the way I want the legends to be arranged
Is there anyone who could help me?
Thanks!
Note: This answer addresses the question before clarification was made at question edit # 4 and beyond.
Horizontal legend
Adding theme(legend.box = "horizontal") will make the legend elements appear side by side.
Multiple columns and ggnewscale
Using guides globally will result in the modification of scales after ggnewscale updates them. In this context, only the variable RKO will be updated:
# Layers and scales for a single cell type, drawn in one fixed colour.
#   cell_type: name of the element of `mean_mutated` to plot; also used as the
#              title of the shape and linetype legends.
#   color:     colour applied to both the points and the lines.
# `mean_mutated` is read from the calling environment.
# Returns a list of ggplot2 components that can be added to a plot with `+`.
layer_geom_scale <- function(cell_type, color) {
  cell_data <- mean_mutated[[cell_type]]
  reagent_linetypes <- c("solid", "dashed", "dotted")
  reagent_shapes <- c(15, 16, 4)
  list(
    geom_point(data = cell_data, mapping = aes(shape = Reagent), color = color),
    geom_line(
      data = cell_data,
      mapping = aes(group = Reagent, linetype = Reagent),
      color = color
    ),
    scale_linetype_manual(
      name = cell_type, values = reagent_linetypes, drop = FALSE
    ),
    scale_shape_manual(
      name = cell_type, values = reagent_shapes,
      labels = c("0", "1", "2"), drop = FALSE
    )
  )
}
my_plot <-
ggplot(mapping = aes(
x = as.factor(Reagent.Conc),
y = Mean.Viable.Cells.1)) +
layer_geom_scale("HCT", "#999999") +
ggnewscale::new_scale("linetype") +
ggnewscale::new_scale("shape") +
layer_geom_scale("RKO", "#E69F00") +
theme(legend.box = "horizontal") +
guides(shape = guide_legend(ncol = 2),
linetype = guide_legend(ncol = 2))
my_plot
To modify the same scales for all variables, the guide should be added inside the scale definitions:
# Layers and scales for a single cell type, with a two-column legend attached
# directly to each scale so that every ggnewscale copy carries it.
#   cell_type: name of the element of `mean_mutated` to plot; also used as the
#              title of the shape and linetype legends.
#   color:     colour applied to both the points and the lines.
# `mean_mutated` is read from the calling environment.
# Returns a list of ggplot2 components that can be added to a plot with `+`.
layer_geom_scale <- function(cell_type, color) {
  cell_data <- mean_mutated[[cell_type]]
  points <- geom_point(
    data = cell_data, mapping = aes(shape = Reagent), color = color
  )
  lines <- geom_line(
    data = cell_data,
    mapping = aes(group = Reagent, linetype = Reagent),
    color = color
  )
  linetype_scale <- scale_linetype_manual(
    name = cell_type, values = c("solid", "dashed", "dotted"), drop = FALSE,
    guide = guide_legend(ncol = 2)
  )
  shape_scale <- scale_shape_manual(
    name = cell_type, values = c(15, 16, 4), labels = c("0", "1", "2"),
    drop = FALSE, guide = guide_legend(ncol = 2)
  )
  list(points, lines, linetype_scale, shape_scale)
}
my_plot <-
ggplot(mapping = aes(
x = as.factor(Reagent.Conc),
y = Mean.Viable.Cells.1)) +
layer_geom_scale("HCT", "#999999") +
ggnewscale::new_scale("linetype") +
ggnewscale::new_scale("shape") +
layer_geom_scale("RKO", "#E69F00") +
theme(legend.box = "horizontal")
my_plot
Raw data
# from https://stackoverflow.com/q/66804487/16642045
mutated <- structure(list(
Biological.Replicate = c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L),
Reagent.Conc = c(10000, 2500, 625, 156.3,
39.1, 9.8, 2.4, 0.6,
10000, 2500, 625, 156.3,
39.1, 9.8, 2.4, 0.6,
10000, 2500, 625, 156.3,
39.1, 9.8, 2.4, 0.6),
Reagent = c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L,
0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L),
Cell.type = c("HCT", "HCT", "HCT", "HCT",
"HCT", "HCT", "HCT", "HCT",
"HCT", "HCT", "HCT", "HCT",
"HCT", "HCT", "HCT", "HCT",
"RKO", "RKO", "RKO", "RKO",
"RKO", "RKO", "RKO", "RKO"),
Mean.Viable.Cells.1 = c(1.014923966, 1.022279854, 1.00926559, 0.936979842,
0.935565248, 0.966403395, 1.00007073, 0.978144524,
1.019673384, 0.991595836, 0.977270557, 1.007353643,
1.111928183, 0.963518289, 0.993028364, 1.027409034,
1.055452733, 0.953801253, 0.956577449, 0.792568337,
0.797052961, 0.755623576, 0.838482346, 0.836773918)),
row.names = 9:32,
class = "data.frame")
# from https://stackoverflow.com/a/66808609/16642045
library(ggplot2)
library(ggnewscale)
library(dplyr)
library(magrittr)
mutated <- mutated %>%
mutate(Cell.type = factor(Cell.type,
levels = c("HCT", "RKO")),
Reagent = factor(Reagent,
levels = c("0", "1", "2")))
mean_mutated <- mutated %>%
group_by(Reagent, Reagent.Conc, Cell.type) %>%
split(.$Cell.type)

geom_segment arrow settings - head same width as line

Hello everyone, I need help getting a nice geom_segment plot with ggplot2.
Here are the data
structure(list(molecule = structure(c(1L, 1L, 2L, 2L, 3L, 4L,
4L, 5L, 6L), .Label = c("scaffold1", "scaffold2", "scaffold3",
"scaffold4", "scaffold5", "scaffold6"), class = "factor"), gene = structure(1:9, .Label = c("Gene1",
"Gene2", "Gene3", "Gene4", "Gene5", "Gene6", "Gene7", "Gene8",
"Gene9"), class = "factor"), start_gene = c(64000L, 80000L, 60000L,
20000L, 22000L, 20000L, 35000L, 17000L, 2000L), end_gene = c(68000L,
83000L, 68000L, 28000L, 29000L, 33000L, 38000L, 19000L, 2500L
), start_scaff = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), end_scaff = c(125000L,
125000L, 80000L, 80000L, 60000L, 40000L, 40000L, 20000L, 5000L
), strand = structure(c(1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L), .Label = c("forward",
"reverse"), class = "factor"), direction = c(1L, 1L, 1L, -1L,
1L, -1L, 1L, -1L, 1L)), class = "data.frame", row.names = c(NA,
-9L))
With these data and this code:
library(ggplot2)
ggplot(tab, aes(x = start_scaff, xend = end_scaff,
y = molecule, yend = molecule)) +
geom_segment(size = 3, col = "grey80") +
geom_segment(aes(x = ifelse(direction == 1, start_gene, end_gene),
xend = ifelse(direction == 1, end_gene, start_gene)),
data = tab,
arrow = arrow(length = unit(0.1, "inches")), size = 2) +
geom_text(aes(x = start_gene, y = molecule, label = gene),
data = tab, nudge_y = 0.2) +
scale_y_discrete(limits = rev(levels(tab$molecule))) +
theme_minimal()
I can manage to create this plot:
And I'm looking for settings that make the segments look more like rectangles with a small arrowhead, such as:
I think you may want to look at the gggenes package - may help you not only for the arrows :)
From the example in the vignette, I used the settings to get the head to the same width as the segment.
Another advantage of using this geom is that you can use alpha.
library(gggenes)
library(ggplot2)
ggplot(mydat, aes(x = start_scaff, xend = end_scaff,
y = molecule, yend = molecule)) +
geom_segment(size = 3, col = "grey80") +
geom_gene_arrow(aes(xmin = ifelse(direction == 1, start_gene, end_gene),
xmax = ifelse(direction == 1, end_gene, start_gene)),
arrowhead_height = unit(3, "mm"), arrowhead_width = unit(1, "mm")) +
geom_text(aes(x = start_gene, y = molecule, label = gene),
data = mydat, nudge_y = 0.2) +
scale_y_discrete(limits = rev(levels(mydat$molecule))) +
theme_minimal()

Resources