Avoid legend duplication in plotly conversion from ggplot with facet_wrap - r

Consider the plot produced by the following reprex. Note that the ggplot has sensible legends, while in plotly, the legend is heavily duplicated, with one entry for each time the same category ("manufacturer") appears in each facet. How do I make the plotly legend better match that of the ggplot2 one?
library(plotly)
library(ggplot2)
p <- mpg %>%
ggplot(aes(year)) +
geom_ribbon(aes(ymin=cty, ymax=hwy, fill = manufacturer), alpha=0.2) +
geom_line(aes(y = hwy, col=manufacturer)) +
facet_wrap(~class)
p
plotly::ggplotly(p)

Adapting my answer on this post to your case (which draws on this answer) one option would be to manipulate the plotly object.
The issue is that with facetting we end up with one legend entry for each facet in which a group is present, i.e. the numbers in the legend entries correspond to the number of the facet or panel.
In plotly one could prevent the duplicated legend entries via the legendgroup argument. One option to achieve the same result when using ggplotly would be to assign the legendgroup manually like so:
library(plotly)
library(ggplot2)
p <- mpg %>%
ggplot(aes(year)) +
geom_ribbon(aes(ymin=cty, ymax=hwy, fill = manufacturer), alpha=0.2) +
geom_line(aes(y = hwy, col=manufacturer)) +
facet_wrap(~class)
gp <- ggplotly(p = p)
# Get the names of the legend entries
df <- data.frame(id = seq_along(gp$x$data), legend_entries = unlist(lapply(gp$x$data, `[[`, "name")))
# Extract the group identifier
df$legend_group <- gsub("^\\((.*?),\\d+\\)", "\\1", df$legend_entries)
# Add an indicator for the first entry per group
df$is_first <- !duplicated(df$legend_group)
for (i in df$id) {
# Is the layer the first entry of the group?
is_first <- df$is_first[[i]]
# Assign the group identifier to the name and legendgroup arguments
gp$x$data[[i]]$name <- df$legend_group[[i]]
gp$x$data[[i]]$legendgroup <- gp$x$data[[i]]$name
# Show the legend only for the first layer of the group
if (!is_first) gp$x$data[[i]]$showlegend <- FALSE
}
gp

Thanks, #stefan, for your excellent answer that has both taught me about plotly objects and inspired me to take your concept further.
I've created this function with the following features:
It translates your logic into a function that uses the plotly object as an input.
It applies the purrr library.
The function accepts an optional second parameter (.new_legend) that allows overwriting the legend entries.
The code is certainly longer than your code, though it's elongated by the function, assign_leg_grp, that enables overwriting and also by my "spread out" style.
library(plotly)
library(ggplot2)
library(purrr)
library(stringr)
p <- mpg %>%
ggplot(aes(year)) +
geom_ribbon(aes(ymin=cty, ymax=hwy, fill = manufacturer), alpha=0.2) +
geom_line(aes(y = hwy, col=manufacturer)) +
facet_wrap(~class)
gp <- ggplotly(p = p)
clean_pltly_legend <- function(.pltly_obj, .new_legend = c()) {
# Cleans up a plotly object legend, particularly when ggplot is facetted
assign_leg_grp <- function(.legend_group, .leg_nms) {
# Assigns a legend group from the list of possible entries
# Used to modify the legend settings for a plotly object
leg_nms_rem <- .leg_nms
parse_leg_nms <- function(.leg_options) {
# Assigns a .leg_name, if possible
# .leg_options is a 2-element list: 1 = original value; 2 = remaining options
if (is.na(.leg_options)) {
.leg_options
} else if(length(leg_nms_rem) == 0) {
# No more legend names to assign
.leg_options
} else {
# Transfer the first element of the remaining options
leg_nm_new <- leg_nms_rem[[1]]
leg_nms_rem <<- leg_nms_rem[-1]
leg_nm_new
}
}
.legend_group %>%
map(~ parse_leg_nms(.))
}
simplify_leg_grps <- function(.legendgroup_vec) {
# Simplifies legend groups by removing brackets, position numbers and then de-duplicating
leg_grp_cln <-
map_chr(.legendgroup_vec, ~ str_replace_all(., c("^\\(" = "", ",\\d+\\)$" = "")))
modify_if(leg_grp_cln, duplicated(leg_grp_cln), ~ NA_character_)
}
pltly_obj_data <-
.pltly_obj$x$data
pltly_leg_grp <-
# pltly_leg_grp is a character vector where each element represents a legend group. Element is NA if legend group not required or doesn't exist
pltly_obj_data%>%
map(~ pluck(., "legendgroup")) %>%
map_chr(~ if (is.null(.)) {NA_character_} else {.}) %>%
# Elements where showlegend = FALSE have legendgroup = NULL.
simplify_leg_grps() %>%
assign_leg_grp(.new_legend)
pltly_obj_data_new <-
pltly_obj_data %>%
map2(pltly_leg_grp, ~ list_modify(.x, legendgroup = .y)) %>%
map2(pltly_leg_grp, ~ list_modify(.x, name = .y)) %>%
map2(pltly_leg_grp, ~ list_modify(.x, showlegend = !is.na(.y)))
# i.e. showlegend set to FALSE when is.na(pltly_leg_grp), TRUE when not is.na(pltly_leg_grp)
.pltly_obj$x$data <- pltly_obj_data_new
.pltly_obj
}
clean_pltly_legend(gp)

Related

R: using curly braces referring to column name within a conditionally-defined expression inside custom function does not work

I have defined a function which produces a ggplot chart, where the y-variable can either be a level or a percentage, so I have included an if-else to amend the options appropriately. I then run the chart through ggplotly to produce an interactive version, but the hover-over text label does not work as intended. As shown in the picture, the label just prints my function instead of showing the y-value as intended.
I have struggled to find the right approach and think that I am supposed to use eval, quote or expression in some way, but don't understand how it all fits together. What I do know is that without using e.g. quote, I get an error saying the object values does not exist, which suggests that part of the function is evaluated without being told values is a column within my dataset.
Here is my reprex:
library(tidyverse) #for dplyr
library(ggplot2) #for chart
library(scales) #for axis label
library(plotly) #for interactive chart
# dataset
dataset <- data.frame(geography_name=c("a","b","c","d","e","f"),
values=c(2,3,4,5,6,7))
# custom helper functions
perc_form = function(x, d=1) sprintf(paste0("%1.",d,"f"), x)
value_form = function(x,s=2,d= -1) format(signif(round(as.numeric(x), d),s), big.mark=",")
# function producing ggplot chart
barchart <- function( data_set = dataset,
x_var = geography_name,
y_var = values,
bar_stat_type=NULL) {
# Set axis type dependent on variable format
if (bar_stat_type=="pct") {
y_var_label <- paste0("Rate: ",quote(perc_form({{y_var}})),"%")
label_form <- (percent_format(accuracy=1))
} else if (bar_stat_type=="money") {
y_var_label <- paste0("Level: £",quote(value_form({{y_var}})))
label_form <- (comma_format())
}
# Chart
barchart <- data_set %>%
ggplot(mapping = aes(x = {{x_var}},
y = {{y_var}},
text = paste0(geography_name, "\n",
eval(y_var_label), "\n")))+
geom_bar(stat = "identity", position = position_dodge(), width = 0.4)+
scale_y_continuous(labels = eval(label_form)) #this evaluation works
return(barchart)
}
# ggplot chart is produced fine
gg <- barchart(bar_stat_type="money")
gg
# ggplotly chart does not have correct hover-over labels
ggplotly(gg)
ggplotly labels do not include actual y-variable value
There is no need for quote or eval. Simply make you y_var_labels functions which can be called inside your ggplot2 code:
library(ggplot2)
library(scales)
library(plotly)
barchart <- function(data_set = dataset,
x_var = geography_name,
y_var = values,
bar_stat_type = NULL) {
if (bar_stat_type == "pct") {
y_var_label <- function(x) paste0("Rate: ", perc_form(x), "%")
label_form <- percent_format(accuracy = 1)
} else if (bar_stat_type == "money") {
y_var_label <- function(x) paste0("Level: £", value_form(x))
label_form <- comma_format()
}
data_set %>%
ggplot(mapping = aes(
x = {{ x_var }},
y = {{ y_var }},
text = paste0(
geography_name, "\n",
y_var_label({{ y_var }}), "\n"
)
)) +
geom_bar(stat = "identity", position = position_dodge(), width = 0.4) +
scale_y_continuous(labels = label_form)
}
gg <- barchart(bar_stat_type = "money")
ggplotly(gg)

how to get geom_point and legend onto line plot in R?

This is my R-script, I've been trying to include a legend onto the line plot but it isn't working? Any guidance? I also can't seem to get the geom_point() working either (I've taken the code for it out below).
library(ggsignif)
library(readxl)
library(svglite)
library(tidyverse)
library(ggplot2)
library(tidyr)
library(dplyr)
url <-'https://static-content.springer.com/esm/art%3A10.1038%2Fs41586-020-2850-3/MediaObjects/41586_2020_2850_MOESM10_ESM.xlsx'
temp <-tempfile()
download.file(url, temp, mode='wb')
myData <- read_excel(path=temp, sheet = "ExFig.5f")
names(myData) <- NULL
view(myData)
Time_post_inj <- (myData[1])
Time_post_inj <- Time_post_inj[-c(1),]
dose_450_ug <- (myData[2])
dose_450_ug <- dose_450_ug[-c(1),]
dose_150_ug <- (myData[4])
dose_150_ug <- dose_150_ug[-c(1),]
dose_100_ug <- (myData[6])
dose_100_ug <- dose_100_ug[-c(1),]
dose_50_ug <- (myData[8])
dose_50_ug <- dose_50_ug[-c(1),]
colnames(Time_post_inj) <-c("Time_Post_Injection")
colnames(dose_450_ug) <-c("dose_450_µg")
colnames(dose_150_ug) <-c("dose_150_µg")
colnames(dose_100_ug) <-c("dose_100_µg")
colnames(dose_50_ug) <-c("dose_50_µg")
Newdata <-data.frame(Time_post_inj, dose_450_ug, dose_150_ug, dose_100_ug, dose_50_ug)
Newdata$Time_Post_Injection <-as.numeric(Newdata$Time_Post_Injection)
Newdata$dose_450_µg <-as.numeric(Newdata$dose_450_µg)
Newdata$dose_150_µg <-as.numeric(Newdata$dose_150_µg)
Newdata$dose_100_µg <-as.numeric(Newdata$dose_100_µg)
Newdata$dose_50_µg <-as.numeric(Newdata$dose_50_µg)
str(Newdata)
ggplot(data=Newdata, aes(x=Time_Post_Injection, y=hCD4_occupancy, group = 1)) + geom_line(aes(y=dose_450_µg)) + geom_line(aes(y=dose_150_µg)) + geom_line(aes(y=dose_100_µg)) + geom_line(aes(y=dose_50_µg))
Newdata
tidyr::pivot_longer(Time_Post_Injection, names_to = "DOSE", values_to = "VALUE") %>%
ggplot2::ggplot(aes(Time_Post_Injection, VALUE, group = DOSE, color = DOSE)) + ggplot2::geom_line()
The following is a full reprex, meaning that if you copy and paste, it will reproduce the plot exactly as below. You can see I have simplified your parsing considerably too; this starts with the url and produces the plot with a lot less data wrangling:
library(ggplot2) # Only load packages you really need
# This format is a handy way of keeping a long string on a single page
url <- paste0("https://static-content.springer.com/esm/art%3A10.",
"1038%2Fs41586-020-2850-3/MediaObjects/41586_2020",
"_2850_MOESM10_ESM.xlsx")
temp <- tempfile()
download.file(url, temp, mode = 'wb')
# Instead of loading an entire library to use one function, we can
# access read_excel by doing readxl::read_excel
myData <- readxl::read_excel(temp, sheet = "ExFig.5f")
# This single line subsets the data frame to chop out the first row
# and the empty columns. It also converts all columns to numeric
NewData <- as.data.frame(lapply(myData[-1, -c(3, 5, 7)], as.numeric))
names(NewData) <-c("Time_Post_Injection", "dose_450_ug",
"dose_150_ug", "dose_100_ug", "dose_50_ug")
# This switches your data to long format, which helps ggplot to work
# We put all the values in one column and have the dosages as labels
# in another column instead of having multiple columns. This allows us
# to map Color to the dosages.
NewData <- cbind(NewData[1], stack(NewData[-1]))
# Now we just tell ggplot to map colours to ind
ggplot(NewData, aes(x = Time_Post_Injection, y = values, color = ind)) +
geom_line() +
geom_point() +
scale_color_discrete(name = "Dose") +
labs(x = "Time Pist Injection") +
theme_bw()
Created on 2020-11-11 by the reprex package (v0.3.0)
Hi the main problem is that you did not get your data into a easy to handle format
library(dplyr)
library(tidyr)
library(ggplot2)
Newdata %>%
# get data in easy to handle format
tidyr::pivot_longer(-Time_Post_Injection, names_to = "DOSE", values_to = "VALUE") %>%
# plot and use the new DOSE column as group and color so you do not need one geom per line! (you can change geom_line() to geom_point also())
ggplot2::ggplot(aes(Time_Post_Injection, VALUE, group = DOSE, color = DOSE)) +
ggplot2::geom_line()

ggforce: geom_mark_ellipse - How to move connectors?

I have some data for which I would like to circle some different subsets. I am using ggplot2 and ggforce to plot the data and draw an ellipse (geom_mark_ellipse) around the data.
I have an issue in that the positions of the connectors on the ellipses (for my data) are in ambiguous positions (at the conjunction of two ellipses, on the border of two ellipses that graze each other).
How can I manually set the position of the connector to the ellipse? Or at least influence them into a particular region?
I have some code below which captures the spirit in which I'm plotting my data. For the purpose of the example, how could I make all of the labels appear in the top left of the plot, or all join the ellipses at x == 0, -2, -4 for each of the factors?
library(tidyverse)
library(ggforce)
x <- c(-1,0,1,-3,-2,2,3,-5,-4,4,5)
t <- c(1,1,1,2,2,2,2,3,3,3,3)
tmp <- as_tibble_col(x, column_name = "x")
tmp <- tmp %>% mutate(t = t)
#How do I move the position of the label connectors on the ellipses?
tmp %>%
ggplot(aes(x=x, y=x)) +
geom_mark_ellipse(aes(label = t, group=t),con.cap = 0) +
geom_point()
Created on 2020-05-05 by the reprex package (v0.3.0)
I've managed to do it for my contrived example, yet to try on my real data, but there is hope.
As shown in the code below, I created data to fill the area (top left) that I didn't want to have labels in, and gave it a factor of "". I manually set the colour of the connectors to NA for that factor, and got rid of the label background for everything. Because the factor is "", the label is an empty string, and nothing shows up. I also set scale_colour_manual to give the colour NA to the ellipse I didn't want to see.
I also filtered the geom_point to not show the data with a factor of "". Finally, I deleted the legend.
library(tidyverse)
library(ggforce)
x <- c(-1,0,1,-3,-2,2,3,-5,-4,4,5)
t <- c(1,1,1,2,2,2,2,3,3,3,3)
tmp <- as_tibble_col(x, column_name = "x")
tmp <- tmp %>% mutate(y=x)
tmp <- tmp %>% mutate(t = t)
#now lets add some dodging data
tmp <- tmp %>% mutate(t = as.character(t))
tmp <- tmp %>% add_row(x=c(-5,2.5,-2.5), y=c(-2.5,5,2.5),t="")
tmp %>%
ggplot(aes(x=x, y=y)) +
geom_mark_ellipse(aes(label = t, group=t, colour=factor(t)),
con.cap = 0, con.colour = c(NA, "black","black","black"),
label.fill=NA) +
scale_colour_manual(values=c(NA, "black", "black", "black")) +
geom_point(data = subset(tmp, t != "")) +
theme(legend.position = "none")
Created on 2020-05-06 by the reprex package (v0.3.0)

Assigning plot to a variable in a loop

I am trying to create 2 line plots.
But I noticed that using a for loop will generate two plots with y=mev2 (instead of a plot based on y=mev1 and another one based on y=mev2).
The code below shows the observation here.
mev1 <- c(1,3,7)
mev2 <- c(9,8,2)
Period <- c(1960, 1970, 1980)
df <- data.frame(Period, mev1, mev2)
library(ggplot2)
# Method 1: Creating plot1 and plot2 without using "for" loop (hard-code)
plot1 <- ggplot(data = df, aes(x=Period, y=unlist(as.list(df[2])))) + geom_line()
plot2 <- ggplot(data = df, aes(x=Period, y=unlist(as.list(df[3])))) + geom_line()
# Method 2: Creating plot1 and plot2 using "for" loop
for (i in 1:2) {
y_var <- unlist(as.list(df[i+1]))
assign(paste("plot", i, sep = ""), ggplot(data = df, aes(x=Period, y=y_var)) + geom_line())
}
Seems like this is due to some ggplot()'s way of working that I am not aware of.
Question:
If I want to use Method 2, how should I modify the logic?
People said that using assign() is not an "R-style", so I wonder what's an alternate way to do this? Say, using list?
One possible answer with no tidyverse command added is :
library(ggplot2)
y_var <- colnames(df)
for (i in 1:2) {
assign(paste("plot", i, sep = ""),
ggplot(data = df, aes_string(x=y_var[1], y=y_var[1 + i])) +
geom_line())
}
plot1
plot2
You may use aes_string. I hope it helps.
EDIT 1
If you want to stock your plot in a list, you can use this :
Initialize your list :
n <- 2 # number of plots
list_plot <- vector(mode = "list", length = n)
names(list_plot) <- paste("plot", 1:n)
Fill it :
for (i in 1:2) {
list_plot[[i]] <- ggplot(data = df, aes_string(x=y_var[1], y=y_var[1 + i])) +
geom_line()
}
Display :
list_plot[[1]]
list_plot[[2]]
For lines in different "plots", you can simplify it with facet_wrap():
library(tidyverse)
df %>%
gather(variable, value, -c(Period)) %>% # wide to long format
ggplot(aes(Period, value)) + geom_line() + facet_wrap(vars(variable))
You can also put it in a loop if necessary and store the results in a list:
# empty list
listed <- list()
# fill the list with the plots
for (i in c(2:3)){
listed[[i-1]] <- df[,-i] %>%
gather(variable, value, -c(Period)) %>%
ggplot(aes(Period, value)) + geom_line()
}
# to get the plots
listed[[1]]
listed[[2]]
Why do you want 2 separate plots? ggplots way to do this would be to get data in long format and then plot.
library(tidyverse)
df %>%
pivot_longer(cols = -Period) %>%
ggplot() + aes(Period, value, color = name) + geom_line()
Here is an alternative approach using a function and lapply. I recognize that you asked how to solve this using a loop. Still, I think it might be useful to consider this approach.
library(ggplot2)
mev1 <- c(1,3,7)
mev2 <- c(9,8,2)
Period <- c(1960, 1970, 1980)
df <- data.frame(Period, mev1, mev2)
myplot <- function(yvar){
plot <- ggplot(df, aes(Period, !!sym(yvar))) + geom_line()
return(plot)
}
colnames <- c("mev1","mev2")
list <- lapply(colnames, myplot)
names(list) <- paste0("plot_", colnames)
# Alternativing naming: names(list) <- paste0("plot", 1:2)
Using this approach you can easily apply your plot function to whatever columns you like. You can specify the columns by name, which may be preferrabe to specifying by position. Plots are saved in a list, and they are named afterwards using the names attribute. In my example I named the plots plot_mev1 and plot_mev2. But you can easily adjust to some other naming. E.g. write names(list) <- paste0("plot", 1:2) to get plot1 and plot2.
Note that I used !!sym() in the ggplot call. This is essentally an alternative to aes_string which was used in the answer of Rémi Coulaud. In this way ggplot understands even in the context of a function or in the context of a loop that "mev1" is a column of your dataset and not just a text string

R: Unexplainable behavior of ggplot inside a function

I have composed a function that develops histograms using ggplot2 on the numerical columns of a dataframe that will be passed to it. The function stores these plots into a list and then returns the list.
However when I run the function I get the same plot again and again.
My code is the following and I provide also a reproducible example.
hist_of_columns = function(data, class, variables_to_exclude = c()){
library(ggplot2)
library(ggthemes)
data = as.data.frame(data)
variables_numeric = names(data)[unlist(lapply(data, function(x){is.numeric(x) | is.integer(x)}))]
variables_not_to_plot = c(class, variables_to_exclude)
variables_to_plot = setdiff(variables_numeric, variables_not_to_plot)
indices = match(variables_to_plot, names(data))
index_of_class = match(class, names(data))
plots = list()
for (i in (1 : length(variables_to_plot))){
p = ggplot(data, aes(x= data[, indices[i]], color= data[, index_of_class], fill=data[, index_of_class])) +
geom_histogram(aes(y=..density..), alpha=0.3,
position="identity", bins = 100)+ theme_economist() +
geom_density(alpha=.2) + xlab(names(data)[indices[i]]) + labs(fill = class) + guides(color = FALSE)
name = names(data)[indices[i]]
plots[[name]] = p
}
plots
}
data(mtcars)
mtcars$am = factor(mtcars$am)
data = mtcars
variables_to_exclude = 'mpg'
class = 'am'
plots = hist_of_columns(data, class, variables_to_exclude)
If you check the list plots you will discover that it contains the same plot repeated.
Simply use aes_string to pass string variables into the ggplot() call. Right now, your plot uses different data sources, not aligned with ggplot's data argument. Below x, color, and fill are separate, unrelated vectors though they derive from same source but ggplot does not know that:
ggplot(data, aes(x= data[, indices[i]], color= data[, index_of_class], fill=data[, index_of_class]))
However, with aes_string, passing string names to x, color, and fill will point to data:
ggplot(data, aes_string(x= names(data)[indices[i]], color= class, fill= class))
Here is strategy using tidyeval that does what you are after:
library(rlang)
library(tidyverse)
hist_of_cols <- function(data, class, drop_vars) {
# tidyeval overhead
class_enq <- enquo(class)
drop_enqs <- enquo(drop_vars)
data %>%
group_by(!!class_enq) %>% # keep the 'class' column always
select(-!!drop_enqs) %>% # drop any 'drop_vars'
select_if(is.numeric) %>% # keep only numeric columns
gather("key", "value", -!!class_enq) %>% # go to long form
split(.$key) %>% # make a list of data frames
map(~ ggplot(., aes(value, fill = !!class_enq)) + # plot as usual
geom_histogram() +
geom_density(alpha = .5) +
labs(x = unique(.$key)))
}
hist_of_cols(mtcars, am, mpg)
hist_of_cols(mtcars, am, c(mpg, wt))

Resources