Related
I have several groups where for each I have several classes for which I measured continuous values:
set.seed(1)
df <- data.frame(value = c(rnorm(100,1,1), rnorm(100,2,1), rnorm(100,3,1),
rnorm(100,3,1), rnorm(100,1,1), rnorm(100,2,1),
rnorm(100,2,1), rnorm(100,3,1), rnorm(100,1,1)),
class = c(rep("c1",100), rep("c2",100), rep("c3",100),
rep("c2",100), rep("c4",100), rep("c1",100),
rep("c4",100), rep("c3",100), rep("c2",100)),
group = c(rep("g1",300), rep("g2",300), rep("g3",300)))
df$class <- factor(df$class, levels =c("c1","c2","c3","c4"))
df$group <- factor(df$group, levels =c("g1","g2","g3"))
Not each group in the data has the same classes, or put differently each group has a subset of all classes.
I'm trying to generate R plotly density curves for each group, color-coded by class, and then combine them all to a single plot using plotly's subplot function.
This is what I'm doing:
library(dplyr)
library(ggplot2)
library(plotly)
set.seed(1)
df <- data.frame(value = c(rnorm(100,1,1), rnorm(100,2,1), rnorm(100,3,1),
rnorm(100,3,1), rnorm(100,1,1), rnorm(100,2,1),
rnorm(100,2,1), rnorm(100,3,1), rnorm(100,1,1)),
class = c(rep("c1",100), rep("c2",100), rep("c3",100),
rep("c2",100), rep("c4",100), rep("c1",100),
rep("c4",100), rep("c3",100), rep("c2",100)),
group = c(rep("g1",300), rep("g2",300), rep("g3",300)))
df$class <- factor(df$class, levels =c("c1","c2","c3","c4"))
df$group <- factor(df$group, levels =c("g1","g2","g3"))
plot.list <- lapply(c("g1","g2","g3"), function(g){
density.df <- do.call(rbind,lapply(unique(dplyr::filter(df, group == g)$class),function(l)
ggplot_build(ggplot(dplyr::filter(df, group == g & class == l),aes(x=value))+geom_density(adjust=1,colour="#A9A9A9"))$data[[1]] %>%
dplyr::select(x,y) %>% dplyr::mutate(class = l)))
plot_ly(x = density.df$x, y = density.df$y, type = 'scatter', mode = 'lines',color = density.df$class) %>%
layout(title=g,xaxis = list(zeroline = F), yaxis = list(zeroline = F))
})
subplot(plot.list,nrows=length(plot.list),shareX=T)
Which gives:
The problems I'd like to fix are:
Have the legend appear only once (right now it repeats for each group) merging all classes
Have the title appear in each of the subplots rather than only for the last plot at is it is now. (I know that I could simply have the group name as the x-axis titles but I'd rather save that space because in reality I have more than 3 groups)
Using plot_ly() it's a little tricky, at least if you'd like to stick with using the color argument to generate multiple traces from the data.
You need to define a legendgroup taking into account your class variable.
This legendgroup however doesn't merge the legend items into one (it just groups them).
Accordingly to avoid duplicated entries in the legend you need to set showlegend = FALSE for the traces you want to hide (regarding the legend).
Edit: this can be done via plotly::style:
set.seed(1)
df <- data.frame(value = c(rnorm(100,1,1), rnorm(100,2,1), rnorm(100,3,1),
rnorm(100,3,1), rnorm(100,1,1), rnorm(100,2,1),
rnorm(100,2,1), rnorm(100,3,1), rnorm(100,1,1)),
class = c(rep("c1",100), rep("c2",100), rep("c3",100),
rep("c2",100), rep("c4",100), rep("c1",100),
rep("c4",100), rep("c3",100), rep("c2",100)),
group = c(rep("g1",300), rep("g2",300), rep("g3",300)))
df$class <- factor(df$class, levels =c("c1","c2","c3","c4"))
df$group <- factor(df$group, levels =c("g1","g2","g3"))
library(dplyr)
library(ggplot2)
library(plotly)
plot.list <- lapply(c("g1","g2","g3"), function(g){
density.df <- do.call(rbind,lapply(unique(dplyr::filter(df, group == g)$class),function(l)
ggplot_build(ggplot(dplyr::filter(df, group == g & class == l),aes(x=value))+geom_density(adjust=1,colour="#A9A9A9"))$data[[1]] %>%
dplyr::select(x,y) %>% dplyr::mutate(class = l)))
p <- plot_ly(data = density.df, x = ~x, y = ~y, type = 'scatter', mode = 'lines', color = ~class, legendgroup = ~class, showlegend = FALSE) %>%
layout(xaxis = list(zeroline = F), yaxis = list(zeroline = FALSE)) %>%
add_annotations(
text = g,
x = 0.5,
y = 1.1,
yref = "paper",
xref = "paper",
xanchor = "middle",
yanchor = "top",
showarrow = FALSE,
font = list(size = 15)
)
if(g == "g1"){
p <- style(p, showlegend = TRUE)
} else if(g == "g2"){
p <- style(p, showlegend = TRUE, traces = 3)
} else {
p <- style(p, showlegend = FALSE)
}
p
})
subplot(plot.list, nrows = length(plot.list), shareX = TRUE) # margin = 0.01
Initial answer:
This can be done by setting showlegend = TRUE only for the first plot and force it to display all available classes via dummy data. Please see the following:
set.seed(1)
df <- data.frame(value = c(rnorm(100,1,1), rnorm(100,2,1), rnorm(100,3,1),
rnorm(100,3,1), rnorm(100,1,1), rnorm(100,2,1),
rnorm(100,2,1), rnorm(100,3,1), rnorm(100,1,1)),
class = c(rep("c1",100), rep("c2",100), rep("c3",100),
rep("c2",100), rep("c4",100), rep("c1",100),
rep("c4",100), rep("c3",100), rep("c2",100)),
group = c(rep("g1",300), rep("g2",300), rep("g3",300)))
df$class <- factor(df$class, levels =c("c1","c2","c3","c4"))
df$group <- factor(df$group, levels =c("g1","g2","g3"))
library(dplyr)
library(ggplot2)
library(plotly)
plot.list <- lapply(c("g1","g2","g3"), function(g){
density.df <- do.call(rbind,lapply(unique(dplyr::filter(df, group == g)$class),function(l)
ggplot_build(ggplot(dplyr::filter(df, group == g & class == l),aes(x=value))+geom_density(adjust=1,colour="#A9A9A9"))$data[[1]] %>%
dplyr::select(x,y) %>% dplyr::mutate(class = l)))
p <- plot_ly(data = density.df, x = ~x, y = ~y, type = 'scatter', mode = 'lines', color = ~class, legendgroup = ~class, showlegend = FALSE) %>%
layout(xaxis = list(zeroline = F), yaxis = list(zeroline = FALSE)) %>%
add_annotations(
text = g,
x = 0.5,
y = 1.1,
yref = "paper",
xref = "paper",
xanchor = "middle",
yanchor = "top",
showarrow = FALSE,
font = list(size = 15)
)
if(g == "g1"){
dummy_df <- data.frame(class = unique(df$class))
dummy_df$x <- density.df$x[1]
dummy_df$y <- density.df$y[1]
p <- add_trace(p, data = dummy_df, x = ~x, y = ~y, color = ~class, type = "scatter", mode = "lines", showlegend = TRUE, legendgroup = ~class, hoverinfo = 'none')
}
p
})
subplot(plot.list, nrows = length(plot.list), shareX = TRUE)
Another approach (avoiding the dummy data workaround) would be to create each trace in a loop (or via lapply) and control it's legend-visibilty according to the first occurrence of the item.
Furthermore, I think it should be possible to control the visibilty of legend items using ?plotly::style. However, I can't control it for single traces currently. I filed an issue here.
Regarding the titles for the subplots please see this.
You can use the following code
library(tidyverse)
library(plotly)
ggplotly(
ggplot(df, aes(x=value, col = class)) +
geom_density(adjust=1) +
facet_wrap(~group, ncol = 1) +
theme_minimal() +
theme(legend.position = 'top')
)
which gives me the following plot
I would like to ask you if you could help me in customizing of colors in a stacked bar chart created by plotly.
The problem is following - I have to recreate a dashboard (from an excel file to a html file). A part of the dashboard is a chart providing us with information about early production of each entity. The chart is a stacked bar chart type by plotly. As each entity is defined by a specific color (defined in RGB) throughout whole dashboard, I need to keep these colors in the donut chart as well. But there is a problem. I always get the following warning:
Warning message:
In RColorBrewer::brewer.pal(N, "Set2") :
n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
and the resulting donut chart containts only one Entity with a not-specified color. Also, the colors in the legend are not those which are defined.
Any idea what to do with it? Thank you so much in advance.
Code:
library(dplyr)
library(plotly)
dt <- as.data.frame(matrix(ncol = 13, nrow = 19))
colnames(dt) <- c("Entity", month.abb)
for (i in 1:nrow(dt)) {
dt[i, 1] <- paste("Entity", i, sep="")
dt[i, -1] <- floor(runif(12, min=0, max=100))
}
# assign colors to entities
dt$"EntityColor" <- c("#074263", "#0B5394", "#3D85C6", "#6D9EEB", "#A4C2F4", "#CFE2F3", "#5B0F00", "#85200C", "#A61C00", "#CC4125", "#DD7E6B", "#E6B8AF", "#F8CBAD", "#F4CCCC", "#274E13", "#38761D", "#E06666", "#CC0000", "#20124D")
data.table::melt(dt) %>%
plot_ly(x = ~variable,
y = ~value,
type = "bar",
color = ~Entity,
marker = list(colors = ~EntityColor)
) %>%
layout(yaxis = list(title = ""),
xaxis = list(title = ""),
barmode = 'stack')
Plot:
Refined approach after comments:
Since the colors turned out to be a bit tricky (see initial suggestion below) I had to break the whole thing down and use a combination of plot_ly() and add_traces() in a loop to make sure that the plotly settings did not apply colors in the wrong order.
The following plot should be exactly what you're looking for.
Plot:
Note that I've appended a continuous numerical column ID. Why? Because you wanted the names in alphabetical order, and the rows are added to the plot in the order they appear in your source. And It's a bit tricky since a straight up ordering using dt %>% arrange((Entity)) would give you Entity1, Enitity10, Entity11 etc. Let me know if you'd like to adjust this in any other way.
Code:
library(dplyr)
library(plotly)
# data
set.seed(123)
dt <- as.data.frame(matrix(ncol = 13, nrow = 19))
colnames(dt) <- c("Entity", month.abb)
for (i in 1:nrow(dt)) {
dt[i, 1] <- paste("Entity", i, sep="")
dt[i, -1] <- floor(runif(12, min=0, max=100))
}
# assign colors to entities
dt$"EntityColor" <- c("#074263", "#0B5394", "#3D85C6", "#6D9EEB", "#A4C2F4", "#CFE2F3", "#5B0F00", "#85200C", "#A61C00", "#CC4125", "#DD7E6B", "#E6B8AF", "#F8CBAD", "#F4CCCC", "#274E13", "#38761D", "#E06666", "#CC0000", "#20124D")
# sort data
dt$ID <- seq.int(nrow(dt))
dt <- dt %>% arrange(desc(ID))
# specify month as factor variable to ensure correct order
months=names(dt)[2:13]
months<- factor(months, levels = c(months))
# plotly setup
p <- plot_ly(type = 'bar')
# add trace for each entity
nrows = nrow(dt)
for(i in 1:nrows) {
p <- p %>% add_trace(x=months, y = unlist(dt[i,2:13], use.names=F), type = 'bar',
#name = paste(dt[i,1], dt[i,14], sep = "_"),
name = dt[i,1],
type = 'bar',
marker=list(color = dt[i,14])) %>%
layout(barmode = 'stack')
}
# Edit layout
p <- p %>% layout(title = list(xanchor='right', text='Correct colors, orderered legend'),
yaxis = list(title = ''),
xaxis = list(title = 'month'))
p
Color correctness verification:
Initial suggestion
Here's an initial suggestion. First of all, color = ~Entity has got to go. And marker = list(color = ~EntityColor) versus marker = list(colors = ~EntityColor) gives two different results. What makes matters even stranger is that the pie chart documentation uses:
marker = list(colors = colors, ...)
... and the bar chart documentation uses:
marker = list(color = c('rgba(204,204,204,1)', 'rgba(222,45,38,0.8)', ...)
...without the s at the end of color.
Either way, you should test both marker = list(color = ~EntityColor) and marker = list(colors = ~EntityColor) and see what's correct for you.
Plot:
Code:
dt <- as.data.frame(matrix(ncol = 13, nrow = 19))
colnames(dt) <- c("Entity", month.abb)
for (i in 1:nrow(dt)) {
dt[i, 1] <- paste("Entity", i, sep="")
dt[i, -1] <- floor(runif(12, min=0, max=100))
}
# assign colors to entities
dt$"EntityColor" <- c("#074263", "#0B5394", "#3D85C6", "#6D9EEB", "#A4C2F4", "#CFE2F3", "#5B0F00", "#85200C", "#A61C00", "#CC4125", "#DD7E6B", "#E6B8AF", "#F8CBAD", "#F4CCCC", "#274E13", "#38761D", "#E06666", "#CC0000", "#20124D")
data.table::melt(dt) %>%
plot_ly(x = ~variable,
y = ~value,
name= ~Entity,
type = "bar",
#color = ~Entity,
marker = list(colors = ~EntityColor)
) %>%
layout(yaxis = list(title = ""),
xaxis = list(title = ""),
barmode = 'stack')
Take a look and see how it works out for you.
I am trying to generate multiple graphs in Plotly for 30 different sales offices. Each graph would have 3 lines: sales, COGS, and inventory. I would like to keep this on one graph with 30 buttons for the different offices. This is the closest solution I could find on SO:
## Create random data. cols holds the parameter that should be switched
l <- lapply(1:100, function(i) rnorm(100))
df <- as.data.frame(l)
cols <- paste0(letters, 1:100)
colnames(df) <- cols
df[["c"]] <- 1:100
## Add trace directly here, since plotly adds a blank trace otherwise
p <- plot_ly(df,
type = "scatter",
mode = "lines",
x = ~c,
y= ~df[[cols[[1]]]],
name = cols[[1]])
## Add arbitrary number of traces
## Ignore first col as it has already been added
for (col in cols[-1]) {
p <- p %>% add_lines(x = ~c, y = df[[col]], name = col, visible = FALSE)
}
p <- p %>%
layout(
title = "Dropdown line plot",
xaxis = list(title = "x"),
yaxis = list(title = "y"),
updatemenus = list(
list(
y = 0.7,
## Add all buttons at once
buttons = lapply(cols, function(col) {
list(method="restyle",
args = list("visible", cols == col),
label = col)
})
)
)
)
print(p)
It works but only on graphs with single lines/traces. How can I modify this code to do the same thing but with graphs with 2 or more traces? or is there a better solution? Any help would be appreciated!
### EXAMPLE 2
#create fake time series data
library(plotly)
set.seed(1)
df <- data.frame(replicate(31,sample(200:500,24,rep=TRUE)))
cols <- paste0(letters, 1:31)
colnames(df) <- cols
#create time series
timeseries <- ts(df[[1]], start = c(2018,1), end = c(2019,12), frequency = 12)
fit <- auto.arima(timeseries, d=1, D=1, stepwise =FALSE, approximation = FALSE)
fore <- forecast(fit, h = 12, level = c(80, 95))
## Add trace directly here, since plotly adds a blank trace otherwise
p <- plot_ly() %>%
add_lines(x = time(timeseries), y = timeseries,
color = I("black"), name = "observed") %>%
add_ribbons(x = time(fore$mean), ymin = fore$lower[, 2], ymax = fore$upper[, 2],
color = I("gray95"), name = "95% confidence") %>%
add_ribbons(x = time(fore$mean), ymin = fore$lower[, 1], ymax = fore$upper[, 1],
color = I("gray80"), name = "80% confidence") %>%
add_lines(x = time(fore$mean), y = fore$mean, color = I("blue"), name = "prediction")
## Add arbitrary number of traces
## Ignore first col as it has already been added
for (col in cols[2:31]) {
timeseries <- ts(df[[col]], start = c(2018,1), end = c(2019,12), frequency = 12)
fit <- auto.arima(timeseries, d=1, D=1, stepwise =FALSE, approximation = FALSE)
fore <- forecast(fit, h = 12, level = c(80, 95))
p <- p %>%
add_lines(x = time(timeseries), y = timeseries,
color = I("black"), name = "observed", visible = FALSE) %>%
add_ribbons(x = time(fore$mean), ymin = fore$lower[, 2], ymax = fore$upper[, 2],
color = I("gray95"), name = "95% confidence", visible = FALSE) %>%
add_ribbons(x = time(fore$mean), ymin = fore$lower[, 1], ymax = fore$upper[, 1],
color = I("gray80"), name = "80% confidence", visible = FALSE) %>%
add_lines(x = time(fore$mean), y = fore$mean, color = I("blue"), name = "prediction", visible = FALSE)
}
p <- p %>%
layout(
title = "Dropdown line plot",
xaxis = list(title = "x"),
yaxis = list(title = "y"),
updatemenus = list(
list(
y = 0.7,
## Add all buttons at once
buttons = lapply(cols, function(col) {
list(method="restyle",
args = list("visible", cols == col),
label = col)
})
)
)
)
p
You were very close!
If for example you want graphs with 3 traces,
You only need to tweak two things:
Set visible the three first traces,
Modify buttons to show traces in groups of three.
My code:
## Create random data. cols holds the parameter that should be switched
library(plotly)
l <- lapply(1:99, function(i) rnorm(100))
df <- as.data.frame(l)
cols <- paste0(letters, 1:99)
colnames(df) <- cols
df[["c"]] <- 1:100
## Add trace directly here, since plotly adds a blank trace otherwise
p <- plot_ly(df,
type = "scatter",
mode = "lines",
x = ~c,
y= ~df[[cols[[1]]]],
name = cols[[1]])
p <- p %>% add_lines(x = ~c, y = df[[2]], name = cols[[2]], visible = T)
p <- p %>% add_lines(x = ~c, y = df[[3]], name = cols[[3]], visible = T)
## Add arbitrary number of traces
## Ignore first col as it has already been added
for (col in cols[4:99]) {
print(col)
p <- p %>% add_lines(x = ~c, y = df[[col]], name = col, visible = F)
}
p <- p %>%
layout(
title = "Dropdown line plot",
xaxis = list(title = "x"),
yaxis = list(title = "y"),
updatemenus = list(
list(
y = 0.7,
## Add all buttons at once
buttons = lapply(0:32, function(col) {
list(method="restyle",
args = list("visible", cols == c(cols[col*3+1],cols[col*3+2],cols[col*3+3])),
label = paste0(cols[col*3+1], " ",cols[col*3+2], " ",cols[col*3+3] ))
})
)
)
)
print(p)
PD: I only use 99 cols because I want 33 groups of 3 graphs
I've got a plotly bar chart in my Shiny app, and I'd like to set specific colors each column in the resulting bar chart.
#Here's some reproducible data
df=data.frame(Month=c("Jan","Feb","Mar","Apr","May","Jun"),Criteria1=c(10,15,20,15,7,6),Criteria2=c(3,8,5,7,9,10),Criteria3=c(11,18,14,9,3,1))
#Plot
colNames <- names(df)[-1] #Month is the first column
# Here is where I set the colors for each `Criteria`, assuming that the order of colors follows the same order as the factor levels of the `Criteria`.
p <- plotly::plot_ly(marker=list(colors=c('#CC1480', '#FF9673', '#E1C8B4')))
for(trace in colNames){
p <- p %>% plotly::add_trace(data = df, x = ~Month, y = as.formula(paste0("~`", trace, "`")), name = trace, type = "bar")
}
p %>%
layout(title = "Trend Over Time",showlegend = FALSE,
xaxis = list(title = ""),
yaxis = list (title = "Monthly Count of QoL Tweets"))
However the resulting plot does not show any of the colors I specify.
What am I doing incorrectly? Any pointers would be very appreciated.
I don't think loop is neccessary here, the following provides as well more control over choosing the color for specific levels when df is melted, the individual levels Criteria1, Criteria2, Criteria3
library(plotly)
library(reshape2)
#Yout data.frame
df <- data.frame(Month = c("Jan","Feb","Mar","Apr","May","Jun"),
Criteria1 = c(10,15,20,15,7,6),
Criteria2 = c(3,8,5,7,9,10),
Criteria3 = c(11,18,14,9,3,1))
melt(df, id.vars = 'Month') %>% plot_ly(x = ~Month, y = ~value, type = 'bar',
color = ~variable,
colors = c(Criteria1 = '#CC1480', Criteria2 = '#FF9673', Criteria3 = '#E1C8B4'))
You could assign your colors to a vector:
colors <- c('#CC1480', '#FF9673', '#E1C8B4')
and then add the traces in a slightly modified loop.
p <- plotly::add_trace(p,
x = df$Month,
y = df[,trace],
marker = list(color = colors[[match(trace, colNames)]]),
name = trace,
type = "bar")
}
which will give you the following graph
Complete code
library("plotly")
df=data.frame(Month=c("Jan", "Feb","Mar", "Apr", "May", "Jun"),
Criteria1 = c(10, 15,20,15,7,6),
Criteria2 = c(3, 8, 5, 7, 9, 10),
Criteria3 = c(11, 18, 14, 9, 3, 1))
colNames <- names(df)[-1] #Month is the first column
colors <- c('#CC1480', '#FF9673', '#E1C8B4')
p <- plotly::plot_ly()
#colNames = c('Criteria1')
for(trace in colNames){
p <- plotly::add_trace(p,
x = df$Month,
y = df[,trace],
marker = list(color = colors[[match(trace, colNames)]]),
name = trace,
type = "bar")
}
p
Using the code below, I have a stacked bar chart as shown.
myDF <- structure(list(Group = structure(1:3, .Label = c("2017-04-02",
"2017-04-09", "2017-04-16"), class = "factor"), Passive = c(4,
1, 0), Promoter = c(12, 1, 4), Detractors = c(0, 0, 0)), .Names = c("Group",
"Passive", "Promoter", "Detractors"), row.names = c(NA, -3L), class = "data.frame")
x <- list(
title = ""
)
y <- list(
title = "Count"
)
p <- plot_ly(myDF, x = ~Group)
if ("Detractors" %in% colnames(myDF[-1])){
p <- add_trace(p, y = ~`Detractors`, name = 'Detractors', type = 'bar',
marker = list(color = '#D52728')) #red
}
if ("Passive" %in% colnames(myDF[-1])){
p <- add_trace(p, y = ~`Passive`, name = 'Passive', type = 'bar',
marker = list(color = '#1F78B4')) #orange
}
if ("Promoter" %in% colnames(myDF[-1])){
p <- add_trace(p, y = ~`Promoter`, name = 'Promoter', type = 'bar',
marker = list(color = '#2BA02D')) #green
}
p <- layout(p, xaxis = x, yaxis = y, barmode = 'stack', legend = list(orientation = 'h'), showlegend=T)
p
I want to show the Net Promoter Scores on top of each bar. The formula I use for calculating the NPS is:
(Number of Promoters — Number of Detractors) / (Number of Respondents) x 100.
So for the first bar it would be ((12 - 0)/16) * 100 = 75. I want to show NPS: 75 on top of the first bar. Similarly for the second and third bar, the number on top will be (1-0)/2*100 = 50 and (4-0)/4*100 = 100 respectively. I will be getting more data for coming weeks, so there will be a bar for each week of data in the future. Is there a way to show this calculated value on top on the bars?
You could add annotations to your layout where the x values are your dates, the y values are the stacked values and the text is the Net Promoter Score, e.g.
df = data.frame(x = c('A', 'B', 'C', 'D'),
y = c(1,3,2,4),
calculated_values = c(20,30,10,50)
)
annotations <- list()
for (i in 1:length(df$calculated_values)) {
annotations[[i]] <- list(x = df$x[[i]],
y = df$y[[i]],
text = df$calculated_values[[i]],
yanchor='bottom',
showarrow = FALSE)
}
plot_ly(df,
x = ~x,
y = ~y,
type = 'bar') %>%
layout(annotations = annotations)
Or for this particular example replace the last two lines of your code with:
annotations <- list()
for (row in rownames(myDF)) {
annotations[[as.integer(row)]] = list(x = as.character(myDF[row,]$Group),
y = sum(myDF[row,][-1]),
text = ((myDF[row,][[3]] - myDF[row,][[4]]) / sum(myDF[row,][-1])) * 100,
yanchor='bottom',
showarrow = FALSE)
}
p <- layout(p, xaxis = x, yaxis = y, barmode = 'stack', legend = list(orientation = 'h'), showlegend=T,
annotations = annotations)
p