I have several groups where for each I have several classes for which I measured continuous values:
set.seed(1)
df <- data.frame(value = c(rnorm(100,1,1), rnorm(100,2,1), rnorm(100,3,1),
rnorm(100,3,1), rnorm(100,1,1), rnorm(100,2,1),
rnorm(100,2,1), rnorm(100,3,1), rnorm(100,1,1)),
class = c(rep("c1",100), rep("c2",100), rep("c3",100),
rep("c2",100), rep("c4",100), rep("c1",100),
rep("c4",100), rep("c3",100), rep("c2",100)),
group = c(rep("g1",300), rep("g2",300), rep("g3",300)))
df$class <- factor(df$class, levels =c("c1","c2","c3","c4"))
df$group <- factor(df$group, levels =c("g1","g2","g3"))
Not each group in the data has the same classes, or put differently each group has a subset of all classes.
I'm trying to generate R plotly density curves for each group, color-coded by class, and then combine them all to a single plot using plotly's subplot function.
This is what I'm doing:
library(dplyr)
library(ggplot2)
library(plotly)
set.seed(1)
df <- data.frame(value = c(rnorm(100,1,1), rnorm(100,2,1), rnorm(100,3,1),
rnorm(100,3,1), rnorm(100,1,1), rnorm(100,2,1),
rnorm(100,2,1), rnorm(100,3,1), rnorm(100,1,1)),
class = c(rep("c1",100), rep("c2",100), rep("c3",100),
rep("c2",100), rep("c4",100), rep("c1",100),
rep("c4",100), rep("c3",100), rep("c2",100)),
group = c(rep("g1",300), rep("g2",300), rep("g3",300)))
df$class <- factor(df$class, levels =c("c1","c2","c3","c4"))
df$group <- factor(df$group, levels =c("g1","g2","g3"))
plot.list <- lapply(c("g1","g2","g3"), function(g){
density.df <- do.call(rbind,lapply(unique(dplyr::filter(df, group == g)$class),function(l)
ggplot_build(ggplot(dplyr::filter(df, group == g & class == l),aes(x=value))+geom_density(adjust=1,colour="#A9A9A9"))$data[[1]] %>%
dplyr::select(x,y) %>% dplyr::mutate(class = l)))
plot_ly(x = density.df$x, y = density.df$y, type = 'scatter', mode = 'lines',color = density.df$class) %>%
layout(title=g,xaxis = list(zeroline = F), yaxis = list(zeroline = F))
})
subplot(plot.list,nrows=length(plot.list),shareX=T)
Which gives:
The problems I'd like to fix are:
Have the legend appear only once (right now it repeats for each group) merging all classes
Have the title appear in each of the subplots rather than only for the last plot at is it is now. (I know that I could simply have the group name as the x-axis titles but I'd rather save that space because in reality I have more than 3 groups)
Using plot_ly() it's a little tricky, at least if you'd like to stick with using the color argument to generate multiple traces from the data.
You need to define a legendgroup taking into account your class variable.
This legendgroup however doesn't merge the legend items into one (it just groups them).
Accordingly to avoid duplicated entries in the legend you need to set showlegend = FALSE for the traces you want to hide (regarding the legend).
Edit: this can be done via plotly::style:
set.seed(1)
df <- data.frame(value = c(rnorm(100,1,1), rnorm(100,2,1), rnorm(100,3,1),
rnorm(100,3,1), rnorm(100,1,1), rnorm(100,2,1),
rnorm(100,2,1), rnorm(100,3,1), rnorm(100,1,1)),
class = c(rep("c1",100), rep("c2",100), rep("c3",100),
rep("c2",100), rep("c4",100), rep("c1",100),
rep("c4",100), rep("c3",100), rep("c2",100)),
group = c(rep("g1",300), rep("g2",300), rep("g3",300)))
df$class <- factor(df$class, levels =c("c1","c2","c3","c4"))
df$group <- factor(df$group, levels =c("g1","g2","g3"))
library(dplyr)
library(ggplot2)
library(plotly)
plot.list <- lapply(c("g1","g2","g3"), function(g){
density.df <- do.call(rbind,lapply(unique(dplyr::filter(df, group == g)$class),function(l)
ggplot_build(ggplot(dplyr::filter(df, group == g & class == l),aes(x=value))+geom_density(adjust=1,colour="#A9A9A9"))$data[[1]] %>%
dplyr::select(x,y) %>% dplyr::mutate(class = l)))
p <- plot_ly(data = density.df, x = ~x, y = ~y, type = 'scatter', mode = 'lines', color = ~class, legendgroup = ~class, showlegend = FALSE) %>%
layout(xaxis = list(zeroline = F), yaxis = list(zeroline = FALSE)) %>%
add_annotations(
text = g,
x = 0.5,
y = 1.1,
yref = "paper",
xref = "paper",
xanchor = "middle",
yanchor = "top",
showarrow = FALSE,
font = list(size = 15)
)
if(g == "g1"){
p <- style(p, showlegend = TRUE)
} else if(g == "g2"){
p <- style(p, showlegend = TRUE, traces = 3)
} else {
p <- style(p, showlegend = FALSE)
}
p
})
subplot(plot.list, nrows = length(plot.list), shareX = TRUE) # margin = 0.01
Initial answer:
This can be done by setting showlegend = TRUE only for the first plot and force it to display all available classes via dummy data. Please see the following:
set.seed(1)
df <- data.frame(value = c(rnorm(100,1,1), rnorm(100,2,1), rnorm(100,3,1),
rnorm(100,3,1), rnorm(100,1,1), rnorm(100,2,1),
rnorm(100,2,1), rnorm(100,3,1), rnorm(100,1,1)),
class = c(rep("c1",100), rep("c2",100), rep("c3",100),
rep("c2",100), rep("c4",100), rep("c1",100),
rep("c4",100), rep("c3",100), rep("c2",100)),
group = c(rep("g1",300), rep("g2",300), rep("g3",300)))
df$class <- factor(df$class, levels =c("c1","c2","c3","c4"))
df$group <- factor(df$group, levels =c("g1","g2","g3"))
library(dplyr)
library(ggplot2)
library(plotly)
plot.list <- lapply(c("g1","g2","g3"), function(g){
density.df <- do.call(rbind,lapply(unique(dplyr::filter(df, group == g)$class),function(l)
ggplot_build(ggplot(dplyr::filter(df, group == g & class == l),aes(x=value))+geom_density(adjust=1,colour="#A9A9A9"))$data[[1]] %>%
dplyr::select(x,y) %>% dplyr::mutate(class = l)))
p <- plot_ly(data = density.df, x = ~x, y = ~y, type = 'scatter', mode = 'lines', color = ~class, legendgroup = ~class, showlegend = FALSE) %>%
layout(xaxis = list(zeroline = F), yaxis = list(zeroline = FALSE)) %>%
add_annotations(
text = g,
x = 0.5,
y = 1.1,
yref = "paper",
xref = "paper",
xanchor = "middle",
yanchor = "top",
showarrow = FALSE,
font = list(size = 15)
)
if(g == "g1"){
dummy_df <- data.frame(class = unique(df$class))
dummy_df$x <- density.df$x[1]
dummy_df$y <- density.df$y[1]
p <- add_trace(p, data = dummy_df, x = ~x, y = ~y, color = ~class, type = "scatter", mode = "lines", showlegend = TRUE, legendgroup = ~class, hoverinfo = 'none')
}
p
})
subplot(plot.list, nrows = length(plot.list), shareX = TRUE)
Another approach (avoiding the dummy data workaround) would be to create each trace in a loop (or via lapply) and control it's legend-visibilty according to the first occurrence of the item.
Furthermore, I think it should be possible to control the visibilty of legend items using ?plotly::style. However, I can't control it for single traces currently. I filed an issue here.
Regarding the titles for the subplots please see this.
You can use the following code
library(tidyverse)
library(plotly)
ggplotly(
ggplot(df, aes(x=value, col = class)) +
geom_density(adjust=1) +
facet_wrap(~group, ncol = 1) +
theme_minimal() +
theme(legend.position = 'top')
)
which gives me the following plot
Related
I am using the R programming language. I trying to learn how to customize hover text in 3d plotly objects as seen here: https://rstudio-pubs-static.s3.amazonaws.com/441420_9a7c15988f3c4f59b2d828eb87ba1634.html
Recently, I have learned how to create a 3d plotly object for some data that I simulated :
library(Rtsne)
library(dplyr)
library(ggplot2)
library(plotly)
library(caret)
library(randomForest)
#data
a = iris
a <- unique(a)
#create two species just to make things easier
s <- c("a","b")
species<- sample(s , 149, replace=TRUE, prob=c(0.3, 0.7))
a$species = species
a$species = as.factor(a$species)
#split data into train/test, and then random forest
index = createDataPartition(a$species, p=0.7, list = FALSE)
train = a[index,]
test = a[-index,]
rf = randomForest(species ~ ., data=train, ntree=50, mtry=2)
#have the model predict the test set
pred = predict(rf, test, type = "prob")
labels = as.factor(ifelse(pred[,2]>0.5, "a", "b"))
confusionMatrix(labels, test$species)
#tsne algorithm
tsne_obj_3 <- Rtsne(test[,-5], perplexity=1, dims=3)
df_m2 <- as.data.frame(tsne_obj_3$Y)
df_m2$labels = test$species
df_m2$color = ifelse(df_m2$labels == "a", "red","blue")
df_m2$petal_length = test$Petal.Length
axis_1 = df_m2$V1
axis_2 = df_m2$V2
axis_3 = df_m2$V3
plot_ly(x=as.vector(axis_1),
y=as.vector(axis_2),
z=axis_3,
type="scatter3d",
mode="markers",
name = "Obs",
marker = list(size = 3)) %>%
add_mesh(x=as.vector(axis_1),
y=as.vector(axis_2),
z=df_m2$pred,
type = "mesh3d",
name = "Preds")
Now, I am trying to customize this plotly object so that different labels appear when you move the mouse over each point, and points corresponding to a given class all have the same color:
p <- plot_ly(type = 'scatter3d', mode = 'markers', colors = "Accent", color = df_m2$color) %>%
add_trace(
x = df_m2$V1,
y = df_m2$V2,
z = df_m2$V3,
marker = list(
size = 3),
name = df_m2$labels,
text = paste("Species: ", df_m2$labels ; "Width: ", df_m2$petal.width ; "color: ", df_m2$color" ),
showlegend = T
) %>%
add_mesh(x=as.vector(axis_1),
y=as.vector(axis_2),
z=df_m2$pred,
type = "mesh3d",
name = "Preds")
%>%
layout(
title = "none",
titlefont = list(
size = 10
),
paper_bgcolor = "#fffff8",
font = t,
xaxis = list(
zeroline = F
),
yaxis = list(
hoverformat = '.2f',
zeroline = F
)
)
p
However, there is an error here. Can someone please show me what I am doing wrong?
Thanks
Not sure what exactly you want to do with the predicted classes, but maybe something like this? (Color corresponds to real species, mouseover also shows prediction).
library(reprex)
reprex({
suppressPackageStartupMessages(invisible(
lapply(c("Rtsne", "dplyr", "ggplot2", "plotly", "caret", "randomForest"),
require, character.only = TRUE)))
#data
a <- unique(iris)
#create two species just to make things easier
set.seed(123)
a$species <- factor(sample(c("a", "b"), 149, replace=TRUE, prob=c(0.3, 0.7)))
#split data into train/test, and then random forest
index = createDataPartition(a$species, p=0.7, list = FALSE)
train = a[index,]
test = a[-index,]
rf <- randomForest(species ~ ., data=train, mtry=2)
#have the model predict the test set
pred <- predict(rf, test, type = "prob")
labels <- predict(rf, test)
confusionMatrix(labels, test$species)
#tsne algorithm
tsne_obj_3 <- Rtsne(test[,-5], perplexity=1, dims=3)
df_m2 <- as.data.frame(tsne_obj_3$Y)
df_m2$labels = toupper(test$species)
df_m2$pred <- labels # you did not define but call pred in plot_ly call
df_m2$color = ifelse(df_m2$labels == "A", "red", "blue")
df_m2$petal_length = test$Petal.Length
axis_1 <- df_m2$V1
axis_2 <- df_m2$V2
axis_3 <- df_m2$V3
plot_ly(type = 'scatter3d', mode = 'markers', colors = c("blue", "red"),
color = df_m2$color) %>%
add_trace(
x = df_m2$V1,
y = df_m2$V2,
z = df_m2$V3,
marker = list(size = 3),
name = df_m2$pred,
text = paste0("Species: ", df_m2$labels, "; Length: ",df_m2$petal_length, "; color: ", df_m2$color),
showlegend = TRUE) %>%
add_mesh(x=as.vector(axis_1),
y=as.vector(axis_2),
z=axis_3, # not sure what z you want here
type = "mesh3d",
name = "Preds") %>%
layout(
title = "none",
titlefont = list(size = 10),
paper_bgcolor = "#fffff8",
font = "Open Sans",
xaxis = list(zeroline = FALSE),
yaxis = list(hoverformat = '.2f', zeroline = FALSE)
)
I am trying to animate this test data.frame but the plotly plot doesn't even show up! The same code works for original plotly data though. I have doublechecked column's class and they are the same as plotly example. I am now puzzled why this fails.
This also works in marker mode but not in lines mode as you see.
total <- data.frame(replicate(4,sample(0:1, 100, rep=TRUE)))
names(total) <- c("date", "frame", "P1.10", "year")
total$date <- as.numeric(as.character(t(rbind(runif(100, min=2000, max=2010)))))
f.rank <- order(total$date)
total$frame[f.rank] <- 1:nrow(total)
total$P1.10 <- as.numeric(as.character(t(rbind(runif(100, min=1, max=10)))))
total$year <- 2000
p <- total %>%
plot_ly(
x = ~date,
y = ~P1.10,
frame = ~frame,
type = 'scatter',
mode = 'lines',
line = list(simplyfy = F)
) %>%
layout(
xaxis = list(
title = "Date",
zeroline = F
),
yaxis = list(
title = "P1.10",
zeroline = F
)
) %>%
animation_opts(
frame = 100,
transition = 0,
redraw = FALSE
) %>%
animation_slider(
hide = T
) %>%
animation_button(
x = 1, xanchor = "right", y = 0, yanchor = "bottom"
)
You have ignored accumulate_by in the example. You also need an ID field. This is the same but using ggplot in combination.
set.seed(123)
library(plotly)
total <- data.frame(replicate(4,sample(0:1, 100, rep=TRUE)))
names(total) <- c("date", "frame", "P1.10", "year")
total$date <- as.numeric(as.character(t(rbind(runif(100, min=2000, max=2010)))))
f.rank <- order(total$date)
total$frame[f.rank] <- 1:nrow(total)
total$ID[f.rank] <- 1:nrow(total)
total$P1.10 <- as.numeric(as.character(t(rbind(runif(100, min=1, max=10)))))
total$year <- 2000
accumulate_by <- function(dat, var) {
var <- lazyeval::f_eval(var, dat)
lvls <- plotly:::getLevels(var)
dats <- lapply(seq_along(lvls), function(x) {
cbind(dat[var %in% lvls[seq(1, x)], ], frame = lvls[[x]])
})
dplyr::bind_rows(dats)
}
total <- total %>%
accumulate_by(~ID)
p <- ggplot(total,aes(ID, P1.10, frame = frame)) +
geom_line()
p <- ggplotly(p) %>%
layout(
title = "",
yaxis = list(
title = "P1.10",
zeroline = F,
tickprefix = "$"
),
xaxis = list(
title = "Date",
zeroline = F,
showgrid = F
)
) %>%
animation_opts(
frame = 100,
transition = 0,
redraw = FALSE
) %>%
animation_slider(
currentvalue = list(
prefix = "Day "
)
)
I am trying to generate multiple graphs in Plotly for 30 different sales offices. Each graph would have 3 lines: sales, COGS, and inventory. I would like to keep this on one graph with 30 buttons for the different offices. This is the closest solution I could find on SO:
## Create random data. cols holds the parameter that should be switched
l <- lapply(1:100, function(i) rnorm(100))
df <- as.data.frame(l)
cols <- paste0(letters, 1:100)
colnames(df) <- cols
df[["c"]] <- 1:100
## Add trace directly here, since plotly adds a blank trace otherwise
p <- plot_ly(df,
type = "scatter",
mode = "lines",
x = ~c,
y= ~df[[cols[[1]]]],
name = cols[[1]])
## Add arbitrary number of traces
## Ignore first col as it has already been added
for (col in cols[-1]) {
p <- p %>% add_lines(x = ~c, y = df[[col]], name = col, visible = FALSE)
}
p <- p %>%
layout(
title = "Dropdown line plot",
xaxis = list(title = "x"),
yaxis = list(title = "y"),
updatemenus = list(
list(
y = 0.7,
## Add all buttons at once
buttons = lapply(cols, function(col) {
list(method="restyle",
args = list("visible", cols == col),
label = col)
})
)
)
)
print(p)
It works but only on graphs with single lines/traces. How can I modify this code to do the same thing but with graphs with 2 or more traces? or is there a better solution? Any help would be appreciated!
### EXAMPLE 2
#create fake time series data
library(plotly)
set.seed(1)
df <- data.frame(replicate(31,sample(200:500,24,rep=TRUE)))
cols <- paste0(letters, 1:31)
colnames(df) <- cols
#create time series
timeseries <- ts(df[[1]], start = c(2018,1), end = c(2019,12), frequency = 12)
fit <- auto.arima(timeseries, d=1, D=1, stepwise =FALSE, approximation = FALSE)
fore <- forecast(fit, h = 12, level = c(80, 95))
## Add trace directly here, since plotly adds a blank trace otherwise
p <- plot_ly() %>%
add_lines(x = time(timeseries), y = timeseries,
color = I("black"), name = "observed") %>%
add_ribbons(x = time(fore$mean), ymin = fore$lower[, 2], ymax = fore$upper[, 2],
color = I("gray95"), name = "95% confidence") %>%
add_ribbons(x = time(fore$mean), ymin = fore$lower[, 1], ymax = fore$upper[, 1],
color = I("gray80"), name = "80% confidence") %>%
add_lines(x = time(fore$mean), y = fore$mean, color = I("blue"), name = "prediction")
## Add arbitrary number of traces
## Ignore first col as it has already been added
for (col in cols[2:31]) {
timeseries <- ts(df[[col]], start = c(2018,1), end = c(2019,12), frequency = 12)
fit <- auto.arima(timeseries, d=1, D=1, stepwise =FALSE, approximation = FALSE)
fore <- forecast(fit, h = 12, level = c(80, 95))
p <- p %>%
add_lines(x = time(timeseries), y = timeseries,
color = I("black"), name = "observed", visible = FALSE) %>%
add_ribbons(x = time(fore$mean), ymin = fore$lower[, 2], ymax = fore$upper[, 2],
color = I("gray95"), name = "95% confidence", visible = FALSE) %>%
add_ribbons(x = time(fore$mean), ymin = fore$lower[, 1], ymax = fore$upper[, 1],
color = I("gray80"), name = "80% confidence", visible = FALSE) %>%
add_lines(x = time(fore$mean), y = fore$mean, color = I("blue"), name = "prediction", visible = FALSE)
}
p <- p %>%
layout(
title = "Dropdown line plot",
xaxis = list(title = "x"),
yaxis = list(title = "y"),
updatemenus = list(
list(
y = 0.7,
## Add all buttons at once
buttons = lapply(cols, function(col) {
list(method="restyle",
args = list("visible", cols == col),
label = col)
})
)
)
)
p
You were very close!
If for example you want graphs with 3 traces,
You only need to tweak two things:
Set visible the three first traces,
Modify buttons to show traces in groups of three.
My code:
## Create random data. cols holds the parameter that should be switched
library(plotly)
l <- lapply(1:99, function(i) rnorm(100))
df <- as.data.frame(l)
cols <- paste0(letters, 1:99)
colnames(df) <- cols
df[["c"]] <- 1:100
## Add trace directly here, since plotly adds a blank trace otherwise
p <- plot_ly(df,
type = "scatter",
mode = "lines",
x = ~c,
y= ~df[[cols[[1]]]],
name = cols[[1]])
p <- p %>% add_lines(x = ~c, y = df[[2]], name = cols[[2]], visible = T)
p <- p %>% add_lines(x = ~c, y = df[[3]], name = cols[[3]], visible = T)
## Add arbitrary number of traces
## Ignore first col as it has already been added
for (col in cols[4:99]) {
print(col)
p <- p %>% add_lines(x = ~c, y = df[[col]], name = col, visible = F)
}
p <- p %>%
layout(
title = "Dropdown line plot",
xaxis = list(title = "x"),
yaxis = list(title = "y"),
updatemenus = list(
list(
y = 0.7,
## Add all buttons at once
buttons = lapply(0:32, function(col) {
list(method="restyle",
args = list("visible", cols == c(cols[col*3+1],cols[col*3+2],cols[col*3+3])),
label = paste0(cols[col*3+1], " ",cols[col*3+2], " ",cols[col*3+3] ))
})
)
)
)
print(p)
PD: I only use 99 cols because I want 33 groups of 3 graphs
How can I create a grouped bar chart in plotly that has a dropdown (or something else), so a viewer can select the grouping variable?
Working example:
library(dplyr)
library(plotly)
library(reshape2)
iris$Sepal.L <- iris$Sepal.Length %>%
cut(breaks = c(4,5,7,8),
labels = c("Length.a","Length.b","Length.c"))
iris$Sepal.W <- iris$Sepal.Width %>%
cut(breaks = c(1,3,5),
labels = c("Width.a","Width.b"))
# Get percentages
data1 <- table(iris$Species, iris$Sepal.L) %>%
prop.table(margin = 1)
data2 <- table(iris$Species, iris$Sepal.W) %>%
prop.table(margin = 1)
# Convert to df
data1 <- data.frame(Var1=row.names(data1), cbind(data1))
row.names(data1) <- NULL
data2 <- data.frame(Var1=row.names(data2), cbind(data2))
row.names(data2) <- NULL
plot_ly(
data = data1,
name = "Length.a",
x = ~Var1, y = ~Length.a,
type = "bar") %>%
add_trace(y=~Length.b, name = "Length.b") %>%
add_trace(y=~Length.c, name = "Length.c")
plot_ly(
data = data2,
name = "Width.a",
x = ~Var1, y = ~Width.a,
type = "bar") %>%
add_trace(y=~Width.b, name = "Width.b")
For example if I would like to select between viewing a plot with table(iris$Species, iris$Sepal.Length) and a plot with table(iris$Species, iris$Sepal.Width)
Bonus:
If it's easy; being able to interactively select the x variable as well would be cool, but not necessary.
You can find a solution here.
The idea is to plot your bar charts (with data1 and data2) all together and to make visible only one at a time.
items <- list(
list(label="Var1",
args=list(list(visible=c(T,T,T,F,F)))),
list(label="Var2",
args=list(list(visible=c(F,F,F,T,T))))
)
plot_ly(data=data1) %>%
add_bars(name = "Length.a",
x = ~Var1, y = ~Length.a, visible=T) %>%
add_bars(name = "Length.b",
x = ~Var1, y = ~Length.b, visible=T) %>%
add_bars(name = "Length.c",
x = ~Var1, y = ~Length.c, visible=T) %>%
add_bars(name = "Width.a",
x = ~Var1, y = ~Width.a, visible=F, data=data2, marker=list(color="#377EB8")) %>%
add_bars(name = "Width.b",
x = ~Var1, y = ~Width.b, visible=F, data=data2, marker=list(color="#FF7F00")) %>%
layout(
title = "Bar chart with drop down menu",
xaxis = list(title="x"),
yaxis = list(title = "y"),
showlegend = T,
updatemenus = list(
list(y = 0.9,
buttons = items)
))
I have a data.frame I'd like to scatter plot using R's plotly with two factors which I'd like to color and shape by.
Here's my data:
set.seed(1)
df <- data.frame(x=rnorm(12),y=rnorm(12),
group=c(rep(1,3),rep(2,3),rep(3,3),rep(4,3)),
treatment=c(rep("A",6),rep("B",6)),
stringsAsFactors=F)
df$group <- factor(df$group,levels=1:4)
df$treatment <- factor(df$treatment,levels=c("A","B"))
Here's how I'm trying to plot:
require(plotly)
plot_ly(marker=list(size=10),type='scatter',mode="markers",x=~df$x,y=~df$y,color=~df$group,symbol=~df$treatment) %>%
add_annotations(text="group,treatment",xref="paper",yref="paper",x=1.02, xanchor="left",y=1.02,yanchor="top",legendtitle=TRUE,showarrow=FALSE) %>%
layout(xaxis=list(title="x"),yaxis=list(title="y"))
which gives me:
Is it possible to get the text of group and treatment in the legend be separated by comma instead of the new line as it is now?
This means that instead of:
1
A
2
A
3
B
4
B
I'll have:
1,A
2,A
3,B
4,B
Sounds trivial but it's one of the cases where Plotly decides whats good for you.
The legend labels are composed of the categories of color and symbol which are all passed in one command. In order to get control over the output, let's add each trace separately.
for (grou in groups) {
for (treat in treatments) {
trace_data <- subset(df, group == grou & treatment == treat)
if (nrow(trace_data) > 0) {
p <- add_trace(p,
x = trace_data$x,
y = trace_data$y,
marker = list(size = 10,
color = group,
symbol = as.integer(charToRaw(treat)) - 65),
type = 'scatter',
mode = "markers",
name = paste(grou, treat, sep = ",")
)
}
}
}
We pass the color (not strictly necessary) via marker and symbol also via marker (both can be passed in the add_trace command as well but then again Plotly decides for you what do to do with it).
The legend label is passed via name.
Note: You need to convert your treatment explicitly because symbol expects either a named symbol or a number (unless your treatments are named diamond or circle)
Complete code
library(utils)
library(plotly)
set.seed(1)
df <- data.frame(x = rnorm(12),
y = rnorm(12),
group = c(rep(1, 3),
rep(2, 3),
rep(3, 3),
rep(4, 3)
),
treatment=c(rep("A", 6),
rep("B", 6)
),
stringsAsFactors = FALSE
)
groups <- unique(df$group)
treatments <- unique(df$treatment)
p <- plot_ly()
for (grou in groups) {
for (treat in treatments) {
trace_data <- subset(df, group == grou & treatment == treat)
if (nrow(trace_data) > 0) {
p <- add_trace(p,
x = trace_data$x,
y = trace_data$y,
marker = list(size = 10,
color = group,
symbol = as.integer(charToRaw(treat)) - 65),
type = 'scatter',
mode = "markers",
name = paste(grou, treat, sep = ",")
)
}
}
}
p <- add_annotations(p,
text = "group,treatment",
xref = "paper",
yref = "paper",
x = 0.96,
xanchor = "left",
y = 1.03,
yanchor = "top",
legendtitle = TRUE,
showarrow = FALSE) %>%
layout(xaxis = list(title = "x"),
yaxis = list(title = "y"))
p