Plotly (R) Legend Won't Appear? - r

I'm trying to create a plot showing the CDFs of two different categories of data, with a legend to show which color corresponds to which (Plotly version 4.9.2.1). For some reason, it's a royal pain in the rear to get the legend to show. Below is a toy example with three of my attempts--only the last one works, but it's obnoxiously contrived and makes the resulting data appear misleadingly dense in the plot. Can anyone explain how to do this right?
library(plotly)
library(magrittr)
color.dat <- runif(30)
x.mat <- matrix(0, nrow=500, ncol=30)
for (i in 1:30){
x.mat[,i] <- rnorm(500, 0, color.dat[i])
}
### Attempt 1, no legend appears at all ###
p <- plot_ly(showlegend=TRUE)
for (i in 1:30){
tmp.cdf <- ecdf(x.mat[,i])
p <- p %>%
add_lines(x=sort(x.mat[,i]), y=tmp.cdf(sort(x.mat[,i])),
name=ifelse(color.dat[i] > 0.5, 'A', 'B'),
showlegend=FALSE,
line=list(color=ifelse(color.dat[i] > 0.5, 'blue', 'orange')))
}
p <- p %>%
add_lines(x=c(0,1), y=c(0,0), name='A',
line=list(color='blue'),
showlegend=TRUE, visible=FALSE) %>%
add_lines(x=c(0,1), y=c(0,0), name='B',
line=list(color='orange'),
showlegend=TRUE, visible=FALSE)
### Attempt 2, legend entry appears only for class B (doesn't appear without invisible traces added at end) ###
p <- plot_ly(showlegend=TRUE)
a.bool <- TRUE
b.bool <- TRUE
for (i in 1:30){
tmp.cdf <- ecdf(x.mat[,i])
if (color.dat[i] > 0.5 && a.bool){
class.bool <- TRUE
a.bool <- FALSE
} else {
class.bool <- FALSE
}
if (color.dat[i] < 0.5 && b.bool){
class.bool <- TRUE
b.bool <- FALSE
} else {
class.bool <- FALSE
}
p <- p %>%
add_lines(x=sort(x.mat[,i]), y=tmp.cdf(sort(x.mat[,i])),
name=ifelse(color.dat[i] > 0.5, 'A', 'B'),
showlegend=class.bool,
line=list(color=ifelse(color.dat[i] > 0.5, 'blue', 'orange')))
}
p <- p %>%
add_lines(x=c(0,1), y=c(0,0), name='A',
line=list(color='blue'),
showlegend=TRUE, visible=FALSE) %>%
add_lines(x=c(0,1), y=c(0,0), name='B',
line=list(color='orange'),
showlegend=TRUE, visible=FALSE)
### Attempt 3, both legend entries appear, but plot is misleading and obscures a lot of detail ###
p <- plot_ly(showlegend=TRUE)
flat.mat.a <- c()
flat.mat.b <- c()
flat.cdf.a <- c()
flat.cdf.b <- c()
for (i in 1:30){
tmp.cdf <- ecdf(x.mat[,i])
if (color.dat[i] > 0.5){
flat.mat.a <- c(flat.mat.a, sort(x.mat[,i]))
flat.cdf.a <- c(flat.cdf.a, tmp.cdf(sort(x.mat[,i])))
} else {
flat.mat.b <- c(flat.mat.b, sort(x.mat[,i]))
flat.cdf.b <- c(flat.cdf.b, tmp.cdf(sort(x.mat[,i])))
}
}
p <- p %>%
add_lines(x=flat.mat.a, y=flat.cdf.a,
showlegend=TRUE, name='A',
line=list(color='blue')) %>%
add_lines(x=flat.mat.b, y=flat.cdf.b,
showlegend=TRUE, name='B',
line=list(color='orange'))

My preferred approach to plotting stuff with ploty is to put the data in dataframe.
After the data preparation steps it's just takes two lines of code to get the plot and the legend.
library(plotly)
library(tidyr)
library(dplyr)
set.seed(42)
color.dat <- runif(30)
x.mat <- matrix(0, nrow=500, ncol=30)
for (i in 1:30){
x.mat[,i] <- rnorm(500, 0, color.dat[i])
}
# Put the data in a dataframe
dfx <- data.frame(x.mat) %>%
tidyr::pivot_longer(everything()) %>%
arrange(name, value) %>%
mutate(id = as.integer(gsub("^X", "", name)),
color = color.dat[id],
color = ifelse(color > 0.5, 'blue', 'orange')) %>%
group_by(name) %>%
mutate(cdf = ecdf(value)(value)) %>%
ungroup()
p <- dfx %>%
group_by(name) %>%
plot_ly(showlegend=TRUE) %>%
add_lines(x = ~value, y =~cdf, color = ~color, colors = c(blue = "blue", orange = "orange"))
p

Related

How to add Plotly multiple surfaces with a for loop in R

I am plotting multiple surfaces graphs in R. This is an example from the Plotly page:
z <- c(
c(8.83,8.89,8.81,8.87,8.9,8.87),
c(8.89,8.94,8.85,8.94,8.96,8.92),
c(8.84,8.9,8.82,8.92,8.93,8.91),
c(8.79,8.85,8.79,8.9,8.94,8.92),
c(8.79,8.88,8.81,8.9,8.95,8.92),
c(8.8,8.82,8.78,8.91,8.94,8.92),
c(8.75,8.78,8.77,8.91,8.95,8.92),
c(8.8,8.8,8.77,8.91,8.95,8.94),
c(8.74,8.81,8.76,8.93,8.98,8.99),
c(8.89,8.99,8.92,9.1,9.13,9.11),
c(8.97,8.97,8.91,9.09,9.11,9.11),
c(9.04,9.08,9.05,9.25,9.28,9.27),
c(9,9.01,9,9.2,9.23,9.2),
c(8.99,8.99,8.98,9.18,9.2,9.19),
c(8.93,8.97,8.97,9.18,9.2,9.18)
)
dim(z) <- c(15,6)
z2 <- z + 1
z3 <- z - 1
fig <- plot_ly(showscale = FALSE)
fig <- fig %>% add_surface(z = ~z)
fig <- fig %>% add_surface(z = ~z2, opacity = 0.98)
fig <- fig %>% add_surface(z = ~z3, opacity = 0.98)
fig
You can watch the result here: https://plotly.com/r/3d-surface-plots/
I'm trying to do the same for a 3D numeric matrix with the following R code:
# Create a tridimensional array
R = 3
v1 = replicate(R, 0)
v2 = replicate(R, 0)
v3 = replicate(R, 0)
AR <- array(c(v1,v2,v3), dim = c(R,R,R))
# 2) Fill the array
for (i in 1:R)
for (j in 1:R)
for (k in 1:R)
AR[i,j,k] <- sample(1:3,1)
#print(AR)
library(plotly)
library(htmlwidgets)
fig <-plot_ly(showscale = FALSE)
#Try to create the fig with a loop
for (k in 1:R)
{ # Abre ciclo for.
s <- AR[,,k] + 10*k
print(s)
fig <- fig %>% add_surface(z = ~s)
} # Cierra ciclo for.
fig
But obtain just the graph of the last added surface. Can you tell me please where is the mistake?
You are a victim of lazy evaluation. (See, for example, here.] for loops use lazy evaluation so your index , k, is evaluated only when fig needs to be evaluated, which happens when you print fig. At this point, k equals R, and so you get R copies of the same surface, overlaid on each other.
You need to force evaluation. One way to do this is with lapply, which forces evaluation by default.
For example,
lapply(
1:R,
function(k) { # Abre ciclo for.
s <- AR[,,k] + 10*k
print(s)
fig <<- fig %>% add_surface(z = ~s)
}
)
# Cierra ciclo for.
fig
[Note the use of <<-.] Giving

Dendrogram plot remove tree labels at end of the branches

Using the example located here https://www.datacamp.com/community/tutorials/hierarchical-clustering-R and the data located https://archive.ics.uci.edu/ml/datasets/seeds# i am trying to remove the labels at the bottom of the dendrogram when using the color_branches
when plot(hclust_avg, labels=FALSE) it works but not later when using color_branches. is there a way to remove them?
`set.seed(786)
seeds_df <- read.csv("seeds_dataset.txt",sep = '\t',header = FALSE)
feature_name <- c('area','perimeter','compactness','length.of.kernel','width.of.kernal','asymmetry.coefficient','length.of.kernel.groove','type.of.seed')
colnames(seeds_df) <- feature_name
seeds_df<- seeds_df[complete.cases(seeds_df), ]
seeds_label <- seeds_df$type.of.seed
seeds_df$type.of.seed <- NULL
seeds_df_sc <- as.data.frame(scale(seeds_df))
dist_mat <- dist(seeds_df_sc, method = 'euclidean')
hclust_avg <- hclust(dist_mat, method = 'average')
cut_avg <- cutree(hclust_avg, k = 3)
suppressPackageStartupMessages(library(dendextend))
avg_dend_obj <- as.dendrogram(hclust_avg)
avg_col_dend <- color_branches(avg_dend_obj, h = 3)
plot(avg_col_dend)`
Figured this out by colouring the the labels white to the background
avg_dend_obj <- as.dendrogram(hclust_avg)
labels_colors(avg_dend_obj) <- "white"
plot(avg_dend_obj)

fixed fill for different sections of a density plot with ggplot

Given draws from a rnorm, and cutoff c I want my plot to use the following colors:
Red for the section that is to the left of -c
Blue for the section in between -c and c
and Green for the section that is to the right of c
For example, if my data is:
set.seed(9782)
mydata <- rnorm(1000, 0, 2)
c <- 1
I want to plot something like this:
But if my data is all to the right of c the whole plot should be green. Similarly, if all is between -c and c or to the left of -c the plot should be all red or blue.
This is the code I wrote:
MinD <- min(mydata)
MaxD <- max(mydata)
df.plot <- data.frame(density = mydata)
if(c==0){
case <- dplyr::case_when((MinD < 0 & MaxD >0) ~ "L_and_R",
(MinD > 0) ~ "R",
(MaxD < 0) ~ "L")
}else{
case <- dplyr::case_when((MinD < -c & MaxD >c) ~ "ALL",
(MinD > -c & MaxD > c) ~ "Center_and_R",
(MinD > -c & MaxD <c) ~ "Center",
(MinD < -c & MaxD < c) ~ "Center_and_L",
MaxD < -c ~ "L",
MaxD > c ~ "R")
}
# Draw the Center
if(case %in% c("ALL", "Center_and_R", "Center", "Center_and_L")){
ds <- density(df.plot$density, from = -c, to = c)
ds_data_Center <- data.frame(x = ds$x, y = ds$y, section="Center")
} else{
ds_data_Center <- data.frame(x = NA, y = NA, section="Center")
}
# Draw L
if(case %in% c("ALL", "Center_and_L", "L", "L_and_R")){
ds <- density(df.plot$density, from = MinD, to = -c)
ds_data_L <- data.frame(x = ds$x, y = ds$y, section="L")
} else{
ds_data_L <- data.frame(x = NA, y = NA, section="L")
}
# Draw R
if(case %in% c("ALL", "Center_and_R", "R", "L_and_R")){
ds <- density(df.plot$density, from = c, to = MaxD)
ds_data_R <- data.frame(x = ds$x, y = ds$y, section="R")
} else{
ds_data_R <- data.frame(x = NA, y = NA, section="R")
}
L_Pr <- round(mean(mydata < -c),2)
Center_Pr <- round(mean((mydata>-c & mydata<c)),2)
R_Pr <- round(mean(mydata > c),2)
filldf <- data.frame(section = c("L", "Center", "R"),
Pr = c(L_Pr, Center_Pr, R_Pr),
fill = c("red", "blue", "green")) %>%
dplyr::mutate(section = as.character(section))
if(c==0){
ds_data <- suppressWarnings(dplyr::bind_rows(ds_data_L, ds_data_R)) %>%
dplyr::full_join(filldf, by = "section") %>% filter(Pr!=0) %>%
dplyr::full_join(filldf, by = "section") %>% mutate(section = ordered(section, levels=c("L","R")))
ds_data <- ds_data[order(ds_data$section), ] %>%
filter(Pr!=0) %>%
mutate(Pr=scales::percent(Pr))
}else{
ds_data <- suppressWarnings(dplyr::bind_rows(ds_data_Center, ds_data_L, ds_data_R)) %>%
dplyr::full_join(filldf, by = "section") %>% mutate(section = ordered(section, levels=c("L","Center","R")))
ds_data <- ds_data[order(ds_data$section), ] %>%
filter(Pr!=0) %>%
mutate(Pr=scales::percent(Pr))
}
fillScale <- scale_fill_manual(name = paste0("c = ", c, ":"),
values = as.character(unique(ds_data$fill)))
p <- ggplot(data = ds_data, aes(x=x, y=y, fill=Pr)) +
geom_area() + fillScale
Alas, I cannot figure out how to assign the colors to the different sections while keeping the percentages as labels for the colors.
We use the density function to create the data frame we'll actually plot. Then, We use the cut function to create groups using ranges of the data values. Finally, we calculate the probability mass for each group and use those as the actual legend labels.
We also create a labeled vector of colors to ensure that the same color always goes with a given range of x-values, regardless of whether the data contains any values within a given range of x-values.
The code below packages all this into a function.
library(tidyverse)
library(gridExtra)
fill_density = function(x, cc=1, adj=1, drop_levs=FALSE) {
# Calculate density values for input data
dens = data.frame(density(x, n=2^10, adjust=adj)[c("x","y")]) %>%
mutate(section = cut(x, breaks=c(-Inf, -1, cc, Inf))) %>%
group_by(section) %>%
mutate(prob = paste0(round(sum(y)*mean(diff(x))*100),"%"))
# Get probability mass for each level of section
# We'll use these as the label values in scale_fill_manual
sp = dens %>%
group_by(section, prob) %>%
summarise %>%
ungroup
if(!drop_levs) {
sp = sp %>% complete(section, fill=list(prob="0%"))
}
# Assign colors to each level of section
col = setNames(c("red","blue","green"), levels(dens$section))
ggplot(dens, aes(x, y, fill=section)) +
geom_area() +
scale_fill_manual(labels=sp$prob, values=col, drop=drop_levs) +
labs(fill="")
}
Now let's run the function on several different data distributions:
set.seed(3)
dat2 = rnorm(1000)
grid.arrange(fill_density(mydata), fill_density(mydata[mydata>0]),
fill_density(mydata[mydata>2], drop_levs=TRUE),
fill_density(mydata[mydata>2], drop_levs=FALSE),
fill_density(mydata[mydata < -5 | mydata > 5], adj=0.3), fill_density(dat2),
ncol=2)

Faceting a plotly heatmap

I'd like to be able to facet an R plotly heatmap.
Here's what I mean:
I have a hierarchically-clustered gene expression dataset:
require(permute)
set.seed(1)
mat <- rbind(cbind(matrix(rnorm(2500,2,1),nrow=25,ncol=500),matrix(rnorm(2500,-2,1),nrow=25,ncol=500)),
cbind(matrix(rnorm(2500,-2,1),nrow=25,ncol=500),matrix(rnorm(2500,2,1),nrow=25,ncol=500)))
rownames(mat) <- paste("g",1:50,sep=".")
colnames(mat) <- paste("s",1:1000,sep=".")
hc.col <- hclust(dist(t(mat)))
dd.col <- as.dendrogram(hc.col)
col.order <- order.dendrogram(dd.col)
hc.row <- hclust(dist(mat))
dd.row <- as.dendrogram(hc.row)
row.order <- order.dendrogram(dd.row)
mat <- mat[row.order,col.order]
I then discretize it to specific expression ranges because that happens to help the resolution of colors for my case. I'm also creating other structures to help me plot the colorbar the way I want it to:
require(RColorBrewer)
mat.intervals <- cut(mat,breaks=6)
interval.mat <- matrix(mat.intervals,nrow=50,ncol=1000,dimnames=list(rownames(mat),colnames(mat)))
interval.cols <- brewer.pal(6,"Set2")
names(interval.cols) <- levels(mat.intervals)
require(reshape2)
interval.df <- reshape2::melt(interval.mat,varnames=c("gene","sample"),value.name="expr")
interval.cols2 <- rep(interval.cols, each=ncol(mat))
color.df <- data.frame(range=c(0:(2*length(interval.cols)-1)),colors=c(0:(2*length(interval.cols)-1)))
color.df <- setNames(data.frame(color.df$range,color.df$colors),NULL)
for (i in 1:(2*length(interval.cols))) {
color.df[[2]][[i]] <- interval.cols[[(i + 1) / 2]]
color.df[[1]][[i]] <- i/(2*length(interval.cols))-(i %% 2)/(2*length(interval.cols))
}
They way I generated the data I know that samples 1-500 are one cluster and samples 501:1000 are the other, so I label them:
interval.df$cluster <- NA
interval.df$cluster[which(interval.df$sample %in% paste("s",1:500,sep="."))] <- "A"
interval.df$cluster[which(interval.df$sample %in% paste("s",501:1000,sep="."))] <- "B"
I thought that adding a sample with not color and interval will create a white column in the heatmap plot that will look like a facet border:
divider.df <- data.frame(gene=unique(interval.df$gene),sample=NA,expr=NA,cluster=NA)
interval.df <- rbind(dplyr::filter(interval.df,cluster == "A"),divider.df,dplyr::filter(interval.df,cluster == "B"))
And now I try plotting:
#add ticks for each cluster
tick.vals <- c("s.158","s.617")
tick.text <- c("A","B")
require(plotly)
plot_ly(z=c(interval.df$expr),x=interval.df$sample,y=interval.df$gene,colors=interval.cols2,type="heatmap",colorscale=color.df,
colorbar=list(title="score",tickmode="array",tickvals=c(1:6),ticktext=names(interval.cols),len=0.2,outlinecolor="white",bordercolor="white",borderwidth=5,bgcolor="white")) %>%
layout(xaxis = list(title = 'Cluster',tickmode = 'array',tickvals = tick.vals,ticktext = tick.text))
But I don't see any separation between the clusters:
Any idea how to achieve such a facet border between the two clusters?
Your example is quite involved so I have reduced it down to a minimal example to focus on the gap you are looking for in the quadrants of your heatmap.
Modified from the examples on the plotly site, here.
library(plotly)
m <- matrix(rnorm(9), nrow = 3, ncol = 3)
p <- plot_ly(
x = c("a", "b", "c"), y = c("d", "e", "f"),
z = m, type = "heatmap"
)
subplot(p, p, p, p, shareX = TRUE, shareY = TRUE, nrows = 2)
If you create a plotly object for each of the quadrants and then use subplot, you will get a result looking similar to this:
N.B. I have cropped out the legend because it was duplicated for the facets, you could merge these into one.

How to plot two 3D graphs on the same plot in R

I'm using the plot3d function in the library rgl. Suppose my data looks something like this.
x <- c(1,2,3,4,5)
y <- c(4,2,1,4,2)
z <- c(2,2,4,5,1)
x2 <- c(1,5,2,3,4)
y2 <- c(2,3,4,1,2)
z2 <- c(3,4,2,3,1)
plot3d(x, y, z)
plot3d(x2, y2, z2)
Using the commands above would give me 2 separate plots. How can I plot both datasets on the same graph? Also I would like to use different symbols for the points in the two different data sets.
I just wrote a function cube and an accompanying one that might be sufficient for your needs:
require(rgl); library('magrittr')
cube <- function(x=0,y=0,z=0, bordered=TRUE,
filled = TRUE, lwd=2, scale=1,
fillcol = gray(.95),
bordercol ='black', ...) {
mytetra <- cube3d()
# Reduce size to unit
mytetra$vb[4,] <- mytetra$vb[4,]/scale*2
for (i in 1:length(x)) {
# Add cube border
if (bordered) {
btetra <- mytetra
btetra$material$lwd <- lwd
btetra$material$front <- 'line'
btetra$material$back <- 'line'
btetra %>% translate3d(x[i], y[i], z[i]) %>% shade3d
}
# Add cube fill
if (filled) {
ftetra <- mytetra
ftetra$vb[4,] <- ftetra$vb[4,]*1.01
ftetra$material$col <- fillcol
ftetra %>% translate3d(x[i], y[i], z[i]) %>% shade3d
}
}
}
tetra <- function(x=0,y=0,z=0, bordered=TRUE,
filled = TRUE, lwd=2, scale=1,
fillcol = gray(.95),
bordercol ='black', ...) {
mytetra <- tetrahedron3d()
# Reduce size to unit
mytetra$vb[4,] <- mytetra$vb[4,]/scale*2
for (i in 1:length(x)) {
# Add cube border
if (bordered) {
btetra <- mytetra
btetra$material$lwd <- lwd
btetra$material$front <- 'line'
btetra$material$back <- 'line'
btetra %>% translate3d(x[i], y[i], z[i]) %>% shade3d
}
# Add cube fill
if (filled) {
ftetra <- mytetra
ftetra$vb[4,] <- ftetra$vb[4,]*1.01
ftetra$material$col <- fillcol
ftetra %>% translate3d(x[i], y[i], z[i]) %>% shade3d
}
}
}
plot3d(x,y,z)
tetra(x,y,z, scale=1/2)
cube(x2,y2,z2, scale=1/2)

Resources