How to make an interactive aluvial plot in R? - r

I want to create an interactive alluvial plot in R. I would like to do something similar to This Sankey diagram. I want a list of IDs to pop when you hover over the flow edge (I don't know the proper term). I can create a basic plot using the code below.
sdata <- read.csv("data.csv", header = TRUE, sep = ",")
view(sdata)
df <- sdata %>%
make_long(col1, col2, col3, col4, col5)
df
dagg <- df %>%
dplyr::group_by(node) %>%
tally()
view(dagg)
df2 <- merge(df, dagg, by.x = 'node', by.y = 'node', all.x = TRUE)
pl <- ggplot(df2, aes(x = x
, next_x = next_x
, node = node
, next_node = next_node
, fill = factor(node)
, label = paste0(node," n=", n)))
pl <- pl +geom_alluvial(flow.alpha = 0.5, color = "gray40", show.legend = TRUE)
pl <- pl +geom_alluvial_text(size = 1.5, color = "white", hjust = 0.7)
pl <- pl + theme(legend.position = "none")
pl <- pl + scale_fill_viridis_d(option = "inferno")
pl <- pl + labs(title = "CML")
pl <- pl + labs(subtitle = "Conc")
pl <- pl + labs(fill = 'Nodes')
pl

Related

How to change the colors of the dots in the graph? ggpubr package

I am using the ggerrorplot () function of the ggpubr package to create the graph below. My question is whether there is any way to change the colors of the dots without changing the color of the point that represents the mean and standard deviation? Observe the image:
My code:
# loading packages
library(ggpubr)
# Create data frame
GROUP <- c()
TEST <- c()
VALUE <- c()
for (i in 0:100) {
gp <- c('Group1','Group2','Group1 and Group2')
ts <- c('Test1','Test2')
GROUP <- append(GROUP, sample(gp, 1))
TEST <- append(TEST, sample(ts, 1))
VALUE <- append(VALUE, sample(1:200, 1))
}
df <- data.frame(GROUP, TEST, VALUE)
# Seed
set.seed(123)
# Plot
ggerrorplot(df, x = "GROUP", y = "VALUE",
desc_stat = "mean_sd",
add = c("jitter"),
color = "TEST",
palette = "jco",
add.params = list(size = 0.2),
order = c('Group1','Group2','Group1 and Group2')
) +
labs(x = '', y = 'Values\n') +
theme(legend.title = element_blank())
Can you accomplish this by simply passing in color to add.params?
# loading packages
library(ggpubr)
#> Loading required package: ggplot2
# Create data frame
GROUP <- c()
TEST <- c()
VALUE <- c()
for (i in 0:100) {
gp <- c('Group1','Group2','Group1 and Group2')
ts <- c('Test1','Test2')
GROUP <- append(GROUP, sample(gp, 1))
TEST <- append(TEST, sample(ts, 1))
VALUE <- append(VALUE, sample(1:200, 1))
}
df <- data.frame(GROUP, TEST, VALUE)
# Seed
set.seed(123)
# Plot
ggerrorplot(df, x = "GROUP", y = "VALUE",
desc_stat = "mean_sd",
add = c("jitter"),
color = "TEST",
palette = "jco",
add.params = list(size = 0.2, color = "red"),
order = c('Group1','Group2','Group1 and Group2')
) +
labs(x = '', y = 'Values\n') +
theme(legend.title = element_blank())
Created on 2021-03-10 by the reprex package (v0.3.0)
Another potential workaround - replicate the plot using ggplot() and geom_linerange(), e.g.
library(ggpubr)
library(ggsci)
library(cowplot)
# Create data frame
GROUP <- c()
TEST <- c()
VALUE <- c()
for (i in 0:100) {
gp <- c('Group1','Group2','Group1 and Group2')
ts <- c('Test1','Test2')
GROUP <- append(GROUP, sample(gp, 1))
TEST <- append(TEST, sample(ts, 1))
VALUE <- append(VALUE, sample(1:200, 1))
}
df <- data.frame(GROUP, TEST, VALUE)
# Seed
set.seed(123)
data_summary <- function(data, varname, groupnames){
require(plyr)
summary_func <- function(x, col){
c(mean = mean(x[[col]], na.rm=TRUE),
sd = sd(x[[col]], na.rm=TRUE))
}
data_sum<-ddply(data, groupnames, .fun=summary_func,
varname)
data_sum <- rename(data_sum, c("mean" = varname))
return(data_sum)
}
df2 <- data_summary(df, varname = "VALUE", groupnames = c("TEST", "GROUP"))
# Plot
p1 <- ggplot(df, aes(x = factor(GROUP, levels = c('Group1','Group2','Group1 and Group2')),
y = VALUE, color = TEST)) +
geom_jitter(shape = 21, fill = "black", stroke = 0,
position = position_jitterdodge(jitter.width = 0.2)) +
geom_linerange(data = df2, aes(ymin=VALUE-sd, ymax=VALUE+sd),
position=position_dodge(width = .75)) +
geom_point(data = df2, aes(y = VALUE), size = 3,
position = position_dodge(width = 0.75)) +
scale_color_jco() +
labs(x = '', y = 'Values\n') +
theme_classic(base_size = 14) +
theme(legend.title = element_blank(),
legend.position = "top")
p2 <- ggerrorplot(df, x = "GROUP", y = "VALUE",
desc_stat = "mean_sd",
add = c("jitter"),
color = "TEST",
palette = "jco",
add.params = list(size = 0.2),
order = c('Group1','Group2','Group1 and Group2')
) +
labs(x = '', y = 'Values\n') +
theme(legend.title = element_blank())
cowplot::plot_grid(p1, p2, nrow = 1, ncol = 2, labels = "AUTO")
When you plot them side-by-side you can see that they aren't exactly the same, but this might work for you nonetheless.
Edit
An advantage of this approach is that you can adjust the 'fill' scale separately if you don't want all the dots to be the same colour, but you do want them to be different to the lines, e.g.
p1 <- ggplot(df, aes(x = factor(GROUP, levels = c('Group1','Group2','Group1 and Group2')),
y = VALUE, color = TEST)) +
geom_jitter(aes(fill = TEST), shape = 21, stroke = 0,
position = position_jitterdodge(jitter.width = 0.2)) +
geom_linerange(data = df2, aes(ymin=VALUE-sd, ymax=VALUE+sd),
position=position_dodge(width = .75)) +
geom_point(data = df2, aes(y = VALUE), size = 3,
position = position_dodge(width = 0.75)) +
scale_color_jco() +
scale_fill_npg() +
labs(x = '', y = 'Values\n') +
theme_classic(base_size = 14) +
theme(legend.title = element_blank(),
legend.position = "top")
p2 <- ggerrorplot(df, x = "GROUP", y = "VALUE",
desc_stat = "mean_sd",
add = c("jitter"),
color = "TEST",
palette = "jco",
add.params = list(size = 0.2),
order = c('Group1','Group2','Group1 and Group2')
) +
labs(x = '', y = 'Values\n') +
theme(legend.title = element_blank())
cowplot::plot_grid(p1, p2, nrow = 1, ncol = 2, labels = "AUTO")

Is there a way to label each cluster generated by lda()

using lda() and ggplot2 I can make a canonical plot with confidence ellipses. Is there a way to add labels for each group on the plot (labeling each cluster with a group from figure legend)?
# for the universality lda(Species~., data=iris) would be analogous
m.lda <- lda(Diet ~ ., data = b)
m.sub <- b %>% dplyr::select(-Diet) %>% as.matrix
CVA.scores <- m.sub %*% m.lda$scaling
m.CV <- data.frame(CVA.scores)
m.CV$Diet <- b$Diet
m.cva.plot <-
ggplot(m.CV, aes(x = LD1, y = LD2)) +
geom_point(aes(color=Diet), alpha=0.5) +
labs(x = "CV1", y = "CV2") +
coord_fixed(ratio=1)
chi2 = qchisq(0.05,2, lower.tail=FALSE)
CIregions.mean.and.pop <-
m.CV %>%
group_by(Diet) %>%
summarize(CV1.mean = mean(LD1),
CV2.mean = mean(LD2),
mean.radii = sqrt(chi2/n()),
popn.radii = sqrt(chi2))
m.cva.plot2 <-
m.cva.plot +
geom_circle(data = CIregions.mean.and.pop,
mapping = aes(x0 = CV1.mean, y0 = CV2.mean, r = mean.radii),
inherit.aes = FALSE) +
geom_circle(data = CIregions.mean.and.pop,
mapping = aes(x0 = CV1.mean, y0 = CV2.mean, r = popn.radii),
linetype = "dashed",
inherit.aes = FALSE)
The labels can be placed with either geom_text or geom_label. In the case below I will use geom_label, with the y coordinate adjusted by adding popn.radii the radii of the outer circles.
The code in the question is adapted to use built-in data set iris, like the question itself says.
m.cva.plot2 +
geom_label(data = CIregions.mean.and.pop,
mapping = aes(x = CV1.mean,
y = CV2.mean + popn.radii,
label = Species),
label.padding = unit(0.20, "lines"),
label.size = 0)
Reproducible code
library(dplyr)
library(ggplot2)
library(ggforce)
library(MASS)
b <- iris
m.lda <- lda(Species~., data=iris) #would be analogous
#m.lda <- lda(Diet ~ ., data = b)
m.sub <- b %>% dplyr::select(-Species) %>% as.matrix
CVA.scores <- m.sub %*% m.lda$scaling
m.CV <- data.frame(CVA.scores)
m.CV$Species <- b$Species
m.cva.plot <-
ggplot(m.CV, aes(x = LD1, y = LD2)) +
geom_point(aes(color=Species), alpha=0.5) +
labs(x = "CV1", y = "CV2") +
coord_fixed(ratio=1)
chi2 = qchisq(0.05,2, lower.tail=FALSE)
CIregions.mean.and.pop <-
m.CV %>%
group_by(Species) %>%
summarize(CV1.mean = mean(LD1),
CV2.mean = mean(LD2),
mean.radii = sqrt(chi2/n()),
popn.radii = sqrt(chi2))
m.cva.plot2 <-
m.cva.plot +
geom_circle(data = CIregions.mean.and.pop,
mapping = aes(x0 = CV1.mean, y0 = CV2.mean, r = mean.radii),
inherit.aes = FALSE) +
geom_circle(data = CIregions.mean.and.pop,
mapping = aes(x0 = CV1.mean, y0 = CV2.mean, r = popn.radii),
linetype = "dashed",
inherit.aes = FALSE)

Converting ggplot object to plotly object creates axis title that overlaps tick values

I had the same issue described in this question:
R: ggplot and plotly axis margin won't change
but when I implemented the solution, I got the following error:
Warning: Ignoring unknown aesthetics: text We recommend that you use the dev version of ggplot2 with ggplotly() Install it with: devtools::install_github('hadley/ggplot2') Error in tmp[[2]] : subscript out of bounds
This code will produce this error on my machine:
library(gapminder)
library(plotly)
library(ggplot2)
lead <- rep("Fred Smith", 30)
lead <- append(lead, rep("Terry Jones", 30))
lead <- append(lead, rep("Henry Sarduci", 30))
proj_date <- seq(as.Date('2017-11-01'), as.Date('2017-11-30'), by = 'day')
proj_date <- append(proj_date, rep(proj_date, 2))
set.seed(1237)
actHrs <- runif(90, 1, 100)
cummActHrs <- cumsum(actHrs)
forHrs <- runif(90, 1, 100)
cummForHrs <- cumsum(forHrs)
df <- data.frame(Lead = lead, date_seq = proj_date,
cActHrs = cummActHrs,
cForHrs = cummForHrs)
makePlot <- function(dat=df, man_level = 'Lead') {
p <- ggplot(dat, aes_string(x='date_seq', y='cActHrs',
group = man_level,
color = man_level),
linetype = 1) +
geom_line() +
geom_line(data=df,
aes_string(x='date_seq', y = 'cForHrs',
group = man_level,
color = man_level),
linetype = 2)
p <- p + geom_point(aes(text=sprintf('%s\nManager: %s\n MTD Actual Hrs: %s\nMTD Forecasted Hrs: %s',
date_seq, Lead, round(cActHrs, 2), round(cForHrs, 2))))
p <- p + theme_classic() + ylab('Hours') + xlab('Date')
gp <- ggplotly(p, tooltip = "text") %>% layout(hovermode = "compare")
### FIX IMPLEMENTED HERE ###
gp[['x']][['layout']][['annotations']][[2]][['x']] <- -0.1
gp %>% layout(margin = list(l = 75))
return(gp)
}
## run the example
p1 <- makePlot()
Try this:
makePlot <- function(dat=df, man_level = "Lead") {
dat$var <- dat[,man_level]
dat$grp <- ""
p <- ggplot(dat, aes(x=date_seq, y=cActHrs,
group = var, color = var,
text=paste0("Manager:", date_seq,"<br>MTD Actual Hrs:", round(cActHrs, 2),
"<br>MTD Forecasted Hrs:", round(cForHrs, 2))),
linetype = 1) +
geom_line() +
geom_line(data=dat,
aes(x=date_seq, y = cForHrs,
group = var, color = var),
linetype = 2) +
geom_point() +
theme_classic() + ylab("Hours") + xlab("Date") +
scale_color_discrete(name=man_level) +
facet_wrap(~grp)
gp <- ggplotly(p, tooltip = "text")
# Set y-axis label position
gp[["x"]][["layout"]][["annotations"]][[2]][["x"]] <- -0.06
# Set legend label position
gp[["x"]][["layout"]][["annotations"]][[3]][["y"]] <- 0.93
gp <- gp %>% layout(margin = list(l = 120, b=70), hovermode = "compare")
return(gp)
}
The problem in your case is the opposite of the linked question. Your axis title is a real axis title, not an annotation. Currently there is no solution to move axis titles in any direction (see https://github.com/lleslie84/plotly.js/pull/1).
Workarounds like adding line breaks to the axis title or adding spaces to the tick labels don't work in your case.
One possible workaround would be to add an annotation with your axis title. The annotation can then be freely moved.
gp <- layout(gp, yaxis = list(title = ""),
margin = list(l = 100),
annotations = c(list(text = "Hours",
x = -0.15,
xref = "paper",
showarrow = F,
textangle = -90))
)
Complete code
library(gapminder)
library(plotly)
library(ggplot2)
lead <- rep("Fred Smith", 30)
lead <- append(lead, rep("Terry Jones", 30))
lead <- append(lead, rep("Henry Sarduci", 30))
proj_date <- seq(as.Date('2017-11-01'), as.Date('2017-11-30'), by = 'day')
proj_date <- append(proj_date, rep(proj_date, 2))
set.seed(1237)
actHrs <- runif(90, 1, 100)
cummActHrs <- cumsum(actHrs)
forHrs <- runif(90, 1, 100)
cummForHrs <- cumsum(forHrs)
df <- data.frame(Lead = lead, date_seq = proj_date,
cActHrs = cummActHrs,
cForHrs = cummForHrs)
makePlot <- function(dat=df, man_level = 'Lead') {
p <- ggplot(dat, aes_string(x='date_seq', y='cActHrs',
group = man_level,
color = man_level),
linetype = 1) +
geom_line() +
geom_line(data=df,
aes_string(x='date_seq', y = 'cForHrs',
group = man_level,
color = man_level),
linetype = 2)
p <- p + geom_point(aes(text=sprintf('%s\nManager: %s\n MTD Actual Hrs: %s\nMTD Forecasted Hrs: %s',
date_seq, Lead, round(cActHrs, 2), round(cForHrs, 2))))
p <- p + theme_classic() + ylab('Hours') + xlab('Date')
gp <- ggplotly(p, tooltip = "text") %>% layout(hovermode = "compare")
### FIX IMPLEMENTED HERE ###
gp <- layout(gp,
yaxis = list(title = ""),
margin = list(l = 100),
annotations = c(list(text = "Hours",
x = -0.15,
xref = "paper",
showarrow = F,
textangle = -90))
)
return(gp)
}
## run the example
p1 <- makePlot()
p1

Creating a horizontal bar chart in R to display sequence of activities

The dataset "patients" is an eventlog of patients visiting a clinic and getting treatment. The script below gives a data frame with traces or sequence of activities in the eventlog, trace_id and absolute frequency of the cases following the particular trace. I wish to create a dynamic horizontal bar chart using ggplot2 or plotly such that the traces are represented like the snapshot attached with the absolute frequency in % at the top of the bar with axes labels.
Thanks and please help!
library("bupaR")
traces(patients, output_traces = T, output_cases = F)
Hope this helps (I am not able to get frequency however)
library(splitstackshape)
tr <- data.frame(traces(patients, output_traces = T, output_cases = F))
tr.df <- cSplit(tr, "trace", ",")
tr.df <- tr.df[,c(1,4:9)]
tr.df <- melt(tr.df, id.vars = "trace_id")
windows()
ggplot(data = tr.df, aes(x = variable,y = trace_id, fill = value, label =
value)) +
geom_tile(colour = "white") +
geom_text(colour = "white", fontface = "bold", size = 2) +
scale_fill_discrete(na.value="transparent") +
theme(legend.position="none")
EDIT 1:
library(splitstackshape)
library(bupaR)
library(ggplot2)
tr <- data.frame(traces(patients, output_traces = T, output_cases = F))
tr.df <- cSplit(tr, "trace", ",")
tr.df <- tr.df[,c(1,4:9)]
tr.df <- melt(tr.df, id.vars = "trace_id")
tr.df <- tr.df[order(tr.df$trace_id),]
tr.label <- data.frame(id = tr$trace_id, freq = tr$absolute_frequency)
tr.label <- tr.label[order(tr.label$id),]
windows()
ggplot(data = tr.df, aes(x = variable,y = trace_id, fill = value, label = value)) +
geom_tile(colour = "white") +
geom_text(colour = "white", fontface = "bold", size = 2) +
scale_fill_discrete(na.value="transparent") +
theme(legend.position="none") +
geom_text(data = tr.label, aes(label = freq, y = id, x = 6), nudge_x = 0.3,
colour = "black", fontface = "bold", inherit.aes = FALSE)

Removing unnecessary labels in the tooptip in R and ggplot2 chart

Upon running the R and ggplot2 script below, the following snapshot is generated. Upon hovering on any box, we get the following tooltip as shown in the plot. My simple requirement is to get rid of the fourth tooltip attribute as it is similar to the third . I guess something needs to be done in the aes() of ggplot command below.Also if the text can be made more clear without increasing the size of the plot or font, please help and thanks.
library(bupaR)
library(ggplot2)
library(scales)
library(plotly)
library(splitstackshape)
tr <- data.frame(traces(patients, output_traces = T, output_cases = F))
tr$Actuals = percent(tr$absolute_frequency/sum(te$absolute_frequency))
tr.df <- cSplit(tr, "trace", ",")
pos <- c(1,4:ncol(tr.df))
tr.df <- tr.df[,..pos]
tr.df <- melt(tr.df, id.vars = "trace_id")
mp1 = ggplot(data = tr.df, aes(x = variable,y = trace_id, fill = value,
label = value)) + geom_tile(colour = "white") +
geom_text(colour = "white", size = 1.9) +
scale_fill_discrete(na.value="transparent") +
theme(legend.position="none") + labs(x = "Traces", y = "Activities")
ggplotly(mp1, height = 500, width = 645)
library(bupaR)
library(ggplot2)
library(scales)
library(plotly)
library(splitstackshape)
tr <- data.frame(traces(patients, output_traces = T, output_cases = F))
tr$Actuals = percent(tr$absolute_frequency/sum(tr$absolute_frequency))
tr.df <- cSplit(tr, "trace", ",")
pos <- c(1,4:ncol(tr.df))
tr.df <- tr.df[,..pos]
tr.df <- melt(tr.df, id.vars = c("trace_id","Actuals"))
mp1 = ggplot(data = tr.df, aes(x = variable, y = trace_id, fill = value, label = value,
text=paste("Variable:",variable,"<br>Trace ID:",trace_id,"<br>Value:",value,"<br>Actuals:",Actuals))) +
geom_tile(colour = "white") +
geom_text(colour = "white", size = 4) +
scale_fill_discrete(na.value="transparent") +
theme(legend.position="none") + labs(x = "Traces", y = "Activities")
gg <- ggplotly(mp1, tooltip="text")
layout(gg, margin=list(l=50, b=50))

Resources