Removing unnecessary labels in the tooptip in R and ggplot2 chart - r

Upon running the R and ggplot2 script below, the following snapshot is generated. Upon hovering on any box, we get the following tooltip as shown in the plot. My simple requirement is to get rid of the fourth tooltip attribute as it is similar to the third . I guess something needs to be done in the aes() of ggplot command below.Also if the text can be made more clear without increasing the size of the plot or font, please help and thanks.
library(bupaR)
library(ggplot2)
library(scales)
library(plotly)
library(splitstackshape)
tr <- data.frame(traces(patients, output_traces = T, output_cases = F))
tr$Actuals = percent(tr$absolute_frequency/sum(te$absolute_frequency))
tr.df <- cSplit(tr, "trace", ",")
pos <- c(1,4:ncol(tr.df))
tr.df <- tr.df[,..pos]
tr.df <- melt(tr.df, id.vars = "trace_id")
mp1 = ggplot(data = tr.df, aes(x = variable,y = trace_id, fill = value,
label = value)) + geom_tile(colour = "white") +
geom_text(colour = "white", size = 1.9) +
scale_fill_discrete(na.value="transparent") +
theme(legend.position="none") + labs(x = "Traces", y = "Activities")
ggplotly(mp1, height = 500, width = 645)

library(bupaR)
library(ggplot2)
library(scales)
library(plotly)
library(splitstackshape)
tr <- data.frame(traces(patients, output_traces = T, output_cases = F))
tr$Actuals = percent(tr$absolute_frequency/sum(tr$absolute_frequency))
tr.df <- cSplit(tr, "trace", ",")
pos <- c(1,4:ncol(tr.df))
tr.df <- tr.df[,..pos]
tr.df <- melt(tr.df, id.vars = c("trace_id","Actuals"))
mp1 = ggplot(data = tr.df, aes(x = variable, y = trace_id, fill = value, label = value,
text=paste("Variable:",variable,"<br>Trace ID:",trace_id,"<br>Value:",value,"<br>Actuals:",Actuals))) +
geom_tile(colour = "white") +
geom_text(colour = "white", size = 4) +
scale_fill_discrete(na.value="transparent") +
theme(legend.position="none") + labs(x = "Traces", y = "Activities")
gg <- ggplotly(mp1, tooltip="text")
layout(gg, margin=list(l=50, b=50))

Related

Using a function within mapply to create and save scatterplots

I am trying to use mapply to automate saving scatterplots from ggplot to a folder.
To do this I have created lists of my x variable and y variable, as well as a list of the grouping variable I would like to colour my points by.
I then tried creating a function, and calling the function with mapply but the only output saved is a single blank image of the last variable in the list. Below is an example dataset.
df <- data.frame("ID" = 1:16)
df$VarA <- c(1,1,1,1,1,1,1,1,1,1,1,14,NA_real_,NA_real_,NA_real_,16)
df$VarB <- c(10,0,0,0,12,12,12,12,0,14,NA_real_,14,16,16,16,16)
df$VarC <- c(10,12,14,16,10,12,14,16,10,12,14,16,10,12,14,16)
df$VarD <- c(10,12,14,16,10,12,14,16,10,12,14,16,10,12,14,16)
df$ControlVarA <- factor(c("Group_1","Group_1","Group_1","Group_1","Group_1", "Group_1",
"Group_2","Group_2","Group_2","Group_2","Group_2","Group_2",
"Group_2","Group_2","Group_2","Group_2"))
df$ControlVarB <- factor(c("Group_1","Group_1","Group_1","Group_1","Group_1", "Group_1",
"Group_1","Group_1","Group_1","Group_1","Group_2","Group_2",
"Group_2","Group_2","Group_2","Group_2"))
df$ControlVarC <- factor(c("Group_2","Group_2","Group_2","Group_2","Group_1", "Group_1",
"Group_2","Group_2","Group_2","Group_2","Group_2","Group_2",
"Group_2","Group_2","Group_2","Group_2"))
Below is the code I used to call the lists for the x, y and colouring variable.
x_lists <- df %>% select(VarA:VarB) %>% colnames(.)
y_lists <- df %>% select(VarC:VarD) %>% colnames(.)
ControlVar_list <- df %>% select(contains("ControlVar")) %>% colnames(.)
Below is the function I have created and the mapply
save_plots <- function(dataset, x, y, z) {
first_plot <- ggplot(dataset) +
geom_point(data = subset(dataset, .data[[z]] == 'Group_1'),
aes(x = .data[[x]], y = .data[[y]], color = .data[[z]], size = 3)) +
geom_point(data = subset(dataset, .data[[z]] == 'Group_2'),
aes(x = .data[[x]], y = .data[[y]], color = .data[[z]], size = 3)) +
geom_smooth(aes(x = .data[[x]], y = .data[[y]], size = 0), method = "lm", colour="black", size=0.5) +
stat_cor(aes(x = .data[[x]], y = .data[[y]], color = .data[[z]],
label = ..rr.label..),
label.y.npc="top", label.x.npc = "left", method = "pearson",
size = 5) +
scale_color_manual(values = c("#C5BEC9", "#F2642b", "#F2642b")) +
labs(title = "test",
x = "VarA",
y = "VarB",
colour = "") +
guides(size = FALSE, color = FALSE) +
theme_bw(base_size = 14)
ggsave(sprintf("C:\\Documents\\%s.tiff", y), first_plot)
grDevices::dev.off()
}
mapply(save_plots, x_lists, y_lists, ControlVar_list, MoreArgs = list(dataset = df))
.data will not work with base R subset function. Try using dplyr::filter
library(tidyverse)
library(ggpubr)
save_plots <- function(dataset, x, y, z) {
first_plot <- ggplot(dataset) +
geom_point(data = filter(dataset, .data[[z]] == 'Group_1'),
aes(x = .data[[x]], y = .data[[y]], color = .data[[z]], size = 3)) +
geom_point(data = filter(dataset, .data[[z]] == 'Group_2'),
aes(x = .data[[x]], y = .data[[y]], color = .data[[z]], size = 3)) +
geom_smooth(aes(x = .data[[x]], y = .data[[y]], size = 0), method = "lm", colour="black", size=0.5) +
stat_cor(aes(x = .data[[x]], y = .data[[y]], color = .data[[z]],
label = ..rr.label..),
label.y.npc="top", label.x.npc = "left", method = "pearson",
size = 5) +
scale_color_manual(values = c("#C5BEC9", "#F2642b", "#F2642b")) +
labs(title = "test",
x = "VarA",
y = "VarB",
colour = "") +
guides(size = FALSE, color = FALSE) +
theme_bw(base_size = 14)
ggsave(sprintf("%s.tiff", y), first_plot)
grDevices::dev.off()
}

How to change the colors of the dots in the graph? ggpubr package

I am using the ggerrorplot () function of the ggpubr package to create the graph below. My question is whether there is any way to change the colors of the dots without changing the color of the point that represents the mean and standard deviation? Observe the image:
My code:
# loading packages
library(ggpubr)
# Create data frame
GROUP <- c()
TEST <- c()
VALUE <- c()
for (i in 0:100) {
gp <- c('Group1','Group2','Group1 and Group2')
ts <- c('Test1','Test2')
GROUP <- append(GROUP, sample(gp, 1))
TEST <- append(TEST, sample(ts, 1))
VALUE <- append(VALUE, sample(1:200, 1))
}
df <- data.frame(GROUP, TEST, VALUE)
# Seed
set.seed(123)
# Plot
ggerrorplot(df, x = "GROUP", y = "VALUE",
desc_stat = "mean_sd",
add = c("jitter"),
color = "TEST",
palette = "jco",
add.params = list(size = 0.2),
order = c('Group1','Group2','Group1 and Group2')
) +
labs(x = '', y = 'Values\n') +
theme(legend.title = element_blank())
Can you accomplish this by simply passing in color to add.params?
# loading packages
library(ggpubr)
#> Loading required package: ggplot2
# Create data frame
GROUP <- c()
TEST <- c()
VALUE <- c()
for (i in 0:100) {
gp <- c('Group1','Group2','Group1 and Group2')
ts <- c('Test1','Test2')
GROUP <- append(GROUP, sample(gp, 1))
TEST <- append(TEST, sample(ts, 1))
VALUE <- append(VALUE, sample(1:200, 1))
}
df <- data.frame(GROUP, TEST, VALUE)
# Seed
set.seed(123)
# Plot
ggerrorplot(df, x = "GROUP", y = "VALUE",
desc_stat = "mean_sd",
add = c("jitter"),
color = "TEST",
palette = "jco",
add.params = list(size = 0.2, color = "red"),
order = c('Group1','Group2','Group1 and Group2')
) +
labs(x = '', y = 'Values\n') +
theme(legend.title = element_blank())
Created on 2021-03-10 by the reprex package (v0.3.0)
Another potential workaround - replicate the plot using ggplot() and geom_linerange(), e.g.
library(ggpubr)
library(ggsci)
library(cowplot)
# Create data frame
GROUP <- c()
TEST <- c()
VALUE <- c()
for (i in 0:100) {
gp <- c('Group1','Group2','Group1 and Group2')
ts <- c('Test1','Test2')
GROUP <- append(GROUP, sample(gp, 1))
TEST <- append(TEST, sample(ts, 1))
VALUE <- append(VALUE, sample(1:200, 1))
}
df <- data.frame(GROUP, TEST, VALUE)
# Seed
set.seed(123)
data_summary <- function(data, varname, groupnames){
require(plyr)
summary_func <- function(x, col){
c(mean = mean(x[[col]], na.rm=TRUE),
sd = sd(x[[col]], na.rm=TRUE))
}
data_sum<-ddply(data, groupnames, .fun=summary_func,
varname)
data_sum <- rename(data_sum, c("mean" = varname))
return(data_sum)
}
df2 <- data_summary(df, varname = "VALUE", groupnames = c("TEST", "GROUP"))
# Plot
p1 <- ggplot(df, aes(x = factor(GROUP, levels = c('Group1','Group2','Group1 and Group2')),
y = VALUE, color = TEST)) +
geom_jitter(shape = 21, fill = "black", stroke = 0,
position = position_jitterdodge(jitter.width = 0.2)) +
geom_linerange(data = df2, aes(ymin=VALUE-sd, ymax=VALUE+sd),
position=position_dodge(width = .75)) +
geom_point(data = df2, aes(y = VALUE), size = 3,
position = position_dodge(width = 0.75)) +
scale_color_jco() +
labs(x = '', y = 'Values\n') +
theme_classic(base_size = 14) +
theme(legend.title = element_blank(),
legend.position = "top")
p2 <- ggerrorplot(df, x = "GROUP", y = "VALUE",
desc_stat = "mean_sd",
add = c("jitter"),
color = "TEST",
palette = "jco",
add.params = list(size = 0.2),
order = c('Group1','Group2','Group1 and Group2')
) +
labs(x = '', y = 'Values\n') +
theme(legend.title = element_blank())
cowplot::plot_grid(p1, p2, nrow = 1, ncol = 2, labels = "AUTO")
When you plot them side-by-side you can see that they aren't exactly the same, but this might work for you nonetheless.
Edit
An advantage of this approach is that you can adjust the 'fill' scale separately if you don't want all the dots to be the same colour, but you do want them to be different to the lines, e.g.
p1 <- ggplot(df, aes(x = factor(GROUP, levels = c('Group1','Group2','Group1 and Group2')),
y = VALUE, color = TEST)) +
geom_jitter(aes(fill = TEST), shape = 21, stroke = 0,
position = position_jitterdodge(jitter.width = 0.2)) +
geom_linerange(data = df2, aes(ymin=VALUE-sd, ymax=VALUE+sd),
position=position_dodge(width = .75)) +
geom_point(data = df2, aes(y = VALUE), size = 3,
position = position_dodge(width = 0.75)) +
scale_color_jco() +
scale_fill_npg() +
labs(x = '', y = 'Values\n') +
theme_classic(base_size = 14) +
theme(legend.title = element_blank(),
legend.position = "top")
p2 <- ggerrorplot(df, x = "GROUP", y = "VALUE",
desc_stat = "mean_sd",
add = c("jitter"),
color = "TEST",
palette = "jco",
add.params = list(size = 0.2),
order = c('Group1','Group2','Group1 and Group2')
) +
labs(x = '', y = 'Values\n') +
theme(legend.title = element_blank())
cowplot::plot_grid(p1, p2, nrow = 1, ncol = 2, labels = "AUTO")

ggplotly graphs are not displayed properly in jupyterlab

when I use ggplotly in jupyterlab the output gets truncated. How can I force jupyter to display it properly ?
For some reason if I just use ggplot the graph can be as large as it needs to be
set.seed(1L)
values <- rnorm(n = 366L)
plot_data <- data.frame(date = seq(as.Date("2011-01-01"), as.Date("2012-01-01"), 1),
value = values,
sign = ifelse(values > 0, "positive", "negative"), size = ifelse(abs(values) > 2, "big", "small")
)
p <- ggplot(data = plot_data) + aes(x = date, y = value, color = sign) + geom_line()
vertical <- as.Date(c("2011-02-01", "2011-10-01"))
names(vertical) <- c("test", "test2")
p <- p + geom_vline(xintercept = vertical, linetype = "dashed", color = "orange")
data_geom <- data.frame(v_x = vertical, v_label = names(vertical), v_y = rep(2.5, 2))
p <- p + geom_text(data = data_geom, aes_string(x = "v_x", label = "v_label", y = "v_y"), colour = "orange", angle = 90, check_overlap = TRUE)
ggplotly(p)
This gives me

Converting ggplot object to plotly object creates axis title that overlaps tick values

I had the same issue described in this question:
R: ggplot and plotly axis margin won't change
but when I implemented the solution, I got the following error:
Warning: Ignoring unknown aesthetics: text We recommend that you use the dev version of ggplot2 with ggplotly() Install it with: devtools::install_github('hadley/ggplot2') Error in tmp[[2]] : subscript out of bounds
This code will produce this error on my machine:
library(gapminder)
library(plotly)
library(ggplot2)
lead <- rep("Fred Smith", 30)
lead <- append(lead, rep("Terry Jones", 30))
lead <- append(lead, rep("Henry Sarduci", 30))
proj_date <- seq(as.Date('2017-11-01'), as.Date('2017-11-30'), by = 'day')
proj_date <- append(proj_date, rep(proj_date, 2))
set.seed(1237)
actHrs <- runif(90, 1, 100)
cummActHrs <- cumsum(actHrs)
forHrs <- runif(90, 1, 100)
cummForHrs <- cumsum(forHrs)
df <- data.frame(Lead = lead, date_seq = proj_date,
cActHrs = cummActHrs,
cForHrs = cummForHrs)
makePlot <- function(dat=df, man_level = 'Lead') {
p <- ggplot(dat, aes_string(x='date_seq', y='cActHrs',
group = man_level,
color = man_level),
linetype = 1) +
geom_line() +
geom_line(data=df,
aes_string(x='date_seq', y = 'cForHrs',
group = man_level,
color = man_level),
linetype = 2)
p <- p + geom_point(aes(text=sprintf('%s\nManager: %s\n MTD Actual Hrs: %s\nMTD Forecasted Hrs: %s',
date_seq, Lead, round(cActHrs, 2), round(cForHrs, 2))))
p <- p + theme_classic() + ylab('Hours') + xlab('Date')
gp <- ggplotly(p, tooltip = "text") %>% layout(hovermode = "compare")
### FIX IMPLEMENTED HERE ###
gp[['x']][['layout']][['annotations']][[2]][['x']] <- -0.1
gp %>% layout(margin = list(l = 75))
return(gp)
}
## run the example
p1 <- makePlot()
Try this:
makePlot <- function(dat=df, man_level = "Lead") {
dat$var <- dat[,man_level]
dat$grp <- ""
p <- ggplot(dat, aes(x=date_seq, y=cActHrs,
group = var, color = var,
text=paste0("Manager:", date_seq,"<br>MTD Actual Hrs:", round(cActHrs, 2),
"<br>MTD Forecasted Hrs:", round(cForHrs, 2))),
linetype = 1) +
geom_line() +
geom_line(data=dat,
aes(x=date_seq, y = cForHrs,
group = var, color = var),
linetype = 2) +
geom_point() +
theme_classic() + ylab("Hours") + xlab("Date") +
scale_color_discrete(name=man_level) +
facet_wrap(~grp)
gp <- ggplotly(p, tooltip = "text")
# Set y-axis label position
gp[["x"]][["layout"]][["annotations"]][[2]][["x"]] <- -0.06
# Set legend label position
gp[["x"]][["layout"]][["annotations"]][[3]][["y"]] <- 0.93
gp <- gp %>% layout(margin = list(l = 120, b=70), hovermode = "compare")
return(gp)
}
The problem in your case is the opposite of the linked question. Your axis title is a real axis title, not an annotation. Currently there is no solution to move axis titles in any direction (see https://github.com/lleslie84/plotly.js/pull/1).
Workarounds like adding line breaks to the axis title or adding spaces to the tick labels don't work in your case.
One possible workaround would be to add an annotation with your axis title. The annotation can then be freely moved.
gp <- layout(gp, yaxis = list(title = ""),
margin = list(l = 100),
annotations = c(list(text = "Hours",
x = -0.15,
xref = "paper",
showarrow = F,
textangle = -90))
)
Complete code
library(gapminder)
library(plotly)
library(ggplot2)
lead <- rep("Fred Smith", 30)
lead <- append(lead, rep("Terry Jones", 30))
lead <- append(lead, rep("Henry Sarduci", 30))
proj_date <- seq(as.Date('2017-11-01'), as.Date('2017-11-30'), by = 'day')
proj_date <- append(proj_date, rep(proj_date, 2))
set.seed(1237)
actHrs <- runif(90, 1, 100)
cummActHrs <- cumsum(actHrs)
forHrs <- runif(90, 1, 100)
cummForHrs <- cumsum(forHrs)
df <- data.frame(Lead = lead, date_seq = proj_date,
cActHrs = cummActHrs,
cForHrs = cummForHrs)
makePlot <- function(dat=df, man_level = 'Lead') {
p <- ggplot(dat, aes_string(x='date_seq', y='cActHrs',
group = man_level,
color = man_level),
linetype = 1) +
geom_line() +
geom_line(data=df,
aes_string(x='date_seq', y = 'cForHrs',
group = man_level,
color = man_level),
linetype = 2)
p <- p + geom_point(aes(text=sprintf('%s\nManager: %s\n MTD Actual Hrs: %s\nMTD Forecasted Hrs: %s',
date_seq, Lead, round(cActHrs, 2), round(cForHrs, 2))))
p <- p + theme_classic() + ylab('Hours') + xlab('Date')
gp <- ggplotly(p, tooltip = "text") %>% layout(hovermode = "compare")
### FIX IMPLEMENTED HERE ###
gp <- layout(gp,
yaxis = list(title = ""),
margin = list(l = 100),
annotations = c(list(text = "Hours",
x = -0.15,
xref = "paper",
showarrow = F,
textangle = -90))
)
return(gp)
}
## run the example
p1 <- makePlot()
p1

Creating a horizontal bar chart in R to display sequence of activities

The dataset "patients" is an eventlog of patients visiting a clinic and getting treatment. The script below gives a data frame with traces or sequence of activities in the eventlog, trace_id and absolute frequency of the cases following the particular trace. I wish to create a dynamic horizontal bar chart using ggplot2 or plotly such that the traces are represented like the snapshot attached with the absolute frequency in % at the top of the bar with axes labels.
Thanks and please help!
library("bupaR")
traces(patients, output_traces = T, output_cases = F)
Hope this helps (I am not able to get frequency however)
library(splitstackshape)
tr <- data.frame(traces(patients, output_traces = T, output_cases = F))
tr.df <- cSplit(tr, "trace", ",")
tr.df <- tr.df[,c(1,4:9)]
tr.df <- melt(tr.df, id.vars = "trace_id")
windows()
ggplot(data = tr.df, aes(x = variable,y = trace_id, fill = value, label =
value)) +
geom_tile(colour = "white") +
geom_text(colour = "white", fontface = "bold", size = 2) +
scale_fill_discrete(na.value="transparent") +
theme(legend.position="none")
EDIT 1:
library(splitstackshape)
library(bupaR)
library(ggplot2)
tr <- data.frame(traces(patients, output_traces = T, output_cases = F))
tr.df <- cSplit(tr, "trace", ",")
tr.df <- tr.df[,c(1,4:9)]
tr.df <- melt(tr.df, id.vars = "trace_id")
tr.df <- tr.df[order(tr.df$trace_id),]
tr.label <- data.frame(id = tr$trace_id, freq = tr$absolute_frequency)
tr.label <- tr.label[order(tr.label$id),]
windows()
ggplot(data = tr.df, aes(x = variable,y = trace_id, fill = value, label = value)) +
geom_tile(colour = "white") +
geom_text(colour = "white", fontface = "bold", size = 2) +
scale_fill_discrete(na.value="transparent") +
theme(legend.position="none") +
geom_text(data = tr.label, aes(label = freq, y = id, x = 6), nudge_x = 0.3,
colour = "black", fontface = "bold", inherit.aes = FALSE)

Resources