No matter what I do, I can not seem to find code to ensure the labels of my Pie chart, do not overlap the Pie chart OR other labels.
I've entered geom_text_repel and adjusted vjust size force x in various ways and nothing works. It works on some charts, and other charts it does not.
---
title: "Untitled"
date: "August 14, 2019"
output: html_document
---
```{r eval = TRUE, echo = FALSE, results = "asis", warning = FALSE, message = FALSE, fig.height = 6.25, fig.width = 12}
library(plyr)
library(dplyr)
library(kableExtra)
library(scales)
library(ggplot2)
library(RODBC)
library(data.table)
library(DT)
library(treemapify)
library(devtools)
library(digest)
library(plotly)
library(shiny)
library(ggrepel)
library(expss)
rptyear <- 2018
colours <- c("A" = "royalblue3", "B" = "red", "C" = "gold", "D" = "green4")
Category <- c("A", "B", "C", "D")
premiumtable <- cbind(rep(c("A","B","C","D"),11), c(rep(2009,4),rep(2010,4),rep(2011,4),rep(2012,4),rep(2013,4),rep(2014,4),rep(2015, 4), rep(2016,4), rep(2017,4), rep(2018,4),rep(2019,4)), as.numeric(c(13223284, 3379574,721217, 2272843,14946074,4274769, 753797,2655032, 15997384, 4952687, 722556,3035566,16244348,5541543,887109,3299966,15841630,6303443,1101696,3751892,14993295, 6993626,1312650,4158196,13946038, 7081457,1317428,4711389, 12800640, 6923012, 1345159, 4911780, 12314663, 6449919, 1395973,5004046,12612704,6968110,1507382,5745079,15311213,8958588,1849069,6819488)))
colnames(premiumtable) <- c("Var1", "Var2", "Freq")
currentPrem <- filter(as.data.table(premiumtable), Var2 == rptyear, Freq != 0)
prempie <- ggplot(currentPrem, aes(x="", y = as.numeric(currentPrem$Freq), fill= Var1))
prempie <- prempie + geom_bar(width = 1, stat = "identity", colour = "black")
prempie <- prempie + ggtitle(paste0("YTD Numbers:")) + coord_polar("y", start = 0)
prempie <- prempie + scale_fill_manual(values = colours)
prempie <- prempie + theme_void()+ theme(plot.title = element_text(face = "bold", size = 20, hjust = .5), legend.position = "none", axis.title=element_text(size=20), axis.title.y = element_blank(), axis.title.x = element_blank())
prempie <- prempie + geom_text_repel(mapping = aes(label = paste0(Var1, "\n $",prettyNum(round(as.numeric(currentPrem$Freq)/1000), big.mark = ",")) , x = 2),position = position_stack( vjust = .5), size = 6, force = 5,direction = "both", segment.size = 0)
```
Thanks for providing the working data/code. If you are open to using the package plotly it is quite good at producing pie charts right out of the box, and requires less fiddling about than ggplot. Here is an example with your data:
library(dplyr)
library(plotly)
#
rptyear <- 2018
colours <- c("A" = "royalblue3", "B" = "red", "C" = "gold", "D" = "green4")
# data
premiumtable <- data.frame(Var1 = rep(c("A","B","C","D"),11),
Var2 = c(rep(2009,4),rep(2010,4),rep(2011,4),rep(2012,4),rep(2013,4),rep(2014,4),rep(2015, 4),rep(2016,4), rep(2017,4),rep(2018,4),rep(2019,4)),
Freq = as.numeric(c(13223284, 3379574,721217, 2272843,14946074,4274769, 753797,2655032, 15997384, 4952687, 722556,3035566,16244348,5541543,887109,3299966,15841630,6303443,1101696,3751892,14993295, 6993626,1312650,4158196,13946038, 7081457,1317428,4711389, 12800640, 6923012, 1345159, 4911780, 12314663, 6449919, 1395973,5004046,12612704,6968110,1507382,5745079,15311213,8958588,1849069,6819488)))
# prepare plot data
currentPrem <-
premiumtable %>%
filter(Var2 == rptyear, Freq != 0) %>%
mutate(Freq = as.numeric(Freq))
# create plot labels
labels = paste0(currentPrem$Var1, "\n $",prettyNum(round(as.numeric(currentPrem$Freq)/1000), big.mark = ","))
# create plot
plot_ly(currentPrem,
labels = ~labels,
values = ~Freq, type = 'pie',
textposition = 'outside',
textinfo = 'label',
colors = colours) %>%
layout(title = paste("YTD Numbers:", rptyear),
xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
showlegend = FALSE)
Related
I am using ggVennDiagram to creat a venn diagram. I would like to set the color of categories manually. Here is what I am trying, however the color of border line of the circles is not changed.
x <- list(A=1:5,B=2:7,C=3:6,D=4:9)
ggVennDiagram(x, label = "count", label_alpha = 0,
color = c("A" = "yellow","B" ="steelblue",'C' = 'red', 'D' = 'black') ,
set_color = c("A" = "yellow","B" ="steelblue", 'C' = 'red', 'D' = 'black')) +
scale_fill_gradient(low = "#F4FAFE", high = "#4981BF")
Any Idea how I can match the colors of circle lines with the name of categories?
Thanks
From ggVennDiagram documentation it looks as if you have to build up the venn diagram rather than use the ggVennDiagram function. Maybe this adaptation from the documentation example gives you enough to work on...
Updated to include OP's comment for percentage count.
library(ggplot2)
library(ggVennDiagram)
x <- list(A=1:5,B=2:7,C=3:6,D=4:9)
venn <- Venn(x)
data <- process_data(venn)
ggplot() +
# 1. region count layer
geom_sf(aes(fill = count), data = venn_region(data)) +
# 2. set edge layer
geom_sf(aes(color = name), data = venn_setedge(data), show.legend = TRUE, size = 2) +
# 3. set label layer
geom_sf_text(aes(label = name), data = venn_setlabel(data)) +
# 4. region label layer
geom_sf_label(aes(label = paste0(count, " (", scales::percent(count/sum(count), accuracy = 2), ")")),
data = venn_region(data),
size = 3) +
scale_fill_gradient(low = "#F4FAFE", high = "#4981BF")+
scale_color_manual(values = c("A" = "yellow","B" ="steelblue",'C' = 'red', 'D' = 'black'),
labels = c('D' = 'D = bdiv_human'))+
theme_void()
Created on 2021-12-04 by the reprex package (v2.0.1)
The ggVennDiagram command calls the ggVennDiagram::plot_venn function for plotting colored areas. You can modify this function according to your needs.
See below my suggestion.
plot_venn <- function (x, show_intersect, set_color, set_size, label, label_geom,
label_alpha, label_color, label_size, label_percent_digit,
label_txtWidth, edge_lty, edge_size, ...) {
venn <- Venn(x)
data <- process_data(venn)
p <- ggplot() + geom_sf(aes_string(fill = "count"), data = data#region) +
geom_sf(aes_string(color = "name"), data = data#setEdge,
show.legend = F, lty = edge_lty, size = edge_size, color = set_color) +
geom_sf_text(aes_string(label = "name"), data = data#setLabel,
size = set_size, color = set_color) + theme_void()
if (label != "none" & show_intersect == FALSE) {
region_label <- data#region %>% dplyr::filter(.data$component ==
"region") %>% dplyr::mutate(percent = paste(round(.data$count *
100/sum(.data$count), digits = label_percent_digit),
"%", sep = "")) %>% dplyr::mutate(both = paste(.data$count,
paste0("(", .data$percent, ")"), sep = "\n"))
if (label_geom == "label") {
p <- p + geom_sf_label(aes_string(label = label),
data = region_label, alpha = label_alpha, color = label_color,
size = label_size, lineheight = 0.85, label.size = NA)
}
if (label_geom == "text") {
p <- p + geom_sf_text(aes_string(label = label),
data = region_label, alpha = label_alpha, color = label_color,
size = label_size, lineheight = 0.85)
}
}
if (show_intersect == TRUE) {
items <- data#region %>% dplyr::rowwise() %>% dplyr::mutate(text = stringr::str_wrap(paste0(.data$item,
collapse = " "), width = label_txtWidth)) %>% sf::st_as_sf()
label_coord = sf::st_centroid(items$geometry) %>% sf::st_coordinates()
p <- ggplot(items) + geom_sf(aes_string(fill = "count")) +
geom_sf_text(aes_string(label = "name"), data = data#setLabel,
inherit.aes = F) + geom_text(aes_string(label = "count",
text = "text"), x = label_coord[, 1], y = label_coord[,
2], show.legend = FALSE) + theme_void()
ax <- list(showline = FALSE)
p <- plotly::ggplotly(p, tooltip = c("text")) %>% plotly::layout(xaxis = ax,
yaxis = ax)
}
p
}
Then, you can run the code:
library(ggVennDiagram)
library(ggplot2)
# Replace the plot_venn function with the modified version
assignInNamespace(x="plot_venn", value=plot_venn, ns="ggVennDiagram")
x <- list(A=1:5,B=2:7,C=3:6,D=4:9)
ggVennDiagram(x, label = "count", label_alpha = 0,
color = c("A" = "yellow","B" ="steelblue",'C' = 'red', 'bdiv_human' = 'black') ,
set_color = c("A" = "yellow","B" ="steelblue", 'C' = 'red', 'bdiv_human' = 'black')) +
scale_fill_gradient(low = "#F4FAFE", high = "#4981BF") +
scale_color_gradient(low = "#F4FAFE", high = "#4981BF")
I'm interested in visualizing the location of certain words in sentences. Say I have 500 sentences between 3-5 words long and want to visualize the location of word A in each sentence:
Data:
set.seed(123)
w1 <- sample(LETTERS[1:3], 1000, replace = TRUE)
w2 <- sample(LETTERS[1:5], 1000, replace = TRUE)
w3 <- sample(LETTERS[1:6], 1000, replace = TRUE)
w4 <- sample(c(NA,LETTERS[1:7]), 1000, replace = TRUE)
w5 <- sample(c(NA,LETTERS[1:8]), 1000, replace = TRUE)
df <- data.frame(
position = rep(1:5, each = 1000), # position of word in sentence
word = c(w1, w2, w3, w4, w5) # the words in the sentences
)
I can produce the location plot in base R. But the code involves a very slow for loop and does not have the aesthetic qualities of ggplot2. So how can the same type of visualization be produced faster and in ggplot2?
This is the code that produces the location plot in base R:
# Plot dimensions:
x <- rep(1:5, 100)
y <- 1:500
# Plot parameters:
par(mar=c(2,1.5,1,1.5), par(xpd = T))
# Plot:
plot(y ~ x, type = "n", frame = F, axes = F, ylab="", xlab="",
main="Location of word 'A' in sentences", cex.main=0.9)
axis(1, at=seq(1:5), labels=c("w1", "w2", "w3", "w4", "w5"), cex.axis=0.9)
# Legend:
legend(2.25, 530, c("A", "other", "NA"), fill=c("blue", "orange", "black"),
horiz = T, cex = 0.7, bty = "n", border = "white")
# For loop to print 'A' as color in positions:
for(i in unique(df$position)){
text(i, 1:500, "__________", cex = 1,
col = ifelse(df[df$position==i,]$word=="A", "blue", "orange"))
}
For example using geom_segment, and then with a conditional aesthetic.
I am using ggh4x for the truncated axis.
library(tidyverse)
library(ggh4x)
df <-
df %>% group_by(position) %>%mutate(index = row_number())
ggplot(df, aes( color = word=="A")) +
geom_segment(aes(x = position-.4, xend = position+.4,
y = index, yend = index),
key_glyph= "rect") +
scale_color_manual(name = NULL,
values=c(`TRUE` = "blue", `FALSE` = "orange"),
labels = c(`TRUE` = "A", `FALSE` = "other"),
na.value="black")+
guides(x = "axis_truncated") +
scale_x_continuous(breaks = 1:5, labels = paste0("w", 1:5))+
theme_classic() +
theme(axis.line.y = element_blank(),
axis.ticks.y = element_blank(),
axis.title.y = element_blank(),
axis.text.y = element_blank(),
plot.title = element_text(hjust = .5),
legend.position = "top") +
labs( y = NULL, x = NULL, title = "Location of A")
Here's an initial attempt. (I'm not quite clear, are you looking to show just the first 500 of the 1000 sentences?)
My approach here is to first summarize the data in terms of contiguous sections that are A / other / NA. This way, the plot area is filled exactly without needing to tweak line thickness, and it should plot more quickly by reducing the number of plotted elements.
library(dplyr)
df_plot <- df %>%
mutate(A_spots = case_when(word == "A" ~ "A",
word != "A" ~ "other",
TRUE ~ "NA")) %>%
group_by(position) %>%
mutate(col_chg = A_spots != lag(A_spots, default = ""),
group_num = cumsum(col_chg)) %>%
ungroup() %>%
count(position, group_num, A_spots)
library(ggplot2)
ggplot(df_plot, aes(position, n, fill = A_spots, group = group_num)) +
geom_col() +
scale_x_continuous(name = NULL, breaks = 1:5, #stolen from #tjebo's answer
labels = paste0("w", 1:5))+
scale_fill_manual(
values = c("A" = "blue","other" = "orange", "NA" = "black")) +
labs(title = "Location of word 'A' in sentences") +
theme_minimal()
Got the order in ggboxplot right (first Pre than Post), but can't get it to work in the ggsummarytable (also tried order with asc. and desc.). I added a short reproducible example.
Pre <- runif(50, 5.0, 7.5)
Post <- runif(50,6.5,10)
pre_string <- rep("Pre", times = length(Pre))
post_string <- rep("Post", times = length(Post))
data_merge<-c(Pre,Post)
prepost_merge<-(c(pre_string,post_string))
prepost_merge<-as.factor(prepost_merge)
df<-data.frame(data_merge)
df$group <- prepost_merge
head(df, 2)
# CREATE TABLE #
summary.stats <- df %>%
group_by(group) %>%
get_summary_stats(type = "full")
summary.plot <- ggsummarytable(
summary.stats, x = "group", y = c("n", "median", "q1","q3"),variablename="", order = (c("Pre","Post")),
ggtheme = theme_bw(),axis.line.x = element_blank(), axis.line.y = element_blank(),label_value = "",legend="none",
) + theme_pubr(
base_size = 8,
base_family = "",
border = TRUE,
margin = TRUE,
x.text.angle = 0
) + labs(x="",y="")
Reproducible example -
wrong order (needs to be pre post)
Like ggplot2 , ggsummarytable also can be sorted based on the factor levels.
library(ggpubr)
#Give the factor levels in the order that you want
summary.stats$group <- factor(summary.stats$group, c('Pre', 'Post'))
ggsummarytable(
summary.stats, x = "group", y = c("n", "median", "q1","q3"),
variablename="", order = (c("Pre","Post")),
ggtheme = theme_bw(),
axis.line.x = element_blank(),
axis.line.y = element_blank(),
label_value = "",legend="none",
) + theme_pubr(
base_size = 8,
base_family = "",
border = TRUE,
margin = TRUE,
x.text.angle = 0
) + labs(x="",y="")
I am trying to add a second x axis on a ggplotly plot, not to accommodate a second trace, but for better visualisation.
I have worked out that I do need to add a trace for it, but the question is how. The examples I have found to add simple, transparent traces are not working for my plot which has factors on the y-axis.
Please take it as given that for my purposes I need to use ggplotly and need the second axis. The example I am about to provide is just minimal, the real application has other requirements accommodated by ggplotly (as opposed to straight plotly or ggplot2). Imagine if there were 100 different iris species that people were scrolling through, and that the top axis provides a good guide at first. Using ggplot2, here is the example of what I would like to achieve with ggplotly:
library(tidyverse)
library(plotly)
dat <- iris %>%
group_by(Species) %>%
summarise(meanSL = mean(Sepal.Length, na.rm = TRUE),
count = n())
labels_dup = c("low", "medium", "high")
labels = c("low", "medium\n\nmeans to the right\nof this line are\nso cool", "high")
breaks = c(5,6,7)
limits = c(4,8)
p <- ggplot(dat, aes(x = reorder(as.character(Species),meanSL), y = meanSL)) +
geom_point() +
geom_hline(yintercept = 6, lty = 2) +
coord_flip() +
ggtitle("Means of sepal length by species") +
theme_classic()+
theme(axis.title.x=element_blank(),
axis.title.y=element_blank(),
axis.line.y = element_blank(),
plot.title = element_text(size = 10, hjust = 0.5))
p + scale_y_continuous(breaks = breaks, labels = labels, limits = limits, sec.axis = dup_axis(labels = labels_dup)) +
geom_text(aes(y = 4,label = paste0("n=",count)), size = 3)
and here is the output:
Here is a start to the ggplotly solution:
ax <- list(
side = "bottom",
showticklabels = TRUE,
range = limits,
tickmode = "array",
tickvals = breaks,
ticktext = labels)
ax2 <- list(
overlaying = "x",
side = "top",
showticklabels = TRUE,
range = limits,
tickmode = "array",
tickvals = breaks,
ticktext = labels_dup)
ggplotly(p) %>%
#<need a trace here e.g. add_lines, add_segment. It could either be transparent, or use the vertical line or count text in the plot as shown in the example> %>%
layout(
xaxis = ax,
xaxis2 = ax2)
Edit: Here is less minimal code that produces the warning when I use the suggested fix. I use geom_pointrange instead of stat_summary for reasons related to the hover text:
library(boot)
library(tidyverse)
library(plotly)
boot_sd <- function(x, fun=mean, R=1001) {
fun <- match.fun(fun)
bfoo <- function(data, idx) {
fun(data[idx])
}
b <- boot(x, bfoo, R=R)
sd(b$t)
}
#Summarise the data for use with geom_pointrange and add some hover text for use with plotly:
dat <- iris %>%
mutate(flower_colour = c(rep(c("blue", "purple"), 25), rep(c("blue", "white"), 25), rep(c("white", "purple"), 25))) %>%
group_by(Species) %>%
summarise(meanSL = mean(Sepal.Length, na.rm = TRUE),
countSL = n(),
meSL = qt(0.975, countSL-1) * boot_sd(Sepal.Length, mean, 1001),
lowerCI_SL = meanSL - meSL,
upperCI_SL = meanSL + meSL,
group = "Mean &\nConfidence Interval",
colours_in_species = paste0(sort(unique(flower_colour)), collapse = ",")) %>%
as.data.frame() %>%
mutate(colours_in_species = paste0("colours: ", colours_in_species))
#Some plotting variables
purple <- "#8f11e7"
plot_title_colour <- "#35373b"
axis_text_colour <- "#3c4042"
legend_text_colour <- "#3c4042"
annotation_colour <- "#3c4042"
labels_dup = c("low", "medium", "high")
labels = c("low", "medium\n\nmeans to the right\nof this line are\nso cool", "high")
breaks = c(5,6,7)
limits = c(4,8)
p <- ggplot(dat, aes(x = reorder(as.character(Species),meanSL), text = colours_in_species)) +
geom_text(aes(y = 4.2,label = paste0("n=",countSL)), color = annotation_colour, size = 3) +
geom_pointrange(aes(y = meanSL, ymin=lowerCI_SL, ymax=upperCI_SL,color = group, fill = group), size = 1) +
scale_fill_manual(values = "#f4a01f", name = "Mean &\nConfidence Interval") +
scale_color_manual(values = "#f4a01f", name = "Mean &\nConfidence Interval") +
geom_hline(yintercept = 5, colour = "dark grey", linetype = "dashed") +
geom_hline(yintercept = 6, colour = purple, linetype = "dashed") +
coord_flip() +
ggtitle("Means of sepal length by species") +
theme_classic()+
theme(axis.text.y=element_text(size=10, colour = axis_text_colour),
axis.title.x=element_blank(),
axis.title.y=element_blank(),
axis.line.y = element_blank(),
axis.ticks.y = element_blank(),
plot.title = element_text(size = 12, hjust = 0, colour = plot_title_colour),
legend.justification=c("right", "top"),
legend.box.just = "center",
legend.position ="top",
legend.title.align = "left",
legend.text=element_text(size = 8, hjust = 0.5, colour = legend_text_colour),
legend.title=element_blank())
ax <- list(
side = "top",
showticklabels = TRUE,
range = limits,
tickmode = "array",
tickvals = breaks,
ticktext = labels_dup)
ay <- list(
side = "right")
ax2 <- list(
overlaying = "x",
side = "bottom",
showticklabels = TRUE,
range = limits,
tickmode = "array",
tickvals = breaks,
ticktext = labels_dup,
tickfont = list(size = 11))
ggplotly(p, tooltip = 'text') %>%
add_markers(data = NULL, inherit = TRUE, xaxis = "x2") %>%
layout(
xaxis = ax,
xaxis2 = ax2,
yaxis = ay,
legend = list(orientation = "v", itemclick = FALSE, x = 1.2, y = 1.04),
margin = list(t = 120, l = 60)
)
and the warning is this:
Warning message:
'scatter' objects don't have these attributes: 'label'
Valid attributes include:
'type', 'visible', 'showlegend', 'legendgroup', 'opacity', 'name', 'uid', 'ids', 'customdata', 'meta', 'selectedpoints', 'hoverinfo', 'hoverlabel', 'stream', 'transforms', 'uirevision', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'stackgroup', 'orientation', 'groupnorm', 'stackgaps', 'text', 'texttemplate', 'hovertext', 'mode', 'hoveron', 'hovertemplate', 'line', 'connectgaps', 'cliponaxis', 'fill', 'fillcolor', 'marker', 'selected', 'unselected', 'textposition', 'textfont', 'r', 't', 'error_x', 'error_y', 'xcalendar', 'ycalendar', 'xaxis', 'yaxis', 'idssrc', 'customdatasrc', 'metasrc', 'hoverinfosrc', 'xsrc', 'ysrc', 'textsrc', 'texttemplatesrc', 'hovertextsrc', 'hovertemplatesrc', 'textpositionsrc', 'rsrc', 'tsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
I get it working by just adding:
add_markers(data = NULL, inherit = TRUE, xaxis = "x2")
And I did also set the tickfont size of your second axis to 11 to match the font size of your original axis.
Although it is working, sometimes changing the zoom (especially when clicking "autoscale") will mess up the scales of the x axes so that they are not in sync anymore. Probably the best option is to limit the available options in the icon bar.
Here is your edited code put into a running shiny app:
library(tidyverse)
library(plotly)
library(shiny)
dat <- iris %>%
group_by(Species) %>%
summarise(meanSL = mean(Sepal.Length, na.rm = TRUE),
count = n())
labels_dup = c("low", "medium", "high")
labels = c("low", "medium\n\nmeans to the right\nof this line are\nso cool", "high")
breaks = c(5,6,7)
limits = c(4,8)
p <- ggplot(dat, aes(x = reorder(as.character(Species),meanSL), y = meanSL)) +
geom_point() +
geom_hline(yintercept = 6, lty = 2) +
coord_flip() +
ggtitle("Means of sepal length by species") +
theme_classic() +
theme(axis.title.x=element_blank(),
axis.title.y=element_blank(),
axis.line.y = element_blank(),
plot.title = element_text(size = 10, hjust = 0.5))
p + scale_y_continuous(breaks = breaks, labels = labels, limits = limits, sec.axis = dup_axis(labels = labels_dup)) +
geom_text(aes(y = 4,label = paste0("n=",count)), size = 3)
ax <- list(
side = "bottom",
showticklabels = TRUE,
range = limits,
tickmode = "array",
tickvals = breaks,
ticktext = labels)
ax2 <- list(
overlaying = "x",
side = "top",
showticklabels = TRUE,
range = limits,
tickmode = "array",
tickvals = breaks,
ticktext = labels_dup,
tickfont = list(size = 11)) # I added this line
shinyApp(
ui = fluidPage(
plotlyOutput("plot")
),
server = function(input, output) {
output$plot <- renderPlotly({
ggplotly(p) %>%
add_markers(data = NULL, inherit = TRUE, xaxis = "x2") %>% # new line
layout(
xaxis = ax,
xaxis2 = ax2)
})
}
)
Update
Below is a running shiny app with the additional example code. Although it is showing a warning that
Warning: 'scatter' objects don't have these attributes: 'label'
the plot is displayed correctly with both x axes.
I assume that the plot not showing correctly is unrelated to the warning above.
library(boot)
library(tidyverse)
library(plotly)
library(shiny)
boot_sd <- function(x, fun=mean, R=1001) {
fun <- match.fun(fun)
bfoo <- function(data, idx) {
fun(data[idx])
}
b <- boot(x, bfoo, R=R)
sd(b$t)
}
#Summarise the data for use with geom_pointrange and add some hover text for use with plotly:
dat <- iris %>%
mutate(flower_colour = c(rep(c("blue", "purple"), 25), rep(c("blue", "white"), 25), rep(c("white", "purple"), 25))) %>%
group_by(Species) %>%
summarise(meanSL = mean(Sepal.Length, na.rm = TRUE),
countSL = n(),
meSL = qt(0.975, countSL-1) * boot_sd(Sepal.Length, mean, 1001),
lowerCI_SL = meanSL - meSL,
upperCI_SL = meanSL + meSL,
group = "Mean &\nConfidence Interval",
colours_in_species = paste0(sort(unique(flower_colour)), collapse = ",")) %>%
as.data.frame() %>%
mutate(colours_in_species = paste0("colours: ", colours_in_species))
#Some plotting variables
purple <- "#8f11e7"
plot_title_colour <- "#35373b"
axis_text_colour <- "#3c4042"
legend_text_colour <- "#3c4042"
annotation_colour <- "#3c4042"
labels_dup = c("low", "medium", "high")
labels = c("low", "medium\n\nmeans to the right\nof this line are\nso cool", "high")
breaks = c(5,6,7)
limits = c(4,8)
p <- ggplot(dat, aes(x = reorder(as.character(Species),meanSL), text = colours_in_species)) +
geom_text(aes(y = 4.2,label = paste0("n=",countSL)), color = annotation_colour, size = 3) +
geom_pointrange(aes(y = meanSL, ymin=lowerCI_SL, ymax=upperCI_SL,color = group, fill = group), size = 1) +
scale_fill_manual(values = "#f4a01f", name = "Mean &\nConfidence Interval") +
scale_color_manual(values = "#f4a01f", name = "Mean &\nConfidence Interval") +
geom_hline(yintercept = 5, colour = "dark grey", linetype = "dashed") +
geom_hline(yintercept = 6, colour = purple, linetype = "dashed") +
coord_flip() +
ggtitle("Means of sepal length by species") +
theme_classic()+
theme(axis.text.y=element_text(size=10, colour = axis_text_colour),
axis.title.x=element_blank(),
axis.title.y=element_blank(),
axis.line.y = element_blank(),
axis.ticks.y = element_blank(),
plot.title = element_text(size = 12, hjust = 0, colour = plot_title_colour),
legend.justification=c("right", "top"),
legend.box.just = "center",
legend.position ="top",
legend.title.align = "left",
legend.text=element_text(size = 8, hjust = 0.5, colour = legend_text_colour),
legend.title=element_blank())
ax <- list(
side = "top",
showticklabels = TRUE,
range = limits,
tickmode = "array",
tickvals = breaks,
ticktext = labels_dup)
ay <- list(
side = "right")
ax2 <- list(
overlaying = "x",
side = "bottom",
showticklabels = TRUE,
range = limits,
tickmode = "array",
tickvals = breaks,
ticktext = labels_dup,
tickfont = list(size = 11))
shinyApp(
ui = fluidPage(
plotlyOutput("plot")
),
server = function(input, output) {
output$plot <- renderPlotly({
ggplotly(p, tooltip = 'text') %>%
add_markers(data = NULL, inherit = TRUE, xaxis = "x2") %>%
layout(
xaxis = ax,
xaxis2 = ax2,
yaxis = ay,
legend = list(orientation = "v", itemclick = FALSE, x = 1.2, y = 1.04),
margin = list(t = 120, l = 60)
)
})
}
)
I use the data frame below:
Name <- c("DCH", "DCH", "DCH", "DGI", "DGI", "DGI", "LDP", "LDP", "LDP",
"RH", "RH", "RH", "TC", "TC", "TC")
Class <- c("Class1", "Class2", "Overlap", "Class1", "Class2", "Overlap",
"Class1", "Class2", "Overlap", "Class1", "Class2", "Overlap", "Class1", "Class2", "Overlap")
count <- c(2077, 1642, 460, 1971, 5708, 566, 2316, 810, 221, 2124, 3601,
413, 2160, 1097, 377)
FinalDF <- data.frame(Name, Class, count)
in order to create the following ggplot.
with :
# Generate the horizontal stacked bar chart plot
stackedBarsDiagram <- function(data, numRows = 5,
barColors = c('lemonchiffon', 'palegreen3', 'deepskyblue2'),
leftlabels = c('MyDatabaseA'), rightlabels = c('MyDatabaseB', 'MyDatabaseC', 'MyDatabaseD', 'MyDatabaseE'),
headerLabels = c("Class1", "Overlap", "Class2"),
#put input$referenceDataset intead of Reference dataset"
headerLabels2 = c(paste("Unique to","DB"), "Overlap", "Unique to Comparison Dataset "),
barThickness = F, rowDensity = 'default', internalFontSize = 12, headerFontSize = 16,
internalFontColor = 'black', headerFontColor = 'black', internalFontWeight = 'standard',
externalFontWeight = 'bold', internalLabelsVisible = T, headerlLabelsVisible = T,
# Default file type of saved file is .png; .pdf is also supported
bordersVisible = T, borderWeight = 'default', plotheight = 25, plotwidth = 25, filename = "StackedBarPlot.png", plotsave = F) {
# Parameters to assist in bar width calculations
minBarWidth = 0.5
maxBarWidth = 0.7
# Calculate bar width parameter
barWidthFactor <- ((maxBarWidth - minBarWidth) / (numRows))
FinalDF <- data
# If proportional bars are specified, display them
if (barThickness == T) {
sumDF <- FinalDF %>%
group_by(Name) %>%
summarize(tot = sum(count)) %>%
mutate(RANK = rank(tot), width = minBarWidth + RANK * barWidthFactor) %>%
arrange(desc(Name))
barWidths <- rep(sumDF$width, each = 3)
print(barWidths)
} else { # If proportional bars aren't specified, just set bar thickness to 0.9
barWidths <- rep(0.9, 5)
}
# Create the stacked bar plot using ggplot()
stackedBarPlot <- ggplot(data = FinalDF) +
geom_col(mapping = aes(x = Name, y = count, fill = Class), width = rep(0.9, 5),
color = "black", position = position_fill(reverse = T)) +
geom_text(size = 4, position = position_fill(reverse = T, vjust = 0.50), color = "black",
mapping = aes(x = Name, y = count, group = Class, label = round(count))) +
annotate('text', size = 5, x = (5 + 1) / 2, y = -0.1, label = c('A'), angle = 90) +
coord_flip() +
scale_fill_manual(values = c('lemonchiffon', 'palegreen3', 'deepskyblue2'), breaks = c("Class1", "Overlap", "Class2"), labels = c(paste("Unique to","DB"), "Overlap", "Unique to Comparison Dataset "),
guide = guide_legend(label.position = 'left', label.hjust = 0, label.vjust = 0.5)) +
# The limits = rev(...) function call ensures that the labels for the bars are plotted in the order
# in which they are specified in the rightLabels and leftLabels parameters in the main stackedBarChart() function call.
# This is necessary since the finalDF$Name order is reversed from the desired order.
scale_x_discrete(limits = rev(levels(FinalDF$Name)), position = 'top') +
# Blank out any default labels of ggplot() for the x and y axes
xlab('') +
ylab('') +
# Specify the style of the full plot area, including the background, legend & text sizes
theme(panel.background = element_rect(fill = 'white'),
plot.margin = unit(c(0.25, 0.25, 0.25, 0.25), 'inches'),
legend.title = element_blank(),
legend.position = 'top',
legend.direction = 'vertical',
legend.key.width = unit(0.15, 'inches'),
legend.key.height = unit(0.15, 'inches'),
legend.text = element_text(face = 'bold', size = 12, color = "black"),
axis.text = element_text(size = 12),
axis.text.x = element_blank(),
axis.ticks = element_blank())
# Display the plotly
print(stackedBarPlot)
}
print(stackedBarsDiagram(data = FinalDF,leftlabels ="DB" , numRows = 6,
barThickness = F,
barColors = c("#FFFACD","#7CCD7C","#00B2EE")))
However when I convert it to interactive with ggplotly():
ggplotly(stackedBarsDiagram(data = FinalDF,leftlabels ="DB" , numRows = 6,
barThickness = F,
barColors = c("#FFFACD", "#7CCD7C", "#00B2EE")))%>%
layout(title = "New plot title", legend = list(orientation = "h", y = -.132, x = 0), annotations = list())
my legend names are not edited properly despite using :
scale_fill_manual(values = c('lemonchiffon', 'palegreen3', 'deepskyblue2'),
breaks = c("Class1", "Overlap", "Class2"),
labels = c(paste("Unique to","DB"), "Overlap", "Unique to Comparison Dataset "),
guide = guide_legend(label.position = 'left', label.hjust = 0, label.vjust = 0.5))
they return to their default names "Class1", "Overlap", "Class2"
I don't know what plotly looks for exactly, but it looks like it doesn't care what your scale_fill_manual labels are and just pulls your fill factor groups as names. So one way would be to just create a label group in your data.
A hacky way is to manually edit the plotly_build() of the plot.
p1 <- plotly_build(p)
p1$x$data[[1]]$name <- "Unique to DB"
Start looking in there and you'll see the attributes of the plot, including hover-text. So this method would be annoying. You could do an lapply with some regex or a gsub, but the first method is likely easier.