Not able to add data label in waterfall chart using ggplot2 - r

I am trying to plot waterfall chart using ggplot2. When I am placing the data labels it is not putting in the right place.
Below is the code I am using
dataset <- data.frame(TotalHeadcount = c(-417, -12, 276, -276, 787, 14), Category = LETTERS[1:6])
dataset$SortedCategory <- factor(dataset$`Category`, levels = dataset$`Category`)
dataset$id <- seq_along(dataset$TotalHeadcount)
dataset$type <- ifelse(dataset$TotalHeadcount > 0, "in", "out")
dataset[dataset$SortedCategory %in% c("A", "F"), "type"] <- "net"
dataset$type <- factor(dataset$type, levels = c("out", "in", "net"))
dataset$end <- cumsum(dataset$`TotalHeadcount`)
dataset$end <- c(head(dataset$end, -1), 0)
dataset$start <- c(0, head(dataset$end, -1))
dataset$value <-dataset$`TotalHeadcount`
library(ggplot2)
strwr <- function(str) gsub(" ", "\n", str)
ggplot(dataset, aes(fill = type))+ geom_rect(aes(x = SortedCategory, xmin = id - 0.45, xmax = id + 0.45, ymin = end, ymax = start))+ scale_x_discrete("", breaks = levels(dataset$SortedCategory), labels = strwr(levels(dataset$SortedCategory)))+ theme_bw()+ theme(panel.border = element_blank(), panel.grid.major = element_blank(), axis.line = element_line(colour = "gray"))+guides(fill=FALSE)
And below is the output. I want the data label to be just at the beginning or at the end of the bar.
I am not very expert in R. Just trying to learn. Any help would be really appreciated.
I was following the below blog
https://learnr.wordpress.com/2010/05/10/ggplot2-waterfall-charts/
but somehow when I write the same code in geom_text it gives me an error. Could be a syntax related issue.

Here is an approach with ggplot.
First the data:
df1 <- data.frame(z = c(-417, -12, 276, -276, 787, 14),
b = LETTERS[1:6])
library(tidyverse)
Calculate the cumsum and the lag of the cumsum for geom_rect coords
df1 %>%
mutate(val = cumsum(z),
lag = c(0, lag(val)[-1]),
b1 = as.numeric(b)) -> df1
ggplot(df1)+
geom_rect(aes(xmin = b1 - 0.45,
xmax = b1 + 0.45, ymin = lag, ymax = val)) +
geom_text(aes(x = b1, y = val, label = val), #or `label = z`
vjust = ifelse(df1$val < df1$lag, -0.2, 1)) + #geom_text vjust depends on the direction of the value
scale_x_continuous(breaks = 1:6, labels = df1$b)
an easier way, but I think the labels position can not be changed at this moment but it is planned:
rect_text_labels_anchor (character) How should rect_text_labels be
positioned? In future releases, we might have support for north or
south anchors, or for directed positioning (negative down, positive
up) etc. For now, only centre is supported.
library(waterfalls)
df1 <- data.frame(z = c(-417, -12, 276, -276, 787, 14),
b = LETTERS[1:6])
you could also color it the same way in ggplot:
df1 %>%
mutate(val = cumsum(z),
lag = c(0, lag(val)[-1]),
b1 = as.numeric(b),
color = ifelse(val <lag, "down", "up")) -> df1
ggplot(df1)+
geom_rect(aes(xmin = b1 - 0.45,
xmax = b1 + 0.45, ymin = lag, ymax = val, fill = color)) +
geom_text(aes(x = b1, y = val, label = z),
vjust = ifelse(df1$val < df1$lag, -0.2, 1)) +
scale_x_continuous(breaks = 1:6, labels = df1$b)
EDIT: answers to the questions in comments.
Filled waterfall:
df1 <- data.frame(z = c(-417, -12, 276, -276, 787, 14),
b = LETTERS[1:6],
group = rep(c("AB", "CD", "EF"), each = 2))
df1 %>%
mutate(val = cumsum(z),
lag = c(0, lag(val)[-1]),
b1 = as.numeric(b),
g1 = as.numeric(group)) -> df1
ggplot(df1)+
geom_rect(aes(xmin = g1 - 0.45,
xmax = g1 + 0.45, ymin = lag, ymax = val, fill = b)) +
geom_text(aes(x = g1, y = val, label = z),
vjust = ifelse(df1$val < df1$lag, -0.2, 1)) +
scale_x_continuous(breaks = 1:3, labels = unique(df1$group))
To answer what went wrong with your geom_text code I would need to see it. Other then that your code works, but it over-complicates things. I advise you to learn a bit of tidyverse functions, data manipulation will be much cleaner then.
One more note, adding back-ticks:
dataset$`TotalHeadcount`
is not necessary when your column names do not contain special characters:
dataset$TotalHeadcount
EDIT2: to change to order on the x axis you would first change the levels of the grouping factor and then do the calculation and plotting:
df1 <- data.frame(z = c(-417, -12, 276, -276, 787, 14),
b = LETTERS[1:6],
group = rep(c("AB", "CD", "EF"), each = 2))
df1 %>%
mutate(group = factor(group, levels = c("AB", "EF", "CD"))) %>%
arrange(group) %>%
mutate(val = cumsum(z),
lag = c(0, lag(val)[-1]),
b1 = as.numeric(b),
g1 = as.numeric(group)) -> df1
ggplot(df1)+
geom_rect(aes(xmin = g1 - 0.45,
xmax = g1 + 0.45, ymin = lag, ymax = val, fill = b)) +
geom_text(aes(x = g1, y = val, label = z),
vjust = ifelse(df1$val < df1$lag, -0.2, 1)) +
scale_x_continuous(breaks = 1:3, labels = unique(df1$group))

Related

Legend for combined graph

I am trying to reproduce this figure (without the Portugal highlight):
The data (and figure) can be found in this link: https://stat.link/uz49al.
I imported and reshaped the data into a long format, but then I got stuck on how it would be possible to rearrange the legend entries in the same order as in the original.
I would very much appreciate your help!
Thanks!
Here is where I got:
# load data
f5_5_data_before <-
read_excel("uz49al.xlsx", sheet = "Figure1.20", range = "A32:E68")
names(f5_5_data_before)[1] <- "Country"
names(f5_5_data_before)[2] <- "Odds_ratio"
names(f5_5_data_before)[3] <- "SE"
names(f5_5_data_before)[4] <- "sig"
names(f5_5_data_before)[5] <- "non_sig"
f5_5_data_before$Country <- as.factor(f5_5_data_before$Country)
f5_5_data_before <- f5_5_data_before %>%
mutate(
category = case_when(
is.na(sig) ~ "Non-significant",
!is.na(sig) ~ "Significant"
),
value = case_when(
category == "Non-significant" ~ non_sig,
category == "Significant" ~ sig
)
)
f5_5_data_before$group2 <- "Before accounting for reading performance"
f5_5_data_after <-
read_excel("uz49al.xlsx", sheet = "Figure1.20", range = "A32:I68")
f5_5_data_after <- f5_5_data_after[, c(1, 6:9)]
names(f5_5_data_after)[1] <- "Country"
names(f5_5_data_after)[2] <- "Odds_ratio"
names(f5_5_data_after)[3] <- "SE"
names(f5_5_data_after)[4] <- "sig"
names(f5_5_data_after)[5] <- "non_sig"
f5_5_data_after$Country <- as.factor(f5_5_data_after$Country)
f5_5_data_after <- f5_5_data_after %>%
mutate(
category = case_when(
is.na(sig) ~ "Non-significant",
!is.na(sig) ~ "Significant"
),
value = case_when(
category == "Non-significant" ~ non_sig,
category == "Significant" ~ sig
)
)
f5_5_data_after$group2 <- "After accounting for reading performance"
# appending in long format
f5_5_data <- rbind(f5_5_data_after, f5_5_data_before)
# shaded rectangle
rect1 <- data.frame(
xmin = 14.5,
xmax = 15.5,
ymin = -Inf,
ymax = Inf
)
# figure
f5_5 <- ggplot() +
geom_col(data = f5_5_data %>% filter(group2 == "After accounting for reading performance"),
aes(x = reorder(Country,-Odds_ratio),
y = value,
fill = category,
colour = group2),
width=0.5,
) +
geom_point(
data = f5_5_data %>% filter(group2 == "Before accounting for reading performance"),
aes(x = Country,
y = value,
fill = category,
colour = group2),
shape = 23,
size = 3,
) +
geom_rect(
data = rect1,
aes(
xmin = xmin,
xmax = xmax,
ymin = ymin,
ymax = ymax
),
alpha = 0.5,
inherit.aes = FALSE
) +
scale_y_continuous(breaks = pretty_breaks(),
limits = c(0, 25),
expand = c(0, 0)) +
labs(x = NULL,
y = "Odds ratio") +
theme(axis.text.x = element_text(angle = 90))
print(f5_5)
This yields the following output:
As you can see, the legend looks substantially different and essentially I got stuck.
One option to achieve your desired result would be via the ggnewscale package which allows for multiple scales for the same aesthetic. Doing so we could map category on the fill aes in both the geom_col and the geom_point but have two different legends:
Note: I simplified your data wrangling code a bit.
library(readxl)
library(dplyr)
library(ggplot2)
library(ggnewscale)
url <- "https://stat.link/uz49al"
download.file(url, destfile = "uz49al.xlsx")
dat <- read_excel("uz49al.xlsx", sheet = "Figure1.20", range = "A32:I68")
dat <- list(
before = setNames(dat[, 1:5], c("Country", "Odds_ratio", "SE", "sig", "non_sig")),
after = setNames(dat[, c(1, 6:9)], c("Country", "Odds_ratio", "SE", "sig", "non_sig"))
) %>%
bind_rows(.id = "group2")
dat <- dat %>%
mutate(
category = if_else(is.na(sig), "nonsig", "sig"),
value = if_else(is.na(sig), non_sig, sig)
) %>%
select(-sig, -non_sig)
group2_labels <- c(after = "After accounting for reading performance", before = "Before accounting for reading performance")
rect1 <- data.frame(xmin = 14.5, xmax = 15.5, ymin = -Inf, ymax = Inf)
ggplot(dat, aes(x = reorder(Country,-Odds_ratio), y = value)) +
geom_col(data = ~filter(.x, group2 == "after"), aes(fill = category), width = 0.5) +
scale_fill_manual(labels = NULL, values = c(sig = "darkblue", nonsig = "steelblue"),
name = group2_labels[["after"]], guide = guide_legend(title.position = "right")) +
new_scale_fill() +
geom_point(data = ~filter(.x, group2 == "before"), aes(fill = category), size = 3, shape = 23, color = "lightblue") +
geom_rect(data = rect1, aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax),
alpha = 0.5, inherit.aes = FALSE) +
scale_fill_manual(labels = NULL, values = c(nonsig = "white", sig = "lightblue"), breaks = c("sig", "nonsig"),
name = group2_labels[["before"]], guide = guide_legend(title.position = "right")) +
scale_y_continuous(breaks = scales::pretty_breaks(), limits = c(0, 25), expand = c(0, 0)) +
labs(x = NULL, y = "Odds ratio") +
theme(axis.text.x = element_text(angle = 90, hjust = 1),
legend.position = "top")

Circular stacked barplot in r [duplicate]

This question already has an answer here:
Circular barchart customization from r-graph-gallery
(1 answer)
Closed 8 months ago.
I am trying to run this code from this link https://www.r-graph-gallery.com/299-circular-stacked-barplot.html.
# library
library(tidyverse)
library(viridis)
# Create dataset
data <- data.frame(
individual=paste( "Mister ", seq(1,60), sep=""),
group=c( rep('A', 10), rep('B', 30), rep('C', 14), rep('D', 6)) ,
value1=sample( seq(10,100), 60, replace=T),
value2=sample( seq(10,100), 60, replace=T),
value3=sample( seq(10,100), 60, replace=T)
)
# Transform data in a tidy format (long format)
data <- data %>% gather(key = "observation", value="value", -c(1,2))
# Set a number of 'empty bar' to add at the end of each group
empty_bar <- 2
nObsType <- nlevels(as.factor(data$observation))
to_add <- data.frame( matrix(NA, empty_bar*nlevels(data$group)*nObsType, ncol(data)) )
colnames(to_add) <- colnames(data)
to_add$group <- rep(levels(data$group), each=empty_bar*nObsType )
data <- rbind(data, to_add)
data <- data %>% arrange(group, individual)
data$id <- rep( seq(1, nrow(data)/nObsType) , each=nObsType)
# Get the name and the y position of each label
label_data <- data %>% group_by(id, individual) %>% summarize(tot=sum(value))
number_of_bar <- nrow(label_data)
angle <- 90 - 360 * (label_data$id-0.5) /number_of_bar # I substract 0.5 because the letter must have the angle of the center of the bars. Not extreme right(1) or extreme left (0)
label_data$hjust <- ifelse( angle < -90, 1, 0)
label_data$angle <- ifelse(angle < -90, angle+180, angle)
# prepare a data frame for base lines
base_data <- data %>%
group_by(group) %>%
summarize(start=min(id), end=max(id) - empty_bar) %>%
rowwise() %>%
mutate(title=mean(c(start, end)))
# prepare a data frame for grid (scales)
grid_data <- base_data
grid_data$end <- grid_data$end[ c( nrow(grid_data), 1:nrow(grid_data)-1)] + 1
grid_data$start <- grid_data$start - 1
grid_data <- grid_data[-1,]
# Make the plot
p <- ggplot(data) +
# Add the stacked bar
geom_bar(aes(x=as.factor(id), y=value, fill=observation), stat="identity", alpha=0.5) +
scale_fill_viridis(discrete=TRUE) +
# Add a val=100/75/50/25 lines. I do it at the beginning to make sur barplots are OVER it.
geom_segment(data=grid_data, aes(x = end, y = 0, xend = start, yend = 0), colour = "grey", alpha=1, size=0.3 , inherit.aes = FALSE ) +
geom_segment(data=grid_data, aes(x = end, y = 50, xend = start, yend = 50), colour = "grey", alpha=1, size=0.3 , inherit.aes = FALSE ) +
geom_segment(data=grid_data, aes(x = end, y = 100, xend = start, yend = 100), colour = "grey", alpha=1, size=0.3 , inherit.aes = FALSE ) +
geom_segment(data=grid_data, aes(x = end, y = 150, xend = start, yend = 150), colour = "grey", alpha=1, size=0.3 , inherit.aes = FALSE ) +
geom_segment(data=grid_data, aes(x = end, y = 200, xend = start, yend = 200), colour = "grey", alpha=1, size=0.3 , inherit.aes = FALSE ) +
# Add text showing the value of each 100/75/50/25 lines
ggplot2::annotate("text", x = rep(max(data$id),5), y = c(0, 50, 100, 150, 200), label = c("0", "50", "100", "150", "200") , color="grey", size=6 , angle=0, fontface="bold", hjust=1) +
ylim(-150,max(label_data$tot, na.rm=T)) +
theme_minimal() +
theme(
legend.position = "none",
axis.text = element_blank(),
axis.title = element_blank(),
panel.grid = element_blank(),
plot.margin = unit(rep(-1,4), "cm")
) +
coord_polar() +
# Add labels on top of each bar
geom_text(data=label_data, aes(x=id, y=tot+10, label=individual, hjust=hjust), color="black", fontface="bold",alpha=0.6, size=5, angle= label_data$angle, inherit.aes = FALSE ) +
# Add base line information
geom_segment(data=base_data, aes(x = start, y = -5, xend = end, yend = -5), colour = "black", alpha=0.8, size=0.6 , inherit.aes = FALSE ) +
geom_text(data=base_data, aes(x = title, y = -18, label=group), hjust=c(1,1,0,0), colour = "black", alpha=0.8, size=4, fontface="bold", inherit.aes = FALSE)
# Save at png
ggsave(p, file="output.png", width=10, height=10)
However, I am not sure why I am not getting the gaps and the scales in my figure (see below). As depicted, the numbers are printed inside the figure and the gaps between different groups of data are not there.
The original figure should be as follows:
There is a bug in the code. group has to be a factor to make the code adding the gaps work. To fix this add data$group <- factor(data$group).
Note: My guess is that the reason for this bug is that as of version 4.0.0 R treats strings in data frames as strings rather than factors. Hence, for versions < 4.0.0 the code worked fine as is.
# library
library(tidyverse)
library(viridis)
#> Loading required package: viridisLite
# Create dataset
data <- data.frame(
individual = paste("Mister ", seq(1, 60), sep = ""),
group = c(rep("A", 10), rep("B", 30), rep("C", 14), rep("D", 6)),
value1 = sample(seq(10, 100), 60, replace = T),
value2 = sample(seq(10, 100), 60, replace = T),
value3 = sample(seq(10, 100), 60, replace = T)
)
# Convert to factor
data$group <- factor(data$group)
# Transform data in a tidy format (long format)
data <- data %>% gather(key = "observation", value = "value", -c(1, 2))
# Set a number of 'empty bar' to add at the end of each group
empty_bar <- 2
nObsType <- nlevels(as.factor(data$observation))
to_add <- data.frame(matrix(NA, empty_bar * nlevels(data$group) * nObsType, ncol(data)))
colnames(to_add) <- colnames(data)
to_add$group <- rep(levels(data$group), each = empty_bar * nObsType)
data <- rbind(data, to_add)
data <- data %>% arrange(group, individual)
data$id <- rep(seq(1, nrow(data) / nObsType), each = nObsType)
# Get the name and the y position of each label
label_data <- data %>%
group_by(id, individual) %>%
summarize(tot = sum(value))
#> `summarise()` has grouped output by 'id'. You can override using the `.groups`
#> argument.
number_of_bar <- nrow(label_data)
angle <- 90 - 360 * (label_data$id - 0.5) / number_of_bar # I substract 0.5 because the letter must have the angle of the center of the bars. Not extreme right(1) or extreme left (0)
label_data$hjust <- ifelse(angle < -90, 1, 0)
label_data$angle <- ifelse(angle < -90, angle + 180, angle)
# prepare a data frame for base lines
base_data <- data %>%
group_by(group) %>%
summarize(start = min(id), end = max(id) - empty_bar) %>%
rowwise() %>%
mutate(title = mean(c(start, end)))
# prepare a data frame for grid (scales)
grid_data <- base_data
grid_data$end <- grid_data$end[c(nrow(grid_data), 1:nrow(grid_data) - 1)] + 1
grid_data$start <- grid_data$start - 1
grid_data <- grid_data[-1, ]
# Make the plot
ggplot(data) +
# Add the stacked bar
geom_bar(aes(x = as.factor(id), y = value, fill = observation), stat = "identity", alpha = 0.5) +
scale_fill_viridis(discrete = TRUE) +
# Add a val=100/75/50/25 lines. I do it at the beginning to make sur barplots are OVER it.
geom_segment(data = grid_data, aes(x = end, y = 0, xend = start, yend = 0), colour = "grey", alpha = 1, size = 0.3, inherit.aes = FALSE) +
geom_segment(data = grid_data, aes(x = end, y = 50, xend = start, yend = 50), colour = "grey", alpha = 1, size = 0.3, inherit.aes = FALSE) +
geom_segment(data = grid_data, aes(x = end, y = 100, xend = start, yend = 100), colour = "grey", alpha = 1, size = 0.3, inherit.aes = FALSE) +
geom_segment(data = grid_data, aes(x = end, y = 150, xend = start, yend = 150), colour = "grey", alpha = 1, size = 0.3, inherit.aes = FALSE) +
geom_segment(data = grid_data, aes(x = end, y = 200, xend = start, yend = 200), colour = "grey", alpha = 1, size = 0.3, inherit.aes = FALSE) +
# Add text showing the value of each 100/75/50/25 lines
ggplot2::annotate("text", x = rep(max(data$id), 5), y = c(0, 50, 100, 150, 200), label = c("0", "50", "100", "150", "200"), color = "grey", size = 6, angle = 0, fontface = "bold", hjust = 1) +
ylim(-150, max(label_data$tot, na.rm = T)) +
theme_minimal() +
theme(
legend.position = "none",
axis.text = element_blank(),
axis.title = element_blank(),
panel.grid = element_blank(),
plot.margin = unit(rep(-1, 4), "cm")
) +
coord_polar() +
# Add labels on top of each bar
geom_text(data = label_data, aes(x = id, y = tot + 10, label = individual, hjust = hjust), color = "black", fontface = "bold", alpha = 0.6, size = 5, angle = label_data$angle, inherit.aes = FALSE) +
# Add base line information
geom_segment(data = base_data, aes(x = start, y = -5, xend = end, yend = -5), colour = "black", alpha = 0.8, size = 0.6, inherit.aes = FALSE) +
geom_text(data = base_data, aes(x = title, y = -18, label = group), hjust = c(1, 1, 0, 0), colour = "black", alpha = 0.8, size = 4, fontface = "bold", inherit.aes = FALSE)
#> Warning: Removed 24 rows containing missing values (position_stack).
#> Warning: Removed 9 rows containing missing values (geom_text).

Perform manual annotations with ggsignif

I am hoping to add manual comparisons and annotations to a ggplot with facets. While the previous stack overflow questions have been helpful, transitioning to multiple pairwise comparison per facet has been tricky.
Fortunately, the authors give a great section on this at the bottom of https://github.com/const-ae/ggsignif titled "Advanced Example". It has been modified to show it's possible to add multiple pairwise comparison in one facet box:
library(ggplot2)
library(ggsignif)
annotation_df <- data.frame(
color = c("E", "E"),
start = c("Good", "Fair"),
end = c("Fair", "Very Good"),
y = c(3.6, 4.7),
label = c("Comp. 1", "Comp. 2")
)
ggplot(diamonds, aes(x = cut, y = carat)) +
geom_boxplot() +
geom_signif(
data = annotation_df,
aes(xmin = start, xmax = end, annotations = label, y_position = y),
textsize = 3, vjust = -0.2,
manual = TRUE
) +
facet_wrap(~color) +
ylim(NA, 5.3)
Great, but when I do something similar, I seem only able to get one number produced. Additionally, I can't get it to graph without fill being set. What might be causing this issue?
library(ggplot2)
library(ggsignif)
library(dplyr)
EnrichmentDF <- data.frame(
cancer = c(rep("BRCA", 3), rep("PRAD", 3), rep("PAAD", 3)),
occurance = c(166, 152, 90, 288, 512, 291, 58, 145, 101),
cluster = rep(1:3)
)
Cancer <- apply(EnrichmentDF, 1, function(x)rep(x[1], x[2])) %>% unlist()
Cluster <- apply(EnrichmentDF, 1, function(x)rep(x[3], x[2])) %>% unlist()
niceTable <- table(Cancer, Cluster)
EnrichmentDF <- as.data.frame(niceTable)
colnames(EnrichmentDF) <- c("Cancer", "Cluster", "NumberOfPatients")
EnrichmentDF$CancerTotalN <- rowSums(niceTable)
EnrichmentDF$Percentage <- round(100 * EnrichmentDF$NumberOfPatients / EnrichmentDF$CancerTotalN, 2)
SignificanceStorage <- runif(9, 0.001, 0.2)
SignificanceDF <- data.frame(Cancer = rep(unique(EnrichmentDF$Cancer), each = ncol(niceTable)),
start = c(1,1,2), end = c(2, 3, 3),
y = c(75, 95, 125),
label = signif(SignificanceStorage, digits=2)
)
EnrichmentDF %>% ggplot(aes(x = Cluster, y= Percentage, fill = Cancer)) + # example did not have fill but breaks without it
geom_bar(aes(fill = Cluster), stat = "identity",
position = "dodge", width = .5) +
geom_signif(
data = SignificanceDF,
aes(xmin = start, xmax = end, annotations = label, y_position = y),
textsize = 3, vjust = -0.2,
manual = TRUE) +
facet_wrap(~Cancer) + ylim(0, 130) +
theme_bw()
This question is similar to this question, but without depreciation fixes leading to consistent overlaps
Thanks to TarJae and the contributors to the post here: https://github.com/const-ae/ggsignif/issues/63, an issue has been found requiring a group = in the parameters of geom_signif when faceting. While the comment on github mentions it's due to duplicated data, I do not see such in the example provided here.
These are the changes:
SignificanceDF$group <- 1:nrow(SignificanceDF)
EnrichmentDF %>% ggplot(aes(x = Cluster, y= Percentage)) +
and have been marked by a double ##
library(ggplot2)
library(ggsignif)
library(dplyr)
EnrichmentDF <- data.frame(
cancer = c(rep("BRCA", 3), rep("PRAD", 3), rep("PAAD", 3)),
occurance = c(166, 152, 90, 288, 512, 291, 58, 145, 101),
cluster = rep(1:3)
)
Cancer <- apply(EnrichmentDF, 1, function(x)rep(x[1], x[2])) %>% unlist()
Cluster <- apply(EnrichmentDF, 1, function(x)rep(x[3], x[2])) %>% unlist()
niceTable <- table(Cancer, Cluster)
EnrichmentDF <- as.data.frame(niceTable)
colnames(EnrichmentDF) <- c("Cancer", "Cluster", "NumberOfPatients")
EnrichmentDF$CancerTotalN <- rowSums(niceTable)
EnrichmentDF$Percentage <- round(100 * EnrichmentDF$NumberOfPatients / EnrichmentDF$CancerTotalN, 2)
SignificanceStorage <- runif(9, 0.001, 0.2)
SignificanceDF <- data.frame(Cancer = rep(unique(EnrichmentDF$Cancer), each = ncol(niceTable)),
start = c(1,1,2), end = c(2, 3, 3),
y = c(75, 95, 125),
label = signif(SignificanceStorage, digits=2)
)
SignificanceDF$group <- 1:nrow(SignificanceDF) ## NEW CODE ADDED
EnrichmentDF %>% ggplot(aes(x = Cluster, y= Percentage)) + ## REMOVED FILL
geom_bar(aes(fill = Cluster), stat = "identity",
position = "dodge", width = .5) +
geom_signif(
data = SignificanceDF,
aes(xmin = start, xmax = end, annotations = label, y_position = y),
textsize = 3, vjust = -0.2,
manual = TRUE) +
facet_wrap(~Cancer) + ylim(0, 130) +
theme_bw()

Error bar sizing skewed when using plotly

I have a chart which has an error bar on it:
However, when I put the chart inside a plotly wrapper, the error bar sizing gets messed up, as shown below:
Does anyone have a solution for keeping the error bar width the same size as the bar, as shown in plot 1, but while keeping the plot rendering with plotly?
library(tidyverse)
library(plotly)
dat <- data.frame(peeps= c("Bill", "Bob", "Becky"),
vals = c(10, 15, 12),
goals = c(8, 13, 10),
grp = c("Bears", "Bears", "Mongoose") %>% as.factor)
p1 <- dat %>%
ggplot(aes(x = peeps, y = vals, fill = grp)) +
geom_bar(stat = "identity") +
geom_errorbar(data = dat,
aes(ymin = goals, ymax = goals),
color = "blue",
size = 1,
linetype = 1) +
scale_y_continuous(expand = c(0, 0)) +
coord_flip()
p1
ggplotly(p1) %>%
layout(legend = list(orientation = "h",
xanchor = "center",
y = -0.15,
x = 0.5))
Using geom_segment() instead of geom_errorbar() is a work-around for this problem.
dat <- data.frame(peeps= c("Bill", "Bob", "Becky") %>% as.factor,
vals = c(10, 15, 12),
goals = c(8, 13, 10),
grp = c("Bears", "Bears", "Mongoose"),
rowid = 1:3)
p1 <- ggplot(data = dat, aes(x = peeps, y = vals, fill = grp, order = rowid)) +
geom_col() +
geom_segment(aes(
x = as.numeric(peeps)-0.45,
xend = as.numeric(peeps)+0.45,
y = goals, yend = goals),
color = "blue",
size = 1) +
scale_y_continuous(expand = c(0, 0)) +
coord_flip()
ggplotly(p1) %>%
layout(legend = list(orientation = "h",
xanchor = "center",
y = -0.15,
x = 0.5))

custom varwidth in ggplot2

df = data.frame(a = c(0, 0), b = c(17, 15),
c = c(35,37), d = c(55,57),
e = c(80, 85), x = c(1, 2),
w1 = c(20, 30), w2 = c(0.2, 0.3))
ggplot(df) +
geom_boxplot(aes(x = x, ymin = a, lower = b, middle = c, upper = d, ymax = e),
stat = "identity")
I have a dataframe containing the values of each quantile for a boxplot, (a-e).
Is it possible use columns w1 or w2 to define the width of the boxplots in ggplot?
My desired result is similar to using varwidth in graphics::boxplot but with custom widths.
graphics::boxplot(mpg~gear, mtcars, varwidth = T)
Don't think this is a duplicate since it seems like the weight argument doesn't work with stat = identity.
Looks like it can be done by using stat_summary.
df = data.frame(a = c(0, 0), b = c(17, 15),
c = c(35,37), d = c(55,57),
e = c(80, 85), x = factor(c(1, 2)),
w = c(0.2, 0.3))
df2 = reshape2::melt(data = df, id = "x")
ff = function(x){
data.frame(
ymin = x[1],
lower = x[2],
middle = x[3],
upper = x[4],
ymax = x[5],
width = x[6]
)
}
ggplot(df2, aes(x, value)) + stat_summary(fun.data = ff, geom = "boxplot")
But i am not sure if this is the best way to do it.

Resources