This question already has an answer here:
Circular barchart customization from r-graph-gallery
(1 answer)
Closed 8 months ago.
I am trying to run this code from this link https://www.r-graph-gallery.com/299-circular-stacked-barplot.html.
# library
library(tidyverse)
library(viridis)
# Create dataset
data <- data.frame(
individual=paste( "Mister ", seq(1,60), sep=""),
group=c( rep('A', 10), rep('B', 30), rep('C', 14), rep('D', 6)) ,
value1=sample( seq(10,100), 60, replace=T),
value2=sample( seq(10,100), 60, replace=T),
value3=sample( seq(10,100), 60, replace=T)
)
# Transform data in a tidy format (long format)
data <- data %>% gather(key = "observation", value="value", -c(1,2))
# Set a number of 'empty bar' to add at the end of each group
# These all-NA rows are what produce the visual gaps between groups.
empty_bar <- 2
# Number of stacked observation types per bar (value1, value2, value3)
nObsType <- nlevels(as.factor(data$observation))
# NOTE: nlevels()/levels() only work on factors. When data$group is a plain
# character vector (the data.frame() default since R 4.0.0), nlevels() returns 0
# and levels() returns NULL, so to_add ends up with zero rows and no gaps are
# added. Converting group with factor() beforehand avoids this.
to_add <- data.frame( matrix(NA, empty_bar*nlevels(data$group)*nObsType, ncol(data)) )
colnames(to_add) <- colnames(data)
to_add$group <- rep(levels(data$group), each=empty_bar*nObsType )
data <- rbind(data, to_add)
data <- data %>% arrange(group, individual)
data$id <- rep( seq(1, nrow(data)/nObsType) , each=nObsType)
# Get the name and the y position of each label
label_data <- data %>% group_by(id, individual) %>% summarize(tot=sum(value))
number_of_bar <- nrow(label_data)
angle <- 90 - 360 * (label_data$id-0.5) /number_of_bar # I substract 0.5 because the letter must have the angle of the center of the bars. Not extreme right(1) or extreme left (0)
label_data$hjust <- ifelse( angle < -90, 1, 0)
label_data$angle <- ifelse(angle < -90, angle+180, angle)
# prepare a data frame for base lines
base_data <- data %>%
group_by(group) %>%
summarize(start=min(id), end=max(id) - empty_bar) %>%
rowwise() %>%
mutate(title=mean(c(start, end)))
# prepare a data frame for grid (scales)
grid_data <- base_data
grid_data$end <- grid_data$end[ c( nrow(grid_data), 1:nrow(grid_data)-1)] + 1
grid_data$start <- grid_data$start - 1
grid_data <- grid_data[-1,]
# Make the plot
p <- ggplot(data) +
# Add the stacked bar
geom_bar(aes(x=as.factor(id), y=value, fill=observation), stat="identity", alpha=0.5) +
scale_fill_viridis(discrete=TRUE) +
# Add a val=100/75/50/25 lines. I do it at the beginning to make sur barplots are OVER it.
geom_segment(data=grid_data, aes(x = end, y = 0, xend = start, yend = 0), colour = "grey", alpha=1, size=0.3 , inherit.aes = FALSE ) +
geom_segment(data=grid_data, aes(x = end, y = 50, xend = start, yend = 50), colour = "grey", alpha=1, size=0.3 , inherit.aes = FALSE ) +
geom_segment(data=grid_data, aes(x = end, y = 100, xend = start, yend = 100), colour = "grey", alpha=1, size=0.3 , inherit.aes = FALSE ) +
geom_segment(data=grid_data, aes(x = end, y = 150, xend = start, yend = 150), colour = "grey", alpha=1, size=0.3 , inherit.aes = FALSE ) +
geom_segment(data=grid_data, aes(x = end, y = 200, xend = start, yend = 200), colour = "grey", alpha=1, size=0.3 , inherit.aes = FALSE ) +
# Add text showing the value of each 100/75/50/25 lines
ggplot2::annotate("text", x = rep(max(data$id),5), y = c(0, 50, 100, 150, 200), label = c("0", "50", "100", "150", "200") , color="grey", size=6 , angle=0, fontface="bold", hjust=1) +
ylim(-150,max(label_data$tot, na.rm=T)) +
theme_minimal() +
theme(
legend.position = "none",
axis.text = element_blank(),
axis.title = element_blank(),
panel.grid = element_blank(),
plot.margin = unit(rep(-1,4), "cm")
) +
coord_polar() +
# Add labels on top of each bar
geom_text(data=label_data, aes(x=id, y=tot+10, label=individual, hjust=hjust), color="black", fontface="bold",alpha=0.6, size=5, angle= label_data$angle, inherit.aes = FALSE ) +
# Add base line information
geom_segment(data=base_data, aes(x = start, y = -5, xend = end, yend = -5), colour = "black", alpha=0.8, size=0.6 , inherit.aes = FALSE ) +
geom_text(data=base_data, aes(x = title, y = -18, label=group), hjust=c(1,1,0,0), colour = "black", alpha=0.8, size=4, fontface="bold", inherit.aes = FALSE)
# Save at png
ggsave(p, file="output.png", width=10, height=10)
However, I am not sure why I am not getting the gaps and the scales in my figure (see below). As depicted, the numbers are printed inside the figure and the gaps between different groups of data are not there.
The original figure should be as follows:
There is a bug in the code: `group` has to be a factor for the code that adds the gaps to work. To fix this, add `data$group <- factor(data$group)` right after the dataset is created.
Note: My guess is that the reason for this bug is that, as of version 4.0.0, R keeps strings in data frames as character vectors rather than converting them to factors. Hence, for versions < 4.0.0 the code worked fine as is.
# library
library(tidyverse)
library(viridis)
#> Loading required package: viridisLite
# Create dataset: 60 individuals in 4 groups, each with 3 random values
data <- data.frame(
  individual = paste0("Mister ", seq_len(60)),
  group = c(rep("A", 10), rep("B", 30), rep("C", 14), rep("D", 6)),
  value1 = sample(seq(10, 100), 60, replace = TRUE),
  value2 = sample(seq(10, 100), 60, replace = TRUE),
  value3 = sample(seq(10, 100), 60, replace = TRUE)
)
# Convert to factor. This is the actual fix: since R 4.0.0 data.frame() keeps
# strings as character vectors, and nlevels()/levels() below return 0/NULL for
# a character column, so no empty bars (gaps) would be added.
data$group <- factor(data$group)
# Transform data in a tidy format (long format): one row per
# individual/observation pair
data <- data %>% gather(key = "observation", value = "value", -c(1, 2))
# Set a number of 'empty bar' to add at the end of each group
empty_bar <- 2
nObsType <- nlevels(as.factor(data$observation))
# One all-NA row per empty bar, per group, per observation type
to_add <- data.frame(matrix(NA, empty_bar * nlevels(data$group) * nObsType, ncol(data)))
colnames(to_add) <- colnames(data)
to_add$group <- rep(levels(data$group), each = empty_bar * nObsType)
data <- rbind(data, to_add)
# arrange() sorts NA last, so the empty rows end up at the end of their group
data <- data %>% arrange(group, individual)
# Every run of nObsType consecutive rows forms one bar and shares one id
data$id <- rep(seq_len(nrow(data) / nObsType), each = nObsType)
# Get the name and the y position of each label
# (.groups = "drop" returns an ungrouped table and silences the summarise()
# regrouping message)
label_data <- data %>%
  group_by(id, individual) %>%
  summarize(tot = sum(value), .groups = "drop")
number_of_bar <- nrow(label_data)
# Subtract 0.5 because the label must have the angle of the center of its bar,
# not of its right (1) or left (0) edge
angle <- 90 - 360 * (label_data$id - 0.5) / number_of_bar
# Labels on the left half of the circle are flipped so they stay readable
label_data$hjust <- ifelse(angle < -90, 1, 0)
label_data$angle <- ifelse(angle < -90, angle + 180, angle)
# prepare a data frame for base lines: first and last real bar of each group
# and the midpoint between them (position of the group title)
base_data <- data %>%
  group_by(group) %>%
  summarize(start = min(id), end = max(id) - empty_bar) %>%
  mutate(title = (start + end) / 2)
# prepare a data frame for grid (scales): each row spans the gap between the
# end of the previous group and the start of the current one
grid_data <- base_data
n_groups <- nrow(grid_data)
# Rotate `end` by one position (last value first) so that every row holds the
# end of the previous group
grid_data$end <- grid_data$end[c(n_groups, seq_len(n_groups - 1))] + 1
grid_data$start <- grid_data$start - 1
grid_data <- grid_data[-1, ]
# Make the plot
# Labels sit this far above the top of their bar. The upper y limit below must
# include it: values outside the scale limits are turned into NA, so without it
# the label of the tallest bar is silently dropped.
label_offset <- 10
ggplot(data) +
  # Add the stacked bar (geom_col() is the idiomatic geom_bar(stat = "identity"))
  geom_col(aes(x = as.factor(id), y = value, fill = observation), alpha = 0.5) +
  scale_fill_viridis(discrete = TRUE) +
  # Add the val = 0/50/100/150/200 grid lines in the gaps between groups
  geom_segment(data = grid_data, aes(x = end, y = 0, xend = start, yend = 0), colour = "grey", alpha = 1, size = 0.3, inherit.aes = FALSE) +
  geom_segment(data = grid_data, aes(x = end, y = 50, xend = start, yend = 50), colour = "grey", alpha = 1, size = 0.3, inherit.aes = FALSE) +
  geom_segment(data = grid_data, aes(x = end, y = 100, xend = start, yend = 100), colour = "grey", alpha = 1, size = 0.3, inherit.aes = FALSE) +
  geom_segment(data = grid_data, aes(x = end, y = 150, xend = start, yend = 150), colour = "grey", alpha = 1, size = 0.3, inherit.aes = FALSE) +
  geom_segment(data = grid_data, aes(x = end, y = 200, xend = start, yend = 200), colour = "grey", alpha = 1, size = 0.3, inherit.aes = FALSE) +
  # Add text showing the value of each 0/50/100/150/200 line
  ggplot2::annotate("text", x = rep(max(data$id), 5), y = c(0, 50, 100, 150, 200), label = c("0", "50", "100", "150", "200"), color = "grey", size = 6, angle = 0, fontface = "bold", hjust = 1) +
  # The negative lower limit leaves an empty disc in the middle of the circle
  ylim(-150, max(label_data$tot, na.rm = TRUE) + label_offset) +
  theme_minimal() +
  theme(
    legend.position = "none",
    axis.text = element_blank(),
    axis.title = element_blank(),
    panel.grid = element_blank(),
    plot.margin = unit(rep(-1, 4), "cm")
  ) +
  coord_polar() +
  # Add labels on top of each bar (angle and hjust come from label_data)
  geom_text(data = label_data, aes(x = id, y = tot + label_offset, label = individual, hjust = hjust, angle = angle), color = "black", fontface = "bold", alpha = 0.6, size = 5, inherit.aes = FALSE) +
  # Add base line information
  geom_segment(data = base_data, aes(x = start, y = -5, xend = end, yend = -5), colour = "black", alpha = 0.8, size = 0.6, inherit.aes = FALSE) +
  # hjust holds one value per group (4 groups here); adapt it if the number of
  # groups changes, otherwise ggplot2 reports an aesthetics-length error
  geom_text(data = base_data, aes(x = title, y = -18, label = group), hjust = c(1, 1, 0, 0), colour = "black", alpha = 0.8, size = 4, fontface = "bold", inherit.aes = FALSE)
#> Warning: Removed 24 rows containing missing values (position_stack).
#> Warning: Removed 8 rows containing missing values (geom_text).
Related
I am trying to reproduce this figure (without the Portugal highlight):
The data (and figure) can be found in this link: https://stat.link/uz49al.
I imported and reshaped the data into a long format, but then I got stuck on how it would be possible to rearrange the legend entries in the same order as in the original.
I would very much appreciate your help!
Thanks!
Here is where I got:
# load data
# Shared preparation for both halves of the sheet: name the five columns,
# make Country a factor, derive the significance category and the value to
# plot, and tag the rows with the series they belong to.
prepare_odds <- function(raw, group_label) {
  names(raw)[1:5] <- c("Country", "Odds_ratio", "SE", "sig", "non_sig")
  raw$Country <- as.factor(raw$Country)
  raw <- raw %>%
    mutate(
      # A missing `sig` value marks a non-significant estimate
      category = case_when(
        is.na(sig) ~ "Non-significant",
        !is.na(sig) ~ "Significant"
      ),
      # Take the value from whichever of the two columns is filled
      value = case_when(
        category == "Non-significant" ~ non_sig,
        category == "Significant" ~ sig
      )
    )
  raw$group2 <- group_label
  raw
}
# Columns A:E hold the "before" series
f5_5_data_before <- prepare_odds(
  read_excel("uz49al.xlsx", sheet = "Figure1.20", range = "A32:E68"),
  "Before accounting for reading performance"
)
# Column A plus columns F:I hold the "after" series
f5_5_data_after <- prepare_odds(
  read_excel("uz49al.xlsx", sheet = "Figure1.20", range = "A32:I68")[, c(1, 6:9)],
  "After accounting for reading performance"
)
# appending in long format
f5_5_data <- rbind(f5_5_data_after, f5_5_data_before)
# shaded rectangle
rect1 <- data.frame(
xmin = 14.5,
xmax = 15.5,
ymin = -Inf,
ymax = Inf
)
# figure
f5_5 <- ggplot() +
geom_col(data = f5_5_data %>% filter(group2 == "After accounting for reading performance"),
aes(x = reorder(Country,-Odds_ratio),
y = value,
fill = category,
colour = group2),
width=0.5,
) +
geom_point(
data = f5_5_data %>% filter(group2 == "Before accounting for reading performance"),
aes(x = Country,
y = value,
fill = category,
colour = group2),
shape = 23,
size = 3,
) +
geom_rect(
data = rect1,
aes(
xmin = xmin,
xmax = xmax,
ymin = ymin,
ymax = ymax
),
alpha = 0.5,
inherit.aes = FALSE
) +
scale_y_continuous(breaks = pretty_breaks(),
limits = c(0, 25),
expand = c(0, 0)) +
labs(x = NULL,
y = "Odds ratio") +
theme(axis.text.x = element_text(angle = 90))
print(f5_5)
This yields the following output:
As you can see, the legend looks substantially different and essentially I got stuck.
One option to achieve your desired result would be via the ggnewscale package which allows for multiple scales for the same aesthetic. Doing so we could map category on the fill aes in both the geom_col and the geom_point but have two different legends:
Note: I simplified your data wrangling code a bit.
library(readxl)
library(dplyr)
library(ggplot2)
library(ggnewscale)
url <- "https://stat.link/uz49al"
# mode = "wb": an .xlsx workbook is a binary (zip) file, and a text-mode
# download corrupts it on Windows.
download.file(url, destfile = "uz49al.xlsx", mode = "wb")
dat <- read_excel("uz49al.xlsx", sheet = "Figure1.20", range = "A32:I68")
# Columns 1:5 hold the "before" series, columns 1 and 6:9 the "after" series.
# Give both the same column names and stack them, keeping the origin in `group2`.
col_names <- c("Country", "Odds_ratio", "SE", "sig", "non_sig")
dat <- list(
  before = setNames(dat[, 1:5], col_names),
  after = setNames(dat[, c(1, 6:9)], col_names)
) %>%
  bind_rows(.id = "group2")
# A missing `sig` marks a non-significant estimate; `value` is taken from
# whichever of the two columns is filled.
dat <- dat %>%
  mutate(
    category = if_else(is.na(sig), "nonsig", "sig"),
    value = if_else(is.na(sig), non_sig, sig)
  ) %>%
  select(-sig, -non_sig)
# Legend titles for the two fill scales
group2_labels <- c(after = "After accounting for reading performance", before = "Before accounting for reading performance")
# Shaded rectangle highlighting the 15th position on the x axis
rect1 <- data.frame(xmin = 14.5, xmax = 15.5, ymin = -Inf, ymax = Inf)
ggplot(dat, aes(x = reorder(Country, -Odds_ratio), y = value)) +
  # First fill scale: bars of the "after" series
  geom_col(data = ~ filter(.x, group2 == "after"), aes(fill = category), width = 0.5) +
  scale_fill_manual(
    labels = NULL, values = c(sig = "darkblue", nonsig = "steelblue"),
    name = group2_labels[["after"]], guide = guide_legend(title.position = "right")
  ) +
  # Start a second, independent fill scale for the layers that follow
  new_scale_fill() +
  geom_point(data = ~ filter(.x, group2 == "before"), aes(fill = category), size = 3, shape = 23, color = "lightblue") +
  geom_rect(
    data = rect1, aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax),
    alpha = 0.5, inherit.aes = FALSE
  ) +
  # Second fill scale: diamonds of the "before" series
  scale_fill_manual(
    labels = NULL, values = c(nonsig = "white", sig = "lightblue"), breaks = c("sig", "nonsig"),
    name = group2_labels[["before"]], guide = guide_legend(title.position = "right")
  ) +
  scale_y_continuous(breaks = scales::pretty_breaks(), limits = c(0, 25), expand = c(0, 0)) +
  labs(x = NULL, y = "Odds ratio") +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    legend.position = "top"
  )
Hi, and thanks for reading.
I am working with a circular bar chart in ggplot2 and I would like to make it interactive with ggiraph, but I cannot get it to work and I do not understand what I am doing wrong, or whether it is simply not possible in this particular case. Does anyone know of a solution?
The code is the following:
# Example data: 16 labelled items in three groups (A, B, C) with four count
# columns each. stringsAsFactors = FALSE keeps `individual` and `group` as
# character vectors.
data <- data.frame(
  stringsAsFactors = FALSE,
  individual = c("INDUSTRIAL","DE COMERCIO INTERIOR",
                 "DE COMERCIO EXTERIOR",
                 "DE ATRACCION DE INVERSION EXTRANJERA",
                 # Label repaired from a mis-decoded "Í" (it appeared as "Ă")
                 "POLÍTICA DE DESARROLLO PARA LA COMPETITIVIDAD DE LAS MIPYMES","DE MEJORA REGULATORIA",
                 "EN MATERIA MINERA","DE ABASTO","DE PRECIOS",
                 "DE PROTECCION AL CONSUMIDOR",
                 "NACIONAL DE CALIDAD",
                 "NACIONAL EN MATERIA DE NORMALIZACION, ESTANDARIZACION, EVALUACION DE LA",
                 "CONFORMIDAD Y METROLOGIA",
                 "DE INDUSTRIALIZACION, DISTRIBUCION Y CONSUMO DE LOS PRODUCTOS AGRICOLAS, GANADEROS, FORESTALES, MINERALES Y PESQUEROS",
                 "NACIONAL DE FOMENTO ECONOMICO",
                 "PARA CREAR Y APOYAR EMPRESAS QUE ASOCIEN A GRUPOS DE ESCASOS RECURSOS EN AREAS URBANAS"),
  group = c("A","A","A","A","B","B","B","B",
            "B","B","B","C","C","C","C","C"),
  value1 = c(30L,3L,7L,3L,0L,3L,1L,2L,0L,1L,
             7L,5L,1L,12L,0L,4L),
  value2 = c(10L,0L,2L,0L,0L,6L,0L,0L,0L,11L,
             7L,6L,0L,3L,1L,0L),
  value3 = c(0L,0L,1L,2L,14L,2L,1L,0L,0L,0L,
             1L,1L,0L,2L,0L,0L),
  value4 = c(9L,0L,13L,8L,2L,5L,1L,1L,0L,0L,
             0L,1L,2L,2L,0L,0L)
)
data = data %>%
gather(key = "observation", value="value", -c(1,2))
# Number of empty bars appended to each group to create a visual gap
empty_bar=2
# Number of stacked observation types per bar
nObsType=nlevels(as.factor(data$observation))
# NOTE: data was created with stringsAsFactors = FALSE, so data$group is a
# character vector: nlevels(data$group) is 0 and levels(data$group) is NULL.
# As written, to_add therefore has zero rows and no gaps are added; group would
# have to be converted with factor() first.
to_add = data.frame( matrix(NA, empty_bar*nlevels(data$group)*nObsType, ncol(data)) )
colnames(to_add) = colnames(data)
to_add$group=rep(levels(data$group), each=empty_bar*nObsType )
data=rbind(data, to_add)
data=data %>% arrange(group, individual)
data$id=rep( seq(1, nrow(data)/nObsType) , each=nObsType)
# Get the name and the y position of each label
label_data= data %>% group_by(id, individual) %>% summarize(tot=sum(value))
number_of_bar=nrow(label_data)
angle= 90 - 360 * (label_data$id-0.5) /number_of_bar # I substract 0.5 because the letter must have the angle of the center of the bars. Not extreme right(1) or extreme left (0)
label_data$hjust<-ifelse( angle < -90, 1, 0)
label_data$angle<-ifelse(angle < -90, angle+180, angle)
# prepare a data frame for base lines
base_data=data %>%
group_by(group) %>%
summarize(start=min(id), end=max(id) - empty_bar) %>%
rowwise() %>%
mutate(title=mean(c(start, end)))
# prepare a data frame for grid (scales)
grid_data = base_data
grid_data$end = grid_data$end[ c( nrow(grid_data), 1:nrow(grid_data)-1)] + 1
grid_data$start = grid_data$start - 1
grid_data=grid_data[-1,]
rhg_cols <- c("#12A09A", "#1E5C4F", "#941B80", "#F19100")
# Make the plot
p <- ggplot(data) +
# Add the stacked bar
ggiraph::geom_bar_interactive(aes(x=as.factor(id), y=value, fill=observation), stat="identity", alpha=0.5) +
#scale_fill_viridis(discrete=TRUE) +
scale_fill_manual(values = rhg_cols)+
# Add a val=100/75/50/25 lines. I do it at the beginning to make sur barplots are OVER it.
geom_segment(data=grid_data, aes(x = end, y = 0, xend = start, yend = 0), colour = "grey", alpha=1, size=0.3 , inherit.aes = FALSE ) +
geom_segment(data=grid_data, aes(x = end, y = 50, xend = start, yend = 50), colour = "grey", alpha=1, size=0.3 , inherit.aes = FALSE ) +
geom_segment(data=grid_data, aes(x = end, y = 100, xend = start, yend = 100), colour = "grey", alpha=1, size=0.3 , inherit.aes = FALSE ) +
geom_segment(data=grid_data, aes(x = end, y = 150, xend = start, yend = 150), colour = "grey", alpha=1, size=0.3 , inherit.aes = FALSE ) +
geom_segment(data=grid_data, aes(x = end, y = 200, xend = start, yend = 200), colour = "grey", alpha=1, size=0.3 , inherit.aes = FALSE ) +
# Add text showing the value of each 100/75/50/25 lines
annotate("text", x = rep(max(data$id),5), y = c(0, 50, 100, 150, 200), label = c("0", "50", "100", "150", "200") , color="grey", size=2 , angle=0, fontface="bold", hjust=1) +
ylim(-25,max(label_data$tot, na.rm=T)) +
theme_minimal() +
theme(
legend.position = "none",
axis.text = element_blank(),
axis.title = element_blank(),
panel.grid = element_blank(),
plot.margin = unit(rep(-1,4), "cm")
) +
coord_polar() +
# Add labels on top of each bar
geom_text(data=label_data, aes(x=id, y=tot+3, label=individual, hjust=hjust), color="black", fontface="bold",alpha=0.6, size=2, angle= label_data$angle, inherit.aes = FALSE )
# Add base line information
#geom_segment(data=base_data, aes(x = start, y = -5, xend = end, yend = -5), colour = "black", alpha=0.8, size=0.6 , inherit.aes = FALSE )
#geom_text(data=base_data, aes(x = title, y = -18, label=group), hjust=c(1,1,0,0), colour = "black", alpha=0.8, size=4, fontface="bold", inherit.aes = FALSE)
p
# interactive plot
girafe(
ggobj = p,
width_svg = 7, height_svg = 4, bg = "#D7E0DA",
options = list(
opts_hover(css = "fill:#5eba7d88;cursor:pointer;"),
opts_hover_theme(css = "fill:red;cursor:pointer;"),
opts_selection(css = "fill:#5eba7d;cursor:pointer;", only_shiny = FALSE, selected = "J"),
opts_tooltip(css = "background-color:white;padding:5px;border-radius:2px;border: black 1px solid;color:black;")
)
)
UPDATE
Now I use geom_col (geom_col_interactive) but when I exported the object to girafe() nothing appears in the viewfinder
The code is the following:
p <- ggplot(data) +
geom_col_interactive(aes(x=as.factor(id), y=value, fill=observation), stat="identity", alpha=0.5) +
scale_fill_manual_interactive(values = rhg_cols)+
annotate("text", x = rep(max(data$id),5), y = c(0, 50, 100, 150, 200), label = c("0", "50", "100", "150", "200") ,
color="grey", size=2 , angle=0, fontface="bold", hjust=1) +
ylim(-25,max(label_data$tot, na.rm=T)) +
theme_minimal() +
theme(
legend.position = "none",
axis.text = element_blank(),
axis.title = element_blank(),
panel.grid = element_blank(),
plot.margin = unit(rep(-1,4), "cm")
) +
coord_polar() +
geom_text_interactive(data=label_data, aes(x=id, y=tot+3, label=individual, hjust=hjust), color="black", fontface="bold",alpha=0.6,
size=2, angle= label_data$angle, inherit.aes = FALSE )
p
girafe(
ggobj = p,
width_svg = 7, height_svg = 4, bg = "#D7E0DA",
options = list(
opts_hover(css = "fill:#5eba7d88;cursor:pointer;"),
opts_hover_theme(css = "fill:red;cursor:pointer;"),
opts_selection(css = "fill:#5eba7d;cursor:pointer;", only_shiny = FALSE, selected = "J"),
opts_tooltip(css = "background-color:white;padding:5px;border-radius:2px;border: black 1px solid;color:black;")
)
)
There are ggplot issues in your code, for example, geom_bar should not be used with a y, use geom_col instead.
With a simple example, here is how to do (from https://www.r-graph-gallery.com/128-ring-or-donut-plot.html)
# load library
library(ggplot2)
library(ggiraph)
# Create test data.
data <- data.frame(
  category = c("A", "B", "C"),
  count = c(10, 60, 30)
)
# Share of the total held by each category
data[["fraction"]] <- with(data, count / sum(count))
# Upper edge of each rectangle: running total of the shares
data[["ymax"]] <- cumsum(data[["fraction"]])
# Lower edge of each rectangle: the previous upper edge, starting from 0
data[["ymin"]] <- c(0, data[["ymax"]][-nrow(data)])
# Make the plot
# Each category is a rectangle between x = 3 and x = 4 whose y extent is its
# cumulative share; the tooltip and data_id aesthetics are what make the
# rectangles interactive.
p <- ggplot(
  data,
  aes(xmin = 3, xmax = 4, ymin = ymin, ymax = ymax, fill = category)
) +
  geom_rect_interactive(
    aes(data_id = category, tooltip = paste(category, count))
  ) +
  # Polar coordinates on y bend the stacked rectangles into a ring
  # (remove this line to see how the chart is built initially)
  coord_polar(theta = "y") +
  # Starting x at 2 leaves the hole of the donut (remove it to get a pie chart)
  xlim(c(2, 4))
# Render the ggplot object as an interactive SVG widget
girafe(
  ggobj = p,
  width_svg = 7,
  height_svg = 4,
  bg = "#D7E0DA",
  options = list(
    opts_hover(css = "fill:#5eba7d88;cursor:pointer;"),
    opts_hover_theme(css = "fill:red;cursor:pointer;"),
    opts_selection(
      css = "fill:#5eba7d;cursor:pointer;",
      only_shiny = FALSE,
      selected = "J"
    ),
    opts_tooltip(
      css = "background-color:white;padding:5px;border-radius:2px;border: black 1px solid;color:black;"
    )
  )
)
I got an error in R when I tried to plot a circular barplot. This is the error: Error: Aesthetics must be either length 1 or the same as the data (9):
This is the structure of my data, just limited one:
structure(list(Symptom = c("Chills", "Chills", "Chills", "Cough",
"Cough", "Cough"), Morbidity = c("Asthma (managed with an inhaler)",
"Asthma (managed with an inhaler)", "Asthma (managed with an inhaler)",
"Asthma (managed with an inhaler)", "Asthma (managed with an inhaler)",
"Asthma (managed with an inhaler)"), Severity = c("Mild", "Moderate",
"Severe", "Mild", "Moderate", "Severe"), Count = c(264L, 53L,
19L, 853L, 158L, 27L), id = c(1L, 1L, 1L, 2L, 2L, 2L)), row.names = c(NA,
6L), class = "data.frame")
And this is how I tried to plot it:
gather_divided <- data.frame(gather_divided)
gather_divided <- gather_divided %>%
dplyr::select(Symptom, Morbidity, Severity, Count)
# Set a number of 'empty bar' to add at the end of each group
empty_bar <- 2
# Number of stacked categories per bar (the distinct Severity values)
nObsType <- nlevels(as.factor(gather_divided$Severity))
nObsType
# NOTE: in the posted data Morbidity is a character vector, so
# nlevels(gather_divided$Morbidity) is 0 and levels() is NULL: to_add then has
# zero rows and no empty bars are added. Convert Morbidity with factor() first
# if the gaps are wanted.
to_add <- data.frame(matrix(NA, empty_bar*nlevels(gather_divided$Morbidity)*nObsType, ncol(gather_divided)) )
colnames(to_add) <- colnames(gather_divided)
to_add$Morbidity <- rep(levels(gather_divided$Morbidity), each=empty_bar*nObsType )
gather_divided <- rbind(gather_divided, to_add)
gather_divided <- gather_divided %>% arrange(Morbidity, Symptom)
gather_divided$id <- rep( seq(1, nrow(gather_divided)/nObsType) , each=nObsType)
# Get the name and the y position of each label
label_data <- gather_divided %>% group_by(id, Symptom) %>% summarize(tot=sum(Count))
number_of_bar <- nrow(label_data)
angle <- 90 - 360 * (label_data$id-0.5) /number_of_bar # I substract 0.5 because the letter must have the angle of the center of the bars. Not extreme right(1) or extreme left (0)
label_data$hjust <- ifelse( angle < -90, 1, 0)
label_data$angle <- ifelse(angle < -90, angle+180, angle)
# prepare a data frame for base lines
base_data <- gather_divided %>%
group_by(Morbidity) %>%
summarize(start=min(id), end=max(id) - empty_bar) %>%
rowwise() %>%
mutate(title=mean(c(start, end)))
# prepare a data frame for grid (scales)
grid_data <- base_data
grid_data$end <- grid_data$end[ c( nrow(grid_data), 1:nrow(grid_data)-1)] + 1
grid_data$start <- grid_data$start - 1
grid_data <- grid_data[-1,]
# Make the plot
p <- ggplot(gather_divided) +
# Add the stacked bar
# NOTE(review): the bars are filled by Morbidity, although nObsType (rows per
# bar) was computed from Severity -- confirm which variable should define the stacks.
geom_bar(aes(x=as.factor(id), y=Count, fill=Morbidity), stat="identity", alpha=0.5) +
scale_fill_viridis(discrete=TRUE) +
# Add the val=0/500/1000/1500/2000 grid lines in the gaps between groups.
geom_segment(data=grid_data, aes(x = end, y = 0, xend = start, yend = 0), colour = "grey", alpha=1, size=0.3 , inherit.aes = FALSE ) +
geom_segment(data=grid_data, aes(x = end, y = 500, xend = start, yend = 500), colour = "grey", alpha=1, size=0.3 , inherit.aes = FALSE ) +
geom_segment(data=grid_data, aes(x = end, y = 1000, xend = start, yend = 1000), colour = "grey", alpha=1, size=0.3 , inherit.aes = FALSE ) +
geom_segment(data=grid_data, aes(x = end, y = 1500, xend = start, yend = 1500), colour = "grey", alpha=1, size=0.3 , inherit.aes = FALSE ) +
geom_segment(data=grid_data, aes(x = end, y = 2000, xend = start, yend = 2000), colour = "grey", alpha=1, size=0.3 , inherit.aes = FALSE ) +
# Add text showing the value of each 0/500/1000/1500/2000 line
ggplot2::annotate("text", x = rep(max(gather_divided$id),5), y = c(0, 500, 1000, 1500, 2000),
label = c("0", "500", "1000", "1500", "2000") , color="grey", size=6 , angle=0, fontface="bold", hjust=1) +
# The negative lower limit leaves an empty disc in the middle of the circle
ylim(-150,max(label_data$tot, na.rm=T)) +
theme_minimal() +
theme(
legend.position = "none",
axis.text = element_blank(),
axis.title = element_blank(),
panel.grid = element_blank(),
plot.margin = unit(rep(-1,4), "cm")
) +
coord_polar() +
# Add labels on top of each bar
geom_text(data=label_data, aes(x=id, y=tot+10, label= Symptom, hjust=hjust), color="black", fontface="bold",alpha=0.6, size=5, angle= label_data$angle, inherit.aes = FALSE ) +
# Add base line information
geom_segment(data=base_data, aes(x = start, y = -5, xend = end, yend = -5), colour = "black", alpha=0.8, size=0.6 , inherit.aes = FALSE ) +
# NOTE(review): hjust=c(1,1,0,0) has a fixed length of 4, one value per group
# in the original gallery example. base_data has one row per Morbidity, so
# unless there are exactly 4 rows this is the likely source of
# "Aesthetics must be either length 1 or the same as the data" -- confirm
# against nrow(base_data).
geom_text(data=base_data, aes(x = title, y = -18, label=Morbidity), hjust=c(1,1,0,0), colour = "black", alpha=0.8, size=4, fontface="bold", inherit.aes = FALSE)
p
Has anyone else tried to plot a circular barplot and faced the same issue? How can I solve it? Thank you in advance.
I try to run the code for a circular barplot provided by https://www.r-graph-gallery.com/299-circular-stacked-barplot.html
# library
library(tidyverse)
library(viridis)
# Create dataset
data <- data.frame(
individual=paste( "Mister ", seq(1,60), sep=""),
group=c( rep('A', 10), rep('B', 30), rep('C', 14), rep('D', 6)) ,
value1=sample( seq(10,100), 60, replace=T),
value2=sample( seq(10,100), 60, replace=T),
value3=sample( seq(10,100), 60, replace=T)
)
# Transform data in a tidy format (long format)
data <- data %>% gather(key = "observation", value="value", -c(1,2))
# Set a number of 'empty bar' to add at the end of each group
empty_bar <- 2
nObsType <- nlevels(as.factor(data$observation))
to_add <- data.frame( matrix(NA, empty_bar*nlevels(data$group)*nObsType, ncol(data)) )
colnames(to_add) <- colnames(data)
to_add$group <- rep(levels(data$group), each=empty_bar*nObsType )
data <- rbind(data, to_add)
data <- data %>% arrange(group, individual)
data$id <- rep( seq(1, nrow(data)/nObsType) , each=nObsType)
# Get the name and the y position of each label
# Expected result: one row per (id, individual) with the stacked total `tot`.
# NOTE(review): the reported errors ("replacement has 0 rows, data has 1")
# mean label_data came back with a single row and no `id` column, i.e. the
# grouping was ignored. Presumably summarize() resolved to a non-dplyr
# function (e.g. plyr loaded after dplyr) -- verify, and call
# dplyr::summarize() explicitly if so.
label_data <- data %>% group_by(id, individual) %>% summarize(tot=sum(value))
number_of_bar <- nrow(label_data)
angle <- 90 - 360 * (label_data$id-0.5) /number_of_bar # I subtract 0.5 because the label must have the angle of the center of the bar, not of its right (1) or left (0) edge
# Labels on the left half of the circle are flipped so they stay readable
label_data$hjust <- ifelse( angle < -90, 1, 0)
label_data$angle <- ifelse(angle < -90, angle+180, angle)
# prepare a data frame for base lines
base_data <- data %>%
group_by(group) %>%
summarize(start=min(id), end=max(id) - empty_bar) %>%
rowwise() %>%
mutate(title=mean(c(start, end)))
# prepare a data frame for grid (scales)
grid_data <- base_data
grid_data$end <- grid_data$end[ c( nrow(grid_data), 1:nrow(grid_data)-1)] + 1
grid_data$start <- grid_data$start - 1
grid_data <- grid_data[-1,]
# Make the plot
p <- ggplot(data) +
# Add the stacked bar
geom_bar(aes(x=as.factor(id), y=value, fill=observation), stat="identity", alpha=0.5) +
scale_fill_viridis(discrete=TRUE) +
# Add a val=100/75/50/25 lines. I do it at the beginning to make sur barplots are OVER it.
geom_segment(data=grid_data, aes(x = end, y = 0, xend = start, yend = 0), colour = "grey", alpha=1, size=0.3 , inherit.aes = FALSE ) +
geom_segment(data=grid_data, aes(x = end, y = 50, xend = start, yend = 50), colour = "grey", alpha=1, size=0.3 , inherit.aes = FALSE ) +
geom_segment(data=grid_data, aes(x = end, y = 100, xend = start, yend = 100), colour = "grey", alpha=1, size=0.3 , inherit.aes = FALSE ) +
geom_segment(data=grid_data, aes(x = end, y = 150, xend = start, yend = 150), colour = "grey", alpha=1, size=0.3 , inherit.aes = FALSE ) +
geom_segment(data=grid_data, aes(x = end, y = 200, xend = start, yend = 200), colour = "grey", alpha=1, size=0.3 , inherit.aes = FALSE ) +
# Add text showing the value of each 100/75/50/25 lines
ggplot2::annotate("text", x = rep(max(data$id),5), y = c(0, 50, 100, 150, 200), label = c("0", "50", "100", "150", "200") , color="grey", size=6 , angle=0, fontface="bold", hjust=1) +
ylim(-150,max(label_data$tot, na.rm=T)) +
theme_minimal() +
theme(
legend.position = "none",
axis.text = element_blank(),
axis.title = element_blank(),
panel.grid = element_blank(),
plot.margin = unit(rep(-1,4), "cm")
) +
coord_polar() +
# Add labels on top of each bar
geom_text(data=label_data, aes(x=id, y=tot+10, label=individual, hjust=hjust), color="black", fontface="bold",alpha=0.6, size=5, angle= label_data$angle, inherit.aes = FALSE ) +
# Add base line information
geom_segment(data=base_data, aes(x = start, y = -5, xend = end, yend = -5), colour = "black", alpha=0.8, size=0.6 , inherit.aes = FALSE ) +
geom_text(data=base_data, aes(x = title, y = -18, label=group), hjust=c(1,1,0,0), colour = "black", alpha=0.8, size=4, fontface="bold", inherit.aes = FALSE)
# Save at png
ggsave(p, file="output.png", width=10, height=10)
Then, I ran into several error messages.
Three from running the code under
Get the name and the y position of each label
Warning message:
Factor individual contains implicit NA, consider using forcats::fct_explicit_na
Error in $<-.data.frame(*tmp*, hjust, value = logical(0)) :
replacement has 0 rows, data has 1
Error in $<-.data.frame(*tmp*, angle, value = logical(0)) :
replacement has 0 rows, data has 1
And two from
Make the plot
Warning message:
In max(label_data$tot, na.rm = T) :
no non-missing arguments to max; returning -Inf
Error: Aesthetics must be either length 1 or the same as the data (1): x, xend
Can anyone help me with these errors?
Thanks in advance
I saw this great plot from FiveThirtyEight that shows slightly overlapping density plots for different colleges. Check out this link at fivethirtyeight.com
How would you replicate this plot with ggplot2?
Specifically, how would you get that slight overlap? facet_wrap isn't going to work.
# Five groups of 100 normal draws each, centred on 0, with standard
# deviations 1 to 5 (drawn group by group, in that order).
TestFrame <- data.frame(
  Score = unlist(lapply(1:5, function(spread) rnorm(100, 0, spread))),
  Group = rep(c("Ones", "Twos", "Threes", "Fours", "Fives"), each = 100)
)
# One filled density curve per group, all drawn on the same axes
ggplot(data = TestFrame, mapping = aes(x = Score, group = Group)) +
  geom_density(fill = "black", alpha = 0.75)
As always with ggplot, the key is getting the data in the right format, and then the plotting is pretty straightforward. I'm sure there would be another way to do this, but my approach was to do the density estimation with density() and then to make a sort of manual geom_density() with geom_ribbon(), which takes a ymin and ymax, necessary for moving the shape off the x axis.
The rest of the challenge was in getting the order of the printing correct, since it seems that ggplot will print the widest ribbon first. In the end, the part that requires the bulkiest code is the production of the quartiles.
I also produced some data that is a bit more consistent with the original figure.
library(ggplot2)
library(dplyr)
library(broom)
rawdata <- data.frame(Score = rnorm(1000, seq(1, 0, length.out = 10), sd = 1),
Group = rep(LETTERS[1:10], 10000))
df <- rawdata %>%
mutate(GroupNum = rev(as.numeric(Group))) %>% #rev() means the ordering will be from top to bottom
group_by(Group, GroupNum) %>%
do(tidy(density(.$Score, bw = diff(range(.$Score))/20))) %>% #The original has quite a large bandwidth
group_by() %>%
mutate(ymin = GroupNum * (max(y) / 1.5), #This constant controls how much overlap between groups there is
ymax = y + ymin,
ylabel = ymin + min(ymin)/2,
xlabel = min(x) - mean(range(x))/2) #This constant controls how far to the left the labels are
# Get quartiles: one row per group holding the quartiles of its raw scores
# plus the point on that group's density curve closest to the median (used to
# draw the median marker).
labels <- rawdata %>%
  # Same reversed group numbering as used for the density curves. factor()
  # makes this work whether Group is a factor or character, and the result
  # does not depend on the order of the rows.
  mutate(GroupNum = nlevels(factor(Group)) + 1 - as.numeric(factor(Group))) %>%
  group_by(Group, GroupNum) %>%
  mutate(
    q1 = quantile(Score, probs = 0.25),
    median = quantile(Score, probs = 0.5),
    q3 = quantile(Score, probs = 0.75)
  ) %>%
  # The quartiles are constant within a group, so keep one row per group.
  filter(row_number() == 1) %>%
  select(-Score) %>%
  # Attach every density point of the group; the keys are spelled out instead
  # of relying on the implicit natural join.
  left_join(df, by = c("Group", "GroupNum")) %>%
  # Within each group, pick the density point whose x is nearest the median.
  mutate(
    xmed = x[which.min(abs(x - median))],
    yminmed = ymin[which.min(abs(x - median))],
    ymaxmed = ymax[which.min(abs(x - median))]
  ) %>%
  filter(row_number() == 1)
# Base canvas: group names at the label positions, a heavier guide line at
# x = 0 with four fainter ones around it, and a stripped-down theme on a
# light grey panel. The ribbons themselves are added afterwards.
p <- ggplot(data = df, mapping = aes(x = x, ymin = ymin, ymax = ymax)) +
  geom_text(
    data = labels,
    mapping = aes(x = xlabel, y = ylabel, label = Group)
  ) +
  geom_vline(
    xintercept = 0,
    colour = "#626262", alpha = 0.5, size = 1.5
  ) +
  geom_vline(
    xintercept = c(-2.5, -1.25, 1.25, 2.5),
    colour = "#626262", alpha = 0.25, size = 0.75
  ) +
  theme(
    axis.title = element_blank(),
    axis.ticks = element_blank(),
    axis.text.y = element_blank(),
    panel.background = element_rect(fill = "#F0F0F0"),
    panel.grid = element_blank()
  )
# Add the layers one group at a time, in the order the groups appear in df,
# so that each group's ribbon is drawn over the groups added before it.
for (i in unique(df$GroupNum)) {
  ribbon_rows <- df[df$GroupNum == i, ]
  label_row <- labels[labels$GroupNum == i, ]
  p <- p +
    # The density shape itself, lifted off the axis by ymin.
    geom_ribbon(
      data = ribbon_rows,
      mapping = aes(group = GroupNum),
      fill = "black", colour = "#F0F0F0"
    ) +
    # Dashed marker at the median, spanning the height of the ribbon there.
    geom_segment(
      data = label_row,
      mapping = aes(x = xmed, xend = xmed, y = yminmed, yend = ymaxmed),
      linetype = "dashed", colour = "#F0F0F0"
    ) +
    # Horizontal baseline across the full x range of all the curves.
    geom_segment(
      data = label_row,
      mapping = aes(y = ymin, yend = ymin),
      x = min(df$x), xend = max(df$x),
      lineend = "round", size = 1.5
    )
}
# Annotate the median marker of group "A" with an italic, left-aligned
# "Median" caption placed just beside the marker.
p <- p +
  geom_text(
    data = labels[labels$Group == "A", ],
    mapping = aes(x = xmed - xlabel / 50, y = ylabel),
    label = "Median",
    fontface = "italic",
    hjust = 0,
    size = 4,
    colour = "#F0F0F0"
  )
Edit
I noticed the original actually does a bit of fudging by stretching out each distribution with a horizontal line (you can see a join if you look closely...). I added something similar with the second geom_segment() in the loop.
Although there is a great & accepted answer available already - I finished my contribution as an alternative avenue without data reformatting.
# Example data: five groups of 50 scores. Each score is the sum of a draw from
# N(3, sd = 2) and a draw from N(-k, sd = 3), with k = 1 for "Ones" up to
# k = 5 for "Fives".
TestFrame <- data.frame(
  Score = unlist(lapply(1:5, function(k) rnorm(50, 3, 2) + rnorm(50, -k, 3))),
  Group = rep(c("Ones", "Twos", "Threes", "Fours", "Fives"), each = 50)
)
# Attach the plotting packages. library() stops with an error when a package
# is missing, whereas require() only warns and returns FALSE, which would let
# the script fail later with a less helpful message.
library(ggplot2)
library(grid)

# Vertical offset, in y-axis units, between consecutive stacked curves.
spacing <- 0.05

# Blank theme shared by all plots: no legend, axes, grid or panel background,
# a transparent plot background and zero margins, so that plots can be
# layered on top of one another.
tm <- theme(
  legend.position = "none",
  axis.line = element_blank(),
  axis.text.x = element_blank(),
  axis.text.y = element_blank(),
  axis.ticks = element_blank(),
  axis.title.x = element_blank(),
  axis.title.y = element_blank(),
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.background = element_blank(),
  plot.background = element_rect(fill = "transparent", colour = NA),
  plot.margin = unit(c(0, 0, 0, 0), "mm")
)
# Quintile boundaries and median of the pooled scores (all groups together).
firstQuintile <- quantile(TestFrame$Score, probs = 0.2)
secondQuintile <- quantile(TestFrame$Score, probs = 0.4)
# This variable shares its name with the median() function; calls such as
# median(x) still resolve to the function.
median <- quantile(TestFrame$Score, probs = 0.5)
thirdQuintile <- quantile(TestFrame$Score, probs = 0.6)
fourthQuintile <- quantile(TestFrame$Score, probs = 0.8)

# Axis limits: 1.5 times the peak density of the "Ones" group vertically, and
# the pooled score extremes scaled by 1.2 horizontally.
ymax <- 1.5 * max(density(TestFrame$Score[TestFrame$Group == "Ones"])$y)
xmax <- 1.2 * max(TestFrame$Score)
xmin <- 1.2 * min(TestFrame$Score)
# Base plot. The density layer is invisible (transparent fill, no outline); it
# is a small hack to get a working empty panel with the right axis limits.
# The y range extends 5 * spacing below zero. On top of it go four gray lines
# at the quintile boundaries and a thicker dark gray line at the median.
p0 <- ggplot(
  TestFrame[TestFrame$Group == "Ones", ],
  aes(x = Score, group = Group)
) +
  geom_density(fill = "transparent", colour = NA) +
  ylim(-5 * spacing, ymax) +
  xlim(xmin, xmax) +
  tm +
  geom_vline(aes(xintercept = firstQuintile), color = "gray", size = 1.2) +
  geom_vline(aes(xintercept = secondQuintile), color = "gray", size = 1.2) +
  geom_vline(aes(xintercept = thirdQuintile), color = "gray", size = 1.2) +
  geom_vline(aes(xintercept = fourthQuintile), color = "gray", size = 1.2) +
  geom_vline(aes(xintercept = median), color = "darkgray", size = 2)
# Build the density plot for a single group.
#
# group_name: value of TestFrame$Group selecting the rows to plot.
# Returns a ggplot object: a black, white-outlined density curve drawn with
# the shared blank theme and axis limits, plus a dashed white vertical segment
# at the group's median running from 0 up to the peak of its density.
make_group_density <- function(group_name) {
  group_rows <- TestFrame[TestFrame$Group == group_name, ]
  ggplot(group_rows, aes(x = Score, group = Group)) +
    geom_density(alpha = .85, fill = "black", color = "white", size = 1) +
    tm +
    ylim(0, ymax) +
    xlim(xmin, xmax) +
    geom_segment(
      aes(
        y = 0, x = median(Score),
        yend = max(density(Score)$y), xend = median(Score)
      ),
      color = "white", linetype = 2
    )
}

# One plot per group; previously five copy-pasted definitions that differed
# only in the group name.
p1 <- make_group_density("Ones")
p2 <- make_group_density("Twos")
p3 <- make_group_density("Threes")
p4 <- make_group_density("Fours")
p5 <- make_group_density("Fives")
# Turn each per-group plot into a grob so it can be embedded in the base plot.
f <- grobTree(ggplotGrob(p1))
g <- grobTree(ggplotGrob(p2))
h <- grobTree(ggplotGrob(p3))
i <- grobTree(ggplotGrob(p4))
j <- grobTree(ggplotGrob(p5))

# Wrap a grob as an annotation spanning the full x range, shifted down by
# `level` multiples of `spacing` at the bottom and `level - 1` at the top.
embed_at_level <- function(grob, level) {
  annotation_custom(
    grob = grob,
    xmin = xmin, xmax = xmax,
    ymin = -spacing * level,
    ymax = ymax - spacing * (level - 1)
  )
}

a1 <- embed_at_level(f, 1)
a2 <- embed_at_level(g, 2)
a3 <- embed_at_level(h, 3)
a4 <- embed_at_level(i, 4)
a5 <- embed_at_level(j, 5)

# Stack the embedded plots on the base plot, first group first, and display.
pfinal <- p0 + a1 + a2 + a3 + a4 + a5
pfinal
Using the dedicated geom_joy() from the ggjoy package:
library(ggjoy)
# Score on the x axis and one row per Group on the y axis, drawn with
# geom_joy().
ggplot(data = TestFrame, mapping = aes(x = Score, y = Group)) +
  geom_joy()
# Dummy data, seeded for reproducibility: five groups of 100 normal draws
# centred on 0, with standard deviation 1 ("Ones") through 5 ("Fives").
set.seed(1)
TestFrame <- data.frame(
  Score = unlist(lapply(1:5, function(s) rnorm(100, mean = 0, sd = s))),
  Group = rep(c("Ones", "Twos", "Threes", "Fours", "Fives"), each = 100)
)
head(TestFrame)
# Score Group
# 1 -0.6264538 Ones
# 2 0.1836433 Ones
# 3 -0.8356286 Ones
# 4 1.5952808 Ones
# 5 0.3295078 Ones
# 6 -0.8204684 Ones