Creating a mosaic plot with percentages - r

Following is the sample dataset that I have:
df <- structure(list(Class = c("A", "B", "C", "D"),
`Attempted` = c(374, 820, 31, 108),
`Missed` = c(291, 311, 5, 15),
`Cancelled` = c(330, 206, 6, 5),
`Unknown` = c(950, 341, 6, 13)),
class = "data.frame", row.names = c(NA, -4L))
I want to create a mosaic plot with 'percentages' instead of absolute numbers. To be precise, I want to see what percentage of 'class A' people out of the total 'class A' population 'missed' their test? And, similarly for other class population.
I have not tried any code yet as I have absolutely no clue how to start. Can anyone please help me with this?

Using only one package, you can do and note I am labeling the cells with the proportions in each class (i.e rows sum up to 1):
library(vcd)
M = as.table(as.matrix(df[,-1]))
names(dimnames(M)) = c("Class","result")
labs <- round(prop.table(M,margin=1), 2)
mosaic(M, pop = FALSE)
labeling_cells(text = labs, margin = 0)(M)
You can also just visualize it with a simple
library(RColorBrewer)
barplot(t(labs),col=brewer.pal(4,"Set2"))
legend("bottomright",legend = colnames(labs),inset=c(0,1.1), xpd=TRUE,
fill =brewer.pal(4,"Set2"),horiz=TRUE,cex=0.7)
If you use ggplot2 and another other gg stuff, you need to pivot your data long:
library(tidyr)
library(dplyr)
library(ggplot2)
df_long = df %>%
pivot_longer(-Class) %>%
group_by(Class) %>%
mutate(total = sum(value),
p = round(100*value/total,digits=1)) %>%
ungroup()
ggplot(df_long,aes(x=Class,y=p,fill=name)) + geom_col() + geom_text(aes(label=p),position=position_stack(vjust=0.2))
If you want to use ggplot2, you need to modify this answer by z.lin, note I take the sqrt to make the smaller plots more visible:
ggplot(df_long,
aes(x = Class, y = p, width = sqrt(total), fill = name)) +
geom_col(colour = "black") +
geom_text(aes(label = p), position = position_stack(vjust = 0.5)) +
facet_grid(~Class, scales = "free_x", space = "free_x") +
theme_void()

Related

How to get r to not remove a row in ggplot - geom_line

I'm trying to produce a graph of growth rates over time based upon the following data which has blanks in two groups.
When I try to make a growth plot of this using geom_line to join points there is no line for group c.
I'm just wondering if there is anyway to fix this
One option would be to get rid of the missing values which prevent the points to be connected by the line:
Making use of the code from the answer I provided on your previous question but adding tidyr::drop_na:
Growthplot <- data.frame(
Site = letters[1:4],
July = 0,
August = c(1, -1, NA, 2),
September = c(3, 2, 3, NA)
)
library(ggplot2)
library(tidyr)
library(dplyr, warn=FALSE)
growth_df <- Growthplot %>%
pivot_longer(-Site, names_to = "Month", values_to = "Length") %>%
mutate(Month = factor(Month, levels = c("July", "August", "September"))) %>%
drop_na()
ggplot(growth_df, aes(x = Month, y = Length, colour = Site, group = Site)) +
geom_point() +
geom_line()+
labs(color = "Site", x = "Month", y = "Growth in cm") +
theme(axis.line = element_line(colour = "black", size = 0.24))

Adding geom_line between data points with different geom_boxplot fill variable

Hi I have a much larger data frame but a sample dummy df is as follows:
set.seed(23)
df = data.frame(name = c(rep("Bob",8),rep("Tom",8)),
topic = c(rep(c("Reading","Writing"),8)),
subject = c(rep(c("English","English","Spanish","Spanish"),4)),
exam = c(rep("First",4),rep("Second",4),rep("First",4),rep("Second",4)),
score = sample(1:100,16))
I have to plot it in the way shown in the picture below (for my original data frame) but with lines connecting the scores corresponding to each name between the first and second class in the exam variable, I tried geom_line(aes(group=name)) but the lines are not connected in the right way. Is there any way to connect the points that also respects the grouping by the fill variable similar to how the position_dodge() helps separate the points by their fill grouping? Thanks a lot!
library(ggplot2)
df %>% ggplot(aes(x=topic,y=score,fill=exam)) +
geom_boxplot(outlier.shape = NA) +
geom_point(size=1.75,position = position_dodge(width = 0.75)) +
facet_grid(~subject,switch = "y")
One option to achieve your desired result would be to group the lines by name and topic and do the dodging of lines manually instead of relying on position_dogde. To this end convert topic to a numeric for the geom_line and shift the position by the necessary amount to align the lines with the dodged points:
set.seed(23)
df <- data.frame(
name = c(rep("Bob", 8), rep("Tom", 8)),
topic = c(rep(c("Reading", "Writing"), 8)),
subject = c(rep(c("English", "English", "Spanish", "Spanish"), 4)),
exam = c(rep("First", 4), rep("Second", 4), rep("First", 4), rep("Second", 4)),
score = sample(1:100, 16)
)
library(ggplot2)
ggplot(df, aes(x = topic, y = score, fill = exam)) +
geom_boxplot(outlier.shape = NA) +
geom_point(size = 1.75, position = position_dodge(width = 0.75)) +
geom_line(aes(
x = as.numeric(factor(topic)) + .75 / 4 * ifelse(exam == "First", -1, 1),
group = interaction(name, topic)
)) +
facet_grid(~subject, switch = "y")

Ggplot - always place 'Total' bar as the farthest right bar using geom_col

I am creating a chart that looks like the below. Problem is that I'd like the grey 'total' bar to always be on the far right hand side.
Current code is below, can anyone please amend/provide any additional code to create this effect?
#plot with reorder
PrevalencePlot <- ggplot(ICSTable4, aes(x = reorder(value, Area), y = value, fill = Statistical_Significance)) +
geom_col() +
scale_fill_manual(values = colours)+
geom_errorbar(aes(ymin=errorbarlowerplot, ymax=errorbarhigherplot),
width=.2, # Width of the error bars
position=position_dodge(.9)) +
theme_bw() +
geom_text(aes(label = valuelabel), vjust = 2.5, colour = "black")+
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
If anyone is able to help then the below data frame could be used to generate the principle I think? Thank you!
df <- data.frame(Area = c("Area1", "Area2", "Area3", "Area4", "Total"),
Value = c(1, 3, 7, 5, 4)
)
Building on the minimal example data, we can make a spartanic version of the plot that addresses the question of ordering the values, and placing a selected column at the end.
df <- data.frame(Area = c("Area1", "Area2", "Area3", "Area4", "Total"),
value = c(1, 3, 7, 5, 4),
Statistical_Significance = c("higher", "lower", "lower", "higher", NA))
It's easier to create the order of the columns before plotting, as we need to create the factors based on the order of value and then reposition the target column ("Total").
df <- df %>%
dplyr::arrange(desc(value)) %>% #arrange by value
dplyr::mutate(Area = forcats::as_factor(Area)) %>% # factor that defines order on x-axis
dplyr::mutate(Area = forcats::fct_relevel(Area, "Total", after = Inf)) # reposition "Total" column
ggplot(df, aes(x = Area, y = value, fill = Statistical_Significance)) +
geom_col() +
theme_bw()

Avoid overlap of points on a timeline (1-D repeling)

I want to create a timeline plot that roughly resembles the example below: lots of overlap at some points, not a lot of overlap at others.
What I need: overlapping images should repel each other where necessary, eliminating or reducing overlap. Ideally I'd be able to implement either a vertical or horizontal repel.
library(tidyverse)
library(ggimage)
test_img <- list.files(system.file("extdata", package="ggimage"), pattern="png", full.names=TRUE)
set.seed(123)
df <-
tibble(date = as.Date(paste0("2020-", round(runif(45, 1, 2)), "-", round(runif(45, 1, 10)))),
group = paste0("Timeline ", rep(1:9, each = 5)),
img = sample(test_img, size = 45, replace = T) )
df %>%
ggplot() +
geom_line(aes(x = date, y = group, group = group), size = 5, alpha = 0.2) +
geom_image(aes(x = date, y = group, image = img, group = group), asp = 1)
Something similar to the repelling in ggbeeswarm::geom_beeswarm or ggrepel::geom_text_repel would be nice, but those don't support images. So I think I need to pre-apply some kind of 1-dimensional packing algorithm, implementing iterative pair-wise repulsion on my vector of dates within each group, to try to find a non-overlapping arrangement.
Any ideas? Thank you so much!
Created on 2021-10-30 by the reprex package (v2.0.1)
Here is the solution I’ve been able to come up with, repurposing the circleRepelLayout function from the awesome packcircles package
into the repel_vector vector function that takes in your overlapping vector and a "repel_radius", and returns, if possible, a non-overlapping version.
I demonstrate the solution with the richtext geom since this is a geom I’ve always wished had repel functionality.
library(packcircles)
library(tidyverse)
library(ggtext)
library(ggimage)
repel_vector <- function(vector, repel_radius = 1, repel_bounds = range(vector)){
stopifnot(is.numeric(vector))
repelled_vector <-
packcircles::circleRepelLayout(x = data.frame(vector, ypos = 1, repel_radius),
xysizecols = c("vector", "ypos", "repel_radius"),
xlim = repel_bounds, ylim = c(0,1),
wrap = FALSE) %>%
as.data.frame() %>%
.$layout.x
return(repelled_vector)
}
overlapping_vec <- c(1, 1.1, 1.2, 10, 10.1, 10.2)
repelled_vec_default <- repel_vector(overlapping_vec)
repelled_vec_tighter <- repel_vector(overlapping_vec, repel_radius = 0.35)
ggplot() +
annotate("richtext", x = overlapping_vec, y = 3, label = "**test**", alpha = 0.5) +
annotate("richtext", x = repelled_vec_default, y = 2, label = "**test**", alpha = 0.5) +
annotate("richtext", x = repelled_vec_tighter, y = 1, label = "**test**", alpha = 0.5) +
scale_y_continuous(breaks = 1:3, labels = c("Tighter repel", "Default repel", "Overlapping points"))
In theory you apply this to 2D repelling as well.
To solve the problem in my question, this can be applied like so:
test_img <- list.files(system.file("extdata", package="ggimage"), pattern="png", full.names=TRUE)
set.seed(123)
df <-
tibble(date = as.Date(paste0("2020-", round(runif(45, 1, 2)), "-", round(runif(45, 1, 10)))),
group = paste0("Timeline ", rep(1:9, each = 5)),
img = sample(test_img, size = 45, replace = T) ) %>%
group_by(group) %>%
mutate(repelled_date = repel_vector(as.numeric(date),
repel_radius = 4,
repel_bounds = range(as.numeric(date)) + c(-3,3)),
repelled_date = as.Date(repelled_date, origin = "1970-01-01"))
df %>%
ggplot() +
geom_line(aes(x = date, y = group, group = group), size = 5, alpha = 0.2) +
geom_image(aes(x = repelled_date, y = group, image = img, group = group), asp = 1)
Created on 2021-10-30 by the reprex package (v2.0.1)

Combine text and image in a geom_label_repel in ggplot

I'm trying to do a line graph and have the last point of each series be labelled by a combination of text and image. I usually use ggrepel package for this and have no problem doing this with text only. My problem is I can't figure out how to add an image in the label.
I thought that a label like Country <img src='https://link.com/to/flag.png' width='20'/> would work and so this is what I've tried to do:
library(dplyr)
library(ggplot2)
library(ggrepel)
# example df
df <- data.frame(
Country = c(rep("France", 5), rep("United Kingdom", 5)),
Ratio = rnorm(10),
Days = c(seq(1, 5, 1), seq(4, 8, 1)),
abbr = c(rep("FR", 5), rep("GB", 5))) %>%
group_by(Country) %>%
# add "label" only to last point of the graph
mutate(label = if_else(Days == max(Days),
# combine text and img of country's flag
true = paste0(Country, " <img src='https://raw.githubusercontent.com/behdad/region-flags/gh-pages/png/", abbr, ".png' width='20'/>"),
false = NA_character_)
)
# line graph
ggplot(data = df, aes(x = Days, y = Ratio, color = Country)) +
geom_line(size = 1) +
theme(legend.position = "none") +
geom_label_repel(aes(label = label),
nudge_x = 1,
na.rm = T)
But this produces the raw label and not the country's name with its flag, as intended:
This is obviously not the way to go, can anyone please help me?
Try this approach using ggtext function geom_richtext(). You can customize other elements if you wish. Here the code:
library(dplyr)
library(ggplot2)
library(ggrepel)
library(ggtext)
# example df
df <- data.frame(
Country = c(rep("France", 5), rep("United Kingdom", 5)),
Ratio = rnorm(10),
Days = c(seq(1, 5, 1), seq(4, 8, 1)),
abbr = c(rep("FR", 5), rep("GB", 5))) %>%
group_by(Country) %>%
# add "label" only to last point of the graph
mutate(label = if_else(Days == max(Days),
# combine text and img of country's flag
true = paste0(Country, " <img src='https://raw.githubusercontent.com/behdad/region-flags/gh-pages/png/", abbr, ".png' width='20'/>"),
false = NA_character_)
)
# line graph
ggplot(data = df, aes(x = Days, y = Ratio, color = Country,label = label)) +
geom_line(size = 1) +
theme(legend.position = "none") +
geom_richtext(na.rm = T,nudge_x = -0.1,nudge_y = -0.1)
Output:

Resources