In this experiment, we tracked presence or absence of bacterial infection in our subject animals. We were able to isolate which type of bacteria was present in our animals and created a plot that has Week Since Experiment Start on the X axis, and Percentage of Animals Positive for bacterial infection on the Y axis. This is a stacked identity ggplot where each geom_bar contains the different identities of the bacteria that were in the infected animals each week. Here is a sample dataset with the corresponding ggplot code and result:
# Example data: one row per (week, bacteria type) finding, 78 rows in total.
# PosOcc is the repeating 1, 2 pattern used for the positive counts.
PosOcc <- seq(from = 1, to = 2, by = 1)

DummyData <- local({
  # Number of rows recorded in each of the ten weeks (sums to 78).
  rows_per_week <- c(9, 9, 8, 9, 10, 8, 9, 7, 5, 4)
  # Sample size attached to every row of the corresponding week.
  size_per_week <- c(78, 78, 78, 78, 78, 78, 78, 78, 29, 10)

  week <- rep(seq(1, 10, by = 1), times = rows_per_week)
  sample_size <- rep(size_per_week, times = rows_per_week)

  data.frame(
    WeeksSinceStart = week,
    BacteriaType = c(
      # week 1
      "BactA", "BactB", "BactD", "BactB", "BactE", "BactA", "BactS", "BactF", "BactE",
      # week 2
      "BactH", "BactJ", "BactK", "BactE", "BactB", "BactS", "BactF", "BactL", "BactE",
      # week 3
      "BactW", "BactH", "BactS", "BactJ", "BactQ", "BactN", "BactW", "BactA",
      # week 4
      "BactD", "BactE", "BactA", "BactC", "BactD", "BactK", "BactL", "BactE", "BactD",
      # week 5
      "BactA", "BactS", "BactK", "BactB", "BactE", "BactF", "BactH", "BactN", "BactE", "BactL",
      # week 6
      "BactZ", "BactE", "BactC", "BactR", "BactD", "BactJ", "BactN", "BactK",
      # week 7
      "BactW", "BactR", "BactE", "BactW", "BactA", "BactM", "BactG", "BactO", "BactI",
      # week 8
      "BactE", "BactD", "BactM", "BactH", "BactC", "BactM", "BactW",
      # week 9
      "BactA", "BactL", "BactB", "BactE", "BactA",
      # week 10
      "BactS", "BactH", "BactQ", "BactF"
    ),
    # Counts alternate 1, 2, 1, 2, ... down the rows.
    PositiveOccurences = rep(PosOcc, length.out = length(week)),
    SampleSize = sample_size,
    # Sample size on the first row of each week only; NA on the other rows.
    NewSampleSize = ifelse(duplicated(week), NA, sample_size),
    stringsAsFactors = FALSE
  )
})
# Palette: interpolate the eight "Set3" colours up to `numcolor` fill colours.
numcolor <- 20
plotcolors <- colorRampPalette(brewer.pal(8, "Set3"))(numcolor)

# Stacked bars of the weekly proportion positive, split by bacteria type, with
# the NewSampleSize values drawn as text (rows holding NA get no usable label).
DummyDataPlot <- ggplot(
  DummyData,
  aes(
    x = WeeksSinceStart,
    y = PositiveOccurences / SampleSize,
    fill = BacteriaType
  )
) +
  geom_col(position = "stack") +
  geom_text(label = DummyData$NewSampleSize, nudge_y = 0.1) +
  scale_x_continuous(breaks = seq(0, 10, by = 1), limits = c(0.5, 11)) +
  scale_y_continuous(breaks = seq(0, 1, by = 0.1), limits = c(0, 0.6)) +
  scale_fill_manual(values = plotcolors) +
  labs(x = "Weeks Since Start", y = "Proportion Positive")
The problem: I cannot seem to find a way to position the labels from geom_text directly over each bar. I would also love to add the text "n = " to the sample size value directly over each bar. Thank you for your help!
I have tried different values for position_dodge statement and nudge_y statement with no success.
Sometimes the easiest approach is to do some data wrangling, i.e. one option would be to create a separate dataframe for your labels:
library(ggplot2)
library(dplyr)

# One label row per week: `y` is the total height of that week's stacked bar
# (sum of the per-type proportions) and `SampleSize` is the week's sample size.
dat_label <- DummyData |>
  group_by(WeeksSinceStart) |>
  summarise(
    y = sum(PositiveOccurences / SampleSize),
    SampleSize = unique(SampleSize)
  )

ggplot(
  DummyData,
  aes(x = WeeksSinceStart, y = PositiveOccurences / SampleSize, fill = BacteriaType)
) +
  geom_bar(position = "stack", stat = "identity") +
  # Labels are drawn from the summary frame, so the bar aesthetics (notably
  # `fill`, which dat_label does not contain) must not be inherited.
  # The "n = " prefix is pasted onto the sample size, as asked in the question.
  geom_text(
    data = dat_label,
    aes(x = WeeksSinceStart, y = y, label = paste0("n = ", SampleSize)),
    inherit.aes = FALSE,
    nudge_y = .01
  ) +
  # NOTE(review): the y limits stay disabled, presumably because the tallest
  # bar reaches 0.6 and a label nudged above it would fall outside c(0, 0.6).
  # scale_y_continuous(limits = c(0, 0.6), breaks = seq(0, 1, by = 0.1)) +
  scale_x_continuous(limits = c(0.5, 11), breaks = seq(0, 10, by = 1)) +
  labs(
    x = "Weeks Since Start",
    y = "Proportion Positive"
  ) +
  scale_fill_manual(values = plotcolors)
I am making a line plot of several groups and want to create a visualization in which the lines of one group are highlighted
# One line per participant. The `color` column already holds colour names, so
# it is used as-is and only relabelled in the legend.
ggplot(
  df,
  aes(x = timepoint, y = var, group = participant_id, color = color)
) +
  geom_line() +
  scale_color_identity(
    guide = "legend",
    labels = c(red = "g1", gray90 = "Other")
  )
However, the highlighted group's lines are partially obscured by the other groups' lines
How can I make the highlighted lines always appear on top of the other groups' lines?
The simplest way to do this is to plot the gray and red groups on different layers.
First, let's try to replicate your problem with a dummy data set:
# Dummy data: 50 participants, each measured at 25 time points (0-24),
# giving 50 * 25 = 1250 rows.
set.seed(1)
df <- data.frame(
  participant_id = rep(1:50, each = 25),
  timepoint = factor(rep(0:24, 50)),
  # Per participant: a random baseline plus a noisy periodic component.
  var = c(replicate(50, runif(1, 50, 200) + runif(25, 0.3, 1.5) *
    sin(0:24 / (0.6 * pi))^2 / seq(0.002, 0.005, length.out = 25))),
  # One colour per participant (red with weight 1 vs gray90 with weight 9),
  # repeated over that participant's 25 rows so it lines up with
  # participant_id. Any larger `each` makes this column longer than the
  # others, and data.frame() would silently recycle them to match.
  color = rep(sample(c("red", "gray90"), 50, TRUE, prob = c(1, 9)), each = 25)
)
Now we apply your plotting code:
library(ggplot2)
# The asker's plotting code applied to the dummy data: every participant is
# drawn in a single layer, coloured by the literal values in `color`.
ggplot(
  data = df,
  mapping = aes(
    x = timepoint,
    y = var,
    group = participant_id,
    color = color
  )
) +
  geom_line() +
  scale_color_identity(
    guide = "legend",
    labels = c(red = "g1", gray90 = "Other")
  ) +
  theme_classic()
This looks broadly similar to your plot. If instead we plot in different layers, we get:
# Draw the gray lines first and the red lines in a later layer, so the
# highlighted group is always painted on top of the others.
ggplot(df, aes(timepoint, var, group = participant_id)) +
  geom_line(data = df[df$color == "gray90", ], aes(color = "Other")) +
  geom_line(data = df[df$color == "red", ], aes(color = "g1")) +
  # Named values tie each colour to its legend label explicitly instead of
  # relying on how the two labels happen to sort.
  scale_color_manual(values = c(g1 = "red", Other = "gray90")) +
  theme_classic()
Created on 2022-06-20 by the reprex package (v2.0.1)
You can use factor releveling to bring the line(s) of interest to the front.
First, let's plot the data as is, with the red line partly hidden by others.
library(ggplot2)
library(dplyr)
# Simulated data: 20 participants (p_1 ... p_20), 100 time points each.
# Only the first 100 rows (participant p_1) are coloured red.
set.seed(13)
df <- data.frame(
  timepoint = rep(seq_len(100), times = 20),
  participant_id = paste0("p_", rep(seq_len(20), each = 100)),
  var = abs(rnorm(2000, mean = 200, sd = 50) - 200),
  color = rep(c("red", "gray90"), times = c(100, 1900))
)
# Baseline plot: all participants in one layer, coloured by the literal
# values in `color`.
ggplot(
  df,
  aes(x = timepoint, y = var, group = participant_id, color = color)
) +
  geom_line() +
  scale_color_identity(
    guide = "legend",
    labels = c(red = "g1", gray90 = "Other")
  )
Now let's bring p_1 to front by making it the last factor level.
# Make p_1 the last level of participant_id (move it first, then reverse the
# level order) before plotting, so that its line is brought to the front.
df %>%
  mutate(
    participant_id = factor(participant_id),
    participant_id = relevel(participant_id, ref = "p_1"),
    participant_id = factor(participant_id, levels = rev(levels(participant_id)))
  ) %>%
  ggplot(aes(x = timepoint, y = var, group = participant_id, color = color)) +
  geom_line() +
  scale_color_identity(
    guide = "legend",
    labels = c(red = "g1", gray90 = "Other")
  )
I'm trying to plot a stacked bar chart with ggplot. Below is my code:
# Question code: stacked bars of N by X, filled by Y.
# X is numeric here while the x scale is declared discrete; this is the setup
# for which the "No"/"Yes" tick labels do not show up.
df <- data.frame(
  Y = c(0, 0, 1, 1),
  X = c(0, 1, 0, 1),
  N = c(200, 50, 300, 70)
)

ggplot(df, aes(x = X, y = N, fill = Y)) +
  geom_col(position = "stack", width = 0.7) +
  scale_x_discrete(name = "", breaks = c(0, 1), labels = c("No", "Yes")) +
  theme(legend.position = "none")
I want to show 'No' and 'Yes' as the tick labels on the x axis, but nothing shows up. Does anyone know why my tick labels do not show up? I do not understand what I did wrong.
You just need to insert factor(X) to make X discrete rather than continuous:
library(tidyverse)
# Same data as in the question.
df <- data.frame(
  Y = c(0, 0, 1, 1),
  X = c(0, 1, 0, 1),
  N = c(200, 50, 300, 70)
)

# Wrapping X in factor() makes the x aesthetic discrete, so the discrete x
# scale and its "No"/"Yes" labels apply.
ggplot(df, aes(x = factor(X), y = N, fill = Y)) +
  geom_col(position = "stack", width = 0.7) +
  scale_x_discrete(name = "", breaks = c(0, 1), labels = c("No", "Yes")) +
  theme(legend.position = "none")
Created on 2022-06-14 by the reprex package (v2.0.1)
An easier way to do it may be to do the data transformation outside the ggplot function:
library(ggplot2)
library(dplyr)
# Same data as in the question.
df <- data.frame(
  Y = c(0, 0, 1, 1),
  X = c(0, 1, 0, 1),
  N = c(200, 50, 300, 70)
)

# Recode X to "No"/"Yes" before plotting, so the axis is labelled directly
# from the data and no explicit x scale is needed.
df %>%
  mutate(X = if_else(X == 0, "No", "Yes")) %>%
  ggplot(aes(x = X, y = N, fill = Y)) +
  geom_col(position = "stack", width = 0.7) +
  theme(legend.position = "none")
I created a heat map that represents the usage time of two products (A and B) that are available in colors C1 and C2. According to the time of use, it is possible to classify how the product was used (good, regular or bad). Within the usage classification there are categories that overlap as a function of time, as described below:
Good: use time greater than or equal to 280 minutes.
Regular: use time between 150 and 350 minutes.
Bad: use time less than or equal to 10 minutes.
I want to create facets for the categories good, regular and bad, without completely separating the facets but overlapping them as shown in the second image below. The attempts I've made have been unsatisfactory. The final aesthetics of the heat map need not be exactly the same as the one shown in the second image, what is necessary is to correctly indicate the classification.
library(ggplot2)
# Every combination of usage time, colour and product (8 x 2 x 2 = 32 rows).
Product <- c("A", "B")
Color <- c("C1", "C2")
Time <- seq(from = 10, to = 430, by = 60)

df <- expand.grid(Time = Time, Color = Color, Product = Product)

# Running index 1..32, used as the tile fill and label.
df$Fill_factor <- as.numeric(seq_len(32))

# Usage class from Time. The rules overlap, so they are applied from lowest
# to highest priority and later assignments overwrite earlier ones:
# "Bad" beats "Regular", which beats "Good".
df$Usage <- "Without classification"
df$Usage[df$Time >= 280] <- "Good"
df$Usage[df$Time >= 150 & df$Time <= 350] <- "Regular"
df$Usage[df$Time <= 10] <- "Bad"
# Heat map: one tile per product and usage time, labelled with its fill
# value, with one panel per colour.
ggplot(df, aes(x = Product, y = Time, fill = Fill_factor)) +
  geom_tile() +
  geom_text(aes(label = Fill_factor), size = 2.5) +
  scale_y_continuous(breaks = seq(10, 430, 60)) +
  facet_grid(~ Color)
# Failed attempt: the same heat map, additionally faceted by Usage in rows.
ggplot(df, aes(x = Product, y = Time, fill = Fill_factor)) +
  geom_tile() +
  geom_text(aes(label = Fill_factor), size = 2.5) +
  scale_y_continuous(breaks = seq(10, 430, 60)) +
  facet_grid(Usage ~ Color)
I don't think there is any way to solve this using facets as they exist now. However, you can annotate a few rectangles with text outside the plotting area by setting coord_cartesian(clip = "off") with the right out-of-bounds argument in the scales. Unfortunately, this works better with a continuous x-axis, but you can of course make 'pseudodiscrete' scales.
library(ggplot2)
# Every combination of usage time, colour and product (8 x 2 x 2 = 32 rows).
Product <- c("A", "B")
Color <- c("C1", "C2")
Time <- seq(from = 10, to = 430, by = 60)

df <- expand.grid(Time = Time, Color = Color, Product = Product)

# Running index 1..32, used as the tile fill and label.
df$Fill_factor <- as.numeric(seq_len(nrow(df)))

# Annotation data: one rectangle per usage class, placed to the right of the
# last product column and attached to colour "C2" only.
# The first rectangle ("Good") is wider than the other two.
max_x <- length(unique(df$Product)) + 0.6
anno <- data.frame(
  Color = rep("C2", 3),
  Usage = c("Good", "Regular", "Bad"),
  xmin = rep(max_x, 3),
  xmax = max_x + c(0.5, 0.25, 0.25),
  ymin = c(280, 160, -20),
  ymax = c(460, 340, 40)
)
# Heat map on a "pseudo-discrete" x axis: products are mapped to their integer
# position so the annotation rectangles can sit at continuous x positions to
# the right of the tiles.
ggplot(
  df,
  aes(
    x = match(Product, sort(unique(Product))),
    y = Time,
    fill = Fill_factor
  )
) +
  geom_tile() +
  geom_text(aes(label = Fill_factor), size = 2.5) +
  # Usage rectangles from the annotation data (one fill colour per row).
  geom_rect(
    data = anno,
    mapping = aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax),
    fill = c("limegreen", "gold", "tomato"),
    inherit.aes = FALSE
  ) +
  # Usage names, rotated and centred on their rectangles.
  geom_text(
    data = anno,
    mapping = aes(
      x = (xmin + xmax) / 2,
      y = (ymin + ymax) / 2,
      label = Usage
    ),
    angle = 270,
    inherit.aes = FALSE
  ) +
  facet_grid(~ Color) +
  # Breaks and labels as they would be on a discrete scale. oob_keep retains
  # the annotation data that lies beyond the x limits.
  scale_x_continuous(
    breaks = seq_along(unique(df$Product)),
    labels = sort(unique(df$Product)),
    limits = c(0.5, 2.5),
    oob = scales::oob_keep
  ) +
  scale_y_continuous(breaks = seq(10, 430, 60)) +
  # Clipping is switched off so the annotations can be drawn outside the
  # panel; the legend box gets a left margin of 40.
  coord_cartesian(clip = "off") +
  theme(legend.box.margin = margin(l = 40))
Created on 2021-08-07 by the reprex package (v1.0.0)