In this experiment, we tracked presence or absence of bacterial infection in our subject animals. We were able to isolate which type of bacteria was present in our animals and created a plot that has Week Since Experiment Start on the X axis, and Percentage of Animals Positive for bacterial infection on the Y axis. This is a stacked identity ggplot where each geom_bar contains the different identities of the bacteria that were in the infected animals each week. Here is a sample dataset with the corresponding ggplot code and result:
# Reproducible example data: one row per recorded bacteria type per week.
# PosOcc holds the two occurrence counts that alternate down the rows.
PosOcc <- seq(from = 1, to = 2, by = 1)
DummyData <- data.frame(
  # Weeks 1-10, repeated once for every row recorded in that week
  WeeksSinceStart = rep(
    as.numeric(1:10),
    times = c(9, 9, 8, 9, 10, 8, 9, 7, 5, 4)
  ),
  # One line per week
  BacteriaType = c(
    "BactA", "BactB", "BactD", "BactB", "BactE", "BactA", "BactS", "BactF", "BactE",
    "BactH", "BactJ", "BactK", "BactE", "BactB", "BactS", "BactF", "BactL", "BactE",
    "BactW", "BactH", "BactS", "BactJ", "BactQ", "BactN", "BactW", "BactA",
    "BactD", "BactE", "BactA", "BactC", "BactD", "BactK", "BactL", "BactE", "BactD",
    "BactA", "BactS", "BactK", "BactB", "BactE", "BactF", "BactH", "BactN", "BactE", "BactL",
    "BactZ", "BactE", "BactC", "BactR", "BactD", "BactJ", "BactN", "BactK",
    "BactW", "BactR", "BactE", "BactW", "BactA", "BactM", "BactG", "BactO", "BactI",
    "BactE", "BactD", "BactM", "BactH", "BactC", "BactM", "BactW",
    "BactA", "BactL", "BactB", "BactE", "BactA",
    "BactS", "BactH", "BactQ", "BactF"
  ),
  # 1, 2, 1, 2, ... down all 78 rows
  PositiveOccurences = rep(PosOcc, length.out = 78),
  # Animals sampled: 78 in weeks 1-8, 29 in week 9, 10 in week 10
  SampleSize = rep(c(78, 29, 10), times = c(69, 5, 4)),
  # Sample size on the first row of each week only, NA on the others
  # (one line per week)
  NewSampleSize = c(
    78, NA, NA, NA, NA, NA, NA, NA, NA,
    78, NA, NA, NA, NA, NA, NA, NA, NA,
    78, NA, NA, NA, NA, NA, NA, NA,
    78, NA, NA, NA, NA, NA, NA, NA, NA,
    78, NA, NA, NA, NA, NA, NA, NA, NA, NA,
    78, NA, NA, NA, NA, NA, NA, NA,
    78, NA, NA, NA, NA, NA, NA, NA, NA,
    78, NA, NA, NA, NA, NA, NA,
    29, NA, NA, NA, NA,
    10, NA, NA, NA
  ),
  stringsAsFactors = FALSE
)
# brewer.pal() comes from RColorBrewer, which has to be attached explicitly.
library(RColorBrewer)
# One fill colour per distinct bacteria type, so the palette always covers
# every level handed to scale_fill_manual().
numcolor <- length(unique(DummyData$BacteriaType))
# Interpolate the 8-colour "Set3" palette up to the number of colours needed.
plotcolors <- colorRampPalette(brewer.pal(8, "Set3"))(numcolor)
# Stacked bar chart for the dummy data: proportion of positive animals per
# week, one fill colour per bacteria type.
DummyDataPlot <- ggplot(
  data = DummyData,
  mapping = aes(
    x = WeeksSinceStart,
    y = PositiveOccurences / SampleSize,
    fill = BacteriaType
  )
) +
  geom_bar(stat = "identity", position = "stack") +
  # The text layer inherits the per-row y aesthetic and is not stacked, so
  # each label is drawn 0.1 above a single segment's own height.
  geom_text(label = DummyData$NewSampleSize, nudge_y = 0.1) +
  scale_y_continuous(breaks = seq(0, 1, by = 0.1), limits = c(0, 0.6)) +
  scale_x_continuous(breaks = seq(0, 10, by = 1), limits = c(0.5, 11)) +
  labs(x = "Weeks Since Start", y = "Proportion Positive") +
  scale_fill_manual(values = plotcolors)
The problem: I cannot seem to find a way to position the labels from geom_text directly over each bar. I would also love to add the text "n = " to the sample size value directly over each bar. Thank you for your help!
I have tried different values for position_dodge statement and nudge_y statement with no success.
Sometimes the easiest approach is to do some data wrangling, i.e. one option would be to create a separate dataframe for your labels:
library(ggplot2)
library(dplyr)
# One label row per week: `y` is the total height of that week's stacked bar
# and `SampleSize` is the number of animals sampled that week.
dat_label <- DummyData |>
  group_by(WeeksSinceStart) |>
  summarise(
    y = sum(PositiveOccurences / SampleSize),
    # first() always yields exactly one value per week; unique() would produce
    # several rows for a week whose SampleSize entries were not all equal.
    SampleSize = first(SampleSize)
  )
# Stacked bars from the raw data, plus one "n = <sample size>" label per week
# taken from the summary frame `dat_label`.
ggplot(
  DummyData,
  aes(x = WeeksSinceStart, y = PositiveOccurences / SampleSize, fill = BacteriaType)
) +
  geom_bar(position = "stack", stat = "identity") +
  geom_text(
    data = dat_label,
    aes(x = WeeksSinceStart, y = y, label = paste0("n = ", SampleSize)),
    # The label layer has its own data and no fill, so it must not inherit
    # the bar aesthetics.
    inherit.aes = FALSE,
    # Anchor the bottom of the text just above the top of the bar instead of
    # centring the text on it.
    vjust = 0,
    nudge_y = .01
  ) +
  # scale_y_continuous(limits = c(0, 0.6), breaks = seq(0, 1, by = 0.1)) +
  scale_x_continuous(limits = c(0.5, 11), breaks = seq(0, 10, by = 1)) +
  labs(
    x = "Weeks Since Start",
    y = "Proportion Positive"
  ) +
  scale_fill_manual(values = plotcolors)
I have data with large degrees of separation between "clusters/groups" of values that I hope to make a histogram with, but dividing the bins into equal-sized groups has been difficult. I'd like zero (0) to have its own bin, the total number of equally spaced bins to be < 8 (ideally, to avoid crowding the plot), and an extra empty bin labelled "..." to signify the large gaps between the data values. The actual dataset has 800+ zeros, with maybe 5% of the data > 0. Naturally the zeros will overshadow the rest of the data, but a log transform will fix that. I just can't figure out the best way to break up the data...
Data looks like this:
# Simulated values: a block of exact zeros plus three small groups of
# uniformly distributed values drawn from different ranges.
set.seed(123)
zero   <- runif(n = 50, min = 0, max = 0)
small  <- runif(n = 7, min = 0, max = 0.1)
medium <- runif(n = 5, min = 0, max = 0.5)
high   <- runif(n = 3, min = 1.5, max = 2.5)
# Wrap a vector in a two-column data frame labelled with its own name.
#
# x: a vector of values. The expression supplied for `x` is captured as text
#    and used as the label, e.g. f(zero) labels every row "zero".
# Returns a data.frame with columns `ID` (the label, repeated for every
# element) and `value` (the elements of `x`).
f <- function(x) {
  # deparse() splits a long expression into several strings, which would give
  # `ID` a different length from `value`; collapse them into one label.
  id <- paste(deparse(substitute(x)), collapse = " ")
  # Repeat the label explicitly so a zero-length `x` yields a zero-row frame.
  data.frame(ID = rep(id, length(x)), value = x)
}
# Stack the four groups and keep only their values, as a one-column data
# frame with the column `value`. The object shares its name with base R's
# all() function.
all <- data.frame(
  value = bind_rows(f(zero), f(small), f(medium), f(high))[["value"]]
)
My attempt:
# Attempt: bin the values with cut(), tabulate the bins, append a placeholder
# "..." row, then move the "..." level within the factor.
# NOTE(review): `seq(0.01:0.4)` is not a sequence from 0.01 to 0.4; the `:`
# range is evaluated first and its result is handed to seq() as a single
# argument. Something like seq(0.01, 0.4, by = <step>) was presumably
# intended - confirm.
bins <- all %>% mutate(bin = cut(all$value, breaks = c(0, seq(0.01:0.4), Inf), right = FALSE)) %>%
# Frequency of each bin, stored in `freq`
count(bin, name = "freq") %>%
# Extra row without a count, used as the "..." gap marker
add_row(bin = "...", freq = NA_integer_) %>%
# NOTE(review): `after` is a position among the factor levels, so a whole
# number is presumably required here rather than 0.4 - confirm.
mutate(bin = fct_relevel(bin, "...", after = 0.4))
But I get this error:
Error in `mutate()`:
! Problem while computing `bin = fct_relevel(bin, "...", after = 0.5)`.
Caused by error:
! `idx` must contain one integer for each level of `f`
This is not equally spaced, but I'm looking for something like this as labels for my plot:
# Desired axis labels, one per factor level, in plotting order.
levels(bins$bin) <- c("0", "0.01-0.05", "0.05-0.1", "0.1-0.2", "0.2-0.3", "0.3-0.4", "...", "2.0+")
# The counts are already tabulated in `freq`, so draw them as they are with
# geom_col() rather than overriding the binning stat of geom_histogram().
ggplot(bins, aes(x = bin, y = freq, fill = bin)) +
  geom_col(colour = "black")
You can use cut directly inside ggplot
# Bin `value` on the fly inside aes() and count the rows per bin.
ggplot(
  all,
  # include.lowest = TRUE closes the first interval on the left, so values
  # equal to the lowest break (0) are binned instead of becoming NA.
  aes(cut(value, breaks = c(0, 0.25, 0.5, 3), include.lowest = TRUE))
) +
  geom_bar() +
  # Log-scaled counts keep the small bins visible next to the zeros.
  scale_y_log10() +
  labs(x = "value")
This worked for me (using my own data):
# Bin `den` into left-closed intervals, tabulate the bins, and add a
# placeholder "..." row that marks the gap in the data.
bins <- WET %>%
  mutate(
    bin = cut(
      den,
      breaks = c(0, seq(from = 0.001, to = 0.225, by = 0.15), 0.255, 0.3, Inf),
      right = FALSE
    )
  ) %>%
  # Frequency table: one row per bin, count stored in `freq`
  count(bin, name = "freq") %>%
  # Placeholder row without a count
  add_row(bin = "...", freq = NA_integer_) %>%
  # `after` must be a whole-number position: here "..." goes right after the
  # 3rd level, just as fct_relevel(f, "a", after = 2) puts "a" after the 2nd.
  mutate(bin = fct_relevel(bin, "...", after = 3))

# Readable axis labels, in level order
levels(bins$bin) <- c("0", "0.001-0.15", "0.15-0.255", "...", "0.3+")
# Bar chart of the binned frequencies, with the count printed above each bar.
ggplot(data = bins, mapping = aes(x = bin, y = freq, fill = bin)) +
  geom_bar(stat = "identity", colour = "black") +
  geom_text(aes(label = freq), vjust = -0.5) +
  scale_y_continuous(
    limits = c(0, 800),
    expand = expansion(mult = c(0, 0.05))
  ) +
  # Only show these legend values (exclude "...")
  scale_fill_brewer(
    name = "Density",
    palette = "Greys",
    breaks = c("0", "0.001-0.15", "0.15-0.255", "0.3+")
  ) +
  labs(
    title = "Wet seasons - Pink shrimp density (no./m2)",
    x = "Density range",
    y = "Frequency"
  ) +
  theme(
    # Centre the plot title
    plot.title = element_text(hjust = 0.5),
    axis.text = element_text(size = 9, face = "bold"),
    # Axis titles
    axis.title = element_text(size = 13, face = "bold"),
    # Adjust distance of x-axis title from plot
    axis.title.x = element_text(vjust = -3),
    panel.border = element_rect(color = "black", fill = NA, size = 1),
    # Top, right, bottom and left margins around the whole plot
    plot.margin = margin(t = 20, r = 50, b = 40, l = 10)
  )
I am trying to add labels to a beeswarm plot I am making using ggplot2. However, it seems as if the labels are pointing to the center line, and not the individual dots. Here is my code:
library(ggbeeswarm)
library(tidyverse)
# Example data: one row per letter category, every row in the same year, with
# random sizes and sales-growth values.
DataTest <- tibble(
  Category = LETTERS,
  Year = runif(n = 26, min = 2016, max = 2016),
  Size = runif(n = 26, min = 5, max = 10),
  SalesGrowth = runif(n = 26, min = -1, max = 1)
)
# Horizontal beeswarm of sales growth, with repelled labels for categories
# A, B and C only.
ggplot() +
  coord_flip() +
  geom_quasirandom(
    data = DataTest,
    mapping = aes(x = factor(Year), y = SalesGrowth, size = Size)
  ) +
  # The label layer receives only the filtered rows and uses its default
  # position, independent of the quasirandom layout of the points.
  geom_label_repel(
    data = DataTest %>% filter(Category %in% c("A", "B", "C")),
    mapping = aes(x = factor(Year), y = SalesGrowth, label = Category),
    box.padding = 2
  ) +
  scale_size_binned() +
  theme(legend.position = "none")
And here is what the output is looking like visually. I want my labels to point to the respective dots.
This could be achieved like so:
Make use of position_quasirandom in geom_label_repel
As a general rule when using ggrepel, pass the whole data to geom_label_repel and set undesired labels equal to "" instead of filtering the data.
library(ggplot2)
library(ggbeeswarm)
library(ggrepel)
# Seed the generator before the random draws so the example data itself is
# reproducible from run to run.
set.seed(42)
DataTest <- data.frame(
  Category = LETTERS,
  # Every row belongs to the same year
  Year = rep(2016, times = 26),
  Size = runif(26, min = 5, max = 10),
  SalesGrowth = runif(26, min = -1, max = 1)
)
# Horizontal beeswarm of sales growth; labels are attached to the points
# themselves.
ggplot() +
  coord_flip() +
  geom_quasirandom(
    data = DataTest,
    mapping = aes(x = factor(Year), y = SalesGrowth, size = Size)
  ) +
  # The label layer gets every row and the same quasirandom position as the
  # points; rows that should stay unlabelled receive an empty string.
  geom_label_repel(
    data = DataTest,
    mapping = aes(
      x = factor(Year),
      y = SalesGrowth,
      label = ifelse(Category %in% c("A", "B", "C"), Category, "")
    ),
    position = position_quasirandom(),
    box.padding = 2,
    seed = 42
  ) +
  scale_size_binned() +
  theme(legend.position = "none")
I am trying to create a plot to track results over days for multiple factors. Ideally I would like my x-axis to be Day, with the day number centered in the middle of the reps for that particular day, the y-axis to be Result, and the facets to be the Lot (1-4). I am having difficulty making the day centered on the bottom using repeatable text, as the number of reps may vary.
I was using ideas shown in this post: Multi-row x-axis labels in ggplot line chart but have been unable to make any progress.
Here is some code I have been using and the plot that I have so far. The x axis is far too busy and I am trying to consolidate it.
# Example data: 2 systems x 4 lots x 5 days x 6 reps (240 rows) with random
# results. The shorter patterns are repeated explicitly to fill all rows.
data <- data.frame(
  System = rep(c("A", "B"), each = 120),
  Lot = rep(rep(1:4, each = 30), times = 2),
  Day = rep(rep(1:5, each = 6), times = 8),
  Rep = rep(1:6, times = 40),
  Result = rnorm(240)
)
library(ggplot2)
# Result per Day/Rep combination, one line per system and one panel per lot.
ggplot(
  data,
  aes(
    x = interaction(Day, Rep, lex.order = TRUE),
    y = Result,
    color = System,
    group = System
  )
) +
  geom_point() +
  geom_line() +
  theme(legend.position = "bottom") +
  facet_wrap(~Lot, ncol = 1) +
  # Vertical lines at the row indices where Rep is 1 on any day except day 1
  geom_vline(
    xintercept = which(data$Rep == 1 & data$Day != 1),
    color = "gray60"
  )
I'm not 100% sure if this is exactly what you are after but this will center the day on the x-axis.
library(dplyr)
library(tidyr)
library(ggplot2)
# Example data: 2 systems x 4 lots x 5 days x 6 reps (240 rows) with random
# results. The shorter patterns are repeated explicitly to fill all rows.
df <- data.frame(
  System = rep(c("A", "B"), each = 120),
  Lot = rep(rep(1:4, each = 30), times = 2),
  Day = rep(rep(1:5, each = 6), times = 8),
  Rep = rep(1:6, times = 40),
  Result = rnorm(240)
)
# Add a numeric Day_Rep column that places every rep on a continuous x-axis.
df <- df %>%
  # Join Day and Rep as "<Day>.<Rep>" while keeping the original columns
  unite(Day_Rep, Day, Rep, sep = ".", remove = FALSE) %>%
  # Read the joined text as a number, e.g. "2.3" becomes 2.3.
  # NOTE(review): a Rep of 10 would give "1.10", which parses to the same
  # number as "1.1" - confirm that Rep stays below 10.
  mutate(Day_Rep = as.numeric(Day_Rep))
# One axis tick per day (not one per row), placed midway between that day's
# first and last rep, so the label stays centred however many reps there are.
day_ids <- sort(unique(df$Day))
day_mid <- vapply(
  day_ids,
  function(d) mean(range(df$Day_Rep[df$Day == d])),
  numeric(1)
)

# Result per rep on a continuous Day_Rep axis, one panel per lot.
ggplot(df, aes(x = Day_Rep, y = Result, color = System, group = System)) +
  geom_point() +
  geom_line() +
  theme(legend.position = "bottom") +
  facet_wrap(~Lot, ncol = 1) +
  scale_x_continuous(labels = day_ids, breaks = day_mid) +
  # Separator line at the start of every day except the first
  geom_vline(xintercept = day_ids[-1])