In this experiment, we tracked presence or absence of bacterial infection in our subject animals. We were able to isolate which type of bacteria was present in our animals and created a plot that has Week Since Experiment Start on the X axis, and Percentage of Animals Positive for bacterial infection on the Y axis. This is a stacked identity ggplot where each geom_bar contains the different identities of the bacteria that were in the infected animals each week. Here is a sample dataset with the corresponding ggplot code and result:
# Dummy data set: one row per bacteria type recorded in a given week.
# The frame is built directly from its columns instead of from an all-NA
# matrix whose columns are overwritten afterwards.
DummyData <- data.frame(
  WeeksSinceStart = c(1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,9,9,9,9,9,10,10,10,10),
  BacteriaType = c("BactA","BactB","BactD","BactB","BactE","BactA","BactS","BactF","BactE","BactH","BactJ","BactK","BactE","BactB","BactS","BactF","BactL","BactE","BactW","BactH","BactS","BactJ","BactQ","BactN","BactW","BactA","BactD","BactE","BactA","BactC","BactD","BactK","BactL","BactE","BactD","BactA","BactS","BactK","BactB","BactE","BactF","BactH","BactN","BactE","BactL","BactZ","BactE","BactC","BactR","BactD","BactJ","BactN","BactK","BactW","BactR","BactE","BactW","BactA","BactM","BactG","BactO","BactI","BactE","BactD","BactM","BactH","BactC","BactM","BactW","BactA","BactL","BactB","BactE","BactA","BactS","BactH","BactQ","BactF"),
  stringsAsFactors = FALSE
)
# Positive counts alternate 1, 2, 1, 2, ...; repeat the pattern to exactly the
# number of rows rather than relying on silent data-frame recycling.
PosOcc <- seq(from = 1, to = 2, by = 1)
DummyData$PositiveOccurences <- rep_len(PosOcc, nrow(DummyData))
# Sample size recorded on every row of a week.
DummyData$SampleSize <- c(78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,29,29,29,29,29,10,10,10,10)
# Sample size on the first row of each week only (NA elsewhere); used as the
# per-bar label source in the plot.
DummyData$NewSampleSize <- c(78,NA,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,29,NA,NA,NA,NA,10,NA,NA,NA)
# One fill colour per distinct bacteria type (20 in the sample data), derived
# from the data so the palette stays large enough if types are added.
numcolor <- length(unique(DummyData$BacteriaType))
# Interpolate the 8-colour "Set3" Brewer palette up to `numcolor` colours.
# brewer.pal() is namespaced because RColorBrewer is not attached in this
# snippet.
plotcolors <- colorRampPalette(RColorBrewer::brewer.pal(8, "Set3"))(numcolor)
# Stacked bar chart for the dummy data: weekly proportion of positive animals,
# split by bacteria type. The sample-size labels are passed to geom_text() as a
# plain vector and use the layer's default (unstacked) position, so each label
# is drawn at its own row's y value plus the 0.1 nudge.
DummyDataPlot <- ggplot(
  data = DummyData,
  mapping = aes(
    x = WeeksSinceStart,
    y = PositiveOccurences / SampleSize,
    fill = BacteriaType
  )
) +
  geom_bar(stat = "identity", position = "stack") +
  geom_text(label = DummyData$NewSampleSize, nudge_y = 0.1) +
  scale_x_continuous(limits = c(0.5, 11), breaks = seq(0, 10, by = 1)) +
  scale_y_continuous(limits = c(0, 0.6), breaks = seq(0, 1, by = 0.1)) +
  scale_fill_manual(values = plotcolors) +
  labs(x = "Weeks Since Start", y = "Proportion Positive")
The problem: I cannot seem to find a way to position the labels from geom_text directly over each bar. I would also love to add the text "n = " to the sample size value directly over each bar. Thank you for your help!
I have tried different values for the position_dodge() and nudge_y arguments, with no success.
Sometimes the easiest approach is to do some data wrangling, i.e. one option would be to create a separate dataframe for your labels:
library(ggplot2)
library(dplyr)
# Label data: one row per week, holding the total height of the stacked bar
# (y) and that week's sample size.
dat_label <- DummyData |>
  group_by(WeeksSinceStart) |>
  summarise(
    y = sum(PositiveOccurences / SampleSize),
    SampleSize = unique(SampleSize)
  )

ggplot(
  DummyData,
  aes(x = WeeksSinceStart, y = PositiveOccurences / SampleSize, fill = BacteriaType)
) +
  geom_bar(position = "stack", stat = "identity") +
  # Labels come from the summary frame, so inherited aesthetics (fill, per-row
  # y) are switched off. The "n = " prefix requested in the question is added
  # here, and the small nudge lifts the text just above the bar top.
  geom_text(
    data = dat_label,
    aes(x = WeeksSinceStart, y = y, label = paste0("n = ", SampleSize)),
    inherit.aes = FALSE,
    nudge_y = .01
  ) +
  # Left disabled: presumably because an upper limit of 0.6 would drop labels
  # nudged above the tallest bar.
  # scale_y_continuous(limits = c(0, 0.6), breaks = seq(0, 1, by = 0.1)) +
  scale_x_continuous(limits = c(0.5, 11), breaks = seq(0, 10, by = 1)) +
  labs(
    x = "Weeks Since Start",
    y = "Proportion Positive"
  ) +
  scale_fill_manual(values = plotcolors)
I am making a line plot of several groups and want to create a visualization in which one group's lines are highlighted:
# One line per participant; the `color` column already holds literal colour
# names, so an identity scale is used and relabelled for the legend.
ggplot(data = df, mapping = aes(x = timepoint, y = var)) +
  geom_line(mapping = aes(group = participant_id, colour = color)) +
  scale_colour_identity(
    guide = "legend",
    labels = c(red = "g1", gray90 = "Other")
  )
However, the highlighted group's lines are partially obscured by the other groups' lines.
How can I make these lines always appear on top of the other groups' lines?
The simplest way to do this is to plot the gray and red groups on different layers.
First, let's try to replicate your problem with a dummy data set:
# Dummy data: 50 participants x 25 timepoints (1250 rows), with roughly one
# participant in ten flagged "red" and the rest "gray90".
set.seed(1)
df <- data.frame(
  participant_id = rep(1:50, each = 25),
  timepoint = factor(rep(0:24, 50)),
  var = c(replicate(50, runif(1, 50, 200) + runif(25, 0.3, 1.5) *
    sin(0:24 / (0.6 * pi))^2 / seq(0.002, 0.005, length.out = 25))),
  # One colour per participant, repeated over that participant's 25 rows.
  # `each` must equal the rows per participant: a larger value makes
  # data.frame() recycle the other columns and misaligns colours with
  # participants.
  color = rep(sample(c("red", "gray90"), 50, TRUE, prob = c(1, 9)), each = 25)
)
Now we apply your plotting code:
library(ggplot2)
# Reproduce the question's plot: all participants drawn in one layer, coloured
# by the literal colour names stored in `color`.
ggplot(data = df, mapping = aes(x = timepoint, y = var)) +
  geom_line(mapping = aes(group = participant_id, colour = color)) +
  scale_colour_identity(
    guide = "legend",
    labels = c(red = "g1", gray90 = "Other")
  ) +
  theme_classic()
This looks broadly similar to your plot. If instead we plot in different layers, we get:
# Draw the gray lines first and the red lines in a second layer, so the
# highlighted group is always painted on top.
ggplot(df, aes(timepoint, var, group = participant_id)) +
  geom_line(data = df[df$color == "gray90", ], aes(color = "Other")) +
  geom_line(data = df[df$color == "red", ], aes(color = "g1")) +
  # Named values tie each legend label to its colour explicitly instead of
  # depending on the alphabetical order of the labels.
  scale_color_manual(values = c(g1 = "red", Other = "gray90")) +
  theme_classic()
Created on 2022-06-20 by the reprex package (v2.0.1)
You can use factor releveling to bring the line(s) of interest to the front.
First, let's plot the data as is, with the red line partly hidden by others.
library(ggplot2)
library(dplyr)
# Dummy data: 20 participants ("p_1" ... "p_20") x 100 timepoints; only the
# first participant's 100 rows are coloured red.
set.seed(13)
df <- data.frame(
  timepoint = rep(1:100, times = 20),
  participant_id = paste0("p_", rep(1:20, each = 100)),
  var = abs(rnorm(n = 2000, mean = 200, sd = 50) - 200),
  color = rep(c("red", "gray90"), times = c(100, 1900))
)
# Plot the data as is: every participant in a single layer, coloured by the
# literal colour names in `color`.
ggplot(data = df, mapping = aes(x = timepoint, y = var)) +
  geom_line(mapping = aes(group = participant_id, colour = color)) +
  scale_colour_identity(
    guide = "legend",
    labels = c(red = "g1", gray90 = "Other")
  )
Now let's bring p_1 to the front by making it the last factor level.
# Reorder the participant factor so that "p_1" becomes the LAST level:
# relevel() moves it to the first position, then the level order is reversed.
# The reordered factor is used as the line grouping variable.
df %>%
  mutate(
    participant_id = factor(participant_id),
    participant_id = relevel(participant_id, ref = "p_1"),
    participant_id = factor(participant_id, levels = rev(levels(participant_id)))
  ) %>%
  ggplot(mapping = aes(x = timepoint, y = var)) +
  geom_line(mapping = aes(group = participant_id, colour = color)) +
  scale_colour_identity(
    guide = "legend",
    labels = c(red = "g1", gray90 = "Other")
  )
Can someone explain how I can use the facet_wrap layer in ggplot to combine 3 plots in the same graph, so that each plot is in its own row and the scales of each plot can vary freely?
Here are the 3 graphs that I have made; I wanted to see, individually, the relation between each of three covariates (median_income, pct_immigrant, income_inequality) and the percentage of votes for Marine Le Pen.
To be precise, those three covariates are not separate variables (columns), which is why I first had to filter the data frame elections_2017_long_metrop_covariates_lepen_long to keep only the observations for each value of the variable covariates.
Also, I would welcome any suggestions for improving the visualisation of the graphs.
# One scatter plot per covariate. The data are in long format, so each plot
# first filters the rows belonging to a single covariate.

# Share of immigrants vs. Le Pen vote share.
graph1 <- filter(elections_2017_long_metrop_covariates_lepen_long, covariates == "pct_immigrant")
ggplot(graph1, aes(x = value, y = pct_votes)) +
  geom_point(size = 3, alpha = 0.5, colour = "#d90502") +
  expand_limits(x = 0, y = c(0, 100)) +
  labs(x = "share of immigrants", y = "percentage of votes for Marine Le Pen")

# Income inequality vs. Le Pen vote share.
graph2 <- filter(elections_2017_long_metrop_covariates_lepen_long, covariates == "income_inequality")
ggplot(graph2, aes(x = value, y = pct_votes)) +
  geom_point(size = 3, alpha = 0.5, colour = "#d90502") +
  expand_limits(x = 0, y = c(0, 100)) +
  labs(x = "income inequality", y = "percentage of votes for Marine Le Pen")

# Median income vs. Le Pen vote share.
# This plot must use graph3: plotting graph2 here would redraw the
# income-inequality data under a "median income" label.
graph3 <- filter(elections_2017_long_metrop_covariates_lepen_long, covariates == "median_income")
ggplot(graph3, aes(x = value, y = pct_votes)) +
  geom_point(size = 3, alpha = 0.5, colour = "#d90502") +
  expand_limits(x = 0, y = c(0, 100)) +
  labs(x = "median income", y = "percentage of votes for Marine Le Pen")
You did not give a complete reproducible example, but I think this should work for you.
To make facets you should pass the filter variable into the facet_wrap() function.
# Minimal faceting example: keep the three covariates of interest and let
# facet_wrap() split the scatter plot into one panel per covariate.
elections_2017_long_metrop_covariates_lepen_long %>%
  filter(
    covariates %in% c('pct_immigrant', 'median_income', 'income_inequality')
  ) %>%
  ggplot(mapping = aes(x = value, y = pct_votes)) +
  geom_point(alpha = 0.5) +
  facet_wrap(facets = vars(covariates))
The full solution considering colors can be:
library(ggplot2)
# Plot
# One panel per covariate, stacked in a single column with free scales.
# subset() takes the data frame and the row condition as two separate
# arguments, so the comma between them is required.
ggplot(
  subset(
    elections_2017_long_metrop_covariates_lepen_long,
    covariates %in% c('pct_immigrant', 'median_income', 'income_inequality')
  ),
  aes(x = value, y = pct_votes, color = covariates)
) +
  geom_point(size = 3, alpha = 0.5) +
  expand_limits(x = 0, y = c(0, 100)) +
  # The x axis is shared by three different covariates (named in the panel
  # strips), so it gets a neutral label rather than the name of just one.
  labs(x = "covariate value", y = "percentage of votes for Marine Le Pen") +
  facet_wrap(. ~ covariates, scales = 'free', ncol = 1) +
  # All panels use the same red, so the colour legend adds nothing and is
  # hidden.
  scale_color_manual(values = rep("#d90502", 3), guide = "none")
Or:
library(dplyr)
library(ggplot2)
# Code 2
# Same figure built with a dplyr pipeline: filter the three covariates, then
# draw one free-scale panel per covariate in a single column.
elections_2017_long_metrop_covariates_lepen_long %>%
  filter(covariates %in% c('pct_immigrant', 'median_income', 'income_inequality')) %>%
  ggplot(aes(x = value, y = pct_votes, color = covariates)) +
  geom_point(size = 3, alpha = 0.5) +
  expand_limits(x = 0, y = c(0, 100)) +
  # The x axis is shared by three different covariates (named in the panel
  # strips), so it gets a neutral label rather than the name of just one.
  labs(x = "covariate value", y = "percentage of votes for Marine Le Pen") +
  facet_wrap(. ~ covariates, scales = 'free', ncol = 1) +
  # All panels use the same red, so the colour legend adds nothing and is
  # hidden.
  scale_color_manual(values = rep("#d90502", 3), guide = "none")
No output is shown because no data was provided.
I'm trying to add a bivariate legend to my ggplot2 chart but I don't know whether (a) this is possible through some guides options and (b) how to achieve it.
The only way I've managed to produce something close to the desired outcome was by specifically creating a new chart which resembles a legend (named p.legend below) and inserting it, via the cowplot package, somewhere in the original chart (named p.chart below). But surely there must be a better way than this, given that this approach requires creating the legend in the first place and fiddling with its size/location to fit it in the original chart.
Here's code for a dummy example of my approach:
library(tidyverse)
# Create Dummy Data #
# One row per Area x Period x Type, where Type is "Objective" or "Performance"
# and Performance is a random fraction of the matching Objective.
set.seed(876)
n <- 2
df <- expand.grid(Area = LETTERS[seq_len(n)],
                  Period = c("Summer", "Winter"),
                  stringsAsFactors = FALSE) %>%
  mutate(Objective = runif(2 * n, min = 0, max = 2),
         Performance = runif(2 * n) * Objective) %>%
  # pivot_longer() replaces the superseded gather(); same long-format columns
  # (Type, Value), though rows are interleaved per Area/Period rather than
  # grouped by Type.
  pivot_longer(Objective:Performance, names_to = "Type", values_to = "Value")
# Original chart without legend #
# Two dodged bar layers share the Period fill colours: semi-transparent
# Objective bars first, with opaque Performance bars drawn over them.
p.chart <- df %>%
  ggplot(., aes(x = Area)) +
  geom_col(data = . %>% filter(Type == "Objective"),
           aes(y = Value, fill = Period),
           position = "dodge", width = 0.7, alpha = 0.6) +
  geom_col(data = . %>% filter(Type == "Performance"),
           aes(y = Value, fill = Period),
           position = "dodge", width = 0.7) +
  # guide = "none" hides the built-in legend (guide = FALSE is deprecated).
  scale_fill_manual(values = c("Summer" = "#ff7f00", "Winter" = "#1f78b4"), guide = "none") +
  theme_minimal() +
  theme(panel.grid.major.x = element_blank(),
        panel.grid.minor.y = element_blank())
# Create a chart resembling a legend #
# A 2 x 2 tile grid (Period on x, Type on y) that mimics a bivariate legend:
# fill encodes Period, alpha encodes Type.
p.legend <- expand.grid(Period = c("Summer", "Winter"),
                        Type = c("Objective", "Performance"),
                        stringsAsFactors = FALSE) %>%
  ggplot(., aes(x = Period, y = factor(Type, levels = c("Performance", "Objective")),
                fill = Period, alpha = Type)) +
  geom_tile() +
  # guide = "none" replaces the deprecated guide = FALSE.
  scale_fill_manual(values = c("Summer" = "#ff7f00", "Winter" = "#1f78b4"), guide = "none") +
  # Objective alpha is 0.6 to match the alpha of the Objective bars in the
  # main chart, so the legend tiles show the same shades as the bars.
  scale_alpha_manual(values = c("Objective" = 0.6, "Performance" = 1), guide = "none") +
  ggtitle("Legend") +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5),
        rect = element_rect(fill = "transparent"),
        axis.title = element_blank(),
        panel.grid.major = element_blank())
# Add legend to original chart #
# Draw the main chart on a cowplot canvas, then place the hand-made legend
# plot on top of it (x / y / width / height are fractions of the canvas).
p.final <- cowplot::ggdraw() +
  cowplot::draw_plot(plot = p.chart) +
  cowplot::draw_plot(plot = p.legend, x = 0.5, y = 0.65, width = 0.4, height = 0.28, scale = 0.7)
# Save chart #
# ggplot2::ggsave() is used because current cowplot releases no longer export
# their own ggsave().
ggplot2::ggsave("Bivariate Legend.png", p.final, width = 8, height = 6, dpi = 500)
... and the resulting chart:
Is there an easier way of doing this?
This might work at some point, but right now the colorbox seems to ignore all breaks, names and labels (#ClausWilke?). Probably because the multiscales package is in really early stages.
Posting since it might work when future readers are here.
library(multiscales)
df %>%
mutate(
period = as.numeric(factor(Period)),
type = as.numeric(factor(Type))
) %>%
ggplot(., aes(x = Area, y = Value, fill = zip(period, type), group = interaction(Area, Period))) +
geom_col(width = 0.7, position = 'dodge') +
bivariate_scale(
"fill",
pal_hue_sat(c(0.07, 0.6), c(0.4, 0.8)),
guide = guide_colorbox(
nbin = 2,
name = c("Period", "Type"), #ignored
breaks = list(1:2, 1:2), #ignored
labels = list(levels(.$Period), levels(.$Type)) #ignored
)