In this experiment, we tracked presence or absence of bacterial infection in our subject animals. We were able to isolate which type of bacteria was present in our animals and created a plot that has Week Since Experiment Start on the X axis, and Percentage of Animals Positive for bacterial infection on the Y axis. This is a stacked identity ggplot where each geom_bar contains the different identities of the bacteria that were in the infected animals each week. Here is a sample dataset with the corresponding ggplot code and result:
DummyData <- data.frame(matrix(ncol = 5, nrow = 78))
colnames(DummyData) <- c('WeeksSinceStart','BacteriaType','PositiveOccurences','SampleSize','NewSampleSize')
DummyData$WeeksSinceStart <- c(1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,9,9,9,9,9,10,10,10,10)
DummyData$BacteriaType <- c("BactA","BactB","BactD","BactB","BactE","BactA","BactS","BactF","BactE","BactH","BactJ","BactK","BactE","BactB","BactS","BactF","BactL","BactE","BactW","BactH","BactS","BactJ","BactQ","BactN","BactW","BactA","BactD","BactE","BactA","BactC","BactD","BactK","BactL","BactE","BactD","BactA","BactS","BactK","BactB","BactE","BactF","BactH","BactN","BactE","BactL","BactZ","BactE","BactC","BactR","BactD","BactJ","BactN","BactK","BactW","BactR","BactE","BactW","BactA","BactM","BactG","BactO","BactI","BactE","BactD","BactM","BactH","BactC","BactM","BactW","BactA","BactL","BactB","BactE","BactA","BactS","BactH","BactQ","BactF")
PosOcc <- seq(from = 1, to = 2, by = 1)
DummyData$PositiveOccurences <- rep(PosOcc, times = 13)
DummyData$SampleSize <- c(78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,29,29,29,29,29,10,10,10,10)
DummyData$NewSampleSize <- c(78,NA,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,29,NA,NA,NA,NA,10,NA,NA,NA)
numcolor <- 20
plotcolors <- colorRampPalette(brewer.pal(8, "Set3"))(numcolor)
#GGplot for Dummy Data
DummyDataPlot <- ggplot(DummyData, aes(x = WeeksSinceStart, y = PositiveOccurences/SampleSize, fill = BacteriaType)) + geom_bar(position = "stack", stat = "identity") +
geom_text(label = DummyData$NewSampleSize, nudge_y = 0.1) +
scale_y_continuous(limits = c(0,0.6), breaks = seq(0, 1, by = 0.1)) + scale_x_continuous(limits = c(0.5,11), breaks = seq(0,10, by =1)) +
labs(
x = "Weeks Since Start",
y = "Proportion Positive") +
scale_fill_manual(values = plotcolors)
The problem: I cannot seem to find a way to position the labels from geom_text directly over each bar. I would also love to add the text "n = " to the sample size value directly over each bar. Thank you for your help!
I have tried different values for position_dodge statement and nudge_y statement with no success.
Sometimes the easiest approach is to do some data wrangling, i.e. one option would be to create a separate dataframe for your labels:
library(ggplot2)
library(dplyr)
dat_label <- DummyData |>
group_by(WeeksSinceStart) |>
summarise(y = sum(PositiveOccurences / SampleSize), SampleSize = unique(SampleSize))
ggplot(DummyData, aes(x = WeeksSinceStart, y = PositiveOccurences / SampleSize, fill = BacteriaType)) +
geom_bar(position = "stack", stat = "identity") +
geom_text(data = dat_label, aes(x = WeeksSinceStart, y = y, label = SampleSize), inherit.aes = FALSE, nudge_y = .01) +
#scale_y_continuous(limits = c(0, 0.6), breaks = seq(0, 1, by = 0.1)) +
scale_x_continuous(limits = c(0.5, 11), breaks = seq(0, 10, by = 1)) +
labs(
x = "Weeks Since Start",
y = "Proportion Positive"
) +
scale_fill_manual(values = plotcolors)
Can someone explain to me how can I use the facet_wrap layer in ggplot to make 3 plots in the same graph so that each plot should be in a row and so that the scales of each plot changes freely.
Here are the 3 graphs that I have made because I wanted to see the relation between those three covariates: median_income , pct_immigrant, income_inequality and the percentage of votes for Marine Le Pen individually.
To be precise, those three covariates are not variables that is why I had to filter the data frame elections_2017_long_metrop_covariates_lepen_long first so that I can only keep each observation among the variable covariates
Also if you have any suggestion to better the visualisation of the graphs
graph1 = filter(elections_2017_long_metrop_covariates_lepen_long, covariates == "pct_immigrant")
ggplot(graph1,aes(x = value,y = pct_votes)) + geom_point(size = 3, alpha = 0.5,colour = "#d90502") + expand_limits(x = 0, y = 0:100) +labs(x = "share of immigrants",y = "percentage of votes for Marine Le Pen")
graph2 = filter(elections_2017_long_metrop_covariates_lepen_long, covariates == "income_inequality")
ggplot(graph2,aes(x = value,y = pct_votes)) + geom_point(size = 3, alpha = 0.5,colour = "#d90502") + expand_limits(x = 0, y = 0:100) +labs(x = "income inequality",y = "percentage of votes for Marine Le Pen")
graph3 = filter(elections_2017_long_metrop_covariates_lepen_long, covariates == "median_income")
ggplot(graph2,aes(x = value,y = pct_votes)) + geom_point(size = 3, alpha = 0.5,colour = "#d90502") + expand_limits(x = 0, y = 0:100) +labs(x = "median income",y = "percentage of votes for Marine Le Pen")
You did not gave a complete reproducible example, but I think this should work for you.
To make facets you should pass the filter variable into the facet_wrap() function.
elections_2017_long_metrop_covariates_lepen_long %>%
filter(covariates %in% c('pct_immigrant', 'median_income', 'income_inequality')) %>%
ggplot(aes(x = value,y = pct_votes)) +
geom_point(alpha = 0.5) +
facet_wrap(~covariates)
The full solution considering colors can be:
library(ggplot2)
#Plot
ggplot(subset(elections_2017_long_metrop_covariates_lepen_long
covariates %in% c('pct_immigrant', 'median_income', 'income_inequality')),
aes(x = value,y = pct_votes,color=covariates))+
geom_point(size = 3, alpha = 0.5)+
expand_limits(x = 0, y = 0:100) +
labs(x = "share of immigrants",y = "percentage of votes for Marine Le Pen")+
facet_wrap(.~covariates,scales = 'free',ncol = 1)+
scale_color_manual(values=rep("#d90502",3))
Or:
library(dplyr)
library(ggplot2)
#Code 2
elections_2017_long_metrop_covariates_lepen_long %>%
filter(covariates %in% c('pct_immigrant', 'median_income', 'income_inequality')) %>%
ggplot(aes(x = value,y = pct_votes,color=covariates))+
geom_point(size = 3, alpha = 0.5)+
expand_limits(x = 0, y = 0:100) +
labs(x = "share of immigrants",y = "percentage of votes for Marine Le Pen")+
facet_wrap(.~covariates,scales = 'free',ncol = 1)+
scale_color_manual(values=rep("#d90502",3))
No output showed in lack of data.
I am trying to annotate the plot below in a pairwise fashion - in each facet compare corresponding samples in the variable. Essentially comparing CTR from pos to CTR from neg and so on. I can't seem to get it to work.
Here is my data and plots:
library(ggpubr)
#data.frame
samples <- rep(c('LA', 'EA', 'CTR'), 300)
variable <- sample(c('pos', 'neg'), 900, replace = T)
stim <- rep(c('rp','il'), 450)
population <- sample(c('EM','CM','TEMRA'), 900, replace = T)
values <- runif(900, min = 0, max = 100)
df <- data.frame(samples, variable, stim, population, values)
#test and comparisons
test_comparisons <- list(c('neg', 'pos'))
test <- compare_means(values ~ variable, data = df, method = 'wilcox.test',
group.by = c('samples', 'stim', 'population'))
#plot
ggplot(aes(x= variable, y = values, fill = samples), data = df) +
geom_boxplot(position = position_dodge(0.85)) +
geom_dotplot(binaxis='y', stackdir='center', position =
position_dodge(0.85), dotsize = 1.5) +
facet_grid(population ~ stim, scales = 'free_x') +
stat_compare_means(comparisons = test_comparisons, label = 'p.signif') +
theme_bw()
This only produces 1 comparison per facet between pos and neg instead of 3...What am I doing wrong?
You can use the following code:
samples <- rep(c('LA', 'EA', 'CTR'), 300)
variable <- sample(c('pos', 'neg'), 900, replace = T)
stim <- rep(c('rp','il'), 450)
population <- sample(c('EM','CM','TEMRA'), 900, replace = T)
values <- runif(900, min = 0, max = 100)
df <- data.frame(samples, variable, stim, population, values)
#test and comparisons
test_comparisons <- list(c('neg', 'pos'))
test <- compare_means(values ~ variable, data = df, method = 'wilcox.test',
group.by = c('samples', 'stim', 'population'))
#plot
ggplot(aes(x= variable, y = values, fill = samples), data = df) +
geom_boxplot(position = position_dodge(0.85)) +
geom_dotplot(binaxis='y', stackdir='center', position =
position_dodge(0.85), dotsize = 1.5) +
facet_grid(population ~ stim+samples, scales = 'free_x') +
stat_compare_means(comparisons = test_comparisons, label = 'p.signif') +
theme_bw()
Hope this will rectify your problem
Using ggplot2, how can I blend two graphs? If I graph two sets over data, the second set of data covers up the first set of data. Is there a way to blend both graphs. I already put the alpha value as low as I can. Any lower and I can't see individual points.
demanalyze <- function(infocode, n = 1){
infoname <- filter(infolookup, column_name == infocode)$description
infocolumn <- as.vector(as.matrix(mydata[infocode]))
ggplot(mydata) +
aes(x = infocolumn) +
ggtitle(infoname) +
xlab(infoname) +
ylab("Fraction of votes each canidate recieved") +
xlab(infoname) +
geom_point(aes(y = sanders_vote_fraction, colour = "Bernie Sanders"), size=I(2)) +#, color = alpha("blue",0.02), size=I(1)) +
stat_smooth(aes(y = sanders_vote_fraction), method = "lm", formula = y ~ poly(x, n), size = 1, color = "darkblue", se = F) +
geom_point(aes(y = clinton_vote_fraction, colour = "Hillary Clinton"), size=I(2)) +#, color = alpha("red",0.02), size=I(1)) +
stat_smooth(aes(y = clinton_vote_fraction), method = "lm", formula = y ~ poly(x, n), size = 1, color = "darkred", se = F) +
scale_colour_manual("",
values = c("Bernie Sanders" = alpha("blue",0.005), "Hillary Clinton" = alpha("red",0.005))
) +
guides(colour = guide_legend(override.aes = list(alpha = 1)))
}
By blend, I mean of a there is a red point and a blue point in the same spot, it should show up as purple.
Looking at the plot, my guess is that the issue is a ton of red stacking on top of each other, blocking the blue below. I think you may need to randomize the layering on the graph, which will require generating a single data.frame. Alternatively, if Hillary+Bernie always equals 1, you may be able to just plot that. If they don't, and you don't want to lose too much information, you could plot just one metric of (Hillary)/(Bernie+Hillary).
Example:
geom_point(aes(y = clinton_vote_fraction / ( clinton_vote_fraction + sanders_vote_fraction)
, colour = "Clinton Share"), size=I(2))
And here is an example with the melting approach:
library(dplyr)
library(reshape2)
df <-
data.frame(
metric = rnorm(1000)
, Clinton = rnorm(1000, 48, 10)
) %>%
mutate(Sanders = 100 - Clinton - rnorm(4))
meltDF <-
melt(df, "metric"
, variable.name = "Candidate"
, value.name = "Vote Share")
ggplot(meltDF %>%
arrange(sample(1:nrow(.)))
, aes(x = metric
, y = `Vote Share`
, col = Candidate)) +
geom_point(size = 2, alpha = 0.2) +
geom_smooth(se = FALSE, alpha = 1, show.legend = FALSE) +
scale_colour_manual("",
values = c("Clinton" = "darkblue"
, "Sanders" = "red3")
) +
theme_minimal()
I have a changing df and I am grouping different values c.
With ggplot2 I plot them with the following code to get a scatterplott with multiple linear regression lines (geom_smooth)
ggplot(aes(x = a, y = b, group = c)) +
geom_point(shape = 1, aes(color = c), alpha = alpha) +
geom_smooth(method = "lm", aes(group = c, color = c), se = F)
Now I want to display on each geom_smooth line in the plot a label with the value of the group c.
This has to be dynamic, because I can not write new code when my df changes.
Example: my df looks like this
a b c
----------------
1.6 24 100
-1.4 43 50
1 28 100
4.3 11 50
-3.45 5.2 50
So in this case I would get 3 geom_smooth lines in the plot with different colors.
Now I simply want to add a text label to the plot with "100" next to the geom_smooth with the group c = 100 and a text label with "50"to the line for the group c = 50, and so on... as new groups get introduced in the df, new geom_smooth lines are plotted and need to be labeled.
the whole code for the plot:
ggplot(aes(x = a, y = b, group = c), data = df, na.rm = TRUE) +
geom_point(aes(color = GG, size = factor(c)), alpha=0.3) +
scale_x_continuous(limits = c(-200,2300))+
scale_y_continuous(limits = c(-1.8,1.5))+
geom_hline(yintercept=0, size=0.4, color="black") +
scale_color_distiller(palette="YlGnBu", na.value="white") +
geom_smooth(method = "lm", aes(group = factor(GG), color = GG), se = F) +
geom_label_repel(data = labelInfo, aes(x= max, y = predAtMax, label = label, color = label))
You can probably do it if you pick the location you want the lines labelled. Below, I set them to label at the far right end of each line, and used ggrepel to avoid overlapping labels:
library(ggplot2)
library(ggrepel)
library(dplyr)
set.seed(12345)
df <-
data.frame(
a = rnorm(100,2,0.5)
, b = rnorm(100, 20, 5)
, c = factor(sample(c(50,100,150), 100, TRUE))
)
labelInfo <-
split(df, df$c) %>%
lapply(function(x){
data.frame(
predAtMax = lm(b~a, data=x) %>%
predict(newdata = data.frame(a = max(x$a)))
, max = max(x$a)
)}) %>%
bind_rows
labelInfo$label = levels(df$c)
ggplot(
df
, aes(x = a, y = b, color = c)
) +
geom_point(shape = 1) +
geom_smooth(method = "lm", se = F) +
geom_label_repel(data = labelInfo
, aes(x= max
, y = predAtMax
, label = label
, color = label))
This method might work for you. It uses ggplot_build to access the rightmost point in the actual geom_smooth lines to add a label by it. Below is an adaptation that uses Mark Peterson's example.
library(ggplot2)
library(ggrepel)
library(dplyr)
set.seed(12345)
df <-
data.frame(
a = rnorm(100,2,0.5)
, b = rnorm(100, 20, 5)
, c = factor(sample(c(50,100,150), 100, TRUE))
)
p <-
ggplot(df, aes(x = a, y = b, color = c)) +
geom_point(shape = 1) +
geom_smooth(method = "lm", se = F)
p.smoothedmaxes <-
ggplot_build(p)$data[[2]] %>%
group_by( group) %>%
filter( x == max(x))
p +
geom_text_repel( data = p.smoothedmaxes,
mapping = aes(x = x, y = y, label = round(y,2)),
col = p.smoothedmaxes$colour,
inherit.aes = FALSE)
This came up for me today and I landed on this solution with data = ~fn()
library(tidyverse)
library(broom)
mpg |>
ggplot(aes(x = displ, y = hwy, colour = class, label = class)) +
geom_count(alpha = 0.1) +
stat_smooth(alpha = 0.6, method = lm, geom = "line", se = FALSE) +
geom_text(
aes(y = .fitted), size = 3, hjust = 0, nudge_x = 0.1,
data = ~{
nest_by(.x, class) |>
summarize(broom::augment(lm(hwy ~ displ, data = data))) |>
slice_max(order_by = displ, n = 1)
}
) +
scale_x_continuous(expand = expansion(add = c(0, 1))) +
theme_minimal()
Or do it with a function
#' #examples
#' last_lm_points(df = mpg, formula = hwy~displ, group = class)
last_lm_points <- function(df, formula, group) {
# df <- mpg; formula <- as.formula(hwy~displ); group <- sym("class");
x_arg <- formula[[3]]
df |>
nest_by({{group}}) |>
summarize(broom::augment(lm(formula, data = data))) |>
slice_max(order_by = get(x_arg), n = 1)
}
mpg |>
ggplot(aes(displ, hwy, colour = class, label = class)) +
geom_count(alpha = 0.1) +
stat_smooth(alpha = 0.6, method = lm, geom = "line", se = FALSE) +
geom_text(
aes(y = .fitted), size = 3, hjust = 0, nudge_x = 0.1,
data = ~last_lm_points(.x, hwy~displ, class)
) +
scale_x_continuous(expand = expansion(add = c(0, 1))) +
theme_minimal()