In this experiment, we tracked presence or absence of bacterial infection in our subject animals. We were able to isolate which type of bacteria was present in our animals and created a plot that has Week Since Experiment Start on the X axis, and Percentage of Animals Positive for bacterial infection on the Y axis. This is a stacked identity ggplot where each geom_bar contains the different identities of the bacteria that were in the infected animals each week. Here is a sample dataset with the corresponding ggplot code and result:
DummyData <- data.frame(matrix(ncol = 5, nrow = 78))
colnames(DummyData) <- c('WeeksSinceStart','BacteriaType','PositiveOccurences','SampleSize','NewSampleSize')
DummyData$WeeksSinceStart <- c(1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,9,9,9,9,9,10,10,10,10)
DummyData$BacteriaType <- c("BactA","BactB","BactD","BactB","BactE","BactA","BactS","BactF","BactE","BactH","BactJ","BactK","BactE","BactB","BactS","BactF","BactL","BactE","BactW","BactH","BactS","BactJ","BactQ","BactN","BactW","BactA","BactD","BactE","BactA","BactC","BactD","BactK","BactL","BactE","BactD","BactA","BactS","BactK","BactB","BactE","BactF","BactH","BactN","BactE","BactL","BactZ","BactE","BactC","BactR","BactD","BactJ","BactN","BactK","BactW","BactR","BactE","BactW","BactA","BactM","BactG","BactO","BactI","BactE","BactD","BactM","BactH","BactC","BactM","BactW","BactA","BactL","BactB","BactE","BactA","BactS","BactH","BactQ","BactF")
PosOcc <- seq(from = 1, to = 2, by = 1)
DummyData$PositiveOccurences <- rep(PosOcc, times = 13)
DummyData$SampleSize <- c(78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,29,29,29,29,29,10,10,10,10)
DummyData$NewSampleSize <- c(78,NA,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,29,NA,NA,NA,NA,10,NA,NA,NA)
numcolor <- 20
plotcolors <- colorRampPalette(brewer.pal(8, "Set3"))(numcolor)
#GGplot for Dummy Data
DummyDataPlot <- ggplot(DummyData, aes(x = WeeksSinceStart, y = PositiveOccurences/SampleSize, fill = BacteriaType)) + geom_bar(position = "stack", stat = "identity") +
geom_text(label = DummyData$NewSampleSize, nudge_y = 0.1) +
scale_y_continuous(limits = c(0,0.6), breaks = seq(0, 1, by = 0.1)) + scale_x_continuous(limits = c(0.5,11), breaks = seq(0,10, by =1)) +
labs(
x = "Weeks Since Start",
y = "Proportion Positive") +
scale_fill_manual(values = plotcolors)
The problem: I cannot seem to find a way to position the labels from geom_text directly over each bar. I would also love to add the text "n = " to the sample size value directly over each bar. Thank you for your help!
I have tried different values for position_dodge statement and nudge_y statement with no success.
Sometimes the easiest approach is to do some data wrangling, i.e. one option would be to create a separate dataframe for your labels:
library(ggplot2)
library(dplyr)
dat_label <- DummyData |>
group_by(WeeksSinceStart) |>
summarise(y = sum(PositiveOccurences / SampleSize), SampleSize = unique(SampleSize))
ggplot(DummyData, aes(x = WeeksSinceStart, y = PositiveOccurences / SampleSize, fill = BacteriaType)) +
geom_bar(position = "stack", stat = "identity") +
geom_text(data = dat_label, aes(x = WeeksSinceStart, y = y, label = SampleSize), inherit.aes = FALSE, nudge_y = .01) +
#scale_y_continuous(limits = c(0, 0.6), breaks = seq(0, 1, by = 0.1)) +
scale_x_continuous(limits = c(0.5, 11), breaks = seq(0, 10, by = 1)) +
labs(
x = "Weeks Since Start",
y = "Proportion Positive"
) +
scale_fill_manual(values = plotcolors)
How to get ggplot Heatmap (R) to use two colors? One for between a fill value of -.1 to .1 and one for not
ggplot(base, aes(x,y, fill= base$`Equal Opportunity Difference`)) +
geom_tile() +
#axis formatting
scale_x_discrete(breaks = c(10, 20, 30, 40 , 50, 60, 70, 80, 90),
labels = c("10%", "20%","30%", "40%","50%", "60%", "70%","80%", "90%"),
limits = c(10,90)) +
scale_y_discrete(breaks = c(10, 20, 30, 40 , 50, 60, 70, 80, 90),
labels = c("10%", "20%","30%", "40%","50%", "60%", "70%","80%", "90%"),
limits = c(10,90)) +
geom_text(aes(label = signif(base$`Equal Opportunity Difference`,2)), color = "white",
size = 4) +
scale_fill_gradient2(midpoint=c(-.1, s.1), low="#B2182B", high="#2166AC")
This is what I have right now, which isn't working. Also the axis are only showing 10% and 90%
I would appreciate if someone had a solution for that too.
Without a minimal reproducible example it's difficult to guess potential solutions to your problem. Is this what you're hoping to achieve? If not, what do you want to change?
library(ggplot2)
library(scales)
x <- seq(1:10)
y <- seq(1:10)
df <- expand.grid(x = x, y = y)
df$z <- signif(c(runif(50, -10, 0), runif(50, 0, 10)), 2)
df$z_categorised <- cut(df$z, c(seq(-10, -1, 1), seq(1, 10, 1)))
palette_red_blue <- colorRampPalette(colors = c("#B2182B","white", "#2166AC"))
ggplot(df, aes(x = x, y = y, fill = z_categorised)) +
geom_tile(color = "white") +
geom_text(aes(label = z)) +
scale_fill_manual(values = palette_red_blue(19)) +
scale_x_continuous(breaks = seq(0, 10, 1),
labels = percent_format(scale = 10)) +
scale_y_continuous(breaks = seq(0, 10, 1),
labels = percent_format(scale = 10)) +
coord_cartesian(expand = 0)
Created on 2022-06-21 by the reprex package (v2.0.1)
I recently asked this question. However, I am asking a separate question now as the scope of my new question falls outside the range of the last question.
I am trying to create a heatmap in ggplot... however, outside of the axis I am trying to plot geom_tile. The issue is I cannot find a consistent way to get it to work. For example, the code I am using to plot is:
library(colorspace)
library(ggplot2)
library(ggnewscale)
library(tidyverse)
asd <- expand_grid(paste0("a", 1:9), paste0("b", 1:9))
df <- data.frame(
a = asd$`paste0("a", 1:9)`,
b = asd$`paste0("b", 1:9)`,
c = sample(20, 81, replace = T)
)
# From discrete to continuous
df$a <- match(df$a, sort(unique(df$a)))
df$b <- match(df$b, sort(unique(df$b)))
z <- sample(10, 18, T)
# set color palettes
pal <- rev(diverging_hcl(palette = "Blue-Red", n = 11))
palEdge <- rev(sequential_hcl(palette = "Plasma", n = 11))
# plot
ggplot(df, aes(a, b)) +
geom_tile(aes(fill = c)) +
scale_fill_gradientn(
colors = pal,
guide = guide_colorbar(
frame.colour = "black",
ticks.colour = "black"
),
name = "C"
) +
theme_classic() +
labs(x = "A axis", y = "B axis") +
new_scale_fill() +
geom_tile(data = tibble(a = 1:9,
z = z[1:9]),
aes(x = a, y = 0, fill = z, height = 0.3)) +
geom_tile(data = tibble(b = 1:9,
z = z[10:18]),
aes(x = 0, y = b, fill = z, width = 0.3)) +
scale_fill_gradientn(
colors = palEdge,
guide = guide_colorbar(
frame.colour = "black",
ticks.colour = "black"
),
name = "Z"
)+
coord_cartesian(clip = "off", xlim = c(0.5, NA), ylim = c(0.5, NA)) +
theme(aspect.ratio = 1,
plot.margin = margin(10, 15.5, 25, 25, "pt")
)
This produces something like this:
However, I am trying to find a consistent way to plot something more like this (which I quickly made in photoshop):
The main issue im having is being able to manipulate the coordinates of the new scale 'outside' of the plotting area. Is there a way to move the tiles that are outside so I can position them in an area that makes sense?
There are always the two classic options when plotting outside the plot area:
annotate/ plot with coord_...(clip = "off")
make different plots and combine them.
The latter option usually gives much more flexibility and way less headaches, in my humble opinion.
library(colorspace)
library(tidyverse)
library(patchwork)
asd <- expand_grid(paste0("a", 1:9), paste0("b", 1:9))
df <- data.frame(
a = asd$`paste0("a", 1:9)`,
b = asd$`paste0("b", 1:9)`,
c = sample(20, 81, replace = T)
)
# From discrete to continuous
df$a <- match(df$a, sort(unique(df$a)))
df$b <- match(df$b, sort(unique(df$b)))
z <- sample(10, 18, T)
# set color palettes
pal <- rev(diverging_hcl(palette = "Blue-Red", n = 11))
palEdge <- rev(sequential_hcl(palette = "Plasma", n = 11))
# plot
p_main <- ggplot(df, aes(a, b)) +
geom_tile(aes(fill = c)) +
scale_fill_gradientn("C",colors = pal,
guide = guide_colorbar(frame.colour = "black",
ticks.colour = "black")) +
theme_classic() +
labs(x = "A axis", y = "B axis")
p_bottom <- ggplot() +
geom_tile(data = tibble(a = 1:9, z = z[1:9]),
aes(x = a, y = 0, fill = z, height = 0.3)) +
theme_void() +
scale_fill_gradientn("Z",limits = c(0,10),
colors = palEdge,
guide = guide_colorbar(
frame.colour = "black", ticks.colour = "black"))
p_left <- ggplot() +
theme_void()+
geom_tile(data = tibble(b = 1:9, z = z[10:18]),
aes(x = 0, y = b, fill = z, width = 0.3)) +
scale_fill_gradientn("Z",limits = c(0,10),
colors = palEdge,
guide = guide_colorbar( frame.colour = "black", ticks.colour = "black"))
p_left + p_main +plot_spacer()+ p_bottom +
plot_layout(guides = "collect",
heights = c(1, .1),
widths = c(.1, 1))
Created on 2021-02-21 by the reprex package (v1.0.0)
I would like to plot violin plots where x axis is exon however I want to group the plots. This works if its just violins however when I add in the jitters for some reason its not responding the correct aes and is plotting on its own? Here is a reproducible code with a screen shot of the error. thanks!
set.seed(1)
df <- data.frame(
exons = c(rep("e1", 200), rep("e2", 200)),
values = rnorm(400, 200, 40),
group = c(
rep("g1", 75), rep("g2", 75), rep("g3", 50),
rep("g1", 75), rep("g2", 75), rep("g3", 50)
)
)
ggplot(df, aes(y = values, x = exons, fill = group)) +
geom_violin() +
geom_jitter(shape = 16, position = position_jitter(0.07))
so if the plot works the dots should had been plotted within each of the group for each exon, however here it is clearly not.
You probably want both position_dodge() and position_jitterdodge()
library(ggplot2)
ggplot(df, aes(y = values, x = exons, fill = group)) +
geom_violin(position = position_dodge(width = 0.9)) +
geom_point(position = position_jitterdodge(seed = 1, dodge.width = 0.9))
Another option worth mentioning is geom_quasirandom() function from the ggbeeswarm package
library(ggbeeswarm)
ggplot(df, aes(y = values, x = exons, fill = group)) +
geom_violin(position = position_dodge(width = 0.9)) +
geom_quasirandom(dodge.width = 0.9, varwidth = TRUE)
Created on 2019-08-10 by the reprex package (v0.3.0)
Do you mean something like that?
set.seed ( 1)
df = data.frame (
exons = c(rep("e1", 200), rep("e2", 200))
,values = rnorm(400,200,40)
,group = c(rep("g1", 75), rep("g2", 75), rep("g3",50),
rep("g1", 75), rep("g2", 75), rep("g3",50) )
)
ggplot(df, aes(y= values , x= exons , fill = group )) +
geom_violin()+
geom_jitter(shape=16, position=position_jitter(width = NULL, height = NULL))
You can define the degree of jitter in x and y direction.