ggforce facet_zoom error with ggplot2 on R - r

I have a data.frame in R 4.0.2 with a continuous variable in one column and two possible values of a categorical variable (variable 'type': known or novel) in another, which I use to color them differently (using a palette from ggsci 2.9 package). I represent an histogram (stat_bin) with ggplot2 3.3.2 and I want to use the facet_zoom function of ggforce 0.3.2 to zoom only the data belonging to one of the 'types' (using the option zoom.data, as it is done in the volcano example on http://cran.univ-paris1.fr/web/packages/ggforce/vignettes/Visual_Guide.html#contextual-zoom), however I get this error:
Error: Aesthetics must be either length 1 or the same as the data (2000): x
Reproducible example:
library(ggplot2)
library(ggsci)
library(ggforce)
testdata <- as.data.frame(sort(rnorm(1000)))
testdata$type <- "known"
testdata[501:1000,2] <- "novel"
# Working code
ggplot(testdata) +
stat_bin(aes(x=testdata[,1], fill = type), binwidth = 1, color="white") +
scale_fill_npg() + theme_light() +
facet_zoom(xlim = c(0, 4), ylim = c(0, 300), horizontal = TRUE, zoom.size = 0.3)
# Desired code
ggplot(testdata) +
stat_bin(aes(x=testdata[,1], fill = type), data = cbind(testdata, zoom = FALSE), binwidth = 1, color="white") +
stat_bin(aes(x=testdata[testdata$type == "novel",1]), data = cbind(testdata, zoom = TRUE), binwidth = 0.5) +
scale_fill_npg() + theme_light() +
facet_zoom(xlim = c(0, 4), ylim = c(0, 300), horizontal = TRUE, zoom.size = 0.3, zoom.data = zoom)
Thanks!

The issue is that you pass the whole dataset as data in the second stat_bin. Simply pass the subsetted df instead of trying to subset in aes():
BTW: I also renamed the first variable in your data as x.
library(ggplot2)
library(ggsci)
library(ggforce)
set.seed(42)
testdata <- data.frame(x = sort(rnorm(1000)))
testdata$type <- "known"
testdata[501:1000,2] <- "novel"
# Desired code
ggplot(testdata) +
stat_bin(aes(x = x, fill = type), data = cbind(testdata, zoom = FALSE), binwidth = 1, color="white") +
stat_bin(aes(x = x), data = cbind(testdata[testdata$type == "novel", ], zoom = TRUE), binwidth = 0.5) +
scale_fill_npg() + theme_light() +
facet_zoom(xlim = c(0, 4), ylim = c(0, 300), horizontal = TRUE, zoom.size = 0.3, zoom.data = zoom)

To only show the type == "novel" data in the zoomed plot, try this:
library(tidyverse)
library(ggsci)
library(ggforce)
testdata <- data.frame(values = sort(rnorm(1000)))
testdata$type <- "known"
testdata[501:1000,2] <- "novel"
# Desired code
ggplot(testdata) +
stat_bin(aes(x = values, fill = type),
binwidth = 1, color="white") +
scale_fill_npg() + theme_light() +
facet_zoom(zoom.data = ifelse(type == "novel", NA, FALSE),
xlim = c(0, 4), ylim = c(0, 300),
horizontal = TRUE)

Related

Position stacked identity data sample size as geom_text directly over a bar using geom_bar from ggplot2

In this experiment, we tracked presence or absence of bacterial infection in our subject animals. We were able to isolate which type of bacteria was present in our animals and created a plot that has Week Since Experiment Start on the X axis, and Percentage of Animals Positive for bacterial infection on the Y axis. This is a stacked identity ggplot where each geom_bar contains the different identities of the bacteria that were in the infected animals each week. Here is a sample dataset with the corresponding ggplot code and result:
DummyData <- data.frame(matrix(ncol = 5, nrow = 78))
colnames(DummyData) <- c('WeeksSinceStart','BacteriaType','PositiveOccurences','SampleSize','NewSampleSize')
DummyData$WeeksSinceStart <- c(1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,9,9,9,9,9,10,10,10,10)
DummyData$BacteriaType <- c("BactA","BactB","BactD","BactB","BactE","BactA","BactS","BactF","BactE","BactH","BactJ","BactK","BactE","BactB","BactS","BactF","BactL","BactE","BactW","BactH","BactS","BactJ","BactQ","BactN","BactW","BactA","BactD","BactE","BactA","BactC","BactD","BactK","BactL","BactE","BactD","BactA","BactS","BactK","BactB","BactE","BactF","BactH","BactN","BactE","BactL","BactZ","BactE","BactC","BactR","BactD","BactJ","BactN","BactK","BactW","BactR","BactE","BactW","BactA","BactM","BactG","BactO","BactI","BactE","BactD","BactM","BactH","BactC","BactM","BactW","BactA","BactL","BactB","BactE","BactA","BactS","BactH","BactQ","BactF")
PosOcc <- seq(from = 1, to = 2, by = 1)
DummyData$PositiveOccurences <- rep(PosOcc, times = 13)
DummyData$SampleSize <- c(78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,29,29,29,29,29,10,10,10,10)
DummyData$NewSampleSize <- c(78,NA,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,29,NA,NA,NA,NA,10,NA,NA,NA)
numcolor <- 20
plotcolors <- colorRampPalette(brewer.pal(8, "Set3"))(numcolor)
#GGplot for Dummy Data
DummyDataPlot <- ggplot(DummyData, aes(x = WeeksSinceStart, y = PositiveOccurences/SampleSize, fill = BacteriaType)) + geom_bar(position = "stack", stat = "identity") +
geom_text(label = DummyData$NewSampleSize, nudge_y = 0.1) +
scale_y_continuous(limits = c(0,0.6), breaks = seq(0, 1, by = 0.1)) + scale_x_continuous(limits = c(0.5,11), breaks = seq(0,10, by =1)) +
labs(
x = "Weeks Since Start",
y = "Proportion Positive") +
scale_fill_manual(values = plotcolors)
The problem: I cannot seem to find a way to position the labels from geom_text directly over each bar. I would also love to add the text "n = " to the sample size value directly over each bar. Thank you for your help!
I have tried different values for position_dodge statement and nudge_y statement with no success.
Sometimes the easiest approach is to do some data wrangling, i.e. one option would be to create a separate dataframe for your labels:
library(ggplot2)
library(dplyr)
dat_label <- DummyData |>
group_by(WeeksSinceStart) |>
summarise(y = sum(PositiveOccurences / SampleSize), SampleSize = unique(SampleSize))
ggplot(DummyData, aes(x = WeeksSinceStart, y = PositiveOccurences / SampleSize, fill = BacteriaType)) +
geom_bar(position = "stack", stat = "identity") +
geom_text(data = dat_label, aes(x = WeeksSinceStart, y = y, label = SampleSize), inherit.aes = FALSE, nudge_y = .01) +
#scale_y_continuous(limits = c(0, 0.6), breaks = seq(0, 1, by = 0.1)) +
scale_x_continuous(limits = c(0.5, 11), breaks = seq(0, 10, by = 1)) +
labs(
x = "Weeks Since Start",
y = "Proportion Positive"
) +
scale_fill_manual(values = plotcolors)

Plot the legend at the bottom in different way when we deal with discrete color filling in ggplot

Suppose we want to plot this data:
library(ggplot2)
library(sf)
library(raster)
library(colorRamps)
min_lon <- 10
max_lon <- 17
min_lat <- 8
max_lat <- 17
grid_size <- 0.5
lon_grids <- 1 + ((max_lon - min_lon)/grid_size)
lat_grids <- 1 + ((max_lat - min_lat)/grid_size)
points <- data.frame(lon = rep(seq(min_lon, max_lon, grid_size), lat_grids), lat = rep(seq(min_lat, max_lat, grid_size), each = lon_grids))
points$Var <- runif(min= 10, max = 48, 285)
points$value <-cut(points$Var, breaks= seq(10.08, 47.80, length.out = 13), dig.lab = 1)
ggplot() +
coord_sf(xlim = c(min_lon, max_lon), ylim = c(min_lat, max_lat)) +
theme_bw()+
geom_raster(data = points, aes(x = lon, y = lat, fill = value), interpolate = FALSE) +
labs(x="Longitude", y="Latitude")+
scale_fill_manual(values = matlab.like(n = 13), name = "[m]",
labels = sprintf("%.2f", seq(10.08, 47.80, length.out = 13)),
guide = guide_legend(reverse = TRUE))+theme(legend.position = "bottom")
This code produces the following graph:
Two problems I am facing here:
To make it discrete, I used the cut function. I chose the breaks= seq(10.08, 47.80, length.out = 13) arbitrary based on the minimum and maximum values with a random length of 13. Is there any criteria to decide the correct range?
Is there any way to make the legend look like this?
One option would be to use e.g. scale_fill_stepsn with guide_binswhich does not require to manually discretize the variable mapped on fill. Additionally I use a custom function to set the breaks of the legend instead of the default mechanism to set the number of breaks.
set.seed(123)
library(ggplot2)
library(colorRamps)
base <- ggplot() +
coord_sf(xlim = c(min_lon, max_lon), ylim = c(min_lat, max_lat)) +
theme_bw() +
geom_raster(data = points, aes(x = lon, y = lat), interpolate = FALSE) +
labs(x = "Longitude", y = "Latitude") +
theme(legend.position = "bottom")
base +
aes(fill = Var) +
scale_fill_stepsn(colors = matlab.like(n = 13), name = "[m]",
breaks = function(x) seq(x[[1]], x[[2]], length.out = 13),
labels = ~ sprintf("%.0f", .x),
guide = guide_bins(axis = FALSE,
show.limits = TRUE))

How to write point values to a lineplot in R?

I have a dataframe of single column with multiple values. I was using basic rplot function like plot() and points(). I successfully plotted the lineplot but I was unable to write point values from the dataframe onto the plot field. Is there anyway to add data values onto the plot?
Below is the following code for test
> x = data.frame(A = rnorm(10))
> plot(x$A, type = "o", pch = 20)**
Sorry, I made an edit to make my question clearer.
Here below is the example plot for 10 random numbers
Plot lines, then add text:
#data
set.seed(1); x = data.frame(A = rnorm(10))
#base plot
plot(x$A, type = "o", pch = 20, ylim = range(x$A * 1.3))
text(x = seq_along(x$A), y = x$A + 0.3, labels = round(x$A, 2), srt = 90)
Or using ggplot with ggrepel for pretty labels:
#ggplot
library(ggplot2)
library(ggrepel) # pretty labels, avoid overlap:
ggplot(cbind(x = seq_along(x$A), x),
aes(x = x, y = A, label = round(A, 2))) +
geom_line() +
geom_point() +
geom_label_repel()
#geom_text_repel()
Probably this is more than what you are asking, but you can add labels to the values you have in your line plot using ggplot:
library(ggplot2)
x = data.frame(A = rnorm(10),
pos = runif(10, 0.1, 0.7))
ggplot(x) +
geom_point(aes(x = A),
y = 0) +
geom_line(aes(x = A),
y = 0) +
geom_segment(aes(x = A,
xend = A,
y = 0,
yend = pos),
linetype = 2) +
geom_label(aes(x = A,
y = pos,
label = round(A, 2)),
size = 3) +
scale_y_continuous(name = "",
limits = c(0, 0.8)) +
guides(y = "none") +
theme_bw()
You could make a base R "type b" equivalent.
The OP hasn't specified that every y value should be set to zero.
library(ggh4x)
#> Loading required package: ggplot2
set.seed(1)
x = data.frame(A = rnorm(10))
ggplot(x, aes(1:10, A)) +
geom_pointpath(shape = NA) +
geom_text(aes(label = round(A,2))) +
labs(x= "Index")
Created on 2022-05-27 by the reprex package (v2.0.1)

Create additional independent legends in ggplot2

I have been struggling with this for hours now. I have the following script:
library(ggplot2)
sims = replicate(1000, sample(c(0,0,0,0,1,1,1,2,2,2), size=3, replace=FALSE))
df = data.frame(x=colSums(sims == 0),
y=colSums(sims == 1))
df$count <- 1
total_counts = aggregate(count ~ ., df, FUN = sum)
min_count = min(total_counts$count)
max_count = max(total_counts$count)
p = (ggplot(df, aes(x=x, y=y))
+ geom_count(aes(color=..n.., size=..n..), alpha=0.8)
+ guides(color = 'legend', size=FALSE)
+ labs(color='Count')
+ scale_colour_gradient(limits = c(min_count, max_count),
breaks = round(seq(min_count, max_count, length.out=5)),
labels = round(seq(min_count, max_count, length.out=5)))
+ scale_size_continuous(range = c(3, 7.5))
)
So far so good. The problem is that I want to add two additional sets of points:
df2 = data.frame(x=c(0, 1, 2, 3),
y=c(1.5253165, 1.0291262, 0.4529617, 0))
df3 = data.frame(x=c(0, 1, 2, 3),
y=c(1.5, 1, 0.5, 0))
To get something like this:
p2 = (p
+ geom_point(data=df2, aes(x=x, y=y), alpha=0.4, color="red", size = 2.5)
+ geom_point(data=df3, aes(x=x, y=y), alpha=0.4, color="green", size = 2.5)
)
The problem is that I am not being capable of adding these new points to the legend. I would like the legend to be in a different "section". Namely, to have an empty string title (to differentiate these points from "Count" title), and to have strings instead of numbers in their labels ("Simulated means" and "Theoretical means", for example).
Is there any way to achieve this?
A trick I learned from #tjebo is that you can use the ggnewscale package to spawn additional legends. At what point in plot construction you call the new scale is important, so you first want to make a geom/stat layer and add the desired scale. Once these are declared, you can use new_scale_colour() and all subsequent geom/stat layers will use a new colour scale.
library(ggplot2)
#> Warning: package 'ggplot2' was built under R version 4.0.5
library(ggnewscale)
#> Warning: package 'ggnewscale' was built under R version 4.0.3
sims = replicate(1000, sample(c(0,0,0,0,1,1,1,2,2,2), size=3, replace=FALSE))
df = data.frame(x=colSums(sims == 0),
y=colSums(sims == 1))
df$count <- 1
total_counts = aggregate(count ~ ., df, FUN = sum)
min_count = min(total_counts$count)
max_count = max(total_counts$count)
df2 = data.frame(x=c(0, 1, 2, 3),
y=c(1.5253165, 1.0291262, 0.4529617, 0))
df3 = data.frame(x=c(0, 1, 2, 3),
y=c(1.5, 1, 0.5, 0))
ggplot(df, aes(x, y)) +
geom_count(aes(colour = after_stat(n), size = after_stat(n)),
alpha = 0.5) +
scale_colour_gradient(
limits = c(min_count, max_count),
breaks = round(seq(min_count, max_count, length.out = 5)),
labels = round(seq(min_count, max_count, length.out = 5)),
guide = "legend"
) +
new_scale_colour() +
geom_point(aes(colour = "Simulated means"),
data = df2, alpha = 0.4) +
geom_point(aes(colour = "Theoretical means"),
data = df3, alpha = 0.4) +
scale_colour_discrete(
name = ""
) +
scale_size_continuous(range = c(3, 7.5), guide = "none")
Created on 2021-04-22 by the reprex package (v1.0.0)
(P.S. sorry for reformatting your code, it just read more easily for myself this way)

Automatically writing scatterplots in ggplot2 to a folder

I have a large number of variables and would like to create scatterplots comparing all variables to a single variable. I have been able to do this in base R using lapply, but I cannot complete the same task in ggplot2 using lapply.
Below is an example dataset.
df <- data.frame("ID" = 1:16)
df$A <- c(1,2,3,4,5,6,7,8,9,10,11,12,12,14,15,16)
df$B <- c(5,6,7,8,9,10,13,15,14,15,16,17,18,18,19,20)
df$C <- c(11,12,14,16,10,12,14,16,10,12,14,16,10,12,14,16)
I define the variables I would like to generate scatterplots with, using the code below:
df_col_names <- df %>% select(A:C) %>% colnames(.)
Below is how I have been able to successfully complete the task of plotting all variables against variable A, using lapply in base R:
lapply(df_col_names, function(x) {
tiff(filename=sprintf("C:\\Documents\\%s.tiff", x),
width = 1000, height = 1000, res=200)
plot(df$A, df[[x]],
pch=19,
cex = 1.5,
ylab = x,
ylim = c(0, 20),
xlim = c(0, 20))
dev.off()
})
Below is my attempt at completing the task in ggplot2 without any success. It generates the tiff images, although they are empty.
lapply(df_col_names, function(x) {
tiff(filename=sprintf("C:\\Documents\\%s.tiff", x),
width = 1000, height = 1000, res=200)
ggplot(df) +
geom_point(data = df,
aes(x = A, y = df_col_names[[x]], size = 3)) +
geom_smooth(aes(x = A, y = df_col_names[[x]], size = 0), method = "lm", size=0.5) +
coord_fixed(ratio = 1, xlim = c(0, 20), ylim = c(0, 20)) +
guides(size = FALSE, color = FALSE) +
theme_bw(base_size = 14)
dev.off()
})
It works for me with ggsave. Also note that you are passing string column names to ggplot so use .data to refer to actual column values.
library(ggplot2)
lapply(df_col_names, function(x) {
ggplot(df) +
geom_point( aes(x = A, y = .data[[x]], size = 3)) +
geom_smooth(aes(x = A, y = .data[[x]], size = 0), method = "lm", size=0.5) +
coord_fixed(ratio = 1, xlim = c(0, 20), ylim = c(0, 20)) +
guides(size = FALSE, color = FALSE) +
theme_bw(base_size = 14) -> plt
ggsave(sprintf("%s.tiff", x), plt)
})

Resources