Create additional independent legends in ggplot2 - r

I have been struggling with this for hours now. I have the following script:
library(ggplot2)
sims = replicate(1000, sample(c(0,0,0,0,1,1,1,2,2,2), size=3, replace=FALSE))
df = data.frame(x=colSums(sims == 0),
y=colSums(sims == 1))
df$count <- 1
total_counts = aggregate(count ~ ., df, FUN = sum)
min_count = min(total_counts$count)
max_count = max(total_counts$count)
p = (ggplot(df, aes(x=x, y=y))
+ geom_count(aes(color=..n.., size=..n..), alpha=0.8)
+ guides(color = 'legend', size=FALSE)
+ labs(color='Count')
+ scale_colour_gradient(limits = c(min_count, max_count),
breaks = round(seq(min_count, max_count, length.out=5)),
labels = round(seq(min_count, max_count, length.out=5)))
+ scale_size_continuous(range = c(3, 7.5))
)
So far so good. The problem is that I want to add two additional sets of points:
df2 = data.frame(x=c(0, 1, 2, 3),
y=c(1.5253165, 1.0291262, 0.4529617, 0))
df3 = data.frame(x=c(0, 1, 2, 3),
y=c(1.5, 1, 0.5, 0))
To get something like this:
p2 = (p
+ geom_point(data=df2, aes(x=x, y=y), alpha=0.4, color="red", size = 2.5)
+ geom_point(data=df3, aes(x=x, y=y), alpha=0.4, color="green", size = 2.5)
)
The problem is that I am not being capable of adding these new points to the legend. I would like the legend to be in a different "section". Namely, to have an empty string title (to differentiate these points from "Count" title), and to have strings instead of numbers in their labels ("Simulated means" and "Theoretical means", for example).
Is there any way to achieve this?

A trick I learned from #tjebo is that you can use the ggnewscale package to spawn additional legends. At what point in plot construction you call the new scale is important, so you first want to make a geom/stat layer and add the desired scale. Once these are declared, you can use new_scale_colour() and all subsequent geom/stat layers will use a new colour scale.
library(ggplot2)
#> Warning: package 'ggplot2' was built under R version 4.0.5
library(ggnewscale)
#> Warning: package 'ggnewscale' was built under R version 4.0.3
sims = replicate(1000, sample(c(0,0,0,0,1,1,1,2,2,2), size=3, replace=FALSE))
df = data.frame(x=colSums(sims == 0),
y=colSums(sims == 1))
df$count <- 1
total_counts = aggregate(count ~ ., df, FUN = sum)
min_count = min(total_counts$count)
max_count = max(total_counts$count)
df2 = data.frame(x=c(0, 1, 2, 3),
y=c(1.5253165, 1.0291262, 0.4529617, 0))
df3 = data.frame(x=c(0, 1, 2, 3),
y=c(1.5, 1, 0.5, 0))
ggplot(df, aes(x, y)) +
geom_count(aes(colour = after_stat(n), size = after_stat(n)),
alpha = 0.5) +
scale_colour_gradient(
limits = c(min_count, max_count),
breaks = round(seq(min_count, max_count, length.out = 5)),
labels = round(seq(min_count, max_count, length.out = 5)),
guide = "legend"
) +
new_scale_colour() +
geom_point(aes(colour = "Simulated means"),
data = df2, alpha = 0.4) +
geom_point(aes(colour = "Theoretical means"),
data = df3, alpha = 0.4) +
scale_colour_discrete(
name = ""
) +
scale_size_continuous(range = c(3, 7.5), guide = "none")
Created on 2021-04-22 by the reprex package (v1.0.0)
(P.S. sorry for reformatting your code, it just read more easily for myself this way)

Related

Position stacked identity data sample size as geom_text directly over a bar using geom_bar from ggplot2

In this experiment, we tracked presence or absence of bacterial infection in our subject animals. We were able to isolate which type of bacteria was present in our animals and created a plot that has Week Since Experiment Start on the X axis, and Percentage of Animals Positive for bacterial infection on the Y axis. This is a stacked identity ggplot where each geom_bar contains the different identities of the bacteria that were in the infected animals each week. Here is a sample dataset with the corresponding ggplot code and result:
DummyData <- data.frame(matrix(ncol = 5, nrow = 78))
colnames(DummyData) <- c('WeeksSinceStart','BacteriaType','PositiveOccurences','SampleSize','NewSampleSize')
DummyData$WeeksSinceStart <- c(1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,9,9,9,9,9,10,10,10,10)
DummyData$BacteriaType <- c("BactA","BactB","BactD","BactB","BactE","BactA","BactS","BactF","BactE","BactH","BactJ","BactK","BactE","BactB","BactS","BactF","BactL","BactE","BactW","BactH","BactS","BactJ","BactQ","BactN","BactW","BactA","BactD","BactE","BactA","BactC","BactD","BactK","BactL","BactE","BactD","BactA","BactS","BactK","BactB","BactE","BactF","BactH","BactN","BactE","BactL","BactZ","BactE","BactC","BactR","BactD","BactJ","BactN","BactK","BactW","BactR","BactE","BactW","BactA","BactM","BactG","BactO","BactI","BactE","BactD","BactM","BactH","BactC","BactM","BactW","BactA","BactL","BactB","BactE","BactA","BactS","BactH","BactQ","BactF")
PosOcc <- seq(from = 1, to = 2, by = 1)
DummyData$PositiveOccurences <- rep(PosOcc, times = 13)
DummyData$SampleSize <- c(78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,29,29,29,29,29,10,10,10,10)
DummyData$NewSampleSize <- c(78,NA,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,29,NA,NA,NA,NA,10,NA,NA,NA)
numcolor <- 20
plotcolors <- colorRampPalette(brewer.pal(8, "Set3"))(numcolor)
#GGplot for Dummy Data
DummyDataPlot <- ggplot(DummyData, aes(x = WeeksSinceStart, y = PositiveOccurences/SampleSize, fill = BacteriaType)) + geom_bar(position = "stack", stat = "identity") +
geom_text(label = DummyData$NewSampleSize, nudge_y = 0.1) +
scale_y_continuous(limits = c(0,0.6), breaks = seq(0, 1, by = 0.1)) + scale_x_continuous(limits = c(0.5,11), breaks = seq(0,10, by =1)) +
labs(
x = "Weeks Since Start",
y = "Proportion Positive") +
scale_fill_manual(values = plotcolors)
The problem: I cannot seem to find a way to position the labels from geom_text directly over each bar. I would also love to add the text "n = " to the sample size value directly over each bar. Thank you for your help!
I have tried different values for position_dodge statement and nudge_y statement with no success.
Sometimes the easiest approach is to do some data wrangling, i.e. one option would be to create a separate dataframe for your labels:
library(ggplot2)
library(dplyr)
dat_label <- DummyData |>
group_by(WeeksSinceStart) |>
summarise(y = sum(PositiveOccurences / SampleSize), SampleSize = unique(SampleSize))
ggplot(DummyData, aes(x = WeeksSinceStart, y = PositiveOccurences / SampleSize, fill = BacteriaType)) +
geom_bar(position = "stack", stat = "identity") +
geom_text(data = dat_label, aes(x = WeeksSinceStart, y = y, label = SampleSize), inherit.aes = FALSE, nudge_y = .01) +
#scale_y_continuous(limits = c(0, 0.6), breaks = seq(0, 1, by = 0.1)) +
scale_x_continuous(limits = c(0.5, 11), breaks = seq(0, 10, by = 1)) +
labs(
x = "Weeks Since Start",
y = "Proportion Positive"
) +
scale_fill_manual(values = plotcolors)

Automatically writing scatterplots in ggplot2 to a folder

I have a large number of variables and would like to create scatterplots comparing all variables to a single variable. I have been able to do this in base R using lapply, but I cannot complete the same task in ggplot2 using lapply.
Below is an example dataset.
df <- data.frame("ID" = 1:16)
df$A <- c(1,2,3,4,5,6,7,8,9,10,11,12,12,14,15,16)
df$B <- c(5,6,7,8,9,10,13,15,14,15,16,17,18,18,19,20)
df$C <- c(11,12,14,16,10,12,14,16,10,12,14,16,10,12,14,16)
I define the variables I would like to generate scatterplots with, using the code below:
df_col_names <- df %>% select(A:C) %>% colnames(.)
Below is how I have been able to successfully complete the task of plotting all variables against variable A, using lapply in base R:
lapply(df_col_names, function(x) {
tiff(filename=sprintf("C:\\Documents\\%s.tiff", x),
width = 1000, height = 1000, res=200)
plot(df$A, df[[x]],
pch=19,
cex = 1.5,
ylab = x,
ylim = c(0, 20),
xlim = c(0, 20))
dev.off()
})
Below is my attempt at completing the task in ggplot2 without any success. It generates the tiff images, although they are empty.
lapply(df_col_names, function(x) {
tiff(filename=sprintf("C:\\Documents\\%s.tiff", x),
width = 1000, height = 1000, res=200)
ggplot(df) +
geom_point(data = df,
aes(x = A, y = df_col_names[[x]], size = 3)) +
geom_smooth(aes(x = A, y = df_col_names[[x]], size = 0), method = "lm", size=0.5) +
coord_fixed(ratio = 1, xlim = c(0, 20), ylim = c(0, 20)) +
guides(size = FALSE, color = FALSE) +
theme_bw(base_size = 14)
dev.off()
})
It works for me with ggsave. Also note that you are passing string column names to ggplot so use .data to refer to actual column values.
library(ggplot2)
lapply(df_col_names, function(x) {
ggplot(df) +
geom_point( aes(x = A, y = .data[[x]], size = 3)) +
geom_smooth(aes(x = A, y = .data[[x]], size = 0), method = "lm", size=0.5) +
coord_fixed(ratio = 1, xlim = c(0, 20), ylim = c(0, 20)) +
guides(size = FALSE, color = FALSE) +
theme_bw(base_size = 14) -> plt
ggsave(sprintf("%s.tiff", x), plt)
})

R, ggplot2: How to plot bezier curves that pass through fixed coordinates?

I am helping someone translate hand-drawn economics supply and demand functions into image files that can be included in a Word document. These have been going well using Hmisc::bezier and geom_path modeled after Andrew Heiss's recon plots and using his curve_intersect function. That is, until the author asked that one of the supply curves should pass through a specified set of coordinates. The Hmisc::bezier function only uses the first and last control point as absolute, and bends toward intermediate points so the specified intersection point does not match the curve. I tried creating a spline of 2 bezier curves with the bezier function from the bezier package (v1.1.2, https://cran.r-project.org/web/packages/bezier/bezier.pdf), but this fails with "Error in FUN(X[[i]], ...) : object 'x' not found", which I do not understand or know how to fix.
Please let me know where I am going wrong or if there is a better method! I will include the commented out attempts using various functions. Please excuse the amateurish code, as I am a relative newb at R and ggplot2.
This section not directly relevant to my question
# Graph figures for physical economics, negative oil prices paper
library(reconPlots)
library(dplyr)
library(ggplot2)
library(patchwork)
library(ggrepel)
library(bezier)
library(ggforce)
options(ggrepel.max.time = 1)
options(ggrepel.max.iter = 20000)
#Set seed value for ggrepel
set.seed(52)
# panel (a)
#Set values of curves using the bezier function, each pair of c() values
# is an xy coordinate, and the sets of coordinates control the shape of the
# curve
supply <- Hmisc::bezier(c(1, 5, 6), c(3, 4, 9)) %>%
as_data_frame()
demand <- Hmisc::bezier(c(0, 9, 9), c(6, 6, 6)) %>%
as_data_frame()
label_height <- Hmisc::bezier(c(0, 9, 9), c(8, 8, 8)) %>%
as_data_frame()
# Calculate the intersections of the two curves
intersections <- bind_rows(curve_intersect(supply, demand))
# Calculate point where the curve label(s) intersect a specified height
supply_label <- bind_rows(curve_intersect(supply, label_height))
labels <- data_frame(label = expression("PS"[CR]^DRL),
x = supply_label$x,
y = supply_label$y)
production <- ggplot(mapping = aes(x = x, y = y)) +
#Draw the supply curve. Demand is not drawn in this figure, but the
# intersections of an imaginary demand curve are used to illustrate P0
# and Q0, the intersection point, and the dotted lines
geom_path(data = supply, color = "#0073D9", size = 1) +
geom_segment(data = intersections,
aes(x = x, y = 0, xend = x, yend = y), lty = "dotted") +
geom_segment(data = intersections,
aes(x = 0, y = y, xend = x, yend = y), lty = "dotted") +
#Draw the supply curve label using the intersection calculated above, using
# GGrepel so that the labels do not overlap the curve line
geom_text_repel(data = labels
,aes(x = x, y = y, label = label)
,parse = TRUE
,direction = "x"
,force = 3
,force_pull = 0.1
,hjust = 0
,min.segment.length = 0
) +
#Draw the intersection point based on intersection function between supply
# and the phantom flat demand curve at height y=6
geom_point(data = intersections, size = 3) +
#Use scale functions to set y-axis label, axis intersection point labels,
# and limits of the viewing area
scale_x_continuous(expand = c(0, 0), breaks = intersections$x
,labels = expression(Q[CR]^{DRL-PS})
,limits=c(0,9)
) +
scale_y_continuous(expand = c(0, 0), breaks = c(intersections$y, 9)
,labels = c(expression(P[CR]==frac("$",brl))
,expression(P[CR]))
,limits=c(0,9)
) +
#Use labs function to set x-axis title and title of each graph using the
# caption function so that it displays on the bottom
labs(x = expression(frac(Barrels,Week)),
caption = expression(atop("(a) Driller Production Supply", "of Crude Oil"))
) +
#Set classic theme, x-axis title on right-hand side using larger font of
# relative size 1.2, graph title on left-hand side using same larger font
theme_classic() +
theme(axis.title.y = element_blank(),
axis.title.x = element_text(hjust = 1),
axis.text = element_text(size=rel(1.2)),
plot.caption = element_text(hjust = 0.5, size=rel(1.2))
) +
coord_equal()
# Save the intersections so we can set the same quantity, price for panel (c)
specified_intersections = intersections
# Panel (b)
supply <- Hmisc::bezier(c(3.99, 4), c(0, 9)) %>%
as_data_frame()
demand <- Hmisc::bezier(c(2, 3, 4, 5), c(9, 6.5, 6, 5.5)) %>%
as_data_frame()
demand_capacity <- Hmisc::bezier(c(5, 5), c(0, 5.5)) %>%
as_data_frame()
supply_capacity <- Hmisc::bezier(c(4.999, 5), c(0, 9)) %>%
as_data_frame()
supply_label_height <- Hmisc::bezier(c(0, 9), c(9, 9)) %>%
as_data_frame()
demand_label_height <- Hmisc::bezier(c(0, 9), c(8, 8)) %>%
as_data_frame()
capacity_label_height <- Hmisc::bezier(c(0, 9), c(9, 9)) %>%
as_data_frame()
# Calculate the intersections of the two curves
intersections <- bind_rows(curve_intersect(supply,
demand))
supply_label <- bind_rows(curve_intersect(supply
,supply_label_height))
demand_label <- bind_rows(curve_intersect(demand
,demand_label_height))
capacity_label <- bind_rows(curve_intersect(supply_capacity
,capacity_label_height))
labels <- data_frame(label = c(expression("OD"[CR]^DRL),expression("OS"[CR]^DRL)
,expression("Q"[CR]^CAP)
),
x = c(demand_label$x, supply_label$x
, capacity_label$x
),
y = c(demand_label$y, supply_label$y
, capacity_label$y
)
)
inventory <- ggplot(mapping = aes(x = x, y = y)) +
geom_path(data = supply, color = "#0073D9", size = 1) +
geom_path(data = demand, color = "#FF4036", size = 1) +
geom_path(data = demand_capacity, color = "#FF4036", size = 1) +
geom_path(data = supply_capacity, color = "#0073D9", size = 1, lty = "dashed") +
geom_segment(data = intersections,
aes(x = 0, y = y, xend = x, yend = y), lty = "dotted") +
geom_text_repel(data = labels
,aes(x = x, y = y, label = label)
,parse = TRUE
,direction = "x"
,force = 3
,force_pull = 0.1
,hjust = c(0, 0, 1)
,min.segment.length = 0
) +
geom_point(data = intersections, size = 3) +
scale_x_continuous(expand = c(0, 0), breaks = c(intersections$x
, 5),
labels = c(expression(paste(Q[CR]^{DRL-OS},phantom(12345)))
,expression(Q[CR]^CAP)
)
, limits=c(0,9)) +
scale_y_continuous(expand = c(0, 0), breaks = c(intersections$y, 9),
labels = c(expression(P[CR]),expression(P[CR]))
, limits=c(0,9)) +
labs(x = "Barrels",
caption = expression(atop("(b) Driller Storage / Ownership", "of Crude Oil"))
) +
theme_classic() +
theme(axis.title.y = element_blank(),
axis.title.x = element_text(hjust = 1),
axis.text = element_text(size=rel(1.2)),
plot.caption = element_text(hjust = 0.5, size=rel(1.2))
) +
coord_equal()
Relevant section
# panel (c)
# ggforce package method
#supply <- list(c(1, 4, specified_intersections$x, 5, 7),
# c(3, 4, specified_intersections$y, 7, 9)) %>%
# as_data_frame()
# bezier package method: Fails with "Error in FUN(X[[i]], ...) : object 'x' not found"
t <- seq(0, 2, length=10)
p <- list(c(1, 4, specified_intersections$x, 7, 8),
c(3, 4, specified_intersections$y, 6, 9))
#p <- matrix(c(1,3, 4,4, specified_intersections$x,specified_intersections$y,
# 7,6, 8,9), nrow=5, ncol=2, byrow=TRUE)
supply <- bezier(t=t, p=p) %>%
as_data_frame()
# Original: Fails because it does not pass through the specified intersection
#supply <- Hmisc::bezier(c(1, specified_intersections$x, 8),
# c(3, specified_intersections$y, 9)) %>%
# as_data_frame()
# Hmisc method: Fails because there is no way to get the two curves to appear
# contiguous
#supply1 <- Hmisc::bezier(c(1, 4, specified_intersections$x),
# c(3, 4, specified_intersections$y)) %>%
# as_data_frame()
#supply2 <- Hmisc::bezier(c(specified_intersections$x, 6, 7),
# c(specified_intersections$y, 8, 9)) %>%
# as_data_frame()
#demand <- Hmisc::bezier(c(0, 9), c(specified_intersections$y, specified_intersections$y)) %>%
# as_data_frame()
label_height <- Hmisc::bezier(c(0, 9), c(8, 8)) %>%
as_data_frame()
# Calculate the intersections of the two curves
#intersections <- bind_rows(curve_intersect(supply, demand))
#supply_label <- bind_rows(curve_intersect(supply,
# label_height))
#labels <- data_frame(label = expression("SS"[CR]^DRL),
# x = supply_label$x,
# y = supply_label$y)
sales <- ggplot(mapping = aes(x = x, y = y)) +
# ggforce package method
# geom_bspline(data = supply, color = "#0073D9", size = 1) +
# Original geom_path method
geom_path(data = supply, color = "#0073D9", size = 1) +
# Supply 1 and 2 for Hmisc method
# geom_path(data = supply1, color = "#0073D9", size = 1) +
# geom_path(data = supply2, color = "#0073D9", size = 1) +
geom_segment(data = specified_intersections,
aes(x = x, y = 0, xend = x, yend = y), lty = "dotted") +
geom_segment(data = specified_intersections,
aes(x = 0, y = y, xend = x, yend = y), lty = "dotted") +
# geom_text_repel(data = labels
# ,aes(x = x, y = y, label = label)
# ,parse = TRUE
# ,direction = "x"
# ,force = 3
# ,force_pull = 0.1
# ,hjust = 0
# ,min.segment.length = 0
# ) +
geom_point(data = specified_intersections, size = 3) +
scale_x_continuous(expand = c(0, 0), breaks = specified_intersections$x,
labels = expression(Q[CR]^{DRL-SS}), limits=c(0,9)) +
scale_y_continuous(expand = c(0, 0), breaks = c(specified_intersections$y, 9),
labels = c(expression(P[CR]),expression(P[CR]))) +
labs(x = expression(frac(Barrels,Week)),
caption = expression(atop("(c) Driller Sales Supply", "of Crude Oil"))
) +
theme_classic() +
theme(axis.title.y = element_blank(),
axis.title.x = element_text(hjust = 1),
axis.text = element_text(size=rel(1.2)),
plot.caption = element_text(hjust = 0.5, size=rel(1.2))
) +
coord_equal()
patchwork <- (production | inventory | sales)
patchwork
Graphs before implementation of fixed coordinates. Need to move panel (c) intersection point to match panel (a)
I solved the "Error in FUN(X[[i]], ...) : object 'x' not found" by printing the supply variable and noticing that the bezier function names its rows V1,V2 and not x,y. I needed to set the aesthetics of the geom_path to the correct mapping.
Relevant Section, trimmed to only the bezier method
# panel (c)
# bezier package method
t <- seq(0, 2, length = 100)
p <- matrix(c(1,3, 4,4, specified_intersections$x,specified_intersections$y,
7,6, 8,9), nrow=5, ncol=2, byrow=TRUE)
supply <- bezier::bezier(t=t, p=p, deg=2) %>%
as_data_frame()
sales <- ggplot(mapping = aes(x = x, y = y)) +
# Original geom_path method
geom_path(data = supply, mapping = aes(x = V1, y = V2),
color = "#0073D9", size = 1, inherit.aes = FALSE) +
geom_segment(data = specified_intersections,
aes(x = x, y = 0, xend = x, yend = y), lty = "dotted") +
geom_segment(data = specified_intersections,
aes(x = 0, y = y, xend = x, yend = y), lty = "dotted") +
geom_point(data = specified_intersections, size = 3) +
scale_x_continuous(expand = c(0, 0), breaks = specified_intersections$x,
labels = expression(Q[CR]^{DRL-SS}), limits=c(0,9)) +
scale_y_continuous(expand = c(0, 0), breaks = c(specified_intersections$y, 9),
labels = c(expression(P[CR]),expression(P[CR]))) +
labs(x = expression(frac(Barrels,Week)),
caption = expression(atop("(c) Driller Sales Supply", "of Crude Oil"))
) +
theme_classic() +
theme(axis.title.y = element_blank(),
axis.title.x = element_text(hjust = 1),
axis.text = element_text(size=rel(1.2)),
plot.caption = element_text(hjust = 0.5, size=rel(1.2))
) +
coord_equal()
patchwork <- (production | inventory | sales)
patchwork
This does not solve my larger problem of needing a smooth curve that passes through a specified set of coordinates, as it produces two bezier curves that do not match.
I will do some research on using functions to specify bezier curves and find out if there is some mathematical or programmatic way to specify a bezier curve that passes through a set of fixed coordinates. If I find one, I'll edit this answer.
If anyone knows how to accomplish this, I would appreciate any help!
Kinked bezier curves

ggforce facet_zoom error with ggplot2 on R

I have a data.frame in R 4.0.2 with a continuous variable in one column and two possible values of a categorical variable (variable 'type': known or novel) in another, which I use to color them differently (using a palette from ggsci 2.9 package). I represent an histogram (stat_bin) with ggplot2 3.3.2 and I want to use the facet_zoom function of ggforce 0.3.2 to zoom only the data belonging to one of the 'types' (using the option zoom.data, as it is done in the volcano example on http://cran.univ-paris1.fr/web/packages/ggforce/vignettes/Visual_Guide.html#contextual-zoom), however I get this error:
Error: Aesthetics must be either length 1 or the same as the data (2000): x
Reproducible example:
library(ggplot2)
library(ggsci)
library(ggforce)
testdata <- as.data.frame(sort(rnorm(1000)))
testdata$type <- "known"
testdata[501:1000,2] <- "novel"
# Working code
ggplot(testdata) +
stat_bin(aes(x=testdata[,1], fill = type), binwidth = 1, color="white") +
scale_fill_npg() + theme_light() +
facet_zoom(xlim = c(0, 4), ylim = c(0, 300), horizontal = TRUE, zoom.size = 0.3)
# Desired code
ggplot(testdata) +
stat_bin(aes(x=testdata[,1], fill = type), data = cbind(testdata, zoom = FALSE), binwidth = 1, color="white") +
stat_bin(aes(x=testdata[testdata$type == "novel",1]), data = cbind(testdata, zoom = TRUE), binwidth = 0.5) +
scale_fill_npg() + theme_light() +
facet_zoom(xlim = c(0, 4), ylim = c(0, 300), horizontal = TRUE, zoom.size = 0.3, zoom.data = zoom)
Thanks!
The issue is that you pass the whole dataset as data in the second stat_bin. Simply pass the subsetted df instead of trying to subset in aes():
BTW: I also renamed the first variable in your data as x.
library(ggplot2)
library(ggsci)
library(ggforce)
set.seed(42)
testdata <- data.frame(x = sort(rnorm(1000)))
testdata$type <- "known"
testdata[501:1000,2] <- "novel"
# Desired code
ggplot(testdata) +
stat_bin(aes(x = x, fill = type), data = cbind(testdata, zoom = FALSE), binwidth = 1, color="white") +
stat_bin(aes(x = x), data = cbind(testdata[testdata$type == "novel", ], zoom = TRUE), binwidth = 0.5) +
scale_fill_npg() + theme_light() +
facet_zoom(xlim = c(0, 4), ylim = c(0, 300), horizontal = TRUE, zoom.size = 0.3, zoom.data = zoom)
To only show the type == "novel" data in the zoomed plot, try this:
library(tidyverse)
library(ggsci)
library(ggforce)
testdata <- data.frame(values = sort(rnorm(1000)))
testdata$type <- "known"
testdata[501:1000,2] <- "novel"
# Desired code
ggplot(testdata) +
stat_bin(aes(x = values, fill = type),
binwidth = 1, color="white") +
scale_fill_npg() + theme_light() +
facet_zoom(zoom.data = ifelse(type == "novel", NA, FALSE),
xlim = c(0, 4), ylim = c(0, 300),
horizontal = TRUE)

how to draw two half circles in ggplot in r

How can I make a plot like this with two different-sized half circles (or other shapes such as triangles etc.)?
I've looked into a few options: Another post suggested using some unicode symbol, that didn't work for me. And if I use a vector image, how can I properly adjust the size parameter so the 2 circles touch each other?
Sample data (I would like to make the size of the two half-circles equal to circle1size and circle2size):
df = data.frame(circle1size = c(1, 3, 2),
circle2size = c(3, 6, 5),
middlepointposition = c(1, 2, 3))
And ultimately is there a way to position the half-circles at different y-values too, to encode a 3rd dimension, like so?
Any advice is much appreciated.
What you're asking for is a bar plot in polar coordinates. This can be done easily in ggplot2. Note that we need to map y = sqrt(count) to get the area of the half circle proportional to the count.
df <- data.frame(x = c(1, 2),
type = c("Investors", "Assignees"),
count = c(19419, 1132))
ggplot(df, aes(x = x, y = sqrt(count), fill = type)) + geom_col(width = 1) +
scale_x_discrete(expand = c(0,0), limits = c(0.5, 2.5)) +
coord_polar(theta = "x", direction = -1)
Further styling would have to be applied to remove the gray background, remove the axes, change the color, etc., but that's all standard ggplot2.
Update 1: Improved version with multiple countries.
df <- data.frame(x = rep(c(1, 2), 3),
type = rep(c("Investors", "Assignees"), 3),
country = rep(c("Japan", "Germany", "Korea"), each = 2),
count = c(19419, 1132, 8138, 947, 8349, 436))
df$country <- factor(df$country, levels = c("Japan", "Germany", "Korea"))
ggplot(df, aes(x=x, y=sqrt(count), fill=type)) + geom_col(width =1) +
scale_x_continuous(expand = c(0, 0), limits = c(0.5, 2.5)) +
scale_y_continuous(expand = c(0, 0)) +
coord_polar(theta = "x", direction = -1) +
facet_wrap(~country) +
theme_void()
Update 2: Drawing the individual plots at different locations.
We can do some trickery to take the individual plots and plot them at different locations in an enclosing plot. This works, and is a generic method that can be done with any sort of plot, but it's probably overkill here. Anyways, here is the solution.
library(tidyverse) # for map
library(cowplot) # for draw_text, draw_plot, get_legend, insert_yaxis_grob
# data frame of country data
df <- data.frame(x = rep(c(1, 2), 3),
type = rep(c("Investors", "Assignees"), 3),
country = rep(c("Japan", "Germany", "Korea"), each = 2),
count = c(19419, 1132, 8138, 947, 8349, 436))
# list of coordinates
coord_list = list(Japan = c(1, 3), Germany = c(2, 1), Korea = c(3, 2))
# make list of individual plots
split(df, df$country) %>%
map( ~ ggplot(., aes(x=x, y=sqrt(count), fill=type)) + geom_col(width =1) +
scale_x_continuous(expand = c(0, 0), limits = c(0.5, 2.5)) +
scale_y_continuous(expand = c(0, 0), limits = c(0, 160)) +
draw_text(.$country[1], 1, 160, vjust = 0) +
coord_polar(theta = "x", start = 3*pi/2) +
guides(fill = guide_legend(title = "Type", reverse = T)) +
theme_void() + theme(legend.position = "none") ) -> plotlist
# extract the legend
legend <- get_legend(plotlist[[1]] + theme(legend.position = "right"))
# now plot the plots where we want them
width = 1.3
height = 1.3
p <- ggplot() + scale_x_continuous(limits = c(0.5, 3.5)) + scale_y_continuous(limits = c(0.5, 3.5))
for (country in names(coord_list)) {
p <- p + draw_plot(plotlist[[country]], x = coord_list[[country]][1]-width/2,
y = coord_list[[country]][2]-height/2,
width = width, height = height)
}
# plot without legend
p
# plot with legend
ggdraw(insert_yaxis_grob(p, legend))
Update 3: Completely different approach, using geom_arc_bar() from the ggforce package.
library(ggforce)
df <- data.frame(start = rep(c(-pi/2, pi/2), 3),
type = rep(c("Investors", "Assignees"), 3),
country = rep(c("Japan", "Germany", "Korea"), each = 2),
x = rep(c(1, 2, 3), each = 2),
y = rep(c(3, 1, 2), each = 2),
count = c(19419, 1132, 8138, 947, 8349, 436))
r <- 0.5
scale <- r/max(sqrt(df$count))
ggplot(df) +
geom_arc_bar(aes(x0 = x, y0 = y, r0 = 0, r = sqrt(count)*scale,
start = start, end = start + pi, fill = type),
color = "white") +
geom_text(data = df[c(1, 3, 5), ],
aes(label = country, x = x, y = y + scale*sqrt(count) + .05),
size =11/.pt, vjust = 0)+
guides(fill = guide_legend(title = "Type", reverse = T)) +
xlab("x axis") + ylab("y axis") +
coord_fixed() +
theme_bw()
If you don't need to have ggplot2 map aesthetics other than x and y you could try egg::geom_custom,
# devtools::install_github("baptiste/egg")
library(egg)
library(grid)
library(ggplot2)
d = data.frame(r1= c(1,3,2), r2=c(3,6,5), x=1:3, y=1:3)
gl <- Map(mushroomGrob, r1=d$r1, r2=d$r2, gp=list(gpar(fill=c("bisque","maroon"), col="white")))
d$grobs <- I(gl)
ggplot(d, aes(x,y)) +
geom_custom(aes(data=grobs), grob_fun=I) +
theme_minimal()
with the following grob,
mushroomGrob <- function(x=0.5, y=0.5, r1=0.2, r2=0.1, scale = 0.01, angle=0, gp=gpar()){
grob(x=x,y=y,r1=r1,r2=r2, scale=scale, angle=angle, gp=gp , cl="mushroom")
}
preDrawDetails.mushroom <- function(x){
pushViewport(viewport(x=x$x,y=x$y))
}
postDrawDetails.mushroom<- function(x){
upViewport()
}
drawDetails.mushroom <- function(x, recording=FALSE, ...){
th2 <- seq(0,pi, length=180)
th1 <- th2 + pi
d1 <- x$r1*x$scale*cbind(cos(th1+x$angle*pi/180),sin(th1+x$angle*pi/180))
d2 <- x$r2*x$scale*cbind(cos(th2+x$angle*pi/180),sin(th2+x$angle*pi/180))
grid.polygon(unit(c(d1[,1],d2[,1]), "snpc")+unit(0.5,"npc"),
unit(c(d1[,2],d2[,2]), "snpc")+unit(0.5,"npc"),
id=rep(1:2, each=length(th1)), gp=x$gp)
}
# grid.newpage()
# grid.draw(mushroomGrob(gp=gpar(fill=c("bisque","maroon"), col=NA)))

Resources