Strip plot with many columns - r

How can I replicate a plot like the one below using ggplot?
It is basically a set of strip plots, bound side-by-side. I came across it as an image in a publication, so I don't have the code that made it.
My data source (not plotted) would be something like a list of vectors, similar to those below, each of which can be of a different length.
my_data = list(a=c(1,18,90), b=c(1,5,7,8,80), c=c(1,6), d=c(1,22,35,300))
(That said, using a different data structure would be fine too - e.g. a dataframe with some NA entries)
Thanks for any help!

This seems pretty close:
library(tidyverse)
set.seed(123)
IDs <- letters[1:16]
values <- sample(c(rep(NA, 100000), seq(1e-10, 0, 1e-10)), size = 1600, replace = TRUE)
test_data <- list(IDs = IDs, values = values)
df <- as.data.frame(test_data)
ggplot(df, aes(x = IDs, y = values)) +
geom_point(shape = 95, size = 10) +
geom_vline(xintercept = seq(0.5, 16.5, 1), colour = "grey75") +
theme_classic(base_size = 20) +
scale_x_discrete(expand = c(0.035, 0.035)) +
scale_y_log10(expand = c(0.001, 0.001),
breaks = c(0.0001, 0.001, 0.01, 0.1, 0),
labels = expression(10^-4, 10^-3, 10^-2, 10^-1, 10^0)) +
theme(axis.title = element_blank(),
axis.ticks.x = element_blank(),
panel.border = element_rect(colour = "black", fill = NA, size = 2))
Edit
Here is a more suitable alternative for the data in your example:
library(tidyverse)
my_data = list(a=c(1,18,90), b=c(1,5,7,8,80), c=c(1,6), d=c(1,22,35,300))
df <- stack(my_data)
ggplot(df, aes(x = ind, y = values)) +
geom_errorbarh(aes(xmin = as.numeric(ind) + 0.45,
xmax = as.numeric(ind) - 0.45),
height = 0) +
geom_vline(xintercept = seq(0.5, 16.5, 1), colour = "grey75") +
theme_classic(base_size = 20) +
scale_x_discrete(expand = c(0.125, 0.125)) + # alter these numbers to suit
theme(axis.title = element_blank(),
axis.ticks.x = element_blank(),
panel.border = element_rect(colour = "black", fill = NA, size = 2))
Created on 2021-08-31 by the reprex package (v2.0.1)

Related

Points in ggplot2 not being dodged correctly in R

I am trying to make these plots by dodging points. The points radius differ by size. Ideally, there have to be four points in each "coordinate" as indicated by the dataframe. The didge does not seem to work for two of the points. I tried changing the width to 5 but I can't see the points. Any clue on what's going on?
library(ggplot2)
set.seed(123)
species <- rep(c("A","B","C","D"), each = 5)
x.axis <- rep(c(0.5, 0.5, 1, 1,0.75), each = 4)
y.axis <- rep(c(0.5, 1, 1, 0.5,0.75), each = 4)
value <- c(1,2,10,3,4,5,4,3,2,3,6,5,10,4,5,17,1,10,13,3)
data <- data.frame(specie,value, x.axis, y.axis)
# 2D plots
ggplot(data, aes(x = x.axis, y = y.axis, color = value, size = value)) +
theme_classic() +
geom_point(aes(fill = species, color = species), alpha = 0.7,position = position_dodge(width = 0.05)) +
theme(text=element_text(size=10, family="Arial", color = "black")) +
theme(aspect.ratio = 10/10) +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_rect(colour = "black", size=1)) +
theme(axis.title.y = element_text(margin = margin(t = 0, r = 0, b = 0, l = 0)))
Maybe this is what you are looking for. The way your points get dodged is determined by the grouping. In your case the data by default gets grouped by the variable mapped on the color aes, i.e. species. Hence, at each position or coordinate only points with different species get dodged.
As far as I understand your question you want to dodge all points at the same position or coordinate instead. To this end you could add an id column to your dataset by position, i.e. as we have four points at each position the points are assigned an id running from 1 to 4. This id column could then be mapped on the group aesthetic:
library(dplyr)
library(ggplot2)
data <- data |>
group_by(x.axis, y.axis) |>
mutate(id = row_number())
# 2D plots
ggplot(data, aes(x = x.axis, y = y.axis, size = value)) +
theme_classic() +
geom_point(aes(color = species, group = id), alpha = 0.7, position = position_dodge(width = 0.05)) +
theme(text = element_text(size = 10, family = "Arial", color = "black")) +
theme(aspect.ratio = 10 / 10) +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_rect(colour = "black", size = 1)
) +
theme(axis.title.y = element_text(margin = margin(t = 0, r = 0, b = 0, l = 0)))

ggplot2 Create shaded area with gradient below curve

I would like to create the plot below using ggplot.
Does anyone know of any geom that create the shaded region below the line chart?
Thank you
I think you're just looking for geom_area. However, I thought it might be a useful exercise to see how close we can get to the graph you are trying to produce, using only ggplot:
Pretty close. Here's the code that produced it:
Data
library(ggplot2)
library(lubridate)
# Data points estimated from the plot in the question:
points <- data.frame(x = seq(as.Date("2019-10-01"), length.out = 7, by = "month"),
y = c(2, 2.5, 3.8, 5.4, 6, 8.5, 6.2))
# Interpolate the measured points with a spline to produce a nice curve:
spline_df <- as.data.frame(spline(points$x, points$y, n = 200, method = "nat"))
spline_df$x <- as.Date(spline_df$x, origin = as.Date("1970-01-01"))
spline_df <- spline_df[2:199, ]
# A data frame to produce a gradient effect over the filled area:
grad_df <- data.frame(yintercept = seq(0, 8, length.out = 200),
alpha = seq(0.3, 0, length.out = 200))
Labelling functions
# Turns dates into a format matching the question's x axis
xlabeller <- function(d) paste(toupper(month.abb[month(d)]), year(d), sep = "\n")
# Format the numbers as per the y axis on the OP's graph
ylabeller <- function(d) ifelse(nchar(d) == 1 & d != 0, paste0("0", d), d)
Plot
ggplot(points, aes(x, y)) +
geom_area(data = spline_df, fill = "#80C020", alpha = 0.35) +
geom_hline(data = grad_df, aes(yintercept = yintercept, alpha = alpha),
size = 2.5, colour = "white") +
geom_line(data = spline_df, colour = "#80C020", size = 1.2) +
geom_point(shape = 16, size = 4.5, colour = "#80C020") +
geom_point(shape = 16, size = 2.5, colour = "white") +
geom_hline(aes(yintercept = 2), alpha = 0.02) +
theme_bw() +
theme(panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank(),
panel.grid.minor.y = element_blank(),
panel.border = element_blank(),
axis.line.x = element_line(),
text = element_text(size = 15),
plot.margin = margin(unit(c(20, 20, 20, 20), "pt")),
axis.ticks = element_blank(),
axis.text.y = element_text(margin = margin(0,15,0,0, unit = "pt"))) +
scale_alpha_identity() + labs(x="",y="") +
scale_y_continuous(limits = c(0, 10), breaks = 0:5 * 2, expand = c(0, 0),
labels = ylabeller) +
scale_x_date(breaks = "months", expand = c(0.02, 0), labels = xlabeller)

How to show labels in geom_text that is proportional to geom_bar group variable

I have been trying to output in ggplot a graph that shows labels in percentage value and in proportion to the grouping factor defined in geom_bar. Instead of % values proportionate to the overall population, I would like to output a label value that is proportionate to each sub-group (in this case Place A and Place B) but I have not managed to. See below the reproducible example
Reproducible dataframe
Random<-data.frame(replicate(3,sample(0:3,3024,rep=TRUE)))
Random$Trxn_type <- sample(c("Debit", "Credit"),
size = nrow(Random),
prob = c(0.76, 0.24), replace = TRUE)
Random$YN <- sample(c("Yes", "No"),
size = nrow(Random),
prob = c(0.76, 0.24), replace = TRUE)
Random$Place <- sample(c("PlaceA", "PlaceB"),
size = nrow(Random),
prob = c(0.76, 0.24), replace = TRUE)
Random<-Random[, 4:6]
Then applied the following code
Share<-ggplot(Random, aes(x = YN, fill=Place)) +
scale_fill_brewer(palette="Greens")+
geom_bar(aes(y = ..prop.., group = Place),position = position_dodge()) +
facet_wrap(~ Random$Trxn_type, scales = "free_x", ncol=2)+
theme(strip.text.x = element_text(size = 15, colour = "black"))+
theme(panel.background = element_rect(fill = "white"),legend.position = "bottom")+
scale_y_continuous(labels = percent)+
ylab("Frequency") +
coord_flip()+
xlab("Answers") +
theme(plot.title = element_text(size = 16, face = "bold"),
axis.text=element_text(size=12),
axis.title=element_text(size=12))+
geom_text(aes(y=..prop..,label=scales::percent((..count..)/tapply(..count..,..PANEL..,sum)[..PANEL..])),
stat="count", vjust=-.5, position=position_dodge(.9))
Share
And got the following output
Instead of this percentage distribution I would like to see the % value of replies considering Place A and Place B as two separate populations. Put it more simply I would like the labels to show the % value corresponding to the size of the histogram bars in a way that histograms for Place A in credit to sum up to 100 and histograms for Place B in credit to sum up to 100. The same would apply to debit.
Thanks!
Here is a solution that computes the proportions with dplyr and then pipes the result to ggplot.
I have also put all theme settings in the same call to theme().
I have reposted the data creation code, this time setting the RNG seed in order to make the data example reproducible.
library(dplyr)
library(ggplot2)
Random %>%
count(Trxn_type, YN, Place) %>%
left_join(Random %>% count(Trxn_type, name = "m"), by = "Trxn_type") %>%
mutate(Prop = n/m) %>%
ggplot(aes(x = YN, y = Prop, fill = Place)) +
geom_col(position = position_dodge()) +
geom_text(aes(label = scales::percent(Prop)),
hjust = -0.25,
position = position_dodge(0.9)) +
facet_wrap(~ Trxn_type, scales = "free_x", ncol = 2) +
scale_fill_brewer(palette = "Greens") +
scale_y_continuous(limits = c(0, 1), labels = scales::percent) +
xlab("Answers") +
ylab("Frequency") +
coord_flip() +
theme(panel.background = element_rect(fill = "white"),
legend.position = "bottom",
strip.text.x = element_text(size = 15, colour = "black"),
plot.title = element_text(size = 16, face = "bold"),
axis.text = element_text(size = 12),
axis.title = element_text(size = 12))
Edit.
Following the OP's comment, here is a way to also count by Place. The only change to the code above is the left_join instruction.
left_join(Random %>% count(Trxn_type, Place, name = "m"),
by = c("Trxn_type", "Place")) %>%
Data creation code.
set.seed(1234)
Random <- data.frame(replicate(3,sample(0:3,3024,rep=TRUE)))
Random$Trxn_type <- sample(c("Debit", "Credit"),
size = nrow(Random),
prob = c(0.76, 0.24), replace = TRUE)
Random$YN <- sample(c("Yes", "No"),
size = nrow(Random),
prob = c(0.76, 0.24), replace = TRUE)
Random$Place <- sample(c("PlaceA", "PlaceB"),
size = nrow(Random),
prob = c(0.76, 0.24), replace = TRUE)
Random <- Random[, 4:6]

ggplot2 barplot with dual Y-axis and error bars

I am trying to generate a barplot with dual Y-axis and error bars. I have successfully generated a plot with error bars for one variable but I don't know how to add error bars for another one. My code looks like this. Thanks.
library(ggplot2)
#Data generation
Year <- c(2014, 2015, 2016)
Response <- c(1000, 1100, 1200)
Rate <- c(0.75, 0.42, 0.80)
sd1<- c(75, 100, 180)
sd2<- c(75, 100, 180)
df <- data.frame(Year, Response, Rate,sd1,sd2)
df
# The errorbars overlapped, so use position_dodge to move them horizontally
pd <- position_dodge(0.7) # move them .05 to the left and right
png("test.png", units="in", family="Times", width=2, height=2.5, res=300) #pointsize is font size| increase image size to see the key
ggplot(df) +
geom_bar(aes(x=Year, y=Response),stat="identity", fill="tan1", colour="black")+
geom_errorbar(aes(x=Year, y=Response, ymin=Response-sd1, ymax=Response+sd1),
width=.2, # Width of the error bars
position=pd)+
geom_line(aes(x=Year, y=Rate*max(df$Response)),stat="identity",color = 'red', size = 2)+
geom_point(aes(x=Year, y=Rate*max(df$Response)),stat="identity",color = 'black',size = 3)+
scale_y_continuous(name = "Left Y axis", expand=c(0,0),limits = c(0, 1500),breaks = seq(0, 1500, by=500),sec.axis = sec_axis(~./max(df$Response),name = "Right Y axis"))+
theme(
axis.title.y = element_text(color = "black"),
axis.title.y.right = element_text(color = "blue"))+
theme(
axis.text=element_text(size=6, color = "black",family="Times"),
axis.title=element_text(size=7,face="bold", color = "black"),
plot.title = element_text(color="black", size=5, face="bold.italic",hjust = 0.5,margin=margin(b = 5, unit = "pt")))+
theme(axis.text.x = element_text(angle = 360, hjust = 0.5, vjust = 1.2,color = "black" ))+
theme(axis.line = element_line(size = 0.2, color = "black"),axis.ticks = element_line(colour = "black", size = 0.2))+
theme(axis.ticks.length = unit(0.04, "cm"))+
theme(plot.margin=unit(c(1,0.1,0.1,0.4),"mm"))+
theme(axis.title.y = element_text(margin = margin(t = 0, r = 4, b = 0, l = 0)))+
theme(axis.title.x = element_text(margin = margin(t = 0, r = 4, b = 2, l = 0)))+
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank())+
ggtitle("SRG3")+
theme(legend.position="top")+
theme( legend.text=element_text(size=4),
#legend.justification=c(2.5,1),
legend.key = element_rect(size = 1.5),
legend.key.size = unit(0.3, 'lines'),
legend.position=c(0.79, .8), #width and height
legend.direction = "horizontal",
legend.title=element_blank())
dev.off()
and my plot is as follows:
A suggestion for future questions: your example is far from being a minimal reproducible example. All the visuals an the annotations are not related to your problem but render the code overly complex which makes it harder for others to work with it.
The following would be sufficient:
ggplot(df) +
geom_bar(aes(x = Year, y = Response),
stat = "identity", fill = "tan1",
colour = "black") +
geom_errorbar(aes(x = Year, ymin = Response - sd1, ymax = Response + sd1),
width = .2,
position = pd) +
geom_line(aes(x = Year, y = Rate * max(df$Response)),
color = 'red', size = 2) +
geom_point(aes(x = Year, y = Rate * max(df$Response)),
color = 'black', size = 3)
(Notice that I've removed stat = "identity" in all geom_s because this is set by default. Furthermore, y is not a valid aestetic for geom_errorbar() so I omitted that, too.)
Assuming that the additional variable you would like to plot error bars for is Rate * max(df$Response)) and that the relevant standard deviation is sd2, you may simply append
+ geom_errorbar(aes(x = Year, ymin = Rate * max(df$Response) - sd2,
ymax = Rate * max(df$Response) + sd2),
colour = "green",
width = .2)
to the code chunk above. This yields the output below.

Prevent geom_points and their corresponding labels from overlapping

Thanks for the suggested duplicate, this is however not only about the labels, but is also about adjusting the points themselves so they do not overlap.
have a quick look at the plot below...
I need the coloured points, and their corresponding labels, to never overlap. They should be clustered together and all visible, perhaps with some indication that they are spaced and not 100% accurate, perhaps some sort of call out? Open to suggestions on that.
I've tried adding position = 'jitter' to both geom_point and geom_text, but that doesn't seem to be working (assume it is only for small overlaps?)
Ideas?
# TEST DATA
srvc_data <- data.frame(
Key = 1:20,
X = sample(40:80, 20, replace = T),
Y = sample(30:65, 20, replace = T)
)
srvc_data$Z <- with(srvc_data,abs(X-Y))
t1<-theme(
plot.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.line = element_line(size=.4)
)
main_plot <- ggplot(srvc_data, aes(x = X, y = Y),xlim=c(0,100), ylim=c(0,100)) +
t1 +
theme_bw() +
labs(x="X", y="Y") +
scale_x_continuous(limits = c(0, 100)) +
scale_y_continuous(limits = c(0, 100)) +
geom_abline(intercept = 0, slope = 1, colour="blue", size=34, alpha=.1)+
geom_abline(intercept = 0, slope = 1, colour="black", size=.2, alpha=.5,linetype="dashed")+
geom_point(size = 7, aes(color = Z), alpha=.7) +
scale_color_gradient("Gap %\n",low="green", high="red")+
coord_fixed()+
geom_text(aes(label=Key,size=6),show_guide = FALSE)
main_plot
Produces this plot (of course with your random data it will vary)
Thanks in advance.
Here's your plot with ggrepel geom_text_repel:
library(ggrepel)
# TEST DATA
set.seed(42)
srvc_data <- data.frame(
Key = 1:20,
X = sample(40:80, 20, replace = T),
Y = sample(30:65, 20, replace = T)
)
srvc_data$Z <- with(srvc_data,abs(X-Y))
t1<-theme(
plot.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.line = element_line(size=.4)
)
ggplot(srvc_data, aes(x = X, y = Y),xlim=c(0,100), ylim=c(0,100)) +
t1 +
theme_bw() +
labs(x="X", y="Y") +
scale_x_continuous(limits = c(0, 100)) +
scale_y_continuous(limits = c(0, 100)) +
geom_abline(intercept = 0, slope = 1, colour="blue", size=34, alpha=.1)+
geom_abline(intercept = 0, slope = 1, colour="black", size=.2, alpha=.5,linetype="dashed")+
geom_point(size = 7, aes(color = Z), alpha=.7) +
scale_color_gradient("Gap %\n",low="green", high="red")+
coord_fixed()+
geom_text_repel(aes(label=Key,size=6),show_guide = FALSE)

Resources