ggplot two side by side graphs with the same scale - r

I'm trying to create two side by side graphs to compare the values (one absolute values and one proportions). I managed to create some simple graphs, but I cannot figure out if I have to wrap them or use a grid? I just keep getting errors.
My data looks something like this:
recent_quarter <- c(12, 15, 2, 3)
all_data <- c(218, 323, 34, 12)
recent_perc <- c(38,47,6,9)
all_perc <- c(37,55,4,5)
gender <- factor(c("M", "F", "Unknown", "Other"),
levels = c("M", "F", "Unknown", "Other"))
df <- data.frame(gender, all_data, recent_quarter, all_perc,
recent_perc, all_data)
Then I created a simple plot
ggplot(df, aes(x = gender, y = recent_perc)) +
geom_col(fill = "gray70") +
theme_minimal()
For this one, I'd like to add a second plot with the all_perc as the y axis. I'm stumped on how to do this.

You could:
g1 <- ggplot(df, aes(x = gender, y = recent_perc)) +
geom_col(fill = "gray70") +
theme_minimal()
g2 <- g1 + aes(y=all_perc)
cowplot::plot_grid(g1,g2)
gridExtra (as referenced in #Josh's answer) and patchwork are two other ways to do the grid assembly.
Or:
library(tidyverse)
df <- data.frame(gender, all_data, recent_quarter, all_perc, all_data, recent_perc)
df_long <- df %>%
select(gender, ends_with("perc")) %>%
pivot_longer(-gender) ## creates 'name', 'value' columns
ggplot(df_long, aes(gender, value)) + geom_col() +
facet_wrap(~name)

install the package gridExtra and use:
grid.arrange(
ggplot(df, aes(x = gender, y = recent_perc)) +
geom_col(fill = "gray70") +
theme_minimal(),
ggplot(df, aes(x = gender, y = all_perc)) +
geom_col(fill = "gray70") +
theme_minimal(),
ncol = 2)

Related

How can I change the size of a bar in a grouped bar chart when one group has no data? [duplicate]

Is there a way to set a constant width for geom_bar() in the event of missing data in the time series example below? I've tried setting width in aes() with no luck. Compare May '11 to June '11 width of bars in the plot below the code example.
colours <- c("#FF0000", "#33CC33", "#CCCCCC", "#FFA500", "#000000" )
iris$Month <- rep(seq(from=as.Date("2011-01-01"), to=as.Date("2011-10-01"), by="month"), 15)
colours <- c("#FF0000", "#33CC33", "#CCCCCC", "#FFA500", "#000000" )
iris$Month <- rep(seq(from=as.Date("2011-01-01"), to=as.Date("2011-10-01"), by="month"), 15)
d<-aggregate(iris$Sepal.Length, by=list(iris$Month, iris$Species), sum)
d$quota<-seq(from=2000, to=60000, by=2000)
colnames(d) <- c("Month", "Species", "Sepal.Width", "Quota")
d$Sepal.Width<-d$Sepal.Width * 1000
g1 <- ggplot(data=d, aes(x=Month, y=Quota, color="Quota")) + geom_line(size=1)
g1 + geom_bar(data=d[c(-1:-5),], aes(x=Month, y=Sepal.Width, width=10, group=Species, fill=Species), stat="identity", position="dodge") + scale_fill_manual(values=colours)
Some new options for position_dodge() and the new position_dodge2(), introduced in ggplot2 3.0.0 can help.
You can use preserve = "single" in position_dodge() to base the widths off a single element, so the widths of all bars will be the same.
ggplot(data = d, aes(x = Month, y = Quota, color = "Quota")) +
geom_line(size = 1) +
geom_col(data = d[c(-1:-5),], aes(y = Sepal.Width, fill = Species),
position = position_dodge(preserve = "single") ) +
scale_fill_manual(values = colours)
Using position_dodge2() changes the way things are centered, centering each set of bars at each x axis location. It has some padding built in, so use padding = 0 to remove.
ggplot(data = d, aes(x = Month, y = Quota, color = "Quota")) +
geom_line(size = 1) +
geom_col(data = d[c(-1:-5),], aes(y = Sepal.Width, fill = Species),
position = position_dodge2(preserve = "single", padding = 0) ) +
scale_fill_manual(values = colours)
The easiest way is to supplement your data set so that every combination is present, even if it has NA as its value. Taking a simpler example (as yours has a lot of unneeded features):
dat <- data.frame(a=rep(LETTERS[1:3],3),
b=rep(letters[1:3],each=3),
v=1:9)[-2,]
ggplot(dat, aes(x=a, y=v, colour=b)) +
geom_bar(aes(fill=b), stat="identity", position="dodge")
This shows the behavior you are trying to avoid: in group "B", there is no group "a", so the bars are wider. Supplement dat with a dataframe with all the combinations of a and b:
dat.all <- rbind(dat, cbind(expand.grid(a=levels(dat$a), b=levels(dat$b)), v=NA))
ggplot(dat.all, aes(x=a, y=v, colour=b)) +
geom_bar(aes(fill=b), stat="identity", position="dodge")
I had the same problem but was looking for a solution that works with the pipe (%>%). Using tidyr::spread and tidyr::gather from the tidyverse does the trick. I use the same data as #Brian Diggs, but with uppercase variable names to not end up with double variable names when transforming to wide:
library(tidyverse)
dat <- data.frame(A = rep(LETTERS[1:3], 3),
B = rep(letters[1:3], each = 3),
V = 1:9)[-2, ]
dat %>%
spread(key = B, value = V, fill = NA) %>% # turn data to wide, using fill = NA to generate missing values
gather(key = B, value = V, -A) %>% # go back to long, with the missings
ggplot(aes(x = A, y = V, fill = B)) +
geom_col(position = position_dodge())
Edit:
There actually is a even simpler solution to that problem in combination with the pipe. Use tidyr::complete gives the same result in one line:
dat %>%
complete(A, B) %>%
ggplot(aes(x = A, y = V, fill = B)) +
geom_col(position = position_dodge())

Adding a single label per group in ggplot with stat_summary and text geoms

I would like to add counts to a ggplot that uses stat_summary().
I am having an issue with the requirement that the text vector be the same length as the data.
With the examples below, you can see that what is being plotted is the same label multiple times.
The workaround to set the location on the y axis has the effect that multiple labels are stacked up. The visual effect is a bit strange (particularly when you have thousands of observations) and not sufficiently professional for my purposes. You will have to trust me on this one - the attached picture doesn't fully convey the weirdness of it.
I was wondering if someone else has worked out another way. It is for a plot in shiny that has dynamic input, so text cannot be overlaid in a hardcoded fashion.
I'm pretty sure ggplot wasn't designed for the kind of behaviour with stat_summary that I am looking for, and I may have to abandon stat_summary and create a new summary dataframe, but thought I would first check if someone else has some wizardry to offer up.
This is the plot without setting the y location:
library(dplyr)
library(ggplot2)
df_x <- data.frame("Group" = c(rep("A",1000), rep("B",2) ),
"Value" = rnorm(1002))
df_x <- df_x %>%
group_by(Group) %>%
mutate(w_count = n())
ggplot(df_x, aes(x = Group, y = Value)) +
stat_summary(fun.data="mean_cl_boot", size = 1.2) +
geom_text(aes(label = w_count)) +
coord_flip() +
theme_classic()
and this is with my hack
ggplot(df_x, aes(x = Group, y = Value)) +
stat_summary(fun.data="mean_cl_boot", size = 1.2) +
geom_text(aes(y = 1, label = w_count)) +
coord_flip() +
theme_classic()
Create a df_text that has the grouped info for your labels. Then use annotate:
library(dplyr)
library(ggplot2)
set.seed(123)
df_x <- data.frame("Group" = c(rep("A",1000), rep("B",2) ),
"Value" = rnorm(1002))
df_text <- df_x %>%
group_by(Group) %>%
summarise(avg = mean(Value),
n = n()) %>%
ungroup()
yoff <- 0.0
xoff <- -0.1
ggplot(df_x, aes(x = Group, y = Value)) +
stat_summary(fun.data="mean_cl_boot", size = 1.2) +
annotate("text",
x = 1:2 + xoff,
y = df_text$avg + yoff,
label = df_text$n) +
coord_flip() +
theme_classic()
I found another way which is a little more robust for when the plot is dynamic in its ordering and filtering, and works well for faceting. More robust, because it uses stat_summary for the text.
library(dplyr)
library(ggplot2)
df_x <- data.frame("Group" = c(rep("A",1000), rep("B",2) ),
"Value" = rnorm(1002))
counts_df <- function(y) {
return( data.frame( y = 1, label = paste0('n=', length(y)) ) )
}
ggplot(df_x, aes(x = Group, y = Value)) +
stat_summary(fun.data="mean_cl_boot", size = 1.2) +
coord_flip() +
theme_classic()
p + stat_summary(geom="text", fun.data=counts_df)

How can you plot `geom_point()` with `facet_wrap()` using per-group row number as x?

Is there a way to plot geom_point() so that it implicitly uses the row number as x in a facet? Just like plot(y) but also for multiple facets.
The following fails with Error: geom_point requires the following missing aesthetics: x:
df = data.frame(y = rnorm(60), group = rep(c("A", "B", "C"), 20))
ggplot(df, aes(y = y)) +
geom_point() +
facet_wrap(~group)
Naturally, you can do it using something like the following, but it is quite cumbersome.
df = df %>%
group_by(group) %>%
mutate(row = row_number())
ggplot(df, aes(x = row, y = y)) +
geom_point() +
facet_wrap(~group)
You can try this:
ggplot(df, aes(x=seq(y),y = y))+geom_point() + facet_wrap(~group)
In that way you can avoid the creation of an index variable as you mentioned!!!

Keeping unit of measure in facet_wrap while scales="free_y"? [duplicate]

This question already has an answer here:
Setting individual y axis limits with facet wrap NOT with scales free_y
(1 answer)
Closed 4 years ago.
I'm trying to create a facet_wrap() where the unit of measure remains identical across the different plots, while allowing to slide across the y axis.
To clearify with I mean, I have created a dataset df:
library(tidyverse)
df <- tibble(
Year = c(2010,2011,2012,2010,2011,2012),
Category=c("A","A","A","B","B","B"),
Value=c(1.50, 1.70, 1.60, 4.50, 4.60, 4.55)
)
with df, we can create the following plot using facet_wrap:
ggplot(data = df, aes(x=Year, y=Value)) + geom_line() + facet_wrap(.~ Category)
Plot 1
To clarify the differences between both plots, one can use scale = "free_y":
ggplot(data = df, aes(x=Year, y=Value)) + geom_line()
+ facet_wrap(.~ Category, scale="free_y")
Plot 2
Although it's more clear, the scale on the y-axis in plot A isequal to 0.025, while being 0.0125 in B. This could be misleading to someone who's comparing A & B next to each other.
So my question right now is to know whether there exist an elegant way of plotting something like the graph below (with y-scale = 0.025) without having to plot two seperate plots into a grid?
Thanks
Desired result:
Code for the grid:
# Grid
## Plot A
df_A <- df %>%
filter(Category == "A")
plot_A <- ggplot(data = df_A, aes(x=Year, y=Value)) + geom_line() + coord_cartesian(ylim = c(1.5,1.7)) + ggtitle("A")
## Plot B
df_B <- df %>%
filter(Category == "B")
plot_B <- ggplot(data = df_B, aes(x=Year, y=Value)) + geom_line() + coord_cartesian(ylim = c(4.4,4.6)) + ggtitle("B")
grid.arrange(plot_A, plot_B, nrow=1)
Based on the info at Setting individual y axis limits with facet wrap NOT with scales free_y you can you use geom_blank() and manually specified y-limits by Category:
# df from above code
df2 <- tibble(
Category = c("A", "B"),
y_min = c(1.5, 4.4),
y_max = c(1.7, 4.6)
)
df <- full_join(df, df2, by = "Category")
ggplot(data = df, aes(x=Year, y=Value)) + geom_line() +
facet_wrap(.~ Category, scales = "free_y") +
geom_blank(aes(y = y_min)) +
geom_blank(aes(y = y_max))

Annotate with ggplot2 when axis is of class 'date'

I'm dealing a lot with geom_line plots these days. What is the easiest way to annotate on a plot with an axis of class date? Other than to convert the date variable to a different class?
Here's my code:
china_trades %>%
filter(type %in% c("Imports")) %>%
ggplot() +
geom_line(aes(x = month, y = dollars, group = 1)) +
theme_minimal()
I would like to annotate the last data point which is at 2017-10 and 48.
Here's my plot:
Maybe somebody can chime in with a pure gg way of doing this but the directlabels package has this functionality:
china_trades %>%
filter(type %in% c("Imports")) %>%
ggplot() +
geom_line(aes(x = month, y = dollars, group = 1)) +
theme_minimal() +
geom_dl(aes(label = month), method = list(dl.combine("last.points")))
Edit: Here's a gg way using annotate:
x <- as.Date(c('2016-1-1','2016-1-2','2016-1-3','2016-1-4'))
y <- c(4,1,2,3)
df <- data.frame(x,y)
lastDate<- max(x)
lastDateY <- df[x==lastDate,2]
ggplot(df) +
geom_line(aes(x = x, y = y)) +
annotate(geom='text', x=lastDate,y=lastDateY, vjust=-2, label="China")

Resources