Display totals and means next to chart in ggplot - r

I am trying to get my mean and totals to the left of my bar chart like in the example above.
I have my bar charts created in ggplot. Picture and code below. Any advice on how to get the means and totals to display in the right place? Possibly custom_annotations?
Thanks
percentData = stotal %>% #stotal = survey data frame
group_by(qtext, div, response) %>% #qtext is my question text
summarise(N = n()) %>%
mutate(prop = N/sum(N))
percentData$prop = label_percent(accuracy = 1)(percentData$prop) #make percent from decimal
percentData
#colors
myColors <- c("green4","springgreen2","yellow1","orange1","red1","black", "black", "black")
ggplot(stotal)+
geom_bar(aes(x = div, fill = response), position = 'fill', width = 0.5)+
facet_grid(rows = vars(qtext))+
scale_fill_manual (values = myColors)+
coord_flip()+
ylab('')+
xlab('')+
scale_y_continuous(labels = percent)+
ggtitle(i)+
geom_text(data = percentData, aes(fill = response, y = N, label = prop, x = div),
position=position_fill(vjust=0.5))+
theme(strip.text.y = element_text(size = 12, angle = 0, family = "serif"))

One approach to achieve this is by making the table via a second ggplot which can be glued to the main plot by e.g. patchwork. Basically the table plot replicates the main plot with only one category, uses facetting to get the colum layout with the mean and the totals and gets rids of axis, grid, background colors, ...
Using some random example data try this:
library(ggplot2)
library(dplyr)
library(scales)
library(patchwork)
# Random example data
set.seed(42)
stotal <- data.frame(
qtext = rep(c("A", "B"), 50),
div = sample(c("University", "KSAS-HUM"), 100, replace = TRUE),
response = sample(c("Poor", "Fair", "Good", "Very good", "Excellent"), 100, replace = TRUE)
)
stotal$response <- factor(stotal$response, levels = c("Poor", "Fair", "Good", "Very good", "Excellent"))
percentData = stotal %>% #stotal = survey data frame
group_by(qtext, div, response) %>% #qtext is my question text
summarise(N = n()) %>%
mutate(prop = N/sum(N))
#> `summarise()` regrouping output by 'qtext', 'div' (override with `.groups` argument)
percentData$prop = label_percent(accuracy = 1)(percentData$prop) #make percent from decimal
#colors
myColors <- c("green4","springgreen2","yellow1","orange1","red1","black", "black", "black")
p1 <- ggplot(stotal)+
geom_bar(aes(x = div, fill = response), position = 'fill', width = 0.5)+
facet_grid(rows = vars(qtext))+
scale_fill_manual (values = myColors)+
coord_flip()+
ylab('')+
xlab('')+
scale_y_continuous(labels = percent)+
#ggtitle(i)+
geom_text(data = percentData, aes(fill = response, y = N, label = prop, x = div),
position=position_fill(vjust=0.5))+
theme(strip.text.y = element_text(size = 12, angle = 0, family = "serif"))
#> Warning: Ignoring unknown aesthetics: fill
# Table plot
table_data <- stotal %>%
mutate(response = as.numeric(response)) %>%
group_by(qtext, div) %>%
summarise(Mean = mean(response), "Total N" = n()) %>%
mutate(Mean = round(Mean, 1)) %>%
tidyr::pivot_longer(-c(qtext, div), names_to = "var")
#> `summarise()` regrouping output by 'qtext' (override with `.groups` argument)
p2 <- ggplot(table_data, aes(x = div)) +
geom_bar(color = "white", fill = "white", position = 'fill', width = .5)+
#geom_vline(color = "grey", xintercept = c(.5, 1.5, 2.5)) +
geom_text(aes(y = 1, label = value), position=position_fill(vjust=0.5), size = 0.8 * 11 /.pt) +
facet_grid(qtext ~ var, switch = "y") +
coord_flip() +
labs(x = NULL, y = NULL) +
theme_minimal() +
theme(strip.text.y = element_blank()) +
theme(axis.ticks.y = element_blank(),
axis.text.y = element_blank(),
axis.text.x = element_text(color = "transparent"),
axis.ticks.x = element_line(color = "transparent"),
axis.title = element_blank(),
panel.grid = element_blank(), panel.spacing.x = unit(0, "pt"))
# Glue together
p2 + p1 + plot_layout(widths = c(1, 3))

Related

How to rename the bins in ggplot in R

so basically I have created the bins and the have the means of each bin, having these two columns in a dataframe. Now I am plotting these two columns, but I want the exact number as x lable instead of bins. I am considering renaming each bin by its mid-point. please look at the pictures. The first one is my current plot and the second is the plot I want to acheive.
my current plot:
what I want to have:
my data frame is like this:
To reproduce the style of the plot image you included, you can do:
library(tidyverse)
df %>%
mutate(bin_group = gsub("\\(|\\]", "", bin_group)) %>%
separate(bin_group, sep = ",", into = c("lower", "upper")) %>%
mutate(across(lower:upper, as.numeric)) %>%
mutate(`Birth weight (g)` = (upper + lower) / 2) %>%
ggplot(aes(`Birth weight (g)`, mean_28_day_mortality)) +
geom_vline(xintercept = 1500) +
geom_point(shape = 18, size = 4) +
scale_x_continuous(labels = scales::comma) +
labs(title = "One-year mortality", y = NULL) +
theme_bw(base_family = "serif", base_size = 20) +
theme(panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank(),
panel.grid.major.y = element_line(color = "black", size = 0.5),
plot.title = element_text(hjust = 0.5))
Edit
To make the specific changes to the range use the limits argument in scale_x_continuous and scale_y_continuous you can do:
library(tidyverse)
df %>%
mutate(bin_group = gsub("\\(|\\]", "", bin_group)) %>%
separate(bin_group, sep = ",", into = c("lower", "upper")) %>%
mutate(across(lower:upper, as.numeric)) %>%
mutate(`Birth weight (g)` = (upper + lower) / 2) %>%
ggplot(aes(`Birth weight (g)`, mean_28_day_mortality)) +
geom_vline(xintercept = 1500) +
geom_point(shape = 18, size = 4) +
scale_x_continuous(labels = scales::comma, limits = c(1350, 1650),
breaks = seq(1350, 1650, 50)) +
scale_y_continuous(limits = c(0, 0.1), name = NULL) +
labs(title = "One-year mortality") +
theme_bw(base_family = "serif", base_size = 20) +
theme(panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank(),
panel.grid.major.y = element_line(color = "black", size = 0.5),
plot.title = element_text(hjust = 0.5))
Data used (obtained from image in question using OCR)
df <- structure(list(bin_group = structure(1:10,
levels = c("(1.35e+03,1.38e+03]",
"(1.38e+03,1.41e+03]", "(1.41e+03,1.44e+03]", "(1.44e+03,1.47e+03]",
"(1.47e+03,1.5e+03]", "(1.5e+03,1.53e+03]", "(1.53e+03,1.56e+03]",
"(1.56e+03,1.59e+03]", "(1.59e+03,1.62e+03]", "(1.62e+03,1.65e+03]"
), class = "factor"), mean_28_day_mortality = c(0.0563498, 0.04886257,
0.04467626, 0.04256053, 0.04248667, 0.04009187, 0.03625538, 0.03455094,
0.03349542, 0.02892909)), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -10L))
If you have groups that (I assume) you made with cut, you could pull out the max and min and then calc the mean before you summarize and plot. Note that I made the regex pretty long because I don't personally know if cut always makes left or inclusive or exclusive.
library(tidyverse)
#example like yours
mtcars |>
mutate(grp = cut(hp, 10)) |>
group_by(grp) |>
summarise(mpg_mean = mean(mpg)) |>
ggplot(aes(grp, mpg_mean))+
geom_point()
#solution
mtcars |>
mutate(grp = cut(hp, 10)) |>
extract(grp,
into = c("min", "max"),
remove = FALSE,
regex = "(?:\\(|\\[)(.*),(.*)(?:\\)|\\])",
convert = TRUE) |>
mutate(mean_grp = (min + max)/2)|>
group_by(mean_grp) |>
summarise(mpg_mean = mean(mpg)) |>
ggplot(aes(mean_grp, mpg_mean))+
geom_point()
EDIT
here is another option if you just want to re-label and not actually transform the data:
lab_fun <- function(x){
str_split(x, ",") |>
map_dbl(~parse_number(.x)
|> mean())
}
mtcars |>
mutate(grp = cut(hp, 10)) |>
group_by(grp) |>
summarise(mpg_mean = mean(mpg)) |>
ggplot(aes(grp, mpg_mean))+
geom_point()+
scale_x_discrete(labels = lab_fun)

Text color with geom_label_repel

Not specific to any particular piece of code, is there a relatively straightforward way to change the color of the text in a geom_label_repel box?
Specifically, I have code that produces the below chart
The percentage in the label box is the percent change in 7-day moving average for the most recent week over the week prior. I'd simply like to color the text red when the value is positive and green when it is negative.
The dataframe for this chart can be copied from here.
The plot code is
#endpoint layer
BaseEndpoints <- smDailyBaseData %>% filter(Base %in% AFMCbases) %>%
group_by(Base) %>%
filter(DaysSince == max(DaysSince)) %>%
select(Base, abbv, DaysSince, newRate,label) %>%
ungroup()
ZoomEndpoints <- BaseEndpoints %>% filter(Base != 'Edwards') %>%
mutate(zoom = TRUE)
CAEndPoint <- BaseEndpoints %>% filter(Base == 'Edwards') %>%
mutate(zoom = FALSE)
ZoomEndpoints <- rbind(ZoomEndpoints, CAEndPoint)
BasePlot <- smDailyBaseData %>% filter(Base %in% AFMCbases) %>%
ggplot(mapping = aes(x = as.numeric(DaysSince), y = newRate)) +
geom_line(aes(color=abbv),show.legend = FALSE) +
scale_color_ucscgb() +
geom_point(data = BaseEndpoints,size = 1.5,shape = 21,
aes(color = abbv,fill = abbv), show.legend = FALSE) +
geom_label_repel(data=ZoomEndpoints, aes(label=label), show.legend = FALSE,
vjust = 0, xlim=c(105,200), size=3, direction='y') +
labs(x = "Days Since First Confirmed Case",
y = "% Local Population Infected Daily") +
theme(plot.title = element_text(size = rel(1), face = "bold"),
plot.subtitle = element_text(size = rel(0.7)),
plot.caption = element_text(size = rel(1))) +
facet_zoom(xlim = c(50,120), ylim=c(0,0.011),zoom.data=zoom)
print(BasePlot)
Yes, it's as simple as this:
library(ggplot2)
df <- data.frame(x = c(-1, -1, 1, 1), y = c(-1, 1, 1, -1), value = c(-2, -1, 1, 2))
ggplot(df, aes(x, y)) +
geom_point(size = 3) +
ggrepel::geom_label_repel(aes(label = value, colour = factor(sign(value)))) +
lims(x = c(-100, 100), y = c(-100, 100)) +
scale_colour_manual(values = c("red", "forestgreen"))
EDIT
Now we have a more concrete example, I can see the problem more clearly. There are workarounds such as using ggnewscale or a hand-crafted solution such as Ian Campbell's thorough example. Personally, I would just note that you haven't used the fill scale yet, and this looks pretty good to my eye:
Here's a bit of a hacky solution since you can't have two scale_color_*'s at the same time:
The approach centers on manually assigning the color outside of aes in the geom_label_repel call. Adding one to the grepl result that searches for the minus sign in the label allows you to subset the two colors. You need two colors for each label, I assume for the box and for the text, so I used rep.
smDailyBaseData %>%
ggplot(mapping = aes(x = as.numeric(DaysSince), y = newRate)) +
geom_line(aes(color=abbv),show.legend = FALSE) +
scale_color_ucscgb() +
geom_point(data = BaseEndpoints,size = 1.5,shape = 21,
aes(color = abbv,fill = abbv), show.legend = FALSE) +
geom_label_repel(data=ZoomEndpoints, aes(label=label),
color = rep(c("green","red")[1+grepl("\\-\\d",as.factor(ZoomEndpoints$label))],times = 2),
show.legend = FALSE, vjust = 0, xlim=c(105,200),
size=3, direction='y') +
labs(x = "Days Since First Confirmed Case",
y = "% Local Population Infected Daily") +
theme(plot.title = element_text(size = rel(1), face = "bold"),
plot.subtitle = element_text(size = rel(0.7)),
plot.caption = element_text(size = rel(1))) +
facet_zoom(xlim = c(50,120), ylim=c(0,0.011),zoom.data=zoom)
Data Setup
#source("https://pastebin.com/raw/Vn2abQ4a")
BaseEndpoints <- smDailyBaseData %>%
group_by(Base) %>%
dplyr::filter(DaysSince == max(DaysSince)) %>%
dplyr::select(Base, abbv, DaysSince, newRate,label) %>%
ungroup()
ZoomEndpoints <- BaseEndpoints %>% filter(Base != 'Edwards') %>%
mutate(zoom = TRUE)
CAEndPoint <- BaseEndpoints %>% filter(Base == 'Edwards') %>%
mutate(zoom = FALSE)
ZoomEndpoints <- rbind(ZoomEndpoints, CAEndPoint)

R label with commas but no decimals

My goal is to produce labels with commas, but no decimals. Let's say I have a ggplot with the following section:
geom_text(aes(y = var,
label = scales::comma(round(var))), hjust = 0, nudge_y = 300 )
This is almost what I need. It gives me the commas, but has a decimal. I have seen here (axis labels with comma but no decimals ggplot) that comma_format() could be good, but I think the label in my case needs a data argument, which comma_format() does not take. What can I do?
Update:
As an example of when this problem occurs, see the following, which uses gganimate and has a lot more going on. Code derived from Jon Spring's answer at Animated sorted bar chart with bars overtaking each other
library(gapminder)
library(gganimate)
library(tidyverse)
gap_smoother <- gapminder %>%
filter(continent == "Asia") %>%
group_by(country) %>%
complete(year = full_seq(year, 1)) %>%
mutate(gdpPercap = spline(x = year, y = gdpPercap, xout = year)$y) %>%
group_by(year) %>%
mutate(rank = min_rank(-gdpPercap) * 1) %>%
ungroup() %>%
group_by(country) %>%
complete(year = full_seq(year, .5)) %>%
mutate(gdpPercap = spline(x = year, y = gdpPercap, xout = year)$y) %>%
mutate(rank = approx(x = year, y = rank, xout = year)$y) %>%
ungroup() %>%
arrange(country,year)
gap_smoother2 <- gap_smoother %>% filter(year<=2007 & year>=1999)
gap_smoother3 <- gap_smoother2 %<>% filter(rank<=8)
p <- ggplot(gap_smoother3, aes(rank, group = country,
fill = as.factor(country), color = as.factor(country))) +
geom_tile(aes(y = gdpPercap/2,
height = gdpPercap,
width = 0.9), alpha = 0.8, color = NA) +
geom_text(aes(y = 0, label = paste(country, " ")), vjust = 0.2, hjust = 1) +
geom_text(aes(y = gdpPercap,
label = scales::comma(round(gdpPercap))), hjust = 0, nudge_y = 300 ) +
coord_flip(clip = "off", expand = FALSE) +
scale_x_reverse() +
guides(color = FALSE, fill = FALSE) +
labs(title='{closest_state %>% as.numeric %>% floor}',
x = "", y = "GFP per capita") +
theme(plot.title = element_text(hjust = 0, size = 22),
axis.ticks.y = element_blank(), # These relate to the axes post-flip
axis.text.y = element_blank(), # These relate to the axes post-flip
plot.margin = margin(1,1,1,4, "cm")) +
transition_states(year, transition_length = 1, state_length = 0) +
enter_grow() +
exit_shrink() +
ease_aes('linear')
animate(p, fps = 2, duration = 5, width = 600, height = 500)
In addition to the solution provided by #drf, you need to add scale_y_continuous(scales::comma) to your ggplot commands. But put it before the coord_flip function.
p <- ggplot(gap_smoother3, aes(rank, group = country,
fill = as.factor(country), color = as.factor(country))) +
geom_tile(aes(y = gdpPercap/2,
height = gdpPercap,
width = 0.9), alpha = 0.8, color = NA) +
geom_text(aes(y = gdpPercap,
label = scales::comma(round(gdpPercap), accuracy=1)),
hjust = 0, nudge_y = 300 ) +
scale_y_continuous(labels = scales::comma) +
... etc.

ggplot2 - Turn off legend for one geom with same aesthetic as another geom

I'm making a plot with two different geoms, both use fill. I'd like one geom to have a legend, but the other to not. However adding show.legend=F to the required geom doesn't switch off the legend for that geom.
Example:
library(tidyverse)
library(ggalluvial)
x = tibble(qms = c("grass", "cereal", "cereal"),
move1 = "Birth",
move2 = c("Direct", "Market", "Slaughter"),
move3 = c("Slaughter", "Slaughter", NA),
freq = c(10, 5, 7))
x %>%
mutate(id = qms) %>%
to_lodes_form(axis = 2:4, id = id) %>%
na.omit() %>%
ggplot(aes(x = x, stratum = stratum, alluvium = id,
y = freq, label = stratum)) +
scale_x_discrete(expand = c(.1, .1)) +
geom_flow(aes(fill = qms)) +
geom_stratum(aes(fill = stratum), show.legend=F) +
geom_text(stat = "stratum", size = 3) +
theme_void() +
labs(fill="")
Output:
Desired output:
Question:
How do I turn off the fill legend for one geom, but not the other? I can (if I have to) do this in inkscape/gimp, but would prefer a solution I can version control.
Have a look at the final line of code:
scale_fill_discrete(breaks = c("grass", "cereal"))
That defines the breaks for the fills to only include cereal and grass, as required.
library(tidyverse)
library(ggalluvial)
x = tibble(qms = c("grass", "cereal", "cereal"),
move1 = "Birth",
move2 = c("Direct", "Market", "Slaughter"),
move3 = c("Slaughter", "Slaughter", NA),
freq = c(10, 5, 7))
x %>%
mutate(id = qms) %>%
to_lodes_form(axis = 2:4, id = id) %>%
na.omit() %>%
ggplot(aes(x = x, stratum = stratum, alluvium = id,
y = freq, label = stratum)) +
scale_x_discrete(expand = c(.1, .1)) +
geom_flow(aes(fill = qms)) +
geom_stratum(aes(fill = stratum), show.legend=FALSE) +
geom_text(stat = "stratum", size = 3) +
theme_void() +
labs(fill="") +
scale_fill_discrete(breaks = c("grass", "cereal")) #<- This line!
Created on 2019-03-18 by the reprex package (v0.2.1)

R - How can I add a bivariate legend to my ggplot2 chart?

I'm trying to add a bivariate legend to my ggplot2 chart but I don't know whether (a) this is possible through some guides options and (b) how to achieve it.
The only way I've managed to produce something close to the desired outcome was by specifically creating a new chart which resembles a legend (named p.legend below) and inserting it, via the cowplot package, somewhere in the original chart (named p.chart below). But surely there must be a better way than this, given that this approach requires creating the legend in the first place and fiddling with its size/location to fit it in the original chart.
Here's code for a dummy example of my approach:
library(tidyverse)
# Create Dummy Data #
set.seed(876)
n <- 2
df <- expand.grid(Area = LETTERS[1:n],
Period = c("Summer", "Winter"),
stringsAsFactors = FALSE) %>%
mutate(Objective = runif(2 * n, min = 0, max = 2),
Performance = runif(2 * n) * Objective) %>%
gather(Type, Value, Objective:Performance)
# Original chart without legend #
p.chart <- df %>%
ggplot(., aes(x = Area)) +
geom_col(data = . %>% filter(Type == "Objective"),
aes(y = Value, fill = Period),
position = "dodge", width = 0.7, alpha = 0.6) +
geom_col(data = . %>% filter(Type == "Performance"),
aes(y = Value, fill = Period),
position = "dodge", width = 0.7) +
scale_fill_manual(values = c("Summer" = "#ff7f00", "Winter" = "#1f78b4"), guide = FALSE) +
theme_minimal() +
theme(panel.grid.major.x = element_blank(),
panel.grid.minor.y = element_blank())
# Create a chart resembling a legend #
p.legend <- expand.grid(Period = c("Summer", "Winter"),
Type = c("Objective", "Performance"),
stringsAsFactors = FALSE) %>%
ggplot(., aes(x = Period, y = factor(Type, levels = c("Performance", "Objective")),
fill = Period, alpha = Type)) +
geom_tile() +
scale_fill_manual(values = c("Summer" = "#ff7f00", "Winter" = "#1f78b4"), guide = FALSE) +
scale_alpha_manual(values = c("Objective" = 0.7, "Performance" = 1), guide = FALSE) +
ggtitle("Legend") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5),
rect = element_rect(fill = "transparent"),
axis.title = element_blank(),
panel.grid.major = element_blank())
# Add legend to original chart #
p.final <- cowplot::ggdraw() +
cowplot::draw_plot(plot = p.chart) +
cowplot::draw_plot(plot = p.legend, x = 0.5, y = 0.65, width = 0.4, height = 0.28, scale = 0.7)
# Save chart #
cowplot::ggsave("Bivariate Legend.png", p.final, width = 8, height = 6, dpi = 500)
... and the resulting chart:
Is there an easier way of doing this?
This might work at some point, but right now the colorbox seems to ignore all breaks, names and labels (#ClausWilke?). Probably because the multiscales package is in really early stages.
Posting since it might work when future readers are here.
library(multiscales)
df %>%
mutate(
period = as.numeric(factor(Period)),
type = as.numeric(factor(Type))
) %>%
ggplot(., aes(x = Area, y = Value, fill = zip(period, type), group = interaction(Area, Period))) +
geom_col(width = 0.7, position = 'dodge') +
bivariate_scale(
"fill",
pal_hue_sat(c(0.07, 0.6), c(0.4, 0.8)),
guide = guide_colorbox(
nbin = 2,
name = c("Period", "Type"), #ignored
breaks = list(1:2, 1:2), #ignored
labels = list(levels(.$Period), levels(.$Type)) #ignored
)

Resources