Create labeller to select which strip texts are displayed using facet_manual - r

I have the following plot:
Sample code:
# Example data: 48 rows; every pct value is 25, so each stacked bar sums to 100.
# grp alternates Group1/Group2, label cycles through the four answers in pairs,
# and grp2/label2 assign consecutive blocks of rows to Total, Age and Gender.
dat = data.frame(grp = rep(c("Group1", "Group2"), 24),
label = rep(c(rep("Yes",2), rep("Rather yes",2), rep("Rather no",2), rep("No",2)), 6),
pct = rep(c(25,25,25,25), 12),
grp2 = c(rep("Total", 8),rep("Age", 24),rep("Gender", 16)),
label2 = c(rep("",8), rep("18-29", 8),rep("30-64", 8),rep("65-80", 8),rep("Male", 8),rep("Female", 8)))
# Fix the level order of grp2 to Total, Gender, Age (instead of alphabetical)
dat$grp2 <- factor(dat$grp2, levels = c("Total", "Gender", "Age"))
# Design for facet_manual (ggh4x-Package)
# 3 x 2 matrix filled column-wise: column 1 holds 1:3, column 2 holds 4:6
design <- matrix(1:6,3)
# Three values, one per row of `design`; supplied as `heights` to facet_manual
heights <- c(8,16,24)
# Plot
# Stacked horizontal bars (pct on x, label2 on y, filled by label), faceted by
# grp and grp2 with panels placed according to `design`; strips drawn on top.
# The trailing `;plot` prints the plot right after it is assigned.
plot <- ggplot2::ggplot(data = dat, ggplot2::aes(x = pct, y = label2, fill = label)) +
ggplot2::geom_bar(stat = 'identity', position = 'stack', width = 0.8, color = 'white') +
ggh4x::facet_manual(grp~grp2, design = design, heights = heights, scales = "free_y", strip.position = "top");plot
I would very much like Group1 and Group2 each to be written only once, at the top of the plot. One possibility I found is to select what is shown using a labeller, but I do not understand the structure of the underlying labeller object (in the example below only the inner label is shown, whereas I want the outer label once at the top plus all of the inner labels).
# Same plot as before, but with a custom labeller: it receives the data frame
# of facet labels (`df`) and returns only its second column, as character, so
# only one strip label per facet is produced.
plot <- ggplot2::ggplot(data = dat, ggplot2::aes(x = pct, y = label2, fill = label)) +
ggplot2::geom_bar(stat = 'identity', position = 'stack', width = 0.8, color = 'white') +
ggh4x::facet_manual(grp~grp2, design = design, heights = heights, scales = "free_y", strip.position = "top",
labeller = function(df) {list(as.character(df[,2]))});plot
Does anyone have a solution? Of course it would also be possible to create two plots and then attach them next to each other. But I would be interested in a solution that generates a plot directly with facets.
I have already tried using different labeller functions, as well as using facet_nested_wrap as an alternative.

Using the inputs defined in the question, define a labeller function which
accepts a data frame whose column names will be grp and grp2 and whose 6 rows are the levels of each of the 6 facets. Replace those with the names that should be shown: in this case, if grp2 is Total then use the grp name for grp, and otherwise use "" for grp.
To be specific this data frame will be passed to the labeller function:
grp grp2
1 Group1 Total
2 Group1 Gender
3 Group1 Age
4 Group2 Total
5 Group2 Gender
6 Group2 Age
and the labeller function will return this data frame (grp is the empty string "" in rows 2, 3, 5 and 6):
grp grp2
1 Group1 Total
2 Gender
3 Age
4 Group2 Total
5 Gender
6 Age
The code follows.
library(ggplot2)
library(ggh4x)
#' Labeller that shows the outer (`grp`) strip text only on the "Total" facets.
#'
#' @param data Data frame of facet labels with columns `grp` and `grp2`,
#'   one row per facet.
#' @return `data` with `grp` set to `""` wherever `grp2` is not "Total", and
#'   with both `grp` and `grp2` as character vectors.
label_fun <- function(data) {
  # Coerce `grp` first: ifelse() drops factor attributes, so a factor `grp`
  # would otherwise come back as its integer codes ("1", "2") rather than
  # its labels.
  transform(data,
            grp = ifelse(grp2 == "Total", as.character(grp), ""),
            grp2 = as.character(grp2))
}
# Build the faceted plot with both grp and grp2 as facet variables and let
# label_fun decide which strip texts are shown; panels are placed via `design`.
plot <- ggplot(data = dat, aes(x = pct, y = label2, fill = label)) +
geom_bar(stat = 'identity', position = 'stack', width = 0.8, color = 'white') +
facet_manual(~ grp + grp2, design = design, heights = heights, scales = "free_y",
strip.position = "top", labeller = label_fun)
# Print the plot
plot
Added
Another approach is to collapse grp and grp2 into a single factor grps.
#' Labeller for the collapsed facet variable `grps` ("<grp2>.<grp>").
#'
#' Entries containing "Total" have their first dot replaced by " - "; all
#' other entries keep only the text before the first dot.
#'
#' @param x Data frame of facet labels with a `grps` column.
#' @return `x` with `grps` replaced by the display labels.
label_fun2 <- function(x) {
  relabel <- function(key) {
    is_total <- grepl("Total", key)
    with_group <- sub("\\.", " - ", key)
    without_group <- sub("\\..*", "", key)
    ifelse(is_total, with_group, without_group)
  }
  transform(x, grps = relabel(grps))
}
# Desired facet order for the collapsed factor: Total, Gender, Age within
# Group1, then the same within Group2.
levs <- c("Total.Group1", "Gender.Group1", "Age.Group1",
"Total.Group2", "Gender.Group2", "Age.Group2")
dat |>
# Collapse grp2 and grp into one factor ("<grp2>.<grp>") with levels in `levs`
transform(grps = factor(interaction(grp2, grp), levs)) |>
ggplot(aes(x = pct, y = label2, fill = label)) +
geom_bar(stat = 'identity', position = 'stack', width = 0.8, color = 'white') +
# Facet on the single collapsed variable; label_fun2 rewrites the strip text
facet_manual(~ grps , design = design, heights = heights, scales = "free_y",
strip.position = "top", labeller = label_fun2)
Alternatively, use this labeller function, which uses \n in place of " - ", with the same ggplot2 code:
#' Labeller for the collapsed facet variable `grps` ("<grp2>.<grp>"),
#' two-line variant.
#'
#' Entries containing "Total" have their first dot replaced by a newline
#' (padded with spaces); all other entries keep only the text before the
#' first dot.
#'
#' @param x Data frame of facet labels with a `grps` column.
#' @return `x` with `grps` replaced by the display labels.
label_fun2 <- function(x) {
  to_label <- function(key) {
    two_line <- sub("\\.", " \n ", key)
    prefix_only <- sub("\\..*", "", key)
    ifelse(grepl("Total", key), two_line, prefix_only)
  }
  transform(x, grps = to_label(grps))
}
Note that levs, above, could be computed using:
# Build every grp2 x grp combination (expand.grid varies its first argument,
# grp2, fastest) and join each pair as "<grp2>.<grp>". The result is a factor
# whose values appear in the same order as `levs`.
dat |>
with(expand.grid(grp2 = levels(grp2), grp = levels(factor(grp)))) |>
with(interaction(grp2, grp))
Yet another approach is to use facet_grid2:
# Grid layout instead of a manual design: grp2 defines the rows and grp the
# columns; `switch = "y"` is passed so the row strips change sides.
ggplot(dat, aes(x = pct, y = label2, fill = label)) +
geom_bar(stat = 'identity', position = 'stack', width = 0.8, color = 'white') +
facet_grid2(grp2 ~ grp, scales = "free_y", switch = "y")

Related

R ggplot label number of observations per ordered violin with facet wrap

I've got a plot that looks like the output of the following code using the iris data
# library() errors immediately if a package is missing; require() would only
# return FALSE with a warning and let the script fail later.
library(tidyverse)
library(purrr)
library(forcats) # Useful for ordering facets found at [here][1]
# Make some long data and set a custom sorting order for the crossed factor
tbl <- iris %>%
  # Columns 1:4 of iris go to long form: name in Msr, value in Vls
  pivot_longer(cols = 1:4, names_to = "Msr", values_to = "Vls") %>%
  mutate(Msr = factor(Msr)) %>%
  # One level per Species x Msr pair, then ordered by Vls
  mutate(plot_fct = fct_cross(Species, Msr)) %>%
  mutate(plot_fct = fct_reorder(plot_fct, Vls))
# A functioning factory for minor log breaks found [here][1] (very helpful)
# Factory: returns a function of `limits` that yields minor log breaks for
# the given `base`.
minor_breaks_log <- function(base) {
  # Evaluate `base` now so the returned closure does not hold a lazy promise
  force(base)
  function(limits) {
    # Whole powers of `base` at or beyond the two limits
    lower_pow <- floor(log(limits[1], base = base))
    upper_pow <- ceiling(log(limits[2], base = base))
    # NOTE(review): calc_logticks is an unexported ggplot2 helper reached via
    # `:::`; its presence and return shape may differ between versions.
    ticks <- ggplot2:::calc_logticks(
      base = base,
      minpow = lower_pow,
      maxpow = upper_pow
    )
    ticks$value
  }
}
# Plot the images
ggplot(data = tbl, aes(x =plot_fct, y = Vls, fill = Species)) +
geom_violin() +
coord_flip() + # swap coords
scale_y_log10(labels = function(x) sprintf("%g", x),
minor_breaks = minor_breaks_log(10)) + # format for labels # box fills
theme_bw(base_size = 12) +
annotation_logticks(base = 10, sides = "b") +
facet_wrap(~Species, nrow = 1, scales = "free")
I would now like to list the number of observations per violin on the right side of each facet just inside the maximum border, which I'm sure is possible but cannot seem to find an example that does this sort of labeling, with violins and facets.
ggplot(data = tbl, aes(y = plot_fct, fill = Species)) +
geom_violin(aes(x = Vls)) +
geom_text(aes(label = after_stat(count)), hjust = 1,
stat = "count", position = "fill") +
scale_x_log10(labels = function(x) sprintf("%g", x),
minor_breaks = minor_breaks_log(10)) + # format for labels # box fills
theme_bw(base_size = 12) +
annotation_logticks(base = 10, sides = "b") +
facet_wrap(~Species, nrow = 1, scales = "free")

R ggplot2 : geom_jitter and fill, problem to have the dots on the right boxplot

Here's my R code
ggplot(dat = Table, aes(x = Group, y = value, fill = Type)) +
geom_boxplot(alpha=0.08)+
geom_jitter()+
scale_fill_brewer(palette="Spectral")+
theme_minimal()
As you can see, the dots are in the middle of the boxplots. What can I add to geom_jitter so that each point falls on the right boxplot rather than in the middle like this? I also tried geom_point, and it gave the same result!
Thanks to the help, it now works, but I wanted to add a line to connect the dots and I got this. Can someone tell me how to properly connect the dots with lines?
I think if you group by interaction(Group, Type) and use position_jitterdodge() you should get what you're looking for.
ggplot(mtcars, aes(as.character(am), mpg, color = as.character(vs),
group = interaction(as.character(vs), as.character(am)))) +
geom_boxplot() +
geom_jitter(position = position_jitterdodge()) # same output with geom_point()
Edit - here's an example with manual jittering applied to data where each subject appears once in each Group.
I looked for a built-in way to do this, and this answer comes close, but I couldn't get it to work in terms of using position_jitterdodge with position defined by the groups of Group/Type, but line grouping defined by id alone and not by Group/Type. Both aesthetics (position adjustment and series identification) rely on the same group parameter, but they each need a different value for it.
Table = data.frame(id = 1:4,
value = rnorm(8),
Group = rep(c("a","b"), each = 4),
Type = c("1", "2"))
library(dplyr)
Table %>%
mutate(x = as.numeric(as.factor(Group)) +
0.2 * scale(as.numeric(as.factor(Type))) +
rnorm(n(), sd = 0.06)) %>%
ggplot(aes(x = Group, y = value, fill = Type, group = interaction(Group, Type))) +
geom_boxplot(alpha=0.2)+
geom_point(aes(x = x)) +
geom_line(aes(x = x, group = id), alpha = 0.1) +
scale_fill_brewer(palette="Spectral")+
theme_minimal()
Best to use position_dodge instead if you want them to line up:
library(ggplot2)
Table <- tibble::tibble(
Group = rep(c("A", "B"), each = 20),
Type = factor(rep(c(1:2, 1:2), each = 10)),
value = rnorm(40, mean = 10)
)
ggplot(dat = Table, aes(x = Group, y = value, fill = Type)) +
geom_boxplot(alpha=0.08)+
geom_point(position = position_dodge(width = 0.75))+
scale_fill_brewer(palette="Spectral")+
theme_minimal()
To add a line, make sure group = ID goes in both the geom_point and geom_line calls:
library(ggplot2)
Table <- tibble::tibble(
Group = rep(c("A", "B"), each = 20),
Type = factor(rep(c(1:2, 1:2), each = 10)),
ID = factor(rep(1:20, times = 2)),
value = rnorm(40, mean = 10)
)
ggplot(dat = Table, aes(x = Group, y = value, fill = Type)) +
geom_boxplot(alpha = 0.08) +
geom_point(aes(group = ID), position = position_dodge(width = 0.75))+
geom_line(aes(group = ID), position = position_dodge(width = 0.75), colour = "grey")+
scale_fill_brewer(palette = "Spectral") +
theme_minimal()

Can't draw a concentric pie chart in R

I have the following data:
Phyla V4 Fl
<chr> <dbl> <dbl>
Proteobacteria 88.58 81.43
Firmicutes 7.33 15.34
Actinobacteriota 1.55 1.94
Bacteroidota 2.20 1.25
I want to display the data using a concentric pie chart. I have a couple of trials:
mycols <- c("#eee0b1", "#da8a67", "#e63e62", "#0033aa")
ggplot(df, aes(x = 2, y = V4, fill = Phyla)) +
geom_bar(stat = "identity", color = "white") +
coord_polar(theta = "y", start = 0)+
geom_text(aes(y = Fl, label = V4), color = "white")+
scale_y_continuous(breaks=min(df$Fl):max(df$Fl)) +
scale_fill_manual(values = mycols) +
theme_void()+
xlim(0.5, 2.5)
This generates
So, I got only one column displayed.
The other trial used this:
pie(x=c(88.58,7.33,1.55,2.2),labels="",
col=c("#eee0b1", "#da8a67", "#e63e62", "#0033aa"))
par(new=TRUE)
pie(x=c(81.43,15.34,1.94, 1.25),labels=c("Proteobacteria","Firmicutes","Actinobacteriota", "Bacteroidota"),radius=.5,
col=c("#eee0b1", "#da8a67", "#e63e62", "#0033aa"))
that generates this figure:
I do not know which is easier to fix to generate the concentric pie. I need to include the color legend and label each pie with the category name (V4, Fl) along with adding the values as percentages.
You may try this
df %>%
pivot_longer(-Phyla, names_to = "type", values_to = "y") %>%
ggplot(aes(x = type, y = y)) +
geom_bar(aes(fill = Phyla), stat = "identity",
color = "white", position = "fill", width=0.7) +
coord_polar(theta = "y", start = pi/2) +
geom_text(aes(y = y, group = Phyla, label = y),
color = "white", position = position_fill(vjust=0.5)) +
geom_text(aes(x = x, y = y, label = type),
data = data.frame(x = c(2.5, 3.5), y = c(0, 0), type = c("V4", "Fl"))
) +
scale_fill_manual(values = mycols) +
scale_x_discrete(limits = c(NA, "V4", "Fl")) +
theme_void()
pivot_longer transforms your data from "wide" to "long", so that you can draw multiple columns.
position="fill" in geom_bar() and position_fill in geom_text() will scale y value into [0,1], so that two columns are aligned.
vjust=0.5 in position_fill will display values to their corresponding areas.
It is a little difficult to label the circle directly using x axis texts, but you can label them manually using geom_text() with a new data.frame(x=c(2.5,3.5),y=c(0,0),type=c("V4","Fl"))

How to add variables in a column as text using ggplot, facet_grid and geom_text

consider the following tibble
library(tidyverse)
df <-tibble(year = rep(1981:2020,4),
x = rep(letters[1:8],20),
y = rnorm(n = 160,0,1),
group = rep(letters[10:13],40))
I want to plot a faceted grid based on variable group and as text in each panel, the years (year) corresponding to each group (group).
Below a failed attempt where years are overlapping and not correct
ggplot() +
geom_line(data = df, aes(x = x, y = y, color = group)) +
geom_text(
data = df,
aes(
x = x,
y = 3,
label = year,
color = group
),
alpha = 0.7,
show.legend = FALSE
) +
facet_grid( ~ group)
Thanks for support!
I'm not sure I understand what you want, but you can try the following
ggplot() +
geom_line(data = df, aes(x = x, y = y, color = group)) +
geom_text(
data = df,
aes(
x = x,
y = 3,
label = year,
color = group
),
alpha = 0.7,
show.legend = FALSE,
position = position_stack(vjust = 1.5),
# stat = "unique"
) +
facet_grid( ~ group)
If you don't want the year to be repeated, uncomment the stat = "unique" line.
UPDATE
If you want a horizontal alignment you can create a new data frame
# One row per (x, group), with that combination's distinct years joined into a
# single comma-separated label.
df2 <- df %>%
  group_by(x, group) %>%
  # Drop the grouping explicitly: by default the result would stay grouped by
  # `x` and summarise() would emit a message about the regrouping.
  summarise(year = str_c(unique(year), collapse = ", "), .groups = "drop")
ggplot() +
geom_line(data = df, aes(x = x, y = y, color = group)) +
geom_text(
data = df2,
aes(
x = x,
y = 1,
label = year,
color = group
),
alpha = 0.7,
show.legend = FALSE,
position = position_stack(vjust = 1.5),
stat = "unique"
) +
facet_grid( ~ group)
but with this alignment labels will overlap. You can try reducing the font-size or using facet_wrap to arrange the panels on two rows.
You can also manipulate strings in df2 and add "\n" where you need it, but I think this cannot be easily adapted to every string.

Condition a ..count.. summation on the faceting variable

I'm trying to annotate a bar chart with the percentage of observations falling into that bucket, within a facet. This question is very closely related to this question:
Show % instead of counts in charts of categorical variables but the introduction of faceting introduces a wrinkle. The answer to the related question is to use stat_bin w/ the text geom and then have the label be constructed as so:
stat_bin(geom="text", aes(x = bins,
y = ..count..,
label = paste(round(100*(..count../sum(..count..)),1), "%", sep="")
)
This works fine for an un-faceted plot. However, with facets, this sum(..count..) is summing over the entire collection of observations without regard for the facets. The plot below illustrates the issue---note that the percentages do not sum to 100% within a panel.
Here is the actual code for the figure above:
g.invite.distro <- ggplot(data = df.exp) +
geom_bar(aes(x = invite_bins)) +
facet_wrap(~cat1, ncol=3) +
stat_bin(geom="text", aes(x = invite_bins,
y = ..count..,
label = paste(round(100*(..count../sum(..count..)),1), "%", sep="")
),
vjust = -1, size = 3) +
theme_bw() +
scale_y_continuous(limits = c(0, 3000))
UPDATE: As per request, here's a small example re-producing the issue:
df <- data.frame(x = c('a', 'a', 'b','b'), f = c('c', 'd','d','d'))
ggplot(data = df) + geom_bar(aes(x = x)) +
stat_bin(geom = "text", aes(
x = x,
y = ..count.., label = ..count../sum(..count..)), vjust = -1) +
facet_wrap(~f)
Update: geom_bar requires stat = "identity".
Sometimes it's easier to obtain summaries outside the call to ggplot.
df <- data.frame(x = c('a', 'a', 'b','b'), f = c('c', 'd','d','d'))
# Load packages
library(ggplot2)
library(plyr)
# Obtain summary. 'Freq' is the count, 'pct' is the percent within each 'f'
m <- ddply(data.frame(table(df)), .(f), mutate,
           pct = round(Freq / sum(Freq) * 100, 1))
# Plot the data using the summary data frame
ggplot(data = m, aes(x = x, y = Freq)) +
  geom_bar(stat = "identity", width = .7) +
  # Map `pct` from the layer data rather than `m$pct`: a `$` lookup inside
  # aes() bypasses the data ggplot2 manages for the layer and is fragile
  # when the data are split across facets.
  geom_text(aes(label = paste0(pct, "%")), vjust = -1, size = 3) +
  facet_wrap(~ f, ncol = 2) + theme_bw() +
  # Leave 20% headroom above the tallest bar so the labels are not clipped
  scale_y_continuous(limits = c(0, 1.2 * max(m$Freq)))

Resources