geom_text with facet_wrap in ggplot2 when group specified - r

When using ggplot2 to make faceted plots, I'm having trouble getting individual labels in each facet when I also specify a grouping parameter. Without specifying group = ..., things work fine, but I'm trying to make plots of paired data that emphasize the before vs. after treatment changes.
Here is an example:
library(tidyr)
library(ggplot2)
set.seed(253)
data <- data.frame(Subject = LETTERS[1:10],
Day1.CompoundA = rnorm(10, 4, 2),
Day2.CompoundA = rnorm(10, 7, 2),
Day1.CompoundB = rnorm(10, 5, 2),
Day2.CompoundB = rnorm(10, 5.5, 2))
# Compare concentration of compounds by day
A <- t.test(data$Day1.CompoundA, data$Day2.CompoundA, paired = TRUE)
B <- t.test(data$Day1.CompoundB, data$Day2.CompoundB, paired = TRUE)
data.long <- gather(data, key = DayCompound, value = Concentration, -Subject) %>%
separate(DayCompound, c("Day", "Compound"))
# text to annotate graphs
graphLabels <- data.frame(Compound = c("CompoundA", "CompoundB"),
Pval = paste("p =", c(signif(A$p.value, 2),
signif(B$p.value, 2))))
Ok, now that the data are set up, I can make a boxplot just fine:
ggplot(data.long, aes(x = Day, y = Concentration)) +
geom_boxplot() +
facet_wrap(~ Compound) +
geom_text(data = graphLabels, aes(x = 1.5, y = 10, label = Pval))
But if I want to show line plots that emphasize the paired nature of the data by showing each subject in a different color, the facet labels don't work.
ggplot(data.long, aes(x = Day, y = Concentration, color = Subject, group = Subject)) +
geom_point() + geom_line() +
facet_wrap(~ Compound) +
geom_text(data = graphLabels, aes(x = 1.5, y = 10, label = Pval))
# Error in eval(expr, envir, enclos) : object 'Subject' not found
Any suggestions?

When you map aesthetics (i.e. aes(...,color = Subject)) in the top level ggplot() call, those mappings are passed on to each layer, which means that each layer expects data to have variables by those names.
You either need to specify the data and mapping separately in each layer, or unmap them explicitly:
ggplot(data.long, aes(x = Day, y = Concentration, color = Subject, group = Subject)) +
geom_point() + geom_line() +
facet_wrap(~ Compound) +
geom_text(data = graphLabels, aes(x = 1.5, y = 10, label = Pval,color = NULL,group= NULL))
There is also an inherit.aes argument that you can set to FALSE in any layer you don't want pulling in those other mappings, e.g.
ggplot(data.long, aes(x = Day, y = Concentration, color = Subject, group = Subject)) +
geom_point() + geom_line() +
facet_wrap(~ Compound) +
geom_text(data = graphLabels, aes(x = 1.5, y = 10, label = Pval),inherit.aes = FALSE)

Related

R ggplot2 : geom_jitter and fill, problem to have the dots on the right boxplot

Here's my R code
ggplot(dat = Table, aes(x = Group, y = value, fill = Type)) +
geom_boxplot(alpha=0.08)+
geom_jitter()+
scale_fill_brewer(palette="Spectral")+
theme_minimal()
Like you can see the dots are in the middle of the boxplots. What can I add in geom_jitter to have each point in the righ boxplot and not in the middle like this ? I also tried geom_point, it gave the same result !
Thanks to the help now It works, but I wanted to add a line to connect the dots and I got this.. can someone tell how to really connect the dots with lines
I think if you group by interaction(Group, Type) and use position_jitterdodge() you should get what you're looking for.
ggplot(mtcars, aes(as.character(am), mpg, color = as.character(vs),
group = interaction(as.character(vs), as.character(am)))) +
geom_boxplot() +
geom_jitter(position = position_jitterdodge()) # same output with geom_point()
Edit - here's an example with manual jittering applied to data where the each subject appears once in each Group.
I looked for a built-in way to do this, and this answer comes close, but I couldn't get it to work in terms of using position_jitterdodge with position defined by the groups of Group/Type, but line grouping defined by id alone and not by Group/Type. Both aesthetics (position adjustment and series identification) rely on the same group parameter, but they each need a different value for it.
Table = data.frame(id = 1:4,
value = rnorm(8),
Group = rep(c("a","b"), each = 4),
Type = c("1", "2"))
library(dplyr)
Table %>%
mutate(x = as.numeric(as.factor(Group)) +
0.2 * scale(as.numeric(as.factor(Type))) +
rnorm(n(), sd = 0.06)) %>%
ggplot(aes(x = Group, y = value, fill = Type, group = interaction(Group, Type))) +
geom_boxplot(alpha=0.2)+
geom_point(aes(x = x)) +
geom_line(aes(x = x, group = id), alpha = 0.1) +
scale_fill_brewer(palette="Spectral")+
theme_minimal()
Best to use position_dodge instead if you want them to line up:
library(ggplot2)
Table <- tibble::tibble(
Group = rep(c("A", "B"), each = 20),
Type = factor(rep(c(1:2, 1:2), each = 10)),
value = rnorm(40, mean = 10)
)
ggplot(dat = Table, aes(x = Group, y = value, fill = Type)) +
geom_boxplot(alpha=0.08)+
geom_point(position = position_dodge(width = 0.75))+
scale_fill_brewer(palette="Spectral")+
theme_minimal()
To add a line, make sure group = ID goes in both the geom_point and geom_line calls:
library(ggplot2)
Table <- tibble::tibble(
Group = rep(c("A", "B"), each = 20),
Type = factor(rep(c(1:2, 1:2), each = 10)),
ID = factor(rep(1:20, times = 2)),
value = rnorm(40, mean = 10)
)
ggplot(dat = Table, aes(x = Group, y = value, fill = Type)) +
geom_boxplot(alpha = 0.08) +
geom_point(aes(group = ID), position = position_dodge(width = 0.75))+
geom_line(aes(group = ID), position = position_dodge(width = 0.75), colour = "grey")+
scale_fill_brewer(palette = "Spectral") +
theme_minimal()

Different color scale for geom_point and geom_smooth on ggplot

I am trying to plot observations and their grouped regression lines with ggplot as follows:
ggplot(df, aes(x = cabpol.e, y = pred.vote_share, color = coalshare)) +
geom_point() +
scale_color_gradient2(midpoint = 50, low="blue", mid="green", high="red") +
geom_smooth(aes(x = cabpol.e, y = pred.vote_share, group=coalshare1, fill = coalshare1), se = FALSE, method='lm') +
scale_fill_manual(values = c(Junior="blue", Medium="green", Senior="red"))
The problem is that the lines from geom_smooth are all the same color. I tried using scale_fill_manual so that there aren't two different color scales, and manually determining which color corresponds to each group. but instead all the lines appear blue. How can I make each line a different color?
As requested, here is a set of replicable data with the same problem:
set.seed(1000)
dff <- data.frame(x=rnorm(100, 0, 1),
y=rnorm(100, 1, 2),
z=seq(1, 100, 1),
g=rep(c("A", "B"), 50))
ggplot(dff, aes(x = x, y = y, color = z, group = g, fill = g)) +
geom_point() +
scale_color_gradient2(midpoint = 50, low="blue", high="red") +
geom_smooth(se = FALSE, method='lm')
My solution to this problem would be to create multiple geom_smooth calls, and each time subset the data for the desired factor level. This way you are able to pass a different color to each call of geom_smooth. As long as you do not have many factors, this solution is not terribly inefficient.
dff <- data.frame(x=rnorm(100, 0, 1),
y=rnorm(100, 1, 2),
z=seq(1, 100, 1),
g=rep(c("A", "B"), 50))
ggplot(dff, aes(x = x, y = y,
color = z,
group = g)) +
geom_point() +
scale_color_gradient2(midpoint = 50, low="blue", high="red") +
geom_smooth(
aes(x = x, y =y),
color = "red",
method = "lm",
data = filter(dff, g == "A"),
se = FALSE
) +
geom_smooth(
aes(x = x, y =y),
color = "blue",
method = "lm",
data = filter(dff, g == "B"),
se = FALSE
)
Group-trends between the x and y variables can be plotted by using different dataframes for the geom_line (with predicted values) and geom_point (with raw data) functions. Make sure to determine in the ggplot() function that color is always the same variable, and then for geom_line group by the same variable.
p2 <- ggplot(NULL, aes(x = cabpol.e, y = vote_share, color = coalshare)) +
geom_line(data = preds, aes(group = coalshare, color = coalshare), size = 1) +
geom_point(data = df, aes(x = cabpol.e, y = vote_share)) +
scale_color_gradient2(name = "Share of Seats\nin Coalition (%)",
midpoint = 50, low="blue", mid = "green", high="red") +
xlab("Ideological Differences on State/Market") +
ylab("Vote Share (%)") +
ggtitle("Vote Share Won by Coalition Parties in Next Election")

R - geom_bar - 'stack' position without summing the values

I have this data frame
df <- data.frame(profile = rep(c(1,2), times = 1, each = 3), depth = c(100, 200, 300), value = 1:3)
This is my plot
ggplot() +
geom_bar(data = df, aes(x = profile, y = - depth, fill = value), stat = "identity")
My problem is the y labels which doesn't correspond to the depth values of the data frame
To help, my desired plot seems like this :
ggplot() +
geom_point(data = df, aes(x = profile, y = depth, colour = value), size = 20) +
xlim(c(0,3))
But with bar intead of points vertically aligned
nb : I don't want to correct it manually in changing ticks with scale_y_discrete(labels = (desired_labels))
Thanks for help
Considering you want a y-axis from 0 to -300, using facet_grid() seems to be a right option without summarising the data together.
ggplot() + geom_bar(data = df, aes(x = as.factor(profile), y = -depth, fill = value), stat = 'identity') + facet_grid(~ value)
I have it !
Thanks for your replies and to this post R, subtract value from previous row, group by
To resume; the data :
df <- data.frame(profile = rep(c(1,2), times = 1, each = 3), depth = c(100, 200, 300), value = 1:3)
Then we compute the depth step of each profile :
df$diff <- ave(df$depth, df$profile, FUN=function(z) c(z[1], diff(z)))
And finally the plot :
ggplot(df, aes(x = factor(profile), y = -diff, fill = value)) + geom_col()

Using different data in ggplot's geom_rug than I use in the rest of the plot

I am having trouble getting geom_rug to plot some data into an existing plot. Here's an example plot, where I am comparing some visit day to the magnitude of some measurement.
test <- data.frame(
visit = rep(c(0, 1.5, 3.5, 6.5, 12), 5),
mag = rnorm(n = 25)
)
ggplot(test, aes(x = visit, y = mag)) + geom_point()
Which generates the following plot.
I also have some other data, that I'd like to add just as extra marks on the x axis.
vac <- data.frame(
visit = c(2, 4, 6, 8)
)
For reasons I don't understand, I get no plot at all when I run the following code.
ggplot(test, aes(x = visit, y = mag)) + geom_point() +
geom_rug(data=vac, aes(x = visit))
I presume I have messed up on syntax somehow, but I can't seem to figure out what I am doing wrong here. Any suggestions?
You should specify inherit.aes = FALSE in the geom_rug() line, otherwise it inherits y = mag from the main ggplot() call.
ggplot(test, aes(x = visit, y = mag)) +
geom_point() +
geom_rug(data=vac, aes(x = visit), inherit.aes = F)
I would try either this:
ggplot(test, aes(x = visit, y = mag)) + geom_point() +
geom_rug(data=vac, aes(x = visit,y = NULL))
or perhaps a better option this:
ggplot() +
geom_point(data = test,aes(x = visit,y = mag)) +
geom_rug(data=vac, aes(x = visit))

Condition a ..count.. summation on the faceting variable

I'm trying to annotate a bar chart with the percentage of observations falling into that bucket, within a facet. This question is very closely related to this question:
Show % instead of counts in charts of categorical variables but the introduction of faceting introduces a wrinkle. The answer to the related question is to use stat_bin w/ the text geom and then have the label be constructed as so:
stat_bin(geom="text", aes(x = bins,
y = ..count..,
label = paste(round(100*(..count../sum(..count..)),1), "%", sep="")
)
This works fine for an un-faceted plot. However, with facets, this sum(..count..) is summing over the entire collection of observations without regard for the facets. The plot below illustrates the issue---note that the percentages do not sum to 100% within a panel.
Here the actually code for the figure above:
g.invite.distro <- ggplot(data = df.exp) +
geom_bar(aes(x = invite_bins)) +
facet_wrap(~cat1, ncol=3) +
stat_bin(geom="text", aes(x = invite_bins,
y = ..count..,
label = paste(round(100*(..count../sum(..count..)),1), "%", sep="")
),
vjust = -1, size = 3) +
theme_bw() +
scale_y_continuous(limits = c(0, 3000))
UPDATE: As per request, here's a small example re-producing the issue:
df <- data.frame(x = c('a', 'a', 'b','b'), f = c('c', 'd','d','d'))
ggplot(data = df) + geom_bar(aes(x = x)) +
stat_bin(geom = "text", aes(
x = x,
y = ..count.., label = ..count../sum(..count..)), vjust = -1) +
facet_wrap(~f)
Update geom_bar requires stat = identity.
Sometimes it's easier to obtain summaries outside the call to ggplot.
df <- data.frame(x = c('a', 'a', 'b','b'), f = c('c', 'd','d','d'))
# Load packages
library(ggplot2)
library(plyr)
# Obtain summary. 'Freq' is the count, 'pct' is the percent within each 'f'
m = ddply(data.frame(table(df)), .(f), mutate, pct = round(Freq/sum(Freq) * 100, 1))
# Plot the data using the summary data frame
ggplot(data = m, aes(x = x, y = Freq)) +
geom_bar(stat = "identity", width = .7) +
geom_text(aes(label = paste(m$pct, "%", sep = "")), vjust = -1, size = 3) +
facet_wrap(~ f, ncol = 2) + theme_bw() +
scale_y_continuous(limits = c(0, 1.2*max(m$Freq)))

Resources