ggplot2 and facet_grid : add highest value for each plot - r

I am using facet_grid() to plot multiple plot divided per groups of data. For each plot, I want to add in the corner the highest value of the Y axis. I've tried several hacks but it never gives me the expected results. This answer partially helps me but the value I want to add will constantly be changing, therefore I don't see how I can apply it.
Here is a minimal example, I'd like to add the red numbers on the graph below:
library(ggplot2)
data <- data.frame('group'=rep(c('A','B'),each=4),'hour'=rep(c(1,2,3,4),2),'value'=c(5,4,2,3,6,7,4,5))
ggplot(data,aes(x = hour, y = value)) +
geom_line() +
geom_point() +
theme(aspect.ratio=1) +
scale_x_continuous(name ="hours", limits=c(1,4)) +
scale_y_continuous(limits=c(1,10),breaks = seq(1, 10, by = 2))+
facet_grid( ~ group)
Thanks for your help!

library(dplyr)
data2 <- data %>% group_by(group) %>% summarise(Max = max(value))
ggplot(data,aes(x = hour, y = value)) +
geom_line() +
geom_point() +
geom_text(aes(label = Max), x = Inf, y = Inf, data2,
hjust = 2, vjust = 2, col = 'red') +
theme(aspect.ratio=1) +
scale_x_continuous(name ="hours", limits=c(1,4)) +
scale_y_continuous(limits=c(1,10),breaks = seq(1, 10, by = 2))+
facet_grid( ~ group)

This does the trick. If you always have fixed ranges you can position the text manually.
library(ggplot2)
data <- data.frame('group'=rep(c('A','B'),each=4),'hour'=rep(c(1,2,3,4),2),'value'=c(5,4,2,3,6,7,4,5))
ggplot(data,aes(x = hour, y = value)) +
geom_line() +
geom_point() +
geom_text(
aes(x, y, label=lab),
data = data.frame(
x=Inf,
y=Inf,
lab=tapply(data$value, data$group, max),
group=unique(data$group)
),
vjust="inward",
hjust = "inward"
) +
theme(aspect.ratio=1) +
scale_x_continuous(name ="hours", limits=c(1,4)) +
scale_y_continuous(limits=c(1,10),breaks = seq(1, 10, by = 2))+
facet_grid( ~ group)

Related

mean line in every facet_wrap

ggplot(data = results, aes(x = inst, y = value, group = inst)) +
geom_boxplot() +
facet_wrap(~color) +
#geom_line(data = mean,
#mapping = aes(x = inst, y = average, group = 1))
theme_bw()
When I run the code above with the code line commented, it runs and gives the graph below but I want a joining mean lines on the boxplots based on its own color category for each group in facet wraps. Any ideas how can I do that?
Your code is generally correct (though you'll want to add color = color to the aes() specification in geom_line()), so I suspect your mean dataset isn't set up correctly. Do you have means grouped by both your x axis and faceting variable? Using ggplot2::mpg as an example:
library(dplyr) # >= v1.1.0
library(ggplot2)
mean_dat <- summarize(mpg, average = mean(hwy), .by = c(cyl, drv))
ggplot(mpg, aes(factor(cyl), hwy)) +
geom_boxplot() +
geom_line(
data = mean_dat,
aes(y = average, group = 1, color = drv),
linewidth = 1.5,
show.legend = FALSE
) +
facet_wrap(~drv) +
theme_bw()
Alternatively, you could use stat = "summary" and not have to create a means dataframe at all:
ggplot(mpg, aes(factor(cyl), hwy)) +
geom_boxplot() +
geom_line(
aes(group = 1, color = drv),
stat = "summary",
linewidth = 1.5,
show.legend = FALSE
) +
facet_wrap(~drv) +
theme_bw()
# same result as above

Plot labels at ends of lines in stacked area chart

I have the following code
library(ggplot2)
library(dplyr)
# create data
time <- as.numeric(rep(seq(1,7),each=7)) # x Axis
value <- runif(49, 10, 100) # y Axis
group <- rep(LETTERS[1:7],times=7) # group, one shape per group
data <- data.frame(time, value, group)
# stacked area chart
ggplot(data, aes(x=time, y=value, fill=group)) +
geom_area()+
geom_text(data = data %>% filter(time == last(time)), aes(label = group,
x = time + 0.5,
y = value,
color = group)) +
guides(color = FALSE) + theme_bw() +
scale_x_continuous(breaks = scales::pretty_breaks(10))
Where i get
But i am aiming for link
Is there any solution for stacked area plot?
The question code is plotting the text labels in the value's of the last time, when in fact the areas are cumulative. And in reverse order.
Also, the following graph plots data created with the same code but with
set.seed(1234)
Then the data creation code is the same as in the question.
# stacked area chart
ggplot(data, aes(x=time, y=value, fill=group)) +
geom_area()+
geom_text(data = data %>%
filter(time == last(time)) %>%
mutate(value = cumsum(rev(value))),
aes(label = rev(group),
x = time + 0.5,
y = value,
color = rev(group))) +
guides(color = FALSE) + theme_bw() +
scale_x_continuous(breaks = scales::pretty_breaks(10))
Edit.
Following the discussion in the comments to this answer, I have decided to post code based on the comment by user Jake Kaupp.
ggplot(data, aes(x = time, y = value, fill = group)) +
geom_area()+
geom_text(data = data %>% filter(time == last(time)),
aes(x = time + 0.5, y = value,
label = rev(group), color = rev(group)),
position = position_stack(vjust = 0.5)) +
guides(color = FALSE) +
theme_bw() +
scale_x_continuous(breaks = scales::pretty_breaks(10))
You can use the text function to put text wherever you want. For example:
text(7.2, 350, "B", col="brown")
Here we go
time <- as.numeric(rep(seq(1,7),each=8)) # x Axis
value <- runif(56, 10, 100) # y Axis
group <- rep(LETTERS[1:8],times=7) # group, one shape per group
data <- data.frame(time, value, group)
round_df <- function(x, digits) {
# round all numeric variables
# x: data frame
# digits: number of digits to round
numeric_columns <- sapply(x, mode) == 'numeric'
x[numeric_columns] <- round(x[numeric_columns], digits)
x
}
data$value<- round_df(data$value, 2)
# stacked area chart
ggplot(data, aes(x=time, y=value, fill=group)) +
geom_area()+
geom_text(aes(x = time + 0.5, y = value, label=ifelse(time == max(time), group, NA)),position = position_stack(vjust = 0.5),check_overlap = TRUE)+
guides(color = FALSE) + theme_bw()+
scale_x_continuous(breaks = scales::pretty_breaks(10)) +
geom_text(aes(label=ifelse(time != min(time) & time != max(time),value, NA)),position = position_stack(vjust = 0.5),check_overlap = TRUE)+
geom_text(aes(x = time + 0.18,label=ifelse(time == min(time),value, NA)),position = position_stack(vjust = 0.5),check_overlap = TRUE)+
geom_text(aes(x = time - 0.18,label=ifelse(time == max(time),value, NA)),position = position_stack(vjust = 0.5),check_overlap = TRUE)
And get
Factor levels but why not letters? That is the next step :)
UPDATED
just converted factor to char data$group <- as.character(data$group)

Plot standard deviation

I want to plot the standard deviation for 1 line (1 flow serie, the plot will have 2) in a plot with lines or smoth areas. I've seen and applied some code from sd representation and other examples... but it's not working for me.
My original data has several flow values for the same day, of which I've calculated the daily mean and sd. I'm stuck here, don't know if it is possible to represent the daily sd with lines from the column created "called sd" or should I use the original data.
The bellow code is a general example of what I'll apply to my data. The flow, flow1 and sd, are examples of the result calculation of daily mean and sd of the original data.
library(gridExtra)
library(ggplot2)
library(grid)
x <- data.frame(
date = seq(as.Date("2012-01-01"),as.Date("2012-12-31"), by="week"),
rain = sample(0:20,53,replace=T),
flow1 = sample(50:150,53,replace=T),
flow = sample(50:200,53,replace=T),
sd = sample (0:10,53, replace=T))
g.top <- ggplot(x, aes(x = date, y = rain, ymin=0, ymax=rain)) +
geom_linerange() +
scale_y_continuous(limits=c(22,0),expand=c(0,0), trans="reverse")+
theme_classic() +
theme(plot.margin = unit(c(5,5,-32,6),units="points"),
axis.title.y = element_text(vjust = 0.3))+
labs(y = "Rain (mm)")
g.bottom <- ggplot(x, aes(x = date)) +
geom_line(aes(y = flow, colour = "flow")) +
geom_line(aes(y = flow1, colour = "flow1")) +
stat_summary(geom="ribbon", fun.ymin="min", fun.ymax="max", aes(fill=sd), alpha=0.3) +
theme_classic() +
theme(plot.margin = unit(c(0,5,1,1),units="points"),legend.position="bottom") +
labs(x = "Date", y = "River flow (m/s)")
grid.arrange(g.top, g.bottom , heights = c(1/5, 4/5))
The above code gives Error: stat_summary requires the following missing aesthetics: y
Other option is geom_smooth, but as far as I could understand it requires some line equation (I can be wrong, I'm new in R).
Something like this maybe?
g.bottom <- x %>%
select(date, flow1, flow, sd) %>%
gather(key, value, c(flow, flow1)) %>%
mutate(min = value - sd, max = value + sd) %>%
ggplot(aes(x = date)) +
geom_ribbon(aes(ymin = min, ymax = max, fill = key)) +
geom_line(aes(y = value, colour = key)) +
scale_fill_manual(values = c("grey", "grey")) +
theme_classic() +
theme(plot.margin = unit(c(0,5,1,1),units="points"),legend.position="bottom") +
labs(x = "Date", y = "River flow (m/s)")

Make overlapping histogram in with geom_histogram

I am trying to make an overlapping histogram like this:
ggplot(histogram, aes = (x), mapping = aes(x = value)) +
geom_histogram(data = melt(tpm_18_L_SD), breaks = seq(1,10,by = 1),
aes(y = 100*(..count../sum(..count..))), alpha=0.2) +
geom_histogram(data = melt(tpm_18_S_SD), breaks = seq(1,10,by = 1),
aes(y = 100*(..count../sum(..count..))), alpha=0.2) +
geom_histogram(data = melt(tpm_18_N_SD), breaks = seq(1,10,by = 1),
aes(y = 100*(..count../sum(..count..))), alpha=0.2) +
facet_wrap(~variable, scales = 'free_x') +
ylim(0, 20) +
ylab("Percentage of Genes") +
xlab("Standard Deviation")
My code can only make them plot side by side and I would like to also make them overlap. Thank you! I based mine off of the original post where this came from but it did not work for me. It was originally 3 separate graphs which I combined with grid and ggarrange. It looks like this right now.
Here is the code of the three separate graphs.
SD_18_L <- ggplot(data = melt(tpm_18_L_SD), mapping = aes(x = value)) +
geom_histogram(aes(y = 100*(..count../sum(..count..))), breaks = seq(1, 10, by = 1)) +
facet_wrap(~variable, scales = 'free_x') +
ylim(0, 20) +
ylab("Percentage of Genes") +
xlab("Standard Deviation")
SD_18_S <- ggplot(data = melt(tpm_18_S_SD), mapping = aes(x = value)) +
geom_histogram(aes(y = 100*(..count../sum(..count..))), breaks = seq(1, 10, by = 1)) +
facet_wrap(~variable, scales = 'free_x') +
ylim(0, 20) +
ylab("Percentage of Genes") +
xlab("Standard Deviation")
SD_18_N <- ggplot(data = melt(tpm_18_N_SD), mapping = aes(x = value)) +
geom_histogram(aes(y = 100*(..count../sum(..count..))), breaks = seq(1, 10, by = 1)) +
facet_wrap(~variable, scales = 'free_x') +
ylim(0, 20) +
ylab("Percentage of Genes") +
xlab("Standard Deviation")
What my graphs look like now:
ggplot expects dataframes in a long format. I'm not sure what your data looks like, but you shouldn't have to call geom_histogram for each category. Instead, get all your data into a single dataframe (you can use rbind for this) in long format (what you're doing already with melt) first, then feed it into ggplot and map fill to whatever your categorical variable is.
Your call to facet_wrap is what puts them in 3 different plots. If you want them all on the same plot, take that line out.
An example using the iris data:
ggplot(iris, aes(x = Sepal.Length, fill = Species)) +
geom_histogram(alpha = 0.6, position = "identity")
I decreased alpha in geom_histogram so you can see where colors overlap, and added position = "identity" so observations aren't being stacked. Hope that helps!

ggplot2 add sum to chart

Using mtcars as an example, I've produced some violin plots. I wanted to add two things to this chart:
for each group, list n
for each group, sum a third variable (e.g. wt)
I can do (1) with the geom_text code below although (n) is actually plotted on the x axis rather than off to the side.
But I can't work out how to do (2).
Any help much appreciated!
library(ggplot2)
library(gridExtra)
library(ggthemes)
result <- mtcars
ggplot(result, aes(x = gear, y = drat, , group=gear)) +
theme_tufte(base_size = 15) + theme(line=element_blank()) +
geom_violin(fill = "white") +
geom_boxplot(fill = "black", alpha = 0.3, width = 0.1) +
ylab("drat") +
xlab("gear") +
coord_flip()+
geom_text(stat = "count", aes(label = ..count.., y = ..count..))
You can add both of these annotations by creating them in your dataframe temporarily prior to graphing. Using the dplyr package, you can create two new columns, one with the count for each group, and one with the sum of wt for each group. This can then be piped directly into your ggplot using %>% (alternatively, you could save the new dataset and insert it into ggplot the way you have it). Then with some minor edits to your geom_text call and adding a second one, we can create the plot you want. The code looks like this:
library(ggplot2)
library(gridExtra)
library(ggthemes)
library(magrittr)
library(dplyr)
result <- mtcars
result %>%
group_by(gear) %>%
mutate(count = n(), sum_wt = sum(wt)) %>%
ggplot(aes(x = gear, y = drat, , group=gear)) +
theme_tufte(base_size = 15) + theme(line=element_blank()) +
geom_violin(fill = "white") +
geom_boxplot(fill = "black", alpha = 0.3, width = 0.1) +
ylab("drat") +
xlab("gear") +
coord_flip()+
geom_text(aes(label = paste0("n = ", count),
x = (gear + 0.25),
y = 4.75)) +
geom_text(aes(label = paste0("sum wt = ", sum_wt),
x = (gear - 0.25),
y = 4.75))
The new graph looks like this:
Alternatively, if you create a summary data frame named result_sum, then you can manually add that into the geom_text calls.
result <- mtcars %>%
mutate(gear = factor(as.character(gear)))
result_sum <- result %>%
group_by(gear) %>%
summarise(count = n(), sum_wt = sum(wt))
ggplot(result, aes(x = gear, y = drat, , group=gear)) +
theme_tufte(base_size = 15) +
theme(line=element_blank()) +
geom_violin(fill = "white") +
geom_boxplot(fill = "black", alpha = 0.3, width = 0.1) +
ylab("drat") +
xlab("gear") +
coord_flip()+
geom_text(data = result_sum, aes(label = paste0("n = ", count),
x = (as.numeric(gear) + 0.25),
y = 4.75)) +
geom_text(data = result_sum, aes(label = paste0("sum wt = ", sum_wt),
x = (as.numeric(gear) - 0.25),
y = 4.75))
This gives you this:
The benefit to this second method is that the text isn't bold like in the first graph. The bold effect occurs in the first graph due to the text being printed over itself for all observations in the dataframe.
Thanks to those who helped.... I used this in the end which plots the calculated values, one set of classes being text based so using vjust to position the vertical offset.
thanks again!
library(ggplot2)
library(gridExtra)
library(ggthemes)
results <- mtcars
results$gear <- as.factor(as.character(results$gear)) #Turn 'gear' to text to simulate classes, then factorise
result_sum <- results %>%
group_by(gear) %>%
summarise(count = n(), sum_wt = sum(wt))
ggplot(results, aes(x = gear, y = drat, group=gear)) +
theme_tufte(base_size = 15) + theme(line=element_blank()) +
geom_violin(fill = "white") +
geom_boxplot(fill = "black", alpha = 0.3, width = 0.1) +
ylab("drat") +
xlab("gear") +
coord_flip()+
geom_text(data = result_sum, aes(label = paste0("n = ", count), x = (gear), vjust= 0, y = 5.25)) +
geom_text(data = result_sum, aes(label = paste0("sum wt = ", round(sum_wt,0)), x = (gear), vjust= -2, y = 5.25))

Resources