Shade area between lines with ggplot2 - r

I want to shade the area between the grouped lines. I've tried different approaches but I don't manage to do it.
df <- data.frame(year = rep(c(1950:1955), each = 2),
gender = factor(rep(c("male", "female"), times = 6)),
value = c(40526812, 37450509, 43027405,
40135682, 45801088, 43130369,
48579427, 46077640, 50948574,
48493786, 53052094, 50537984))
df |>
ggplot(aes(year, value, group = gender)) +
geom_line()
Thanks in advance!

You could define the boundaries of the area by using pivot_wider to create the ymax and ymin columns for geom_ribbon, so you don't need to subset the data for each argument. This also makes the approach easier to reproduce. Here is an example:
df <- data.frame(year = rep(c(1950:1955), each = 2),
gender = factor(rep(c("male", "female"), times = 6)),
value = c(40526812, 37450509, 43027405,
40135682, 45801088, 43130369,
48579427, 46077640, 50948574,
48493786, 53052094, 50537984))
library(ggplot2)
library(dplyr)
library(tidyr)
area <- df %>%
pivot_wider(names_from = gender, values_from = value) %>%
mutate(
ymax = pmax(male, female),
ymin = pmin(male, female)
)
ggplot(data = df, mapping = aes(year, y = value, group = gender)) +
geom_line() +
geom_ribbon(data = area, mapping = aes(year, ymin = ymin, ymax = ymax), alpha = 0.4, inherit.aes = FALSE )
Created on 2022-07-22 by the reprex package (v2.0.1)

You can use geom_ribbon, which shades the area between a ymin and a ymax value along the x axis.
A higher alpha makes the shading more opaque.
# Build one row per year holding both series, so the ribbon bounds are matched
# to years by key (merge on `year`) rather than by row position.
bounds <- merge(
  subset(df, gender == "female", select = c(year, value)),
  subset(df, gender == "male", select = c(year, value)),
  by = "year",
  suffixes = c("_female", "_male")
)

ggplot(data = df, aes(year, value, group = gender)) +
  geom_line() +
  # inherit.aes = FALSE stops the ribbon from picking up `group = gender`,
  # which would draw it once per gender and make the shading darker than the
  # requested alpha.
  geom_ribbon(
    data = bounds,
    aes(x = year, ymin = value_female, ymax = value_male),
    fill = "blue", alpha = 0.5, inherit.aes = FALSE
  )

Related

Add hline for groups outside of aestetic in ggplot

I have a line chart with metrics for different years and months. I want to add a median line for each month.
library(dplyr)
library(lubridate)
library(ggplot2)
set.seed(52)
data <- tibble(
date = seq.Date(from = as.Date("2017-01-01"), to = date("2020-12-31"), by = "month")
) %>%
mutate(
metric = rnorm(n(), mean = 5, sd = 2),
month = month(date, label = TRUE),
year = as.factor(year(date))
)
ggplot(data, aes(x = month, y = metric, color = year, group = year)) +
geom_line()
I tried using geom_hline() to do this but it only produces a line for the total median.
ggplot(data, aes(x = month, y = metric, color = year, group = year)) +
geom_line() +
geom_hline(aes(yintercept = median(metric), group = month))
Created on 2021-11-18 by the reprex package (v2.0.1)
What is the easiest way to create a median line for each month? Like this:
Is this what you are looking for...?
ggplot(data, aes(x = month, y = metric, color = year, group = year)) +
geom_line() +
geom_tile(data = function(df) df %>% group_by(month) %>%
summarise(metric = median(metric)),
aes(x = month, y = metric), height = 0.05, inherit.aes = FALSE)
Alternatively, thanks to @user20650's comment below, this also works...
ggplot(data, aes(x = month, y = metric, color = year, group = year)) +
geom_line() +
stat_summary(aes(x = month, y = metric),
fun = median,
geom = "tile",
height = 0.05,
inherit.aes = FALSE)
You could use ave.
I didn't get it if you need a median line for each year or through the months, so I will show you both.
Median for each year
ggplot(data, aes(x = month, y = metric, color = year, group = year)) +
geom_line() +
geom_line(aes(y = ave(metric, year, FUN = median)))
Median through months
ggplot(data, aes(x = month, y = metric, color = year, group = year)) +
geom_line() +
geom_line(aes(y = ave(metric, month, FUN = median), colour = "median"), size = 2)
EDIT:
since you are interested in a horizontal line, here you have an alternative solution:
ggplot(data, aes(x = month, y = metric, color = year, group = year)) +
geom_line() +
stat_summary(aes(x = month, y=metric), fun = median, geom = "point", shape = "-", size = 15, inherit.aes = FALSE)
Not sure if this is what you're looking for, but you can simply create a new variable in your dataset holding the median metric. First group the observations by month, then calculate the median metric, and then ungroup the data.
data <- data %>% group_by(month) %>% mutate(median_metric=median(metric)) %>% ungroup()
Once you do it, you just specify yintercept to vary by median_metric
ggplot(data ) +
geom_line(aes(x = month, y = metric, color = year, group = year))+
geom_hline(aes(yintercept = median_metric))

Is there a way to add the percentage change label to the end of a graph

I have created a graph to demonstrate the development of four variables. Is there any way to add a label that tells the percentage change(last observation/first observation -1) to the end of the plots to highlight the relative change during the observed period?
Data&plots=
library(tidyverse)
Data <- data.frame(
Wind = c(236,325,470,615,647,821),
Hard_coal= c(591,811,667,681,532,344),
Gas= c(883,841,472,731,678,680),
Bio = c(883,841,811,731,678,680),
year= c("2015","2016","2017","2018","2019","2020"))
Data %>%
pivot_longer(-year) %>%
ggplot(aes(x = year, y = value, color = name, group = name, linetype = name)) +
geom_line(size = 1.5)
Using the ggrepel option offered in Plot labels at ends of lines this could be achieved like so where I make use of dplyrs first and last to compute the percentage change.
Note: I still vote to close this question as a duplicate.
library(tidyr)
library(dplyr)
library(ggplot2)
# Long format with one row per (year, name) pair. `change` is the relative
# change of each series: last observation / first observation - 1, repeated on
# every row of that series.
data_long <- Data %>%
  pivot_longer(-year) %>%
  mutate(year = as.numeric(year)) %>%
  group_by(name) %>%
  mutate(change = last(value) / first(value) - 1) %>%
  # Drop the grouping so later verbs (e.g. the filter on max(year)) operate on
  # the whole table instead of silently working per series.
  ungroup()
ggplot(data_long, aes(x = year, y = value, color = name, group = name)) +
geom_line(size = 1) +
ggrepel::geom_text_repel(data = filter(data_long, year == max(year)),
aes(label = scales::percent(change)),
direction = "y", nudge_x = .25,
hjust = 0, show.legend = FALSE) +
scale_x_continuous(limits = c(NA, 2020.5)) +
coord_cartesian(clip = "off")

Draw a line on top of stacked bar_plot

I would like to draw a line (or points) on top of my stacked bar plot. As I have no real data points I can refer to (only the separated values and not their sum), I don't know how to add such a line. The code produces this plot:
I want to add this black line(my real data are not linear):
library(tidyverse)
##Create some fake data
data3 <- tibble(
year = 1991:2020,
One = c(31:60),
Two = c(21:50),
Three = c(11:40)
)
##Gather the variables to create a long dataset
# Reshape to long format: one row per (year, model) pair, with the former
# column names in `model` and their values in `value`. pivot_longer()
# supersedes gather(); it orders rows by year rather than by model, which does
# not affect the stacked bar plot.
new_data3 <- data3 %>%
  pivot_longer(-year, names_to = "model", values_to = "value")
##plot the data
ggplot(new_data3, aes(x = year, y = value, fill=model)) +
geom_bar(stat = "identity",position = "stack")
You can use stat_summary and sum for the summary function:
# Stacked bars per model, plus a line through the per-year sum of `value`.
ggplot(new_data3, aes(year, value)) +
  geom_col(aes(fill = model)) +
  # `fun` replaces the deprecated `fun.y` argument; sum() is applied to the
  # values sharing each x (year), giving the height of the full stack.
  stat_summary(geom = "line", fun = sum, group = 1, size = 2)
Result:
You could get sum by year and plot it with new geom_line
library(dplyr)
library(ggplot2)
# Total of all models for each year: the height of each stacked bar.
newdata4 <- new_data3 %>%
  group_by(year) %>%
  summarise(total = sum(value))

ggplot(new_data3, aes(x = year, y = value, fill = model)) +
  geom_bar(stat = "identity", position = "stack") +
  # newdata4 has no `model` column, so the plot-level `fill = model` must not
  # be inherited by this layer. inherit.aes = FALSE does that directly,
  # instead of overriding fill with a dummy "" value that geom_line ignores.
  geom_line(
    aes(x = year, y = total),
    data = newdata4, size = 2, inherit.aes = FALSE
  )

ggplot2:: Facetting plot with the same reference plot in all panels

I would like to facet a plot, but with a reference plot in each panel. Let me try to show with pictures what I want to achieve: My example data_frame:
# library() errors if dplyr is missing; require() would only return FALSE.
library(dplyr)

# Example data, 120 rows: 40 control rows (id "ctr", class "ctr") and 80
# patient rows (id "pat", classes "a" and "b", 40 rows each). Each block of 20
# rows covers ranks 1..20 for one exam colour ("blue" or "red").
# tibble() replaces the deprecated data_frame().
df <- tibble(
  id = c(rep("ctr", 40), rep("pat", 80)),
  class = c(rep("ctr", 40), rep(c("a", "b"), each = 40)),
  rank = rep(1:20, 6),
  mean = c(
    rep(seq(3, -3, length.out = 20), 2),
    rep(seq(1, -4, length.out = 20), 2),
    rep(seq(-2, -8, length.out = 20), 2)
  ),
  sd = rep(seq(1.2, 0.8, length.out = 20), times = 6),
  exam = rep(c("blue", "red"), each = 20, times = 3)
)
My plot:
# first, create reference plot of the 'controls'
require(ggplot2)
p_ctr <- ggplot() +
geom_line(data = filter(df, id == 'ctr'),
aes(x=rank, y=mean, color=exam), linetype=1) +
geom_ribbon(data = filter(df, id == 'ctr'),
aes(x = rank, ymax = mean+sd, ymin = mean-sd,
fill = exam), alpha = .1) +
scale_colour_manual(values = c("#00b6eb","#eb0041")) +
scale_fill_manual(values = c("#00b6eb","#eb0041"))
# then, overlay with plot of 'patients'
p_ctr + geom_line(data = filter(df, id == 'pat'),
aes(x=rank, y=mean, linetype = class)) +
geom_ribbon(data = filter(df, id == 'pat'),
aes(x = rank, ymax = mean+sd, ymin = mean-sd,
group = class),
alpha = .1) +
facet_wrap(~exam)
That is halfway there:
Ideally, however, I would like to plot the different "classes" in separate panels, but with the control plot as a reference in each panel:
Expected result:
I have tried different combinations of facetting, without good result. I guess, there must be a simple solution?
Maybe like so.
library(dplyr)
library(ggplot2)
df1 <- filter(df, id == 'ctr')
df2 <- filter(df, id == 'pat')
df2 <- dplyr::rename(df2, class_2 = class)
p_ctr <- ggplot() +
geom_line(data = df1, aes(x=rank, y=mean, color=exam)) +
geom_ribbon(data = df1,
aes(x = rank, ymax = mean+sd, ymin = mean-sd, fill = exam),
alpha = .1) +
scale_colour_manual(values = c("#00b6eb","#eb0041")) +
scale_fill_manual(values = c("#00b6eb","#eb0041")) +
geom_line(data = df2,
aes(x=rank, y=mean)) +
geom_ribbon(data = df2,
aes(x = rank, ymax = mean+sd, ymin = mean-sd),
alpha = .1) +
facet_grid(class_2 ~ exam)
p_ctr
Using facet_wrap gives me the following error:
error in gList(list(x = 0.5, y = 0.5, width = 1, height = 1, just = "centre", :
only 'grobs' allowed in "gList"
You probably came across this plot while looking for the solution.
p_ctr + geom_line(data = filter(df, id == 'pat'),
aes(x=rank, y=mean)) +
geom_ribbon(data = filter(df, id == 'pat'),
aes(x = rank, ymax = mean+sd, ymin = mean-sd),
alpha = .1) +
# facet_wrap(~exam) +
facet_grid(class ~ exam)
This is basically your reference plot and its overlay, without the linetype and group arguments. Additionally I faceted by class ~ exam. From this plot you see that 'the problem' is that class contains three unique elements: a, b and ctr. That's why I renamed the variable class in df2 to be class_2 which has only two unique elements: a and b. Faceting by class_2 ~ exam then gives the desired output.
I hope this helps.

ggplot2: show relative % in a stacked barplot per group

I'm trying to plot a basic bar chart per group.
As values are pretty big, I want to show for each bar (i.e. group) the % of each group within the bar.
I managed to show percentage of the total, but this is not what I'm expecting : in each bar, I would like that the sum of % equal 100%.
Is there an easy way to do it without changing the dataframe ?
(DF <- data.frame( year = rep(2015:2017, each = 4),
Grp = c("Grp1", "Grp2", "Grp3", "Grp4"),
Value = trunc(rnorm(12, 2000000, 100000))) )
ggplot(DF) +
geom_bar(aes(x = year, y = Value, fill = Grp),
stat = "identity",
position = position_stack()) +
geom_text(aes(x = year, y = Value, group = Grp,
label = percent(Value/sum(Value))) ,
position = position_stack(vjust = .5))
You can create a new variable holding each value's share of its year's total:
library(dplyr)
library(ggplot2)
library(scales)
DF <- DF %>% group_by(year) %>% mutate(ValuePer=(Value/sum(Value))) %>% ungroup()
ggplot(DF, aes(year, ValuePer, fill = Grp)) +
geom_bar(stat = "identity", position = "fill") +
geom_text(aes(label = percent(ValuePer)),
position = position_fill())+
scale_y_continuous(labels = percent_format())
Use position = "fill" to turn scale into proportions and scale_y_continuous(labels = percent_format()) to turn this scale into percent.
DF <- data.frame( year = rep(2015:2017, each = 4),
Grp = c("Grp1", "Grp2", "Grp3", "Grp4"),
Value = trunc(rnorm(12, 2000000, 100000)))
library(ggplot2)
library(scales)
# Bars rescaled to proportions of each year's total (position = "fill"), with
# a percentage label on every segment.
ggplot(DF, aes(year, Value, fill = Grp)) +
  geom_bar(stat = "identity", position = "fill") +
  # The label must be the share within the year: ave() returns each row's
  # per-year sum, so the labels in one bar add up to 100%. Dividing by
  # sum(Value) would give the share of the grand total across all years.
  geom_text(
    aes(label = percent(Value / ave(Value, year, FUN = sum))),
    # vjust = 0.5 centres each label inside its segment.
    position = position_fill(vjust = 0.5)
  ) +
  scale_y_continuous(labels = percent_format())
OK gathering all your tricks, I finally get this :
I need to adjust my DF, what I wanted to avoid, but it remains simple so it works
library(dplyr)
library(ggplot2)
library(scales)
DF <- DF %>% group_by(year) %>% mutate(ValuePer=(Value/sum(Value))) %>% ungroup()
ggplot(DF, aes(year, Value, fill = Grp)) +
geom_bar(stat = "identity", position = "stack") +
geom_text(aes(label = percent(ValuePer)),
position = position_stack()) +
scale_y_continuous(labels = unit_format("M", 1e-6) )
I would use a single geom_text for each bar while filtering data by year (bar) using dplyr. Check if is that what you need:
(DF <- data.frame( year = rep(2015:2017, each = 4),
Grp = c("Grp1", "Grp2", "Grp3", "Grp4"),
Value = trunc(rnorm(12, 2000000, 100000))) )
library(dplyr)
ggplot(DF) +
geom_bar(aes(x = year, y = Value, fill = Grp),
stat = "identity",
position = position_stack()) +
geom_text(data = DF %>% filter(year == 2015),
aes(x = year, y = Value,
label = scales::percent(Value/sum(Value))) ,
position = position_stack(vjust = .5)) +
geom_text(data = DF %>% filter(year == 2016),
aes(x = year, y = Value,
label = scales::percent(Value/sum(Value))) ,
position = position_stack(vjust = .5)) +
geom_text(data = DF %>% filter(year == 2017),
aes(x = year, y = Value,
label = scales::percent(Value/sum(Value))) ,
position = position_stack(vjust = .5))
Argument group is not necessary here. There may be more elegant solutions but that is the one I could think about. Tell me if this is the output you were waiting for:
Maybe creating a new column doing the right computation. I could not figure out how the computation could be done right inside aes(), the way you did you just computed the overall %, the Value should be grouped by year instead.
At least you got yourself the actually value by the Y axis and the Year grouped % inside bars. I would advise changing this labels by stacking something like this:
scale_y_continuous(breaks = seq(0,8*10^6,10^6),
labels = c(0, paste(seq(1,8,1),'M')))
Resulting this:
You can adapt to your context.

Resources