After reading through different posts, I found out how to add a vline of mean to density plots as shown here.
Using the data provided in the above link:
1) How can one add 95% confidence intervals around the mean using geom_ribbon?
CIs can be computed as
#computation of the standard error of the mean
sem<-sd(x)/sqrt(length(x))
#95% confidence intervals of the mean
c(mean(x)-2*sem,mean(x)+2*sem)
2) How can one limit the vline to the region under the curve? You will see in the picture below that vline plots outside the curve.
Sample data very close to my real problem can be found at https://www.dropbox.com/s/bvvfdpgekbjyjh0/test.csv?dl=0
UPDATE
Using the real data from the link above, I tried the following, based on @beetroot's answer.
# Find the mean of each group
dat=me
library(dplyr)
library(plyr)
cdat <- ddply(data,.(direction,cond), summarise, rating.mean=mean(rating,na.rm=T))# summarize by season and variable
cdat
#ggplot
p=ggplot(data,aes(x = rating)) +
geom_density(aes(colour = cond),size=1.3,adjust=4)+
facet_grid(.~direction, scales="free")+
xlab(NULL) + ylab("Density")
p=p+coord_cartesian(xlim = c(0, 130))+scale_color_manual(name="",values=c("blue","#00BA38","#F8766D"))+
scale_fill_manual(values=c("blue", "#00BA38", "#F8766D"))+
theme(legend.title = element_text(colour="black", size=15, face="plain"))+
theme(legend.text = element_text(colour="black", size = 15, face = "plain"))+
theme(title = red.bold.italic.text, axis.title = red.bold.italic.text)+
theme(strip.text.x = element_text(size=20, color="black",face="plain"))+ # facet labels
ggtitle("SAMPLE A") +theme(plot.title = element_text(size = 20, face = "bold"))+
theme(axis.text = blue.bold.italic.16.text)+ theme(legend.position = "none")+
geom_vline(data=cdat, aes(xintercept=rating.mean, color=cond),linetype="dotted",size=1)
p
## implementing #beetroot's code to restrict lines under the curve and shade CIs around the mean
# I will use ddply for mean and CIs
# Per direction x cond group: mean rating, standard error of the mean (SEM)
# and an approximate 95% CI (mean +/- 2 * SEM).
# The SEM divides by the number of NON-MISSING ratings so that it is
# consistent with the NA-removing mean() and sd() calls; length(rating) would
# also count the NAs and make the interval too narrow.
cdat <- ddply(data, .(direction, cond), summarise,
              rating.mean = mean(rating, na.rm = TRUE),
              sem = sd(rating, na.rm = TRUE) / sqrt(sum(!is.na(rating))),
              ci.low = mean(rating, na.rm = TRUE) - 2 * sem,
              ci.upp = mean(rating, na.rm = TRUE) + 2 * sem) # summarize by direction and variable
#In order to limit the lines to the outline of the curves you first need to find out which y values
#of the curves correspond to the means, e.g. by accessing the density values with ggplot_build and
#using approx:
cdat.dens <- ggplot_build(ggplot(data, aes(x=rating, colour=cond)) +
facet_grid(.~direction, scales="free")+
geom_density(aes(colour = cond),size=1.3,adjust=4))$data[[1]] %>%
mutate(cond = ifelse(group==1, "A",
ifelse(group==2, "B","C"))) %>%
left_join(cdat) %>%
select(y, x, cond, rating.mean, sem, ci.low, ci.upp) %>%
group_by(cond) %>%
mutate(dens.mean = approx(x, y, xout = rating.mean)[[2]],
dens.cilow = approx(x, y, xout = ci.low)[[2]],
dens.ciupp = approx(x, y, xout = ci.upp)[[2]]) %>%
select(-y, -x) %>%
slice(1)
cdat.dens
#---
#You can then combine everything with various geom_segments:
ggplot(data, aes(x=rating, colour=cond)) +
geom_density(data = data, aes(x = rating, colour = cond),size=1.3,adjust=4) +facet_grid(.~direction, scales="free")+
geom_segment(data = cdat.dens, aes(x = rating.mean, xend = rating.mean, y = 0, yend = dens.mean, colour = cond),
linetype = "dashed", size = 1) +
geom_segment(data = cdat.dens, aes(x = ci.low, xend = ci.low, y = 0, yend = dens.cilow, colour = cond),
linetype = "dotted", size = 1) +
geom_segment(data = cdat.dens, aes(x = ci.upp, xend = ci.upp, y = 0, yend = dens.ciupp, colour = cond),
linetype = "dotted", size = 1)
Gives this:
You will notice that the mean and CI lines are not aligned with the curves the way they are in the original plot. What am I doing wrong, @beetroot?
Using the data from the link, you can calculate the mean, se and ci like so (I suggest using dplyr, the successor of plyr):
set.seed(1234)
dat <- data.frame(cond = factor(rep(c("A","B"), each=200)),
rating = c(rnorm(200),rnorm(200, mean=.8)))
library(ggplot2)
library(dplyr)
cdat <- dat %>%
group_by(cond) %>%
summarise(rating.mean = mean(rating),
sem = sd(rating)/sqrt(length(rating)),
ci.low = mean(rating) - 2*sem,
ci.upp = mean(rating) + 2*sem)
In order to limit the lines to the outline of the curves you first need to find out which y values of the curves correspond to the means, e.g. by accessing the density values with ggplot_build and using approx:
cdat.dens <- ggplot_build(ggplot(dat, aes(x=rating, colour=cond)) + geom_density())$data[[1]] %>%
mutate(cond = ifelse(group == 1, "A", "B")) %>%
left_join(cdat) %>%
select(y, x, cond, rating.mean, sem, ci.low, ci.upp) %>%
group_by(cond) %>%
mutate(dens.mean = approx(x, y, xout = rating.mean)[[2]],
dens.cilow = approx(x, y, xout = ci.low)[[2]],
dens.ciupp = approx(x, y, xout = ci.upp)[[2]]) %>%
select(-y, -x) %>%
slice(1)
> cdat.dens
Source: local data frame [2 x 8]
Groups: cond [2]
cond rating.mean sem ci.low ci.upp dens.mean dens.cilow dens.ciupp
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 A -0.05775928 0.07217200 -0.2021033 0.08658471 0.3865929 0.403623 0.3643583
2 B 0.87324927 0.07120697 0.7308353 1.01566320 0.3979347 0.381683 0.4096153
You can then combine everything with various geom_segments:
ggplot() +
geom_density(data = dat, aes(x = rating, colour = cond)) +
geom_segment(data = cdat.dens, aes(x = rating.mean, xend = rating.mean, y = 0, yend = dens.mean, colour = cond),
linetype = "dashed", size = 1) +
geom_segment(data = cdat.dens, aes(x = ci.low, xend = ci.low, y = 0, yend = dens.cilow, colour = cond),
linetype = "dotted", size = 1) +
geom_segment(data = cdat.dens, aes(x = ci.upp, xend = ci.upp, y = 0, yend = dens.ciupp, colour = cond),
linetype = "dotted", size = 1)
As Axeman pointed out you can create a polygon based on the ribbon area as explained in this answer.
So for your data you can subset and add the additional rows like so:
# Build the polygon that outlines the CI region under each density curve.
# Step 1: keep only the density points that fall inside each group's CI.
ribbon <- ggplot_build(ggplot(dat, aes(x=rating, colour=cond)) + geom_density())$data[[1]] %>%
  mutate(cond = ifelse(group == 1, "A", "B")) %>%
  left_join(cdat.dens, by = "cond") %>%
  group_by(cond) %>%
  filter(x >= ci.low & x <= ci.upp) %>%
  select(cond, x, y)
# Step 2: close each polygon on the x axis by putting a (ci.low, 0) point
# before and a (ci.upp, 0) point after the curve points. The bounds are read
# from cdat.dens rather than typed in by hand, so they keep full precision and
# stay correct when the data (or the seed) changes.
ribbon <- rbind(data.frame(cond = cdat.dens$cond, x = cdat.dens$ci.low, y = 0),
                as.data.frame(ribbon),
                data.frame(cond = cdat.dens$cond, x = cdat.dens$ci.upp, y = 0))
And add geom_polygon to the plot:
ggplot() +
geom_polygon(data = ribbon, aes(x = x, y = y, fill = cond), alpha = .5) +
geom_density(data = dat, aes(x = rating, colour = cond)) +
geom_segment(data = cdat.dens, aes(x = rating.mean, xend = rating.mean, y = 0, yend = dens.mean, colour = cond),
linetype = "dashed", size = 1) +
geom_segment(data = cdat.dens, aes(x = ci.low, xend = ci.low, y = 0, yend = dens.cilow, colour = cond),
linetype = "dotted", size = 1) +
geom_segment(data = cdat.dens, aes(x = ci.upp, xend = ci.upp, y = 0, yend = dens.ciupp, colour = cond),
linetype = "dotted", size = 1)
Here's the adapted code for your real data. It's just a bit tricky to incorporate two groups instead of one:
# Per direction x cond group: mean rating, standard error of the mean (SEM)
# and an approximate 95% CI (mean +/- 2 * SEM).
# The SEM divides by the number of NON-MISSING ratings so that it matches the
# NA-removing mean() and sd() calls; length(rating) would count the NAs too
# and shrink the interval.
cdat <- dat %>%
  group_by(direction, cond) %>%
  summarise(rating.mean = mean(rating, na.rm = TRUE),
            sem = sd(rating, na.rm = TRUE) / sqrt(sum(!is.na(rating))),
            ci.low = mean(rating, na.rm = TRUE) - 2*sem,
            ci.upp = mean(rating, na.rm = TRUE) + 2*sem)
cdat.dens <- ggplot_build(ggplot(dat, aes(x=rating, colour=interaction(direction, cond))) + geom_density())$data[[1]] %>%
mutate(cond = ifelse((group == 1 | group == 2 | group == 3 | group == 4), "A",
ifelse((group == 5 | group == 6 | group == 7 | group == 8), "B", "C")),
direction = ifelse((group == 1 | group == 5 | group == 9), "EAST",
ifelse((group == 2 | group == 6 | group == 10), "NORTH",
ifelse((group == 3 | group == 7 | group == 11), "SOUTH", "WEST")))) %>%
left_join(cdat) %>%
select(y, x, cond, direction, rating.mean, sem, ci.low, ci.upp) %>%
group_by(cond, direction) %>%
mutate(dens.mean = approx(x, y, xout = rating.mean)[[2]],
dens.cilow = approx(x, y, xout = ci.low)[[2]],
dens.ciupp = approx(x, y, xout = ci.upp)[[2]]) %>%
select(-y, -x) %>%
slice(1)
ggplot() +
geom_density(data = dat, aes(x = rating, colour = cond)) +
geom_segment(data = cdat.dens, aes(x = rating.mean, xend = rating.mean, y = 0, yend = dens.mean, colour = cond),
linetype = "dashed", size = 1) +
geom_segment(data = cdat.dens, aes(x = ci.low, xend = ci.low, y = 0, yend = dens.cilow, colour = cond),
linetype = "dotted", size = 1) +
geom_segment(data = cdat.dens, aes(x = ci.upp, xend = ci.upp, y = 0, yend = dens.ciupp, colour = cond),
linetype = "dotted", size = 1) +
facet_wrap(~direction)
If you want to draw the mean line without building the plot object and without manipulating the data prior to plotting you can use stat_summary():
(
ggplot(data = dat, aes(x = rating, colour = cond))
+ geom_density()
+ stat_summary(
aes(y = rating, x = 0),
geom = 'rect',
fun.data = density_mean_line(dat$rating),
key_glyph = "vline",
size = 1
)
)
giving:
where:
# Factory for a `fun.data` summary function (as passed to stat_summary()).
#
# values: the full data vector; its NA-free range is used as the limits of the
#         x scale handed to StatDensity$compute_group() for every group.
#
# Returns a function(x) that, for one group's values `x`, yields a one-row
# data frame describing a zero-width rectangle (xmin == xmax == mean(x))
# running from ymin = 0 up to the density interpolated at the mean.
density_mean_line <- function(values) {
  full_range <- range(values, na.rm = TRUE)

  function(x) {
    x_scale <- scale_x_continuous(limits = full_range)
    dens <- StatDensity$compute_group(
      data.frame(x = x),
      scales = list(x = x_scale)
    )

    centre <- mean(x)
    # Height of the density curve at the mean, by linear interpolation.
    height_at_centre <- approx(dens$x, dens$density, xout = centre)$y

    data.frame(
      xmin = centre,
      xmax = centre,
      ymin = 0,
      ymax = height_at_centre
    )
  }
}
And dat is defined as in erc's answer:
set.seed(1234)
dat <- data.frame(cond = factor(rep(c("A","B"), each=200)),
rating = c(rnorm(200),rnorm(200, mean=.8)))
This technique can also be used to generate solid area (of the same colour as the density outline):
(
ggplot(data = dat, aes(x = rating, colour = cond, group = cond))
+ stat_summary(
aes(y = rating, x = 0, fill = cond),
geom = 'rect',
fun.data = density_ci(dat$rating),
size=1
)
+ stat_summary(
aes(y = rating, x = 0),
geom = 'rect',
fun.data = density_mean_line(dat$rating),
key_glyph = "vline",
size = 0.5,
color='grey20'
)
+ geom_density()
)
where:
# Factory for a `fun.data` summary function (as passed to stat_summary()) that
# fills the confidence interval of the mean under a density curve.
#
# values:     the full data vector; its NA-free range is used as the limits of
#             the x scale handed to StatDensity$compute_group().
# resolution: number of zero-width rectangles used to fill the interval.
# z:          multiplier applied to the standard error of the mean; the
#             default 1.96 gives the usual normal-approximation 95% interval
#             (pass 2 to reproduce the "mean +/- 2 * SEM" rule).
#
# Returns a function(x) that, for one group's values `x`, yields a data frame
# with `resolution` rows (xmin, xmax, ymin, ymax): vertical slivers spaced
# evenly between the lower and upper CI bound, each reaching from 0 up to the
# interpolated density at that x.
density_ci = function(values, resolution=100, z=1.96) {
  values_range = range(values, na.rm=TRUE)
  function(x) {
    density_data = StatDensity$compute_group(
      data.frame(x=x),
      scales=list(
        x=scale_x_continuous(limits = values_range)
      )
    )
    mean_x = mean(x)
    sem = sd(x) / sqrt(length(x))
    ci_lower = mean_x - z * sem
    ci_upper = mean_x + z * sem
    x_values = seq(ci_lower, ci_upper, length.out=resolution)
    data.frame(
      xmin=x_values,
      xmax=x_values,
      ymin=rep(0, resolution),
      # Height of the density curve at each sliver, by linear interpolation.
      ymax=approx(density_data$x, density_data$density, xout=x_values)$y
    )
  }
}
Related
I want to create a graph where I can change the line size for each line c(1,2,3) and the alpha values for each line c(0.5,0.6,0.7). I tried to use scale_size_manual but it didn't make any difference. Any ideas on how to proceed?
# Three groups ("T", "M", "A") with four observations each, so that `var` has
# the same length (12) as `val` and `x`. With only 9 labels data.frame() stops
# with "arguments imply differing number of rows: 9, 12".
var <- rep(c("T", "M", "A"), each = 4)
val <- rnorm(12, 4, 5)
x <- 1:12
df <- data.frame(var, val, x)
ggplot(aes(x= x , y = val, color = var, group = var), data = df) +
scale_color_manual(values = c("grey","blue","black")) + geom_smooth(aes(x = x, y = val), formula = "y ~ x", method = "loess",se = FALSE, size = 1) + scale_x_continuous(breaks=seq(1, 12, 1), limits=c(1, 12)) + scale_size_manual(values = c(1,2,3))
To set the size and alpha values for your lines you have to map on aesthetics. Otherwise scale_size_manual will have no effect:
library(ggplot2)
ggplot(aes(x = x, y = val, color = var, group = var), data = df) +
scale_color_manual(values = c("grey", "blue", "black")) +
geom_smooth(aes(x = x, y = val, size = var, alpha = var), formula = "y ~ x", method = "loess", se = FALSE) +
scale_x_continuous(breaks = seq(1, 12, 1), limits = c(1, 12)) +
scale_size_manual(values = c(1, 2, 3)) +
scale_alpha_manual(values = c(.5, .6, .7))
I'm trying to draw multiple density plots in one plot for comparison purposes. I want each of them to show its 95% confidence interval, as in the following figure. I'm working with ggplot2, and my df is a long data frame of observations for a certain location that I would like to compare across different time intervals.
I've done some experimentation following this example but I don't have the coding knowledge to achieve what I want.
What I have managed to do so far:
library(magrittr)
library(ggplot2)
library(dplyr)
build_object <- ggplot_build(
ggplot(data=ex_long, aes(x=val)) + geom_density())
# Shade the part of a density curve that lies between two x bounds.
#
# gg_density:  ggplot object; the built data of its first layer (expected to
#              be geom_density) supplies the curve's x and y values.
# bound_left:  lower x bound of the region to shade.
# bound_right: upper x bound of the region to shade.
#
# Returns `gg_density` with an extra semi-transparent grey geom_area() layer.
plot_credible_interval <- function(gg_density, bound_left, bound_right) {
  curve <- ggplot_build(gg_density)$data[[1]]
  xs <- curve$x
  ys <- curve$y

  # First curve point at or right of the left bound, and last curve point at
  # or left of the right bound; everything in between gets shaded.
  first_idx <- min(which(xs >= bound_left))
  last_idx <- max(which(xs <= bound_right))
  inside <- first_idx:last_idx

  shaded <- data.frame(x = xs[inside], y = ys[inside])

  gg_density +
    geom_area(data = shaded, aes(x = x, y = y), fill = "grey", alpha = 0.6)
}
gg_density <- ggplot(data=ex_long, aes(x=val)) +
geom_density()
gg_density %>% plot_credible_interval(tab$q2.5[[40]], tab$q97.5[[40]])
Help would be much appreciated.
This is obviously on a different set of data, but this is roughly that plot with data from 2 t distributions. I've included the data generation in case it is of use.
library(tidyverse)
x1 <- seq(-5, 5, by = 0.1)
t_dist1 <- data.frame(x = x1,
y = dt(x1, df = 3),
dist = "dist1")
x2 <- seq(-5, 5, by = 0.1)
t_dist2 <- data.frame(x = x2,
y = dt(x2, df = 3),
dist = "dist2")
t_data = rbind(t_dist1, t_dist2) %>%
mutate(x = case_when(
dist == "dist2" ~ x + 1,
TRUE ~ x
))
p <- ggplot(data = t_data,
aes(x = x,
y = y )) +
geom_line(aes(color = dist))
plot_data <- as.data.frame(ggplot_build(p)$data)
bottom <- data.frame(plot_data) %>%
mutate(dist = case_when(
group == 1 ~ "dist1",
group == 2 ~ "dist2"
)) %>%
group_by(dist) %>%
slice_head(n = ceiling(nrow(.) * 0.1)) %>%
ungroup()
top <- data.frame(plot_data) %>%
mutate(dist = case_when(
group == 1 ~ "dist1",
group == 2 ~ "dist2"
)) %>%
group_by(dist) %>%
slice_tail(n = ceiling(nrow(.) * 0.1)) %>%
ungroup()
segments <- t_data %>%
group_by(dist) %>%
summarise(x = mean(x),
y = max(y))
p + geom_area(data = bottom,
aes(x = x,
y = y,
fill = dist),
alpha = 0.25,
position = "identity") +
geom_area(data = top,
aes(x = x,
y = y,
fill = dist),
alpha = 0.25,
position = "identity") +
geom_segment(data = segments,
aes(x = x,
y = 0,
xend = x,
yend = y,
color = dist,
linetype = dist)) +
scale_color_manual(values = c("red", "blue")) +
scale_linetype_manual(values = c("dashed", "dashed"),
labels = NULL) +
ylab("Density") +
xlab("\U03B2 for AQIv") +
guides(color = guide_legend(title = "p.d.f \U03B2",
title.position = "right",
labels = NULL),
linetype = guide_legend(title = "Mean \U03B2",
title.position = "right",
labels = NULL,
override.aes = list(color = c("red", "blue"))),
fill = guide_legend(title = "Rej. area \U03B1 = 0.05",
title.position = "right",
labels = NULL)) +
annotate(geom = "text",
x = c(-4.75, -4),
y = 0.35,
label = c("RK", "OK")) +
theme(panel.background = element_blank(),
panel.border = element_rect(fill = NA,
color = "black"),
legend.position = c(0.2, 0.7),
legend.key = element_blank(),
legend.direction = "horizontal",
legend.text = element_blank(),
legend.title = element_text(size = 8))
I am making an error bar plot with a different line type for each group:
library(ggplot2)
library(plyr)
# Create dataset:
DF <- data.frame(
group = rep(c("a", "b", "c", "d"),each=10),
Ydata = c(seq(1,10,1),seq(5,50,5),seq(20,11,-1),seq(0.3,3,0.3)),
Xdata = c(seq(1,10,1),seq(5,50,5),seq(20,11,-1),seq(0.3,3,0.3)))
# Summarise data:
subDF <- ddply(DF, .(group), summarise,
X = mean(Xdata), Y = mean(Ydata),
X_sd = sd(Xdata, na.rm = T), Y_sd = sd(Ydata))
# Plot data with error bars:
ggplot(subDF, aes(x = X, y = Y,linetype = group)) +
geom_errorbar(aes(x = X,
ymin = (Y-Y_sd),
ymax = (Y+Y_sd)),
width = 1, size = 0.5) +
geom_point(cex = 3) +
scale_linetype_manual(values = c("solid","twodash","longdash","longdash"))
This gives me the following plot, but I want the end whiskers to be solid. Could anyone help?
One option to achieve your desired result would be to switch to geom_linerange and add the whiskers via geom_segment like so:
library(ggplot2)
width <- .3
# Plot data with error bars:
ggplot(subDF, aes(x = X, y = Y, linetype = group)) +
geom_segment(aes(
x = X - width, xend = X + width,
y = Y - Y_sd, yend = Y - Y_sd
),
size = 0.5, linetype = "solid"
) +
geom_segment(aes(
x = X - width, xend = X + width,
y = Y + Y_sd, yend = Y + Y_sd
),
size = 0.5, linetype = "solid"
) +
geom_linerange(aes(
x = X,
ymin = (Y - Y_sd),
ymax = (Y + Y_sd)
),
size = 0.5
) +
geom_point(cex = 3) +
scale_linetype_manual(values = c("solid", "twodash", "longdash", "longdash"))
I'm creating an illustration of how loess works. My two queries are at the end of this question. First, setup:
library(tidyverse)
# augment() comes from broom, which library(tidyverse) installs but does not
# attach, so it has to be loaded explicitly.
library(broom)

data(melanoma, package = "lattice")
mela <- as_tibble(melanoma)

# Tricube weight function: (1 - |x|^3)^3 for |x| < 1, and 0 otherwise.
tric <- function(x) if_else(abs(x) < 1, (1 - abs(x)^3)^3, 0)
# Rescale a numeric vector linearly onto [0, 1].
scl <- function(x) (x - min(x)) / (max(x) - min(x))

# Take the given rows of `data` as one local window and attach, per row:
#   dist   - absolute distance in years from the window's 5th observation
#   scaled - dist rescaled to [0, 1] within the window
#   weight - tricube weight of the scaled distance
weight_window <- function(data, rows) {
  data %>%
    slice(rows) %>%
    mutate(
      dist = abs(year - year[5]),
      scaled = scl(dist),
      weight = tric(scaled)
    )
}

# Weighted linear fit of incidence on year for one window; returns the window
# data augmented with the model's fitted values and diagnostics.
fit_window <- function(window) {
  lm(incidence ~ year, data = window, weights = weight) %>%
    augment(window)
}

# Four consecutive windows (the last one holds 10 rows instead of 9).
mela1 <- weight_window(mela, 1:9)
mod1 <- fit_window(mela1)

mela2 <- weight_window(mela, 10:18)
mod2 <- fit_window(mela2)

mela3 <- weight_window(mela, 19:27)
mod3 <- fit_window(mela3)

mela4 <- weight_window(mela, 28:37)
mod4 <- fit_window(mela4)
The main plot:
col <- rainbow_hcl(start = 12, 4, l = 20)
colB <- rainbow_hcl(start = 12, 4, l = 100)
main <- ggplot(data = mela, aes(x = year, y = incidence)) +
# segment 1
geom_segment(
aes(x = 1936, xend = 1944, y = 2.115717, yend = 2.115717)) +
# segment 2
geom_segment(
aes(x = 1945, xend = 1953, y = 3.473217, yend = 3.473217)) +
# segment 3
geom_segment(
aes(x = 1954, xend = 1962, y = 1.170247, yend = 1.170247)) +
# segment 4
geom_segment(
aes(x = 1963, xend = 1972, y = 2.7, yend = 2.7)) +
geom_point(data = mod1, color = col[1], shape = 1) +
geom_point(data = mod2, color = col[2], shape = 0) +
geom_point(data = mod3, color = col[4], shape = 5) +
geom_point(data = mod4, color = col[3], shape = 2) +
geom_line(data = mod1, aes(x = year, y = .fitted), color = col[1]) +
geom_line(data = mod2, aes(x = year, y = .fitted), color = col[2]) +
geom_line(data = mod3, aes(x = year, y = .fitted), color = col[4]) +
geom_line(data = mod4, aes(x = year, y = .fitted), color = col[3]) +
scale_x_continuous(breaks = c(1940, 1949, 1958, 1967))
Insets
# One small inset per local model: the weight function over the window's
# years, drawn as an outline (dark colour from `col`) over a filled area
# (light colour from `colB`), with all axes and background removed.
inset1 <- ggplot(data = mod1, aes(x = year, y = weight)) +
  geom_line(color = col[1]) +
  geom_area(fill = colB[1]) +
  theme_void()
inset2 <- ggplot(data = mod2, aes(x = year, y = weight)) +
  # was `col[12)`: unbalanced bracket (syntax error); index 2 matches colB[2]
  geom_line(color = col[2]) +
  geom_area(fill = colB[2]) +
  theme_void()
inset3 <- ggplot(data = mod3, aes(x = year, y = weight)) +
  geom_line(color = col[3]) +
  geom_area(fill = colB[3]) +
  theme_void()
inset4 <- ggplot(data = mod4, aes(x = year, y = weight)) +
  geom_line(color = col[4]) +
  geom_area(fill = colB[4]) +
  theme_void()
Question 1: How do I place the four insets so that the y = 0 of the weight function is at the height of the corresponding geom_segment? I would like the inset heights = 2 in the main figure coordinates.
Question 2: How do I set the color of each segment to the color of the corresponding inset?
Not sure whether I got everything right. But I tried my best. (; You could simplify your code considerably
... by binding your models' data into one data frame, and doing the same for the segments' data.
... mapping on aesthetics and setting the colors and shape via some named vectors and scale_xxx_manual
For your insets there is no need to make separate plots and trying to put them into the main plot. You could simply add them via an additional geom_line and a geom_ribbon. To get the heights of the segments join the segments data to the models data so that you can set the starting value for the geom_ribbon according to the y value of the segment
library(tidyverse)
library(broom)
library(colorspace)
col <- setNames(col, c("mod1", "mod2", "mod4", "mod3"))
colB <- setNames(colB, c("mod1", "mod2", "mod4", "mod3"))
shapes <- setNames(c(1, 0, 5, 2), c("mod1", "mod2", "mod3", "mod4"))
mods <- list(mod1 = mod1, mod2 = mod2, mod3 = mod3, mod4 = mod4) %>%
bind_rows(.id = "mod")
# segments data
dseg <- tribble(
~mod, ~x, ~xend, ~y,
"mod1", 1936, 1944, 2.115717,
"mod2", 1945, 1953, 3.473217,
"mod3", 1954, 1962, 1.170247,
"mod4", 1963, 1972, 2.7,
)
# Main plot: one horizontal segment per window (from dseg), the observations
# and the fitted line of each local model (from mods), all coloured/shaped by
# model via the named vectors `col` and `shapes`.
main <- ggplot(data = mela, aes(x = year, y = incidence)) +
  geom_segment(data = dseg, aes(x = x, xend = xend, y = y, yend = y, color = mod)) +
  geom_point(data = mods, aes(color = mod, shape = mod)) +
  geom_line(data = mods, aes(x = year, y = .fitted, color = mod)) +
  scale_color_manual(values = col) +
  scale_shape_manual(values = shapes) +
  scale_x_continuous(breaks = c(1940, 1949, 1958, 1967)) +
  # Hide all legends. "none" replaces the deprecated `= FALSE` spelling, which
  # emits a deprecation warning in current ggplot2.
  guides(color = "none", shape = "none", fill = "none")
mods1 <- left_join(mods, select(dseg, mod, y), by = "mod")
# Add insets
main +
geom_line(data = mods1, aes(x = year, y = weight + y, color = mod, group = mod)) +
geom_ribbon(data = mods1, aes(x = year, ymin = y, ymax = weight + y, fill = mod, group = mod)) +
scale_fill_manual(values = colB)
I am trying to create a plot in R using ggplot that shows the difference between my two bars in a nice way.
I found an example that did part of what I wanted, but I have two major problems:
It is based on comparing groups of bars, but I only have two, so I added one group with both of them.
I would like to draw the arrow in nicer shape. I attached an image.
Code:
transactions <- c(5000000, 1000000)
time <- c("Q1","Q2")
group <- c("A", "A")
data <- data.frame(transactions, time, group)
library(ggplot2)
# Summary function for stat_summary(): echoes the incoming values (debugging
# aid), then returns a data frame with the label position `y` (one above the
# largest value) and the label text `label` (successive differences of `x`,
# rounded to 2 decimals and suffixed with "cm").
fun.data <- function(x) {
  print(x)
  label_text <- paste0(round(diff(x), 2), "cm")
  data.frame(y = max(x) + 1, label = label_text)
}
ylab <- c(2.5, 5.0, 7.5, 10)
gg <- ggplot(data, aes(x = time, y = transactions, fill = colors_hc[1], label = round(transactions, 0))) +
geom_bar(stat = "identity", show.legend = FALSE) +
geom_text(position = position_dodge(width = 0.9),
vjust = 1.1) +
geom_line(aes(group = group), position = position_nudge(0.1),
arrow = arrow()) +
stat_summary(aes(x = group, y = transactions),
geom = "label",
fun.data = fun.data,
fontface = "bold", fill = "lightgrey",
inherit.aes = FALSE) +
expand_limits(x = c(0, NA), y = c(0, NA)) +
scale_y_continuous(labels = paste0(ylab, "M"),
breaks = 10 ^ 6 * ylab)
gg
The arrows I am aiming for:
Where I am (ignore the ugliness, didn't style it yet):
This works, but you still need to play around a bit with the axes (or rather beautify them)
library(dplyr)
library(ggplot2)
transactions <- c(5000000, 1000000)
time <- c("Q1","Q2")
group <- c("A", "A")
my_data <- data.frame(transactions, time, group)
# Summary function for stat_summary(): returns a data frame with the label
# position `y` (one above the largest value) and the label `label` (successive
# differences of `x`, truncated to integer).
fun.data <- function(x) {
  change <- as.integer(diff(x))
  data.frame(y = max(x) + 1, label = change)
}
my_data %>%
ggplot(aes(x = group, y = transactions, fill = time)) +
geom_bar(stat = 'identity', position = 'dodge') +
geom_text(aes(label = as.integer(transactions)),
position = position_dodge(width = 0.9),
vjust = 1.5) +
geom_line(aes(group = group), position = position_nudge(0.1),
arrow = arrow()) +
stat_summary(aes(x = group, y = transactions),
geom = "label",
size = 5,
position = position_nudge(0.05),
fun.data = fun.data,
fontface = "bold", fill = "lightgrey",
inherit.aes = FALSE)
Edit2:
y_limit <- 6000000
my_data %>%
ggplot(aes(x = time, y = transactions)) +
geom_bar(stat = 'identity',
fill = 'steelblue') +
geom_text(aes(label = as.integer(transactions)),
vjust = 2) +
coord_cartesian(ylim = c(0, y_limit)) +
geom_segment(aes(x = 'Q1', y = max(my_data$transactions),
xend = 'Q1', yend = y_limit)) +
geom_segment(aes(x = 'Q2', y = y_limit,
xend = 'Q2', yend = min(my_data$transactions)),
arrow = arrow()) +
geom_segment(aes(x = 'Q1', y = y_limit,
xend = 'Q2', yend = y_limit)) +
geom_label(aes(x = 'Q2',
y = y_limit,
label = as.integer(min(my_data$transactions)- max(my_data$transactions))),
size = 10,
position = position_nudge(-0.5),
fontface = "bold", fill = "lightgrey")