plot data and their means in the same graph using ggplot - r

using the data set airquality I have written the following code:
library("tidyverse")
data(airquality)
airquality <- na.omit(airquality)
airquality$date <- as.Date(paste("1973", airquality$Month, airquality$Day,
sep="-"))
p1 <- ggplot(airquality, aes(x= date, y = Ozone, col=factor(Month))) +
geom_point() +
geom_line()
p1
Now I would like to plot in the same graph the mean of ozone for each months. How can I do this?

You could add the mean as a dashed line. The easiest way to do this might be to simply pass the data you want to a geom_line layer:
ggplot(airquality, aes(x = date, y = Ozone, col = factor(Month))) +
geom_point() +
geom_line(alpha = 0.5) +
geom_line(data = airquality %>%
group_by(Month) %>%
summarise(Ozone = mean(Ozone),
date = c(first(date), last(date)),
Month = mean(Month)),
linetype = 2, size = 1) +
scale_color_brewer(palette = "Set1") +
theme_minimal(base_size = 16)
If you just want points showing the mean, you could simplify things with stat_mean from ggpubr
ggplot(airquality, aes(x = date, y = Ozone, col = factor(Month))) +
geom_point() +
geom_line(alpha = 0.5) +
ggpubr::stat_mean(size = 5, shape = 21,
aes(fill = factor(Month)), color = "black") +
scale_color_brewer(palette = "Set1") +
scale_fill_brewer(palette = "Set1") +
theme_minimal(base_size = 16)
To join these dots up, you could do:
ggplot(airquality, aes(x = date, y = Ozone, col = factor(Month))) +
geom_point() +
geom_line(alpha = 0.5) +
geom_line(data = airquality %>%
group_by(Month) %>%
summarise(Ozone = mean(Ozone), date = mean(date)),
color = "black", linetype = 2) +
ggpubr::stat_mean(size = 5, shape = 21,
aes(fill = factor(Month)), color = "black") +
scale_color_brewer(palette = "Set1") +
scale_fill_brewer(palette = "Set1") +
theme_minimal(base_size = 16)

Related

Labeling pie charts using ggplot2

I have this code:
as_tibble(earlyCiliated[[]]) %>%
ggplot(aes(x="", y=Phase, fill=Phase)) + geom_col() +
coord_polar("y", start=0) +
geom_text(aes(label = paste0(Phase, "%")))
and my output looks like this:
What am I doing wrong that's causing the labels to all be on top of each other?
I can't completely recreate your plot because I do not have your data. That being said, you can try this:
install.packages("ggrepel")
library(ggrepel)
as_tibble(earlyCiliated[[]]) %>%
ggplot(aes(x="", y=Phase, fill=Phase)) + geom_col() +
coord_polar("y", start=0) +
geom_label_repel(data = earlyCiliated[[]],
aes(y = Phase, label = paste0(Phase, "%")),
size = 4.5, nudge_x = 1, show.legend = FALSE)
This is what it will look like (using other data because none was provided)
library(ggplot2)
library(ggrepel)
library(tidyverse)
df <- data.frame(value = c(15, 25, 32, 28),
group = paste0("G", 1:4))
# Get the positions
df2 <- df %>%
mutate(csum = rev(cumsum(rev(value))),
pos = value/2 + lead(csum, 1),
pos = if_else(is.na(pos), value/2, pos))
ggplot(df, aes(x = "" , y = value, fill = fct_inorder(group))) +
geom_col(width = 1, color = 1) +
coord_polar(theta = "y") +
scale_fill_brewer(palette = "Pastel1") +
geom_label_repel(data = df2,
aes(y = pos, label = paste0(value, "%")),
size = 4.5, nudge_x = 1, show.legend = FALSE) +
guides(fill = guide_legend(title = "Group")) +
theme_void()

geom_density NOT displaying over geom_histogram

I am trying to draw a density curve over histogram using ggplot but to no avail. dlist is a vector with numeric values.
Here is my code:
ggplot() +
geom_histogram(aes(x=dlist), bins = 30, fill = "#B3E4F7") +
geom_density() +
geom_vline(aes(xintercept = mean(dlist)),
color="#D2091F", linetype="dashed",size=1)
You need to set y to ..density... For example:
ggplot(data.frame(dlist), aes(x=dlist, y = ..density..)) +
geom_histogram(bins = 30, fill = "#B3E4F7") +
geom_density() +
geom_vline(aes(xintercept = mean(dlist)),
color="#D2091F", linetype="dashed",size=1)
A reproducible example:
library(ggplot2)
ggplot(mtcars, aes(x = mpg, y = ..density..)) +
geom_histogram(bins = 30, fill = "#B3E4F7") +
geom_density()
The geom_desntity has no data. Put the data in the ggplot() or in all functions.
ggplot(aes(x=dlist)) +
geom_histogram(bins = 30, fill = "#B3E4F7") +
geom_density() +
geom_vline(aes(xintercept = mean(dlist)),color="#D2091F", linetype="dashed",size=1)
However, if you want to compare both, you may want to plot the histogram with a density stat:
ggplot(aes(x=dlist)) +
geom_histogram(aes(y = ..density..),bins = 30, fill = "#B3E4F7") +
geom_density() +
geom_vline(aes(xintercept = mean(dlist)),color="#D2091F", linetype="dashed",size=1)
If you have a numeric vector dlist, you can create a data.frame before ggplot as follows:
dlist <- rnorm(1000)
tibble(dlist = dlist) %>%
ggplot(aes(x=dlist)) +
geom_histogram(aes(y = ..density..),bins = 30, fill = "#B3E4F7") +
geom_density() +
geom_vline(aes(xintercept = mean(dlist)),
color="#D2091F", linetype="dashed",size=1)

Change the color inside the plot

In this plot
library(ggplot2)
df <- data.frame(year = c(2011,2012,2013,2014,2015,2016,2017,2018),
value = c(337,423,551,661,846,1387,2222,3580))
ggplot(df, aes(year, value)) +
geom_point() +
geom_line() +
geom_text(aes(label = value, y = (value - 50)*0.9))
How is it possible to make the color of numbers of value red?
Like this?
library(ggplot2)
df <- data.frame(year = c(2011,2012,2013,2014,2015,2016,2017,2018),
value = c(337,423,551,661,846,1387,2222,3580))
ggplot(df, aes(year, value)) +
geom_point() +
geom_line() +
geom_text(aes(label = value, y = (value - 50)*0.9), color = "red")
Or like this?
library(ggplot2)
df <- data.frame(year = c(2011,2012,2013,2014,2015,2016,2017,2018),
value = c(337,423,551,661,846,1387,2222,3580))
ggplot(df, aes(year, value)) +
geom_point() +
geom_line() +
geom_text(aes(label = value, y = (value - 50)*0.9), color = "red") +
theme(axis.text.y = element_text(colour = "red"))

Error bars on a radar plot?

I'm looking to make a radar plot for multivariate data, a task simple enough for excel.
The problem comes when I would like to also plot some error bars on this. From what I understand, I cannot do this in excel. Is this possible on R?
Or can someone suggest an alternative? I have 32 single value dimensions.
Thanks!
I don't much like radar charts but here are some ideas to get you going, drawing on this approach. I like the look of my option 1 best, but I'm not sure how to solve the gap between var32 and var1 (I have some ideas, but a bit awkward).
library(tidyverse)
library(ggplot2)
library(scales)
# make some mock data
mydata <- data.frame(variable = paste0("Var", 1:32),
midpoint = rnorm(32),
stderr = rnorm(32, 1, 0.1),
stringsAsFactors = FALSE) %>%
mutate(upper = midpoint + 1.96 * stderr,
lower = midpoint - 1.96 * stderr) %>%
mutate(variable = factor(variable, levels = variable))
# Option 1:
mydata %>%
ggplot(aes(x = variable, y = midpoint, group = 1)) +
geom_ribbon(aes(ymin = lower, ymax = upper), fill = "grey50", alpha = 0.5) +
geom_line(colour = "purple") +
theme_light() +
theme(panel.grid.minor = element_blank()) +
coord_polar() +
labs(x = "", y = "")
# Option 2:
mydata %>%
gather(measure, value, -variable, -stderr) %>%
ggplot(aes(x = variable, y = value, colour = measure, group = measure, linetype = measure)) +
geom_polygon(fill = NA) +
theme_light() +
theme(panel.grid.minor = element_blank()) +
coord_polar() +
scale_colour_manual(values = c("steelblue", "black", "steelblue")) +
scale_linetype_manual(values = c(2,1,2)) +
labs(x = "", y = "")
# Option 3:
mydata %>%
ggplot(aes(x = variable, y = midpoint, group = 1)) +
geom_polygon(fill = NA, colour = "purple") +
geom_segment(aes(xend = variable, y = lower, yend = upper), colour = "grey50") +
geom_point(colour = "purple") +
theme_light() +
theme(panel.grid.minor = element_blank()) +
theme(panel.grid.major.x = element_blank()) +
coord_polar() +
labs(x = "", y = "")
Edit / addition
I think I prefer this one:
# Option 4:
mydata %>%
ggplot(aes(x = variable, y = midpoint, group = 1)) +
geom_polygon(aes(y = upper), fill = "grey50", alpha = 0.5) +
geom_polygon(aes(y = lower), fill = "grey99", alpha = 0.7) +
geom_polygon(fill = NA, colour = "purple") +
theme_light() +
theme(panel.grid.minor = element_blank()) +
coord_polar() +
labs(x = "", y = "")

Colour average lines in ggplot

I would like to colour the dashed lines, which are the average values of the two respective categories, with the same colour of the default palette used by ggplot to fill the distributions:
Click here to view the distribution
This is the code used:
library(ggplot2)
print(ggplot(dati, aes(x=ECU_fuel_consumption_L_100Km_CF, fill=Model))
+ ggtitle("Fuel Consumption density histogram, by Model")
+ ylab("Density")
+ geom_density(alpha=.3)
+ scale_x_continuous(breaks=pretty(dati$ECU_fuel_consumption_L_100Km_CF, n=10))
+ geom_vline(aes(xintercept = mean(ECU_fuel_consumption_L_100Km_CF[dati$Model == "500X"])), linetype="dashed", size=1)
+ geom_vline(aes(xintercept = mean(ECU_fuel_consumption_L_100Km_CF[dati$Model == "Renegade"])), linetype="dashed", size=1)
)
Thank you all in advance!
No reproducible example, but you probably want to do something like this:
library(dplyr)
# make up some data
d <- data.frame(x = c(mtcars$mpg, mtcars$hp),
var = rep(c('mpg', 'hp'), each = nrow(mtcars)))
means <- d %>% group_by(var) %>% summarize(m = mean(x))
ggplot(d, aes(x, fill = var)) +
geom_density(alpha = 0.3) +
geom_vline(data = means, aes(xintercept = m, col = var),
linetype = "dashed", size = 1)
This approach is extendable to any number of groups.
An option that doesn't require pre-calculation, but is also a bit more hacky, is:
ggplot(d, aes(x, fill = var)) +
geom_density(alpha = 0.3) +
geom_vline(aes(col = 'hp', xintercept = x), linetype = "dashed", size = 1,
data = data.frame(x = mean(d$x[d$var == 'hp']))) +
geom_vline(aes(col = 'mpg', xintercept = x), linetype = "dashed", size = 1,
data = data.frame(x = mean(d$x[d$var == 'mpg'])))

Resources