Colour average lines in ggplot - r

I would like to colour the dashed lines, which are the average values of the two respective categories, with the same colour of the default palette used by ggplot to fill the distributions:
Click here to view the distribution
This is the code used:
library(ggplot2)
print(ggplot(dati, aes(x=ECU_fuel_consumption_L_100Km_CF, fill=Model))
+ ggtitle("Fuel Consumption density histogram, by Model")
+ ylab("Density")
+ geom_density(alpha=.3)
+ scale_x_continuous(breaks=pretty(dati$ECU_fuel_consumption_L_100Km_CF, n=10))
+ geom_vline(aes(xintercept = mean(ECU_fuel_consumption_L_100Km_CF[dati$Model == "500X"])), linetype="dashed", size=1)
+ geom_vline(aes(xintercept = mean(ECU_fuel_consumption_L_100Km_CF[dati$Model == "Renegade"])), linetype="dashed", size=1)
)
Thank you all in advance!

No reproducible example, but you probably want to do something like this:
library(dplyr)
# make up some data
d <- data.frame(x = c(mtcars$mpg, mtcars$hp),
var = rep(c('mpg', 'hp'), each = nrow(mtcars)))
means <- d %>% group_by(var) %>% summarize(m = mean(x))
ggplot(d, aes(x, fill = var)) +
geom_density(alpha = 0.3) +
geom_vline(data = means, aes(xintercept = m, col = var),
linetype = "dashed", size = 1)
This approach is extendable to any number of groups.
An option that doesn't require pre-calculation, but is also a bit more hacky, is:
ggplot(d, aes(x, fill = var)) +
geom_density(alpha = 0.3) +
geom_vline(aes(col = 'hp', xintercept = x), linetype = "dashed", size = 1,
data = data.frame(x = mean(d$x[d$var == 'hp']))) +
geom_vline(aes(col = 'mpg', xintercept = x), linetype = "dashed", size = 1,
data = data.frame(x = mean(d$x[d$var == 'mpg'])))

Related

plot data and their means in the same graph using ggplot

using the data set airquality I have written the following code:
library("tidyverse")
data(airquality)
airquality <- na.omit(airquality)
airquality$date <- as.Date(paste("1973", airquality$Month, airquality$Day,
sep="-"))
p1 <- ggplot(airquality, aes(x= date, y = Ozone, col=factor(Month))) +
geom_point() +
geom_line()
p1
Now I would like to plot in the same graph the mean of ozone for each months. How can I do this?
You could add the mean as a dashed line. The easiest way to do this might be to simply pass the data you want to a geom_line layer:
ggplot(airquality, aes(x = date, y = Ozone, col = factor(Month))) +
geom_point() +
geom_line(alpha = 0.5) +
geom_line(data = airquality %>%
group_by(Month) %>%
summarise(Ozone = mean(Ozone),
date = c(first(date), last(date)),
Month = mean(Month)),
linetype = 2, size = 1) +
scale_color_brewer(palette = "Set1") +
theme_minimal(base_size = 16)
If you just want points showing the mean, you could simplify things with stat_mean from ggpubr
ggplot(airquality, aes(x = date, y = Ozone, col = factor(Month))) +
geom_point() +
geom_line(alpha = 0.5) +
ggpubr::stat_mean(size = 5, shape = 21,
aes(fill = factor(Month)), color = "black") +
scale_color_brewer(palette = "Set1") +
scale_fill_brewer(palette = "Set1") +
theme_minimal(base_size = 16)
To join these dots up, you could do:
ggplot(airquality, aes(x = date, y = Ozone, col = factor(Month))) +
geom_point() +
geom_line(alpha = 0.5) +
geom_line(data = airquality %>%
group_by(Month) %>%
summarise(Ozone = mean(Ozone), date = mean(date)),
color = "black", linetype = 2) +
ggpubr::stat_mean(size = 5, shape = 21,
aes(fill = factor(Month)), color = "black") +
scale_color_brewer(palette = "Set1") +
scale_fill_brewer(palette = "Set1") +
theme_minimal(base_size = 16)

How do I change the color of the regression lines in ggPlot?

I made a visualization of a regression. Currently this is what the graph looks like.
The regression lines are hard to see since they are the same color as the scatter plot dots.
My question is, how do I make the regression lines a different color from the scatter plot dots?
Here is my code:
(ggplot(data=df, mapping=aes(x='score', y='relent',
color='factor(threshold)'))+
geom_point()+
scale_color_manual(values=['darkorange', 'purple'])+
geom_smooth(method='lm',
formula = 'y ~ x+I(x**2)',se=False, )+
geom_vline(xintercept = 766, color = "red", size = 1, linetype = "dashed")+
labs(y = "Yield",
x = "Score")+
theme_bw()
)
One option to achieve your desired result would be to "duplicate" your threshold column with different values, e.g. in the code below I map 0 on 2 and 1 on 3. This duplicated column could then be mapped on the color aes inside geom_smooth and allows to set different colors for the regression lines.
My code below uses R and ggplot2, but to the best of my knowledge it could easily be adapted to plotnine:
n <- 1000
df <- data.frame(
relent = c(runif(n, 100, 200), runif(n, 150, 250)),
score = c(runif(n, 764, 766), runif(n, 766, 768)),
threshold = c(rep(0, n), rep(1, n))
)
df$threshold_sm <- c(rep(2, n), rep(3, n))
library(ggplot2)
p <- ggplot(data = df, mapping = aes(x = score, y = relent, color = factor(threshold))) +
scale_color_manual(values = c("darkorange", "purple", "blue", "green")) +
geom_vline(xintercept = 766, color = "red", size = 1, linetype = "dashed") +
labs(
y = "Yield",
x = "Score"
) +
theme_bw()
p +
geom_point() +
geom_smooth(aes(color = factor(threshold_sm)),
method = "lm",
formula = y ~ x + I(x**2), se = FALSE
)
A second option would be to add some transparency to the points so that the lines stand out more clearly; as a side benefit, this also deals with the overplotting of the points:
p +
geom_point(alpha = .3) +
geom_smooth(aes(color = factor(threshold)),
method = "lm",
formula = y ~ x + I(x**2), se = FALSE
) +
guides(color = guide_legend(override.aes = list(alpha = 1)))
Compare:
iris %>%
ggplot(aes(Petal.Length, Sepal.Width, color = Species)) +
geom_point() +
geom_smooth(method = "lm", aes(group = Species))
With:
iris %>%
ggplot(aes(Petal.Length, Sepal.Width)) +
geom_point(aes(color = Species)) +
geom_smooth(method = "lm", aes(group = Species))
When aes(color = ...) is specified inside of ggplot(), it is applied to both of the subsequent geoms. Moving it to geom_point() applies it to the points only.

ggplot: color points by density as they approach a specific value?

I have a dataset containing 1,000 values for a model, these values are all within the same range (y=40-70), so the points overlap a ton. I'm interested in using color to show the density of the points converging on a single value (y=56.72) which I have indicated with a horizontal dashed line on the plot below. How can I color these points to show this?
ggplot(data, aes(x=model, y=value))+
geom_point(size=1) +
geom_hline(yintercept=56.72,
linetype="dashed",
color = "black")
I think that you should opt for a histogram or density plot:
n <- 500
data <- data.frame(model= rep("model",n),value = rnorm(n,56.72,10))
ggplot(data, aes(x = value, y = after_stat(count))) +
geom_histogram(binwidth = 1)+
geom_density(size = 1)+
geom_vline(xintercept = 56.72, linetype = "dashed", color = "black")+
theme_bw()
Here is your plot with the same data:
ggplot(data, aes(x = model, y = value))+
geom_point(size = 1) +
geom_hline(yintercept = 56.72, linetype = "dashed", color = "black")
If your model is iterative and does converge to the value, I suggest you plot the value as a function of the iteration to show the convergence. Another option, keeping a plot similar to yours, is to dodge the position of the points:
ggplot(data, aes(x = model, y = value))+
geom_point(position = position_dodge2(width = 0.2),
shape = 1,
size = 2,
stroke = 1,
alpha = 0.5) +
geom_hline(yintercept = 56.72, linetype = "dashed", color = "black")
Here is a color density plot as you asked:
library(dplyr)
library(ggplot2)
data %>%
mutate(bin = cut(value, breaks = 10:120)) %>%
dplyr::group_by(bin) %>%
mutate(density = dplyr::n()) %>%
ggplot(aes(x = model, y = value, color = density))+
geom_point(size = 1) +
geom_hline(yintercept = 56.72, linetype = "dashed", color = "black")+
scale_colour_viridis_c(option = "A")
I would suggest using the alpha parameter of geom_point with a value close to 0, so that regions where many points overlap appear darker.
ggplot(data, aes(x=model, y=value)) +
geom_point(size=1, alpha = .1) +
geom_hline(yintercept=56.72, linetype="dashed", color = "black")

Add in legend to ggplot

I know this question is similar to ones that have been asked before, but the suggested solutions don't seem to apply.
I set up the problem as follows
mat1 <- NULL
mat2 <- NULL
mat1 <- data.frame(matrix(nrow =16, ncol =2, data = rnorm(32, 0, 1)))
mat2 <- data.frame(matrix(nrow =16, ncol =2, data = rnorm(32, 0, 1)))
mat1[,1] = mat2[,1] = 1:16
colnames(mat1) = c("Window", "CM")
colnames(mat2) = c("Window", "FM")
ggplot() +
geom_line(data = mat1, aes(x = mat1$Window, y= mat1$CM), linetype ="twodash", color ="steelblue") +
geom_line(data = mat2, aes(x = mat2$Window, y= mat2$FM), color = "black") +
theme_classic() + xlab("Quater after alpha assessment") + ylab("Estimated Coefficient") + labs(fill = "cohort model")
I want to add a legend. Specifically, I want the blue line to be labelled as CM and the black line to be labelled as FM.
In this kind of scenario, I think it is often easiest to bring your data into the appropriate (long) format for ggplot. Then you can properly use the whole ggplot toolset.
library(tidyverse)
# Combine the two series on their shared Window column. bind_cols() would
# leave two Window columns whose auto-repaired names differ between dplyr
# versions, so joining by key is the robust way to get Window, CM and FM.
# Then reshape to long format: one row per (Window, type) pair, where `type`
# is "CM" or "FM" and `value` is the estimated coefficient.
mat3 <- inner_join(mat1, mat2, by = "Window") %>%
  pivot_longer(-Window, names_to = "type", values_to = "value")
mat3 %>%
  ggplot(aes(x = Window, y = value, group = type, color = type, linetype = type)) +
  geom_line() +
  # Both scales use the same title and breaks so that colour and linetype
  # are described by a single combined legend.
  scale_color_manual("cohort model",
                     values = c("CM" = "steelblue", "FM" = "black"),
                     breaks = c("CM", "FM")) +
  # Values are named so the mapping does not depend on level ordering.
  scale_linetype_manual("cohort model",
                        values = c("CM" = "twodash", "FM" = "solid"),
                        breaks = c("CM", "FM")) +
  labs(x = "Quater after alpha assessment", y = "Estimated Coefficient") +
  theme_classic()
I assume the simplest way to do this would be to use annotate():
# Label each line directly at its right-hand end instead of drawing a legend.
# The label heights are read from the data (the value at the last window of
# each series), so they stay aligned with the lines whatever rnorm() produced.
last_cm <- mat1$CM[which.max(mat1$Window)]
last_fm <- mat2$FM[which.max(mat2$Window)]
ggplot() +
  # Columns are referenced by bare name; `data =` already supplies the frame.
  geom_line(data = mat1, aes(x = Window, y = CM), linetype = "twodash", color = "steelblue") +
  geom_line(data = mat2, aes(x = Window, y = FM), color = "black") +
  theme_classic() + xlab("Quater after alpha assessment") + ylab("Estimated Coefficient") + labs(fill = "cohort model") +
  # Extend the x-axis past the last window (16) to make room for the labels.
  xlim(NA, 18) +
  # Label colours match the colours of the lines they describe.
  annotate(geom = "text", x = 16.5, y = last_cm, label = "CM", color = "steelblue", size = 3) +
  annotate(geom = "text", x = 16.5, y = last_fm, label = "FM", color = "black", size = 3)
You can easily change and adjust the position with x= and y=. I also slightly extended the upper limit of x-scale so that the text fits in.
Of course, I don't know if that's enough for you. Otherwise, you could also add a text field as legend. But this would be the easiest and fastest way.

Error bars on a radar plot?

I'm looking to make a radar plot for multivariate data, a task simple enough for excel.
The problem comes when I would also like to plot some error bars on this. From what I understand, I cannot do this in Excel. Is this possible in R?
Or can someone suggest an alternative? I have 32 single value dimensions.
Thanks!
I don't much like radar charts but here are some ideas to get you going, drawing on this approach. I like the look of my option 1 best, but I'm not sure how to solve the gap between var32 and var1 (I have some ideas, but a bit awkward).
library(tidyverse)
library(ggplot2)
library(scales)
# make some mock data
mydata <- data.frame(variable = paste0("Var", 1:32),
midpoint = rnorm(32),
stderr = rnorm(32, 1, 0.1),
stringsAsFactors = FALSE) %>%
mutate(upper = midpoint + 1.96 * stderr,
lower = midpoint - 1.96 * stderr) %>%
mutate(variable = factor(variable, levels = variable))
# Option 1:
mydata %>%
ggplot(aes(x = variable, y = midpoint, group = 1)) +
geom_ribbon(aes(ymin = lower, ymax = upper), fill = "grey50", alpha = 0.5) +
geom_line(colour = "purple") +
theme_light() +
theme(panel.grid.minor = element_blank()) +
coord_polar() +
labs(x = "", y = "")
# Option 2:
# Draw the midpoint and the lower/upper bounds as three separate closed
# outlines. The data are reshaped to long format so that `measure`
# ("lower", "midpoint", "upper") can drive colour, grouping and linetype.
mydata %>%
  pivot_longer(-c(variable, stderr), names_to = "measure", values_to = "value") %>%
  ggplot(aes(x = variable, y = value, colour = measure, group = measure, linetype = measure)) +
  geom_polygon(fill = NA) +
  theme_light() +
  theme(panel.grid.minor = element_blank()) +
  coord_polar() +
  # Named values make the measure-to-style mapping explicit rather than
  # relying on the alphabetical ordering of the measure names.
  scale_colour_manual(values = c(lower = "steelblue", midpoint = "black", upper = "steelblue")) +
  scale_linetype_manual(values = c(lower = 2, midpoint = 1, upper = 2)) +
  labs(x = "", y = "")
# Option 3:
mydata %>%
ggplot(aes(x = variable, y = midpoint, group = 1)) +
geom_polygon(fill = NA, colour = "purple") +
geom_segment(aes(xend = variable, y = lower, yend = upper), colour = "grey50") +
geom_point(colour = "purple") +
theme_light() +
theme(panel.grid.minor = element_blank()) +
theme(panel.grid.major.x = element_blank()) +
coord_polar() +
labs(x = "", y = "")
Edit / addition
I think I prefer this one:
# Option 4:
mydata %>%
ggplot(aes(x = variable, y = midpoint, group = 1)) +
geom_polygon(aes(y = upper), fill = "grey50", alpha = 0.5) +
geom_polygon(aes(y = lower), fill = "grey99", alpha = 0.7) +
geom_polygon(fill = NA, colour = "purple") +
theme_light() +
theme(panel.grid.minor = element_blank()) +
coord_polar() +
labs(x = "", y = "")

Resources