How to allow selected points and confidence intervals to display on graph? - r

I have a dataset with species SP1 and SP2 and abiotic variables AB1 and AB2. I want to plot all of them on the graph but with modifications
Points: Display the SP1 and SP2 points on the graph, but NOT the AB1 and AB2 points
Lines: The AB lines should be black, dashed (see code attempt that did not work)
Confidence intervals: Only the SP lines need to have confidence intervals. The confidence intervals should have alpha = 0.2
# Reproducible toy data: 20 draws from N(5, 1), labelled by cycling through the
# four series names and assigned to the four levels in runs of five.
set.seed(111)
var <- rep(c("SP1","SP2","AB1","AB2"), times = 5)
var.val <- rnorm(20,5,1)
level <- rep(c(100,200,300,400), each = 5)
df <- data.frame(var, var.val, level)
# Asker's attempt: every series gets points and a fitted line with a band.
ggplot(df, aes(x = level, y = var.val, col = var, group = var)) +
# NOTE(review): `pd` is not defined anywhere in this snippet -- presumably a
# position_dodge() object created elsewhere; confirm before running.
geom_point(aes(fill = var),colour="white",pch=21, size=4, stroke = 1, alpha = 0.7, position = pd) + theme_classic() +
geom_smooth(method="lm", formula = y ~ x ) +
# linetype is not mapped in any aes() above, so this scale has nothing to act on.
scale_linetype_manual(values = c("dashed", "dashed","solid", "solid")) +
# Four colours, one per value of var.
scale_colour_manual(values = c("black","black","red","blue"))

You can achieve this by passing filtered versions of the data frame to each layer:
# Give each layer its own subset of `df`: the species (SP*) series get points
# and a confidence band, the abiotic (AB*) series get a plain dashed line.
is_species <- df$var %in% c("SP1", "SP2")
species_df <- df[is_species, ]
abiotic_df <- df[!is_species, ]

ggplot(df, aes(x = level, y = var.val, colour = var, group = var)) +
  # Points for the species series only; white outline, fill taken from var.
  geom_point(
    aes(fill = var),
    data = species_df,
    colour = "white", shape = 21, size = 4, stroke = 1, alpha = 0.7
  ) +
  # Species trend lines with a confidence band drawn at alpha = 0.2.
  geom_smooth(
    data = species_df,
    method = "lm", formula = y ~ x, alpha = 0.2
  ) +
  # Abiotic trend lines: no confidence band, dashed (linetype 2).
  geom_smooth(
    data = abiotic_df,
    method = "lm", formula = y ~ x, se = FALSE, linetype = 2
  ) +
  # No linetype scale is needed: linetype is set as a constant on the abiotic
  # layer and is never mapped to a variable, so a manual scale would be ignored.
  scale_colour_manual(values = c("black", "black", "red", "blue")) +
  theme_classic()
Though if AB and SP are conceptually two different variables, you might want to consider pivoting to a wider format.

Related

How to stagger the equation of line + Rsquared for multiple factors on a R ggplot?

I have four factors, each with its own regression line and R-squared. I want to do three things:
Place the Rsq in line with the equation of the line (currently its at the bottom)
Arrange the four equations on separate lines
Arrange the four equations on the top, right.
# Reproducible toy data: 20 draws from N(5, 1) across four series and four levels.
set.seed(111)
var <- rep(c("SP1","SP2","AB1","AB2"), times = 5)
var.val <- rnorm(20,5,1)
level <- rep(c(100,200,300,400), each = 5)
df <- data.frame(var, var.val, level)
# Sort rows by descending level; the index comes from the standalone `level`
# vector, which holds the same values as df$level at this point.
df <- df[order(-level),]
ggplot(df, aes(x = level, y = var.val, col = var, group = var, linetype = var)) +
# Points are drawn for the SP1/SP2 rows only.
geom_point(aes(fill = var), colour="white",pch=21, size=4, stroke = 1,
alpha = 0.7, data = df[df$var %in% c("SP1", "SP2"),]) +
theme_classic() +
# SP1/SP2: linear fit with a confidence band at alpha = 0.2.
geom_smooth(data = df[df$var %in% c("SP1", "SP2"),],
method = "lm", formula = y ~ x, alpha = 0.2) +
# All other series: linear fit without a confidence band.
geom_smooth(data = df[!df$var %in% c("SP1", "SP2"),],
method = "lm", formula = y ~ x, se = FALSE) +
scale_linetype_manual(values = c("dotdash", "dashed","solid", "solid")) +
scale_colour_manual(values = c("black","black","red","blue")) +
# Builds a plotmath label "atop(<equation>, <R^2>)" per series, which stacks the
# R^2 under the equation. NOTE(review): stat_poly_eq() is not loaded in this
# snippet -- presumably from the ggpmisc package; confirm.
stat_poly_eq(formula = y ~ x, aes(label = paste("atop(", ..eq.label.., ",", ..rr.label.., ")")), label.y = 0.9, parse = TRUE, size = 3)
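The plotmath function atop() stacks its two arguments, which is why the R-squared ends up on a separate line below the equation. Wrap the two labels in list() instead to place them side by side on one line.
To align the labels at the top right, pass label.x and label.y as fractions of the plot area (values between 0 and 1). Give label.y a sequence with one value per equation, as below, so that the labels do not overlap.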
# One "equation, R^2" label per series, stacked in the top-right corner.
# Needs ggplot2 (>= 3.3.0 for after_stat()) and ggpmisc (for stat_poly_eq()).
is_species <- df$var %in% c("SP1", "SP2")

ggplot(
  df,
  aes(x = level, y = var.val, colour = var, group = var, linetype = var)
) +
  # Points for the species series only.
  geom_point(
    aes(fill = var),
    data = df[is_species, ],
    colour = "white", shape = 21, size = 4, stroke = 1, alpha = 0.7
  ) +
  theme_classic() +
  # Species: linear fit with a confidence band.
  geom_smooth(
    data = df[is_species, ],
    method = "lm", formula = y ~ x, alpha = 0.2
  ) +
  # Abiotic: linear fit without a confidence band.
  geom_smooth(
    data = df[!is_species, ],
    method = "lm", formula = y ~ x, se = FALSE
  ) +
  scale_linetype_manual(values = c("dotdash", "dashed", "solid", "solid")) +
  scale_colour_manual(values = c("black", "black", "red", "blue")) +
  # after_stat() replaces the deprecated ..eq.label.. / ..rr.label.. notation.
  # list(a, b) is plotmath for "a, b" on one line; label.y supplies one vertical
  # position per series so the four labels do not overlap.
  stat_poly_eq(
    aes(label = after_stat(paste("list(", eq.label, ",", rr.label, ")"))),
    formula = y ~ x,
    label.x = 0.9,
    label.y = seq(0.85, 1, by = 0.05),
    parse = TRUE,
    size = 3
  )

DotDash and TwoDash linetypes not working in ggplot2 R

I am trying to plot twodash and dotdash, but I only get dash as the output.
# Reproducible toy data: 20 draws from N(5, 1) across four series and four levels.
set.seed(111)
var <- rep(c("SP1","SP2","AB1","AB2"), times = 5)
var.val <- rnorm(20,5,1)
level <- rep(c(100,200,300,400), each = 5)
df <- data.frame(var, var.val, level)
# Note: linetype is NOT among the aesthetics mapped here.
ggplot(df, aes(x = level, y = var.val, col = var, group = var)) +
geom_point(aes(fill = var), colour="white",pch=21, size=4, stroke = 1,
alpha = 0.7, data = df[df$var %in% c("SP1", "SP2"),]) +
theme_classic() +
geom_smooth(data = df[df$var %in% c("SP1", "SP2"),],
method = "lm", formula = y ~ x, alpha = 0.2) +
# linetype = 2 is a constant for this whole layer, so both non-SP lines are
# drawn with the same (dashed) pattern.
geom_smooth(data = df[!df$var %in% c("SP1", "SP2"),],
method = "lm", formula = y ~ x, se = FALSE, linetype = 2 ) +
# With no linetype mapping in any aes(), these values are never applied.
scale_linetype_manual(values = c("dotdash", "twodash","solid", "solid")) +
scale_colour_manual(values = c("black","black","red","blue"))
Try setting linetype as part of the aesthetic in your ggplot call:
# Mapping linetype to var in the top-level aes() gives scale_linetype_manual()
# an aesthetic to style; the second smooth no longer sets a constant linetype.
ggplot(
  data = df,
  mapping = aes(
    x = level,
    y = var.val,
    col = var,
    group = var,
    linetype = var
  )
) +
  # Points: SP1/SP2 rows only.
  geom_point(
    mapping = aes(fill = var),
    data = subset(df, var %in% c("SP1", "SP2")),
    colour = "white",
    pch = 21,
    size = 4,
    stroke = 1,
    alpha = 0.7
  ) +
  theme_classic() +
  # SP1/SP2: linear fit with a confidence band.
  geom_smooth(
    data = subset(df, var %in% c("SP1", "SP2")),
    method = "lm",
    formula = y ~ x,
    alpha = 0.2
  ) +
  # Remaining series: linear fit, no confidence band.
  geom_smooth(
    data = subset(df, !(var %in% c("SP1", "SP2"))),
    method = "lm",
    formula = y ~ x,
    se = FALSE
  ) +
  scale_linetype_manual(values = c("dotdash", "twodash", "solid", "solid")) +
  scale_colour_manual(values = c("black", "black", "red", "blue"))

How to order continuous x-axis on regression graph in ggplot2 R?

I want x-axis to go from 400 to 100. Here is my attempt, which did not work.
# Reproducible toy data: 20 draws from N(5, 1) across four series and four levels.
set.seed(111)
var <- rep(c("SP1","SP2","AB1","AB2"), times = 5)
var.val <- rnorm(20,5,1)
level <- rep(c(100,200,300,400), each = 5)
df <- data.frame(var, var.val, level)
# Asker's attempt at reversing the axis: sort rows by descending level. This
# only reorders the rows; level is still plotted as an ordinary numeric x.
df <- df[order(-level),]
ggplot(df, aes(x = level, y = var.val, col = var, group = var, linetype = var)) + # set linetype
# Points: SP1/SP2 rows only.
geom_point(aes(fill = var), colour="white",pch=21, size=4, stroke = 1,
alpha = 0.7, data = df[df$var %in% c("SP1", "SP2"),]) +
theme_classic() +
# SP1/SP2: linear fit with a confidence band at alpha = 0.2.
geom_smooth(data = df[df$var %in% c("SP1", "SP2"),],
method = "lm", formula = y ~ x, alpha = 0.2) +
# Remaining series: linear fit without a confidence band.
geom_smooth(data = df[!df$var %in% c("SP1", "SP2"),],
method = "lm", formula = y ~ x, se = FALSE) +
scale_linetype_manual(values = c("dotdash", "dashed","solid", "solid")) +
scale_colour_manual(values = c("black","black","red","blue"))
Try adding scale_x_reverse()
library(tidyverse)

# Reproducible toy data: 20 draws from N(5, 1) across four series and four levels.
set.seed(111)
var <- rep(c("SP1", "SP2", "AB1", "AB2"), times = 5)
var.val <- rnorm(20, 5, 1)
level <- rep(c(100, 200, 300, 400), each = 5)
df <- data.frame(var, var.val, level)

# The rows are deliberately not re-sorted: row order does not control the
# direction of a continuous axis. scale_x_reverse() below does.
is_species <- df$var %in% c("SP1", "SP2")

ggplot(
  df,
  aes(x = level, y = var.val, colour = var, group = var, linetype = var)
) +
  # Points for the species series only.
  geom_point(
    aes(fill = var),
    data = df[is_species, ],
    colour = "white", shape = 21, size = 4, stroke = 1, alpha = 0.7
  ) +
  theme_classic() +
  # Species: linear fit with a confidence band.
  geom_smooth(
    data = df[is_species, ],
    method = "lm", formula = y ~ x, alpha = 0.2
  ) +
  # Abiotic: linear fit without a confidence band.
  geom_smooth(
    data = df[!is_species, ],
    method = "lm", formula = y ~ x, se = FALSE
  ) +
  scale_linetype_manual(values = c("dotdash", "dashed", "solid", "solid")) +
  scale_colour_manual(values = c("black", "black", "red", "blue")) +
  # Flip the x axis so it runs from 400 down to 100.
  scale_x_reverse()
Created on 2022-05-04 by the reprex package (v2.0.1)

How to color the area between two geom_smooth lines?

I have 3 columns in a data frame from which I want to create a visualisation with geom_smooth() :
# Voting-intention plot: raw points plus three smoothed curves per candidate
# (lower error bound, upper error bound, central estimate).
# NOTE(review): my_data_frame is not defined in this snippet, and
# easy_remove_legend() presumably comes from the ggeasy package -- confirm.
ggplot(my_data_frame) +
  aes(
    x = fin_enquete,
    y = intentions,
    colour = candidat
  ) +
  geom_point(
    alpha = 1 / 6,
    shape = "circle",
    size = 0.5 # was `.5L`: not a valid integer literal, R warns and uses 0.5
  ) +
  # Lower bound: thin curve, smoothing method left to geom_smooth()'s default.
  geom_smooth(
    mapping = aes(y = erreur_inf),
    size = 0.5,
    span = 0.42,
    se = FALSE
  ) +
  # Upper bound: same settings as the lower bound.
  geom_smooth(
    mapping = aes(y = erreur_sup),
    size = 0.5,
    span = 0.42,
    se = FALSE
  ) +
  # Central estimate: thicker loess curve on the inherited y = intentions.
  geom_smooth(
    method = "loess",
    size = 1.5,
    span = 0.42,
    se = FALSE
  ) +
  labs(
    x = "Date de fin d'enquête",
    y = "Pourcentage d'intentions de vote"
  ) +
  theme_minimal() +
  theme(text = element_text(family = "DIN Pro")) +
  # expand = FALSE removes the default padding around the data range.
  coord_cartesian(expand = FALSE) +
  easy_remove_legend()
3 lines with geom_smooth
I would like to color the area between the upper and the lower line. I know the geom_ribbon() function but I am not sure I can use it in this situation.
Does anybody have a solution?
Have a nice day!
You could use geom_ribbon and calculate the loess model yourself within the geom_ribbon call?
Toy random data
# Toy data: x = 1..100 and three uniform-noise series offset by 0, 1 and 2.
dat <- data.frame(
  x = seq_len(100),
  y = runif(100),
  y2 = 1 + runif(100),
  y3 = 2 + runif(100)
)
Now suppose we want a smoothed ribbon between y and y3, with y2 drawn as a line between them:
# Shade the area between the loess fits of y (lower edge) and y3 (upper edge),
# and draw the smoothed y2 as a line between them.
ggplot(dat, aes(x, y2)) +
  # The loess models are fitted inside aes(), where y, y3 and x resolve to the
  # columns of `dat`; predict() returns one fitted value per row.
  geom_ribbon(
    aes(
      ymin = predict(loess(y ~ x)),
      ymax = predict(loess(y3 ~ x))
    ),
    alpha = 0.3
  ) +
  # Method and formula are spelled out so the line uses the same kind of
  # smoother as the ribbon edges and no "using method = ..." message is printed.
  geom_smooth(method = "loess", formula = y ~ x, se = FALSE)
You could also use lapply() to add one smooth per spline degrees-of-freedom value (here 5, 11 and 13), and draw only the two edges of the standard-error band by using geom_ribbon() with stat = "smooth".
Sample code:
library(ggplot2)

# Scatter of mpg against wt with three B-spline regression curves, plus the
# dotted outline of a loess standard-error band.
ggplot(
  data = mtcars,
  mapping = aes(x = wt, y = mpg)
) +
  geom_point(size = 2) +
  # A list of layers can be added with `+`; one lm smooth is built for each
  # B-spline degrees-of-freedom value.
  lapply(c(5, 11, 13), function(i) {
    geom_smooth(
      # Tag the layer's data with its df value; this column is only needed if
      # the optional facet_wrap() below is enabled.
      data = ~ cbind(., facet_plots = i),
      method = "lm",
      se = FALSE,
      formula = y ~ splines::bs(x, i)
    )
  }) +
  # Optional: one panel per df value.
  # facet_wrap(vars(facet_plots)) +
  # stat = "smooth" supplies ymin/ymax from the loess standard error; with a
  # fully transparent fill only the two band edges are drawn.
  geom_ribbon(
    stat = "smooth",
    method = "loess",
    formula = y ~ x,
    se = TRUE,
    alpha = 0, # or, use fill = NA
    colour = "black",
    linetype = "dotted"
  ) +
  theme_minimal()
Plot:

How do I change the color of the regression lines in ggPlot?

I made a visualization of a regression. Currently this is what the graph looks like.
The regression lines are hard to see since they are the same color as the scatter plot dots.
My question is, how do I make the regression lines a different color from the scatter plot dots?
Here is my code:
# Asker's plot, written in Python with ggplot-style calls (the answer below refers
# to it as plotnine). The colour mapping is declared in the top-level aes(), so
# the points and the regression lines share it.
(ggplot(data=df, mapping=aes(x='score', y='relent',
color='factor(threshold)'))+
geom_point()+
# Two colours, one per threshold group.
scale_color_manual(values=['darkorange', 'purple'])+
# Quadratic fit per group, without a confidence band.
geom_smooth(method='lm',
formula = 'y ~ x+I(x**2)',se=False, )+
# Dashed red vertical reference line at score = 766.
geom_vline(xintercept = 766, color = "red", size = 1, linetype = "dashed")+
labs(y = "Yield",
x = "Score")+
theme_bw()
)
One option to achieve your desired result would be to "duplicate" your threshold column with different values, e.g. in the code below I map 0 on 2 and 1 on 3. This duplicated column could then be mapped on the color aes inside geom_smooth and allows to set different colors for the regression lines.
My code below uses R and ggplot2, but to the best of my knowledge it can easily be adapted to plotnine:
library(ggplot2)

# Simulated data: two groups of n observations on either side of score = 766.
n <- 1000
df <- data.frame(
  relent = c(runif(n, 100, 200), runif(n, 150, 250)),
  score = c(runif(n, 764, 766), runif(n, 766, 768)),
  threshold = rep(c(0, 1), each = n)
)
# Copy of threshold under different codes (0 -> 2, 1 -> 3) so the smooth layer
# can use its own entries of the colour scale.
df$threshold_sm <- rep(c(2, 3), each = n)

# Shared base: colour scale (two point colours, then two line colours),
# reference line, axis labels and theme.
p <- ggplot(
  data = df,
  mapping = aes(x = score, y = relent, color = factor(threshold))
) +
  scale_color_manual(values = c("darkorange", "purple", "blue", "green")) +
  geom_vline(xintercept = 766, color = "red", size = 1, linetype = "dashed") +
  labs(y = "Yield", x = "Score") +
  theme_bw()

# Points keep the threshold colours; the quadratic fits are coloured by the
# recoded column.
p +
  geom_point() +
  geom_smooth(
    aes(color = factor(threshold_sm)),
    method = "lm",
    formula = y ~ x + I(x^2),
    se = FALSE
  )
A second option would be to add some transparency to the points so that the lines stand out more clearly; as a side benefit, this also reduces the overplotting of the points:
# Option 2: fade the points so the fitted curves stand out, and force the legend
# keys back to full opacity.
p +
  geom_point(alpha = 0.3) +
  geom_smooth(
    mapping = aes(color = factor(threshold)),
    formula = y ~ x + I(x^2),
    method = "lm",
    se = FALSE
  ) +
  guides(
    color = guide_legend(
      override.aes = list(alpha = 1)
    )
  )
Compare:
# Colour is mapped in ggplot(), so both layers inherit it: points and regression
# lines are drawn in the Species colours.
# iris is passed directly instead of being piped in, so no pipe-providing
# package has to be loaded.
ggplot(iris, aes(Petal.Length, Sepal.Width, color = Species)) +
  geom_point() +
  geom_smooth(aes(group = Species), method = "lm", formula = y ~ x)
With:
# Colour is mapped only inside geom_point(), so the regression lines, still
# fitted per Species through the group aesthetic, use the default line colour.
# iris is passed directly instead of being piped in, so no pipe-providing
# package has to be loaded.
ggplot(iris, aes(Petal.Length, Sepal.Width)) +
  geom_point(aes(color = Species)) +
  geom_smooth(aes(group = Species), method = "lm", formula = y ~ x)
When aes(color = ...) is specified inside of ggplot(), it is applied to both of the subsequent geoms. Moving it to geom_point() applies it to the points only.

Resources