Adding uncertainty bands to a smooth spline in a scatterplot - r

I have a following scatterplot with a smooth spline
a<-rep(1:50,len=500)
b<-sample(0:5000,500)
c<-round(seq(0,600,len=500))
data_frame<-as.data.frame(cbind(a,b,c))
names(data_frame)<-c("ID","toxin_level","days_to_event")
plot(data_frame$days_to_event,data_frame$toxin_level, xlim=c(600,0),xlab="days before the event",ylab="Toxin level",type="p")
abline(v=0,col="red")
x <- data_frame$days_to_event
y <- data_frame$toxin_level
fit.sp = smooth.spline(y ~ x, nknots=20)
lines(fit.sp, col="blue")
This is the resulting plot
I was wondernig if it is possible to somehow add confidence bands to this curve? I deally I would like it to be in a transparent blue, but any color including gray is OK.

Updated: using scale_x_reverse to match your graph more precisely...
How about this using ggplot2?
library(ggplot2)
ggplot(data_frame, aes(x = days_to_event, y = toxin_level)) + geom_point() +
geom_vline(xintercept = 0, color = "red") + scale_x_reverse() +
xlab("Days before the event") + ylab("Toxin Level") +
geom_smooth(method = lm, se = TRUE)
Which gives this:
Or to match your question a bit more:
ggplot(data_frame, aes(x = days_to_event, y = toxin_level)) + geom_point(shape = 1) +
geom_vline(xintercept = 0, color = "red") + scale_x_reverse() +
xlab("Days before the event") + ylab("Toxin Level") +
geom_smooth(method = lm, se = TRUE, color = "blue", fill = "lightblue") +
theme_bw()

Related

How to color the area between two geom_smooth lines?

I have 3 columns in a data frame from which I want to create a visualisation with geom_smooth() :
ggplot(my_data_frame) +
aes(x = fin_enquete,
y = intentions,
colour = candidat) +
geom_point(alpha = 1/6,
shape = "circle",
size = .5L) +
geom_smooth(mapping = aes(y = erreur_inf),
size = .5L,
span = .42,
se = F) +
geom_smooth(mapping = aes(y = erreur_sup),
size = .5L,
span = .42,
se = F) +
geom_smooth(method = "loess",
size = 1.5L,
span = .42,
se = F) +
labs(x = "Date de fin d'enquĂȘte",
y = "Pourcentage d'intentions de vote") +
theme_minimal() +
theme(text = element_text(family = "DIN Pro")) +
coord_cartesian(expand = F) +
easy_remove_legend()
3 lines with geom_smooth
I would like to color the area between the upper and the lower line. I know the geom_ribbon() function but I am not sure I can use it in this situation.
Does anybody have a solution?
Have a nice day!
You could use geom_ribbon and calculate the loess model yourself within the geom_ribbon call?
Toy random data
dat <- data.frame(x=1:100, y=runif(100), y2=runif(100)+1, y3=runif(100)+2)
Now suppose we want a smoothed ribbon between y and y3, with y2 drawn as a line between them:
ggplot( dat , aes(x, y2)) +
geom_ribbon(aes(ymin=predict(loess(y~x)),
ymax=predict(loess(y3~x))), alpha=0.3) +
geom_smooth(se=F)
You could use lapply() smooth to calculate the range of df values such as (5,11,13) to calculate the smooths and plot only the two edges of the se.
Sample code:
library(ggplot2)
ggplot(data = mtcars,
mapping = aes(x = wt,
y = mpg)) +
geom_point(size = 2)+
lapply(c(5,11, 13), function (i) {
geom_smooth(
data = ~ cbind(., facet_plots = i),
method = lm,
se=F,
formula = y ~ splines::bs(x, i)
)
})+
#facet_wrap(vars(facet_plots))
geom_ribbon(
stat = "smooth",
method = "loess",
se = TRUE,
alpha = 0, # or, use fill = NA
colour = "black",
linetype = "dotted")+
theme_minimal()
Plot:

How do I change the color of the regression lines in ggPlot?

I made a visualization of a regression. Currently this is what the graph looks like.
The regression lines are hard to see since they are the same color as the scatter plot dots.
My question is, how do I make the regression lines a different color from the scatter plot dots?
Here is my code:
(ggplot(data=df, mapping=aes(x='score', y='relent',
color='factor(threshold)'))+
geom_point()+
scale_color_manual(values=['darkorange', 'purple'])+
geom_smooth(method='lm',
formula = 'y ~ x+I(x**2)',se=False, )+
geom_vline(xintercept = 766, color = "red", size = 1, linetype = "dashed")+
labs(y = "Yield",
x = "Score")+
theme_bw()
)
One option to achieve your desired result would be to "duplicate" your threshold column with different values, e.g. in the code below I map 0 on 2 and 1 on 3. This duplicated column could then be mapped on the color aes inside geom_smooth and allows to set different colors for the regression lines.
My code below uses R or ggplot2 but TBMK the code could be easily adapted to plotnine:
n <- 1000
df <- data.frame(
relent = c(runif(n, 100, 200), runif(n, 150, 250)),
score = c(runif(n, 764, 766), runif(n, 766, 768)),
threshold = c(rep(0, n), rep(1, n))
)
df$threshold_sm <- c(rep(2, n), rep(3, n))
library(ggplot2)
p <- ggplot(data = df, mapping = aes(x = score, y = relent, color = factor(threshold))) +
scale_color_manual(values = c("darkorange", "purple", "blue", "green")) +
geom_vline(xintercept = 766, color = "red", size = 1, linetype = "dashed") +
labs(
y = "Yield",
x = "Score"
) +
theme_bw()
p +
geom_point() +
geom_smooth(aes(color = factor(threshold_sm)),
method = "lm",
formula = y ~ x + I(x**2), se = FALSE
)
A second option would be to add some transparency to the points so that the lines stand out more clearly and by the way deals with the overplotting of the points:
p +
geom_point(alpha = .3) +
geom_smooth(aes(color = factor(threshold)),
method = "lm",
formula = y ~ x + I(x**2), se = FALSE
) +
guides(color = guide_legend(override.aes = list(alpha = 1)))
Compare:
iris %>%
ggplot(aes(Petal.Length, Sepal.Width, color = Species)) +
geom_point() +
geom_smooth(method = "lm", aes(group = Species))
With:
iris %>%
ggplot(aes(Petal.Length, Sepal.Width)) +
geom_point(aes(color = Species)) +
geom_smooth(method = "lm", aes(group = Species))
When aes(color = ...) is specified inside of ggplot(), it is applied to both of the subsequent geoms. Moving it to geom_point() applies it to the points only.

How to use ggplot2 legend to denote different geoms

I am using ggplot2 to plot points from a .csv file that is just a column used a x values and a column used a y values. I am a little confused as to how ggplot decides what to make a legend for and haven't found any good examples online.
I would like the legend to show that geom_point is stress vs strain, and my geom_smooth is the best fit line.
Here is my code:
library(ggplot2)
imported = read.csv("data.csv")
Strain = imported$Strain
Stress = imported$Stress..N.m.2.
err = .0005
gg <-
ggplot(imported, aes(x=Strain, y=Stress)) +
geom_point(aes(group = "Points"), shape = 79, colour = "black", size = 2, stroke = 4) +
geom_smooth(method = "lm", se = FALSE, color = "orange") +
geom_errorbarh(xmin = Strain - err, xmax = Strain + err, show.legend = TRUE) +
theme_gray() + ggtitle("Stress vs Strain") +
theme(legend.position = "top")
gg
And it is producing the following plot:
my plot
Edit: added approach at top to create legend for each geom, by creating dummy mapping to separate aesthetics.
library(ggplot2)
ggplot(mtcars, aes(mpg, wt)) +
geom_point(aes(color = "point")) + # dummy mapping to color
geom_smooth(method = "lm", se = FALSE, color = "orange",
aes(linetype = "best fit")) + # dummy mapping to linetype
geom_errorbarh(aes(xmin = mpg - 2, xmax = mpg + 1)) +
scale_color_manual(name = "Stress vs. Strain", values = "black") +
scale_linetype_manual(name = "Best fit line", values = "solid")
original answer:
Note the difference in legend here:
library(ggplot2)
ggplot(mtcars, aes(mpg, wt, color = as.character(cyl))) +
geom_point() +
geom_errorbarh(aes(xmin = mpg - 2, xmax = mpg + 1),
show.legend = TRUE) # error bars reflected in legend
ggplot(mtcars, aes(mpg, wt, color = as.character(cyl))) +
geom_point() +
geom_errorbarh(aes(xmin = mpg - 2, xmax = mpg + 1),
show.legend = FALSE) # error bars not shown in legend

Removing points from geom_bar legend ggplot r

This is my data.
Mod <- as.factor(c(rep("GLM",5),rep("MLP",5),rep("RF",5),rep("SVML",5),rep("SVMR",5)))
Manifold <- as.factor(rep(c("LLE","Iso","PCA","MDS","kPCA"),5))
ROC <- runif(25,0,1)
Sens <- runif(25,0,1)
Spec <- runif(25,0,1)
df <- data.frame("Mod"= Mod, "Manifold"= Manifold, "ROC" = ROC, "Sens" = sens, "Spec" = spec)
And I am making this graph
resul3 <- ggplot(df, aes(x = Mod, y = ROC, fill= Manifold)) +
geom_bar(stat = "identity", position = "dodge", color = "black") +
ylab("ROC & Specificity") +
xlab("Classifiers") +
theme_bw() +
ggtitle("Classifiers' ROC per Feature Extraction Plasma") +
geom_point(aes(y=Spec), color = "black", position=position_dodge(.9)) +
scale_fill_manual(name = "Feature \nExtraction", values = c("#FFEFCA",
"#EDA16A" ,"#C83741", "#6C283D", "#62BF94"))
first graph
And what I want is another legend with tittle "Specificity" and a single black point. I dont want the point to be inside the Manifolds legend.
Something like this but without the points inside the manifold squares
Changing the geom_point line, adding a scale_color_manual and using the override as seen in #drmariod's answer will result in this plot:
ggplot(df, aes(x = Mod, y = ROC, fill= Manifold)) +
geom_bar(stat = "identity", position = "dodge", color = "black") +
ylab("ROC & Specificity") +
xlab("Classifiers") +
theme_bw() +
ggtitle("Classifiers' ROC per Feature Extraction Plasma") +
geom_point(aes(y=Spec, color = "Specificity"), position=position_dodge(.9)) +
scale_fill_manual(name = "Feature \nExtraction", values = c("#FFEFCA",
"#EDA16A" ,"#C83741", "#6C283D", "#62BF94")) +
scale_color_manual(name = NULL, values = c("Specificity" = "black")) +
guides(fill = guide_legend(override.aes = list(shape = NA)))
You can overwrite the aesthetics for shape and set it to NA like this
ggplot(df, aes(x = Mod, y = ROC, fill= Manifold)) +
geom_bar(stat = "identity", position = "dodge", color = "black") +
ylab("ROC & Specificity") +
xlab("Classifiers") +
theme_bw() +
ggtitle("Classifiers' ROC per Feature Extraction Plasma") +
geom_point(aes(y=Spec), color = "black", position=position_dodge(.9)) +
scale_fill_manual(name = "Feature \nExtraction", values = c("#FFEFCA",
"#EDA16A" ,"#C83741", "#6C283D", "#62BF94")) +
guides(fill = guide_legend(override.aes = list(shape = NA)))

How to add curve which fits bar chart

How can I fit the bar chart with a curve similar the density plot for a histogram?
library(ggplot2)
library(plyr)
y<-hist(rnorm(1000),breaks=30)$count
df<-data.frame(x=1:length(y),y=y,key="A")
df2<-data.frame(x=1:length(y),y=y*0.4,key="B")
df<-rbind(df,df2)
p<-ggplot(df,aes(x=x))
p<-p + geom_bar(subset=.(key =="A"),aes(y = y),stat="identity",fill = "blue", alpha = 0.2)
p<-p + geom_bar(subset=.(key =="B"),aes(y = y),stat="identity",fill = "blue", alpha = 0.2)
#p<-p + geom_density(subset=.(key =="A"), aes(y=y),alpha=.2, fill="#0000FF")
p
Use ..density.. to do the transformation:
data = data.frame(x = rnorm(500))
ggplot(data) +
geom_histogram(aes(x = x, y = ..density..)) +
geom_density(aes(x), colour = I('red'))
For your data:
ggplot(df) +
geom_histogram(aes(y, ..density.., fill=key)) +
geom_density(aes(y, colour = key))
And try to avoid names like 'df' (df is a R function i guess)

Resources