Fit curve in spaghetti plot - r

For descriptive plots in RStudio, I would like to fit a regression curve over my spaghetti plot. To create the spaghetti plot I used:
library(lattice)
GCIP <- data_head$GCIP
time_since_on <- data_head$time_since_on
Patient <- data_head$Patient
Eye <-data_head$Eye
xyplot(GCIP~time_since_on, groups = Patient, type='b', data=data_head)
and I've got this plot
Then I wanted to fit a smooth trend curve (the code below uses a LOWESS smoother rather than a true polynomial), so I used this code:
plot.new<- plot(time_since_on,GCIP)
lines(lowess(GCIP ~ time_since_on))
This is what I've got:
What I want is to fit a curve like the one in the second image, but drawn over the spaghetti plot (with the longitudinal data for each subject).
I've tried to use this code:
library(ggplot2)
library(reshape2)
GCIP <- data_head$GCIP
time_since_on <- data_head$time_since_on
Patient.ID <- data_head$Patient.ID
Eye <-data_head$Eye
Visit <-data_head$Visit
Patient<-data_head$Patient
ggplot(data = reprex, aes(x,y)) +
geom_point(alpha=1, size=2) +
aes(colour=Patient.ID) +
geom_text(aes(label=label), size=2, colour='white') +
geom_path(aes(group=Patient.ID))
ggplot(data= reprex, aes(x = time_since_on, y = GCIP)) +
geom_point(size = 2, alpha= 1, aes(color = Patient.ID)) + #colour points by group
geom_path(aes(group = Patient.ID)) + #spaghetti plot
stat_smooth(method = "lm", formula = y ~ x, aes(group = Patient.ID, colour = group)) + #line of best fit by group
ylab("GCIP (volume)") + xlab("time_since_on (months)") +
theme_bw()
But I don't get anything from this.
Could anyone help me, please?
Here is an example taken from the internet:
Many thanks.
Lili

Related

ggplot for linear-log regression model?

How do I plot a log linear model in R?
Currently, I am doing this but am not sure if it's the right/efficient way:
data(food)
model1 <- lm(food_exp~log(income), data = food)
temp_var <- predict(model1, interval="confidence")
new_df <- cbind(food, temp_var)
head(new_df)
ggplot(new_df, aes(x = income, y = food_exp))+
geom_point() +
geom_smooth(aes(y=lwr), color = "red", linetype = "dashed")+
geom_smooth(aes(y=upr), color = "red", linetype = "dashed")+
geom_smooth(aes(y = fit), color = "blue")+
theme_economist()
You can use geom_smooth and put your formula in directly. It should yield the same result as your fit (which you can check by also plotting the fitted values):
ggplot(new_df, aes(x = Sepal.Width, y = Sepal.Length))+
geom_point() +
geom_point(aes(y=fit), color="red") + #your original fit
geom_smooth(method=lm, formula=y~log(x)) #ggplot fit
If you don't care about extracting the parameters and just want the plot, you can plot directly in ggplot2.
Some fake data for plotting:
library(tidyverse)
set.seed(454)
income <- VGAM::rpareto(n = 100, scale = 20, shape = 2)*1000
food_exp <- rnorm(100, income*.3+.1, 3)
food <- data.frame(income, food_exp)
Now within ggplot2, use the geom_smooth function and specify that you want a linear model. Additionally, you can directly transform the income in the aes argument:
ggplot(food, aes(x = log(income), y = food_exp))+
geom_point()+
geom_smooth(method = "lm")+
theme_bw()+
labs(
title = "Log Linear Model Food Expense as a Function of Log(income)",
x = "Log(Income)",
y = "Food Expenses"
)
This will work for confidence intervals, but to add prediction intervals you'll need to do what you did earlier: fit the model yourself and generate the prediction intervals from it.

How do I plot mixed-effects linear regression?

I'm trying to make a boxplot to visualize this regression model
library(lme4)
lmer(dv1 ~ intervention + (1|id/area),
data=data,
REML=T)
In this experiment, the control and treatment intervention are both applied to a subject within discrete areas.
Here's the data I'm using
data <- data.frame("id" = 1:2,
"intervention" = c(rep("a",27),rep("b", 27)),
"area" = 1:3,
"dv1" = rnorm(54),
"dv2" = rnorm(54),
"dv3" = rnorm(54))
data$area <- as.factor(data$area)
data$id <- as.factor(data$id)
Here's what I've tried
library(ggplot2)
ggplot(data,aes(x=area,y=dv1,col=intervention)) +
geom_point() +
geom_boxplot(alpha=0.2) +
facet_wrap(~id) +
ggtitle("DV1") +
xlab("Intervention") +
ylab("DV1")
Instead of the red points overlaying the red boxplot, they're all over the place. How do I fix this?
Edit: I used the jitter options that u/eipi10 suggested and this is what I have now.
ggplot(data,aes(x=area,y=dv1,col=intervention)) +
geom_point(position=position_jitterdodge(dodge.width=0.75, jitter.height=0, jitter.width=0.25), alpha=0.6) +
geom_boxplot(alpha=0.2, size=0.3) +
facet_wrap(~id) +
ggtitle("DV1") +
xlab("Area") +
ylab("DV1")

Plotting the overall trend using ggplot for longitudinal data

I want to plot the overall trend for longitudinal data. I am using the sleepstudy data in lme4 package to demonstrate my problem.
library("lme4")
library("ggplot2")
p1 <- ggplot(data = sleepstudy, aes(x = Days, y = Reaction, group = Subject))
p1 + geom_line() + geom_point(aes(col = Subject) ,size=2)
When I plot the longitudinal trajectories for each individual, I get this plot:
Here I am interested in finding the overall trend based on all subjects. For example, based on the above plot we can see an upward trend in general. In general this trend can be anything: linear, quadratic, etc. Is there any way to plot this overall trend?
I tried this, but I got smoothed curves for each subject instead of the overall trend:
p1 + geom_point() + geom_smooth(method = "lm")
Can anyone help me figure this out ?
Thank you
Don't know if I understand correctly:
library("lme4")
library("ggplot2")
ggplot(data = sleepstudy, aes(x = Days, y = Reaction))+
geom_point(aes(colour = Subject), alpha = .3)+
geom_smooth()+
theme(legend.position = "none")
As you can see, by default you get a loess smoother:
> `geom_smooth()` using method = 'loess' and formula 'y ~ x'
If you need a linear model instead, just specify the method argument in geom_smooth.
library("lme4")
library("ggplot2")
ggplot(data = sleepstudy, aes(x = Days, y = Reaction))+
geom_point(aes(colour = Subject), alpha = .3)+
geom_smooth(method = "lm")+
labs(title = "Linear Model (LM)")+
theme(legend.position = "none")
The result:

quadratic fit curve in Spaghetti plot. Lme?

I am trying to fit a quadratic curve over my spaghetti plot. In the beginning I did it only with ggplot like this:
library(ggplot2)
library(reshape2)
GCIP <- data_head$GCIP
Patient.ID <- data_head$Patient.ID
Eye <-data_head$Eye
Visit <-data_head$Visit
Patient<-data_head$Patient
data_head$time_since_on <- as.numeric(as.character(data_head$time_since_on))
ggplot(data = data_head, aes(x= time_since_on, y=GCIP)) +
geom_point(alpha=1, size=2) +
aes(colour=Patient.ID) +
geom_path(aes(group='Patient.ID'))
ggplot(data= data_head, aes(x = time_since_on, y = GCIP)) +
geom_point(size = 2, alpha= 1, aes(color = Patient.ID)) + #colour points by group
geom_path(aes(group = Patient.ID)) + #spaghetti plot
stat_smooth(method = "lm", formula = y ~ poly(x,2)) + #line of best fit by group
ylab("GCIP (volume)") + xlab("time_since_on (months)") +
theme_bw()
The problem is that I am not sure this code takes into account that each line connects the different timepoints of a single patient; the fitted curve should take that into account as well.
Could you please tell me if this is correct?
Here you can see the graph I get
I am not sure, and maybe it is better to fit an lme model (but in that case I don't know how to introduce the quadratic term in the model).
I also did this:
data_head <- read.csv("/Users/adrianaroca-fernandez/Desktop/Analysis/Long_100418_2/N=lit.csv", sep=";", dec=",")
library(ggplot2)
library(reshape2)
library(lme4)
library(lsmeans)
GCIP <- data_head$GCIP
Patient.ID <- data_head$Patient.ID
Eye <-data_head$Eye
Visit <-data_head$Visit
Patient<-data_head$Patient
data_head$time_since_on <- as.numeric(as.character(data_head$time_since_on))
time_since_on <-data_head$time_since_on
time_since_on2 <- time_since_on^2
quadratic.model <-lm(GCIP ~ time_since_on + time_since_on2)
summary(quadratic.model)
time_since_onvalues <- seq(0, 250, 0.1)
predictedGCIP <- predict(quadratic.model,list(time_since_on=time_since_onvalues, time_since_on2=time_since_onvalues^2))
plot(time_since_on, GCIP, pch=16, xlab = "time_since_on (months)", ylab = "GCIP", cex.lab = 1.3, col = "blue")
lines(time_since_onvalues, predictedGCIP, col = "darkgreen", lwd = 3)
The problem is that I am still unable to introduce (1|Patient.ID) as a random effect. And I lose my spaghetti plot in this case, having just the dots. Here is the result:
What do you think is better or how should I code this?
Thanks.
lili

Statistical Model Representation with ggplot2

I will ask my question with a study case and then I'll make my question more general.
Let's first import some libraries and create some data:
require(visreg)
require(ggplot2)
y = c(rnorm(40,10,1), rnorm(20,11,1), rnorm(5,12,1))
x=c(rep(1,40), rep(2,20), rep(3,5))
dt=data.frame(x=x, y=y)
and run a linear regression of y on x and graph the data and the model with ggplot2
m1 = lm(y~x, data=dt)
ggplot(dt, aes(x,y)) + geom_point() + geom_smooth(formula = y~x, method="anova", data=dt)
Now I would like to treat my x variable as a nominal variable. So I slightly change my data and run the following model.
y = c(rnorm(40,10,1), rnorm(20,11,1), rnorm(5,12,1))
x=factor(c(rep(1,40), rep(2,20), rep(3,5))) # this line has changed!
dt=data.frame(x=x, y=y)
m2 = lm(y~x, data=dt)
How can I plot this model m2 with ggplot2? And more generally, how can I directly tell ggplot to use the object m2 in order to create a representation of the model?
What I aim to do is the kind of things that can be done using the visreg package
visreg(m2)
So, is there any visreg-like solution for ggplot? something like
ggplot(..,aes(..)) + super_geom_smooth(model = m2)
This is not much different from @rnso's idea. geom_jitter() adds more flavour. I also changed the colour of the median bar. Hope this helps you!
ggplot(data = m2$model, aes(x = x, y = y)) +
geom_boxplot(fill = "gray90") +
geom_jitter() +
theme_bw() +
stat_summary(geom = "crossbar", width = 0.65, fatten = 0, color = "blue",
fun.data = function(x){return(c(y=median(x), ymin=median(x), ymax=median(x)))})
The following boxplot is very similar to your desired graph:
ggplot(dt, aes(x,y))+ geom_boxplot(aes(group=x), alpha=0.5)+ geom_jitter()
Just FYI, visreg can now output a gg object:
visreg(m2, gg=TRUE)

Resources