I need to include age adjustment in the geom_smooth line I am adding to my ggscatter plot.
my data looks like~
table link
structure(list(Time = c(0L, 0L, 0L, 0L, 6L, 12L, 18L, 18L, 0L,
12L, 18L, 6L), group = structure(c(1L, 1L, 2L, 2L, 1L, 3L, 3L,
3L, 3L, 4L, 4L, 1L), .Label = c("A", "B", "C", "D"), class = "factor"),
Age = c(77, 70.2, 69.9, 65.7, 66.2, 66.7, 67.2, 67.7, 66.8,
67.8, 68.3, 68.8), Average = c(96L, 90L, 94L, 94L, 96L, 96L,
92L, 120L, 114L, 109L, 113L, 103L)), row.names = c(NA, 12L
), class = "data.frame")
What I currently have (the 'Average" value have dependency in age..):
ggscatter(dtable, "Time","Average",conf.int = TRUE)+theme_bw()+
geom_smooth(aes(group=1),method='lm')+facet_wrap(~groups)
What I would like to have is something like:
ggscatter(dtable, "Time","Average",conf.int = TRUE)+theme_bw()+
geom_smooth(aes(group=1),method='lm', adjust= ~age)+facet_wrap(~groups)
With adjustment per each group mean age
Any suggestions?
Here is I think what you are after.
First, we need to fit the more complicated model because ggplot does not have a functionality for multivariable models (yet)
fit <- lm(Average ~ Time + group + Age, data = tdata)
Then we can use some functionality from the broom package to add the predictions and associated standard errors. With these in hand we can manually build the plot using the geom_line and geom_ribbon geoms
library(broom)
tdata %>%
bind_cols(augment(fit)) %>%
ggplot(aes(Time, Average))+
geom_point()+
geom_line(aes(x = Time, y = .fitted), size = 2, color = "blue")+
geom_ribbon(aes(ymin = .fitted + .se.fit*2, ymax = .fitted - .se.fit*2), alpha = .2)+
facet_wrap(~group)+
theme_bw()
Additionally, if you wanted to look at pooled vs non-pooled estimates
fit_no_pool <- lm(Average ~ Time + group + Age, data = tdata)
fit_complete_pool <- lm(Average ~ Time + Age, data = tdata)
library(broom)
tdata %>%
bind_cols(augment(fit_no_pool) %>% setNames(sprintf("no_pool%s", names(.)))) %>%
bind_cols(augment(fit_complete_pool) %>% setNames(sprintf("pool%s", names(.)))) %>%
ggplot(aes(Time, Average))+
geom_point()+
# Non-Pooled Estimates
geom_line(aes(x = Time, y = no_pool.fitted, color = "blue"), size = 2)+
geom_ribbon(aes(ymin = no_pool.fitted + no_pool.se.fit*2,
ymax = no_pool.fitted - no_pool.se.fit*2), alpha = .2)+
# Pooled Estimates
geom_line(aes(x = Time, y = pool.fitted, color = "orange"), size = 2)+
geom_ribbon(aes(ymin = pool.fitted + pool.se.fit*2,
ymax = pool.fitted - pool.se.fit*2), alpha = .2)+
facet_wrap(~group)+
scale_color_manual(name = "Regression",
labels = c("Pooled", "Non-Pooled"),
values = c("blue", "orange"))+
theme_bw()
One way to go is to run your model with Age as an additional predictor in your model. then use predict to get the predicted value with CIs. Append to your data then use ggplot to plot. I know you want to facet by group, so it might be worth putting it into your model as well. Just a thought. The steps would be the same.
df <- structure(list(Time = c(0L, 0L, 0L, 0L, 6L, 12L, 18L, 18L, 0L,
12L, 18L, 6L), group = structure(c(1L, 1L, 2L, 2L, 1L, 3L, 3L,
3L, 3L, 4L, 4L, 1L), .Label = c("A", "B", "C", "D"), class = "factor"),
Age = c(77, 70.2, 69.9, 65.7, 66.2, 66.7, 67.2, 67.7, 66.8,
67.8, 68.3, 68.8), Average = c(96L, 90L, 94L, 94L, 96L, 96L,
92L, 120L, 114L, 109L, 113L, 103L)), row.names = c(NA, 12L
), class = "data.frame")
#model adjusted for age
mod <- lm(Average ~ Time + Age, data = df)
#get prediction with CIS
premod <- predict(mod, interval = "predict")
#append to data
df2 <- cbind(df,premod)
#add prediction to ggplot with scatter plot
ggplot(df2) +
geom_point(aes(x=Time,y=Average)) +
geom_line(aes(x=Time, y = fit)) +
geom_ribbon(aes(x = Time,ymin = lwr, ymax = upr), alpha = .1)+
facet_wrap(~group)+
theme_bw()
Related
i have a df and I am trying to perform logistic regression to predict the variable var12. Before that I want to choose which variables will i use in this model. I want to calculate the person correlation and p value for every variable over the variable var12 and perhaps plot them in order to check linearity from the visual also. Can anyone help? thank you very much
structure(list(id = c(1, 3, 5, 10, 11, 13, 15, 17, 18, 21),
var1 = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), var2 = c(0.1,
0.77, 0.75, 0.09, 0.84, 0.52, 0.45, 0.27, 0.71, 0.15), var3 = c("D",
"D", "B", "B", "B", "E", "E", "C", "C", "B"), var4 = c(5L,
5L, 6L, 7L, 7L, 6L, 6L, 7L, 7L, 7L), var5 = c(0L, 0L, 2L,
0L, 0L, 2L, 2L, 0L, 0L, 0L), var6 = c(55L, 55L, 52L, 46L,
46L, 38L, 38L, 33L, 33L, 41L), var7 = c(50L, 50L, 50L,
50L, 50L, 50L, 50L, 68L, 68L, 50L), var8 = c("B12", "B12",
"B12", "B12", "B12", "B12", "B12", "B12", "B12", "B12"),
var9 = c("Regular", "Regular", "Diesel", "Diesel", "Diesel",
"Regular", "Regular", "Diesel", "Diesel", "Diesel"), var10 = c(1217L,
1217L, 54L, 76L, 76L, 3003L, 3003L, 137L, 137L, 60L), var11 = c("R82",
"R82", "R22", "R72", "R72", "R31", "R31", "R91", "R91", "R52"
), var12 = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1)), row.names = c(NA,
10L), class = "data.frame")
You'll want to make dummy variables out of the character vars. Note I reset the values of var12 in your example.
#reset var12 to have better data to play with
seed(100)
df$var12 <- sample(c(0,1), 10, replace = TRUE)
#libraries needed
library(dplyr)
library(tidyverse)
library(fastDummies)
#isolate variables and create dummy variables
cor_data <- df %>%
select(!c("id","var12")) %>%
dummy_cols() %>% #create dummy variables
select_if(is.numeric) #isoalate just numeric variables
#collect variables and the pairs to a dataframe
data_all <- data.frame()
for (colname in names(cor_data)){
r <- paste0(colname)
data <- data.frame(x = cor_data[colname], y = df["var12"])
names(data) <- c("x", "y")
dd <- data.frame(vars = r, data = data) %>%
group_by(vars) %>%
nest()
data_all <- rbind(data_all, dd)
}
#define model
myModel <- function(df){
cor.test(df$data.x, df$data.y, method = "spearman")
}
#run model on the data pairs
data_all <- data_all %>%
mutate(model = map(data, myModel))
#use broom::glance to add the results to the dataframe
glance <- data_all %>%
mutate(glance = map(model, broom::glance)) %>%
unnest(glance, .drop = TRUE)
#extract the stats you want and graph
glance %>%
select(statistic, p.value) %>%
ggplot() +
geom_col(aes(x = reorder(vars, desc(statistic)), y = statistic)) +
geom_text(aes(x = reorder(vars, desc(statistic)), y = statistic,
label = paste0("p.value = ", round(p.value, 5))),
color = "white", hjust = 1.2, angle = 90) +
labs(x = "variable", title= "Correlation with var12") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
I have a mixed model with a interaction of two continuous variables. I understand how to use predict() for a continuous by categorical interaction, but can't find any information on how to use predict() to generate graphs of continuous by continuous interactions. So far I have:
#the data
mydata<-structure(list(Week = c(3L, 3L, 6L, 6L, 5L, 6L, 3L, 1L, 4L, 3L,
1L, 2L, 6L, 6L, 6L, 6L), X2 = c(20.8, 21.4, 22.2, 21.9, 21, 21.8,
16.6, 15.6, 21.9, 19.8, 17.5, 12.5, 20.1, 20.5, 21.7, 22.3),
X1 = c(78L, 90L, 81L, 44L, 9L, 35L, 99L, 17L, 1L, 7L, 23L,
14L, 9L, 77L, 84L, 1L), Y = c(14.97469781, 19.88267242, 15.59780954,
9.633809968, 15.12038794, 10.43636012, 10.7436911, 16.71840387,
12.43274774, 10.90741585, 8.79514591, 14.1932374, 8.776376951,
9.995133069, 12.38314719, 9.611533444)), class = "data.frame", row.names = c(NA,
-16L))
#assigning 'Week' as a factor
mydata$Week<-as.factor(mydata$Week)
#the model
model1<-glmer(Y~X1*X2+(1|Week),data=mydata, family=Gamma(link='log'))
NEWDATA <-
expand.grid(
X1 = seq(1, 99, length = 100),
X2 = seq(12.5, 22.3, length = 100),
Week = levels(mydata$Week)
)
PREDMASS <-
predict(model1,
newdata = NEWDATA,
re.form = ~ (1 | Week))
PREDSFRAME <- cbind(NEWDATA, PREDMASS)
head(PREDSFRAME)
If the interaction were between a continuous and a categorical variable, I would then use the code below, but this doesn't work:
ggplot(PREDSFRAME, aes(x = X1, y = PREDMASS)) +
geom_line() +
geom_point(data = mydata,
facet_grid(. ~ X2) +
aes(y = Y),
alpha = 0.3)
Any suggestions?
I think you actually want the facet_grid outside the geom_point() function. If you run it that way you won't get an error.
ggplot(PREDSFRAME, aes(x = X1, y = PREDMASS)) +
geom_line() +
geom_point(data = mydata,
aes(y = Y),
alpha = 0.3)+
facet_grid(. ~ X2)
But what you get is a grid of plots for every value of X2 (because it is continuous), which is also not what you want.
What you need to do is specify some X2 values that you would create different plots (or regression lines of), since I am assuming (perhaps incorrectly) that you don't want to plot every possible combination (which is a 2D plane, as #HongOoi suggests).
I know you ask for a solution using predict, which perhaps you can solve with the above information, but I offer this "pre-made" solution from sjPlot that I find quick and helpful:
library(sjPlot)
plot_model(model1, type="pred",terms=c("X1","X2"))
I am trying to use geom_label_repel to add labels to a couple of data points on a plot. In this case, they happen to be outliers on box plots. I've got most of the code working, I can label the outlier, but for some reason I am getting multiple labels (equal to my sample size for the entire data set) mapped to that point. I'd like just one label for this outlier.
Example:
Here is my data:
dput(sus_dev_data)
structure(list(time_point = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L), .Label = c("3", "8", "12"), class = "factor"),
days_to_pupation = c(135L, 142L, 143L, 155L, 149L, 159L,
153L, 171L, 9L, 67L, 53L, 49L, 72L, 67L, 55L, 64L, 60L, 122L,
53L, 51L, 49L, 53L, 50L, 56L, 44L, 47L, 60L)), row.names = c(1L,
2L, 3L, 4L, 5L, 6L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L,
17L, 18L, 20L, 21L, 22L, 23L, 24L, 26L, 27L, 28L, 29L, 30L), class = "data.frame")
and my code...
####################################################################################################
# Time to pupation statistical analysis
####################################################################################################
## linear model
pupation_Model=lm(sus_dev_data$days_to_pupation~sus_dev_data$time_point)
pupationANOVA=aov(pupation_Model)
summary(pupationANOVA)
# Tukey test to study each pair of treatment :
pupationTUKEY <- TukeyHSD(x=pupationANOVA, which = 'sus_dev_data$time_point',
conf.level=0.95)
## Function to generate significance labels on box plot
generate_label_df <- function(pupationTUKEY, variable){
# Extract labels and factor levels from Tukey post-hoc
Tukey.levels <- pupationTUKEY[[variable]][,4]
Tukey.labels <- data.frame(multcompLetters(Tukey.levels, reversed = TRUE)['Letters'])
#I need to put the labels in the same order as in the boxplot :
Tukey.labels$treatment=rownames(Tukey.labels)
Tukey.labels=Tukey.labels[order(Tukey.labels$treatment) , ]
return(Tukey.labels)
}
#generate labels using function
labels<-generate_label_df(pupationTUKEY , "sus_dev_data$time_point")
#rename columns for merging
names(labels)<-c('Letters','time_point')
# obtain letter position for y axis using means
pupationyvalue<-aggregate(.~time_point, data=sus_dev_data, max)
#merge dataframes
pupationfinal<-merge(labels,pupationyvalue)
####################################################################################################
# Time to pupation plot
####################################################################################################
# Plot of data
(pupation_plot <- ggplot(sus_dev_data, aes(time_point, days_to_pupation)) +
Alex_Theme +
geom_boxplot(fill = "grey80", outlier.size = 0.75) +
geom_text(data = pupationfinal, aes(x = time_point, y = days_to_pupation,
label = Letters),vjust=-2,hjust=.5, size = 4) +
#ggtitle(expression(atop("Days to pupation"))) +
labs(y = 'Days to pupation', x = 'Weeks post-hatch') +
scale_y_continuous(limits = c(0, 200)) +
scale_x_discrete(labels=c("3" = "13", "8" = "18",
"12" = "22")) +
geom_label_repel(aes(x = 1, y = 9),
label = '1')
)
Here's a shorter example to demonstrate what is going on. Essentially, your labels are beng recycled to be the same length as the data.
df = data.frame(x=1:5, y=1:5)
ggplot(df, aes(x,y, color=x)) +
geom_point() +
geom_label_repel(aes(x = 1, y = 1), label = '1')
You can override this by providing new data for the ggrepel
ggplot(df, aes(x,y, color=x)) +
geom_point() +
geom_label_repel(data = data.frame(x=1, y=1), label = '1')
Based on your data, you have 3 outliers (one in each group), you can manually identify them by applying the classic definition of outliers by John Tukey (Upper: Q3+1.5*IQR and Lower: Q1-1.5*IQR) (but you are free to set your own rules to define an outlier). You can use the function quantile and IQR to get those points.
Here, I incorporated them in a sequence of pipe using dplyr package:
library(tidyverse)
Outliers <- sus_dev_data %>% group_by(time_point) %>%
mutate(Out_up = ifelse(days_to_pupation > quantile(days_to_pupation,0.75)+1.5*IQR(days_to_pupation), "Out","In"))%>%
mutate(Out_Down = ifelse(days_to_pupation < quantile(days_to_pupation,0.25)-1.5*IQR(days_to_pupation), "Out","In")) %>%
filter(Out_up == "Out" | Out_Down == "Out")
# A tibble: 3 x 4
# Groups: time_point [3]
time_point days_to_pupation Out_up Out_Down
<fct> <int> <chr> <chr>
1 3 9 In Out
2 8 122 Out In
3 12 60 Out In
As mentioned by #dww, you need to pass a new dataframe to geom_label_repel if you want your outliers to be single labeled. So, here we use the dataframe Outliers to feed the geom_label_repel function:
library(ggplot2)
library(ggrepel)
ggplot(sus_dev_data, aes(time_point, days_to_pupation)) +
#Alex_Theme +
geom_boxplot(fill = "grey80", outlier.size = 0.75) +
geom_text(data = pupationfinal, aes(x = time_point, y = days_to_pupation,
label = Letters),vjust=-2,hjust=.5, size = 4) +
#ggtitle(expression(atop("Days to pupation"))) +
labs(y = 'Days to pupation', x = 'Weeks post-hatch') +
scale_y_continuous(limits = c(0, 200)) +
scale_x_discrete(labels=c("3" = "13", "8" = "18",
"12" = "22")) +
geom_label_repel(inherit.aes = FALSE,
data = Outliers,
aes(x = time_point, y = days_to_pupation, label = "Out"))
And you get the following graph:
I hope it helps you to figure it how to label all your outliers.
I would simply like to increase the font size of the title for a gganimate object. I used the following code but it didnt change the size of the title. I changed the title font using theme.
Libraries
library(ggplot2)
library(gganimate)
library(ggpubr)
Example Data
structure(list(subject = c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L
), treatment = c("a", "a", "a", "b", "b", "b", "a", "a", "a"),
time = c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), outcome1 = c(80L,
75L, 74L, 90L, 81L, 76L, 90L, 89L, 87L), outcome2 = c(15L,
14L, 12L, 16L, 15L, 15L, 17L, 14L, 12L)), class = "data.frame", row.names = c(NA,
-9L))
Example Code
ggplot(dat2, aes(x=treatment, y=outcome1, fill=treatment)) +
geom_violin() +
guides(fill = FALSE) +
scale_fill_manual(values=c("#00AFBB", "#FC4E07")) +
stat_compare_means(aes(label = ..p.format..), paired = FALSE, label.x.npc = 0.5) +
theme(plot.title = element_text(size = 20, face = "bold")) +
labs(title = 'Week: {frame_time}', x = 'Diet', y = 'Outcome1 (mm)') +
transition_time(time) +
ease_aes('linear')
animate(p1, duration = 12, fps = 1)
Thanks for reading!
I an answered my own question. Never mind, theme(plot.title = element_text(size = 20, face = "bold")) is working! Made a mistake earlier in printing the graph.
I am trying to add the values of R2 in scatter plots for several data set and also using facet_grid. So, I want to add several text (values of R2, each one for each data set) in each plot. I have been looking for similar examples, but I couldn't get the right approach, because I don't know how to set the x and y position for the text.
This a very short sample of my original data:
dput(test)
structure(list(code = c("AT0ENK1", "AT0ENK1", "AT0ENK1", "AT0ENK1",
"AT0ENK1", "AT0ENK1", "AT0ENK1", "AT0ENK1", "AT0ILL1", "AT0ILL1",
"AT0ILL1", "AT0ILL1", "AT0ILL1", "AT0ILL1", "AT0ILL1", "AT0ILL1"
), model = structure(c(2L, 2L, 2L, 2L, 6L, 6L, 6L, 6L, 2L, 2L,
2L, 2L, 6L, 6L, 6L, 6L), .Label = c("Obs", "EMEP", "LOTO", "MATCH",
"MINNI", "WRFF", "WRFM"), class = "factor"), O3 = c(118.037246704102,
105.963432312012, 102.795967102051, 107.245376586914,
101.879364013672,
124.914794921875, 129.386352539062, 115.475601196289,
96.2464294433594,
113.553771972656, 108.113143920898, 95.6128845214844,
104.497161865234,
111.243560791016, 121.166435241699, 118.756866455078), O3obs =
c(144.424,
151.726, 151.866, 139.439, 144.424, 151.726, 151.866, 139.439,
164.202, 171.715, 158.06, 137.473, 164.202, 171.715, 158.06,
137.473), r2 = c(0.485277006453918, 0.485277006453918,
0.485277006453918,
0.485277006453918, 0.277829662775301, 0.277829662775301,
0.277829662775301,
0.277829662775301, 0.0429530296631768, 0.0429530296631768,
0.0429530296631768,
0.0429530296631768, 0.0332266668960316, 0.0332266668960316,
0.0332266668960316,
0.0332266668960316)), .Names = c("code", "model", "O3", "O3obs",
"r2"), class = "data.frame", row.names = c(1L, 2L, 3L, 4L, 125L,
126L, 127L, 128L, 187L, 188L, 189L, 190L, 311L, 312L, 313L, 314L
))
And I tried it with:
ggplot( test, aes(O3obs,O3, group= model)) +
geom_point(aes(color=model),size=1)+xlim(0,200) + ylim (0,200) +
geom_abline(intercept = 0, slope = 1) + facet_wrap(~code) +
geom_text(data=test, aes(color = model, label = paste("R2: ", round(r2,2), sep="")), x=180, y=Inf, show.legend = F)
But the values of R2 are overlapped.
Any suggestion? How can I add the values of R2 for each data in each plot?
When you specify x and y in geom_text you are assigning the same coordinates for all the text so it would make sense that they overlap. I usually get around this by creating a data frame that has x and y coordinates for each group. For your data this could look like:
require(dplyr)
require(ggplot2)
new_data = test %>% group_by(code, model) %>% summarise(r2 = max(r2))
new_data$xposition = 40
new_data$yposition = c(200,170,200,170)
ggplot( test, aes(O3obs,O3, group= model))+
geom_point(aes(color=model),size=1)+xlim(0,200) + ylim (0,200) +
geom_abline(intercept = 0, slope = 1) + facet_wrap(~code) +
geom_text(data=new_data,aes(x = xposition, y = yposition, color=model, label = paste("R2: ",
round(r2,2),sep="")),show.legend = F)