I am trying to loop my multiple linear regression plot and summaries, but I keep encountering an error in R that states Error: More than one expression parsed. I am not sure how to fix this or if there is a better way to achieve what I want to do which is mainly:
Plot a multiple linear regression plot with Group as the colour
Get summary for each of the linear regression lines based on Group
Compute regression summary
Perform anova to determine differences
colNames <- names(df)[c(35:39)]
for(i in colNames){
plt <- ggplot(df,
aes_string(x=df$MachineLength, y=i, fill=df$Group, color=be_nlyl$Group)) +
geom_smooth(method=lm) +
geom_point(size = 2, alpha=0.7) +
labs(title="Machine", subtitle = "Machine Type") +
theme_bw() +
theme(plot.title = element_text(hjust=0.5, face="bold"),
plot.subtitle = element_text(hjust=0.5))
print(plt)
lm_A <- lm(formula = i ~ MachineLength, data = subset(be_nlyl, Group == "A"))
summary(lm_A) %>% print()
lm_B <- lm(formula = i ~ MachineLength, data = subset(be_nlyl, Group == "B"))
summary(lm_B) %>% print()
clz.lm <- lm(formula = i ~ Group + MachineLength + Group:MachineLength, data = df)
summary(clz.lm) %>% print()
ano.lm <- Anova(lm(i ~ MachineLength*Group, data = df))
print(ano.lm)
}
Anyone have ideas of how to implement above? Thank you!
Try the following :
Create lists of length colNames to store all the outputs so that instead of just printing the output we can store them as well.
Use for loop over the index of colNames instead of actual column names so that you can use that as an index to store the output for different objects.
aes_string has be deprecated so we use .data pronoun to pass column name as variable.
Use sprintf to create formula string which is passed in lm function.
library(ggplot2)
colNames <- names(df)[c(35:39)]
plt <- vector('list', length(colNames))
lm_A <- vector('list', length(colNames))
summary_lm_A <- vector('list', length(colNames))
summary_lm_B <- vector('list', length(colNames))
lm_B <- vector('list', length(colNames))
clz.lm <- vector('list', length(colNames))
summary_clz.lm <- vector('list', length(colNames))
ano.lm <- vector('list', length(colNames))
for(i in seq_along(colNames)) {
var <- colNames[i]
plt[[i]] <- ggplot(df, aes(MachineLength, .data[[var]], fill= Group, color= Group)) +
geom_smooth(method=lm) +
geom_point(size = 2, alpha=0.7) +
labs(title="Machine", subtitle = "Machine Type") +
theme_bw() +
theme(plot.title = element_text(hjust=0.5, face="bold"),
plot.subtitle = element_text(hjust=0.5))
lm_A[[i]] <- lm(sprintf('%s~MachineLength', var), data = subset(df, Group == "A"))
summary_lm_A[[i]] <- summary(lm_A[[i]])
lm_B[[i]] <- lm(sprintf('%s~MachineLength', var), data = subset(df, Group == "B"))
summary_lm_B[[i]] <- summary(lm_B[[i]])
clz.lm[[i]] <- lm(sprintf('%s~Group + MachineLength + Group:MachineLength', var), data = df)
summary_clz.lm[[i]] <- summary(clz.lm[[i]])
ano.lm[[i]] <- Anova(lm(sprintf('%s~MachineLength*Group', var), data = df))
}
Related
I am running the analysis for scRNA trajectory and sligshot lineage analysis and it gives me the error Error in as.vector(x) : no method for coercing this S4 class to a vector
attaching the code used
``
library(dplyr)
library(ggbeeswarm)
library(slingshot)
library(SingleCellExperiment)
sshot <- list()
sshot[[1]] <- readRDS("out/20210118-slingshot_nt_97_99_0.3.Rds")
for (i in 1:length(sshot) ) {
sc_exp <- sshot[[i]]
coldata <- as.data.frame(colData(sc_exp))
tmp <- coldata %>% select(starts_with("slingPseudotime"))}
tmp$seurat_clusters <- coldata$seurat_clusters
slingshot_lineage <- file.path("out", paste0("slingshot_lineage_", i, ".pdf"))}
my_color <- hue_pal()(length(levels(
as.factor(as.factor(sc_exp$seurat_clusters)))) + 1)}
long_tmp <- gather(tmp,key, value, -seurat_clusters)
long_tmp <- sample_frac(long_tmp, 0.5)
g <- ggplot(long_tmp, aes(x=value, y=key)) + geom_quasirandom(groupOnX = FALSE, aes(colour = factor(seurat_clusters)), alpha=1) + scale_colour_manual(values = my_color , name = "Clusters") + theme_classic() + theme(text = element_text(size = 14)) + xlab("Slingshot pseudotime") + ylab("Lineages")
ggtitle(paste0("Cells ordered by Slingshot pseudotime")) + scale_y_discrete(labels=c("1","2","3") )
ggsave(slingshot_lineage,g)
pdf2png(slingshot_lineage)
}
ln <- slingLineages(sshot[[1]])
lntype here
`
I was expecting to get the slingshot lineage plots and instead i get the error
I am just a few months into using R and this is my first post. I am looking to create a function that takes fields from a data frame, filters the outliers via quantiles, then writes the regression parameters as annotations on a scatter plot. The filtering and plotting work correctly but I get an error in the linear model. Can I convert those fields to execute in the model?
Error message:
Error in model.frame.default(formula = df[, field1] ~ df[, field2], drop.unused.levels = TRUE) :
invalid type (list) for variable 'df[, field1]'
Here is the function:
scatter_filtered <- function(df,field1,field2,field3) {
range1 <- quantile(df[, field1], probs= c(0.1,0.9), na.rm=TRUE)
range2 <- quantile(df[, field2], probs= c(0.1,0.9), na.rm=TRUE)
low_end1 <- range1[1]
high_end1 <- range1[2]
low_end2 <- range2[1]
high_end2 <- range2[2]
df %>%
filter(df[, field1] > low_end1, df[, field1] < high_end1,
df[, field2] > low_end2, df[, field2] < high_end2) %>%
model <- lm(df[,field1] ~ df[, field2])
r_output <- round(glance(model)$r.squared, digits = 5)
r_adj_output <- round(glance(model)$adj.r.squared, digits = 5)
p_output <- round(glance(model)$p.value, digits = 5) %>%
ggplot(aes_string(x = field1, y = field2, color = field3)) +
geom_point() +
geom_smooth(method="lm", se=FALSE)
# annotate("text", label = paste("r_sq:",r_output), x=0.1, y=0.1, parse=TRUE) +
# annotate("text", label = paste("p-val:",p_output), x=0.1, y=0.1, parse=TRUE)
}
I need to create some gam plots in ggplot. I can do them with the general plot function, but am unsure how to do with ggplot. Here is my code and plots with the regular plot function. I'm using the College data set from the ISLR package.
train.2 <- sample(dim(College)[1],2*dim(College)[1]/3)
train.college <- College[train.2,]
test.college <- College[-train.2,]
gam.college <- gam(Outstate~Private+s(Room.Board)+s(Personal)+s(PhD)+s(perc.alumni)+s(Expend)+s(Grad.Rate), data=train.college)
par(mfrow=c(2,2))
plot(gam.college, se=TRUE,col="blue")
See update below old answer.
Old answer:
There is an implementation of GAM plotting using ggplot2 in voxel library. Here is how you would go about it:
library(ISLR)
library(mgcv)
library(voxel)
library(tidyverse)
library(gridExtra)
data(College)
set.seed(1)
train.2 <- sample(dim(College)[1],2*dim(College)[1]/3)
train.college <- College[train.2,]
test.college <- College[-train.2,]
gam.college <- gam(Outstate~Private+s(Room.Board)+s(Personal)+s(PhD)+s(perc.alumni)+s(Expend)+s(Grad.Rate), data=train.college)
vars <- c("Room.Board", "Personal", "PhD", "perc.alumni","Expend", "Grad.Rate")
map(vars, function(x){
p <- plotGAM(gam.college, smooth.cov = x) #plot customization goes here
g <- ggplotGrob(p)
}) %>%
{grid.arrange(grobs = (.), ncol = 2, nrow = 3)}
after a bunch of errors: In plotGAM(gam.college, smooth.cov = x) :
There are one or more factors in the model fit, please consider plotting by group since plot might be unprecise
To compare to the plot.gam:
par(mfrow=c(2,3))
plot(gam.college, se=TRUE,col="blue")
You might also want to plot the observed values:
map(vars, function(x){
p <- plotGAM(gam.college, smooth.cov = x) +
geom_point(data = train.college, aes_string(y = "Outstate", x = x ), alpha = 0.2) +
geom_rug(data = train.college, aes_string(y = "Outstate", x = x ), alpha = 0.2)
g <- ggplotGrob(p)
}) %>%
{grid.arrange(grobs = (.), ncol = 3, nrow = 2)}
or per group (especially important if you used the by argument (interaction in gam).
map(vars, function(x){
p <- plotGAM(gam.college, smooth.cov = x, groupCovs = "Private") +
geom_point(data = train.college, aes_string(y = "Outstate", x = x, color= "Private"), alpha = 0.2) +
geom_rug(data = train.college, aes_string(y = "Outstate", x = x, color= "Private" ), alpha = 0.2) +
scale_color_manual("Private", values = c("#868686FF", "#0073C2FF")) +
theme(legend.position="none")
g <- ggplotGrob(p)
}) %>%
{grid.arrange(grobs = (.), ncol = 3, nrow = 2)}
Update, 08. Jan. 2020.
I currently think the package mgcViz offers superior functionality compared to the voxel::plotGAMfunction. An example using the above data set and models:
library(mgcViz)
viz <- getViz(gam.college)
print(plot(viz, allTerms = T), pages = 1)
plot customization is similar go ggplot2 syntax:
trt <- plot(viz, allTerms = T) +
l_points() +
l_fitLine(linetype = 1) +
l_ciLine(linetype = 3) +
l_ciBar() +
l_rug() +
theme_grey()
print(trt, pages = 1)
This vignette shows many more examples.
I am new to R and have been trying to figure this out for a while. Basically, I have a data frame, and various y variables. I am trying to write a function that will allow me to come up with a customized graph template for the many different y variables that I have. I am trying the following code below but I am met with this error:
1: In eval(expr, envir, enclos) : NAs introduced by coercion
2: In aes_string(xvar[max(which(complete.cases(yvar)))], yvar[max(which(complete.cases(yvar)))], :
NAs introduced by coercion
The code works if I add the variables in directly and not through a function. I believe that it is something to do with how the function plugs in the xvar into the as.numeric() function. I am not sure but any of you knows how to deal with this?
test <- function (Data, xvar, yvar){
# Plot data
plot <- ggplot(subset(Data,!is.na((yvar))), aes_string(xvar, yvar)) + geom_line(colour="darkblue") + theme_bw()
# Add Trendline for recent data
plot <- plot + geom_smooth(data=subset(Data, xvar > as.numeric(xvar)[max(which(complete.cases(yvar)))-8]), method = "lm")
# Label most recent data
plot + geom_text(data = Data, aes_string(xvar[max(which(complete.cases(yvar)))],
yvar[max(which(complete.cases(yvar)))],
label = as.numeric(yvar)[max(which(complete.cases(yvar)))],
hjust= -0.5, vjust = 0.5))
As xvar is probably (you do not show a reproducible example) a character vector of length 1, subsetting like xvar[] will not yield the desired result.
You could try something like
library(ggplot2)
f <- function(data, xvar, yvar) {
ggplot(data, aes_string(xvar, yvar)) +
geom_point() +
geom_smooth(data=subset(data, eval(parse(text=xvar)) > 5), method = "lm")
}
or
f <- function(data, xvar, yvar) {
ggplot(data, aes_string(xvar, yvar)) +
geom_point() +
geom_smooth(data = data[data[, xvar]>5, ], method = "lm")
}
f(mtcars, "cyl", "disp")
I think #LukeA has gotten you practically all the way there, but here is an example that uses your data and adds a few more columns to help demonstrate how you can pass column names into ggplot inside your own function.
It uses your variable names. It subsets your data into a data.frame with non-missing values for y, and then it subsets your data into a separate data.frame that allows you to add additional filtering criteria to your smoothing function.
library(zoo)
set.seed(72)
X1 <- as.yearqtr(seq(as.Date("2010/3/1"), by = "quarter", length.out = 10))
Y1 <- as.vector(c(124,315,363,574,345,434,141,512,142,647))
Y2 <- sample(Y1)
Y3 <- sample(Y1)
Data1 <- data.frame(X1, Y1, Y2, Y3)
plot_function <- function(data, xvar, yvar){
# remove rows with NA on yvar
mydata1 <- data[!is.na(data[, yvar]), ]
# remove rows with NA on yvar and subset yvar above some threshold
mydata2 <- data[!is.na(data[, yvar]) & data[, yvar] > 400, ]
# plot it
myplot <- ggplot(mydata1, aes_string(xvar, yvar)) +
geom_line(colour="darkblue") +
scale_x_yearqtr(limits = c(min(mydata1[, xvar]), max(mydata1[, xvar])), format = "%YQ%q") +
geom_smooth(data = mydata2, aes_string(xvar, yvar), method = "lm") +
geom_text(data = mydata1, aes_string(xvar, yvar, label = yvar), hjust= -0.5, vjust = 0.5) +
theme_bw()
return(myplot)
}
plot_function(data = Data1, xvar = "X1", yvar = "Y1")
plot_function(data = Data1, xvar = "X1", yvar = "Y2")
plot_function(data = Data1, xvar = "X1", yvar = "Y3")
I have a data frame created the following way.
library(ggplot2)
x <- data.frame(letters[1:10],abs(rnorm(10)),abs(rnorm(10)),type="x")
y <- data.frame(letters[1:10],abs(rnorm(10)),abs(rnorm(10)),type="y")
# in reality the number of row could be larger than 10 for each x and y
all <- rbind(x,y)
colnames(all) <- c("name","val1","val2","type")
What I want to do is to create a faceted ggplot that looks roughly like this:
Hence each facet above is the correlation plot of the following:
# Top left facet
subset(all,type=="x")$val1
subset(all,type=="y")$val1
# Top right facet
subset(all,type=="x")$val1
subset(all,type=="y")$val2
# ...etc..
But I'm stuck with the following code:
p <- ggplot(all, aes(val1, val2))+ geom_smooth(method = "lm") + geom_point() +
facet_grid(type ~ )
# Calculate correlation for each group
cors <- ddply(all, c(type ~ ), summarise, cor = round(cor(val1, val2), 2))
p + geom_text(data=cors, aes(label=paste("r=", cor, sep="")), x=0.5, y=0.5)
What's the right way to do it?
Some of your code was incorrect. This works for me:
p <- ggplot(all, aes(val1, val2))+ geom_smooth(method = "lm") + geom_point() +
facet_grid(~type)
# Calculate correlation for each group
cors <- ddply(all, .(type), summarise, cor = round(cor(val1, val2), 2))
p + geom_text(data=cors, aes(label=paste("r=", cor, sep="")), x=1, y=-0.25)
Edit: Following OP's comment and edit. The idea is to re-create the data with all four combinations and then facet.
# I consider the type in your previous data to be xx and yy
dat <- data.frame(val1 = c(rep(all$val1[all$type == "x"], 2),
rep(all$val1[all$type == "y"], 2)),
val2 = rep(all$val2, 2),
grp1 = rep(c("x", "x", "y", "y"), each=10),
grp2 = rep(c("x", "y", "x", "y"), each=10))
p <- ggplot(dat, aes(val1, val2)) + geom_point() + geom_smooth(method = "lm") +
facet_grid(grp1 ~ grp2)
cors <- ddply(dat, .(grp1, grp2), summarise, cor = round(cor(val1, val2), 2))
p + geom_text(data=cors, aes(label=paste("r=", cor, sep="")), x=1, y=-0.25)
Since your data is not in the appropriate format, some reshaping is necessary before it can be plotted.
Firstly, reshape the data to the long format:
library(reshape2)
allM <- melt(all[-1], id.vars = "type")
Split the values along type and val1 vs. val2:
allList <- split(allM$value, interaction(allM$type, allM$variable))
Create a list of all combinations:
allComb <- unlist(lapply(c(1, 3),
function(x)
lapply(c(2 ,4),
function(y)
do.call(cbind, allList[c(x, y)]))),
recursive = FALSE)
Create a new dataset:
allNew <- do.call(rbind,
lapply(allComb, function(x) {
tmp <- as.data.frame(x)
tmp <- (within(tmp, {xval <- names(tmp)[1];
yval <- names(tmp)[2]}))
names(tmp)[1:2] <- c("x", "y")
tmp}))
Plot:
library(ggplot2)
p <- ggplot(allNew, aes(x = x, y = y)) +
geom_smooth(method = "lm") +
geom_point() +
facet_grid(yval ~ xval)
# Calculate correlation for each group
library(plyr)
cors <- ddply(allNew, .(yval, xval), summarise, cor = round(cor(x, y), 2))
p + geom_text(data=cors, aes(label=paste("r=", cor, sep="")), x=0.5, y=0.5)
There is an additional package ggpubr available now addressing exactly this issue with the stat_cor() function.
library(tidyverse)
library(ggpubr)
ggplot(all, aes(val1, val2))+
geom_smooth(method = "lm") +
geom_point() +
facet_grid(~type) +
stat_cor()