Plot logistic regression using parameters in ggplot2 - r

I would like to plot a logistic regression directly from the parameter estimates using ggplot2, but not quite sure how to do it.
For example, if I had 1500 draws of alpha and beta parameter estimates, I could plot each of the lines thus:
alpha_post = rnorm(n=1500,mean=1.1,sd = .15)
beta_post = rnorm(n=1500,mean=1.8,sd = .19)
X_lim = seq(from = -3,to = 2,by=.01)
for (i in 1:length(alpha_post)){
print(i)
y = exp(alpha_post[i] + beta_post[i]*X_lim)/(1+ exp(alpha_post[i] + beta_post[i]*X_lim) )
if (i==1){plot(X_lim,y,type="l")}
else {lines(X_lim,y,add=T)}
}
How would I do this in ggplot2? I know how to use geom_smooth(), but this is a little different.

As always in ggplot, you want to make a data.frame with all data that needs to be plotted:
d <- data.frame(
alpha_post = alpha_post,
beta_post = beta_post,
X_lim = rep(seq(from = -3,to = 2,by=.01), each = length(alpha_post))
)
d$y <- with(d, exp(alpha_post + beta_post * X_lim) / (1 + exp(alpha_post + beta_post * X_lim)))
Then the plotting itself becomes quite easy:
ggplot(d, aes(X_lim, y, group = alpha_post)) + geom_line()
If you want to be more fancy, add a summary line with e.g. the mean:
ggplot(d, aes(X_lim, y)) +
geom_line(aes(group = alpha_post), alpha = 0.3) +
geom_line(size = 1, color = 'firebrick', stat = 'summary', fun.y = 'mean')

Related

How to add R^2 and regression values to multi-factorial design in ggplot2

I have a two x two design. I need to add the R2 and regression values for each factor -- color coded on to the graph. I used partially used this answer to modify the code for this problem, but I still obtain only one regression line. Also, the regression equations are not printing clearly. I need four regression equations color-coded.
fertilizer <- c("N","N","N","N","N","N","N","N","N","N","N","N","P","P","P","P","P","P","P","P","P","P","P","P","N","N","N","N","N","N","N","N","N","N","N","N","P","P","P","P","P","P","P","P","P","P","P","P")
level <- c("low","low","high","high","low","low","high","high","low","low","high","high","low","low","high","high","low","low","high","high","low","low","high","high","low","low","high","high","low","low","high","high","low","low","high","high","low","low","high","high","low","low","high","high","low","low","high","low")
growth <- c(0,0,1,2,90,5,2,5,8,55,1,90,2,4,66,80,1,90,2,33,56,70,99,100,66,80,1,90,2,33,0,0,1,2,90,5,2,2,5,8,55,1,90,2,4,66,0,0)
repro <- c(1,90,2,4,66,80,1,90,2,33,56,70,99,100,66,80,1,90,2,33,0,0,1,2,90,5,2,2,5,8,55,1,90,2,4,66,0,0,0,0,1,2,90,5,2,5,8,55)
df <- data.frame(fertilizer, level, growth, repro)
lm_eqn = function(df){
m = lm(growth ~ repro, df);
eq <- substitute(italic(y) == a + b %.% italic(x)*","~~italic(r)^2~"="~r2,
list(a = format(coef(m)[1], digits = 2),
b = format(coef(m)[2], digits = 2),
r2 = format(summary(m)$r.squared, digits = 3)))
as.character(as.expression(eq));
}
eq <- ddply(df,.(fertlizer + level),lm_eqn)
ggplot(df, aes(x=growth, y=repro, color = fertilizer)) + theme_bw() + geom_point(aes(colour = factor(fertilizer)), size = 0.1,alpha = 0.3) +
geom_smooth(method='lm',se=FALSE, aes(colour = factor(fertilizer)), formula = y ~ x)+ scale_color_manual(values=c("#E69F00", "#1B9E77")) +
facet_wrap(.~level, scales = "free") + theme(legend.position = "none") + theme(aspect.ratio = 1.75/1) + geom_text(data=eq,aes(x = 50, y = 25,label=V1), parse = TRUE, inherit.aes=FALSE, size = 2)
There are a lot of ways to get to non-overlapping, this is very basic and very much manual.
Add a new column to eq for mapping with geom_text(aes(y = y_pos)), instead of the constant used currently.
eq$y_pos <- c(24, 36, 8, 24)
ggplot(df, aes(x=growth, y=repro, color = fertilizer)) +
geom_smooth(method='lm',se=FALSE, aes(colour = factor(fertilizer)), formula = y ~ x) +
geom_point(aes(colour = factor(fertilizer)), size = 0.1,alpha = 0.3) +
# change here
geom_text(data=eq,aes(x = 50, y = y_pos, label=V1), parse = TRUE, inherit.aes=FALSE, size = 2) +
# ----
scale_color_manual(values=c("#E69F00", "#1B9E77")) +
facet_wrap(.~level, scales = "free") +
theme_bw() +
theme(legend.position = "none",
aspect.ratio = 1.75/1)
Maybe a more elegant and flexible solution is to extract the model's intercept and set that value as the y-position for each equation. Or you could extract the model value at a given x-value and use that.
Happy to share one of those if it helps, but lots of time for publication plots I fall back to manual text placement, just like this.

How can I add a different legend for different quantiles in the same graph in ggplot2?

ggplotRegression <- function (fit) {
require(ggplot2)
ggplot(fit$model, aes_string(x = names(fit$model)[2], y =
names(fit$model)[1])) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
labs(title = paste("Adj R2 = ",signif(summary(fit)$adj.r.squared,
5),
"Intercept =",signif(fit$coef[[1]],5 ),
" Slope =",signif(fit$coef[[2]], 5),
" P =",signif(summary(fit)$coef[2,4], 5)))
}
taus <-c(0.05 , 0.25, 0.50 , 0.75, 0.90 , 0.95)
m <- ggplotRegression( lm(formula = BMI ~ height_in_m
+weight_in_kg+ Highest_Education_level +
wealth_index + age_in_year_groups, data = dat_new))
m+geom_quantile(quantiles=taus, lwd=1.5 , col="green4",
fill=taus)
Now I want to add specific colours for each quantiles and also add spcific legend for each quantiles .
Many ggplot statistics let you use the results of the calculation enclosed in .., for example with geom_density you can use ..count.. in the aes.
With geom_quantile you can use ..quantile..
df <- data_frame(x = rnorm(100), y = rnorm(100))
ggplot(df, aes(x, y)) +
geom_point() +
geom_quantile(aes(colour = as.factor(..quantile..)))
The trick is to find out what these variables are called. Geoms that need to calculate statistics, such as geom_quantile and geom_density, have an associated ggproto object such as StatQuantile and StatDensity which has the code for the calculations in an element called compute_group.
The last command of StatQuantile$compute_group is
plyr::ldply(quantiles, quant_pred, data = data, method = method,
formula = formula, weight = weight, grid = grid, method.args = method.args)
The function here, quant_pred - which you can see with ggplot2:::quant_pred, returns a list. The components of this list, including quantile, can be used in the aes.

How do I get the equation for a regression line in log-log plot in ggplot2?

I've a log-log plot, I got the regression line by using:
geom_smooth(formula = y ~ x, method='lm')
But now I'd like to obtain the equation of this line (e.g. y=a*x^(-b)) and print it. I managed to get it in a lin-lin plot but not in this case.
Here's the code:
mydataS<-data.frame(DurPeak_h[],IntPeak[],IntPeakxDurPeak[],ID[]) #df peak
names(mydataS)<-c("x","y","ID","IDEVENT")
plotID<-ggplot(mydataS, aes(x=x, y=y, label=IDEVENT)) +
geom_text(check_overlap = TRUE, hjust = 0, nudge_x = 0.02)+
geom_point(colour="black", size = 2) + geom_point(aes(colour = ID)) +
geom_quantile(quantiles = qs, colour="green")+
scale_colour_gradient(low = "white", high="red") +
scale_x_log10(limits = c(min(DurEnd_h),max(DurEnd_h))) +
scale_y_log10(limits = c(min(IntEnd),max(IntEnd))) +
geom_smooth(formula = y ~ x, method='lm')
ggsave(height=7,"plot.pdf")
mydataS<-data.frame(DurPeak_h[],IntPeak[],IntPeakxDurPeak[],ID[])
names(mydataS)<-c("x","y","ID","IDEVENT")
model <- lm(y~x, header = T)
summary(model)
use the intercept value given as "b" and the coefficient as your "a"
Did it with a workaround: using nls to calculate the two parameters a and b, precisely:
nlsPeak <- coef(nls(y ~ a*(x)^b, data = mydataS, start = list(a=30, b=-0.1)))
then plotting the line with annotate (see some examples here) and finally printing the equation using the function:
power_eqn = function(ds){
m = nls(y ~ a*x^b, start = list(a=30, b=-0.1), data = ds);
eq <- substitute(italic(y) == a ~italic(x)^b,
list(a = format(coef(m)[1], digits = 4),
b = format(coef(m)[2], digits = 2)))
as.character(as.expression(eq));
}
called as follow:
annotate("text",x = 3, y = 180,label = power_eqn(mydataS), parse=TRUE, col="black") +
Hope it helps!

Add datapoints to existing scatterplot

I have an existing ggplot2 scatterplot which shows the results of a parameter against from normal database. I then want to add two additional points to this graph which I would pass as command line arguments to my script script age value1 value2. I would like to show these points as red with an r and l geom_text above each point. I have the following code so far but do not know how to add the finishing touches
pkgLoad <- function(x)
{
if (!require(x,character.only = TRUE))
{
install.packages(x,dep=TRUE, repos='http://star-www.st-andrews.ac.uk/cran/')
if(!require(x,character.only = TRUE)) stop("Package not found")
}
}
pkgLoad("ggplot2")
#load current normals database
df<-data.frame(read.csv("dat_normals.txt", sep='\t', header=T))
args<-commandArgs(TRUE)
#specify what each argument is
age <- args[1]
rSBR <- args[2]
lSBR <- args[3]
# RUN REGRESSION AND APPEND PREDICTION INTERVALS
lm_fit = lm(SBR ~ Age, data = df)
sbr_with_pred = data.frame(df, predict(lm_fit, interval='prediction'))
p <- ggplot(sbr_with_pred, aes(x=Age, y=SBR)) +
geom_point(shape=19, alpha=1/4) +
geom_smooth(method = 'lm', aes(fill = 'confidence'), alpha = 0.5) +
geom_ribbon(aes(y = fit, ymin = lwr, ymax = upr,
fill = 'prediction'), alpha = 0.2) +
scale_fill_manual('Interval', values = c('green', 'blue')) +
theme_bw() +
theme(legend.position = "none")
ggsave(filename=paste("/home/data/wolf/FV_DAT/dat_results.png",sep=""))
browseURL(paste("/home/data/wolf/FV_DAT/dat_results.png",sep""))
Essentially, I want to see if the 2 new points fall within the 95% confidence intervals from the normal database (blue ribbon)
Your example is not reproducible. It is really constructive to create data and reproducible example. It is not a waste of time. For the solution, I write what it is said in the comment. You add a new layer with new data.
newdata <- data.frame(Age = args[1],
SBR = c(args[2],args[3]))
p + geom_point(data=newdata,colour="red",size=10)
For example:
sbr_with_pred <- data.frame(Age = sample(15:36,50,rep=T),
SBR = rnorm(50))
p <- ggplot(sbr_with_pred, aes(x=Age, y=SBR)) +
geom_point(shape=19, alpha=1/4) +
geom_smooth(method = 'lm', aes(fill = 'confidence'), alpha = 0.5)
args <- c(20,rnorm(1),rnorm(2))
newdata <- data.frame(Age = args[1],
SBR = c(args[2],args[3]))
p + geom_point(data=newdata,colour="red",size=10)

Computing weighted average using lowess mthod in R

I am trying to use the lowess method from R to compute the weighted average of a data set which is not uniformly distributed along x axis. For example, the first 5 data points are like this, where the first column is the x and the second is the y.
375.0 2040.0
472.0 5538.0
510.0 4488.0
573.0 2668.0
586.0 7664.0
I used the following command in R:
x<-read.table(add,header=FALSE,sep="\t")
y<-lowess(x[,1],x[,2],f=0.01)
write.table(y, file = results , sep = "\t", col.names =FALSE, row.names =FALSE)
The output looks like this:
The green line shows the average computed by the smooth function in matlab (tri-cubic kernel), and the red line is the average line computed by lowess method in R. The blue dots are the data points.
I can't find why the method in R does not work. Do you have any idea?
Here is a link to part of the data.
Thanks a lot for your help.
Th smooth function in matlab is like a filter ,
yy = smooth(y)
yy(1) = y(1)
yy(2) = (y(1) + y(2) + y(3))/3
yy(3) = (y(1) + y(2) + y(3) + y(4) + y(5))/5 ## convolution of size 5
yy(4) = (y(2) + y(3) + y(4) + y(5) + y(6))/5
I think it is better to do a simple smooth here.
Here some attempts using loess, lowesss with f = 0.2(1/5) and using smooth.spline
I am using ggplot2 to plot ( to use geom_jitter with some alpha )
library(ggplot2)
dat <- subset(data, V2 < 5000)
#dat <- data
xy <- lowess(dat$V1,dat$V2,f = 0.8)
xy <- as.data.frame(do.call(cbind,xy))
p1<- ggplot(data = dat, aes(x= V1, y = V2))+
geom_jitter(position = position_jitter(width = .2), alpha= 0.1)+
geom_smooth()
xy <- lowess(dat$V1,dat$V2,f = 0.2)
xy <- as.data.frame(do.call(cbind,xy))
xy.smooth <- smooth.spline(dat$V1,dat$V2)
xy.smooth <- data.frame(x= xy.smooth$x,y = xy.smooth$y)
p2 <- ggplot(data = dat, aes(x= V1, y = V2))+
geom_jitter(position = position_jitter(width = .2), alpha= 0.1)+
geom_line(data = xy, aes(x=x, y = y, group = 1 ), color = 'red')+
geom_line(data = xy.smooth, aes(x=x, y = y, group = 1 ), color = 'blue')
library(gridExtra)
grid.arrange(p1,p2)

Resources