Plot residuals in R [duplicate] - r

Given two variables, x and y, I run a dynlm regression on the variables and would like to plot the fitted model against one of the variables and the residual on the bottom showing how the actual data line differs from the predicting line. I've seen it done before and I've done it before, but for the life of me I can't remember how to do it or find anything that explains it.
This gets me into the ballpark where I have a model and two variables, but I can't get the type of graph I want.
library(dynlm)
# Two independent standard-normal series of length 100.
x <- rnorm(100)
y <- rnorm(100)
# Regress x on y.
model <- dynlm(x ~ y)
# Overlay the two raw series on one set of axes: x in red, y in blue.
plot(x, type = "l", col = "red")
lines(y, type = "l", col = "blue")
I want to generate a graph that looks like this where you see the model and the real data overlaying each other and the residual plotted as a separate graph on the bottom showing how the real data and the model deviate.

This should do the trick:
library(dynlm)
set.seed(771104)
# An upward-trending noisy series, and a second series equal to it plus noise.
x <- 5 + seq(1, 10, length.out = 100) + rnorm(100)
y <- x + rnorm(100)
model <- dynlm(x ~ y)
# Outer margins: 1 line bottom/left/top, 2 lines on the right-hand side,
# which is where plotModel() draws its second (residual) axis.
par(oma = c(1, 1, 1, 2))
# plotModel() is defined below; it works with any model object for which
# fitted() and residuals() are available.
plotModel(x, model)
and this is the code for plotModel,
# Draw observed data and fitted values in the upper half of a panel and the
# model residuals in the lower half, all against the same x-axis.
#
# Arguments:
#   x      observed values (numeric vector or series), plotted by index.
#   model  fitted model object supporting fitted() and residuals().
#
# Returns NULL invisibly; the function is called for its drawing side effect
# on the active graphics device.
plotModel <- function(x, model) {
  fit <- fitted(model)
  res <- residuals(model)

  # Range of observed/fitted values, and a y-limit extended downwards by one
  # full range so that these curves occupy only the upper half of the panel.
  ymodel1 <- range(x, fit, na.rm = TRUE)
  ymodel2 <- c(2 * ymodel1[1] - ymodel1[2], ymodel1[2])

  # Mirror image for the residuals: extend upwards so they occupy only the
  # lower half of the panel.
  yres1 <- range(res, na.rm = TRUE)
  yres2 <- c(yres1[1], 2 * yres1[2] - yres1[1])

  # Upper half: observed (red) and fitted (green) on the left-hand axis.
  plot(x, type = "l", col = "red", lwd = 2, ylim = ymodel2, axes = FALSE,
       ylab = "", xlab = "")
  axis(1)
  # The bottom axis shows the observation index, not the residuals.
  mtext("index", 1, adj = 0.5, line = 2.5)
  axis(2, at = pretty(ymodel1))
  mtext("observed/modeled", 2, adj = 0.75, line = 2.5)
  lines(fit, col = "green", lwd = 2)

  # Lower half: overlay a second coordinate system on the same panel and
  # draw the residuals (blue) against the right-hand axis.
  par(new = TRUE)
  plot(res, col = "blue", type = "l", ylim = yres2, axes = FALSE,
       ylab = "", xlab = "")
  axis(4, at = pretty(yres1))
  mtext("residuals", 4, adj = 0.25, line = 2.5)
  # Dashed guides at the 10% and 90% residual quantiles; na.rm matches the
  # NA handling of the range() calls above.
  abline(h = quantile(res, probs = c(0.1, 0.9), na.rm = TRUE),
         lty = 2, col = "gray")
  abline(h = 0)
  box()
  invisible(NULL)
}

what you're looking for is resid(model). Try this:
library(dynlm)
x <- 10 + rnorm(100)
y <- 10 + rnorm(100)
model <- dynlm(x ~ y)
# Use one y-range wide enough for both series and the residuals, then draw
# all three on the same axes: x (red), y (green), residuals (blue).
plot(x, type = "l", col = "red", ylim = range(x, y, resid(model)))
lines(y, type = "l", col = "green")
lines(resid(model), type = "l", col = "blue")

Related

Plotting multiple plots - rescaling of axes

I found that the axes are rescaled when overlaying multiple plots using the par(new=T) parameter.
An example to demonstrate this:
# Three sine curves evaluated on a common grid and collected in a data frame.
a <- seq(1, 10, by = 0.25)
b <- sin(a)
c <- sin(2 * a) + 1
d <- sin(0.5 * a) + 2
df <- data.frame(a, b, c, d)
# Each plot() call below chooses its own y-limits; par(new = TRUE) makes the
# next plot() draw on top of the existing one instead of starting a new page.
plot(df$a, df$b, type = "l")
par(new = TRUE)
plot(df$a, df$c, type = "l", col = "blue")
par(new = TRUE)
plot(df$a, df$d, type = "l", col = "red")
This is the result.
Instead of the real scales, I get transformed curves.
And this is the real result:
I used parameters axes=F, xlab="", ylab="" and did not see this "rescaling".
I find it very dangerous that it is so easy to distort the data during plotting if you do not control the y-limits.
Are there better ways to control y-limits than looking for min and max values in all plotted data to avoid this "rescaling" effect?
I have several quite large files; each of them contributes only one of the 10 lines in a plot, and I have several plots on one page to compare my data.
The code for the last "correct" image:
# Passing the same ylim to every layer keeps all curves on one scale; axes
# and axis labels are drawn by the first call only.
plot(df$a, df$b, type = "l", ylim = c(-1.5, 3.5))
par(new = TRUE)
plot(df$a, df$c, type = "l", ylim = c(-1.5, 3.5), col = "blue",
     axes = FALSE, xlab = "", ylab = "")
par(new = TRUE)
plot(df$a, df$d, type = "l", ylim = c(-1.5, 3.5), col = "red",
     axes = FALSE, xlab = "", ylab = "")
# Create an empty plot whose xlim and ylim accommodate all of the data:
# x-range from the first column, y-range from every remaining column.
plot(1, 1, xlim = range(df[, 1]), ylim = range(df[, -1]), type = "n",
     ann = FALSE)
# Add the three curves to that single coordinate system.
lines(df$a, df$b)
lines(df$a, df$c, col = "blue")
lines(df$a, df$d, col = "red")

In R plotting line with different color above threshold limits

I have the following data and code in R:
x <- runif(1000, min = -9.99, max = 9.99)
mx <- mean(x)
# Lower and upper 3-sigma limits around the mean; the standard deviation is
# taken of x / 5.
stdevs_3 <- mx + c(-3, 3) * sd(x / 5)
And I plotted it as a line (along with the 3-standard-deviation and mean lines) in R:
# Data as a line, with the mean (red) and the two 3-sigma limits (blue).
plot(x, type = "l", main = "Plot of Data", ylab = "X", xlab = "")
abline(h = mx, col = "red", lwd = 2)
abline(h = stdevs_3, lwd = 2, col = "blue")
What I want to do:
Anywhere on the plot, whenever the line crosses the 3-sigma thresholds (blue lines), above or below them, the line should be drawn in a color other than black.
I tried this, but did not work:
# Asker's attempt, kept verbatim. `col` is given one colour per observation,
# but per ?plot.default lines are all plotted in the first colour specified,
# so with type="l" this cannot colour individual stretches of the line.
plot(x, type="l", col= ifelse(x < stdevs_3[[1]],"red", "black"))
# Reference lines: mean (red) and the two 3-sigma limits (blue).
abline(h=mx, col="red", lwd=2)
abline(h=stdevs_3, lwd=2, col="blue")
Is there any other way?
This is what is requested, but it appears meaningless to me because of the arbitrary division of x by 5:
png()
# Start from an empty canvas spanning the data; the line is then added piece
# by piece with segments() so that every piece can carry its own colour.
plot(NA, xlim = c(1, length(x)), ylim = range(x), main = "Plot of Data",
     ylab = "X", xlab = "")
stdevs_3 <- mx + c(-3, 3) * sd(x / 5)
abline(h = mx, col = "red", lwd = 2)
abline(h = stdevs_3, lwd = 2, col = "blue")
# Segment i joins observation i to observation i + 1, giving length(x) - 1
# segments. All four coordinate vectors have that same length, so nothing is
# recycled and each point is drawn at its own index.
n <- length(x)
from <- seq_len(n - 1)
to <- from + 1
# A segment is red when its end point lies below the lower or above the
# upper limit; testing both limits explicitly stays correct when the mean
# is not zero.
outside <- x[to] < stdevs_3[1] | x[to] > stdevs_3[2]
segments(from, x[from], to, x[to], col = ifelse(outside, "red", "black"))
dev.off()

Interpolation on a Curve in R

I have a dataset called dataframe (a 2d table) and a best fit curve as:
# Scatter plot of the two columns with a loess curve drawn through it.
scatter.smooth(dataframe, xlab = "", ylab = "")
What code would I need to realize and evaluate (get numerical value of) a Y value on that best fit curve at a single x value?
Try
set.seed(1)
dataframe <- data.frame(x = runif(100), y = runif(100))
scatter.smooth(dataframe, xlab = "", ylab = "")
# Evaluate the loess smooth of y on x at 200 equally spaced x positions.
res <- with(dataframe, loess.smooth(x, y, evaluation = 200))
lengths(res)
# x y
# 200 200
x <- 0.5
# Interpolate linearly between the two grid points that bracket x, rather
# than taking the curve value at the next grid point at or above x.
y <- approx(res$x, res$y, xout = x)$y
points(x, y, col = "blue", pch = 19, cex = 2)

Plot margins for support vector machine

I just set up an SVM in R with e1071.
Unfortunately, the plot of the margin and the hyperplane does not look as desired. I wanted the margin to pass through the support vectors. Shouldn't this be the case?
Can anybody spot my mistake?
Here is my code:
# Asker's original script, kept verbatim: fits a linear SVM to a 12-point toy
# data set and draws a separating line plus two intended margin lines.
# Removes all objects from the workspace before starting.
rm(list=ls(all=TRUE))
# Coordinates of the 12 points; the first six are labelled +1, the last six -1.
x1s <- c(.5,1,1,2,3,3.5, 1,3.5,4,5,5.5,6)
x2s <- c(3.5,1,2.5,2,1,1.2, 5.8,3,4,5,4,1)
ys <- c(rep(+1,6), rep(-1,6))
my.data <- data.frame(x1=x1s, x2=x2s, type=as.factor(ys))
my.data
library('e1071')
# Linear-kernel C-classification on the unscaled inputs.
svm.model <- svm(type ~ ., data=my.data, type='C-classification', kernel='linear',scale=FALSE)
# (ys+3)/2 maps label +1 to colour 2 and label -1 to colour 1.
plot(my.data[,-3],col=(ys+3)/2, pch=19, xlim=c(-1,6), ylim=c(-1,6))
# Circle the rows listed in svm.model$index in blue (presumably the support
# vectors - confirm against the e1071 documentation).
points(my.data[svm.model$index,c(1,2)],col="blue",cex=2)
# w: 1 x 2 row vector obtained as t(coefs) %*% SV; b: negated rho.
w <- t(svm.model$coefs) %*% svm.model$SV
b <- -svm.model$rho
p <- svm.model$SV
# Black line: x2 = -b/w2 - (w1/w2) * x1, i.e. w1*x1 + w2*x2 + b = 0.
abline(a=-b/w[1,2], b=-w[1,1]/w[1,2], col="black", lty=1)
# NOTE(review): the two orange lines use `--b` (double negation, i.e. +b)
# divided by a coordinate of p rather than by w[1,2]; these are the two
# commands the answer that follows replaces.
abline(a=--b/p[1,2], b=-w[1,1]/w[1,2], col="orange", lty=3)
abline(a=--b/p[3,2], b=-w[1,1]/w[1,2], col="orange", lty=3)
Your last 2 commands should be
# Margin lines: same slope as the separating line, intercept shifted so that
# w1*x1 + w2*x2 + b equals -1 and +1 respectively.
abline(a = (-b - 1) / w[1, 2], b = -w[1, 1] / w[1, 2],
       col = "orange", lty = 3)
abline(a = (-b + 1) / w[1, 2], b = -w[1, 1] / w[1, 2],
       col = "orange", lty = 3)
Another way
plot(my.data[, -3], col = (ys + 3) / 2, pch = 19,
     xlim = c(-1, 6), ylim = c(-1, 6))
points(my.data[svm.model$index, c(1, 2)], col = "blue", cex = 2)
# Extent of the data along each coordinate.
x1min <- min(x1s)
x1max <- max(x1s)
x2min <- min(x2s)
x2max <- max(x2s)
# Line coefficients: model coefficients weighted by the coordinates of the
# rows listed in svm.model$index.
coef1 <- sum(svm.model$coefs * x1s[svm.model$index])
coef2 <- sum(svm.model$coefs * x2s[svm.model$index])
# Solid line: coef1*x1 + coef2*x2 = rho; dashed lines: the same with
# rho + 1 and rho - 1 on the right-hand side.
lines(c(x1min, x1max), (svm.model$rho - coef1 * c(x1min, x1max)) / coef2)
lines(c(x1min, x1max), (svm.model$rho + 1 - coef1 * c(x1min, x1max)) / coef2,
      lty = 2)
lines(c(x1min, x1max), (svm.model$rho - 1 - coef1 * c(x1min, x1max)) / coef2,
      lty = 2)

How do I plot multiple probability distributions side-by-side in R?

I want to plot several probability distributions sideways (density on the x-axis, variable on the y-axis). Each distribution will be associated with a different category, and I want them side-by-side so that I can compare them. This is a bit like a box-plot, but instead I want a theoretical probability distribution that I will specify by giving its parameters. So if they were all normal distributions, I would simply provide the mean and standard deviation for each. Thanks.
do you mean something like this?
# Common grid of 100 points on [-10, 10].
x <- seq(-10, 10, length.out = 100)
# Densities evaluated on that grid.
normal.dist <- dnorm(x, mean = 0, sd = 2)
f.dist <- df(x, df1 = 3, df2 = 4)
t.dist <- dt(x, df = 3)
chi.dist <- dchisq(x, df = 3)
# 2 x 2 grid of panels, filled row by row, one density per panel.
par(mfrow = c(2, 2))
plot(x, normal.dist, type = "l", lty = 1)
plot(x, f.dist, type = "l", lty = 1, xlab = "x value", col = "blue")
plot(x, t.dist, type = "l", lty = 1, xlab = "x value", col = "red")
plot(x, chi.dist, type = "l", lty = 1, xlab = "x value", col = "green")
See also Roman Luštrik's very helpful link as well as the help files (e.g. ?dnorm).
Rotated axis
# Common grid of 100 points on [-10, 10].
x <- seq(-10, 10, length.out = 100)
# Zero-mean normal densities with standard deviations 1 to 4.
normal.dist <- dnorm(x, mean = 0, sd = 1)
normal.dist2 <- dnorm(x, mean = 0, sd = 2)
normal.dist3 <- dnorm(x, mean = 0, sd = 3)
normal.dist4 <- dnorm(x, mean = 0, sd = 4)
# 2 x 2 grid; density goes on the horizontal axis and the variable on the
# vertical axis, which rotates each curve.
par(mfrow = c(2, 2))
plot(normal.dist, x, type = "l", lty = 1)
plot(normal.dist2, x, type = "l", lty = 1, col = "red")
plot(normal.dist3, x, type = "l", lty = 1, col = "green")
plot(normal.dist4, x, type = "l", lty = 1, col = "blue")
You can set up a frame for plot display and specify how many plots you want to show in the frame using par(mfrow = c(rows, columns)), for example:
par(mfrow=c(2,2))
plot(first plot)
plot(second plot)
hist(third histogram)
boxplot(fourth boxplot)
See the following link for the full description:
http://www.statmethods.net/advgraphs/layout.html

Resources