i just set up a SVM in R with e1071.
Unfortunately the plot of the margin and die hyperplane does not look as desired. I wanted the margin to pass through the support vectors. Shouldnt this be the case?
Can anybody spot my mistake?
Here is my code:
rm(list=ls(all=TRUE))
x1s <- c(.5,1,1,2,3,3.5, 1,3.5,4,5,5.5,6)
x2s <- c(3.5,1,2.5,2,1,1.2, 5.8,3,4,5,4,1)
ys <- c(rep(+1,6), rep(-1,6))
my.data <- data.frame(x1=x1s, x2=x2s, type=as.factor(ys))
my.data
library('e1071')
svm.model <- svm(type ~ ., data=my.data, type='C-classification', kernel='linear',scale=FALSE)
plot(my.data[,-3],col=(ys+3)/2, pch=19, xlim=c(-1,6), ylim=c(-1,6))
points(my.data[svm.model$index,c(1,2)],col="blue",cex=2)
w <- t(svm.model$coefs) %*% svm.model$SV
b <- -svm.model$rho
p <- svm.model$SV
abline(a=-b/w[1,2], b=-w[1,1]/w[1,2], col="black", lty=1)
abline(a=--b/p[1,2], b=-w[1,1]/w[1,2], col="orange", lty=3)
abline(a=--b/p[3,2], b=-w[1,1]/w[1,2], col="orange", lty=3)
Your last 2 commands should be
abline(a=(-b-1)/w[1,2], b=-w[1,1]/w[1,2], col="orange", lty=3)
abline(a=(-b+1)/w[1,2], b=-w[1,1]/w[1,2], col="orange", lty=3)
Another way
plot(my.data[,-3],col=(ys+3)/2, pch=19, xlim=c(-1,6), ylim=c(-1,6))
points(my.data[svm.model$index,c(1,2)],col="blue",cex=2)
x1min = min(x1s); x1max = max(x1s);
x2min = min(x2s); x2max = max(x2s);
coef1 = sum(svm.model$coefs*x1s[svm.model$index]);
coef2 = sum(svm.model$coefs*x2s[svm.model$index]);
lines(c(x1min,x1max), (svm.model$rho-coef1*c(x1min, x1max))/coef2)
lines(c(x1min,x1max), (svm.model$rho+1-coef1*c(x1min, x1max))/coef2, lty=2)
lines(c(x1min,x1max), (svm.model$rho-1-coef1*c(x1min, x1max))/coef2, lty=2)
Related
Given two variables, x and y, I run a dynlm regression on the variables and would like to plot the fitted model against one of the variables and the residual on the bottom showing how the actual data line differs from the predicting line. I've seen it done before and I've done it before, but for the life of me I can't remember how to do it or find anything that explains it.
This gets me into the ballpark where I have a model and two variables, but I can't get the type of graph I want.
library(dynlm)
x <- rnorm(100)
y <- rnorm(100)
model <- dynlm(x ~ y)
plot(x, type="l", col="red")
lines(y, type="l", col="blue")
I want to generate a graph that looks like this where you see the model and the real data overlaying each other and the residual plotted as a separate graph on the bottom showing how the real data and the model deviate.
This should do the trick:
library(dynlm)
set.seed(771104)
x <- 5 + seq(1, 10, len=100) + rnorm(100)
y <- x + rnorm(100)
model <- dynlm(x ~ y)
par(oma=c(1,1,1,2))
plotModel(x, model) # works with models which accept 'predict' and 'residuals'
and this is the code for plotModel,
plotModel = function(x, model) {
ymodel1 = range(x, fitted(model), na.rm=TRUE)
ymodel2 = c(2*ymodel1[1]-ymodel1[2], ymodel1[2])
yres1 = range(residuals(model), na.rm=TRUE)
yres2 = c(yres1[1], 2*yres1[2]-yres1[1])
plot(x, type="l", col="red", lwd=2, ylim=ymodel2, axes=FALSE,
ylab="", xlab="")
axis(1)
mtext("residuals", 1, adj=0.5, line=2.5)
axis(2, at=pretty(ymodel1))
mtext("observed/modeled", 2, adj=0.75, line=2.5)
lines(fitted(model), col="green", lwd=2)
par(new=TRUE)
plot(residuals(model), col="blue", type="l", ylim=yres2, axes=FALSE,
ylab="", xlab="")
axis(4, at=pretty(yres1))
mtext("residuals", 4, adj=0.25, line=2.5)
abline(h=quantile(residuals(model), probs=c(0.1,0.9)), lty=2, col="gray")
abline(h=0)
box()
}
what you're looking for is resid(model). Try this:
library(dynlm)
x <- 10+rnorm(100)
y <- 10+rnorm(100)
model <- dynlm(x ~ y)
plot(x, type="l", col="red", ylim=c(min(c(x,y,resid(model))), max(c(x,y,resid(model)))))
lines(y, type="l", col="green")
lines(resid(model), type="l", col="blue")
I have the following data and code in R:
x <- runif(1000, -9.99, 9.99)
mx <- mean(x)
stdevs_3 <- mx + c(-3, +3) * sd(x/5) # Statndard Deviation 3-sigma
And I plotted as line (alongwith 3 standard deviation and mean lines) in R:
plot(x, t="l", main="Plot of Data", ylab="X", xlab="")
abline(h=mx, col="red", lwd=2)
abline(h=stdevs_3, lwd=2, col="blue")
What I want to do:
Anywhere on the plot, whenever line is crossing 3 sigma thresholds (blue lines), above or below it, line should be in different color than black.
I tried this, but did not work:
plot(x, type="l", col= ifelse(x < stdevs_3[[1]],"red", "black"))
abline(h=mx, col="red", lwd=2)
abline(h=stdevs_3, lwd=2, col="blue")
Is there any other way?
This is what is requested, but it appears meaningless to me because of the arbitrary division of x by 5:
png( )
plot(NA, xlim=c(0,length(x)), ylim=range(x), main="Plot of Data", ylab="X", xlab="", )
stdevs_3 <- mx + c(-3, +3) * sd(x/5)
abline(h=mx, col="red", lwd=2)
abline(h=stdevs_3, lwd=2, col="blue")
segments( 0:999, head(x,-1), 1:1000, tail(x,-1) , col=c("black", "red")[
1+(abs(tail(x,-1)) > mx+3*sd(x/5))] )
dev.off()
I have a dataset with actual and modelled values. I have split the dataset into two periods Jan 2003- Dec 2006 and Jan 2007- Dec 2012 and plotted trend lines - there are two trends lines for actual values (for 2 time periods) and 2 for modelled. I have attached a picture - I want to control the length of the lines so that they start and stop at the right time - but I cannot figure this out! I have attached the code that I have managed so far - I'm still fairly new.Also attached a picture in case the above is not clear. Thanks!
I'm not sure if there is a way I could put a vertical line at 2007 (Jan) and use this line as the reference to start and stop the respective lines?
plot(NULL, type="n", xlim=x.limit, ylim=c(-30, 30), xlab="Year", ylab="Equivalent Water Depth (cm)", axes=F, cex.lab=0.9)
box(lwd=1.5)
abline(h=0, col="gray50", lty=1, lwd=1)
axis(1, seq(2003, 2013, 1), cex.axis=0.7)
axis(2, seq(-40, 40, 10), las=1, cex.axis=0.7, tck=-0.03)
minor.tick(nx=4, ny=0, tick.ratio=0.5)
lines(tws.avg.VNB[,2] ~ tws.avg.VNB[,1], type="l", lwd=2, col=1)
tws.slope1 <- round(as.vector(coef(lm(SPLIT.1.ALL.VNB[,2] ~ SPLIT.1.ALL.VNB[,1]))[2]), 2)
tws.sdev1 <- round(as.vector(coef(summary(lm(SPLIT.1.ALL.VNB[,2] ~ SPLIT.1.ALL.VNB[,1])))[, "Std. Error"][2]), 2)
mtext(paste("GRACE Trend: 2003-2007 (cm/yr): ", tws.slope1, "±", tws.sdev1, sep=""), cex=0.5, side=1, line=-1.8, adj=0.15)
abline(lm(SPLIT.1.ALL.VNB[,2] ~ SPLIT.1.ALL.VNB[,1]), lwd=2, lty=2, col="deepskyblue")
tws.slope2 <- round(as.vector(coef(lm(SPLIT.2.ALL.VNB[,2] ~ SPLIT.2.ALL.VNB[,1]))[2]), 2)
tws.sdev2 <- round(as.vector(coef(summary(lm(SPLIT.2.ALL.VNB[,2] ~ SPLIT.2.ALL.VNB[,1])))[, "Std. Error"][2]), 2)
mtext(paste("GRACE Trend: 2007-2012 (cm/yr): ", tws.slope2, "±", tws.sdev2, sep=""), cex=0.5, side=1, line=-1.1, adj=0.15)
abline(lm(SPLIT.2.ALL.VNB[,2] ~ SPLIT.2.ALL.VNB[,1]), lwd=2, lty=2, col="deepskyblue")
lines(VNB.OBS.TWS[,1] ~ tws.avg.VNB[,1], type="l", lwd=2, col="red")
tws.slope3 <- round(as.vector(coef(lm(SPLIT.1.ALL.VNB[,6] ~ SPLIT.1.ALL.VNB[,1]))[2]), 2)
tws.sdev3 <- round(as.vector(coef(summary(lm(SPLIT.1.ALL.VNB[,6] ~ SPLIT.1.ALL.VNB[,1])))[, "Std. Error"][2]), 2)
mtext(paste("OBSERVED Trend: 2003-2007 (cm/yr): ", tws.slope3, "±", tws.sdev3, sep=""), cex=0.5, side=1, line=-1.8, adj=0.85)
abline(lm(SPLIT.1.ALL.VNB[,6] ~ SPLIT.1.ALL.VNB[,1]), lwd=2, lty=2, col="forestgreen")
tws.slope4 <- round(as.vector(coef(lm(SPLIT.2.ALL.VNB[,6] ~ SPLIT.2.ALL.VNB[,1]))[2]), 2)
tws.sdev4 <- round(as.vector(coef(summary(lm(SPLIT.2.ALL.VNB[,6] ~ SPLIT.2.ALL.VNB[,1])))[, "Std. Error"][2]), 2)
mtext(paste("OBSERVED Trend: 2007-2012 (cm/yr): ", tws.slope4, "±", tws.sdev4, sep=""), cex=0.5, side=1, line=-1.1, adj=0.85)
abline(lm(SPLIT.2.ALL.VNB[,6] ~ SPLIT.2.ALL.VNB[,1]), lwd=2, lty=2, col="forestgreen")
legend("bottomright", "(a)", bty="n", cex=0.8)
legend("top", legend=expression(Delta~TWS~(GRACE), GRACE~TREND, Delta~TWS~(OBSERVED), OBSERVED~TREND),
lty=c(1,4,1,4), lwd=c(2,2,2,2), col=c(1,"deepskyblue","red","forestgreen"),
bty="n", horiz=T, cex=0.6)
Look into the package zoo. Among many other features, it implements a new class specifically for time series that keeps track of the time base, and the plot.zoo method makes use of it. As a very, very minimal example, you can try something like a following:
a <- zoo(rnorm(5), 1:5)
b <- zoo(rpois(5, 1), 1:5)
plot(cbind(a, b))
A base R solution is also pretty simple:
a <- rnorm(5)
b <- rpois(5, 1)
plot(a ~ 1:5, xlim = c(0, 10))
points(b ~ 6:10)
I am doing a polynomial regression in R for the following data but I cannot display the correct graph of the polynomial of 2rd degree. I got the equation of polynomial of degree 2 right, however I did something wrong in the last part of the script. Could anyone help? Thanks
Here is my script:
Vegetation_Cover <- c(5,0,10,40,100,30,80,2,70,2,0)
NDVI <- c(0.35,0.32,0.36,0.68,0.75,0.48,0.75,0.35,0.70,0.34,0.28)
plot(Vegetation_Cover,NDVI, main=list
("Vegetation Cover and NDVI",cex=1.5),pch=20,cex=1.4,col="gray0")
sample1 <- data.frame(Vegetation_Cover, NDVI)
sample1
fit2 <- lm(sample1$NDVI ~ sample1$Vegetation_Cover + I(sample1$Vegetation_Cover^2))
summary(fit2)
lm(formula = sample1$NDVI ~ sample1$Vegetation_Cover + I(sample1$Vegetation_Cover^2))
anova(fit2)
pol2 <- function(x) fit2$coefficient[3]*x^2 + fit2$coefficient[2]*x + fit2$coefficient[1]
plot(sample1$Vegetation_Cover, sample1$NDVI, type="p", lwd=3)
pol2 <- function(x) fit2$coefficient[3]*x^2 + fit2$coefficient[2]*x + fit2$coefficient[1]
curve(pol2, col="red", lwd=2)
points(sample1$Vegetation_Cover, sample1$NDVI, type="p", lwd=3)
It looks like you're just missing add=TRUE for your call to curve. This seems to plot what you're looking for:
pol2 <- function(x) fit2$coefficient[3]*x^2 + fit2$coefficient[2]*x + fit2$coefficient[1]
plot(sample1$Vegetation_Cover, sample1$NDVI, type="p", lwd=3)
curve(pol2, col="red", lwd=2, add=T)
In the 4th example of layout function, which can be generated by example(layout),
I want to overlay the line plot on the barplot of samples from a normal distriubtion.
I tried lines(), plot( , add=TRUE), but with no luck.
How can I do that? Or do I have to take a different route from using layout?
Here is one approach:
library(TeachingDemos)
x <- pmin(3, pmax(-3, stats::rnorm(50)))
y <- pmin(3, pmax(-3, stats::rnorm(50)))
xhist <- hist(x, breaks=seq(-3,3,0.5), plot=FALSE)
yhist <- hist(y, breaks=seq(-3,3,0.5), plot=FALSE)
top <- max(c(xhist$density, yhist$density))
xrange <- c(-3,3)
yrange <- c(-3,3)
nf <- layout(matrix(c(2,0,1,3),2,2,byrow=TRUE), c(3,1), c(1,3), TRUE)
layout.show(nf)
par(mar=c(3,3,1,1))
plot(x, y, xlim=xrange, ylim=yrange, xlab="", ylab="")
par(mar=c(0,3,1,1))
bx.out <- barplot(xhist$density, axes=FALSE, ylim=c(0, top), space=0)
updateusr( bx.out[1:2], 0:1, xhist$mids[1:2], 0:1 )
xdens <- density(x)
lines(xdens$x, xdens$y, col='blue')
par(mar=c(3,0,1,1))
by.out <- barplot(yhist$density, axes=FALSE, xlim=c(0, top), space=0, horiz=TRUE)
updateusr( 0:1, by.out[1:2], 0:1, yhist$mids[1:2] )
ydens <- density(y)
lines(ydens$y, ydens$x, col='blue')
Note the change from counts to density so that the "heights" of the bars will match with the density and the use of updateusr from the TeachingDemos package to match the coordinate systems. Instead of updateusr you could also specify width and xlim to the barplot function.