Use SVM predict raster file in R - r

I am new to R. And I already have a SVM model in R. Right now, I have two raster image, one is the elevation, another one is the slope. The elevation and slope would be used as the predictors for SVM. And I also want to plot the results as a map.
Right now my code is as follow, but the predict for the two raster image input return all 0. It should be 0 or 1. Anything wrong?
library("e1071")
tornado=read.csv(file="~/Desktop/new.csv",header=TRUE,sep=",")
err<- rep(0,5)
m<-0
for (i in c(1:5)) {
#split the data sets into testing and training
training.indices <- sample(nrow(tornado), 1800)
training <- rep(FALSE, nrow(tornado))
training[training.indices] <- TRUE
tornado.input<- tornado[training,]
tornado.input=data.frame(tornado.input)
tornado=data.frame(tornado)
tornado$Sig <- factor(tornado$Sig)
model <- svm(Sig~slope+elevation, data=tornado.input)
pred<- predict(model, tornado[!training,] )
ConfM1<- table(tornado$Sig[!training], pred=pred)
err[i]<-(sum(ConfM1)-sum(diag(ConfM1)))/sum(ConfM1)
}
library("raster")
library("rgdal")
elevation <- raster("~/Desktop/elevation.tif")
slope<- raster("~/Desktop/slope.tif")
#plot(elevation)
#plot(slope)
logo <- brick(elevation, slope)
r1 <- predict(logo,model)
plot(r1)

Maybe it is a bit late to answer this question but I have had the same issue. The raster::predict function does not seem to provide the same output as stats:predict.
My alternative solution is simply to extract the values from your predictor rasters (slope and elevation), then use ggplot to project the results spatially.
####Convert raster into dataframe
logo_df <- as.data.frame(values(logo))
logo_df[c("x","y")] <- coordinates(logo)
logo_df <- logo_df[complete.cases(logo_df),] # in case you had holes in your raster
#### predict to this new data
pred <- predict(model, logo_df, probability = T)
logo_df$svm.fit <- attr(pred, "probabilities")[,2]
###map the predictions
ggplot(logo_df, aes(x,y,fill=svm.fit)) +
geom_tile() +
scale_fill_gradientn(colours = rev(colorRamps::matlab.like(100))) +
coord_fixed()

I was having this problem and found that when I renamed the layers of my RasterStack to be their variable names, and added the type option, it worked!
e.g.
names(logo)<-c("elevation","slope")
r1<-predict(logo,model,type="response")

Related

R: How to plot custom range of polynomial produced by lm poly fit

I'm confused by the coefficients produced by the output of lm
Here's a copy of the data I'm working with
(postprocessed.csv)
"","time","value"
"1",1,2.61066016308988
"2",2,3.41246054742996
"3",3,3.8608767964033
"4",4,4.28686048552237
"5",5,4.4923132964825
"6",6,4.50557049744317
"7",7,4.50944447661246
"8",8,4.51097373134893
"9",9,4.48788748823809
"10",10,4.34603985656981
"11",11,4.28677073671406
"12",12,4.20065901625172
"13",13,4.02514194962519
"14",14,3.91360194972916
"15",15,3.85865748409081
"16",16,3.81318053258601
"17",17,3.70380706527433
"18",18,3.61552922363713
"19",19,3.61405310598722
"20",20,3.64591327503384
"21",21,3.70234435835577
"22",22,3.73503970503372
"23",23,3.81003078640584
"24",24,3.88201196162666
"25",25,3.89872518158949
"26",26,3.97432743542362
"27",27,4.2523675144599
"28",28,4.34654855854847
"29",29,4.49276038902684
"30",30,4.67830892029687
"31",31,4.91896819673664
"32",32,5.04350767355202
"33",33,5.09073406942046
"34",34,5.18510849382162
"35",35,5.18353176529036
"36",36,5.2210776270173
"37",37,5.22643491929207
"38",38,5.11137006553725
"39",39,5.01052467981257
"40",40,5.0361056705898
"41",41,5.18149486951409
"42",42,5.36334869132276
"43",43,5.43053620818444
"44",44,5.60001072279525
I have fitted a 4th order polynomial to this data using the following script:
library(ggplot2)
library(matrixStats)
library(forecast)
df_input <- read.csv("postprocessed.csv")
x <- df_input$time
y <- df_input$value
df <- data.frame(x, y)
poly4model <- lm(y~poly(x, degree=4), data=df)
v <- seq(30, 40)
vv <- poly4model$coefficients[1] +
poly4model$coefficients[2] * v +
poly4model$coefficients[3] * (v ^ 2) +
poly4model$coefficients[4] * (v ^ 3) +
poly4model$coefficients[5] * (v ^ 4)
pdf("postprocessed.pdf")
plot(df)
lines(v, vv, col="red", pch=20, lw=3)
dev.off()
I initially tried using the predict function to do this, but couldn't get that to work, so resorted to implementing this "workaround" using some new vectors v and vv to store the data for the line in the region I am trying to plot.
Ultimatly, I am trying to do this:
Fit a 4th order polynomial to the data
Plot the 4th order polynomial over the range of data in one color
Plot the 4th order polynomial over the range from the last value to the last value + 10 (prediction) in a different color
At the moment I am fairly sure using v and vv to do this is not "the best way", however I would have thought it should work. What is happening is that I get very large values.
Here is a screenshot from Desmos. I copied and pasted the same coefficients as shown by typing poly4model$coefficients into the console. However, something must have gone wrong because this function is nothing like the data.
I think I've provided enough info to be able to run this short script. However I will add the pdf as well.
It is easiest to use the predict function to create your line. To do that, you pass the model and a data frame with the desired independent variables to the predict function.
x <- df_input$time
y <- df_input$value
df <- data.frame(x, y)
poly4model <- lm(y~poly(x, degree=4), data=df)
v <- seq(30, 40)
#Notice the column in the dataframe is the same variable name
# as the variable in the model!
predict(poly4model, data.frame(x=v))
plot(df)
lines(v, predict(poly4model, data.frame(x=seq(30, 40))), col="red", pch=20, lw=3)
NOTE
The function poly "Returns or evaluates orthogonal polynomials of degree 1 to degree over the specified set of points x: these are all orthogonal to the constant polynomial of degree 0." To return the "normal" polynomial coefficients one needs to use the "raw=TRUE" option in the function.
poly4model <- lm(y~poly(x, degree=4, raw=TRUE), data=df)
Now your equation above will work.

Loop to plot multiple ROC curves in one unique plot using ROCR

I am using ROCR package to generate ROC curves. I already have a loop to generate multiple ROC plots from multiple files. I have 30 files. But I would like to combine all 30 ROC curves in one plot (different colors if possible). I know that there are few posts about it, but I would like to use my own loop and modify it. Any suggestions?
My script:
library(ROCR)
labels <- read.table(file="c:/data1/input")
files <- list.files(path="c:/data2/", pattern="*.txt")
for(i in files){
predictions <- read.table(paste("c:/data2/",i,sep=""))
pred <- prediction(predictions, labels)
perf <- performance(pred,"tpr","fpr")
pdf(paste0("c:/data2/", i,"ROC.pdf"))
plot(perf)
dev.off()
}
When you call plot() on a prediction object, it generates a new plot. One way is to use lines() and to call the values directly. First I generate 30 models, since you did not provide your data:
library(gbm)
library(mlbench)
library(colorspace)
data(DNA)
Pal = qualitative_hcl(30)
dat = DNA[,-c(1:2)]
dat$Class = as.numeric(dat$Class == "ei")
idx = split(sample(nrow(dat)),1:nrow(dat) %% 30)
mdl = lapply(1:30,function(i){
mod = gbm(Class~.,data=dat[idx[[i]],])
predictions = predict(mod,dat[-idx[[i]],],n.trees=10,type="response")
labels = dat[-idx[[i]],"Class"]
list(labels=labels,predictions=predictions)
})
names(mdl) = paste("model",1:30)
Now we start a empty plot, iterate through the 30 models, essentially the part you need is lines(...) :
plot(NULL,xlim=c(0,1),ylim=c(0,1),
xlab="False positive rate",ylab="True positive rate")
for(i in 1:30){
predictions = mdl[[i]]$predictions
labels = mdl[[i]]$labels
pred = prediction(predictions, labels)
perf <- performance(pred,"tpr","fpr")
lines(perf#x.values[[1]],perf#y.values[[1]],col=Pal[i])
}
legend("bottomright",fill=Pal,names(mdl),ncol=5,cex=0.7)
For your example trying something like this:
files <- list.files(path="c:/data2/", pattern="*.txt")
mdl = lapply(files,read.table)
names(mdl) = gubs(".txt","",files)
plot(NULL,xlim=c(0,1),ylim=c(0,1),
xlab="False positive rate",ylab="True positive rate")
for(i in 1:30){
pred = prediction(mdl[[i]], labels)
perf <- performance(pred,"tpr","fpr")
lines(perf#x.values[[1]],perf#y.values[[1]],col=Pal[i])
}
legend("bottomright",fill=Pal,names(mdl),ncol=5,cex=0.7)

Remove linear trend from raster stack R

Trying remove the linear trend (detrend) from a monthly precipitation raster stack for the US from 1979-2015 (https://www.northwestknowledge.net/metdata/data/monthly/pr_gridMET.nc). These data are large enough that using those data as an example would be a bit unruly here so I am going to use the data from the raster package for sake of efficiency. The working model I have currently is to use `raster"::calc`` on a linear model and pull the residuals. My understanding is that those residuals are the detrended series, but I am not 100% sure that is correct. The code I am using is as follows:
library(raster)
fn <- raster(system.file("external/test.grd", package="raster"))
fn2 <- fn+1000
fn3 <- fn +500
fn4 <- fn +750
fn5 <- fn+100
fns <- stack(fn, fn2, fn3, fn4, fn5)
time <- 1:nlayers(fns)
# Get residuals to detrend the raw data
get_residuals <- function(x) {
if (is.na(x[1])){
rep(NA, length(x)) }
else {
m <- lm(x~time)
q <- residuals(m)
return(q)
}
}
detrended_fns <- calc(fns, get_residuals) # Create our residual (detrended) time series stack
I feel like I'm missing something here. Can anyone confirm that I'm on the right track here? If I'm not any suggestions on how to properly detrend these data would be helpful! thanks!
The residuals remove the slope and the intercept and you get anomalies. Perhaps you only want to remove the slope? In that case you could add the intercept to the residuals in get_residuals
q <- residuals(m) + coefficients(m)[1]
Or better:
q <- residuals(m) + predict(m)[1]
So that you use year 1 (and not year 0) as the base, and it would also work if time is, say, 2000:2004
You could also take the last year, mid year, or average as base.

Bootstrapping regression coefficients from random subsets of data

I’m attempting to perform a regression calibration on two variables using the yorkfit() function in the IsoplotR package. I would like to estimate the confidence interval of the bootstrapped slope coefficient from this model; however, instead of using the typical bootstrap method below, I’d like to only perform the iterations on 75% of the data (randomly selected) at a time. So far, using the following sample data, I managed to bootstrap the slope coefficient result of the yorkfit() function:
library(boot)
library(IsoplotR)
X <- c(9.105,8.987,8.974,8.994,8.996,8.966,9.035,9.215,9.239,
9.307,9.227,9.17, 9.102)
Y <- c(28.1,28.9,29.6,29.5,29.0,28.8,28.5,27.3,27.1,26.5,
27.0,27.5,28.4)
n <- length(X)
sX <- X*0.02
sY <- Y*0.05
rXY <- rep(0.8,n)
dat <- cbind(X,sX,Y,sY,rXY)
fit <- york(dat)
boot.test <- function(data,indices){
sample = data[indices,]
mod = york(sample)
return (mod$b)
}
result <- boot(data=dat, statistic = boot.test, R=1000)
boot.ci(result, type = 'bca')
...but I'm not really sure where to go from here. Any help to move me in the right direction would be greatly appreciated. I’m new to R so I apologize if question is ambiguous. Thanks.
Based on the package documentation, you should be able to use the ran.gen argument, with sim="parametric", to sample using a custom function. In this case, the sample is a certain percent of the total observations, chosen at random. Something like the following should accomplish what you want:
result <- boot(
data=dat,
statistic =boot.test,
R=1000,
sim="parametric",
ran.gen=function(data, percent){
n=nrow(data)
indic=runif(n)
data[rank(indic, ties.method="random")<=round(n*percent,0),]
},
percent=0.75)

Problems with points and apply R for linear discriminant analysis

I have some coding question, which arise doing some exercises in linear discriminant analysis. We are using the Iris data:
## Read in dataset, set seed, load package
Iris <- iris[,-(1:2)]
grIris <- as.integer(iris[,"Species"])
set.seed(16)
library(MASS)
## Read n
n <- nrow(Iris)
As you can see, we delte the first and second column of iris. What I want to do is a bootstrap for this data using linear discriminant analysis, here is my code:
ind <- replicate(B,sample(seq(1:n),n,replace=TRUE))
This generates the indices I want to use. Note B is some large number, e.g. 1000. Now I want to use apply, but why does the following code doesn't work?
bst.sample <- apply(ind,2,lda(Species~Petal.Length+Petal.Width,data=Iris[ind,]))
where Species, Petal.Length etc. are the data from iris. If I use a for loop everything works fine, but of course I would like to implement in this more elegant way.
My second question is about points. I also wanted to calculate the estimated means, which I've done by the following code
est.lda <- vector("list",B)
est.qda <- vector("list",B)
mu_hat_1 <- mu_hat_2 <- mu_hat_3 <- matrix(0,ncol=B,nrow=2)
for (i in 1:B){
est.lda[[i]] <- lda(Species~Petal.Length+Petal.Width,data=Iris[ind[,i],])
mu_hat_1[,i] <- est.lda[[i]]$means[1,]
mu_hat_2[,i] <- est.lda[[i]]$means[2,]
mu_hat_3[,i] <- est.lda[[i]]$means[3,]
est.qda[[i]] <- qda(Species~Petal.Length+Petal.Width,data=Iris[ind[,i],])
}
plot(mu_hat_1[1,],mu_hat_1[2,],pch=4)
points(mu_hat_2[1,],mu_hat_2[2,],pch=4,col=2)
points(mu_hat_3[1,],mu_hat_3[2,],pch=4,col=3)
The plot at the end should show three region with the expected mean of the three classes. However just the first plot is shown.
Thank you for your help.
B <- 10
ind <- replicate(B,sample(seq(1:n),n,replace=TRUE))
#you need to pass a function to apply
bst.sample <- apply(ind,2,
function(i) lda(Species~Petal.Length+Petal.Width,data=Iris[i,]))
#extract means
bst.means <- lapply(bst.sample,function(x) x$means)
#bind means into array
library(abind)
bst.means <- do.call(function(...) abind(..., along=3), bst.means)
#you need to make sure that alle points are inside the axis limits
plot(bst.means[1,1,],bst.means[1,2,],
xlim=range(bst.means[,1,]), ylim=range(bst.means[,2,]),
xlab=dimnames(bst.means)[[2]][1],ylab=dimnames(bst.means)[[2]][2],
col=1)
points(bst.means[2,1,],bst.means[2,2,], col=2)
points(bst.means[3,1,],bst.means[3,2,], col=3)
legend("topleft", legend=dimnames(bst.means)[[1]], col=1:3, pch=1)

Resources