Related
I am trying to run corLocal on 2 stacks (average temperatures, day of the year for spring- over a 17 year period. I.e. 17 tiff files for temp and 17 tiff files for day of the year). I've used the following line
p<-corLocal(stack1,stack2,method="kendall") ##or pearson
I would like to get the p value and sens slope value as 2 separate rasters but I am not sure what my output is - it ranges between -0.5 and 0.5. Thank you,
p<-corLocal(stack1,stack2,method="kendall")
p value and slope value 2 separate rasters files
Example data
library(terra)
set.seed(0)
s <- r <- rast(ncol=10, nrow=10, nlyr=17)
values(r) <- runif(size(r))
values(s) <- runif(size(s))
sr <- sds(r,s)
To get the Kendall correlation coefficient for each cell (across the 17 layers).
ken <- lapp(sr, \(x,y) {
out <- rep(NA, nrow(x))
for (i in 1:nrow(x)) {
out[i] <- cor(x[i,], y[i,], "kendall", use="complete.obs")
}
out
})
And to get the p-value
pken <- lapp(sr, \(x,y) {
out <- rep(NA, nrow(x))
for (i in 1:nrow(x)) {
out[i] <- cor(x[i,], y[i,], "kendall", use="complete.obs")
out[i] <- cor.test(x[i,], y[i,], method="kendall", use="complete.obs")$p.value)
}
out
})
For completeness: the corLocal method (called focalPairs in "terra") can be usd to compute the focal correlation between layers.
library(terra)
r <- rast(system.file("ex/logo.tif", package="terra"))
set.seed(0)
r[[1]] <- flip(r[[1]], "horizontal")
r[[2]] <- flip(r[[2]], "vertical") + init(rast(r,1), runif)
r[[3]] <- init(rast(r,1), runif)
Kendall correlation coefficient and p-value
x <- focalPairs(r, w=5, \(x, y) cor(x, y, "kendall", use="complete.obs"))
y <- focalPairs(r, w=5, \(x, y) cor.test(x, y, method="kendall", use="complete.obs")$p.value)
I'm trying to run the following function mentioned below using OptimParallel in R on a certain data set. The code is as follows:
install.packages("optimParallel")
install.packages('parallel')
library(parallel)
library(optimParallel)
library(doParallel)
library(data.table)
library(Rlab)
library(HDInterval)
library(mvtnorm)
library(matrixStats)
library(dplyr)
library(cold)
## Bolus data:
data("bolus")
d1 <- bolus
d1$group <- ifelse(d1$group == "2mg",1,0)
colnames(d1) <- c("index",'group',"time","y")
d2 <- d1 %>% select(index, y, group, time)
colnames(d2) <- c('index','y','x1','x2') ### Final data
## Modification of the objective function:
## Another approach:
dpd_poi <- function(x,fixed = c(rep(FALSE,5))){
params <- fixed
dpd_1 <- function(p){
params[!fixed] <- p
alpha <- params[1]
beta_0 <- params[2]
beta_1 <- params[3]
beta_2 <- params[4]
rho <- params[5]
add_pi <- function(d){
k <- beta_0+(d[3]*beta_1)+(d[4]*beta_2)
k1 <- exp(k) ## for Poisson regression
d <- cbind(d,k1)
}
dat_split <- split(x , f = x$index)
result <- lapply(dat_split, add_pi)
result <- rbindlist(result)
result <- as.data.frame(result)
colnames(result) <- c('index','y','x1','x2','lamb')
result_split <- split(result, f = result$index)
expression <- function(d){
bin <- as.data.frame(combn(d$y , 2))
pr <- as.data.frame(combn(d$lamb , 2))
## Evaluation of the probabilities:
f_jk <- function(u,v){
dummy_func <- function(x,y){
ppois(x, lambda = y)
}
dummy_func_1 <- function(x,y){
ppois(x-1, lambda = y)
}
k <- mapply(dummy_func,u,v)
k_1 <- mapply(dummy_func_1,u,v)
inv1 <- inverseCDF(as.matrix(k), pnorm)
inv2 <- inverseCDF(as.matrix(k_1), pnorm)
mean <- rep(0,2)
lower <- inv2
upper <- inv1
corr <- diag(2)
corr[lower.tri(corr)] <- rho
corr[upper.tri(corr)] <- rho
prob <- pmvnorm(lower = lower, upper = upper, mean = mean, corr = corr)
prob <- (1+(1/alpha))*(prob^alpha)
## First expression: (changes for Poisson regression)
lam <- as.vector(t(v))
v1 <- rpois(1000, lambda = lam[1])
v2 <- rpois(1000, lambda = lam[2])
all_possib <- as.data.frame(rbind(v1,v2))
new_func <- function(u){
k <- mapply(dummy_func,u,v)
k_1 <- mapply(dummy_func_1,u,v)
inv1_1 <- inverseCDF(as.matrix(k), pnorm)
inv2_1 <- inverseCDF(as.matrix(k_1), pnorm)
mean1 <- rep(0,2)
lower1 <- inv2_1
upper1 <- inv1_1
corr1 <- diag(2)
corr1[lower.tri(corr1)] <- rho
corr1[upper.tri(corr1)] <- rho
prob1 <- pmvnorm(lower = lower1, upper = upper1, mean = mean1, corr = corr1)
prob1 <- prob1^(alpha)
}
val <- apply(all_possib, 2, new_func)
val_s <- mean(val) ## approximation
return(val_s - prob)
}
final_res <- mapply(f_jk, bin, pr)
final_value <- sum(final_res)
}
u <- sapply(result_split,expression)
return(sum(u))
}
}
## run the objective function:
cl <- makeCluster(25)
setDefaultCluster(cl=cl)
clusterExport(cl,c('d2','val'))
clusterEvalQ(cl,c(library(data.table), library(Rlab),library(HDInterval),library(mvtnorm),library(matrixStats),library(dplyr),library(cold)))
val <- dpd_poi(d2, c(0.5,FALSE,FALSE,FALSE,FALSE))
optimParallel(par = c(beta_0 =1, beta_1 =0.1 ,beta_2 = 1,rho=0.2),fn = val ,method = "L-BFGS-B",lower = c(-10,-10,-10,0),upper = c(Inf,Inf,Inf,1))
stopCluster(cl)
After running for some time, it returns the following error:
checkForRemoteErrors(val)
9 nodes produced errors; first error: missing value where TRUE/FALSE needed
However, when I make a minor change in the objective function (pick 2 random numbers from rpois instead of 1000) and run the same code using optim, it converges and gives me a proper result. This is a Monte Carlo simulation and it does not make sense to draw so few Poisson variables. I have to use optimParllel, otherwise, it takes way too long to converge. I could also run this code using simulated data.
I'm unable to figure out where the issue truly lies. I truly appreciate any help in this regard.
I have a data set and want to essentially fit a linear model with a rolling time window, find the fitted values and calculate the errors in the estimate. I have functions which calculate the error and I have the start of the algorithm, but I keep getting null time series with the algorithm below. Can anybody spot a fix for it?
rollerOLS <- function(data, measure, predict, predictor){
error <- c()
m <- dim(data)[1]
for(i in 1:(floor(m/142)-10)){
data.new <- as.data.frame(data[c((1+(142*(i-1))):((i+9)*142)),])
data.pred <- as.data.frame(data[c((1+(142*(i+9))):((i+10)*142)-1),])
n <- dim(data.new)[1]
k <- dim(data.pred)[1]
x <- data.new[-1,predictor]
y <- data.new[-n, predict]
mod <- lm(y ~ x)
ts <- predict.lm(mod, newdata = data.frame(data.pred[, predictor]), interval="none")
actual <- data.pred[-k,predict]
error[i] <- measure(ts, actual)
}
return(mod)
}
Note that 142 is specific to my data set.
The problem was in the ts line and here is the fix.
rollerOLS <- function(data, measure, predict, predictor){
error <- c()
m <- dim(data)[1]
for(i in 1:(floor(m/142)-10)){
data.new <- as.data.frame(data[c((1+(142*(i-1))):((i+9)*142)),])
data.pred <- as.data.frame(data[c((1+(142*(i+9))):((i+10)*142)-1),])
n <- dim(data.new)[1]
k <- dim(data.pred)[1]
x <- data.new[-1,predictor]
y <- data.new[-n, predict]
mod <- lm(y ~ x)
ts <- mod$coefficients[1] + mod$coefficients[2]*data.pred[-1,predictor]
actual <- data.pred[-k,predict]
error[i] <- measure(ts, actual)
}
return(error)
}
I have used the svm function in the e1071 package of R software to model my data using variables selected by my feature selection method. I have obtained predictions from this model using the predict.svm function in the same package. I want to compute the value of the objective function of the svm model using the R software. How can I do this?
Below is my code for my first feature selection technique-Information Gain
P1<-Fold1T$Class_NASQ
InfGainF1 <- information.gain(P1~., Fold1T[,-20])
subset <- cutoff.k(InfGainF1, 8)
f <- as.simple.formula(subset, "P1")
ModelInGF1<-svm(as.factor(P1)~ NSDQ.COMP+S.P.100+S.P.500+NYSE.COMP+NYSE.A.M.MKT +
RSEL.2000+ALL.ORD+HG.SENG ,data=Fold1T[,-20], kernel="radial",gamma=0.5,cost=16)
PredictInGF1<-predict(ModelInGF1,NewData=Fold1V[,-20])
######### Accuracy ########
confusionMatrix(PredictInGF1, P1)
Thanks
While learning about SVR back in 2010 I explored how predicted values are computed. To do this, I went over the file "svminternals.pdf" located in the e1071/doc subfolder and play my custom code (shown after the toy data) using the following data set
ToyData <- data.frame(X1=c(12.4,14.6,13.4,12.9,15.2,13.6,9.2), X2=c(2.1,9.2,1.9,0.8,1.1,8.6,1.1),Y=c(14.2,16.9,15.5,14.7,17.3,16,10.9))
You may explore the following code to see if is somehow helpful to you.
#LINEAR KERNEL
ToyData <- read.csv("ToyData.csv", header=T)
X <- as.matrix(ToyData[,1:2])
Y <- as.vector(ToyData[,3])
SVRLinear <- svm (X, Y, kernel="linear", epsilon=0.1, cost=1, scale=FALSE)
V <- as.matrix(SVRLinear$SV)
Vt <- t(V)
A <- as.matrix(SVRLinear$coefs)
(r <- SVRLinear$rho)
write.csv(V, file="SVLinear.csv")
write.csv(A, file="CoefsLinear.csv")
F <- (X %*% Vt) %*% A - r
write.csv(F, file="FittedLinear.csv")
#RBF KERNEL: Exp[(-gamma||x-z||^2)/2]
ToyData <- read.csv("ToyData.csv", header=T)
X <- as.matrix(ToyData[,1:2])
Y <- as.vector(ToyData[,3])
SVRRadial <- svm (X, Y, kernel="radial", epsilon=0.1, gamma=0.1, cost=5, scale=FALSE)
V <- as.matrix(SVRRadial$SV)
A <- as.matrix(SVRRadial$coefs)
(g <- SVRRadial$gamma)
(r <- SVRRadial$rho)
write.csv(V, file="SVRadial.csv")
write.csv(A, file="CoefsRadial.csv")
Kernel <- matrix(0, nrow(X), nrow(V))
for (i in 1:nrow(X)) {
for (j in 1:nrow(V)) {
Xi <- X[i,]
Vj <- V[j,]
XiMinusVj <- Xi - Vj
SumSqXiMinusVj <- XiMinusVj %*% XiMinusVj
Kernel[i,j] <- exp(-g*SumSqXiMinusVj)
}
}
F <- Kernel %*% A - r
write.csv(F, file="FittedRadial.csv")
I want to add the answer how to reproduce the predict value with the model parameter when scale option is open.In e1071,data are default scaled internally (both x and y variables) to zero mean and unit variance. The center and scale values are returned and used for later predictions.(http://www.inside-r.org/node/57517). According to the above code,I write the following code which may help to you.
ToyData <- data.frame(X1=c(12.4,14.6,13.4,12.9,15.2,13.6,9.2), X2=c(2.1,9.2,1.9,0.8,1.1,8.6,1.1),Y=c(14.2,16.9,15.5,14.7,17.3,16,10.9))
X <- as.matrix(ToyData[,1:2])
Y <- as.vector(ToyData[,3])
SVRRadial <- svm (X, Y, kernel="radial", epsilon=0.1, gamma=0.1, cost=5)
pred<-predict(SVRRadial,X)
toys<-ToyData
#scale the feature
sc_x<-data.frame(SVRRadial$x.scale)
for(col in row.names(sc_x)){
toys[[col]]<-(ToyData[[col]]-sc_x[[col,1]])/sc_x[[col,2]]
}
#compute the predict value, the method is same to the above code
X<-as.matrix(toys[,1:2])
V <- as.matrix(SVRRadial$SV)
A <- as.matrix(SVRRadial$coefs)
g <- SVRRadial$gamma
r <- SVRRadial$rho
Kernel <- matrix(0, nrow(X), nrow(V))
for (i in 1:nrow(X)) {
for (j in 1:nrow(V)) {
Xi <- X[i,]
Vj <- V[j,]
XiMinusVj <- Xi - Vj
SumSqXiMinusVj <- XiMinusVj %*% XiMinusVj
Kernel[i,j] <- exp(-g*SumSqXiMinusVj)
}
}
F <- Kernel %*% A - r
#restore the predict value from standard format to original format
my_pred<-F
sc_y<-data.frame(SVRRadial$y.scale)
my_pred<-my_pred*sc_y[[2]]+sc_y[[1]]
summary(my_pred-pred)
reference link:How to reproduce predict.svm in R?
I am just really getting into trying to write MLE commands in R that function and look similar to native R functions. In this attempt I am trying to do a simple MLE with
y=b0 + x*b1 + u
and
u~N(0,sd=s0 + z*s1)
However, even such a simple command I am having difficulty coding. I have written a similar command in Stata in a handful of lines
Here is the code I have written so far in R.
normalreg <- function (beta, sigma=NULL, data, beta0=NULL, sigma0=NULL,
con1 = T, con2 = T) {
# If a formula for sigma is not specified
# assume it is the same as the formula for the beta.
if (is.null(sigma)) sigma=beta
# Grab the call expression
mf <- match.call(expand.dots = FALSE)
# Find the position of each argument
m <- match(c("beta", "sigma", "data", "subset", "weights", "na.action",
"offset"), names(mf), 0L)
# Adjust names of mf
mf <- mf[c(1L, m)]
# Since I have two formulas I will call them both formula
names(mf)[2:3] <- "formula"
# Drop unused levels
mf$drop.unused.levels <- TRUE
# Divide mf into data1 and data2
data1 <- data2 <- mf
data1 <- mf[-3]
data2 <- mf[-2]
# Name the first elements model.frame which will be
data1[[1L]] <- data2[[1L]] <- as.name("model.frame")
data1 <- as.matrix(eval(data1, parent.frame()))
data2 <- as.matrix(eval(data2, parent.frame()))
y <- data1[,1]
data1 <- data1[,-1]
if (con1) data1 <- cbind(data1,1)
data2 <- unlist(data2[,-1])
if (con2) data2 <- cbind(data2,1)
data1 <- as.matrix(data1) # Ensure our data is read as matrix
data2 <- as.matrix(data2) # Ensure our data is read as matrix
if (!is.null(beta0)) if (length(beta0)!=ncol(data1))
stop("Length of beta0 need equal the number of ind. data2iables in the first equation")
if (!is.null(sigma0)) if (length(sigma0)!=ncol(data2))
stop("Length of beta0 need equal the number of ind. data2iables in the second equation")
# Set initial parameter estimates
if (is.null(beta0)) beta0 <- rep(1, ncol(data1))
if (is.null(sigma0)) sigma0 <- rep(1, ncol(data2))
# Define the maximization function
normMLE <- function(est=c(beta0,sigma0), data1=data1, data2=data2, y=y) {
data1est <- as.matrix(est[1:ncol(data1)], nrow=ncol(data1))
data2est <- as.matrix(est[(ncol(data1)+1):(ncol(data1)+ncol(data2))],
nrow=ncol(data1))
ps <-pnorm(y-data1%*%data1est,
sd=data2%*%data2est)
# Estimate a vector of log likelihoods based on coefficient estimates
llk <- log(ps)
-sum(llk)
}
results <- optim(c(beta0,sigma0), normMLE, hessian=T,
data1=data1, data2=data2, y=y)
results
}
x <-rnorm(10000)
z<-x^2
y <-x*2 + rnorm(10000, sd=2+z*2) + 10
normalreg(y~x, y~z)
At this point the biggest issue is finding an optimization routine that does not fail when the some of the values return NA when the standard deviation goes negative. Any suggestions? Sorry for the huge amount of code.
Francis
I include a check to see if any of the standard deviations are less than or equal to 0 and return a likelihood of 0 if that is the case. Seems to work for me. You can figure out the details of wrapping it into your function.
#y=b0 + x*b1 + u
#u~N(0,sd=s0 + z*s1)
ll <- function(par, x, z, y){
b0 <- par[1]
b1 <- par[2]
s0 <- par[3]
s1 <- par[4]
sds <- s0 + z*s1
if(any(sds <= 0)){
return(log(0))
}
preds <- b0 + x*b1
sum(dnorm(y, preds, sds, log = TRUE))
}
n <- 100
b0 <- 10
b1 <- 2
s0 <- 2
s1 <- 2
x <- rnorm(n)
z <- x^2
y <- b0 + b1*x + rnorm(n, sd = s0 + s1*z)
optim(c(1,1,1,1), ll, x=x, z=z,y=y, control = list(fnscale = -1))
With that said it probably wouldn't be a bad idea to parameterize the standard deviation in such a way that it is impossible to go negative...