I am trying to get lapply() to run models (more specifically, path models from the piecewiseSEM package). These path models are built from separate component models (from the nlme package), which are then combined into the final path model. I've had to use some custom functions from this post to get the models to work. However, when I try to run the path model using lapply-created objects, the models do not run, yet they run perfectly fine when not built with lapply. I want to use lapply because I also want to use parLapply later on. Here is a reproducible example:
This is the code I used for the custom functions for the nlme package:
library(nlme)
library(piecewiseSEM)
#### corHaversine - spatial correlation with haversine distance
# Calculates the geodesic distance between two points specified by radian latitude/longitude using Haversine formula.
# output in km
haversine <- function(x0, x1, y0, y1) {
a <- sin( (y1 - y0)/2 )^2 + cos(y0) * cos(y1) * sin( (x1 - x0)/2 )^2
v <- 2 * asin( min(1, sqrt(a) ) )
6371 * v
}
# function to compute geodesic haversine distance given two-column matrix of longitude/latitude
# input is assumed in form decimal degrees if radians = F
# note fields::rdist.earth is more efficient
haversineDist <- function(xy, radians = F) {
if (ncol(xy) != 2) stop("Input must have two columns (longitude and latitude)")
if (radians == F) xy <- xy * pi/180
hMat <- matrix(NA, ncol = nrow(xy), nrow = nrow(xy))
for (i in 1:nrow(xy) ) {
for (j in i:nrow(xy) ) {
hMat[j,i] <- haversine(xy[i,1], xy[j,1], xy[i,2], xy[j,2])
}
}
as.dist(hMat)
}
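As a quick sanity check of the distance function (my own addition, not from the original post): the distance from London (lon -0.13, lat 51.51) to Paris (lon 2.35, lat 48.86) should come out at roughly 344 km.
# input is a two-column lon/lat matrix in decimal degrees
haversineDist(rbind(c(-0.13, 51.51), c(2.35, 48.86)))
# ~343.5 (km)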
## for most methods, machinery from corSpatial will work without modification
Initialize.corHaversine <- nlme:::Initialize.corSpatial
recalc.corHaversine <- nlme:::recalc.corSpatial
Variogram.corHaversine <- nlme:::Variogram.corSpatial
corFactor.corHaversine <- nlme:::corFactor.corSpatial
corMatrix.corHaversine <- nlme:::corMatrix.corSpatial
coef.corHaversine <- nlme:::coef.corSpatial
"coef<-.corHaversine" <- nlme:::"coef<-.corSpatial"
## Constructor for the corHaversine class
corHaversine <- function(value = numeric(0), form = ~ 1, mimic = "corSpher", nugget = FALSE, fixed = FALSE) {
spClass <- "corHaversine"
attr(value, "formula") <- form
attr(value, "nugget") <- nugget
attr(value, "fixed") <- fixed
attr(value, "function") <- mimic
class(value) <- c(spClass, "corStruct")
value
} # end corHaversine class
environment(corHaversine) <- asNamespace("nlme")
Dim.corHaversine <- function(object, groups, ...) {
if (missing(groups)) return(attr(object, "Dim"))
val <- Dim.corStruct(object, groups)
val[["start"]] <- c(0, cumsum(val[["len"]] * (val[["len"]] - 1)/2)[-val[["M"]]])
## will use third component of Dim list for spClass
names(val)[3] <- "spClass"
val[[3]] <- match(attr(object, "function"), c("corSpher", "corExp", "corGaus", "corLin", "corRatio"), 0)
val
}
environment(Dim.corHaversine) <- asNamespace("nlme")
## getCovariate method for corHaversine class
getCovariate.corHaversine <- function(object, form = formula(object), data) {
if (is.null(covar <- attr(object, "covariate"))) { # if object lacks covariate attribute
if (missing(data)) { # if object lacks data
stop("need data to calculate covariate")
}
covForm <- getCovariateFormula(form)
if (length(all.vars(covForm)) > 0) { # if covariate present
if (attr(terms(covForm), "intercept") == 1) { # if formula includes intercept
covForm <- eval(parse(text = paste("~", deparse(covForm[[2]]),"-1",sep=""))) # remove intercept
}
# can only take covariates with correct names
if (length(all.vars(covForm)) > 2) stop("corHaversine can only take two covariates, 'lon' and 'lat'")
if ( !all(all.vars(covForm) %in% c("lon", "lat")) ) stop("covariates must be named 'lon' and 'lat'")
covar <- as.data.frame(unclass(model.matrix(covForm, model.frame(covForm, data, drop.unused.levels = TRUE) ) ) )
covar <- covar[,order(colnames(covar), decreasing = T)] # order as lon ... lat
}
else {
covar <- NULL
}
if (!is.null(getGroupsFormula(form))) { # if groups in formula extract covar by groups
grps <- getGroups(object, data = data)
if (is.null(covar)) {
covar <- lapply(split(grps, grps), function(x) as.vector(dist(1:length(x) ) ) ) # filler?
}
else {
giveDist <- function(el) {
el <- as.matrix(el)
if (nrow(el) > 1) as.vector(haversineDist(el))
else numeric(0)
}
covar <- lapply(split(covar, grps), giveDist )
}
covar <- covar[sapply(covar, length) > 0] # no 1-obs groups
}
else { # if no groups in formula extract distance
if (is.null(covar)) {
covar <- as.vector(dist(1:nrow(data) ) )
}
else {
covar <- as.vector(haversineDist(as.matrix(covar) ) )
}
}
if (any(unlist(covar) == 0)) { # check that no distances are zero
stop("cannot have zero distances in \"corHaversine\"")
}
}
covar
} # end method getCovariate
environment(getCovariate.corHaversine) <- asNamespace("nlme")
Here is the reproducible example/problem with the mtcars dataset:
set.seed(42) ## for sake of reproducibility
mtcars <- within(mtcars, {
lon <- runif(nrow(mtcars))
lat <- runif(nrow(mtcars))
})
#this makes a list of dataframes
empty_list<-replicate(n = 10,
expr = mtcars,
simplify = F)
#doing it the lapply method
model1<-lapply(empty_list, FUN = function(i)
nlme::gls(disp ~ wt,
correlation = corHaversine(form=~lon+lat,mimic="corSpher"),
data = i)
)
model2<-lapply(empty_list, FUN = function(i)
nlme::gls(wt ~ hp,
correlation = corHaversine(form=~lon+lat,mimic="corSpher"),
data = i)
)
model1.2<-psem(model1[[1]],model2[[1]], data = empty_list[[1]])
summary(model1.2, .progressBar = F, standardize = "scale")
This results in the following error:
Error in max(sapply(nm[dfdetect], nrow)) :
invalid 'type' (list) of argument
But when I do this without lapply, it works out fine:
model3<-nlme::gls(disp ~ wt,
correlation = corHaversine(form=~lon+lat,mimic="corSpher"),
data = empty_list[[1]])
model4<-nlme::gls(wt ~ hp,
correlation = corHaversine(form=~lon+lat,mimic="corSpher"),
data = empty_list[[1]])
model3.4<-psem(model3, model4)
summary(model3.4, .progressBar = F, standardize = "scale")
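My current (unverified) guess is that the lapply-fitted models store data = i in their call, and the symbol i no longer points at anything useful when psem()/summary() later re-evaluates the call. A workaround I have been experimenting with is do.call(), which records the evaluated data frame itself in the call rather than the symbol i; a sketch, assuming this is really the cause:
model1b <- lapply(empty_list, function(i)
  do.call(nlme::gls,
          list(model = disp ~ wt,
               correlation = corHaversine(form = ~ lon + lat, mimic = "corSpher"),
               data = i))
)
# model1b[[1]]$call$data now contains the data frame itself, not the symbol `i`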
I tried to calculate the BDe score in R without using the built-in functions of the various R packages.
library(bnlearn)
library(tidyverse)
# Load the ALARM network
# load("http://www.bnlearn.com/bnrepository/alarm/alarm.bif.gz")
alarmNetwork_ls <- read.bif("alarm.bif.gz")
# Load the ALARM data
data("alarm")
# Select a subset of the data for testing
test_data <- alarm[sample(nrow(alarm), 1000), ]
# The functions above match on names;
# the name of one of the nodes in the network is "LVFAILURE",
# but this name in the alarm dataset is "LVF".
# We fixed the column name using the code below.
test_data <- test_data %>%
rename(
HISTORY = HIST,
HREKG = HREK,
HRSAT = HRSA,
PRESS = PRSS,
EXPCO2 = ECO2,
MINVOL = MINV,
MINVOLSET = MVS,
HYPOVOLEMIA = HYP,
ANAPHYLAXIS = APL,
INSUFFANESTH = ANES,
PULMEMBOLUS = PMB,
INTUBATION = INT,
KINKEDTUBE = KINK,
DISCONNECT = DISC,
LVEDVOLUME = LVV,
STROKEVOLUME = STKV,
CATECHOL = CCHL,
LVFAILURE = LVF,
ERRLOWOUTPUT = ERLO,
ERRCAUTER = ERCA,
SHUNT = SHNT,
PVSAT = PVS,
ARTCO2 = ACO2,
VENTALV = VALV,
VENTLUNG = VLNG,
VENTTUBE = VTUB,
VENTMACH = VMCH
)
# calculate log-likelihood of data under the network
log_likelihood <- function(data, bn) {
n <- nrow(data)
nodes <- nodes(bn)
parents <- parents(bn)
logprob <- rep(0, n)
for (i in 1:n) {
prob <- 1
for (j in 1:length(nodes)) {
node <- nodes[[j]]
node_name <- node$name
node_parents <- parents[[j]]
if (length(node_parents) == 0) {
prob_node <- cpquery(bn, node_name, list(), data[i,])
} else {
parent_values <- data[i,node_parents]
prob_node <- cpquery(bn, node_name, list(parents = parent_values), data[i,])
}
prob <- prob * prob_node
}
logprob[i] <- log(prob)
}
return(sum(logprob))
}
# calculate number of parameters in the model
num_params <- function(bn) {
nodes <- nodes(bn)
parents <- parents(bn)
n_params <- 0
for (i in 1:length(nodes)) {
node <- nodes[[i]]
node_states <- length(node$levels[[1]])
n_parents <- length(parents[[i]])
n_params <- n_params + node_states * (n_parents + 1)
}
return(n_params)
}
# calculate BDe score
BDe_score <- function(data, bn) {
n <- nrow(data)
LL <- log_likelihood(data, bn)
d <- ncol(data)
k <- num_params(bn)
score <- LL - 0.5 * log(n) * k
return(score)
}
# test function on alarm data and network
BDe_score(test_data, alarmNetwork_ls)
I tried to run the above code but got the following error:
Error in check.nodes(nodes = node, graph = x, max.nodes = 1) : no node specified.
I know there are several R packages that calculate the BDe score, but can anyone help me resolve my issue without using those built-in functions? Or can anyone help me code Proposition 18.2 of Probabilistic Graphical Models: Principles and Techniques by Daphne Koller and Nir Friedman?
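My understanding of Proposition 18.2 so far is that the BDe/BDeu score decomposes into a sum, over nodes and parent configurations, of log-gamma terms on the prior and posterior Dirichlet counts. Here is a minimal sketch of that decomposition computed straight from contingency tables (my own attempt; dag and iss are my names, and I have not verified it against the book):
library(bnlearn)
bde_node <- function(node, dag, data, iss = 1) {
  pars <- parents(dag, node)
  r <- nlevels(data[[node]])                                       # states of the node
  q <- if (length(pars)) prod(sapply(data[pars], nlevels)) else 1  # parent configurations
  # N_ijk: counts of each node state (rows) per parent configuration (columns)
  if (length(pars)) {
    N <- table(data[[node]], interaction(data[pars], drop = FALSE))
  } else {
    N <- as.matrix(table(data[[node]]))
  }
  a_ijk <- iss / (r * q)  # BDeu prior counts per cell
  a_ij  <- iss / q        # BDeu prior counts per parent configuration
  sum(lgamma(a_ij) - lgamma(a_ij + colSums(N))) +
    sum(lgamma(a_ijk + N) - lgamma(a_ijk))
}
bde_score <- function(dag, data, iss = 1) {
  sum(sapply(nodes(dag), bde_node, dag = dag, data = data, iss = iss))
}
# e.g. dag <- bn.net(alarmNetwork_ls), then compare against
# score(dag, test_data, type = "bde", iss = 1)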
So I'm trying to process the data ("final_ts") in order to forecast the time series using kernel functions and parallel computing. The analysis works like this: 1) analyse the empirical time series; 2) run a standard variable selection to subset only those variables and their time-lags that provide the best validation error; 3) run the same analysis again to choose the optimal regularization parameters and kernel function of the regularized algorithm. As a result I report the RMSE (root mean squared error).
The code is not mine, and I'm trying to get it running without problems, but as I do not have much experience, I can't work out how to solve the error that occurs: "The object '....' not found". I've spent two days trying to find the solution, so I would be very grateful for any help.
So the MAIN CODE looks like this (I'll explain what happens as I go; please don't judge me):
#download the libraries:
rm(list=ls(all=TRUE))
suppressMessages(library(Matrix))
suppressMessages(library(quantreg))
suppressMessages(library(parallel))
suppressMessages(library(compiler))
suppressMessages(library(lars))
suppressMessages(library(elasticnet))
suppressMessages(library(caret))
options(warn=-1)
#####################################################################################
#Download the sources (I'll show them in the end as a code)
###########Take the file###
ShowPlot = FALSE
lags = TRUE
ModelName = 'final_ts'
FileName = paste(ModelName, '.txt', sep = '')
########Calculate the logspace and the standard error functions #################
logspace <- function(d1, d2, n) exp(log(10)*seq(d1, d2, length.out=n))
std_err <- function(x) sd(x)/sqrt(length(x))
############# Choose the kernel function: we have 4 of them; here we use the first one (the exponential kernel) #####
Kernel.Options = c('Exponential.Kernel', 'Epanechnikov.Kernel', 'TriCubic.Kernel', 'Matern.Kernel')
Regression.Kernel = Kernel.Options[1]
############# Choose the parameters for cross validation ####
lambda = logspace(-3,0,15)
if(Regression.Kernel == 'Exponential.Kernel'){
tht = seq(from = 0., to = 10, length = 30)
}else{
tht = seq(from = 0.1, to = 3, length = 20)
}
parameters_on_grid = expand.grid(tht, lambda)
### Read Time series
d = as.matrix(read.table(FileName, header= T))
######################
original.Embedding = c('Pro', 'Syn','Piceu')
original.TargetList = original.Embedding
d = d[, original.Embedding]
#### Here you take combinations of lags (best lag are 1 - 2) #####
x.lag = 1; y.lag = 2; z.lag = 1
sp.lag.selection = c(x.lag, y.lag, z.lag)
lagged.time.series = make.lagged.ts(d, sp.lag.selection)
d = lagged.time.series$time.series
original.col = lagged.time.series$original.variables
if(lags == TRUE){ var.sel = original.col; }else{ var.sel = colnames(d)}
##### Names and embedding in the lagged dataset
if(lags == TRUE){ colnames(d) = Embedding = TargetList = LETTERS[1:ncol(d)]}else{
Embedding = TargetList = original.Embedding
}
##### length of training and test set (2 points for testing, 28 - for training)
length.testing = 2
length.training = nrow(d) - length.testing
#### Preserve training for the interactions
ts.train.preserved = d[1:length.training, var.sel]
std.ts.train = Standardizza(ts.train.preserved)
#### Preserve testing for the test (you want your algorithm to learn the real structure of the model)
ts.test.preserved = d[(length.training + 1):nrow(d), var.sel]
#### Training set:
d.training = Standardizza(d[1:length.training, ])
#### You now need to standardize the test set using mean and sd of the training set
d.testing = Standardizza.test(ts.test.preserved,ts.train.preserved)
############## Prepare for parallel computing
Lavoratori = detectCores() - 2
cl <- parallel::makeCluster(Lavoratori, setup_strategy = "sequential")
####
RegressionType = ELNET_fit_
alpha = 0.85
### should you compute all the variables or not?
BestModel = BestModelLOOCV(cl, d.training, TargetList, Embedding, parameters_on_grid, RegressionType,alpha)
I also found a formula for calculating the kernel function (it can go in a separate R source file, SOURCE 1):
Exponential.Kernel <- function(dst, theta){
dbar <- mean(dst)
krnl <- exp(-theta*dst/dbar)
return(krnl)
}
Exponential.Kernel <- cmpfun(Exponential.Kernel)
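Just to illustrate what this kernel does (my own toy check, not part of the original code): weights decay exponentially with distance scaled by the mean distance, and theta = 0 gives every point equal weight.
Exponential.Kernel(c(0.5, 1, 2, 4), theta = 1)   # weights shrink with distance
Exponential.Kernel(c(0.5, 1, 2, 4), theta = 0)   # all weights equal 1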
The functions for finding the best leave-one-out cross-validation parameters are below (SOURCE 2):
########################### Cross Validation (Leave-one-out) ##################################
BestModelLOOCV <- function(cl, X, TargetList, Embedding, grid, RegressionType,alpha){
mine_output = Jacobian_(cl, X, TargetList, Embedding, grid, RegressionType,alpha)
theta_opt = mine_output$th
lambda_opt = mine_output$lm
mine_c0 = mine_output$c0
mine_output = mine_output$J
J_ = list()
C0_ = do.call(cbind, lapply(1:ncol(X), function(x, M) unlist(M[[x]]), mine_c0))
colnames(C0_) = sapply(TargetList,function(x) paste("c0_", x, sep = ""))
for(k in 1:(nrow(X) - 1)){
J_[[k]] = do.call(rbind, lapply(1:ncol(X), function(x, M, i) unlist(M[[x]][i,]), mine_output, k))
rownames(J_[[k]]) = Embedding
colnames(J_[[k]]) = Embedding
}
BestCoefficients = list()
BestCoefficients$J = J_
BestCoefficients$c0 = C0_
BestParameters = list()
BestParameters$BestTH = theta_opt
BestParameters$BestLM = lambda_opt
return(list(BestCoefficients = BestCoefficients, BestParameters = BestParameters))
}
#####Compute the jacobian
Jacobian_ <- function(cl, X, TargetList, Embedding, grid, RegressionType,alpha){
J = c0 = list()
th = lm = c()
n_ = 1
FUN = match.fun(RegressionType)
for(trg in TargetList){
RegularizedParameters <- LOOCrossValidation(cl, X, trg, Embedding, grid, RegressionType,alpha)
########## Now compute the optimum regularized coefficients
J[[n_]] = FUN(X, trg, Embedding, RegularizedParameters$BestTH, RegularizedParameters$BestLM,alpha)
th = c(th, RegularizedParameters$BestTH)
lm = c(lm, RegularizedParameters$BestLM)
c0[[n_]] = J[[n_]]$c0
J[[n_]] = J[[n_]][-1]
n_ = n_ + 1
}
return(list(J = J, c0 = c0, th = th, lm = lm))
}
The elastic-net regularized fit is computed by this function (SOURCE 3):
ELNET_fit_ <- function(time.series, targ_col, Embedding, theta, lambda,alp){
Edim <- length(Embedding)
coeff_names <- sapply(colnames(time.series),function(x) paste("d", targ_col, "d", x, sep = ""))
block <- cbind(time.series[2:dim(time.series)[1],targ_col],time.series[1:(dim(time.series)[1]-1),])
block <- as.data.frame(apply(block, 2, function(x) (x-mean(x))/sd(x)))
lib <- 1:dim(block)[1]
pred <- 1:dim(block)[1]
coeff <- array(0,dim=c(length(pred),Edim + 1))
colnames(coeff) <- c('c0', coeff_names)
coeff <- as.data.frame(coeff)
for (ipred in 1:length(pred)){
libs = lib[-pred[ipred]]
q <- matrix(as.numeric(block[pred[ipred],2:dim(block)[2]]),
ncol=Edim, nrow=length(libs), byrow = T)
distances <- sqrt(rowSums((block[libs,2:dim(block)[2]] - q)^2))
### Kernel
Krnl = match.fun(Regression.Kernel)
Ws = Krnl(distances, theta)
############ Fit function
x = as.matrix(block[libs,2:dim(block)[2]])
y = as.matrix(block[libs,1])
x = x[seq_along(y), ]
y = y[seq_along(y)]
Ws = Ws[seq_along(y)]
x = Ws * cbind(1, x)
y = Ws * y
fit <- enet(x, y, lambda = lambda, normalize = TRUE, intercept = FALSE)
coeff[ipred,] <- predict(fit, s = alp, type="coefficients", mode="fraction")$coefficients
}
return(coeff)
}
ELNET_fit_ <- cmpfun(ELNET_fit_)
The auxiliary functions used above are as follows (SOURCE 4):
TakeLag <- function(X, species.to.lag, num.lag){
tmp = matrix(0, nrow(X), num.lag)
tmp[,1] = X[,species.to.lag]
tmp[1, 1] = NA
tmp[2:nrow(X), 1] = X[1:(nrow(X) - 1), species.to.lag]
if(num.lag > 1){
for(lag. in 2:num.lag){
tmp[,lag.] = X[,species.to.lag]
tmp[1, lag.] = NA
tmp[2:nrow(X), lag.] = tmp[1:(nrow(tmp) - 1), lag.-1]
}
}
tmp
}
make.lagged.ts <- function(X,sp.lag.selection ){
### X = time series
### sp.lag is a vector whose entry are the lags of each variable
### e.g., sp.lag = c(x.lag, y.lag, ..., u.lag)
s = list()
for(i in 1:length(sp.lag.selection)){
Lag.sp = TakeLag(X, original.Embedding[i], sp.lag.selection[i])
s[[i]] = cbind(X[,original.Embedding[i]], Lag.sp)
}
X = do.call(cbind,s)
### Remove the NA
X = X[-c(1:max(sp.lag.selection)),]
### Save the position of the unlagged variables
original.col = c()
for(k in 1:length(sp.lag.selection)){
if(k == 1){ original.col = c(original.col, 1)}else{
num.lags = sum(unlist(lapply(1:(k-1), function(x,X) X[x], sp.lag.selection)))
original.col = c(original.col, k + num.lags )
}
}
return(list(time.series = X, original.variables = original.col))
}
take.coeff <- function(X, col.to.extract, original.emb){
### To use when prediction are made using lagged variables
### Take as input the sequence X of Jacobian along the attractor
### and the species to look at
### return a new sequence of Jacobian of the interaction among those species
m = lapply(1:length(X$J), function(t, M, specie) M$J[[t]][specie,specie],
X, col.to.extract)
for(i in 1:length(m)){
colnames(m[[i]]) = rownames(m[[i]]) =original.emb
}
return(m)
}
Standardizza <- function(X){
### This return y = (x-meanx)/stdx
for(i in 1:ncol(X)){
X[,i] = (X[,i]- mean(X[,i]))/sd(X[,i])
}
return(X)
}
Standardizza.test <- function(X, Y){
### X = test set
### Y = training set
### This return y = (x-meanY)/stdY
for(i in 1:ncol(X)){
X[,i] = (X[,i]- mean(Y[,i]))/sd(Y[,i])
}
return(X)
}
###########################
#### Here you compute the quality of the forecast as mean correlation coefficient
The problem in the main code is: object 'Regression.Kernel' not found, even though I can see that it is defined in the code. Maybe the problem is connected with its type? But if I take away the quotes to turn it into a closure, I cannot impose the function restrictions.
Please help me if you can, as I don't know how to solve this.
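One guess I have not been able to verify: the PSOCK workers created by makeCluster() start with empty workspaces, so globals like Regression.Kernel are invisible in whatever runs on the cluster (presumably LOOCrossValidation, which is not shown here). If that is the cause, exporting the needed objects before calling BestModelLOOCV() might help; a sketch, with the variable list guessed from the code above:
parallel::clusterExport(cl, c("Regression.Kernel", "Exponential.Kernel",
                              "ELNET_fit_", "alpha"))
parallel::clusterEvalQ(cl, suppressMessages(library(elasticnet)))  # enet() runs on the workers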
The original dataset ("final_ts.txt"):
decy Temp CTD_S OxFix Pro Syn Piceu Naneu
2011.74221 27.60333 36.20700 27.26667 58638.33333 13107.00000 799.66667 117.66667
2011.74401 26.97950 36.13400 27.05000 71392.50000 13228.50000 1149.00000 116.50000
2011.74617 24.99750 35.34450 24.80000 264292.00000 27514.00000 2434.50000 132.50000
2011.74692 24.78400 35.25800 25.82500 208996.50000 39284.00000 3761.75000 220.75000
2011.74774 27.34225 35.86800 27.82500 114617.25000 23115.00000 2337.00000 139.75000
2011.74950 26.47875 36.18175 27.20000 97008.00000 9775.75000 855.50000 77.50000
2011.75583 26.86500 36.14575 27.47500 76255.00000 10226.75000 783.00000 99.50000
2011.75654 27.04550 36.04950 27.60000 95017.75000 10546.25000 915.25000 77.75000
2011.75962 27.06567 36.46367 26.56667 75750.00000 10194.33333 687.00000 44.00000
2011.76101 27.44700 36.48150 27.90000 38556.50000 8204.75000 791.25000 118.75000
2011.76169 27.32325 36.50075 27.80000 29848.50000 8995.50000 727.00000 159.25000
2011.76245 26.87050 36.57350 26.40000 36323.50000 10897.00000 792.00000 87.50000
2011.76349 27.43900 36.89325 27.90000 17308.50000 9678.50000 559.00000 149.00000
2011.77171 26.74050 36.90550 26.10000 20976.50000 7516.00000 489.50000 41.50000
2011.77224 26.53500 36.77500 27.22500 27229.00000 7578.00000 606.75000 159.50000
2011.77288 26.65450 36.78500 27.32500 37897.50000 10493.50000 1008.75000 209.50000
2011.77444 27.24150 36.73800 26.80000 15551.00000 8159.50000 479.00000 70.50000
2011.77505 26.67560 36.74240 27.30000 27887.80000 5290.80000 510.00000 101.20000
2011.77568 27.65125 36.69225 28.10000 12850.00000 9944.75000 640.75000 120.00000
2011.77693 28.11500 36.32750 27.85000 5694.00000 10288.50000 507.00000 32.00000
2011.77751 28.61950 36.26325 28.72500 20486.75000 10465.00000 430.50000 82.75000
2011.77814 28.60425 36.23100 28.70000 27974.50000 6977.25000 554.00000 80.50000
2011.77968 28.47200 35.69000 28.40000 126778.00000 2840.00000 537.00000 27.00000
2011.78087 28.89400 35.60650 28.35000 49250.00000 5533.00000 1004.00000 5.50000
2011.78190 28.74100 35.46200 28.80000 35698.00000 1298.00000 308.00000 23.00000
2011.78713 28.80500 35.50100 28.70000 99450.00000 5410.00000 637.50000 50.50000
2011.78887 28.39250 35.90900 28.25000 116562.00000 3758.50000 582.50000 60.00000
2011.79078 28.10550 36.40150 28.20000 13403.00000 11285.00000 472.00000 73.50000
2011.79261 27.25650 36.78350 27.45000 11205.00000 10576.00000 630.00000 74.00000
Please help if you have any guess, as I have no idea what has gone wrong.
library(GLMsData)
data(fluoro)
lambda <- seq(-2,2,0.5)
lm.out <- list()
for(i in length(lambda)){
if(i != 0){
y <- (fluoro$Dose^lambda-1)/lambda
} else {
y <- log(fluoro$Dose)
}
lm.out[[i]] <- lm(y[i]~Time, data = fluoro, na.exclude = T)
}
print(lm.out)
Error in model.frame.default(formula = y[i] ~ Time, data = fluoro, drop.unused.levels = TRUE) : variable lengths differ (found for 'Time')
I am trying to use various transformations of the response variable, fit the corresponding models, and obtain residual plots for each model.
I need some help. Thanks.
Here is a corrected version of the for loop in the question.
data(fluoro, package = "GLMsData")
lambda <- seq(-2, 2, 0.5)
lm.out <- list()
for(i in 1:length(lambda)){
if(lambda[i] != 0){
y <- (fluoro$Dose^lambda[i]-1)/lambda[i]
} else {
y <- log(fluoro$Dose)
}
lm.out[[i]] <- lm(y ~ Time, data = fluoro, na.action = na.exclude)
}
print(lm.out)
And a version with a boxcox function defined and used in a lapply loop.
boxcox <- function(x, lambda, na.rm = FALSE){
if(na.rm) x <- x[!is.na(x)]
if(lambda == 0){
log(x)
} else {
(x^lambda - 1)/lambda
}
}
lm_out2 <- lapply(lambda, \(l){
lm(boxcox(Dose, lambda = l) ~ Time, data = fluoro, na.action = na.exclude)
})
Check that both ways above produce the same results.
coef_list <- sapply(lm.out, coef)
coef_list2 <- sapply(lm_out2, coef)
identical(coef_list, coef_list2)
#[1] TRUE
smry_list <- lapply(lm.out, summary)
smry_list2 <- lapply(lm_out2, summary)
pval_list <- sapply(smry_list, \(fit) fit$coefficients[, "Pr(>|t|)"])
pval_list2 <- sapply(smry_list2, \(fit) fit$coefficients[, "Pr(>|t|)"])
identical(pval_list, pval_list2)
#[1] TRUE
R2_list <- sapply(smry_list, "[[", "r.squared")
R2_list2 <- sapply(smry_list2, "[[", "r.squared")
identical(R2_list, R2_list2)
#[1] TRUE
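Since the question also asks for residual plots for each model, one quick way (a sketch; one panel per lambda, using the nine models fitted above) is:
par(mfrow = c(3, 3))
invisible(lapply(seq_along(lm_out2), \(i){
  plot(fitted(lm_out2[[i]]), resid(lm_out2[[i]]),
       xlab = "Fitted", ylab = "Residuals",
       main = bquote(lambda == .(lambda[i])))
  abline(h = 0, lty = 2)  # reference line at zero residual
}))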
I am new to neural networks and wrote the following feed-forward neural network to implement a 3-bit binary counter. What I found was that when I predict after training with any single 3-bit input, it almost always gets the next state wrong, but when I pass cntr_inp (the matrix containing all inputs) it predicts the next state correctly for every corresponding state. I can't figure out what's happening and have been stuck for some time now.
If someone could spot the mistake I'm making, that would be helpful.
Thanks.
#Sigmoid Function
sigmoid <- function(z){
1.0/(1.0+exp(-z))
}
#Derivative of Sigmoid
sigmoid.derivative <- function(z){
z*(1-z)
}
#Randomly Generates Weights
create.weights <- function(layers){
weights <- vector("list",length(layers)-1)
for(i in 1:length(weights)){
weights[[i]] <- matrix(runif(layers[i]*layers[i+1]),nrow = layers[i],ncol = layers[i+1])
}
return(weights)
}
#Randomly Generates Biases
create.biases <- function(layers){
biases <- vector("list",length(layers)-1)
for(i in 1:length(biases)){
biases[[i]] <- runif(layers[i+1])
}
return(biases)
}
#Feedforward / Forward Propagation
feedforward <- function(inp,weights,biases){
layer <- vector("list",length(layers)-1)
for(i in 1:(length(layers)-1)){
if(i==1){
layer[[i]] <- inp %*% weights[[i]] + biases[[i]]
layer_dim <- dim(layer[[i]])
layer[[i]] <- matrix(sigmoid(as.numeric(layer[[i]])),nrow = layer_dim[1],ncol = layer_dim[2],byrow = FALSE)
}else{
layer[[i]] <- layer[[i-1]] %*% weights[[i]] + biases[[i]]
layer_dim <- dim(layer[[i]])
layer[[i]] <- matrix(sigmoid(as.numeric(layer[[i]])),nrow = layer_dim[1],ncol = layer_dim[2],byrow = FALSE)
}
}
return(layer)
}
#Calculating Delta Values
calculate_delta <- function(net_out,out,weights){
Slope <- lapply(net_out,sigmoid.derivative)
delta <- vector("list",length(Slope))
for(i in length(Slope):1){
if(i == length(Slope)){
delta[[i]] <- (out-net_out[[i]]) * Slope[[i]]
}else{
Error_hidden <- delta[[i+1]] %*% t(weights[[i+1]])
delta[[i]] <- Error_hidden * Slope[[i]]
}
}
return(delta)
}
#Updating Weights
updating_weights <- function(weights,inp,delta,net_out,step.size){
for(i in length(weights):1){
if(i != 1){
weights[[i]] <- weights[[i]] + t(net_out[[i-1]]) %*% delta[[i]] * step.size
}else{
weights[[i]] <- weights[[i]] + t(inp) %*% delta[[i]] * step.size
}
}
return(weights)
}
#Updating Biases
updating_biases <- function(biases,delta,step.size){
for(i in 1:length(biases)){
biases[[i]] <- biases[[i]] + colSums(delta[[i]]) * step.size
}
return(biases)
}
#Complete Neural Net operations (Forward and Backward Propagation)
Neural.Net <- function(layers = NULL,inp,out,epoch,step.size = 0.01,error.threshold = 0.01){
weights <- create.weights(layers)
biases <- create.biases(layers)
for(i in 1:epoch){
##Back Propagation
net_out <- feedforward(inp,weights,biases)
delta <- calculate_delta(net_out,out,weights)
weights <- updating_weights(weights,inp,delta,net_out,step.size)
biases <- updating_biases(biases,delta,step.size)
avg_error <- mean(abs(out - net_out[[length(net_out)]]))
##Printing Output for every epoch
cat("\014")
cat("------- Feed Forward Neural Nets -------\n")
cat("Inputs: ",layers[1],"\n")
cat("Outputs: ",layers[length(layers)],"\n")
cat("Hidden Layers:", length(layers)-2,"\n")
cat(paste("Epoch :",i," Avg_error = ",avg_error,"\n"))
cat("Output Values:\n\n")
print(net_out[[length(net_out)]])
Sys.sleep(0.002)
##
if(avg_error <= error.threshold){
message("Optimum values found")
break
}
}
return(list(weights = weights,biases = biases,Net_out = net_out))
}
Neural.Net.Predict <- function(Model,Test.inp){
layer <- feedforward(inp = Test.inp,weights = Model$weights,biases = Model$biases)
return(layer[[length(layer)]])  # [[ ]] returns the output matrix itself, not a one-element list
}
#Input and Hyper parameters to Neural Network
layers <- c(3,7,3)
cntr_inp <- matrix(c(0,0,0,
0,0,1,
0,1,0,
0,1,1,
1,0,0,
1,0,1,
1,1,0,
1,1,1),byrow = T,nrow = 8,ncol = 3)
cntr_out <- matrix(c(0,0,1,
0,1,0,
0,1,1,
1,0,0,
1,0,1,
1,1,0,
1,1,1,
0,0,0),byrow = T,nrow = 8,ncol = 3)
Output <- Neural.Net(layers = layers,inp = cntr_inp,out = cntr_out,epoch = 100000,step.size = 0.8,error.threshold = 0.01)
###Predicts wrong when one input point is provided
# I don't understand why this is predicting wrong. Need help understanding here
Neural.Net.Predict(Output,matrix(c(1,0,0),ncol = 3,byrow = TRUE))
###Predicts right for all inputs
Neural.Net.Predict(Output,cntr_inp)
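One thing I have started to suspect (not yet confirmed): in feedforward(), inp %*% weights[[i]] + biases[[i]] adds a length-7 vector to a matrix, and R recycles it column-major down the rows. With the 8-row training matrix the biases land on the wrong cells, while with a 1-row input they happen to line up one per column, so training and single-row prediction effectively use two differently biased networks. A sweep()-based variant that treats both cases identically would look like this (a sketch of my guess, reusing the sigmoid() defined above):
feedforward <- function(inp, weights, biases){
  act <- inp
  layer <- vector("list", length(weights))
  for(i in seq_along(weights)){
    z <- sweep(act %*% weights[[i]], 2, biases[[i]], "+")  # add bias j to column j, any row count
    layer[[i]] <- sigmoid(z)  # sigmoid() is vectorized, so matrix dimensions are preserved
    act <- layer[[i]]
  }
  return(layer)
}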
I am trying to run a multivariate regression with different layers in a RasterStack using focal {raster} or localFun {raster}. With the help of a similar post and the raster reference manual, my code works fine with single RasterLayers as input (see the reproducible, albeit probably 'clunky', example below). However, I would like to do this using different layers in a RasterStack, as described in SECTION2 of the code below.
I would very much appreciate any advice.
Thank you
CODE:
library(raster)
#%%%%%%%%%%%%%%%%%%%%%
## SECTION1
#%%%%%%%%%%%%%%%%%%%%%
# create test data
set.seed(0)
resp = expl = raster(nrow=10, ncol=10)
# response variable
resp = setValues(resp,runif(100,min=15,max=45))
# explanatory variable
expl = setValues(expl,runif(100,min=2,max=6))
expl = expl * resp
resp[1:5] = NA; expl[1:5] = NA # add some NA values
par(mfrow=c(1,2))
plot(resp); plot(expl)
#..............................................................
# check global lm() results
data1.df = na.omit(as.data.frame(stack(list(resp=resp,expl=expl))))
head(data1.df)
data1.lm = lm(resp ~ expl, data=data1.df)
(data1.lmSum = summary(data1.lm))
data1.lmSum$coefficients[1];data1.lmSum$coefficients[2];data1.lmSum$coefficients[8]
data1.lmSum$r.squared
data1.lmSum$sigma
# pf(data1.lmSum$fstatistic[1], data1.lmSum$fstatistic[2], data1.lmSum$fstatistic[3],lower.tail = FALSE)
#..............................................................
# lm function for focal {raster} with RasterLayers
# output coefficients, r-squared, residual standard error and p-value(F stat)
# Calculate focal ("moving window") weight
fw = focalWeight(resp, 2, "Gauss")
# focal regression:
lm.focal = function(x, y, ...) {
if(all(is.na(x) & is.na(y))) {NA}
else {
m = lm(y~x)
summary(m)$r.squared #r-squared
# summary(m)$coefficients #intercept and slope together
#---> Error in setValues(x, value) : cannot use a matrix with these dimensions
# summary(m)$coefficients[1] #intercept
# summary(m)$coefficients[2] #slope
# summary(m)$coefficients[8] #p-value
# summary(m)$sigma #residual standard error
}
}
#---> How to output all at once?
lm.focal.out1 = localFun(resp, expl, w=fw, fun=lm.focal, na.rm=TRUE)
plot(lm.focal.out1)
#%%%%%%%%%%%%%%%%%%%%%
## SECTION2
#%%%%%%%%%%%%%%%%%%%%%
# create test data
set.seed(1)
resp = expl1 = expl2 = expl3 = expl4 = raster(nrow=10, ncol=10)
# x1 response variable
resp = setValues(resp,runif(100,min=15,max=45))
# x3 explanatory variables
expl1 = setValues(expl,runif(100,min=2,max=6))
expl1 = expl1 * resp
expl2 = expl1 * resp/runif(100,min=1,max=4)
expl3 = ((expl1 * resp) / 1.5 )/10
expl4 = ((expl1 * resp) / runif(100,min=0.5,max=2))/100
# add some NA values
resp[1:5] = NA; expl1[1:5] = NA; expl2[1:5] = NA; expl3[1:5] = NA; expl4[1:5] = NA
#stack RasterLayers
stack1 = stack(list(resp=resp,expl1=expl1,expl2=expl2,expl3=expl3,expl4=expl4))
# par(mfrow=c(1,1))
plot(stack1)
#..............................................................
# check global lm() results
stack1.df = na.omit(as.data.frame(stack1))
head(stack1.df)
stack1.lm = lm(resp ~ expl1+expl2+expl3+expl4, data=stack1.df)
(stack1.lmSum = summary(stack1.lm))
stack1.lmSum$coefficients[1]
stack1.lmSum$coefficients[2];stack1.lmSum$coefficients[3];stack1.lmSum$coefficients[4];stack1.lmSum$coefficients[5]
stack1.lmSum$r.squared
stack1.lmSum$sigma
pf(stack1.lmSum$fstatistic[1], stack1.lmSum$fstatistic[2], stack1.lmSum$fstatistic[3],lower.tail = FALSE)
#..............................................................
# lm function for focal {raster} with RasterStack
# output coefficients, r-squared, residual standard error and p-value(F stat)
# Calculate focal ("moving window") weight
fw.s = focalWeight(stack1, 2, "Gauss")
# focal regression with raster stack:
lm.focal.stack = function(x, ...) {
if(all(is.na(x) )) {NA}
else {
m = lm(x[1]~x[2]+x[3]+x[4]+x[5])
summary(m)$r.squared #r-squared
# summary(m)$coefficients #intercept and slope together
#---> Error in setValues(x, value) : cannot use a matrix with these dimensions
# summary(m)$coefficients[1] #intercept
# summary(m)$coefficients[2] #slope
# pf(summary(m)$fstatistic[1], summary(m)$fstatistic[2], summary(m)$fstatistic[3],lower.tail = FALSE) #p-value
# summary(m)$sigma #residual standard error
}
}
#---> How to output all at once?
lm.focal.stack.out1 <- focal(stack1, w=fw.s, fun=lm.focal.stack, na.rm=TRUE)
#---> unable to find an inherited method for function ‘focal’ for signature ‘"RasterStack"’
#plot(lm.focal.stack.out1)
#-----------------------------------------------------------
> sessionInfo()
R version 3.3.1 (2016-06-21)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows >= 8 x64 (build 9200)
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] raster_2.5-8 sp_1.2-3
loaded via a namespace (and not attached):
[1] rgdal_1.1-10 tools_3.3.1 Rcpp_0.12.5 grid_3.3.1 lattice_0.20-33
Not sure if you still need this answered, but I had the same issue. I made a function called localFunStack that writes the vector output of the local function to a RasterStack object, with a little hack to get the right layer names:
# localFun modified to write out a layer stack
localFunStack <- function(x, y, ngb=5, fun, ...) {
compareRaster(x,y)
rasterList <- list()
nc1 <- 1:(ngb*ngb)
nc2 <- ((ngb*ngb)+1):(2*(ngb*ngb))
if (canProcessInMemory(x, n=2*ngb)) {
vx <- getValuesFocal(x, 1, nrow(x), ngb=ngb)
vy <- getValuesFocal(y, 1, nrow(y), ngb=ngb)
v <- apply(cbind(vx, vy), 1, function(x, ...) fun(x[nc1], x[nc2], ...))
for (j in 1:nrow(v)) {
if (length(rasterList) < j) {
rasterList[[j]] <- raster(x)
}
values(rasterList[[j]]) <- v[j,]
}
}
else {
tr <- blockSize(x)
pb <- pbCreate(tr$n, label='localFun', ...)
for (i in 1:tr$n) {
vx <- getValuesFocal(x, tr$row[i], tr$nrows[i], ngb=ngb)
vy <- getValuesFocal(y, tr$row[i], tr$nrows[i], ngb=ngb)
v <- apply(cbind(vx, vy), 1, function(x, ...) fun(x[nc1], x[nc2], ...))
for (j in 1:nrow(v)) {
if (length(rasterList) < j) {
rasterList[[j]] <- raster(x)
}
rasterList[[j]] <- writeValues(rasterList[[j]], v[j,], tr$row[i])
}
}
}
return(stack(rasterList))
}
# local regression function
lm.focal <- function(x, y, ...) {
if(all(is.na(x) & is.na(y)) || all(is.na(x)) || all(is.na(y))) {rep(NA, 8)}
else {
m <- lm(y~x)
coef <- summary(m)$coef
if (nrow(coef) == 1) { # Add NAs for cases where the response is constant
coef <- rbind(coef, rep(NA, 4))
rownames(coef) <- rownames(nm)
}
coef <- as.vector(coef)
names(coef) <- c( outer( rownames(nm), colnames(nm) ,FUN=paste ,sep=" "))
coef
# summary(m)$r.squared #r-squared
# summary(m)$sigma #residual standard error
}
}
# nm must exist before localFunStack() is called, because lm.focal() uses it;
# build it once from a global fit of the same form used inside lm.focal()
m <- lm(y ~ x, data = data.frame(y = values(resp), x = values(expl)))
nm <- summary(m)$coefficients
lm.focal.out = localFunStack(expl, resp, ngb=5, fun=lm.focal, na.rm=TRUE)
names(lm.focal.out) <- c(outer(rownames(nm), colnames(nm), FUN=paste, sep=" "))
plot(lm.focal.out)
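For the RasterStack case in SECTION2 of the question, the same getValuesFocal() idea can be applied layer by layer; a rough, unoptimized, in-memory sketch (plain square ngb x ngb window rather than the Gaussian weights, returning local R-squared only):
ngb <- 5
V <- lapply(1:nlayers(stack1),
            function(k) getValuesFocal(stack1[[k]], 1, nrow(stack1), ngb = ngb))
r2 <- sapply(seq_len(nrow(V[[1]])), function(cell){
  # one row per neighbourhood cell, one column per layer of the stack
  df <- setNames(as.data.frame(lapply(V, function(m) m[cell, ])), names(stack1))
  df <- na.omit(df)
  if (nrow(df) < nlayers(stack1) + 1) return(NA_real_)  # too few points to fit
  summary(lm(resp ~ ., data = df))$r.squared
})
r2.ras <- raster(stack1)  # empty layer with the same geometry
values(r2.ras) <- r2
plot(r2.ras)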