Function over vector not working in R - r

I want to run a function over a vector, using the lapply() command. This is my attempt, reproducible:
set.seed(10101)
# define parameters
N <- 1000
a <- 0.3
# create vector of variables
e <- rnorm(N)
l <- rchisq(N, df = 3, ncp = 0)
k <- rbeta(N, shape1 = 2, shape2 = 5, ncp = 0)
# vector over which to run function
g <- c(1, 0.5, 0.3, 0.1, -0.2, -0.5, -1)
# define function
CES <- function(g,a,e){
exp(e)*(a*l^g+(1-a)*k^g)^(1/g)
}
# result
y <- lapply(g,CES)
I get the error
Error in FUN(X[[i]], ...) : argument "e" is missing, with no default
What is the mistake? Is it due to the fact that the elements of the function (l,k,e) are themselves vectors?

You have to do
y <- lapply(g, CES, a=a, e=e)
It is to follow the description of the error. The parameters a= and e= of your function CES() have no default - so you have to deliver them to the function. You can use the ...-argument of the function lapply() to do that.
Alternativly (see the comment from #Cath) you can change the definition of your function:
CES <- function(g, a=a, e=e) exp(e)*(a*l^g+(1-a)*k^g)^(1/g)
y <- lapply(g, CES) # now works
or (a bad variant):
CES <- function(g) exp(e)*(a*l^g+(1-a)*k^g)^(1/g)
y <- lapply(g, CES) # now works

Related

Include function in linear constraint in r

I am trying to set up an optimization with the constraint: sum(abs(x-y))*0.5 where x are my decision variables and y are given inputs.
x <- c(0.25, 0.3, 0.05, 0.25, 0.15)
y <- rep(0.2, 5)
I have added the function
AcS <- function(g, h, dir, rhs) {
AS <- sum(abs(g-h))*0.5
return(AS)
L_constraint(L = L, dir = dir, rhs = rhs)
}
And then with the ROI package attempted to create the constraint as: AcS_cons <- L_constraint(AcS(x, y), "<=", 0.25)
With
c1 <- c(1:24, -25)
C <- matrix(c1,nrow=5,ncol=5,byrow=TRUE)
I then have my optimization problem setup to solve for x as:
QPL <- OP(Q_objective(Q = C, L = rep(0, NCOL(count_y))),
act_share_2,
max = FALSE)
When i run this I get the following error: Error in .check_constraints.L_constraint(constr, x) :
dimension missmatch! OP has 5 variables the constraints have 1
Any help on how to adjust the above, or how to setup a constraint for my function AcS would be appreciated.

Time varying parameter-matrix in deSolve R

I am struggling with this for so long. I have a logistic growth function where the growth parameter
r is a matrix. The model is constructed in a way that I have as an output two N the N1 and N2.
I would like to be able to change the r parameter over time. When time < 50 I would like
r = r1 where
r1=matrix(c(
2,3),
nrow=1, ncol=2
When time >= 50 I would like r=r2 where
r2=matrix(c(
1,2),
nrow=1, ncol=2
Here is my function. Any help is highly appreciated.
rm(list = ls())
library(deSolve)
model <- function(time, y, params) {
with(as.list(c(y,params)),{
N = y[paste("N",1:2, sep = "")]
dN <- r*N*(1-N/K)
return(list(c(dN)))
})
}
r=matrix(c(
4,5),
nrow=1, ncol=2)
K=100
params <- list(r,K)
y<- c(N1=0.1, N2=0.2)
times <- seq(0,100,1)
out <- ode(y, times, model, params)
plot(out)
I would like ideally something like this but it does not work
model <- function(time, y, params) {
with(as.list(c(y,params)),{
N = y[paste("N",1:2, sep = "")]
r = ifelse(times < 10, matrix(c(1,3),nrow=1, ncol=2),
ifelse(times > 10, matrix(c(1,4),nrow=1, ncol=2), matrix(c(1,2),nrow=1, ncol=2)))
print(r)
dN <- r*N*(1-N/K)
return(list(c(dN)))
})
}
Thank you for your time.
Here a generic approach that uses an extended version of the approx function. Note also some further simplifications of the model function and the additional plot of the parameter values.
Edit changed according to the suggestion of Lewis Carter to make the parameter change at t=3, so that the effect can be seen.
library(simecol) # contains approxTime, a vector version of approx
model <- function(time, N, params) {
r <- approxTime(params$signal, time, rule = 2, f=0, method="constant")[-1]
K <- params$K
dN <- r*N*(1-N/K)
return(list(c(dN), r))
}
signal <- matrix(
# time, r[1, 2],
c( 0, 2, 3,
3, 1, 2,
100, 1, 2), ncol=3, byrow=TRUE
)
## test of the interpolation
approxTime(signal, c(1, 2.9, 3, 100), rule = 2, f=0, method="constant")
params <- list(signal = signal, K = 100)
y <- c(N1=0.1, N2=0.2)
times <- seq(0, 10, 0.1)
out <- ode(y, times, model, params)
plot(out)
For a small number of state variables like in the example, separate signals with approxfun from package stats will look less generic but may be slighlty faster.
As a further improvement, one may consider to replace the "hard" transitions with a more smooth one. This can then directly be formulated as a function without the need of approx, approxfun or approxTime.
Edit 2:
Package simecol imports deSolve, and we need only a small function from it. So instead of loading simecol it is also possible to include the approxTime function explicitly in the code. The conversion from data frame to matrix improves performance, but a matrix is preferred anyway in such cases.
approxTime <- function(x, xout, ...) {
if (is.data.frame(x)) {x <- as.matrix(x); wasdf <- TRUE} else wasdf <- FALSE
if (!is.matrix(x)) stop("x must be a matrix or data frame")
m <- ncol(x)
y <- matrix(0, nrow=length(xout), ncol=m)
y[,1] <- xout
for (i in 2:m) {
y[,i] <- as.vector(approx(x[,1], x[,i], xout, ...)$y)
}
if (wasdf) y <- as.data.frame(y)
names(y) <- dimnames(x)[[2]]
y
}
If you want to pass a matrix parameter you should pass a list of parameters and you can modify it inside the model when your time limit is exceeded (in the example below you don't even have to pass the r matrix to the model function)
library(deSolve)
model <- function(time, y, params) {
with(as.list(c(y,params)),{
if(time < 3) r = matrix(c(2,3), nrow = 1, ncol = 2)
else r = matrix(c(1,3), nrow = 1, ncol = 2)
N = y[paste("N",1:2, sep = "")]
dN <- r*N*(1-N/K)
return(list(c(dN)))
})
}
y <- c(N1=0.1, N2=0.2)
params <- list(r = matrix(c(0,0), nrow = 1, ncol = 2), K=100)
times <- seq(0,10,0.1)
out <- ode(y, times, model, params)
plot(out)
You can see examples of this for instance with Delay Differential Equations ?dede

Error in nrow(X) : object 'X' not found , but it gets defined

I am trying to implement logistic regression and the function works manually, but for some reason I get the error "Error in nrow(X) : object 'X' not found ", even though X is defined before the nrow command. I use the UCI Data "Adult" to test it.
If i try to run the function manually there is no error. Can anyone explain that?
# Sigmoidfunction
sigmoid <- function(z){
g <- 1/(1+exp(-z))
return(g)
}
# Costfunction
cost <- function(theta){
n <- nrow(X)
g <- sigmoid(X %*% theta)
J <- (1/n)*sum((-Y*log(g)) - ((1-Y)*log(1-g)))
return(J)
}
log_reg <- function(datafr, m){
# Train- und Testdaten Split
sample <- sample(1:nrow(datafr), m)
df_train <- datafr[sample,]
df_test <- datafr[-sample,]
num_features <- ncol(datafr) - 1
num_label <- ncol(datafr)
label_levels <- levels(datafr[, num_label])
datafr[, num_features+1] <- ifelse(datafr[, num_label] == names(table(datafr[,num_label]))[1], 0, 1)
# Predictor variables
X <- as.matrix(df_train[, 1:num_features])
X_test <- as.matrix(df_test[, 1:num_features])
# Add ones to X
X <- cbind(rep(1, nrow(X)), X)
X_test <- cbind(rep(1, nrow(X_test)), X_test)
# Response variable
Y <- as.matrix(df_train[, num_label] )
Y <- ifelse(Y == names(table(Y))[1], 0, 1)
Y_test <- as.matrix(df_test[, num_label] )
Y_test <- ifelse(Y_test == names(table(Y_test))[1], 0, 1)
# Intial theta
initial_theta <- rep(0, ncol(X))
# Derive theta using gradient descent using optim function
theta_optim <- optim(par=initial_theta, fn=cost)
predictions <- ifelse(sigmoid(X_test%*%theta_optim$par)>=0.5, 1, 0)
# Generalization error
error_rate <- sum(predictions!=Y_test)/length(Y_test)
return(error_rate)
}
### Adult Data
data <- read.table('https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data',
sep = ',', fill = F, strip.white = T)
colnames(data) <- c('age', 'workclass', 'fnlwgt', 'education',
'education_num', 'marital_status', 'occupation', 'relationship', 'race', 'sex',
'capital_gain', 'capital_loss', 'hours_per_week', 'native_country', 'income')
# Featureselection
datafr <- data[, c("age", "education_num", "hours_per_week", "income")]
log_reg(datafr = datafr, m = 20)
You are calling cost() in which you refer to X, but X has not been defined in cost(). Either define it within log_reg() after you have defined X, or, better, make X a parameter for cost().
cost <- function(theta, X, Y){
n <- nrow(X)
g <- sigmoid(X %*% theta)
J <- (1/n)*sum((-Y*log(g)) - ((1-Y)*log(1-g)))
return(J)
}
And later
theta_optim <- optim(par=initial_theta, fn=cost, X=X, Y=Y)
In general, try to avoid having variables used in a function which are not defined explicitly as arguments to that function. Otherwise you will always end up with problems like this one.
Also, how did I find it out? I used traceback():
> traceback()
5: nrow(X) at #2
4: fn(par, ...)
3: (function (par)
fn(par, ...))(c(0, 0, 0, 0))
2: optim(par = initial_theta, fn = cost) at #33
1: log_reg(datafr = datafr, m = 20)

Creating a function in R but getting a replacement has length zero error

I tried to create a function f and create the function so when a value x is inserted, it spits out a function f from y.But, when I try to run the code to plot, it gives me an error that says that my y_value has no length.
f <- function(x){
if (x<0){
print(y_values<-x*x*x)
}
if(x>0 & x<=1){
print(y_values<-x*x)
}
if(x>1){
print(y_values<-sqrt(x))
}
}
x_values <- seq(-2, 2, by = 0.1)
y_values <- rep(NA, length(x_values))
for (i in seq_along(x_values)) {
x <- x_values[i]
y_values[i] <- f(x)
}
# output
plot(x_values, y_values, type = "l")
Two issues:
From ?print
‘print’ prints its argument and returns it invisibly (via
‘invisible(x)’)
So all your function f does is print the values to the console (instead of returning them).
As per your definition of f, the function does not know how to deal with x=0; so this will create a problem when you store the output of f(0) later.
We can fix these issues by slightly altering f as
f <- function(x) {
y_values <- NA
if (x<0){
y_values<-x*x*x
}
if(x>0 & x<=1){
y_values<-x*x
}
if(x>1){
y_values<-sqrt(x)
}
return(y_values)
}
Then
x_values <- seq(-2, 2, by = 0.1)
y_values <- rep(NA, length(x_values))
for (i in seq_along(x_values)) {
x <- x_values[i]
y_values[i] <- f(x)
}
plot(x_values, y_values, type = "l")
You could also use Vectorize to obtain a vectorised function f2, which allows you to pass x_values as a vector, thereby avoiding the explicit for loop:
f2 <- Vectorize(f)
x_values <- seq(-2, 2, by = 0.1)
y_values <- f2(x_values)
The resulting plot is the same.
I would recommend you explore other methods for coding something like this:
here is one option that doesn't use a for loop. If you are simply working on using for loops then the fix Mauritus Evers made should work for you.
library(tidyverse)
data.frame(x_values = seq(-2, 2, by = 0.1)) %>%
mutate(y_values = case_when(x_values < 0 ~ x_values^3,
x_values>=0 & x_values<=1 ~ x_values^2,
x_values>1 ~ sqrt(x_values))) %>%
ggplot(aes(x_values, y_values)) + geom_point()
note that I changed your code to produce output when x_value = 0.

Multi-data likelihood function and mle2 function from bbmle package in R

I have written a custom likelihood function that fits a multi-data model that integrates mark-recapture and telemetry data (sensu Royle et al. 2013 Methods in Ecology and Evolution). The likelihood function is designed to be flexible in terms of whether and how many covariates are specified for different linear models in different likelihood components which is determined by values supplied as function arguments (i.e., data matrices "detcovs" and "dencovs" in my code). The likelihood function works when I directly supply it to optimization functions (e.g., optim or nlm), but does not play nice with the mle2 function in the bbmle package. My problem is that I continually run into the following error: "some named arguments in 'start' are not arguments to the specified log-likelihood function". This is my first attempt at writing custom likelihood functions so I'm sure there are general coding conventions of which I'm unaware that make such tasks much more efficient and amendable to the mle2 function. Below is my likelihood function, code creating the staring value objects, and code calling the mle2 function. Any advice how to solve the error problem and general comments on writing cleaner functions is welcome. Many thanks in advance.
Edit: As requested, I have simplified the likelihood function and provided code to simulate reproducible data to which the model can be fit. Included in the simulation code are 2 custom functions and use of the raster function from the raster package. Hopefully, I have sufficiently simplified everything to enable others to troubleshoot. Again, many thanks for your help!
Jared
Likelihood function:
CSCR.RSF.intlik2.EXAMPLE <- function(alpha0,sigma,alphas=NULL,betas=NULL,n0,yscr=NULL,K=NULL,X=X,trapcovs=NULL,Gden=NULL,Gdet=NULL,ytel=NULL,stel=NULL,
dencovs=NULL,detcovs=NULL){
#
# this version of the code handles a covariate on log(Density). This is starting value 5
#
# start = vector of starting values
# yscr = nind x ntraps encounter matrix
# K = number of occasions
# X = trap locations
# Gden = matrix with grid cell coordinates for density raster
# Gdet = matrix with gride cell coordinates for RSF raster
# dencovs = all covariate values for all nGden pixels in density raster
# trapcovs = covariate value at trap locations
# detcovs = all covariate values for all nGrsf pixels in RSF raster
# ytel = nguys x nGdet matrix of telemetry fixes in each nGdet pixels
# stel = home range center of telemetered individuals, IF you wish to estimate it. Not necessary
# alphas = starting values for RSF/detfn coefficients excluding sigma and intercept
# alpha0 = starting values for RSF/detfn intercept
# sigma = starting value for RSF/detfn sigma
# betas = starting values for density function coefficients
# n0 = starting value for number of undetected individuals on log scale
#
n0 = exp(n0)
nGden = nrow(Gden)
D = e2dist(X,Gden)
nGdet <- nrow(Gdet)
alphas = alphas
loglam = alpha0 -(1/(2*sigma*sigma))*D*D + as.vector(trapcovs%*%alphas) # ztrap recycled over nG
psi = exp(as.vector(dencovs%*%betas))
psi = psi/sum(psi)
probcap = 1-exp(-exp(loglam))
#probcap = (exp(theta0)/(1+exp(theta0)))*exp(-theta1*D*D)
Pm = matrix(NA,nrow=nrow(probcap),ncol=ncol(probcap))
ymat = yscr
ymat = rbind(yscr,rep(0,ncol(yscr)))
lik.marg = rep(NA,nrow(ymat))
for(i in 1:nrow(ymat)){
Pm[1:length(Pm)] = (dbinom(rep(ymat[i,],nGden),rep(K,nGden),probcap[1:length(Pm)],log=TRUE))
lik.cond = exp(colSums(Pm))
lik.marg[i] = sum( lik.cond*psi )
}
nv = c(rep(1,length(lik.marg)-1),n0)
part1 = lgamma(nrow(yscr)+n0+1) - lgamma(n0+1)
part2 = sum(nv*log(lik.marg))
out = -1*(part1+ part2)
lam = t(exp(a0 - (1/(2*sigma*sigma))*t(D2)+ as.vector(detcovs%*%alphas)))# recycle zall over all ytel guys
# lam is now nGdet x nG!
denom = rowSums(lam)
probs = lam/denom # each column is the probs for a guy at column [j]
tel.loglik = -1*sum( ytel*log(probs) )
out = out + tel.loglik
out
}
Data simulation code:
library(raster)
library(bbmle)
e2dist <- function (x, y){
i <- sort(rep(1:nrow(y), nrow(x)))
dvec <- sqrt((x[, 1] - y[i, 1])^2 + (x[, 2] - y[i, 2])^2)
matrix(dvec, nrow = nrow(x), ncol = nrow(y), byrow = F)
}
spcov <- function(R) {
v <- sqrt(nrow(R))
D <- as.matrix(dist(R))
V <- exp(-D/2)
cov1 <- t(chol(V)) %*% rnorm(nrow(R))
Rd <- as.data.frame(R)
colnames(Rd) <- c("x", "y")
Rd$C <- as.numeric((cov1 - mean(cov1)) / sd(cov1))
return(Rd)
}
set.seed(1234)
co <- seq(0.3, 0.7, length=5)
X <- cbind(rep(co, each=5),
rep(co, times=5))
B <- 10
co <- seq(0, 1, length=B)
Z <- cbind(rep(co, each=B), rep(co, times=B))
dencovs <- cbind(spcov(Z),spcov(Z)[,3]) # ordered as reading raster image from left to right, bottom to top
dimnames(dencovs)[[2]][3:4] <- c("dencov1","dencov2")
denr.list <- vector("list",2)
for(i in 1:2){
denr.list[[i]] <- raster(
list(x=seq(0,1,length=10),
y=seq(0,1,length=10),
z=t(matrix(dencovs[,i+2],10,10,byrow=TRUE)))
)
}
B <- 20
co <- seq(0, 1, length=B)
Z <- cbind(rep(co, each=B), rep(co, times=B))
detcovs <- cbind(spcov(Z),spcov(Z)[,3]) # ordered as reading raster image from left to right, bottom to top
dimnames(detcovs)[[2]][3:4] <- c("detcov1","detcov2")
detcov.raster.list <- vector("list",2)
trapcovs <- matrix(0,J,2)
for(i in 1:2){
detr.list[[i]] <- raster(
list(x=seq(0,1,length=20),
y=seq(0,1,length=20),
z=t(matrix(detcovs[,i+2],20,20,byrow=TRUE)))
)
trapcovs[,i] <- extract(detr.list[[i]],X)
}
alpha0 <- -3
sigma <- 0.15
alphas <- c(1,-1)
beta0 <- 3
betas <- c(-1,1)
pixelArea <- (dencovs$y[2] - dencovs$y[1])^2
mu <- exp(beta0 + as.matrix(dencovs[,3:4])%*%betas)*pixelArea
EN <- sum(mu)
N <- rpois(1, EN)
pi <- mu/sum(mu)
s <- dencovs[sample(1:nrow(dencovs), size=N, replace=TRUE, prob=pi),1:2]
J <- nrow(X)
K <- 10
yc <- d <- p <- matrix(NA, N, J)
D <- e2dist(s,X)
loglam <- t(alpha0 - t((1/(2*sigma*sigma))*D*D) + as.vector(trapcovs%*%alphas))
p <- 1-exp(-exp(loglam))
for(i in 1:N) {
for(j in 1:J) {
yc[i,j] <- rbinom(1, K, p[i,j])
}
}
detected <- apply(yc>0, 1, any)
yscr <- yc[detected,]
ntel <- 5
nfixes <- 100
poss.tel <- which(s[,1]>0.2 & s[,1]<0.8 & s[,2]>0.2 & s[,2]<0.8)
stel.id <- sample(poss.tel,ntel)
stel <- s[stel.id,]
ytel <- matrix(NA,ntel,nrow(detcovs))
d <- e2dist(stel,detcovs[,1:2])
lam <- t(exp(1 - t((1/(2*sigma*sigma))*d*d) + as.vector(as.matrix(detcovs[,3:4])%*%alphas)))
for(i in 1:ntel){
ytel[i,] <- rmultinom(1,nfixes,lam[i,]/sum(lam[i,]))
}
Specify starting values and call mle2 function:
start1 <- list(alpha0=alpha0,sigma=sigma,alphas=alphas,betas=betas,n0=log(N-nrow(yscr)))
parnames(CSCR.RSF.intlik2.EXAMPLE) <- names(start)
out1 <- mle2(CSCR.RSF.intlik2.EXAMPLE,start=start1,method="SANN",optimizer="optim",
data=list(yscr=yscr,K=K,X=X,trapcovs=trapcovs,Gden=dencovs[,1:2],Gdet=detcovs[,1:2],
ytel=ytel,stel=stel,dencovs=as.matrix(dencovs[,3:4]),detcovs=as.matrix(detcovs[,3:4]))
)

Resources