Generate a vector of functions - r

I have a function of a single variable, and various parameters.
For each value of one of the parameters (the others are fixed) there is a single root of the function. From a vector of the parameter I would like to generate a vector of the roots (using uniroot).
The actual example I'm working on is a bit messy, but I'll give it. Here are the fixed parameters:
eta_inf = -0.0139
eta_0 = 178.5
lambda = 2.4954
m = 0.83094
Here is the function:
crossFnc <- function(gamma_dot) tau - gamma_dot*(eta_inf + (eta_0-eta_inf)/(1 + (lambda*gamma_dot)^m))
Here is an example of a root for a particular value of the tau parameter:
tau=10
uniroot(crossFnc, c(0,1))$root
[1] 0.06900807
I would like to generate a vector of these roots, for example, for:
tau <- seq(0,10,length.out=101)
Thanks,
Steve

Maybe you could use a for loop:
my.roots <- vector()
tau.seq <- seq(0,10,length.out=101)
for (i in seq_along(tau.seq)) {
tau <- tau.seq[i]
my.roots[i] <- uniroot(crossFnc, c(0,1))$root
}
#> head(my.roots)
#[1] 0.000000000 0.000566379 0.001142346 0.001726677 0.002257765 0.002848007

Make use of sapply:
# Notice the second argument
crossFnc <- function(gamma_dot, tau) {
tau - gamma_dot*(eta_inf + (eta_0-eta_inf)/(1 + (lambda*gamma_dot)^m))
}
# I only use length.out = 10
tau <- seq(0,10,length.out=10)
# Apply function(x) to every value in tau
myRoots <- sapply(tau, function(x) {
uniroot(crossFnc, c(0,1), tau=x)$root
})
myRoots
>[1] 0.000000000 0.006433349 0.013166577 0.020236503 0.027594321 0.035253401 0.043217816 0.051493442 0.060087456
>[10] 0.069008069

Related

How to fasten going through all independent variable combinations?

I want to write function combination_rsquare(y, data, factor_number) where
y - A vector - dependent variable
data - A data frame containing independent variables
factor_number - vector or numeric which tells how many elements in combination should be included.
Let's consider my function :
combination_rsquare <- function(y, data, factor_number = c(2, 3)) {
name_vec <- c()
r_sq <- c()
for (j in seq_along(factor_number)) {
# Defining combinations
comb_names <- combn(colnames(data), factor_number[j])
for (i in 1:ncol(comb_names)) {
#Append model r-squared for each combination
r_sq<- append(
r_sq,
summary(lm(y ~ ., data = data[comb_names[1:factor_number[j], i]]))$r.squared
)
# Create vector containing model names seperated by "+"
name_vec <- append(
name_vec,
paste(comb_names[1:factor_number[j], i], collapse = "+")
)
}
}
data.frame(name_vec, r_sq)
}
Let's have a look how my function works on data :
Norm <- rnorm(100)
Unif <- runif(100)
Exp <- rexp(100)
Pois <- rpois(100,1)
Weib <- rweibull(100,1)
df <- data.frame(Unif, Exp, Pois, Weib)
combination_rsquare(Norm, df)
name_vec r_sq
1 Unif+Exp 0.02727265
2 Unif+Pois 0.02912956
3 Unif+Weib 0.01613404
4 Exp+Pois 0.04853872
5 Exp+Weib 0.03252025
6 Pois+Weib 0.03573252
7 Unif+Exp+Pois 0.05138219
8 Unif+Exp+Weib 0.03571401
9 Unif+Pois+Weib 0.04112936
10 Exp+Pois+Weib 0.06209911
Okay - so we have it! Everything is working! However - If I'm putting very large data frame to my function and adding new features to be calculated (adjusted R.squared, AIC, BIC and so on) it's taking ages! My question is - is there any possibility how can I make this function works faster ? i.e. maybe the double loop can be omitted, or maybe there is R build function for creating such combinations ?
To summarize - How can I make combination_rsquare() to calculate faster ?

How do I run a function for multiple parameters, return an output and have this in a single data table?

I have developed code that calculates a value for a given set of parameters, this works for a single set of parameters.
library(spatstat)
library(ggplot2)
library(dplyr)
library(tidyr)
#Generating a clustered landscape
dim <- 2000
radiusCluster<-100
lambdaParent<-.02
lambdaDaughter<-30
hosts<-900
randmod<-0
numbparents<-rpois(1,lambdaParent*dim)
xxParent<-runif(numbparents,0+radiusCluster,dim-radiusCluster)
yyParent<-runif(numbparents,0+radiusCluster,dim-radiusCluster)
numbdaughter<-rpois(numbparents,(lambdaDaughter))
sumdaughter<-sum(numbdaughter)
theta<-2*pi*runif(sumdaughter)
rho<-radiusCluster*sqrt(runif(sumdaughter))
xx0=rho*cos(theta)
yy0=rho*sin(theta)
xx<-rep(xxParent,numbdaughter)
yy<-rep(yyParent,numbdaughter)
xx<-xx+xx0
yy<-yy+yy0
cds<-data.frame(xx,yy)
is_outlier<-function(x){
x > dim| x < 0
}
cds<-cds[!(is_outlier(cds$xx)|is_outlier(cds$yy)),]
sampleselect<-sample(1:nrow(cds),hosts,replace=F)
cds<-cds%>%slice(sampleselect)
randfunction<-function(x){
x<-runif(length(x),0,dim)
}
randselect<-sample(1:nrow(cds),floor(hosts*randmod),replace=F)
cds[randselect,]<-apply(cds[randselect,],1,randfunction)
landscape<-ppp(x=cds$xx,y=cds$yy,window=owin(xrange=c(0,dim),yrange=c(0,dim)))
ggplot(data.frame(landscape))+geom_point(aes(x=x,y=y))+coord_equal()+theme_minimal()
#Calculating a metric for clustering
kk<-Kest(landscape)
plot(kk)
kk_iso<-kk$iso
kk_pois<-kk$theo
kk_div_na<-kk_iso/kk_pois
kk_div_0<-replace_na(kk_div_na,0)
kk_mean<-round(mean(kk_div_0),3)
So I can say for radiusCluster of 100 and randmod of 0, I get a kk_mean of "value". I want to use radiusCluster and randmod as my variables and run this experiment for a set of these variables. I begin by generating the data table that I want.
random_parameter<-rep(c(0,.5,1),3)
radiusCluster_parameter<-rep(c(100,300,600),each=3)
Cluster_metric<-rep(NA,length(radiusCluster_parameter))
parameter_table<-data.frame(random_parameter,radiusCluster_parameter,Cluster_metric)
colnames(parameter_table)<-c("r", "rho", "sigma")
Here r is randmod, rho is radiusCluster and sigma is kk_mean.
Then I create a function of the above code for generating the clustered landscape and calculating the metric.
cluster_function <- function (dim,
lambdaParent,
lambdaDaughter,
hosts,
randmod,
radiusCluster) {
numbparents <- rpois(1, lambdaParent * dim)
xxParent <- runif(numbparents, 0 + radiusCluster, dim - radiusCluster)
yyParent <- runif(numbparents, 0 + radiusCluster, dim - radiusCluster)
numbdaughter <- rpois(numbparents, (lambdaDaughter))
sumdaughter <- sum(numbdaughter)
theta <- 2 * pi * runif(sumdaughter)
rho <- radiusCluster * sqrt(runif(sumdaughter))
xx0 = rho * cos(theta)
yy0 = rho * sin(theta)
xx <- rep(xxParent, numbdaughter)
yy <- rep(yyParent, numbdaughter)
xx <- xx + xx0
yy <- yy + yy0
cds <- data.frame(xx, yy)
is_outlier <- function(x) {
x > dim | x < 0
}
cds <- cds[!(is_outlier(cds$xx) | is_outlier(cds$yy)), ]
sampleselect <- sample(1:nrow(cds), hosts, replace = F)
cds <- cds %>% slice(sampleselect)
randfunction <- function(x) {
x <- runif(length(x), 0, dim)
}
randselect <- sample(1:nrow(cds), floor(hosts * randmod), replace = F)
cds[randselect, ] <- apply(cds[randselect, ], 1, randfunction)
landscape<-ppp(x=cds$xx,y=cds$yy,window=owin(xrange=c(0,dim),yrange=c(0,dim)))
ggplot(data.frame(landscape))+geom_point(aes(x=x,y=y))+coord_equal()+theme_minimal()
kk<-Kest(landscape)
plot(kk)
kk_iso<-kk$iso
kk_pois<-kk$theo
kk_div_na<-kk_iso/kk_pois
kk_div_0<-replace_na(kk_div_na,0)
kk_mean<-round(mean(kk_div_0),3)
}
I then try running cluster_function for a set of parameters, however, this does not work.
cluster_function(dim <- 2000,
lambdaParent <-.02,
lambdaDaughter<-30,
hosts<-900,
randmod<-0,
radiusCluster<-0)
The parameters are defined in the global environment but nothing happens. So I decide to remove the landscape and ggplot command from the function and call the function to an output. Then hopefully the output will be data frame of the co ordinates that I generated in cds and can be used in a ppp() function and be plottable.
output<-cluster_function(dim <- 2000,
lambdaParent <-.02,
lambdaDaughter<-30,
hosts<-900,
randmod<-0,
radiusCluster<-0)
Output is numeric (empty). How can I get the function to work for the parameters in the cluster_function() and is it possible to run this for multiple parameters? I was thinking something along the lines of:
for (i in length(parameter_table)){
cluster_function(dim <- 2000,
lambdaParent <-.02,
lambdaDaughter<-30,
hosts<-900,
randmod<-parameter_table[i,"r"],
radiusCluster<-parameter_table[i,"rho"])
I then try running cluster_function for a set of parameters, however, this does not work
It looks like it's working to me ;) Do you want the ggplot to be printed? You can addp <- ggplot(...) followed be print(p) to see it (you may need to refresh the plot viewer...).
Output is numeric (empty). How can I get the function to work
Add an explicit return: return(cds)
And you can of course run the function multiple times. A for loop works, or you could check out purrr::pmap() or mapply(). Good luck!

Assign single output for multiple-output function to new function

I have a function that gives me a single output which is however composed of two elements. Example for it would be:
example <- function(x){
sin <- sin(x)
cos <- cos(x)
output <- cbind(sin, cos)
return(output)
}
Now my idea is to plot separately sin and cos, each as functions of x. I would like to avoid writing a separate function in this context since the two objects are better to be calculated all at once.
If I try :
x_grid = seq(0,1,0,0.05)
plot(x_grid, sapply(x_grid, FUN = example[1]))
I get the following error message :
Error in example[1] : object of type 'closure' is not subsettable
How to proceed then? (notice that I use sapply because I need my function to deal with more than a single value of x in my real case).
If you're looking for a non-base graphics solution:
library(ggplot2)
example3 <- function(x){
data.frame(
x = x,
sin = sin(x),
cos = cos(x)
)
}
x_grid=seq(0,1,0.05)
ggplot(data = example3(x_grid),
aes(x=x)) +
geom_line(aes(y = sin), color = "blue") +
geom_line(aes(y = cos), color = "red")
With the output:
Your function is vectorized so you can input a vector and extract each column by example(x_grid)[, "sin"] or example(x_grid)[, "cos"].
example(x_grid)
# sin cos
# [1,] 0.000000000 1.000000000
# [2,] 0.049979169 0.998750260
# [3,] 0.099833417 0.995004165
example(x_grid)[, "sin"]
# [1] 0.000000000 0.049979169 0.099833417 0.149438132 0.198669331
# [6] 0.247403959 0.295520207 0.342897807 0.389418342 0.434965534
Note: In this case, sapply is not recommended because the function itself has been vectorized. sapply will make it inefficient. Here is an illustration by benchmark:
library(microbenchmark)
bm <- microbenchmark(
basic = example(x_grid)[, 1],
sapply = sapply(x_grid, function(x) example(x)[1]),
times = 1000L
)
ggplot2::autoplot(bm)
If you want to plot both the two functions, matplot() can plot each column of one matrix.
x_grid <- seq(0, 10, 0.05)
matplot(x_grid, example(x_grid), type = "l")
Appears to be an extra parameter to seq
x_grid <- seq(0, 1, 0.05)
Slight modification to pass variable to function and then subset
plot(x_grid, sapply(x_grid, function(x) example(x)[1]))
Another approach for function which uses a list and then the function can be subset by name
example2 <- function(x) {
within(list(), {
sin <- sin(x)
cos <- cos(x)
})
}
plot(x_grid, sapply(x_grid, function(x) example2(x)$sin))
Unless the example is simplified, the following works without sapply
plot(x_grid, example2(x_grid)$sin)
Plotting both results
lapply(example2(x_grid), plot, x_grid)

fminsearch on a single variable

Using R's help page example on fminsearch as a starting point:
# Rosenbrock function
rosena <- function(x, a) 100*(x[2]-x[1]^2)^2 + (a-x[1])^2 # min: (a, a^2)
fminsearch(rosena, c(-1.2, 1), a = sqrt(2))
# x = (1.414214 2.000010) , fval = 1.239435e-11
I want to evaluate something like this but with only one variable such as:
rosena <- function(x, a) 100*(x[1]-x[1]^2)^2 + (a-x[1])^2
but when I run
fminsearch(rosena, c(1), a = sqrt(2))
It gives the error: Error in X[2:d1, ] : incorrect number of dimensions
fminsearch seems to want a vector of length greater than or equal to 2, but no less, however for this example, the vector requires length 1
Note: fminsearch is in the "pracma" package
It looks like a bug in the pracma package.
The anms function is dropping a dimension upon a subscript, relevant excerpts:
d <- length(x0) # i.e. 1
d1 <- d + 1 # i.e. 2
...
X <- matrix(0, nrow = d1, ncol = d)
...
X <- X[o, ] # could put drop = FALSE here
I think you should post a bug with the author of the package.

Reqsubsets results differ with coef() for model with linear dependencies

while using Regsubsets from package leaps on data with linear dependencies, I found that results given by coef() and by summary()$which differs. It seems that, when linear dependencies are found, reordering changes position of coefficients and coef() returns wrong values.
I use mtcars just to "simulate" the problem I had with other data. In first example there is no issue of lin. dependencies and best given model by BIC is mpg~wt+cyl and both coef(),summary()$which gives the same result. In second example I add dummy variable so there is possibility of perfect multicollinearity, but variables in this order (dummy in last column) don't cause the problem. In last example after changing order of variables in dataset, the problem finally appears and coef(),summary()$which gives different models. Is there anything incorrect in this approach? Is there any other way to get coefficients from regsubsets?
require("leaps") #install.packages("leaps")
###Example1
dta <- mtcars[,c("mpg","cyl","am","wt","hp") ]
bestSubset.cars <- regsubsets(mpg~., data=dta)
(best.sum <- summary(bestSubset.cars))
#
w <- which.min(best.sum$bic)
best.sum$which[w,]
#
best.sum$outmat
coef(bestSubset.cars, w)
#
###Example2
dta2 <- cbind(dta, manual=as.numeric(!dta$am))
bestSubset.cars2 <- regsubsets(mpg~., data=dta)
(best.sum2 <- summary(bestSubset.cars2))
#
w <- which.min(best.sum2$bic)
best.sum2$which[w,]
#
coef(bestSubset.cars2, w)
#
###Example3
bestSubset.cars3 <- regsubsets(mpg~., data=dta2[,c("mpg","manual","am","cyl","wt","hp")])
(best.sum3 <- summary(bestSubset.cars3))
#
w <- which.min(best.sum3$bic)
best.sum3$which[w,]
#
coef(bestSubset.cars3, w)
#
best.sum2$which
coef(bestSubset.cars2,1:4)
best.sum3$which
coef(bestSubset.cars3,1:4)
The order of vars by summary.regsubsets and regsubsets are different. The generic function coef() of regsubsets calls those two in one function, and the results are in mess if you are trying to force.in or using formula with fixed order. Changing some lines in the coef() function might help. Try codes below, see if it works!
coef.regsubsets <- function (object, id, vcov = FALSE, ...)
{
s <- summary(object)
invars <- s$which[id, , drop = FALSE]
betas <- vector("list", length(id))
for (i in 1:length(id)) {
# added
var.name <- names(which(invars[i, ]))
thismodel <- which(object$xnames %in% var.name)
names(thismodel) <- var.name
# deleted
#thismodel <- which(invars[i, ])
qr <- .Fortran("REORDR", np = as.integer(object$np),
nrbar = as.integer(object$nrbar), vorder = as.integer(object$vorder),
d = as.double(object$d), rbar = as.double(object$rbar),
thetab = as.double(object$thetab), rss = as.double(object$rss),
tol = as.double(object$tol), list = as.integer(thismodel),
n = as.integer(length(thismodel)), pos1 = 1L, ier = integer(1))
beta <- .Fortran("REGCF", np = as.integer(qr$np), nrbar = as.integer(qr$nrbar),
d = as.double(qr$d), rbar = as.double(qr$rbar), thetab = as.double(qr$thetab),
tol = as.double(qr$tol), beta = numeric(length(thismodel)),
nreq = as.integer(length(thismodel)), ier = numeric(1))$beta
names(beta) <- object$xnames[qr$vorder[1:qr$n]]
reorder <- order(qr$vorder[1:qr$n])
beta <- beta[reorder]
if (vcov) {
p <- length(thismodel)
R <- diag(qr$np)
R[row(R) > col(R)] <- qr$rbar
R <- t(R)
R <- sqrt(qr$d) * R
R <- R[1:p, 1:p, drop = FALSE]
R <- chol2inv(R)
dimnames(R) <- list(object$xnames[qr$vorder[1:p]],
object$xnames[qr$vorder[1:p]])
V <- R * s$rss[id[i]]/(object$nn - p)
V <- V[reorder, reorder]
attr(beta, "vcov") <- V
}
betas[[i]] <- beta
}
if (length(id) == 1)
beta
else betas
}
Another solution that works for me is to randomize the order of the column(independent variables) in your dataset before running the regsubsets. The idea is that after reorder hopefully the highly correlated columns will be far apart from each other and will not trigger the reorder behavior in the regsubsets algorithm.

Resources