Why lapply works and apply doesn't? - r

My data:
df_1 <- data.frame(
x = replicate(
n = 3,
expr = runif(n = 30, min = 20, max = 100)
),
y = sample(
x = 1:3, size = 30, replace = TRUE
)
)
The follow code with lapply works:
lapply(X = names(df_1)[c(1:3)], FUN = function(x) {
pairwise.t.test(
x = df_1[, x],
g = df_1[['y']],
p.adj = 'bonferroni'
)
})
But, with apply doesn't:
apply(X = names(df_1)[c(1:3)], MARGIN = 2, FUN = function(x) {
pairwise.t.test(
x = df_1[, x],
g = df_1[['y']],
p.adj = 'bonferroni'
)
})
Error in apply(X = names(df_1)[c(1:3)], MARGIN = 2, FUN = function(x) { :
dim(X) must have a positive length
Why the problem? Are they not equivalent?

For apply you should instead use
apply(X = df_1[1:3], MARGIN = 2, FUN = function(x) {
pairwise.t.test(
x = x,
g = df_1[['y']],
p.adj = 'bonferroni'
)
})
that is because from ?apply
apply returns a vector if MARGIN has length 1 and an array of dimension dim(X)[MARGIN] otherwise.
In your attempt you are using names(df_1)[c(1:3)] as argument to apply which has
dim(names(df_1)[c(1:3)])[2]
#NULL
Hence, you get the error.

Related

Change Error Message to An Instruction for Users

When I run this R code I get Error in order(res2$seed): argument 1 is not a vector as an error message in the function call at first instance but when I change the range of i to be something different like in function call at second instance, I get the expected data frame format that I want.
The Function
abc <- function(a, z, n, ar11, p, d, q, sd = sd, j1, arr1, n_cores){
future::plan(future::multisession)
n_cores <- parallel::detectCores()
cl <- parallel::makeCluster(n_cores)
doParallel::registerDoParallel(cores = n_cores)
message('processing...')
`%dopar%` <- foreach::`%dopar%`
i <- a:z
res <- foreach::foreach(i = a:z, .packages = c('foreach', 'forecast')) %dopar% {
set.seed(i)
mod <- stats::arima.sim(n = n, model = list(ar = c(ar11), order = c(p, d, q)), sd = sd)
best.mod <- forecast::auto.arima(mod, ic = "aicc")
(cf <- best.mod$coef)
if (length(cf) == 0) {
rep(NA, 2)
} else if (all(grepl(c("ar1|intercept"), names(cf))) &
substr(cf["ar1"], 1, j1) %in% arr1) {
c(cf, seed = i)
} else {
rep(NA, 2)
}
}
message(' done!\n')
res1 = res[!sapply(res, anyNA)]
parallel::stopCluster(cl)
options(max.print = .Machine$integer.max)
res2 <- tibble::tibble(Reduce(function(...) merge(..., all = T), lapply(res1, function(x) as.data.frame(t(x)))))
res2[order(res2$seed), ]
res2 <- Reduce(function(...) merge(..., all = T), lapply(res1, function(x) as.data.frame(t(x))))
res2[order(res2$seed), ]
}
Call Function at First Instance
abc(a = 280000, z = 281000, n = 10, p = 1, d = 0, q = 0, ar11 = 0.8, sd = 1, j1 = 4, arr1 = "0.80")
#Error in order(res2$seed) : argument 1 is not a vector
Call Function at Second Instance
abc(a = 289800, z = 289989, n = 10, p = 1, d = 0, q = 0, ar11 = 0.8, sd = 1, j1 = 4, arr1 = "0.80")
#ar1 seed
#1 0.8000000 289805
#2 0.8000368 289989
I want to change Error in order(res2$seed): argument 1 is not a vector when need be to instruction for this R function useers to Try another range of seeds
You can either look before you leap by testing if the seed column exists:
abc <- function(a, z, n, ar11, p, d, q, sd = sd, j1, arr1, n_cores){
# ...code as in OP...
res2 <- tibble::tibble(Reduce(function(...) merge(..., all = T), lapply(res1, function(x) as.data.frame(t(x)))))
if (!("seed" %in% colnames(res2))) {
warning("Try another range of seeds", call. = FALSE)
} else {
res2[order(res2$seed), ]
}
}
abc(a = 280000, z = 281000, n = 10, p = 1, d = 0, q = 0, ar11 = 0.8, sd = 1, j1 = 4, arr1 = "0.80")
# processing...
# done!
#
# Warning message:
# Try another range of seeds
Or ask for forgiveness instead of permission using tryCatch() and suppressWarnings() for a slightly more generic approach:
abc <- function(a, z, n, ar11, p, d, q, sd = sd, j1, arr1, n_cores){
# ...code as in OP...
res2 <- tibble::tibble(Reduce(function(...) merge(..., all = T), lapply(res1, function(x) as.data.frame(t(x)))))
tryCatch(
suppressWarnings(res2[order(res2$seed), ]),
error = \(err) {
if (grepl("argument 1 is not a vector", err$message)) {
warning("Try another range of seeds", call. = FALSE)
} else {
stop(err)
}
}
)
}
abc(a = 280000, z = 281000, n = 10, p = 1, d = 0, q = 0, ar11 = 0.8, sd = 1, j1 = 4, arr1 = "0.80")
# processing...
# done!
#
# Warning message:
# Try another range of seeds
That said, it’s better in my opinion to throw an error than a warning when a function doesn’t return the expected output. Especially if other code will depend on the result of this function. You can throw an error with your desired message by replacing warning() with stop().

How can I get the values of all arguments used when evaluating a call object in a specified environment

say I have a function f as
f = function(x = 1, y, z, t) { x + y + z}
and a list l such
l = list(Y = 2, t = "test")
I can evaluate f in l like
eval(quote(f(y = Y, z = 3)), envir = l)
6
My question is that I'd like to get all the values of the arguments that ends up being used by the function f ie. a function magic that would take a call object and an environment and would return the values of all the arguments that would be used in evaluating the expression.
For instance:
call_obj = quote(f(y = Y, z = 3))
magic(call_obj, envir = l)
# I get a named list which value is list(1,2,3,"test")
# For that matter I do not even need the default arguments values (x)
EDIT: Adding a bounty for a base-r answer (while #Artem Sokolov provided a purrr-rlang one, extracting a couple relevant functions would still be fine though)
tidyverse solution
# Identify the variables in l that can be used to specify arguments of f
args1 <- l[ intersect( names(formals(f)), names(l) ) ]
# Augment the call with these variables
call_obj2 <- rlang::call_modify( call_obj, !!!args1 )
# f(y = Y, z = 3, t = "test")
# Evaluate the arguments of the call in the context of l and combine with defaults
purrr::list_modify( formals(f),
!!!purrr::map(rlang::call_args(call_obj2), eval, l) )
base R solution
# As above
args1 <- l[ intersect( names(formals(f)), names(l) ) ]
# Augment the call with variables in args1
l1 <- modifyList( as.list(call_obj), args1 )[-1]
# Evaluate the arguments in the context of l and combine with defaults
modifyList(formals(f), lapply(l1, eval, l))
Output for both solutions
# $x
# [1] 1
#
# $y
# [1] 2
#
# $z
# [1] 3
#
# $t
# [1] "test"
How about this one:
magic <- function(call_obj, envir) {
call_fun <- as.list(as.call(call_obj))[[1]]
call_obj <- match.call(match.fun(call_fun), as.call(call_obj))
## arguments supplied in call
call_args <- as.list(call_obj)[-1]
## arguments from function definition
fun_args <- formals(match.fun(call_fun))
## match arguments from call with list
new_vals_call <- lapply(call_args, function(x) eval(x, envir = envir))
## match arguments from function definition with list
## because everything (including NULL) can be a valid function argument we cannot directly use mget()
in_list <- sapply(names(fun_args), function(x, env) exists(x, envir = env), as.environment(envir))
new_vals_formals <- mget(names(fun_args), envir = as.environment(envir), ifnotfound = "")[in_list]
## values in the call take precedence over values from the list (can easily be reversed if needed)
new_vals_complete <- modifyList(fun_args, new_vals_formals, keep.null = TRUE)
new_vals_complete <- modifyList(new_vals_complete, new_vals_call, keep.null = TRUE)
## Construct a call object (if you want only the list of arguments return new_vals_complete)
as.call(c(call_fun, new_vals_complete))
}
# -------------------------------------------------------------------------
f <- function(x = 1, y, z, t) { x + y + z}
## Tests
## basic test
magic(quote(f(y = Y, z = 3)), list(Y = 2, t = "test"))
#> f(x = 1, y = 2, z = 3, t = "test")
## precedence (t defined twice)
magic(quote(f(y = Y, z = 3, t=99)), list(Y = 2, t = "test"))
#> f(x = 1, y = 2, z = 3, t = 99)
## missing values (z is missing)
magic(quote(f(y = Y)), list(Y = 2, t = "test"))
#> f(x = 1, y = 2, z = , t = "test")
## NULL values in call
magic(quote(f(y = Y, z = NULL)), list(Y = 2, t = "test"))
#> f(x = 1, y = 2, z = NULL, t = "test")
## NULL values in list
magic(quote(f(y = Y, z = 3)), list(Y = 2, t = NULL))
#> f(x = 1, y = 2, z = 3, t = NULL)
## NULL values and precendece
magic(quote(f(y = Y, z = 3, t= NULL)), list(Y = 2, t = "test"))
#> f(x = 1, y = 2, z = 3, t = NULL)
magic(quote(f(y = Y, z = 3, t=99)), list(Y = 2, t = NULL))
#> f(x = 1, y = 2, z = 3, t = 99)
## call with subcalls
magic(quote(f(y = sin(pi), z = 3)), list(Y = 2, t = "test"))
#> f(x = 1, y = 1.22460635382238e-16, z = 3, t = "test")
magic(quote(f(y = Y, z = 3)), list(Y = sin(pi), t = "test"))
#> f(x = 1, y = 1.22460635382238e-16, z = 3, t = "test")
## call with additional vars (g is not an argument of f) -> error: unused arguments
magic(quote(f(g = Y, z = 3)), list(Y = 2, t = "test"))
## list with with additional vars (g is not an argument of f) -> vars are ignored
magic(quote(f(y = Y, z = 3)), list(Y = 2, t = "test", g=99))
#> f(x = 1, y = 2, z = 3, t = "test")
## unnamed arguments
magic(quote(f(99, y = Y, z = 3)), list(Y = 2, t = "test"))
#> f(x = 99, y = 2, z = 3, t = "test")
magic(quote(f(99, y = Y, 77)), list(Y = 2, t = "test"))
#> f(x = 99, y = 2, z = 77, t = "test")
Strictly Base R... Also supports unnamed arguments in call_obj.
Function definition
magic <- function(call_obj, envir) {
#browser()
# Get all formal args
Formals <- formals(as.character(call_obj))
# fix names of call_obj to allow unnamed args
unnamed <- which(names(call_obj)[-1] == "")
# ignore extra arguments names if too many args (issue a warning?)
unnamed <- unnamed[unnamed <= length(Formals)]
# check for names conflicts
named <- which(names(call_obj)[-1] != "")
if (any(unnamed > named))
stop("Unnamed arguments cannot follow named arguments in call_obj")
if (any(names(Formals)[unnamed] %in% names(call_obj)))
stop("argument names conflicting in call_obj; ",
"avoid unnamed arguments if possible")
names(call_obj)[unnamed + 1] <- names(Formals)[unnamed]
# Replace defaults by call_obj values
for (nn in intersect(names(call_obj), names(Formals))) {
Formals[nn] <- call_obj[nn]
}
# Check for other values in envir
for (mm in names(which(sapply(Formals, class) == "name"))) {
if (mm %in% names(envir))
Formals[mm] <- envir[mm]
else if (Formals[mm] %in% names(envir))
Formals[mm] <- envir[which(names(envir) == Formals[[mm]])]
}
print(as.call(c(as.list(as.call(call_obj))[[1]], Formals)))
return(invisible(Formals))
}
Example
f = function(x = 1, y, z, t) { x + y + z}
l = list(Y = 2, t = "test")
call_obj = quote(f(y = Y, z = 3))
magic(call_obj, envir = l)
Results (printed)
f(x = 1, y = 2, z = 3, t = "test")
Returned object (invisibly, for assignment)
$x
[1] 1
$y
[1] 2
$z
[1] 3
$t
[1] "test"
Although we got there through different ways, all the results from AEF's tests concur with mine.

How execute pairwise.t.test into a list with `for` loop?

My list (lt):
df_1 <- data.frame(
x = replicate(
n = 2,
expr = runif(n = 30, min = 20, max = 100)
),
y = sample(
x = 1:3, size = 30, replace = TRUE
)
)
lt <- split(
x = df_1,
f = df_1[['y']]
)
vars <- names(df_1)[1:2]
I try:
for (i in vars) {
for (i in i) {
print(pairwise.t.test(x = lt[, i], g = lt[['y']], p.adj = 'bonferroni'))
}
}
But, the error message is:
Error in lista[, i] : incorrect number of dimensions
What's problem?
We don't need to split
pairwise.t.test(unlist(df_1[1:2]), g = rep(df_1$y, 2), p.adj = 'bonferroni')
#Pairwise comparisons using t tests with pooled SD
#data: unlist(df_1[1:2]) and rep(df_1$y, 2)
# 1 2
#2 1.00 -
#3 0.91 1.00

define breaks for hist2d in R

is there a simple way to define breaks instead of nbins for a 2d histogram (hist2d) in R?
I want to define the range for the x- and yaxis for a 2D histogram and the number of bins for each dimension.
My example:
# example data
x <- sample(-1:100, 2000, replace=T)
y <- sample(0:89, 2000, replace=T)
# create 2d histogram
h2 <- hist2d(x,y,nbins=c(23,19),xlim=c(-1,110), ylim=c(0,95),xlab='x',ylab='y',main='hist2d')
This results in this 2D histogram output 1
----------------------------
2-D Histogram Object
----------------------------
Call: hist2d(x = x, y = y, nbins = c(23, 19), xlab = "x", ylab = "y",
xlim = c(-1, 110), ylim = c(0, 95), main = "hist2d")
Number of data points: 2000
Number of grid bins: 23 x 19
X range: ( -1 , 100 )
Y range: ( 0 , 89 )
I need
X range: ( -1 , 110 )
Y range: ( 0 , 95 )
instead.
My attempt to define the xlim and ylim only extends the plot but does not define the axis range for the histogram. I know that there would be no data in the additional bins.
Is there a way to define
xbreaks = seq(-1,110,5)
ybreaks = seq(0,95,5)
instead of using nbins which divides the range from minimum to maximum into the given number of bins?
Thank you for your help
I changed the code a little bit and this version should work the with explicitly defining the breaks for both axes. First you have to load the function. Then you can give the x.breaks and y.breaks options with x.breaks=seq(0,10,0.1).
If same.scale is true, you only need x.breaks
The return value addionaly contains the number of bins and the relative counts.
Also, you can include a legend if wanted, by setting legend=TRUE. For that you need to have the package Fields
hist2d_breaks = function (x, y = NULL, nbins = 200,same.scale = FALSE, na.rm = TRUE,
show = TRUE, col = c("black", heat.colors(12)), FUN = base::length,
xlab, ylab,x.breaks,y.breaks, ...)
{
if (is.null(y)) {
if (ncol(x) != 2)
stop("If y is ommitted, x must be a 2 column matirx")
y <- x[, 2]
x <- x[, 1]
}
if (length(nbins) == 1)
nbins <- rep(nbins, 2)
nas <- is.na(x) | is.na(y)
if (na.rm) {
x <- x[!nas]
y <- y[!nas]
}
else stop("missinig values not permitted if na.rm=FALSE")
if(same.scale){
x.cuts = x.breaks;
y.cuts = x.breaks;
}else{
x.cuts <- x.breaks
y.cuts <- y.breaks
}
index.x <- cut(x, x.cuts, include.lowest = TRUE)
index.y <- cut(y, y.cuts, include.lowest = TRUE)
m <- tapply(x, list(index.x, index.y), FUN)
if (identical(FUN, base::length))
m[is.na(m)] <- 0
if (missing(xlab))
xlab <- deparse(substitute(xlab))
if (missing(ylab))
ylab <- deparse(substitute(ylab))
if (show){
if(legend){
image.plot(x.cuts, y.cuts, m, col = col, xlab = xlab, ylab = ylab,
...)
}else{
image(x.cuts, y.cuts, m, col = col, xlab = xlab, ylab = ylab,
...)
}
}
midpoints <- function(x) (x[-1] + x[-length(x)])/2
retval <- list()
retval$counts <- m
retval$counts_rel <- m/max(m)
retval$x.breaks = x.cuts
retval$y.breaks = y.cuts
retval$x = midpoints(x.cuts)
retval$y = midpoints(y.cuts)
retval$nobs = length(x)
retval$bins = c(length(x.cuts),length(y.cuts))
retval$call <- match.call()
class(retval) <- "hist2d"
retval
}
The call of (my data) then brings the following:
hist2d_breaks(df,x.breaks=seq(0,10,1),y.breaks=seq(-10,10,1),legend=TRUE)
brings up the following plot
2D Histogram with breaks
Revise the "hist2d" as follows
hist2d_range<-function (x, y = NULL, nbins = 200, same.scale = TRUE, na.rm = TRUE,
show = TRUE, col = c("black", heat.colors(12)), FUN = base::length,
xlab, ylab,range=NULL, ...)
{
if (is.null(y)) {
if (ncol(x) != 2)
stop("If y is ommitted, x must be a 2 column matirx")
y <- x[, 2]
x <- x[, 1]
}
if (length(nbins) == 1)
nbins <- rep(nbins, 2)
nas <- is.na(x) | is.na(y)
if (na.rm) {
x <- x[!nas]
y <- y[!nas]
}
else stop("missinig values not permitted if na.rm=FALSE")
if (same.scale) {
if(is.null(range))
{
x.cuts <- seq(from = min(x, y), to = max(x, y), length = nbins[1] +
1)
y.cuts <- seq(from = min(x, y), to = max(x, y), length = nbins[2] +
1)
}else{
x.cuts <- seq(from = range[1], to = range[2], length = nbins[1] + 1)
y.cuts <- seq(from = range[1], to = range[2], length = nbins[1] + 1)
}
}
else {
x.cuts <- seq(from = min(x), to = max(x), length = nbins[1] +
1)
y.cuts <- seq(from = min(y), to = max(y), length = nbins[2] +
1)
}
index.x <- cut(x, x.cuts, include.lowest = TRUE)
index.y <- cut(y, y.cuts, include.lowest = TRUE)
m <- tapply(x, list(index.x, index.y), FUN)
if (identical(FUN, base::length))
m[is.na(m)] <- 0
if (missing(xlab))
xlab <- deparse(substitute(xlab))
if (missing(ylab))
ylab <- deparse(substitute(ylab))
if (show)
image(x.cuts, y.cuts, m, col = col, xlab = xlab, ylab = ylab,
...)
midpoints <- function(x) (x[-1] + x[-length(x)])/2
retval <- list()
retval$counts <- m
retval$x.breaks = x.cuts
retval$y.breaks = y.cuts
retval$x = midpoints(x.cuts)
retval$y = midpoints(y.cuts)
retval$nobs = length(x)
retval$call <- match.call()
class(retval) <- "hist2d"
retval
}
This function has an additional argument "range".
The revised point is as follows.
if(is.null(range))
{
x.cuts <- seq(from = min(x, y), to = max(x, y), length = nbins[1] +
1)
y.cuts <- seq(from = min(x, y), to = max(x, y), length = nbins[2] +
1)
}else{
x.cuts <- seq(from = range[1], to = range[2], length = nbins[1] + 1)
y.cuts <- seq(from = range[1], to = range[2], length = nbins[1] + 1)
}

How to modify a function for different dim?

I have got a function which was used for his data dim(1000*1000).My data are the same but with different dim (500*1300).How Can I adapt the function to my dims?
image.arr = array(dim = c(1000, 1000, 20)))
interpolated.lst = vector(mode = "list", length = 1000)
system.time(
{
for(i in 1:1200){
interpolated.lst[[i]] =
apply(image.arr[i, , ], 1,
FUN = function(x){
imageSpline(x = dates, y = x, xout = 1:365)$y
}
)
}
}
)
The code uses apply to go over the rows of the images, hence only the width needs to be provided. Just replace this:
interpolated.lst = vector(mode = "list", length = nrow(image.arr))
system.time(
for(i in seq_len(nrow(image.arr))) {
interpolated.lst[[i]] =
apply(image.arr[i, , ], 1,
FUN = function(x) imageSpline(x = dates, y = x, xout = 1:365)$y)
})

Resources