match.fun provide error with functions defined inside functions - r

I get error when try to apply match.fun to the functions define within other functions.
x <- matrix(rnorm(10*100), nrow=100) # data sample
descStats <- function(x, stats = c("n", "min", "max", "srange", "mean", "median", "sd")) {
n <- function(x, ...) sum(!is.na(x), ...)
srange <- function(x, ...) max(x, ...) - min(x, ...)
fun <- function(x) {
result <- vapply(stats, function(z) match.fun(z)(x, na.rm=TRUE), FUN.VALUE=numeric(1))
}
if (is.vector(x)) {
result <- fun(x)
}
if (is.matrix(x) || is.data.frame(x)) {
result <- t(apply(x, 2, fun))
}
return(result)
}
descStats(x)
## Error in get(as.character(FUN), mode = "function", envir = envir) :
## object 'n' of mode 'function' was not found
If I define n and srange outside of descStats function it works fine.
n <- function(x, ...) sum(!is.na(x), ...)
srange <- function(x, ...) max(x, ...) - min(x, ...)
descStats2 <- function(x, stats = c("n", "min", "max", "srange", "mean", "median", "sd")) {
fun <- function(x) {
result <- vapply(stats, function(z) match.fun(z)(x, na.rm=TRUE), FUN.VALUE=numeric(1))
}
if (is.vector(x)) {
result <- fun(x)
}
if (is.matrix(x) || is.data.frame(x)) {
result <- t(apply(x, 2, fun))
}
return(result)
}
descStats2(x)
## n min max srange mean median sd
## [1,] 100 -2.303839 2.629366 4.933205 0.03711611 0.14566523 1.0367947
## [2,] 100 -1.968923 2.169382 4.138305 -0.03917503 0.02239458 0.9048509
## [3,] 100 -2.365891 2.424077 4.789968 -0.08012138 -0.23515910 1.0438133
## [4,] 100 -2.740045 2.127787 4.867832 0.03978241 0.15363449 0.9778891
## [5,] 100 -1.598295 2.603525 4.201820 0.23796616 0.16376239 1.0428915
## [6,] 100 -1.550385 1.684155 3.234540 -0.11114479 -0.09264598 0.8260126
## [7,] 100 -2.438641 3.268796 5.707438 0.02948100 -0.05594740 1.0481331
## [8,] 100 -1.716407 2.795340 4.511747 0.22463606 0.16296613 0.9555129
## [9,] 100 -2.359165 1.975993 4.335158 -0.33321888 -0.17580933 0.9784788
## [10,] 100 -2.139267 2.838986 4.978253 0.15540182 0.07803265 1.0149671
Another way it's use eval(call(FUN, args)). For instance.
descStats3 <- function(x, stats = c("n", "min", "max", "srange", "mean", "median", "sd")) {
n <- function(x, ...) sum(!is.na(x), ...)
srange <- function(x, ...) max(x, ...) - min(x, ...)
fun <- function(x) {
result <- vapply(stats, function(z) eval(call(z, x, na.rm=TRUE)), FUN.VALUE=numeric(1))
}
if (is.vector(x)) {
result <- fun(x)
}
if (is.matrix(x) || is.data.frame(x)) {
result <- t(apply(x, 2, fun))
}
return(result)
}
descStats3(x)
## n min max srange mean median sd
## [1,] 100 -2.303839 2.629366 4.933205 0.03711611 0.14566523 1.0367947
## [2,] 100 -1.968923 2.169382 4.138305 -0.03917503 0.02239458 0.9048509
## [3,] 100 -2.365891 2.424077 4.789968 -0.08012138 -0.23515910 1.0438133
## [4,] 100 -2.740045 2.127787 4.867832 0.03978241 0.15363449 0.9778891
## [5,] 100 -1.598295 2.603525 4.201820 0.23796616 0.16376239 1.0428915
## [6,] 100 -1.550385 1.684155 3.234540 -0.11114479 -0.09264598 0.8260126
## [7,] 100 -2.438641 3.268796 5.707438 0.02948100 -0.05594740 1.0481331
## [8,] 100 -1.716407 2.795340 4.511747 0.22463606 0.16296613 0.9555129
## [9,] 100 -2.359165 1.975993 4.335158 -0.33321888 -0.17580933 0.9784788
## [10,] 100 -2.139267 2.838986 4.978253 0.15540182 0.07803265 1.0149671
identical(descStats2(x), descStats3(x))
## [1] TRUE
Why descStats not work?

It's relatively easy (and illustrative) to write your own version of match.fun. I've called my function fget to indicate that it's a version of get specifically designed for functions, and hence obeys the regular scoping rules for functions. (If you're not sure what they are, think about this code: c <- 10; c(c, 5))
#' Find a function with specified name.
#'
#' #param name length one character vector giving name
#' #param env environment to start search in.
#' #examples
#' c <- 10
#' fget("c")
fget <- function(name, env = parent.frame()) {
if (identical(env, emptyenv())) {
stop("Could not find function called ", name, call. = FALSE)
}
if (exists(name, env, inherits = FALSE) && is.function(env[[name]])) {
env[[name]]
} else {
fget(name, parent.env(env))
}
}
The implementation is as a straightforward recursive function: the base case is the emptyenv(), the eventual ancestor of every environment, and for each environment along the stack of parents, we check to see that both an object called name exists, and that it is a function.
It works in the simple test case provided by #nograpes because the environment defaults to the calling environment:
fun <- function(x) {
n <- sum
fget('n')(x)
}
fun(10)
# [1] 10

it is a scope problem. Looking in the code of match.fun you get the answer.
match.fun scope is the envir <- parent.frame(2)
get scope is in the envir = as.environment(-1) = parent.frame(1)
I think we can't pass the envir as an argument.
One solution is to use get as presented by #nograpes ( unsafe) or to hack match.fun and change
envir <- parent.frame(2) to envir <- parent.frame(1)

For reasons I don't completely understand yet, if you use get instead of match.fun, everything works fine.
x <- matrix(rnorm(10*100), nrow=100) # data sample
descStats <- function(x, stats = c("n", "min", "max", "srange", "mean", "median", "sd")) {
n <- function(x, ...) sum(!is.na(x), ...)
srange <- function(x, ...) max(x, ...) - min(x, ...)
fun <- function(x) {
# get added here.
result <- vapply(stats, function(z) get(z)(x, na.rm=TRUE), FUN.VALUE=numeric(1))
}
if (is.vector(x)) {
result <- fun(x)
}
if (is.matrix(x) || is.data.frame(x)) {
result <- t(apply(x, 2, fun))
}
return(result)
}
descStats(x)

Related

decorate a function to count the number of times it gets called while preserving the original functions args

I want to write a decorator function that adds a counter to a function, counting the number of times it was called. E.g.
foo <- function(x) {x}
foo <- counter_decorator(foo)
foo(1)
foo(1)
# => the counter gets incremented with each call and has the value 2 now
The approach below basically works, but:
I want the inner function (which is returned by the decorator) to have the same formal args as the original function and not just ellipsis (i.e. ...). I am not sure how to accomplish that. Any ideas?
Not sure if the whole approach is a good one. Alternatives or improvements are appreciated.
Here is what I did so far:
# Init or reset counter
counter_init <- function() {
.counters <<- list()
}
# Decorate a function with a counter
#
# Each time the function is called the counter is incremented
#
# fun: function to be decorated
# fun_name: name in .counters list to store number of times in
#
counter_decorator <- function(fun, fun_name = NULL)
{
# use function name if no name is passed explicitly
if (is.null(fun_name)) {
fun_name <- deparse(substitute(fun))
}
fun <- force(fun) # deep copy to prevent infinite recursion
function(...) { # ==> ellipsis not optimal!
n <- .counters[[fun_name]]
if (is.null(n)) {
n <- 0
}
.counters[[fun_name]] <<- n + 1
fun(...)
}
}
Now let's create some functions and decorate them.
library(dplyr) # for pipe
# Create functions and decorate them with a counter
# create and decorate in second call
add_one <- function(x) {
x + 1
}
add_one <- counter_decorator(add_one)
# create and decorate the piping way by passing the fun_name arg
add_two <- {function(x) {
x + 2
}} %>% counter_decorator(fun_name = "add_two")
mean <- counter_decorator(mean)
counter_init()
for (i in 1:100) {
add_one(1)
add_two(1)
mean(1)
}
What we get in the .counters list is
> .counters
$add_one
[1] 100
$add_two
[1] 100
$mean
[1] 100
which is basically what I want.
1) The trace command can be used. Use untrace to undo the trace or set .counter to any desired value to start over again from that value.
f <- function(x) x
trace(f, quote(.counter <<- .counter + 1), print = FALSE)
.counter <- 0
f(1)
## [1] 1
f(1)
## [1] 1
.counter
## [1] 2
2) This variation stores the counter in an attribute of f.
f <- function(x) x
trace(f, quote(attr(f, "counter") <<- attr(f, "counter") + 1), print = FALSE)
attr(f, "counter") <- 0
f(1)
## [1] 1
f(1)
## [1] 1
attr(f, "counter")
## [1] 2
3) This variation stores the counter in an option.
f <- function(x) x
trace(f, quote(options(counter = getOption("counter", 0) + 1)), print = FALSE)
f(1)
## [1] 1
f(1)
## [1] 1
getOption("counter")
## [1] 2
This method stores the counter within the wrapper function itself instead of somewhere in the users environment or package environment. (There's nothing wrong with the latter; the former can be problematic or at least annoying/discourteous.)
The biggest side-effect (liability?) of this is when the package is detached or reloaded (i.e., during development), then the counter list is cleared/re-initialized.
counter_decorator <- function(fun) {
.counter <- 0L
fun2 <- function(...) {
.counter <<- .counter + 1L
cl <- match.call()
cl[[1]] <- fun
eval.parent(cl)
}
formals(fun2) <- formals(args(fun))
fun2
}
Demo:
foo <- function(x, y) x + y
foo2 <- counter_decorator(foo)
get(".counter", envir = environment(foo2))
# [1] 0
foo2(5, 9)
# [1] 14
foo2(5, 11)
# [1] 16
foo2(5, 13)
# [1] 18
get(".counter", envir = environment(foo2))
# [1] 3
Same formals:
formals(foo)
# $x
# $y
formals(foo2)
# $x
# $y
Edited (twice) to better track primitives where formals(.) is NULL; in that case, we can use formals(args(fun)).
Adapted for your preferred methodology, albeit with a little poetic liberty:
counters <- local({
.counters <- list()
function(init = FALSE) {
out <- .counters # will return counters *before* initialization
if (init) .counters <<- list()
out
}
})
counter_decorator <- function(fun, fun_name) {
if (missing(fun_name)) {
fun_name <- deparse(substitute(fun))
}
count <- get(".counters", envir = environment(counters))
count[[fun_name]] <- 0L
assign(".counters", count, envir = environment(counters))
fun2 <- function(...) {
.count <- get(".counters", envir = environment(counters))
.count[[fun_name]] <- if (is.null(.count[[fun_name]])) 1L else .count[[fun_name]] + 1L
assign(".counters", .count, envir = environment(counters))
cl <- match.call()
cl[[1]] <- fun
eval.parent(cl)
}
formals(fun2) <- formals(args(fun))
fun2
}
add_one <- function(x) {
x + 1
}
add_one <- counter_decorator(add_one)
add_two <- {function(x) {
x + 2
}} %>% counter_decorator(fun_name = "add_two")
new_mean <- counter_decorator(mean)
for (i in 1:100) {
add_one(1)
add_two(1)
new_mean(1)
}
counters()
# $add_one
# [1] 100
# $add_two
# [1] 100
# $mean
# [1] 100
formals(new_mean)
# $x
# $...
Initialization is not strictly required. Re-initialization returns the counters before reinitializing, so you don't need a double-call to get the values and then reset (and if you don't care about previous values, just ignore its return).
counters(TRUE)
# $add_one
# [1] 100
# $add_two
# [1] 100
# $mean
# [1] 100
counters()
# list()
add_one(10)
# [1] 11
counters()
# $add_one
# [1] 1

r - apply a function on data n number of times

I would like to apply the same function certain number of times on a vector using the output from the function every time.
A simplified example with a simple function just to demonstrate:
# sample vector
a <- c(1,2,3)
# function to be applied n times
f1 <- function(x) {
x^2 + x^3
}
I would like to apply f1 on a, n number of times, for example here lets say 3 times.
I heard purrr::reduce or purrr::map() might be a good idea for this but couldn't make it work.
The desired output if n = 3 would be equal to f1(f1(f1(a))).
Let's use Reduce (no external library requirements, generally good performance). I'll modify the function slightly to accept a second (ignored) argument:
f1 <- function(x, ign) x^2 + x^3
Reduce(f1, 1:3, init = a)
# [1] 1.872000e+03 6.563711e+09 1.102629e+14
Here's what's happening. Reduce:
uses a binary function to successively combine the elements of a given vector and a possibly given initial value.
The first argument is the function to use, and it should accept two arguments. The first is the value from the previous execution of the function in this reduction. On the first call of the function, it uses the init= value provided.
First call:
f1(c(1,2,3), 1)
# [1] 2 12 36
Second call:
f1(c(2,12,36), 2)
# [1] 12 1872 47952
Third call:
f1(c(12,1872,47952), 3)
# [1] 1.872000e+03 6.563711e+09 1.102629e+14
The second argument 1:3 is used just for its length. Anything of the proper length will work.
If you don't want to redefine f1 just for this reduction, you can always do
Reduce(function(a,ign) f1(a), ...)
Benchmark:
library(microbenchmark)
r <- Reduce(function(a,b) call("f1", a), 1:3, init=quote(a))
triple_f1 <- function(a) f1(f1(f1(a)))
microbenchmark::microbenchmark(
base = Reduce(function(a,ign) f1(a), 1:3, a),
accum = a %>% accumulate(~ .x %>% f1, .init = f1(a)) %>% extract2(3),
reduc = purrr::reduce(1:3, function(a,ign) f1(a), .init=a),
whil = {
i <- 1
a <- c(1,2,3)
while (i < 10) {
i <- i + 1
a <- f1(a)
}
},
forloop = {
out <- a
for(i in seq_len(3)) out <- f1(out)
},
evaluated = {
r <- Reduce(function(a,b) call("f1", a), 1:3, init=quote(a))
eval(r)
},
precompiled = eval(r),
anotherfun = triple_f1(a)
)
# Unit: microseconds
# expr min lq mean median uq max neval
# base 5.101 7.3015 18.28691 9.3010 10.8510 848.302 100
# accum 294.201 328.4015 381.21204 356.1520 402.6510 823.602 100
# reduc 27.000 38.1005 57.55694 45.2510 54.2005 747.401 100
# whil 1717.300 1814.3510 1949.03100 1861.8510 1948.9510 2931.001 100
# forloop 1110.001 1167.1010 1369.87696 1205.5010 1292.6500 9935.501 100
# evaluated 6.702 10.2505 22.18598 13.3015 15.5510 715.301 100
# precompiled 2.300 3.2005 4.69090 4.0005 4.5010 26.800 100
# anotherfun 1.400 2.0515 12.85201 2.5010 3.3505 1017.801 100
i <- 1
while (i < 10) {
i <- i + 1
x <- f(x)
}
Here is an option with accumulate
library(tidyverse)
n <- 3
a %>%
accumulate(~ .x %>%
f1, .init = f1(a)) %>%
extract2(n)
#[1] 1.872000e+03 6.563711e+09 1.102629e+14
NOTE: accumulate is similar to the base R option Reduce with accumulate = TRUE
checking with the OP's output
f1(f1(f1(a)))
#[1] 1.872000e+03 6.563711e+09 1.102629e+14
Or use a for loop (no external libraries used)
out <- a
for(i in seq_len(n)) out <- f1(out)
out
#[1] 1.872000e+03 6.563711e+09 1.102629e+14
Here's another way to do it with Reduce:
setting the stage
a <- 1:3
f1 <- function(x) x^2 + x^3
constructing a call and evaluating it
N <- 3 # how many times?
r <- Reduce(function(a,b) call("f1", a), rep(NA, N), init=a)
# f1(f1(f1(1:3)))
eval(r)
# [1] 1.872000e+03 6.563711e+09 1.102629e+14
alternative 2
# N defined as above
Reduce(function(x,y) y(x), replicate(N,f1), init=a)
# [1] 1.872000e+03 6.563711e+09 1.102629e+14
alternative 3 (recursive with a global-like variable)
doit <- function(N) {
i <- 0
function(fun, x){
i <<- i +1
if(i < N) Recall(fun, fun(x)) else fun(x)
}
}
doit(3)(f1, a)
# [1] 1.872000e+03 6.563711e+09 1.102629e+14
... or even
doit <- function(N, fun, x) (function(fun, x)
if((N <<- N - 1) > 0)
Recall(fun, fun(x)) else
fun(x))(fun, x)
doit(3, f1, a)
# [1] 1.872000e+03 6.563711e+09 1.102629e+14

passing ellipsis arguments to map function purrr package, R

I want to use ellipsis parameters inside map function of purrr package. this is a toy example:
f1<-function(x,a=NA,b=NA,prs=seq(0, 1, 0.25),SW=T){
if(SW){
res<-data.frame(name1=a,name2=b,t(quantile(x, prs, na.rm = T)), mean=mean(x, na.rm = T), sd=sd(x, na.rm = T),
NAs=length(x[is.na(x)]),n=length(x[!is.na(x)]),SWp=shapiro.test(x)$p.value,stringsAsFactors =F)
}else
{
res<-data.frame(name1=a,name2=b,t(quantile(x, prs, na.rm = T)), mean=mean(x, na.rm = T), sd=sd(x, na.rm = T),
NAs=length(x[is.na(x)]),n=length(x[!is.na(x)]),stringsAsFactors =F)
}
return(res)
}
f1(c(NA,rnorm(25),NA),SW=F)
f1(c(NA,rnorm(25),NA))
now I want to use f1 inside another function f2:
f2<-function(df,...){
res<-map_df(colnames(df),~f1(df[,.],a=.,...))
return(res)
}
where ... is intended mainly to manipulate SW and a or b parameters in f1 function. however f2 is not doing what I want as can be seen here
f2(iris[,-5])
f2(iris[,-5],SW=F)
I appreciate any guide in how to use addecuatelly ... inside map
You just need to pass the ellipses through the map_df() call as well. Otherwise they can't get into the inner f1() call.
f2 <- function(df, ...){
res <- map_df(colnames(df), ~f1(df[,.], a=., ...), ...)
return(res)
}
You can also capture the ellipses early on in your second function, and use do.call to add them to your first function later on. This makes it more explicit where and how they are used.
f2 <- function(df, ...){
params <- list(...)
res <- map_df(colnames(df), ~ do.call(
f1, c(list(x = df[,.], a=.), params)))
return(res)
}
MrFlick solution did not work for me: I think indeed you also need to pass the ... to the anonymous function, which then requires using function(x,...) instead of ~ (as suggested by #dmi3kno).
That means you need the quite surprising triple ... call:
map(x, function(x, ...) mean(x, trim=0, ...), ...)
Example:
library(purrr)
x <- list(c(1,2), c(1,2,NA))
fo1 <- function(...) map(x, ~mean(., trim=0, ...), ...)
fo2 <- function(...) map(x, function(x, ...) mean(x, trim=0, ...), ...)
fo1()
#> Warning in if (na.rm) x <- x[!is.na(x)]: the condition has length > 1 and only
#> the first element will be used
#> Warning in if (na.rm) x <- x[!is.na(x)]: the condition has length > 1 and only
#> the first element will be used
#> [[1]]
#> [1] 1.5
#>
#> [[2]]
#> [1] 1.5
fo2()
#> [[1]]
#> [1] 1.5
#>
#> [[2]]
#> [1] NA
fo2(na.rm=TRUE)
#> [[1]]
#> [1] 1.5
#>
#> [[2]]
#> [1] 1.5
Created on 2020-11-16 by the reprex package (v0.3.0)
For this issue, I've found that rlang::exec() allows you to pass ... to purrr::map() when combined with an anonymous function, like this:
f2 <- function(df, ...){
res <- map(colnames(df), function(x) rlang::exec("f1", df[,x], ...))
return(res)
}

Subsetting with negative indices: best practices?

Say I have a function for subsetting (this is just a minimal example):
f <- function(x, ind = seq(length(x))) {
x[ind]
}
(Note: one could use only seq(x) instead of seq(length(x)), but I don't find it very clear.)
So, if
x <- 1:5
ind <- c(2, 4)
ind2 <- which(x > 5) # integer(0)
I have the following results:
f(x)
[1] 1 2 3 4 5
f(x, ind)
[1] 2 4
f(x, -ind)
[1] 1 3 5
f(x, ind2)
integer(0)
f(x, -ind2)
integer(0)
For the last result, we would have wanted to get all x, but this is a common cause of error (as mentionned in the book Advanced R).
So, if I want to make a function for removing indices, I use:
f2 <- function(x, ind.rm) {
f(x, ind = `if`(length(ind.rm) > 0, -ind.rm, seq(length(x))))
}
Then I get what I wanted:
f2(x, ind)
[1] 1 3 5
f2(x, ind2)
[1] 1 2 3 4 5
My question is:
Can I do something cleaner and that doesn't need passing seq(length(x)) explicitly in f2 but using directly the default value of f's parameter ind when ind.rm is integer(0)?
If you anticipate having "empty" negative indices a lot, you can get a performance improvement for these cases if you can avoid the indexing used by x[seq(x)] as opposed to just x. In other words, if you are able to combine f and f2 into something like:
new_f <- function(x, ind.rm){
if(length(ind.rm)) x[-ind.rm] else x
}
There will be a huge speedup in the case of empty negative indices.
n <- 1000000L
x <- 1:n
ind <- seq(0L,n,2L)
ind2 <- which(x>n+1) # integer(0)
library(microbenchmark)
microbenchmark(
f2(x, ind),
new_f(x, ind),
f2(x, ind2),
new_f(x, ind2)
)
all.equal(f2(x, ind), new_f(x, ind)) # TRUE - same result at about same speed
all.equal(f2(x, ind2), new_f(x, ind2)) # TRUE - same result at much faster speed
Unit: nanoseconds
expr min lq mean median uq max neval
f2(x, ind) 6223596 7377396.5 11039152.47 9317005 10271521 50434514 100
new_f(x, ind) 6190239 7398993.0 11129271.17 9239386 10202882 59717093 100
f2(x, ind2) 6823589 7992571.5 11267034.52 9217149 10568524 63417978 100
new_f(x, ind2) 428 1283.5 5414.74 6843 7271 14969 100
What you have isn't bad, but if you want to avoid passing the default value of a default argument you could restructure like this:
f2 <- function(x, ind.rm) {
`if`(length(ind.rm) > 0, f(x,-ind.rm), f(x))
}
which is slightly shorter than what you have.
On Edit
Based on the comments, it seems you want to be able to pass a function nothing (rather than simply not pass at all), so that it uses the default value. You can do so by writing a function which is set up to receive nothing, also known as NULL. You can rewrite your f as:
f <- function(x, ind = NULL) {
if(is.null(ind)){ind <- seq(length(x))}
x[ind]
}
NULL functions as a flag which tells the receiving function to use a default value for the parameter, although that default value must be set in the body of the function.
Now f2 can be rewritten as
f2 <- function(x, ind.rm) {
f(x, ind = `if`(length(ind.rm) > 0, -ind.rm, NULL))
}
This is slightly more readable than what you have, but at the cost of making the original function slightly longer.
To implement "parameter1 = if(cond1) then value1 else default_value_of_param1", I used formals to get default parameters as a call:
f <- function(x, ind.row = seq_len(nrow(x)), ind.col = seq_len(ncol(x))) {
x[ind.row, ind.col]
}
f2 <- function(x, ind.row.rm = integer(0), ind.col.rm = integer(0)) {
f.args <- formals(f)
f(x,
ind.row = `if`(length(ind.row.rm) > 0, -ind.row.rm, eval(f.args$ind.row)),
ind.col = `if`(length(ind.col.rm) > 0, -ind.col.rm, eval(f.args$ind.col)))
}
Then:
> x <- matrix(1:6, 2)
> f2(x, 1:2)
[,1] [,2] [,3]
> f2(x, , 1:2)
[1] 5 6
> f2(x, 1, 2)
[1] 2 6
> f2(x, , 1)
[,1] [,2]
[1,] 3 5
[2,] 4 6
> f2(x, 1, )
[1] 2 4 6
> f2(x)
[,1] [,2] [,3]
[1,] 1 3 5
[2,] 2 4 6

big64 - sum() on a vector of NA produces odd results

When using big64 package, summing a vector of NAs to another vector of integers yields an inaccurate result. Depending on whether the NA vector is summed first or last, the results will be either 0 or twice the correct answer, respectively.
Notice that converting the NA vector away from integer64 will remove the issue.
However, when experimenting with other small values in place of y, the results were awfully strange.
For example:
40 + 35 = 75 but
35 + 40 = 80
Any thoughts as to what is going on?
EXAMPLE:
library(bit64)
x <- as.integer64(c(20, 20))
y <- as.integer64(c(NA, NA))
sum(y, x, na.rm=TRUE)
# integer64
# [1] 80 # <~~~ Twice the correct value
sum(x, y, na.rm=TRUE)
# integer64
# [1] 0 # <~~~~ Incorrect 0. Should be 40.
## Removing the NAs does not help.
y <- y[!is.na(y)]
## A vector of 0's gives the same issue
y <- as.integer64(c(0, 0))
## Same results
sum(y, x, na.rm=TRUE)
# integer64
# [1] 80
sum(x, y, na.rm=TRUE)
# integer64
# [1] 0
## Converting to numeric does away with the issue (but is not a viable workaround, for obvious reasons)
y <- as.numeric(y)
sum(y, x, na.rm=TRUE)
# [1] 1.97626e-322
sum.integer64(y, x, na.rm=TRUE)
# integer64
# [1] 40
sum(x, y, na.rm=TRUE)
# integer64
# [1] 40
Give y a single value, and the results are also very out of place
y <- as.integer64(c(35, NA, NA))
sum.integer64(x, if (!all(is.na(y))) removeNA(y), na.rm=TRUE)
sum.integer64(x, y[[1]], na.rm=TRUE)
sum.integer64(y[[1]], x, na.rm=TRUE)
## No NA's present
sum.integer64(as.integer64(35), x)
# integer64
# [1] 80
sum.integer64(x, as.integer64(35))
# integer64
# [1] 70
Not an answer, but an exploration. Hope it might help you.
From the sum.integer64 function of the bit64 package:
function (..., na.rm = FALSE)
{
l <- list(...)
ret <- double(1)
if (length(l) == 1) {
.Call("sum_integer64", l[[1]], na.rm, ret)
oldClass(ret) <- "integer64"
ret
}
else {
ret <- sapply(l, function(e) {
if (is.integer64(e)) {
.Call("sum_integer64", e, na.rm, ret)
ret
}
else {
as.integer64(sum(e, na.rm = na.rm))
}
})
oldClass(ret) <- "integer64"
sum(ret, na.rm = na.rm)
}
}
Here is your example:
library(bit64)
x <- as.integer64(c(20, 20))
y <- as.integer64(c(NA, NA))
na.rm <- TRUE
l <- list(y, x)
ret <- double(1)
ret
#[1] 0
# We use the sapply function as in the function:
ret <- sapply(l, function(e) { .Call("sum_integer64", e, na.rm, ret) })
oldClass(ret) <- "integer64"
ret
#integer64
#[1] 40 40 <-- twice the value "40"
sum(ret, na.rm = na.rm)
# integer64
#[1] 80 <-- twice the expected value, as you said
Here we decompose the calculation, for each vector:
ret <- double(1)
ret2 <- NULL
ret2[1] <- .Call("sum_integer64", y, na.rm, ret)
ret2[2] <- .Call("sum_integer64", x, na.rm, ret)
oldClass(ret2) <- "integer64"
ret2
#integer64
#[1] 0 40 <-- only once the value "40", and "0" because of NaNs
sum(ret2, na.rm = na.rm)
#integer64
#[1] 40 <- expected value

Resources