When using ls() in a function, it lists arguments of the function even if they've not been evaluated yet (even if they are missing from the call with no default value).
# Demonstration: ls() inside a function lists every formal argument, including
# ones that were never evaluated or never supplied, alongside local variables.
fun <- function(x,y,z,m){
a <- 1
# y is overwritten with a local value; the argument passed for y is never used
y <- 1
# force(z) evaluates the argument supplied for z
force(z)
# prints "a" "m" "x" "y" "z": all formals plus the local `a`
print(ls())
# mget() has to fetch the value of every listed name, so it fails as soon as it
# reaches an argument whose expression cannot be evaluated
mget(ls())
}
fun(i,j,42)
# [1] "a" "m" "x" "y" "z"
Error in mget(ls()) : object 'i' not found
How can I list only evaluated variables ?
In that case I would be happy with a modified list giving either of :
# [1] "a" "y" "z"
# [1] "a" "y"
Alternatively (or additionally), a logical list telling me if arguments have been evaluated (or overwritten) would be great : in that case list(x = FALSE, y = TRUE, z = TRUE, m = FALSE)
Well, this is kind of close: there is an is_promise function in pryr. It expects a symbol, but the unexported version is_promise2 can take a name. So something like this, maybe:
# Variant of the demo function that returns only the variables which are not
# reported as promises, instead of calling mget() on everything.
fun <- function(x,y,z,m){
a <- 1
y <- 1
force(z)
# For each name from ls(), call pryr:::is_promise2(name, <this function's
# environment>) and keep the names for which it returns FALSE, then fetch them.
# NOTE(review): is_promise2 is unexported from pryr; presumably it is TRUE for
# arguments still held as promises -- confirm against the pryr source.
mget(ls()[!sapply(ls(), pryr:::is_promise2, environment())])
}
fun(i, j, 42)
which at least gets rid of the message about i. But doesn't seem to capture x. But just like is_promise2 does, I think you're going to have to dip into c/c++ land to find out information about evaluation/promise status because I think R tries to hide most of that from the user.
MrFlick's answer is what I was looking for, additional relevant information can be gathered using the function below, which is wrapped around trace for ease of use.
Better sample data
defined_in_global <- 1
# Factory for the sample function used to exercise diagnose_vars().
# It defines `defined_in_enclos` in its own frame and returns a closure, so
# that name exists in the returned function's enclosing environment.
enclosing_fun <- function(){
defined_in_enclos <- quote(qux)
# The formals are named after the situation each one is meant to illustrate.
function(not_evaluated,
overridden = "bar",
forced = "baz",
defined_in_global,
defined_in_enclos,
missing_with_default = 1,
missing_overriden,
missing_absent){
# replaces the formal `overridden` with a local value
overridden <- TRUE
# NOTE(review): the formal above is spelled `missing_overriden` (one "d") but
# this assigns `missing_overridden`, so it creates a new local variable rather
# than overriding the formal -- possibly a typo; the sample output lists both.
missing_overridden <- "a"
new_var <- 1
}
}
How to use, without trying to evaluate
fun <- enclosing_fun()
diagnose_vars(fun)
fun(not_evaluated = foo)
#> Tracing fun(not_evaluated = foo) on exit
#> name evaluable type is_formal missing absent_from_call is_promise has_default_value default_value called_with_value exists_in_parent exists_in_enclos
#> 1 not_evaluated FALSE <NA> TRUE FALSE FALSE TRUE FALSE NA foo FALSE FALSE
#> 2 overridden TRUE logical TRUE FALSE TRUE FALSE TRUE "bar" <NA> FALSE FALSE
#> 3 forced FALSE <NA> TRUE TRUE TRUE TRUE TRUE "baz" <NA> FALSE FALSE
#> 4 defined_in_global FALSE <NA> TRUE TRUE TRUE FALSE FALSE NA <NA> TRUE TRUE
#> 5 defined_in_enclos FALSE <NA> TRUE TRUE TRUE FALSE FALSE NA <NA> FALSE TRUE
#> 6 missing_with_default FALSE <NA> TRUE TRUE TRUE TRUE TRUE 1 <NA> FALSE FALSE
#> 7 missing_overriden FALSE <NA> TRUE TRUE TRUE FALSE FALSE NA <NA> FALSE FALSE
#> 8 missing_absent FALSE <NA> TRUE TRUE TRUE FALSE FALSE NA <NA> FALSE FALSE
#> 9 missing_overridden TRUE character FALSE NA NA NA NA NA <NA> FALSE FALSE
#> 10 new_var TRUE double FALSE NA NA NA NA NA <NA> FALSE FALSE
How to use, trying to evaluate
diagnose_vars(fun, eval = TRUE)
fun(not_evaluated = foo)
#> Tracing fun(not_evaluated = foo) on exit
#> name evaluable type is_formal missing absent_from_call is_promise has_default_value default_value called_with_value exists_in_parent exists_in_enclos
#> 1 not_evaluated TRUE <NA> TRUE FALSE FALSE TRUE FALSE NA foo FALSE FALSE
#> 2 overridden FALSE logical TRUE FALSE TRUE FALSE TRUE "bar" <NA> FALSE FALSE
#> 3 forced FALSE character TRUE TRUE TRUE TRUE TRUE "baz" <NA> FALSE FALSE
#> 4 defined_in_global TRUE <NA> TRUE TRUE TRUE FALSE FALSE NA <NA> TRUE TRUE
#> 5 defined_in_enclos TRUE <NA> TRUE TRUE TRUE FALSE FALSE NA <NA> FALSE TRUE
#> 6 missing_with_default FALSE double TRUE TRUE TRUE TRUE TRUE 1 <NA> FALSE FALSE
#> 7 missing_overriden TRUE <NA> TRUE TRUE TRUE FALSE FALSE NA <NA> FALSE FALSE
#> 8 missing_absent TRUE <NA> TRUE TRUE TRUE FALSE FALSE NA <NA> FALSE FALSE
#> 9 missing_overridden FALSE character FALSE NA NA NA NA NA <NA> FALSE FALSE
#> 10 new_var FALSE double FALSE NA NA NA NA NA <NA> FALSE FALSE
The code
# Arrange for diagnose_vars0() to run the next time `f` is called, via trace().
#
# f       : the function to trace; substituted into the trace() call as `what`.
# eval    : forwarded to diagnose_vars0() (whether it should try to evaluate
#           the variables to determine their type).
# on.exit : if TRUE, the diagnostic is installed as trace()'s `exit` expression
#           followed by untrace(f), so the trace removes itself after it runs;
#           otherwise the diagnostic is passed as `tracer` and untrace(f) as
#           `exit`.
# ...     : forwarded to trace().
# Returns NULL invisibly.
#
# The call is built with substitute() so that `f`, `eval`, `on.exit` and `...`
# are replaced by the expressions given to this function before it is evaluated.
# NOTE(review): in the `else` branch `...` is forwarded to trace() twice and the
# tracer expression is not wrapped in quote(), unlike the `exit` expression in
# the first branch -- confirm that branch behaves as intended.
diagnose_vars <- function(f, eval = FALSE, on.exit = TRUE, ...) {
eval(substitute(
if(on.exit) trace(..., what =f, exit = quote({
diagnose_vars0(eval, print = TRUE)
untrace(f)}))
else trace(..., what =f, tracer = diagnose_vars0(eval, print = TRUE),
exit = substitute(untrace(f)), ...)
))
invisible(NULL)
}
# Describe every variable found in the frame of the calling function.
#
# Must be called from inside the function to diagnose: it inspects
# parent.frame() and the match.call()/formals() of that frame.
#
# eval  : if TRUE, try typeof() on every variable (this forces promises) and
#         report a variable as evaluable when that succeeds; if FALSE, never
#         force anything and infer evaluability from the promise/missing status.
# print : if TRUE print the data frame, otherwise return it.
#
# Result: one row per variable (formals first, in signature order) with columns
# name, evaluable, type, is_formal, missing, absent_from_call, is_promise,
# has_default_value, default_value, called_with_value, exists_in_parent and
# exists_in_enclos.
#
# Note: the parameters `eval` and `print` are logicals; calls such as eval(...)
# and print(...) below still resolve to the base functions, because R skips
# non-function bindings when looking up a function name.
diagnose_vars0 <- function(eval = FALSE, print = FALSE) {
  # frame, call and definition of the function being diagnosed
  f_env <- parent.frame()
  mc <- eval(quote(match.call()), f_env)
  f <- eval.parent(mc[[1]], 2)
  f_parent_env <- parent.frame(2)
  f_enclos <- rlang::fn_env(f)
  vars <- ls(f_env)
  fmls <- eval(quote(formals()), f_env)
  fml_nms <- names(fmls)
  fml_syms <- rlang::syms(fml_nms)
  mc_args <- as.list(mc)[-1]

  # compute complete df cols when possible
  is_formal <- vars %in% fml_nms

  # build raw df, with NA cols when necessary to initiate
  data <- data.frame(row.names = vars,
                     name = vars,
                     evaluable = NA,
                     type = NA,
                     is_formal,
                     missing = NA,
                     absent_from_call = NA,
                     is_promise = NA,
                     has_default_value = NA)

  # absent_from_call : different from missing when variable is overridden
  data[fml_nms, "absent_from_call"] <- ! fml_nms %in% names(mc_args)

  # promise
  data[fml_nms, "is_promise"] <- sapply(fml_nms, pryr:::is_promise2, f_env)

  # missing : evaluate missing(<formal>) inside the diagnosed frame
  data[fml_nms, "missing"] <- sapply(fml_syms, function(x)
    eval(substitute(missing(VAR), list(VAR = x)), f_env))

  # has default values : a formal without default holds the empty symbol
  formal_has_default_value <- !sapply(fmls, identical, alist(x=)[[1]])
  data[fml_nms, "has_default_value"] <- formal_has_default_value

  # default values (deparsed), stored in a list column
  data$default_value <-
    vector("list", length(vars))
  data$default_value[] <- NA
  data[fml_nms[formal_has_default_value], "default_value"] <-
    sapply(fmls[formal_has_default_value], deparse)

  # called_with_value : deparsed expressions supplied in the call
  data[names(mc_args), "called_with_value"] <-
    sapply(mc_args, deparse)

  # exists
  data$exists_in_parent <- sapply(vars, exists, envir = f_parent_env)
  data$exists_in_enclos <- sapply(vars, exists, envir = f_enclos)

  # types
  if (eval) {
    # try() turns a failed evaluation into a string starting with "Error"
    types <- sapply(vars, function(x)
      try(eval(bquote(typeof(.(as.symbol(x)))), f_env), silent = TRUE))
    data$type <- ifelse(startsWith(types, "Error"), NA, types)
    # a variable is evaluable exactly when its type could be determined
    # (previously inverted: is.na(data$type))
    data$evaluable <- !is.na(data$type)
  } else {
    # without evaluating: locals are always safe, formals only when they are
    # neither an unevaluated promise nor missing
    data$evaluable <-
      with(data, !is_formal | (!is_promise & !missing))
    data$type[data$evaluable] <-
      sapply(mget(vars[data$evaluable], f_env), typeof)
  }

  # arrange : formals in signature order first, then the other variables
  data <- rbind(data[fml_nms, ], data[!data$name %in% fml_nms, ])
  row.names(data) <- NULL
  if (print) print(data) else data
}
Related
I cannot understand the properties of logical (boolean) values TRUE, FALSE and NA when used with logical OR (|) and logical AND (&). Here are some examples:
NA | TRUE
# [1] TRUE
NA | FALSE
# [1] NA
NA & TRUE
# [1] NA
NA & FALSE
# [1] FALSE
Can you explain these outputs?
To quote from ?Logic:
NA is a valid logical object. Where a component of x or y is NA, the
result will be NA if the outcome is ambiguous. In other words NA &
TRUE evaluates to NA, but NA & FALSE evaluates to FALSE. See the
examples below.
The key there is the word "ambiguous". NA represents something that is "unknown". So NA & TRUE could be either true or false, but we don't know. Whereas NA & FALSE will be false no matter what the missing value is.
It's explained in help("|"):
NA is a valid logical object. Where a component of x or y
is NA, the result will be NA if the outcome is ambiguous. In
other words NA & TRUE evaluates to NA, but NA & FALSE
evaluates to FALSE. See the examples below.
From the examples in help("|"):
x <- c(NA, FALSE, TRUE)
names(x) <- as.character(x)
outer(x, x, "&") ## AND table
# <NA> FALSE TRUE
# <NA> NA FALSE NA
# FALSE FALSE FALSE FALSE
# TRUE NA FALSE TRUE
outer(x, x, "|") ## OR table
# <NA> FALSE TRUE
# <NA> NA NA TRUE
# FALSE NA FALSE TRUE
# TRUE TRUE TRUE TRUE
Given a data.frame with some type of a flag or identifier column, I would like to be able to flag the surrounding (leading and lagging) records by some time window parameter, n. So given:
df <- data.frame(
id = letters[1:26],
flag = FALSE
)
df$flag[10] <- TRUE
df$flag[17] <- TRUE
I would like to write something like:
# Placeholder for the helper being asked for; the body is intentionally empty.
# flag : logical vector marking the records of interest
# n    : window size, in records, on each side of a flagged record
flag_surrounding <- function(flag, n) {
# should flag surrounding -n to +n records with condition flag
}
# expected results for n = 2, n = 1...
df
# id flag flag_n2 flag_n1
# 1 a FALSE FALSE FALSE
# 2 b FALSE FALSE FALSE
# 3 c FALSE FALSE FALSE
# 4 d FALSE FALSE FALSE
# 5 e FALSE FALSE FALSE
# 6 f FALSE FALSE FALSE
# 7 g FALSE FALSE FALSE
# 8 h FALSE TRUE FALSE
# 9 i FALSE TRUE TRUE
# 10 j TRUE TRUE TRUE
# 11 k FALSE TRUE TRUE
# 12 l FALSE TRUE FALSE
# 13 m FALSE FALSE FALSE
# 14 n FALSE FALSE FALSE
# 15 o FALSE TRUE FALSE
# 16 p FALSE TRUE TRUE
# 17 q TRUE TRUE TRUE
# 18 r FALSE TRUE TRUE
# 19 s FALSE TRUE FALSE
# 20 t FALSE FALSE FALSE
# 21 u FALSE FALSE FALSE
# 22 v FALSE FALSE FALSE
# 23 w FALSE FALSE FALSE
# 24 x FALSE FALSE FALSE
# 25 y FALSE FALSE FALSE
# 26 z FALSE FALSE FALSE
I started writing some things using dplyr::lead and dplyr::lag and variants with cumsum, but I felt like this is already in a package somewhere, but couldn't find it quickly (and not really sure how to phrase this as a question for googling) - maybe someone has better recall than me :)
The following does the trick (using ideas from this post), but feels a bit clunky and error prone. I'd be curious to get other approaches/techniques and/or something more robust from a package.
library(dplyr)
# Flag every position lying within `n` positions of a TRUE in `flag`.
#
# flag : logical vector (NA entries are treated as not flagged)
# n    : non-negative window size on each side of a flagged position
# Returns a logical vector the same length as `flag`.
#
# Same cumulative-sum idea as the lead()/lag() version: count how many windows
# have been opened and how many have been closed up to each position. The
# lead()-based form lost any flag located in the first `n` positions (it was
# shifted off the start) while still counting its closing, which made the
# difference negative and flagged everything *after* the window instead.
flag_surrounding <- function(flag, n) {
  len <- length(flag)
  hits <- which(flag)
  # a window opens n positions before the flag, but never before position 1
  opened <- tabulate(pmax(hits - n, 1L), nbins = len)
  # a window closes just after n positions past the flag; tabulate() ignores
  # closings that would fall beyond the end of the vector
  closed <- tabulate(hits + n + 1L, nbins = len)
  cumsum(opened) - cumsum(closed) > 0
}
df %>%
mutate(flag_n2 = flag_surrounding(flag, 2),
flag_n1 = flag_surrounding(flag, 1))
Here's a simple solution in base:
set.seed(4)
df <- data.frame(
id = letters[1:26],
flag = as.logical(rbinom(n = 26, size = 1, prob = 0.1))
)
# Flag every position lying within `n` positions of a TRUE in `x`.
#
# x : logical vector (NA entries are treated as not flagged)
# n : non-negative whole number, the window size on each side
# Returns a logical vector the same length as `x`.
lead_lag_flag <- function(x, n) {
  flagged <- which(x)
  # every flagged index combined with every offset in -n..n; outer() yields an
  # empty matrix when nothing is flagged, so no special case is needed
  to_flag <- outer(flagged, seq(-n, n), "+")
  # discard positions that fall off either end of the vector
  to_flag <- to_flag[to_flag >= 1 & to_flag <= length(x)]
  new_flag <- rep(FALSE, length(x))
  new_flag[to_flag] <- TRUE
  new_flag
}
df$flag_n1 = lead_lag_flag(df$flag, 1)
df$flag_n2 = lead_lag_flag(df$flag, 2)
df
# id flag flag_n1 flag_n2
# 1 a FALSE FALSE FALSE
# 2 b FALSE FALSE FALSE
# 3 c FALSE FALSE FALSE
# 4 d FALSE FALSE FALSE
# 5 e FALSE FALSE FALSE
# 6 f FALSE FALSE TRUE
# 7 g FALSE TRUE TRUE
# 8 h TRUE TRUE TRUE
# 9 i TRUE TRUE TRUE
# 10 j FALSE TRUE TRUE
# 11 k FALSE FALSE TRUE
# 12 l FALSE FALSE TRUE
# 13 m FALSE TRUE TRUE
# 14 n TRUE TRUE TRUE
# 15 o FALSE TRUE TRUE
# 16 p FALSE TRUE TRUE
# 17 q TRUE TRUE TRUE
# 18 r FALSE TRUE TRUE
# 19 s TRUE TRUE TRUE
# 20 t FALSE TRUE TRUE
# 21 u FALSE TRUE TRUE
# 22 v TRUE TRUE TRUE
# 23 w FALSE TRUE TRUE
# 24 x FALSE FALSE TRUE
# 25 y FALSE FALSE FALSE
# 26 z FALSE FALSE FALSE
Another base alternative:
# Flag every row within n rows of a flagged row, storing the result in a new
# column named "flag<n>" (here "flag1").
n <- 1
nm <- paste0("flag", n)
# offsets covering the window: -n, ..., 0, ..., n
i <- -n:n
df[, nm] <- FALSE
# each flagged row index repeated once per offset, then shifted by the offsets
ix <- rep(which(df$flag), each = length(i)) + i
# keep only indices that are valid rows of df (was nrow(d), an undefined object)
ix <- ix[ix > 0 & ix <= nrow(df)]
df[ix, nm] <- TRUE
df
# id flag flag1
# 1 a FALSE FALSE
# 2 b FALSE FALSE
# 3 c FALSE FALSE
# 4 d FALSE FALSE
# 5 e FALSE FALSE
# 6 f FALSE FALSE
# 7 g FALSE FALSE
# 8 h FALSE FALSE
# 9 i FALSE TRUE
# 10 j TRUE TRUE
# 11 k FALSE TRUE
# 12 l FALSE FALSE
# 13 m FALSE FALSE
# 14 n FALSE FALSE
# 15 o FALSE FALSE
# 16 p FALSE TRUE
# 17 q TRUE TRUE
# 18 r FALSE TRUE
# 19 s FALSE FALSE
# 20 t FALSE FALSE
# 21 u FALSE FALSE
# 22 v FALSE FALSE
# 23 w FALSE FALSE
# 24 x FALSE FALSE
# 25 y FALSE FALSE
# 26 z FALSE FALSE
Given the vectors:
vect1 <- c(TRUE,FALSE,FALSE,NA,NA,NA,TRUE,FALSE,NA,FALSE)
vect2 <- c(TRUE,NA,FALSE,NA,FALSE,TRUE,FALSE,NA,TRUE,NA)
vect3 <- vect1 | vect2
vect3 #c(TRUE,NA,FALSE,NA,NA,TRUE,TRUE,NA,TRUE,NA)
Is there a vectorized infix function x that evaluates elements like this:
TRUE x TRUE #TRUE
TRUE x FALSE #TRUE
FALSE x TRUE #TRUE
FALSE x FALSE #FALSE
TRUE x NA #TRUE
NA x TRUE #TRUE
FALSE x NA #FALSE - would have been NA with ordinary "|"
NA x FALSE #FALSE - would have been NA with ordinary "|"
NA x NA #NA
Producing a vector vect4 like this:
vect4 #c(TRUE,FALSE,FALSE,NA,FALSE,TRUE,TRUE,FALSE,TRUE,FALSE)
Or is there any other simple method to output vect4 from vect1 and vect2?
You can compute the parallel maximum (with na.rm = TRUE) and convert to logical:
as.logical(pmax(vect1, vect2, na.rm = TRUE))
# [1] TRUE FALSE FALSE NA FALSE TRUE TRUE FALSE TRUE FALSE
Note that by computing maxima of logical vectors, TRUE is interpreted as integer 1 and FALSE as integer 0.
I have a very large data set with 250 string and numeric variables. I want to compare the columns pairwise, two adjacent columns at a time. For example, I want to compare (take the difference of) the first variable with the second one, the third with the fourth, the fifth with the sixth, and so on.
For example (the data set is structured like this example), I want to compare number.x with number.y, day.x with day.y, school.x with school.y, etc.
number.x<-c(1,2,3,4,5,6,7)
number.y<-c(3,4,5,6,1,2,7)
day.x<-c(1,3,4,5,6,7,8)
day.y<-c(4,5,6,7,8,7,8)
school.x<-c("a","b","b","c","n","f","h")
school.y<-c("a","b","b","c","m","g","h")
city.x<- c(1,2,3,7,5,8,7)
city.y<- c(1,2,3,5,5,7,7)
You mean, something like this?
> number.x == number.y
[1] FALSE FALSE FALSE FALSE FALSE FALSE TRUE
> length(which(number.x==number.y))
[1] 1
> school.x == school.y
[1] TRUE TRUE TRUE TRUE FALSE FALSE TRUE
> test.day <- day.x == day.y
> test.day
[1] FALSE FALSE FALSE FALSE FALSE TRUE TRUE
EDIT: Given your example variables above, we have:
df <- data.frame(number.x,
number.y,
day.x,
day.y,
school.x,
school.y,
city.x,
city.y,
stringsAsFactors=FALSE)
# Compare the columns of df in adjacent pairs: 1 vs 2, 3 vs 4, and so on.
n <- ncol(df) # number of columns (a trailing unpaired column is ignored)
# index of the first column of each pair: 1, 3, 5, ...
pair_starts <- 2 * seq_len(n %/% 2) - 1
# comp[[j]] holds the element-wise equality of columns 2j - 1 and 2j
comp <- lapply(pair_starts, function(k) df[[k]] == df[[k + 1]])
After which, you'll have:
> comp
[[1]]
[1] FALSE FALSE FALSE FALSE FALSE FALSE TRUE
[[2]]
[1] FALSE FALSE FALSE FALSE FALSE TRUE TRUE
[[3]]
[1] TRUE TRUE TRUE TRUE FALSE FALSE TRUE
[[4]]
[1] TRUE TRUE TRUE FALSE TRUE FALSE TRUE
To get the comparison result between columns k and k+1, you look at the (k+1)/2 element of comp - i.e to get the comparison results between columns 7 & 8, you look at the comp element 8/2=4:
> comp[[4]]
[1] TRUE TRUE TRUE FALSE TRUE FALSE TRUE
EDIT 2: To have the comparisons as new columns in the dataframe:
# Append the pairwise comparisons to df as logical columns V1, V2, ...
# One name per stored comparison, so the names always match the number of
# columns in cc (the 1:(n/2) loop produced a spurious name when n < 2).
new.names <- paste0("V", seq_along(comp))
cc <- as.data.frame(comp, optional = TRUE)
names(cc) <- new.names
df.new <- cbind(df, cc)
After which, you have:
> df.new
number.x number.y day.x day.y school.x school.y city.x city.y V1 V2 V3 V4
1 1 3 1 4 a a 1 1 FALSE FALSE TRUE TRUE
2 2 4 3 5 b b 2 2 FALSE FALSE TRUE TRUE
3 3 5 4 6 b b 3 3 FALSE FALSE TRUE TRUE
4 4 6 5 7 c c 7 5 FALSE FALSE TRUE FALSE
5 5 1 6 8 n m 5 5 FALSE FALSE FALSE TRUE
6 6 2 7 7 f g 8 7 FALSE TRUE FALSE FALSE
7 7 7 8 8 h h 7 7 TRUE TRUE TRUE TRUE
I cannot understand the properties of logical (boolean) values TRUE, FALSE and NA when used with logical OR (|) and logical AND (&). Here are some examples:
NA | TRUE
# [1] TRUE
NA | FALSE
# [1] NA
NA & TRUE
# [1] NA
NA & FALSE
# [1] FALSE
Can you explain these outputs?
To quote from ?Logic:
NA is a valid logical object. Where a component of x or y is NA, the
result will be NA if the outcome is ambiguous. In other words NA &
TRUE evaluates to NA, but NA & FALSE evaluates to FALSE. See the
examples below.
The key there is the word "ambiguous". NA represents something that is "unknown". So NA & TRUE could be either true or false, but we don't know. Whereas NA & FALSE will be false no matter what the missing value is.
It's explained in help("|"):
NA is a valid logical object. Where a component of x or y
is NA, the result will be NA if the outcome is ambiguous. In
other words NA & TRUE evaluates to NA, but NA & FALSE
evaluates to FALSE. See the examples below.
From the examples in help("|"):
x <- c(NA, FALSE, TRUE)
names(x) <- as.character(x)
outer(x, x, "&") ## AND table
# <NA> FALSE TRUE
# <NA> NA FALSE NA
# FALSE FALSE FALSE FALSE
# TRUE NA FALSE TRUE
outer(x, x, "|") ## OR table
# <NA> FALSE TRUE
# <NA> NA NA TRUE
# FALSE NA FALSE TRUE
# TRUE TRUE TRUE TRUE