I recently posed this question and thankfully was pointed to withRestarts() which seems pretty awesome and powerful to me :-) Now I'm eager to understand R's error handling capabilities in a bit more detail.
Actual questions
What is the recommended usage of simpleCondition()? Never used it before, but I thought it might be useful for designing custom errors and warnings that are in fact "true" conditions. Could it be used to build a database of specific conditions for which specific handlers are available?
Is there a way to "freeze" a certain state of the entire R workspace and return to it to restart a computation at a certain point? I'm aware of save.image(), but AFAIU, this doesn't store the "state" of the search path (search() or searchpaths()).
For those interested
Two code examples
illustration of my current use of withRestarts in dependence on this blog post
attempt to define a "custom condition"
I'd appreciate any comments/suggestion on what to do better ;-)
Example 1
require("forecast")
autoArimaFailsafe <- function(
x,
warning=function(w, ...) {
message("autoArimaFailsafe> warning:")
message(w)
invokeRestart("donothing")},
error=function(e, ...) {
message("autoArimaFailsafe> error:")
message(e)
invokeRestart("abort")}
) {
withRestarts(
out <- tryCatch(
{
expr <- expression(auto.arima(x=x))
return(eval(expr))
},
warning=warning,
error=error
),
donothing=function(...) {
return(eval(expr))
},
abort=function(...) {
message("aborting")
return(NULL)
}
)
}
data(AirPassengers)
autoArimaFailsafe(x=AirPassengers)
autoArimaFailsafe(x="a")
Example 2
require("forecast")
autoArimaFailsafe <- function(
x,
warning=function(w, ...) {
message("autoArimaFailsafe> warning")
invokeRestart("donothing")},
error=function(e, ...) {
message("autoArimaFailsafe> error")
invokeRestart("abort")},
condition=function(cond, ...) {
out <- NULL
message(cond)
condmsg <- conditionMessage(c=cond)
condclass <- class(cond)
if (any(class(cond) == "simpleWarning")) {
out <- warning(w=cond)
} else if (any(class(cond) == "simpleError")) {
out <- error(e=cond)
} else if (any(class(cond) == "simpleCondition")) {
if (condmsg == "invalid class: character") {
out <- invokeRestart("forcedefault")
}
}
return(out)
}
) {
withRestarts(
out <- tryCatch(
{
expr <- expression(auto.arima(x=x))
if (class(x) == "character") {
expr <- signalCondition(
simpleCondition("invalid class: character",
call=as.call(expr))
)
}
return(eval(expr))
},
condition=condition
),
donothing=function(...) {return(eval(expr))},
abort=function(...) {
message("aborting")
return(NULL)
},
forcedefault=function(...) {
data(AirPassengers)
expr <- expression(auto.arima(x=AirPassengers))
return(eval(expr))
}
)
}
autoArimaFailsafe(x=AirPassengers)
autoArimaFailsafe(x=NULL)
autoArimaFailsafe(x="a")
This post references the inspiration for R's condition handling.
For 1., I think of simpleCondition as illustrating how one can construct custom conditions, e.g,.
myCondition <-
function(message, call=NULL, type=c("overflow", "underflow", "zero"))
{
type <- match.arg(type) # only allowed types past here
class <- c(type, "my", "condition")
structure(list(message = as.character(message), call = call),
class = class)
}
is a constructor for making custom conditions
> myCondition("oops")
<overflow: oops>
> myCondition("oops", type="underflow")
<underflow: oops>
These conditions can be used in tryCatch or withCallingHandlers
xx <- tryCatch({
signalCondition(myCondition("oops", type="underflow"))
}, underflow=function(e) {
message("underflow: ", conditionMessage(e))
NA # return value, assigned to xx
})
These are S3 classes so can have a linear hierarchy -- bad and worse are both subclasses of error.
myError <-
function(message, call=NULL, type=c("bad", "worse"))
{
type <- match.arg(type)
class <- c(type, "error", "condition")
structure(list(message=as.character(message), call=call),
class=class)
}
One might also create an error that extends the 'simpleError' S3 class as cond <- simpleError("oops"); class(cond) = c("myerr", class(cond)
With tryCatch we just get access to a single handler, the first (in the sense described on ?tryCatch) to match the class of condition
tryCatch({
stop(myError("oops", type="worse"))
}, bad = function(e) {
message("bad error: ", conditionMessage(e))
}, worse = function(e) {
message("worse error: ", conditionMessage(e)) # here's where we end up
}, error=function(e) {
message("error: ", conditionMessage(e))
})
With withCallingHandlers we have the opportunity to hit multiple handlers, provided we don't invoke a restart
withCallingHandlers({
stop(myError("oops", type="bad"))
}, bad = function(e) { # here...
message("bad error: ", conditionMessage(e))
}, worse = function(e) {
message("worse error: ", conditionMessage(e))
}, error=function(e) { # ...and here...
message("error: ", conditionMessage(e))
}) # ...and top-level 'error'
withCallingHandlers({
x <- 1
warning(myError("oops", type="bad"))
"OK"
}, bad = function(e) { # here, but continue at the restart
message("bad warning: ", conditionMessage(e))
invokeRestart("muffleWarning")
}, worse = function(e) {
message("worse warning: ", conditionMessage(e))
})
I'm not so sure about your question 2; I think this is the situation that calling handlers are designed to address -- the entire frame where the condition was invoked is poised waiting to continue, once you invoke the restart.
Related
I am running a pretty long function that deals with reddit comment data from RedditExtractoR:
# Create an empty data frame to store the thread information
threads_df = data.frame(date = character(), title = character(), url = character(), subreddit = character())
# Bind threads
threads_df = bind_rows(
data.frame(threads1, subreddit = "SSBM"),
data.frame(threads2, subreddit = "funny"),
data.frame(threads3, subreddit = "meltyblood"),
data.frame(threads4, subreddit = "bloomington")
)
# Get the comments from each thread
comments_df = data.frame()
for (i in 1:nrow(threads_df)) {
result = get_thread_content(threads_df$url[i])[[2]]
result$subreddit = threads_df$subreddit[i]
comments_df = bind_rows(comments_df, result)
print(paste("Completed thread", i, "of", nrow(threads_df)))
if (nrow(result) == 0) {
stop("Failed to retrieve comments for thread", i)
}
if (i %% 100 == 0) {
print("Checking for timeouts")
Sys.sleep(10)
}
}
After getting to thread 115/977, I am greeted with the following error:
Error in `bind_rows()`:
! Can't combine `..1$comment_id` <character> and `..2$comment_id` <integer>.
---
Backtrace:
1. dplyr::bind_rows(comments_df, result)
4. vctrs::vec_rbind(!!!dots, .names_to = .id)
I have tried using trycatch to skip the error only to compile even more errors that I don't understand. It would be ideal to just skip threads that generated this error. I tried the following to do that, but it only complicated things past a level that I can comprehend:
# Get the comments from each thread
comments_df = data.frame()
for (i in 1:nrow(threads_df)) {
result = tryCatch(
expr = {
get_thread_content(threads_df$url[i])[[2]]
},
error = function(e) {
NULL
}
)
if (is.null(result)) {
print(paste("Skipped thread", i, "due to bind_rows error"))
next
}
result$subreddit = threads_df$subreddit[i]
comments_df = bind_rows(comments_df, result)
print(paste("Completed thread", i, "of", nrow(threads_df)))
if (nrow(result) == 0) {
stop("Failed to retrieve comments for thread", i)
}
if (i %% 100 == 0) {
print("Checking for timeouts")
Sys.sleep(10)
}
}
Does anyone have any insight into why this original error might be happening and what I could try to fix it? If not, is there maybe another way to bypass the error?
Basically, try something as follows:
tryCatch(expr = {stop("stop message")},
error = function(e) {
cat(conditionMessage(e))
cat(as.character(expr))
})
with expect output to be something like: "stop(\"stop message\")", but fails as expr cannot be found... Any way to print expr within the scope without having to do something as follows?
expr <- eval('stop("stop message")')
tryCatch(expr = {expr},
error = function(e) {
cat(conditionMessage(e))
cat(as.character(expr))
})
I'm not sure I'd really recommend this, but you could walk up the call stack to find the tryCatch call and extract the parameter there. Here's a helper function to find a call in the call stack
findStackFun <- function(fun) {
for(cx in sys.calls()) {
if (deparse(cx[[1]]) == fun) {
return(cx)
}
}
return(NULL)
}
Then you could run
tryCatch(expr = {stop("stop message")},
error = function(e) {
cat(conditionMessage(e))
call <- findStackFun("tryCatch")
cat(deparse(call$expr))
})
I am trying to write my own test-function (test_if) that returns both the result of the test as well as an optional error message. The function is based on the validate_that function in the assertthat-package.
The test_if function seems to work, however, I further want to use test_if in a more specific function (check_input) that analyses user-inputs in shiny. There I have a problem, that the check_input-function only works, if I define the test_if function inside the check_input function.
I suppose that the problem is caused by some search scope or environment problem. However, I am really a newbie to environments in R.
How can I get my check_input-function work without the need to define the test_if function inside it?
Many thanks, Silke
Here is my minimal working example:
library(assertthat)
test_if <- function(...,msg=NULL) {
test <- validate_that(...,msg=msg)
if (is.logical(test)) {
return(list(assertation=test,msg=NULL))
}
if (is.character(test)) {
return(list(assertation=FALSE,msg=test))
}
}
test_if(2==3)
test_if(3==3)
test_if(2==3,3==4,msg="something is wrong")
### To check different inputs
check_input1 <- function(value1 = NULL,value2 = NULL) {
test_if <- function(...,msg=NULL) {
test <- validate_that(...,msg=msg)
if (is.logical(test)) {
return(list(assertation=test,msg=NULL))
}
if (is.character(test)) {
return(list(assertation=FALSE,msg=test))
}
}
error_msg <- ""
error_status <- FALSE
check <- test_if(is.numeric(value1))
error_msg <- check$msg
error_status <- check$assertation
return(list(error_msg=error_msg,error_status=error_status))
}
check_input2 <- function(value1 = NULL,value2 = NULL) {
error_msg <- ""
error_status <- FALSE
check <- test_if(is.numeric(value1))
error_msg <- check$msg
error_status <- check$assertation
return(list(error_msg=error_msg,error_status=error_status))
}
check_input1(value1=1)
check_input2(value1=1)
Here is an example:
setGeneric("loadBim",
function(pl_info, ...) {
standardGeneric("loadBim")
})
setMethod("loadBim",
signature(pl_info = "PlInfo"),
function(pl_info) {
loadFFDF(pl_info#ff_dir_trio["bim"])
})
setGeneric("loadFam",
function(pl_info, ...) {
standardGeneric("loadFam")
})
setMethod("loadFam",
signature(pl_info = "PlInfo"),
function(pl_info) {
loadFFDF(pl_info#ff_dir_trio["fam"])
})
setGeneric("loadFrq",
function(pl_info, ...) {
standardGeneric("loadFrq")
})
setMethod("loadFrq",
signature(pl_info = "PlInfo"),
function(pl_info) {
loadFFDF(pl_info#ff_dir_trio["frq"])
})
All these S4 methods are similar, they differ only for some file extension names: bim, fam, and frq. I am wondering is there some metaproramming technique available for simplifying them (generating them programmatically)?
I found out one solution:
loadPlinkMeta = gtools::defmacro(ext, method_name, expr = {
setGeneric(method_name,
function(pl_info, ...) {
standardGeneric(method_name)
})
setMethod(method_name,
signature(pl_info = "PlInfo"),
function(pl_info) {
loadFFDF(pl_info#ff_dir_trio[ext])
})
})
loadPlinkMeta("bim", "loadBim")
loadPlinkMeta("fam", "loadFam")
loadPlinkMeta("frq", "loadFrq")
If you find a bug in a package, it's usually possible to patch the problem with fixInNamespace, e.g. fixInNamespace("mean.default", "base").
For S4 methods, I'm not sure how to do it though. The method I'm looking at is in the gWidgetstcltk package. You can see the source code with
getMethod(".svalue", c("gTabletcltk", "guiWidgetsToolkittcltk"))
I can't find the methods with fixInNamespace.
fixInNamespace(".svalue", "gWidgetstcltk")
Error in get(subx, envir = ns, inherits = FALSE) :
object '.svalue' not found
I thought setMethod might do the trick, but
setMethod(".svalue", c("gTabletcltk", "guiWidgetsToolkittcltk"),
definition = function (obj, toolkit, index = NULL, drop = NULL, ...)
{
widget = getWidget(obj)
sel <- unlist(strsplit(tclvalue(tcl(widget, "selection")),
" "))
if (length(sel) == 0) {
return(NA)
}
theChildren <- .allChildren(widget)
indices <- sapply(sel, function(i) match(i, theChildren))
inds <- which(visible(obj))[indices]
if (!is.null(index) && index == TRUE) {
return(inds)
}
if (missing(drop) || is.null(drop))
drop = TRUE
chosencol <- tag(obj, "chosencol")
if (drop)
return(obj[inds, chosencol, drop = drop])
else return(obj[inds, ])
},
where = "package:gWidgetstcltk"
)
Error in setMethod(".svalue", c("gTabletcltk", "guiWidgetsToolkittcltk"), :
the environment "gWidgetstcltk" is locked; cannot assign methods for function ".svalue"
Any ideas?
How about the old-school way of getting the source, applying the change and rebuilding?
you can first get the generic out, and then fix the generic by setMethod in your global environment, and then assign it back to that namespace
.svalue <- gWidgetstcltk:::.svalue
setMethod(".svalue", c("gTabletcltk", "guiWidgetsToolkittcltk"),
definition = function (obj, toolkit, index = NULL, drop = NULL, ...)
{
widget = getWidget(obj)
sel <- unlist(strsplit(tclvalue(tcl(widget, "selection")),
" "))
if (length(sel) == 0) {
return(NA)
}
theChildren <- .allChildren(widget)
indices <- sapply(sel, function(i) match(i, theChildren))
inds <- which(visible(obj))[indices]
if (!is.null(index) && index == TRUE) {
return(inds)
}
if (missing(drop) || is.null(drop))
drop = TRUE
chosencol <- tag(obj, "chosencol")
if (drop)
return(obj[inds, chosencol, drop = drop])
else return(obj[inds, ])
}#,
#where = "package:gWidgetstcltk"
)
assignInNamespace(".svalue", .svalue, ns = "gWidgetstcltk")