Is there a destructor in R reference class? - r

Just as a test:
# Reference class with two numeric fields, used to check whether a
# finalizer is run when an object goes away.
myclass <- setRefClass(
  "myclass",
  fields = list(x = "numeric", y = "numeric")
)

# All methods are registered in a single call.
myclass$methods(
  # Save the object itself to an .rda file (argument `i` is unused).
  dfunc = function(i) {
    message("In dfunc, I save x and y...")
    # The object is stored in the file under the name "obj".
    obj <- .self
    base::save(obj, file = "/tmp/obj.rda")
  },

  # Report both fields via message(); refuses to print when x exceeds 10.
  print = function() {
    if (.self$x > 10) {
      stop("x is too large!")
    }
    x_line <- paste("x: ", .self$x)
    y_line <- paste("y: ", .self$y)
    message(x_line)
    message(y_line)
  },

  # Fill the fields from an existing object `obj` when one is given,
  # otherwise from the supplied x and y.
  initialize = function(x = NULL, y = NULL, obj = NULL) {
    if (!is.null(obj)) {
      .self$x <- obj$x
      .self$y <- obj$y
    } else {
      .self$x <- x
      .self$y <- y
    }
  },

  # Called by R when the object is garbage collected.
  finalize = function() {
    message("I am finalizing this thing...")
  }
)
Then try to create and remove an object:
# Create an object with x = 15 and y = 6.
u = myclass(15, 6)
# print() stops with "x is too large!" here because x > 10.
u$print()
# Remove the binding `u` from the current environment.
rm(u)
The finalize function is not called at all...

When you call rm you just remove the object reference from the environment, but you don't destroy the object itself.
That is the work of the garbage collector, which is designed to automatically destroy objects when they have no more references (as in this case). However, the garbage collector is triggered by certain events (e.g. too much memory used), so it is not automatically invoked when you call rm (it will probably be called some time later).
Anyway, you can force the garbage collector, even if this is usually discouraged, by calling gc().
u = myclass(15, 6)
# Drop the only reference to the object ...
rm(u)
# ... then force a garbage collection so the finalizer runs right away.
gc()
# > I am finalizing this thing...
As you can see by running the above code, your finalize method is indeed called after gc()

Related

Using R, how to scope internal functions within a MAIN function?

My young son and I were playing a board game called Snails Pace. Simple enough, so I sat down to show him the game on the R-console.
Helper function
# Round each element of `n` up to the nearest multiple of `by`.
#
# Arguments:
#   n   numeric vector of values to round
#   by  step size; results are multiples of this value (default 5)
# Returns a numeric vector the same length as `n`. Values that are already a
# multiple of `by` are returned unchanged.
#
# ceiling() is used instead of truncating with as.integer() so that negative
# values also round up (-7 -> -5 rather than 0) and large values do not
# overflow the integer range.
num.round = function(n, by=5)
{
  by * ceiling(n / by)
}
Primary function
# Play the "Snails Pace" board game on the R console: on every move one
# randomly chosen snail advances by one step and the board is redrawn.
#
# Arguments:
#   moves        number of moves to play
#   finish.line  x position at which a snail receives its finishing rank
#   snail.x      starting x position of each snail (default: six snails at 0)
#   snail.y      y position of each snail (default: 1..6)
#   snail.col    plotting colour of each snail (default: six colours)
# Uses the helper num.round() to pick the axis limits.
snails.pace = function(moves = 200, finish.line = 8,
                       snail.x = NULL,
                       snail.y = NULL,
                       snail.col = NULL
                       )
{
  if(is.null(snail.x)) { snail.x = 0*(1:6) }
  if(is.null(snail.y)) { snail.y = 1*(1:6) }
  if(is.null(snail.col)) { snail.col = c("orange", "blue", "pink", "green", "yellow", "red") }
  snail.rank = 0*snail.x
  crank = 1 # current rank
  move.number = 0

  # Draw the board for the current state. snail.col is not passed in; it is
  # read from the enclosing function.
  snails.plot = function(snail.x, snail.y, snail.rank, move.number, moves, finish.line, crank)
  {
    xmax = max(10, max(snail.x) )
    ymax = max(8, max(snail.y) )
    plot(snail.x, snail.y,
         col=snail.col,
         pch=16, cex=5,
         xlim=c(0, num.round(xmax, 5) ),
         ylim=c(0, num.round(ymax, 4) ),
         axes=FALSE,
         frame.plot=FALSE,
         xlab="", ylab="",
         main=paste0("Move #", move.number, " of ", moves)
    )
    #axis(gr.side("bottom"));
    axis(1)
    # Label every snail with its position, plus "*rank" once it has finished.
    has.rank = (snail.rank != 0)
    snails.lab = paste0(snail.x, "*", snail.rank)
    snails.lab[!has.rank] = snail.x[!has.rank]
    text(snail.x, y=snail.y, labels=snails.lab, col="black")
    abline(v = finish.line, col="gray", lty="dashed")
  }

  # Wait for the user, advance one random snail and return the new positions.
  # Rank changes are written back to the caller's frame with assign().
  snails.update = function(snail.x, snail.y, snail.rank, move.number, moves, finish.line, crank)
  {
    readline(prompt="Press [enter] to continue, [ESC] to quit")
    # Choose among the snails actually in play rather than a hard-coded 1:6,
    # so a custom snail.x of another length cannot be indexed out of range.
    n = sample.int(length(snail.x), 1)
    snail.x[n] = 1 + snail.x[n]
    if( (snail.rank[n] == 0) && (snail.x[n] >= finish.line) )
    {
      snail.rank[n] = crank
      crank = 1 + crank
      # update to MAIN environment
      assign("snail.rank", snail.rank, envir=parent.frame() )
      assign("crank", crank, envir=parent.frame() )
    }
    snail.x
  }

  snails.plot(snail.x, snail.y, snail.rank, move.number, moves, finish.line, crank)
  while(move.number < moves)
  {
    move.number = 1 + move.number
    snail.x = snails.update(snail.x, snail.y, snail.rank, move.number, moves, finish.line, crank)
    snails.plot(snail.x, snail.y, snail.rank, move.number, moves, finish.line, crank)
  }
}
Game play
snails.pace();
Question: how to scope internal functions within the MAIN environment?
The MAIN function is snails.pace(). You will notice in the internal function snails.update, I update two variables and assign them back to the MAIN scope using assign.
Is there a way at the MAIN level I can define all the variables and just USE them within all internal functions without having to assign them back or returning the updating values?
As you can see in my CODE, I call all of the variables into the functions and either "back assign" or return any changes. I would prefer to just set a new env() or something and have MAIN work like R-Global seems to. Any suggestions on how to do that?
That is, my internal functions would not pass anything in: snails.plot = function() and snails.update = function() AS they would get the LOCAL environment variables (defined as within MAIN defined as snails.pace()). And ideally update the LOCAL environment variables by updating the value within the internal function.
Update
So it appears that I can drop the function passing. See:
# Variant of snails.pace() in which the internal functions take no arguments
# and read the game state directly from the enclosing function.
#
# Arguments:
#   moves        number of moves to play
#   finish.line  x position at which a snail receives its finishing rank
#   snail.x      starting x position of each snail (default: six snails at 0)
#   snail.y      y position of each snail (default: 1..6)
#   snail.col    plotting colour of each snail (default: six colours)
# Uses the helper num.round() to pick the axis limits.
snails.pace2 = function(moves = 200, finish.line = 8,
                        snail.x = NULL,
                        snail.y = NULL,
                        snail.col = NULL
                        )
{
  if(is.null(snail.x)) { snail.x = 0*(1:6) }
  if(is.null(snail.y)) { snail.y = 1*(1:6) }
  if(is.null(snail.col)) { snail.col = c("orange", "blue", "pink", "green", "yellow", "red") }
  snail.rank = 0*snail.x
  crank = 1 # current rank
  move.number = 0

  # Draw the board; every variable used here is read from the enclosing function.
  snails.plot = function()
  {
    xmax = max(10, max(snail.x) )
    ymax = max(8, max(snail.y) )
    plot(snail.x, snail.y,
         col=snail.col,
         pch=16, cex=5,
         xlim=c(0, num.round(xmax, 5) ),
         ylim=c(0, num.round(ymax, 4) ),
         axes=FALSE,
         frame.plot=FALSE,
         xlab="", ylab="",
         main=paste0("Move #", move.number, " of ", moves)
    )
    #axis(gr.side("bottom"));
    axis(1)
    # Label every snail with its position, plus "*rank" once it has finished.
    has.rank = (snail.rank != 0)
    snails.lab = paste0(snail.x, "*", snail.rank)
    snails.lab[!has.rank] = snail.x[!has.rank]
    text(snail.x, y=snail.y, labels=snails.lab, col="black")
    abline(v = finish.line, col="gray", lty="dashed")
  }

  # Wait for the user, advance one random snail and return the new positions.
  # Assigning to snail.x / snail.rank / crank here only changes local copies,
  # so rank changes are written back with assign() and positions are returned.
  snails.update = function()
  {
    readline(prompt="Press [enter] to continue, [ESC] to quit")
    # Choose among the snails actually in play rather than a hard-coded 1:6,
    # so a custom snail.x of another length cannot be indexed out of range.
    n = sample.int(length(snail.x), 1)
    snail.x[n] = 1 + snail.x[n]
    if( (snail.rank[n] == 0) && (snail.x[n] >= finish.line) )
    {
      snail.rank[n] = crank
      crank = 1 + crank
      # update to MAIN environment
      assign("snail.rank", snail.rank, envir=parent.frame() )
      assign("crank", crank, envir=parent.frame() )
    }
    snail.x
  }

  snails.plot()
  while(move.number < moves)
  {
    move.number = 1 + move.number
    snail.x = snails.update()
    snails.plot()
  }
}
@MrFlick is correct about the lexical scoping, if I understand the above correctly. If an internal function updates something from MAIN, it has to assign it back to MAIN, I guess with <<- or assign(..., envir = parent.frame()). Is there not a way to tell the internal SUBFUNCTIONS to SCOPE at the same level as MAIN?
There are two completely different concepts called "parent" in R: the parent.frame() of a call, and the parent.env() of an environment.
parent.frame() walks up the chain of the stack of calls. If you have a recursive function that calls itself, it will appear multiple times in that chain.
In general, it's dangerous to use parent.frame(), because even if the context in which you use it now makes it clear which environment will be the parent.frame(), at some future time you might change your program (e.g. make the internal function into a recursive one, or call it from another internal function), and then parent.frame() will refer to something different.
The parent.env() function applies to an environment; parent.env(environment()) gives you the enclosing environment of the current one. If you call parent.env(environment()) it will always refer to the environment where your current function was defined. It doesn't matter how you called it, just how you defined it. So you always know what will happen if you assign there, and it's much safer in the long term than using parent.frame().
The <<- "super-assignment" works with enclosing environments, not the stack of calls. If you do var <<- value, then as long as you are sure that var was defined in the enclosing function, you can be sure that's what gets modified.
One flaw in R is that it doesn't enforce the existence of var there, so that's why some people say <<- is "sloppy". If you accidentally forget to define it properly, or spell it wrong, R will search back through the whole chain of environments to try to do what you asked, and if it never finds a matching variable, it will do the assignment in the global environment. You almost never want to do that: keep side effects minimal.
So, to answer the question "Is there a way at the MAIN level I can define all the variables and just USE them within all internal functions without having to assign them back or returning the updating values?": as you found in your edit, the nested function can read the value of any variable in the MAIN function without requiring any special code. To modify those variables, be sure both snail.rank and crank are defined in MAIN, then use <<- in the nested function to assign new values to them.
To have a function f defined within another function main such that f has the same scope as main, surround the entire body of f with eval.parent(substitute({...})) like this:
# main() demonstrates a nested function f whose body runs in main's own frame.
main <- function() {
# substitute() captures the braced block unevaluated and eval.parent()
# evaluates it in the caller's frame (here: main), so the plain `<-`
# assignments below read and write main's variables a and b.
f <- function() eval.parent(substitute({
a <- a + 1
b <- 0.5
}))
a <- 1
f()
f()
# After two calls a is 3 and b is 0.5, so the result is 10 * 3 + 0.5.
10 * a + b
}
main()
## [1] 30.5
The gtools package has defmacro which allows the same thing and uses the same technique internally. Also see the wrapr package.

Inside R6 class definition: 'object not found' (or: how to define 'local' objects in R6 classes)

I want to define an R6 class that sets up, updates and closes a progress bar. For these 3 tasks, I have 3 functions. The first, setup_progressbar(), calls R's txtProgressBar(), which returns an object (say, pb) that needs to be passed on to the second and third functions, update_progressbar() and close_progressbar(). But the object pb is not found by the latter two functions.
library(R6)
# Progress-bar wrapper as posted in the question. `pb` is never stored on the
# object, so update_progressbar() and close_progressbar() cannot find it.
myprogressbar <- R6Class("my_progress_bar",
public = list(
n = numeric(1),
initialize = function(n) {
stopifnot(n >= 1)
self$n <- n
},
setup_progressbar = function() {
# `pb` is a local variable of this method and is gone once it returns.
pb <- txtProgressBar(max = self$n)
},
update_progressbar = function(i) {
# `pb` is not defined in this method, hence "object 'pb' not found".
setTxtProgressBar(pb, i)
},
close_progressbar = function () {
close(pb)
cat("\n")
}
))
mypb <- myprogressbar$new(10)
mypb$setup_progressbar()
mypb$update_progressbar(3) # Error in setTxtProgressBar(pb, i) : object 'pb' not found
I tried to add pb to self in the hope it would be found, but then I obtain "cannot add bindings to a locked environment".
Note: In my actual (non-minimal) example, the i is found/provided/visible, so that's not an additional problem (most likely this is just a problem in the above minimal working example once fixed beyond the 'pb' not found error).
The following works:
library(R6)
# Working version: the progress bar is kept in the public field `pb`, so every
# method can reach it through self$pb.
myprogressbar <- R6Class("my_progress_bar",
public = list(
n = numeric(1),
pb = NULL, # declare pb as a public field so methods can assign and read self$pb
# NOTE(review): the `pb` argument of initialize() is not used in its body.
initialize = function(n, pb = NULL) { # provide with default so that $new() doesn't require 'pb'
stopifnot(n >= 1)
self$n <- n
},
setup_progressbar = function() {
# Store the progress bar on the object instead of in a local variable.
self$pb <- txtProgressBar(max = self$n)
},
update_progressbar = function(i) {
setTxtProgressBar(self$pb, i)
},
close_progressbar = function () {
close(self$pb)
cat("\n")
}
))
mypb <- myprogressbar$new(10)
mypb$setup_progressbar()
mypb$update_progressbar(3)

Using multiple constructors for R classes and subclasses

I would like to use multiple constructors in my R S4 class.
I have an object that has three slots. To make that object, sometimes I want to just give the values for the three slots outright. But sometimes I'd like to provide a matrix, and I have a function that can take a matrix and return what those three slots should be.
At first, it seems like I could write a function as a constructor. So I could write objectFromMatrix(matrix) --> object with three slots. The problem is that I also have sub-classes that inherit from that main class, and I want to be able to use that constructor with them as well.
So I could just write functions as extra constructors for each of the subclasses, but that would be a bit tedious and not super OO-like.
To make my problem a little more tangible, I'll try to write a minimal example below. I'll write it in Java, but I'm a bit rusty so let me know if it doesn't make sense.
Desired structure, in Java:
// An abode is a place where you live and it has a size
class Abode {
    int size = 1;

    // Main constructor that just assigns args to fields
    Abode(int size) {
        this.size = size;
    }

    // Alternative constructor that takes in a different datatype
    // and computes args to assign to fields.
    // Any description other than "Large" or "Small" keeps the default size of 1.
    Abode(String description) {
        // Strings are compared with equals(); the literal goes first so a
        // null description does not throw.
        if ("Large".equals(description)) {
            this.size = 5;
        }
        if ("Small".equals(description)) {
            this.size = 1;
        }
    }
}

// To keep it simple, a house is just an abode with a name
class House extends Abode {
    String name;

    // Delegates to Abode(int).
    House(int size, String name) {
        super(size);
        this.name = name;
    }

    // Delegates to Abode(String), so the description logic is written only once.
    House(String size, String name) {
        super(size);
        this.name = name;
    }
}
This implementation works nicely because I can call Abode("Large") or House("Large", "Casa de me"), and both of those get passed to the extra constructor I built in the Abode class.
Keeping up with the house analogy, this is the best I've been able to do in R:
# An abode is a place you live and it has a size
setClass("Abode",
  slots = list(size = "numeric")
)

# Alternative constructor that takes in a different datatype
# and computes args to assign to fields.
#   sizeString  "big" or "small"; any other value is an error
# Returns an "Abode" object.
abode_constructor_2 <- function(sizeString) {
  if (sizeString == "big") {
    return(new("Abode", size = 5))
  }
  if (sizeString == "small") {
    return(new("Abode", size = 1))
  }
  # Fail loudly instead of silently returning NULL.
  stop("invalid sizeString: ", sizeString)
}

# A house is an abode with a name
# (the slot class for text is "character"; R has no "string" class)
setClass("House",
  slots = list(name = "character"),
  contains = "Abode"
)

# I already defined this constructor but I have to do it again
#   sizeString  "big" or "small"; any other value is an error
#   name        name of the house
# Returns a "House" object.
house_constructor_2 <- function(sizeString, name) {
  if (sizeString == "big") {
    return(new("House", size = 5, name = name))
  }
  if (sizeString == "small") {
    return(new("House", size = 1, name = name))
  }
  stop("invalid sizeString: ", sizeString)
}
In case it helps, here is a minimal example of the real context where this problem is coming up. I define an extra constructor for the Sensor class, sensor_constructor_2, as a function. But then, when I have a class that inherits from Sensor, I have to make that constructor over again.
# A sensor has three parameters
setClass("Sensor",
  slots = list(Rmin = "numeric", Rmax = "numeric", delta = "numeric")
)

# I also like to make sensors from a matrix
# matrix_to_params() is not shown here; presumably it returns the three slot
# values in the order Rmin, Rmax, delta.
sensor_constructor_2 <- function(matrix) {
  params <- matrix_to_params(matrix)
  new("Sensor", Rmin = params[1], Rmax = params[2], delta = params[3])
}

# A redoxSensor is just a sensor with an extra field
setClass("redoxSensor",
  slots = list(e0 = "numeric"),
  contains = "Sensor"
)

# Goal: make this extra constructor unnecessary by making sensor_constructor_2 a property of the sensor class
#   matrix  input handed to matrix_to_params()
#   e0      value for the extra e0 slot
# Returns a "redoxSensor" object.
extraConstructor_redox <- function(matrix, e0) {
  params <- matrix_to_params(matrix)
  # e0 must be an argument of new(), not of return().
  new("redoxSensor",
      Rmin = params[1], Rmax = params[2], delta = params[3],
      e0 = e0)
}
There is no reason why you can't do this with one S4 constructor by using default arguments and a little extra logic, along the lines of
# Class generators: an Abode has a size, a House is an Abode with a name.
Abode <- setClass(
  "Abode",
  slots = list(size = "numeric")
)
House <- setClass(
  "House",
  slots = list(name = "character"),
  contains = "Abode"
)

# Single constructor for both classes.
#   size        numeric size, used unless sizeString is supplied
#   name        when supplied, a House is built; otherwise an Abode
#   sizeString  optional "Large" or "Small", translated to a size
createDwelling <- function(size = 0, name, sizeString) {
  size_given_as_text <- !missing(sizeString)
  if (size_given_as_text) {
    size <- if (sizeString == "Large") {
      5
    } else if (sizeString == "Small") {
      1
    } else {
      stop("invalid sizeString")
    }
  }

  if (!missing(name)) {
    return(House(size = size, name = name))
  }
  return(Abode(size = size))
}
example usage:
> createDwelling(size=3)
An object of class "Abode"
Slot "size":
[1] 3
> createDwelling(sizeString="Small")
An object of class "Abode"
Slot "size":
[1] 1
> createDwelling(sizeString="Small",name="my house")
An object of class "House"
Slot "name":
[1] "my house"
Slot "size":
[1] 1

R: Values saved into a list within %dopar% / foreach are not available downstream in global environment

I am trying to run the following code in parallel using dopar / foreach , but I can't figure out how to actually save the values into the list and have them appear in the global environment further down in the script.
I have that first line of code to initialize the seurat.objects list. I am importing the list into the foreach and assigning a new value to each of the list's elements in there too, using <<-, which should mean it will be saved into the global environment. Why is the updated seurat.objects list not preserved outside of the foreach?
1a. Scale only (without nUMI regression):
1b. Scale with nUMI regression and store in a new object:
# List that is meant to receive the two scaled Seurat objects.
seurat.objects <- list(scaled=NULL, scaled.regressed=NULL)
# Run the loop on a two-worker cluster through the doFuture backend.
registerDoFuture()
cl <- makeCluster(2, outfile="")
plan(cluster, workers = cl)
# NOTE(review): the loop body runs on the cluster workers, so the assign() and
# <<- calls below presumably change only each worker's exported copy of the
# variables, not seurat.objects in this session - confirm against the
# foreach / doFuture documentation. Only the value of the loop body comes back,
# collected in `result`.
result <- foreach(object=names(seurat.objects),
.export = ls(.GlobalEnv)) %dopar% {
# Sets object.path (and related variables) for the current object via <<-.
selectObject(object)
if( ! file.exists(object.path)) {
if(object == "scaled") {
# assign() takes its first argument as a literal variable name: this creates a
# variable called `seurat.objects[["scaled"]]` and leaves the list element as is.
assign('seurat.objects[["scaled"]]', ScaleData(seurat.object,
do.scale = T, do.center = T, display.progress = F))
}
if(object == "scaled.regressed") {
# Same literal-name behaviour of assign() as in the branch above.
assign('seurat.objects[["scaled.regressed"]]',
ScaleData(seurat.object,
vars.to.regress = "nUMI",
do.scale = T, do.center = T, display.progress = F))
}
# Saves the list element, which the assign() calls above did not modify.
saveRDS(seurat.objects[[object]], file=object.path)
} else { # Found scaled .Rds
x <- readRDS(object.path)
seurat.objects[[object]] <<- x
rm(x)
}
}
stopCluster(cl)
The selectObject function is defined before the above code, as follows:
# Set the variables describing which scaled object is being processed.
#
# For object "scaled" or "scaled.regressed" this super-assigns scaling,
# pca.result, object.path and pca.result.path. It reads path.scaled.object,
# path.scaled.regressed.object, clustering.path and age from the enclosing
# environments. Any other value of `object` changes nothing.
selectObject <- function(object) {
  # Build the PCA result path for a given scaling suffix.
  pca_path_for <- function(suffix) {
    paste0(clustering.path, "2_pca/pcaObject_",
           age, suffix, ".Rds")
  }

  wants_scaled <- object == "scaled"
  if (wants_scaled) {
    scaling <<- "_scaleOnly"
    pca.result <<- "pca.scaled"
    object.path <<- path.scaled.object
    pca.result.path <<- pca_path_for("_scaleOnly")
  }

  wants_regressed <- object == "scaled.regressed"
  if (wants_regressed) {
    scaling <<- "_scale_nUMIregress"
    pca.result <<- "pca.scaled.regressed"
    object.path <<- path.scaled.regressed.object
    pca.result.path <<- pca_path_for("_scale_nUMIregress")
  }
}
When I try to inspect the contents of seurat.objects, the list in which the data should have been stored, I get:
> seurat.objects
$scaled
NULL
$scaled.regressed
NULL

Output to pdf not working with ReferenceClasses methods in R?

Output to pdf not working with ReferenceClasses methods in R?
This is an example taken from the ReferenceClasses R doc, with some minor
modification:
# Reference class wrapping a matrix together with a log of edits, so that
# edits can be undone.
mEdit <- setRefClass(
  "mEdit",
  fields = list(data = "matrix", edits = "list")
)

mEdit$methods(
  # Replace data[i, j] by `value`, remembering the old cell content.
  edit = function(i, j, value) {
    previous <- list(i, j, data[i, j])
    data[i, j] <<- value
    edits <<- c(edits, list(previous))
    invisible(value)
  },

  # Revert the most recent edit; stops when nothing is left to undo.
  undo = function() {
    if (!length(edits)) {
      stop("No more edits to undo!")
    }
    prev <- edits[[length(edits)]]
    edit(prev[[1]], prev[[2]], prev[[3]])
    # Drop two entries: the undone edit and the one edit() just recorded.
    length(edits) <<- length(edits) - 2
    invisible(prev)
  },

  # Print the class name, the matrix and the number of recorded edits.
  show = function() {
    message("ClassName: ", classLabel(class(.self)))
    message("Data:")
    methods::show(data)
    message("Undo list length: ", length(edits))
  },

  # Names of the class's methods that match `pattern`.
  .DollarNames.mEdit = function(x, pattern) {
    method_names <- getRefClass(class(x))$methods()
    grep(pattern, method_names, value = TRUE)
  }
)

# Example session: two edits, then undo the second one.
x <- matrix(1:24, 3, 8)
xx <- mEdit(data = x)
xx$edit(2, 2, 0)
xx$show()
xx$edit(3, 5, 1)
xx$show()
xx$undo()
xx$show()
# matrixViewer extends mEdit and redraws the matrix as an image after each edit.
mv <- setRefClass(
  "matrixViewer",
  contains = "mEdit",
  fields = c("viewerDevice", "viewerFile")
)

mv$methods(
  # Names of the class's methods that match `pattern`.
  .DollarNames.mEdit = function(x, pattern) {
    method_names <- getRefClass(class(x))$methods()
    grep(pattern, method_names, value = TRUE)
  },

  # Plot the matrix; the title reports how many edits are recorded.
  view = function() {
    ## dd = dev.cur();
    ## dev.set(viewerDevice)
    ## devAskNewPage(FALSE)
    plot_title <- paste("After", length(edits), "edits")
    image(data, main = plot_title)
    ## dev.set(dd)
  },

  # Perform the inherited edit, then redraw.
  edit = function(i, j, value) {
    callSuper(i, j, value)
    view()
  },

  # Remember the output file and pass the remaining arguments on.
  initialize = function(file = "./mv.pdf", ...) {
    viewerFile <<- file
    ## pdf(viewerFile)
    ## viewerDevice <<- dev.cur()
    ## dev.set(dev.prev())
    callSuper(...)
  },

  # Close the viewer device when the object is garbage collected.
  finalize = function() {
    dev.off(viewerDevice)
  }
)

# Example session: three edits, one undo, then an explicit redraw.
x <- matrix(rnorm(64, 0, 34), 8, 8)
xx <- mv(file = "/tmp/x.pdf", data = x)
xx$edit(2, 2, 0)
xx$edit(3, 5, 1)
xx$edit(4, 4, 2.3)
xx$undo()
xx$view()
Note that I have commented out those lines concerning switch
of output devices, so it uses the default device all through,
otherwise when the view method
is called, the plot is not written to the pdf file at all.
Any idea why this is happening?
Call rm on xx and then call garbage collection. finalize will then be called which will invoke dev.off and the pdf will be written. This assumes everything is uncommented.
# Remove the binding xx ...
rm(xx)
# ... and trigger garbage collection, which is when finalize is called.
gc()
Also your .DollarNames should be
# Stand-alone functions (not reference class methods), one per class.
# Each returns the names of the methods of x's class that match `pattern`.
.DollarNames.mEdit <- function(x, pattern) {
  method_names <- getRefClass(class(x))$methods()
  grep(pattern, method_names, value = TRUE)
}

.DollarNames.matrixViewer <- function(x, pattern) {
  method_names <- getRefClass(class(x))$methods()
  grep(pattern, method_names, value = TRUE)
}
and are not methods of the Reference class. They are external functions, separate from the Reference classes.
So the main takeaway here is that finalize is not called until the object is garbage collected.

Resources