Creating Binary Search Tree in R - r

I have this code for creating a Binary Search Tree in a R6 class.
Creating a Node & BST class. In BST class, I am defining insert_recur function to create the BST by appropriately inserting the data.
library(R6)
Node <- R6Class(
classname = 'Node',
public = list(
val = NULL,
left = NULL,
right = NULL,
initialize = function(val = NULL, left = NULL, right = NULL){
self$val <- val
self$left <- left
self$right <- right
}
)
)
BST <- R6Class(
classname = 'BST',
public = list(
root = NULL,
# node = NULL,
insert = function(data){
if(is.null(self$root)){
self$node <- Node$new(data)
}else{
self$insert_recur(data, self$root)
}
},
insert_recur = function(data, cur_node){
if(data < cur_node$val){
if(is.null(cur_node$self)){
cur_node$left <- Node$new(data)
}else{
insert_recur(data, cur_node$left)
}
}else if(data > cur_node$val){
if(is.null(cur_node$self)){
cur_node$right <- Node$new(data)
}else{
insert_recur(data, cur_node$right)
}
}else{
print('value already in tree')
}
},
get_height = function(cur_node){
if(is.null(cur_node$val)){
return(-1)
}else{
return(max(self$get_height(cur_node$left),self$get_height(cur_node$right))+1)
}
}
)
)
bst <- BST$new()
bst$insert(3)
bst$insert(2)
bst$insert(1)
bst$insert(5)
bst$insert(6)
bst$insert(4)
bst$insert(7)
However I am getting this error -
Error in self$node <- Node$new(data) : cannot add bindings to a locked environment
If I put node <- NULL in the BST class, then the recursion fails & all nodes are NULL.
What will be the correct implementation?

Your Node implementation is fine. The BST isn't quite right though. It should have a NULL root node only. The problem lies in your insert_recur function. It's not possible for cur_node$self to ever be NULL, and the logic would seem to indicate that your if statements should be checking for the absence of cur_node$left and cur_node$right instead. Also, you need to remember to use self$insert_recur. The logic of your get_height argument doesn't seem right to me either. The following implementation seems to work as expected:
BST <- R6Class(
classname = 'BST',
public = list(
root = NULL,
insert = function(data) {
if(is.null(self$root)) {
self$root <- Node$new(data)
} else {
self$insert_recur(data, self$root)
}
},
insert_recur = function(data, cur_node) {
if(data < cur_node$val) {
if(is.null(cur_node$left)) {
cur_node$left <- Node$new(data)
} else {
self$insert_recur(data, cur_node$left)
}
} else if(data > cur_node$val){
if(is.null(cur_node$right)){
cur_node$right <- Node$new(data)
}else{
self$insert_recur(data, cur_node$right)
}
}else{
print('value already in tree')
}
},
get_height = function(cur_node){
if(is.null(cur_node$left) & is.null(cur_node$right)){
return(0)
}else{
return(max(self$get_height(cur_node$left),
self$get_height(cur_node$right)) + 1)
}
}
)
)
This allows
bst <- BST$new()
bst$insert(3)
bst$insert(2)
bst$insert(1)
bst$insert(5)
bst$insert(6)
bst$insert(4)
bst$insert(7)
bst$get_height(bst$root)
#> [1] 3
bst$get_height(bst$root$right)
#> [1] 2
Created on 2022-09-24 with reprex v2.0.2

Related

Equality comparison in an if statement in R

I have a list of data.frames:
book=list(ask,bid)
and I want to iterate through each data.frame like so:
book.total_volumes <- function(book) {
bid_total_volume=0
ask_total_volume=0
for(book in book) {
if(book=="bid"){
for(value in book[,"volumes"]) {
bid_total_volume=bid_total_volume+value }
}
if(book=="ask"){
for(value in book[,"volumes"]) {
ask_total_volume=ask_total_volume+value }
}
}
print(bid_total_volume)
print(ask_total_volume)
}
book.total_volumes(book)
when doing the if statement, how can I check if the current book name is equals to "bid" or if it is equals to "ask
The issue here is that the elements of you list book aren't named. Another issue is that I don't think you can define book in book in a loop, this will overwrite the first book variable...
Here is the quick answer using lapply function. It apply the same function to all elements of a list and return the values in the same order.
ask = data.frame(title = c("hello", "book"),
volumes = c(1, 42))
bid = data.frame(title = c("hello", "book"),
volumes = c(12, 1))
book=list(ask,bid)
print(unlist(lapply(book, function(x) sum(x[,"volumes"]))))
A longer version of this could be below. Note that list in R are also ordered.
book.total_volumes <- function(book) {
res <- c(ask = 0, bid = 0)
for(i in 1:length(book)){
res[i] = sum(book[[i]][, "volumes"])
# You can replace sum with a loop but it's good practice in R to vectorise code.
}
print(res)
}
book.total_volumes(book)
If you name the elements of you list, this code will work but this is not very optimal as functions exist to do this in less code.
book=list(ask = ask, bid = bid)
book.total_volumes <- function(book) {
bid_total_volume=0
ask_total_volume=0
for(i in seq_along(book)) {
if(names(book)[i] == "bid"){
for(value in book[[i]][,"volumes"]) {
bid_total_volume = bid_total_volume + value
}
}
if(names(book)[i]=="ask"){
for(value in book[[i]][,"volumes"]) {
ask_total_volume =ask_total_volume + value
}
}
}
print(bid_total_volume)
print(ask_total_volume)
}
book.total_volumes(book)

vapply: values must be length 11

I'm using the function drive_time() from the placement package by Derek Darves, which outputs the time and length between two points via Google Maps.
However, not even the example code works for me:
howfar_kms <- drive_time(
address="350 5th Ave, New York, NY 10118, USA",
dest="1600 Amphitheatre Pkwy, Mountain View, CA 94043",
auth="standard_api", privkey="", clean=FALSE,
add_date='today', verbose=FALSE, travel_mode="bicycling",
units="imperial"
)
It gives this error:
Error in vapply(json, function(x) { :
values must be length 11,
but FUN(X[[1]]) result is length 0
It should be noted that yesterday the very same piece of code worked. I guess this is a problem of being dependent on things like real-time traffic info and so on.
Anyway I don't know how to work it out because vapply is part of the inner working of the function I guess.
Do you have any idea?
This is the full function, thanks #lmo for the tip.
function (address, dest, auth = "standard_api", privkey = NULL,
clientid = NULL, clean = "TRUE", travel_mode = "driving",
units = "metric", verbose = FALSE, add_date = "none", language = "en-EN",
messages = FALSE, small = FALSE)
{
options(stringsAsFactors = F)
if (!grepl("standard_api|work", auth))
stop("Invalid auth paramater. Must be 'standard_api' or 'work'.")
if (is.null(privkey))
stop("You must specify a valid API key or an empty string (''). To request a key, see:\n\t https://developers.google.com/maps/documentation/javascript/get-api-key#get-an-api-key")
if (auth == "work" & is.null(clientid))
stop("You must specify a client ID with the work authentication method!")
if (!grepl("driving|bicycling|transit|walking", travel_mode,
ignore.case = TRUE))
stop("You must specify a valid travel mode.")
if (!grepl("metric|imperial", units))
stop("Invalid units paramater. Must be 'metric' or 'imperial'")
if (length(address) > 1 & length(address) != length(dest))
stop("Address must be singular or the same length as destination!")
if (!is.vector(c(address, dest), mode = "character"))
stop("Address and destination must be character vectors!")
if (!grepl("today|fuzzy|none", add_date))
stop("Invalid add_date paramater. Must be 'today', 'fuzzy', or 'none'")
if (privkey == "" & travel_mode == "transit")
stop("You must specify a valid API key to use the transit mode!")
if (!is.logical(messages))
stop("messages must be logical! Please choose TRUE or FALSE.")
if (clean) {
if (verbose)
cat("Cleaning origin addresses...\n")
address <- placement::address_cleaner(address, verbose = verbose)
if (verbose)
cat("Cleaning destination addresses...\n")
dest <- placement::address_cleaner(dest, verbose = verbose)
}
not_nambia <- function(x) {
x[is.na(x)] <- ""
return(x)
}
address <- not_nambia(address)
dest <- not_nambia(dest)
enc <- urltools::url_encode(address)
dest <- urltools::url_encode(dest)
if (auth == "standard_api") {
inbound <- data.frame(address = enc, dest = dest)
baserl <- "https://maps.googleapis.com/maps/api/distancematrix/json?origins="
inbound$full_url <- paste0(baserl, inbound$address, "&destinations=",
inbound$dest, "&units=", tolower(units), "&mode=",
tolower(travel_mode), "&language=", language, "&key=",
privkey)
togoogle <- inbound$full_url
}
if (auth == "work") {
togoogle <- placement::google_encode64(enc, dest = dest,
gmode = "dtime", privkey = privkey, clientid = clientid,
verbose = verbose, units = units)
}
if (verbose)
cat("Sending locations (n=", length(togoogle), ") to Google for distance calculation...\n",
sep = "")
json <- placement::pull_geo_data(togoogle, tmout = 10, messages = messages)
if (json[[1]]$status == "REQUEST_DENIED") {
stop(paste0("Request sent to Google, but response returned REQUEST_DENIED. Error details:\n",
json[[1]]$error_message))
}
coord <- t(vapply(json, function(x) {
if (!is.null(x$status)) {
if (x$status == "OK") {
if (!is.null(x$rows$elements[[1]]$status)) {
if (x$rows$elements[[1]]$status == "OK") {
origin <- as.character(x$origin_addresses)
destination <- as.character(x$destination_addresses)
dist_num <- as.character(x$rows$elements[[1]]$distance$value/1000)
if (units == "imperial")
dist_num <- as.character(as.numeric(dist_num) *
0.621371)
dist_txt <- as.character(x$rows$elements[[1]]$distance$text)
time_secs <- as.character(x$rows$elements[[1]]$duration$value)
time_mins <- as.character(as.numeric(time_secs) *
0.0166667)
time_hours <- as.character(as.numeric(time_secs) *
0.000277778)
time_txt <- as.character(x$rows$elements[[1]]$duration$text)
return_stat <- as.character(x$rows$elements[[1]]$status)
status <- as.character(x$status)
error_message <- ""
return(c(origin, destination, dist_num, dist_txt,
time_secs, time_mins, time_hours, time_txt,
return_stat, status, error_message))
}
else {
return(c(as.character(x$origin_addresses),
as.character(x$destination_addresses),
rep(NA, 6), x$rows$elements[[1]]$status,
x$status, ""))
}
}
}
else if (x$status == "CONNECTION_ERROR" & !is.null(x$error_message)) {
return(c(rep(NA, 9), x$status, x$error_message))
}
}
else {
return(c(rep(NA, 10), "Non-conforming response object: check source data/url for this record"))
}
}, character(11)))
if (is.matrix(coord)) {
out <- as.data.frame(coord)
}
else if (length(coord) == 11) {
out <- data.frame(t(unlist(coord)))
}
colnames(out) <- c("origin", "destination", "dist_num", "dist_txt",
"time_secs", "time_mins", "time_hours", "time_txt", "return_stat",
"status", "error_message")
nums <- c("dist_num", "time_secs", "time_mins", "time_hours")
out[, nums] <- vapply(out[, nums], function(x) {
x <- round(as.numeric(x), digits = 2)
return(x)
}, numeric(nrow(out)))
out$input_url <- togoogle
if (small)
out <- out[, c("dist_num", "time_hours")]
if (!add_date == "none") {
out$geocode_dt <- Sys.Date()
if (add_date == "fuzzy")
out$geocode_dt <- out$geocode_dt + stats::runif(nrow(out),
1, 30)
}
if (verbose) {
cat("Finished.", nrow(out[out$return_stat == "OK", ]),
"of", nrow(out), "distance calculations were successful.\n")
if (units == "imperial") {
len <- "miles"
}
else {
len <- "kilometers"
}
message("Note: numeric distances in the 'dist_num' column are expressed in ",
len, ".\n")
}
return(out)
}

Sift through each row in a dataframe and manually classify it

Can someone recommend an efficient way to sift through each row in a dataframe and manually classify it? For example I might be wanting to separate spam from e-mail, or shortlist job adverts, job applicants, or dating agency profiles (I understand Tinder does this by getting you to swipe left or right).
My dataset is small enough to classify manually. I suppose if it was larger I might only want to manually classify a portion of it in order to train a machine-learning algorithm such as Naive Bayes to finish the task for me.
I'll show you what I've got at the moment, but this isn't a particularly original task, so there must be a less crude way to do this that someone has already thought of! (As a newcomer, I'm impressed by the power of R, but also nonplussed when little tasks like clearing the screen or capturing a keystroke turn out to be non-trivial)
# Let us suppose I am using this built-in dataset to draw up a
# shortlist of where I might wish to go on holiday
df <- data.frame(state.x77);
# pp - define a task-specific pretty print function
pp <- function(row) {
print(row); # Example dataset is simple enough to just print the entire row
}
# cls - clear the screen (this hack works on Windows but I've commented it for now)
cls <- function() {
#system("powershell -ExecutionPolicy Bypass -command (New-Object -ComObject Wscript.Shell).SendKeys([string][char]12)");
}
# It would halve the number of keystrokes needed if I knew a way to read
# a single character
readcharacter <- readline;
sift <- function(df, pp)
{
classification = rep('', nrow(df));
for (nRow in 1:nrow(df))
{
cls();
pp(df[nRow,]);
cat("\nEnter 'a' to discard, 'd' to keep, 'q' to quit\n");
char <- '';
while (char != 'a' && char != 'd' && char != 'q') {
char <- readcharacter();
}
if (char == 'q')
break;
classification[nRow] = char;
}
return(cbind(df,classification=classification));
}
result = sift(df, pp);
cls();
cat("Shortlist:\n");
print(row.names(result[result$classification=='d',]));
So how does the StackOverflow community feel about me using this Shiny app to solve my problem? I wouldn't expect to see Shiny used in this early part of data analysis - normally it only comes into play once we have some results we'd like to explore or present dynamically.
Learning Shiny was fun and useful, but I'd much prefer it if a less complicated answer could be found.
library(shiny);
#
# shortlist - function that allows us to shortlist through the rows in a data frame efficiently
#
shortlist <- function(df, sTitle, sRowName) {
createUI <- function() {
listHeading <- list(
textOutput(outputId = "Progress"),
tags$br(),
fluidRow(
column(width=1, sRowName),
column(width=9, textOutput(outputId = "RowName"))));
listFields <- lapply(names(df), function(sFieldname) {
return(fluidRow(
column(width=1, sFieldname),
column(width=9, textOutput(outputId = sFieldname))));
});
listInputs <- list(
tags$br(),
tags$table(
tags$tr(
tags$td(" "),
tags$td(actionButton(inputId="Up", label="W", disabled=TRUE, width="100%"))),
tags$tr(
tags$td(width="100px", actionButton(inputId="Discard", label="Discard, A", width="100%")),
tags$td(width="100px", actionButton(inputId="Down", label="S", disabled=TRUE, width="100%")),
tags$td(width="100px", actionButton(inputId="Keep", label="Keep, D", width="100%")))),
tags$script("
// JavaScript implemented keyboard shortcuts, including lots of conditions to
// ensure we're finished processing one keystroke before we start the next.
var bReady = false;
$(document).on('shiny:recalculating', function(event) {
bReady = false;
});
$(document).on('shiny:recalculated', function(event) {
setTimeout(function() {bReady = true;}, 500);
});
$(document).on('keypress', function(event) {
if (bReady) {
switch(event.key.toLowerCase()) {
case 'a':
document.getElementById('Discard').click();
bReady = false;
break;
case 'd':
document.getElementById('Keep').click();
bReady = false;
break;
}
}
});
// End of JavaScript
"));
listPanel <- list(
title = sTitle,
tags$br(),
conditionalPanel(
condition = paste("input.Keep + input.Discard <", nrow(df)),
append(append(listHeading, listFields), listInputs)));
listShortlist <- list(
tags$hr(),
tags$h4("Shortlist:"),
dataTableOutput(outputId="Shortlist"));
ui <- do.call(fluidPage, append(listPanel, listShortlist));
return(ui);
}
app <- shinyApp(ui = createUI(), server = function(input, output) {
classification <- rep('', nrow(df));
getRow <- reactive({
return (input$Keep + input$Discard + 1);
});
classifyRow <- function(nRow, char) {
if (nRow <= nrow(df)) {
classification[nRow] <<- char;
}
# In interactive mode, automatically stop the app when we're finished
if ( interactive() && nRow >= nrow(df) ) {
stopApp(classification);
}
}
observeEvent(input$Discard, {classifyRow(getRow() - 1, 'a')});
observeEvent(input$Keep, {classifyRow(getRow() - 1, 'd')});
output$Progress = renderText({paste("Showing record", getRow(), "of", nrow(df))});
output$RowName = renderText({row.names(df)[getRow()]});
lapply(names(df), function(sFieldname) {
output[[sFieldname]] <- renderText({df[getRow(), sFieldname]});
});
output$Shortlist <- renderDataTable(options = list(paging = FALSE, searching = FALSE), {
# Mention the 'keep' input to ensure this code is called when the 'keep' button
# is pressed. That way the shortlist gets updated when an item to be added to it.
dummy <- input$Keep;
# Construct the shortlist
shortlist <- data.frame(row.names(df[classification == 'd',]));
colnames(shortlist) <- sRowName;
return(shortlist);
});
});
if (interactive()) {
classification <- runApp(app);
return(cbind(df, classification = classification));
} else {
return(app);
}
}
#
# And now some example code.
# Shortlist the built in state.x77 data set (let us suppose I am drawing up
# a shortlist of where I might wish to go on holiday)
#
df <- data.frame(state.x77);
result <- shortlist(df = df, "Choose states", "State");
if (interactive()) {
cat("Shortlist:\n");
print(row.names(result[result$classification == 'd',]));
} else {
return (result);
}

Instantiation of reference classes within reference classes - problems with lock() and immutability

I have come across some behaviour from R reference classes I would like to work around. In the following code, reference class B has two fields of reference class A in it.
These fields in B appear to be instantiated (possibly twice) with a zero-argument (default) versions of reference class A before B's initialize() method is called. These instances are then replaced with the correct versions of instance A during B's initialization process. The problem is that if I use lock() from B's instance generator, the initial empty instantiation's of A cannot be replaced in B. Another problem is that reference class A needs a default value in initialize [or a missing(c) test].
Help - suggestions - etc. appreciated.
A <- setRefClass('A',
fields = list(
count = 'numeric'
),
methods = list(
initialize = function (c=0) {
cat('DEBUG: A$initialize(c); where c='); cat(c); cat('\n')
count <<- c
}
)
)
instance.of.A <- A$new(10)
str(instance.of.A)
B <- setRefClass('B',
field = list(
a = 'A',
b = 'A'
),
methods = list(
initialize = function(c) {
a <<- instance.of.A
b <<- getRefClass('A')$new(c)
}
)
)
instance.of.b <- B$new(100)
str(instance.of.b)
Here are two possible solutions:
Don't set fields attribute:
B <- setRefClass('B',
methods = list(
initialize = function(c) {
.self$a = instance.of.A
.self$b =getRefClass('A')$new(c)
}
)
)
Set fields, but use the ANY class:
B <- setRefClass('B',
field = (a="ANY", b="ANY"),
methods = list(
initialize = function(c) {
a <<- instance.of.A
b <<- getRefClass('A')$new(c)
}
)
)
The downside of both these solutions is the type isn't enforced in a and b, i.e.
B$a = "Fred"
is now possible.
Drawing on the above, the solution I am using is (a little long because of the type checking):
A <- setRefClass('A',
fields = list(
count = function(x) {
if (!missing(x)) {
if(class(x) != 'numeric')
stop('Class A: count set by non-number')
.self$count.private <- x
}
.self$count.private
}
),
methods = list(
initialize = function (c=0) {
cat('DEBUG: A$initialize(c); where c='); cat(c); cat('\n')
count <<- c
}
)
)
instance.of.A <- A$new(10)
str(instance.of.A)
B <- setRefClass('B',
field = list(
a = function(x) {
if (!missing(x)) {
if(!inherits(x, 'envRefClass') || class(x)[1] != 'A')
stop('Class B: expecting instance of class A')
.self$a.private <- x
}
.self$a.private
},
b = function(x) {
if (!missing(x)) {
if(!inherits(x, 'envRefClass') || class(x)[1] != 'A')
stop('Class B: expecting instance of class A')
.self$b.private <- x
}
.self$b.private
}
),
methods = list(
initialize = function(c) {
a <<- instance.of.A
b <<- getRefClass('A')$new(c)
}
)
)
instance.of.b <- B$new(100)
str(instance.of.b)

How to patch an S4 method in an R package?

If you find a bug in a package, it's usually possible to patch the problem with fixInNamespace, e.g. fixInNamespace("mean.default", "base").
For S4 methods, I'm not sure how to do it though. The method I'm looking at is in the gWidgetstcltk package. You can see the source code with
getMethod(".svalue", c("gTabletcltk", "guiWidgetsToolkittcltk"))
I can't find the methods with fixInNamespace.
fixInNamespace(".svalue", "gWidgetstcltk")
Error in get(subx, envir = ns, inherits = FALSE) :
object '.svalue' not found
I thought setMethod might do the trick, but
setMethod(".svalue", c("gTabletcltk", "guiWidgetsToolkittcltk"),
definition = function (obj, toolkit, index = NULL, drop = NULL, ...)
{
widget = getWidget(obj)
sel <- unlist(strsplit(tclvalue(tcl(widget, "selection")),
" "))
if (length(sel) == 0) {
return(NA)
}
theChildren <- .allChildren(widget)
indices <- sapply(sel, function(i) match(i, theChildren))
inds <- which(visible(obj))[indices]
if (!is.null(index) && index == TRUE) {
return(inds)
}
if (missing(drop) || is.null(drop))
drop = TRUE
chosencol <- tag(obj, "chosencol")
if (drop)
return(obj[inds, chosencol, drop = drop])
else return(obj[inds, ])
},
where = "package:gWidgetstcltk"
)
Error in setMethod(".svalue", c("gTabletcltk", "guiWidgetsToolkittcltk"), :
the environment "gWidgetstcltk" is locked; cannot assign methods for function ".svalue"
Any ideas?
How about the old-school way of getting the source, applying the change and rebuilding?
you can first get the generic out, and then fix the generic by setMethod in your global environment, and then assign it back to that namespace
.svalue <- gWidgetstcltk:::.svalue
setMethod(".svalue", c("gTabletcltk", "guiWidgetsToolkittcltk"),
definition = function (obj, toolkit, index = NULL, drop = NULL, ...)
{
widget = getWidget(obj)
sel <- unlist(strsplit(tclvalue(tcl(widget, "selection")),
" "))
if (length(sel) == 0) {
return(NA)
}
theChildren <- .allChildren(widget)
indices <- sapply(sel, function(i) match(i, theChildren))
inds <- which(visible(obj))[indices]
if (!is.null(index) && index == TRUE) {
return(inds)
}
if (missing(drop) || is.null(drop))
drop = TRUE
chosencol <- tag(obj, "chosencol")
if (drop)
return(obj[inds, chosencol, drop = drop])
else return(obj[inds, ])
}#,
#where = "package:gWidgetstcltk"
)
assignInNamespace(".svalue", .svalue, ns = "gWidgetstcltk")

Resources