Equality comparison in an if statement in R - r

I have a list of data.frames:
book=list(ask,bid)
and I want to iterate through each data.frame like so:
book.total_volumes <- function(book) {
bid_total_volume=0
ask_total_volume=0
for(book in book) {
if(book=="bid"){
for(value in book[,"volumes"]) {
bid_total_volume=bid_total_volume+value }
}
if(book=="ask"){
for(value in book[,"volumes"]) {
ask_total_volume=ask_total_volume+value }
}
}
print(bid_total_volume)
print(ask_total_volume)
}
book.total_volumes(book)
when doing the if statement, how can I check if the current book name is equals to "bid" or if it is equals to "ask

The issue here is that the elements of you list book aren't named. Another issue is that I don't think you can define book in book in a loop, this will overwrite the first book variable...
Here is the quick answer using lapply function. It apply the same function to all elements of a list and return the values in the same order.
ask = data.frame(title = c("hello", "book"),
volumes = c(1, 42))
bid = data.frame(title = c("hello", "book"),
volumes = c(12, 1))
book=list(ask,bid)
print(unlist(lapply(book, function(x) sum(x[,"volumes"]))))
A longer version of this could be below. Note that list in R are also ordered.
book.total_volumes <- function(book) {
res <- c(ask = 0, bid = 0)
for(i in 1:length(book)){
res[i] = sum(book[[i]][, "volumes"])
# You can replace sum with a loop but it's good practice in R to vectorise code.
}
print(res)
}
book.total_volumes(book)
If you name the elements of you list, this code will work but this is not very optimal as functions exist to do this in less code.
book=list(ask = ask, bid = bid)
book.total_volumes <- function(book) {
bid_total_volume=0
ask_total_volume=0
for(i in seq_along(book)) {
if(names(book)[i] == "bid"){
for(value in book[[i]][,"volumes"]) {
bid_total_volume = bid_total_volume + value
}
}
if(names(book)[i]=="ask"){
for(value in book[[i]][,"volumes"]) {
ask_total_volume =ask_total_volume + value
}
}
}
print(bid_total_volume)
print(ask_total_volume)
}
book.total_volumes(book)

Related

Use of environment in validate_that in R

I am trying to write my own test-function (test_if) that returns both the result of the test as well as an optional error message. The function is based on the validate_that function in the assertthat-package.
The test_if function seems to work, however, I further want to use test_if in a more specific function (check_input) that analyses user-inputs in shiny. There I have a problem, that the check_input-function only works, if I define the test_if function inside the check_input function.
I suppose that the problem is caused by some search scope or environment problem. However, I am really a newbie to environments in R.
How can I get my check_input-function work without the need to define the test_if function inside it?
Many thanks, Silke
Here is my minimal working example:
library(assertthat)
test_if <- function(...,msg=NULL) {
test <- validate_that(...,msg=msg)
if (is.logical(test)) {
return(list(assertation=test,msg=NULL))
}
if (is.character(test)) {
return(list(assertation=FALSE,msg=test))
}
}
test_if(2==3)
test_if(3==3)
test_if(2==3,3==4,msg="something is wrong")
### To check different inputs
check_input1 <- function(value1 = NULL,value2 = NULL) {
test_if <- function(...,msg=NULL) {
test <- validate_that(...,msg=msg)
if (is.logical(test)) {
return(list(assertation=test,msg=NULL))
}
if (is.character(test)) {
return(list(assertation=FALSE,msg=test))
}
}
error_msg <- ""
error_status <- FALSE
check <- test_if(is.numeric(value1))
error_msg <- check$msg
error_status <- check$assertation
return(list(error_msg=error_msg,error_status=error_status))
}
check_input2 <- function(value1 = NULL,value2 = NULL) {
error_msg <- ""
error_status <- FALSE
check <- test_if(is.numeric(value1))
error_msg <- check$msg
error_status <- check$assertation
return(list(error_msg=error_msg,error_status=error_status))
}
check_input1(value1=1)
check_input2(value1=1)

Is there a variable listing in RStudio (or R) like in SPSS?

RStudio provides a nice function View (with uppercase V) to take a look into the data, but with R it's still nasty to get orientation in a large data set. The most common options are...
names(df)
str(df)
If you're coming from SPSS, R seems like a downgrade in this respect. I wondered whether there is a more user-friendly option? I did not find a ready-one, so I'd like to share my solution with you.
Using RStudio's built-in function View, it's white simple to have a variable listing for a data.frame similar to the one in SPSS. This function creates a new data.frame with the variable information and displays in the RStudio GUI via View.
# Better variables view
Varlist = function(sia) {
# Init varlist output
varlist = data.frame(row.names = names(sia))
varlist[["comment"]] = NA
varlist[["type"]] = NA
varlist[["values"]] = NA
varlist[["NAs"]] = NA
# Fill with meta information
for (var in names(sia)) {
if (!is.null(comment(sia[[var]]))) {
varlist[[var, "comment"]] = comment(sia[[var]])
}
varlist[[var, "NAs"]] = sum(is.na(sia[[var]]))
if (is.factor(sia[[var]])) {
varlist[[var, "type"]] = "factor"
varlist[[var, "values"]] = paste(levels(sia[[var]]), collapse=", ")
} else if (is.character(sia[[var]])) {
varlist[[var, "type"]] = "character"
} else if (is.logical(sia[[var]])) {
varlist[[var, "type"]] = "logical"
n = sum(!is.na(sia[[var]]))
if (n > 0) {
varlist[[var, "values"]] = paste(round(sum(sia[[var]], na.rm=T) / n * 100), "% TRUE", sep="")
}
} else if (is.numeric(sia[[var]])) {
varlist[[var, "type"]] = typeof(sia[[var]])
n = sum(!is.na(sia[[var]]))
if (n > 0) {
varlist[[var, "values"]] = paste(min(sia[[var]], na.rm=T), "...", max(sia[[var]], na.rm=T))
}
} else {
varlist[[var, "type"]] = typeof(sia[[var]])
}
}
View(varlist)
}
My recommendation is to store that as a file (e.g., Varlist.R) and whever you need it, just type:
source("Varlist.R")
Varlist(df)
Again please take note of the uppercase V using as function name.
Limitation: When working with data.frame, the listing will not be updated unless Varlist(df) is run again.
Note: R has a built-in option to view data with print. If working with pure R, just replace the View(varlist) by print(varlist). Yet, depending on screen size, Hmisc::describe() could be a better option for the console.

Sift through each row in a dataframe and manually classify it

Can someone recommend an efficient way to sift through each row in a dataframe and manually classify it? For example I might be wanting to separate spam from e-mail, or shortlist job adverts, job applicants, or dating agency profiles (I understand Tinder does this by getting you to swipe left or right).
My dataset is small enough to classify manually. I suppose if it was larger I might only want to manually classify a portion of it in order to train a machine-learning algorithm such as Naive Bayes to finish the task for me.
I'll show you what I've got at the moment, but this isn't a particularly original task, so there must be a less crude way to do this that someone has already thought of! (As a newcomer, I'm impressed by the power of R, but also nonplussed when little tasks like clearing the screen or capturing a keystroke turn out to be non-trivial)
# Let us suppose I am using this built-in dataset to draw up a
# shortlist of where I might wish to go on holiday
df <- data.frame(state.x77);
# pp - define a task-specific pretty print function
pp <- function(row) {
print(row); # Example dataset is simple enough to just print the entire row
}
# cls - clear the screen (this hack works on Windows but I've commented it for now)
cls <- function() {
#system("powershell -ExecutionPolicy Bypass -command (New-Object -ComObject Wscript.Shell).SendKeys([string][char]12)");
}
# It would halve the number of keystrokes needed if I knew a way to read
# a single character
readcharacter <- readline;
sift <- function(df, pp)
{
classification = rep('', nrow(df));
for (nRow in 1:nrow(df))
{
cls();
pp(df[nRow,]);
cat("\nEnter 'a' to discard, 'd' to keep, 'q' to quit\n");
char <- '';
while (char != 'a' && char != 'd' && char != 'q') {
char <- readcharacter();
}
if (char == 'q')
break;
classification[nRow] = char;
}
return(cbind(df,classification=classification));
}
result = sift(df, pp);
cls();
cat("Shortlist:\n");
print(row.names(result[result$classification=='d',]));
So how does the StackOverflow community feel about me using this Shiny app to solve my problem? I wouldn't expect to see Shiny used in this early part of data analysis - normally it only comes into play once we have some results we'd like to explore or present dynamically.
Learning Shiny was fun and useful, but I'd much prefer it if a less complicated answer could be found.
library(shiny);
#
# shortlist - function that allows us to shortlist through the rows in a data frame efficiently
#
shortlist <- function(df, sTitle, sRowName) {
createUI <- function() {
listHeading <- list(
textOutput(outputId = "Progress"),
tags$br(),
fluidRow(
column(width=1, sRowName),
column(width=9, textOutput(outputId = "RowName"))));
listFields <- lapply(names(df), function(sFieldname) {
return(fluidRow(
column(width=1, sFieldname),
column(width=9, textOutput(outputId = sFieldname))));
});
listInputs <- list(
tags$br(),
tags$table(
tags$tr(
tags$td(" "),
tags$td(actionButton(inputId="Up", label="W", disabled=TRUE, width="100%"))),
tags$tr(
tags$td(width="100px", actionButton(inputId="Discard", label="Discard, A", width="100%")),
tags$td(width="100px", actionButton(inputId="Down", label="S", disabled=TRUE, width="100%")),
tags$td(width="100px", actionButton(inputId="Keep", label="Keep, D", width="100%")))),
tags$script("
// JavaScript implemented keyboard shortcuts, including lots of conditions to
// ensure we're finished processing one keystroke before we start the next.
var bReady = false;
$(document).on('shiny:recalculating', function(event) {
bReady = false;
});
$(document).on('shiny:recalculated', function(event) {
setTimeout(function() {bReady = true;}, 500);
});
$(document).on('keypress', function(event) {
if (bReady) {
switch(event.key.toLowerCase()) {
case 'a':
document.getElementById('Discard').click();
bReady = false;
break;
case 'd':
document.getElementById('Keep').click();
bReady = false;
break;
}
}
});
// End of JavaScript
"));
listPanel <- list(
title = sTitle,
tags$br(),
conditionalPanel(
condition = paste("input.Keep + input.Discard <", nrow(df)),
append(append(listHeading, listFields), listInputs)));
listShortlist <- list(
tags$hr(),
tags$h4("Shortlist:"),
dataTableOutput(outputId="Shortlist"));
ui <- do.call(fluidPage, append(listPanel, listShortlist));
return(ui);
}
app <- shinyApp(ui = createUI(), server = function(input, output) {
classification <- rep('', nrow(df));
getRow <- reactive({
return (input$Keep + input$Discard + 1);
});
classifyRow <- function(nRow, char) {
if (nRow <= nrow(df)) {
classification[nRow] <<- char;
}
# In interactive mode, automatically stop the app when we're finished
if ( interactive() && nRow >= nrow(df) ) {
stopApp(classification);
}
}
observeEvent(input$Discard, {classifyRow(getRow() - 1, 'a')});
observeEvent(input$Keep, {classifyRow(getRow() - 1, 'd')});
output$Progress = renderText({paste("Showing record", getRow(), "of", nrow(df))});
output$RowName = renderText({row.names(df)[getRow()]});
lapply(names(df), function(sFieldname) {
output[[sFieldname]] <- renderText({df[getRow(), sFieldname]});
});
output$Shortlist <- renderDataTable(options = list(paging = FALSE, searching = FALSE), {
# Mention the 'keep' input to ensure this code is called when the 'keep' button
# is pressed. That way the shortlist gets updated when an item to be added to it.
dummy <- input$Keep;
# Construct the shortlist
shortlist <- data.frame(row.names(df[classification == 'd',]));
colnames(shortlist) <- sRowName;
return(shortlist);
});
});
if (interactive()) {
classification <- runApp(app);
return(cbind(df, classification = classification));
} else {
return(app);
}
}
#
# And now some example code.
# Shortlist the built in state.x77 data set (let us suppose I am drawing up
# a shortlist of where I might wish to go on holiday)
#
df <- data.frame(state.x77);
result <- shortlist(df = df, "Choose states", "State");
if (interactive()) {
cat("Shortlist:\n");
print(row.names(result[result$classification == 'd',]));
} else {
return (result);
}

R storing variable value in alist

I am trying to use a function to modify another function default settings through formals but my problem is that when I check my function defaults afterwards then nothing has changed. My code (minus unrelated stuff) is:
ScouringSettings <- function(min.MAF=NULL, eq.thresh=NULL){
if (is.null(min.MAF) && is.null(eq.thresh)){
maf <- paste0("Minimum MAF criterion is: ", formals(GeneScour)$min.maf)
eq <- paste0("ChiĀ² HW equilibrium threshold: ", formals(GeneScour)$min.eq)
cat(paste(maf, eq, sep="\n"))
} else if (is.null(eq.thresh)) {
formals(GeneScour) <- alist(gene=, min.maf = min.MAF, min.eq = formals(GeneScour)$min.eq)
} else if (is.null()){
formals(GeneScour) <- alist(gene=, min.maf = formals(GeneScour)$min.maf, min.eq = eq.thresh)
} else {
formals(GeneScour) <- alist(gene=, min.maf = min.maf, min.eq = eq.thresh)
}
}
I thought that maybe it was because of a problem of scope or something so I tried printing out the defaults while still being in my first function and it printed :
$gene
$min.maf
min.MAF
$min.eq
formals(GeneScour)$min.eq
And even when I forcefully type
formals(GeneScour) <- alist(gene=, min.maf = 2, min.eq = formals(GeneScour)$min.eq)
The modification is not carried over outside of the ScouringSettings.
I am a bit lost, how could I manage that ?

How to patch an S4 method in an R package?

If you find a bug in a package, it's usually possible to patch the problem with fixInNamespace, e.g. fixInNamespace("mean.default", "base").
For S4 methods, I'm not sure how to do it though. The method I'm looking at is in the gWidgetstcltk package. You can see the source code with
getMethod(".svalue", c("gTabletcltk", "guiWidgetsToolkittcltk"))
I can't find the methods with fixInNamespace.
fixInNamespace(".svalue", "gWidgetstcltk")
Error in get(subx, envir = ns, inherits = FALSE) :
object '.svalue' not found
I thought setMethod might do the trick, but
setMethod(".svalue", c("gTabletcltk", "guiWidgetsToolkittcltk"),
definition = function (obj, toolkit, index = NULL, drop = NULL, ...)
{
widget = getWidget(obj)
sel <- unlist(strsplit(tclvalue(tcl(widget, "selection")),
" "))
if (length(sel) == 0) {
return(NA)
}
theChildren <- .allChildren(widget)
indices <- sapply(sel, function(i) match(i, theChildren))
inds <- which(visible(obj))[indices]
if (!is.null(index) && index == TRUE) {
return(inds)
}
if (missing(drop) || is.null(drop))
drop = TRUE
chosencol <- tag(obj, "chosencol")
if (drop)
return(obj[inds, chosencol, drop = drop])
else return(obj[inds, ])
},
where = "package:gWidgetstcltk"
)
Error in setMethod(".svalue", c("gTabletcltk", "guiWidgetsToolkittcltk"), :
the environment "gWidgetstcltk" is locked; cannot assign methods for function ".svalue"
Any ideas?
How about the old-school way of getting the source, applying the change and rebuilding?
you can first get the generic out, and then fix the generic by setMethod in your global environment, and then assign it back to that namespace
.svalue <- gWidgetstcltk:::.svalue
setMethod(".svalue", c("gTabletcltk", "guiWidgetsToolkittcltk"),
definition = function (obj, toolkit, index = NULL, drop = NULL, ...)
{
widget = getWidget(obj)
sel <- unlist(strsplit(tclvalue(tcl(widget, "selection")),
" "))
if (length(sel) == 0) {
return(NA)
}
theChildren <- .allChildren(widget)
indices <- sapply(sel, function(i) match(i, theChildren))
inds <- which(visible(obj))[indices]
if (!is.null(index) && index == TRUE) {
return(inds)
}
if (missing(drop) || is.null(drop))
drop = TRUE
chosencol <- tag(obj, "chosencol")
if (drop)
return(obj[inds, chosencol, drop = drop])
else return(obj[inds, ])
}#,
#where = "package:gWidgetstcltk"
)
assignInNamespace(".svalue", .svalue, ns = "gWidgetstcltk")

Resources