Parsing a string in R - r

I am writing a R script which I will be running as a script from the command line. I am passing command line arguments to the script using keywords as follows:
myscript.R --arg1=100 --arg2='hello' --arg3=3.14159
I want to write an R function that will return the command line values into a dictionary like object (i.e. a list of lists in R), filling unsupplied arguments with defaults.
for e.g.
parseArguments <- function() {
options <- commandArgs(TRUE)
# options now contains "--arg1=100 --arg2='hello' --arg3=3.14159"
# parse string held in variable named options and stuff into a list
# .... Do some string manip ....
args <- list()
args['arg1'] <- 100
args['arg2'] <- 'hello'
args['arg3'] <- 3.14159
args['arg4'] <- 123 # Not found in parsed line so we use a hard coded default
return (args)
}
Can someone help fill in the blanks?

> parseArguments <- function() {
+ text1 = "--arg1=100 --arg2='hello' --arg3=3.14159"
+ eval(parse( text= gsub("\\s", ";", gsub("--","", text1))))
+ args <- list()
+ args['arg1'] <- arg1
+ args['arg2'] <- arg2
+ args['arg3'] <- arg3
+ args['arg4'] <- 123 # Not found in parsed line so we use a hard coded default
+
+ return (args)
+ }
> argres <- parseArguments()
> argres
$arg1
[1] 100
$arg2
[1] "hello"
$arg3
[1] 3.14159
$arg4
[1] 123
To address the unkown number of arguments modification to the problem:
parseArguments <- function() {
text1 = "--arg1=100 --arg2='hello' --arg3=3.14159"
eval(parse(text=gsub("\\s", ";", gsub("--","", text1))))
args <- list()
for( ar in ls()[! ls() %in% c("text1", "args")] ) {args[ar] <- get(ar)}
return (args)
}
argres <- parseArguments()
argres
#---------
$arg1
[1] 100
$arg2
[1] "hello"
$arg3
[1] 3.14159

You can split each argument into
the name and value parts, using strsplit or a regular expression.
The following does not try to check the type of the arguments:
everything will be returned as a string.
parseArgs <- function(...) {
o <- commandArgs(TRUE)
# The defaults arguments should be named
defaults <- list(...)
stopifnot( length(defaults) == length(names(defaults)) )
stopifnot( all( names(defaults) != "" ) )
# All the arguments should be of the form "--foo=bar"
re <- "^--(.*?)=(.*)"
stopifnot( all(grepl(re, o)) )
# Extract the values and names
r <- gsub(re, "\\2", o)
names(r) <- gsub("^--(.*?)=(.*)", "\\1", o)
r <- as.list(r)
# Add the default values
missing <- setdiff( names(defaults), names(r) )
append(r, defaults[missing])
}
print( parseArgs() )
print( parseArgs(foo=1, bar=2) ) # With default values

Related

How to mask subsequences of string with a pattern string

I have a main string that looks like this:
my_main <- "ABCDEFGHIJ"
What I want to do is to sequentially mask at every position with another pattern string:
my_pattern <- "x*x" # the length could be varied from 1 up to length of my_main
Every character that overlap with * will be kept, other will be replaced with x.
The final result is a vector of strings that contain these:
xBxDEFGHIJ
AxCxEFGHIJ
ABxDxFGHIJ
ABCxExGHIJ
ABCDxFxHIJ
ABCDExGxIJ
ABCDEFxHxJ
ABCDEFGxIx
Next if the pattern is
my_pattern <- "xx**x"
The result would be:
xxCDxFGHIJ
AxxDExGHIJ
ABxxEFxHIJ
ABCxxFGxIJ
ABCDxxGHxJ
ABCDExxHIx
How can I achieve that?
This might be a little over-complicated, but it's a start:
I'm going to reuse Reduce_frame from https://stackoverflow.com/a/70945868/3358272.
Reduce_frame <- function(data, expr, init) {
expr <- substitute(expr)
out <- rep(init[1][NA], nrow(data))
for (rn in seq_len(nrow(data))) {
out[rn] <- init <- eval(expr, envir = data[rn,])
}
out
}
From here, let's split the pattern into a frame (for ease of access, if nothing else):
repl <- subset(
data.frame(p = strsplit(my_pattern, "")[[1]], i = seq_len(nchar(my_pattern))),
p != "*")
repl
# p i
# 1 x 1
# 3 x 3
From here, we can do it once with:
tail(Reduce_frame(repl, `substring<-`(init, i, i, p), init = my_main), 1)
# [1] "xBxDEFGHIJ"
Which means we can iterate fairly easily:
sapply(c(0, seq_len(nchar(my_main) - nchar(my_pattern))), function(offset) {
tail(Reduce_frame(transform(repl, i = i + offset),
`substring<-`(init, i, i, p), init = my_main), 1)
})
# [1] "xBxDEFGHIJ" "AxCxEFGHIJ" "ABxDxFGHIJ" "ABCxExGHIJ" "ABCDxFxHIJ" "ABCDExGxIJ" "ABCDEFxHxJ" "ABCDEFGxIx"
To use your second pattern,
my_pattern <- "xx**x"
repl <- transform(...) # from above
## the rest of this code is unchanged from above
sapply(c(0, seq_len(nchar(my_main) - nchar(my_pattern))), function(offset) {
tail(Reduce_frame(transform(repl, i = i + offset),
`substring<-`(init, i, i, p), init = my_main), 1)
})
# [1] "xxCDxFGHIJ" "AxxDExGHIJ" "ABxxEFxHIJ" "ABCxxFGxIJ" "ABCDxxGHxJ" "ABCDExxHIx"
So this can be easily functionized:
Reduce_frame <- ... # defined above
func <- function(S, pattern) {
stopifnot(nchar(S) >= nchar(pattern))
repl <- subset(
data.frame(p = strsplit(pattern, "")[[1]], i = seq_len(nchar(pattern))),
p != "*")
sapply(c(0, seq_len(nchar(S) - nchar(pattern))), function(offset) {
tail(Reduce_frame(transform(repl, i = i + offset),
`substring<-`(init, i, i, p), init = S), 1)
})
}
func("ABCDEFGHIJ", "x*x")
# [1] "xBxDEFGHIJ" "AxCxEFGHIJ" "ABxDxFGHIJ" "ABCxExGHIJ" "ABCDxFxHIJ" "ABCDExGxIJ" "ABCDEFxHxJ" "ABCDEFGxIx"
func("ABCDEFGHIJ", "xx**x")
# [1] "xxCDxFGHIJ" "AxxDExGHIJ" "ABxxEFxHIJ" "ABCxxFGxIJ" "ABCDxxGHxJ" "ABCDExxHIx"
Here's one way using strsplit, grepl, and paste.
f <- \(mm, mp) {
m <- el(strsplit(mm, ''))
p <- el(strsplit(mp, ''))
i <- which(!grepl(p, pattern='\\*'))
vapply(c(0L, seq_len(length(m) - max(i))), \(j) {
m[i + j] <- p[i]
paste(m, collapse='')
}, vector('character', 1L))
}
f('ABCDEFGHIJ', 'x*x')
# [1] "xBxDEFGHIJ" "AxCxEFGHIJ" "ABxDxFGHIJ" "ABCxExGHIJ" "ABCDxFxHIJ"
# [6] "ABCDExGxIJ" "ABCDEFxHxJ" "ABCDEFGxIx"
f('ABCDEFGHIJ', 'x**x')
# [1] "xBCxEFGHIJ" "AxCDxFGHIJ" "ABxDExGHIJ" "ABCxEFxHIJ" "ABCDxFGxIJ"
# [6] "ABCDExGHxJ" "ABCDEFxHIx"
f('ABCDEFGHIJ', 'xx**x')
# [1] "xxCDxFGHIJ" "AxxDExGHIJ" "ABxxEFxHIJ" "ABCxxFGxIJ" "ABCDxxGHxJ"
# [6] "ABCDExxHIx"
f('ABCDEFGHIJ', 'kk**krr')
# [1] "kkCDkrrHIJ" "AkkDEkrrIJ" "ABkkEFkrrJ" "ABCkkFGkrr"
f('ABCDEFGHIJ', 'kk**kr*r')
# [1] "kkCDkrGrIJ" "AkkDEkrHrJ" "ABkkEFkrIr"
Here is an approach along the same lines as r2evans' answer but relying on some stringr functions which should be more efficient than the base equivalents:
library(stringr)
f <- function(main, r_pattern) {
shift <- nchar(main) - nchar(r_pattern) + 1
idx <- as.data.frame(str_locate_all(r_pattern, "[^*]+")[[1]])
x_pattern <- str_split(r_pattern, "\\*+")[[1]]
Reduce(
function(x, y)
`str_sub<-`(
x,
seq(idx$start[y], length.out = shift),
seq(idx$end[y], length.out = shift),
omit_na = FALSE,
x_pattern[y]
),
seq(nrow(idx)),
init = main
)
}
f("ABCDEFGHIJ", "x*x")
[1] "xBxDEFGHIJ" "AxCxEFGHIJ" "ABxDxFGHIJ" "ABCxExGHIJ" "ABCDxFxHIJ" "ABCDExGxIJ" "ABCDEFxHxJ" "ABCDEFGxIx"
f("ABCDEFGHIJ", "xx**x")
[1] "xxCDxFGHIJ" "AxxDExGHIJ" "ABxxEFxHIJ" "ABCxxFGxIJ" "ABCDxxGHxJ" "ABCDExxHIx"
# Edit after OP comment:
f(my_main, "KK**KRR")
[1] "KKCDKRRHIJ" "AKKDEKRRIJ" "ABKKEFKRRJ" "ABCKKFGKRR"
Here is Ruby code that produces the desired result. I am presenting it in the event that a reader wishes to convert it to R, possibly with modification, of course.
You should be able to read the code even if you don't know Ruby, as long as you understand that:
'abc'.size returns 3;
0..8 is a range of integers between 0 and 8, inclusive;
'abc' << 'd' returns 'abcd';
7.modulo(3) returns 1;
'abcd'[2] returns 'c', 2 being an index; and
s == 'x' ? 'x' : my_main[j] reads, "if the string s (which will be 'x' or '*') equals 'x' return 'x', else return the character of my_main at index j.
The Ruby code (somewhat simplified from what would normally be written) is as follows.
def doit(my_main, my_pattern)
msz = my_main.size
psz = my_pattern.size
(0..msz-psz).map do |i|
s = ''
(0..msz-1).each do |j|
s << (my_pattern[(j-i).modulo(msz)] == 'x' ? 'x' : my_main[j])
end
s
end
end

Getting name of an object from list in Map

Given the following data:
list_A <- list(data_cars = mtcars,
data_air = AirPassengers,
data_list = list(A = 1,
B = 2))
I would like to print names of objects available across list_A.
Example:
Map(
f = function(x) {
nm <- deparse(match.call()$x)
print(nm)
# nm object is only needed to properly name flat file that may be
# produced within Map call
if (any(class(x) == "list")) {
length(x) + 1
} else {
length(x) + 1e6
saveRDS(object = x,
file = tempfile(pattern = make.names(nm), fileext = ".RDS"))
}
},
list_A
)
returns:
[1] "dots[[1L]][[1L]]"
[1] "dots[[1L]][[2L]]"
[1] "dots[[1L]][[3L]]"
$data_cars
NULL
$data_air
NULL
$data_list
[1] 3
Desired results
I would like to get:
`data_cars`
`data_air`
`data_list`
Update
Following the comments, I have modified the example to make it more reflective of my actual needs which are:
While using Map to iterate over list_A I'm performing some operations on each element of the list
Periodically I want to create a flat file with name reflecting name of object that was processed
In addition to list_A, there are also list_B, list_C and so forth. Therefore, I would like to avoid calling names(list) inside the function f of the Map as I will have to modify it n number of times. The solution I'm looking to find should lend itself for:
Map(function(l){...}, list_A)
So I can later replace list_A. It does not have to rely on Map. Any of the apply functions would do; same applied to purrr-based solutions.
Alternative example
do_stuff <- function(x) {
nm <- deparse(match.call()$x)
print(nm)
# nm object is only needed to properly name flat file that may be
# produced within Map call
if (any(class(x) == "list")) {
length(x) + 1
} else {
length(x) + 1e6
saveRDS(object = x,
file = tempfile(pattern = make.names(nm), fileext = ".RDS"))
}
}
Map(do_stuff, list_A)
As per the notes below, I want to avoid having to modify do_stuff function as I will be looking to do:
Map(do_stuff, list_A)
Map(do_stuff, list_B)
Map(do_stuff, list_...)
We could wrap it into a function, and do it in two steps:
myFun <- function(myList){
# do stuff
res <- Map(
f = function(x) {
#do stuff
head(x)
},
myList)
# write to a file, here we might add control
# if list is empty do not output to a file
for(i in names(res)){
write.table(res[[ i ]], file = paste0(i, ".txt"))
}
}
myFun(list_A)
Would something like this work ?
list_A2 <- Map(list, x = list_A,nm = names(list_A) )
trace(do_stuff, quote({ nm <- x$nm; x<- x$x}), at=3)
Map(do_stuff, list_A2)

In R, is it possible to create a new object inside a function and pass it to the parent environment?

The function would look something like:
function(input, FUN, output) {
output <- FUN(input)
return(input)
}
Where output is an unquoted name of an object to be created.
Let's skip the part where this is probably a bad idea: is this sort of thing possible? How would you go about doing it?
Clean code would just return it.
But you have other options:
the <<- operator
the assign() function where you can list the environment to assign to
Here is a trivial example:
R> foo <- function(x=21) { y <<- 2*x; return(3*x) }
R> foo(10)
[1] 30
R> y
[1] 20
R>
1) Try this:
fun <- function(input, FUN, output = "output", envir = parent.frame()) {
envir[[output]] <- FUN(input)
input
}
fun(4, sqrt)
## [1] 4
output
## [1] 2
Note that if hardcoding the output variable name to output is ok then the assignment could be written:
envir$output <- FUN(input)
2) Another possibility if you want to output both the input and output yet avoiding side effects is to return both in a list:
fun2 <- function(input, FUN, output = "output")
setNames(list(input, FUN(input)), c("input", output))
fun2(4, sqrt)
giving:
$input
[1] 4
$output
[1] 2
2a) A variation of this is:
devtools::install_github("ggrothendieck/gsubfn")
library(gsubfn) # list[...] <- ...
list[input, output] <- fun2(sqrt)
giving:
> input
[1] 4
> output
[1] 2
3) Yet another possibility is to pass the input in an attribute:
fun3 <- function(input, FUN) {
out <- FUN(input)
attr(out, "input") <- input
out
}
fun3(4, sqrt)
giving:
[1] 2
attr(,"input")
[1] 4

R .Last.call feature - similar to .Last.value

Similarly to .Last.value is there any way to access last call? Below expected results of potential .Last.call.
sum(1, 2)
# [1] 3
str(.Last.call)
# language sum(1, 2)
The bests if it would not require to parse file from file system.
The last.call package is no longer on cran, but you can still get the code:
# -----------------------------------------------------------------------
# FUNCTION: last.call
# Retrieves a CALL from the history and returns an unevaluated
# call.
#
# There are two uses for such abilities.
# - To be able to recall the previous commands, like pressing the up key
# on the terminal.
# - The ability to get the line that called the function.
#
# TODO:
# - does not handle commands seperated by ';'
#
# -----------------------------------------------------------------------
last.call <-
function(n=1) {
f1 <- tempfile()
try( savehistory(f1), silent=TRUE )
try( rawhist <- readLines(f1), silent=TRUE )
unlink(f1)
if( exists('rawhist') ) {
# LOOK BACK max(n)+ LINES UNTIL YOU HAVE n COMMANDS
cmds <- expression()
n.lines <- max(abs(n))
while( length(cmds) < max(abs(n)) ) {
lines <- tail( rawhist, n=n.lines )
try( cmds <- parse( text=lines ), silent=TRUE )
n.lines <- n.lines + 1
if( n.lines > length(rawhist) ) break
}
ret <- rev(cmds)[n]
if( length(ret) == 1 ) return(ret[[1]]) else return(ret)
}
return(NULL)
}
Now, to use it:
sum(1, 2)
# [1] 3
last.call(2)
# sum(1, 2)
I've modified this code to output text strings of the preceding commands / calls in a manner that preserves how there were formatted across lines in the original call, sot that I can use cat() to output the calls (for a function that emails me when the preceding function is done running). Here's the code:
lastCall <- function(num.call = 1) {
history.file <- tempfile()
try(savehistory(history.file), silent = TRUE )
try(raw.history <- readLines(history.file), silent = TRUE )
unlink(history.file)
if (exists('raw.history') ) {
# LOOK BACK max(n)+ LINES UNTIL YOU HAVE n COMMANDS
commands <- expression()
num.line <- max(abs(num.call) + 1)
while (length(commands) < max(abs(num.call) + 1)) {
lines <- tail(raw.history, n = num.line)
try(commands <- parse(text = lines), silent = TRUE)
num.line <- num.line + 1
if (num.line > length(raw.history)) break
}
ret <- rev(commands)[num.call + 1]
if (length(ret) == 1) {
a <- ret[1]
} else {
a <- ret
}
# a <- rev(commands)[num.call + 1]
out <- lapply(a, deparse) %>%
sapply(paste, sep = "\n", collapse = "\n")
}
out
}
Enjoy!

Create a list from a character vector fed to a function

I would like to pass variables into a function and use them as a list, and I have a function that splits the items of a character vector by the "=", and places them into a list. It works fine of course when the equal sign is included in the character like this:
my.function <- function(x) {
args <- x
newl <- list()
for ( i in 1:length(args) ) {
keyval <- strsplit(args[[i]],"=")[[1]];
key <- keyval[1]; val <- keyval[2];
newl[[ key ]] <- val;
}
return(newl)
}
char<- c("name=value_1", "title=title", "show=show")
my.function(char)
$name
[1] "value_1"
$title
[1] "title"
$show
[1] "show"
And then I am able to use these arguments inside the function just by doing this:
args[['title']]
But I would like to pass variables to the function, and not just characters. So I would like the function to be able to work when I do this:
value_1 = "A"
show= TRUE
title= paste("This is my title for ", value_1, sep="")
my.function(name=value_1, title=title, show=show)
I could just paste the values like this:
char= c( paste("name=", value_1, sep=""),
paste("title=", title, sep=""),
paste("show=", show, sep=""))
But I was wondering if there was a better method to pass these variables as arguments to the function. Thank you for your help!
You can use ...:
my.function <- function(...) list(...)
This function just creates a list based on the ussed arguments.
value_1 <- "A"
show <- TRUE
title <- paste("This is my title for ", value_1, sep="")
my.function(name = value_1, title = title, show = show)
$name
[1] "A"
$title
[1] "This is my title for A"
$show
[1] TRUE
This function generates a character vector based on the function call's arguments:
my.function <- function(...) {
argList <- list(...)
res <- paste(names(argList), unlist(argList), sep = "=")
return(res)
}
my.function(name=value_1, title=title, show=show)
[1] "name=A" "title=This is my title for A" "show=TRUE"
This function is similar to your one. It illustrates how you can access the arguments of the function call:
my.function <- function(...) {
argList <- list(...)
newl <- list()
for (i in seq_along(argList)) {
key <- names(argList)[i]
val <- argList[[i]]
newl[[key]] <- val
}
return(newl)
}
my.function(name = value_1, title = title, show = show)
$name
[1] "A"
$title
[1] "This is my title for A"
$show
[1] TRUE

Resources