How to mask subsequences of string with a pattern string - r

I have a main string that looks like this:
my_main <- "ABCDEFGHIJ"
What I want to do is to sequentially mask at every position with another pattern string:
my_pattern <- "x*x" # the length could be varied from 1 up to length of my_main
Every character that overlap with * will be kept, other will be replaced with x.
The final result is a vector of strings that contain these:
xBxDEFGHIJ
AxCxEFGHIJ
ABxDxFGHIJ
ABCxExGHIJ
ABCDxFxHIJ
ABCDExGxIJ
ABCDEFxHxJ
ABCDEFGxIx
Next if the pattern is
my_pattern <- "xx**x"
The result would be:
xxCDxFGHIJ
AxxDExGHIJ
ABxxEFxHIJ
ABCxxFGxIJ
ABCDxxGHxJ
ABCDExxHIx
How can I achieve that?

This might be a little over-complicated, but it's a start:
I'm going to reuse Reduce_frame from https://stackoverflow.com/a/70945868/3358272.
Reduce_frame <- function(data, expr, init) {
expr <- substitute(expr)
out <- rep(init[1][NA], nrow(data))
for (rn in seq_len(nrow(data))) {
out[rn] <- init <- eval(expr, envir = data[rn,])
}
out
}
From here, let's split the pattern into a frame (for ease of access, if nothing else):
repl <- subset(
data.frame(p = strsplit(my_pattern, "")[[1]], i = seq_len(nchar(my_pattern))),
p != "*")
repl
# p i
# 1 x 1
# 3 x 3
From here, we can do it once with:
tail(Reduce_frame(repl, `substring<-`(init, i, i, p), init = my_main), 1)
# [1] "xBxDEFGHIJ"
Which means we can iterate fairly easily:
sapply(c(0, seq_len(nchar(my_main) - nchar(my_pattern))), function(offset) {
tail(Reduce_frame(transform(repl, i = i + offset),
`substring<-`(init, i, i, p), init = my_main), 1)
})
# [1] "xBxDEFGHIJ" "AxCxEFGHIJ" "ABxDxFGHIJ" "ABCxExGHIJ" "ABCDxFxHIJ" "ABCDExGxIJ" "ABCDEFxHxJ" "ABCDEFGxIx"
To use your second pattern,
my_pattern <- "xx**x"
repl <- transform(...) # from above
## the rest of this code is unchanged from above
sapply(c(0, seq_len(nchar(my_main) - nchar(my_pattern))), function(offset) {
tail(Reduce_frame(transform(repl, i = i + offset),
`substring<-`(init, i, i, p), init = my_main), 1)
})
# [1] "xxCDxFGHIJ" "AxxDExGHIJ" "ABxxEFxHIJ" "ABCxxFGxIJ" "ABCDxxGHxJ" "ABCDExxHIx"
So this can be easily functionized:
Reduce_frame <- ... # defined above
func <- function(S, pattern) {
stopifnot(nchar(S) >= nchar(pattern))
repl <- subset(
data.frame(p = strsplit(pattern, "")[[1]], i = seq_len(nchar(pattern))),
p != "*")
sapply(c(0, seq_len(nchar(S) - nchar(pattern))), function(offset) {
tail(Reduce_frame(transform(repl, i = i + offset),
`substring<-`(init, i, i, p), init = S), 1)
})
}
func("ABCDEFGHIJ", "x*x")
# [1] "xBxDEFGHIJ" "AxCxEFGHIJ" "ABxDxFGHIJ" "ABCxExGHIJ" "ABCDxFxHIJ" "ABCDExGxIJ" "ABCDEFxHxJ" "ABCDEFGxIx"
func("ABCDEFGHIJ", "xx**x")
# [1] "xxCDxFGHIJ" "AxxDExGHIJ" "ABxxEFxHIJ" "ABCxxFGxIJ" "ABCDxxGHxJ" "ABCDExxHIx"

Here's one way using strsplit, grepl, and paste.
f <- \(mm, mp) {
m <- el(strsplit(mm, ''))
p <- el(strsplit(mp, ''))
i <- which(!grepl(p, pattern='\\*'))
vapply(c(0L, seq_len(length(m) - max(i))), \(j) {
m[i + j] <- p[i]
paste(m, collapse='')
}, vector('character', 1L))
}
f('ABCDEFGHIJ', 'x*x')
# [1] "xBxDEFGHIJ" "AxCxEFGHIJ" "ABxDxFGHIJ" "ABCxExGHIJ" "ABCDxFxHIJ"
# [6] "ABCDExGxIJ" "ABCDEFxHxJ" "ABCDEFGxIx"
f('ABCDEFGHIJ', 'x**x')
# [1] "xBCxEFGHIJ" "AxCDxFGHIJ" "ABxDExGHIJ" "ABCxEFxHIJ" "ABCDxFGxIJ"
# [6] "ABCDExGHxJ" "ABCDEFxHIx"
f('ABCDEFGHIJ', 'xx**x')
# [1] "xxCDxFGHIJ" "AxxDExGHIJ" "ABxxEFxHIJ" "ABCxxFGxIJ" "ABCDxxGHxJ"
# [6] "ABCDExxHIx"
f('ABCDEFGHIJ', 'kk**krr')
# [1] "kkCDkrrHIJ" "AkkDEkrrIJ" "ABkkEFkrrJ" "ABCkkFGkrr"
f('ABCDEFGHIJ', 'kk**kr*r')
# [1] "kkCDkrGrIJ" "AkkDEkrHrJ" "ABkkEFkrIr"

Here is an approach along the same lines as r2evans' answer but relying on some stringr functions which should be more efficient than the base equivalents:
library(stringr)
f <- function(main, r_pattern) {
shift <- nchar(main) - nchar(r_pattern) + 1
idx <- as.data.frame(str_locate_all(r_pattern, "[^*]+")[[1]])
x_pattern <- str_split(r_pattern, "\\*+")[[1]]
Reduce(
function(x, y)
`str_sub<-`(
x,
seq(idx$start[y], length.out = shift),
seq(idx$end[y], length.out = shift),
omit_na = FALSE,
x_pattern[y]
),
seq(nrow(idx)),
init = main
)
}
f("ABCDEFGHIJ", "x*x")
[1] "xBxDEFGHIJ" "AxCxEFGHIJ" "ABxDxFGHIJ" "ABCxExGHIJ" "ABCDxFxHIJ" "ABCDExGxIJ" "ABCDEFxHxJ" "ABCDEFGxIx"
f("ABCDEFGHIJ", "xx**x")
[1] "xxCDxFGHIJ" "AxxDExGHIJ" "ABxxEFxHIJ" "ABCxxFGxIJ" "ABCDxxGHxJ" "ABCDExxHIx"
# Edit after OP comment:
f(my_main, "KK**KRR")
[1] "KKCDKRRHIJ" "AKKDEKRRIJ" "ABKKEFKRRJ" "ABCKKFGKRR"

Here is Ruby code that produces the desired result. I am presenting it in the event that a reader wishes to convert it to R, possibly with modification, of course.
You should be able to read the code even if you don't know Ruby, as long as you understand that:
'abc'.size returns 3;
0..8 is a range of integers between 0 and 8, inclusive;
'abc' << 'd' returns 'abcd';
7.modulo(3) returns 1;
'abcd'[2] returns 'c', 2 being an index; and
s == 'x' ? 'x' : my_main[j] reads, "if the string s (which will be 'x' or '*') equals 'x' return 'x', else return the character of my_main at index j.
The Ruby code (somewhat simplified from what would normally be written) is as follows.
def doit(my_main, my_pattern)
msz = my_main.size
psz = my_pattern.size
(0..msz-psz).map do |i|
s = ''
(0..msz-1).each do |j|
s << (my_pattern[(j-i).modulo(msz)] == 'x' ? 'x' : my_main[j])
end
s
end
end

Related

Turn `=` into `<-` using only base R

Problem
Turn assignment equal signs into assignment arrows.
Use base R only (no styler or formatR).
Context
https://github.com/ropensci/drake/issues/562
Example
Input:
f = function(x = 1){}
Desired output:
f <- function(x = 1){}
Posted in-issue but might as well try for some SO pts:
library(magrittr)
raw_src <- "z = {f('#') # comment
x <- 5
y = 'test'
}"
# so we can have some tasty parse data
first <- parse(text = raw_src, keep.source = TRUE)
# this makes a nice data frame of the tokenized R source including line and column positions of the source bits
src_info <- getParseData(first, TRUE)
# only care about those blasphemous = assignments
elements_with_equals_assignment <- subset(src_info, token == "EQ_ASSIGN")
# take the source and split it into lines
raw_src_lines <- strsplit(raw_src, "\n")[[1]]
# for as many instances in the data frame replace the = with <-
for (idx in 1:nrow(elements_with_equals_assignment)) {
stringi::stri_sub(
raw_src_lines[elements_with_equals_assignment[idx, "line1"]],
elements_with_equals_assignment[idx, "col1"],
elements_with_equals_assignment[idx, "col2"]
) <- "<-"
}
# put the lines back together and do the thing
parse(
text = paste0(raw_src_lines, collapse="\n"),
keep.source = FALSE
)[[1]] %>%
deparse() %>%
cat(sep = "\n")
## z <- {
## f("#")
## x <- 5
## y <- "test"
## }

Getting name of an object from list in Map

Given the following data:
list_A <- list(data_cars = mtcars,
data_air = AirPassengers,
data_list = list(A = 1,
B = 2))
I would like to print names of objects available across list_A.
Example:
Map(
f = function(x) {
nm <- deparse(match.call()$x)
print(nm)
# nm object is only needed to properly name flat file that may be
# produced within Map call
if (any(class(x) == "list")) {
length(x) + 1
} else {
length(x) + 1e6
saveRDS(object = x,
file = tempfile(pattern = make.names(nm), fileext = ".RDS"))
}
},
list_A
)
returns:
[1] "dots[[1L]][[1L]]"
[1] "dots[[1L]][[2L]]"
[1] "dots[[1L]][[3L]]"
$data_cars
NULL
$data_air
NULL
$data_list
[1] 3
Desired results
I would like to get:
`data_cars`
`data_air`
`data_list`
Update
Following the comments, I have modified the example to make it more reflective of my actual needs which are:
While using Map to iterate over list_A I'm performing some operations on each element of the list
Periodically I want to create a flat file with name reflecting name of object that was processed
In addition to list_A, there are also list_B, list_C and so forth. Therefore, I would like to avoid calling names(list) inside the function f of the Map as I will have to modify it n number of times. The solution I'm looking to find should lend itself for:
Map(function(l){...}, list_A)
So I can later replace list_A. It does not have to rely on Map. Any of the apply functions would do; same applied to purrr-based solutions.
Alternative example
do_stuff <- function(x) {
nm <- deparse(match.call()$x)
print(nm)
# nm object is only needed to properly name flat file that may be
# produced within Map call
if (any(class(x) == "list")) {
length(x) + 1
} else {
length(x) + 1e6
saveRDS(object = x,
file = tempfile(pattern = make.names(nm), fileext = ".RDS"))
}
}
Map(do_stuff, list_A)
As per the notes below, I want to avoid having to modify do_stuff function as I will be looking to do:
Map(do_stuff, list_A)
Map(do_stuff, list_B)
Map(do_stuff, list_...)
We could wrap it into a function, and do it in two steps:
myFun <- function(myList){
# do stuff
res <- Map(
f = function(x) {
#do stuff
head(x)
},
myList)
# write to a file, here we might add control
# if list is empty do not output to a file
for(i in names(res)){
write.table(res[[ i ]], file = paste0(i, ".txt"))
}
}
myFun(list_A)
Would something like this work ?
list_A2 <- Map(list, x = list_A,nm = names(list_A) )
trace(do_stuff, quote({ nm <- x$nm; x<- x$x}), at=3)
Map(do_stuff, list_A2)

Proper way to have two functions access a single function's environment?

Based on the answer provided in1088639, I set up a pair of functions which both access the same sub-function's environment. This example works, but I wanted to see if I'd missed some cleaner way to "connect" both top-level functions to the internal environment.
( Back story: I wanted to write a pair of complementary functions which shared a variable, e.g. "count" in this example, and meet CRAN package requirements which do not allow functions to modify the global environment. )
static.f <- function() {
count <- 0
f <- function(x) {
count <<- count + 1
return( list(mean=mean(x), count=count) )
}
return( f )
}
# make sure not to delete this command, even tho' it's not
# creating a function.
f1 <- static.f()
statfoo <- function(x){
tmp<-f1(x)
tmp<- list(tmp,plus=2)
return(tmp)
}
statbar <- function(x){
tmp<-f1(x)
tmp<- list(tmp,minus=3)
return(tmp)
}
Sample output:
> statfoo(5)
[[1]]
[[1]]$mean
[1] 5
[[1]]$count
[1] 1
$plus
[1] 2
Rgames> statfoo(5)
[[1]]
[[1]]$mean
[1] 5
[[1]]$count
[1] 2
$plus
[1] 2
> statbar(4)
[[1]]
[[1]]$mean
[1] 4
[[1]]$count
[1] 3
$minus
[1] 3
> statfoo(5)
[[1]]
[[1]]$mean
[1] 5
[[1]]$count
[1] 4
$plus
[1] 2
A cleaner method would be to use an object oriented approach. There is already an answer using reference classes.
A typical object oriented approach with classes would create a class and then create a singleton object, i.e. a single object of that class. Of course it is a bit wasteful to create a class only to create one object from it so here we provide a proto example. (Creating a function to enclose count and the function doing the real work has a similar problem -- you create an enclosing function only to run it once.) The proto model allows one to create an object directly bypassing the need to create a class only to use it once. Here foobar is the proto object with property count and methods stats, statfoo and statbar. Note that we factored out stats to avoid duplicating its code in each of statfoo and statbar. (continued further down)
library(proto)
foobar <- proto(count = 0,
stats = function(., x) {
.$count <- .$count + 1
list(mean = mean(x), count = .$count)
},
statfoo = function(., x) c(.$stats(x), plus = 2),
statbar = function(., x) c(.$stats(x), plus = 3)
)
foobar$statfoo(1:3)
foobar$statbar(2:4)
giving:
> foobar$statfoo(1:3)
$mean
[1] 2
$count
[1] 1
$plus
[1] 2
> foobar$statbar(2:4)
$mean
[1] 3
$count
[1] 2
$plus
[1] 3
A second design would be to have statfoo and statbar as independent functions and only keep count and stats in foobar (continued further down)
library(proto)
foobar <- proto(count = 0,
stats = function(., x) {
.$count <- .$count + 1
list(mean = mean(x), count = .$count)
}
)
statfoo <- function(x) c(foobar$stats(x), plus = 2)
statbar <- function(x) c(foobar$stats(x), plus = 3)
statfoo(1:3)
statbar(2:4)
giving similar output to the prior example.
Third approach Of course the second variation could easily be implemented by using local and a function getting us close to where you started. This does not use any packages but does not create a function only to throw it away:
foobar <- local({
count <- 0
function(x) {
count <<- count + 1
list(mean = mean(x), count = count)
}
})
statfoo <- function(x) c(foobar(x), plus = 2)
statbar <- function(x) c(foobar(x), plus = 3)
statfoo(1:3)
statbar(2:4)
Another simple option is tocreate an environment and assign it to both functions. Here I use simpler functions for illustrative purposes, but this can be easily extended:
f1 <- function() {count <<- count + 1; return(paste("hello", count))}
f2 <- function() {count <<- count + 1; return(paste("goodbye", count))}
environment(f1) <- environment(f2) <- list2env(list(count=0))
Then:
> f1()
[1] "hello 1"
> f2()
[1] "goodbye 2"
> f1()
[1] "hello 3"
Both functions have the same environment.
You can use reference class like this:
foobar <- setRefClass(
'foobar',
fields = list(count='numeric'),
methods = list(
initialize=function() {
.self$initFields(count = 0L)
},
statfoo = function(x) {
count <<- count + 1L
list(list(mean=mean(x), count=count), plus=2)
},
statbar = function(x){
count <<- count + 1L
list(list(mean=mean(x), count=count), minus=3)
}
)
)()
foobar$statfoo(5)
foobar$statbar(3)
It makes it relatively clear that neither statfoo nor statbar is a pure function.
You could get rid of the factory functions, and more explicitly use environments. A solution like this would work as well
.env<-(function() {
count <- 0
f <- function(x) {
count <<- count + 1
return( list(mean=mean(x), count=count))
}
return(environment())
})()
statfoo <- function(x){
list(.env$f(x),plus=2)
}
statbar <- function(x){
list(.env$f(x),minus=3)
}
The .env variable is created by immediately executing an anonymous function to get its environment. We then extract the function from the environment itself to modify its values.

R .Last.call feature - similar to .Last.value

Similarly to .Last.value is there any way to access last call? Below expected results of potential .Last.call.
sum(1, 2)
# [1] 3
str(.Last.call)
# language sum(1, 2)
The bests if it would not require to parse file from file system.
The last.call package is no longer on cran, but you can still get the code:
# -----------------------------------------------------------------------
# FUNCTION: last.call
# Retrieves a CALL from the history and returns an unevaluated
# call.
#
# There are two uses for such abilities.
# - To be able to recall the previous commands, like pressing the up key
# on the terminal.
# - The ability to get the line that called the function.
#
# TODO:
# - does not handle commands seperated by ';'
#
# -----------------------------------------------------------------------
last.call <-
function(n=1) {
f1 <- tempfile()
try( savehistory(f1), silent=TRUE )
try( rawhist <- readLines(f1), silent=TRUE )
unlink(f1)
if( exists('rawhist') ) {
# LOOK BACK max(n)+ LINES UNTIL YOU HAVE n COMMANDS
cmds <- expression()
n.lines <- max(abs(n))
while( length(cmds) < max(abs(n)) ) {
lines <- tail( rawhist, n=n.lines )
try( cmds <- parse( text=lines ), silent=TRUE )
n.lines <- n.lines + 1
if( n.lines > length(rawhist) ) break
}
ret <- rev(cmds)[n]
if( length(ret) == 1 ) return(ret[[1]]) else return(ret)
}
return(NULL)
}
Now, to use it:
sum(1, 2)
# [1] 3
last.call(2)
# sum(1, 2)
I've modified this code to output text strings of the preceding commands / calls in a manner that preserves how there were formatted across lines in the original call, sot that I can use cat() to output the calls (for a function that emails me when the preceding function is done running). Here's the code:
lastCall <- function(num.call = 1) {
history.file <- tempfile()
try(savehistory(history.file), silent = TRUE )
try(raw.history <- readLines(history.file), silent = TRUE )
unlink(history.file)
if (exists('raw.history') ) {
# LOOK BACK max(n)+ LINES UNTIL YOU HAVE n COMMANDS
commands <- expression()
num.line <- max(abs(num.call) + 1)
while (length(commands) < max(abs(num.call) + 1)) {
lines <- tail(raw.history, n = num.line)
try(commands <- parse(text = lines), silent = TRUE)
num.line <- num.line + 1
if (num.line > length(raw.history)) break
}
ret <- rev(commands)[num.call + 1]
if (length(ret) == 1) {
a <- ret[1]
} else {
a <- ret
}
# a <- rev(commands)[num.call + 1]
out <- lapply(a, deparse) %>%
sapply(paste, sep = "\n", collapse = "\n")
}
out
}
Enjoy!

Parsing a string in R

I am writing a R script which I will be running as a script from the command line. I am passing command line arguments to the script using keywords as follows:
myscript.R --arg1=100 --arg2='hello' --arg3=3.14159
I want to write an R function that will return the command line values into a dictionary like object (i.e. a list of lists in R), filling unsupplied arguments with defaults.
for e.g.
parseArguments <- function() {
options <- commandArgs(TRUE)
# options now contains "--arg1=100 --arg2='hello' --arg3=3.14159"
# parse string held in variable named options and stuff into a list
# .... Do some string manip ....
args <- list()
args['arg1'] <- 100
args['arg2'] <- 'hello'
args['arg3'] <- 3.14159
args['arg4'] <- 123 # Not found in parsed line so we use a hard coded default
return (args)
}
Can someone help fill in the blanks?
> parseArguments <- function() {
+ text1 = "--arg1=100 --arg2='hello' --arg3=3.14159"
+ eval(parse( text= gsub("\\s", ";", gsub("--","", text1))))
+ args <- list()
+ args['arg1'] <- arg1
+ args['arg2'] <- arg2
+ args['arg3'] <- arg3
+ args['arg4'] <- 123 # Not found in parsed line so we use a hard coded default
+
+ return (args)
+ }
> argres <- parseArguments()
> argres
$arg1
[1] 100
$arg2
[1] "hello"
$arg3
[1] 3.14159
$arg4
[1] 123
To address the unkown number of arguments modification to the problem:
parseArguments <- function() {
text1 = "--arg1=100 --arg2='hello' --arg3=3.14159"
eval(parse(text=gsub("\\s", ";", gsub("--","", text1))))
args <- list()
for( ar in ls()[! ls() %in% c("text1", "args")] ) {args[ar] <- get(ar)}
return (args)
}
argres <- parseArguments()
argres
#---------
$arg1
[1] 100
$arg2
[1] "hello"
$arg3
[1] 3.14159
You can split each argument into
the name and value parts, using strsplit or a regular expression.
The following does not try to check the type of the arguments:
everything will be returned as a string.
parseArgs <- function(...) {
o <- commandArgs(TRUE)
# The defaults arguments should be named
defaults <- list(...)
stopifnot( length(defaults) == length(names(defaults)) )
stopifnot( all( names(defaults) != "" ) )
# All the arguments should be of the form "--foo=bar"
re <- "^--(.*?)=(.*)"
stopifnot( all(grepl(re, o)) )
# Extract the values and names
r <- gsub(re, "\\2", o)
names(r) <- gsub("^--(.*?)=(.*)", "\\1", o)
r <- as.list(r)
# Add the default values
missing <- setdiff( names(defaults), names(r) )
append(r, defaults[missing])
}
print( parseArgs() )
print( parseArgs(foo=1, bar=2) ) # With default values

Resources