Indian currency format uses a comma separator after every 2 digits, except for the last section which is 3 digits. Can one suggest a function in R that can achieve that.
Example:
input 12345678.23 output 1,23,45,678.23
i/p: 4356, o/p: 4,356
i/p: 435, o/p: 435
I don't know of any native way to do this, but the following function will achieve it for you:
nums <- function(n) {
dec <- round(n %% 1, 2)
dec <- ifelse(dec < 0.01, "", substr(dec, 2, 4))
int <- n %/% 1
ints <- vapply(int, function(x) {
x <- as.character(x)
len <- nchar(x)
if(len <= 3) return(x)
rev_x <- paste(rev(unlist(strsplit(x, ""))), collapse = "")
str <- paste0(substr(rev_x, 1, 3), ",")
str2 <- substr(rev_x, 4, 100)
str2 <- gsub("(\\d{2})", "\\1,", str2)
rev_x <- paste0(str, str2)
return(paste(rev(unlist(strsplit(rev_x, ""))), collapse = ""))
}, character(1))
return(sub("^,", "", paste0(ints, dec)))
}
You can use it like this:
nums(c(1234.12, 342, 35123251.12))
#> [1] "1,234.12" "342" "3,51,23,251.12"
Here might be one option
f <- Vectorize(function(x, digits = 2) {
r <- ""
if (grepl(".", x, fixed = TRUE)) {
r <- c(sub(".*(?=\\.)", "", x, perl = TRUE))
x <- sub("\\..*", "", x)
}
n <- nchar(x)
r <- paste0(substr(x, n - 2, n), r)
x <- substr(x, 1, n - 3)
while (nchar(x)) {
n <- nchar(x)
r <- c(substr(x, n - 1, n), r)
x <- substr(x, 1, n - 2)
}
paste0(r, collapse = ",")
})
and you will see
> f(c(12345678.23, 4356, 435, 900425, 1230010.45))
[1] "1,23,45,678.23" "4,356" "435" "9,00,425"
[5] "12,30,010.45"
Related
I found this below function to detect repeated sequence. I integrate the function into Monte Carlo Simulation to calculate the probability. The function I have is too long and takes too much time during the simulation. I would appreciate if anyone can help to simply the function and in turn fasten any simulation depends on it.
V1 <- c(68,71,72,69,80,78,80,81,84,82,67,73,65,68,66,70,69,72,74,73,68,75,70,72,75,73,69,75,74,79,80,78,80,81,79,82,69,73,67,66,70,72,69,72,75,80,68,69,71,77,70,73)
Check_repeat_Seq <- function(vector){
k <- 2:25
Lall <- setNames(lapply(k, function(i) table(zoo::rollapply(vector, width = i, toString))), k)
L <- Filter(length, lapply(Lall, function(x) x[x == max(x) & x > 1]))
dat <- data.frame(seq_rep=sapply(L, length))
dat$repeat_length <- as.numeric(rownames(dat))
return(max(dat$repeat_length))
}
Check_repeat_Seq(V1)
#### Can you please simplify the following also to calculate the sum of repeated.####
Check_repeat_Seq_no_overlap_sum <- function(vector){
k <- 2:25
Lall <- setNames(lapply(k, function(i) table(zoo::rollapply(vector, width = i, toString))), k)
L <- Filter(length, lapply(Lall, function(x) x[x == max(x) & x > 1]))
vec <- unlist(unname(L))
nms <- names(vec)
is_le <- function(i) any(grepl(nms[i], tail(nms, -i)) & (vec[i] <= tail(vec, -i)))
LL <- vec[ ! sapply(seq_along(nms), is_le) ]
dat <- data.frame(seq_rep=sapply(L, length))
dat$repeat_length <- as.numeric(rownames(dat))
dat$total_repeat <- dat$seq_rep*dat$repeat_length
return(sum(dat$total_repeat))
}
##### the original function should return data Frame as follows
Check_All_repeat_Seq<- function(vector){
k <- 2:25
Lall <- setNames(lapply(k, function(i) table(zoo::rollapply(vector, width = i, toString))), k)
L <- Filter(length, lapply(Lall, function(x) x[x == max(x) & x > 1]))
vec <- unlist(unname(L))
nms <- names(vec)
is_le <- function(i) any(grepl(nms[i], tail(nms, -i)) & (vec[i] <= tail(vec, -i)))
LL <- vec[ ! sapply(seq_along(nms), is_le) ]
dat <- data.frame(seq_rep=sapply(L, length))
dat$repeat_length <- as.numeric(rownames(dat))
dat$total_repeat <- dat$seq_rep*dat$repeat_length
return(sum(dat))
}
please help simplifying the code with the same output
Update
An even faster iterative approach leveraging the Cantor pairing function:
allDup <- function(x) {
duplicated(x) | duplicated(x, fromLast = TRUE)
}
fPair <- function(i, j) {
# Cantor pairing function
k <- j + (i + j)*(i + j + 1L)/2L
match(k, unique(k))
}
Check_repeat_Seq3 <- function(v) {
v <- match(v, unique(v))
vPair <- fPair(head(v, -1), tail(v, -1))
blnKeep <- allDup(vPair)
idx <- which(blnKeep)
len <- 1L
while (length(idx)) {
len <- len + 1L
vPair <- fPair(vPair[blnKeep], v[idx + len])
blnKeep <- allDup(vPair)
idx <- idx[blnKeep]
}
return(len)
}
# benchmark against the rollaply solution
V1 <- c(68,71,72,69,80,78,80,81,84,82,67,73,65,68,66,70,69,72,74,73,68,75,70,72,75,73,69,75,74,79,80,78,80,81,79,82,69,73,67,66,70,72,69,72,75,80,68,69,71,77,70,73)
Check_repeat_Seq <- function(vector){
k <- 2:25
Lall <- setNames(lapply(k, function(i) table(zoo::rollapply(vector, width = i, toString))), k)
L <- Filter(length, lapply(Lall, function(x) x[x == max(x) & x > 1]))
dat <- data.frame(seq_rep=sapply(L, length))
dat$repeat_length <- as.numeric(rownames(dat))
return(max(dat$repeat_length))
}
Check_repeat_Seq(V1)
#> [1] 4
Check_repeat_Seq3(V1)
#> [1] 4
microbenchmark::microbenchmark(Check_repeat_Seq(V1), Check_repeat_Seq3(V1))
#> Unit: microseconds
#> expr min lq mean median uq max neval
#> Check_repeat_Seq(V1) 38445.7 40860.95 43153.058 42249.25 44051.15 60593.2 100
#> Check_repeat_Seq3(V1) 103.9 118.65 150.713 149.05 160.05 465.2 100
Original Solution
Check_repeat_Seq2 <- function(v) {
m <- matrix(c(head(v, -1), tail(v, -1)), ncol = 2)
idx <- which(duplicated(m) | duplicated(m, fromLast = TRUE))
len <- 2L
while (length(idx)) {
len <- len + 1L
m <- matrix(v[sequence(rep(len, length(idx)), idx)], ncol = len, byrow = TRUE)
idx <- idx[duplicated(m) | duplicated(m, fromLast = TRUE)]
}
return(len - 1L)
}
UPDATE 2
This should return your dat data.frame:
Check_repeat_Seq3 <- function(v) {
v1 <- match(v, unique(v))
vPair <- fPair(head(v1, -1), tail(v1, -1))
blnKeep <- allDup(vPair)
idx <- which(blnKeep)
if (length(idx)) {
len <- 1L
seq_rep <- integer(length(v)/2)
while (length(idx)) {
len <- len + 1L
vPair <- fPair(vPair[blnKeep], v1[idx + len])
blnKeep <- allDup(vPair)
seq_rep[len] <- nrow(unique(matrix(v[sequence(rep(len, length(blnKeep)), idx)], ncol = len, byrow = TRUE)))
idx <- idx[blnKeep]
}
len <- 2:len
return(data.frame(seq_rep = seq_rep[len], repeat_length = len, total_repeat = seq_rep[len]*len))
} else {
return(data.frame(seq_rep = integer(0), repeat_length = integer(0), total_repeat = integer(0)))
}
}
To keep my script records clean, I'd like to output vector inputs with rep() instead of repeated values in chain. Please see my example below, using dput():
v<-c(rep(1,2), rep(2,4), rep(NA,5))
dput(v)
>c(1, 1, 2, 2, 2, 2, NA, NA, NA, NA, NA)
unknown_function(v)
>c(rep(1,2), rep(2,4), rep(NA,5))
Surely trivial, but I cannot find any simple solution. Suggestions for unknown_function(), please?
rle will compute the values and lengths and from that we can paste it together:
with(rle(format(v)), paste0("c(", toString(paste0("rep(", values, ",", lengths, ")")), ")"))
## [1] "c(rep( 1,2), rep( 2,4), rep(NA,5))"
You can write a function using the stucture of rle and change it to crunch also NA and combine it with the method from #g-grothendieck.
dputRle <- function (x, nmin=3) {
if (!is.vector(x) && !is.list(x))
stop("'x' must be a vector of an atomic type")
n <- length(x)
if (n <= 1L)
return(x)
y <- x[-1L] != x[-n] | is.na(x[-1L]) != is.na(x[-n])
i <- c(which(y), n)
lengths = diff(c(0L, i))
paste0("c(", toString(unlist(sapply(seq(i), function(y) {
if(lengths[y] <= nmin) {rep(x[i[y]], lengths[y])
} else {paste0("rep(", x[i[y]], ",", lengths[y], ")")}
}))), ")")
}
v <- c(rep(1,2), rep(2,4), rep(NA,5), 1)
dputRle(v, 1)
#[1] "c(rep(1,2), rep(2,4), rep(NA,5), 1)"
dputRle(v)
#"c(1, 1, rep(2,4), rep(NA,5), 1)"
v <- 1
dputRle(v)
#[1] 1
v <- numeric(0)
dputRle(v)
#numeric(0)
Or alternative.
dputRle2 <- function (x) {
if (!is.vector(x) && !is.list(x))
stop("'x' must be a vector of an atomic type")
n <- length(x)
if (n <= 1L)
return(x)
y <- x[-1L] != x[-n] | is.na(x[-1L]) != is.na(x[-n])
i <- c(which(y), n)
paste0("rep(c(", toString(x[i]), "), c(", toString(diff(c(0L, i))), "))")
}
v <- c(rep(1,2), rep(2,4), rep(NA,5), 1)
dputRle2(v)
#[1] "rep(c(1, 2, NA, 1), c(2, 4, 5, 1))"
There are some ways to use rle with c and rep. All of the following will produce the same vector.
c(1, 2, 2, 3, 3, 3)
c(1, rep(2, 2), rep(3, 3))
c(1, rep(c(2, 3), c(2, 3)))
rep(c(1, 2, 3), c(1, 2, 3))
rep(1:3, 1:3)
I made a calculation for a nested loop, then I tried to transform it to lapply but it doesn't show the same result, do you know why is it? This is my code:
#list
l <- list()
l[[1]] <- matrix(c(4, 3, 20, 10), ncol=2)
l[[2]] <- matrix(c(3, 3, 40, 12), ncol=2)
l[[3]] <- matrix(c(2, 3, 60, 10), ncol=2)
#loop
##index
s <- 1:length(l)
#for loop
zzz <- list()
for (i in s){
zzz[[i]] <- apply(X = l[[i]], MARGIN = 1,
FUN = function(x) spDistsN1(l[[i]], x, longlat = T))
zzz
}
#lapply loop
yyy <- lapply(s, function(x){
apply(X = l[[i]], MARGIN = 1,
FUN = function(x) spDistsN1(l[[i]], x, longlat = T))
})
And they output aren't identical, why?
identical(zzz,yyy)
[1] FALSE
We can change the code to
library(sp)
yyy <- lapply(s, function(i) apply(l[[i]], 1, FUN= function(x)
spDistsN1(l[[i]], x, longlat = TRUE)))
identical(zzz, yyy)
#[1] TRUE
In the OP's code, the anonymous function call used in both function is x, so within the spDistsN1, the x was coming from the whole matrix instead of the row
I'm trying to subtract each unique pair-wise ps from the for loop in my function below. To do so, I first find unique pair-wise ps using combn(p, 2) and second use outer to subtract each unique pair from each other.
In both steps, I get error. Is there a fix for the error?
prop <- function(n, yes, a, b = a){
p <- list()
for(i in 1:length(n)){
p[[i]] <- rbeta(2, a[i] + yes[i], b[i] + (n[i] - yes[i]))
}
outer(combn(p, 2), FUN = "-") # Gives Error
}
prop(n = c(10, 20, 30), yes = rep(5, 3), a = rep(1, 3))
By default, it is simplify = TRUE in combn. So, even though the output is a list, it is simplified to have a dim attribute by converting each of the the list as elements in a matrix. As the m is 2, there are 2 list elements for each comparison, extract those elements using [[ and subtract
combn(p, 2, FUN = function(x) x[[1]]- x[[2]])
-full function
prop <- function(n, yes, a, b = a){
p <- list()
for(i in 1:length(n)){
p[[i]] <- rbeta(2, a[i] + yes[i], b[i] + (n[i] - yes[i]))
}
combn(p, 2, FUN = function(x) x[[1]]- x[[2]])
}
prop(n = c(10, 20, 30), yes = rep(5, 3), a = rep(1, 3))
If we wanted to include another argument how
prop <- function(n, yes, a, b = a, how= "one.two"){
delta <- switch(how,
one.two = function(x) x[[1]] - x[[2]],
two.one = function(x) x[[2]] - x[[1]])
p <- list()
for(i in 1:length(n)){
p[[i]] <- rbeta(2, a[i] + yes[i], b[i] + (n[i] - yes[i]))
}
out <- combn(p, 2, FUN = delta)
nm1 <- paste0("p", combn(seq_along(p), 2, FUN = paste, collapse="-"))
colnames(out) <- nm1
out
}
prop(n = c(10, 20, 30), yes = rep(5, 3), a = rep(1, 3), how = "one.two")
prop(n = c(10, 20, 30), yes = rep(5, 3), a = rep(1, 3), how = "two.one")
I have been looking for quite a while but it seems the answer always seems to be to use eval(parse(text="1+1")).
I have a column in my data frame, it has a list of strings such as "1+1*6", "1*4/3" etc. I wish to compute these into a new column without using the eval(parse( functions as I am looking to do it over 8 million rows.
It is basically an attempt to answer the question: Given the numbers 1:9 find all the solutions where (A_B_C) / (D_E_F) = GHI, where A:I are the numbers 1:9 (without repeating) and the underscores are one of the four operators *, /, +,-, also without repeating.
I created a dataframe with all the permutations of 1:9 and for each of these I calculated the permutations of the four operators.
require(gtools)
x <- permutations(n = 9, r = 9, v = 1:9)
y <- permutations(n = 4, r = 4, v = c("*", "/", "+", "-"))
for(i in 1:nrow(x)){
for(j in 1:nrow(y)){
math <- paste("(", x[i,1], y[j,1], x[i,2],y[j,2], x[i,3],")", "/", "(", x[i,4] ,y[j,3], x[i,5] ,y[j,4], x[i,6],")")
equals <- eval(parse(text=math))
sum <- as.numeric(paste0(x[i,7], x[i,8], x[i,9]))
if(sum==equals) {
print(c(i,j))
}
}
}
However this takes far too long, hence I am trying to remove the time consuming eval(parse(..
Any help would be really appreciated. Thanks!
Freddie
Vectorisation is key
math <- apply(
y,
1,
function(j){
paste("(", x[, 1], j[1], x[, 2], j[2], x[, 3],")/(", x[, 4], j[3], x[, 5], j[4], x[, 6], ")")
}
)
math <- apply(math, 2, paste, collapse = ",")
math <- paste("c(", math, ")")
equals <- sapply(parse(text = math), eval)
sum <-matrix(x[, 7] * 100 + x[, 8] * 10 + x[, 9], nrow = nrow(x), ncol = nrow(y))
abs(sum - equals) < 1e-8
Let's see what the difference in speed is
require(gtools)
x <- permutations(n = 9, r = 9, v = 1:9)
y <- permutations(n = 4, r = 4, v = c("*", "/", "+", "-"))
x <- x[sample(nrow(x), 40), ]
y <- y[sample(nrow(y), 20), ]
library(microbenchmark)
microbenchmark(
loop = for(i in 1:nrow(x)){
for(j in 1:nrow(y)){
math <- paste("(", x[i,1], y[j,1], x[i,2],y[j,2], x[i,3],")", "/", "(", x[i,4] ,y[j,3], x[i,5] ,y[j,4], x[i,6],")")
equals <- eval(parse(text=math))
sum <- as.numeric(paste0(x[i,7], x[i,8], x[i,9]))
if(sum==equals) {
print(c(i,j))
}
}
},
vectorised = {
math <- apply(
y,
1,
function(j){
paste("(", x[, 1], j[1], x[, 2], j[2], x[, 3],")/(", x[, 4], j[3], x[, 5], j[4], x[, 6], ")")
}
)
math <- apply(math, 2, paste, collapse = ",")
math <- paste("c(", math, ")")
equals <- sapply(parse(text = math), eval)
sum <-matrix(x[, 7] * 100 + x[, 8] * 10 + x[, 9], nrow = nrow(x), ncol = nrow(y))
abs(sum - equals) < 1e-8
}
)
The results:
Unit: milliseconds
expr min lq mean median uq max neval cld
loop 158.666383 162.084918 167.477490 165.880665 170.258076 240.43746 100 b
vectorised 8.540623 8.966214 9.613615 9.142515 9.413117 17.88282 100 a