Subset with only consecutive numbers - r

I want to generate all subset of {1,2,3,4} with only consecutive numbers. (For example I want subset {1}, {1,2} or {2,3,4} but not {2,4}. )
This is what I have been trying:
library(ggm)
p2<-powerset(1:4, sort = TRUE, nonempty = TRUE)
m2<-p2
for (i in 1:length(p2)){
ifelse(length(p2[[i]]) <2, m2<-m2, ifelse(max(diff(as.numeric(p2[[i]])))>1, m2<-m2[-
c(i)],m2<-m2))
}
I want to first generate power set of {1,2,3,4} and exclude subsets with inconsecutive numbers. But when I am doing the
m2<-m2[- c(i)]
command in the 2nd ifelse to exclude subsets with inconsecutive numbers, I believe I change the index of power set so I keep getting the wrong subsets as I desired.
Any suggestions on how to do it correctly?
Thanks!

You can get all unique ascending sequences between 1 and 4 in base R with the following one-liner:
apply(which(upper.tri(diag(4), TRUE), TRUE), 1, function(x) x[1]:x[2])
#> [[1]]
#> [1] 1
#>
#> [[2]]
#> [1] 1 2
#>
#> [[3]]
#> [1] 2
#>
#> [[4]]
#> [1] 1 2 3
#>
#> [[5]]
#> [1] 2 3
#>
#> [[6]]
#> [1] 3
#>
#> [[7]]
#> [1] 1 2 3 4
#>
#> [[8]]
#> [1] 2 3 4
#>
#> [[9]]
#> [1] 3 4
#>
#> [[10]]
#> [1] 4

Related

Get an element from a vector with its attribute in R

gregexpr returns a list containing a vector with some additional data:
[[1]]
[1] 21 136 409 512 587 693
attr(,"match.length")
[1] 3 4 5 5 4 9
How do I extract just one element with a corresponding attribute at once?
[[1]]
[1] 409
attr(,"match.length")
[1] 5
UPD: The final object must be compatible with regmatches function.
In general, there's no way for R to know that elements of the vector correspond 1-1 with elements of one of its attributes.
If you know this is true (as it is with gregexpr results), then the way to tell R about it is to set a class on the object, and write your own subsetting code. For example,
`[.gregexpr_result` <- function(x, i) {
attrs <- lapply(x, function(element) {
allattrs <- attributes(element)
allattrs[["match.length"]] <- allattrs[["match.length"]][i]
allattrs
})
x <- lapply(x, `[`, i)
for (j in seq_along(x))
attributes(x[[j]]) <- attrs[[j]]
x
}
x <- paste(letters[1:2], letters[1:2])
result <- gregexpr("b", x)
class(result) <- "gregexpr_result"
result
#> [[1]]
#> [1] -1
#> attr(,"match.length")
#> [1] -1
#> attr(,"index.type")
#> [1] "chars"
#> attr(,"useBytes")
#> [1] TRUE
#>
#> [[2]]
#> [1] 1 3
#> attr(,"match.length")
#> [1] 1 1
#> attr(,"index.type")
#> [1] "chars"
#> attr(,"useBytes")
#> [1] TRUE
#>
#> attr(,"class")
#> [1] "gregexpr_result"
result[2]
#> [[1]]
#> [1] NA
#> attr(,"match.length")
#> [1] NA
#> attr(,"index.type")
#> [1] "chars"
#> attr(,"useBytes")
#> [1] TRUE
#>
#> [[2]]
#> [1] 3
#> attr(,"match.length")
#> [1] 1
#> attr(,"index.type")
#> [1] "chars"
#> attr(,"useBytes")
#> [1] TRUE
Created on 2022-11-20 with reprex v2.0.2
We may do
out <- lapply(lst1, `[`, 3)
attr(out, "match.length") <- attr(lst1, "match.length")[3]
-output
> out
[[1]]
[1] 409
attr(,"match.length")
[1] 5
data
lst1 <- structure(list(c(21, 136, 409, 512, 587, 693)),
match.length = c(3,
4, 5, 5, 4, 9))

append a globally defined list from inside of a function in R

I am using the walk function to iterate over my list of lists and append a list element to every sub-list.
.insideFunction <- function(sublistName, arg2){
newListElement <- "Hello"
newListElement <- as.list(newListElement)
names(newListElement) <- "newListElement"
myList[[sublistName]] <- append(myList[[sublistName]], newListElement)
}
walk(names(myList), .insideFunction, someTable)
The problem is that the list myList, which is defined globally doesn't change.
I am currently using the global assignment operator inside of the .insideFunction to force R to overwrite the sublist.
myList[[sublistName]] <<- append(myList[[sublistName]], newListElement)
How can I avoid using the global assignment operator, but still append the globally defined list from inside a function?
Use map instead of walk to create a modified version of a list by applying a function to every element e.g. add 2 to each sub list:
library(purrr)
data <- list(
list("foo", 1),
list("bar", 1)
)
data
#> [[1]]
#> [[1]][[1]]
#> [1] "foo"
#>
#> [[1]][[2]]
#> [1] 1
#>
#>
#> [[2]]
#> [[2]][[1]]
#> [1] "bar"
#>
#> [[2]][[2]]
#> [1] 1
newListElement <- "Hello"
newListElement <- as.list(newListElement)
names(newListElement) <- "newListElement"
data %>% map(~ .x %>% c(newListElement))
#> [[1]]
#> [[1]][[1]]
#> [1] "foo"
#>
#> [[1]][[2]]
#> [1] 1
#>
#> [[1]]$newListElement
#> [1] "Hello"
#>
#>
#> [[2]]
#> [[2]][[1]]
#> [1] "bar"
#>
#> [[2]][[2]]
#> [1] 1
#>
#> [[2]]$newListElement
#> [1] "Hello"
Created on 2022-04-22 by the reprex package (v2.0.0)

Get subsets between one element and the previous same element

Consider a vector:
vec <- c(1, 3, 4, 3, 3, 1, 1)
I'd like to get, for each element of the vector, a subset of the values in between the nth element and its previous occurrence.
The expected output is:
f(vec)
# [[1]]
# [1] 1
#
# [[2]]
# [1] 3
#
# [[3]]
# [1] 4
#
# [[4]]
# [1] 3 4 3
#
# [[5]]
# [1] 3 3
#
# [[6]]
# [1] 1 3 4 3 3 1
#
# [[7]]
# [1] 1 1
We may loop over the sequence of the vector, get the index of the last match of the same element ('i1') from the previous elements of the vector and get the sequence (:) to subset the vector
lapply(seq_along(vec), function(i) {
i1 <- tail(which(vec[1:(i-1)] == vec[i]), 1)[1]
i1[is.na(i1)] <- i
vec[i1:i]
})
-output
[[1]]
[1] 1
[[2]]
[1] 3
[[3]]
[1] 4
[[4]]
[1] 3 4 3
[[5]]
[1] 3 3
[[6]]
[1] 1 3 4 3 3 1
[[7]]
[1] 1 1

Change every element of every list (nested lists) using purrr

I have hundreds of observations of census data - each feature is stored within a list with the name census. I am trying to perform an action
a) on all elements of all lists: I want to make all non character elements numeric.
b) a named element present within each list: I want to remove a prefix from a named column in every list
A toy example below.
Census is a nested list within a list
library(tidyverse)
library(purrr)
POA_CODE = c("POA101","POA102")
dogs = c(4,4)
cats = c(3,2)
children = c(0, 1)
salary = c(100, 120)
employed.prop = c(1,0.5)
pets <- list(POA_CODE, as.integer(dogs), as.integer(cats))
children <-list(POA_CODE, as.integer(children))
employment <-list(POA_CODE, salary, employed.prop)
census <- list(pets, children, employment)
Attempt to change all non-numeric elements in every list to numeric
#change all non-numeric elements to numeric
census_num <- census %>%
map(function(x){
ifelse(is.character == TRUE, x,
as.numeric(x))}
)
I get the following error message:
Error in is.character == TRUE :
comparison (1) is possible only for atomic and list types
Attempt to remove prefix from every postcode in census[[]]$'POA_CODE'
#Remove "POA" prefix from every postcode
census_code <- pmap(census, ~.x[["POA_CODE"]],function(x){
str_replace(POA_CODE,"POA","")
})
I get the error
Error: Element 2 of `.l` must have length 1 or 3, not 2
You have a nested list, so you need nested maps :
library(purrr)
map(census, function(x) map_if(x, is.character, ~as.numeric(sub('POA', '', .x))))
#[[1]]
#[[1]][[1]]
#[1] 101 102
#[[1]][[2]]
#[1] 4 4
#[[1]][[3]]
#[1] 3 2
#[[2]]
#[[2]][[1]]
#[1] 101 102
#[[2]][[2]]
#[1] 0 1
#[[3]]
#[[3]][[1]]
#[1] 101 102
#[[3]][[2]]
#[1] 100 120
#[[3]][[3]]
#[1] 1.0 0.5
In base R, we can solve it with nested lapply :
lapply(census, function(x) lapply(x, function(y)
if(is.character(y)) as.numeric(sub('POA', '', y)) else y))
You could use rapply() in base R:
rapply(
census,
function(x) if(is.character(x)) as.numeric(sub("^\\D+","", x)) else x,
how = "replace")
#> [[1]]
#> [[1]][[1]]
#> [1] 101 102
#>
#> [[1]][[2]]
#> [1] 4 4
#>
#> [[1]][[3]]
#> [1] 3 2
#>
#>
#> [[2]]
#> [[2]][[1]]
#> [1] 101 102
#>
#> [[2]][[2]]
#> [1] 0 1
#>
#>
#> [[3]]
#> [[3]][[1]]
#> [1] 101 102
#>
#> [[3]][[2]]
#> [1] 100 120
#>
#> [[3]][[3]]
#> [1] 1.0 0.5
or purrr::map_depth()
library(purrr)
map_depth(census, 2, ~if(is.character(.)) as.numeric(sub("^\\D+","", .)) else .)
#> [[1]]
#> [[1]][[1]]
#> [1] 101 102
#>
#> [[1]][[2]]
#> [1] 4 4
#>
#> [[1]][[3]]
#> [1] 3 2
#>
#>
#> [[2]]
#> [[2]][[1]]
#> [1] 101 102
#>
#> [[2]][[2]]
#> [1] 0 1
#>
#>
#> [[3]]
#> [[3]][[1]]
#> [1] 101 102
#>
#> [[3]][[2]]
#> [1] 100 120
#>
#> [[3]][[3]]
#> [1] 1.0 0.5
We can use rrapply with parse_number
library(rrapply)
library(readr)
rrapply(census, f = function(x) if(is.character(x)) readr::parse_number(x) else x)
#[[1]]
#[[1]][[1]]
#[1] 101 102
#[[1]][[2]]
#[1] 4 4
#[[1]][[3]]
#[1] 3 2
#[[2]]
#[[2]][[1]]
#[1] 101 102
#[[2]][[2]]
#[1] 0 1
#[[3]]
#[[3]][[1]]
#[1] 101 102
#[[3]][[2]]
#[1] 100 120
#[[3]][[3]]
#[1] 1.0 0.5

Remove outliers based on a preceding value

How to remove outliers using a criterion that a value cannot be more than 2-fold higher then its preceding one.
Here is my try:
x<-c(1,2,6,4,10,20,50,10,2,1)
remove_outliers <- function(x, na.rm = TRUE, ...) {
for(i in 1:length(x))
x < (x[i-1] + 2*x)
x
}
remove_outliers(y)
expected outcome: 1,2,4,10,20,2,1
Thanks!
I think the first 10 should be removed in your data because 10>2*4. Here's a way to do what you want without loops. I'm using the dplyr version of lag.
library(dplyr)
x<-c(1,2,6,4,10,20,50,10,2,1)
x[c(TRUE,na.omit(x<=dplyr::lag(x)*2))]
[1] 1 2 4 20 10 2 1
EDIT
To use this with a data.frame:
df <- data.frame(id=1:10, x=c(1,2,6,4,10,20,50,10,2,1))
df[c(TRUE,na.omit(df$x<=dplyr::lag(df$x,1)*2)),]
id x
1 1 1
2 2 2
4 4 4
6 6 20
8 8 10
9 9 2
10 10 1
A simple sapply:
bool<-sapply(seq_along(1:length(x)),function(i) {ifelse(x[i]<2*x[i-1],FALSE,TRUE)})
bool
[[1]]
logical(0)
[[2]]
[1] TRUE
[[3]]
[1] TRUE
[[4]]
[1] FALSE
[[5]]
[1] TRUE
[[6]]
[1] TRUE
[[7]]
[1] TRUE
[[8]]
[1] FALSE
[[9]]
[1] FALSE
[[10]]
[1] FALSE
resulting in:
x[unlist(bool)]
[1] 1 2 4 10 20 1

Resources