Creating unique object names for list entries - r

I have example data as follows:
# Example input: five lists whose element names (a, b, d) repeat across entries.
# Mixing numbers and strings inside c() makes every vector a character vector.
listoflists <- list(
  list(a = c(1, "a", 3, 4), b = c(5, "b", 6, 7)),
  list(a = c(1, "a", 2, 6), b = c(5, "b", 0, 8)),
  list(d = c(1, "a", 2, 6), b = c(5, "b", 0, 8)),
  list(d = c(1, "a", 2, 3), b = c(5, "b", 0, 8), a = c(5, "b", 0, 8)),
  list(d = c(1, "a", 1, 1))
)
I would like to rename the names (such as a and b), but only if they have already been used, for example by adding a number at the end.
I am having some trouble thinking of the right way to approach this..
Any suggestions?

Maybe there is a more elegant way but just going through the list and renaming items while keeping count of each name's uses works fine:
# Every distinct element name that occurs anywhere in the nested list
names.used <- unique(unlist(lapply(listoflists, names)))
# One occurrence counter per distinct name, all starting at zero
names.n <- rep(0, length(names.used))
names(names.n) <- names.used
for (outer in seq_along(listoflists)) {
  # Work on a copy of this sub-list's names and write it back once at the end
  current <- names(listoflists[[outer]])
  for (pos in seq_along(listoflists[[outer]])) {
    key <- current[pos]
    seen <- names.n[key]
    # The first occurrence keeps its name; later ones get the running count appended
    if (seen > 0) {
      current[pos] <- paste0(key, seen)
    }
    # Count this occurrence under the ORIGINAL name
    names.n[key] <- seen + 1
  }
  names(listoflists[[outer]]) <- current
}
# [[1]]
# [[1]]$a
# [1] "1" "a" "3" "4"
#
# [[1]]$b
# [1] "5" "b" "6" "7"
#
#
# [[2]]
# [[2]]$a1
# [1] "1" "a" "2" "6"
#
# [[2]]$b1
# [1] "5" "b" "0" "8"
#
#
# [[3]]
# [[3]]$d
# [1] "1" "a" "2" "6"
#
# [[3]]$b2
# [1] "5" "b" "0" "8"
#
#
# [[4]]
# [[4]]$d1
# [1] "1" "a" "2" "3"
#
# [[4]]$b3
# [1] "5" "b" "0" "8"
#
# [[4]]$a2
# [1] "5" "b" "0" "8"
#
#
# [[5]]
# [[5]]$d2
# [1] "1" "a" "1" "1"

Another approach would be to alter the elements names using make.unique:
# Names of every sub-list, kept in their nested shape
nms <- lapply(listoflists, names)
# De-duplicate across the flattened names (numeric suffix, empty separator),
# then restore the original nesting
nms <- relist(
  flesh = make.unique(unlist(nms), sep = ""),
  skeleton = nms
)
# Pair each sub-list with its de-duplicated names
ll2 <- Map(function(entry, new_names) setNames(entry, new_names), listoflists, nms)
str(ll2)

Related

Convert matrix values in vector with some conditions

I'd like to convert matrix values in vector with some conditions. In my example:
# Build a 10 x 3 matrix of absolute standard-normal draws
mymatrix <- matrix(
  # Exactly nrow * ncol values are needed to fill the matrix
  abs(rnorm(10 * 3)),
  # Number of rows
  nrow = 10,
  # Number of columns
  ncol = 3,
  # Fill row by row instead of R's default column-wise order
  byrow = TRUE
)
# Label the rows 1..10
rownames(mymatrix) <- 1:10
# Label the columns "1".."3"
colnames(mymatrix) <- c("1", "2", "3")
mymatrix
# 1 2 3
#1 0.85882558 1.38755611 0.369197570
#2 1.58785948 1.13064411 1.542977629
#3 0.35293056 1.44036121 1.806414543
#4 0.02709663 1.25620400 0.794001157
#5 0.34426152 0.32365824 2.026024465
#6 0.03608507 1.12315562 1.072635275
#7 0.39055300 0.49463748 0.645037388
#8 0.33406392 0.63543332 0.005055208
#9 1.04796081 0.04062249 2.330948193
#10 0.42538451 0.24574490 0.268357588
I'd like to convert my matrix to vector (myvector) using a custom rule:
If mymatrix[,1] is the maximum value in the row and mymatrix[,1] >= 0.95, then the result for that row is "1"; if mymatrix[,1] is the maximum but mymatrix[,1] < 0.95, then the result is "misclassified". For mymatrix[,2] and mymatrix[,3] no threshold applies: the result is simply "2" or "3", whichever column holds the maximum value in the row. My desired output is:
myvector
#[1] "2" "1" "3" "2" "3" "2" "3" "2" "1" "misclassified"
Please, any ideas?
Here's a vectorised option -
# Column index of the maximum value in each row.
# ties.method = "first" keeps the result deterministic; the default ("random")
# picks one of the tied columns at random.
res <- max.col(mymatrix, ties.method = "first")
# Rows where column 1 holds the maximum
inds <- which(res == 1)
# Of those, the rows whose column-1 value misses the 0.95 threshold
too_low <- inds[mymatrix[inds, 1] < 0.95]
# Convert explicitly so the result is always character, then relabel
res <- as.character(res)
res[too_low] <- "misclassified"
res
#[1] "2" "1" "3" "2" "3"
#[6] "2" "3" "2" "3" "misclassified"
It looks like you want to apply a function over your rows. So apply would be appropriate here:
apply(mymatrix, 1, function(row) {
  # Position of the largest value in this row
  top <- which.max(row)
  # Columns other than the first are reported as-is
  if (top != 1) {
    return(as.character(top))
  }
  # Column 1 only counts when it reaches the 0.95 threshold
  if (row[top] >= 0.95) "1" else "misclassified"
})
[1] "2" "1" "3" "2" "3"
[6] "2" "3" "2" "3" "misclassified"
You can try apply + ifelse
apply(
  mymatrix,
  1,
  function(x) {
    # Position of the largest value in this row
    top <- which.max(x)
    # The 0.95 threshold applies only when column 1 holds the row maximum;
    # a maximum in column 2 or 3 is reported regardless of its size
    if (top == 1 && x[top] < 0.95) {
      "misclassified"
    } else {
      colnames(mymatrix)[top]
    }
  }
)

Pipe that leads to a map ends up giving a list of incorrect length

Using the combn function, I want to generate all possible combinations of the vector c("1", "2", "3") when choosing 2 elements (m = 2.) The code looks like this:
comparisons <- combn(c("1", "2", "3"), m = 2)
[,1] [,2] [,3]
[1,] "1" "1" "2"
[2,] "2" "3" "3"
I then transpose this matrix, so it becomes this:
comparisons <- t(comparisons)
[,1] [,2]
[1,] "1" "2"
[2,] "1" "3"
[3,] "2" "3"
The last step is to generate a list, where each element is a row from this transposed matrix. I used map, and it gave me exactly what I wanted:
comparisons <- map(1:3, ~ comparisons[.x, ])
[[1]]
[1] "1" "2"
[[2]]
[1] "1" "3"
[[3]]
[1] "2" "3"
This is all fine and dandy, but when I try to pipe all of these together in one nice assignment, the resulting list is incorrect.
# NOTE: this is the failing version. `%>%` still inserts the transposed matrix as
# map()'s FIRST argument, because a `.` that appears only inside a nested call
# (here the `~` formula) does not suppress first-argument insertion. The call is
# therefore map(<matrix>, 1:3, ~ ...): map() iterates over the six matrix cells and
# 1:3 lands in the function slot (presumably treated as an element extractor),
# which gives the six NULLs shown in the output.
comparisons <- combn(c("1", "2", "3"), m = 2) %>%
t() %>%
map(1:3, ~ .[.x, ])
[[1]]
NULL
[[2]]
NULL
[[3]]
NULL
[[4]]
NULL
[[5]]
NULL
[[6]]
NULL
Here is the thing: when I turn your matrix into a tibble and then into a list, I get your desired output. Every data frame/tibble is also a list, so every column becomes one element of the list.
# Attach purrr (provides transpose() and re-exports %>%)
library(purrr)
# NOTE(review): as_tibble() is not exported by purrr; presumably tibble (or dplyr)
# is attached as well. Confirm before running.
comparisons %>%
  as_tibble() %>%
  as.list() %>% # Up to here you already get the desired output; run the last line only if you also want it transposed.
  transpose()
$a # Before running transpose
[1] "1" "2"
$b
[1] "1" "3"
$c
[1] "2" "3"
# After running transpose
[[1]]
[[1]]$a
[1] "1"
[[1]]$b
[1] "1"
[[1]]$c
[1] "2"
[[2]]
[[2]]$a
[1] "2"
[[2]]$b
[1] "3"
[[2]]$c
[1] "3"

extracting data from two columns

I have a data frame that looks like this.
It refers to words and their structure
df <- data.frame(word = c("pokkoitta", "demna", "ningatinggo ", "tengkeam", "bampana", "njam"), structure = c("CvC:vvC:v", "CvCCv", "CvCvCvNCv", "CvNCvvC", "CvNCvCv" , "NCvC"))
The second column indicates the structure of the first column. If in the second column a C:, NC or CC combination occurs, I need to extract from the first column, which these refer to.
So I would need:
kk C:
kk C:
mn CC
ngg NC
ngk NC
mp NC
nj NC
One thing that needs to be taken into account is that a simple character count does not work: some two-letter sequences in the left column correspond to a single symbol in the right column, namely ng|sy|kh = C (not CC, as they represent one phoneme).
Also, in one word, more than one of these combinations can occur
Thx
Update:
This would be the matching pattern with regex:
(nj|ngk|ngg|nc|nt|nd|mp|mb) = NC
(ng|sy|kh) = C
(([b-df-hj-np-tv-xz])\\1+) = C:
([b-df-hj-np-tv-z]) = C
(') = :
((a|e|i|o|u)\\1+) = v:
(a|e|i|o|u) = v
Interesting problem. I might have just re-invented the algorithm used to find those structures, but it seems to work.
# Words and their consonant/vowel skeletons. Only the word column is used below;
# the structure labels are re-derived from the patterns.
df <- data.frame(
  word = c("pokkoitta", "demna", "ningatinggo", "tengkeam", "bampana", "njam"),
  structure = c("CvC:vvC:v", "CvCCv", "CvCvCvNCv", "CvNCvvC", "CvNCvCv", "NCvC"),
  stringsAsFactors = FALSE
)
# Pattern table: structure label (str) and the regex that detects it (rex).
# The rows are applied in order, so multi-letter units (NC clusters, digraphs,
# doubled letters) are consumed before the single-letter classes.
pat <- data.frame(
  str = c("NC", "C", "C:", "C", "v:", "v"),
  rex = c(
    "nj|ngk|ngg|nc|nt|nd|mp|mb",
    "ng|sy|kh",
    "([b-df-hj-np-tv-xz])\\1+",
    "[b-df-hj-np-tv-z]",
    "(a|e|i|o|u)\\1+",
    "a|e|i|o|u"
  ),
  stringsAsFactors = FALSE
)
# xs is rewritten step by step into structure labels, xw into padded phoneme chunks.
# Both receive 6-character replacements, so match positions found in xs stay valid in xw.
xs <- xw <- df[, 1]
for (i in seq_len(nrow(pat))) {
  rx <- gregexpr(pat[i, 2], xs)
  mc <- regmatches(xs, rx)
  # lapply always returns a list; sapply would collapse to a matrix whenever every
  # word has the same number (> 1) of matches, which breaks the logical indexing below
  mp <- lapply(mc, function(x) format(paste("", x), width = 6))
  has_match <- lengths(mc) != 0
  mc[has_match] <- mp[has_match]
  # Write the padded phoneme into xw and the padded label into xs at the same positions
  regmatches(xw, rx) <- mc
  regmatches(xs, rx) <- paste("", format(pat[i, 1], width = 5))
}
# Split both strings on the padding and pair phoneme chunks with their labels, one matrix per word
phon <- trimws(cbind(word = xw, structure = xs))
phon <- apply(phon, 1, strsplit, " +")
phon <- lapply(phon, function(x) do.call(cbind, x))
head(phon, 3)
# [[1]]
# word structure
# [1,] "p" "C"
# [2,] "o" "v"
# [3,] "kk" "C:"
# [4,] "o" "v"
# [5,] "i" "v"
# [6,] "tt" "C:"
# [7,] "a" "v"
#
# [[2]]
# word structure
# [1,] "d" "C"
# [2,] "e" "v"
# [3,] "m" "C"
# [4,] "n" "C"
# [5,] "a" "v"
#
# [[3]]
# word structure
# [1,] "n" "C"
# [2,] "i" "v"
# [3,] "ng" "C"
# [4,] "a" "v"
# [5,] "t" "C"
# [6,] "i" "v"
# [7,] "ngg" "NC"
# [8,] "o" "v"

How to generate stratified permutations in R

I would like to generate different possible permutations with the same frequency as in the input vector. For example, I would like to generate the permutations using the vector x in the below example.
library(gtools)
x <- c("A", "A", "B")
# Arrangements of length 3 drawn, with repetition, from the 2 distinct symbols in x.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
permutations(n = 2, r = 3, v = x, repeats.allowed = TRUE)
It gives the below output.
# [,1] [,2] [,3]
# [1,] "A" "A" "A"
# [2,] "A" "A" "B"
# [3,] "A" "B" "A"
# [4,] "A" "B" "B"
# [5,] "B" "A" "A"
# [6,] "B" "A" "B"
# [7,] "B" "B" "A"
# [8,] "B" "B" "B"
But, I want only permutations having A, B with frequencies 2, 1 respectively. The expected output is:
# [,1] [,2] [,3]
# [1,] "A" "A" "B"
# [2,] "A" "B" "A"
# [3,] "B" "A" "A"
Is there any function available in R?
Note: I do not want to do post-processing of the output to get the expected output as my original input contains 300 elements. It is not recommended to generate factorial(300) number of permutations.
Update: The suggested link provides a nice faster solution but fails when the input vector is doubled (eg: length=20) with the error message:
Error in matrix(NA, nrow = N, ncol = prod(sapply(foo, ncol))) :
invalid 'ncol' value (too large or NA)
Your problem can be reformulated as finding all possible permutations of the frequency vector. Take a look at combinat::permn:
# Input vector whose element frequencies (A twice, B once) must be preserved
x <- c( 'A', 'A', 'B' )
# permn() enumerates every ordering of x, including orderings that look identical
# because of the repeated 'A'; unique() collapses those repeats.
# NOTE(review): the intermediate list has factorial(length(x)) entries, so this is
# presumably only practical for short vectors.
unique(combinat::permn( x ))
# [[1]]
# [1] "A" "A" "B"
# [[2]]
# [1] "A" "B" "A"
# [[3]]
# [1] "B" "A" "A"
unique is necessary to remove duplicate entries, which is automatically done by gtools::permutations you've been using (through the default set=TRUE argument).
If you need the result in matrix format, as in your original question, pass the output as arguments to rbind using do.call:
do.call( rbind, unique(combinat::permn( x )) )
# [,1] [,2] [,3]
# [1,] "A" "A" "B"
# [2,] "A" "B" "A"
# [3,] "B" "A" "A"

Splitting vector based on vector of chunk-lengths

I've got a vector of binary numbers. I know the consecutive length of each group of objects; how can I split based on that information (without for loop)?
x = c("1","0","1","0","0","0","0","0","1")
.length = c(group1 = 2,group2=4, group3=3)
x is the binary number vector that I need to split. .length is the information that I am given. .length essentially tells me that the first group has 2 elements and they are the first two elements 1,0. The second group has 4 elements and contain the 4 numbers that follow the group 1 numbers, 1,0,0,0, etc.
Is there a way of splitting that and returning the split items in a list?
The ugly way is to do it via a for loop that keeps track of the current cumsum, but I am looking for a more elegant way if there is one.
You can use rep to set up the split-by variable, then use split
# Vector to cut up, and the size of each consecutive chunk
x <- c("1", "0", "1", "0", "0", "0", "0", "0", "1")
.length <- c(group1 = 2, group2 = 4, group3 = 3)
# Label every element with the index of its chunk, then split on that label
split(x, f = rep.int(seq_along(.length), times = .length))
# $`1`
# [1] "1" "0"
#
# $`2`
# [1] "1" "0" "0" "0"
#
# $`3`
# [1] "0" "0" "1"
If you wanted to take the group names with you to the split list, you can change rep to replicate the names
# Label each element with its group's name. Fixing the factor levels to the order of
# names(.length) keeps the chunks in their original sequence; splitting on a plain
# character vector would sort the groups alphabetically instead.
split(x, factor(rep.int(names(.length), .length), levels = unique(names(.length))))
# $group1
# [1] "1" "0"
#
# $group2
# [1] "1" "0" "0" "0"
#
# $group3
# [1] "0" "0" "1"
Another option is
split(x,cumsum(sequence(.length)==1))
#$`1`
#[1] "1" "0"
#$`2`
#[1] "1" "0" "0" "0"
#$`3`
#[1] "0" "0" "1"
to get the group names
# Repeat each group name once per element of that group and split on it.
# Deriving the labels from names(sequence(.length)) is fragile: it relies on sequence()
# returning names, and stripping a single trailing character only works for groups
# with fewer than 10 elements.
split(x, rep(names(.length), times = .length))
#$group1
#[1] "1" "0"
#$group2
#[1] "1" "0" "0" "0"
#$group3
#[1] "0" "0" "1"

Resources