Unlist str_locate_all into separate start and end lists - r

I use str_locate_all to get the start and end positions of a list of patterns in my string. It returns a list with the start and stop position for each match. How can I get the start and stop positions of all matches into separate lists?
library(stringr)
patterns <- c("ABS", "BSDF", "ERIDF", "RTZOP")
string <- "ABSBSDFERIDFRTZOPABSBSDFRTZOPABSBSDFERIDFRTZOP"
matches <- str_locate_all(string, patterns)
Result:
[[1]]
start end
[1,] 1 3
[2,] 18 20
[3,] 30 32
[[2]]
start end
[1,] 4 7
[2,] 21 24
[3,] 33 36
[[3]]
start end
[1,] 8 12
[2,] 37 41
[[4]]
start end
[1,] 13 17
[2,] 25 29
[3,] 42 46
What I would like:
start <- c(1, 18, 30, 4, 21, 33, 8, 37, 13, 25, 42)
end <- c(3, 20, 32, 7, 24, 36, 12, 41, 17, 29, 46)

Use do.call with rbind to stack the lists together, then take out the desired columns.
> library(stringr)
>
> patterns <- c("ABS", "BSDF", "ERIDF", "RTZOP")
> string <- "ABSBSDFERIDFRTZOPABSBSDFRTZOPABSBSDFERIDFRTZOP"
>
> # one two-column (start, end) matrix per pattern
> matches <- str_locate_all(string, patterns)
>
> # stack the per-pattern matrices into one matrix, keeping pattern order
> positions <- do.call(rbind, matches)
> # pick the columns by name; results are called start/end as the question asks
> start <- positions[, "start"]
> end <- positions[, "end"]
> start
[1] 1 18 30 4 21 33 8 37 13 25 42
> end
[1] 3 20 32 7 24 36 12 41 17 29 46

Related

How to mutate str_locate_all into single list containing all positions involved in pattern

I am not able to "deconvolute" str_locate_all output so as to get all indexes involved in the pattern (not just start and end).
Here is the type of str_locate_all() output I get:
[[1]]
start end
[1,] 4 7
[[2]]
start end
[1,] 8 12
[2,] 30 33
and how I would like to mutate it:
[[1]]
[1] 4 5 6 7
[[2]]
[1] 8 9 10 11 12 30 31 32 33
Thanks in advance !
You can iterate through the list, and then iterate through each row to create the sequence.
x <- list(matrix(c(4, 7), ncol = 2, dimnames = list(NULL, c("start", "end"))),
matrix(c(8, 30, 12, 33), ncol = 2, dimnames = list(NULL, c("start", "end"))))
Here it is done in base R.
# For every start/end matrix in x, expand each row into the full run of
# positions from start to end, then concatenate the runs into one vector.
lapply(x,
# apply(y, 1, ...) visits the matrix row by row; simplify = FALSE keeps the
# per-row results as a list so unlist() can join runs of different lengths.
function(y) unlist(apply(y, 1,
function(z) seq(z[1], z[2]), simplify = FALSE)))
Gives the desired result.
[[1]]
[1] 4 5 6 7
[[2]]
[1] 8 9 10 11 12 30 31 32 33

Simulating Snakes and Ladders in R

I am a beginner in R and I have to simulate a snakes and ladders game in R for an assignment. The board has 100 squares. The only winning square is 100; for example, if you’re on square 98 and roll a 6, you would go forward 2 spaces to 100 and then bounce back 4 spaces to 96. My difficulty is inserting the snakes/ladders transitions into the complete transition matrix, and coding the winning condition. Here is my code so far:
# Simulate one two-player game of snakes and ladders on a 100-square board.
#
# Players start off the board (position 0) and alternate turns, player 1
# first. A roll that overshoots square 100 bounces back by the excess
# (e.g. 98 + 6 -> 100 -> 96). The game ends as soon as a player lands
# exactly on square 100.
#
# Returns (invisibly) the number of the winning player, 1L or 2L, after
# printing it.
snakesNladders <-
function()
{
  # column 1: square landed on (snake head / ladder foot); column 2: destination
  transitions <- rbind(
    c(40, 3),
    c(4, 25),
    c(27, 5),
    c(13, 46),
    c(43, 18),
    c(54, 31),
    c(33, 49),
    c(99, 41),
    c(42, 63),
    c(66, 45),
    c(50, 69),
    c(89, 53),
    c(76, 58),
    c(62, 81),
    c(74, 92))
  # transmat[s] is the square a player ends on after landing on square s
  transmat <- 1:100
  names(transmat) <- as.character(1:100)
  transmat[transitions[, 1]] <- transitions[, 2]

  # one turn: roll the die, bounce back off square 100 when overshooting,
  # then follow any snake or ladder on the landing square
  take_turn <- function(pos) {
    pos <- pos + sample(1:6, 1)
    if (pos > 100) {
      pos <- 200 - pos
    }
    unname(transmat[pos])
  }

  curpos_player1 <- 0
  curpos_player2 <- 0
  repeat {
    curpos_player1 <- take_turn(curpos_player1)
    if (curpos_player1 == 100) {
      winner <- 1L
      break
    }
    curpos_player2 <- take_turn(curpos_player2)
    if (curpos_player2 == 100) {
      winner <- 2L
      break
    }
  }
  print(paste("Player", winner, "wins"))
  invisible(winner)
}
Not sure what I should put in the return brackets to simulate the winning condition. Also if the rest of the code seems ok. I would really appreciate any help.
Here's how I might approach it. Maybe you can get some ideas by walking through the code.
library(data.table)
set.seed(1633654196) # for reproducibility
# Positions are board squares shifted up by one: position 1 is the start
# (off the board), position 2 is the square at the bottom left of the board
# and position 101 is the winning position.
# trans[p] is where a player ends up after landing on position p: p itself,
# except on a snake head or ladder foot.
jump_from <- c(40, 4, 27, 13, 43, 54, 33, 99, 42, 66, 50, 89, 76, 62, 74)
jump_to <- c(3, 25, 5, 46, 18, 31, 49, 41, 63, 45, 69, 53, 58, 81, 92)
trans <- 1:101
trans[jump_from + 1] <- jump_to + 1
# raw landing position for every starting position 1..100 and die roll 1..6
# (rolls vary fastest); the winning position 101 only leads to itself
m <- c(outer(1:6, 1:100, "+"), rep(101L, 6))
# overshooting the winning position bounces back by the excess
blnBounce <- m > 101
m[blnBounce] <- 202 - m[blnBounce]
# apply the snake/ladder transitions: 6 x 101 matrix, row = roll,
# column = starting position
m <- matrix(trans[m], 6)
# (faster to index moves as a list of vectors than as a matrix)
to <- asplit(m, 2)
# Simulate a snakes-and-ladders game and return every player's square per turn.
#
# Arguments:
#   nplayers  number of players.
#   to        list indexed by position (1 = start, 101 = winning position);
#             to[[p]][r] is the position reached from p with die roll r.
#   blnPrint  if TRUE, print which player won.
#   turns     number of turns simulated per call; if nobody has won by then,
#             the function calls itself to continue the game.
#   start     starting position of each player, on the same scale as `to`.
#
# Value: a numeric matrix with one row per turn and one column per player,
# holding squares (position - 1, so 100 is the winning square). The last row
# is the turn on which the game was won.
#
# NOTE: the sample game printed after this function in the file was produced
# before the fixes below and will differ when the code is rerun.
simgame <- function(nplayers = 1, to, blnPrint = TRUE, turns = 50L, start = rep(1L, nplayers)) {
  # row 1 holds the starting position and is not retained in the output, so
  # simulate one extra row
  turns <- as.integer(turns) + 1L
  # sample the rolls for all players up front, one column per player
  # (a fixed two-column layout breaks for any other number of players)
  rolls <- matrix(sample(6, nplayers * turns, TRUE), ncol = nplayers)
  # initialize the output matrix
  out <- matrix(0, turns, nplayers)
  out[1, ] <- start
  for (j in seq_len(nplayers)) {
    for (i in 2:turns) {
      # move player j from their own previous position; out[i - 1L] without
      # the column index would read player 1's position instead
      out[i, j] <- to[[out[i - 1L, j]]][rolls[i, j]]
      if (out[i, j] == 101L) {
        # player j finishes on turn i, so later players play no further
        turns <- i
        break
      }
    }
  }
  # subtract 1 so "1" corresponds to square 1 and "100" corresponds to the
  # winning square
  out <- out[2:turns, , drop = FALSE] - 1L
  turns <- turns - 1L
  # the first column on square 100 in the final row is the winner (0 if none)
  winner <- match(100L, out[turns, ], 0L)
  if (winner > 0L) {
    if (blnPrint) print(paste("Player", winner, "wins"))
    out
  } else {
    # no winner yet: continue recursively from the final squares, converted
    # back to positions (+ 1) because `start` is on the position scale
    rbind(out, Recall(nplayers, to, blnPrint, turns, out[turns, ] + 1L))
  }
}
# simulate a 2-player game
(game <- simgame(2, to))
#> [1] "Player 2 wins"
#> [,1] [,2]
#> [1,] 25 6
#> [2,] 31 30
#> [3,] 32 32
#> [4,] 34 34
#> [5,] 3 36
#> [6,] 25 6
#> [7,] 5 28
#> [8,] 11 10
#> [9,] 46 14
#> [10,] 52 51
#> [11,] 31 31
#> [12,] 35 37
#> [13,] 39 38
#> [14,] 3 41
#> [15,] 8 7
#> [16,] 12 12
#> [17,] 46 17
#> [18,] 69 48
#> [19,] 75 71
#> [20,] 78 77
#> [21,] 84 83
#> [22,] 87 53
#> [23,] 93 92
#> [24,] 96 94
#> [25,] 41 100
# simulate the number of rounds for 15k games
system.time(turns <- replicate(15e3, nrow(simgame(2, to, FALSE))))
#> user system elapsed
#> 1.18 0.01 1.06
hist(turns)
# Probability of single player finishing by turn
# each (from, to) pair gets probability (number of rolls producing it) / 6
dt <- data.table(from = c(col(m)), to = c(m))[, .(prob = .N/6), from:to]
# full transition matrix: m1[a, b] is the one-roll probability of moving from
# position a to position b
m1 <- matrix(0, 101, 101)
m1[as.matrix(dt[, 1:2])] <- dt$prob
# calculate the probabilities for 300 turns
# start from the identity (zero rolls) so that mm equals m1^i after the i-th
# multiplication and prob1[i] is the probability of having finished by roll i
mm <- diag(101)
prob1 <- numeric(300)
for (i in 1:300) {
  mm <- mm %*% m1
  # row 1 = start position, column 101 = winning position
  prob1[i] <- mm[1, 101]
}
plot(prob1, xlab = "Roll number", main = "Probability of winning by turn")
# for a 3-player game: probability that at least one of three independent
# players has finished by each roll
prob3 <- 1 - (1 - prob1)^3

Using rbind on matrices in nested lists?

I am working in R, and I have a dataset that is a list of lists of matrices. Each sublist in the mainlist has two matrices of equal dimension (10 rows x 2 cols). I would like to rbind() each list of matrices into a single matrix (20 rows x 2 cols). But I do not want to combine every sublist into a single big matrix. Gonna try my best to write a sample code for it but the real data is pretty complex so I'll do my best.
> matrix_1 <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9), nrow = 5, ncol = 2, byrow = TRUE)
> matrix_2 <- matrix(c(9, 8, 7, 6, 5, 4, 3, 2, 1), nrow = 5, ncol = 2, byrow = TRUE)
> matrix_3 <- matrix(c(101, 91, 81, 71, 61, 51, 41, 31, 21, 11), nrow = 5, ncol = 2, byrow = TRUE)
> matrix_4 <- matrix(c(22, 20, 19, 18, 17, 16, 15, 14, 13, 12), nrow = 5, ncol = 2, byrow = TRUE)
> sublist_1 <- list(matrix_1, matrix_2)
[[1]]
[,1] [,2]
[1,] 1 5
[2,] 2 6
[3,] 1 3
[4,] 7 4
[5,] 8 3
[[2]]
[,1] [,2]
[1,] 10 9
[2,] 8 7
[3,] 6 5
[4,] 4 3
[5,] 2 1
> sublist_2 <- list(matrix_3, matrix_4)
[[1]]
[,1] [,2]
[1,] 101 91
[2,] 81 71
[3,] 61 51
[4,] 41 31
[5,] 21 11
[[2]]
[,1] [,2]
[1,] 22 20
[2,] 19 18
[3,] 17 16
[4,] 15 14
[5,] 13 12
> mainlist <- list(sublist_1, sublist_2)
What I really want is to make this:
> rbind(sublist_1[[1]], sublist_1[[2]])
[,1] [,2]
[1,] 1 5
[2,] 2 6
[3,] 1 3
[4,] 7 4
[5,] 8 3
[6,] 10 9
[7,] 8 7
[8,] 6 5
[9,] 4 3
[10,] 2 1
apply to all of the sublists in the mainlist.
I've tried to use various combinations of lapply, mapply, map, do.call, etc. to make it work, but either I don't know the right combinations or I need something else.
I've also noticed that rbind(sublist_1) does not work, which is making it difficult to use lapply. It has to be written as rbind(sublist_1[[1]], sublist_1[[2]]).
Thank you very much for your help.
Loop over the outer list, convert the inner list elements to data.frame and use do.call with rbind
out <- lapply(mainlist, function(x) do.call(rbind, lapply(x, as.data.frame)))

Convert dataframe to unnamed list [duplicate]

This question already has an answer here:
mapply over two lists [closed]
(1 answer)
Closed 6 years ago.
I have a dataframe df looking like this
A B C D
1 78 12 43 12
2 23 12 42 13
3 14 42 11 99
4 49 94 27 72
I need the first two columns converted into a list which looks exactly like this:
[[1]]
[1] 78 12
[[2]]
[1] 23 12
[[3]]
[1] 14 42
[[4]]
[1] 49 94
Basically what
list(c(78, 12), c(23, 12), c(14, 42), c(49, 94))
would do. I tried this
lapply(as.list(1:dim(df)[1]), function(x) df[x[1],])
as well as
lapply(as.list(1:nrow(df)), function(x) df)
But that's slightly different.
Any suggestions?
You can try the Map:
Map(c, df$A, df$B)
[[1]]
[1] 78 12
[[2]]
[1] 23 12
[[3]]
[1] 14 42
[[4]]
[1] 49 94
In case this is of interest, it is possible to accomplish this with the foreach package:
library(foreach)
# df[i, k] is row i of column k; df[[i]] would select column i instead
foreach(i = seq_len(nrow(df))) %do% (c(df[i, 1], df[i, 2]))
foreach returns a list by default. The code runs down the rows and pulls elements from the first and second columns.
An even cleaner to read version:
# take row i of the first two columns and flatten it to an unnamed vector
foreach(i = seq_len(nrow(df))) %do% (unlist(df[i, 1:2], use.names = FALSE))
Another option is with lapply
lapply(seq_len(nrow(df1)), function(i) unlist(df1[i, 1:2], use.names=FALSE))
#[[1]]
#[1] 78 12
#[[2]]
#[1] 23 12
#[[3]]
#[1] 14 42
#[[4]]
#[1] 49 94

Putting specific values of vector the rows of matrix

I have a vector like this :
Sum<-c(24, 32, 40, 48, 56, 28, 36, 44, 52, 60)
Now I want to turn this vector into a matrix in which the first five values form the first row and the remaining five form the second row, like this:
1 2 3 4 5
1 24 32 40 48 56
2 28 36 44 52 60
When I use the matrix function, it fills the matrix column by column and gives me this output, which I don't want:
[,1] [,2] [,3] [,4] [,5]
[1,] 24 40 56 36 52
[2,] 32 48 28 44 60
Is there a way to get the desired output??
Thanks for any help.
Use the byrow parameter of matrix
# byrow = TRUE fills the matrix row by row instead of the default column-wise
m <- matrix(Sum, nrow = 2, byrow = TRUE)
See ?matrix for more help

Resources