How to modify diagonal of a list element - r

I'd like to add toAdd to the diagonal of every element of list a. How can I do this? I tried
diag(a) = lapply(a, function(x) (toAdd + diag(x)))
but it doesn't work.
CODE:
a =list(
matrix(1:4, 2, 2),
matrix(5:8, 2, 2))
toAdd = list(
c(1, 3),
c(1, 2)
)
DESIRED OUTCOME:
out =list(
matrix(c(2, 2, 3, 5), 2, 2),
matrix(c(6, 6, 7, 9), 2, 2))

Try with Map
Map(function(x, y) {diag(y) <- x + diag(y); y}, toAdd, a)
Or use
Map(function(x, y) `diag<-`(y, diag(y) + x), toAdd, a)

Related

How to convert a for-loop to lapply function for parallel testing purposes?

I've been studying the advantages/disadvantages of for-loops versus versus the apply() family of functions and the answer isn't clear cut (apply() always faster than for-loops may not be true, depending on circumstances). So I want to test the various options against my actual data.
Below is a for-loop which looks pretty straightforward to me, but I'm unsure of how to replace it with lapply(). I assume lapply() is correct since the for-loop produces a list object.
The actual data I need to run this analysis against is a data frame containing 2.5 million rows, 30+ columns, so I'd like to run speed tests against the various options.
Any explanation would be most helpful. The examples I found online are light on explanations or the for-loops examples overly-complex, and I hope to learn to use apply() family functions well as they seem very useful and simpler to read than for-loops.
Here's the simplified for-loop code, with example data frame, which runs correctly for example purposes:
# Set up data frame to perform migration analysis on:
data <-
data.frame(
ID = c(1,1,1,2,2,2,3,3,3),
Period = c(1, 2, 3, 1, 2, 3, 1, 2, 3),
Values = c(5, 10, 15, 0, 2, 4, 3, 6, 9),
Flags = c("X0","X1","X2","X0","X2","X0", "X2","X1","X0")
)
# Function to set-up base table:
setTable <- function(data){
df <- data.frame(matrix(NA, ncol=length(unique(data$Flags)), nrow=length(unique(data$Flags))))
row.names(df) <- unique(data$Flags)
names(df) <- unique(data$Flags)
return(df)
}
# Function to complete migration table with for-loop:
migration <- function(data, from=1, to=3){
df <- setTable(data)
for (i in unique(data$ID)){
id_from <- as.character(data$Flags[(data$ID == i & data$Period == from)])
id_to <- as.character(data$Flags[data$ID == i & data$Period == to])
column <- which(names(df) == id_from)
row <- which(row.names(df) == id_to)
df[row, column] <- ifelse(is.na(df[row, column]), 1, df[row, column] + 1)
}
return(df)
}
# Now to run the function:
test1 <- migration(data, from=1, to=3)
Edit: wrapped in a function allowing to specify from & to:
library(data.table)
DF <- data.frame(
ID = c(1,1,1,2,2,2,3,3,3,4,4,4),
Period = c(1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3),
Values = c(5, 10, 15, 0, 2, 4, 3, 6, 9, 3, 6, 9),
Flags = c("X0","X1","X2","X0","X2","X0", "X2","X1","X0", "X2","X1","X0")
)
migration <- function(DT, from=1, to=3){
setDT(DT)
unique_flags <- unique(DT$Flags)
all_flags <- setDT(expand.grid(list(from_flag = unique_flags, to_flag = unique_flags)))
dcast(DT[, .(from_flag = Flags[Period == from], to_flag = Flags[Period == to]), by = ID][
,.N, c("from_flag", "to_flag")][
all_flags, on = c("from_flag", "to_flag")], to_flag ~ from_flag, value.var = "N")
}
migration(DF, 1, 3)
When it comes to speed in R, you can almost always count on library(data.table):
library(data.table)
DT <- setDT(data.frame(
ID = c(1,1,1,2,2,2,3,3,3,4,4,4),
Period = c(1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3),
Values = c(5, 10, 15, 0, 2, 4, 3, 6, 9, 3, 6, 9),
Flags = c("X0","X1","X2","X0","X2","X0", "X2","X1","X0", "X2","X1","X0")
))
unique_flags <- unique(DT$Flags)
all_flags <- setDT(expand.grid(list(first_flag = unique_flags, last_flag = unique_flags)))
resultDT <- dcast(DT[, .(first_flag = first(Flags), last_flag = last(Flags)), by = ID][
,.N, c("first_flag", "last_flag")][
all_flags, on = c("first_flag", "last_flag")], last_flag ~ first_flag, value.var = "N")
print(resultDT)
Step by step:
library(data.table)
DT <- setDT(data.frame(
ID = c(1,1,1,2,2,2,3,3,3,4,4,4),
Period = c(1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3),
Values = c(5, 10, 15, 0, 2, 4, 3, 6, 9, 3, 6, 9),
Flags = c("X0","X1","X2","X0","X2","X0", "X2","X1","X0", "X2","X1","X0")
))
unique_flags <- unique(DT$Flags)
all_flags <- setDT(expand.grid(list(first_flag = unique_flags, last_flag = unique_flags)))
resultDT <- DT[, .(first_flag = first(Flags), last_flag = last(Flags)), by = ID] # find relevant flags
resultDT <- resultDT[,.N, c("first_flag", "last_flag")] # count transitions
resultDT <- resultDT[all_flags, on = c("first_flag", "last_flag")] # merge all combinations
resultDT <- dcast(resultDT, last_flag ~ first_flag, value.var = "N") # dcast
print(resultDT)
Regarding lapply you can do (I'd prefer data.table):
# Set up data frame to perform migration analysis on:
input_data <-
data.frame(
ID = c(1,1,1,2,2,2,3,3,3),
Period = c(1, 2, 3, 1, 2, 3, 1, 2, 3),
Values = c(5, 10, 15, 0, 2, 4, 3, 6, 9),
Flags = c("X0","X1","X2","X0","X2","X0", "X2","X1","X0")
)
# Function to set-up base table:
setTable <- function(data){
DF <- data.frame(matrix(NA, ncol=length(unique(data$Flags)), nrow=length(unique(data$Flags))))
row.names(DF) <- unique(data$Flags)
names(DF) <- unique(data$Flags)
return(DF)
}
# Function to complete migration table with for-loop:
migration <- function(data, from=1, to=3){
DF <- setTable(data)
lapply(seq_along(unique(data$ID)), function(i){
id_from <- as.character(data$Flags[(data$ID == i & data$Period == from)])
id_to <- as.character(data$Flags[data$ID == i & data$Period == to])
column <- which(names(DF) == id_from)
row <- which(row.names(DF) == id_to)
DF[row, column] <<- ifelse(is.na(DF[row, column]), 1, DF[row, column] + 1)
})
return(DF)
}
# Now to run the function:
test1 <- migration(input_data, from=1, to=3)

Outputting a vector with repeated values

To keep my script records clean, I'd like to output vector inputs with rep() instead of repeated values in chain. Please see my example below, using dput():
v<-c(rep(1,2), rep(2,4), rep(NA,5))
dput(v)
>c(1, 1, 2, 2, 2, 2, NA, NA, NA, NA, NA)
unknown_function(v)
>c(rep(1,2), rep(2,4), rep(NA,5))
Surely trivial, but I cannot find any simple solution. Suggestions for unknown_function(), please?
rle will compute the values and lengths and from that we can paste it together:
with(rle(format(v)), paste0("c(", toString(paste0("rep(", values, ",", lengths, ")")), ")"))
## [1] "c(rep( 1,2), rep( 2,4), rep(NA,5))"
You can write a function using the stucture of rle and change it to crunch also NA and combine it with the method from #g-grothendieck.
dputRle <- function (x, nmin=3) {
if (!is.vector(x) && !is.list(x))
stop("'x' must be a vector of an atomic type")
n <- length(x)
if (n <= 1L)
return(x)
y <- x[-1L] != x[-n] | is.na(x[-1L]) != is.na(x[-n])
i <- c(which(y), n)
lengths = diff(c(0L, i))
paste0("c(", toString(unlist(sapply(seq(i), function(y) {
if(lengths[y] <= nmin) {rep(x[i[y]], lengths[y])
} else {paste0("rep(", x[i[y]], ",", lengths[y], ")")}
}))), ")")
}
v <- c(rep(1,2), rep(2,4), rep(NA,5), 1)
dputRle(v, 1)
#[1] "c(rep(1,2), rep(2,4), rep(NA,5), 1)"
dputRle(v)
#"c(1, 1, rep(2,4), rep(NA,5), 1)"
v <- 1
dputRle(v)
#[1] 1
v <- numeric(0)
dputRle(v)
#numeric(0)
Or alternative.
dputRle2 <- function (x) {
if (!is.vector(x) && !is.list(x))
stop("'x' must be a vector of an atomic type")
n <- length(x)
if (n <= 1L)
return(x)
y <- x[-1L] != x[-n] | is.na(x[-1L]) != is.na(x[-n])
i <- c(which(y), n)
paste0("rep(c(", toString(x[i]), "), c(", toString(diff(c(0L, i))), "))")
}
v <- c(rep(1,2), rep(2,4), rep(NA,5), 1)
dputRle2(v)
#[1] "rep(c(1, 2, NA, 1), c(2, 4, 5, 1))"
There are some ways to use rle with c and rep. All of the following will produce the same vector.
c(1, 2, 2, 3, 3, 3)
c(1, rep(2, 2), rep(3, 3))
c(1, rep(c(2, 3), c(2, 3)))
rep(c(1, 2, 3), c(1, 2, 3))
rep(1:3, 1:3)

Unpack a list by duplicating elements longer than 1

I have the following list that I wish to unpack (aka expand) using only base R.
For example, I want to turn this:
b <- list(a = c(1, 2), b = 1, d = c(5, 7))
into the equivalent of:
list(a = 1, a = 2, b = 1, d = 5, d = 7)
I have this function that works if only one named element has length > 1 but not if there are multiple elements:
expand_list <- function(listx){
long_elements <- as.numeric(which(lapply(listx, length) > 1))
short_elements <- as.numeric(which(lapply(listx, length) == 1))
res <- lapply(long_elements, function(x){
as.list(setNames(listx[[x]], rep(names(listx)[x], length(listx[[x]]))))
})
expanded_elements <- res[[1]]
c(listx[short_elements], expanded_elements)
}
expand_list(b)
You can use stack followed by setNames to achieve that
y <- list(a = c(1, 2), b = 1, c = 2, d = c(5, 7))
x <- stack(y)
as.list(setNames(x$values, x$ind))

Column wise granger's causal tests in R

I have 2 matrices of different parameters: M1and M3 with the same dimensions. I'll like to do a column wise grangertest in R.
M1<- matrix( c(2,3, 1, 4, 3, 3, 1,1, 5, 7), nrow=5, ncol=2)
M3<- matrix( c(1, 3, 1,5, 7,3, 1, 3, 3, 4), nrow=5, ncol=2)
I'll want to do a granger's causality test to determine if M2 granger causes M1. My actual Matrices contain more columns and rows but this is just an example. The original code between two vectors is below:
library(lmtest)
data(ChickEgg)
grangertest(chicken ~ egg, order = 3, data = ChickEgg)
How do I write this for a column wise analysis such that a matrix with 2 rows ( "F[2]" and "Pr(>F)[2]") and two columns is returned as results please?
Does this go into the right direction?
library(lmtest)
M1<- matrix( c(2,3, 1, 4, 3, 3, 1,1, 5, 7), nrow=5, ncol=2)
M3<- matrix( c(1, 3, 1,5, 7,3, 1, 3, 3, 4), nrow=5, ncol=2)
g <- list()
for (i in 1:ncol(M1)){
g[[i]] <- grangertest(M1[ ,i] ~ M3[ ,i])
}
foo <- function(x){
F <- x$F[2]
P <- x$`Pr(>F)`[2]
data.frame(F = F, P = P)
}
do.call(rbind, lapply(g, foo))
F P
1 0.3125000 0.6754896
2 0.1781818 0.7457180
We can use sapply
sapply(1:ncol(M1), function(i) {
m1 <- grangertest(M1[,i]~M3[,i])
data.frame(F=m1$F[2], p=m1$`Pr(>F)`[2])})
# [,1] [,2]
#F 0.3125 0.1781818
#p 0.6754896 0.745718

Stepwise creation of one big matrix from smaller matrices in R for-loops

I have the following code:
beta <- c(1, 2, 3)
X1 <- matrix(c(1, 1, 1, 1,
0, 1, 0, 1,
0, 0, 1, 1),
nrow = 4,
ncol = 3)
Z1 <- matrix(c(1, 1, 1, 1,
0, 1, 0, 1),
nrow = 4,
ncol = 2)
Z2 <- matrix(c(1, 1, 1, 1,
0, 1, 0, 1),
nrow = 4,
ncol = 2)
library(MASS)
S1 <- mvrnorm(70, mu = c(0,0), Sigma = matrix(c(10, 3, 3, 2), ncol = 2))
S2 <- mvrnorm(40, mu = c(0,0), Sigma = matrix(c(10, 4, 4, 2), ncol = 2))
z <- list()
y <- list()
for(j in 1:dim(S1)[1]){
for(i in 1:dim(S2)[1]){
z[[i]] <- X1 %*% beta+Z1 %*% S1[j,]+Z2 %*% S2[i,]+matrix(rnorm(4, mean = 0 , sd = 0.27), nrow = 4)
Z <- unname(do.call(rbind, z))
}
y[[j]] <- Z
Y <- unname(do.call(rbind, y))
}
X1 is a 4x3, Z1 and Z2 are 4x2 matrices. So everytime X1 %*% beta+X2 %*% S1[j,]+X2 %*% S2[i,]+matrix(rnorm(4, mean = 0 , sd = sigma), nrow = 4) is called it outputs a 4x1 matrix. So far I store all these values in the inner and outer loop in two lists and then call rbind() to transform them into a matrix. Is there a way to directly store them in matrices?
You can avoid using lists if you rely on the apply functions and on vector recycling. I broke down your equation into its parts. (I hope I interpreted it accurately!)
Mb <- as.vector(X1 %*% beta)
M1 <- apply(S1,1,function(x) Z1 %*% x )
M2 <- apply(S2,1,function(x) Z2 %*% x ) + Mb
Mout <- apply(M1,2,function(x) M2 + as.vector(x))
as.vector(Mout) + rnorm(length(Mout), mean = 0 , sd = 0.27)
because the random numbers are added after the matrix multiplication (ie are not involved in any calculation), you can just put them in on the end.
Also note that you can't add a smaller matrix to a larger one, but if you make it a vector first then R will recycle it as necessary. So when Mb (a vector of length 4) is added to a matrix with 4 rows and n columns, it is recycled n times.

Resources