Manipulating sub matrices in R - r

Nh<-matrix(c(17,26,30,17,23, 17 ,24, 23), nrow=2, ncol=4); Nh
Sh<-matrix(c(8.290133, 6.241174, 6.096808, 7.4449672, 6.894924, 7.692115,
4.540521, 7.409122), nrow=2, ncol=4); Sh
NhSh<-as.matrix(Nh*Sh); NhSh
rh<-c( 0.70710678, 0.40824829, 0.28867513, 0.22360680, 0.18257419,
0.15430335, 0.13363062, 0.11785113, 0.10540926, 0.09534626); rh
pv <- c()
for (j in 1:2) {
for (i in 1:4) {
pv <- rbind(pv, NhSh[j,i]*rh)
}
}
pv
row.names(pv) <- rep(c(1:2), each = 4)
lst<-lapply(split(seq_len(nrow(pv)), as.numeric(row.names(pv))), function(i)
pv[i,])
data<-40
nlargest <- function(x, data)
{
res <- order(x)[seq_len(data)];
pos <- arrayInd(res, dim(x), useNames = TRUE);
list(values = pv[res], position = pos)
}
out <- lapply(lst, nlargest, data = 40)
In continuation of above code Is there any brief way of repeating the following steps for each out$’k’$position for k in 1:2?
s1<-c(1,1,1,1); ch<-c(5,7,10,5); C<-150; a<-out$'1'$position
for (j in a[40:1, "row"] )
{
s1[j] <- s1[j]+1;
cost1 <- sum(ch*s1);
if (cost1>=C) break
}
s1; cost1
#Output [1] 5 6 6 5
# [1] 152
I have to get 2 values for 's' and 'cost' for out$k$position. I tried
mat = replicate (2,{x = matrix(data = rep(NA, 80), ncol = 2)}); mat
for (k in 1:2)
{
mat[,,k]<-out$'k'$position
}
mat
Error in mat[, , k] <- out$k$position :number of items to replace is not a multiple of replacement length
for (k in 1:2)
{
for (j in mat[,,k][40:1] ) {
s[j] <- s[j]+1
cost <- sum(ch*s)
if (cost>=C) break
}
}
s; cost
Error : Error in s[j] <- s[j] + 1 : NAs are not allowed in subscripted assignments
Please anyone help in resolving these errors.

We could apply the function directly by looping over the list. Note that each element of the list is a matrix
sapply(lst, is.matrix)
# 1 2
#TRUE TRUE
so, there is no need to unlist and create a matrix
out <- lapply(lst, nlargest, data = 40)
-checking with the OP's results
out1 <- nlargest(sub1, 40)
identical(out[[1]], out1)
#[1] TRUE
Update2
Based on the second update, we need to initialize 'cost' and 'sl' with the same length as 'k' elements. Here, we initialize 'sl' as a list of vectors
sl <- rep(list(c(1, 1, 1, 1)), 2)
C <- 150
cost <- numeric(2)
for (k in 1:2){
for (j in mat[,,k][40:1, 1] ) {
sl[[k]][j] <- sl[[k]][j]+1
cost[k] <- sum(ch*sl[[k]])
if (cost[k] >=C) break
}
}
sl
#[[1]]
#[1] 5 7 6 4
#[[2]]
#[1] 6 5 5 7
cost
#[1] 154 150

Related

Error in while (e_i$X1 < 12 | e_i$X2 < 12) { : argument is of length zero

In an earlier question (R: Logical Conditions Not Being Respected), I learned how to make the following simulation :
Step 1: Keep generating two random numbers "a" and "b" until both "a" and "b" are greater than 12
Step 2: Track how many random numbers had to be generated until it took for Step 1 to be completed
Step 3: Repeat Step 1 and Step 2 100 times
res <- matrix(0, nrow = 0, ncol = 3)
for (j in 1:100){
a <- rnorm(1, 10, 1)
b <- rnorm(1, 10, 1)
i <- 1
while(a < 12 | b < 12) {
a <- rnorm(1, 10, 1)
b <- rnorm(1, 10, 1)
i <- i + 1
}
x <- c(a,b,i)
res <- rbind(res, x)
}
head(res)
[,1] [,2] [,3]
x 12.14232 12.08977 399
x 12.27158 12.01319 1695
x 12.57345 12.42135 302
x 12.07494 12.64841 600
x 12.03210 12.07949 82
x 12.34006 12.00365 782
Question: Now, I am trying to make a slight modification to the above code - Instead of "a" and "b" being produced separately, I want them to be produced "together" (in math terms: "a" and "b" were being produced from two independent univariate normal distributions, now I want them to come from a bivariate normal distribution).
I tried to modify this code myself:
library(MASS)
Sigma = matrix(
c(1,0.5, 0.5, 1), # the data elements
nrow=2, # number of rows
ncol=2, # number of columns
byrow = TRUE) # fill matrix by rows
res <- matrix(0, nrow = 0, ncol = 3)
for (j in 1:100){
e_i = data.frame(mvrnorm(n = 1, c(10,10), Sigma))
e_i$i <- 1
while(e_i$X1 < 12 | e_i$X2 < 12) {
e_i = data.frame(mvrnorm(n = 1, c(10,10), Sigma))
e_i$i <- i + 1
}
x <- c(e_i$X1, e_i$X2 ,i)
res <- rbind(res, x)
}
res = data.frame(res)
But this is producing the following error:
Error in while (e_i$X1 < 12 | e_i$X2 < 12) { : argument is of length
zero
If I understand your code correctly you are trying to see how many samples occur before both values are >=12 and doing that for 100 trials? This is the approach I would take:
library(MASS)
for(i in 1:100){
n <- 1
while(any((x <- mvrnorm(1, mu=c(10,10), Sigma=diag(0.5, nrow=2)+0.5))<12)) n <- n+1
if(i==1) res <- data.frame("a"=x[1], "b"=x[2], n)
else res <- rbind(res, data.frame("a"=x[1], "b"=x[2], n))
}
Here I am assigning the results of a mvrnorm to x within the while() call. In that same call, it evaluates whether either are less than 12 using the any() function. If that evaluates to FALSE, n (the counter) is increased and the process repeated. Once TRUE, the values are appended to your data.frame and it goes back to the start of the for-loop.
Regarding your code, the mvrnorm() function is returning a vector, not a matrix, when n=1 so both values go into a single variable in the data.frame:
data.frame(mvrnorm(n = 1, c(10,10), Sigma))
Returns:
mvrnorm.n...1..c.10..10...Sigma.
1 9.148089
2 10.605546
The matrix() function within your data.frame() calls, along with some tweaks to your use of i, will fix your code:
library(MASS)
Sigma = matrix(
c(1,0.5, 0.5, 1), # the data elements
nrow=2, # number of rows
ncol=2, # number of columns
byrow = TRUE) # fill matrix by rows
res <- matrix(0, nrow = 0, ncol = 3)
for (j in 1:10){
e_i = data.frame(matrix(mvrnorm(n = 1, c(10,10), Sigma), ncol=2))
i <- 1
while(e_i$X1[1] < 12 | e_i$X2[1] < 12) {
e_i = data.frame(matrix(mvrnorm(n = 1, c(10,10), Sigma), ncol=2))
i <- i + 1
}
x <- c(e_i$X1, e_i$X2 ,i)
res <- rbind(res, x)
}
res = data.frame(res)

Faster computation of double for loop?

I have a piece of working code that is taking too many hours (days?) to compute.
I have a sparse matrix of 1s and 0s, I need to subtract each row from any other row, in all possible combinations, multiply the resulting vector by another vector, and finally average the values in it so to get a single scalar which I need to insert in a matrix. What I have is:
m <- matrix(
c(0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0), nrow=4,ncol=4,
byrow = TRUE)
b <- c(1,2,3,4)
for (j in 1:dim(m)[1]){
for (i in 1:dim(m)[1]){
a <- m[j,] - m[i,]
a[i] <- 0L
a[a < 0] <- 0L
c <- a*b
d[i,j] <- mean(c[c > 0])
}
}
The desired output is matrix with the same dimensions of m, where each entry is the result of these operations.
This loop works, but are there any ideas on how to make this more efficient? Thank you
My stupid solution is to use apply or sapply function, instead of for loop to do the iterations:
sapply(1:dim(m)[1], function(k) {z <- t(apply(m, 1, function(x) m[k,]-x)); diag(z) <- 0; z[z<0] <- 0; apply(t(apply(z, 1, function(x) x*b)),1,function(x) mean(x[x>0]))})
I tried to compare your solution and this in terms of running time in my computer, yours takes
t1 <- Sys.time()
d1 <- m
for (j in 1:dim(m)[1]){
for (i in 1:dim(m)[1]){
a <- m[j,] - m[i,]
a[i] <- 0L
a[a < 0] <- 0L
c <- a*b
d1[i,j] <- mean(c[c > 0])
}
}
Sys.time()-t1
Yours needs Time difference of 0.02799988 secs. For mine, it is reduced a bit but not too much, i.e., Time difference of 0.01899815 secs, when you run
t2 <- Sys.time()
d2 <- sapply(1:dim(m)[1], function(k) {z <- t(apply(m, 1, function(x) m[k,]-x)); diag(z) <- 0; z[z<0] <- 0; apply(t(apply(z, 1, function(x) x*b)),1,function(x) mean(x[x>0]))})
Sys.time()-t2
You can try it on your own computer with larger matrix, good luck!
1) create test sparse matrix:
nc <- nr <- 100
p <- 0.001
require(Matrix)
M <- Matrix(0L, nr, nc, sparse = T) # 0 matrix
n1 <- ceiling(p * (prod(dim(M)))) # 1 count
M[1:n1] <- 1L # fill only first column, to approximate max non 0 row count
# (each row has at maximum 1 positive element)
sum(M)/(prod(dim(M)))
b <- 1:ncol(M)
sum(rowSums(M))
So, if the proportion given is correct then we have at most 10 rows that contain non 0 elements
Based on this fact and your supplied calculations:
# a <- m[j, ] - m[i, ]
# a[i] <- 0L
# a[a < 0] <- 0L
# c <- a*b
# mean(c[c > 0])
we can see that the result will be meaningful only form[, j] rows which have at least 1 non 0 element
==> we can skip calculations for all m[, j] which contain only 0s, so:
minem <- function() { # write as function
t1 <- proc.time() # timing
require(data.table)
i <- CJ(1:nr, 1:nr) # generate all combinations
k <- rowSums(M) > 0L # get index where at least 1 element is greater that 0
i <- i[data.table(V1 = 1:nr, k), on = 'V1'] # merge
cat('at moust', i[, sum(k)/.N*100], '% of rows needs to be calculated \n')
i[k == T, rowN := 1:.N] # add row nr for 0 subset
i2 <- i[k == T] # subset only those indexes who need calculation
a <- M[i2[[1]],] - M[i2[[2]],] # operate on all combinations at once
a <- drop0(a) # clean up 0
ids <- as.matrix(i2[, .(rowN, V2)]) # ids for 0 subset
a[ids] <- 0L # your line: a[i] <- 0L
a <- drop0(a) # clean up 0
a[a < 0] <- 0L # the same as your line
a <- drop0(a) # clean up 0
c <- t(t(a)*b) # multiply each row with vector
c <- drop0(c) # clean up 0
c[c < 0L] <- 0L # for mean calculation
c <- drop0(c) # clean up 0
r <- rowSums(c)/rowSums(c > 0L) # row means
i[k == T, result := r] # assign results to data.table
i[is.na(result), result := NaN] # set rest to NaN
d2 <- matrix(i$result, nr, nr, byrow = F) # create resulting matrix
t2 <- proc.time() # timing
cat(t2[3] - t1[3], 'sec \n')
d2
}
d2 <- minem()
# at most 10 % of rows needs to be calculated
# 0.05 sec
Test on smaller example if results matches
d <- matrix(NA, nrow(M), ncol(M))
for (j in 1:dim(M)[1]) {
for (i in 1:dim(M)[1]) {
a <- M[j, ] - M[i, ]
a[i] <- 0L
a[a < 0] <- 0L
c <- a*b
d[i, j] <- mean(c[c > 0])
}
}
all.equal(d, d2)
Can we get results for your real data size?:
# generate data:
nc <- nr <- 6663L
b <- 1:nr
p <- 0.0001074096 # proportion of 1s
M <- Matrix(0L, nr, nc, sparse = T) # 0 matrix
n1 <- ceiling(p * (prod(dim(M)))) # 1 count
M[1:n1] <- 1L
object.size(as.matrix(M))/object.size(M)
# storing this data in usual matrix uses 4000+ times more memory
# calculation:
d2 <- minem()
# at most 71.57437 % of rows needs to be calculated
# 28.33 sec
So you need to convert your matrix to sparse one with
M <- Matrix(m, sparse = T)

For loops to create symmetric matrices

I want to reduce time and memory usage (I previously used outer for this but it consumes more memory than I have) by reducing the iterations to create a symmetric matrix, that is sol[i, j] is the same as sol[j, i].
My code so far:
# Prepare input
subss <- list(a = c(1, 2, 4), b = c(1, 2, 3), c = c(4, 5))
A <- matrix(runif(25), ncol = 5, nrow = 5)
# Pre allocate memory
sol <- matrix(nrow = length(subss), ncol = length(subss),
dimnames = list(names(subss), names(subss)))
x <- 0
for (i in seq_along(subss)) {
# Omit for the subsets I already calculated ?
for (j in seq_along(subss)) {
x <- x + 1
message(x)
# The function I use here might result in a NA
sol[i, j] <- mean(A[subss[[i]], subss[[j]]])
sol[j, i] <- sol[i, j] # Will overwrite when it shouldn't
}
}
Will use 9 iterations, how can I avoid them and do just 6 iterations?
I need to calculate the symmetric values, so this question doesn't apply. Also this other one doesn't work either because there might be many combinations and at some point it can't allocate the vector in memory.
A for loop will usually be slower than outer. Try byte-compiling the loop or implement it in Rcpp.
subss <- list(a = c(1, 2, 4), b = c(1, 2, 3), c = c(4, 5))
set.seed(42)
A <- matrix(runif(25), ncol = 5, nrow = 5)
#all combinations of indices
ij <- combn(seq_along(subss), 2)
#add all i = j
ij <- matrix(c(ij, rep(seq_along(subss), each = 2)), nrow = 2)
#preallocate
res <- numeric(ncol(ij))
#only one loop
for (k in seq_len(ncol(ij))) {
message(k)
res[k] <- mean(A[subss[[ij[1, k]]], subss[[ij[2, k]]]])
}
#1
#2
#3
#4
#5
#6
#create symmetric sparse matrix
library(Matrix)
sol <- sparseMatrix(i = ij[1,], j = ij[2,],
x = res, dims = rep(length(subss), 2),
symmetric = TRUE, index1 = TRUE)
#3 x 3 sparse Matrix of class "dsCMatrix"
#
#[1,] 0.7764715 0.6696987 0.7304413
#[2,] 0.6696987 0.6266553 0.6778936
#[3,] 0.7304413 0.6778936 0.5161089
I found a way with plain for loops:
x <- 0
for (i in seq_along(subss)) {
for (j in seq_len(i)) { # or for (j in 1:i) as proposed below
x <- x + 1
message(x)
sol[i, j] <- mean(A[subss[[i]], subss[[j]]])
sol[j, i] <- sol[i, j]
}
}
for (i in 1:length(subss)) {
for (j in 1:i) {
message(i, ' ', j, ' - ', mean(A[subss[[i]], subss[[j]]]) ) # Check iterations and value
sol2[i, j] <- sol2[j, i] <- mean(A[subss[[i]], subss[[j]]])
}
}
I checked your script values and aren't symmetric:
1 1 - 0.635455905252861
1 2 - 0.638608284398086
1 3 - 0.488700995299344
2 1 - 0.568414432255344
2 2 - 0.602851431118324
2 3 - 0.516099992596234
3 1 - 0.595461705311512
3 2 - 0.656920690399905
3 3 - 0.460815121419728
Mine values (same as #Llopis):
1 2 - 0.638608284398086
1 3 - 0.488700995299344
2 2 - 0.602851431118324
2 3 - 0.516099992596234
3 2 - 0.656920690399905
3 3 - 0.460815121419728

How to generate multiple matrix in R

I have gotten two lists of values in R.
daily_max_car: (List 1)
21 21 22 22 22 22 21
daily_0.8: (List 2)
16 17 17 17 18 17 17
Trying to write a For Loop in R-Studio to generate multiple matrix by using the one of the values from these two lists (One by One).
Here is the code I have been using to generate one matrix!
Lambda <- 21 (From List 1)
Mue <- 4
Rho <- Lambda/Mue
N <- 16 (From List 2)
All of these four parameters will be used in the "calculatewq" Function.
calculatewq <- function(c)
{....Some thing happening }
##Create Matrix
matrix1 <- matrix(0,Lambda,4)
matrix1[,1] <- 1:Lambda
### Create a column of matrix with repeated "N"
rep.row<-function(x,y)
{matrix(rep(x,each=y),nrow=y)}
created_mar_1 <- rep.row(N,Lambda)
car_n<- created_mar_1-matrix1[,1]
created_mar_3 <- rep.row(69*60*24,Lambda)
## Add into Matrix
for (i in 1:Lambda)
{matrix1[i,2] <- calculatewq(i)[2]
matrix1[i,3] <- calculatewq(i)[5]
matrix1[,4] = car_n*created_mar_3}`
Once I change one of the parameters it will generate a new matrix.
Thus, how can I write a for loop to generate multiple matrix while I am putting different value in Lambda and N.
Thank you so much!
Sampson
I removed for loop inside calculatewq function. Please make sure you needed a for loop in it.
myfun <- function(Lambda, N, mu )
{
# browser()
var1 <- seq_len( Lambda )
var2 <- ( rep( N, each = Lambda) ) - var1
var3 <- rep( 69*60*24, each = Lambda )
var4 <- var2 * var3
fun_vals <- do.call( 'rbind',
lapply( var1, function( x ) calculatewq( x, Lambda = Lambda, N = N, mu = mu ) ) )
mat <- matrix( NA, nrow = Lambda, ncol = mu )
mat[, 1] <- var1
mat[, 2] <- fun_vals[, 'Wq']
mat[, 3] <- fun_vals[, 'customer_serviced']
mat[, 4] <- var4
return(mat)
}
calculatewq <- function( x, Lambda, N, mu )
{
# browser()
Rho <- Lambda / mu
p0_inv <- ( Rho^x * (1-(( Rho/x )^( N-x+1)))) / (factorial( x ) * ( 1 - ( Rho / x ) ) )
p0_inv <- p0_inv + ( Rho^x) / factorial( x )
P0 <- 1/p0_inv
Lq <- ( Rho^(x+1)) * (1-((Rho/x)^(N-x+1))-((N-x+1)*(1-(Rho/x))*((Rho/x)^(N-x))))*P0/(factorial(x-1)*(x-Rho)^2)
Wq <- 60*Lq/Lambda
Ls <- Lq + Rho
Ws <- 60*Ls/Lambda
PN <- (Rho^N)*P0/(factorial(x)*x^(N-x))
customer_serviced <- (1 - PN)*100
a <- cbind( Lq, Wq, Ls, Ws, customer_serviced )
return(a)
}
mu <- 4
res <- Map( myfun,
list( 21 ,21, 22, 22 ,22, 22 ,21 ),
list( 16, 17, 17, 17, 18, 17 ,17 ),
mu)
head( res[[1]])
# [,1] [,2] [,3] [,4]
# [1,] 1 42.184874 19.04762 1490400
# [2,] 2 38.241748 38.09526 1391040
# [3,] 3 33.339271 57.13862 1291680
# [4,] 4 26.014138 75.70348 1192320
# [5,] 5 16.339462 89.88989 1092960
# [6,] 6 9.121053 96.32498 993600
daily_max_car <- list(21,21,22,22,22,22,21)
daily_0.8 <- list(16,17,17,17,18,17,17)
myfunction <- function(Lambda, N){
Mue <- 4
Rho <- Lambda/Mue
df <- as.data.frame(matrix(0, ncol = 4, nrow = Lambda))
names(df) <- c("A","B","C","D")
df[,1] <- 1:Lambda
df[,2] <- N
df[,3] <- df[,1] - df[,2]
df[,4] <- 69*60*24
return(df)
}
myfunction(21,16)
result <- mapply(myfunction, daily_max_car, daily_0.8)
Result
Lambda <- 21
Mue <- 4
Rho <- Lambda/Mue
N <- 19
matrix1 <- matrix(0,Lambda,4)
matrix1[,1] <- 1:Lambda
rep.row<-function(x,y)
{
matrix(rep(x,each=y),nrow=y)
}
created_mar_1 <- rep.row(N,Lambda)
car_n<- created_mar_1-matrix1[,1]
created_mar_3 <- rep.row(69*60*24,Lambda)
calculatewq(7)
calculatewq <- function(c)
{
P0inv <- (Rho^c*(1-((Rho/c)^(N-c+1))))/(factorial(c)*(1-(Rho/c)))
for (i in 1:c-1)
{
P0inv = P0inv + (Rho^i)/factorial(i)
}
P0 = 1/P0inv
Lq = (Rho^(c+1))*(1-((Rho/c)^(N-c+1))-((N-c+1)*(1-(Rho/c))*((Rho/c)^(N- c))))*P0/(factorial(c-1)*(c-Rho)^2)
Wq = 60*Lq/Lambda
Ls <- Lq + Rho
Ws <- 60*Ls/Lambda
PN <- (Rho^N)*P0/(factorial(c)*c^(N-c))
customer_serviced <- (1 - PN)*100
a <- cbind(Lq,Wq,Ls,Ws,customer_serviced)
return(a)
}
for (i in 1:Lambda)
{
matrix1[i,2] <- calculatewq(i)[2]
matrix1[i,3] <- calculatewq(i)[5]
matrix1[,4] = car_n*created_mar_3
}

Implement table() function as a user defined function

x <- c(1,2,3,2,1)
table(x)
# x
# 1 2 3
# 2 2 1
Outputs how many times each element occur in the vector.
I am trying to imitate the above function using function()
Below is my code:
TotalTimes = function(x){
times = 0
y = unique(x)
for (i in 1:length(y)) {
for (i in 1:length(x)) {
if(y[i] == x[i])
times = times + 1
}
return(times)
}
}
What would be the right approach?
Here's a one-liner, using rle():
f <- function(x) {
with(rle(sort(x)), setNames(lengths, values))
}
f(c(1,2,3,2,1))
# 1 2 3
# 2 2 1
Alternatively, here's an option that's less "tricky", and is probably a better model for learning to code in an R-ish way:
f2 <- function(x) {
ss <- sort(x)
uu <- unique(ss)
names(uu) <- uu
sapply(uu, function(u) sum(ss == u))
}
f2(c(1,2,3,2,1))
# 1 2 3
# 2 2 1
function(x) {
q = sapply(unique(x), function(i) sum(x == i))
names(q) = unique(x)
return(q)
}
Here is one method using base R:
# data
x <- c(1,2,3,2,1)
# set up
y <- sort(unique(x))
counts <- rep_len(0, length.out=length(y))
names(counts) <- y
for(i in seq_along(x)) {
counts[x[i] == y] <- counts[x[i] == y] + 1
}
Wrapping it in a function:
table2 <- function(x) {
# transform x into character vector to reduce search cost in loop
x <- as.character(x)
y <- sort(unique(x))
counts <- rep_len(0, length.out=length(y))
names(counts) <- y
for(i in seq_along(x)) {
counts[x[i]] <- counts[x[i]] + 1L
}
return(counts)
}
This version only accepts a single vector, of course. At #Frank's suggestion, the function version is slightly different, and possibly faster, in that it transforms the input x into a character. The potential speed up is in the search in counts[x[i]] where the name in counts is referred to (as x[i]), rather than performing a search using "==."

Resources