Related
I have a huge matrix say M1 and I want to create a new matrix M2, where M2 will be a copy of M1 and with a 30% substitution of non-zero values of M1 to 0.
Please let me know how to work around this.
sample from which(m > 0), I use <- 999 to demonstrate, just replace with <- 0. The which gives the indices of the non-zeroes, and we sample 30% from them.
m
# [,1] [,2] [,3] [,4] [,5]
# [1,] 9 4 9 7 3
# [2,] 9 7 1 8 4
# [3,] 2 9 9 3 0
# [4,] 8 2 9 6 9
# [5,] 6 4 0 0 4
# [6,] 5 9 5 8 9
# [7,] 7 9 3 0 8
# [8,] 1 1 9 2 6
# [9,] 6 4 4 9 9
# [10,] 7 5 8 6 6
m[sample(which(m > 0), length(m)*.3)] <- 999
m
# [,1] [,2] [,3] [,4] [,5]
# [1,] 9 4 999 7 999
# [2,] 999 7 1 8 4
# [3,] 999 9 999 3 0
# [4,] 999 2 999 6 999
# [5,] 999 4 0 0 4
# [6,] 999 9 5 999 9
# [7,] 7 9 999 0 8
# [8,] 1 1 9 999 6
# [9,] 6 4 999 999 9
# [10,] 7 5 8 6 6
sum(m == 999)/length(m) ## check
# [1] 0.3
Data:
set.seed(42)
m <- matrix(trunc(runif(50, 0, 1)*10), 10, 5)
The trick is to filter in matrix by non-zero elements like this:
M1<-matrix(rnorm(36),nrow=6)
M2 <- M1
M2
#> [,1] [,2] [,3] [,4] [,5] [,6]
#> [1,] 1.1450903 -1.3354652 1.7408616 2.4104801 1.0190374 -0.4452658
#> [2,] -0.6193147 0.6247960 0.8880114 0.2063487 1.4564834 -1.6591764
#> [3,] -1.4440763 -0.1740776 2.1646262 -1.3795811 -0.2231788 -2.1524281
#> [4,] 1.0929878 2.4982284 -1.5304989 1.0759637 0.2585276 0.3428240
#> [5,] -1.4013196 -0.3208720 0.8025738 -0.7251131 0.1134538 -1.2704551
#> [6,] -0.7992393 0.5610579 2.0940327 1.1937530 -1.5585291 -1.0766868
M2[sample(which(M2 > 0), length(M2[M2!=0])*0.3, replace = FALSE)] = 0
M2
#> [,1] [,2] [,3] [,4] [,5] [,6]
#> [1,] 1.1450903 -1.3354652 0.0000000 0.0000000 0.0000000 -0.4452658
#> [2,] -0.6193147 0.0000000 0.8880114 0.2063487 0.0000000 -1.6591764
#> [3,] -1.4440763 -0.1740776 2.1646262 -1.3795811 -0.2231788 -2.1524281
#> [4,] 0.0000000 0.0000000 -1.5304989 1.0759637 0.2585276 0.0000000
#> [5,] -1.4013196 -0.3208720 0.8025738 -0.7251131 0.0000000 -1.2704551
#> [6,] -0.7992393 0.5610579 0.0000000 1.1937530 -1.5585291 -1.0766868
Created on 2022-07-11 by the reprex package (v2.0.1)
Option jay mentioned in comments
M1<-matrix(rnorm(36),nrow=6)
M2 <- M1
M2
#> [,1] [,2] [,3] [,4] [,5] [,6]
#> [1,] 0.2704036 1.66744279 1.2249968 0.7105401 0.2930494 0.3019442
#> [2,] 0.6701630 0.23103360 0.3433342 -0.9176159 0.2890372 -1.3139269
#> [3,] -0.7845245 0.64272243 0.3152463 0.2794443 0.3818046 -1.7073781
#> [4,] 1.3994086 0.04721819 -0.1364107 -0.2889496 1.7605232 1.0270522
#> [5,] 0.8934011 0.53878503 -1.6008799 -0.4516311 -1.1541206 -1.3896758
#> [6,] 0.3205831 1.15597968 -0.4654826 -1.3999804 -1.0597505 0.2982040
i <- M2 != 0
M2[i] <- replace(M2[i], sample(sum(i), sum(i)*.3), 999)
M2
#> [,1] [,2] [,3] [,4] [,5] [,6]
#> [1,] 0.2704036 1.66744279 1.2249968 999.0000000 999.0000000 0.3019442
#> [2,] 0.6701630 0.23103360 0.3433342 -0.9176159 0.2890372 999.0000000
#> [3,] -0.7845245 0.64272243 999.0000000 0.2794443 0.3818046 -1.7073781
#> [4,] 1.3994086 0.04721819 -0.1364107 -0.2889496 999.0000000 1.0270522
#> [5,] 999.0000000 0.53878503 999.0000000 -0.4516311 -1.1541206 999.0000000
#> [6,] 0.3205831 1.15597968 -0.4654826 -1.3999804 999.0000000 999.0000000
Created on 2022-07-11 by the reprex package (v2.0.1)
First answer
You can sample through your matrix, where you can specify the number of elements you want to replace to 0 by saying that the amount of elements is 30% of your matrix. You can use the following code:
M1<-matrix(rnorm(36),nrow=6)
M1
#> [,1] [,2] [,3] [,4] [,5] [,6]
#> [1,] -1.4181422 -0.1675572 -0.07126163 -0.2250808 0.06538817 0.7096829
#> [2,] 0.1265111 0.6535900 -0.81718699 0.1660550 -0.84969221 0.5222353
#> [3,] -0.5860745 -0.7130558 0.80823046 0.5601937 2.06109461 -1.4000195
#> [4,] -1.8507512 -0.2643667 0.62158830 -1.0455708 -1.28048923 -0.3291040
#> [5,] -1.5950047 0.6611776 1.19810322 -0.8927425 -0.70925100 -1.8455213
#> [6,] -1.2737187 -1.3739572 -0.92623331 -0.1034901 1.12354331 -0.6559306
M1[sample(1:length(M1), length(M1)*0.3, replace = FALSE)] <- 0
M2 <- M1
M2
#> [,1] [,2] [,3] [,4] [,5] [,6]
#> [1,] -1.4181422 -0.1675572 0.0000000 0.0000000 0.0000000 0.0000000
#> [2,] 0.1265111 0.6535900 -0.8171870 0.1660550 -0.8496922 0.0000000
#> [3,] -0.5860745 0.0000000 0.8082305 0.5601937 2.0610946 -1.4000195
#> [4,] -1.8507512 -0.2643667 0.6215883 0.0000000 -1.2804892 -0.3291040
#> [5,] -1.5950047 0.6611776 0.0000000 -0.8927425 -0.7092510 -1.8455213
#> [6,] -1.2737187 -1.3739572 0.0000000 0.0000000 1.1235433 -0.6559306
Created on 2022-07-11 by the reprex package (v2.0.1)
So I have a 1256 by 5 matrix.
> head(retmatx12.30.3)
AMT HON KO
[1,] -0.006673489 -0.001292867 -0.0033654493
[2,] 0.004447249 0.002848406 0.0082009877
[3,] 0.001789891 0.002754232 -0.0035886573
[4,] -0.003479321 0.002231823 0.0024011113
[5,] -0.006605786 0.015159190 -0.0002394852
[6,] -0.002375004 -0.008267790 -0.0100625938
NEM NVAX
[1,] -0.034023392 -0.023255737
[2,] 0.016436786 0.007936468
[3,] 0.009529404 0.031496102
[4,] 0.046052588 0.007633549
[5,] -0.031446425 0.037878788
[6,] -0.001694084 0.036496350
I want to apply a function I've made to rows 1-126, then 2-127, and so on. The function is a block of matrix algebra that uses a matrix and a few vectors. Is it wise to somehow break the larger matrix into 1,131 126 by 5 matrices, and apply the function over each (hopefully at once). Or, some sort of application of apply?
Any help is greatly appreciated. Thanks
The actual numbers in the matrix are immaterial, so I'll use much smaller data to demonstrate one method, and a simple function to demonstrate the rolling calculation:
m <- matrix(1:24, nrow=8)
somefunc <- function(x) x %*% seq(ncol(x))
wid <- 4 # 126
somefunc(m[1:4,])
# [,1]
# [1,] 70
# [2,] 76
# [3,] 82
# [4,] 88
somefunc(m[2:5,])
# [,1]
# [1,] 76
# [2,] 82
# [3,] 88
# [4,] 94
The actual rolling work:
lapply(seq(nrow(m) - wid + 1), function(i) somefunc(m[i - 1 + seq(wid),]))
# [[1]]
# [,1]
# [1,] 70
# [2,] 76
# [3,] 82
# [4,] 88
# [[2]]
# [,1]
# [1,] 76
# [2,] 82
# [3,] 88
# [4,] 94
# [[3]]
# [,1]
# [1,] 82
# [2,] 88
# [3,] 94
# [4,] 100
# [[4]]
# [,1]
# [1,] 88
# [2,] 94
# [3,] 100
# [4,] 106
# [[5]]
# [,1]
# [1,] 94
# [2,] 100
# [3,] 106
# [4,] 112
where the first element of the output is from rows 1-4, then 2-5, then 2-6, etc.
I am new to R.
I am trying to create a time-loop, where the value of a node attribute gets updated over time, if the nodes are not informed. So far, I have something like the following:
#loading required packages
library(igraph)
library(tidyverse)
library(ggraph)
library(ggnetwork)
library(tidygraph)
g <- play_erdos_renyi(10, .2)
M<-matrix(0:1, nrow = 10, ncol = 10)
Periods=10
seeds=c(1,3)
#Seeds are informed
V(g)$informed <- F
V(g)[seeds]$informed=T
#Seeds have prob 0.5 (just treat prob as any variable here)
V(g)$prob=0
V(g)[seeds]$prob=0.5
#Time loop for creating prob over time
#prob at [t+1]=M*prob at [t]
#calculate prob for vertex v in time t+1 by multiplying 10X10 matrix M with 10X1 vector V(g)$prob
#then considering the v-th row
for (t in 1:Periods) {
for(v in V(g)) {
if(!V(g)[v]$informed) {
V(g)[v]$prob[t+1]= M %*% V(g)$prob[t][v]
}}}
I am getting
Error in M %*% V(g)$prob[t][v] : non-conformable arguments
I was wondering how to fix this.
I think you should use
for (t in 1:Periods) {
V(g)$prob <- M %*% matrix(V(g)$prob)
}
If you want to keep the track of dynamics, you can use Reduce
Reduce(
function(x, A) A %*% x,
rep(list(M), Periods),
init = matrix(V(g)$prob),
accumulate = TRUE
)
which gives you the evolution of V(g)$prob along time from 0 to length(Periods)
[[1]]
[,1]
[1,] 0.5
[2,] 0.0
[3,] 0.5
[4,] 0.0
[5,] 0.0
[6,] 0.0
[7,] 0.0
[8,] 0.0
[9,] 0.0
[10,] 0.0
[[2]]
[,1]
[1,] 1
[2,] 2
[3,] 3
[4,] 4
[5,] 5
[6,] 6
[7,] 7
[8,] 8
[9,] 9
[10,] 10
[[3]]
[,1]
[1,] 55
[2,] 110
[3,] 165
[4,] 220
[5,] 275
[6,] 330
[7,] 385
[8,] 440
[9,] 495
[10,] 550
[[4]]
[,1]
[1,] 3025
[2,] 6050
[3,] 9075
[4,] 12100
[5,] 15125
[6,] 18150
[7,] 21175
[8,] 24200
[9,] 27225
[10,] 30250
[[5]]
[,1]
[1,] 166375
[2,] 332750
[3,] 499125
[4,] 665500
[5,] 831875
[6,] 998250
[7,] 1164625
[8,] 1331000
[9,] 1497375
[10,] 1663750
[[6]]
[,1]
[1,] 9150625
[2,] 18301250
[3,] 27451875
[4,] 36602500
[5,] 45753125
[6,] 54903750
[7,] 64054375
[8,] 73205000
[9,] 82355625
[10,] 91506250
[[7]]
[,1]
[1,] 503284375
[2,] 1006568750
[3,] 1509853125
[4,] 2013137500
[5,] 2516421875
[6,] 3019706250
[7,] 3522990625
[8,] 4026275000
[9,] 4529559375
[10,] 5032843750
[[8]]
[,1]
[1,] 27680640625
[2,] 55361281250
[3,] 83041921875
[4,] 110722562500
[5,] 138403203125
[6,] 166083843750
[7,] 193764484375
[8,] 221445125000
[9,] 249125765625
[10,] 276806406250
[[9]]
[,1]
[1,] 1.522435e+12
[2,] 3.044870e+12
[3,] 4.567306e+12
[4,] 6.089741e+12
[5,] 7.612176e+12
[6,] 9.134611e+12
[7,] 1.065705e+13
[8,] 1.217948e+13
[9,] 1.370192e+13
[10,] 1.522435e+13
[[10]]
[,1]
[1,] 8.373394e+13
[2,] 1.674679e+14
[3,] 2.512018e+14
[4,] 3.349358e+14
[5,] 4.186697e+14
[6,] 5.024036e+14
[7,] 5.861376e+14
[8,] 6.698715e+14
[9,] 7.536054e+14
[10,] 8.373394e+14
[[11]]
[,1]
[1,] 4.605367e+15
[2,] 9.210733e+15
[3,] 1.381610e+16
[4,] 1.842147e+16
[5,] 2.302683e+16
[6,] 2.763220e+16
[7,] 3.223757e+16
[8,] 3.684293e+16
[9,] 4.144830e+16
[10,] 4.605367e+16
Just an update that I have solved the problem. Here is the answer:
#loading required packages
library(igraph)
library(tidyverse)
library(ggraph)
library(ggnetwork)
library(tidygraph)
g <- play_erdos_renyi(10, .2)
M<-matrix(0:1, nrow = 10, ncol = 10)
Periods=10
seeds=c(1,2)
#Seeds are informed
V(g)$informed <- F
V(g)[seeds]$informed=T
#Seeds have prob 0.5 (just treat prob as any variable here)
V(g)$prob=0
V(g)[seeds]$prob=0.5
#Time loop for creating prob over time
#prob at [t+1]=M*prob at [t]
#calculate prob for vertex v in time t+1 by multiplying 10X10 matrix M with 10X1 vector V(g)$prob
#then considering the v-th row
#extract the probs that were just created
prob <- V(g)$prob
#declare an initial matrix of simulated prob (called sim.prob)
sim.prob <- matrix(nrow=10 , ncol = Periods+1) #Create empty matrix
sim.prob[,1] <- diag(ncol(M)) %*% c(1:10) #First column is just the id's
for (t in 2:Periods) {
sim.prob[,t] <- diag(ncol(M)) %*% prob
sim.prob[,t+1] <- M %*% sim.prob[,t]
for(v in V(g)) {
if(!V(g)[v]$informed) {
V(g)$prob[v]= sim.prob[,t+1][v]
prob <- V(g)$prob
}}}
I have a list list_tot that has nested lists.
I want to subset or create a new list that selects specific subsets based on the parameters specified, details follow:
List_1 <- list(a = matrix(5,2), b = matrix(5,7), c = matrix(5,9), d = matrix(5,3))
List_2 <- list(a = matrix(7,3), b = matrix(7,7), c = matrix(7,1), d = matrix(7,9))
List_3 <- list(a = matrix(5,2), b = matrix(5,7), c = matrix(5,9), d = matrix(5,3))
List_4 <- list(a = matrix(5,2), b = matrix(5,7), c = matrix(5,9), d = matrix(5,3))
List_5 <- list(a = matrix(5,2), b = matrix(5,7), c = matrix(5,9), d = matrix(5,3))
List_tot <- list(List_1, List_2, List_3, List_4, List_5)
that reads:
[[1]]
[[1]]$a
[,1]
[1,] 5
[2,] 5
[[1]]$b
[,1]
[1,] 5
[2,] 5
[3,] 5
[4,] 5
[5,] 5
[6,] 5
[7,] 5
[[1]]$c
[,1]
[1,] 5
[2,] 5
[3,] 5
[4,] 5
[5,] 5
[6,] 5
[7,] 5
[8,] 5
[9,] 5
[[1]]$d
[,1]
[1,] 5
[2,] 5
[3,] 5
[[2]]
[[2]]$a
[,1]
[1,] 7
[2,] 7
[3,] 7
[[2]]$b
[,1]
[1,] 7
[2,] 7
[3,] 7
[4,] 7
[5,] 7
[6,] 7
[7,] 7
[[2]]$c
[,1]
[1,] 7
...etc
I want to select:
for each nested list only select list/matrix a, c, and d.
for each nested list select two lists/matrices with the top number of rows.
So New_List_tot would have an output of:
that reads:
[[1]]
[[1]]$c
[,1]
[1,] 5
[2,] 5
[3,] 5
[4,] 5
[5,] 5
[6,] 5
[7,] 5
[8,] 5
[9,] 5
[[1]]$d
[,1]
[1,] 5
[2,] 5
[3,] 5
etc...
Any assistance would be helpful. All my attempts, attempts using plyer and dplyr, but with no success and very much stuck.
We can use base R to do this. No packages are needed
lapply(List_tot, `[`, c("a", "c", "d"))
or with anonymous function
lapply(List_tot, function(x) x[c("a", "c", "d")])
if we need the top 2, order the number of rows (lengths work as these are single column matrix, so the number of rows are equal to the total number of elements, get the head of the names of the ordered vector of number of rows and use that to extract the inner list element
lapply(List_tot, function(x) {
x1 <- x[c("a", "c", "d")]
v1 <- lengths(x1)
x1[head(names(v1)[order(-v1)], 2)]
})
Update
For the second goal, you can try
lapply(
List_tot,
function(lst) {
head(lst[c("a", "c", "d")][order(-sapply(lst[c("a", "c", "d")], nrow))], 2)
}
)
which gives
[[1]]
[[1]]$c
[,1]
[1,] 5
[2,] 5
[3,] 5
[4,] 5
[5,] 5
[6,] 5
[7,] 5
[8,] 5
[9,] 5
[[1]]$d
[,1]
[1,] 5
[2,] 5
[3,] 5
[[2]]
[[2]]$d
[,1]
[1,] 7
[2,] 7
[3,] 7
[4,] 7
[5,] 7
[6,] 7
[7,] 7
[8,] 7
[9,] 7
[[2]]$a
[,1]
[1,] 7
[2,] 7
[3,] 7
[[3]]
[[3]]$c
[,1]
[1,] 5
[2,] 5
[3,] 5
[4,] 5
[5,] 5
[6,] 5
[7,] 5
[8,] 5
[9,] 5
[[3]]$d
[,1]
[1,] 5
[2,] 5
[3,] 5
[[4]]
[[4]]$c
[,1]
[1,] 5
[2,] 5
[3,] 5
[4,] 5
[5,] 5
[6,] 5
[7,] 5
[8,] 5
[9,] 5
[[4]]$d
[,1]
[1,] 5
[2,] 5
[3,] 5
[[5]]
[[5]]$c
[,1]
[1,] 5
[2,] 5
[3,] 5
[4,] 5
[5,] 5
[6,] 5
[7,] 5
[8,] 5
[9,] 5
[[5]]$d
[,1]
[1,] 5
[2,] 5
[3,] 5
For your first goal "for each nested list only select list/matrix a, c, and d."
lapply(List_tot, `[`, c("a", "c", "d"))
For your second goal "for each nested list select the lists/matrices with the top number of rows."
Map(`[`, List_tot, max.col(t(sapply(List_tot, lengths))))
#akrun has a clean answer for step 1, filtering by columns. For step 2, filtering by number of rows you can try
library(magrittr)
List_colfilter <- lapply(List_tot, function(i)i[c("a","c","d")])
longestlist <- function(l){
maxr <- lapply(l,nrow) %>% unlist %>% max
l2 <- lapply(l, function(x) if(nrow(x)==maxr) x else NA)
for (n in names(l)){
if (is.na(l2[n])){
l2[n] <- NULL
}
}
return(l2)
}
List_longfilter <- lapply(List_colfilter, longestlist)
In base R you could do:
lapply(List_tot,
function(x) (y<-x[c("a", "c", "d")])[order(sapply(y, nrow), decreasing = TRUE)[1:2]])
[[1]]
[[1]]$c
[,1]
[1,] 5
[2,] 5
[3,] 5
[4,] 5
[5,] 5
[6,] 5
[7,] 5
[8,] 5
[9,] 5
[[1]]$d
[,1]
[1,] 5
[2,] 5
[3,] 5
etc
I am trying to do conditional subtraction of matrices in R. Let's say I have a list of matrices A, B, C. If my selection = 1, then C = A - B. Here are my codes:
selection = 1
A <- matrix(c(1:10), nrow = 5, ncol = 2)
A
B <- matrix(c(11:20), nrow = 5, ncol = 2)
B
C <- matrix(0, nrow = 5, ncol = 2)
C
my_matrix_name <- as.vector(c("A", "B", "C"))
my_list = list(A, B, C)
names(my_list) <- my_matrix_name
my_list$C <- ifelse(selection == 1, my_list$A - my_list$B, "Error")
The above codes yield the following results for my_list:
> my_list
$A
[,1] [,2]
[1,] 1 6
[2,] 2 7
[3,] 3 8
[4,] 4 9
[5,] 5 10
$B
[,1] [,2]
[1,] 11 16
[2,] 12 17
[3,] 13 18
[4,] 14 19
[5,] 15 20
$C
[1] -10
But my desired output is like this:
> my_list
$A
[,1] [,2]
[1,] 1 6
[2,] 2 7
[3,] 3 8
[4,] 4 9
[5,] 5 10
$B
[,1] [,2]
[1,] 11 16
[2,] 12 17
[3,] 13 18
[4,] 14 19
[5,] 15 20
$C
[,1] [,2]
[1,] -10 -10
[2,] -10 -10
[3,] -10 -10
[4,] -10 -10
[5,] -10 -10
Please help! Thanks!
Use if since the condition that you are checking is scalar and not a vector.
my_list$C <- if(selection == 1) my_list$A - my_list$B else 'Error'
my_list
#$A
# [,1] [,2]
#[1,] 1 6
#[2,] 2 7
#[3,] 3 8
#[4,] 4 9
#[5,] 5 10
#$B
# [,1] [,2]
#[1,] 11 16
#[2,] 12 17
#[3,] 13 18
#[4,] 14 19
#[5,] 15 20
#$C
# [,1] [,2]
#[1,] -10 -10
#[2,] -10 -10
#[3,] -10 -10
#[4,] -10 -10
#[5,] -10 -10