Parallelise the calculation of a large, symmetric, distance matrix - r

I have a large distance matrix to calculate, of size 22k by 22k, with the distances calculated using the computationally heavy Frechet approach.
Here is the serial code:
library(doParallel)
library(kmlShape)
df <- data.frame(replicate(168, sample(1:50, 20, rep=TRUE)))
# distance matrix
dist.mat<-matrix(0, nrow(df), nrow(df))
for (i in 1:(nrow(df)-1)){
print(i)
for (j in (i+1):nrow(df)){
dist.mat[i,j] <- distFrechet(1:168, df[i,], 1:168, df[j,])
dist.mat[j,i] <- dist.mat[i,j]
}
}
I have tried to use the answer to this post How to construct in R a parallel version of nested for loop to compute values for a square matrix where the function is dependent on i and j?
with his being my version
# distance matrix
dist2.mat <- matrix(0, nrow(df), nrow(df))
cl<-makeCluster(detectCores()-1)
registerDoParallel(cl)
dist2.mat <-
foreach (i=1:nrow(df), .combine='cbind', .packages("kmlShape")) %dopar% {
x<-double(nrow(df))
for (j in 1:nrow(df)){
x[j] <- distFrechet(1:168, df[i,], 1:168, df[j,])
}
x
}
stopCluster(cl)
However, there are two points. Firstly, this gives an error:
Error in if (all.available) { : argument is not interpretable as logical
and secondly, it does not exploit the symmetric nature of the matrix to halve the workload.
Can anyone see what my mistake is, and also anyway to adapt this to exploit the symmetric nature of the matrix?

Using the %:% operator to create nested foreach loops:
library(doParallel)
library(kmlShape)
df <- data.frame(replicate(168, sample(1:50, 20, rep=TRUE)))
# distance matrix
dist2.mat <- matrix(0, nrow(df), nrow(df))
cl<-makeCluster(detectCores()-1)
registerDoParallel(cl)
x<-double(nrow(df))
dist <-
foreach (i = 1:(nrow(df)), .combine='cbind', .packages="kmlShape") %:%
foreach (j = (i):nrow(df)) %dopar% {
distFrechet(1:168, df[i,], 1:168, df[j,])
#list(i=i,j=j)
}
stopCluster(cl)
dist
result.1 result.2 result.3 result.4 result.5 result.6 result.7 result.8 result.9 result.10 result.11 result.12
[1,] 0 0 0 0 0 0 0 0 0 0 0 0
[2,] 1480.152 1448.745 1358.875 1366.565 1397.582 1405.809 1328.405 1360.226 1331.069 1308.166 1503.361 1435.218
[3,] 1404.957 1407.091 1371.23 1411.818 1459.328 1356.241 1242.108 1303.164 1350.508 1447.664 1378.07 1441.903
[4,] 1407.35 1419.361 1371.301 1409.521 1488.415 1409.456 1321.429 1343.909 1312.71 1307.351 1503.293 1462.787
[5,] 1466.282 1272.466 1388.564 1345.999 1393.626 1352.437 1343.351 1411.275 1334.988 1421.106 1431.027 1377.316
[6,] 1378.757 1427.55 1382.05 1382.904 1431.42 1389.155 1281.994 1391.232 1320.862 1386.65 1469.759 1410.739
[7,] 1394.628 1446.476 1226.788 1324.619 1378.869 1404.817 1426.891 1378.571 1393.961 1244.002 1408.215 1369.311
[8,] 1369.582 1348.065 1410.982 1320.011 1479.091 1402.211 1405.336 1465.377 1431.344 1296.284 1491.27 1337.368
[9,] 1377.445 1409.933 1385.837 1327.244 1378.182 1408.215 1432.204 1427.572 1383.904 1413.975 1420.113 1408.19
[10,] 1339.582 1452.823 1448.128 1390.824 1372.587 1409.99 1386.355 1378.127 1402.049 1317.684 1295.564 0
[11,] 1403.069 1362.809 1462.443 1362.996 1476.378 1397.757 1341.169 1485.1 1348.429 1275.999 0 1435.218
[12,] 1429.874 1363.44 1374.134 1337.237 1529.611 1340.024 1426.915 1367.23 1358.045 0 1503.361 1441.903
[13,] 1473.359 1423.106 1410.257 1399.169 1438.352 1363.531 1346.741 1435.122 0 1308.166 1378.07 1462.787
[14,] 1513.195 1499.721 1448.198 1372.167 1531.107 1368.213 1315.645 0 1331.069 1447.664 1503.293 1377.316
[15,] 1469.946 1466.107 1442.905 1426.129 1379.798 1440.91 0 1360.226 1350.508 1307.351 1431.027 1410.739
[16,] 1330.253 1455.198 1384.926 1287.825 1481.265 0 1328.405 1303.164 1312.71 1421.106 1469.759 1369.311
[17,] 1347.726 1404.506 1410.232 1385.049 0 1405.809 1242.108 1343.909 1334.988 1386.65 1408.215 1337.368
[18,] 1444.1 1309.435 1495.531 0 1397.582 1356.241 1321.429 1411.275 1320.862 1244.002 1491.27 1408.19
[19,] 1446.756 1320.464 0 1366.565 1459.328 1409.456 1343.351 1391.232 1393.961 1296.284 1420.113 0
[20,] 1432.192 0 1358.875 1411.818 1488.415 1352.437 1281.994 1378.571 1431.344 1413.975 1295.564 1435.218
result.13 result.14 result.15 result.16 result.17 result.18 result.19 result.20
[1,] 0 0 0 0 0 0 0 0
[2,] 1357.679 1413.495 1434.734 1417.133 1417.112 1412.082 1328.449 0
[3,] 1493.827 1336.9 1335.423 1412.955 1322.08 1491.122 0 0
[4,] 1369.99 1342.195 1395.864 1367.652 1409.937 0 1328.449 0
[5,] 1383.691 1467.201 1316.58 1318.347 0 1412.082 0 0
[6,] 1566.664 1324.274 1424.907 0 1417.112 1491.122 1328.449 0
[7,] 1393.022 1461.161 0 1417.133 1322.08 0 0 0
[8,] 1387.437 0 1434.734 1412.955 1409.937 1412.082 1328.449 0
[9,] 0 1413.495 1335.423 1367.652 0 1491.122 0 0
[10,] 1357.679 1336.9 1395.864 1318.347 1417.112 0 1328.449 0
[11,] 1493.827 1342.195 1316.58 0 1322.08 1412.082 0 0
[12,] 1369.99 1467.201 1424.907 1417.133 1409.937 1491.122 1328.449 0
[13,] 1383.691 1324.274 0 1412.955 0 0 0 0
[14,] 1566.664 1461.161 1434.734 1367.652 1417.112 1412.082 1328.449 0
[15,] 1393.022 0 1335.423 1318.347 1322.08 1491.122 0 0
[16,] 1387.437 1413.495 1395.864 0 1409.937 0 1328.449 0
[17,] 0 1336.9 1316.58 1417.133 0 1412.082 0 0
[18,] 1357.679 1342.195 1424.907 1412.955 1417.112 1491.122 1328.449 0
[19,] 1493.827 1467.201 0 1367.652 1322.08 0 0 0
[20,] 1369.99 1324.274 1434.734 1318.347 1409.937 1412.082 1328.449 0
I left the distances calculations where i = j to facilitate visualization/understanding of the result.
This now just needs to be reshaped correctly:
do.call(cbind,lapply(1:ncol(dist),function(col) c(rep(0,col-1),dist[1:(nrow(dist)-col+1),col])))
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14]
0 0 0 0 0 0 0 0 0 0 0 0 0 0
1480.152 0 0 0 0 0 0 0 0 0 0 0 0 0
1404.957 1448.745 0 0 0 0 0 0 0 0 0 0 0 0
1407.35 1407.091 1358.875 0 0 0 0 0 0 0 0 0 0 0
1466.282 1419.361 1371.23 1366.565 0 0 0 0 0 0 0 0 0 0
1378.757 1272.466 1371.301 1411.818 1397.582 0 0 0 0 0 0 0 0 0
1394.628 1427.55 1388.564 1409.521 1459.328 1405.809 0 0 0 0 0 0 0 0
1369.582 1446.476 1382.05 1345.999 1488.415 1356.241 1328.405 0 0 0 0 0 0 0
1377.445 1348.065 1226.788 1382.904 1393.626 1409.456 1242.108 1360.226 0 0 0 0 0 0
1339.582 1409.933 1410.982 1324.619 1431.42 1352.437 1321.429 1303.164 1331.069 0 0 0 0 0
1403.069 1452.823 1385.837 1320.011 1378.869 1389.155 1343.351 1343.909 1350.508 1308.166 0 0 0 0
1429.874 1362.809 1448.128 1327.244 1479.091 1404.817 1281.994 1411.275 1312.71 1447.664 1503.361 0 0 0
1473.359 1363.44 1462.443 1390.824 1378.182 1402.211 1426.891 1391.232 1334.988 1307.351 1378.07 1435.218 0 0
1513.195 1423.106 1374.134 1362.996 1372.587 1408.215 1405.336 1378.571 1320.862 1421.106 1503.293 1441.903 1357.679 0
1469.946 1499.721 1410.257 1337.237 1476.378 1409.99 1432.204 1465.377 1393.961 1386.65 1431.027 1462.787 1493.827 1413.495
1330.253 1466.107 1448.198 1399.169 1529.611 1397.757 1386.355 1427.572 1431.344 1244.002 1469.759 1377.316 1369.99 1336.9
1347.726 1455.198 1442.905 1372.167 1438.352 1340.024 1341.169 1378.127 1383.904 1296.284 1408.215 1410.739 1383.691 1342.195
1444.1 1404.506 1384.926 1426.129 1531.107 1363.531 1426.915 1485.1 1402.049 1413.975 1491.27 1369.311 1566.664 1467.201
1446.756 1309.435 1410.232 1287.825 1379.798 1368.213 1346.741 1367.23 1348.429 1317.684 1420.113 1337.368 1393.022 1324.274
result.20 1432.192 1320.464 1495.531 1385.049 1481.265 1440.91 1315.645 1435.122 1358.045 1275.999 1295.564 1408.19 1387.437 1461.161
[,15] [,16] [,17] [,18] [,19] [,20]
0 0 0 0 0 0
0 0 0 0 0 0
0 0 0 0 0 0
0 0 0 0 0 0
0 0 0 0 0 0
0 0 0 0 0 0
0 0 0 0 0 0
0 0 0 0 0 0
0 0 0 0 0 0
0 0 0 0 0 0
0 0 0 0 0 0
0 0 0 0 0 0
0 0 0 0 0 0
0 0 0 0 0 0
0 0 0 0 0 0
1434.734 0 0 0 0 0
1335.423 1417.133 0 0 0 0
1395.864 1412.955 1417.112 0 0 0
1316.58 1367.652 1322.08 1412.082 0 0
result.20 1424.907 1318.347 1409.937 1491.122 1328.449 0

Related

Number of rows with exactly 1 numerical value in matrice (R)

I have a matrice that is as such:
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
[1,] 0 0 0 0 0 0 0 0 0
[2,] 0 0 0 0 0 0 0 0 0
[3,] 0 0 0 0 0 0 0 0 0
[4,] 0 0 0 0 0 0 0 1 1
[5,] 0 0 0 0 0 0 1 1 0
[6,] 0 0 0 0 0 1 0 0 0
[7,] 0 0 0 0 1 1 0 0 0
[8,] 0 0 0 0 1 0 0 0 0
[9,] 0 0 0 0 1 0 0 0 0
[10,] 0 0 0 0 1 1 0 0 0
[11,] 0 0 0 0 0 1 0 0 0
[12,] 0 0 0 0 0 1 1 1 1
[13,] 0 0 0 0 0 0 0 0 0
[14,] 0 0 0 0 0 0 0 0 0
[15,] 0 0 0 0 0 0 0 0 0
[16,] 0 0 0 0 0 0 0 0 0
[17,] 0 0 0 0 0 0 0 0 0
[,10] [,11] [,12] [,13] [,14] [,15] [,16] [,17]
[1,] 0 0 0 0 0 0 0 0
[2,] 0 0 0 0 0 0 0 0
[3,] 0 0 0 0 0 0 0 0
[4,] 1 1 0 0 0 0 0 0
[5,] 0 1 0 0 0 0 0 0
[6,] 0 1 0 0 0 0 0 0
[7,] 0 1 0 0 0 0 0 0
[8,] 0 1 0 0 0 1 0 0
[9,] 0 1 0 0 0 1 0 0
[10,] 1 1 0 0 0 1 0 0
[11,] 1 1 0 1 1 0 0 0
[12,] 1 1 1 1 0 0 0 0
[13,] 0 0 0 0 0 0 0 0
[14,] 0 0 0 0 0 0 0 0
[15,] 0 0 0 0 0 0 0 0
[16,] 0 0 0 0 0 0 0 0
[17,] 0 0 0 0 0 0 0 0
[,18]
[1,] 0
[2,] 0
[3,] 0
[4,] 0
[5,] 0
[6,] 0
[7,] 0
[8,] 0
[9,] 0
[10,] 0
[11,] 0
[12,] 0
[13,] 0
[14,] 0
[15,] 0
[16,] 0
[17,] 0
How can I count the number of rows with exactly 1 value, not more than one?
I've tried using nrow(imageMatrix[imageMatrix < 2])
and also tried converting the matrice to dataframe and then using nrow(dataframe_matrice[dataframe_matrice == 1,])
but it has been of no avail.
Here imageMatrix is the name of the matrice.
Can someone please offer me a hint on what I'm doing wrong with my first line of code in counting rows?
We may use rowSums on a logical matrix (imageMatrix == 1) and then create a logical vector == 1 and get the count with sum
sum(rowSums(imageMatrix == 1) == 1)
imageMatrix <2 is a logical matrix, when it is used to subset the original matrix, it returns a vector of values which doesn't have dim and thus nrow wouldn't work i.e.
nrow(1:5)
NULL

How can I change some element of matrix inside a function in R language automatically?

I have tried to write a function for this part of code and I can not mange because I am new to R
can someone help me?
I made a function like this :
m <- matrix(0, nrow=10, ncol=10) # Create an adjacency matrix
and I have changed the the element of it like below :
m[1,2] <- m[2,3] <- m[3,4] <-m[4,5]<-m[5,6]<-m[6,7]<-m[7,8] <-m[8,9]<-m[9,10]<-m[1,10] <- 1
but how can i do it automatically inside a function? to automatically iterate and change value?
I am not very sure about the logic for why m[1,10] is assigned one, for the others, you can do:
m <- matrix(0, nrow=10, ncol=10)
m[row(m) == col(m)-1] <- 1
m
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
[1,] 0 1 0 0 0 0 0 0 0 0
[2,] 0 0 1 0 0 0 0 0 0 0
[3,] 0 0 0 1 0 0 0 0 0 0
[4,] 0 0 0 0 1 0 0 0 0 0
[5,] 0 0 0 0 0 1 0 0 0 0
[6,] 0 0 0 0 0 0 1 0 0 0
[7,] 0 0 0 0 0 0 0 1 0 0
[8,] 0 0 0 0 0 0 0 0 1 0
[9,] 0 0 0 0 0 0 0 0 0 1
[10,] 0 0 0 0 0 0 0 0 0 0

Place a vector randomly inside a matrix in R

How can I place the vector a<-c(1,2,3,4,5,6) in a ramdom position in the matrix m<-matrix(0, nrow = 10, ncol = 10)?
The vector has to be together:
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
[1,] 0 0 0 0 0 0 0 0 0 0
[2,] 1 0 0 0 0 0 0 0 0 0
[3,] 2 0 0 0 0 0 0 0 0 0
[4,] 3 0 0 0 0 0 0 0 0 0
[5,] 4 0 0 0 0 0 0 0 0 0
[6,] 5 0 0 0 0 0 0 0 0 0
[7,] 6 0 0 0 0 0 0 0 0 0
[8,] 0 0 0 0 0 0 0 0 0 0
[9,] 0 0 0 0 0 0 0 0 0 0
[10,] 0 0 0 0 0 0 0 0 0 0
And it has to be horizontally, vertically or diagonally
I have tried:
start = sample.int(length(m), 1)
m[start:(start+length(a)-1)] = a
But it cannot take place the following:
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
[1,] 0 0 0 0 0 0 5 0 0 0
[2,] 0 0 0 0 0 0 6 0 0 0
[3,] 0 0 0 0 0 0 0 0 0 0
[4,] 0 0 0 0 0 0 0 0 0 0
[5,] 0 0 0 0 0 0 0 0 0 0
[6,] 0 0 0 0 0 0 0 0 0 0
[7,] 0 0 0 0 0 1 0 0 0 0
[8,] 0 0 0 0 0 2 0 0 0 0
[9,] 0 0 0 0 0 3 0 0 0 0
[10,] 0 0 0 0 0 4 0 0 0 0
Thanks
Sample a 1d index of the same size as a and then assign the vector to m at those indices:
m[sample.int(length(m), length(a))] <- a
m
# [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
# [1,] 0 6 0 0 0 0 0 0 0 0
# [2,] 0 0 0 0 0 4 0 0 0 0
# [3,] 0 3 0 1 0 0 0 0 5 0
# [4,] 0 0 0 0 0 0 0 0 0 0
# [5,] 0 0 0 0 0 0 0 0 0 0
# [6,] 0 0 0 0 0 0 0 0 0 0
# [7,] 0 0 0 0 0 0 0 0 0 0
# [8,] 0 0 0 0 0 0 0 0 2 0
# [9,] 0 0 0 0 0 0 0 0 0 0
#[10,] 0 0 0 0 0 0 0 0 0 0
If the vector needs to be continuous, you can sample the start index, and then assign with range index:
start = sample.int(length(m), 1)
m[start:(start+length(a)-1)] = a
m
# [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
# [1,] 0 0 0 0 0 0 0 0 0 0
# [2,] 0 0 0 0 0 0 0 0 0 0
# [3,] 0 0 0 0 0 0 0 0 0 0
# [4,] 0 0 0 0 0 0 0 0 0 0
# [5,] 1 0 0 0 0 0 0 0 0 0
# [6,] 2 0 0 0 0 0 0 0 0 0
# [7,] 3 0 0 0 0 0 0 0 0 0
# [8,] 4 0 0 0 0 0 0 0 0 0
# [9,] 5 0 0 0 0 0 0 0 0 0
#[10,] 6 0 0 0 0 0 0 0 0 0

How to iteratively fill a matrix with rnorm values using a for loop?

I have a question, I am trying to create a 10x10 matrix using the code below, where the first column contains 10 values from a normal distribution with std dev of .5 and a mean equal to j where j is a value 1:10. My code below produces the observed matrix, where only the final column is filled with values. What am I doing wrong? Thank you.
for(j in 1:10){
y<-matrix(0,ncol=10,nrow=10)
y[,j]<-rnorm(n=10,mean=j,sd=.5)
}
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
[1,] 0 0 0 0 0 0 0 0 0 10.857520
[2,] 0 0 0 0 0 0 0 0 0 10.490549
[3,] 0 0 0 0 0 0 0 0 0 9.888620
[4,] 0 0 0 0 0 0 0 0 0 9.495205
[5,] 0 0 0 0 0 0 0 0 0 9.674356
[6,] 0 0 0 0 0 0 0 0 0 10.810197
[7,] 0 0 0 0 0 0 0 0 0 10.337517
[8,] 0 0 0 0 0 0 0 0 0 9.715229
[9,] 0 0 0 0 0 0 0 0 0 9.902603
[10,] 0 0 0 0 0 0 0 0 0 8.972656

add a value randomly to a matrix

How can I randomly add a value to a matrix?
say I have:
mat <- matrix(0, 10, 10)
v = 5
how can I add randomly v to mat, 2 positions at a time? The output should look like this after a single iteration:
out
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
[1,] 0 0 0 0 0 0 0 0 0 0
[2,] 5 0 0 0 0 0 0 0 0 0
[3,] 0 0 0 0 0 0 0 0 0 0
[4,] 0 0 0 0 5 0 0 0 0 0
[5,] 0 0 0 0 0 0 0 0 0 0
[6,] 0 0 0 0 0 0 0 0 0 0
[7,] 0 0 0 0 0 0 0 0 0 0
[8,] 0 0 0 0 0 0 0 0 0 0
[9,] 0 0 0 0 0 0 0 0 0 0
[10,] 0 0 0 0 0 0 0 0 0 0
After another iteration, mat should have 2 more positions filled with the value in 'v'
You could use ?sample to randomly index your matrix:
idx <- sample(length(mat), size=2)
mat[idx] <- mat[idx] + v

Resources