So I am trying to sum the rows of a matrix, and there are inf's within it. How do I sum the row, omitting the inf's?
Multiply your matrix by the result of is.finite(m) and call rowSums on the product with na.rm=TRUE. This works because Inf*0 is NaN.
m <- matrix(c(1:3,Inf,4,Inf,5:6),4,2)
rowSums(m*is.finite(m),na.rm=TRUE)
A[is.infinite(A)]<-NA
rowSums(A,na.rm=TRUE)
Some benchmarking for comparison:
library(microbenchmark)
rowSumsMethod<-function(A){
A[is.infinite(A)]<-NA
rowSums(A,na.rm=TRUE)
}
applyMethod<-function(A){
apply( A , 1 , function(x){ sum(x[!is.infinite(x)])})
}
rowSumsMethod2<-function(m){
rowSums(m*is.finite(m),na.rm=TRUE)
}
rowSumsMethod0<-function(A){
A[is.infinite(A)]<-0
rowSums(A)
}
A1 <- matrix(sample(c(1:5, Inf), 50, TRUE), ncol=5)
A2 <- matrix(sample(c(1:5, Inf), 5000, TRUE), ncol=5)
microbenchmark(rowSumsMethod(A1),rowSumsMethod(A2),
rowSumsMethod0(A1),rowSumsMethod0(A2),
rowSumsMethod2(A1),rowSumsMethod2(A2),
applyMethod(A1),applyMethod(A2))
Unit: microseconds
expr min lq median uq max neval
rowSumsMethod(A1) 13.063 14.9285 16.7950 19.3605 1198.450 100
rowSumsMethod(A2) 212.726 220.8905 226.7220 240.7165 307.427 100
rowSumsMethod0(A1) 11.663 13.9960 15.3950 18.1940 112.894 100
rowSumsMethod0(A2) 103.098 109.6290 114.0610 122.9240 159.545 100
rowSumsMethod2(A1) 8.864 11.6630 12.5960 14.6955 49.450 100
rowSumsMethod2(A2) 57.380 60.1790 63.4450 67.4100 81.172 100
applyMethod(A1) 78.839 84.4380 92.1355 99.8330 181.005 100
applyMethod(A2) 3996.543 4221.8645 4338.0235 4552.3825 6124.735 100
So Joshua's method wins! And apply method is clearly slower than two other methods (relatively speaking of course).
I'd use apply and is.infinite in order to avoid replacing Inf values by NA as in #Hemmo's answer.
> set.seed(1)
> Mat <- matrix(sample(c(1:5, Inf), 50, TRUE), ncol=5)
> Mat # this is an example
[,1] [,2] [,3] [,4] [,5]
[1,] 2 2 Inf 3 5
[2,] 3 2 2 4 4
[3,] 4 5 4 3 5
[4,] Inf 3 1 2 4
[5,] 2 5 2 5 4
[6,] Inf 3 3 5 5
[7,] Inf 5 1 5 1
[8,] 4 Inf 3 1 3
[9,] 4 3 Inf 5 5
[10,] 1 5 3 3 5
> apply(Mat, 1, function(x) sum(x[!is.infinite(x)]))
[1] 12 15 21 10 18 16 12 11 17 17
Try this...
m <- c( 1 ,2 , 3 , Inf , 4 , Inf ,5 )
sum(m[!is.infinite(m)])
Or
m <- matrix( sample( c(1:10 , Inf) , 100 , rep = TRUE ) , nrow = 10 )
sums <- apply( m , 1 , FUN = function(x){ sum(x[!is.infinite(x)])})
> m
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
[1,] 8 9 7 Inf 9 2 2 6 1 Inf
[2,] 8 7 4 5 9 5 8 4 7 10
[3,] 7 9 3 4 7 3 3 6 9 4
[4,] 7 Inf 2 6 4 8 3 1 9 9
[5,] 4 Inf 7 5 9 5 3 5 9 9
[6,] 7 3 7 Inf 7 3 7 3 7 1
[7,] 5 7 2 1 Inf 1 9 8 1 5
[8,] 4 Inf 10 Inf 8 10 4 9 7 2
[9,] 10 7 9 7 2 Inf 4 Inf 4 6
[10,] 9 4 6 3 9 6 6 5 1 8
> sums
[1] 44 67 55 49 56 45 39 54 49 57
This is a "non-apply" and non-destructive approach:
rowSums( matrix(match(A, A[is.finite(A)]), nrow(A)), na.rm=TRUE)
[1] 2 4
Although it is reasonably efficient, it is not as fast as Johsua's multiplication method.
Related
I was given this matrix and i have to create it using exclusively matrix operations.
0 1 2 3 4
1 2 3 4 5
2 3 4 5 6
3 4 5 6 7
4 5 6 7 8
So this is what i have done, but im not sure if this is actually considered as matrix operations in order to create my MATNew
mat1 <- matrix(c(0:4), nrow=1, ncol=5) ; print(mat1)
mat2 <- matrix(c(1:5), nrow=1, ncol=5) ; print(mat2)
mat3 <- matrix(c(2:6), nrow=1, ncol=5) ; print(mat3)
mat4 <- matrix(c(3:7), nrow=1, ncol=5) ; print(mat4)
mat5 <- matrix(c(4:8), nrow=1, ncol=5) ; print(mat5)
MATNew <- matrix(cbind(mat1,mat2,mat3,mat4,mat5), 5, 5) ; print(MATNew)
outer(0:4, 0:4, `+`)
# [,1] [,2] [,3] [,4] [,5]
# [1,] 0 1 2 3 4
# [2,] 1 2 3 4 5
# [3,] 2 3 4 5 6
# [4,] 3 4 5 6 7
# [5,] 4 5 6 7 8
You can use the row() and col() functions (if this is homework, make sure to cite your sources ...)
m <- matrix(NA, 5, 5)
row(m) + col(m)-2
A matrix-only solution
matrix( rep(0:8,5), 5 )[,1:9%%2==1]
[,1] [,2] [,3] [,4] [,5]
[1,] 0 1 2 3 4
[2,] 1 2 3 4 5
[3,] 2 3 4 5 6
[4,] 3 4 5 6 7
[5,] 4 5 6 7 8
Try this?
> embed(0:8, 5)[, 5:1]
[,1] [,2] [,3] [,4] [,5]
[1,] 0 1 2 3 4
[2,] 1 2 3 4 5
[3,] 2 3 4 5 6
[4,] 3 4 5 6 7
[5,] 4 5 6 7 8
Benchmark for kicks:
microbenchmark::microbenchmark(
outer = outer(0:4, 0:4, `+`),
rowcol = {m <- matrix(NA, 5, 5); row(m) + col(m)-2},
matrix1 = matrix(rep(0:8,5), 5)[,1:9%%2==1],
matrix2 = matrix(rep(0:8,5), 5)[,c(TRUE, FALSE)],
matrix3 = matrix(rep(0:4, each = 5) + 0:4, 5),
sequence = matrix(sequence(rep(5, 5), 0:4), 5), times = 1e5)
#> Unit: microseconds
#> expr min lq mean median uq max neval
#> outer 5.100 6.200 8.706730 6.501 7.202 6628.401 1e+05
#> rowcol 2.900 3.801 5.097179 4.002 4.402 4985.501 1e+05
#> matrix1 2.900 3.701 5.159770 4.000 4.400 3621.901 1e+05
#> matrix2 2.400 3.002 4.063637 3.201 3.601 2395.001 1e+05
#> matrix3 2.000 2.600 3.535451 2.701 3.001 2517.101 1e+05
#> sequence 3.701 4.601 6.179183 4.901 5.401 3303.102 1e+05
I have a matrix:
mat <- matrix(c(2,11,3,1,2,4,55,65,12,4,6,6,7,9,3,23,16,77,5,5,7),ncol = 3, byrow = TRUE)
[,1] [,2] [,3]
[1,] 2 11 3
[2,] 1 2 4
[3,] 55 65 12
[4,] 4 6 6
[5,] 7 9 3
[6,] 23 16 77
[7,] 5 5 7
I want to add a column with rows index. This index will starts at 1 and repeats the same index, until it arrived to a row where the rowsums is > 100 to move to the next value.
Indx[,2][,3][,4]
[1,] 1 2 11 3
[2,] 1 1 2 4
[3,] 2 55 65 12
[4,] 3 4 6 6
[5,] 3 7 9 3
[6,] 4 23 16 77
[7,] 5 5 5 7
Using rle:
matRle <- rle(rowSums(mat) > 100)$lengths
cbind(rep(seq(length(matRle)), matRle), mat)
# [,1] [,2] [,3] [,4]
# [1,] 1 2 11 3
# [2,] 1 1 2 4
# [3,] 2 55 65 12
# [4,] 3 4 6 6
# [5,] 3 7 9 3
# [6,] 4 23 16 77
# [7,] 5 5 5 7
A solution using dplyr.
library(dplyr)
mat2 <- mat %>%
as.data.frame() %>%
mutate(Indx = cumsum(rowSums(dat) > 100 | lag(rowSums(dat) > 100, default = TRUE))) %>%
select(Indx, paste0("V", 1:ncol(mat))) %>%
as.matrix()
mat2
# Indx V1 V2 V3
# [1,] 1 2 11 3
# [2,] 1 1 2 4
# [3,] 2 55 65 12
# [4,] 3 4 6 6
# [5,] 3 7 9 3
# [6,] 4 23 16 77
# [7,] 5 5 5 7
cbind(cumsum(replace(a<-rowSums(mat)>100,which(a==1)+1,1))+1,mat)
[,1] [,2] [,3] [,4]
[1,] 1 2 11 3
[2,] 1 1 2 4
[3,] 2 55 65 12
[4,] 3 4 6 6
[5,] 3 7 9 3
[6,] 4 23 16 77
[7,] 5 5 5 7
What does this do??:
first obtain the rowSums which are greater than 100
a<-rowSums(mat)>100
Then the next row for every row>100, should have the next index. Thus do a replace and cumsum:
cumsum(replace(a,which(a==1)+1,1))
Now you will realize that this starts from zero, so you add 1.
We could do this with rleid from data.table
library(data.table)
cbind(Indx = rleid(rowSums(mat) > 100), mat)
# Indx
#[1,] 1 2 11 3
#[2,] 1 1 2 4
#[3,] 2 55 65 12
#[4,] 3 4 6 6
#[5,] 3 7 9 3
#[6,] 4 23 16 77
#[7,] 5 5 5 7
I am trying to create a matrix by drawing random block rows from another matrix. I have managed to do so with a loop.
set.seed(1)
a_matrix <- matrix(1:10,10,5) # the matrix with original sample
b_matrix <- matrix(NA,10, 5) # a matrix to store the bootstrap sample
S2<- seq(from =1 , to = 10, by =2) #[1] 1 3 5 7 9
m <- 2 # block size of m
for (r in S2){ start_point<-sample(1:(nrow(a_matrix)-1), 1, replace=T)
#randomly choose a number 1 to length of a_matrix -1
b_block <- a_matrix[start_point:(start_point+(m-1)), 1:ncol(a_matrix)]
# randomly select blocks from matrix a
b_matrix[r,]<-as.matrix((b_block)[1,])
b_matrix[(r+1),]<-as.matrix((b_block)[2,]) # put the blocks into matrix b
}
b_matrix
#we now have a b_matrix that is made of random blocks (size m=2)
#of the original a_matrix
The loop method works but it is clearly not very efficient and it is not possible to extend it to other block size (for e.g. having a blocksize of 3) .What is a cleaner and expandable approach ? Thanks in advance
Here I tried to clean it up a bit and generalize the use of m:
random_block_sample <- function(a_matrix, m = 2L) {
N <- nrow(a_matrix)
stopifnot(m <= N)
n <- ceiling(N / m)
s <- sample(N - m + 1L, n, TRUE) # start_point
i <- unlist(lapply(s, seq, length.out = m))
b_matrix <- a_matrix[i, , drop = FALSE]
head(b_matrix, N)
}
set.seed(1L)
random_block_sample(a_matrix, m = 2L)
# [,1] [,2] [,3] [,4] [,5]
# [1,] 3 3 3 3 3
# [2,] 4 4 4 4 4
# [3,] 4 4 4 4 4
# [4,] 5 5 5 5 5
# [5,] 6 6 6 6 6
# [6,] 7 7 7 7 7
# [7,] 9 9 9 9 9
# [8,] 10 10 10 10 10
# [9,] 2 2 2 2 2
# [10,] 3 3 3 3 3
set.seed(1L)
random_block_sample(a_matrix, m = 5L)
# [,1] [,2] [,3] [,4] [,5]
# [1,] 2 2 2 2 2
# [2,] 3 3 3 3 3
# [3,] 4 4 4 4 4
# [4,] 5 5 5 5 5
# [5,] 6 6 6 6 6
# [6,] 3 3 3 3 3
# [7,] 4 4 4 4 4
# [8,] 5 5 5 5 5
# [9,] 6 6 6 6 6
# [10,] 7 7 7 7 7
Hi I have a dataset with 4 columns (all numeric) and I am replacing missing value with mean value of column. Below code is neither giving error nor replacing value.
mi <- function(x){
for( col in 1:ncol(x)){
for( row in 1:nrow(x)){
ifelse(is.na(x[row, col]), x[row,col] <- mean(x[, col], na.rm = TRUE), x[row, col])
}
}
}
please suggest..
Here's a pretty straightforward approach (with some reproducible sample data):
Some sample data:
set.seed(1)
df <- data.frame(matrix(sample(c(NA, 1:10), 100, TRUE), ncol = 4))
head(df)
# X1 X2 X3 X4
# 1 2 4 5 9
# 2 4 NA 9 9
# 3 6 4 4 4
# 4 9 9 2 8
# 5 2 3 NA 10
# 6 9 5 1 4
Let's make a copy and replace NA with the column means.
df2 <- df
df2[] <- lapply(df2, function(x) { x[is.na(x)] <- mean(x, na.rm=TRUE); x })
head(df2)
# X1 X2 X3 X4
# 1 2 4.000000 5 9
# 2 4 5.956522 9 9
# 3 6 4.000000 4 4
# 4 9 9.000000 2 8
# 5 2 3.000000 5 10
# 6 9 5.000000 1 4
Verify the correct values were inserted. Compare df2[2, 2] with the following:
mean(df$X2, na.rm = TRUE)
# [1] 5.956522
The argument x is a copy of the original. You need to return the modified value:
mi <- function(x){
for( col in 1:ncol(x)){
for( row in 1:nrow(x)){
ifelse(is.na(x[row, col]), x[row,col] <- mean(x[, col], na.rm = TRUE), x[row, col])
}
}
return(x)
}
Or like this:
x <- matrix(sample(c(NA,1:10),100,TRUE),nrow=10)
x
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
[1,] 7 7 1 6 7 3 10 4 NA 2
[2,] 3 2 7 9 1 4 2 5 10 1
[3,] 10 4 2 8 7 4 1 8 8 3
[4,] 7 7 6 9 2 6 NA 6 6 10
[5,] 1 NA 5 9 9 4 NA 5 8 2
[6,] 4 4 9 3 9 4 5 NA 5 1
[7,] NA 2 2 2 9 2 10 NA 8 7
[8,] 10 8 7 1 5 2 9 7 10 5
[9,] 6 3 10 9 8 6 7 10 3 10
[10,] 7 9 5 2 2 9 5 6 NA 9
means <- colMeans(x,na.rm=TRUE)
for(i in 1:ncol(x)){
x[is.na(x[,i]),i] <- means[i]
}
x
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
[1,] 7.000000 7.000000 1 6 7 3 10.000 4.000 7.25 2
[2,] 3.000000 2.000000 7 9 1 4 2.000 5.000 10.00 1
[3,] 10.000000 4.000000 2 8 7 4 1.000 8.000 8.00 3
[4,] 7.000000 7.000000 6 9 2 6 6.125 6.000 6.00 10
[5,] 1.000000 5.111111 5 9 9 4 6.125 5.000 8.00 2
[6,] 4.000000 4.000000 9 3 9 4 5.000 6.375 5.00 1
[7,] 6.111111 2.000000 2 2 9 2 10.000 6.375 8.00 7
[8,] 10.000000 8.000000 7 1 5 2 9.000 7.000 10.00 5
[9,] 6.000000 3.000000 10 9 8 6 7.000 10.000 3.00 10
[10,] 7.000000 9.000000 5 2 2 9 5.000 6.000 7.25 9
This is not quite exactly what you are looking for but might be useful. This function substitute all NA with median (in every column):
require(randomForest)
x <- matrix(sample(c(NA,1:10),100,TRUE),nrow=10)
na.roughfix(x)
I am trying to create a block circulant matrix in R. The structure of a block circulant matrix is given below.
C0 C1 ... Cn-1
Cn-1 C0 C1 ... Cn-2
Cn-2 Cn-1 .... Cn-3
and so on
I have the blocks
C0 .... Cn-1
What is the easiest way to create the matrix. Is there a function already available?
Thanks for a challenging question! Here is a solution summing kronecker products of your matrices with sub- and super-diagonals.
Sample data, a list of matrices:
C <- lapply(1:3, matrix, nrow = 2, ncol = 2)
My solution:
bcm <- function(C) {
require(Matrix)
n <- length(C)
Reduce(`+`, lapply((-n+1):(n-1),
function(i) kronecker(as.matrix(bandSparse(n, n, -i)),
C[[1 + (i %% n)]])))
}
bcm(C)
# [,1] [,2] [,3] [,4] [,5] [,6]
# [1,] 1 1 3 3 2 2
# [2,] 1 1 3 3 2 2
# [3,] 2 2 1 1 3 3
# [4,] 2 2 1 1 3 3
# [5,] 3 3 2 2 1 1
# [6,] 3 3 2 2 1 1
I don't know if this is particularly efficient, but as I interpret your question it does what you want.
rotList <- function(L,n) {
if (n==0) return(L)
c(tail(L,n),head(L,-n))
}
rowFun <- function(n,matList) do.call(rbind,rotList(matList,n))
bcMat <- function(matList) {
n <- length(matList)
do.call(cbind,lapply(0:(n-1),rowFun,matList))
}
Example:
bcMat(list(diag(3),matrix(1:9,nrow=3),matrix(4,nrow=3,ncol=3)))
I think what you are looking for is circulant.matrix from the lgcp package.
If x is a matrix whose columns are the bases of the sub-blocks of a
block circulant matrix, then this function returns the block circulant
matrix of interest.
eg
x <- matrix(1:8,ncol=4)
circulant(x)
# [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
# [1,] 1 2 3 4 5 6 7 8
# [2,] 2 1 4 3 6 5 8 7
# [3,] 7 8 1 2 3 4 5 6
# [4,] 8 7 2 1 4 3 6 5
# [5,] 5 6 7 8 1 2 3 4
# [6,] 6 5 8 7 2 1 4 3
# [7,] 3 4 5 6 7 8 1 2
# [8,] 4 3 6 5 8 7 2 1
Alternative approach
Here is a highly inefficient approach using kronecker and Reduce
bcirc <- function(list.blocks){
P <- lapply(seq_along(list.blocks), function(x,y) x ==y, x = circulant(seq_along(list.blocks)))
Reduce('+',Map(P = P, A=list.blocks, f = function(P,A) kronecker(P,A)))
}
benchmarking with #flodel and #Ben Bolker
lbirary(microbenchmark)
microbenchmark(bcm(C), bcirc(C), bcMat(C))
Unit: microseconds
expr min lq median uq max neval
bcm(C) 10836.719 10925.7845 10992.8450 11141.1240 21622.927 100
bcirc(C) 444.983 455.7275 479.5790 487.0370 569.105 100
bcMat(C) 288.558 296.4350 309.8945 348.4215 2190.231 100
Is something like this what you are looking for?
> vec <- 1:4
> sapply(rev(seq_along(vec)),function(x) c(tail(vec,x),head(vec,-x)) )
[,1] [,2] [,3] [,4]
[1,] 1 2 3 4
[2,] 2 3 4 1
[3,] 3 4 1 2
[4,] 4 1 2 3