Create a table with subtotals per row and per column in R

I know how to create a table in R using table(), like this:
x <- rep(1:3,4)
y <- rep(1:4,3)
z<- cbind(x,y)
table(z[,1],z[,2])
    1 2 3 4
  1 1 1 1 1
  2 1 1 1 1
  3 1 1 1 1
How can I add the margin totals to the table so that it looks like this:
    1 2 3 4
  1 1 1 1 1  4
  2 1 1 1 1  4
  3 1 1 1 1  4
    3 3 3 3

> a
     [,1] [,2] [,3]
[1,]    1    1    1
[2,]    1    1    1
[3,]    1    1    1
> a <- cbind(a, rowSums(a))
> a <- rbind(a, colSums(a))
> a
     [,1] [,2] [,3] [,4]
[1,]    1    1    1    3
[2,]    1    1    1    3
[3,]    1    1    1    3
[4,]    3    3    3    9
Another approach, starting from the original (un-augmented) matrix:
a <- addmargins(a, margin = c(1, 2), FUN = sum)
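For reference, addmargins() (from the stats package) can also be applied directly to the contingency table itself, which keeps the dimnames and adds labelled "Sum" margins; a minimal sketch reusing the x and y vectors from the question:
x <- rep(1:3, 4)
y <- rep(1:4, 3)
tab <- table(x, y)
addmargins(tab)  # adds a "Sum" row and a "Sum" column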


Get index locations of 0s which are completely surrounded by 1s

I have a matrix like so:
m <- matrix(c(1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,2,0,1,1,1,1,1,1,1,1,1), nrow = 12, ncol = 12)
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
[1,] 1 1 1 1 1 1 1 1 1 1 1 1
[2,] 1 1 2 1 1 2 1 1 2 1 1 2
[3,] 1 1 0 1 1 0 1 1 0 1 1 0
[4,] 1 1 1 1 1 1 1 1 1 1 1 1
[5,] 1 1 1 1 1 1 1 1 1 1 1 1
[6,] 1 1 1 1 1 1 1 1 1 1 1 1
[7,] 1 1 1 1 1 1 1 1 1 1 1 1
[8,] 1 1 1 1 1 1 1 1 1 1 1 1
[9,] 0 1 1 0 1 1 0 1 1 0 1 1
[10,] 1 1 1 1 1 1 1 1 1 1 1 1
[11,] 1 1 1 1 1 1 1 1 1 1 1 1
[12,] 1 1 1 1 1 1 1 1 1 1 1 1
and I want to find the index locations where a 0 is completely surrounded by 1s in a 3x3 window. I can find all the zeros with
which(m == 0)
but this will also return places where a 2 is adjacent to a 0, such as at index location m[3,3].
w <- which(m == 0, arr.ind = TRUE)
w
# row col
# [1,] 9 1
# [2,] 3 3
# [3,] 9 4
# [4,] 3 6
# [5,] 9 7
# [6,] 3 9
# [7,] 9 10
# [8,] 3 12
Zeroes on the boundary cannot have a full 3x3 window around them, so filter those out:
w <- w[w[, 1] > 1 & w[, 1] < nrow(m) & w[, 2] > 1 & w[, 2] < ncol(m), ]
w
# row col
# [1,] 3 3
# [2,] 9 4
# [3,] 3 6
# [4,] 9 7
# [5,] 3 9
# [6,] 9 10
Now we can take those inner indices and build 3x3 submatrices into a list. Here are the first couple (of six):
Map(function(rn,cn) m[rn+(-1:1),cn+(-1:1)], w[,1], w[,2])[1:2]
# [[1]]
# [,1] [,2] [,3]
# [1,] 1 2 1
# [2,] 1 0 1
# [3,] 1 1 1
# [[2]]
# [,1] [,2] [,3]
# [1,] 1 1 1
# [2,] 1 0 1
# [3,] 1 1 1
Now we keep just the ones where there is only one non-1 entry in the submatrix.
Filter(function(m3) sum(m3 != 1) == 1, Map(function(rn,cn) m[rn+(-1:1),cn+(-1:1)], w[,1], w[,2]))
# [[1]]
# [,1] [,2] [,3]
# [1,] 1 1 1
# [2,] 1 0 1
# [3,] 1 1 1
# [[2]]
# [,1] [,2] [,3]
# [1,] 1 1 1
# [2,] 1 0 1
# [3,] 1 1 1
# [[3]]
# [,1] [,2] [,3]
# [1,] 1 1 1
# [2,] 1 0 1
# [3,] 1 1 1
Since you need to just count the occurrences, add length(...) around that, and you have your answer.
(If you're curious, the reason I went with sum(m3 != 1) == 1 is that I wasn't certain whether you also wanted the submatrices on the border. If you did, those windows would contain fewer than the eight 1s of a full 3x3 window, but either way there should be exactly one non-1 entry: the center 0.)
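As a quick sketch of that counting step (reusing m and w from above):
subs <- Map(function(rn, cn) m[rn + (-1:1), cn + (-1:1)], w[, 1], w[, 2])
length(Filter(function(m3) sum(m3 != 1) == 1, subs))
# [1] 3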
To get just the indices that match,
w[mapply(function(rn,cn) sum(m[rn+(-1:1),cn+(-1:1)] != 1) == 1,
w[,1], w[,2]),]
# row col
# [1,] 9 4
# [2,] 9 7
# [3,] 9 10
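If this check will be run repeatedly, the steps above can be wrapped into one helper; count_isolated_zeros is a made-up name, and the sketch assumes m is a numeric matrix:
count_isolated_zeros <- function(m) {
  w <- which(m == 0, arr.ind = TRUE)
  # keep only zeros whose full 3x3 window lies inside the matrix
  w <- w[w[, 1] > 1 & w[, 1] < nrow(m) & w[, 2] > 1 & w[, 2] < ncol(m), , drop = FALSE]
  if (nrow(w) == 0) return(0L)
  ok <- mapply(function(rn, cn) sum(m[rn + (-1:1), cn + (-1:1)] != 1) == 1,
               w[, 1], w[, 2])
  sum(ok)
}
count_isolated_zeros(m)
# [1] 3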

Insert column vector at specific position in matrix dynamically

I have to insert a new vector (in this example a zero vector) into an existing matrix. The problem is that I have an iterative process, and the positions and the number of vectors to insert change. I have not been able to come up with a function that a) works and b) is efficient enough for huge amounts of data.
A non-dynamic approach simply using cbind() is:
old <- matrix(1,10,10) #original matrix
vec <- matrix(5,10,1) #vector 1 to insert
vec2 <- matrix(8,10,1) #vector 2 to insert
old
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
[1,] 1 1 1 1 1 1 1 1 1 1
[2,] 1 1 1 1 1 1 1 1 1 1
[3,] 1 1 1 1 1 1 1 1 1 1
[4,] 1 1 1 1 1 1 1 1 1 1
[5,] 1 1 1 1 1 1 1 1 1 1
[6,] 1 1 1 1 1 1 1 1 1 1
[7,] 1 1 1 1 1 1 1 1 1 1
[8,] 1 1 1 1 1 1 1 1 1 1
[9,] 1 1 1 1 1 1 1 1 1 1
[10,] 1 1 1 1 1 1 1 1 1 1
#assume that the positions to insert are 4 and 8
goal <- cbind(old[,c(1:3)],
vec,
old[,4:6], #attention, now old column 6 is new column 7
vec2,
old[,7:ncol(old)])
goal
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
[1,] 1 1 1 5 1 1 1 8 1 1 1 1
[2,] 1 1 1 5 1 1 1 8 1 1 1 1
[3,] 1 1 1 5 1 1 1 8 1 1 1 1
[4,] 1 1 1 5 1 1 1 8 1 1 1 1
[5,] 1 1 1 5 1 1 1 8 1 1 1 1
[6,] 1 1 1 5 1 1 1 8 1 1 1 1
[7,] 1 1 1 5 1 1 1 8 1 1 1 1
[8,] 1 1 1 5 1 1 1 8 1 1 1 1
[9,] 1 1 1 5 1 1 1 8 1 1 1 1
[10,] 1 1 1 5 1 1 1 8 1 1 1 1
However, I could not think of something that works with both changing positions and number of vectors to insert.
Any help is greatly appreciated, thank you very much.
cbind the vectors onto old and then reorder the columns. If we knew that no was already sorted, we could replace sort(no) with no.
no <- c(4, 8)
vecs <- cbind(vec, vec2)
cbind(old, vecs)[, order(c(1:ncol(old), sort(no) - seq_along(no))) ]
Extending G. Grothendieck's approach and solving the ordering problem:
pos <- c(4, 8)
pos <- pos - seq_along(pos)
cbind(old, vec, vec2)[, order(c(1:ncol(old), pos))]
Edit: Sorry, didn't see the edit of the answer above :)
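A small reusable wrapper along the lines of the two answers above (insert_cols is a hypothetical name; pos gives the column positions the inserted vectors should occupy in the result):
insert_cols <- function(old, vecs, pos) {
  vecs <- as.matrix(vecs)
  stopifnot(ncol(vecs) == length(pos))
  o <- order(pos)                        # allow unsorted positions
  pos <- pos[o]
  vecs <- vecs[, o, drop = FALSE]
  keys <- c(seq_len(ncol(old)), pos - seq_along(pos))
  cbind(old, vecs)[, order(keys), drop = FALSE]
}
insert_cols(old, cbind(vec, vec2), c(4, 8))  # same result as goal above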

R drop an element from a nested list using laply

I have a nested list; the elements in the list are strings. The structure is:
> lapply(DVHlimits, function(x) laply (x, function(x) laply(x, function(x) length(x))))
[[1]]
1 2 3 4 5 6 7 8
[1,] 1 1 1 1 1 1 1 1
[2,] 1 1 1 1 1 1 1 1
[3,] 1 1 1 1 1 1 1 1
[4,] 1 1 1 1 1 1 1 1
[5,] 1 1 1 1 1 1 1 1
[[2]]
1 2 3 4 5 6 7
[1,] 1 1 1 1 1 1 1
[2,] 1 1 1 1 1 1 1
[3,] 1 1 1 1 1 1 1
[4,] 1 1 1 1 1 1 1
[[3]]
1 2 3 4 5 6 7
[1,] 1 1 1 1 1 1 1
[2,] 1 1 1 1 1 1 1
etc ......
What I want to do is drop the 8th element from each of the sublists (where there is an 8th element). Can anyone tell me how to remove them?
Thank you
Thanks for your suggestions, this is the solution that I came up with.
# Create a function which can be used with lapply
cleanColls <- function(x) {
  x[c(-1, -8)]  # drop the 1st and 8th elements
}
DVHlimits <- lapply(DVHlimits, function(x) lapply(x, cleanColls))
As you can probably figure out, I have also dropped the first element of each sublist. The end result is that each sublist now has only 6 elements; they are all the same length, which is what I wanted to achieve.
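If you only wanted to drop the 8th element where one exists (keeping the first element), a sketch along the same lines could be:
# drop only the 8th element, and only when the sublist actually has one
DVHlimits2 <- lapply(DVHlimits, function(sub)
  lapply(sub, function(e) if (length(e) >= 8) e[-8] else e))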

Creating a complicated empty list or data frame

I have a variable that is a list.
[[1]]
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
[1,] 1 1 1 1 1 1 1 4
[2,] 1 1 1 1 1 1 1 4
[3,] 1 1 1 1 1 1 1 4
[4,] 1 1 1 1 1 1 1 4
[5,] 1 1 1 1 1 1 1 4
[6,] 1 1 1 1 1 1 1 4
[7,] 1 1 1 1 1 1 1 4
[8,] 1 1 1 1 1 1 1 4
[9,] 1 1 1 1 1 1 1 4
[10,] 1 1 1 1 1 1 1 4
[11,] 1 1 1 1 1 1 1 4
[[2]]
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
[1,] 1 1 1 1 1 1 1 3
[2,] 1 1 1 1 1 1 1 4
[3,] 1 1 1 1 1 1 1 4
[4,] 1 1 1 1 1 1 1 3
[5,] 1 1 1 1 1 1 1 4
[6,] 1 1 1 1 1 1 1 4
[7,] 1 1 1 1 1 1 1 4
[8,] 1 1 1 1 1 1 1 4
[9,] 1 1 1 1 1 1 1 4
[10,] 1 1 1 1 1 1 1 4
[11,] 1 1 1 1 1 1 1 4
I need an empty variable which is the same format as this variable. My function will do some computation, and the results will be put in the same locations as in this variable. However, the new variable will not have the 8th column.
Replicate your data:
df = list(matrix(rep(1, 88), ncol = 8), matrix(rep(1, 88), ncol = 8))
Remove the 8th column from each sublist:
new_df = lapply(df, function(x) x[,-8])
Remove the 8th column from each sublist and replace all the 1s with NA:
new_df = lapply(df, function(x) replace(x[,-8], x[,-8] == 1, NA))
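Since the question asks for an empty variable of the same shape (minus the 8th column), another sketch is to build NA placeholders directly from each matrix's dimensions (template is just an illustrative name):
template = lapply(df, function(x) matrix(NA, nrow = nrow(x), ncol = ncol(x) - 1))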

removing some special columns in a large data set with R

I work with a large data set (1200 x 10000). In my data set some columns have the same value except in one or two points, and I need to detect and delete these columns. For example, in column 1846:
> x[317:400,1846]
[1] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
[81] 2 2 **1** 2
The values in all other rows (1:317 and 400:1200) are 2.
How can I solve this?
For example, here is part of my file (1200 x 10000):
x
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
[1,] 1 1 0 1 2 0 1 0 1 2 2 1
[2,] 1 1 0 1 2 0 1 0 1 2 1 1
[3,] 2 1 0 1 2 0 1 0 1 2 2 1
[4,] 1 2 0 1 2 0 1 0 1 2 2 2
[5,] 0 1 0 1 2 0 1 0 1 2 1 1
[6,] 2 0 0 1 2 0 1 2 0 2 1 2
[7,] 1 1 0 1 2 1 1 0 1 2 0 2
[8,] 0 1 0 1 2 0 1 0 1 2 0 0
[9,] 0 1 0 1 2 0 1 0 1 1 2 1
[10,] 1 1 0 1 2 0 1 0 1 2 1 1
In my original data set I want to remove columns like 3 to 10.
Continuing from my answer to your first post:
detect.col <- function(
  x,
  n.diff = 3  # the minimal number of unique values required per column
) {
  ret <- which(apply(x, 2, function(e) length(unique(e))) >= n.diff)
  ret
}
x[, detect.col(x)]
I guess this is what you actually mean?
mm<-read.table(text=" [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
[1,] 1 1 0 1 2 0 1 0 1 2 2 1
[2,] 1 1 0 1 2 0 1 0 1 2 1 1
[3,] 2 1 0 1 2 0 1 0 1 2 2 1
[4,] 1 2 0 1 2 0 1 0 1 2 2 2
[5,] 0 1 0 1 2 0 1 0 1 2 1 1
[6,] 2 0 0 1 2 0 1 2 0 2 1 2
[7,] 1 1 0 1 2 1 1 0 1 2 0 2
[8,] 0 1 0 1 2 0 1 0 1 2 0 0
[9,] 0 1 0 1 2 0 1 0 1 1 2 1
[10,] 1 1 0 1 2 0 1 0 1 2 1 1", row.names=1, header=T)
now,
mm[, which(apply(mm, 2, function(x) length(unique(x))) == 3)]
Output:
X..1. X..2. X..11. X..12.
[1,] 1 1 2 1
[2,] 1 1 1 1
[3,] 2 1 2 1
[4,] 1 2 2 2
[5,] 0 1 1 1
[6,] 2 0 1 2
[7,] 1 1 0 2
[8,] 0 1 0 0
[9,] 0 1 2 1
[10,] 1 1 1 1
I am not certain, but I think you want to delete any columns that contain a single value in n-1 or n-2 rows, where n is the number of rows. If so, then you would want to delete:
column x2 in my.data, because it contains nine 1s and one 0, and
column x5 in my.data, because it contains eight 2s and two 1s.
The code below does that. Sorry if this is not what you are trying to do. I am not sure whether this code would perform well with a huge data frame.
my.data <- read.table(text='
x1 x2 x3 x4 x5 x6
1 1 2 2 2 1
1 1 2 1 1 2
1 1 2 2 2 3
1 1 2 2 2 4
1 1 2 1 2 5
1 1 2 2 2 6
1 0 2 2 2 7
1 1 2 1 2 8
1 1 2 2 1 9
1 1 2 2 2 10
', header = TRUE)
my.data
my.summary <- as.data.frame.matrix(table( rep(colnames(my.data),
each=nrow(my.data)), unlist(my.data)))
my.summary
delete.these <- which(my.summary == (nrow(my.data)-2) |
my.summary == (nrow(my.data)-1), arr.ind = TRUE)[,1]
my.data[,-delete.these]
x1 x3 x4 x6
1 1 2 2 1
2 1 2 1 2
3 1 2 2 3
4 1 2 2 4
5 1 2 1 5
6 1 2 2 6
7 1 2 2 7
8 1 2 1 8
9 1 2 2 9
10 1 2 2 10
This will keep only columns with one distinct value, assuming your data.frame is named x:
keepIndex <- apply(
  x,
  2,
  FUN = function(column) {
    return(length(unique(column)) == 1)
  })
x <- x[, keepIndex]
This should work:
m <- matrix(2, nrow = 100, ncol = 100)         # make a dummy matrix m
m[sample(1:100, 10), sample(1:100, 10)] <- 1   # set some random rows/columns to 1
m[, -which(colSums(m == 1) > 0)]               # get rid of columns containing a 1
A solution based on Boolean indexing.
> x<-cbind(c(1,1,1,1),c(1,1,1,2),c(1,1,1,1))
> x
[,1] [,2] [,3]
[1,] 1 1 1
[2,] 1 1 1
[3,] 1 1 1
[4,] 1 2 1
> x[,colSums(x!=x[1,])==0]
[,1] [,2]
[1,] 1 1
[2,] 1 1
[3,] 1 1
[4,] 1 1
If your data is stored in a data frame named df:
df[ ,sapply(df, function(x) all(x[1] == x[-1]))]
Either search the whole data or a subset of it:
detect.col <- function(
  x, row.from = 1, row.to = nrow(x), col.from = 1, col.to = ncol(x),
  n.diff = 3  # the minimal number of unique values required per column
) {
  tmp.x <- x[row.from:row.to, col.from:col.to]
  ret <- which(apply(tmp.x, 2, function(e) length(unique(e))) < n.diff)
  if (length(ret)) {
    ret <- ret + col.from - 1
  }
  ret
}
## search the whole
detect.col(x) # columns to remove
## Or only search within a range, like in your case
row.from <- 317
row.to <- 400
col.from <- 1000
col.to <- 2000
col.to.remove <- detect.col(x,row.from,row.to,col.from,col.to)
x[,-col.to.remove] # print those to keep
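Tying the answers back to the original goal (dropping columns that are constant except in one or two rows), a direct sketch, assuming x is a numeric matrix and k is the tolerated number of deviating entries per column:
k <- 2  # drop a column if at most k of its entries differ from its most common value
n.off <- apply(x, 2, function(col) length(col) - max(table(col)))
x.clean <- x[, n.off > k, drop = FALSE]
The names k, n.off and x.clean are only illustrative; adjust k to match exactly how many deviating points should trigger removal.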
