adding data frame of counts to template data frame in R

adding data frame of counts to template data frame in R - r

I have data.frames of counts such as:
a <- data.frame(id=1:10,
"1"=c(rep(1,3),rep(0,7)),
"3"=c(rep(0,4),rep(1,6)))
names(a)[2:3] <- c("1","3")
a
> a
id 1 3
1 1 1 0
2 2 1 0
3 3 1 0
4 4 0 0
5 5 0 1
6 6 0 1
7 7 0 1
8 8 0 1
9 9 0 1
10 10 0 1
and a template data.frame such as
m <- data.frame(id=1:10,
"1"= rep(0,10),
"2"= rep(0,10),
"3"= rep(0,10),
"4"= rep(0,10))
names(m)[-1] <- 1:4
m
> m
id 1 2 3 4
1 1 0 0 0 0
2 2 0 0 0 0
3 3 0 0 0 0
4 4 0 0 0 0
5 5 0 0 0 0
6 6 0 0 0 0
7 7 0 0 0 0
8 8 0 0 0 0
9 9 0 0 0 0
10 10 0 0 0 0
and I want to add the values of a into the template m
in the appropraite columns, leaving the rest as 0.
This is working but I would like to know
if there is a more elegant way, perhaps using plyr or data.table:
provi <- rbind.fill(a,m)
provi[is.na(provi)] <- 0
mnew <- aggregate(provi[,-1],by=list(provi$id),FUN=sum)
names(mnew)[1] <- "id"
mnew <- mnew[c(1,order(names(mnew)[-1])+1)]
mnew
> mnew
id 1 2 3 4
1 1 1 0 0 0
2 2 1 0 0 0
3 3 1 0 0 0
4 4 0 0 0 0
5 5 0 0 1 0
6 6 0 0 1 0
7 7 0 0 1 0
8 8 0 0 1 0
9 9 0 0 1 0
10 10 0 0 1 0

I guess the concise option would be:
m[names(a)] <- a
Or we match the column names ('i1'), use that to create the column index with max.col, cbind with the row index ('i2'), and a similar step can be done to create 'i3'. We change the values in 'm' corresponding to 'i2' with the 'a' values based on 'i3'.
i1 <- match(names(a)[-1], names(m)[-1])
i2 <- cbind(m$id, i1[max.col(a[-1], 'first')]+1L)
i3 <- cbind(a$id, max.col(a[-1], 'first')+1L)
m[i2] <- a[i3]
m
# id 1 2 3 4
#1 1 1 0 0 0
#2 2 1 0 0 0
#3 3 1 0 0 0
#4 4 0 0 0 0
#5 5 0 0 1 0
#6 6 0 0 1 0
#7 7 0 0 1 0
#8 8 0 0 1 0
#9 9 0 0 1 0
#10 10 0 0 1 0
A data.table option would be melt/dcast
library(data.table)
dcast(melt(setDT(a), id.var='id')[,
variable:= factor(variable, levels=1:4)],
id~variable, value.var='value', drop=FALSE, fill=0)
# id 1 2 3 4
# 1: 1 1 0 0 0
# 2: 2 1 0 0 0
# 3: 3 1 0 0 0
# 4: 4 0 0 0 0
# 5: 5 0 0 1 0
# 6: 6 0 0 1 0
# 7: 7 0 0 1 0
# 8: 8 0 0 1 0
# 9: 9 0 0 1 0
#10: 10 0 0 1 0
A similar dplyr/tidyr option would be
library(dplyr)
library(tidyr)
gather(a, Var, Val, -id) %>%
mutate(Var=factor(Var, levels=1:4)) %>%
spread(Var, Val, drop=FALSE, fill=0)

You could use merge, too:
res <- suppressWarnings(merge(a, m, by="id", suffixes = c("", "")))
(res[, which(!duplicated(names(res)))][, names(m)])
# id 1 2 3 4
# 1 1 1 0 0 0
# 2 2 1 0 0 0
# 3 3 1 0 0 0
# 4 4 0 0 0 0
# 5 5 0 0 1 0
# 6 6 0 0 1 0
# 7 7 0 0 1 0
# 8 8 0 0 1 0
# 9 9 0 0 1 0
# 10 10 0 0 1 0

Related

How to add multiple columns in R with different condition for each column?

Here is my data set. I would like to add 5 new columns to mydata with 5 different conditions.
mydata=data.frame(sub=rep(c(1:4),c(3,4,5,5)),t=c(1:3,1:4,1:5,1:5),
y.val=c(10,20,13,
5,7,8,0,
45,17,25,12,10,
40,0,0,5,8))
mydata
sub t y.val
1 1 1 10
2 1 2 20
3 1 3 13
4 2 1 5
5 2 2 7
6 2 3 8
7 2 4 0
8 3 1 45
9 3 2 17
10 3 3 25
11 3 4 12
12 3 5 10
13 4 1 40
14 4 2 0
15 4 3 0
16 4 4 5
17 4 5 8
I would like to add the following 5 (max of 't' column) columns as
mydata$It1=ifelse(mydata$t==1 & mydata$y.val>0,1,0)
mydata$It2=ifelse(mydata$t==2 & mydata$y.val>0,1,0)
mydata$It3=ifelse(mydata$t==3 & mydata$y.val>0,1,0)
mydata$It4=ifelse(mydata$t==4 & mydata$y.val>0,1,0)
mydata$It5=ifelse(mydata$t==5 & mydata$y.val>0,1,0)
Here is the expected outcome.
> mydata
sub t y.val It1 It2 It3 It4 It5
1 1 1 10 1 0 0 0 0
2 1 2 20 0 1 0 0 0
3 1 3 13 0 0 1 0 0
4 2 1 5 1 0 0 0 0
5 2 2 7 0 1 0 0 0
6 2 3 8 0 0 1 0 0
7 2 4 0 0 0 0 0 0
8 3 1 45 1 0 0 0 0
9 3 2 17 0 1 0 0 0
10 3 3 25 0 0 1 0 0
11 3 4 12 0 0 0 1 0
12 3 5 10 0 0 0 0 1
13 4 1 40 1 0 0 0 0
14 4 2 0 0 0 0 0 0
15 4 3 0 0 0 0 0 0
16 4 4 5 0 0 0 1 0
17 4 5 8 0 0 0 0 1
I appreciate your help if it can be written as a function using for loop or any other technique.

You could use sapply/lapply
n <- seq_len(5)
mydata[paste0("It", n)] <- +(sapply(n, function(x) mydata$t==x & mydata$y.val>0))
mydata
# sub t y.val It1 It2 It3 It4 It5
#1 1 1 10 1 0 0 0 0
#2 1 2 20 0 1 0 0 0
#3 1 3 13 0 0 1 0 0
#4 2 1 5 1 0 0 0 0
#5 2 2 7 0 1 0 0 0
#6 2 3 8 0 0 1 0 0
#7 2 4 0 0 0 0 0 0
#8 3 1 45 1 0 0 0 0
#9 3 2 17 0 1 0 0 0
#10 3 3 25 0 0 1 0 0
#11 3 4 12 0 0 0 1 0
#12 3 5 10 0 0 0 0 1
#13 4 1 40 1 0 0 0 0
#14 4 2 0 0 0 0 0 0
#15 4 3 0 0 0 0 0 0
#16 4 4 5 0 0 0 1 0
#17 4 5 8 0 0 0 0 1
mydata$t==x & mydata$y.val>0 returns a logical value of TRUE/FALSE based on condition. The + changes those logical values to 1/0 respectively. (Try +c(FALSE, TRUE)). It avoids using ifelse i.e ifelse(condition, 1, 0).

Here's another approach based on multiplying a model matrix by the logical y.val > 0.
df <- cbind(mydata[1:3], model.matrix(~ factor(t) + 0, mydata)*(mydata$y.val>0))
Which gives:
sub t y.val factor.t.1 factor.t.2 factor.t.3 factor.t.4 factor.t.5
1 1 1 10 1 0 0 0 0
2 1 2 20 0 1 0 0 0
3 1 3 13 0 0 1 0 0
4 2 1 5 1 0 0 0 0
5 2 2 7 0 1 0 0 0
6 2 3 8 0 0 1 0 0
7 2 4 0 0 0 0 0 0
8 3 1 45 1 0 0 0 0
9 3 2 17 0 1 0 0 0
10 3 3 25 0 0 1 0 0
11 3 4 12 0 0 0 1 0
12 3 5 10 0 0 0 0 1
13 4 1 40 1 0 0 0 0
14 4 2 0 0 0 0 0 0
15 4 3 0 0 0 0 0 0
16 4 4 5 0 0 0 1 0
17 4 5 8 0 0 0 0 1
To clean up the names you can do:
names(df) <- sub("factor.t.", "It", names(df), fixed = TRUE)

You can use sapply to compare each t for equality against 1:5 and combine this with an & of y.val>0.
within(mydata, It <- +(sapply(1:5, `==`, t) & y.val>0))
# sub t y.val It.1 It.2 It.3 It.4 It.5
#1 1 1 10 1 0 0 0 0
#2 1 2 20 0 1 0 0 0
#3 1 3 13 0 0 1 0 0
#4 2 1 5 1 0 0 0 0
#5 2 2 7 0 1 0 0 0
#6 2 3 8 0 0 1 0 0
#7 2 4 0 0 0 0 0 0
#8 3 1 45 1 0 0 0 0
#9 3 2 17 0 1 0 0 0
#10 3 3 25 0 0 1 0 0
#11 3 4 12 0 0 0 1 0
#12 3 5 10 0 0 0 0 1
#13 4 1 40 1 0 0 0 0
#14 4 2 0 0 0 0 0 0
#15 4 3 0 0 0 0 0 0
#16 4 4 5 0 0 0 1 0
#17 4 5 8 0 0 0 0 1

Here's a tidyverse solution, using pivot_wider:
library(tidyverse)
mydata %>%
mutate(new_col = paste0("It", t),
y_test = as.integer(y.val > 0)) %>%
pivot_wider(id_cols = c(sub, t, y.val),
names_from = new_col,
values_from = y_test,
values_fill = list(y_test = 0))
sub t y.val It1 It2 It3 It4 It5
<int> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 1 10 1 0 0 0 0
2 1 2 20 0 1 0 0 0
3 1 3 13 0 0 1 0 0
4 2 1 5 1 0 0 0 0
5 2 2 7 0 1 0 0 0
6 2 3 8 0 0 1 0 0
7 2 4 0 0 0 0 0 0
8 3 1 45 1 0 0 0 0
9 3 2 17 0 1 0 0 0
10 3 3 25 0 0 1 0 0
11 3 4 12 0 0 0 1 0
12 3 5 10 0 0 0 0 1
13 4 1 40 1 0 0 0 0
14 4 2 0 0 0 0 0 0
15 4 3 0 0 0 0 0 0
16 4 4 5 0 0 0 1 0
17 4 5 8 0 0 0 0 1
Explanation:
Make two columns, new_col (new column names with "It") and y_test (y.val > 0).
Pivot new_col values into column names.
Fill in the NA values with zeros.

One purrr and dplyr option could be:
map_dfc(.x = 1:5,
~ mydata %>%
mutate(!!paste0("It", .x) := as.integer(t == .x & y.val > 0)) %>%
select(starts_with("It"))) %>%
bind_cols(mydata)
It1 It2 It3 It4 It5 sub t y.val
1 1 0 0 0 0 1 1 10
2 0 1 0 0 0 1 2 20
3 0 0 1 0 0 1 3 13
4 1 0 0 0 0 2 1 5
5 0 1 0 0 0 2 2 7
6 0 0 1 0 0 2 3 8
7 0 0 0 0 0 2 4 0
8 1 0 0 0 0 3 1 45
9 0 1 0 0 0 3 2 17
10 0 0 1 0 0 3 3 25
11 0 0 0 1 0 3 4 12
12 0 0 0 0 1 3 5 10
13 1 0 0 0 0 4 1 40
14 0 0 0 0 0 4 2 0
15 0 0 0 0 0 4 3 0
16 0 0 0 1 0 4 4 5
17 0 0 0 0 1 4 5 8
Or if you want to perform it dynamically according the range in t column:
map_dfc(.x = reduce(as.list(range(mydata$t)), `:`),
~ mydata %>%
mutate(!!paste0("It", .x) := as.integer(t == .x & y.val > 0)) %>%
select(starts_with("It"))) %>%
bind_cols(mydata)

create a loop to get samples in grouped data which meet a condition

I have a dataframe where data are grouped by ID. I need to know how many cells are the 10% of each group in order to select this number in a sample, but this sample should select the cells which EP is 1.
I've tried to do a nested For loop: one For to know the quantity of cells which are the 10% for each group and the bigger one to sample this number meeting the condition EP==1
x <- data.frame("ID"=rep(1:2, each=10),"EP" = rep(0:1, times=10))
x
ID EP
1 1 0
2 1 1
3 1 0
4 1 1
5 1 0
6 1 1
7 1 0
8 1 1
9 1 0
10 1 1
11 2 0
12 2 1
13 2 0
14 2 1
15 2 0
16 2 1
17 2 0
18 2 1
19 2 0
20 2 1
for(j in 1:1000){
for (i in 1:nrow(x)){
d <- x[x$ID==i,]
npix <- 10*nrow(d)/100
}
r <- sample(d[d$EP==1,],npix)
print(r)
}
data frame with 0 columns and 0 rows
data frame with 0 columns and 0 rows
data frame with 0 columns and 0 rows
.
.
.
until 1000
I would want to get this dataframe, where each sample is in a new column in x, and the cell sampled has "1":
ID EP s1 s2....s1000
1 1 0 0 0 ....
2 1 1 0 1
3 1 0 0 0
4 1 1 0 0
5 1 0 0 0
6 1 1 0 0
7 1 0 0 0
8 1 1 0 0
9 1 0 0 0
10 1 1 1 0
11 2 0 0 0
12 2 1 0 0
13 2 0 0 0
14 2 1 0 1
15 2 0 0 0
16 2 1 0 0
17 2 0 0 0
18 2 1 1 0
19 2 0 0 0
20 2 1 0 0
see that each 1 in S1 and s2 are the sampled cells and correspond to 10% of cells in each group (1, 2) which meet the condition EP==1

you can try
set.seed(1231)
x <- data.frame("ID"=rep(1:2, each=10),"EP" = rep(0:1, times=10))
library(tidyverse)
x %>%
group_by(ID) %>%
mutate(index= ifelse(EP==1, 1:n(),0)) %>%
mutate(s1 = ifelse(index %in% sample(index[index!=0], n()*0.1), 1, 0)) %>%
mutate(s2 = ifelse(index %in% sample(index[index!=0], n()*0.1), 1, 0))
# A tibble: 20 x 5
# Groups: ID [2]
ID EP index s1 s2
<int> <int> <dbl> <dbl> <dbl>
1 1 0 0 0 0
2 1 1 2 0 0
3 1 0 0 0 0
4 1 1 4 0 0
5 1 0 0 0 0
6 1 1 6 1 1
7 1 0 0 0 0
8 1 1 8 0 0
9 1 0 0 0 0
10 1 1 10 0 0
11 2 0 0 0 0
12 2 1 2 0 0
13 2 0 0 0 0
14 2 1 4 0 1
15 2 0 0 0 0
16 2 1 6 0 0
17 2 0 0 0 0
18 2 1 8 0 0
19 2 0 0 0 0
20 2 1 10 1 0

We can write a function which gives us 1's which are 10% for each ID and place it where EP = 1.
library(dplyr)
rep_func <- function() {
x %>%
group_by(ID) %>%
mutate(s1 = 0,
s1 = replace(s1, sample(which(EP == 1), floor(0.1 * n())), 1)) %>%
pull(s1)
}
then use replicate to repeat it for n times
n <- 5
x[paste0("s", seq_len(n))] <- replicate(n, rep_func())
x
# ID EP s1 s2 s3 s4 s5
#1 1 0 0 0 0 0 0
#2 1 1 0 0 0 0 0
#3 1 0 0 0 0 0 0
#4 1 1 0 0 0 0 0
#5 1 0 0 0 0 0 0
#6 1 1 1 0 0 1 0
#7 1 0 0 0 0 0 0
#8 1 1 0 1 0 0 0
#9 1 0 0 0 0 0 0
#10 1 1 0 0 1 0 1
#11 2 0 0 0 0 0 0
#12 2 1 0 0 1 0 0
#13 2 0 0 0 0 0 0
#14 2 1 1 1 0 0 0
#15 2 0 0 0 0 0 0
#16 2 1 0 0 0 0 1
#17 2 0 0 0 0 0 0
#18 2 1 0 0 0 1 0
#19 2 0 0 0 0 0 0
#20 2 1 0 0 0 0 0

Split column of comma-separated numbers into multiple columns based on value

I have a column f in my dataframe that I would like to spread into multiple columns based on the values in that column. For example:
df <- structure(list(f = c(NA, "18,17,10", "12,8", "17,11,6", "18",
"12", "12", NA, "17,11", "12")), .Names = "f", row.names = c(NA,
10L), class = "data.frame")
df
# f
# 1 <NA>
# 2 18,17,10
# 3 12,8
# 4 17,11,6
# 5 18
# 6 12
# 7 12
# 8 <NA>
# 9 17,11
# 10 12
How would I split column f into multiple columns indicating the numbers in the row. I'm interested in something like this:
6 8 10 11 12 17 18
1 0 0 0 0 0 0 0
2 0 0 1 0 0 1 1
3 0 1 0 0 1 0 0
4 1 0 0 1 0 1 0
5 0 0 0 0 0 0 1
6 0 0 0 0 1 0 0
7 0 0 0 0 1 0 0
8 0 0 0 0 0 0 0
9 0 0 0 1 0 1 0
10 0 0 0 0 1 0 0
I'm thinking I could useunique on the f column to create the seperate columns based on the different numbers and then do a grepl to determine if the specific number is in column f but I was wondering if there was a better way. Something similar to spread or separate in the tidyr package.

A solution using tidyr::separate_rows will be as:
library(tidyverse)
df %>% mutate(ind = row_number()) %>%
separate_rows(f, sep=",") %>%
mutate(f = ifelse(is.na(f),0, f)) %>%
count(ind, f) %>%
spread(f, n, fill = 0) %>%
select(-2) %>% as.data.frame()
# ind 10 11 12 17 18 6 8
# 1 1 0 0 0 0 0 0 0
# 2 2 1 0 0 1 1 0 0
# 3 3 0 0 1 0 0 0 1
# 4 4 0 1 0 1 0 1 0
# 5 5 0 0 0 0 1 0 0
# 6 6 0 0 1 0 0 0 0
# 7 7 0 0 1 0 0 0 0
# 8 8 0 0 0 0 0 0 0
# 9 9 0 1 0 1 0 0 0
# 10 10 0 0 1 0 0 0 0

This could be achieved by splitting on the ,, the stack it to a two column data.frame and get the frequency with table
df1 <- na.omit(stack(setNames(lapply(strsplit(df$f, ","),
as.numeric), seq_len(nrow(df))))[, 2:1])
table(df1)
# values
#ind 6 8 10 11 12 17 18
# 1 0 0 0 0 0 0 0
# 2 0 0 1 0 0 1 1
# 3 0 1 0 0 1 0 0
# 4 1 0 0 1 0 1 0
# 5 0 0 0 0 0 0 1
# 6 0 0 0 0 1 0 0
# 7 0 0 0 0 1 0 0
# 8 0 0 0 0 0 0 0
# 9 0 0 0 1 0 1 0
# 10 0 0 0 0 1 0 0

How to convert two factors to adjacency matrix in R?

I have a data frame with two columns (key and value) where each column is a factor:
df = data.frame(gl(3,4,labels=c('a','b','c')), gl(6,2))
colnames(df) = c("key", "value")
key value
1 a 1
2 a 1
3 a 2
4 a 2
5 b 3
6 b 3
7 b 4
8 b 4
9 c 5
10 c 5
11 c 6
12 c 6
I want to convert it to adjacency matrix (in this case 3x6 size) like:
1 2 3 4 5 6
a 1 1 0 0 0 0
b 0 0 1 1 0 0
c 0 0 0 0 1 1
So that I can run clustering on it (group keys that have similar values together) with either kmeans or hclust.
Closest that I was able to get was using model.matrix( ~ value, df) which results in:
(Intercept) value2 value3 value4 value5 value6
1 1 0 0 0 0 0
2 1 0 0 0 0 0
3 1 1 0 0 0 0
4 1 1 0 0 0 0
5 1 0 1 0 0 0
6 1 0 1 0 0 0
7 1 0 0 1 0 0
8 1 0 0 1 0 0
9 1 0 0 0 1 0
10 1 0 0 0 1 0
11 1 0 0 0 0 1
12 1 0 0 0 0 1
but results aren't grouped by key yet.
From another side I can collapse this dataset into groups using:
aggregate(df$value, by=list(df$key), unique)
Group.1 x.1 x.2
1 a 1 2
2 b 3 4
3 c 5 6
But I don't know what to do next...
Can someone help to solve this?

An easy way to do it in base R:
res <-table(df)
res[res>0] <-1
res
value
#key 1 2 3 4 5 6
# a 1 1 0 0 0 0
# b 0 0 1 1 0 0
# c 0 0 0 0 1 1

R: Print omitted 0's in table() - contingency tables [duplicate]

I am using the following R code to produce a confusion matrix comparing the true labels of some data to the output of a neural network.
t <- table(as.factor(test.labels), as.factor(nnetpredict))
However, sometimes the neural network doesn't predict any of a certain class, so the table isn't square (as, for example, there are 5 levels in the test.labels factor, but only 3 levels in the nnetpredict factor). I want to make the table square by adding in any factor levels necessary, and setting their counts to zero.
How should I go about doing this?
Example:
> table(as.factor(a), as.factor(b))
1 2 3 4 5 6 7 8 9 10
1 1 0 0 0 0 0 0 1 0 0
2 0 1 0 0 0 0 0 0 1 0
3 0 0 1 0 0 0 0 0 0 1
4 0 0 0 1 0 0 0 0 0 0
5 0 0 0 0 1 0 0 0 0 0
6 0 0 0 0 0 1 0 0 0 0
7 0 0 0 0 0 0 1 0 0 0
You can see in the table above that there are 7 rows, but 10 columns, because the a factor only has 7 levels, whereas the b factor has 10 levels. What I want to do is to pad the table with zeros so that the row labels and the column labels are the same, and the matrix is square. From the example above, this would produce:
1 2 3 4 5 6 7 8 9 10
1 1 0 0 0 0 0 0 1 0 0
2 0 1 0 0 0 0 0 0 1 0
3 0 0 1 0 0 0 0 0 0 1
4 0 0 0 1 0 0 0 0 0 0
5 0 0 0 0 1 0 0 0 0 0
6 0 0 0 0 0 1 0 0 0 0
7 0 0 0 0 0 0 1 0 0 0
8 0 0 0 0 0 0 0 0 0 0
9 0 0 0 0 0 0 0 0 0 0
10 0 0 0 0 0 0 0 0 0 0
The reason I need to do this is two-fold:
For display to users/in reports
So that I can use a function to calculate the Kappa statistic, which requires a table formatted like this (square, same row and col labels)

EDIT - round II to address the additional details in the question. I deleted my first answer since it wasn't relevant anymore.
This has produced the desired output for the test cases I've given it, but I definitely advise testing thoroughly with your real data. The approach here is to find the full list of levels for both inputs into the table and set that full list as the levels before generating the table.
squareTable <- function(x,y) {
x <- factor(x)
y <- factor(y)
commonLevels <- sort(unique(c(levels(x), levels(y))))
x <- factor(x, levels = commonLevels)
y <- factor(y, levels = commonLevels)
table(x,y)
}
Two test cases:
> #Test case 1
> set.seed(1)
> x <- factor(sample(0:9, 100, TRUE))
> y <- factor(sample(3:7, 100, TRUE))
>
> table(x,y)
y
x 3 4 5 6 7
0 2 1 3 1 0
1 1 0 2 3 0
2 1 0 3 4 3
3 0 3 6 3 2
4 4 4 3 2 1
5 2 2 0 1 0
6 1 2 3 2 3
7 3 3 3 4 2
8 0 4 1 2 4
9 2 1 0 0 3
> squareTable(x,y)
y
x 0 1 2 3 4 5 6 7 8 9
0 0 0 0 2 1 3 1 0 0 0
1 0 0 0 1 0 2 3 0 0 0
2 0 0 0 1 0 3 4 3 0 0
3 0 0 0 0 3 6 3 2 0 0
4 0 0 0 4 4 3 2 1 0 0
5 0 0 0 2 2 0 1 0 0 0
6 0 0 0 1 2 3 2 3 0 0
7 0 0 0 3 3 3 4 2 0 0
8 0 0 0 0 4 1 2 4 0 0
9 0 0 0 2 1 0 0 3 0 0
> squareTable(y,x)
y
x 0 1 2 3 4 5 6 7 8 9
0 0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0 0
2 0 0 0 0 0 0 0 0 0 0
3 2 1 1 0 4 2 1 3 0 2
4 1 0 0 3 4 2 2 3 4 1
5 3 2 3 6 3 0 3 3 1 0
6 1 3 4 3 2 1 2 4 2 0
7 0 0 3 2 1 0 3 2 4 3
8 0 0 0 0 0 0 0 0 0 0
9 0 0 0 0 0 0 0 0 0 0
>
> #Test case 2
> set.seed(1)
> xx <- factor(sample(0:2, 100, TRUE))
> yy <- factor(sample(3:5, 100, TRUE))
>
> table(xx,yy)
yy
xx 3 4 5
0 4 14 9
1 14 15 9
2 11 11 13
> squareTable(xx,yy)
y
x 0 1 2 3 4 5
0 0 0 0 4 14 9
1 0 0 0 14 15 9
2 0 0 0 11 11 13
3 0 0 0 0 0 0
4 0 0 0 0 0 0
5 0 0 0 0 0 0
> squareTable(yy,xx)
y
x 0 1 2 3 4 5
0 0 0 0 0 0 0
1 0 0 0 0 0 0
2 0 0 0 0 0 0
3 4 14 11 0 0 0
4 14 15 11 0 0 0
5 9 9 13 0 0 0

Develop Reference

r css asp.net wordpress firebase qt symfony nginx http apache-flex

adding data frame of counts to template data frame in R - r

You could use merge, too: res <- suppressWarnings(merge(a, m, by="id", suffixes = c("", ""))) (res[, which(!duplicated(names(res)))][, names(m)]) # id 1 2 3 4 # 1 1 1 0 0 0 # 2 2 1 0 0 0 # 3 3 1 0 0 0 # 4 4 0 0 0 0 # 5 5 0 0 1 0 # 6 6 0 0 1 0 # 7 7 0 0 1 0 # 8 8 0 0 1 0 # 9 9 0 0 1 0 # 10 10 0 0 1 0

Related

How to add multiple columns in R with different condition for each column?

create a loop to get samples in grouped data which meet a condition

Split column of comma-separated numbers into multiple columns based on value

How to convert two factors to adjacency matrix in R?

R: Print omitted 0's in table() - contingency tables [duplicate]

Categories

Resources