Calculating frequency of a number in dataframe - r

I have a big dataset of film ratings (1 - 10) and would like to get the distribution of the ratings. The dataset also contains 0s; these are really NAs, but I need them as 0s later in the project (I am trying to build a recommendation system).
Sample Data
User.ID 60392452 60502258 60915544 60928336 60930535 60934417 60938455 60959037 60976845
1 26 0 0 0 0 0 0 0 0 0
2 51 0 0 0 0 0 0 0 0 0
3 91 0 0 0 0 0 0 0 0 0
4 99 0 0 0 0 0 0 0 0 0
5 114 0 0 0 0 0 0 0 0 0
6 125 0 0 0 0 0 0 0 0 0
7 165 0 0 0 0 0 0 0 0 9
8 243 0 0 10 0 0 0 0 0 0
OK, it's not very readable, but the User ID column holds "26", "51", etc. The movies are identified by codes ("60392452", etc.), which are the column headers.
As a start, I used the following code:
table(mod_dataset)
but I got an error message:
Error in table(mod_dataset) :
attempt to make a table with >= 2^31 elements
What is the equivalent of table for "big data" ?

I am really not sure whether this answers your question, but it's a way to table the ratings on a column by column basis.
# Tabulate the ratings of every column except the first (User.ID). Fixing the
# factor levels to 0:10 gives each column the same 11 counts, so the
# per-column tables line up as the columns of one integer matrix.
# vapply() (unlike sapply()) guarantees that matrix shape for any input.
rating_levels <- 0:10
res <- vapply(
  mod_dataset[-1],
  function(x) as.integer(table(factor(x, levels = rating_levels))),
  integer(length(rating_levels))
)
rownames(res) <- rating_levels
# Flag the rating levels that do not occur in any column ...
inx <- rowSums(res) == 0
# ... and drop them. drop = FALSE keeps the result a matrix even when only a
# single rating level remains. (rowSums(res) gives the counts over all columns.)
res[!inx, , drop = FALSE]
Data in dput format.
mod_dataset <-
structure(list(User.ID = c(26L, 51L, 91L, 99L, 114L, 125L, 165L,
243L), X60392452 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), X60502258 = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L), X60915544 = c(0L, 0L, 0L, 0L, 0L,
0L, 0L, 10L), X60928336 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L),
X60930535 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), X60934417 = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L), X60938455 = c(0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L), X60959037 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L), X60976845 = c(0L, 0L, 0L, 0L, 0L, 0L, 9L, 0L)), class = "data.frame", row.names = c("1",
"2", "3", "4", "5", "6", "7", "8"))

Related

Find column name with value bigger than zero in each row

I have data as follows:
dat <- structure(list(rn = c("A", "B",
"C", "D", "E",
"F", "G", "H",
"I", "J", "K",
"L", "M", "N"
), `0` = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L), `1` = c(0L, 0L, 0L, 0L, 0L, 0L, 569L, 0L, 0L, 0L, 0L, 0L,
0L, 0L), `2` = c(0L, 0L, 0L, 238L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L), `3` = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1146L, 0L,
0L, 0L, 0L, 0L), `4` = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 337L, 0L, 0L), `5` = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 631L), `6` = c(0L, 0L, 0L, 0L, 156L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), `7` = c(0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 298L, 0L, 0L, 0L), `8` = c(0L, 0L, 0L, 0L,
0L, 456L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), `9` = c(0L, 0L, 0L,
0L, 0L, 0L, 0L, 927L, 0L, 0L, 0L, 0L, 0L, 0L), `10` = c(436L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), `11` = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 657L, 0L, 0L, 0L, 0L), `12` = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1771L, 0L), `13` = c(0L,
0L, 283L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), `14` = c(0L,
297L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L)), class = c("data.table",
"data.frame"), row.names = c(NA, -14L))
rn 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14
1: A 0 0 0 0 0 0 0 0 0 0 436 0 0 0 0
2: B 0 0 0 0 0 0 0 0 0 0 0 0 0 0 297
3: C 0 0 0 0 0 0 0 0 0 0 0 0 0 283 0
4: D 0 0 238 0 0 0 0 0 0 0 0 0 0 0 0
5: E 0 0 0 0 0 0 156 0 0 0 0 0 0 0 0
6: F 0 0 0 0 0 0 0 0 456 0 0 0 0 0 0
7: G 0 569 0 0 0 0 0 0 0 0 0 0 0 0 0
8: H 0 0 0 0 0 0 0 0 0 927 0 0 0 0 0
9: I 0 0 0 1146 0 0 0 0 0 0 0 0 0 0 0
10: J 0 0 0 0 0 0 0 0 0 0 0 657 0 0 0
11: K 0 0 0 0 0 0 0 298 0 0 0 0 0 0 0
12: L 0 0 0 0 337 0 0 0 0 0 0 0 0 0 0
13: M 0 0 0 0 0 0 0 0 0 0 0 0 1771 0 0
14: N 0 0 0 0 0 631 0 0 0 0 0 0 0 0 0
I want to create a column with the column name of the column in which there is a value greater than zero.
Desired output:
dat <- structure(list(rn = c("A", "B",
"C", "D", "E",
"F", "G", "H",
"I", "J", "K",
"L", "M", "N"
), NR = c(10, 14, 13, 2, 6, 8, 1, 9, 3, 11, 7, 4, 12,
5)), class = c("data.table",
"data.frame"), row.names = c(NA, -14L))
rn NR
1: A 10
2: B 14
3: C 13
4: D 2
5: E 6
6: F 8
7: G 1
8: H 9
9: I 3
10: J 11
11: K 7
12: L 4
13: M 12
14: N 5
An easier option is max.col from base R
library(data.table)
# Every column except the first (the id column `rn`) holds values to search.
value_cols <- names(dat)[-1]
# max.col(..., "first") returns, per row, the position of the first column
# whose value is > 0. That position is looked up in the column names instead
# of subtracting 1, which is only right while the columns are named
# 0, 1, 2, ... in order. as.numeric() keeps NR numeric, as in the desired
# output; drop it if the column names are not numbers.
# NOTE: a row with no value > 0 still yields position 1 from max.col().
dat[, .(rn, NR = as.numeric(value_cols[max.col(.SD > 0, "first")])),
    .SDcols = value_cols]
-output
rn NR
<char> <num>
1: A 10
2: B 14
3: C 13
4: D 2
5: E 6
6: F 8
7: G 1
8: H 9
9: I 3
10: J 11
11: K 7
12: L 4
13: M 12
14: N 5
Or another option is apply from base R
# Position of the first value > 0 in each row, translated into that column's
# name (as a number) rather than assuming the name equals position - 1.
# Rows without any value > 0 give NA.
value_cols <- names(dat)[-1]
as.numeric(value_cols[apply(dat[, -1], 1, \(x) which(x > 0)[1])])
[1] 10 14 13 2 6 8 1 9 3 11 7 4 12 5

in R, how can I find the row number of the first occurrence and last occurrence of a value in a Matrix?

In R, I've created 25x25 matrices of values of 1 and 0 and I need to find the height between the first occurrence of 1 in the matrix and the last occurrence of 1 in the matrix.
Here's an example of a matrix of the letter a, where each 1 represents a black pixel and each 0 represents a white pixel:
a <- read.csv(csv_files[1])
a
V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20 V21 V22 V23 V24 V25
1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
6 0 0 0 0 0 0 0 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0
7 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0
8 0 0 0 0 0 1 1 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0
9 0 0 0 0 1 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
10 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
11 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
12 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
13 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
14 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
15 0 0 0 0 1 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0
16 0 0 0 0 1 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0
17 0 0 0 0 1 1 0 0 0 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 0
18 0 0 0 0 0 1 1 0 0 0 0 1 1 0 0 1 0 0 0 0 0 0 0 0 0
19 0 0 0 0 0 0 1 1 1 1 1 1 0 0 0 1 1 0 0 0 0 0 0 0 0
20 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
21 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
22 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
23 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
24 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
25 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
My idea is to find the row number of the last occurrence of 1 and the row number of the first occurrence of 1 and subtract one from the other, which will give me the height of the symbol.
In this case it would be 19 - 6 = 13, so the height is 13.
For context, I drew images of different letters and symbols in GIMP, then imported them into R and saved each one as a matrix in a CSV file.
Try the code below
> # Rows holding at least one 1, then the distance between the first and the
> # last of those rows.
> diff(range(which(rowSums(a == 1, na.rm = TRUE) > 0)))
[1] 13
Data
> dput(a)
structure(list(V1 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L),
V2 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), V3 = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), V4 = c(0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L), V5 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L), V6 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
), V7 = c(0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L), V8 = c(0L,
0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L), V9 = c(0L, 0L, 0L, 0L,
0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L,
0L, 0L, 0L, 0L, 0L, 0L), V10 = c(0L, 0L, 0L, 0L, 0L, 1L,
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 0L), V11 = c(0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L,
0L, 0L), V12 = c(0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L
), V13 = c(0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), V14 = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L,
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), V15 = c(0L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L), V16 = c(0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L,
0L, 0L, 0L, 0L, 0L), V17 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L,
0L, 0L, 0L), V18 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L), V19 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L),
V20 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), V21 = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), V22 = c(0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L), V23 = c(0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L), V24 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L), V25 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L)), class = "data.frame", row.names = c("1", "2", "3",
"4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15",
"16", "17", "18", "19", "20", "21", "22", "23", "24", "25"))

how to prepare an adjacency matrix for network analysis

I am trying to convert the raw data below into an adjacency matrix, using the values in the column "s_chloramphenicol", in preparation for a network analysis.
df <- structure(list(studyid0 = c(1L, 5L, 6L, 8L, 9L, 11L, 3052L, 3057L,
3058L, 3058L, 3060L, 3063L, 3064L, 3067L), s_chloramphenicol = c(0L,
0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L)), row.names = c(NA,
-14L), class = "data.frame", .Names = c("studyid0", "s_chloramphenicol"
))
The expected output is
df<-structure(list(`1` = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L), `5` = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L), `6` = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L), `8` = c(0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L,
0L, 0L, 0L), `9` = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L), `11` = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L), `3052` = c(0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L,
0L, 0L, 0L), `3057` = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L), `3058` = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L), `3060` = c(0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L,
0L, 0L, 0L), `3063` = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L), `3064` = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L), `3067` = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L)), .Names = c("1", "5", "6", "8", "9", "11", "3052",
"3057", "3058", "3060", "3063", "3064", "3067"), class = "data.frame", row.names = c(1L,
5L, 6L, 8L, 9L, 11L, 3052L, 3057L, 3058L, 3060L, 3063L, 3064L,
3067L))
You can use the function outer:
# Outer product of the 0/1 indicator with itself: cell (i, j) is 1 only when
# rows i and j of df both have s_chloramphenicol == 1. This includes the
# diagonal (i == j), and a studyid0 that occurs twice labels two rows/columns.
df2 <- with(df, s_chloramphenicol %o% s_chloramphenicol)
dimnames(df2) <- rep(list(df$studyid0), 2)
df2
Output:
1 5 6 8 9 11 3052 3057 3058 3058 3060 3063 3064 3067
1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
5 0 0 0 0 0 0 0 0 0 0 0 0 0 0
6 0 0 0 0 0 0 0 0 0 0 0 0 0 0
8 0 0 0 1 0 0 1 0 0 0 1 0 0 0
9 0 0 0 0 0 0 0 0 0 0 0 0 0 0
11 0 0 0 0 0 0 0 0 0 0 0 0 0 0
3052 0 0 0 1 0 0 1 0 0 0 1 0 0 0
3057 0 0 0 0 0 0 0 0 0 0 0 0 0 0
3058 0 0 0 0 0 0 0 0 0 0 0 0 0 0
3058 0 0 0 0 0 0 0 0 0 0 0 0 0 0
3060 0 0 0 1 0 0 1 0 0 0 1 0 0 0
3063 0 0 0 0 0 0 0 0 0 0 0 0 0 0
3064 0 0 0 0 0 0 0 0 0 0 0 0 0 0
3067 0 0 0 0 0 0 0 0 0 0 0 0 0 0

Find first column with specific property

In a data frame, after some calculations, all rows end with a series of 0, as in the (partial) example below:
X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
1 -9 0 0 0 0 0 0 0 0 0 0 0 0 0 0
2 4 -1 1 -1 0 -1 0 0 0 0 0 0 0 0 0
3 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0
4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
5 -3 0 0 0 0 0 0 0 0 0 0 0 0 0 0
6 -6 0 0 0 0 0 0 0 0 0 0 0 0 0 0
7 4 -4 1 -1 0 -1 0 0 0 0 0 0 0 0 0
8 3 -3 0 0 0 0 0 0 0 0 0 0 0 0 0
9 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0
10 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
11 -3 0 0 0 0 0 0 0 0 0 0 0 0 0 0
But:
- some isolated 0 can occur before the starting of the series of 0, as in lines 2 and 7
- some lines are entirely made of 0, as in lines 4 and 10
I would like to create a new column containing the following information:
"in which column does the series of 0 start?"
From the above example, this new column should contain the numbers:
2, 7, 2, 1, 2, 2, 7, 3, 2, 1, 2
I can't figure out how to do this...
Thanks for any hint.
Use apply to run rle on each row and get the first index where the value is equal to zero and the length is greater than 1 (start of series).
# For each row, find the column where the first run of two or more zeros
# starts. rle() describes runs, not columns, so the index of the matching run
# has to be mapped back to a column: a run starts one past the combined
# length of all runs before it. (Using the run index directly is only right
# when every earlier run has length 1.) Rows without such a run give NA.
apply(df, 1, function(x) {
  runs <- rle(x)
  run_start <- cumsum(c(1L, head(runs$lengths, -1L)))
  run_start[which(runs$values == 0 & runs$lengths > 1)[1]]
})
# [1] 2 7 2 1 2 2 7 3 2 1 2
Data
df = structure(list(X1 = c(-9L, 4L, 3L, 0L, -3L, -6L, 4L, 3L, 3L,
0L, -3L), X2 = c(0L, -1L, 0L, 0L, 0L, 0L, -4L, -3L, 0L, 0L, 0L
), X3 = c(0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L), X4 = c(0L,
-1L, 0L, 0L, 0L, 0L, -1L, 0L, 0L, 0L, 0L), X5 = c(0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), X6 = c(0L, -1L, 0L, 0L, 0L,
0L, -1L, 0L, 0L, 0L, 0L), X7 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L), X8 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L), X9 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), X10 = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), X11 = c(0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), X12 = c(0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L), X13 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L), X14 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L), X15 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L)), .Names = c("X1",
"X2", "X3", "X4", "X5", "X6", "X7", "X8", "X9", "X10", "X11",
"X12", "X13", "X14", "X15"), class = "data.frame", row.names = c(NA,
-11L))
Here is an easy solution. There are probably more sophisticated ones, but it works. It assumes your matrix is called 'x'.
# Append a column holding, for each row, the column index at which the
# trailing series of zeros starts (NA when the row does not end in a zero).
# Values are compared numerically (x != 0); grep(0, ...) would pattern-match
# the digit "0" and so also hit values such as 10.
# NOTE(review): adding a column by index like this needs x to be a data
# frame - confirm, since the accompanying text calls it a matrix.
is_nonzero <- x != 0
# Position of the last non-zero value in each row; 0 for an all-zero row,
# where max.col() has nothing to find and would only report a tie.
last_nonzero <- max.col(is_nonzero, ties.method = "last")
last_nonzero[rowSums(is_nonzero) == 0] <- 0
# The trailing zeros begin right after the last non-zero value.
zero_start <- last_nonzero + 1
zero_start[zero_start > ncol(x)] <- NA
x[, ncol(x) + 1] <- zero_start

table plot in R adjusting the axis

I am trying to plot this table
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29
X0 4 0 0 0 0 0 0 0 0 0 0 163 0 0 78 0 0 0 1 0 0 0 0 0 1 0 0 153 0
X1 0 0 0 0 0 152 123 0 0 0 0 0 5 0 1 0 0 0 0 119 0 0 0 0 0 0 0 0 0
X2 0 0 55 0 0 1 0 0 185 0 0 0 0 0 0 0 3 0 0 0 2 0 0 0 0 154 0 0 0
X3 1 1 0 0 149 0 0 0 0 0 1 0 0 0 0 4 0 4 126 0 0 0 0 0 108 1 5 0 0
X4 0 0 0 16 0 1 0 108 0 110 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
X5 13 0 0 0 3 1 0 0 0 0 0 0 1 2 0 138 0 123 7 0 0 0 1 0 18 0 93 0 0
X6 0 0 0 0 0 3 0 0 0 0 0 0 0 85 1 0 0 0 1 0 0 104 100 104 0 0 2 0 0
X7 0 93 23 0 0 0 0 0 0 0 71 0 0 0 0 0 55 0 0 0 55 0 0 0 0 0 0 0 103
X8 245 0 0 0 0 0 0 0 0 0 0 0 0 0 73 7 0 12 1 0 0 0 1 1 4 0 48 8 0
X9 0 0 0 153 0 11 1 15 0 18 0 1 194 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0
30
X0 0
X1 0
X2 0
X3 0
X4 162
X5 0
X6 0
X7 0
X8 0
X9 5
but I cannot make it visually clear what the table is showing, as the y-axis labels get smudged because of the number of columns in the table.
Is it possible to plot this in a way that keeps the axes clear, with nothing smudged together?
> dput(tablen)
structure(c(4L, 0L, 0L, 1L, 0L, 13L, 0L, 0L, 245L, 0L, 0L, 0L,
0L, 1L, 0L, 0L, 0L, 93L, 0L, 0L, 0L, 0L, 55L, 0L, 0L, 0L, 0L,
23L, 0L, 0L, 0L, 0L, 0L, 0L, 16L, 0L, 0L, 0L, 0L, 153L, 0L, 0L,
0L, 149L, 0L, 3L, 0L, 0L, 0L, 0L, 0L, 152L, 1L, 0L, 1L, 1L, 3L,
0L, 0L, 11L, 0L, 123L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 108L, 0L, 0L, 0L, 0L, 15L, 0L, 0L, 185L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 110L, 0L, 0L, 0L, 0L, 18L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 71L, 0L, 0L, 163L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 1L, 0L, 5L, 0L, 0L, 3L, 1L, 0L, 0L, 0L, 194L, 0L,
0L, 0L, 0L, 0L, 2L, 85L, 0L, 0L, 0L, 78L, 1L, 0L, 0L, 0L, 0L,
1L, 0L, 73L, 0L, 0L, 0L, 0L, 4L, 0L, 138L, 0L, 0L, 7L, 0L, 0L,
0L, 3L, 0L, 0L, 0L, 0L, 55L, 0L, 0L, 0L, 0L, 0L, 4L, 0L, 123L,
0L, 0L, 12L, 0L, 1L, 0L, 0L, 126L, 0L, 7L, 1L, 0L, 1L, 0L, 0L,
119L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 2L, 0L, 0L, 2L, 0L, 0L, 0L,
0L, 55L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 104L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 100L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
104L, 0L, 1L, 0L, 1L, 0L, 0L, 108L, 0L, 18L, 0L, 0L, 4L, 0L,
0L, 0L, 154L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 5L, 0L,
93L, 2L, 0L, 48L, 0L, 153L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 8L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 103L, 0L, 0L, 0L, 0L, 0L, 0L, 162L,
0L, 0L, 0L, 0L, 5L), .Dim = c(10L, 30L), .Dimnames = structure(list(
c("X0", "X1", "X2", "X3", "X4", "X5", "X6", "X7", "X8", "X9"
), c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11",
"12", "13", "14", "15", "16", "17", "18", "19", "20", "21",
"22", "23", "24", "25", "26", "27", "28", "29", "30")), .Names = c("",
"")), class = "table")
You could rotate the y-axis labels (see help("par") for documentation):
# Draw the table with the `las` graphical parameter set to 1; `las` governs
# the orientation of the axis labels (documented in help("par")).
plot(tablen, las = 1)

Resources