How to create a count up data frame - r

I’m trying to create a data frame in r that looks like this
X Y Z
3 1 1
3 1 2
3 1 3
3 2 1
3 2 2
3 2 3
4 1 1
4 1 2
4 1 3
4 2 1
...
So column z counts up to 3 then when it reaches 3 column y increments by 1 and z counts up again until 3. Then x increments by 1 and the process starts again

You could use expand.grid + rev
rev(expand.grid(z = 1:3, y = 1:2, x = 3:4))
x y z
1 3 1 1
2 3 1 2
3 3 1 3
4 3 2 1
5 3 2 2
6 3 2 3
7 4 1 1
8 4 1 2
9 4 1 3
10 4 2 1
11 4 2 2
12 4 2 3

An option is to use tidyr::crossing().
In your case:
crossing(X = 3:4,
Y = 1:2,
Z = 1:3)

data.frame(X=rep(3:4,each=6,1),
Y=rep(1:2,each=3,2),
Z=rep(1:3,each=1,4))

Here is another base R solution in addition to the expand.grid approach by #Onyambu.
The feature of this code below is that, you only need to put everything into the list lst, and pass it to function f:
f <- function(lst) data.frame(mapply(function(p,n) rep(p,each=n),lst, prod(lengths(lst))/cumprod(lengths(lst))))
lst<- list(x = 3:4,y = 1:2,z = 1:3)
res <- f(lst)
such that
> res
x y z
1 3 1 1
2 3 1 2
3 3 1 3
4 3 2 1
5 3 2 2
6 3 2 3
7 4 1 1
8 4 1 2
9 4 1 3
10 4 2 1
11 4 2 2
12 4 2 3

A data.table solution for completness:
data.table::CJ(x = 3:4, y = 1:2, z = 1:3)
x y z
1: 3 1 1
2: 3 1 2
3: 3 1 3
4: 3 2 1
5: 3 2 2
6: 3 2 3
7: 4 1 1
8: 4 1 2
9: 4 1 3
10: 4 2 1
11: 4 2 2
12: 4 2 3

Related

identify whenever values repeat in r

I have a dataframe like this.
data <- data.frame(Condition = c(1,1,2,3,1,1,2,2,2,3,1,1,2,3,3))
I want to populate a new variable Sequence which identifies whenever Condition starts again from 1.
So the new dataframe would look like this.
Thanks in advance for the help!
data <- data.frame(Condition = c(1,1,2,3,1,1,2,2,2,3,1,1,2,3,3),
Sequence = c(1,1,1,1,2,2,2,2,2,2,3,3,3,3,3))
base R
data$Sequence2 <- cumsum(c(TRUE, data$Condition[-1] == 1 & data$Condition[-nrow(data)] != 1))
data
# Condition Sequence Sequence2
# 1 1 1 1
# 2 1 1 1
# 3 2 1 1
# 4 3 1 1
# 5 1 2 2
# 6 1 2 2
# 7 2 2 2
# 8 2 2 2
# 9 2 2 2
# 10 3 2 2
# 11 1 3 3
# 12 1 3 3
# 13 2 3 3
# 14 3 3 3
# 15 3 3 3
dplyr
library(dplyr)
data %>%
mutate(
Sequence2 = cumsum(Condition == 1 & lag(Condition != 1, default = TRUE))
)
# Condition Sequence Sequence2
# 1 1 1 1
# 2 1 1 1
# 3 2 1 1
# 4 3 1 1
# 5 1 2 2
# 6 1 2 2
# 7 2 2 2
# 8 2 2 2
# 9 2 2 2
# 10 3 2 2
# 11 1 3 3
# 12 1 3 3
# 13 2 3 3
# 14 3 3 3
# 15 3 3 3
This took a while. Finally I find this solution:
library(dplyr)
data %>%
group_by(Sequnce = cumsum(
ifelse(Condition==1, lead(Condition)+1, Condition)
- Condition==1)
)
Condition Sequnce
<dbl> <int>
1 1 1
2 1 1
3 2 1
4 3 1
5 1 2
6 1 2
7 2 2
8 2 2
9 2 2
10 3 2
11 1 3
12 1 3
13 2 3
14 3 3
15 3 3

Group by each increasing sequence in data frame

If I have a data frame with a column of monotonically increasing values such as:
x
1
2
3
4
1
2
3
1
2
3
4
5
6
1
2
How do I add a column to group each increasing sequence that results in:
x y
1 1
2 1
3 1
4 1
1 2
2 2
3 2
1 3
2 3
3 3
4 3
5 3
6 3
1 4
2 4
I can only think of using a loop which will be slow.
You may choose cumsum function to do it.
> x <- c(1,2,3,4,1,2,3,1,2,4,5,1,2)
> cumsum(x==1)
[1] 1 1 1 1 2 2 2 3 3 3 3 4 4
I would use diff and compute the cumulative sum:
df$y <- c(1, cumsum(diff(df$x) < 0 ) + 1)
> df
x y
1 1 1
2 2 1
3 3 1
4 4 1
5 1 2
6 2 2
7 3 2
8 1 3
9 2 3
10 3 3
11 4 3
12 5 3
13 6 3
14 1 4
15 2 4

Count with table() and exclude 0's

I try to count triplets; for this I use three vectors that are packed in a dataframe:
X=c(4,4,4,4,4,4,4,4,1,1,1,1,1,1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,3,3)
Y=c(1,1,1,1,1,1,1,1,1,1,1,1,2,2,3,4,2,2,2,2,3,4,1,1,2,2,3,3,4,4)
Z=c(4,4,5,4,4,4,4,4,6,1,1,1,1,1,1,1,2,2,2,2,7,2,3,3,3,3,3,3,3,3)
Count_Frame=data.frame(matrix(NA, nrow=(length(X)), ncol=3))
Count_Frame[1]=X
Count_Frame[2]=Y
Count_Frame[3]=Z
Counts=data.frame(table(Count_Frame))
There is the following problem: if I increase the value range in the vectors or use even more vectors the "Counts" dataframe quickly approaches its size limit due to the many 0-counts. Is there a way to exclude the 0-counts while generating "Counts"?
We can use data.table. Convert the 'data.frame' to 'data.table' (setDT(Count_Frame)), grouped by all the columns (.(X, Y, Z)), we get the number or rows (.N).
library(data.table)
setDT(Count_Frame)[,.N ,.(X, Y, Z)]
# X Y Z N
# 1: 4 1 4 7
# 2: 4 1 5 1
# 3: 1 1 6 1
# 4: 1 1 1 3
# 5: 1 2 1 2
# 6: 1 3 1 1
# 7: 1 4 1 1
# 8: 2 2 2 4
# 9: 2 3 7 1
#10: 2 4 2 1
#11: 3 1 3 2
#12: 3 2 3 2
#13: 3 3 3 2
#14: 3 4 3 2
Instead of naming all the columns, we can use names(Count_Frame) as well (if there are many columns)
setDT(Count_Frame)[,.N , names(Count_Frame)]
You can accomplish this with aggregate:
Count_Frame$one <- 1
aggregate(one ~ X1 + X2 + X3, data=Count_Frame, FUN=sum)
This will calculate the positive instances of table, but will not list the zero counts.
One solution is to create a combination of the column values and count those instead:
library(tidyr)
as.data.frame(table(unite(Count_Frame, tmp, X1, X2, X3))) %>%
separate(Var1, c('X1', 'X2', 'X3'))
Resulting output is:
X1 X2 X3 Freq
1 1 1 1 3
2 1 1 6 1
3 1 2 1 2
4 1 3 1 1
5 1 4 1 1
6 2 2 2 4
7 2 3 7 1
8 2 4 2 1
9 3 1 3 2
10 3 2 3 2
11 3 3 3 2
12 3 4 3 2
13 4 1 4 7
14 4 1 5 1
Or using plyr:
library(plyr)
count(Count_Frame, colnames(Count_Frame))
output
# > count(Count_Frame, colnames(Count_Frame))
# X1 X2 X3 freq
# 1 1 1 1 3
# 2 1 1 6 1
# 3 1 2 1 2
# 4 1 3 1 1
# 5 1 4 1 1
# 6 2 2 2 4
# 7 2 3 7 1
# 8 2 4 2 1
# 9 3 1 3 2
# 10 3 2 3 2
# 11 3 3 3 2
# 12 3 4 3 2
# 13 4 1 4 7
# 14 4 1 5 1

expand.grid with unknown set of variables

So, expand.grid returns a df of all the combinations of the vectors passed.
df <- expand.grid(1:3, 1:3)
df <- expand.grid(1:3, 1:3, 1:3)
What I would like is a generalized function that takes 1 parameter (number of vectors) and returns the appropriate data frame.
combinations <- function(n) {
return(expand.grid(0, 1, ... n))
}
Such that
combinations(2) returns(expand.grid(1:3, 1:3))
combinations(3) returns(expand.grid(1:3, 1:3, 1:3))
combinations(4) returns(expand.grid(1:3, 1:3, 1:3, 1:3))
etc.
combinations <- function(n)
expand.grid(rep(list(1:3),n))
> combinations(2)
Var1 Var2
1 1 1
2 2 1
3 3 1
4 1 2
5 2 2
6 3 2
7 1 3
8 2 3
9 3 3
> combinations(3)
Var1 Var2 Var3
1 1 1 1
2 2 1 1
3 3 1 1
4 1 2 1
5 2 2 1
6 3 2 1
7 1 3 1
8 2 3 1
9 3 3 1
10 1 1 2
11 2 1 2
12 3 1 2
13 1 2 2
14 2 2 2
15 3 2 2
16 1 3 2
17 2 3 2
18 3 3 2
19 1 1 3
20 2 1 3
21 3 1 3
22 1 2 3
23 2 2 3
24 3 2 3
25 1 3 3
26 2 3 3
27 3 3 3

Two dimensional heatmap with R

I have an input file of this form:
0.35217720 1 201 1
0.26413283 1 209 1
1.1665874 1 210 1
...
0.30815500 2 194 1
0.15407741 2 196 1
0.15407741 2 197 1
0.33016610 2 205 1
...
where the first column is a scalar value, the second is the x coordinate of a discrete lattice, the third is the y coordinate and the last one is time-like discrete component.
I would like to make a two dimensional heatmap of the scalar values at fixed time. How can i do? Edit: I don't know how to use image() to use the second and the third column as x, y coordinates.
Example file:
7.62939453 1 1 1
1.3153768 1 2 1
7.5560522 1 3 1
4.5865011 1 4 1
5.3276706 1 5 1
2.1895909 2 1 1
0.47044516 2 2 1
6.7886448 2 3 1
6.7929626 2 4 1
9.3469286 2 5 1
3.8350201 3 1 1
5.1941633 3 2 1
8.3096523 3 3 1
0.34571886 3 4 1
0.53461552 3 5 1
5.2970004 4 1 1
6.7114925 4 2 1
7.69805908 4 3 1
3.8341546 4 4 1
0.66842079 4 5 1
4.1748595 5 1 1
6.8677258 5 2 1
5.8897662 5 3 1
9.3043633 5 4 1
8.4616680 5 5 1
Reshape your data to a matrix and then use heatmap():
This worked on R version 2.10.1 (2009-12-14):
txt <- textConnection("7.62939453 1 1 1
1.3153768 1 2 1
7.5560522 1 3 1
4.5865011 1 4 1
5.3276706 1 5 1
2.1895909 2 1 1
0.47044516 2 2 1
6.7886448 2 3 1
6.7929626 2 4 1
9.3469286 2 5 1
3.8350201 3 1 1
5.1941633 3 2 1
8.3096523 3 3 1
0.34571886 3 4 1
0.53461552 3 5 1
5.2970004 4 1 1
6.7114925 4 2 1
7.69805908 4 3 1
3.8341546 4 4 1
0.66842079 4 5 1
4.1748595 5 1 1
6.8677258 5 2 1
5.8897662 5 3 1
9.3043633 5 4 1
8.4616680 5 5 1
")
df <- read.table(txt)
close(txt)
names(df) <- c("value", "x", "y", "t")
require(reshape)
dfc <- cast(df[ ,-4], x ~ y)
heatmap(as.matrix(dfc))
## Some copy/pasteable fake data for you (dput() works nicely for pasteable real data)
your_matrix <- cbind(runif(25, 0, 10), rep(1:5, each = 5), rep(1:5, 5), rep(1, 25))
heatmap_matrix <- matrix(your_matrix[, 1], nrow = 5)
## alternatively, if your_matrix isn't in order
## (The reshape method in EDi's answer is a nicer alternative)
for (i in 1:nrow(your_matrix)) {
heatmap_matrix[your_matrix[i, 2], you_matrix[i, 3]]
}
heatmap(heatmap_matrix) # one option
image(z = heatmap_matrix) # another option
require(gplots)
heatmap.2(heatmap_matrix) # this has fancier preferences

Resources