Create numbered sequence for occurrences of a given nesting variable - r

I'm hoping to add to a data set a variable that sequences the instances a certain grouping variable appears. For example:
ids <- c(rep(1,4),rep(2,6),rep(3,2))
I'm wanting another variable that would count the instances each id appears. Creating a vector like this:
1,2,3,4,1,2,3,4,5,6,1,2
With them combined looking something like this:
ids count
1 1 1
2 1 2
3 1 3
4 1 4
5 2 1
6 2 2
7 2 3
8 2 4
9 2 5
10 2 6
11 3 1
12 3 2
Any ideas? Many thanks!

I suggest ave with seq_along
ids <- c(rep(1,4),rep(2,6),rep(3,2))
count <- ave(ids,ids, FUN=seq_along)
cbind(ids, count)
# ids count
# [1,] 1 1
# [2,] 1 2
# [3,] 1 3
# [4,] 1 4
# [5,] 2 1
# [6,] 2 2
# [7,] 2 3
# [8,] 2 4
# [9,] 2 5
# [10,] 2 6
# [11,] 3 1
# [12,] 3 2

Or if it is ordered
cbind(ids, count=sequence(unname(table(ids))))
# ids count
# [1,] 1 1
# [2,] 1 2
# [3,] 1 3
# [4,] 1 4
# [5,] 2 1
# [6,] 2 2
# [7,] 2 3
# [8,] 2 4
# [9,] 2 5
# [10,] 2 6
# [11,] 3 1
# [12,] 3 2
Or
cbind(ids,within.list(rle(ids), lengths <- sequence(lengths))$lengths)
Or
library(data.table)
dt <- as.data.table(ids)
dt[,count:=seq_len(.N), by=ids]
Or
library(dplyr)
dat <- data.frame(ids)
dat %>%
group_by(ids) %>%
mutate(count=row_number())

Related

If n occurs less than 5 times, change to n-1

In R, if I have a df of numbers c(1,1,1,2,3,3,3,3,3,3,4,4,4,5,5), how do change n to n-1 if n occurs less than 5 times? Example input x and output out.
x out
1 1 1
2 1 1
3 1 1
4 2 1
5 3 3
6 3 3
7 3 3
8 3 3
9 3 3
10 3 3
11 4 3
12 4 3
13 4 3
As the first value in the column (it will also be the minimum value), 1 would stay the same. However, if it would make the coding easier, the 1s can change to 0, but the 2 would still change to 1.
EDIT:
How can I repeat this if the changed values now occur <5 times? For example
# x out
# [1,] 1 0
# [2,] 1 0
# [3,] 1 0
# [4,] 2 1
# [5,] 3 3
# [6,] 3 3
# [7,] 3 3
# [8,] 3 3
# [9,] 3 3
#[10,] 3 3
#[11,] 4 3
#[12,] 4 3
#[13,] 4 3
#[14,] 5 3
#[15,] 5 3
#[16,] 5 3
#[17,] 6 3
#[18,] 6 3
#[19,] 6 3
#[20,] 7 3
#[21,] 7 3
Using ave :
x <- c(1,1,1,2,3,3,3,3,3,3,4,4,4)
pmax(x - +(ave(x, x, FUN =length) < 5), 1)
#[1] 1 1 1 1 3 3 3 3 3 3 3 3 3
If values in x can repeat we need to use rle for grouping.
pmax(x - +(ave(x,with(rle(x),rep(seq_along(values), lengths)),FUN =length) < 5),1)
You can use rle if x is sorted to find how many times a number is there. And subtract 1 if there are less then 5.
i <- order(x)
y <- rle(x[i])
y$values <- y$values - (y$lengths < 5)
cbind(x,out=inverse.rle(y)[order(i)])
# x out
# [1,] 1 0
# [2,] 1 0
# [3,] 1 0
# [4,] 2 1
# [5,] 3 3
# [6,] 3 3
# [7,] 3 3
# [8,] 3 3
# [9,] 3 3
#[10,] 3 3
#[11,] 4 3
#[12,] 4 3
#[13,] 4 3
#[14,] 5 4
#[15,] 5 4
Another solution
library(tidyvese)
x <- c(1,1,1,2,3,3,3,3,3,3,4,4,4,5,5)
df <- tibble(x = x)
df %>%
group_by(x) %>%
mutate(n = n()) %>%
ungroup %>%
transmute(x,
out = if_else((x != min(x, na.rm = T) & n < 5), x - 1, x))

Add a vector based on previous vectors

Here is the data that I have:
round<-rep(1:5,4)
players<-rep(1:2, c(10,10))
decs<-sample(1:3,20,replace=TRUE)
game<-rep(rep(1:2,c(5,5)),2)
gamematrix<-cbind(players,game,round,decs)
gamematrix
Here is the output:
players game round decs
[1,] 1 1 1 2
[2,] 1 1 2 2
[3,] 1 1 3 1
[4,] 1 1 4 2
[5,] 1 1 5 1
[6,] 1 2 1 1
[7,] 1 2 2 1
[8,] 1 2 3 2
[9,] 1 2 4 1
[10,] 1 2 5 3
[11,] 2 1 1 2
[12,] 2 1 2 1
[13,] 2 1 3 3
[14,] 2 1 4 3
[15,] 2 1 5 3
[16,] 2 2 1 3
[17,] 2 2 2 2
[18,] 2 2 3 1
[19,] 2 2 4 1
[20,] 2 2 5 2
Now, I would like to add another column: "Same Choice" which I want to be "1" if the same player in the same game makes the same decision in next round as in previous round. For example, for player 1, the output should be: c(0,1,0,0,0,0,1,0,0,0). Any ideas how can I do it?
Thanks!
Here is a data.table answer:
# set seed
set.seed(1234)
# load data
round<-rep(1:5,4)
players<-rep(1:2, c(10,10))
decs<-sample(1:3,20,replace=TRUE)
game<-rep(rep(1:2,c(5,5)),2)
gamematrix<-cbind(players,game,round,decs)
library(data.table)
dt <- data.table(gamematrix)
dt[, .(decs=decs, lag=c(0,head(decs,-1)),
sameDec=as.integer(decs==c(NA,head(decs,-1)))),
by=c("players","game")]
I included the lag term so that you can verify.
#Frank s suggestion to use shift is much cleaner (and probably faster):
dt[, .(decs=decs, lag=shift(decs, 1),
sameDec=as.integer(decs==shift(decs, 1))),
by=c("players","game")]
compared to my hand-coded lag.
Following code will work
library(dplyr)
gamematrix %>% as.data.frame %>% group_by(players, game) %>% mutate(new_col = ifelse(decs == lag(decs), 1, 0) )
gamematrix$new_col[is.na(gamematrix$new_col)]<- 0

Creating contingency table from matrix

I have a matrix like so:
country cLabel
[1,] 3 1
[2,] 6 2
[3,] 8 1
[4,] 5 2
[5,] 5 2
[6,] 8 2
[7,] 8 2
[8,] 8 2
[9,] 8 2
[10,] 4 2
[11,] 6 2
[12,] 3 2
[13,] 5 2
[14,] 5 1
country is a value of 1-8, and cLabel is a value of 1-2. How can I print the contingency table for this? I tried print(table(myMatrix)).
It is printing
1 2 3 4 5 6 7 8
60 277 31 32 83 39 24 44
and what I want is it to print each country value (1-8) and how many 1s and 2s there are for each of these 8 values.
I guess there is a duplicate somewhere.
# Turn your matrix into a data.frame, easier to manipulate and to create labels
myDataFrame <- as.data.frame(myMatrix)
# Add factors to coutry, from 1 to 8. This will add missing levels to the final result
myDataFrame$country <- factor(myDataFrame$country, 1:8)
# Table
table(myDataFrame)
# cLabel
# country 1 2
# 1 0 0
# 2 0 0
# 3 1 1
# 4 0 1
# 5 1 3
# 6 0 2
# 7 0 0
# 8 1 4

All possible combinations over groups

I have 5 groups: G1, G2,…,G5 with n1,n2,…,n5 elements in each group respectively. I select 2 elements from each of the 4 groups and 1 element from the 5th group. How do I generate all possible combinations in R?
(It is not specified in the question whether the groups are mutually exclusive or not; So, assume:
1. the groups are mutually exclusive
2. the subsets of groups (n1, n2, ...) will use the same elements in being filled)
3 just for the sake of argument |G1|=|G2|=|G3|=5 (The user can change the following code accordingly for differing numbers of elements in the groups)
The following is 3 set mock-up answer of the question that any user can generalize to arbitrary number of groups. So, assume group names are G1, G2, G3.
library(causfinder)
gctemplate(5,2,2) # Elements are coded as: 1,2,3,4,5; |sub-G1|=2; |sub-G2|=2; |sub-G3|=5-(2+2)=1
# In the following table, each number represents a unique element. (SOLUTION ENDED!)
My package (causfinder) is not in CRAN. Hence, I will give the function gctemplate's code below.
[,1] [,2] [,3] [,4] [,5]
[1,] 1 2 3 4 5 sub-G1={1,2} sub-G2={3,4} sub-G3={5}
[2,] 1 2 3 5 4
[3,] 1 2 4 5 3 sub-G1={1,2} sub-G2={4,5} sub-G3={3}
[4,] 1 3 2 4 5
[5,] 1 3 2 5 4
[6,] 1 3 4 5 2
[7,] 1 4 2 3 5
[8,] 1 4 2 5 3
[9,] 1 4 3 5 2
[10,] 1 5 2 3 4
[11,] 1 5 2 4 3
[12,] 1 5 3 4 2
[13,] 2 3 1 4 5
[14,] 2 3 1 5 4
[15,] 2 3 4 5 1
[16,] 2 4 1 3 5
[17,] 2 4 1 5 3
[18,] 2 4 3 5 1
[19,] 2 5 1 3 4
[20,] 2 5 1 4 3
[21,] 2 5 3 4 1
[22,] 3 4 1 2 5
[23,] 3 4 1 5 2
[24,] 3 4 2 5 1
[25,] 3 5 1 2 4
[26,] 3 5 1 4 2
[27,] 3 5 2 4 1
[28,] 4 5 1 2 3
[29,] 4 5 1 3 2
[30,] 4 5 2 3 1
The code of gctemplate:
gctemplate <- function(nvars, ncausers, ndependents){
independents <- combn(nvars, ncausers)
patinajnumber <- dim(combn(nvars - ncausers, ndependents))[[2]]
independentspatinajednumber <- dim(combn(nvars, ncausers))[[2]]*patinajnumber
dependents <- matrix(, nrow = dim(combn(nvars, ncausers))[[2]]*patinajnumber, ncol = ndependents)
for (i in as.integer(1:dim(combn(nvars, ncausers))[[2]])){
dependents[(patinajnumber*(i-1)+1):(patinajnumber*i),] <- t(combn(setdiff(seq(1:nvars), independents[,i]), ndependents))
}
independentspatinajed <- matrix(, nrow = dim(combn(nvars, ncausers))[[2]]*patinajnumber, ncol = ncausers)
for (i in as.integer(1:dim(combn(nvars, ncausers))[[2]])){
for (j in as.integer(1:patinajnumber)){
independentspatinajed[(i-1)*patinajnumber+j,] <- independents[,i]
}}
independentsdependents <- cbind(independentspatinajed, dependents)
others <- matrix(, nrow = dim(combn(nvars, ncausers))[[2]]*patinajnumber, ncol = nvars - ncausers - ndependents)
for (i in as.integer(1:((dim(combn(nvars, ncausers))[[2]])*patinajnumber))){
others[i, ] <- setdiff(seq(1:nvars), independentsdependents[i,])
}
causalitiestemplate <- cbind(independentsdependents, others)
causalitiestemplate
}
Now, the solution for G1,G2,G3 is the above. Just generalize the above code to 5-variable case with the very same logic!

Create dataframe of all array indices in R

Using R, I'm trying to construct a dataframe of the row and col numbers of a given matrix. E.g., if
a <- matrix(c(1:15), nrow=5, ncol=3)
then I'm looking to construct a dataframe that gives:
row col
1 1
1 2
1 3
. .
5 1
5 2
5 3
What I've tried:
row <- matrix(row(a), ncol=1, nrow=dim(a)[1]*dim(a)[2], byrow=T)
col <- matrix(col(a), ncol=1, nrow=dim(a)[1]*dim(a)[2], byrow=T)
out <- cbind(row, col)
colnames(out) <- c("row", "col")
results in:
row col
[1,] 1 1
[2,] 2 1
[3,] 3 1
[4,] 4 1
[5,] 5 1
[6,] 1 2
[7,] 2 2
[8,] 3 2
[9,] 4 2
[10,] 5 2
[11,] 1 3
[12,] 2 3
[13,] 3 3
[14,] 4 3
[15,] 5 3
Which isn't what I'm looking for, as the sequence of rows and cols in suddenly reversed, even tough I specified "byrow=T". I don't see if and where I'm making a mistake but would hugely appreciate suggestions to overcome this problem. Thanks in advance!
I'd use expand.grid on the vectors 1:ncol and 1:nrow, then flip the columns with [,2:1] to get them in the order you want:
> expand.grid(seq(ncol(a)),seq(nrow(a)))[,2:1]
Var2 Var1
1 1 1
2 1 2
3 1 3
4 2 1
5 2 2
6 2 3
7 3 1
8 3 2
9 3 3
10 4 1
11 4 2
12 4 3
13 5 1
14 5 2
15 5 3
Use row and col, but more directly manipulate their output ordering since they return corresponding indices in place for the input array. Use t to get the non-default order you want in the end:
data.frame(row = as.vector(t(row(a))), col = as.vector(t(col(a))))
row col
1 1 1
2 1 2
3 1 3
4 2 1
5 2 2
6 2 3
7 3 1
8 3 2
9 3 3
10 4 1
11 4 2
12 4 3
13 5 1
14 5 2
15 5 3
Or, as a matrix not a data.frame:
cbind(as.vector(t(row(a))), as.vector(t(col(a))))
[,1] [,2]
[1,] 1 1
[2,] 1 2
[3,] 1 3
[4,] 2 1
[5,] 2 2
[6,] 2 3
[7,] 3 1
[8,] 3 2
[9,] 3 3
[10,] 4 1
[11,] 4 2
[12,] 4 3
[13,] 5 1
[14,] 5 2
[15,] 5 3
You may want to have a look at ?expand.grid, which does just about exactly what you want to achieve.
Since there are many ways to skin a cat, I'll chip in with yet another variant based on rep:
data.frame(row=rep(seq(nrow(a)), each=ncol(a)), col=rep(seq(ncol(a)), nrow(a)))
...but to announce a "winner", I think you need to time the solutions:
# Make up a huge matrix...
a <- matrix(runif(1e7), 1e4)
system.time( a1<-data.frame(row = as.vector(t(row(a))),
col = as.vector(t(col(a)))) ) # 0.68 secs
system.time( a2<-expand.grid(col = seq(ncol(a)),
row = seq(nrow(a)))[,2:1] ) # 0.49 secs
system.time( a3<-data.frame(row=rep(seq(nrow(a)), each=ncol(a)),
col=rep(seq(ncol(a)), nrow(a))) ) # 0.59 secs
identical(a1, a2) && identical(a1, a3) # TRUE
...so it seems #Spacedman has the speediest solution!

Resources