zoo - Round coredata to integer - r

I've got a list of 69 zoo objects, I used na.approx to fill few gaps, but since my time series deal with counts I need the imputed values to be integers.
This code
list_int <- lapply(list_dec, round(coredata(list_dec), digits=0))
gives me the following error message
Error in round(coredata(list_dec), digits=0) :
non-numeric argument to mathematical function
I thought it was a problem with applying the function to a list instead of a vector, but the function
coredata(list_dec)
correctly shows all 69 time series (without need for lapply).
So why can't round apply to coredata?
EDITED
As suggested here's a minimal data set
A1 <- runif(20, min=-5, max=13)
A2 <- runif(20, min=-1, max=5)
A3 <- runif(20, min=-3, max=10)
A4 <- runif(20, min=0, max=2)
ls <- list(A1, A2, A3, A4)
list_dec <- lapply(ls, as.zoo)

As discussed in the comments, you can accomplish what you want by the following:
> library(zoo)
> A1 <- runif(20, min=-5, max=13)
> A2 <- runif(20, min=-1, max=5)
> A3 <- runif(20, min=-3, max=10)
> A4 <- runif(20, min=0, max=2)
> ls <- list(A1, A2, A3, A4)
> list_dec <- lapply(ls, as.zoo)
Now list_dec looks as follows:
> list_dec
[[1]]
1 2 3 4 5 6 7 8 9 10 11 12 13
9.20889929 8.03050882 1.52621137 9.91528049 12.71637959 11.93573340 3.34967427 9.75224030 7.90654714 0.08199464 -2.84403691 11.57990103 4.74868873
14 15 16 17 18 19 20
2.94023319 10.71812525 -2.05394366 -1.07669056 7.17503613 4.84871327 4.58929978
[[2]]
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
1.0756646 0.5615212 0.5697795 0.9629726 2.5962021 3.1932062 0.6894849 1.9844943 1.3351256 4.0043998 0.4756172 0.4573920 0.6009208 4.4963877 4.4149804
16 17 18 19 20
3.7762369 2.9670795 -0.8241576 2.1796402 2.5504061
[[3]]
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
1.5765136 1.7310402 0.7273943 4.0838831 -0.9946958 -2.0222258 7.5756159 3.9105252 3.9006369 -0.9939739 4.7603811 8.5079521 3.3653795 0.8546201 3.8143874
16 17 18 19 20
5.0847501 -2.6324485 2.0860695 5.7202315 9.5304238
[[4]]
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
1.36751418 1.44009472 1.41155170 1.55018689 1.31378442 1.09746739 0.09224919 0.66425731 0.61047787 1.63552109 1.56096710 1.59775494 1.69658733 1.08939868 1.96183397
16 17 18 19 20
1.20476936 0.94640977 0.73820689 0.65899943 1.54647028
Now you can directly call lapply like this:
lapply(list_dec,round)
which gives you the desired output:
[[1]]
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
9 8 2 10 13 12 3 10 8 0 -3 12 5 3 11 -2 -1 7 5 5
[[2]]
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
1 1 1 1 3 3 1 2 1 4 0 0 1 4 4 4 3 -1 2 3
[[3]]
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
2 2 1 4 -1 -2 8 4 4 -1 5 9 3 1 4 5 -3 2 6 10
[[4]]
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
1 1 1 2 1 1 0 1 1 2 2 2 2 1 2 1 1 1 1 2

Related

Adding a sequential value using a loop

I have a large dataframe which is effectively combined output from a nested list using do.call(rbind, nested_list)
The output has the same number of rows for each list element (e.g. 5 rows per list) and I need to add a column which has a unique numeric code for each list (or group). How can I write a loop to reproduce the group column I have included in the example below, e.g. the five rows have a group value == 1, rows 6 to 10 have a group value == 2, rows 11 to 15 have a group value == 3
df <- data.frame("ID" = 1:15)
df$Var_A <- c(1,3,5,7,9,11,13,15,17,19,21,23,25,27,29)
df$Var_B <- c(10,0,0,0,12,12,12,12,0,14,NA_real_,14,16,16,16)
df$Var_C <- c(10,12,14,16,10,12,14,16,10,12,14,16,10,12,14)
df$Var_D <- c(10,12,14,16,10,12,14,16,10,12,14,16,10,12,14)
df$New_A <- c(2,5,5,8,11,14,15,17,20,21,22,23,25,25,27)
df$New_B <- c(10,0,0,0,12,12,12,12,0,14,NA_real_,14,16,16,16)
df$New_C <- c(10,12,14,16,10,12,14,16,10,12,14,16,10,12,14)
df$New_D <- c(10,12,14,16,10,12,14,16,10,12,14,16,10,12,14)
df$Group <- c(1,1,1,1,1,2,2,2,2,2,3,3,3,3,3)
ID Var_A Var_B Var_C Var_D New_A New_B New_C New_D Group
1 1 1 10 10 10 2 10 10 10 1
2 2 3 0 12 12 5 0 12 12 1
3 3 5 0 14 14 5 0 14 14 1
4 4 7 0 16 16 8 0 16 16 1
5 5 9 12 10 10 11 12 10 10 1
6 6 11 12 12 12 14 12 12 12 2
7 7 13 12 14 14 15 12 14 14 2
8 8 15 12 16 16 17 12 16 16 2
9 9 17 0 10 10 20 0 10 10 2
10 10 19 14 12 12 21 14 12 12 2
11 11 21 NA 14 14 22 NA 14 14 3
12 12 23 14 16 16 23 14 16 16 3
13 13 25 16 10 10 25 16 10 10 3
14 14 27 16 12 12 25 16 12 12 3
15 15 29 16 14 14 27 16 14 14 3
You can use the ceiling function:
df <- data.frame("ID" = 1:15)
df$Var_A <- c(1,3,5,7,9,11,13,15,17,19,21,23,25,27,29)
df$Var_B <- c(10,0,0,0,12,12,12,12,0,14,NA_real_,14,16,16,16)
df$Var_C <- c(10,12,14,16,10,12,14,16,10,12,14,16,10,12,14)
df$Var_D <- c(10,12,14,16,10,12,14,16,10,12,14,16,10,12,14)
df$New_A <- c(2,5,5,8,11,14,15,17,20,21,22,23,25,25,27)
df$New_B <- c(10,0,0,0,12,12,12,12,0,14,NA_real_,14,16,16,16)
df$New_C <- c(10,12,14,16,10,12,14,16,10,12,14,16,10,12,14)
df$New_D <- c(10,12,14,16,10,12,14,16,10,12,14,16,10,12,14)
df$Group <- ceiling(as.numeric(df$ID)/5)
df
# ID Var_A Var_B Var_C Var_D New_A New_B New_C New_D Group
# 1 1 1 10 10 10 2 10 10 10 1
# 2 2 3 0 12 12 5 0 12 12 1
# 3 3 5 0 14 14 5 0 14 14 1
# 4 4 7 0 16 16 8 0 16 16 1
# 5 5 9 12 10 10 11 12 10 10 1
# 6 6 11 12 12 12 14 12 12 12 2
# 7 7 13 12 14 14 15 12 14 14 2
# 8 8 15 12 16 16 17 12 16 16 2
# 9 9 17 0 10 10 20 0 10 10 2
# 10 10 19 14 12 12 21 14 12 12 2
# 11 11 21 NA 14 14 22 NA 14 14 3
# 12 12 23 14 16 16 23 14 16 16 3
# 13 13 25 16 10 10 25 16 10 10 3
# 14 14 27 16 12 12 25 16 12 12 3
# 15 15 29 16 14 14 27 16 14 14 3
Without adding an ID or rownums we can do this using nrow and knowledge of the group length.
group_len <- 5
groups <- nrow(df)/group_len
df$group <- rep(1:groups, each = group_len)
# Example:
# rep(1:3, each = 5)
# 1 1 1 1 1 2 2 2 2 2 3 3 3 3 3
I'd use dplyr::mutate with dplyr::row_number:
library(dplyr)
df %>%
mutate(Group=ceiling(row_number() / 5))
Output:
ID Var_A Var_B Var_C Var_D New_A New_B New_C New_D Group
1 1 1 10 10 10 2 10 10 10 1
2 2 3 0 12 12 5 0 12 12 1
3 3 5 0 14 14 5 0 14 14 1
4 4 7 0 16 16 8 0 16 16 1
5 5 9 12 10 10 11 12 10 10 1
6 6 11 12 12 12 14 12 12 12 2
7 7 13 12 14 14 15 12 14 14 2
8 8 15 12 16 16 17 12 16 16 2
9 9 17 0 10 10 20 0 10 10 2
10 10 19 14 12 12 21 14 12 12 2
11 11 21 NA 14 14 22 NA 14 14 3
12 12 23 14 16 16 23 14 16 16 3
13 13 25 16 10 10 25 16 10 10 3
14 14 27 16 12 12 25 16 12 12 3
15 15 29 16 14 14 27 16 14 14 3
An option would be to combine cumsum with rep.
cumsum(rep_len(c(TRUE, rep(FALSE, 4)), nrow(df)))
#cumsum(rep_len(c(TRUE, FALSE, FALSE, FALSE, FALSE), nrow(df))) #Alternative
# [1] 1 1 1 1 1 2 2 2 2 2 3 3 3 3 3
Or making use of auto repeat.
df$Group <- c(TRUE, rep(FALSE, 4))
df$Group <- cumsum(df$Group)
df$Group
# [1] 1 1 1 1 1 2 2 2 2 2 3 3 3 3 3
Or create a sequence with length of nrow and make an integer division %/%.
0:(nrow(df)-1) %/% 5
#seq(0, nrow(df)-1) %/% 5 #Alternative
#(seq_len(nrow(df))-1) %/% 5 #Alternative
# [1] 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2
Or using rep:
rep(1:ceiling(nrow(df)/5), each=5, length.out=nrow(df))
# [1] 1 1 1 1 1 2 2 2 2 2 3 3 3 3 3
You could use the cut function with labels = FALSE to return an integer to use for the group.
n_per_group <- 5
df$group <- cut(x = df$ID, breaks = nrow(df) / n_per_group, labels = FALSE)
df$group
#[1] 1 1 1 1 1 2 2 2 2 2 3 3 3 3 3

How to find all pairs of two lists, and categorize them without repetitions?

We are preparing for a program where 18 people should discuss topics in a way that in each round they form pairs, and then they switch until everyone has talked to everyone. It means 153 discussions, 9 pairs talking parallelly in each round, for 17 rounds. I tried to formulate a matrix showing who should talk to whom in order to avoid the chaos, but could not succeed. For the sake of simplicity everyone is given a number, so the bottom line is, i would need all pairs of combinations of the numbers from 1 to 18 (did that with combn function), but then these pairs should be rearranged for the 17 round so that each number only appears once per round. Any ideas?
Let's first look at a simpler problem with 6 persons. The following matrix lists who (rows) is talking to whom (columns) in which round (entry):
So for example in round 1 (yellow) we have the following pairs:
(1-2), (3-5), (4-6)
For round 2 (green) we would have:
(1-3), (2-6), (4-5)
and so on.
Thus, basically we are looking for a symmetric latin square (i.e. in each row and in each column each entry appears only once, cf. Latin Squares on Wikipedia).
The latin square in the box can be easily generated via an addition table:
inner_ls <- function(k) {
res <- outer(0:(k-1), 0:(k-1), function(i, j) (i + j) %% k)
## replace zeros by k
res[res == 0] <- k
## replace diagonal by NA
diag(res) <- NA
res
}
inner_ls(5)
# [,1] [,2] [,3] [,4] [,5]
# [1,] NA 1 2 3 4
# [2,] 1 NA 3 4 5
# [3,] 2 3 NA 5 1
# [4,] 3 4 5 NA 2
# [5,] 4 5 1 2 NA
So all is left to append the last row (column) with the missing round number:
full_ls <- function(k) {
i_ls <- inner_ls(k - 1)
last_row <- apply(i_ls, 1, function(row) {
rounds <- 1:(k - 1)
rounds[!rounds %in% row]
})
res <- cbind(rbind(i_ls, last_row), c(last_row, NA))
rownames(res) <- colnames(res) <- 1:k
res
}
full_ls(6)
# 1 2 3 4 5 6
# 1 NA 1 2 3 4 5
# 2 1 NA 3 4 5 2
# 3 2 3 NA 5 1 4
# 4 3 4 5 NA 2 1
# 5 4 5 1 2 NA 3
# 6 5 2 4 1 3 NA
With that you get your assignment matrix as follows:
full_ls(18)
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
# 1 NA 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
# 2 1 NA 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 2
# 3 2 3 NA 5 6 7 8 9 10 11 12 13 14 15 16 17 1 4
# 4 3 4 5 NA 7 8 9 10 11 12 13 14 15 16 17 1 2 6
# 5 4 5 6 7 NA 9 10 11 12 13 14 15 16 17 1 2 3 8
# 6 5 6 7 8 9 NA 11 12 13 14 15 16 17 1 2 3 4 10
# 7 6 7 8 9 10 11 NA 13 14 15 16 17 1 2 3 4 5 12
# 8 7 8 9 10 11 12 13 NA 15 16 17 1 2 3 4 5 6 14
# 9 8 9 10 11 12 13 14 15 NA 17 1 2 3 4 5 6 7 16
# 10 9 10 11 12 13 14 15 16 17 NA 2 3 4 5 6 7 8 1
# 11 10 11 12 13 14 15 16 17 1 2 NA 4 5 6 7 8 9 3
# 12 11 12 13 14 15 16 17 1 2 3 4 NA 6 7 8 9 10 5
# 13 12 13 14 15 16 17 1 2 3 4 5 6 NA 8 9 10 11 7
# 14 13 14 15 16 17 1 2 3 4 5 6 7 8 NA 10 11 12 9
# 15 14 15 16 17 1 2 3 4 5 6 7 8 9 10 NA 12 13 11
# 16 15 16 17 1 2 3 4 5 6 7 8 9 10 11 12 NA 14 13
# 17 16 17 1 2 3 4 5 6 7 8 9 10 11 12 13 14 NA 15
# 18 17 2 4 6 8 10 12 14 16 1 3 5 7 9 11 13 15 NA

How to merge two data frames by ranges in R?

Suppose I have two data frames such like:
set.seed(123)
df0<-data.frame(pos=3:12,
count0=rbinom(10, 50, 0.5),
count2=rbinom(10, 20, 0.5))
df0
pos count0 count2
1 3 23 14
2 4 28 10
3 5 24 11
4 6 29 10
5 7 30 7
6 8 19 13
7 9 25 8
8 10 29 6
9 11 25 9
10 12 25 14
df1<-data.frame(start=c(4, 7, 11, 14),
end=c(6, 9, 12, 15),
cnv=c(1, 2, 3, 4))
df1
start end cnv
1 4 6 1
2 7 9 2
3 11 12 3
4 14 15 4
What I want is to merge df0 and df1 using the df0$pos with the ranges ofdf1$start and df1$end. If the pos falls into the range of start:end, fills in the cnv from df1 otherwise set cnv as zeros. An output from the above example would be:
pos count0 count2 cnv
1 3 23 14 0
2 4 28 10 1
3 5 24 11 1
4 6 29 10 1
5 7 30 7 2
6 8 19 13 2
7 9 25 8 2
8 10 29 6 0
9 11 25 9 3
10 12 25 14 3
We can use sapply to find if there is an index which is present in range else return 0.
df0$cnv <- sapply(df0$pos, function(x) {
inds <- x >= df1$start & x <= df1$end
if (any(inds))
df1$cnv[inds]
else 0
})
df0
# pos count0 count2 cnv
#1 3 23 14 0
#2 4 28 10 1
#3 5 24 11 1
#4 6 29 10 1
#5 7 30 7 2
#6 8 19 13 2
#7 9 25 8 2
#8 10 29 6 0
#9 11 25 9 3
#10 12 25 14 3

How to generate new variables based on the name of the variables in the data frame

For example, I have a toy dataset as the one I created below,
a1<-1:10
a2<-11:20
v<-c(1,2,1,NA,2,1,2,1,2,1)
data<-data.frame(a1,a2,v,stringsAsFactors = F)
Then I want to create a new variable y which will be assigned the value a1 or a2 or NA based on the value of variable v. Therefore, the 'y'
should equals to 1 12 3 NA 15 6 17 8 19 10.
I want to generate it with the command similar to the ones I list below, It doesn't work, I guess it's because of the vectorization issue, then how can I fix it?
In reality, I have several as, say 10 and the actual values are characters instead of numeric ones.
data$y[!is.na(data$v)]<-data[,paste0('a',data$v)]
or
data%>%
mutate(y=ifelse(!is.na(v),get(paste0('a',v)),NA))
You could use standard indexing with cbind for that:
dat$y <- dat[cbind(1:nrow(dat), dat$v)]
The result:
> dat
a1 a2 v y
1 1 11 1 1
2 2 12 2 12
3 3 13 1 3
4 4 14 NA NA
5 5 15 2 15
6 6 16 1 6
7 7 17 2 17
8 8 18 1 8
9 9 19 2 19
10 10 20 1 10
(I used dat instead of data, because it is not wise to call a dataframe the same as a function; see ?data)
Only idea that comes to my mind:
data%>%
mutate(y=ifelse(!is.na(v),paste0('a',v),NA)) %>%
mutate(z=ifelse(!is.na(y),(ifelse(y=="a1",get("a1"),get("a2"))),NA))
a1 a2 v y z
1 1 11 1 a1 1
2 2 12 2 a2 12
3 3 13 1 a1 3
4 4 14 NA <NA> NA
5 5 15 2 a2 15
6 6 16 1 a1 6
7 7 17 2 a2 17
8 8 18 1 a1 8
9 9 19 2 a2 19
10 10 20 1 a1 10
or more directly:
data%>%
mutate(y=ifelse(!is.na(v),(ifelse(v==1, get("a1"),get("a2"))),NA))
a1 a2 v y
1 1 11 1 1
2 2 12 2 12
3 3 13 1 3
4 4 14 NA NA
5 5 15 2 15
6 6 16 1 6
7 7 17 2 17
8 8 18 1 8
9 9 19 2 19
10 10 20 1 10
still based on ifelse :(
You need to use a matrix accessor:
# Get the indices of missing values
ind <- which(!is.na(data$v))
# Transform colnames to indices
tab <- structure(match(c("a1", "a2"), names(data)), .Names = c("a1", "a2"))
# Access data with a matrix accessor
data$y[ind] <- data[cbind(ind, tab[paste0('a', data$v[ind])])]

R - Index position with condition

I've a data frame like this
w<-c(0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0)
i would like an index position starting after value 1.
output : NA,NA,NA,NA,NA,1,2,3,4,5,6,7,1,2,3,4,5,1,2,3,4,5,6,7,8,9
ideally applicable to a data frame.
Thanks
edit : w is a data frame,
roughly this function
m<-as.data.frame(w)
m[m!=1] <- row(m)[m!=1]
m
w
1 1
2 2
3 3
4 4
5 5
6 1
7 7
8 8
9 9
10 10
11 11
12 12
13 1
14 14
15 15
16 16
17 17
18 1
19 19
20 20
21 21
22 22
23 23
24 24
25 25
26 26
but with a return to 1 when value 1 is matching.
> m
w wanted
1 1 NA
2 2 NA
3 3 NA
4 4 NA
5 5 NA
6 1 1
7 7 2
8 8 3
9 9 4
10 10 5
11 11 6
12 12 7
13 1 1
14 14 2
15 15 3
16 16 4
17 17 5
18 1 1
19 19 2
20 20 3
21 21 4
22 22 5
23 23 6
24 24 7
25 25 8
26 26 9
Thanks
This assumes that the data is ordered in the way shown in example.
m$wanted <- with(m, ave(w, cumsum(c(TRUE,diff(w) <0)), FUN=seq_along))
m$wanted
#[1] 1 2 3 4 5 1 2 3 4 5 6 7 1 2 3 4 5 1 2 3 4 5 6 7 8 9
For the given data including repeated 1's and non-sequential input, the following works:
m[9,1] <- 100
m[3,1] <- 55
m[14,1] <- 60
m[14,1] <- 60
m[25,1] <- 1
m[19,1] <- 1
m$result <- 1:nrow(m) - which(m$w == 1)[cumsum(m$w == 1)] + 1
But if the data does not start on 1:
m[1,1] <- 2
Then this works:
firstone <- which(m$w == 1)[1]
subindex <- m[firstone:nrow(m),'w'] == 1
m$result <- c(rep(NA,firstone-1),1:length(subindex) - which(subindex)[cumsum(subindex)] + 1)

Resources