Normalize blocks/sub-matrices within a matrix - r

I want to normalize (i.e., 0-1) blocks/sub-matrices within a square matrix based on row/col names. It is important that the normalized matrix correspond to the original matrix. The below code extracts the blocks, e.g. all col/row names == "A" and normalizes it by its max value. How do I put that matrix of normalized blocks back together so it corresponds to the original matrix, such that each single value of the normalized blocks are in the same place as in the original matrix. I.e. you cannot put the blocks together and then e.g. sort the normalized matrix by the original's matrix row/col names.
#dummy code
mat <- matrix(round(runif(90, 0, 50),),9,9)
rownames(mat) <- rep(LETTERS[1:3],3)
colnames(mat) <- rep(LETTERS[1:3],3)
mat.n <- matrix(0,nrow(mat),ncol(mat), dimnames = list(rownames(mat),colnames(mat)))
for(i in 1:length(LETTERS[1:3])){
? <- mat[rownames(mat)==LETTERS[1:3][i],colnames(mat)==LETTERS[1:3][i]] / max(mat[rownames(mat)==LETTERS[1:3][i],colnames(mat)==LETTERS[1:3][i]])
#For example,
mat.n[rownames(mat)==LETTERS[1:3][i],colnames(mat)==LETTERS[1:3][i]] <- # doesn't work
}
UPDATE
Using ave() as #G. Grothendieck suggested works for the blocks, but I'm not sure how it's normalizing beyond that.
mat.n <- mat / ave(mat, rownames(mat)[row(mat)], colnames(mat)[col(mat)], FUN = max)
Within block the normalization works, e.g.
mat[rownames(mat)=="A",colnames(mat)=="A"]
A A A
A 13 18 15
A 38 33 41
A 12 18 47
mat.n[rownames(mat.n)=="A",colnames(mat.n)=="A"]
A A A
A 0.2765957 0.3829787 0.3191489
A 0.8085106 0.7021277 0.8723404
A 0.2553191 0.3829787 1.0000000
But beyond that, it looks weird.
> round(mat.n,1)
A B C A B C A B C
A 0.3 0.2 0.1 0.4 0.2 1.0 0.3 0.9 1.0
B 0.9 0.8 0.9 0.4 0.5 0.4 0.4 0.9 0.0
C 0.0 0.4 0.4 0.0 0.8 0.5 0.4 0.9 0.0
A 0.8 0.9 0.5 0.7 0.9 0.6 0.9 0.4 0.4
B 0.1 0.8 0.7 1.0 0.3 0.5 0.1 1.0 0.8
C 0.4 0.0 0.2 0.2 0.2 0.6 1.0 0.4 1.0
A 0.3 0.4 0.3 0.4 0.6 0.8 1.0 1.0 0.3
B 0.6 0.2 0.5 0.9 0.3 0.2 0.9 0.3 1.0
C 0.5 0.9 0.7 1.0 0.4 0.5 1.0 1.0 0.9
In this case, I would expect 3 1s across the whole matrix- 1 for each block. But there're 10 1s, e.g. mat.n[3,2], mat.n[1,9]. I'm not sure how this function normalized between blocks.
UPDATE 2
#Original matrix.
#Suggested solution produces `NaN`
mat <- as.matrix(read.csv(text=",1.21,1.1,2.2,1.1,1.1,1.21,2.2,2.2,1.21,1.22,1.22,1.1,1.1,2.2,2.1,2.2,2.1,2.2,2.2,2.2,1.21,2.1,2.1,1.21,1.21,1.21,1.21,1.21,2.2,1.21,2.2,1.1,1.22,1.22,1.22,1.22,1.21,1.22,2.1,2.1,2.1,1.22
1.21,0,0,0,0,0,0,0,0,292,13,0,0,0,0,0,0,0,0,0,0,22,0,0,94,19,79,0,9,0,126,0,0,0,0,0,0,0,0,0,0,0,0
1.1,0,0,0,155,166,0,0,0,0,0,0,4,76,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,34,0,0,0,0,0,0,0,0,0,0
2.2,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1.1,0,201,0,0,79,0,0,0,0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1.1,0,33,0,91,0,0,0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1.21,8,0,0,0,0,0,0,0,404,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37,26,18,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2.2,0,0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,162,79,1,0,0,0,0,0,0,0,0,10,0,27,0,0,0,0,0,0,0,0,0,0,0
2.2,0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,0,33,17,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0
1.21,207,0,0,0,0,1644,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,16,17,402,0,0,0,606,0,0,0,0,0,0,0,0,0,0,0,0
1.22,13,0,0,0,0,0,0,0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,26,0,0,15,0,0,0,0,0
1.22,0,0,0,0,0,0,0,0,0,71,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,374,6,121,6,21,0,0,0,0
1.1,0,0,0,44,0,0,0,0,0,0,0,0,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,33,0,0,0,0,0,0,0,0,0,0
1.1,0,0,0,24,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0
2.2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2.1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,18,0,0,0,0,353,116,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,29,0,5,0
2.2,0,0,0,0,0,0,0,37,0,0,0,0,0,4,0,0,0,36,46,62,0,0,0,0,0,0,0,0,0,0,73,0,0,0,0,0,0,1,0,0,0,0
2.1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,61,0,0,0,0,0,0,0,38,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
2.2,17,0,23,0,0,0,444,65,0,0,0,0,0,0,0,78,0,0,42,30,15,0,0,0,0,0,0,0,4,0,18,0,0,0,0,0,0,0,0,0,0,0
2.2,0,0,0,0,0,0,75,8,0,0,0,0,0,0,0,87,0,74,0,85,0,0,0,0,0,0,0,0,1,0,19,0,25,0,0,0,0,0,0,0,0,0
2.2,0,0,13,0,0,0,12,20,0,0,0,0,0,0,0,118,0,29,92,0,25,0,0,0,0,0,0,0,0,0,16,0,48,0,0,0,0,0,0,0,0,0
1.21,14,0,1,0,0,0,0,0,17,0,0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,0,0,3,0,20,0,0,0,0,0,0,0,0,0,0,0
2.1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,204,0,0,0,0,0,0,0,133,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,44,0,0
2.1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,67,0,0,0,0,0,0,143,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,12,15,0
1.21,79,0,0,0,0,0,0,0,34,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,38,26,6,9,0,112,0,0,0,0,0,0,0,0,0,0,0,0
1.21,11,0,0,0,0,17,0,0,49,0,0,0,0,0,0,0,0,0,0,0,0,0,0,28,0,0,0,32,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1.21,40,0,0,0,0,0,0,0,122,0,0,0,0,0,0,0,0,0,0,0,3,0,0,24,11,0,887,20,0,389,0,0,0,0,0,0,0,0,0,0,0,0
1.21,14,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,50,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1.21,34,0,0,0,0,26,0,0,56,0,0,0,0,0,0,0,0,0,0,0,0,0,0,54,9,297,13,0,0,16,0,0,0,0,0,0,0,0,0,0,0,0
2.2,0,0,0,0,0,0,39,0,0,0,0,0,0,0,0,25,0,17,12,20,25,0,0,0,0,0,0,0,0,0,393,0,7,0,0,0,0,0,0,0,0,0
1.21,177,0,0,0,0,8,0,0,775,0,0,0,0,0,0,0,0,0,0,0,0,0,0,113,0,227,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2.2,0,0,0,0,0,0,21,17,0,0,0,0,0,0,0,0,0,42,30,16,0,0,0,0,0,0,0,0,165,0,0,0,0,0,0,0,0,0,0,0,0,0
1.1,0,6,0,28,0,0,0,0,0,0,0,9,30,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1.22,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,4,37,0,0,0,0,0,0,0,0,3,0,0,0,0,14,7,0,0,18,0,0,0,0
1.22,0,0,0,0,0,0,0,0,0,44,785,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,21,0,44,177,13,24,0,0,0,0
1.22,0,0,0,0,0,0,30,0,0,182,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,12,0,1231,135,17,0,0,0,0
1.22,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,73,1308,0,669,16,0,0,0,8
1.21,0,0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13,33,197,626,0,44,0,0,0,0
1.22,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,24,37,12,80,0,0,0,0,16
2.1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,24,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,24,54,0
2.1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0,0,0,0,0,27,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,75,0,0,0
2.1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,58,0,1,0,0,0,0,28,24,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,61,2,0,0
1.22,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,31,9,0,0,0,0"))
ids <- read.csv(text=",x
1,1.21
2,1.1
3,2.2
4,1.1
5,1.1
6,1.21
7,2.2
8,2.2
9,1.21
10,1.22
11,1.22
12,1.1
13,1.1
14,2.2
15,2.1
16,2.2
17,2.1
18,2.2
19,2.2
20,2.2
21,1.21
22,2.1
23,2.1
24,1.21
25,1.21
26,1.21
27,1.21
28,1.21
29,2.2
30,1.21
31,2.2
32,1.1
33,1.22
34,1.22
35,1.22
36,1.22
37,1.21
38,1.22
39,2.1
40,2.1
41,2.1
42,1.22")
mat <- mat[,-1]
rownames(mat) <- ids$x
colnames(mat) <- ids$x
ans <- mat / ave(mat, rownames(mat)[row(mat)], colnames(mat)[col(mat)], FUN = max)
Any help is much appreciated, thanks.

Use ave to get the maxima:
mat / ave(mat, rownames(mat)[row(mat)], colnames(mat)[col(mat)], FUN = max)
For example, there are 9 ones, as expected, and there is one 1 in each block also as expected. (There could be more than 9 if the matrix happened to have multiple maxima in one or more blocks but there shoud not be less than 9.)
set.seed(123)
mat <- matrix(round(runif(90, 0, 50),),9,9)
rownames(mat) <- rep(LETTERS[1:3],3)
colnames(mat) <- rep(LETTERS[1:3],3)
ans <- mat / ave(mat, rownames(mat)[row(mat)], colnames(mat)[col(mat)], FUN = max)
sum(ans == 1)
## [1] 9
# there are no duplicates (i.e. a block showing up more than once) hence
# there is exactly one 1 in each block
w <- which(ans == 1, arr = TRUE)
anyDuplicated(cbind(rownames(mat)[w[, 1]], colnames(mat)[w[, 2]]))
## [1] 0
ADDED
If some blocks are entirely zero (which is the case in UPDATE 2) then you will get NaNs for those blocks. If you want 0s instead for the all-zero blocks try this:
xmax <- function(x) if (all(x == 0)) 0 else x/max(x)
ave(mat, rownames(mat)[row(mat)], colnames(mat)[col(mat)], FUN = xmax)

Related

How to collect outputs of multivariable vector-valued function into a dataframe?

I have a function f1 that take a pair of real numbers (x, y) and returns a triple of real numbers. I would like to collect all outputs of this function for all x in a vector a and y in a vector b. Could you please elaborate on how to do so?
f1 <- function(x, y){
return (c(x+y, x-y, x*y))
}
a <- seq(0, pi, 0.1)
b <- seq(0, 2 * pi, 0.1)
Update: I mean for all pair $(x, y) \in a \times b$.
Here is a data.table option
setDT(expand.grid(a, b))[, fval := do.call(Vectorize(f1, SIMPLIFY = FALSE), unname(.SD))][]
where expand.grid + do.call + Vectorize are used, giving
Var1 Var2 fval
1: 0.0 0.0 0,0,0
2: 0.1 0.0 0.1,0.1,0.0
3: 0.2 0.0 0.2,0.2,0.0
4: 0.3 0.0 0.3,0.3,0.0
5: 0.4 0.0 0.4,0.4,0.0
---
2012: 2.7 6.2 8.90,-3.50,16.74
2013: 2.8 6.2 9.00,-3.40,17.36
2014: 2.9 6.2 9.10,-3.30,17.98
2015: 3.0 6.2 9.2,-3.2,18.6
2016: 3.1 6.2 9.30,-3.10,19.22
A more compact one is using CJ(a,b) instead of setDT(expand.grid(a, b)) (Thank #akrun's advise)
We can use expand.grid to expand the data between 'a', and 'b' values, then loop over the row with apply, MARGIN = 1 and apply the f1
out <- as.data.frame(t(apply(expand.grid(a, b), 1, function(x) f1(x[1], x[2]))))
Or with tidyverse
library(dplyr)
library(purrr)
library(tidyr)
out2 <- crossing(x = a, y = b) %>%
pmap_dfr(f2)
-output
head(out2)
# A tibble: 6 x 3
# add subtract multiply
# <dbl> <dbl> <dbl>
#1 0 0 0
#2 0.1 -0.1 0
#3 0.2 -0.2 0
#4 0.3 -0.3 0
#5 0.4 -0.4 0
#6 0.5 -0.5 0
where f2
f2 <- function(x, y){
return (tibble(add = x+y, subtract = x-y, multiply = x*y))
}
It may be better to return a list or tibble so that it becomes easier
Create all possible combinations with expand.grid and use Map to apply f1 to every pair.
val <- expand.grid(a, b)
result <- do.call(rbind, Map(f1, val$Var1, val$Var2))
head(result)
# [,1] [,2] [,3]
#[1,] 0.0 0.0 0
#[2,] 0.1 0.1 0
#[3,] 0.2 0.2 0
#[4,] 0.3 0.3 0
#[5,] 0.4 0.4 0
#[6,] 0.5 0.5 0

Looping Through R data to replace all values

Basic level R programmer trying to re-calibrate data using a weighted effect and some other value. In particular I want to 1) if the weighted effect is negative take the row value of X and subtract the person's value or 2) if the weighted effect is positive take the person's value and subtract X.
Mock data:
p1 <- c(0.4,0.7,0.3,0.2)
p2 <- c(0.8,0.4,0.5,0.1)
p3 <- c(0.6,0.5,0.4,0.3)
wef <- c(1.5,-1.2,1.8,-1.3)
x <- c(0.5,0.4,0.6,0.2)
print(df)
p1 p2 p3 wef x
1 0.4 0.8 0.6 1.5 0.5
2 0.7 0.4 0.5 -1.2 0.4
3 0.3 0.5 0.4 1.8 0.6
4 0.2 0.1 0.3 -1.3 0.2
I attempted this (which did nothing and likely would be inefficient with for loops):
for(row in 1:nrow(df)) {
for(col in 1:ncol(df)) {
ifelse(weightef[row] < 0, df[row,col]==(df$x[row]-df[row,col]),
df[row,col]==df[row,col]-df$x[row])
}
}
my desired output in case the above was to hard to follow is this
person1 person2 person3 weightef x
1 -0.1 0.3 0.1 1.5 0.5
2 -0.3 0.0 -0.1 -1.2 0.4
3 -0.3 -0.1 -0.2 1.8 0.6
4 0.0 0.1 -0.1 -1.3 0.2
You can using apply and ifelse function in R. This is just one line function, and you are not required to understand grep. The second line of code just put everything into data frame.
result <- apply(df[, 1:3], 2, FUN = function(y) with(df, ifelse(wef < 0, x - y, y - x)))
df <- as.data.frame(cbind(result, wef, x))
p1 p2 p3 wef x
1 -0.1 0.3 0.1 1.5 0.5
2 -0.3 0.0 -0.1 -1.2 0.4
3 -0.3 -0.1 -0.2 1.8 0.6
4 0.0 0.1 -0.1 -1.3 0.2
We can do this without a loop in R
nm1 <- grep("^p\\d+", names(df), value = TRUE)
i1 <- df$wef > 0
df[i1, nm1] <- df[i1, nm1] - df$x[i1]
df[!i1, nm1] <- df$x - df[!i1, nm1]
data
df <- data.frame(p1, p2, p3, wef, x)
If you want to use for loops you can do it in this way
#Create dataframe
df = data.frame(p1, p2, p3, wef, x)
#looping lenght of vector wef
for (w in 1:length(df$wef))
{
#Checking positive or negative weight
if (df$wef[w] >= 0)
{
#subtracting
df$p1[w] = df$p1[w] - df$x[w]
df$p2[w] = df$p2[w] - df$x[w]
df$p3[w] = df$p3[w] - df$x[w]
}
else
{
#subtracting
df$p1[w] = df$x[w] - df$p1[w]
df$p2[w] = df$x[w] - df$p2[w]
df$p3[w] = df$x[w] - df$p3[w]
}
}
#print result
print(df)
p1 p2 p3 wef x
1 -0.1 0.3 0.1 1.5 0.5
2 -0.3 0.0 -0.1 -1.2 0.4
3 -0.3 -0.1 -0.2 1.8 0.6
4 0.0 0.1 -0.1 -1.3 0.2

Round sequence of numbers to chosen numbers

I got a vector of numbers from 0 to 1. I'd like to divide them to X amount of groups - for example if X=5, then round the numbers to 5 groups: all numbers from 0 to 0.2 will be 0, all from 0.2 to 0.4 will be 0.2, etc.
For example, if I have x <- c(0.34,0.07,0.56) and X=5 like the above explanation, I'll get (0.2, 0, 0.4).
So far, the only way I found to that is by looping over the entire vector. Is there a more elegant way to do that?
You can simply do:
floor(x*X)/X
# [1] 0.2 0.0 0.4
More testing cases:
X = 10
floor(x*X)/X
# [1] 0.3 0.0 0.5
X = 2
floor(x*X)/X
# [1] 0.0 0.0 0.5
X = 5
floor(x*X)/X
# [1] 0.2 0.0 0.4
Data:
x <- c(0.34,0.07,0.56)
Try:
cut.alt <- function(x, X) {
out <- cut(x, breaks=(1:X-1)/X)
levels(out) <- as.character((1:X-1)/X)
out
}
cut with breaks set to (1:X-1)/X divides the vector x into groups like OP asks. Then changing the levels to the value of the cutoff gives the answer.
Or using plyr:
library(plyr)
round_any(x, 1/X,floor)
# [1] 0.2 0.0 0.4

Trouble transforming a data set in R; making a look up table

R (programming language)
I would like to transform my data set that has sample numbers, treatment days and concentrations (variable); to set it up as a single matix where the cells are filed with only concentration values. My output is a lookup table, where the user can look up a sample number along the 1st row and a day along the first column (header), and follow these along to get a concentration.
This is not my data set (it comes as a matrix), however I quickly made these three for the example.
Samplenb - < c(1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4)
Day <- c(1,5,10,15,1,5,10,15,1,5,10,15,1,5,10,15)
Concentration <- c(0.2, 0.3, 0.5, 0.9,0.2, 0.3, 0.5, 0.9,0.2, 0.3, 0.5, 0.9,0.2, 0.3, 0.5, 0.9)
Any help it much appreciated. I have been playing around with the reshape package functions. However, they do not seem suitable.
Thank you for taking the time to help me!
For variety (and since you mentioned "reshape"), here are a few options (though MrFlick's is by far the most appropriate).
The first two options assume we have grouped your vectors into a data.frame:
DF <- data.frame(Samplenb, Day, Concentration)
Option 1: reshape
reshape(DF, direction = "wide", idvar = "Day", timevar = "Samplenb")
# Day Concentration.1 Concentration.2 Concentration.3 Concentration.4
# 1 1 0.2 0.2 0.2 0.2
# 2 5 0.3 0.3 0.3 0.3
# 3 10 0.5 0.5 0.5 0.5
# 4 15 0.9 0.9 0.9 0.9
Option 2: dcast from "reshape2"
library(reshape2)
dcast(DF, Day ~ Samplenb, value.var="Concentration")
# Day 1 2 3 4
# 1 1 0.2 0.2 0.2 0.2
# 2 5 0.3 0.3 0.3 0.3
# 3 10 0.5 0.5 0.5 0.5
# 4 15 0.9 0.9 0.9 0.9
Option 3: A manual approach--should be fast, but unless you're a coding masochist, best left as a lesson in matrix indexing in R.
Nrow <- unique(Day)
Ncol <- unique(Samplenb)
M <- matrix(0, nrow = length(Nrow), ncol = length(Ncol),
dimnames = list(Nrow, Ncol))
M[cbind(match(Day, rownames(M)), match(Samplenb, colnames(M)))] <- Concentration
# 1 2 3 4
# 1 0.2 0.2 0.2 0.2
# 5 0.3 0.3 0.3 0.3
# 10 0.5 0.5 0.5 0.5
# 15 0.9 0.9 0.9 0.9
Good ol' xtabs can help out here
xtabs(Concentration ~ Day + Samplenb)
will produce
Samplenb
Day 1 2 3 4
1 0.2 0.2 0.2 0.2
5 0.3 0.3 0.3 0.3
10 0.5 0.5 0.5 0.5
15 0.9 0.9 0.9 0.9

How to copy a value in a vector to next position(s) in vector

I have a vector that looks something like this:
c(0.5,0,0,0,0,0.7,0,0,0,0,0.4,0,0,0,0)
Suppose I want to copy the values on positions 1, 6 and 11 (the ones that are not 0) to the four positions following that specific value, to make the vector look like this:
c(0.5,0.5,0.5,0.5,0.5,0.7,0.7,0.7,0.7,0.7,0.4,0.4,0.4,0.4,0.4)
How could I best do that in R?
Many thanks!
Another possibility:
vec <- c(0.5,0,0,0,0,0.7,0,0,0,0,0.4,0,0,0,0)
library(zoo)
vec[vec==0] <- NA
na.locf(vec)
#[1] 0.5 0.5 0.5 0.5 0.5 0.7 0.7 0.7 0.7 0.7 0.4 0.4 0.4 0.4 0.4
Here's one way:
zero.locf <- function(x) {
    if (x[1] == 0) stop('x[1] should not be 0')
    with(rle(x), {
        no.0 <- replace(values, values == 0, values[(values == 0) - 1])
        rep(no.0, lengths)
    })
}
x <- c(0.5,0,0,0,0,0.7,0,0,0,0,0.4,0,0,0,0)
zero.locf(x)
# [1] 0.5 0.5 0.5 0.5 0.5 0.7 0.7 0.7 0.7 0.7 0.4 0.4 0.4 0.4 0.4
rle(x) returns a list with items values and lengths.
rle(x)
Run Length Encoding
lengths: int [1:6] 1 4 1 4 1 4
values : num [1:6] 0.5 0 0.7 0 0.4 0
with opens up this list and lets us reference these entries directly.
Here's another base R approach. Initial zeros are left as is:
v = c(0,1,2,-2.1,0,3,0,0.4,0,0)
v[v!=0] = diff(c(0, v[v!=0]))
cumsum(v)
# [1] 0.0 1.0 2.0 -2.1 -2.1 3.0 3.0 0.4 0.4 0.4
And here are some benchmarks:
roland = function(v) {v[v == 0] <- NA; na.locf(v)}
mp = function(x) {with(rle(x), rep(replace(values, values==0, values[which(values == 0)-1]), lengths))}
quant = function(dat) {not.0 <- (dat != 0); approx(which(not.0), dat[not.0], xout = seq(along.with = dat), method = "constant", rule = 2)}
eddi = function(v) {v[v!=0] = diff(c(0, v[v!=0])); cumsum(v)}
v = sample(c(-10:10, 0), 1e6, TRUE)
microbenchmark(roland(v), mp(v), quant(v), eddi(v), times = 10)
#Unit: milliseconds
# expr min lq median uq max neval
# roland(v) 595.1630 625.7692 638.4395 650.4758 664.9224 10
# mp(v) 410.8224 433.6775 469.9346 496.6328 528.3218 10
# quant(v) 646.1775 753.0684 759.9805 838.4281 883.3383 10
# eddi(v) 265.8064 286.2922 316.7022 339.0333 354.0836 10
I'd probably loop through every single element greater 0 using lapply, then apply rep function to repeat each of these values 5 times and merge the resulting list entries via do.call("c", ...).
do.call("c", lapply(which(tmp > 0), function(i) rep(tmp[i], 5)))
[1] 0.5 0.5 0.5 0.5 0.5 0.7 0.7 0.7 0.7 0.7 0.4 0.4 0.4 0.4 0.4
Here is an alternative using approx
dat <- c(0.5,0,0,0,0,0.7,0,0,0,0,0.4,0,0,0,0)
not.0 <- (dat != 0)
approx(which(not.0), dat[not.0], xout = seq(along.with = dat), method = "constant", yleft = 0, rule = 1:2)
# $x
# [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
#
# $y
# [1] 0.5 0.5 0.5 0.5 0.5 0.7 0.7 0.7 0.7 0.7 0.4 0.4 0.4 0.4 0.4
And here is an alternative that relies on the stated structure of the initial vector (repetitions of a non-zero value followed by 4 zeros).
It adresses the speed issue but at the cost of flexibility.
dat <- c(0.5,0,0,0,0,0.7,0,0,0,0,0.4,0,0,0,0)
rep(dat[seq(1, length(dat), by = 5)], each = 5)

Resources