I have the following dataframe:
> dput(df)
structure(list(x = c(0.871877138037235, 0.534444199409336, 0.677225327817723,
0.124835065566003, 0.972407285822555, 0.179870884865522, 0.468708630651236,
0.405605535488576, 0.717907374724746, 0.157441936200485), y = c(0,
1, 2, 0, 0, 0, 0, 0, 1, 0)), class = "data.frame", row.names = c(NA,
-10L))
i.e.
> df
x y
1 0.8718771 0
2 0.5344442 1
3 0.6772253 2
4 0.1248351 0
5 0.9724073 0
6 0.1798709 0
7 0.4687086 0
8 0.4056055 0
9 0.7179074 1
10 0.1574419 0
I would like to obtain a new dataframe considering the following rules:
If in column y appear 1 and 2 (or 2 and 1) sequentially, then multiply the next 3 values in column x by -1.4
If in column y appears 1 (and just 1), then multiply the next 3 values column x by -1
If in column y appear 1 and 3 (or 3 and 1) sequentially, then multiply the next 3 values column x by -0.6
If in column y appears 2 (and just 2), then multiply the next 3 values column x by 1.4
In our case the desired result is:
> df
x y
1 0.8718771 0
2 0.5344442 1
3 0.6772253 2
4 -0.1747691 0
5 -1.36137 0
6 -0.2518193 0
7 0.4687086 0
8 0.4056055 0
9 0.7179074 1
10 -0.1574419 0
This solution may sound ugly but I think it's quite stable, however it may need further testings and improvements:
library(dplyr)
# First I set out to detect every observation that falls into any of the 4 categories
df %>%
mutate(z = case_when(
lag(y, n = 2, default = 0) %in% c(1, 2) & lag(y, default = 0) %in% c(2, 1) ~ 1,
lag(y, n = 2, default = 0) == 0 & lag(y, default = 0) == 1 & y == 0 ~ 2,
lag(y, n = 2, default = 0) %in% c(1, 3) & lag(y, default = 0) %in% c(2, 1) ~ 3,
lag(y, n = 2, default = 0) == 0 & lag(y, default = 0) == 2 & y == 0 ~ 4,
TRUE ~ 0
)) -> DF
# Then I wrote a custom function to apply multiplication phase on a sequence of three rows
fn <- function(x) {
out <- x$x
for(i in 1:nrow(x)) {
if(x$z[i] == 1) {
out[i:(i+2)] <- out[i:(i+2)] * (-1.4)
} else if(x$z[i] == 2) {
out[i:(i+2)] <- out[i:(i+2)] * (-1)
} else if(x$z[i] == 3) {
out[i:(i+2)] <- out[i:(i+2)] * (-0.6)
} else if(x$z[i] == 4) {
out[i:(i+2)] <- out[i:(i+2)] * (1.4)
} else {
out[i:(i+2)] <- out[i:(i+2)] * 1
}
}
dt <- cbind(new_x = out[!is.na(out)], y = x$y) |> as.data.frame()
dt
}
fn(DF)
new_x y
1 0.8718771 0
2 0.5344442 1
3 0.6772253 2
4 -0.1747691 0
5 -1.3613702 0
6 -0.2518192 0
7 0.4687086 0
8 0.4056055 0
9 0.7179074 1
10 -0.1574419 0
A for loop
df <- structure(list(x = c(0.871877138037235, 0.534444199409336, 0.677225327817723,
0.124835065566003, 0.972407285822555, 0.179870884865522, 0.468708630651236,
0.405605535488576, 0.717907374724746, 0.157441936200485), y = c(0,
1, 2, 0, 0, 0, 0, 0, 1, 0)), class = "data.frame", row.names = c(NA,
-10L))
df
#> x y
#> 1 0.8718771 0
#> 2 0.5344442 1
#> 3 0.6772253 2
#> 4 0.1248351 0
#> 5 0.9724073 0
#> 6 0.1798709 0
#> 7 0.4687086 0
#> 8 0.4056055 0
#> 9 0.7179074 1
#> 10 0.1574419 0
for(i in 2:nrow(df)){
if((df$y[i] == 1 & df$y[i+1] ==2) | (df$y[i] == 2 & df$y[i+1] ==1)) {
df$x[seq(i+2, by = 1, length.out= min(nrow(df) - (i+1), 3))] <- df$x[seq(i+2, by = 1, length.out=min(nrow(df) - (i+1), 3))] * -1.4
} else if ((df$y[i] == 1 & df$y[i+1] ==3) | (df$y[i] == 3 & df$y[i+1] ==1)){
df$x[seq(i+2, by = 1, length.out= min(nrow(df) - (i+1), 3))] <- df$x[seq(i+2, by = 1,length.out= min(nrow(df) - (i+1), 3))] * -0.6
} else if (df$y[i] == 1 & !df$y[i+1] %in% c(1,2,3) & !df$y[i-1] %in% c(1,2,3) ) {
df$x[seq(i+1, by = 1, length.out=min(nrow(df) - (i), 3)) ] <- df$x[seq(i+1, by = 1, length.out= min(nrow(df) - (i), 3))] * -1
} else if (df$y[i] == 2 & !df$y[i+1] %in% c(1,2,3) & !df$y[i-1] %in% c(1,2,3)) {
df$x[seq(i+1, by = 1, length.out=min(nrow(df) - (i), 3)) ] <- df$x[seq(i+1, by = 1, length.out=min(nrow(df) - (i), 3))] * 1.4
}
}
df
#> x y
#> 1 0.8718771 0
#> 2 0.5344442 1
#> 3 0.6772253 2
#> 4 -0.1747691 0
#> 5 -1.3613702 0
#> 6 -0.2518192 0
#> 7 0.4687086 0
#> 8 0.4056055 0
#> 9 0.7179074 1
#> 10 -0.1574419 0
Created on 2021-06-26 by the reprex package (v2.0.0)
Related
Consider a vector:
int = c(1, 1, 0, 5, 2, 0, 0, 2)
I'd like to get the closest subsequent index (not the difference) for a specified value. The first parameter of the function should be the vector, while the second should be the value one wants to see the closest subsequent elements.
For instance,
f(int, 0)
# [1] 2 1 0 2 1 0 0 NA
Here, the first element of the vector (1) is two positions away from the first subsequent 0, (3 - 1 = 2), so it should return 2. Then the second element is 1 position away from a 0 (2 - 1 = 1). When there is no subsequent values that match the specified value, return NA (here it's the case for the last element, because no subsequent value is 0).
Other examples:
f(int, 1)
# [1] 0 0 NA NA NA NA NA NA
f(int, 2)
# [1] 4 3 2 1 0 2 1 0
f(int, 3)
# [1] NA NA NA NA NA NA NA NA
This should also work for character vectors:
char = c("A", "B", "C", "A", "A")
f(char, "A")
# [1] 0 2 1 0 0
Find the location of each value (numeric or character)
int = c(1, 1, 0, 5, 2, 0, 0, 2)
value = 0
idx = which(int == value)
## [1] 3 6 7
Expand the index to indicate the nearest value of interest, using an NA after the last value in int.
nearest = rep(NA, length(int))
nearest[1:max(idx)] = rep(idx, diff(c(0, idx))),
## [1] 3 3 3 6 6 6 7 NA
Use simple arithmetic to find the difference between the index of the current value and the index of the nearest value
abs(seq_along(int) - nearest)
## [1] 2 1 0 2 1 0 0 NA
Written as a function
f <- function(x, value) {
idx = which(x == value)
nearest = rep(NA, length(x))
if (length(idx)) # non-NA values only if `value` in `x`
nearest[1:max(idx)] = rep(idx, diff(c(0, idx)))
abs(seq_along(x) - nearest)
}
We have
> f(int, 0)
[1] 2 1 0 2 1 0 0 NA
> f(int, 1)
[1] 0 0 NA NA NA NA NA NA
> f(int, 2)
[1] 4 3 2 1 0 2 1 0
> f(char, "A")
[1] 0 2 1 0 0
> f(char, "B")
[1] 1 0 NA NA NA
> f(char, "C")
[1] 2 1 0 NA NA
The solution doesn't involve recursion or R-level loops, so should e fast even for long vectors.
Look for the match from nth position to the end of the vector, then get the 1st match:
f <- function(v, x){
sapply(seq_along(v), function(i){
which(v[ i:length(v) ] == x)[ 1 ] - 1
})
}
f(int, 0)
# [1] 2 1 0 2 1 0 0 NA
f(int, 1)
# [1] 0 0 NA NA NA NA NA NA
f(int, 2)
# [1] 4 3 2 1 0 2 1 0
f(int, 3)
# [1] NA NA NA NA NA NA NA NA
f(char, "A")
# [1] 0 2 1 0 0
Using sequence:
f <- function(v, x){
d = diff(c(0, which(v == x)))
vec <- sequence(d, d-1, by = -1)
length(vec) <- length(int)
vec
}
Output
int = c(1, 1, 0, 5, 2, 0, 0, 2)
char = c("A", "B", "C", "A", "A")
f(int, 0)
# [1] 2 1 0 2 1 0 0 NA
f(int, 1)
# [1] 0 0 NA NA NA NA NA NA
f(int, 2)
# [1] 4 3 2 1 0 2 1 0
f(char, "A")
# [1] 0 2 1 0 0
Benchmark (n = 1000):
set.seed(123)
int = sample(0:100, size = 1000, replace = T)
library(microbenchmark)
bm <- microbenchmark(
fSequence(int, 0),
fzx8754(int, 0),
fRecursive(int, 0),
fMartinMorgan(int, 0),
fMap2dbl(int, 0),
fReduce(int, 0),
fAve(int, 0),
fjblood94(int, 0),
times = 10L,
setup = gc(FALSE)
)
autoplot(bm)
Martin Morgan's solution seems to be the quickest, followed by this answer's sequence solution, sbarbit's recursive solution, and jblood94's for loop solution.
Functions used:
fSequence <- function(v, x){
vec <- sequence(diff(c(0, which(v == x))), diff(c(0, which(v == x))) - 1, by = -1)
length(vec) <- length(v)
vec
}
fzx8754 <- function(v, x){
sapply(seq_along(v), function(i){
which(v[ i:length(v) ] == x)[ 1 ] - 1
})
}
fRecursive <- function(lookup,val ) {
ind <- which(lookup == val)[1] -1
if (length(lookup) > 1) {
c(ind, f(lookup[-1], val))
} else {
ind
}
}
fMartinMorgan <- function(x, value) {
idx = which(x == value)
nearest = rep(NA, length(x))
nearest[1:max(idx)] = rep(idx, diff(c(0, idx)))
abs(seq_along(x) - nearest)
}
fMap2dbl <- function(int, num)
{
n <- length(int)
map2_dbl(num, 1:n, ~ ifelse(length(which(.x == int[.y:n])) == 0, NA,
min(which(.x == int[.y:n])) - 1))
}
fReduce <- function(vec, value) {
replace(
Reduce(
function(x, y)
x + (y * x) ,
vec != value,
right = TRUE,
accumulate = TRUE
),
max(tail(which(vec == value), 1), 0) < seq_along(vec),
NA
)
}
fAve <- function(init, k) {
ave(
seq_along(init),
c(0, head(cumsum(init == k), -1)),
FUN = function(x) if (any(x == k)) rev(seq_along(x) - 1) else NA
)
}
fjblood94 <- function(v, val) {
out <- integer(length(v))
if (v[length(v)] != val) out[length(v)] <- NA_integer_
for (i in (length(v) - 1L):1) {
if (v[i] == val) {
out[i] <- 0L
} else {
out[i] <- out[i + 1L] + 1L
}
}
return(out)
}
Here f is defined as a recursive function that calls itself over shorter tails of the lookup vector:
f <- function(lookup,val ) {
ind <- which(lookup == val)[1] -1
if (length(lookup) > 1) {
c(ind, f(lookup[-1], val))
} else {
ind
}
}
Here is an approach using Reduce() and then some fiddling to get the NA values.
f <- function(vec, value) {
replace(
Reduce(
function(x, y)
x + (y * x) ,
vec != value,
right = TRUE,
accumulate = TRUE
),
max(tail(which(vec == value), 1), 0) < seq_along(vec),
NA
)
}
f(int, 0)
[1] 2 1 0 2 1 0 0 NA
f(int, 1)
[1] 0 0 NA NA NA NA NA NA
f(int, 2)
[1] 4 3 2 1 0 2 1 0
f(int, 3)
[1] NA NA NA NA NA NA NA NA
char = c("A", "B", "C", "A", "A")
f(char, "A")
[1] 0 2 1 0 0
Another possible solution, based on purrr::map2_dbl:
library(purrr)
int = c(1, 1, 0, 5, 2, 0, 0, 2)
f <- function(int, num)
{
n <- length(int)
map2_dbl(num, 1:n, ~ ifelse(length(which(.x == int[.y:n])) == 0, NA,
min(which(.x == int[.y:n])) - 1))
}
f(int, 0)
#> [1] 2 1 0 2 1 0 0 NA
f(int, 1)
#> [1] 0 0 NA NA NA NA NA NA
f(int, 2)
#> [1] 4 3 2 1 0 2 1 0
f(int, 3)
#> [1] NA NA NA NA NA NA NA NA
char = c("A", "B", "C", "A", "A")
f(char, "A")
#> [1] 0 2 1 0 0
A single-pass for loop is simple and efficient:
f1 <- function(v, val) {
out <- integer(length(v))
if (v[length(v)] != val) out[length(v)] <- NA_integer_
for (i in (length(v) - 1L):1) {
if (v[i] == val) {
out[i] <- 0L
} else {
out[i] <- out[i + 1L] + 1L
}
}
return(out)
}
int <- c(1, 1, 0, 5, 2, 0, 0, 2)
chr <- c("A", "B", "C", "A", "A")
f1(int, 0)
#> [1] 2 1 0 2 1 0 0 NA
f1(chr, "A")
#> [1] 0 2 1 0 0
Benchmarking against other solutions:
f2 <- function(v, x){
sapply(seq_along(v), function(i){
which(v[ i:length(v) ] == x)[ 1 ] - 1
})
}
f3 <- function(lookup,val ) {
ind <- which(lookup == val)[1] -1
if (length(lookup) > 1) {
c(ind, f3(lookup[-1], val))
} else {
ind
}
}
f4 <- function(x, value) {
idx = which(x == value)
nearest = rep(NA, length(x))
nearest[1:max(idx)] = rep(idx, diff(c(0, idx)))
abs(seq_along(x) - nearest)
}
f5 <- function(vec, value) {
replace(
Reduce(
function(x, y)
x + (y * x) ,
vec != value,
right = TRUE,
accumulate = TRUE
),
max(tail(which(vec == value), 1), 0) < seq_along(vec),
NA
)
}
microbenchmark::microbenchmark(f1 = {f1(int, 0); f1(chr, "A")},
f2 = {f2(int, 0); f2(chr, "A")},
f3 = {f3(int, 0); f3(chr, "A")},
f4 = {f4(int, 0); f4(chr, "A")},
f5 = {f5(int, 0); f5(chr, "A")},
check = "equal")
#> Unit: microseconds
#> expr min lq mean median uq max neval
#> f1 6.0 7.50 8.990 8.40 9.60 18.3 100
#> f2 54.2 61.45 71.752 65.55 79.40 131.8 100
#> f3 25.5 28.60 33.393 30.75 35.90 105.2 100
#> f4 22.3 26.30 30.599 28.00 32.65 82.4 100
#> f5 59.7 64.55 73.474 69.10 75.70 157.0 100
A base R option using ave + cumsum
f <- function(init, k) {
ave(
seq_along(init),
c(0, head(cumsum(init == k), -1)),
FUN = function(x) if (any(x == k)) rev(seq_along(x) - 1) else NA
)
}
and you will see
> f(init, 0)
[1] 2 1 0 2 1 0 0 NA
> f(init, 1)
[1] 0 0 NA NA NA NA NA NA
> f(init, 2)
[1] 4 3 2 1 0 2 1 0
> f(init, 3)
[1] NA NA NA NA NA NA NA NA
The input vector is as below,
data=c(1,1,1,1,11,1,1,1,1,12,1,1,2,1,1,1)
I want the output as 1,1,1,1,11,11,11,11,11,12,12,12,2,2,2,2 where the 1's proceeding the non 1's should be imputed the non 1 value in R.
I tried the following code
data=c(1,1,1,1,11,1,1,1,1,12,1,1,2,1,1,1)
sapply(data, function(x) ifelse (lag(x)!=1,lag(x),x))
but it didn't yield expected output
You can convert every 1 after the first non-1 value to NA then use zoo::na.locf():
library(zoo)
x <- c(1,1,1,1,11,1,1,1,1,12,1,1,2,1,1,1)
data[seq_along(x) > which.max(x!= 1) & x== 1] <- NA
na.locf(x)
[1] 1 1 1 1 11 11 11 11 11 12 12 12 2 2 2 2
Or using replace() to add the NA values:
na.locf(replace(x, seq_along(x) > which.max(x != 1) & x == 1, NA))
In response to your comment about applying it to groups, you can use ave():
df <- data.frame(x = c(x, rev(x)), grp = rep(1:2, each = length(x)))
ave(df$x, df$grp, FUN = function(y)
na.locf(replace(y, seq_along(y) > which.max(y != 1) & y == 1, NA))
)
You can write your custom fill function:
x <- c(1,1,1,1,11,1,1,1,1,12,1,1,2,1,1,1)
myfill <- function(x) {
mem <- x[1]
for (i in seq_along(x)) {
if (x[i] == 1) {
x[i] <- mem
} else {
mem <- x[i]
}
}
x
}
myfill(x)
# 1 1 1 1 11 11 11 11 11 12 12 12 2 2 2 2
You could match unique 1 and non-1 values with the cumsum of non-1 values.
(c(1, x[x != 1]))[match(cumsum(x != 1), 0:3)]
# [1] 1 1 1 1 11 11 11 11 11 12 12 12 2 2 2 2
Data
x <- c(1, 1, 1, 1, 11, 1, 1, 1, 1, 12, 1, 1, 2, 1, 1, 1)
You can use rle from base to overwrite 1 with the value before.
x <- rle(data)
y <- c(FALSE, (x$values == 1)[-1])
x$values[y] <- x$values[which(y)-1]
inverse.rle(x)
# [1] 1 1 1 1 11 11 11 11 11 12 12 12 2 2 2 2
x<-c(0,1,1,0,1,1,1,0,1,1)
aaa<-data.frame(x)
How to insert a blank row before zero? When the first row is zeroļ¼do not add blank row. Thank you.
Result:
0
1
1
.
0
1
1
1
.
0
1
1
Below we used dot but you can replace "." with NA or "" or something else depending on what you want.
1) We can use Reduce and append:
Append <- function(x, y) append(x, ".", y - 1)
data.frame(x = Reduce(Append, setdiff(rev(which(aaa$x == 0)), 1), init = aaa$x))
2) gsub Another possibility is to convert to a character string, use gsub and convert back:
data.frame(x = strsplit(gsub("(.)0", "\\1.0", paste(aaa$x, collapse = "")), "")[[1]])
3) We can create a two row matrix in which the first row is dot before each 0 and NA otherwise. Then unravel it to a vector and use na.omit to remove the NA values.
data.frame(x = na.omit(c(rbind(replace(ifelse(aaa$x == 0, ".", NA), 1, NA), aaa$x))))
4) We can lapply over aaa$x[-1] outputting c(".", 9) or 1. Unlist that and insert aaa$x[1] back in. No packages are used.
repl <- function(x) if (!x) c(".", 0) else 1
data.frame(x = c(aaa$x[1], unlist(lapply(aaa$x[-1], repl))))
5) Create a list of all but the first element and replace the 0's in that list with c(".", 0) . Unlist that and insert the first element back in. No packages are used.
L <- as.list(aaa$x[-1])
L[x[-1] == 0] <- list(c(".", 0))
data.frame(x = c(aaa$x[1], unlist(L)))
6) Assuming aaa has two columns where the second column is character (NOT factor). Append a row of dots to aaa and then create an index vector using unlist and Map to access the appropriate row of the extended aaa.
aaa <- data.frame(x = c(0,1,1,0,1,1,1,0,1,1), y = letters[1:10],
stringsAsFactors = FALSE)
nr <- nrow(aaa); nc <- ncol(aaa)
fun <- function(ix, x) if (!is.na(x) & x == 0 & ix > 1) c(nr + 1, ix) else ix
rbind(aaa, rep(".", nc))[unlist(Map(fun, 1:nr, aaa$x)), ]
If we did want to have y be factor then note that we can't just add a dot to a factor if it is not a level of that factor so there is the question of what levels the factor can have. To get around that let us add an NA rather than a dot to the factor. Then we get the following which is the same except that aaa has been redefined so that y is a factor, we no longer need nc since we are assuming 2 columns and rep(...) in the last line is replaced with c(".", NA).
aaa <- data.frame(x = c(0,1,1,0,1,1,1,0,1,1), y = letters[1:10])
nr <- nrow(aaa)
fun <- function(ix, x) if (!is.na(x) & x == 0 & ix > 1) c(nr + 1, ix) else ix
rbind(aaa, c(".", NA))[unlist(Map(fun, 1:nr, aaa$x)), ]
One dplyr and tidyr possibility may be:
aaa %>%
uncount(ifelse(row_number() > 1 & x == 0, 2, 1)) %>%
mutate(x = ifelse(x == 0 & lag(x == 1, default = first(x)), NA_integer_, x))
x
1 0
2 1
3 1
4 NA
5 0
6 1
7 1
8 1
9 NA
10 0
11 1
12 1
It is not adding a blank row as you have a numeric vector. Instead, it is adding a row with NA. If you need a blank row, you can convert it into a character vector and then replace NA with blank.
ind = with(aaa, ifelse(x == 0 & seq_along(x) > 1, 2, 1))
d = aaa[rep(1:NROW(aaa), ind), , drop = FALSE]
transform(d, x = replace(x, sequence(ind) == 2, NA))
Here is an option with rleid
library(data.table)
setDT(aaa)[, .(x = if(x[.N] == 1) c(x, NA) else x), rleid(x)][-.N, .(x)]
# x
# 1: 0
# 2: 1
# 3: 1
# 4: NA
# 5: 0
# 6: 1
# 7: 1
# 8: 1
# 9: NA
#10: 0
#11: 1
#12: 1
data.frame(x = unname(unlist(by(aaa$x,cumsum(aaa==0),c,'.'))))
x
1 0
2 1
3 1
4 .
5 0
6 1
7 1
8 1
9 .
10 0
11 1
12 1
13 .
My solution is
aaa <- data.frame(x = c(0,1,1,0,1,1,1,0,1,1), y = letters[1:10])
aaa$ind = with(aaa, ifelse(x == 0 & seq_along(x) > 1, 2, 1))
aaa<-aaa[rep(1:nrow(aaa), aaa$ind), ,]
aaa[(aaa$ind== 2 & !grepl(".1",rownames(aaa))),]<-NA
aaa$ind<- NULL
aaa
x y
1 0 a
2 1 b
3 1 c
4 NA <NA>
4.1 0 d
5 1 e
6 1 f
7 1 g
8 NA <NA>
8.1 0 h
9 1 i
10 1 j
Im trying to write a function with nested if-else in R. How can I convert a data.frame where the values of columns are set to:
Input
df <- read.table(header = TRUE, text="Chr start end num seg.mean seg.mean.1 seg.mean.2
1 68580000 68640000 A8430 0.7000 0 0.1032
1 115900000 116260000 B8430 0.0039 2.7202 2.7202
1 173500000 173680000 C5 -1.7738 -2.0746 -0.2722")
condition:
x > 0 & x< 1 : 1
x >= 1 : 2
x < 0 & x > - 1 : -1
x <= -1 : -2
x = 0 : 0
expected output
df <- read.table(header = TRUE, text="Chr start end num seg.mean seg.mean.1 seg.mean.2
1 68580000 68640000 A8430 1 0 1
1 115900000 116260000 B8430 1 2 2
1 173500000 173680000 C5 -2 -2 -1")
fun_cond <- function(x) { ifelse( x >= 1, 2,ifelse( x > 0 & x < 1, 1),ifelse( x <= 1, 2,ifelse( x < 0 & x > -1, -1)))}
new_df[5:length(df)] <- lapply(new_df[5:length(df)], fun_cond)
I think what you want is this:
x = c(-1, 1, 0, 0, 1, -1, 0.5, 0.3, -0.4)
fun_cond(x)
fun_cond <- function(x){
ifelse(x >= 1, 2, ifelse(x < 1 & x > 0, 1, ifelse(x < 0 & x > -1, -1, -2)))
}
> fun_cond(x)
#[1] -2 2 -2 -2 2 -2 1 1 -1
Try it out...
Note that x == 0 is -2. There is no x <= 0 ... or x >= 0 ... expression like you described it.
If you want 0 as zero then use:
x = c(-1,1,0,0,1,-1,0.5,0.3, -0.4)
fun_cond(x)
fun_cond <- function(x){
ifelse(x >= 1, 2, ifelse(x < 1 & x > 0, 1, ifelse( x == 0, 0, ifelse(x < 0 & x > -1, -1, -2))))
}
> fun_cond(x)
#[1] -2 2 0 0 2 -2 1 1 -1
Try cut in base R:
cols <- grep("seg.mean", names(df))
res <- sapply(cols, function(i)
cut(df[,i], breaks = c(-Inf, -1, 0, 1, Inf), labels = c(-2,-1,1,2)))
# to leave zeros untouched
res[df[cols]==0] <- 0
If you want to get your expected output:
df[cols] <- res
# Chr start end num seg.mean seg.mean.1 seg.mean.2
# 1 1 68580000 68640000 A8430 1 0 1
# 2 1 115900000 116260000 B8430 1 2 2
# 3 1 173500000 173680000 C5 -2 -2 -1
Could you suggest a more elegant solution to the following problem? Remove rows containing more than one 0 in columns x,z,y or a,b,c.
df <- data.frame(x = 0, y = 1:5, z = 0:4, a = 4:0, b = 1:5, c=0)
my solution (row 1 and row 5 should get removed)
df_new <- subset(df, ((((x != 0 & y != 0) | (x != 0 & z != 0) | (y != 0 & z != 0)) & ((a != 0 & b != 0) | (a != 0 & c != 0) | (b != 0 & c != 0)))))
# 1:3 is same as columns 'x', 'y', 'z', Similarily for 4:6 .
# You can also specify the colnames explicitly
# add a na.rm = T inside rowSums() incase you also have missing data
(rowSums(df[, 1:3]==0)>1)|(rowSums(df[, 4:6]==0)>1)
# did you mean this ?
df[!((rowSums(df[, 1:3]==0)>1)|(rowSums(df[, 4:6]==0)>1)),]
# x y z a b c
#2 0 2 1 3 2 0
#3 0 3 2 2 3 0
#4 0 4 3 1 4 0