Convert negative values to zero in dataframe in R - r

I have a data frame in R in which I would like to replace negative values with zero in all columns except the id columns:
id1 id2 var1 var2 var3
-1 -1 0 -33 5
-1 -2 9 -10 -1
I can convert all columns with code line like:
temp[temp < 0] <- 0
But I can't adjust it to only a subset of columns. I've tried:
temp[temp < 0, -c(1,2)] <- 0
But this errors saying non-existent rows not allowed

Modify your attempt slightly:
temp[,-c(1,2)][temp[, -c(1,2)] < 0] <- 0

You can try using replace:
> mydf[-c(1, 2)] <- replace(mydf[-c(1, 2)], mydf[-c(1, 2)] < 0, 0)
> mydf
id1 id2 var1 var2 var3
1 -1 -1 0 0 5
2 -1 -2 9 0 0

We can use data.table
setDT(d1)
for(j in grep('^var', names(d1))){
set(d1, i= which(d1[[j]]<0), j= j, value=0)
}
d1
# id1 id2 var1 var2 var3
# 1: -1 -1 0 0 5
# 2: -1 -2 9 0 0

There might be fancier or more compact ways, but here's a vectorised replacement you can apply to the var columns:
# Build the example data. read.table(text = ...) manages its own connection,
# so no text connection is left open afterwards.
mytable <- read.table(text = "
id1 id2 var1 var2 var3
-1 -1 0 -33 5
-1 -2 9 -10 -1", header = TRUE)
# Locate the value columns once; the id columns are left untouched.
var_cols <- grep("^var", names(mytable))
# Work column by column instead of apply(), which would first coerce the
# data frame to a matrix. Negative values become 0, everything else is kept.
mytable[var_cols] <- lapply(
  mytable[var_cols],
  function(x) ifelse(x < 0, 0, x)
)
mytable
## id1 id2 var1 var2 var3
## 1 -1 -1 0 0 5
## 2 -1 -2 9 0 0

You could use pmax:
# Example data matching the question (var2 holds -33 and -10).
dat <- data.frame(
  id1 = c(-1, -1),
  id2 = c(-1, -2),
  var1 = c(0, 9),
  var2 = c(-33, -10),
  var3 = c(5, -1)
)
# pmax() is vectorised: taking the element-wise maximum with 0 clamps every
# negative value to 0. The first two (id) columns are excluded and unchanged.
dat[-c(1, 2)] <- lapply(dat[-c(1, 2)], pmax, 0)

Related

Replace selected columns' negative values with 0s or NAs using R

For the example data df, I want to replace the negative values in the first column (x1) with 0 and the third column (x3) with NA by the function replace_negatives as follows:
df <- data.frame(x1 = -3:1,
x2 = -1,
x3 = -2:2)
df
Out:
x1 x2 x3
1 -3 -1 -2
2 -2 -1 -1
3 -1 -1 0
4 0 -1 1
5 1 -1 2
Please note that I do not index by column names because there are many columns in the actual data and the column names are not fixed.
replace_negatives <- function(data){
df <<- data %>%
mutate(.[[1]] = if_else(.[[2]] < 0, 0, .[[1]])) %>%
mutate(.[[3]] = if_else(.[[3]] < 0, NA, .[[3]]))
return(df)
}
lapply(df, replace_negatives)
But it raises an error:
> replace_negatives <- function(data){
+ df <<- data %>%
+ mutate(.[[1]] = if_else(.[[2]] < 0, 0, .[[1]])) %>%
Error: unexpected '=' in:
" df <<- data %>%
mutate(.[[1]] ="
> mutate(.[[3]] = if_else(.[[3]] < 0, NA, .[[3]]))
Error: unexpected '=' in " mutate(.[[3]] ="
> return(df)
Error: no function to return from, jumping to top level
> }
Error: unexpected '}' in "}"
Any help would be appreciated.
The expected output:
x1 x2 x3
1 0 -1 NA
2 0 -1 NA
3 0 -1 0
4 0 -1 1
5 1 -1 2
To perform the required operation, here's a base R method:
df <- data.frame(x1 = -3:1,
x2 = -1,
x3 = -2:2)
df[[1]] <- ifelse(df[[1]] < 0, 0, df[[1]])
df[[3]] <- ifelse(df[[3]] < 0, NA, df[[3]])
df
#> x1 x2 x3
#> 1 0 -1 NA
#> 2 0 -1 NA
#> 3 0 -1 0
#> 4 0 -1 1
#> 5 1 -1 2
Created on 2022-04-18 by the reprex package (v2.0.1)
You could use across in the function:
library(tidyverse)
# Replace negative values by column position: negatives in column 1 become 0
# and negatives in column 3 become NA. All other columns pass through as is.
replace_negatives <- function(data) {
  data %>%
    mutate(
      across(1, function(col) ifelse(col < 0, 0, col)),
      across(3, function(col) ifelse(col < 0, NA, col))
    )
}
replace_negatives(df)
Output
x1 x2 x3
1 0 -1 NA
2 0 -1 NA
3 0 -1 0
4 0 -1 1
5 1 -1 2
Here is base R version of your function:
# Start from the original example data.
df <- data.frame(x1 = -3:1,
                 x2 = -1,
                 x3 = -2:2)

# Replace negative values in the first column with 0 and negative values in
# the third column with NA. Columns are addressed by position, so no column
# names are needed. Returns the modified data frame.
replace_negatives <- function(df) {
  # which() drops NA comparisons, so values that are already NA stay as is.
  df[[1]][which(df[[1]] < 0)] <- 0
  df[[3]][which(df[[3]] < 0)] <- NA
  df
}
replace_negatives(df)
#   x1 x2 x3
# 1  0 -1 NA
# 2  0 -1 NA
# 3  0 -1  0
# 4  0 -1  1
# 5  1 -1  2

Use a Loop or Apply to Overwrite Only Certain Values of a Variable in a Data Frame Based on a Condition

I would like to write a loop or apply in R to overwrite only certain values of a variable based on a condition. Here is an example data frame:
df <- data.frame(
state = c("MA","CO","TX"),
random_numeric = c(26,28,4),
var1 = c(3,0,0),
var2 = c(3,1,5),
var3 = c(0,1,0),
prelim_row_sum = c(6,2,5)
)
df
state random_numeric var1 var2 var3 prelim_row_sum
1 MA 26 3 3 0 6
2 CO 28 0 1 1 2
3 TX 4 0 5 0 5
In df, for each row I would like to replace only the first value among var1, var2, and var3 with zero if it equals half of prelim_row_sum. Thus, a correct loop or apply would replace only the first 3 in row 1 and the first 1 in row 2 with zero. I have included the random_numeric and state variables in the example data frame to show that there are other character and numeric variables in my larger data frame. Accordingly, a dplyr solution with across would not work for me. I could, of course, do this one by one:
df[1,3] <- 0
df[2,4] <- 0
df$final_row_sum = rowSums(df[3:5])
df
state random_numeric var1 var2 var3 prelim_row_sum final_row_sum
1 MA 26 0 3 0 6 3
2 CO 28 0 0 1 2 1
3 TX 4 0 5 0 5 5
But I would really appreciate help with a loop, apply, or a function, so that I can do this on larger, non-stylized data frames. Thank you!
Here is one way to do this with apply. I allowed for a little bit of generality in that you must input which columns to apply this function to. At the end there is a wrapper function so you can set those values to zero and then create the final_row_sum column.
# Example data from the question.
df <- data.frame(
  state = c("MA","CO","TX"),
  random_numeric = c(26,28,4),
  var1 = c(3,0,0),
  var2 = c(3,1,5),
  var3 = c(0,1,0),
  prelim_row_sum = c(6,2,5)
)
# Given a vector whose last element is a reference total, set to zero the
# first of the preceding elements that equals half of that total.
# Returns the vector unchanged when no element matches.
my_func <- function(x) {
  n <- length(x)
  half_total <- x[n] / 2
  hits <- which(x[1:(n - 1)] == half_total)
  if (length(hits) == 0) {
    return(x)
  }
  # Only the first match is overwritten; later matches are kept.
  x[hits[1]] <- 0
  x
}
# Columns handed to my_func. prelim_row_sum stays last because my_func
# compares the other entries against half of the final element.
cols_to_fix <- c("var1", "var2", "var3", "prelim_row_sum")
# Work on a copy so df itself is not modified.
new_df <- df
# apply() over rows returns one column per input row, hence the transpose.
new_df[, cols_to_fix] <- t(
  apply(new_df[, cols_to_fix], MARGIN = 1, FUN = my_func)
)
# Sum only the var columns, i.e. everything except the trailing total.
new_df$final_row_sum <- rowSums(new_df[, head(cols_to_fix, -1)])
new_df
state random_numeric var1 var2 var3 prelim_row_sum final_row_sum
1 MA 26 0 3 0 6 3
2 CO 28 0 0 1 2 1
3 TX 4 0 5 0 5 5
# For every row, zero out the first value among `cols` (excluding the last
# column) that equals half of the last column, then add final_row_sum.
#   x    : data frame
#   cols : columns to process; the last one holds the reference total
# Returns x with the adjusted columns and a new final_row_sum column holding
# the row sums of all `cols` except the last.
all_in_one <- function(x, cols) {
  zero_first_half <- function(row_vals) {
    n <- length(row_vals)
    hits <- which(row_vals[1:(n - 1)] == row_vals[n] / 2)
    if (length(hits) > 0) {
      row_vals[hits[1]] <- 0
    }
    row_vals
  }
  # apply() over rows yields one column per row, so transpose it back.
  adjusted <- apply(x[, cols], 1, zero_first_half)
  x[, cols] <- t(adjusted)
  value_cols <- cols[-length(cols)]
  x$final_row_sum <- rowSums(x[, value_cols])
  x
}
answer <- all_in_one(df, c("var1", "var2", "var3", "prelim_row_sum"))
state random_numeric var1 var2 var3 prelim_row_sum final_row_sum
1 MA 26 0 3 0 6 3
2 CO 28 0 0 1 2 1
3 TX 4 0 5 0 5 5
I have a more function-based, tidyverse answer. It will return a data frame of var1, var2, and var3, which you can easily combine with the original data frame. I like Tsai's original response, but I think this is a bit easier to understand and more flexible.
library(tidyverse) # you really just need purrr

# Called once per row by pmap_dfr(): the named columns arrive as scalar
# arguments and any other columns of the data frame are absorbed by `...`.
# Sets the first of var1/var2/var3 that equals half of prelim_row_sum to 0
# and returns the three values as a one-row data frame.
f <- function(var1, var2, var3, prelim_row_sum, ...) {
  vals <- list(var1 = var1, var2 = var2, var3 = var3)
  hit <- which(unlist(vals) * 2 == prelim_row_sum)
  # No match leaves the row untouched.
  if (length(hit) > 0) {
    vals[[hit[1]]] <- 0
  }
  data.frame(vals)
}
pmap_dfr(df, f)
Try this
# Keep the first two columns as they are and process the remaining columns
# row by row: x[1:3] are var1..var3 and x[4] is prelim_row_sum.
cbind(df[1:2], t(apply(df[-(1:2)], 1, function(x) {
  # which() returns an empty result when nothing matches, so rows without a
  # match are left alone (which.max() would point at the first element).
  hit <- which(x[1:3] == x[4] / 2)
  if (length(hit) > 0) {
    x[hit[1]] <- 0
  }
  c(x, final_row_sum = sum(x[-4]))
})))
# state random_numeric var1 var2 var3 prelim_row_sum final_row_sum
# 1 MA 26 0 3 0 6 3
# 2 CO 28 0 0 1 2 1
# 3 TX 4 0 5 0 5 5

Compare each row string from columns and add condition to evaluate and print the result in new column

I have a dataframe as below
string1 string2 var1 var2
T T 1 1
T F 0 1
F F 0 0
I want to iterate through each row which has n number of rows and add conditions like below
Iterate n number of rows
if(string1 == T & sting2 == F){
if(va1 > 1 & var2 > 1){
# do some operation and append to new column
# For example
new column <- var1 + var2
}
elif(var1 == 0 & var2 > 1){
# Do some adds / subs with var1 & var2 and append to new column
}
elif{var1 > 1 & var2 ==0){
# Do some adds / subs with var1 & var2 and append to new column
}
}
elif(string1 == F & sting2 == T){
# again repeat set of if-else opration on var1 and var2 as mentioned in
above if else condition
}
elif(nth condition)
How do I achieve this in R?
Based on your description in the comments, I think what you need is:
df$new_col <- with(df, ifelse(string1 & string2 & var1 > 0 & var2 > 0, var1 + var2, 0))
df
# string1 string2 var1 var2 new_col
#1 TRUE TRUE 1 1 2
#2 TRUE FALSE 0 1 0
#3 FALSE FALSE 0 0 0
This adds up var1 and var2 if string1 and string2 is TRUE and both var1 and var2 are greater than 0 or else it keeps new_col as 0.
string1 <-c(T,T,F)
string2 <-c(T,F,F)
var1 <- c(1,0,0)
var2 <- c(1,1,0)
df <- data.frame(string1,string2,var1,var2)
df
string1 string2 var1 var2
1 TRUE TRUE 1 1
2 TRUE FALSE 0 1
3 FALSE FALSE 0 0
# Add var1 and var2 (not the two flag columns) when both flags are TRUE and
# both values are positive; every other row gets NA.
df$new_column <- with(
  df,
  ifelse(string1 & string2 & var1 > 0 & var2 > 0, var1 + var2, NA)
)
df
string1 string2 var1 var2 new_column
1 TRUE TRUE 1 1 2
2 TRUE FALSE 0 1 NA
3 FALSE FALSE 0 0 NA
Replace NA with 0, if you want to have 0 in the final result.

Converting counts to individual observations in r

I have a data set that looks as follows
df <- data.frame( name = c("a", "b", "c"),
judgement1= c(5, 0, NA),
judgement2= c(1, 1, NA),
judgement3= c(2, 1, NA))
I want to reshape the dataframe to look like this
# name judgement1 judgement2 judgement3
# a 1 0 0
# a 1 0 0
# a 1 0 0
# a 1 0 0
# a 1 0 0
# b 1 0 0
# b 0 1 0
# b 0 0 1
And so on. I have seen that untable is recommended on some other threads, but it does not appear to work with the current version of r. Is there a package that can convert summarised counts into individual observations?
You could try something like this:
df <- data.frame( name = c("a", "b", "c"),
judgement1= c(5, 0, NA),
judgement2= c(1, 1, NA),
judgement3= c(2, 1, NA))
rep.vec <- colSums(df[colnames(df) %in% paste0("judgement", (1:nrow(df)), sep="")], na.rm = TRUE)
want <- data.frame(name=df$name, cbind(diag(nrow(df))))
colnames(want)[-1] <- paste0("judgement", (1:nrow(df)), sep="")
(want <- want[rep(1:nrow(want), rep.vec), ])
I wrote a function that works to give you your desired output:
# Expand a table of counts into one row per individual observation.
#   df         : data frame with an id column and count columns
#   id.col     : position of the id column
#   count.cols : positions of the count columns
# Returns a data frame in which every row has 1 in one count column and 0 in
# every other non-id column, ordered by the id column. NA counts are treated
# as zero. A data frame without rows is returned unchanged.
untabl <- function(df, id.col, count.cols) {
  if (nrow(df) == 0L) {
    return(df)                                   # nothing to expand
  }
  df[is.na(df)] <- 0                             # replace NAs
  out <- lapply(count.cols, function(x) {        # for each column with counts
    z <- df[rep(seq_len(nrow(df)), df[, x]), ]   # replicate rows
    z[, -c(id.col)] <- 0                         # set all other columns to zero
    z[, x] <- 1                                  # replace the count values with 1
    z
  })
  out <- do.call(rbind, out)                     # combine the list
  out <- out[order(out[, c(id.col)]), ]          # reorder (you can change this)
  rownames(out) <- NULL                          # return to simple row numbers
  out
}
untabl(df = df, id.col = 1, count.cols = c(2,3,4))
# name judgement1 judgement2 judgement3
#1 a 1 0 0
#2 a 1 0 0
#3 a 1 0 0
#4 a 1 0 0
#5 a 1 0 0
#6 a 0 1 0
#7 a 0 0 1
#8 a 0 0 1
#9 b 0 1 0
#10 b 0 0 1
And for your reference, reshape::untable consists of the following code:
function (df, num)
{
df[rep(1:nrow(df), num), ]
}

Make a vector with counts of rows that meet criteria

I want to make a vector that contains the number of rows that meet my criteria:
leftE0 <- c(900,2000,4000,9000,15000,30000,53000,100000,160000)
rightE0 <- c(2000,4000,9000,15000,30000,53000,100000,160000,300000)
sum(datap$CF > 0 & (datap$E0.keV > leftE0[1]) & (datap$E0.keV < rightE0[1]), na.rm=TRUE)
I don't understand how to vectorise this action.
Use cut and table:
#some example data
set.seed(42)
datap <- data.frame(CF = rnorm(100), E0.keV = exp(runif(100, 0, log(4e6))))
breaks <- c(-Inf, 900,2000,4000,9000,15000,30000,53000,100000,160000, 300000, Inf)
table(cut(datap$E0.keV, breaks), datap$CF > 0)
# FALSE TRUE
# (-Inf,900] 21 32
# (900,2e+03] 6 3
# (2e+03,4e+03] 3 3
# (4e+03,9e+03] 6 0
# (9e+03,1.5e+04] 1 1
# (1.5e+04,3e+04] 0 1
# (3e+04,5.3e+04] 1 0
# (5.3e+04,1e+05] 2 0
# (1e+05,1.6e+05] 1 0
# (1.6e+05,3e+05] 2 1
# (3e+05, Inf] 3 13

Resources