How to move values between columns with condition - r

I want to move the value from column II to column I only when the corresponding row in column I shows NA.
This is the data.frame:
id <- c("A","B","C","D", "E","F","G")
I <- c("NA","NA","NA","10","20","NA","30")
II <- c("3","4","5","6","7", "8", "8")
df <- data.frame(id, I, II)
The expected result would look like this:
id <- c("A","B","C","D", "E","F","G")
I <- c("NA","NA","NA","10","20","NA","30")
II <- c("3","4","5","6","7", "8", "8")
III <- c("3","4","5","10","20", "8", "30")
df <- data.frame(id, I, II,III)
Thanks in advance!

You can use ifelse:
transform(df, III = ifelse(I == 'NA', II, I))
# id I II III
#1 A NA 3 3
#2 B NA 4 4
#3 C NA 5 5
#4 D 10 6 10
#5 E 20 7 20
#6 F NA 8 8
#7 G 30 8 30
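Since the columns here are stored as strings, III comes out as character as well. If a numeric column is wanted, one option (a sketch, assuming the columns are plain character vectors, i.e. stringsAsFactors = FALSE) is to convert inside the same call:
transform(df, III = as.numeric(ifelse(I == "NA", II, I)))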

We can use coalesce after converting the quoted "NA" to an actual NA (with na_if), all in a single line:
library(dplyr)
df1 <- df %>%
mutate(III = coalesce(na_if(I, "NA"), II))
df1
# id I II III
#1 A <NA> 3 3
#2 B <NA> 4 4
#3 C <NA> 5 5
#4 D 10 6 10
#5 E 20 7 20
#6 F <NA> 8 8
#7 G 30 8 30
Or using base R: change the "NA" strings to real NA, copy the values of 'II' into 'III', then use a logical vector of the non-NA elements of 'I' to overwrite 'III' where 'I' has a value:
df$I[df$I == "NA"] <- NA
df$III <- df$II
df$III[!is.na(df$I)] <- df$I[!is.na(df$I)]
Or with ifelse (applied to the original data, before the "NA" strings are converted):
df$III <- with(df, ifelse(I == "NA", II, I))
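Once the "NA" strings have been converted to real NA (as in the base R lines above), the same ifelse idea would be written with is.na; a small sketch:
df$III <- with(df, ifelse(is.na(I), II, I))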

A simple for loop in base R will also get this done:
III <- character(length(id))      # pre-allocate the result
for (i in seq_along(id)) {
  if (I[i] == "NA") {
    III[i] <- II[i]               # take the value from II where I is "NA"
  } else {
    III[i] <- I[i]                # otherwise keep the value from I
  }
}
df <- data.frame(id, I, II, III)

Related

Creating global environment objects from two lists

I am having an issue assigning a list of data frames to a set of variable names. For example,
df1 <- data.frame(a = 1, b = 1:10)
df2 <- data.frame(a = 2, b = 1:10)
df3 <- data.frame(a = 3, b = 1:10)
x <- c("a", "b", "c")
y <- list("df1", "df2", "df3")
My goal is to assign each data frame in the y list to the corresponding name in x. I can do it longhand:
a <- y[[1]]
But I have many iterations. I have tried the following, without any luck:
map2(x, y, function(x, y) x <- y)
and
map2(x, y, ~assign(x, y))
Appreciate any help!
We can unlist the list of object names (unlist(y)), get their values as a list with mget, set the names of the list elements to the 'x' vector values, and use list2env to create the objects in the global environment (not recommended, though):
list2env(setNames(mget(unlist(y)), x), .GlobalEnv)
If we use map2, we need to get the value of 'y' with get and also specify the environment in which to assign the value:
map2(x, y, ~ assign(.x, get(.y), envir = .GlobalEnv))
Output:
a
# a b
#1 1 1
#2 1 2
#3 1 3
#4 1 4
#5 1 5
#6 1 6
#7 1 7
#8 1 8
#9 1 9
#10 1 10
b
# a b
#1 2 1
#2 2 2
#3 2 3
#4 2 4
#5 2 5
#6 2 6
#7 2 7
#8 2 8
#9 2 9
#10 2 10
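As a side note (not part of the original answer): if the list that map2 returns is not wanted, purrr::walk2 takes the same arguments but is called only for its side effect. A minimal sketch, assuming purrr is attached:
library(purrr)
walk2(x, y, ~ assign(.x, get(.y), envir = .GlobalEnv))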
Unfortunately, the above options didn't work in my case. I ended up adapting the suggestions made by akrun, which worked:
names(y) <- x
list2env(y, envir=.GlobalEnv)

How to select columns if there are no NAs in the last n observations? How to drop columns if there are more than x adjacent NA observations?

I need the following:
1) Keep the columns if: i) the last n observations (n = 3) aren't NAs, ii) there are no NAs at all, iii) counting backwards from the last NA, there are not more than 3 adjacent NA observations
2) Drop the columns if: i) there are 3 or more adjacent NA observations
I'd prefer an answer using dplyr.
Some example data:
data <- data.frame(
  A = c(3, 3, 3, 3, 4, rep(NA, 5)),
  B = rnorm(10),
  C = c(rep(NA, 3), rnorm(7)),
  D = c(rnorm(8), NA, NA)
)
I've tried:
data %>% select_if(~ sum(!is.na(.)) >= 3)
data %>% select_if(~ sum(is.na(.)) > 0)
In my example, I'd only keep B, C and D.
We can use tail to get the last n entries and drop the columns where all of them are NA:
n <- 3
library(dplyr)
data %>% select_if(~!all(is.na(tail(., n))))
# B C D
#1 0.5697 NA 0.29145
#2 -0.1351 NA -0.44329
#3 2.4016 NA 0.00111
#4 -0.0392 0.153 0.07434
#5 0.6897 2.173 -0.58952
#6 0.0280 0.476 -0.56867
#7 -0.7433 -0.710 -0.13518
#8 0.1888 0.611 1.17809
#9 -1.8050 -0.934 NA
#10 1.4656 -1.254 NA
Or with inverted logic
data %>% select_if(~any(!is.na(tail(., n))))
For the second condition,
Drop the columns if: i) There are 3 or more adjacent NA observations
we can use rle to get the lengths of runs of adjacent NA values:
data %>% select_if(~!any(with(rle(is.na(.)), lengths[values]) >= n))
# B D
#1 0.5697 0.29145
#2 -0.1351 -0.44329
#3 2.4016 0.00111
#4 -0.0392 0.07434
#5 0.6897 -0.58952
#6 0.0280 -0.56867
#7 -0.7433 -0.13518
#8 0.1888 1.17809
#9 -1.8050 NA
#10 1.4656 NA
Since we already have the predicate functions, we can apply the same logic in base R with sapply:
#Condition 1
data[!sapply(data, function(x) all(is.na(tail(x, n))))]
#Condition 2
data[!sapply(data, function(x) any(with(rle(is.na(x)), lengths[values]) >= n))]
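As a side note, select_if is superseded in dplyr 1.0.0 and later; the same predicates can be passed to select(where(...)). A sketch under that assumption:
data %>% select(where(~ !all(is.na(tail(.x, n)))))
data %>% select(where(~ !any(with(rle(is.na(.x)), lengths[values]) >= n)))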

Conditional mutate and vector

I have the following data frame:
df <- data.frame(
x = rep(letters[1:3], 2)
)
And the following vector:
vec <- c(1.5, 3.2)
Each element of this vector belongs to one "b" row in df. How do I add vec as a new column where x matches "b", with NA values everywhere else?
Expected outcome:
1 a NA
2 b 1.5
3 c NA
4 a NA
5 b 3.2
6 c NA
The simplest way would be to get the indexes where x is "b" and assign vec there:
df$output[df$x == "b"] <- vec
df
# x output
#1 a NA
#2 b 1.5
#3 c NA
#4 a NA
#5 b 3.2
#6 c NA
Another option is with replace:
df$output <- replace(df$output, df$x == "b", vec)
To force this into the tidyverse, the output column needs to exist first so that replace can fill it:
library(dplyr)
df$output <- NA
df %>%
mutate(output = replace(output, x == "b", vec))
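If pre-creating the column feels clunky, a mutate-only variant (a sketch, not from the original answer) builds the NA vector inside mutate and replaces into it; NA_real_ assumes a numeric result, use NA_character_ for a character one:
library(dplyr)
df %>%
  mutate(output = replace(rep(NA_real_, n()), x == "b", vec))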

Map user input to a data frame and return NA if the user input is not found in the data frame

I have a data frame with two columns "A" and "B". I created a function that works as mentioned below:
If X (a user-entered value) is found in column A, then return the X value found in column A and its corresponding value in column B.
Here's my code:
myfunction <- function(x) {
  r <- with(my_dataframe, my_dataframe[A %in% x, c("A", "B")])
  return(data.frame(r))
}
I want to tweak this in such a way that if user input (value for X) doesn't appear in column A, return that value and NA for column B.
Example:
A B
1 A12
2 F1222
If the values for X are 1 and 5, I want the output to look like this:
1 A12
5 NA
One approach could be to first find the matched rows using matched = my_dataframe$A %in% x.
If any matched rows are found, use matched to return the corresponding rows; otherwise create a row with the value of x in A and NA in B:
myfunction <- function(x) {
  matched <- my_dataframe$A %in% x
  if (sum(matched) > 0) {
    r <- with(my_dataframe, my_dataframe[matched, c("A", "B")])
  } else {
    r <- data.frame(A = x, B = NA)
  }
  return(r)
}
#Test
myfunction(2)
# A B
# 2 2 A34
myfunction(11)
# A B
# 1 11 NA
Edit: based on the latest feedback from the OP, I think dplyr::left_join will do the trick:
a <- 1
dplyr::left_join(data.frame(A=a), my_dataframe, by="A")
# A B
# 1 1 A21
a <- c(2,3,12,34,45)
dplyr::left_join(data.frame(A=a), my_dataframe, by="A")
# A B
# 1 2 A34
# 2 3 D345
# 3 12 <NA>
# 4 34 <NA>
# 5 45 <NA>
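The base R counterpart of that left join (a sketch) is merge with all.x = TRUE; note that merge sorts the result by the key by default:
merge(data.frame(A = a), my_dataframe, by = "A", all.x = TRUE)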
Data
my_dataframe <- data.frame(A = 1:4,
                           B = c("A21", "A34", "D345", "E45"),
                           stringsAsFactors = FALSE)
A more compact variant of the same idea returns the NA row only when nothing matches:
myfunction <- function(x) {
  r <- with(my_dataframe, my_dataframe[A %in% x, c("A", "B")])
  if (!nrow(r)) data.frame(A = x, B = NA) else data.frame(r)
}
> myfunction(3)
A B
1 3 NA
> myfunction(2)
A B
2 2 F1222
Edit to allow vectors:
my <- function(x) {
  s <- subset(data, A %in% x)    # rows of data whose A appears in x
  m <- x %in% s$A                # which inputs were found
  if (all(m)) s else rbind(s, cbind(A = x[!m], B = NA))
}
my(1)
A B
1 1 A12
> my(1:10)
A B
1 1 A12
2 2 F1222
3 3 <NA>
4 4 <NA>
5 5 <NA>
6 6 <NA>
7 7 <NA>
8 8 <NA>
9 9 <NA>
10 10 <NA>
> my(4)
A B
1 4 NA
my(c(1,3.11))
A B
1 1.00 A12
2 3.11 <NA>

remove cases following certain other cases

I have a dataframe, say
df = data.frame(x = c("a","a","b","b","b","c","d","t","c","b","t","c","t","a","a","b","d","t","t","c"),
y = c(2,4,5,2,6,2,4,5,2,6,2,4,5,2,6,2,4,5,2,6))
I want to remove only those rows in which one or more ts sit directly between a d and a c; in all other cases I want to keep the rows. So for this example, I would like to remove the ts on rows 8, 18 and 19, but keep the others. I have thousands of cases, so doing this manually would be a true horror. Any help is very much appreciated.
One option would be to use rle to get runs of the same string and then you can use an sapply to check forward/backward and return all the positions you want to drop:
rle_vals <- rle(as.character(df$x))
drop <- unlist(sapply(2:length(rle_vals$values),   # loop over the runs
                      function(i, vals, lengths) {
                        # check if this run is "t", the previous run is "d" and the next run is "c"
                        if (vals[i] == "t" & vals[i - 1] == "d" & vals[i + 1] == "c") {
                          # row numbers covered by this run; note 1:i-1 is (1:i)-1, i.e. 0:(i-1)
                          (sum(lengths[1:i - 1]) + 1):sum(lengths[1:i])
                        }
                      }, vals = rle_vals$values, lengths = rle_vals$lengths))
drop
#[1] 8 18 19
df[-drop,]
# x y
#1 a 2
#2 a 4
#3 b 5
#4 b 2
#5 b 6
#6 c 2
#7 d 4
#9 c 2
#10 b 6
#11 t 2
#12 c 4
#13 t 5
#14 a 2
#15 a 6
#16 b 2
#17 d 4
#20 c 6
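For comparison, a vectorised variant of the same rle idea (a sketch, not from the original answers): flag the "t" runs whose neighbouring runs are "d" and "c", then expand the run-level flags back to row level with rep:
r <- rle(as.character(df$x))
is_t_between <- r$values == "t" &
  c(FALSE, head(r$values, -1) == "d") &     # previous run is "d"
  c(tail(r$values, -1) == "c", FALSE)       # next run is "c"
df[!rep(is_t_between, r$lengths), ]         # drops rows 8, 18 and 19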
This also works, by collapsing to a string, identifying groups of t's between d and c (or c and d - not sure whether you wanted this option as well), then working out where they are and removing the rows as appropriate.
df <- data.frame(x = c("a","a","b","b","b","c","d","t","c","b","t","c","t","a","a","b","d","t","t","c"),
                 y = c(2,4,5,2,6,2,4,5,2,6,2,4,5,2,6,2,4,5,2,6), stringsAsFactors = FALSE)
dfs <- paste0(df$x, collapse = "")   # collapse to a single string
dfs2 <- do.call(rbind, lapply(list(gregexpr("dt+c", dfs), gregexpr("ct+d", dfs)),
                              function(L) data.frame(x = L[[1]], y = attr(L[[1]], "match.length"))))
dfs2 <- dfs2[dfs2$x > 0, ]           # remove any -1 values (pattern not found)
drop <- unlist(mapply(function(a, b) (a + 1):(a + b - 2), dfs2$x, dfs2$y))
df2 <- df[-drop, ]
Here is another solution with base R:
df <- data.frame(x = c("a","a","b","b","b","c","d","t","c","b","t","c","t","a","a","b","d","t","t","c"),
                 y = c(2,4,5,2,6,2,4,5,2,6,2,4,5,2,6,2,4,5,2,6))
#
s <- paste0(df$x, collapse = "")
L <- c(NA, NA)
while (TRUE) {
  r <- regexec("dt+c", s)[[1]]
  if (r[1] == -1) break
  L <- rbind(L, c(pos = r[1] + 1, length = attr(r, "match.length") - 2))
  s <- sub("d(t+)c", "x\\1x", s)   # mask this match so the next iteration finds the following one
}
L <- L[-1, ]
drop <- unlist(apply(L, 1, function(x) seq(from = x[1], length.out = x[2])))
df[-drop, ]
# > drop
# 8 18 19
# > df[-drop, ]
# x y
# 1 a 2
# 2 a 4
# 3 b 5
# 4 b 2
# 5 b 6
# 6 c 2
# 7 d 4
# 9 c 2
# 10 b 6
# 11 t 2
# 12 c 4
# 13 t 5
# 14 a 2
# 15 a 6
# 16 b 2
# 17 d 4
# 20 c 6
With gregexpr() it is shorter:
s <- paste0(df$x, collapse="")
g <- gregexpr("dt+c", s)[[1]]
L <- data.frame(pos=g+1, length=attr(g, "match.length")-2)
drop <- unlist(apply(L,1, function(x) seq(from=x[1], len=x[2])))
df[-drop, ]
