Alternate empty column among dataframe - r

I have this df:
df = data.frame(aa = letters[1:5],
bb = letters[1:5],
cc = letters[1:5],
dd = letters[1:5])
df2 = c('ee', 'ff', 'gg')
df[df2] = NA
And I want to have this output:
ee aa bb ff cc dd gg
NA a a NA a a NA
NA b b NA b b NA
NA c c NA c c NA
NA d d NA d d NA
NA e e NA e e NA
Is there an elegant way to do so instead of:
df = df[,c('ee', 'aa', 'bb', 'ff', 'cc', 'dd', 'gg')] ??

Here is one option. Based on the input/output, an empty column has to be inserted before each block of 2 columns, plus one more at the very end. We create a matrix 'm1' of the column names, split it by the `col` of the matrix, and concatenate each list element with the corresponding element of 'df2' to build a vector of column names in the required order ('un1'). Using that, a 'data.frame' of NA is created (through the matrix route) and the columns of 'df' are then assigned into it.
# Lay the column names of 'df' out as a 2 x 2 matrix, so that each matrix
# column holds one block of two names (assumes 'df' has exactly four
# columns at this point -- TODO confirm).
m1 <- matrix(names(df), 2, 2)
# Pair the first nrow(m1) names of 'df2' with the blocks of 'm1' (one block per
# matrix column), flatten the result into a single vector, and append the last
# element of 'df2' as the trailing empty column.
un1 <- c(unlist(Map(c, df2[seq_len(nrow(m1))],
split(m1, col(m1)))), df2[length(df2)])
# Build an all-NA data.frame whose columns follow the order given by 'un1'.
dfN <- as.data.frame(matrix(NA, ncol =length(un1),
nrow = nrow(df), dimnames = list(NULL, un1)))
# Fill in the columns that exist in 'df'; the other columns stay NA.
dfN[names(df)] <- df
dfN
# ee aa bb ff cc dd gg
#1 NA a a NA a a NA
#2 NA b b NA b b NA
#3 NA c c NA c c NA
#4 NA d d NA d d NA
#5 NA e e NA e e NA
Another option is add_column from tibble. We split the dataset into a list of data.frames in blocks of 'k' (= 2) columns, loop over the list together with its index sequence using map2, add the new column at the beginning of each block (add_column), column-bind the pieces into a single data.frame (map2_dfc), and then add the remaining column at the end.
library(tidyverse)
# Number of original columns per block.
k <- 2
# Split 'df' column-wise into a list of data.frames; gl() labels the columns
# in runs of 'k' so each list element holds one block.
l1 <- split.default(df, as.integer(gl(ncol(df), k, ncol(df))))
# One index per block; used to pick the matching name from 'df2'.
i1 <- seq_along(l1)
# Name of the last column of 'df'; the final empty column is placed after it.
nm1 <- tail(names(df), 1)
l1 %>%
# For each block, prepend an NA column named after the corresponding 'df2'
# entry, then column-bind all blocks into one data frame.
map2_dfc(., i1, ~
.x %>%
add_column(!! df2[.y] := NA, .before = 1)) %>%
# Append the 'df2' name that was not used for a block as a trailing NA column.
add_column(!!df2[-i1] := NA, .after = nm1)
# ee aa bb ff cc dd gg
#1 NA a a NA a a NA
#2 NA b b NA b b NA
#3 NA c c NA c c NA
#4 NA d d NA d d NA
#5 NA e e NA e e NA

If the names of the empty columns do not matter much, you can also use a for loop. It builds the desired data frame in df2.
df <- data.frame(
  aa = letters[1:5],
  bb = letters[1:5],
  cc = letters[1:5],
  dd = letters[1:5]
)
# Seed the result with the leading empty column; data.frame() recycles the
# single NA to the number of rows once the first block is bound to it.
df2 <- NA
# One iteration per complete block of two columns. seq_len() gives an empty
# sequence when 'df' has no complete pair of columns, whereas
# 1:(ncol(df) / 2) would still iterate and index columns that do not exist.
for (i in seq_len(ncol(df) %/% 2)) {
  # Append the i-th pair of columns followed by another empty column.
  df2 <- data.frame(df2, df[, (i * 2 - 1):(i * 2)], NA)
}
Column names can be added later if needed as
colnames(df2)[seq(1,ncol(df2),3)] <- c('ee', 'ff', 'gg')

Related

Using lapply with data in two lists in R

I have two lists and I want to use lapply to get a new list
The data is
library(dplyr)
list.A <- list(df1=data.frame(x= c(1:5), y = letters[1:5], z= rep(1,5)),
df2=data.frame(x= c(10:15), y = letters[5:10], z= rep(10,6)))
list.B <- list(df1=data.frame(x= c(1:6), var2 = letters[10:15], var3= rep(7,6)),
df2=data.frame(x= c(10,12), var2 = letters[1:2], var3= rep(5,2)))
I want the result to be as following
dat.1 <- left_join(list.A[[1]], list.B[[1]], by=("x"))
dat.2 <- left_join(list.A[[2]], list.B[[2]], by=("x"))
new.list <- list(df1 = dat.1, df2 =dat.2)
But when I use lapply the results are weird and not as I wish them to be
new.list <- lapply(list.A, function(a){lapply(list.B, function(b){
df <-left_join(a, b, by=("x"))
})
})
Any help, please? Do I need to write a loop, or would lapply work?
My actual lists contain many data frames.
We can use map2 from purrr, which loops over the corresponding elements of both lists and does the left_join by the 'x' column
library(dplyr)
library(purrr)
map2(list.A, list.B, ~ left_join(.x, .y, by = 'x'))
-output
#$df1
# x y z var2 var3
#1 1 a 1 j 7
#2 2 b 1 k 7
#3 3 c 1 l 7
#4 4 d 1 m 7
#5 5 e 1 n 7
#$df2
# x y z var2 var3
#1 10 e 10 a 5
#2 11 f 10 <NA> NA
#3 12 g 10 b 5
#4 13 h 10 <NA> NA
#5 14 i 10 <NA> NA
#6 15 j 10 <NA> NA
Or Map (from base R)
Map(merge, list.A, list.B, MoreArgs = list(all.x = TRUE, by = 'x'))

How to add rows from another data frame to another but only selected column

How to add rows from certain column in a data frame to another column of another data frame. See Example below.
> DF1
A B C
1 3 axe aa
2 6 base bb
3 9 lol cc
> DF2
D E
1 x ss
2 y dd
3 z vv
And I want to add/merge the rows of Column E of DF2 into Column C of DF1. And the other columns' rows should be NA.
> DF3
A B C
1 3 axe aa
2 6 base bb
3 9 lol cc
4 NA NA ss
5 NA NA dd
6 NA NA vv
You can rename E to C and rbind. I prefer bind_rows
> library(dplyr)
> names(DF2)[2] <- "C"
> DF1 <- bind_rows(DF1, select(DF2, C))
> DF1
A B C
1 3 axe aa
2 6 base bb
3 9 lol cc
4 NA <NA> ss
5 NA <NA> dd
6 NA <NA> vv
Another approach:
> DF1 %>%
+ bind_rows(DF2) %>%
+ mutate(C = ifelse(is.na(C), E, C)) %>%
+ select(A:C)
A B C
1 3 axe aa
2 6 base bb
3 9 lol cc
4 NA <NA> ss
5 NA <NA> dd
6 NA <NA> vv
Use rbind from base R:
DF3 <- rbind(DF1, data.frame(A = NA, B = NA, C = DF2$E))

Replacing NAs in a column with the values of other column

I wonder how to replace NAs in a column with the values of other column in R using dplyr. MWE is below.
Letters <- LETTERS[1:5]
Char <- c("a", "b", NA, "d", NA)
df1 <- data.frame(Letters, Char)
df1
library(dplyr)
df1 %>%
  # NOTE: `Char != NA` evaluates to NA for every row, so ifelse() returns NA
  # throughout; this is the behaviour being asked about and it is kept as-is
  # so the snippet reproduces the output shown below.
  mutate(Char1 = ifelse(Char != NA, Char, Letters))
Letters Char Char1
1 A a NA
2 B b NA
3 C <NA> NA
4 D d NA
5 E <NA> NA
You can use coalesce:
library(dplyr)
# Keep both columns as character vectors (not factors) so coalesce() combines
# values of the same type. FALSE is spelled out because `F` can be reassigned.
df1 <- data.frame(Letters, Char, stringsAsFactors = FALSE)
df1 %>%
  # coalesce() takes, per row, the first non-missing value: Char where it is
  # present, otherwise Letters.
  mutate(Char1 = coalesce(Char, Letters))
Letters Char Char1
1 A a a
2 B b b
3 C <NA> C
4 D d d
5 E <NA> E

Matching columns with other columns in data frames and adding certain columns of matching values

I have tried searching for something but cannot find it. I have found similar threads but still they don't get what I want. I know there should be an easy way to do this without writing a loop function. Here it goes
I have two data frame df1 and df2
df1 <- data.frame(ID = c("a", "b", "c", "d", "e", "f"), y = 1:6 )
df2 <- data.frame(x = c("a", "c", "g", "f"), f=c("M","T","T","M"), obj=c("F70", "F60", "F71", "F82"))
df2$f <- as.factor(df2$f)
now I want to match df1 and df2 "ID" and "x" column with each other. But I want to add new columns to the df1 data frame that matches "ID" and "x" from df2 as well. The final output of df1 should look like this
ID y obj f1 f2
a 1 F70 M NA
b 2 NA NA NA
c 3 F60 NA T
d 4 NA NA NA
e 5 NA NA NA
f 6 F82 M NA
We can do this with tidyverse by joining the two datasets and then spreading the 'f' column
library(tidyverse)
# Keep every row of df1 and attach the df2 columns where ID matches x.
left_join(df1, df2, by = c(ID = "x")) %>%
group_by(f) %>%
# Spread 'f' into one column per value, using 'f' itself as the cell content.
spread(f, f) %>%
# Drop the sixth column -- presumably the extra column spread() creates for
# rows whose 'f' is missing; verify against the actual column order.
select(-6) %>%
# Give the per-level columns the names requested in the question.
rename(f1 = M, f2 = T)
# A tibble: 6 × 5
# ID y obj f1 f2
#* <chr> <int> <fctr> <fctr> <fctr>
#1 a 1 F70 M NA
#2 b 2 NA NA NA
#3 c 3 F60 NA T
#4 d 4 NA NA NA
#5 e 5 NA NA NA
#6 f 6 F82 M NA
Or a similar approach with data.table
library(data.table)
dcast(setDT(df2)[df1, on = .(x = ID)], x+obj + y ~ f, value.var = 'f')[, -6, with = FALSE]
Here is a base R process.
# combine the data.frames, keeping every row of df1 (all.x = TRUE) and
# matching df1$ID against df2$x
dfNew <- merge(df1, df2, by.x = "ID", by.y = "x", all.x = TRUE)
# add f1 and f2 variables: each one holds its own level ("M" or "T") where
# 'f' equals that level and NA everywhere else
dfNew[c("f1", "f2")] <- lapply(
  c("M", "T"),
  function(i) factor(ifelse(as.character(dfNew$f) == i, i, NA))
)
# remove original factor variable by name rather than by position, so this
# step does not depend on the column order produced by merge()
dfNew <- dfNew[setdiff(names(dfNew), "f")]
ID y obj f1 f2
1 a 1 F70 M <NA>
2 b 2 <NA> <NA> <NA>
3 c 3 F60 <NA> T
4 d 4 <NA> <NA> <NA>
5 e 5 <NA> <NA> <NA>
6 f 6 F82 M <NA>

rbind a list of data.frames while keeping NULL elements in R

I would like to rbind a list of data.frame and transform NULL elements into NA using R. Consider the following example,
l <- list(data.frame(C1 = 1, C2 = 2),
NULL,
data.frame(C1 = 3))
# bind_rows results
dplyr::bind_rows(l)
# C1 C2
# 1 1 2
# 2 3 NA
# Desired output
data.frame(C1 = c(1, NA, 3), C2 = c(2, NA, NA))
# C1 C2
# 1 1 2
# 2 NA NA
# 3 3 NA
Start with transforming the NULL elements:
# Swap every NULL element for a one-row data.frame with C1 = NA, so that
# bind_rows() has a row to emit for it; non-NULL elements pass through as-is.
l <- lapply(l, function(x) if(is.null(x)) data.frame(C1 = NA) else x)
dplyr::bind_rows(l)
# C1 C2
#1 1 2
#2 NA NA
#3 3 NA

Resources