I have two lists and I want to use lapply to get a new list
The data is
library(dplyr)
list.A <- list(df1=data.frame(x= c(1:5), y = letters[1:5], z= rep(1,5)),
df2=data.frame(x= c(10:15), y = letters[5:10], z= rep(10,6)))
list.B <- list(df1=data.frame(x= c(1:6), var2 = letters[10:15], var3= rep(7,6)),
df2=data.frame(x= c(10,12), var2 = letters[1:2], var3= rep(5,2)))
I want the result to be as following
dat.1 <- left_join(list.A[[1]], list.B[[1]], by=("x"))
dat.2 <- left_join(list.A[[2]], list.B[[2]], by=("x"))
new.list <- list(df1 = dat.1, df2 =dat.2)
But when I use lapply the results are weird and not as I wish them to be
new.list <- lapply(list.A, function(a){lapply(list.B, function(b){
df <-left_join(a, b, by=("x"))
})
})
Any help, please. I need to apply loop or lapply would work?
my actual lists have so many data frames
We need either map2 from purrr as this loops over each corresponding elements of both list and do the left_join by the 'x' column
library(dplyr)
library(purrr)
map2(list.A, list.B, ~ left_join(.x, .y, by = 'x'))
-output
#$df1
# x y z var2 var3
#1 1 a 1 j 7
#2 2 b 1 k 7
#3 3 c 1 l 7
#4 4 d 1 m 7
#5 5 e 1 n 7
#$df2
# x y z var2 var3
#1 10 e 10 a 5
#2 11 f 10 <NA> NA
#3 12 g 10 b 5
#4 13 h 10 <NA> NA
#5 14 i 10 <NA> NA
#6 15 j 10 <NA> NA
Or Map (from base R)
Map(merge, list.A, list.B, MoreArgs = list(all.x = TRUE, by = 'x'))
Related
I would like to match two columns based on another. I'm trying to use the match function but gets NA values.
a <- data.frame( x = c(1,2,3,4,5))
b <- data.frame( y = c(3,4),
z = c("A","B"))
a$x <- b$z[match(a$x, b$y)]
I get:
> a
x
1 <NA>
2 <NA>
3 A
4 B
5 <NA>
I would like :
> a
x
1 1
2 2
3 A
4 B
5 5
First, rename the numeric column of b so that you can merge the two data frames:
b <- b %>% rename(x = y)
Then, merge them, turn variables into character and replace the values of column x with those of z if not NA.
a <- merge(a, b, by = "x", all.x = TRUE) %>%
mutate_all(as.character) %>%
mutate(x = ifelse(is.na(z), x, z))
Result:
x z
1 1 <NA>
2 2 <NA>
3 A A
4 B B
5 5 <NA>
Without renaming I would propose this which ends with the same result that broti
tmp.merge<- merge(a,b,by.x = "x", by.y="y", all = TRUE)
for (elm in as.numeric(row.names(tmp.merge[which(!is.na(tmp.merge$z)),]))){
tmp.merge[elm,'x'] <- as.character(tmp.merge[elm,'z'])
}
tmp.merge
result :
> tmp.merge
x z
1 1 <NA>
2 2 <NA>
3 A A
4 B B
5 5 <NA>
The following works but you need to set stringsAsFactors = F, when defining dataframe b
a <- data.frame( x = c(1,2,3,4,10,13,12,11))
b <- data.frame( y = c(10,12,13),
z = c("A","B","C"),stringsAsFactors = F)
#
a %>% mutate(x = ifelse(x %in% b$y,b$z[match(x,b$y)],x))
Output
x
1 1
2 2
3 3
4 4
5 A
6 C
7 B
8 11
I want to recursively filter a dataframe, d by an arbitrary number of conditions (represented as rows in another dataframe z).
I begin with a dataframe d:
d <- data.frame(x = 1:10, y = letters[1:10])
The second dataframe z, has columns x1 and x2, which are lower and upper limits to filter d$x. This dataframe z may grow to be an arbitrary number of rows long.
z <- data.frame(x1 = c(1,3,8), x2 = c(1,4,10))
I want to return all rows of d for which d$x <= z$x1[i] and d$x >= z$x2[i] for all i, where i = nrow(z).
So for this toy example, exclude everything from 1:1, 3:4, 8:10, inclusive.
x y
2 2 b
5 5 e
6 6 f
7 7 g
We can create a sequence between x1 and x2 values and use anti_join to select rows from d that are not present in z.
library(tidyverse)
remove <- z %>%
mutate(x = map2(x1, x2, seq)) %>%
unnest(x) %>%
select(x)
anti_join(d, remove)
# x y
#1 2 b
#2 5 e
#3 6 f
#4 7 g
We can use a non-equi join
library(data.table)
i1 <- setDT(d)[z, .I, on = .(x >=x1, x <= x2), by = .EACHI]$I
i1
#[1] 1 3 4 8 9 10
d[i1]
# x y
#1: 1 a
#2: 3 c
#3: 4 d
#4: 8 h
#5: 9 i
#6: 10 j
d[!i1]
# x y
#1: 2 b
#2: 5 e
#3: 6 f
#4: 7 g
Or using fuzzyjoin
library(fuzzyjoin)
library(dplyr)
fuzzy_inner_join(d, z, by = c('x' = 'x1', 'x' = 'x2'),
match_fun = list(`>=`, `<=`)) %>%
select(names(d))
# A tibble: 6 x 2
# x y
# <int> <fct>
#1 1 a
#2 3 c
#3 4 d
#4 8 h
#5 9 i
#6 10 j
Or to get the rows not in 'x' from 'd'
fuzzy_anti_join(d, z, by = c('x' = 'x1', 'x' = 'x2'),
match_fun = list(`>=`, `<=`)) %>%
select(names(d))
# A tibble: 4 x 2
# x y
# <int> <fct>
#1 2 b
#2 5 e
#3 6 f
#4 7 g
I have two data frames as below and am trying to improve my code so the letters column in df1 should replaced with the letters column in df2 if they match.
df1 <- data.frame(ID = c(1,3,2,4,5), Letters = LETTERS[1:5], stringsAsFactors = F)
df2 <- data.frame(ID = c(1,3,4), Letters2 = "F", stringsAsFactors = F)
desired:
ID letters
1 F
2 B
3 F
4 D
5 F
It would be like doing the following by in one line:
desired <- merge(df1, df2, by = "ID", all.x = T)
desired$letters <- ifelse(is.na(desired$letters2), desired$letters, desired$letters2)
desired$letters2 <- NULL
Try this:
library(tidyverse)
df1%>%
left_join(df2)%>%
mutate(Letters=coalesce(letters2,Letters),letters2=NULL)
Joining, by = "ID"
ID Letters
1 1 F
2 2 B
3 3 F
4 4 F
5 5 E
We could use the numeric 'ID' as index to change the values in 'Letters' to those of 'letters2' (which are all 'F's)
df1$Letters[df2$ID] <- df2$letters2
df1
# ID Letters
#1 1 F
#2 2 B
#3 3 F
#4 4 F
#5 5 E
Or using data.table
library(data.table)
setDT(df1)[df2, Letters := Letters2, on = .(ID)]
df1
# ID Letters
#1: 1 F
#2: 3 F
#3: 2 C
#4: 4 F
#5: 5 E
I am trying to convert a matrix to a dataframe and use a column name and row name in the matrix with variables in the dataframe.
here is the sample
sample = matrix(c(1,NA,NA,2,NA,3,NA,NA,5,NA,NA,6,NA,NA,NA,NA,8,NA,3,1),ncol = 4)
colnames(sample) = letters[1:4]
row.names(sample) = letters[22:26]
My dataset has a lot of NA so I am trying to remove all the NA in the dataframe.
so here is my desiring output,
data.frame(col = c("v","v","w","w","y","y","y","z"),
row = c("a","b","c","c","a","b","d","d"),
value = c(1,3,6,8,2,5,3,1))
Use melt from reshape2 package for reshaping, then clear NA. Finally, do some formating stuff to get your desired output (ordering, setting colnames...).
> library(reshape2)
> df <- na.omit(melt(sample)) # reshaping
> df <- df[order(df$Var1), ] # ordering
> colnames(df) <- c("col", "row", "value") # setting colnames
> df # getting desired output
col row value
1 v a 1
6 v b 3
12 w c 6
17 w d 8
4 y a 2
9 y b 5
19 y d 3
20 z d 1
With dplyr and magrittr
> library(magrittr)
> library(dplyr)
> sample %>% melt %>%
na.omit %>%
arrange(., Var1) %>%
setNames(c('col', 'row', 'value'))
col row value
1 v a 1
2 v b 3
3 w c 6
4 w d 8
5 y a 2
6 y b 5
7 y d 3
8 z d 1
Here is a base R method by replicating the row names and column names
out <- na.omit(data.frame(col = rownames(sample)[row(sample)],
row = colnames(sample)[col(sample)], value = c(sample)))
out <- out[order(out$col),]
row.names(out) <- NULL
out
# col row value
#1 v a 1
#2 v b 3
#3 w c 6
#4 w d 8
#5 y a 2
#6 y b 5
#7 y d 3
#8 z d 1
I have tried searching for something but cannot find it. I have found similar threads but still they don't get what I want. I know there should be an easy way to do this without writing a loop function. Here it goes
I have two data frame df1 and df2
df1 <- data.frame(ID = c("a", "b", "c", "d", "e", "f"), y = 1:6 )
df2 <- data.frame(x = c("a", "c", "g", "f"), f=c("M","T","T","M"), obj=c("F70", "F60", "F71", "F82"))
df2$f <- as.factor(df2$f)
now I want to match df1 and df2 "ID" and "x" column with each other. But I want to add new columns to the df1 data frame that matches "ID" and "x" from df2 as well. The final output of df1 should look like this
ID y obj f1 f2
a 1 F70 M NA
b 2 NA NA NA
c 3 F60 NA T
d 4 NA NA NA
e 5 NA NA NA
f 6 F82 M NA
We can do this with tidyverse after joining the two datasets and spread the 'f' column
library(tidyverse)
left_join(df1, df2, by = c(ID = "x")) %>%
group_by(f) %>%
spread(f, f) %>%
select(-6) %>%
rename(f1 = M, f2 = T)
# A tibble: 6 × 5
# ID y obj f1 f2
#* <chr> <int> <fctr> <fctr> <fctr>
#1 a 1 F70 M NA
#2 b 2 NA NA NA
#3 c 3 F60 NA T
#4 d 4 NA NA NA
#5 e 5 NA NA NA
#6 f 6 F82 M NA
Or a similar approach with data.table
library(data.table)
dcast(setDT(df2)[df1, on = .(x = ID)], x+obj + y ~ f, value.var = 'f')[, -6, with = FALSE]
Here is a base R process.
# combine the data.frames
dfNew <- merge(df1, df2, by.x="ID", by.y="x", all.x=TRUE)
# add f1 and f2 variables
dfNew[c("f1", "f2")] <- lapply(c("M", "T"),
function(i) factor(ifelse(as.character(dfNew$f) == i, i, NA)))
# remove original factor variable
dfNew <- dfNew[-3]
ID y obj f1 f2
1 a 1 F70 M <NA>
2 b 2 <NA> <NA> <NA>
3 c 3 F60 <NA> T
4 d 4 <NA> <NA> <NA>
5 e 5 <NA> <NA> <NA>
6 f 6 F82 M <NA>