parsing list string vector to multiple column data.table - r

I am trying to convert data of form:
dt <- data.table(foo = c(c('a=a1|b=b1'),c('a=a2|b=b2|c=c2'),c('a=a3|d=d3')))
to form:
data.table(a=c('a1','a2','a3'),b=c('b1','b2',NA),c=c(NA,'c2',NA),d=c(NA,NA,'d3'))
I tried to parse first step using:
lapply(dt$foo, function(x) unlist(strsplit(x, split = '|', fixed = T)))
but couldn't proceed further. Any pointers?

Will update if more cases are provided. This is not a data.table because I don't use it, but afaik it should still work? Maybe if coerced to data.frame first.
library(tidyverse)
dt <- tibble(foo = c(c('a=a1|b=b1'),c('a=a2|b=b2|c=c2'),c('a=a3|d=d3')))
tibble(a=c('a1','a2','a3'),b=c('b1','b2',NA),c=c(NA,'c2',NA),d=c(NA,NA,'d3'))
#> # A tibble: 3 x 4
#> a b c d
#> <chr> <chr> <chr> <chr>
#> 1 a1 b1 <NA> <NA>
#> 2 a2 b2 c2 <NA>
#> 3 a3 <NA> <NA> d3
# Split each string into its "key=value" pieces, tag every input row with an
# id so the pieces can be regrouped, then spread the keys out into columns.
dt %>%
  mutate(foo = str_split(foo, pattern = "\\|")) %>%
  rowid_to_column() %>%
  # Name the list-column explicitly: current tidyr warns when unnest() is
  # called without saying which column to expand.
  unnest(foo) %>%
  separate(foo, into = c("col", "val"), sep = "=") %>%
  spread(col, val)
#> # A tibble: 3 x 5
#> rowid a b c d
#> <int> <chr> <chr> <chr> <chr>
#> 1 1 a1 b1 <NA> <NA>
#> 2 2 a2 b2 c2 <NA>
#> 3 3 a3 <NA> <NA> d3
Created on 2018-04-12 by the reprex package (v0.2.0).

Converting my comment to an answer, you can try:
library(splitstackshape)
cSplit(dt[, row := .I], "foo", "|", "long")[
, cSplit(.SD, "foo", "=")][
, dcast(.SD, row ~ foo_1, value.var = "foo_2")]
# row a b c d
# 1: 1 a1 b1 <NA> <NA>
# 2: 2 a2 b2 c2 <NA>
# 3: 3 a3 <NA> <NA> d3
You can, of course, also just use a combination of strsplit, tstrsplit, and dcast as well.
dt[, unlist(strsplit(foo, "|", TRUE)), 1:nrow(dt)][
, c("col", "val") := tstrsplit(V1, "=", fixed = TRUE)][
, dcast(.SD, nrow ~ col, value.var = "val")]
# nrow a b c d
# 1: 1 a1 b1 <NA> <NA>
# 2: 2 a2 b2 c2 <NA>
# 3: 3 a3 <NA> <NA> d3

One option is to use the read.table function to read each string as key-value pairs and then convert the result into a one-row data.frame. dplyr::bind_rows can then combine the rows, filling missing columns with NA.
dt <- data.table(foo = c(c('a=a1|b=b1'),c('a=a2|b=b2|c=c2'),c('a=a3|d=d3')))
library(dplyr)
# Turn every "k1=v1|k2=v2" string into a one-row data.frame whose column names
# are the keys, then stack the rows; bind_rows() fills absent keys with NA.
# lapply() is used instead of mapply() because mapply() simplifies its result
# to a matrix when every string has the same number of keys, which bind_rows()
# cannot stack.
bind_rows(lapply(dt$foo, function(x) {
  # One "key=value" pair per line, read as a two-column table.
  kv <- read.table(text = gsub("\\|", "\n", x), sep = "=",
                   stringsAsFactors = FALSE)
  # Transpose so the keys form the first row and the values the second.
  wide <- as.data.frame(t(kv), stringsAsFactors = FALSE)
  colnames(wide) <- unlist(wide[1, ])
  # drop = FALSE keeps a data.frame even when the string holds a single key.
  wide[-1, , drop = FALSE]
}))
# a b c d
# 1 a1 b1 <NA> <NA>
# 2 a2 b2 c2 <NA>
# 3 a3 <NA> <NA> d3
UPDATED: data.table based solution as suggested by #abhiieor will be as:
library(data.table)
# Turn every "k1=v1|k2=v2" string into a one-row data.frame whose column names
# are the keys, then stack the rows by name; fill = TRUE pads absent keys
# with NA.
# lapply() is used instead of mapply() because mapply() simplifies its result
# to a matrix when every string has the same number of keys.
rbindlist(lapply(dt$foo, function(x) {
  # One "key=value" pair per line, read as a two-column table.
  kv <- read.table(text = gsub("\\|", "\n", x), sep = "=",
                   stringsAsFactors = FALSE)
  # Transpose so the keys form the first row and the values the second.
  wide <- as.data.frame(t(kv), stringsAsFactors = FALSE)
  colnames(wide) <- unlist(wide[1, ])
  # drop = FALSE keeps a data.frame even when the string holds a single key.
  wide[-1, , drop = FALSE]
}), use.names = TRUE, fill = TRUE)

Related

Joining two incomplete data.tables with the same column names

I have two incomplete data.tables with the same column names.
dt1 <- data.table(id = c(1, 2, 3), v1 = c("w", "x", NA), v2 = c("a", NA, "c"))
dt2 <- data.table(id = c(2, 3, 4), v1 = c(NA, "y", "z"), v2 = c("b", "c", NA))
They look like this:
dt1
id v1 v2
1: 1 w a
2: 2 x <NA>
3: 3 <NA> c
> dt2
id v1 v2
1: 2 <NA> b
2: 3 y c
3: 4 z <NA>
Is there a way to merge the two by filling in the missing info?
This is the result I'm after:
id v1 v2
1: 1 w a
2: 2 x b
3: 3 y c
4: 4 z <NA>
I've tried various data.table joins, merges but I either get the columns repeated:
> merge(dt1,
+ dt2,
+ by = "id",
+ all = TRUE)
id v1.x v2.x v1.y v2.y
1: 1 w a <NA> <NA>
2: 2 x <NA> <NA> b
3: 3 <NA> c y c
4: 4 <NA> <NA> z <NA>
or the rows repeated:
> merge(dt1,
+ dt2,
+ by = names(dt1),
+ all = TRUE)
id v1 v2
1: 1 w a
2: 2 <NA> b
3: 2 x <NA>
4: 3 <NA> c
5: 3 y c
6: 4 z <NA>
Both data.tables have the same column names.
You can group by ID and get the unique values after omitting NAs, i.e.
library(data.table)
merge(dt1, dt2, all = TRUE)[,
lapply(.SD, function(i)na.omit(unique(i))),
by = id][]
# id v1 v2
#1: 1 w a
#2: 2 x b
#3: 3 y c
#4: 4 z <NA>
You could also start out with rbind():
rbind(dt1, dt2)[, lapply(.SD, \(x) unique(x[!is.na(x)])), by = id]
# id v1 v2
# <num> <char> <char>
# 1: 1 w a
# 2: 2 x b
# 3: 3 y c
# 4: 4 z <NA>
First full_join and after that group_by per id and merge the rows:
library(dplyr)
library(tidyr)
dt1 %>%
full_join(dt2, by = c("id", "v1", "v2")) %>%
group_by(id) %>%
fill(starts_with('v'), .direction = 'updown') %>%
slice(1) %>%
ungroup
Output:
# A tibble: 4 × 3
id v1 v2
<dbl> <chr> <chr>
1 1 w a
2 2 x b
3 3 y c
4 4 z NA

Split variable from comma into an ordered dataframe

I have a dataframe like this, where the values are separated by comma.
# Events
# A,B,C
# C,D
# B,A
# D,B,A,E
# A,E,B
I would like to get the following data frame:
# Event1 Event2 Event3 Event4 Event5
# A B C NA NA
# NA NA C NA NA
# A B NA NA NA
# A B NA D E
# A B NA NA E
I have tried cSplit, but it does not give me the desired data frame. Is this possible?
NOTE: The values don't appear in the same position in the Events variable as they do in the second data frame.
1) Here is a base R solution. Split each row, giving the list s, and create cols, which contains the possible values. Then iterate over s and convert the result to a data frame.
Note that this does not hard code the column names and continues to work even if some column names are substrings of other column names.
# Split every Events string on commas: s holds one character vector per row.
s <- strsplit(DF$Events, ",")
# Every distinct value that occurs, in sorted order; one output column each.
cols <- unique(sort(unlist(s)))
# For each row, keep a value in its own column slot when the row contains it
# and put NA there otherwise; t() turns the per-row results back into rows.
data.frame(Event = t(sapply(s, function(x) ifelse(cols %in% x, cols, NA))))
giving:
Event.1 Event.2 Event.3 Event.4 Event.5
1 A B C <NA> <NA>
2 <NA> <NA> C D <NA>
3 A B <NA> <NA> <NA>
4 A B <NA> D E
5 A B <NA> <NA> E
2) This base R solution uses strsplit as above and then names the components since stack requires a named list and then invokes stack. Then we expand that into a wide form using tapply and convert it to a data frame and fix up the names.
# Split every Events string on commas: s holds one character vector per row.
s <- strsplit(DF$Events, ",")
# stack() needs a named list; use the row positions as names.
names(s) <- seq_along(s)
# Long form: one line per (value, row) pair, in columns `values` and `ind`.
stk <- stack(s)
# Cross-tabulate by both columns of stk, storing the value itself in each
# occupied cell (empty cells are NA), then transpose so each input row is a row.
mat <- t(tapply(stk$values, stk, c))
# Drop the value labels so data.frame() numbers the columns Event.1, Event.2, ...
colnames(mat) <- NULL
data.frame(Event = mat)
giving:
Event.1 Event.2 Event.3 Event.4 Event.5
1 A B C <NA> <NA>
2 <NA> <NA> C D <NA>
3 A B <NA> <NA> <NA>
4 A B <NA> D E
5 A B <NA> <NA> E
This could also be represented as an R 4.2+ pipeline:
DF |>
with(setNames(Events, seq_along(Events))) |>
strsplit(",") |>
stack() |>
with(tapply(values, data.frame(ind, values), c)) |>
`colnames<-`(NULL) |>
data.frame(Event = _)
Note
The input in reproducible form:
Lines <- "Events
A,B,C
C,D
B,A
D,B,A,E
A,E,B"
DF <- read.table(text = Lines, header = TRUE, strip.white = TRUE)
Another approach using tidyverse:
library(dplyr)
library(purrr)
library(stringr)
Events = c("A,B,C", 'C,D', "B,A", "D,B,A,E", "A,E,B")
letters <- Events %>% str_split(",") %>% unlist() %>% unique()
df <- data.frame(Events)
df %>%
map2_dfc(.y = letters, ~ ifelse(str_detect(.x, .y), .y, NA)) %>%
set_names(nm = paste0("Events", 1:length(letters)))
#> # A tibble: 5 × 5
#> Events1 Events2 Events3 Events4 Events5
#> <chr> <chr> <chr> <chr> <chr>
#> 1 A B C <NA> <NA>
#> 2 <NA> <NA> C D <NA>
#> 3 A B <NA> <NA> <NA>
#> 4 A B <NA> D E
#> 5 A B <NA> <NA> E
Created on 2022-07-11 by the reprex package (v2.0.1)
This tidyverse solution is easily the most economical in terms of amount of code used:
library(tidyverse)
data.frame(Events) %>%
# split the strings by the comma:
mutate(Events = str_split(Events, ",")) %>%
# unnest splitted values wider into columns:
unnest_wider(Events, names_sep = "")
# A tibble: 5 × 4
Events1 Events2 Events3 Events4
<chr> <chr> <chr> <chr>
1 A B C NA
2 C D NA NA
3 B A NA NA
4 D B A E
5 A E B NA
Data:
Events = c("A,B,C", 'C,D', "B,A", "D,B,A,E", "A,E,B")
We can try the following base R code
> d <- t(table(stack(setNames(strsplit(df$Events, ","), 1:nrow(df)))))
> as.data.frame.matrix(`dim<-`(colnames(d)[ifelse(d > 0, d * col(d), NA)], dim(d)))
V1 V2 V3 V4 V5
1 A B C <NA> <NA>
2 <NA> <NA> C D <NA>
3 A B <NA> <NA> <NA>
4 A B <NA> D E
5 A B <NA> <NA> E

Conditionally copy contents of one column to another [duplicate]

This question already has answers here:
How to join (merge) data frames (inner, outer, left, right)
(13 answers)
Closed 2 years ago.
I want to add extra columns depending on values of code which are defined in VAR
DF <- data.frame(id = c(1:5), code = c("A","B","C","D","E"), sub = c("A1","B1","C1","D1","E1"))
id code sub
1 1 A A1
2 2 B B1
3 3 C C1
4 4 D D1
5 5 E E1
VAR <- c("A","B")
How result should be:
id code sub AB ABsub
1 1 A A1 A A1
2 2 B B1 B B1
3 3 C C1 <NA> <NA>
4 4 D D1 <NA> <NA>
5 5 E E1 <NA> <NA>
Or using dplyr:
library(dplyr)
DF<-data.frame(id=c(1:5),code=c("A","B","C","D","E"),sub=c("A1","B1","C1","D1","E1"), stringsAsFactors = FALSE)
VAR<-c("A","B")
# Copy code into AB, and sub into ABsub, only on rows whose code is in VAR;
# all other rows get NA in both new columns.
DF <- DF %>%
  # VAR is an ordinary vector, not a function argument, so it needs no
  # `{{ }}` embracing.
  mutate(AB = ifelse(code %in% VAR, code, NA_character_)) %>%
  # Test AB for NA directly instead of relying on `code == AB` yielding NA.
  mutate(ABsub = ifelse(!is.na(AB), sub, NA_character_))
with:
> DF
id code sub AB ABsub
1 1 A A1 A A1
2 2 B B1 B B1
3 3 C C1 <NA> <NA>
4 4 D D1 <NA> <NA>
5 5 E E1 <NA> <NA>
Also works if VAR would equal c("A", "B", "C") but we do not know if that is what you are after.
A simple base R option using merge + subset
merge(DF,subset(DF,code %in% VAR),by = "id",all = TRUE)
such that
> merge(DF,subset(DF,code %in% VAR),by = "id",all = TRUE)
id code.x sub.x code.y sub.y
1 1 A A1 A A1
2 2 B B1 B B1
3 3 C C1 <NA> <NA>
4 4 D D1 <NA> <NA>
5 5 E E1 <NA> <NA>
A dplyr solution with across():
library(dplyr)
DF %>%
mutate(across(-id, ~ replace(.x, !(code %in% VAR), NA), .names = "AB{col}"))
# id code sub ABcode ABsub
# 1 1 A A1 A A1
# 2 2 B B1 B B1
# 3 3 C C1 <NA> <NA>
# 4 4 D D1 <NA> <NA>
# 5 5 E E1 <NA> <NA>
or with left_join():
DF %>%
filter(code %in% VAR) %>%
left_join(DF, ., by = "id", suffix = c("", "AB"))
# id code sub codeAB subAB
# 1 1 A A1 A A1
# 2 2 B B1 B B1
# 3 3 C C1 <NA> <NA>
# 4 4 D D1 <NA> <NA>
# 5 5 E E1 <NA> <NA>
Note: If you have multiple columns in your real data, you don't need to type
mutate(Col1 = ifelse(...), Col2 = ifelse(...), etc.)
one by one.
Here's a solution
# Flag the rows whose code is listed in VAR, then copy code into AB and sub
# into ABsub for those rows only; every other row gets NA in both columns.
in_var <- DF$code %in% VAR
# as.character() guards against factor columns, which ifelse() would
# otherwise return as their integer codes.
AB <- ifelse(in_var, as.character(DF$code), NA_character_)
ABsub <- ifelse(in_var, as.character(DF$sub), NA_character_)
cbind(DF, AB, ABsub)

Mutating based on multiple columns in a data frame

OK, so my data frame looks like this; let's call it df:
KEY A1 A2 A3 A4 B1 B2 B3 B4 C1 C2 C3 C4
1 120 100 NA 110 1 1 NA 1 NA NA NA NA
2 100 NA 115 NA NA NA NA NA Y N Y N
So what I'm trying to do is this: when an A column has a value of 100 and the corresponding B or C column has a value of 1 or "Y" respectively, create a new column X with a value of 1. In row 1 that would be A2 and B2, and in row 2 that would be A1 and C1.
I tried doing three sets of gather and then using mutate with case_when, like so:
df<- df %>%
gather(key="A",value="code",dx)%>%
gather(key="B",value="number",dxadm)%>%
gather(key="C",value="character",dxpoa) %>%
mutate(X=case_when(
code == 100 & present >0 ~ 1,
code ==100 & character == "Y"~1)
)
Except that when I spread these rows back out, the rows came back all awry and my X was out of place.
Alternatively, I considered something like
df <- df %>%
mutate(X=case_when(
A1 == 100 & B1 >0 ~ 1,
A1 ==100 & C1 == "Y"~1,
A2 == 100 & B2 >0 ~ 1,
A2 ==100 & C2 == "Y"~1,)
and so on for all permutations. The two problems with this are that I have a lot of columns and that I'd like to do this for multiple different values of A.
Can anyone recommend an alternative or at least a way to make the second solution into something that would only require one annoying long piece of code that I could make into a more generalizable function? Thanks!
A suggestion
# library() stops with an error when a package is missing; require() would
# only return FALSE and let the failure surface later in the pipeline.
library(read.so) # awesome package to read from Stackoverflow,
# available on GitHub [https://alistaire47.github.io/read.so/][1]
library(tidyr)
library(reshape2)
library(dplyr)
dat <- read.so()
dat %>% gather(var, value, 2:13) %>% # make it long
  mutate(var = gsub('([A-Z])', '\\1_', var)) %>% # add underscore
  separate(var, c('var', 'number')) %>% # separate your column
  # dcast is a bit complex but quite powerful; value.var is named explicitly
  # so it does not have to be guessed
  dcast(KEY + number ~ var, value.var = "value") %>%
  group_by(KEY) %>%
  filter(A == 100)
# A tibble: 2 x 5
# Groups: KEY [2]
KEY number A B C
<int> <chr> <chr> <chr> <chr>
1 1 2 100 1 <NA>
2 2 1 100 <NA> Y
A solution using dplyr and tidyr. We can gather all the columns except KEY, separate the letters and numbers, and then spread the letter so that we can create the X column without specifying the numbers. Notice that I assume if the condition is not met, X would be 0, and based on your description, I used any(A %in% 100 & (B %in% 1 | C %in% "Y")) to test the condition as any given numbers met the condition, X would be 1.
library(dplyr)
library(tidyr)
df2 <- df %>%
gather(Column, Value, -KEY) %>%
separate(Column, into = c("Letter", "Number"), sep = 1) %>%
spread(Letter, Value, convert = TRUE) %>%
group_by(KEY) %>%
mutate(X = ifelse(any(A %in% 100 & (B %in% 1 | C %in% "Y")), 1L, 0L))
df2 %>% as.data.frame()
# KEY Number A B C X
# 1 1 1 120 1 <NA> 1
# 2 1 2 100 1 <NA> 1
# 3 1 3 NA NA <NA> 1
# 4 1 4 110 1 <NA> 1
# 5 2 1 100 NA Y 1
# 6 2 2 NA NA N 1
# 7 2 3 115 NA Y 1
# 8 2 4 NA NA N 1
I think the structure of df2 is good, but if you really want the original structure, we can do the following.
df3 <- df2 %>%
gather(Letter, Value, A:C) %>%
unite(Column, Letter, Number, sep = "") %>%
spread(Column, Value) %>%
select(names(df), X)
df3 %>% as.data.frame()
# KEY A1 A2 A3 A4 B1 B2 B3 B4 C1 C2 C3 C4 X
# 1 1 120 100 <NA> 110 1 1 <NA> 1 <NA> <NA> <NA> <NA> 1
# 2 2 100 <NA> 115 <NA> <NA> <NA> <NA> <NA> Y N Y N 1
df3 is the final output.
DATA
df <- read.table(text = "KEY A1 A2 A3 A4 B1 B2 B3 B4 C1 C2 C3 C4
1 120 100 NA 110 1 1 NA 1 NA NA NA NA
2 100 NA 115 NA NA NA NA NA Y N Y N",
header = TRUE, stringsAsFactors = FALSE)
Same idea as Tjebo, but sticking to the tidyverse....
library(tidyverse)
dat <- data.frame(stringsAsFactors=FALSE,
KEY = c(1L, 2L),
A1 = c(120L, 100L),
A2 = c(100L, NA),
A3 = c(NA, 115L),
A4 = c(110L, NA),
B1 = c(1L, NA),
B2 = c(1L, NA),
B3 = c(NA, NA),
B4 = c(1L, NA),
C1 = c(NA, "Y"),
C2 = c(NA, "N"),
C3 = c(NA, "Y"),
C4 = c(NA, "N"))
dat %>%
gather(var, value, -KEY) %>% #make it long
extract(var, regex = "(.)(.)", into = c("var", "number") ) %>%
spread(var, value) %>%
filter( A %in% 100 )
#> KEY number A B C
#> 1 1 2 100 1 <NA>
#> 2 2 1 100 <NA> Y
Created on 2018-02-27 by the reprex package (v0.2.0).

Combining columns, while ignoring duplicates and NAs

I have a dataframe as follows and I would like to combine two columns, namely Var1 and Var2. I want the combined column (Var3) to contain no duplicates of <alpha><digit>. i.e. if Var1 == A1 and Var2 == A1, hence Var3 == A1 but not Var3 == A1-A1 or if Var1 == A4-E9 and Var2 == A4, hence Var3 == A4-E9 but not Var3 == A4-E9-A4
df <- read.table(header = TRUE, text =
"id Var1 Var2
A A1 A1
B F2 A2
C NA A3
D A4-E9 A4
E E5 A5
F NA NA
G B2-R4 A3-B2
H B3-B4 E1-G5", stringsAsFactors = FALSE)
The following is my code. I would like to improve on its readability as well as get rid of the NA that is present in row 3's entry for Var3, i.e A3-NA.
library(dplyr)
library(tidyr)
df %>%
mutate(Var3 = paste(Var1, Var2, sep = "-")) %>%
separate_rows(Var3, sep = "-") %>%
group_by(id, Var3) %>%
slice(1) %>%
group_by(id) %>%
mutate(Var3 = paste(unlist(Var3[!is.na(Var3)]), collapse = "-")) %>%
slice(1) %>%
ungroup
Here's my desired output:
# A tibble: 8 x 4
id Var1 Var2 Var3
<chr> <chr> <chr> <chr>
1 A A1 A1 A1
2 B F2 A2 A2-F2
3 C <NA> A3 A3
4 D A4-E9 A4 A4-E9
5 E E5 A5 A5-E5
6 F <NA> <NA> <NA>
7 G B2-R4 A3-B2 A3-B2-R4
8 H B3-B4 E1-G5 B3-B4-E1-G5
If 'df1' is the output, then we can remove the 'NA' that follows a '-' with sub:
df1 %>%
mutate(Var3 = sub("-NA", "", Var3))
# A tibble: 8 x 4
# id Var1 Var2 Var3
# <chr> <chr> <chr> <chr>
#1 A A1 A1 A1
#2 B F2 A2 A2-F2
#3 C <NA> A3 A3
#4 D A4-E9 A4 A4-E9
#5 E E5 A5 A5-E5
#6 F <NA> <NA> NA
#7 G B2-R4 A3-B2 A3-B2-R4
#8 H B3-B4 E1-G5 B3-B4-E1-G5
We can also do this slightly differently with tidyverse by gather into 'long' format, then split the 'value' column using separate_rows, grouped by 'id', summarise the 'Var3' column by pasteing the sorted unique elements of 'Var3' and left_join with the original dataset 'df'
library(tidyverse)
gather(df, key, value, -id) %>%
separate_rows(value) %>%
group_by(id) %>%
summarise(Var3 = paste(sort(unique(value)), collapse='-')) %>%
mutate(Var3 = replace(Var3, Var3=='', NA)) %>%
left_join(df, .)
# id Var1 Var2 Var3
#1 A A1 A1 A1
#2 B F2 A2 A2-F2
#3 C <NA> A3 A3
#4 D A4-E9 A4 A4-E9
#5 E E5 A5 A5-E5
#6 F <NA> <NA> <NA>
#7 G B2-R4 A3-B2 A3-B2-R4
#8 H B3-B4 E1-G5 B3-B4-E1-G5
NOTE: The %>% makes even simple code appear on multiple lines, but if required, we can put all those statements on a single line and call it a one-liner.
Here is a one-liner
library(data.table)
setDT(df)[, Var3 := paste(sort(unique(unlist(strsplit(unlist(.SD),"-")))), collapse="-"), id]
You could do it in one line
# Glue Var1 and Var2 together with "-", split on "-" again and re-join the
# distinct pieces in order of first appearance.
# paste() turns a missing value into the string "NA", so those pieces are
# dropped by comparing against "NA"; a row where both inputs are missing
# ends up as "".
# vapply() keeps Var3 a plain character vector; lapply() would store a list.
df$Var3 <- vapply(
  strsplit(paste(df$Var1, df$Var2, sep = "-"), "-"),
  function(x) {
    parts <- unique(x)
    paste(parts[parts != "NA"], collapse = "-")
  },
  character(1)
)
Output:
id Var1 Var2 Var3
1 A A1 A1 A1
2 B F2 A2 F2-A2
3 C <NA> A3 A3
4 D A4-E9 A4 A4-E9
5 E E5 A5 E5-A5
6 F <NA> <NA>
7 G B2-R4 A3-B2 B2-R4-A3
8 H B3-B4 E1-G5 B3-B4-E1-G5
The first part in the lapply function is similar to your first call with dplyr. First the columns are concatenated, and then we split them again.
The function within lapply removes all NA's, and then collapses the string again.
Hope this helps!
EDIT: Speed comparison for fun!
262,144 rows
Average runtimes:
Florian: 3.97 seconds
Sotos: 2.46 seconds
Akrun: 1.34 seconds
Adamm: >120 seconds
df <- read.table(header = TRUE, text =
"id Var1 Var2
A A1 A1
B F2 A2
C NA A3
D A4-E9 A4
E E5 A5
F NA NA
G B2-R4 A3-B2
H B3-B4 E1-G5", stringsAsFactors = FALSE)
# Stack df onto itself 15 times in a row; every pass doubles the row count.
for (pass in seq_len(15L)) {
  df <- rbind(df, df)
}
library(microbenchmark)
# Florian's method
microbenchmark(
lapply(strsplit(paste(df$Var1, df$Var2, sep = "-"),"-"),
function(x)paste(unique(x)[unique(x)!="NA"],collapse="-")),times=5)
# Sotos'method
microbenchmark(
gsub('NA-|-NA', '', vapply(strsplit(do.call(paste, df[-1]), " |-"), function(i) paste(unique(i), collapse = "-"), character(1L))), times=5)
# akrun method
library(data.table)
microbenchmark(
setDT(df)[, Var3 := paste(sort(unique(unlist(strsplit(unlist(.SD),"-")))), collapse="-"), id], times=5)
# Adamm method
microbenchmark(
sapply(1:nrow(df), function(i) ifelse(df[i,2]!=df[i,3] & !is.na(df[i,2]) & !is.na(df[i,3]), paste(df[i,2], df[i,3], sep="-"), ifelse(!is.na(df[i,3]), df[i,3], df[i,2]))), times=5)
If you want complex solution; long one-liner, nested ifelse().
df$Var3 <- sapply(1:nrow(df), function(i) ifelse(df[i,2]!=df[i,3] & !is.na(df[i,2]) & !is.na(df[i,3]), paste(df[i,2], df[i,3], sep="-"), ifelse(!is.na(df[i,3]), df[i,3], df[i,2])))
> df
id Var1 Var2 Var3
1 A A1 A1 A1
2 B F2 A2 F2-A2
3 C <NA> A3 A3
4 D A4-E9 A4 A4-E9-A4
5 E E5 A5 E5-A5
6 F <NA> <NA> <NA>
7 G B2-R4 A3-B2 B2-R4-A3-B2
8 H B3-B4 E1-G5 B3-B4-E1-G5
Regarding efficiency, I ran a small experiment and measured the run time of each proposed solution; here are the results:
First of all I need more rows:
n <- 10000
df <- do.call("rbind", replicate(n, df, simplify = FALSE))
Akrun solution 1 with tidyverse
Time difference of 1.452809 secs
Akrun solution 2 with data.table
Time difference of 0.4530261 secs
Florian Maas solution with lapply
Time difference of 1.812106 secs
My solution with sapply
Time difference of 2.289345 mins
Sotos solution
Time difference of 1.515296 secs

Resources