I have df as follow
df
ID type other-col
1 A1 cc
1 A2 dd
1 A3 cc
2 A1 cc
2 B1 aa
3 A2 aa
I want add new to when "ID" changes with the value of F for "type" and "other-col" columns
new_df
ID
df
ID type other-col
1 A1 cc
1 A2 dd
1 A3 cc
1 F F <- this row added
2 A1 cc
2 B1 aa
2 F F <- this row added
3 A2 aa
how can I do it in R?
thx
This should be doable in a single replacement operation once you know the indexes of where each change occurs. E.g.:
idx <- match(unique(df$ID), df$ID)[-1] - 1
df <- df[sort(c(sequence(nrow(df)),idx)),]
df[seq_along(idx) + idx, c("type","other_col")] <- "F"
# ID type other_col
#1 1 A1 cc
#2 1 A2 dd
#3 1 A3 cc
#3.1 1 F F
#4 2 A1 cc
#5 2 B1 aa
#5.1 2 F F
#6 3 A2 aa
Where df was:
df <- read.table(text="ID type other_col
1 A1 cc
1 A2 dd
1 A3 cc
2 A1 cc
2 B1 aa
3 A2 aa", header=TRUE, stringsAsFactors=FALSE)
An option with group_split and add_row. We can split by 'ID' with group_split into a list of data.frames, then loop through the list with map, add a row as the last row (add_row - by default adds row to the end, but we can control it with .before and .after), then slice out the last row as the last 'ID' didn't need the 'F' row
library(tidyverse)
df1 %>%
group_split(ID) %>%
map_dfr(~ .x %>%
add_row(ID = first(.$ID), type = 'F', `other-col` = 'F')) %>%
slice(-n())
Here is another approach with a similar idea as #akrun's answer.
library(tidyverse)
dat2 <- dat %>%
split(f = .$ID) %>%
map_if(.p = function(x) unique(x$ID) < max(dat$ID),
~bind_rows(.x, tibble(ID = unique(.x$ID), type = "F", `other.col` = "F"))) %>%
bind_rows()
dat2
# ID type other.col
# 1 1 A1 cc
# 2 1 A2 dd
# 3 1 A3 cc
# 4 1 F F
# 5 2 A1 cc
# 6 2 B1 aa
# 7 2 F F
# 8 3 A2 aa
Data
dat <- read.table(text = "ID type other-col
1 A1 cc
1 A2 dd
1 A3 cc
2 A1 cc
2 B1 aa
3 A2 aa",
header = TRUE, stringsAsFactors = FALSE)
Update
I provided an updated answer to show that if ID column is not integer but character, we can create a new column (ID2 in this case) that is converted to be factor based on ID, and then convert it to integer. The rest of the operation would be similar to the original answer but based on ID2.
library(tidyverse)
dat2 <- dat %>%
mutate(ID2 = as.integer(factor(ID, levels = unique(.$ID)))) %>%
split(f = .$ID2) %>%
map_if(.p = function(x) unique(x$ID2) != unique(last(.)$ID2),
~bind_rows(.x, tibble(ID = unique(.x$ID), type = "F", `other.col` = "F",
ID2 = unique(.x$ID2)))) %>%
bind_rows() %>%
select(-ID2)
dat2
# ID type other.col
# 1 C A1 cc
# 2 C A2 dd
# 3 C A3 cc
# 4 C F F
# 5 A A1 cc
# 6 A B1 aa
# 7 A F F
# 8 B A2 aa
DATA
dat <- read.table(text = "ID type other-col
C A1 cc
C A2 dd
C A3 cc
A A1 cc
A B1 aa
B A2 aa",
header = TRUE, stringsAsFactors = FALSE)
Similar to akrun's answer but in base R. Basically, split dataframe by ID then rbind extra row to each split, then recombine dataframe and remove unrequired last row using head(..., -1) -
head(n = -1,
do.call(rbind,
lapply(split(dat, dat$ID), function(x) {
rbind(x, c(x$ID[1], "F", "F"))
})
)
)
ID type other.col
1.1 1 A1 cc
1.2 1 A2 dd
1.3 1 A3 cc
1.4 1 F F
2.4 2 A1 cc
2.5 2 B1 aa
2.3 2 F F
3.6 3 A2 aa
Using base R you could do:
cbind(ID=sort(c(dat$ID,unique(dat$ID))),do.call(rbind,by(dat[-1],dat[1],rbind,'F')))
ID type other.col
1.1 1 A1 cc
1.2 1 A2 dd
1.3 1 A3 cc
1.4 1 F F
2.4 2 A1 cc
2.5 2 B1 aa
2.3 2 F F
3.6 3 A2 aa
3.2 3 F F
Or you could do:
do.call(rbind,by(dat,dat$ID,function(x)cbind(ID = unique(x[,1]),rbind(x[-1],"F"))))
inds = head(cumsum(with(rle(df$ID), unlist(lapply(lengths, function(i) c((rep(1, i)), F = 0))))), -1)
df1 = df[inds,]
df1[which(names(inds) == "F"), c("type", "other_col")] = "F"
df1
# ID type other_col
#1 1 A1 cc
#2 1 A2 dd
#3 1 A3 cc
#3.1 1 F F
#4 2 A1 cc
#5 2 B1 aa
#5.1 2 F F
#6 3 A2 aa
A possible approach using data.table:
library(data.table)
m <- setDT(df)[, max(ID)]
df[, if (.BY$ID < m) rbind(.SD, as.list(rep("F", ncol(.SD)))) else .SD, ID]
output:
ID type other-col
1: 1 A1 cc
2: 1 A2 dd
3: 1 A3 cc
4: 1 F F
5: 2 A1 cc
6: 2 B1 aa
7: 2 F F
8: 3 A2 aa
or if you dont mind adding another row at the bottom, code will be shorter: setDT(df)[, rbind(.SD, as.list(rep("F", ncol(.SD)))), ID]
Related
I have example data as follows:
library(data.table)
dat <- fread("Survey Variable_codes_2022
D D1
A A1
B B1
B B3
B B2
E E1
B NA
E NA")
For the two rows that have Variable_codes_2022==NA, I would like to increment the variable code so that it becomes:
dat <- fread("Survey Variable_codes_2022
D D1
A A1
B B1
B B3
B B2
E E1
B B4
E E2"
Because the column Variable_codes_2022 is a string variable, the numbers are not in numerical order.
I have no idea where to start and I was wondering if someone could help me on the right track.
We could do it this way:
grouping
arranging and
mutate.
To keep the original order we could first create and id and then rearrange:
library(dplyr)
dat %>%
group_by(Survey) %>%
arrange(.by_group = TRUE) %>%
mutate(Variable_codes_2022 = paste0(Survey, row_number()))
Survey Variable_codes_2022
<chr> <chr>
1 A A1
2 B B1
3 B B2
4 B B3
5 B B4
6 D D1
7 E E1
8 E E2
data.table option using rleid like this:
library(data.table)
dat[, Variable_codes_2022 := paste0(Survey, rleid(Variable_codes_2022)), by = Survey]
dat
#> Survey Variable_codes_2022
#> 1: D D1
#> 2: A A1
#> 3: B B1
#> 4: B B2
#> 5: B B3
#> 6: E E1
#> 7: B B4
#> 8: E E2
Created on 2022-12-01 with reprex v2.0.2
dat <-
structure(list(survey = c("D", "A", "B", "B", "B", "E", "B",
"E", "B"), var_code = c("D1", "A1", "B1", "B3", "B2", "E1", NA,
NA, NA)), row.names = c(NA, -9L), class = c("data.table", "data.frame"
), .internal.selfref = <pointer: 0x0000026db10f1ef0>)
library(dplyr)
library(stringr)
dat %>%
group_by(survey) %>%
mutate(
aux1 = as.numeric(stringr::str_remove(var_code,survey)),
aux2 = cumsum(is.na(var_code)),
var_code = paste0(survey,max(aux1,na.rm = TRUE)+aux2)
) %>%
ungroup() %>%
select(-aux1,-aux2)
# A tibble: 9 x 2
survey var_code
<chr> <chr>
1 D D1
2 A A1
3 B B3
4 B B3
5 B B3
6 E E1
7 B B4
8 E E2
9 B B5
This solution with rowid.
Added an extra element to the sample so it can be tested against multiple missings
library(data.table)
#> Warning: package 'data.table' was built under R version 4.2.2
dat <- fread("Survey Variable_codes_2022
D D1
A A1
B B1
B B3
B B2
E E1
B NA
E NA
E NA")
dat[, n := as.numeric(substr(
Variable_codes_2022, nchar(Survey)+1, nchar(Variable_codes_2022)))]
dat[is.na(n),
Variable_codes_2022 := paste0(Survey, rowid(Survey) +
dat[.SD[,.(Survey)], .(m=max(n, na.rm=T)), on = "Survey", by=.EACHI ][,m])]
dat
#> Survey Variable_codes_2022 n
#> 1: D D1 1
#> 2: A A1 1
#> 3: B B1 1
#> 4: B B3 3
#> 5: B B2 2
#> 6: E E1 1
#> 7: B B4 NA
#> 8: E E2 NA
#> 9: E E3 NA
This question already has answers here:
How to join (merge) data frames (inner, outer, left, right)
(13 answers)
Closed 2 years ago.
I want to add extra columns depending on values of code which are defined in VAR
DF <- data.frame(id = c(1:5), code = c("A","B","C","D","E"), sub = c("A1","B1","C1","D1","E1"))
id code sub
1 1 A A1
2 2 B B1
3 3 C C1
4 4 D D1
5 5 E E1
VAR <- c("A","B")
How result should be:
id code sub AB ABsub
1 1 A A1 A A1
2 2 B B1 B B1
3 3 C C1 <NA> <NA>
4 4 D D1 <NA> <NA>
5 5 E E1 <NA> <NA>
Or using dplyr:
library(dplyr)
DF<-data.frame(id=c(1:5),code=c("A","B","C","D","E"),sub=c("A1","B1","C1","D1","E1"), stringsAsFactors = FALSE)
VAR<-c("A","B")
DF <- DF %>%
mutate(AB = ifelse(code %in% {{VAR}}, code, NA_character_)) %>%
mutate(ABsub = ifelse(code == AB, sub, NA_character_))
with:
> DF
id code sub AB ABsub
1 1 A A1 A A1
2 2 B B1 B B1
3 3 C C1 <NA> <NA>
4 4 D D1 <NA> <NA>
5 5 E E1 <NA> <NA>
Also works if VAR would equal c("A", "B", "C") but we do not know if that is what you are after.
A simple base R option using merge + subset
merge(DF,subset(DF,code %in% VAR),by = "id",all = TRUE)
such that
> merge(DF,subset(DF,code %in% VAR),by = "id",all = TRUE)
id code.x sub.x code.y sub.y
1 1 A A1 A A1
2 2 B B1 B B1
3 3 C C1 <NA> <NA>
4 4 D D1 <NA> <NA>
5 5 E E1 <NA> <NA>
A dplyr solution with across():
library(dplyr)
DF %>%
mutate(across(-id, ~ replace(.x, !(code %in% VAR), NA), .names = "AB{col}"))
# id code sub ABcode ABsub
# 1 1 A A1 A A1
# 2 2 B B1 B B1
# 3 3 C C1 <NA> <NA>
# 4 4 D D1 <NA> <NA>
# 5 5 E E1 <NA> <NA>
or with left_join():
DF %>%
filter(code %in% VAR) %>%
left_join(DF, ., by = "id", suffix = c("", "AB"))
# id code sub codeAB subAB
# 1 1 A A1 A A1
# 2 2 B B1 B B1
# 3 3 C C1 <NA> <NA>
# 4 4 D D1 <NA> <NA>
# 5 5 E E1 <NA> <NA>
Note: If you have multiple columns in your real data, you don't need to type
mutate(Col1 = ifelse(...), Col2 = ifelse(...), etc.)
one by one.
Here's a solution
ABsub <- ifelse(DF$code %in% VAR, DF$code, NA)
cbind(DF, ABsub)
I got a nested list l with each item each self is a 2 level list. For example:
l1 = list("a", list("a1"= "a1v"))
l2 = list("b", list("b1" = "b1v", b2 = "b2v"))
l3 = list("c", list("c1" = c("c1v1", "c1v2", "c1v3")))
l = list(l1, l2, l3)
How do I tranform it to a data.frame like this:
df = data.frame(A = c("a", "b", "b", "c", "c", "c"), B= c("a1", "b1", "b2", "c1", "c1", "c1"), C=c("a1v", "b1v", "b2v", "c1v1", "c1v2", "c1v3"))
> df
A B C
1 a a1 a1v
2 b b1 b1v
3 b b2 b2v
4 c c1 c1v1
5 c c1 c1v2
6 c c1 c1v3
Tried with seperate_rows and map_df but both failed to deal with inconsistent dimension of .x[[2]] items.
Update 1:
#akrun's solution is not running for me:
We could use bind_rows with map
library(purrr)
library(dplyr)
library(tidyr)
map_dfr(l, ~bind_cols(.x) %>%
pivot_longer(cols = -1, names_to = 'B', values_to = 'C') %>%
rename_at(1, ~'A'))
# A tibble: 6 x 3
# A B C
#* <chr> <chr> <chr>
#1 a a1 a1v
#2 b b1 b1v
#3 b b2 b2v
#4 c c1 c1v1
#5 c c1 c1v2
#6 c c1 c1v3
If the sample data in your question accurately reflects your actual data, you can try one of the following:
library(data.table)
data.table(l)[, list(names(unlist(l)),
unlist(l, use.names = FALSE))][
, V3 := V2[1], cumsum(V1 == "")][V1 != ""]
## V1 V2 V3
## 1: a1 a1v a
## 2: b1 b1v b
## 3: b2 b2v b
## 4: c11 c1v1 c
## 5: c12 c1v2 c
## 6: c13 c1v3 c
reshape2::melt(setNames(lapply(l, "[[", -1), lapply(l, "[[", 1)))
## value L2 L1
## 1 a1v a1 a
## 2 b1v b1 b
## 3 b2v b2 b
## 4 c1v1 c1 c
## 5 c1v2 c1 c
## 6 c1v3 c1 c
Base R option :
do.call(rbind, lapply(l, function(x) {
data.frame(A = x[[1]], B = unlist(x[[2]]), C = names(x[[2]]))
}))
# A B C
#a1 a a1v a1
#b1 b b1v b1
#b2 b b2v b2
#c11 c c1v1 c1
#c12 c c1v2 c1
#c13 c c1v3 c1
Since this is also one of the solution, I will post it here as well. This one is the one I can relate to.
map_df(l, ~ tibble(A=.x[[1]], B=names(.x[[2]]), C= unlist(.x[[2]])))
Read:
Run through all elements of l and make a data.frame (map_df and ~ inside) from a sub-data.frame created by tibble where column A = ..., B = ..`, ...
Thanks go to:
#akrun for prompt answer, I could have used the solution, but was
too busy to figure out.
#A5C1D2H2I1M1N2O1R2T1 also provided a
performant answer.
#Ronak Shah provided a plain R base
solution that I can translate to this.
I am still relatively new to working in R and I am not sure how to approach this problem. Any help or advice is greatly appreciated!!!
The problem I have is that I am working with two data frames and I need to recode the first data frame with values from the second. The first data frame (df1) contains the data from the respondents to a survey and the other data frame(df2) is the data dictionary for df1.
The data looks like this:
df1 <- data.frame(a = c(1,2,3),
b = c(4,5,6),
c = c(7,8,9))
df2 <- data.frame(columnIndicator = c("a","a","a","b","b","b","c","c","c" ),
df1_value = c(1,2,3,4,5,6,7,8,9),
new_value = c("a1","a2","a3","b1","b2","b3","c1","c2","c3"))
So far I can manually recode df1 to get the expected output by doing this:
df1 <- within(df1,{
a[a==1] <- "a1"
a[a==2] <- "a2"
a[a==3] <- "a3"
b[b==4] <- "b4"
b[b==5] <- "b5"
b[b==6] <- "b6"
c[c==7] <- "c7"
c[c==8] <- "c8"
c[c==9] <- "c9"
})
However my real dataset has about 42 columns that need to be recoded and that method is a little time intensive. Is there another way in R for me to recode the values in df1 with the values in df2?
Thanks!
Just need to transform the shape a bit.
library(data.table)
df1 <- data.frame(a = c(1,2,3),
b = c(4,5,6),
c = c(7,8,9))
df2 <- data.frame(columnIndicator = c("a","a","a","b","b","b","c","c","c" ),
df1_value = c(1,2,3,4,5,6,7,8,9),
new_value = c("a1","a2","a3","b4","b5","b6","c7","c8","c9"),stringsAsFactors = FALSE)
setDT(df1)
setDT(df2)
df1[,ID:=.I]
ldf1 <- melt(df1,measure.vars = c("a","b","c"),variable.name = "columnIndicator",value.name = "df1_value")
ldf1[df2,"new_value":=i.new_value,on=.(columnIndicator,df1_value)]
ldf1
#> ID columnIndicator df1_value new_value
#> 1: 1 a 1 a1
#> 2: 2 a 2 a2
#> 3: 3 a 3 a3
#> 4: 1 b 4 b4
#> 5: 2 b 5 b5
#> 6: 3 b 6 b6
#> 7: 1 c 7 c7
#> 8: 2 c 8 c8
#> 9: 3 c 9 c9
dcast(ldf1,ID~columnIndicator,value.var = "new_value")
#> ID a b c
#> 1: 1 a1 b4 c7
#> 2: 2 a2 b5 c8
#> 3: 3 a3 b6 c9
Created on 2020-04-18 by the reprex package (v0.3.0)
In base R, we can unlist df1 match it with df1_value and get corresponding new_value.
df1[] <- df2$new_value[match(unlist(df1), df2$df1_value)]
df1
# a b c
#1 a1 b1 c1
#2 a2 b2 c2
#3 a3 b3 c3
Is this what you are looking for???
library(dplyr)
df3 <- df1 %>% gather(key = "key", value = "value")
df3 %>% inner_join(df2, by = c("key" = "columnIndicator", "value" = "df1_value"))
Output
key value new_value
1 a 1 a1
2 a 2 a2
3 a 3 a3
4 b 4 b1
5 b 5 b2
6 b 6 b3
7 c 7 c1
8 c 8 c2
9 c 9 c3
I have a dataframe as follows and I would like to combine two columns, namely Var1 and Var2. I want the combined column (Var3) to contain no duplicates of <alpha><digit>. i.e. if Var1 == A1 and Var2 == A1, hence Var3 == A1 but not Var3 == A1-A1 or if Var1 == A4-E9 and Var2 == A4, hence Var3 == A4-E9 but not Var3 == A4-E9-A4
df <- read.table(header = TRUE, text =
"id Var1 Var2
A A1 A1
B F2 A2
C NA A3
D A4-E9 A4
E E5 A5
F NA NA
G B2-R4 A3-B2
H B3-B4 E1-G5", stringsAsFactors = FALSE)
The following is my code. I would like to improve on its readability as well as get rid of the NA that is present in row 3's entry for Var3, i.e A3-NA.
library(dplyr)
library(tidyr)
df %>%
mutate(Var3 = paste(Var1, Var2, sep = "-")) %>%
separate_rows(Var3, sep = "-") %>%
group_by(id, Var3) %>%
slice(1) %>%
group_by(id) %>%
mutate(Var3 = paste(unlist(Var3[!is.na(Var3)]), collapse = "-")) %>%
slice(1) %>%
ungroup
Here's my desired output:
# A tibble: 8 x 4
id Var1 Var2 Var3
<chr> <chr> <chr> <chr>
1 A A1 A1 A1
2 B F2 A2 A2-F2
3 C <NA> A3 A3
4 D A4-E9 A4 A4-E9
5 E E5 A5 A5-E5
6 F <NA> <NA> <NA>
7 G B2-R4 A3-B2 A3-B2-R4
8 H B3-B4 E1-G5 B3-B4-E1-G5
if 'df1' is the output, then we remove the 'NA' that follows a - with sub
df1 %>%
mutate(Var3 = sub("-NA", "", Var3))
# A tibble: 8 x 4
# id Var1 Var2 Var3
# <chr> <chr> <chr> <chr>
#1 A A1 A1 A1
#2 B F2 A2 A2-F2
#3 C <NA> A3 A3
#4 D A4-E9 A4 A4-E9
#5 E E5 A5 A5-E5
#6 F <NA> <NA> NA
#7 G B2-R4 A3-B2 A3-B2-R4
#8 H B3-B4 E1-G5 B3-B4-E1-G5
We can also do this slightly differently with tidyverse by gather into 'long' format, then split the 'value' column using separate_rows, grouped by 'id', summarise the 'Var3' column by pasteing the sorted unique elements of 'Var3' and left_join with the original dataset 'df'
library(tidyverse)
gather(df, key, value, -id) %>%
separate_rows(value) %>%
group_by(id) %>%
summarise(Var3 = paste(sort(unique(value)), collapse='-')) %>%
mutate(Var3 = replace(Var3, Var3=='', NA)) %>%
left_join(df, .)
# id Var1 Var2 Var3
#1 A A1 A1 A1
#2 B F2 A2 A2-F2
#3 C <NA> A3 A3
#4 D A4-E9 A4 A4-E9
#5 E E5 A5 A5-E5
#6 F <NA> <NA> <NA>
#7 G B2-R4 A3-B2 A3-B2-R4
#8 H B3-B4 E1-G5 B3-B4-E1-G5
NOTE: The %>% makes even a simple code to appear in multiple lines, but if required, we can put all those statements in a single line and term as one-liner
Here is a one-liner
library(data.table)
setDT(df)[, Var3 := paste(sort(unique(unlist(strsplit(unlist(.SD),"-")))), collapse="-"), id]
You could do it in one line
df$Var3 = lapply(strsplit(paste(df$Var1, df$Var2, sep = "-"),"-"),
function(x)paste(unique(x)[unique(x)!="NA"],collapse="-"))
Output:
id Var1 Var2 Var3
1 A A1 A1 A1
2 B F2 A2 F2-A2
3 C <NA> A3 A3
4 D A4-E9 A4 A4-E9
5 E E5 A5 E5-A5
6 F <NA> <NA>
7 G B2-R4 A3-B2 B2-R4-A3
8 H B3-B4 E1-G5 B3-B4-E1-G5
The first part in the lapply function is similar to your first call with dplyr. First the columns are concatenated, and then we split them again.
The function within lapply removes all NA's, and then collapses the string again.
Hope this helps!
EDIT: Speed comparison for fun!
262,144 rows
Average runtimes:
Florian: 3.97 seconds
Sotos: 2.46 seconds
Akrun: 1.34 seconds
Adamm: >120 seconds
df <- read.table(header = TRUE, text =
"id Var1 Var2
A A1 A1
B F2 A2
C NA A3
D A4-E9 A4
E E5 A5
F NA NA
G B2-R4 A3-B2
H B3-B4 E1-G5", stringsAsFactors = FALSE)
for(i in 1:15)
{
df = rbind(df,df)
}
library(microbenchmark)
# Florian's method
microbenchmark(
lapply(strsplit(paste(df$Var1, df$Var2, sep = "-"),"-"),
function(x)paste(unique(x)[unique(x)!="NA"],collapse="-")),times=5)
# Sotos'method
microbenchmark(
gsub('NA-|-NA', '', vapply(strsplit(do.call(paste, df[-1]), " |-"), function(i) paste(unique(i), collapse = "-"), character(1L))), times=5)
# akrun method
library(data.table)
microbenchmark(
setDT(df)[, Var3 := paste(sort(unique(unlist(strsplit(unlist(.SD),"-")))), collapse="-"), id], times=5)
# Adamm method
microbenchmark(
sapply(1:nrow(df), function(i) ifelse(df[i,2]!=df[i,3] & !is.na(df[i,2]) & !is.na(df[i,3]), paste(df[i,2], df[i,3], sep="-"), ifelse(!is.na(df[i,3]), df[i,3], df[i,2]))), times=5)
If you want complex solution; long one-liner, nested ifelse().
df$Var3 <- sapply(1:nrow(df), function(i) ifelse(df[i,2]!=df[i,3] & !is.na(df[i,2]) & !is.na(df[i,3]), paste(df[i,2], df[i,3], sep="-"), ifelse(!is.na(df[i,3]), df[i,3], df[i,2])))
> df
id Var1 Var2 Var3
1 A A1 A1 A1
2 B F2 A2 F2-A2
3 C <NA> A3 A3
4 D A4-E9 A4 A4-E9-A4
5 E E5 A5 E5-A5
6 F <NA> <NA> <NA>
7 G B2-R4 A3-B2 B2-R4-A3-B2
8 H B3-B4 E1-G5 B3-B4-E1-G5
In case of efficiency I made a small experiment and I measured time of each proposed solution, here are the results:
First of all I need more rows:
n <- 10000
df <- do.call("rbind", replicate(n, df, simplify = FALSE))
Akrun solution 1 with tidyverse
Time difference of 1.452809 secs
Akrun solution 2 with data.table
Time difference of 0.4530261 secs
Florian Maas solution with lapply
Time difference of 1.812106 secs
My solution with sapply
Time difference of 2.289345 mins
Sotos solution
Time difference of 1.515296 secs