I have a list of 12 elements, each of which is a data frame. Each data frame contains three columns: two that are common to all the elements and one that differs.
The two common columns are:
coche_OEM
dia_hora_OEM
The other column, which is different in every element, can be collapsed into a single column when converting the list into a data frame. For instance, column U0073 in one of the elements contains one value with the same name as the column, whereas column B1182 contains another value that likewise has the same name as its variable.
The issue is that I would like to convert this list into a data frame with three columns (variables):
coche_OEM
dia_hora_OEM
DTC: this column with all the values present in each column with their codes.
The list is this one:
# Example input as produced by dput(): a list of 12 grouped tibbles
# (class "grouped_df"). Every element has three columns: one factor column
# named after its code (B1182, B124D, P2000, U3003, B1D01, U0155, C1B00,
# P037D, P0616, P0562, U0073, P0138) plus the two shared columns coche_OEM
# (factor) and dia_hora_OEM (POSIXct, tzone "UTC").
# Each element also carries a `groups` attribute built on its code column and
# coche_OEM with .drop = FALSE. Some factor labels are deparsed vectors such
# as "c(\"P2000\", \"P2000\", \"P2000\")" rather than a plain code.
listdf <- list(structure(list(B1182 = structure(1L, .Label = c("B1182",
"NULL"), class = "factor"), coche_OEM = structure(3L, .Label = c("356232050832996",
"356232050836666", "356232050880755", "356232050882736", "356232050899078",
"356232050905933"), class = "factor"), dia_hora_OEM = structure(1577774413, class = c("POSIXct",
"POSIXt"), tzone = "UTC")), row.names = c(NA, -1L), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), groups = structure(list(B1182 = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("B1182",
"NULL"), class = "factor"), coche_OEM = structure(c(1L, 2L, 3L,
4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L), .Label = c("356232050832996",
"356232050836666", "356232050880755", "356232050882736", "356232050899078",
"356232050905933"), class = "factor"), .rows = list(integer(0),
integer(0), 1L, integer(0), integer(0), integer(0), integer(0),
integer(0), integer(0), integer(0), integer(0), integer(0))), .Names = c("B1182",
"coche_OEM", ".rows"), row.names = c(NA, -12L), class = c("tbl_df",
"tbl", "data.frame"), .drop = FALSE), .Names = c("B1182", "coche_OEM",
"dia_hora_OEM")), structure(list(B124D = structure(1L, .Label = c("B124D",
"NULL"), class = "factor"), coche_OEM = structure(3L, .Label = c("356232050832996",
"356232050836666", "356232050880755", "356232050882736", "356232050899078",
"356232050905933"), class = "factor"), dia_hora_OEM = structure(1577774413, class = c("POSIXct",
"POSIXt"), tzone = "UTC")), row.names = c(NA, -1L), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), groups = structure(list(B124D = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("B124D",
"NULL"), class = "factor"), coche_OEM = structure(c(1L, 2L, 3L,
4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L), .Label = c("356232050832996",
"356232050836666", "356232050880755", "356232050882736", "356232050899078",
"356232050905933"), class = "factor"), .rows = list(integer(0),
integer(0), 1L, integer(0), integer(0), integer(0), integer(0),
integer(0), integer(0), integer(0), integer(0), integer(0))), .Names = c("B124D",
"coche_OEM", ".rows"), row.names = c(NA, -12L), class = c("tbl_df",
"tbl", "data.frame"), .drop = FALSE), .Names = c("B124D", "coche_OEM",
"dia_hora_OEM")), structure(list(P2000 = structure(1L, .Label = c("c(\"P2000\", \"P2000\", \"P2000\")",
"NULL"), class = "factor"), coche_OEM = structure(5L, .Label = c("356232050832996",
"356232050836666", "356232050880755", "356232050882736", "356232050899078",
"356232050905933"), class = "factor"), dia_hora_OEM = structure(1577793330, class = c("POSIXct",
"POSIXt"), tzone = "UTC")), row.names = c(NA, -1L), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), groups = structure(list(P2000 = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("c(\"P2000\", \"P2000\", \"P2000\")",
"NULL"), class = "factor"), coche_OEM = structure(c(1L, 2L, 3L,
4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L), .Label = c("356232050832996",
"356232050836666", "356232050880755", "356232050882736", "356232050899078",
"356232050905933"), class = "factor"), .rows = list(integer(0),
integer(0), integer(0), integer(0), 1L, integer(0), integer(0),
integer(0), integer(0), integer(0), integer(0), integer(0))), .Names = c("P2000",
"coche_OEM", ".rows"), row.names = c(NA, -12L), class = c("tbl_df",
"tbl", "data.frame"), .drop = FALSE), .Names = c("P2000", "coche_OEM",
"dia_hora_OEM")), structure(list(U3003 = structure(c(2L, 2L), .Label = c("NULL",
"U3003"), class = "factor"), coche_OEM = structure(c(5L, 1L), .Label = c("356232050832996",
"356232050836666", "356232050880755", "356232050882736", "356232050899078",
"356232050905933"), class = "factor"), dia_hora_OEM = structure(c(1577793330,
1582648789), class = c("POSIXct", "POSIXt"), tzone = "UTC")), row.names = c(NA,
-2L), class = c("grouped_df", "tbl_df", "tbl", "data.frame"), groups = structure(list(
U3003 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L), .Label = c("NULL", "U3003"), class = "factor"),
coche_OEM = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L,
4L, 5L, 6L), .Label = c("356232050832996", "356232050836666",
"356232050880755", "356232050882736", "356232050899078",
"356232050905933"), class = "factor"), .rows = list(integer(0),
integer(0), integer(0), integer(0), integer(0), integer(0),
2L, integer(0), integer(0), integer(0), 1L, integer(0))), .Names = c("U3003",
"coche_OEM", ".rows"), row.names = c(NA, -12L), class = c("tbl_df",
"tbl", "data.frame"), .drop = FALSE), .Names = c("U3003", "coche_OEM",
"dia_hora_OEM")), structure(list(B1D01 = structure(c(1L, 1L,
2L), .Label = c("B1D01", "c(\"B1D01\", \"B1D01\")", "NULL"), class = "factor"),
coche_OEM = structure(c(2L, 1L, 1L), .Label = c("356232050832996",
"356232050836666", "356232050880755", "356232050882736",
"356232050899078", "356232050905933"), class = "factor"),
dia_hora_OEM = structure(c(1581690876, 1582648789, 1582651926
), class = c("POSIXct", "POSIXt"), tzone = "UTC")), row.names = c(NA,
-3L), class = c("grouped_df", "tbl_df", "tbl", "data.frame"), groups = structure(list(
B1D01 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("B1D01", "c(\"B1D01\", \"B1D01\")",
"NULL"), class = "factor"), coche_OEM = structure(c(1L, 2L,
3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L,
6L), .Label = c("356232050832996", "356232050836666", "356232050880755",
"356232050882736", "356232050899078", "356232050905933"), class = "factor"),
.rows = list(2L, 1L, integer(0), integer(0), integer(0),
integer(0), 3L, integer(0), integer(0), integer(0), integer(0),
integer(0), integer(0), integer(0), integer(0), integer(0),
integer(0), integer(0))), .Names = c("B1D01", "coche_OEM",
".rows"), row.names = c(NA, -18L), class = c("tbl_df", "tbl",
"data.frame"), .drop = FALSE), .Names = c("B1D01", "coche_OEM",
"dia_hora_OEM")), structure(list(U0155 = structure(2L, .Label = c("NULL",
"U0155"), class = "factor"), coche_OEM = structure(1L, .Label = c("356232050832996",
"356232050836666", "356232050880755", "356232050882736", "356232050899078",
"356232050905933"), class = "factor"), dia_hora_OEM = structure(1582648789, class = c("POSIXct",
"POSIXt"), tzone = "UTC")), row.names = c(NA, -1L), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), groups = structure(list(U0155 = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("NULL",
"U0155"), class = "factor"), coche_OEM = structure(c(1L, 2L,
3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L), .Label = c("356232050832996",
"356232050836666", "356232050880755", "356232050882736", "356232050899078",
"356232050905933"), class = "factor"), .rows = list(integer(0),
integer(0), integer(0), integer(0), integer(0), integer(0),
1L, integer(0), integer(0), integer(0), integer(0), integer(0))), .Names = c("U0155",
"coche_OEM", ".rows"), row.names = c(NA, -12L), class = c("tbl_df",
"tbl", "data.frame"), .drop = FALSE), .Names = c("U0155", "coche_OEM",
"dia_hora_OEM")), structure(list(C1B00 = structure(1L, .Label = c("C1B00",
"NULL"), class = "factor"), coche_OEM = structure(1L, .Label = c("356232050832996",
"356232050836666", "356232050880755", "356232050882736", "356232050899078",
"356232050905933"), class = "factor"), dia_hora_OEM = structure(1582648789, class = c("POSIXct",
"POSIXt"), tzone = "UTC")), row.names = c(NA, -1L), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), groups = structure(list(C1B00 = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("C1B00",
"NULL"), class = "factor"), coche_OEM = structure(c(1L, 2L, 3L,
4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L), .Label = c("356232050832996",
"356232050836666", "356232050880755", "356232050882736", "356232050899078",
"356232050905933"), class = "factor"), .rows = list(1L, integer(0),
integer(0), integer(0), integer(0), integer(0), integer(0),
integer(0), integer(0), integer(0), integer(0), integer(0))), .Names = c("C1B00",
"coche_OEM", ".rows"), row.names = c(NA, -12L), class = c("tbl_df",
"tbl", "data.frame"), .drop = FALSE), .Names = c("C1B00", "coche_OEM",
"dia_hora_OEM")), structure(list(P037D = structure(2L, .Label = c("NULL",
"P037D"), class = "factor"), coche_OEM = structure(1L, .Label = c("356232050832996",
"356232050836666", "356232050880755", "356232050882736", "356232050899078",
"356232050905933"), class = "factor"), dia_hora_OEM = structure(1582648789, class = c("POSIXct",
"POSIXt"), tzone = "UTC")), row.names = c(NA, -1L), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), groups = structure(list(P037D = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("NULL",
"P037D"), class = "factor"), coche_OEM = structure(c(1L, 2L,
3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L), .Label = c("356232050832996",
"356232050836666", "356232050880755", "356232050882736", "356232050899078",
"356232050905933"), class = "factor"), .rows = list(integer(0),
integer(0), integer(0), integer(0), integer(0), integer(0),
1L, integer(0), integer(0), integer(0), integer(0), integer(0))), .Names = c("P037D",
"coche_OEM", ".rows"), row.names = c(NA, -12L), class = c("tbl_df",
"tbl", "data.frame"), .drop = FALSE), .Names = c("P037D", "coche_OEM",
"dia_hora_OEM")), structure(list(P0616 = structure(2L, .Label = c("NULL",
"P0616"), class = "factor"), coche_OEM = structure(1L, .Label = c("356232050832996",
"356232050836666", "356232050880755", "356232050882736", "356232050899078",
"356232050905933"), class = "factor"), dia_hora_OEM = structure(1582648789, class = c("POSIXct",
"POSIXt"), tzone = "UTC")), row.names = c(NA, -1L), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), groups = structure(list(P0616 = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("NULL",
"P0616"), class = "factor"), coche_OEM = structure(c(1L, 2L,
3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L), .Label = c("356232050832996",
"356232050836666", "356232050880755", "356232050882736", "356232050899078",
"356232050905933"), class = "factor"), .rows = list(integer(0),
integer(0), integer(0), integer(0), integer(0), integer(0),
1L, integer(0), integer(0), integer(0), integer(0), integer(0))), .Names = c("P0616",
"coche_OEM", ".rows"), row.names = c(NA, -12L), class = c("tbl_df",
"tbl", "data.frame"), .drop = FALSE), .Names = c("P0616", "coche_OEM",
"dia_hora_OEM")), structure(list(P0562 = structure(2L, .Label = c("NULL",
"P0562"), class = "factor"), coche_OEM = structure(1L, .Label = c("356232050832996",
"356232050836666", "356232050880755", "356232050882736", "356232050899078",
"356232050905933"), class = "factor"), dia_hora_OEM = structure(1582648789, class = c("POSIXct",
"POSIXt"), tzone = "UTC")), row.names = c(NA, -1L), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), groups = structure(list(P0562 = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("NULL",
"P0562"), class = "factor"), coche_OEM = structure(c(1L, 2L,
3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L), .Label = c("356232050832996",
"356232050836666", "356232050880755", "356232050882736", "356232050899078",
"356232050905933"), class = "factor"), .rows = list(integer(0),
integer(0), integer(0), integer(0), integer(0), integer(0),
1L, integer(0), integer(0), integer(0), integer(0), integer(0))), .Names = c("P0562",
"coche_OEM", ".rows"), row.names = c(NA, -12L), class = c("tbl_df",
"tbl", "data.frame"), .drop = FALSE), .Names = c("P0562", "coche_OEM",
"dia_hora_OEM")), structure(list(U0073 = structure(2L, .Label = c("NULL",
"U0073"), class = "factor"), coche_OEM = structure(1L, .Label = c("356232050832996",
"356232050836666", "356232050880755", "356232050882736", "356232050899078",
"356232050905933"), class = "factor"), dia_hora_OEM = structure(1582648789, class = c("POSIXct",
"POSIXt"), tzone = "UTC")), row.names = c(NA, -1L), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), groups = structure(list(U0073 = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("NULL",
"U0073"), class = "factor"), coche_OEM = structure(c(1L, 2L,
3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L), .Label = c("356232050832996",
"356232050836666", "356232050880755", "356232050882736", "356232050899078",
"356232050905933"), class = "factor"), .rows = list(integer(0),
integer(0), integer(0), integer(0), integer(0), integer(0),
1L, integer(0), integer(0), integer(0), integer(0), integer(0))), .Names = c("U0073",
"coche_OEM", ".rows"), row.names = c(NA, -12L), class = c("tbl_df",
"tbl", "data.frame"), .drop = FALSE), .Names = c("U0073", "coche_OEM",
"dia_hora_OEM")), structure(list(P0138 = structure(1L, .Label = c("c(\"P0138\", \"P0138\", \"P0138\")",
"NULL"), class = "factor"), coche_OEM = structure(5L, .Label = c("356232050832996",
"356232050836666", "356232050880755", "356232050882736", "356232050899078",
"356232050905933"), class = "factor"), dia_hora_OEM = structure(1583391111, class = c("POSIXct",
"POSIXt"), tzone = "UTC")), row.names = c(NA, -1L), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), groups = structure(list(P0138 = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("c(\"P0138\", \"P0138\", \"P0138\")",
"NULL"), class = "factor"), coche_OEM = structure(c(1L, 2L, 3L,
4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L), .Label = c("356232050832996",
"356232050836666", "356232050880755", "356232050882736", "356232050899078",
"356232050905933"), class = "factor"), .rows = list(integer(0),
integer(0), integer(0), integer(0), 1L, integer(0), integer(0),
integer(0), integer(0), integer(0), integer(0), integer(0))), .Names = c("P0138",
"coche_OEM", ".rows"), row.names = c(NA, -12L), class = c("tbl_df",
"tbl", "data.frame"), .drop = FALSE), .Names = c("P0138", "coche_OEM",
"dia_hora_OEM")))
So, how could I convert this list into a data frame with my requirements?
We can rename all the columns that are not 'coche_OEM' or 'dia_hora_OEM' to a predefined string ('id' here):
# For every element of `listdf`, rename each column other than the two shared
# ones ("coche_OEM" and "dia_hora_OEM") to "id", then row-bind the renamed
# data frames into a single tibble.
map_df(
  listdf,
  function(element) {
    rename_at(
      element,
      vars(-c("coche_OEM", "dia_hora_OEM")),
      function(old_name) "id"
    )
  }
)
# A tibble: 15 x 3
# Groups: id, coche_OEM [78]
id coche_OEM dia_hora_OEM
<chr> <fct> <dttm>
1 "B1182" 356232050880755 2019-12-31 06:40:13
2 "B124D" 356232050880755 2019-12-31 06:40:13
3 "c(\"P2000\", \"P2000\", \"P2000\")" 356232050899078 2019-12-31 11:55:30
4 "U3003" 356232050899078 2019-12-31 11:55:30
5 "U3003" 356232050832996 2020-02-25 16:39:49
6 "B1D01" 356232050836666 2020-02-14 14:34:36
7 "B1D01" 356232050832996 2020-02-25 16:39:49
8 "c(\"B1D01\", \"B1D01\")" 356232050832996 2020-02-25 17:32:06
9 "U0155" 356232050832996 2020-02-25 16:39:49
10 "C1B00" 356232050832996 2020-02-25 16:39:49
11 "P037D" 356232050832996 2020-02-25 16:39:49
12 "P0616" 356232050832996 2020-02-25 16:39:49
13 "P0562" 356232050832996 2020-02-25 16:39:49
14 "U0073" 356232050832996 2020-02-25 16:39:49
15 "c(\"P0138\", \"P0138\", \"P0138\")" 356232050899078 2020-03-05 06:51:51
I have two tables:
dat: contains the data
dates: contains the table of dates
library(data.table)

# Lookup table of 12 dates, stored as day counts with class "Date".
dates <- data.table(
  date = structure(
    c(17562, 17590, 17621, 17651, 17682, 17712,
      17743, 17774, 17804, 17835, 17865, 17896),
    class = "Date"
  )
)

# Observations: one row per (date, country, state, item) with an integer
# value. The three label columns are factors with explicit level order.
dat <- data.table(
  date = structure(
    c(17546, 17743, 17778, 17901, 17536, 17806,
      17901, 17981, 17532, 17722, 17969, 18234),
    class = "Date"
  ),
  country = factor(
    rep(c("AAA", "BBB", "CCC"), each = 4L),
    levels = c("AAA", "BBB", "CCC")
  ),
  state = factor(
    c("S1", "S1", "S2", "S3", "S4", "S1",
      "S2", "S5", "S6", "S1", "S2", "S2"),
    levels = c("S1", "S2", "S3", "S4", "S5", "S6")
  ),
  item = factor(
    c("M1", "M2", "M4", "M6", "M3", "M5",
      "M3", "M2", "M2", "M4", "M5", "M7"),
    levels = c("M1", "M2", "M3", "M4", "M5", "M6", "M7")
  ),
  value = c(67L, 10L, 50L, 52L, 93L, 50L, 62L, 46L, 6L, 30L, 30L, 14L)
)

# Key both tables on `date` so that X[Y, roll = ...] joins on that column.
setkey(dates, date)
setkey(dat, date)
The result I'm after is shown below, i.e. doing a rolling join with each individual row of dat and then combining the results.
# Sketch of the desired result: one rolling join (roll = 90) of `dates`
# against each single row of `dat`, with the twelve pieces stacked by rbind().
# The `...` line is a placeholder for rows 4 to 11 of `dat`, not literal code.
rbind(
dat[1,][dates, roll = 90],
dat[2,][dates, roll = 90],
dat[3,][dates, roll = 90],
...
dat[12,][dates, roll = 90]
)
My actual dataset is much larger, so it's not practical to list every row of dat. Is there a shorthand way of doing the same thing without a loop?
If I understand your intent correctly, you want to roll each record forward for up to 90 days.
I used a cross join and then applied the rollover criterion to subset the result.
Your original tables:
library(data.table)

# Lookup table of 12 dates, stored as day counts with class "Date".
dates <- data.table(
  date = structure(
    c(17562, 17590, 17621, 17651, 17682, 17712,
      17743, 17774, 17804, 17835, 17865, 17896),
    class = "Date"
  )
)

# Observations: one row per (date, country, state, item) with an integer
# value. No key is set on either table here.
dat <- data.table(
  date = structure(
    c(17546, 17743, 17778, 17901, 17536, 17806,
      17901, 17981, 17532, 17722, 17969, 18234),
    class = "Date"
  ),
  country = factor(
    rep(c("AAA", "BBB", "CCC"), each = 4L),
    levels = c("AAA", "BBB", "CCC")
  ),
  state = factor(
    c("S1", "S1", "S2", "S3", "S4", "S1",
      "S2", "S5", "S6", "S1", "S2", "S2"),
    levels = c("S1", "S2", "S3", "S4", "S5", "S6")
  ),
  item = factor(
    c("M1", "M2", "M4", "M6", "M3", "M5",
      "M3", "M2", "M2", "M4", "M5", "M7"),
    levels = c("M1", "M2", "M3", "M4", "M5", "M6", "M7")
  ),
  value = c(67L, 10L, 50L, 52L, 93L, 50L, 62L, 46L, 6L, 30L, 30L, 14L)
)
Note, I haven't setkey.
I am using a cross join function from the reference: How to do cross join in R?
# Cross join of two data.tables: every row of X paired with every row of Y.
#
# X, Y: data.tables. Neither is modified; the dummy column is added to copies.
# Returns a data.table holding the columns of X followed by those of Y.
CJ.table.1 <- function(X, Y) {
  # Prepend a constant dummy column `k` to each table so that a join on `k`
  # matches all rows with all rows.
  left_keyed <- setkey(X[, c(k = 1, .SD)], k)
  right_dummy <- Y[, c(k = 1, .SD)]

  # allow.cartesian = TRUE permits the many-to-many result of joining on `k`.
  crossed <- left_keyed[right_dummy, allow.cartesian = TRUE]

  # Drop the dummy column by reference and return the joined table.
  crossed[, k := NULL]
}
Then I cross join, subset for the roll join, rename columns and sort
# Pair every row of `dat` with every row of `dates`. In the cross join, `date`
# is the column from `dat` and `i.date` the one from `dates`. Keep the pairs
# where i.date falls 0 to 90 days after date, report i.date as `date`, and
# sort the result.
dsn1 <- CJ.table.1(dat, dates)[
  i.date - date >= 0 & i.date - date <= 90,
  .(date = i.date, country, state, item, value)
][
  order(country, state, item, value, date)
]
This is not necessarily the best way to do it, but you could simply write a loop here to iterate through your data:
# Rolling-join `dates` against each single row of `dat` (roll = 90) and stack
# the per-row results into `df`.
# The pieces go into a preallocated list and are bound once with rbindlist(),
# rather than growing `df` with rbind() on every iteration.
pieces <- vector("list", nrow(dat))
# seq_len() gives an empty sequence for a zero-row `dat`, whereas
# 1:nrow(dat) would iterate over c(1, 0).
for (i in seq_len(nrow(dat))) {
  pieces[[i]] <- dat[i, ][dates, roll = 90]
}
df <- rbindlist(pieces)
head(df)
date country state item value
1: 2018-01-31 CCC S6 M2 6
2: 2018-02-28 CCC S6 M2 6
3: 2018-03-31 CCC S6 M2 6
4: 2018-04-30 <NA> <NA> <NA> NA
5: 2018-05-31 <NA> <NA> <NA> NA
Edit: just saw you said "without a loop", it's been a long day. This is one way to solve the problem though.