Related
This seems like a simple enough thing but I can't figure it out nor find an answer online - apologies if it something obvious. I have two seperate dataframes containing the same patients with the same unique identifier. Both datasets have time varying variables - one continuous and one categorical and the time to each reading is different in the sets but have a common start point at time 1. I have tried to modify the tmerge function from survival package but without luck as I don't have a dichotomous outcome variable nor a single data set with one row per patient.
Reprex for creating the datasets below (df1 and df2) and an example of my desired combined output table for a single patient (ID 3), output gets very long if done for all 4 patients
Thanks for any possible help
df1 <- structure(list(tstart = c(1, 1, 1, 1426, 1, 560, 567), tstop = c(2049,
3426, 1426, 1707, 560, 567, 4207), category = structure(c(1L,
1L, 1L, 2L, 1L, 4L, 2L), .Label = c("none", "high", "low", "moderate"
), class = "factor"), id = c(1L, 2L, 3L, 3L, 4L, 4L, 4L)), row.names = c(NA,
-7L), class = c("tbl_df", "tbl", "data.frame"))
df2 <- structure(list(tstart = c(1, 365, 730, 1, 365, 730, 1096, 2557,
1, 365, 730, 1096, 1826, 2557, 3652, 1), tstop = c(365, 730,
1096, 365, 730, 1096, 2557, 2582, 365, 730, 1096, 1826, 2557,
3652, 4864, 365), egfr = c(66, 62, 58, 54, 50, 43, 49, 51, 106,
103, 80, 92, 97, 90, 81, 51), id = c(1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 4L)), row.names = c(NA, -16L), class = c("tbl_df",
"tbl", "data.frame"))
df_example_patient_3 <- structure(list(id = c(3L, 3L, 3L,
3L, 3L, 3L,3L, 3L, 3L), tstart = c(1, 365, 730, 1096, 1426, 1707, 1826, 2557, 3652), tstop = c(365, 730,
1096, 1426, 1707, 1826, 2557, 3652, 4864), egfr = c(106, 103, 80, 92, 92, 92, 97, 90, 81), category = c("none", "none", "none", "none", "high", "high", "high", "high", "high")), row.names = c(NA, -9L), class = c("tbl_df",
"tbl", "data.frame"))
# DF1
tstart tstop category id
<dbl> <dbl> <fct> <int>
1 1 2049 none 1
2 1 3426 none 2
3 1 1426 none 3
4 1426 1707 high 3
5 1 560 none 4
6 560 567 moderate 4
7 567 4207 high 4
# DF2
tstart tstop egfr id
<dbl> <dbl> <dbl> <int>
1 1 365 66 1
2 365 730 62 1
3 730 1096 58 1
4 1 365 54 2
5 365 730 50 2
6 730 1096 43 2
7 1096 2557 49 2
8 2557 2582 51 2
9 1 365 106 3
10 365 730 103 3
11 730 1096 80 3
12 1096 1826 92 3
13 1826 2557 97 3
14 2557 3652 90 3
15 3652 4864 81 3
16 1 365 51 4
# Combined set
id tstart tstop egfr category
<int> <dbl> <dbl> <dbl> <chr>
1 3 1 365 106 none
2 3 365 730 103 none
3 3 730 1096 80 none
4 3 1096 1426 92 none
5 3 1426 1707 92 high
6 3 1707 1826 92 high
7 3 1826 2557 97 high
8 3 2557 3652 90 high
9 3 3652 4864 81 high
I had to do it this way to really work out the details.
First, i construct a full df1 with all the timestamps, including those of df2.
then i proceed with multiple merges. This is not elegant, but it works:
library(data.table)
library(zoo)
# Proper data.tables
setDT(df1, key = c("id", "tstart"))
setDT(df2, key = c("id", "tstart"))
timestamps_by_id <- unique(rbind(
df1[, .(id, tstart)],
df1[, .(id, tstop)],
df2[, .(id, tstart)],
df2[, .(id, tstop)],
use.names = F
))
setorder(timestamps_by_id, id, tstart)
# Merge to construct full df1
df1_full <- df1[timestamps_by_id]
df1_full[, category := na.locf(category), by = id]
df1_full[, tstop := shift(tstart, -1), by = id]
setkey(df1_full, id, tstart)
# Merge with df2
result <- na.omit(df2[df1_full, roll = T])
result[, tstop := i.tstop]
print(result[id == 3, .(id, tstart, tstop, egfr, category)])
Or a more data.tabley solution using the more arcane foverlaps:
library(data.table)
# Proper data.tables
setDT(df1, key = c("id", "tstart", "tstop"))
setDT(df2, key = c("id", "tstart", "tstop"))
# We add an infinite upper range
proper_df1 <- rbind(
df1,
df1[, .SD[which.max(tstop)], by = .(id)][, .(id, tstart = tstop, tstop = Inf, category), ]
)
setkey(proper_df1, id, tstart, tstop)
overlaps <- foverlaps(df2, proper_df1, type = "any") # Overlap join
overlaps[
tstart %between% .(i.tstart, i.tstop) & tstart != 1,
i.tstart := tstart
]
overlaps[tstop %between% .(i.tstart, i.tstop), i.tstop := tstop]
print(overlaps[
id == 3,
.(id, "tstart" = i.tstart, "tstop" = i.tstop, category, egfr)
])
This messy dplyr solution seems to work for this particular dataset but don't know would it work for all datasets, the direction of the fill may need to be altered depending on particular dataset
library(tidyverse)
library(magrittr)
df1 %>%
bind_rows(df2) %>%
group_by(id) %>%
arrange(id, tstop) %>%
mutate(
tstart = case_when(
tstart < lag(tstop) ~ lag(tstop), TRUE ~ tstart)) %>%
fill(egfr, category, .direction = "updown") %>%
ungroup() %>%
filter(id == 3)
tstart tstop category id egfr
<dbl> <dbl> <fct> <int> <dbl>
1 1 365 none 3 106
2 365 730 none 3 103
3 730 1096 none 3 80
4 1096 1426 none 3 92
5 1426 1707 high 3 92
6 1707 1826 high 3 92
7 1826 2557 high 3 97
8 2557 3652 high 3 90
9 3652 4864 high 3 81
When I run just this line of the code, the results are as expected. When I run the chunk, the mutations stop on the third line. How can I fix this, I feel like this is something new that I did not face before with the same code.
Sample data:
> dput(head(out))
structure(list(SectionCut = c("S-1", "S-1", "S-1", "S-1", "S-2",
"S-2"), OutputCase = c("LL-1", "LL-2", "LL-3", "LL-4", "LL-1",
"LL-2"), V2 = c(81.782, 119.251, 119.924, 96.282, 72.503, 109.595
), M3 = c("-29.292000000000002", "-32.661999999999999", "-30.904",
"-23.632999999999999", "29.619", "32.994"), id = c("./100-12-S01.xlsx",
"./100-12-S01.xlsx", "./100-12-S01.xlsx", "./100-12-S01.xlsx",
"./100-12-S01.xlsx", "./100-12-S01.xlsx")), row.names = c(NA,
-6L), class = c("grouped_df", "tbl_df", "tbl", "data.frame"), groups = structure(list(
SectionCut = c("S-1", "S-1", "S-1", "S-1", "S-2", "S-2"),
OutputCase = c("LL-1", "LL-2", "LL-3", "LL-4", "LL-1", "LL-2"
), id = c("./100-12-S01.xlsx", "./100-12-S01.xlsx", "./100-12-S01.xlsx",
"./100-12-S01.xlsx", "./100-12-S01.xlsx", "./100-12-S01.xlsx"
), .rows = list(1L, 2L, 3L, 4L, 5L, 6L)), row.names = c(NA,
-6L), class = c("tbl_df", "tbl", "data.frame"), .drop = TRUE))
> dput(head(Beamline_Shear))
structure(list(VLL = c(159.512186, 154.3336, 149.4451613, 167.0207595,
161.2269091, 156.4116505)), row.names = c("84-9", "84-12", "84-15",
"92-9", "92-12", "92-15"), class = "data.frame")
Code that I am trying to run:
Shear <- out[,-4] %>% mutate(N_l = str_extract(OutputCase,"\\d+"),
UG = str_extract(id,"\\d+"), a = str_extract(id,"-\\d+"),
S = str_extract(a,"\\d+"), Sections = paste0(UG,"-",S),
Sample = str_remove_all(id, "./\\d+-\\d+-|.xlsx")) %>%
left_join(Beamline_Shear %>% rownames_to_column("Sections"), by = "Sections") %>%
select(-OutputCase,-id,-Sections,-a)
There are some group attributes in the data, which should work normally, but can be an issue if we are running in a different env. Also, the mutate step and the join step doesn't really need any grouping attributes as they are fairly very straightforward rowwise operations that are vectorized.
library(dplyr)
out %>%
select(-4) %>%
ungroup %>% # // removes group attributes
mutate(N_l = str_extract(OutputCase,"\\d+"),
UG = str_extract(id,"\\d+"), a = str_extract(id,"-\\d+"),
S = str_extract(a,"\\d+"), Sections = paste0(UG,"-",S),
Sample = str_remove_all(id, "./\\d+-\\d+-|.xlsx")) %>% left_join(Beamline_Shear %>% rownames_to_column("Sections"), by = "Sections")
# A tibble: 6 x 11
# SectionCut OutputCase V2 id N_l UG a S Sections Sample VLL
# <chr> <chr> <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
#1 S-1 LL-1 81.8 ./100-12-S01.xlsx 1 100 -12 12 100-12 S01 NA
#2 S-1 LL-2 119. ./100-12-S01.xlsx 2 100 -12 12 100-12 S01 NA
#3 S-1 LL-3 120. ./100-12-S01.xlsx 3 100 -12 12 100-12 S01 NA
#4 S-1 LL-4 96.3 ./100-12-S01.xlsx 4 100 -12 12 100-12 S01 NA
#5 S-2 LL-1 72.5 ./100-12-S01.xlsx 1 100 -12 12 100-12 S01 NA
#6 S-2 LL-2 110. ./100-12-S01.xlsx 2 100 -12 12 100-12 S01 NA
I have a dataframe "data" with the following structure:
structure(list(age = c(45, 4, 32, 45), sex = c(1, 0, 1, 0), height = c(165,
178, 145, 132), weight = c(65, 73, 60, 45)), row.names = c(NA,
-4L), class = c("tbl_df", "tbl", "data.frame"))
And I would like to add to this data.frame two new variables (var1, var2), which should be calculated with the two following formulas:
var1 = age*height + (4 if sex==1 OR 2 if sex==0)
var2 = height*weight + (1 if age>40 or 2 if age=<40)
I have a problem both in adding the two variables to the data frame, both in applying a function (I tried to build a function, but seems that can be applied only to a single value and not to all values from all rows).
Can anyone help me, please?
akrun's suggestion of using Boolean arithmetic is a good one but you could also do simply a Boolean version of your own expression substituting multiplication for the if statements.s (whit mild editing of the "=<" to "<=")
data <- structure(list(age = c(45, 4, 32, 45), sex = c(1, 0, 1, 0), height = c(165, 178, 145, 132), weight = c(65, 73, 60, 45)), row.names = c(NA, -4L), class = c("tbl_df", "tbl", "data.frame"))
data <- within(data, {var1 = age*height + 4*(sex==1) + 2 *(sex==0);
var2 = height*weight + (age>40) + 2 *(age <= 40)})
#----
> data
age sex height weight var2 var1
1 45 1 165 65 10726 7429
2 4 0 178 73 12996 714
3 32 1 145 60 8702 4644
4 45 0 132 45 5941 5942
Since the two sets of conditions are each disjoint, the "non-qualifying" choice terms will each be 0.
the function ifelse() is vector based, so it will apply the conditions to each element in the vector.
df <- structure(list(age = c(45, 4, 32, 45), sex = c(1, 0, 1, 0), height = c(165,
178, 145, 132), weight = c(65, 73, 60, 45)), row.names = c(NA,
-4L), class = c("tbl_df", "tbl", "data.frame"))
df$var1 <- ifelse(df$sex == 1,(df$age * df$height) + 4,(df$age * df$height) + 2)
df$var2 <- ifelse(df$age > 40,(df$weight * df$height) + 1,(df$age * df$height) + 2)
final output
> df
# A tibble: 4 x 6
age sex height weight var1 var2
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 45 1 165 65 7429 10726
2 4 0 178 73 714 714
3 32 1 145 60 4644 4642
4 45 0 132 45 5942 5941
I rather the tool case_when() from dplyr package.
Your original data is:
data <-
structure(
list(age = c(45, 4, 32, 45),
sex = c(1, 0, 1, 0),
height = c(165, 178, 145, 132),
weight = c(65, 73, 60, 45)),
row.names = c(NA, -4L),
class = c("tbl_df", "tbl", "data.frame"))
The new variables are created by:
library(dplyr)
data ->
data %>% mutate(var1 = case_when(sex==1 ~ age*height + 4,
sex==0 ~ age*height + 2),
var2 = case_when(age>40 ~ height*weight + 1,
age<=40 ~ height*weight + 2)
)
The outcome is:
# A tibble: 4 x 6
age sex height weight var1 var2
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 45 1 165 65 7429 10726
2 4 0 178 73 714 12996
3 32 1 145 60 4644 8702
4 45 0 132 45 5942 5941
We convert the logical/binary to numeric index by adding 1 to it and use that to change the values to 2, 4, or just 1, 2 and use that in the calculation
library(dplyr)
data %>%
mutate(var1 = (age * height) + c(2, 4)[sex + 1],
var2 = (height * weight) + (age <= 40)+1)
# A tibble: 4 x 6
# age sex height weight var1 var2
# <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 45 1 165 65 7429 10726
#2 4 0 178 73 714 12996
#3 32 1 145 60 4644 8702
#4 45 0 132 45 5942 5941
I've got 2 data frames that I'm trying to divide by each other but it's not working for me. Both dataframes are 8 x 3 with column one the same for both, column names are also the same for both data frames
bal_tier[,c(1, 3:4)]
# A tibble: 8 x 3
# Groups: hierachy_level2 [8]
hierachy_level2 `201804` `201904`
<chr> <dbl> <dbl>
1 CS 239 250
2 FNZ 87 97
3 OPS 1057 1136.
4 P&T 256 279
5 R&A 520 546
6 SPE 130 136.
7 SPP 67 66
8 TUR 46 69
dput(bal_tier[,c(1, 3:4)])
structure(list(hierachy_level2 = c("CS", "FNZ", "OPS", "P&T",
"R&A", "SPE", "SPP", "TUR"), `201804` = c(239, 87, 1057, 256,
520, 130, 67, 46), `201904` = c(250, 97, 1136.5, 279, 546, 136.5,
66, 69)), row.names = c(NA, -8L), groups = structure(list(hierachy_level2 = c("CS",
"FNZ", "OPS", "P&T", "R&A", "SPE", "SPP", "TUR"), .rows = list(
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L)), row.names = c(NA, -8L), class = c("tbl_df",
"tbl", "data.frame"), .drop = FALSE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
tier_leavers[,c(1, 3:4)]
# A tibble: 8 x 3
# Groups: hierachy_level2 [8]
hierachy_level2 `201804` `201904`
<chr> <dbl> <dbl>
1 CS 32 47
2 FNZ 1 11
3 OPS 73 76
4 P&T 48 33
5 R&A 41 33
6 SPE 28 30
7 SPP 10 12
8 TUR 2 3
dput(tier_leavers[,c(1, 3:4)])
structure(list(hierachy_level2 = c("CS", "FNZ", "OPS", "P&T",
"R&A", "SPE", "SPP", "TUR"), `201804` = c(32, 1, 73, 48, 41,
28, 10, 2), `201904` = c(47, 11, 76, 33, 33, 30, 12, 3)), row.names = c(NA,
-8L), groups = structure(list(hierachy_level2 = c("CS", "FNZ",
"OPS", "P&T", "R&A", "SPE", "SPP", "TUR"), .rows = list(1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L)), row.names = c(NA, -8L), class = c("tbl_df",
"tbl", "data.frame"), .drop = FALSE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
Doing this gives me what I want:
bal_tier[,1]
# A tibble: 8 x 1
# Groups: hierachy_level2 [8]
hierachy_level2
<chr>
1 CS
2 FNZ
3 OPS
4 P&T
5 R&A
6 SPE
7 SPP
8 TUR
(tier_leavers[,c(3:4)] / bal_tier[,c(3:4)])
201804 201904
1 0.13389121 0.18800000
2 0.01149425 0.11340206
3 0.06906339 0.06687198
4 0.18750000 0.11827957
5 0.07884615 0.06043956
6 0.21538462 0.21978022
7 0.14925373 0.18181818
8 0.04347826 0.04347826
but when I combine it in a cbind I end up with this:
cbind(bal_tier[,1], tier_leavers[,c(3:4)] / bal_tier[,c(3:4)])
[,1] [,2]
201804 Character,8 Numeric,8
201904 Character,8 Numeric,8
What am I understanding wrong here?
Here's a solution using tidyverse
nme <- c("A","B","C","D","E")
yr_1 <- round(10*runif(n=5,min=0,max=10),0)
yr_2 <- round(10*runif(n=5,min=0,max=10),0)
data_1 <- data.frame(nme,yr_1,yr_2)
yr_1 <- round(10*runif(n=5,min=0,max=10),0)
yr_2 <- round(10*runif(n=5,min=0,max=10),0)
data_2 <- data.frame(nme,yr_1,yr_2)
data_divide <- data_1 %>%
left_join(data_2,by="nme") %>%
mutate(
result_1=yr_1.x/yr_1.y,
result_2=yr_2.x/yr_2.y
)
What I ended up doing feels like cheating but I got a clue from Zeus's answer:
a <- bal_tier[, 1]
b <- tier_leavers[,c(3:4)] / bal_tier[,c(3:4)]
tier_to <- data.frame(a, b)
tier_to
> tier_to
hierachy_level2 X201804 X201904
1 CS 0.13389121 0.18800000
2 FNZ 0.01149425 0.11340206
3 OPS 0.06906339 0.06687198
4 P&T 0.18750000 0.11827957
5 R&A 0.07884615 0.06043956
6 SPE 0.21538462 0.21978022
7 SPP 0.14925373 0.18181818
8 TUR 0.04347826 0.04347826
I have tried related solutions but they do not work for my case. I have a dataframe that has a nested list in one column and i want to split this list and put it in columns.The list contains another list with the time stamp for each month(ts) and the consumption for each month(v). The dataframe is:
id monthly_consum
1 112 list1
2 34 list2
3 54 list3
where
list1<-list(list(ts = "2016-01-01T00:00:00+01:00", v = 466.6),list(ts = "2016-02-01T00:00:00+01:00", v = 565.6),
list(ts = "2016-03-01T00:00:00+01:00", v = 765.6),list(ts = "2016-04-01T00:00:00+01:00", v = 888.6),
list(ts = "2016-05-01T00:00:00+01:00", v = 465),list(ts = "2016-06-01T00:00:00+01:00", v = 465.6),
list(ts = "2016-07-01T00:00:00+01:00", v = 786),list(ts = "2016-08-01T00:00:00+01:00", v = 435),
list(ts = "2016-09-01T00:00:00+01:00", v = 568),list(ts = "2016-10-01T00:00:00+01:00", v = 678),
list(ts = "2016-11-01T00:00:00+01:00", v = 522),list(ts = "2016- 12-01T00:00:00+01:00", v = 555))
list2<-list(list(ts = "2016-01-01T00:00:00+01:00", v = 333.6),list(ts = "2016-02-01T00:00:00+01:00", v = 565.6),
list(ts = "2016-03-01T00:00:00+01:00", v = 765.6),list(ts = "2016-04-01T00:00:00+01:00", v = 333.6),
list(ts = "2016-05-01T00:00:00+01:00", v = 465),list(ts = "2016-06-01T00:00:00+01:00", v = 465.6),
list(ts = "2016-07-01T00:00:00+01:00", v = 786),list(ts = "2016-08-01T00:00:00+01:00", v = 435),
list(ts = "2016-09-01T00:00:00+01:00", v = 568),list(ts = "2016-10-01T00:00:00+01:00", v = 678),
list(ts = "2016-11-01T00:00:00+01:00", v = 522),list(ts = "2016-12-01T00:00:00+01:00", v = 555))
list3<-list(list(ts = "2016-01-01T00:00:00+01:00", v = 323.6),list(ts = "2016-02-01T00:00:00+01:00", v = 565.6),
list(ts = "2016-03-01T00:00:00+01:00", v = 333.6),list(ts = "2016-04-01T00:00:00+01:00", v = 888.6),
list(ts = "2016-05-01T00:00:00+01:00", v = 465),list(ts = "2016-06-01T00:00:00+01:00", v = 465.6),
list(ts = "2016-07-01T00:00:00+01:00", v = 786),list(ts = "2016-08-01T00:00:00+01:00", v = 435),
list(ts = "2016-09-01T00:00:00+01:00", v = 568),list(ts = "2016-10-01T00:00:00+01:00", v = 678),
list(ts = "2016-11-01T00:00:00+01:00", v = 522),list(ts = "2016-12-01T00:00:00+01:00", v = 555))
I would like to split the list and create a dataframe which will have one of the 2 following formats:
id ts.1 cons.1 ts.2 cons.2 ts.3 etc..
1 112 2016-01-01T00:00:00+01:00 466.6 2016-02.. ... ...
2 34 2016-01-01T00:00:00+01:00 333.6 2016-02.. ... ...
3 54 2016-01-01T00:00:00+01:00 323.6 2016-02.. ... ...
OR
id ts consumption
112 2016-01-01T00:00:00+01:00 466.6
112 2016-02-01T00:00:00+01:00 565.6
112 2016-03-01T00:00:00+01:00 765.6
112 2016-04-01T00:00:00+01:00 888.6
112 2016-05-01T00:00:00+01:00 465
112 2016-06-01T00:00:00+01:00 465.6
112 2016-07-01T00:00:00+01:00 786
112 2016-08-01T00:00:00+01:00 435
112 2016-09-01T00:00:00+01:00 568
112 2016-10-01T00:00:00+01:00 678
112 2016-11-01T00:00:00+01:00 522
112 2016-12-01T00:00:00+01:00 555
34 2016-01-01T00:00:00+01:00 466.6
34 2016-02-01T00:00:00+01:00 333.6
34 2016-03-01T00:00:00+01:00 323.6
etc............
could you help me? I am using data.frame(matrix(unlist..)) but it does not give the format that i want. When I use rbind list i get:
"Error in rbindlist(....) :
Item 1 of list input is not a data.frame, data.table or list"
Thank you in advance!
UPDATE
Using dput i would get (in the real problem):
>dput(locs_total[9:12,1:5])
structure(list(X.dep_id. = c("34", "34", "34", "34"), X.loc_id. = c("17761",
"17406", "23591", "27838"), X.surface. = c("200", "1250", "54",
"150"), X.sector. = c("HOUSING", "SMALL-STORE-FOOD", "LIBRARY",
"OFFICE-BUILDING"),
X.avg_cons_main. = list(list(structure(list(
ts = "2016-01-01T00:00:00+01:00", v = 466.65), .Names = c("ts",
"v")), structure(list(ts = "2016-02-01T00:00:00+01:00", v = 406.45),
.Names = c("ts",
"v")), structure(list(ts = "2016-03-01T00:00:00+01:00", v = 483.35),
.Names = c("ts",
"v")), structure(list(ts = "2016-04-01T00:00:00+02:00", v = 79.45), .
Names = c("ts",
"v"))), NULL, NULL, NULL)), .Names = c("X.dep_id.", "X.loc_id.",
"X.surface.", "X.sector.", "X.avg_cons_main."
), row.names = c("9", "10", "11", "12"), class = "data.frame")
If the ids are also in the lists, you can use dplyr::bind_rows
dplyr::bind_rows(list1, list2, list3)
# A tibble: 36 × 2
ts v
<chr> <dbl>
1 2016-01-01T00:00:00+01:00 466.6
2 2016-02-01T00:00:00+01:00 565.6
3 2016-03-01T00:00:00+01:00 765.6
4 2016-04-01T00:00:00+01:00 888.6
5 2016-05-01T00:00:00+01:00 465.0
6 2016-06-01T00:00:00+01:00 465.6
7 2016-07-01T00:00:00+01:00 786.0
8 2016-08-01T00:00:00+01:00 435.0
9 2016-09-01T00:00:00+01:00 568.0
10 2016-10-01T00:00:00+01:00 678.0
# ... with 26 more rows
To add IDs from another df
library(dplyr)
ids <- data_frame(list_id = c(112, 34, 54),
monthly_consum = c("list1", "list2", "list3"))
If we consider nested lists, you can use purrr:map as follows:
-combine the three lists in one list
k <- list(list1, list2, list3)
-use map to bind_rows in each column independently
k1 <- purrr:: map(k, bind_rows)
-use the ids as names for the lists
names(k1) <- ids$list_id
-bind_rows using .id
bind_rows(k1, .id = "id")
# A tibble: 36 × 3
id ts v
<chr> <chr> <dbl>
1 112 2016-01-01T00:00:00+01:00 466.6
2 112 2016-02-01T00:00:00+01:00 565.6
3 112 2016-03-01T00:00:00+01:00 765.6
4 112 2016-04-01T00:00:00+01:00 888.6
5 112 2016-05-01T00:00:00+01:00 465.0
6 112 2016-06-01T00:00:00+01:00 465.6
7 112 2016-07-01T00:00:00+01:00 786.0
8 112 2016-08-01T00:00:00+01:00 435.0
9 112 2016-09-01T00:00:00+01:00 568.0
10 112 2016-10-01T00:00:00+01:00 678.0
We can loop through the list
res <- do.call(rbind, Map(cbind, id = df1$id, lapply(mget(df1$monthly_consum),
function(x) do.call(rbind.data.frame, x))))
names(res)[3] <- "consumption"
row.names(res) <- NULL
head(res, 14)
# id ts consumption
#1 112 2016-01-01T00:00:00+01:00 466.6
#2 112 2016-02-01T00:00:00+01:00 565.6
#3 112 2016-03-01T00:00:00+01:00 765.6
#4 112 2016-04-01T00:00:00+01:00 888.6
#5 112 2016-05-01T00:00:00+01:00 465.0
#6 112 2016-06-01T00:00:00+01:00 465.6
#7 112 2016-07-01T00:00:00+01:00 786.0
#8 112 2016-08-01T00:00:00+01:00 435.0
#9 112 2016-09-01T00:00:00+01:00 568.0
#10 112 2016-10-01T00:00:00+01:00 678.0
#11 112 2016-11-01T00:00:00+01:00 522.0
#12 112 2016- 12-01T00:00:00+01:00 555.0
#13 34 2016-01-01T00:00:00+01:00 333.6
#14 34 2016-02-01T00:00:00+01:00 565.6
data
df1 <- structure(list(id = c(112L, 34L, 54L), monthly_consum = c("list1",
"list2", "list3")), .Names = c("id", "monthly_consum"),
class = "data.frame", row.names = c("1", "2", "3"))