I have a dataframe df1 like this :
timestamp
01-12-2015 00:04
01-12-2015 02:20
01-12-2015 02:43
01-12-2015 04:31
01-12-2015 08:51
01-12-2015 11:28
01-12-2015 20:53
01-12-2015 21:28
02-12-2015 00:30
02-12-2015 20:22
Which contains time stamps. I would want to get count by binning hours in 12 hours interval i.e(01-12-2015[0-9],01-12-2015[9-21], and so on.
output Sample:
DayOfMonth Group count
1 1 5
1 2 2
2 1 2
2 2 1
The day of month can be replaced by Serial Number also, starting with 1. Any help to solve this is highly appreciated.
A possible solution in base R:
# convert the 'timestamp' column to a datetime format
df1$timestamp <- as.POSIXct(strptime(df1$timestamp, format = '%d-%m-%Y %H:%M'))
# create day.of.month variable
df1$day.of.month <- format(df1$timestamp, '%d')
# extract the 12 hour interval as am/pm values
df1$group <- gsub('[0-9: ]+','\\1',format(df1$timestamp, '%r'))
# aggregate
aggregate(. ~ group + day.of.month, df1, length)
which gives:
group day.of.month timestamp
1 am 01 6
2 pm 01 2
3 am 02 1
4 pm 02 1
Another solution using data.table and and the pm function of lubridate:
library(lubridate)
library(data.table)
setDT(df1)[, timestamp := dmy_hm(timestamp)
][, group := pm(timestamp)+1
][, .N, .(day.of.month = day(timestamp),group)]
which gives:
day.of.month group N
1: 1 1 6
2: 1 2 2
3: 2 1 1
4: 2 2 1
Used data:
df1 <- structure(list(timestamp = c("01-12-2015 00:04", "01-12-2015 02:20", "01-12-2015 02:43", "01-12-2015 04:31", "01-12-2015 08:51",
"01-12-2015 11:28", "01-12-2015 20:53", "01-12-2015 21:28", "02-12-2015 00:30", "02-12-2015 20:22")),
.Names = "timestamp", class = "data.frame", row.names = c(NA,-10L))
We can use lubridate functions to convert to 'Datetime' class easily and with dplyr to get the output efficiently compared to base R methods.
library(lubridate)
library(dplyr)
df1 %>%
mutate(timestamp = dmy_hm(timestamp)) %>%
group_by(DayOfMonth = day(timestamp)) %>%
group_by(Group = as.numeric(cut(timestamp, breaks = "12 hour")),
add=TRUE) %>%
summarise(GroupCount = n())
# DayOfMonth Group GroupCount
# <int> <dbl> <int>
#1 1 1 6
#2 1 2 2
#3 2 1 1
#4 2 2 1
Or we can use a compact option with data.table
library(data.table)
setDT(df1)[, {t1 <- dmy_hm(timestamp); .(DayOfMonth = day(t1),
Group = (hour(t1)>12)+1L)}][, .(GroupCount = .N), .(DayOfMonth, Group)]
# DayOfMonth Group GroupCount
#1: 1 1 6
#2: 1 2 2
#3: 2 1 1
#4: 2 2 1
NOTE: The data.table solution is done with just two steps...
data
df1 <- structure(list(timestamp = c("01-12-2015 00:04", "01-12-2015 02:20",
"01-12-2015 02:43", "01-12-2015 04:31", "01-12-2015 08:51", "01-12-2015 11:28",
"01-12-2015 20:53", "01-12-2015 21:28", "02-12-2015 00:30", "02-12-2015 20:22"
)), .Names = "timestamp", class = "data.frame", row.names = c(NA,-10L))
Another possible solution in base R :
timeStamp <- c("01-12-2015 00:04","01-12-2015 02:20","01-12-2015 02:43","01-12-2015 04:31",
"01-12-2015 08:51","01-12-2015 11:28","01-12-2015 20:53","01-12-2015 21:28",
"02-12-2015 00:30","02-12-2015 20:22")
times <- as.POSIXlt(timeStamp,format="%d-%m-%Y %H:%M",tz='GMT')
DF <- data.frame(Times=times)
DF$Group <- as.logical(times$hour > 12) + 1
DF$DayOfMonth <- times$mday
res <- aggregate(Times ~ DayOfMonth + Group,data=DF, FUN = length)
# res :
# DayOfMonth Group Times
# 1 1 1 6
# 2 2 1 1
# 3 1 2 2
# 4 2 2 1
Or if you want to include dates in hours range: [21-0] of previous day in the next day :
timeStamp <- c("01-12-2015 00:04","01-12-2015 02:20","01-12-2015 02:43","01-12-2015 04:31",
"01-12-2015 08:51","01-12-2015 11:28","01-12-2015 20:53","01-12-2015 21:28",
"02-12-2015 00:30","02-12-2015 20:22")
times <- as.POSIXlt(timeStamp,format="%d-%m-%Y %H:%M",tz='GMT')
h <- times$hour + times$min*1/60 + times$sec*1/3600
# here we add 3 hours to the dates in hours range [21-0] in this way we
# push them to the next day
times[h >= 21] <- times[h >= 21] + 3*3600
DF <- data.frame(Times=times)
DF$Group <- ifelse(h < 9,1,ifelse(h <= 21,2,NA))
DF$DayOfMonth <- times$mday
res <- aggregate(Times ~ DayOfMonth + Group,data=na.omit(DF), FUN = length)
# res :
# DayOfMonth Group Times
# 1 1 1 5
# 2 2 1 2
# 3 1 2 2
# 4 2 2 1
Adding to the several already presented options, the stringi package has some date parsing functions as well:
library(stringi)
df1$timestamp <- stri_datetime_parse(df1$timestamp, format = 'dd-mm-yyyy HH:mm')
df1$DayOfMonth <- stri_datetime_format(df1$timestamp, format = 'd')
df1$Group <- stri_datetime_format(df1$timestamp, format = 'a')
After that you can get a count with for example the following two options:
# option 1:
aggregate(. ~ Group + DayOfMonth, df1, length) # copied from #ProcrastinatusMaximus
# option 2a:
library(dplyr)
df1 %>%
group_by(DayOfMonth, Group) %>%
tally()
# option 2b:
count(df1, DayOfMonth, Group)
The output of the latter:
DayOfMonth Group n
(chr) (chr) (int)
1 1 a.m. 6
2 1 p.m. 2
3 2 a.m. 1
4 2 p.m. 1
Related
This question already has an answer here:
How to use Pivot_longer to reshape from wide-type data to long-type data with multiple variables
(1 answer)
Closed 1 year ago.
I have a data frame which each row indicates a unique id.
ID <- 1:12
Date1 <- seq(as.Date("2000/1/1"), length.out = 12, by = "months")
Date2 <- seq(as.Date("2001/1/1"), length.out = 12, by = "months")
Date3 <- seq(as.Date("2002/1/1"), length.out = 12, by = "months")
Fcast1 <- rnorm(12)
Fcast2 <- rnorm(12)
Fcast3 <- rnorm(12)
df <- data.frame(ID, Date1, Fcast1, Date2, Fcast2, Date3, Fcast3)
I would like to gather Date1 to Date3 and Fcast1 to Fcast3 columns in two columns of Date and Fcast and repeat IDs 3 times. basically creating long-view of data or rbind-ing each pair of Date and Fcast.
Desired Output shape:
ID <- rep(ID, 3)
Date = c(Date1, Date2, Date3)
Fcast = c(Fcast1, Fcast2, Fcast3)
df <- data.frame(ID, Date, Fcast)
Using tidyr::pivot_longer -
tidyr::pivot_longer(df,
cols = -ID,
names_to = '.value',
names_pattern = '(.*)\\d+')
# A tibble: 36 x 3
# ID Date Fcast
# <int> <date> <dbl>
# 1 1 2000-01-01 0.452
# 2 1 2001-01-01 0.242
# 3 1 2002-01-01 -0.540
# 4 2 2000-02-01 1.54
# 5 2 2001-02-01 0.178
# 6 2 2002-02-01 0.883
# 7 3 2000-03-01 -0.987
# 8 3 2001-03-01 1.40
# 9 3 2002-03-01 0.675
#10 4 2000-04-01 -0.632
# … with 26 more rows
You can do something like:
library(data.table)
setDT(df)
melt(df, measure.vars=patterns("^Date", "^Fcast"), value.name=c("Date", "Fcast"))[,
variable := NULL][]
I would like to get the counts per hour for each type (version1 and version2).
Sample data:
type <- c('version1','version1','version1','version2','version2')
startdate <- as.POSIXct(c('2017-11-1 02:11:02.000','2018-3-25 02:13:02.000','2019-3-14 03:45:02.000',
'2017-3-14 02:55:02.000','2018-3-14 03:45:02.000'))
df <- data.frame(type, startdate)
df
type startdate
1 version1 2017-11-01 02:11:02
2 version1 2018-03-25 02:13:02
3 version1 2019-03-14 03:45:02
4 version2 2017-03-14 02:55:02
5 version2 2018-03-14 03:45:02
In this df we see that version1 has two counts for 02h and one count for 03h.
And version2 has one count for 02h and one count for 03h.
Desired output:
hour version1 version2
1 00:00 0 0
2 01:00 0 0
3 02:00 2 1
4 03:00 1 1
We can first get hours from startdate, count number of rows for each hour and type. complete missing hours and fill their count with 0 and use pivot_wider to get data in wide format.
library(dplyr)
library(tidyr)
df %>%
mutate(hr = lubridate::hour(startdate)) %>%
count(hr, type) %>%
complete(type, hr = seq(0, max(hr)), fill = list(n = 0)) %>%
pivot_wider(names_from = type, values_from = n)
# A tibble: 4 x 3
# hr version1 version2
# <int> <dbl> <dbl>
#1 0 0 0
#2 1 0 0
#3 2 2 1
#4 3 1 1
Something was wrong with your start date variable. Thus I set it up with the package lubridate
library(dplyr)
library(tidyr)
type = c('version1','version1','version1','version2','version2')
startdate = lubridate::ymd_hms(c('2017-11-1T02:11:02.000','2018-3-25T02:13:02.000',
'2019-3-14T03:45:02.000','2017-3-14T02:55:02.000',
'2018-3-14T03:45:02.000'))
tibble(type = type, startdate = startdate) %>%
count(type, hour = lubridate::hour(startdate)) %>%
spread(type, n)
# A tibble: 2 x 3
hour version1 version2
<int> <int> <int>
1 2 2 1
2 3 1 1
Base R solution:
# Extract the hour and store it as a vector:
df$hour <- gsub(".* ", "", trunc(df$startdate, units = "hours"))
# Count the number of observations of each type in each hour:
df$type_hour_cnt <- with(df,
ave(paste(type, hour, sep = " - "),
paste(type, hour, sep = " - "), FUN = seq_along))
# Reshape dataframe:
df <- as.data.frame(as.matrix(xtabs(type_hour_cnt ~ hour + type, df, sparse = T)))
# Extract rownames and store them as "hour" vector and then delete row.names:
df <- data.frame(cbind(hour = row.names(df), df), row.names = NULL)
I'm struggling to find a solution for the following problem. I have a df with id's/ dob's and another monthbucket df as following
set.seed(33)
df <- data.frame(dob = sample(seq(as.Date('1940/01/01'), as.Date('2010/01/01'), by="day"), 10),
id = seq(1:10) )
monthbucket <- data.frame(month = format(seq(as.Date("2010-01-01"),as.Date("2011-01-01"),by="months"),'%Y-%m'),
startmonth = seq(as.Date("2010-01-01"),as.Date("2011-01-01"),by="months"),
endmonth = seq(as.Date("2010-02-01"),as.Date("2011-02-01"),by="months")-1)
I want to get an output which gives me the count of members within age groups (<19, 19-64, >64) for each of my monthly buckets. The count obviously switches over the year when people have birthdays.
I got the age calculation with something like:
age.fct <- function(dob, bucketdate) {
period <- as.period(interval(dob, bucketdate),unit = "year")
period$year}
I guess the general approach would be to calculate the age for each monthbucket, assign into one of the 3 age groups and count it up by month. Any suggestions?
EDIT 1.
Thanks for all the different approaches, I just run a brief benchmark on the solutions to determine which answer to accept. Somehow the data table solution didn't work on my test data set but I will check as soon as I have a few minutes in the next days.
set.seed(33)
df <- data.frame(dob = sample(seq(as.Date('1940/01/01'), as.Date('2010/01/01'), by="day"), 10000),
id = seq(1:10000) )
monthbucket <- data.frame(month = format(seq(as.Date("2010-01-01"),as.Date("2011-01-01"),by="months"),'%Y-%m'),
startmonth = seq(as.Date("2010-01-01"),as.Date("2011-01-01"),by="months"),
endmonth = seq(as.Date("2010-02-01"),as.Date("2011-02-01"),by="months")-1)
birth_days <- df$dob
month_bucket <- monthbucket$startmonth
and the benchmark
microbenchmark::microbenchmark(
MM= monthbucket %>% group_by_all %>% expand(id=df$id) %>% left_join(.,{df %>% mutate(birth_month =cut(dob, "month"))},by="id") %>% mutate(age=time_length(difftime(startmonth, birth_month),"years")) %>%
mutate(age_cat=case_when(age<19 ~ "<19", age>64 ~ ">64",TRUE ~ "19-64")) %>% group_by(month) %>% count(age_cat) %>% gather(variable, count, n) %>%
unite(variable, age_cat) %>% spread(variable, count)
,
AkselA = {ages <- as.data.frame(t(unclass(outer(monthbucket$startmonth, df$dob, "-")/365.25)))
ages <- do.call(data.frame, lapply(ages, cut, c(0, 19, 64, Inf), c("0-19", "19-64", "64+")))
ages <- sapply(ages, table)
colnames(ages) <- monthbucket$month
},
Cole1 ={t(table(apply(X = outer(month_bucket, birth_days, `-`) / 365.25, MARGIN = 2, FUN = cut, c(0,19,65, Inf)), rep(format(month_bucket,'%Y-%m'), length(birth_days))))
},
# cole2={ cast(CJ(month_bucket, birth_days)[, .N, by = .(month_bucket , cut(as.numeric(month_bucket - birth_days)/365.25, c(0,19,65,Inf)))], month_bucket ~ cut, value.var = 'N')
# },
#
Cole3={crossing(month_bucket, birth_days)%>%count(month_bucket, age_range = cut(as.numeric(month_bucket - birth_days) / 365.25, c(0,19,65,Inf)))%>%spread(age_range, n)
},
Cole4={all_combos <- expand.grid(month_bucket = month_bucket, birth_days = birth_days)
all_combos$age <- as.numeric(all_combos$month_bucket - all_combos$birth_days) / 365.25
all_combos$cut_r <- cut(all_combos$age, c(0,19,65,Inf))
reshape(data = aggregate( all_combos$month_bucket, by = list(bucket = all_combos$month_bucket,age_group = all_combos$cut_r), FUN = length), timevar = 'age_group' , idvar = 'bucket', direction = 'wide' )
},
times = 1L)
Unit: milliseconds
expr min lq mean median uq max neval
MM 4249.02810 4249.02810 4249.02810 4249.02810 4249.02810 4249.02810 1
AkselA 17.12697 17.12697 17.12697 17.12697 17.12697 17.12697 1
Cole1 3237.94534 3237.94534 3237.94534 3237.94534 3237.94534 3237.94534 1
Cole3 23.63945 23.63945 23.63945 23.63945 23.63945 23.63945 1
Cole4 877.92782 877.92782 877.92782 877.92782 877.92782 877.92782 1
Based on speed AkselA's approach seems to be the fastest but I get a different result for M-M's approach compared to all others (once AkselA's changes to 65 in the cut part cut, c(0, 19, 64, Inf)... I will accept answer based on speed but will look into the differences in the results!
Not very sophisticated but I joined the two tables (first expanded monthbucket on df$id) and then calculated the age (as you have the whole month, I just calculated difftime with the first day of month of birth and startmonth). Then, for each month (bucket) I counted number of different age groups and at the end converted long format to wide for better illustration.
library(lubridate)
library(tidyverse)
monthbucket %>%
group_by_all %>%
expand(id=df$id) %>%
left_join(.,{df %>%
mutate(birth_month =cut(dob, "month"))},
by="id") %>%
mutate(age=time_length(difftime(startmonth, birth_month),"years")) %>%
mutate(age_cat=case_when(age<19 ~ "<19",
age>64 ~ ">64",
TRUE ~ "19-64")) %>%
group_by(month) %>%
count(age_cat) %>%
gather(variable, count, n) %>%
unite(variable, age_cat) %>%
spread(variable, count)
#> # A tibble: 13 x 4
#> # Groups: month [13]
#> month `<19` `>64` `19-64`
#> <fct> <int> <int> <int>
#> 1 2010-01 3 2 5
#> 2 2010-02 3 2 5
#> 3 2010-03 3 2 5
#> 4 2010-04 3 2 5
#> 5 2010-05 3 2 5
#> 6 2010-06 3 2 5
#> 7 2010-07 3 2 5
#> 8 2010-08 3 2 5
#> 9 2010-09 3 2 5
#> 10 2010-10 3 2 5
#> 11 2010-11 3 2 5
#> 12 2010-12 3 2 5
#> 13 2011-01 3 2 5
Created on 2019-07-03 by the reprex package (v0.3.0)
Assuming I understand your request.
ages <- as.data.frame(t(unclass(outer(monthbucket$startmonth, df$dob, "-")/365.25)))
ages <- do.call(data.frame,
lapply(ages, cut, c(0, 19, 64, Inf), c("0-19", "19-64", "64+")))
ages <- sapply(ages, table)
colnames(ages) <- monthbucket$month
ages
# 2010-01 2010-02 2010-03 2010-04 2010-05 2010-06 2010-07 2010-08 2010-09 2010-10 2010-11 2010-12 2011-01
# 0-19 2 2 2 2 2 2 2 2 2 2 2 2 2
# 19-64 7 7 7 7 7 7 7 7 7 7 7 7 7
# 64+ 1 1 1 1 1 1 1 1 1 1 1 1 1
#
There are some similarities to #AkselA's answer as it depends on outer(), cut(), and table().
set.seed(33)
birth_days <- sample(seq(as.Date('1940/01/01'), as.Date('2010/01/01'), by="day"), 10)
month_bucket <- seq(as.Date("2010-01-01"),as.Date("2011-01-01"),by="months")
t(
table(
apply(
X = outer(month_bucket, birth_days, `-`) / 365.25
, MARGIN = 2
, FUN = cut, c(0,19,65, Inf)
)
, rep(format(month_bucket,'%Y-%m'), length(birth_days))
)
)
(0,19] (19,65] (65,Inf]
2010-01 2 7 1
2010-02 2 7 1
2010-03 2 7 1
2010-04 2 7 1
2010-05 2 7 1
2010-06 2 7 1
2010-07 2 7 1
2010-08 2 7 1
2010-09 2 7 1
2010-10 2 7 1
2010-11 2 7 1
2010-12 2 7 1
2011-01 2 7 1
I felt weird having such a similar solution so here is data.table:
library(data.table)
dcast(
CJ(month_bucket, birth_days
)[, .N
, by = .(month_bucket
, cut(as.numeric(month_bucket - birth_days)/365.25, c(0,19,65,Inf)))
]
, month_bucket ~ cut
, value.var = 'N')
dplyr and tidyr:
library(dplyr)
library(tidyr)
crossing(month_bucket, birth_days)%>%
count(month_bucket
, age_range = cut(as.numeric(month_bucket - birth_days) / 365.25, c(0,19,65,Inf))
)%>%
spread(age_range, n)
And a similar approach in base that I'm not completely happy with.
all_combos <- expand.grid(month_bucket = month_bucket, birth_days = birth_days)
all_combos$age <- as.numeric(all_combos$month_bucket - all_combos$birth_days) / 365.25
all_combos$cut_r <- cut(all_combos$age, c(0,19,65,Inf))
reshape(
data = aggregate(
all_combos$month_bucket
, by = list(bucket = all_combos$month_bucket
,age_group = all_combos$cut_r)
, FUN = length)
, timevar = 'age_group'
, idvar = 'bucket'
, direction = 'wide'
)
I have a data frame like this:
Date
20130101
20130102
20130103
20130104
how i can split Date column in different column?
I have already used following function but it is not working :
library(data.table)
setDT(DF)[, tstrsplit(DATE, "/|\\s", type.convert = TRUE)]
Here are some solutions that do not require any packages. They all produce a data.frame with a "Date" class column followed by numeric columns for year, month and day. (The input used in reproducible form is given in the Note at the end.)
1) POSIXlt First convert the Date column to "Date" class giving date and then to an unclassed "POSIXlt" object giving lt. Now pick off the elements of lt appropriately:
date <- as.Date(as.character(DF$Date), format = "%Y%m%d")
lt <- unclass(as.POSIXlt(date))
with(lt, data.frame(Date = date, year = year + 1900, month = mon + 1, day = mday))
giving:
Date year month day
1 2013-01-01 2013 1 1
2 2013-01-02 2013 1 2
3 2013-01-03 2013 1 3
4 2013-01-04 2013 1 4
2) format
data.frame(date = as.Date(as.character(DF$Date), format = "%Y%m%d"),
year = as.numeric(format(date, "%Y")),
month = as.numeric(format(date, "%m")),
day = as.numeric(format(date, "%d")))
giving:
date year month day
1 2013-01-01 2013 1 1
2 2013-01-02 2013 1 2
3 2013-01-03 2013 1 3
4 2013-01-04 2013 1 4
3) math
with(DF, data.frame(date = as.Date(as.character(DF$Date), format = "%Y%m%d"),
year = Date %/% 10000,
month = Date %% 10000 %/% 100,
day = Date %% 100))
giving:
date year month day
1 2013-01-01 2013 1 1
2 2013-01-02 2013 1 2
3 2013-01-03 2013 1 3
4 2013-01-04 2013 1 4
4) read.fwf
data.frame(date = as.Date(as.character(DF$Date), format = "%Y%m%d"),
read.fwf(textConnection(as.character(DF$Date)), c(4, 2, 2),
col.names = c("year", "month", "day")))
giving:
date year month day
1 2013-01-01 2013 1 1
2 2013-01-02 2013 1 2
3 2013-01-03 2013 1 3
4 2013-01-04 2013 1 4
5) sub/read.table
date.ch <- sub("(....)(..)(..)", "\\1-\\2-\\3", DF$Date)
data.frame(date = as.Date(date.ch),
read.table(text = date.ch, col.names = c("year", "month", "day"), sep = "-"))
giving:
date year month day
1 2013-01-01 2013 1 1
2 2013-01-02 2013 1 2
3 2013-01-03 2013 1 3
4 2013-01-04 2013 1 4
Note: The input used, "DF", in reproducible form is:
DF <- data.frame(Date = 20130101:20130104)
If you not set on using data.table you could use the following command that incorporates substr:
x = data.frame("20130101", "20130102", "20130103", "20130104")
y<-data.frame(Year=substr(x[,1],1,4),
Month=substr(x[,1],5,6),
Day=substr(x[,1],7,8))
If you are sure your data is in the same format for the whole vector.
You can also do this with lubridate
library(dplyr)
library(lubridate)
data =
data_frame(Date = c(20130101, 20130102, 20130103, 20130104) ) %>%
mutate(date =
Date %>%
as.character %>%
ymd,
year = year(date),
month = month(date),
day = day(date))
I have a data frame of date which I want to merge with a list that has different number of rows ( 3 or 4 ). I want to merge the first date of the data frame with all elements (dates) of the list. For example the first two dates in df1 are merged with the first two elements of the list (bc) and I would get the following:
date date1 id
1992-09-26 1992-09-05 1
1992-09-26 1992-09-12 1
1992-09-26 1992-09-19 1
1992-09-27 1992-09-06 2
1992-09-27 1992-09-13 2
1992-09-27 1992-09-20 2
How can I do that? I have searched this forum and did not find similar problem or comparable solution.
df1 <- structure(c(8304, 8305, 8306, 8307, 8308, 8309, 8310, 8311, 8312,
8313, 8314), class = "Date")
mylist <- list(structure(c(8283, 8290, 8297), class = "Date"), structure(c(8284,
8291, 8298), class = "Date"), structure(c(8285, 8292, 8299), class = "Date"),
structure(c(8279, 8286, 8293, 8300), class = "Date"), structure(c(8280,
8287, 8294, 8301), class = "Date"), structure(c(8316, 8323,
8330, 8337), class = "Date"), structure(c(8317, 8324, 8331,
8338), class = "Date"), structure(c(8318, 8325, 8332, 8339
), class = "Date"), structure(c(8319, 8326, 8333), class = "Date"),
structure(c(8320, 8327, 8334), class = "Date"), structure(c(8321,
8328, 8335), class = "Date"))
Highly inelegant, but it works:
df <- as.data.frame(do.call("rbind", mapply(function(x,y) cbind(date=x, date1=y, id=which(df1==x)), df1, mylist)))
class(df[,1]) <- "Date"
class(df[,2]) <- "Date"
head(df)
date date1 id
1 1992-09-26 1992-09-05 1
2 1992-09-26 1992-09-12 1
3 1992-09-26 1992-09-19 1
4 1992-09-27 1992-09-06 2
5 1992-09-27 1992-09-13 2
6 1992-09-27 1992-09-20 2
It sounds like you're just looking for rep and unlist, as in the following:
A <- sapply(mylist, length)
out <- data.frame(date = rep(df1, A),
date1 = unlist(mylist),
id = rep(seq_along(A), A))
head(out)
# date date1 id
# 1 1992-09-26 8283 1
# 2 1992-09-26 8290 1
# 3 1992-09-26 8297 1
# 4 1992-09-27 8284 2
# 5 1992-09-27 8291 2
# 6 1992-09-27 8298 2
tail(out)
# date date1 id
# 33 1992-10-05 8320 10
# 34 1992-10-05 8327 10
# 35 1992-10-05 8334 10
# 36 1992-10-06 8321 11
# 37 1992-10-06 8328 11
# 38 1992-10-06 8335 11
You could also use data.table
library(data.table)
DT <- data.table(date = df1, date1 = mylist, key = "date")
DT[, id := 1:nrow(DT)]
DT[, as.IDate(unlist(date1), origin = "1970-01-01"), by = list(date, id)]