Consider a data frame of the form
idnum start end
1993.1 17 1993-01-01 1993-12-31
1993.2 17 1993-01-01 1993-12-31
1993.3 17 1993-01-01 1993-12-31
with start and end being of type Date
$ idnum : int 17 17 17 17 27 27
$ start : Date, format: "1993-01-01" "1993-01-01" "1993-01-01" "1993-01-01" ...
$ end : Date, format: "1993-12-31" "1993-12-31" "1993-12-31" "1993-12-31" ...
I would like to create a new dataframe, that has instead monthly observations for every row, for every month in between start and end (including the boundaries):
Desired Output
idnum month
17 1993-01-01
17 1993-02-01
17 1993-03-01
...
17 1993-11-01
17 1993-12-01
I'm not sure what format month should have, I will at some point want to group by idnum, month for regressions on the rest of the data set.
So far, for every single row, seq(from=test[1,'start'], to=test[1, 'end'], by='1 month') gives me the right sequence - but as soon as I try to apply that to the whole data frame, it will not work:
> foo <- apply(test, 1, function(x) seq(x['start'], to=x['end'], by='1 month'))
Error in to - from : non-numeric argument to binary operator
Using data.table:
# library() stops with an error when data.table is missing; require() would
# only return FALSE and let the next line fail with a less helpful message.
library(data.table) ## 1.9.2+
# Grouping by row number makes seq() run once per row of df.
setDT(df)[ , list(idnum = idnum, month = seq(start, end, by = "month")), by = 1:nrow(df)]
# you may use dot notation as a shorthand alias of list in j:
setDT(df)[ , .(idnum = idnum, month = seq(start, end, by = "month")), by = 1:nrow(df)]
setDT converts df to a data.table. Then for each row, by = 1:nrow(df), we create idnum and month as required.
Using dplyr :
# One monthly sequence per idnum, running from that id's earliest start to its
# latest end.
test %>%
  group_by(idnum) %>%
  summarize(start = min(start), end = max(end)) %>%
  # summarize() drops the idnum grouping, so without rowwise() do() would see
  # every id at once and seq() would be handed whole vectors of dates.
  rowwise() %>%
  do(data.frame(idnum = .$idnum, month = seq(.$start, .$end, by = "1 month")))
Note that here I don't generate a sequence between start and end for each row, instead it is a sequence between min(start) and max(end) for each idnum. If you want the former :
test %>%
rowwise() %>%
do(data.frame(idnum=.$idnum, month=seq(.$start,.$end,by="1 month")))
Updated2
With new versions of purrr (0.3.0) and dplyr (0.8.0), this can be done with map2
library(dplyr)
library(purrr)
library(tidyr) # unnest() lives in tidyr; dplyr and purrr do not export it
test %>%
  # sequence of monthly dates for each corresponding start, end elements
  transmute(idnum, month = map2(start, end, seq, by = "1 month")) %>%
  # unnest the list column (named explicitly: tidyr >= 1.0 warns otherwise)
  unnest(month) %>%
  # remove any duplicate rows
  distinct()
Updated
Based on @Ananda Mahto's comments
res1 <- melt(setNames(lapply(1:nrow(test), function(x) seq(test[x, "start"],
test[x, "end"], by = "1 month")), test$idnum))
Also,
res2 <- setNames(do.call(`rbind`,
with(test,
Map(`expand.grid`,idnum,
Map(`seq`, start, end, by='1 month')))), c("idnum", "month"))
head(res1)
# idnum month
#1 17 1993-01-01
#2 17 1993-02-01
#3 17 1993-03-01
#4 17 1993-04-01
#5 17 1993-05-01
#6 17 1993-06-01
One option creating a sequence per every row using dplyr and tidyr could be:
df %>%
rowwise() %>%
transmute(idnum,
date = list(seq(start, end, by = "month"))) %>%
unnest(date)
idnum date
<int> <date>
1 17 1993-01-01
2 17 1993-02-01
3 17 1993-03-01
4 17 1993-04-01
5 17 1993-05-01
6 17 1993-06-01
7 17 1993-07-01
8 17 1993-08-01
9 17 1993-09-01
10 17 1993-10-01
# … with 26 more rows
Or creating the sequence using a grouping ID:
df %>%
group_by(idnum) %>%
transmute(date = list(seq(min(start), max(end), by = "month"))) %>%
unnest(date)
Or when the goal is to create only one unique sequence per ID:
# NOTE(review): summarise() leaves the result ungrouped here, so min(start) and
# max(end) below span every idnum, and transmute() keeps only `date` - the
# idnum column is absent from the result. Confirm this is the intended output.
df %>%
group_by(idnum) %>%
summarise(start = min(start),
end = max(end)) %>%
transmute(date = list(seq(min(start), max(end), by = "month"))) %>%
unnest(date)
date
<date>
1 1993-01-01
2 1993-02-01
3 1993-03-01
4 1993-04-01
5 1993-05-01
6 1993-06-01
7 1993-07-01
8 1993-08-01
9 1993-09-01
10 1993-10-01
11 1993-11-01
12 1993-12-01
tidyverse answer
Data
df <- structure(list(idnum = c(17L, 17L, 17L), start = structure(c(8401,
8401, 8401), class = "Date"), end = structure(c(8765, 8765, 8765
), class = "Date")), class = "data.frame", .Names = c("idnum",
"start", "end"), row.names = c(NA, -3L))
Answer and output
library(tidyverse)
df %>%
  # Nest start/end per idnum. The bare-column form nest(start, end) is
  # deprecated since tidyr 1.0; the list column must be named explicitly.
  nest(data = c(start, end)) %>%
  # Daily sequence (step of 1 day) per idnum; assumes every idnum has exactly
  # one distinct start and one distinct end, otherwise seq() fails.
  mutate(data = map(data, ~ seq(unique(.x$start), unique(.x$end), 1))) %>%
  unnest(data)
# # A tibble: 365 x 2
# idnum data
# <int> <date>
# 1 17 1993-01-01
# 2 17 1993-01-02
# 3 17 1993-01-03
# 4 17 1993-01-04
# 5 17 1993-01-05
# 6 17 1993-01-06
# 7 17 1993-01-07
# 8 17 1993-01-08
# 9 17 1993-01-09
# 10 17 1993-01-10
# # ... with 355 more rows
And yet another tidyverse approach would be to use tidyr::expand:
library(dplyr, warn = FALSE)
library(tidyr)
df |>
mutate(
row = row_number()
) |>
group_by(row) |>
expand(idnum, date = seq(start, end, "month")) |>
ungroup() |>
select(-row)
#> # A tibble: 36 × 2
#> idnum date
#> <int> <date>
#> 1 17 1993-01-01
#> 2 17 1993-02-01
#> 3 17 1993-03-01
#> 4 17 1993-04-01
#> 5 17 1993-05-01
#> 6 17 1993-06-01
#> 7 17 1993-07-01
#> 8 17 1993-08-01
#> 9 17 1993-09-01
#> 10 17 1993-10-01
#> # … with 26 more rows
Related
I have a data frame as below:
# Random example data: 100 rows with a year, month, patient id and state.
# No seed is set, so the drawn values differ on every run.
# The column name `paitent_ID` is kept as spelled because later code uses it.
dt <- data.frame(
  year = sample(2000:2019, 100, replace = TRUE),
  month = sample(1:12, 100, replace = TRUE),
  paitent_ID = sample(1:50, 100, replace = TRUE),
  state = sample(1:10, 100, replace = TRUE)
)
and I need to apply the function below to this dataset after grouping and sorting:
# Adds a column "new" to dt, computed as dt[0, "state"] * 3, and returns dt.
# NOTE(review): row index 0 selects no rows in R, so dt[0, "state"] is empty;
# the first row would be dt[1, "state"] - confirm which was intended.
newState <- function(dt){
dt["new"]= dt[0,"state"]*3
dt
}
So, this function is supposed to add a new column called new to each group.
Here is the group_by:
library(dplyr)
dt %>%
group_by(paitent_ID) %>%
group_map( ~ .x %>%
arrange( year,month)) %>%
group_map( ~ .x %>%
newState())
When I run the code, it complains with:
Error in UseMethod("group_split") :
no applicable method for 'group_split' applied to an object of class "list"
As @André Oliveira mentions in the comments, it is recommended to use mutate for adding a column. However, it is also possible to do so with group_modify after making some small changes to your function.
# Adds a column `new` holding three times the first row's `state` value.
#
# Has the two-argument shape dplyr::group_modify() expects: it is called once
# per group with that group's rows as `dt` and the group keys as `groupvars`.
#
# dt:        data frame (or tibble) with a `state` column.
# groupvars: grouping keys passed by group_modify(); accepted but unused.
# Returns `dt` with the extra `new` column.
newState <- function(dt, groupvars) {
  # `[[` yields a plain vector for data frames and tibbles alike, whereas
  # dt[1, "state"] on a tibble returns a 1x1 tibble rather than a scalar.
  dt["new"] <- dt[["state"]][1] * 3
  dt
}
dt %>%
group_by(paitent_ID) %>%
arrange(year, month) %>%
group_modify(newState) %>%
ungroup
# # A tibble: 100 x 5
# paitent_ID year month state new
# <int> <int> <int> <int> <dbl>
# 1 1 2006 5 3 9
# 2 2 2012 12 3 9
# 3 3 2013 11 8 24
# 4 3 2014 10 1 24
# 5 3 2019 5 6 24
# 6 4 2006 7 5 15
# 7 4 2006 7 2 15
# 8 5 2003 8 8 24
# 9 7 2015 12 2 6
# 10 7 2017 8 10 6
And a more conventional approach
dt %>%
group_by(paitent_ID) %>%
arrange(year, month) %>%
mutate(new = state[1]*3)
I want to create a line chart in ggplot2 with 350 beer breweries. I want to count, per year, how many breweries were active. I only have the start and end date of each brewery's activity. Tidyverse answers preferred.
begin_datum_jaar is the year the brewery started; eind_datum_jaar is the year in which the brewery ended.
example data frame:
library(tidyverse)
# A tibble: 4 x 3
brouwerijnaam begin_datum_jaar eind_datum_jaar
<chr> <int> <int>
1 Brand 1340 2019
2 Heineken 1592 2019
3 Grolsche 1615 2019
4 Bavaria 1719 2010
dput:
df <- structure(list(brouwerijnaam = c("Brand", "Heineken", "Grolsche",
"Bavaria"), begin_datum_jaar = c(1340L, 1592L, 1615L, 1719L),
eind_datum_jaar = c(2019L, 2019L, 2019L, 2010L)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -4L))
Desired output where etc. is a placeholder.
# A tibble: 13 x 2
year n
<chr> <dbl>
1 1340 1
2 1341 1
3 1342 1
4 1343 1
5 etc. 1
6 1592 2
7 1593 2
8 etc. 2
9 1625 3
10 1626 3
11 1627 3
12 1628 3
13 etc. 3
Could try:
library(tidyverse)
df %>%
rowwise %>%
do(data.frame(brouwerij = .$brouwerijnaam,
Year = seq(.$begin_datum_jaar, .$eind_datum_jaar, by = 1))) %>%
count(Year, name = "Active breweries") %>%
ggplot(aes(x = Year, y = `Active breweries`)) +
geom_line() +
theme_minimal()
Or try expand for the first part:
df %>%
group_by(brouwerijnaam) %>%
expand(Year = begin_datum_jaar:eind_datum_jaar) %>%
ungroup() %>%
count(Year, name = "Active breweries")
However, note that the rowwise, do or expand parts are resource intensive and may take a long time. If that happens, I'd rather use data.table for expanding the data frame, and then continue, like below:
library(data.table)
library(tidyverse)
df <- setDT(df)[, .(Year = seq(begin_datum_jaar, eind_datum_jaar, by = 1)), by = brouwerijnaam]
df %>%
count(Year, name = "Active breweries") %>%
ggplot(aes(x = Year, y = `Active breweries`)) +
geom_line() +
theme_minimal()
The above gives you the plot directly. If you'd like to save it to a data frame first (and then do the ggplot2 thing), this is the main part (I use the data.table for expanding as it's much faster in my experience):
library(data.table)
library(tidyverse)
df <- setDT(df)[
, .(Year = seq(begin_datum_jaar, eind_datum_jaar, by = 1)),
by = brouwerijnaam] %>%
count(Year, name = "Active breweries")
Output:
# A tibble: 680 x 2
Year `Active breweries`
<dbl> <int>
1 1340 1
2 1341 1
3 1342 1
4 1343 1
5 1344 1
6 1345 1
7 1346 1
8 1347 1
9 1348 1
10 1349 1
# ... with 670 more rows
We can use map2 to get the sequence from start to end date for each corresponding element, unnest the list column to expand and use count to get the frequency of the 'year'
library(tidyverse)
df %>%
transmute(year = map2(begin_datum_jaar, eind_datum_jaar, `:`)) %>%
unnest %>%
count(year)
# A tibble: 680 x 2
# year n
# <int> <int>
# 1 1340 1
# 2 1341 1
# 3 1342 1
# 4 1343 1
# 5 1344 1
# 6 1345 1
# 7 1346 1
# 8 1347 1
# 9 1348 1
#10 1349 1
# … with 670 more rows
Or using Map from base R
table(unlist(do.call(Map, c(f = `:`, df[-1]))))
df1 <- data.frame(year = 1000:2020) # Enter range for years of choice
# For every year, count the breweries in df whose activity span covers it.
df1 %>%
  rowwise() %>%
  # Both bounds are inclusive: a brewery counts as active in its first and in
  # its last year. Strict `<` / `>` would leave those two years out.
  mutate(cnt = nrow(df %>%
    filter(begin_datum_jaar <= year & eind_datum_jaar >= year)))
I have a data frame with dates that looks like this:
id weight beginning_date end_date age categ_car
22 2 1960-06-02 1960-06-02 17 A
17 4 2001-07-02 19 B
I want the following dataframe
id weight beginning_date end_date age categ_car
22 2 1960-06-02 1960-06-02 17 A
22 2 1961-06-02 1961-06-02 18 A
17 4 2001-07-02 19 B
17 4 2002-07-02 20 B
17 4 2003-07-02 21 B
17 4 2004-07-02 22 B
I know that I can use the melt function from the reshape2 package to create the pivot, but I don't know how I can increment the date and age.
thank you,
N
Here is some help to get you going. You need to get the year from date columns, apply the same function for date columns, and bind them all after:
library(data.table)
setDT(df)
# Builds the follow-up values a + 1, a + 2, ..., a + (x - 1).
#
# a: starting value (the call site passes the `age` column).
# x: count; x - 1 increments are produced, so x = 1 gives an empty result
#    (the call site passes the `weight` column).
# Returns a numeric vector of length x - 1; `a` itself is not included.
AddWeightage <- function(a, x) {
  unit_steps <- rep(1, x - 1)
  offsets <- cumsum(unit_steps)
  offsets + a
}
cols<-c("age")
df[,lapply(.SD,AddWeightage,x=weight), by=.(categ_car),.SDcols=cols]
Here is the function to generate date columns:
# Builds date strings for follow-up years: the year of `a` is advanced by
# 1, 2, ..., x - 1 while the "-mm-dd" tail of `a` is copied unchanged.
#
# a: a date whose text form is "yyyy-mm-dd"; year() is presumably the one
#    exported by data.table, which this answer loads.
# x: count; x - 1 strings are produced, so x = 1 gives an empty result.
# Returns a character vector (not Date). Because the month/day text is copied
# as-is, a 29 February input produces "-02-29" for non-leap years too.
AddWeightDate <- function(a, x) {
  year_offsets <- cumsum(rep(1, x - 1))
  new_years <- year_offsets + year(a)
  # characters 5 to 10 of "yyyy-mm-dd" are the "-mm-dd" part
  month_day <- substr(as.character(a), 5, 10)
  sprintf('%s%s', new_years, month_day)
}
cols<-c('beginning_date',"end_date")
df3<-df[,lapply(.SD,AddWeightDate,x=weight), by=.(categ_car),.SDcols=cols]
We can use complete and fill from tidyr package to find a solution. Important point is to generate a sequence of dates (increment by 1 year) using %m+% operator from lubridate package.
library(dplyr)
library(tidyr)
library(lubridate)
df %>%
mutate(beginning_date = ymd(beginning_date), end_date = ymd(end_date)) %>%
group_by(id) %>%
complete(beginning_date = seq(beginning_date, beginning_date %m+% years(weight-1),
by="1 year")) %>%
fill(weight, end_date, age, categ_car) %>%
arrange(desc(id)) %>%
select(id, weight, beginning_date, end_date, age, categ_car)
# # A tibble: 6 x 6
# # Groups: id [2]
# id weight beginning_date end_date age categ_car
# <int> <int> <date> <date> <int> <chr>
# 1 22 2 1960-06-02 1960-06-02 17 A
# 2 22 2 1961-06-02 1960-06-02 17 A
# 3 17 4 2001-07-02 NA 19 B
# 4 17 4 2002-07-02 NA 19 B
# 5 17 4 2003-07-02 NA 19 B
# 6 17 4 2004-07-02 NA 19 B
Update: Based on feedback from the OP, to handle multiple beginning_date values for the same `id`:
df %>%
mutate(beginning_date = ymd(beginning_date), end_date = ymd(end_date)) %>%
group_by(id) %>%
complete(beginning_date = seq(as.Date(min(beginning_date), origin="1970-01-01"),
as.Date(min(beginning_date), origin="1970-01-01") %m+% years(weight-1),
by="1 year")) %>%
fill(weight, end_date, age, categ_car) %>%
arrange(desc(id)) %>%
select(id, weight, beginning_date, end_date, age, categ_car)
Data
df <- read.table(text =
"id weight beginning_date end_date age categ_car
22 2 1960-06-02 1960-06-02 17 A
17 4 2001-07-02 NA 19 B",
header = TRUE, stringsAsFactors = FALSE)
Note: NA has been used instead of blank value for end_date.
I have a dataframe of events that looks something like this:
EVENT DATE LONG LAT TYPE
1 1/1/2000 23 45 A
2 2/1/2000 23 45 B
3 3/1/2000 23 45 B
3 5/2/2000 22 56 A
4 6/2/2000 19 21 A
I'd like to collapse this so that any events that occur on consecutive days at the same location (as defined by LONG, LAT) are collapsed into a single event with a START and END date and a concatenated column of the TYPES involved.
Thus the above table would become:
EVENT START-DATE END-DATE LONG LAT TYPE
1 1/1/2000 3/1/2000 23 45 ABB
2 5/2/2000 5/2/2000 22 56 A
3 6/2/2000 6/2/2000 19 21 A
Any advice on how to best approach this would be greatly appreciated.
Here's a modified version of Ronak Shah's solution, taking non-consecutive events at the same location as separate event periods.
# expanded data sample
df <- data.frame(
DATE = as.Date(c("2000-01-01", "2000-01-02", "2000-01-03", "2000-01-05",
"2000-02-05", "2000-02-06", "2000-02-07"), format = "%Y-%m-%d"),
LONG = c(23, 23, 23, 23, 22, 19, 22),
LAT = c(45, 45, 45, 45, 56, 21, 56),
TYPE = c("A", "B", "B", "A", "A", "B", "A")
)
library(dplyr)
df %>%
group_by(LONG, LAT) %>%
arrange(DATE) %>%
mutate(DATE.diff = c(1, diff(DATE))) %>%
mutate(PERIOD = cumsum(DATE.diff != 1)) %>%
ungroup() %>%
group_by(LONG, LAT, PERIOD) %>%
summarise(START_DATE = min(DATE),
END_DATe = max(DATE),
TYPE = paste(TYPE, collapse = "")) %>%
ungroup()
# A tibble: 5 x 6
LONG LAT PERIOD START_DATE END_DATe TYPE
<dbl> <dbl> <int> <date> <date> <chr>
1 19 21 0 2000-02-06 2000-02-06 B
2 22 56 0 2000-02-05 2000-02-05 A
3 22 56 1 2000-02-07 2000-02-07 A
4 23 45 0 2000-01-01 2000-01-03 ABB
5 23 45 1 2000-01-05 2000-01-05 A
Edit to add explanation for what's going on with the "PERIOD" variable.
For simplicity, let's consider some sequential consecutive & non-consecutive events at the same location, so we can skip the group_by(LONG, LAT) & arrange(DATE) steps:
# sample dataset of 10 events at the same location.
# first 3 are on consecutive days, next 2 are on consecutive days,
# next 4 are on consecutive days, & last 1 is on its own.
df2 <- data.frame(
DATE = as.Date(c("2001-01-01", "2001-01-02", "2001-01-03",
"2001-01-05", "2001-01-06",
"2001-02-01", "2001-02-02", "2001-02-03", "2001-02-04",
"2001-04-01"), format = "%Y-%m-%d"),
LONG = rep(23, 10),
LAT = rep(45, 10),
TYPE = LETTERS[1:10]
)
As an intermediate step, we create some helper variables:
"DATE.diff" counts the difference between current row's date & previous row's date. Since the first row has no date before "2001-01-01", we default the difference to 1.
"non.consecutive" indicates whether the calculated date difference is not 1 (i.e. not consecutive from previous day), or 1 (i.e. consecutive from previous day). If you need to account for same-day events at the same location in the dataset, you can change the calculation from DATE.diff != 1 to DATE.diff > 1 here.
"PERIOD" keeps track of the number of TRUE results in the "non.consecutive" variable. Starting from the first row, every time a row is non-consecutive from the previous row, "PERIOD" increments by 1.
As a result of the helper variables, "PERIOD" takes on a different value for each group of consecutive dates.
df2.intermediate <- df2 %>%
mutate(DATE.diff = c(1, diff(DATE))) %>%
mutate(non.consecutive = DATE.diff != 1) %>%
mutate(PERIOD = cumsum(non.consecutive))
> df2.intermediate
DATE LONG LAT TYPE DATE.diff non.consecutive PERIOD
1 2001-01-01 23 45 A 1 FALSE 0
2 2001-01-02 23 45 B 1 FALSE 0
3 2001-01-03 23 45 C 1 FALSE 0
4 2001-01-05 23 45 D 2 TRUE 1
5 2001-01-06 23 45 E 1 FALSE 1
6 2001-02-01 23 45 F 26 TRUE 2
7 2001-02-02 23 45 G 1 FALSE 2
8 2001-02-03 23 45 H 1 FALSE 2
9 2001-02-04 23 45 I 1 FALSE 2
10 2001-04-01 23 45 J 56 TRUE 3
We can then treat "PERIOD" as a grouping variable in order to find the start / end date & events within each period:
df2.intermediate %>%
group_by(PERIOD) %>%
summarise(START_DATE = min(DATE),
END_DATe = max(DATE),
TYPE = paste(TYPE, collapse = "")) %>%
ungroup()
# A tibble: 4 x 4
PERIOD START_DATE END_DATe TYPE
<int> <date> <date> <chr>
1 0 2001-01-01 2001-01-03 ABC
2 1 2001-01-05 2001-01-06 DE
3 2 2001-02-01 2001-02-04 FGHI
4 3 2001-04-01 2001-04-01 J
With dplyr, we can group by LAT and LONG and select the maximum and minimum DATE for each group and paste the TYPE column together.
library(dplyr)
df %>%
group_by(LONG, LAT) %>%
summarise(start_date = min(as.Date(DATE, "%d/%m/%Y")),
end_date = max(as.Date(DATE, "%d/%m/%Y")),
type = paste0(TYPE, collapse = ""))
# LONG LAT start_date end_date type
# <int> <int> <date> <date> <chr>
#1 19 21 2000-02-06 2000-02-06 A
#2 22 56 2000-02-05 2000-02-05 A
#3 23 45 2000-01-01 2000-01-03 ABB
Consider a data frame of the form
idnum start end
1993.1 17 1993-01-01 1993-12-31
1993.2 17 1993-01-01 1993-12-31
1993.3 17 1993-01-01 1993-12-31
with start and end being of type Date
$ idnum : int 17 17 17 17 27 27
$ start : Date, format: "1993-01-01" "1993-01-01" "1993-01-01" "1993-01-01" ...
$ end : Date, format: "1993-12-31" "1993-12-31" "1993-12-31" "1993-12-31" ...
I would like to create a new dataframe, that has instead monthly observations for every row, for every month in between start and end (including the boundaries):
Desired Output
idnum month
17 1993-01-01
17 1993-02-01
17 1993-03-01
...
17 1993-11-01
17 1993-12-01
I'm not sure what format month should have, I will at some point want to group by idnum, month for regressions on the rest of the data set.
So far, for every single row, seq(from=test[1,'start'], to=test[1, 'end'], by='1 month') gives me the right sequence - but as soon as I try to apply that to the whole data frame, it will not work:
> foo <- apply(test, 1, function(x) seq(x['start'], to=x['end'], by='1 month'))
Error in to - from : non-numeric argument to binary operator
Using data.table:
# library() stops with an error when data.table is missing; require() would
# only return FALSE and let the next line fail with a less helpful message.
library(data.table) ## 1.9.2+
# Grouping by row number makes seq() run once per row of df.
setDT(df)[ , list(idnum = idnum, month = seq(start, end, by = "month")), by = 1:nrow(df)]
# you may use dot notation as a shorthand alias of list in j:
setDT(df)[ , .(idnum = idnum, month = seq(start, end, by = "month")), by = 1:nrow(df)]
setDT converts df to a data.table. Then for each row, by = 1:nrow(df), we create idnum and month as required.
Using dplyr :
# One monthly sequence per idnum, running from that id's earliest start to its
# latest end.
test %>%
  group_by(idnum) %>%
  summarize(start = min(start), end = max(end)) %>%
  # summarize() drops the idnum grouping, so without rowwise() do() would see
  # every id at once and seq() would be handed whole vectors of dates.
  rowwise() %>%
  do(data.frame(idnum = .$idnum, month = seq(.$start, .$end, by = "1 month")))
Note that here I don't generate a sequence between start and end for each row, instead it is a sequence between min(start) and max(end) for each idnum. If you want the former :
test %>%
rowwise() %>%
do(data.frame(idnum=.$idnum, month=seq(.$start,.$end,by="1 month")))
Updated2
With new versions of purrr (0.3.0) and dplyr (0.8.0), this can be done with map2
library(dplyr)
library(purrr)
library(tidyr) # unnest() lives in tidyr; dplyr and purrr do not export it
test %>%
  # sequence of monthly dates for each corresponding start, end elements
  transmute(idnum, month = map2(start, end, seq, by = "1 month")) %>%
  # unnest the list column (named explicitly: tidyr >= 1.0 warns otherwise)
  unnest(month) %>%
  # remove any duplicate rows
  distinct()
Updated
Based on @Ananda Mahto's comments
res1 <- melt(setNames(lapply(1:nrow(test), function(x) seq(test[x, "start"],
test[x, "end"], by = "1 month")), test$idnum))
Also,
res2 <- setNames(do.call(`rbind`,
with(test,
Map(`expand.grid`,idnum,
Map(`seq`, start, end, by='1 month')))), c("idnum", "month"))
head(res1)
# idnum month
#1 17 1993-01-01
#2 17 1993-02-01
#3 17 1993-03-01
#4 17 1993-04-01
#5 17 1993-05-01
#6 17 1993-06-01
One option creating a sequence per every row using dplyr and tidyr could be:
df %>%
rowwise() %>%
transmute(idnum,
date = list(seq(start, end, by = "month"))) %>%
unnest(date)
idnum date
<int> <date>
1 17 1993-01-01
2 17 1993-02-01
3 17 1993-03-01
4 17 1993-04-01
5 17 1993-05-01
6 17 1993-06-01
7 17 1993-07-01
8 17 1993-08-01
9 17 1993-09-01
10 17 1993-10-01
# … with 26 more rows
Or creating the sequence using a grouping ID:
df %>%
group_by(idnum) %>%
transmute(date = list(seq(min(start), max(end), by = "month"))) %>%
unnest(date)
Or when the goal is to create only one unique sequence per ID:
# NOTE(review): summarise() leaves the result ungrouped here, so min(start) and
# max(end) below span every idnum, and transmute() keeps only `date` - the
# idnum column is absent from the result. Confirm this is the intended output.
df %>%
group_by(idnum) %>%
summarise(start = min(start),
end = max(end)) %>%
transmute(date = list(seq(min(start), max(end), by = "month"))) %>%
unnest(date)
date
<date>
1 1993-01-01
2 1993-02-01
3 1993-03-01
4 1993-04-01
5 1993-05-01
6 1993-06-01
7 1993-07-01
8 1993-08-01
9 1993-09-01
10 1993-10-01
11 1993-11-01
12 1993-12-01
tidyverse answer
Data
df <- structure(list(idnum = c(17L, 17L, 17L), start = structure(c(8401,
8401, 8401), class = "Date"), end = structure(c(8765, 8765, 8765
), class = "Date")), class = "data.frame", .Names = c("idnum",
"start", "end"), row.names = c(NA, -3L))
Answer and output
library(tidyverse)
df %>%
  # Nest start/end per idnum. The bare-column form nest(start, end) is
  # deprecated since tidyr 1.0; the list column must be named explicitly.
  nest(data = c(start, end)) %>%
  # Daily sequence (step of 1 day) per idnum; assumes every idnum has exactly
  # one distinct start and one distinct end, otherwise seq() fails.
  mutate(data = map(data, ~ seq(unique(.x$start), unique(.x$end), 1))) %>%
  unnest(data)
# # A tibble: 365 x 2
# idnum data
# <int> <date>
# 1 17 1993-01-01
# 2 17 1993-01-02
# 3 17 1993-01-03
# 4 17 1993-01-04
# 5 17 1993-01-05
# 6 17 1993-01-06
# 7 17 1993-01-07
# 8 17 1993-01-08
# 9 17 1993-01-09
# 10 17 1993-01-10
# # ... with 355 more rows
And yet another tidyverse approach would be to use tidyr::expand:
library(dplyr, warn = FALSE)
library(tidyr)
df |>
mutate(
row = row_number()
) |>
group_by(row) |>
expand(idnum, date = seq(start, end, "month")) |>
ungroup() |>
select(-row)
#> # A tibble: 36 × 2
#> idnum date
#> <int> <date>
#> 1 17 1993-01-01
#> 2 17 1993-02-01
#> 3 17 1993-03-01
#> 4 17 1993-04-01
#> 5 17 1993-05-01
#> 6 17 1993-06-01
#> 7 17 1993-07-01
#> 8 17 1993-08-01
#> 9 17 1993-09-01
#> 10 17 1993-10-01
#> # … with 26 more rows