Complete list of date ranges to full year - r

I have the following example data frame:
Date_from <- c("2013-01-01","2013-05-10","2013-08-13","2013-11-19")
Date_to <- c("2013-05-07","2013-08-12","2013-11-18","2013-12-25")
y <- data.frame(Date_from,Date_to)
y$concentration <- c("1.5","2.5","1.5","3.5")
y$Date_from <- as.Date(y$Date_from)
y$Date_to <- as.Date(y$Date_to)
y$concentration <- as.numeric(y$concentration)
I use the following code to detect gaps in date ranges and add the missing date ranges into the data frame and asign NA to the missing concentration:
adding<-data.frame(Date_from=y$Date_to[-nrow(y)]+1,Date_to=y$Date_from[-1]-1,concentration=NA)
adding<-adding[ adding$Date_from<adding$Date_to,]
res<-rbind(y,adding)
res[order(res$Date_from),]
This results in:
Date_from Date_to concentration
2013-01-01 2013-05-07 1.5
2013-05-08 2013-05-09 NA
2013-05-10 2013-08-12 2.5
2013-08-13 2013-11-18 1.5
2013-11-19 2013-12-25 3.5
The problem now is that the data frame ends at 2013-12-25 and not 2013-12-31. How can I do the following:
Detect the ending date of the last date range in the data frame, e.g. 2013-12-25
Add one additional line and calculate new date range up to the last day of the year and add NA for concentration
The results should look like this:
Date_from Date_to concentration
2013-01-01 2013-05-07 1.5
2013-05-08 2013-05-09 NA
2013-05-10 2013-08-12 2.5
2013-08-13 2013-11-18 1.5
2013-11-19 2013-12-25 3.5
2013-12-26 2013-12-31 NA

Don't you just want this?
df <- read.table(text = "
Date_from Date_to concentration
2013-01-01 2013-05-07 1.5
2013-05-08 2013-05-09 NA
2013-05-10 2013-08-12 2.5
2013-08-13 2013-11-18 1.5
2013-11-19 2013-12-25 3.5", h = T, stringsAsFactors = F)
rbind(df, c(as.character(max(as.Date(df$Date_to))+1), paste0(substr(max(as.Date(df$Date_to)), 1, 4),"-12-31") , NA))
Date_from Date_to concentration
1 2013-01-01 2013-05-07 1.5
2 2013-05-08 2013-05-09 <NA>
3 2013-05-10 2013-08-12 2.5
4 2013-08-13 2013-11-18 1.5
5 2013-11-19 2013-12-25 3.5
6 2013-12-26 2013-12-31 <NA>

You can use this explicit function
date_order<-function(dt){
for(i in 1:(nrow(dt)-1)){
if(dt[[1]][i+1] - dt[[2]][i] > 1){
pre<-dt[[2]][i] + 1
post<-dt[[1]][(i+1)] - 1
add<-data.frame("Date_from" = pre,"Date_to" = post,"concentration" = NA)
dt2<-rbind.data.frame(dt,add)
}
}
if(exists("dt2") == F){
dt2<-dt
}
dt2<-dt2[order(dt2$Date_from),]
yr<-substr(x = dt[[2]][nrow(dt)],start = 1,stop = 4)
last_day<-as.Date(paste(yr,"12-31",sep = "-"),format = "%Y-%m-%d")
if(dt[[2]][nrow(dt)] != last_day){
add2<-data.frame("Date_from" = dt[[2]][nrow(dt)] + 1,"Date_to" = last_day,"concentration" = NA)
dt2<-rbind.data.frame(dt2,add2)
}
return(dt2)
}
Using this function with your data returns this:
> date_order(y)
Date_from Date_to concentration
1 2013-01-01 2013-05-07 1.5
5 2013-05-08 2013-05-09 NA
2 2013-05-10 2013-08-12 2.5
3 2013-08-13 2013-11-18 1.5
4 2013-11-19 2013-12-25 3.5
11 2013-12-26 2013-12-31 NA
Hope that's what you were looking for.

My suggestion is to join y with a dataframe that contains all possible periods (either explicitely given or the "remainder") within the year. The solution below is using data.table syntax and the floor_date() and ceiling_date() functions from the lubridate package. This ensures that the solution will work even if the given periods span multiple years.
library(data.table)
library(magrittr)
# coerce character dates to numeric dates
cols <- c("Date_from", "Date_to")
setDT(y, key = cols)[, (cols) := lapply(.SD, as.IDate), .SDcols = cols]
# create sequence of starting points of all periods
breaks <- y[, c(Date_from, Date_to + 1L)] %>%
# append start and end of year
c(lubridate::floor_date(min(.), "year"),
lubridate:: ceiling_date(max(.), "year")) %>%
sort() %>%
unique() %T>%
print()
[1] "2013-01-01" "2013-05-08" "2013-05-10" "2013-08-13" "2013-11-19" "2013-12-26" "2014-01-01"
# create periods
x <- data.table(from = head(breaks, -1L), to = tail(breaks, -1L) - 1L,
key = c("from", "to"))
x
from to
1: 2013-01-01 2013-05-07
2: 2013-05-08 2013-05-09
3: 2013-05-10 2013-08-12
4: 2013-08-13 2013-11-18
5: 2013-11-19 2013-12-25
6: 2013-12-26 2013-12-31
# right join to create the expected result
y[x]
Date_from Date_to concentration
1: 2013-01-01 2013-05-07 1.5
2: 2013-05-08 2013-05-09 NA
3: 2013-05-10 2013-08-12 2.5
4: 2013-08-13 2013-11-18 1.5
5: 2013-11-19 2013-12-25 3.5
6: 2013-12-26 2013-12-31 NA

Related

R error: Coercing 'list' RHS to 'double' to match the type of the target column

I have a dataset DT as below:
All date columns are 'date'
All res columns are 'numeric'
date_1 and res_1 are pair and date_2 and res_2 are pair
index_date date_1 date_2 res_1 res_2 taken_date taken_res
1 2015-08-25 2013-11-13 2015-08-25 1.50 1.5 NA NA
2 2017-09-11 2016-09-29 2017-05-12 2.70 2.4 NA NA
3 2015-08-17 2014-08-08 2015-06-08 2.00 2.6 NA NA
4 2017-05-14 2016-05-31 2016-12-19 1.30 1.2 NA NA
5 2015-11-14 2014-11-11 2015-08-10 1.60 2.8 NA NA
6 2016-08-08 NA 2016-08-08 NA 1.4 NA NA
7 2018-12-01 2014-05-30 2017-07-24 1.70 1.8 NA NA
8 2013-01-11 NA 2012-10-23 NA 3.7 NA NA
9 2015-06-06 NA 2015-02-07 NA 1.3 NA NA
10 2015-05-19 NA 2015-05-19 NA 1.4 NA NA
What I want:
Filling values in "taken_date" and "taken_res" by comparing "date_1" and "date_2" with "index_date"
"taken_date" and "taken_res" will be the closest date and result to the "index_date"
For example, the first row should be 2015-08-25 for "taken_date" and 1.5 for "taken_res"
I have a working function, which finds the closest date and fill values accordingly, if there are two dates and results.
However, my issue comes when there is only one date and result, like the 6th, 8th, 9th, and 10th.
The code is:
date.vars <- c("date_1", "date_2")
res.vars <- c("res_1", "res_2")
taken.vars <- c("taken_date", "taken_res")
# some more lines here to prepare DT
...
# only one date and result
DT[apply(DT[, date.vars, with=F], 1, function(x)
sum(is.na(x))==1),
(taken.vars) := list(
apply(.SD, 1, function(x)
as.numeric(na.omit(x[res.vars]))),
apply(.SD, 1, function(x)
as.Date(na.omit(x[date.vars])))
)]
R returns me warning as below:
Error in `[.data.table`(DT, apply(DT[, date.vars, with = F], 1, function(x) sum(is.na(x)) == :
(list) object cannot be coerced to type 'double'
In addition: Warning message:
In `[.data.table`(DT, apply(DT[, date.vars, with = F], 1, function(x) sum(is.na(x)) == :
Coercing 'list' RHS to 'double' to match the type of the target column (column 7 named 'taken_res').
Would you please help me to correct my code?
Here is my attempt. I did not handle the cases where there is NA in date. What you can do is to calculate the gaps between 1) index date and date 1, and 2) index date and date 2. Using the gaps, you can run logical checks. Based on that, you can assign targeted dates and values.
I am not the right person who can explain the error message well. But, I think you are facing coercion issues. See page 16 in the CRAN manual (version 1.12.8), where you can find information on := (assignment by reference). If anybody can provide technical explanation, please do so.
setDT(mydt)[, `:=` (taken_date = fifelse(test = abs(index_date - date_1) < abs(index_date - date_2),
yes = date_1,
no = date_2),
taken_res = fifelse(test = abs(index_date - date_1) < abs(index_date - date_2),
yes = res_1,
no = res_2))][]
# index_date date_1 date_2 res_1 res_2 taken_date taken_res
# 1: 2015-08-25 2013-11-13 2015-08-25 1.5 1.5 2015-08-25 1.5
# 2: 2017-09-11 2016-09-29 2017-05-12 2.7 2.4 2017-05-12 2.4
# 3: 2015-08-17 2014-08-08 2015-06-08 2.0 2.6 2015-06-08 2.6
# 4: 2017-05-14 2016-05-31 2016-12-19 1.3 1.2 2016-12-19 1.2
# 5: 2015-11-14 2014-11-11 2015-08-10 1.6 2.8 2015-08-10 2.8
# 6: 2016-08-08 <NA> 2016-08-08 NA 1.4 <NA> NA
# 7: 2018-12-01 2014-05-30 2017-07-24 1.7 1.8 2017-07-24 1.8
# 8: 2013-01-11 <NA> 2012-10-23 NA 3.7 <NA> NA
# 9: 2015-06-06 <NA> 2015-02-07 NA 1.3 <NA> NA
#10: 2015-05-19 <NA> 2015-05-19 NA 1.4 <NA> NA
Although you have not clearly mentioned what you wanna do with rows with NA, it seems to me that you were trying to do something like this.
setDT(mydt)[, `:=` (taken_date = fifelse(test = abs(index_date - date_1) < abs(index_date - date_2),
yes = date_1,
no = date_2),
taken_res = fifelse(test = abs(index_date - date_1) < abs(index_date - date_2),
yes = res_1,
no = res_2))][is.na(date_1),
`:=` (taken_date = date_2, taken_res = res_2)][is.na(date_2),
`:=` (taken_date = date_1, taken_res = res_1)]
# index_date date_1 date_2 res_1 res_2 taken_date taken_res
# 1: 2015-08-25 2013-11-13 2015-08-25 1.5 1.5 2015-08-25 1.5
# 2: 2017-09-11 2016-09-29 2017-05-12 2.7 2.4 2017-05-12 2.4
# 3: 2015-08-17 2014-08-08 2015-06-08 2.0 2.6 2015-06-08 2.6
# 4: 2017-05-14 2016-05-31 2016-12-19 1.3 1.2 2016-12-19 1.2
# 5: 2015-11-14 2014-11-11 2015-08-10 1.6 2.8 2015-08-10 2.8
# 6: 2016-08-08 <NA> 2016-08-08 NA 1.4 2016-08-08 1.4
# 7: 2018-12-01 2014-05-30 2017-07-24 1.7 1.8 2017-07-24 1.8
# 8: 2013-01-11 <NA> 2012-10-23 NA 3.7 2012-10-23 3.7
# 9: 2015-06-06 <NA> 2015-02-07 NA 1.3 2015-02-07 1.3
#10: 2015-05-19 <NA> 2015-05-19 NA 1.4 2015-05-19 1.4
DATA
mydt <- structure(list(index_date = structure(c(16672, 17420, 16664,
17300, 16753, 17021, 17866, 15716, 16592, 16574), class = "Date"),
date_1 = structure(c(16022, 17073, 16290, 16952, 16385, NA,
16220, NA, NA, NA), class = "Date"), date_2 = structure(c(16672,
17298, 16594, 17154, 16657, 17021, 17371, 15636, 16473, 16574
), class = "Date"), res_1 = c(1.5, 2.7, 2, 1.3, 1.6, NA,
1.7, NA, NA, NA), res_2 = c(1.5, 2.4, 2.6, 1.2, 2.8, 1.4,
1.8, 3.7, 1.3, 1.4)), row.names = c("1", "2", "3", "4", "5",
"6", "7", "8", "9", "10"), class = "data.frame")

Complete data frame with missing date ranges for multiple parameters

I have the following data frame:
Date_from <- c("2013-02-01","2013-05-10","2013-08-13","2013-02-01","2013-05-10","2013-08-13","2013-02-01","2013-05-10","2013-08-13")
Date_to <- c("2013-05-07","2013-08-12","2013-11-18","2013-05-07","2013-08-12","2013-11-18","2013-05-07","2013-08-12","2013-11-18")
y <- data.frame(Date_from,Date_to)
y$concentration <- c("1.5","2.5","1.5","3.5","1.5","2.5","1.5","3.5","3")
y$Parameter<-c("A","A","A","B","B","B","C","C","C")
y$Date_from <- as.Date(y$Date_from)
y$Date_to <- as.Date(y$Date_to)
y$concentration <- as.numeric(y$concentration)
I will need to check the data frame if for EACH Parameter the date range begins at the first day of the year (2013-01-01) and ends at the last day of the year (2013-12-31). If not I will need to add an extra row at the beginning and at the end for each of the parameters to complete the date range to a full year for each parameter. The result should look like this:
Date_from Date_to concentration Parameter
2013-01-01 2013-01-31 NA NA
2013-02-01 2013-05-07 1.5 A
2013-05-10 2013-08-12 2.5 A
2013-08-13 2013-11-18 1.5 A
2013-11-19 2013-12-31 NA NA
2013-01-01 2013-01-31 NA NA
2013-02-01 2013-05-07 3.5 B
2013-05-10 2013-08-12 1.5 B
2013-08-13 2013-11-18 2.5 B
2013-11-19 2013-12-31 NA NA
2013-01-01 2013-01-31 NA NA
2013-02-01 2013-05-07 1.5 C
2013-05-10 2013-08-12 3.5 C
2013-08-13 2013-11-18 3.0 C
2013-11-19 2013-12-31 NA NA
Please note: The date ranges are only equal in this example for simplification.
UPDATE: This is my original data snippet and code:
sm<-read.csv("https://www.dropbox.com/s/tft6inwcrjqujgt/Test_data.csv?dl=1",sep=";",header=TRUE)
cleaned_sm<-sm[,c(4,5,11,14)] ##Delete obsolete columns
colnames(cleaned_sm)<-c("Parameter","Concentration","Date_from","Date_to")
cleaned_sm$Date_from<-as.Date(cleaned_sm$Date_from, format ="%d.%m.%Y")
cleaned_sm$Date_to<-as.Date(cleaned_sm$Date_to, format ="%d.%m.%Y")
#detect comma decimal separator and replace with dot decimal separater as comma is not recognised as a number
cleaned_sm=lapply(cleaned_sm, function(x) gsub(",", ".", x))
cleaned_sm<-data.frame(cleaned_sm)
cleaned_sm$Concentration <- as.numeric(cleaned_sm$Concentration)
cleaned_sm$Date_from <- as.Date(cleaned_sm$Date_from)
cleaned_sm$Date_to <- as.Date(cleaned_sm$Date_to)
Added code based on #jasbner:
cleaned_sm %>%
group_by(Parameter) %>%
do(add_row(.,
Date_from = ymd(max(Date_to))+1 ,
Date_to = ymd(paste(year(max(Date_to)),"1231")),
Parameter = .$Parameter[1])) %>%
do(add_row(.,
Date_to = ymd(min(Date_from))-1,
Date_from = ymd(paste(year(min(Date_from)),"0101")) ,
Parameter = .$Parameter[1],
.before = 0)) %>%
filter(!duplicated(Date_from,fromLast = T),!duplicated(Date_to))
My attempt with dplyr and lubridate. Hacked together but I think it should work. Note this does not look for any gaps in the middle of the date ranges. Basically, for each group, you add a row before and after that particular group. Then if there are any cases where the date range starts at the beginning of the year or ends at the end of the year the added rows are filtered out.
library(dplyr)
library(lubridate)
cleaned_sm %>%
group_by(Parameter) %>%
do(add_row(.,
Date_from = ymd(max(.$Date_to))+1 ,
Date_to = ymd(paste(year(max(.$Date_to)),"1231")),
Parameter = .$Parameter[1])) %>%
do(add_row(.,
Date_to = ymd(min(.$Date_from))-1,
Date_from = ymd(paste(year(min(.$Date_from)),"0101")) ,
Parameter = .$Parameter[1],
.before = 0)) %>%
filter(!duplicated(Date_from,fromLast = T),!duplicated(Date_to))
# A tibble: 15 x 4
# Groups: Parameter [3]
# Date_from Date_to concentration Parameter
# <date> <date> <dbl> <chr>
# 1 2013-01-01 2013-01-31 NA A
# 2 2013-02-01 2013-05-07 1.50 A
# 3 2013-05-10 2013-08-12 2.50 A
# 4 2013-08-13 2013-11-18 1.50 A
# 5 2013-11-19 2013-12-31 NA A
# 6 2013-01-01 2013-01-31 NA B
# 7 2013-02-01 2013-05-07 3.50 B
# 8 2013-05-10 2013-08-12 1.50 B
# 9 2013-08-13 2013-11-18 2.50 B
# 10 2013-11-19 2013-12-31 NA B
# 11 2013-01-01 2013-01-31 NA C
# 12 2013-02-01 2013-05-07 1.50 C
# 13 2013-05-10 2013-08-12 3.50 C
# 14 2013-08-13 2013-11-18 3.00 C
# 15 2013-11-19 2013-12-31 NA C
This seems like it requires a combination of different packages to attack it. I am using tidyr, data.table, and I used lubridate.
date.start <- seq.Date(as.Date("2013-01-01"), as.Date("2013-12-31"), by = "day")
Date.Int <- data.frame(Date_from = date.start, Date_to = date.start)
y_wide <- y %>% spread(Parameter, concentration)
y_wide <- as.data.table(setkey(as.data.table(y_wide), Date_from, Date_to))
Date.Int <- as.data.table(setkey(as.data.table(Date.Int), Date_from, Date_to))
dats <- foverlaps(Date.Int, y_wide, nomatch = NA)
fin.dat <- dats %>%
mutate(A = ifelse(is.na(A), -5, A),
seqs = cumsum(!is.na(A) & A != lag(A, default = -5))) %>%
group_by(seqs) %>%
summarise(Date_from = first(i.Date_from),
Date_to = last(i.Date_to) ,
A = first(A),
B = first(B),
C = first(C)) %>%
mutate(A = ifelse(A == -5, NA, A)) %>%
ungroup()%>%
gather(Concentration, Parameter, A:C) %>%
mutate(Concentration = ifelse(is.na(Parameter), NA, Concentration))
Okay, so I created a vector of dates from a start point to an end point (date.start); then I turned into a data.frame with the same interval names and interval dates for Date.Int. This is because foverlaps needs to compare two intervals (same date start and end dates in Date.Int are now officially intervals). I then took your data you provided and spread, turning it from long format data to wide format data and turned that into a data.table. keying a data.table sets up how it should be arranged, and when using foverlaps you have to key the start dates and end dates (in that order). foverlaps determines if an interval falls within another interval of dates. If you print out dats, you will see a bunch of lines with NA for everything because they did not fall within an interval. So now we have to group these in some manner. I picked grouping by values of "A" in dats. The grouping variable is called seqs. But then I summarised the data, and then switched it back from wide format to long format and replaced the appropriate NA values.

Fill in missing date ranges

I have the following example data frame:
Date_from <- c("2013-01-01","2013-01-10","2013-01-16","2013-01-19")
Date_to <- c("2013-01-07","2013-01-12","2013-01-18","2013-01-25")
y <- data.frame(Date_from,Date_to)
y$concentration <- c("1.5","2.5","1.5","3.5")
y$Date_from <- as.Date(y$Date_from)
y$Date_to <- as.Date(y$Date_to)
y$concentration <- as.numeric(y$concentration)
These are measurend concentrations of heavy metals for a specific date range. However, the date ranges are not consecutive as there are gaps between 2013-01-07 to 2013-01-10 and 2013-01-12 to 2013-01-16. I need to detect those gaps, insert a row after each gap and fill it with the missing range. The result should look like this:
Date_from Date_to concentration
2013-01-01 2013-01-07 1.5
2013-01-08 2013-01-09 NA
2013-01-10 2013-01-12 2.5
2013-01-13 2013-01-15 NA
2013-01-16 2013-01-18 1.5
2013-01-19 2013-01-25 3.5
Try this:
adding <- data.frame(Date_from = y$Date_to[-nrow(y)]+1,
Date_to = y$Date_from[-1]-1, concentration = NA)
adding <- adding[adding$Date_from <= adding$Date_to,]
res <- rbind(y,adding)
res[order(res$Date_from),]
# Date_from Date_to concentration
#1 2013-01-01 2013-01-07 1.5
#5 2013-01-08 2013-01-09 NA
#2 2013-01-10 2013-01-12 2.5
#6 2013-01-13 2013-01-15 NA
#3 2013-01-16 2013-01-18 1.5
#4 2013-01-19 2013-01-25 3.5
Here's a solution that requires magrittr and dplyr. It finds the gaps, then loops through to fill them.
# Locations to pad data frame
tmp <- which(y$Date_from-lag(y$Date_to) > 1)
tmp <- tmp + (1:length(tmp)) - 1
for(i in tmp) {
# Add row
y %<>% add_row(Date_from = y$Date_to[i-1] + 1,
Date_to = y$Date_from[i] - 1,
.before = i)
}
# Date_from Date_to concentration
# 1 2013-01-01 2013-01-07 1.5
# 2 2013-01-08 2013-01-09 NA
# 3 2013-01-10 2013-01-12 2.5
# 4 2013-01-13 2013-01-15 NA
# 5 2013-01-16 2013-01-18 1.5
# 6 2013-01-19 2013-01-25 3.5

Converting Monthly Data to Daily in R

I have a data.frame df that has monthly data:
Date Value
2008-01-01 3.5
2008-02-01 9.5
2008-03-01 0.1
I want there to be data on every day in the month (and I will assume Value does not change during each month) since I will be merging this into a different table that has monthly data.
I want the output to look like this:
Date Value
2008-01-02 3.5
2008-01-03 3.5
2008-01-04 3.5
2008-01-05 3.5
2008-01-06 3.5
2008-01-07 3.5
2008-01-08 3.5
2008-01-09 3.5
2008-01-10 3.5
2008-01-11 3.5
2008-01-12 3.5
2008-01-13 3.5
2008-01-14 3.5
2008-01-15 3.5
2008-01-16 3.5
2008-01-17 3.5
2008-01-18 3.5
2008-01-19 3.5
2008-01-20 3.5
2008-01-21 3.5
2008-01-22 3.5
2008-01-23 3.5
2008-01-24 3.5
2008-01-25 3.5
2008-01-26 3.5
2008-01-27 3.5
2008-01-28 3.5
2008-01-29 3.5
2008-01-30 3.5
2008-01-31 3.5
2008-02-01 9.5
I have tried to.daily but my call:
df <- to.daily(df$Date)
returns
Error in to.period(x, "days", name = name, ...) : ‘x’ contains no data
Not sure if i understood perfectly but i think something like this may work.
First, i define the monthly data table
library(data.table)
DT_month=data.table(Date=as.Date(c("2008-01-01","2008-02-01","2008-03-01","2008-05-01","2008-07-01"))
,Value=c(3.5,9.5,0.1,5,8))
Then, you have to do the following
DT_month[,Month:=month(Date)]
DT_month[,Year:=year(Date)]
start_date=min(DT_month$Date)
end_date=max(DT_month$Date)
DT_daily=data.table(Date=seq.Date(start_date,end_date,by="day"))
DT_daily[,Month:=month(Date)]
DT_daily[,Year:=year(Date)]
DT_daily[,Value:=-100]
for( i in unique(DT_daily$Year)){
for( j in unique(DT_daily$Month)){
if(length(DT_month[Year==i & Month== j,Value])!=0){
DT_daily[Year==i & Month== j,Value:=DT_month[Year==i & Month== j,Value]]
}
}
}
Basically, the code will define the month and year of each monthly value in separate columns.
Then, it will create a vector of daily data using the minimum and maximum dates in your monthly data, and will create two separate columns for year and month for the daily data as well.
Finally, it goes through every combination of year and months of data filling the daily values with the monthly ones. In case there is no data for certain combination of month and year, it will show a -100.
Please let me know if it works.
An option using tidyr::expand expand a row between 1st day of month to last day of month. The lubridate::floor_date can provide 1st day of month and lubridate::ceiling_date() - days(1) will provide last day of month.
library(tidyverse)
library(lubridate)
df %>% mutate(Date = ymd(Date)) %>%
group_by(Date) %>%
expand(Date = seq(floor_date(Date, unit = "month"),
ceiling_date(Date, unit="month")-days(1), by="day"), Value) %>%
as.data.frame()
# Date Value
# 1 2008-01-01 3.5
# 2 2008-01-02 3.5
# 3 2008-01-03 3.5
# 4 2008-01-04 3.5
# 5 2008-01-05 3.5
#.....so on
# 32 2008-02-01 9.5
# 33 2008-02-02 9.5
# 34 2008-02-03 9.5
# 35 2008-02-04 9.5
# 36 2008-02-05 9.5
#.....so on
# 85 2008-03-25 0.1
# 86 2008-03-26 0.1
# 87 2008-03-27 0.1
# 88 2008-03-28 0.1
# 89 2008-03-29 0.1
# 90 2008-03-30 0.1
# 91 2008-03-31 0.1
Data:
df <- read.table(text =
"Date Value
2008-01-01 3.5
2008-02-01 9.5
2008-03-01 0.1",
header = TRUE, stringsAsFactors = FALSE)
to.daily can only be applied to xts/zooobjects and can only convert to a LOWER frequency. i.e. from daily to monthly, but not the other way round.
One easy way to accomplish what you want is converting df to an xts object:
df.xts <- xts(df$Value,order.by = df$Date)
And merge, like so:
na.locf(merge(df.xts, foo=zoo(NA, order.by=seq(start(df.xts), end(df.xts),
"day",drop=F)))[, 1])
df.xts
2018-01-01 3.5
2018-01-02 3.5
2018-01-03 3.5
2018-01-04 3.5
2018-01-05 3.5
2018-01-06 3.5
2018-01-07 3.5
….
2018-01-27 3.5
2018-01-28 3.5
2018-01-29 3.5
2018-01-30 3.5
2018-01-31 3.5
2018-02-01 9.5
2018-02-02 9.5
2018-02-03 9.5
2018-02-04 9.5
2018-02-05 9.5
2018-02-06 9.5
2018-02-07 9.5
2018-02-08 9.5
….
2018-02-27 9.5
2018-02-28 9.5
2018-03-01 0.1
If you want to adjust the price continuously over the course of a month use na.spline in place of na.locf.
Maybe not an efficient one but with base R we can do
do.call("rbind", lapply(1:nrow(df), function(i)
data.frame(Date = seq(df$Date[i],
(seq(df$Date[i],length=2,by="months") - 1)[2], by = "1 days"),
value = df$Value[i])))
We basically generate a sequence of dates from start_date to the last day of that month which is calculated by
seq(df$Date[i],length=2,by="months") - 1)[2]
and repeat the same value for all the dates and put them in the data frame.
We get a list of dataframe and then we can rbind them using do.call.
Another way:
library(lubridate)
d <- read.table(text = "Date Value
2008-01-01 3.5
2008-02-01 9.5
2008-03-01 0.1",
stringsAsFactors = FALSE, header = TRUE)
Dates <- seq(from = min(as.Date(d$Date)),
to = ceiling_date(max(as.Date(d$Date)), "month") - days(1),
by = "1 days")
data.frame(Date = Dates,
Value = setNames(d$Value, d$Date)[format(Dates, format = "%Y-%m-01")])

Given start date and end date, reshape/expand data for each day between (each day on a row) [duplicate]

This question already has answers here:
Expand rows by date range using start and end date
(6 answers)
Closed 6 years ago.
I have spent much time to get each difference days in R:
start <- as.Date(c("2013-02-26", "2013-03-26","2013-04-01","2013-04-26","2013-05-26"))
end <- as.Date(c("2013-03-25","2013-03-31","2013-04-25","2013-05-25","2013-06-25"))
per_cost <- c(3451380,3767052,3726900,4076868,3575311)
x <- data.frame(START_DAY=start, END_DAY=end, PER_COST=per_cost)
x$DIF_DAYS<- x$END_DAY-x$START_DAY
Then, I got this:
START_DAY END_DAY PER_COST DIF_DAYS
1 2013-02-26 2013-03-25 3451380 27 days
2 2013-03-26 2013-03-31 3767052 5 days
3 2013-04-01 2013-04-25 3726900 24 days
4 2013-04-26 2013-05-25 4076868 29 days
5 2013-05-26 2013-06-25 3575311 30 days
I would like to get this output:
DATE PER_COST
2013-02-26 3451380
2013-02-27 3451380
2013-02-28 3451380
2013-02-29 3451380
...
2013-03-25 3451380
2013-03-26 3767052
2013-03-27 3767052
2013-03-28 3767052
How to do so?
Using data.table
library(data.table)
setDT(x)[, list(DATE=seq(START_DAY, END_DAY, by = 'day')), PER_COST]
# PER_COST DATE
# 1: 3451380 2013-02-26
# 2: 3451380 2013-02-27
# 3: 3451380 2013-02-28
# 4: 3451380 2013-03-01
# 5: 3451380 2013-03-02
#---
#116: 3575311 2013-06-21
#117: 3575311 2013-06-22
#118: 3575311 2013-06-23
#119: 3575311 2013-06-24
#120: 3575311 2013-06-25
If there are duplicate PER_COST, then it may be better to use 1:nrow(x) as the grouping variable
setDT(x)[, list(DATE=seq(START_DAY, END_DAY, by = 'day'),
PER_COST=rep(PER_COST, END_DAY-START_DAY+1)), 1:nrow(x)]
Update
Using dplyr
library(dplyr)
x %>%
rowwise() %>%
do(data.frame(DATE=seq(.$START_DAY, .$END_DAY, by='day'),
PER_COST= rep(.$PER_COST, .$END_DAY-.$START_DAY+1)))
You could do something like
do.call(rbind, apply(df, 1, function(x)
data.frame(DATE = seq.Date(from = as.Date(x[1]), to = as.Date(x[2]), by = "day"),
PER_COST = x[3], row.names = NULL))
)
# 1.1 2013-02-26 3451380
# 1.2 2013-02-27 3451380
# 1.3 2013-02-28 3451380
# 1.4 2013-03-01 3451380
# 1.5 2013-03-02 3451380
# 1.6 2013-03-03 3451380
# 1.7 2013-03-04 3451380

Resources