Multiple string replacement, decimals to quarters - r

I want to replace .00 with -Q1, .25 with -Q2, .50 with -Q3, and .75 with -Q4 as given below. However, my code is not working as expected. Any hints?
library(tidyverse)
dt1 <-
tibble(Date = c(2015.00, 2015.25, 2015.50, 2015.75))
dt1
# A tibble: 4 x 1
Date
<dbl>
1 2015
2 2015.
3 2016.
4 2016.
dt1 %>%
pull(Date)
[1] 2015.00 2015.25 2015.50 2015.75
dt1 %>%
mutate(Date1 = str_replace_all(string = Date, pattern = c(".00" = "-Q1", ".25" = "-Q2", ".50" = "-Q3", ".75" = "-Q4")))
# A tidytable: 4 × 2
Date Date1
<dbl> <chr>
1 2015 2015
2 2015. 2015-Q2
3 2016. 2015.5
4 2016. 2015-Q4

There also is a zoo-function for that:
library(tidyverse)
library(zoo)
dt1 <-
tibble(Date = c(2015.00, 2015.25, 2015.50, 2015.75))
dt1 %>%
mutate(Date1 = format.yearqtr(Date, format = "%Y.Q%q") )
# Date Date1
# <dbl> <chr>
# 1 2015 2015.Q1
# 2 2015. 2015.Q2
# 3 2016. 2015.Q3
# 4 2016. 2015.Q4

You may also use integer division %/% and modulo division %% simultaneously
paste0(dt1$Date %/% 1, '-Q',(dt1$Date %% 1)*4 +1)
[1] "2015-Q1" "2015-Q2" "2015-Q3" "2015-Q4"
Thus, using it in piped syntax as
dt1 %>%
mutate(date1 = paste0(Date %/% 1, '-Q',(Date %% 1)*4 +1))
# A tibble: 4 x 2
Date date1
<dbl> <chr>
1 2015 2015-Q1
2 2015. 2015-Q2
3 2016. 2015-Q3
4 2016. 2015-Q4

here is a quick fix:
dt1 %>%
mutate(Date1 = str_replace_all(format(Date, nsmall = 2),
pattern = c(".00" = "-Q1", ".25" = "-Q2", ".50" = "-Q3", ".75" = "-Q4")))
The problem is that 2015.00 is first transformed to character at which point it becomes 2015. Therefore, the string replacement fails.
You can see this, by trying as.character(2015.00).
However, this can easily be fixed by using format to format the number first.

vec <- c("00" = "-Q1", "25" = "-Q2", "50" = "-Q3", "75" = "-Q4")
dt1 %>%
mutate(new = paste0(Date %/% 1, vec[sprintf("%02d", Date %% 1 * 100)]))
Date new
<dbl> <chr>
1 2015 2015-Q1
2 2015. 2015-Q2
3 2016. 2015-Q3
4 2016. 2015-Q4

library(tidyverse)
dt1 <-
as.character(c(2015.00, 2015.25, 2015.50, 2015.75))
dt1 <- if_else(str_detect(dt1, '\\.', negate = TRUE),
paste0(dt1, '.00'), #If condition TRUE
dt1) #if condition FALSE
value_before <- c("\\.00","\\.25","\\.5","\\.75" )
value_after <- c("-Q1", "-Q2","-Q3", "-Q4")
tibble(Date = str_replace(dt1, value_before, value_after))
#> # A tibble: 4 x 1
#> Date
#> <chr>
#> 1 2015-Q1
#> 2 2015-Q2
#> 3 2015-Q3
#> 4 2015-Q4
Created on 2021-06-01 by the reprex package (v2.0.0)

A solution with dyplr and tidyr:
Prepare decimals for further process with format
separate and mutate with -Q1-Q4
unite
library(tidyr)
library(dplyr)
dt1 %>%
mutate(Date = format(round(Date, digits=2), nsmall = 2)) %>%
separate(Date, into = c("Year", "Quarter"), remove=FALSE) %>%
mutate(Quarter = recode(Quarter, "00" = "-Q1", "25" = "-Q2", "50" = "-Q3", "75" = "-Q4")) %>%
unite("new", Year:Quarter, sep = "")
Output:
Date new
<chr> <chr>
1 2015.00 2015-Q1
2 2015.25 2015-Q2
3 2015.50 2015-Q3
4 2015.75 2015-Q4

Related

How to create a new column that specifies which range of years a date belongs to (like academic year)?

In some cases, a "year" doesn't necessarily cycle from January 1st. For example, academic year starts at the end of August in the US. Another example is the NBA season.
My question: given data containing a date column, I want to create another column that refers to which period it falls in. For example, consider that we are given the following tib:
library(lubridate, warn.conflicts = FALSE)
library(tibble)
tib <- tibble(my_dates = as_date(c("1999-01-01", "2010-08-09", "2010-09-02", "1995-03-02")))
tib
#> # A tibble: 4 x 1
#> my_dates
#> <date>
#> 1 1999-01-01
#> 2 2010-08-09
#> 3 2010-09-02
#> 4 1995-03-02
and we want to mutate a column that refers to the academic year each date belongs to, provided that the academic year starts on August 31st:
desired_output <-
tib %>%
add_column(belongs_to_school_year = c("1998-1999", "2009-2010", "2010-2011", "1994-1995"))
desired_output
#> # A tibble: 4 x 2
#> my_dates belongs_to_school_year
#> <date> <chr>
#> 1 1999-01-01 1998-1999
#> 2 2010-08-09 2009-2010
#> 3 2010-09-02 2010-2011
#> 4 1995-03-02 1994-1995
How can I create the column belongs_to_school_year using mutate(), based on my_dates?
You can use dplyr and lubridate for this:
desired_output <- tib %>%
mutate(school_year = case_when(month(my_dates) <= 8 ~ paste(year(my_dates)-1, year(my_dates), sep = "-"),
month(my_dates) > 8 ~ paste(year(my_dates), year(my_dates)+1, sep = "-")))
or:
desired_output <- tib %>%
mutate(school_year = if_else(month(my_dates) <= 8,
paste(year(my_dates)-1, year(my_dates), sep = "-"),
paste(year(my_dates), year(my_dates)+1, sep = "-")))

R - dataframe with sequence from another dataframe

I have a dataframe like this:
source_data <-
data.frame(
id = c(seq(1,3)),
start = c(as.Date("2020-04-04"), as.Date("2020-04-02"), as.Date("2020-04-03")),
end = c(as.Date("2020-04-08"), as.Date("2020-04-05"), as.Date("2020-04-05"))
)
I want to create a date sequence for each id = crate each day between start and end dates and put it to another dataframe. So the result should look like this:
result <-
data.frame(
id = c(1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3),
date = c(
as.Date("2020-04-04"),
as.Date("2020-04-05"),
as.Date("2020-04-06"),
as.Date("2020-04-07"),
as.Date("2020-04-08"),
as.Date("2020-04-02"),
as.Date("2020-04-03"),
as.Date("2020-04-04"),
as.Date("2020-04-05"),
as.Date("2020-04-03"),
as.Date("2020-04-04"),
as.Date("2020-04-05")
)
)
I started with this date sequence, but how to join my source_data dataframe there?
solution <-
data.frame(
date = seq(min(source_data$start), max(source_data$end), by = 1)
)
We can use map2 to create the sequence between each corresponding 'start', 'end' dates and then unnest the list column
library(dplyr)
library(purrr)
library(tidyr)
source_data %>%
transmute(id, date = map2(start, end, seq, by = '1 day')) %>%
unnest(c(date))
# A tibble: 12 x 2
# id date
# <int> <date>
# 1 1 2020-04-04
# 2 1 2020-04-05
# 3 1 2020-04-06
# 4 1 2020-04-07
# 5 1 2020-04-08
# 6 2 2020-04-02
# 7 2 2020-04-03
# 8 2 2020-04-04
# 9 2 2020-04-05
#10 3 2020-04-03
#11 3 2020-04-04
#12 3 2020-04-05
Or using data.table
library(data.table)
setDT(source_data)[, .(date = seq(start, end, by = '1 day')), by = id]
Additional option with base R
lst1 <- Map(seq, source_data$start, source_data$end, MoreArgs = list(by = '1 day'))
data.frame(id = rep(source_data$id, lengths(lst1)), date = do.call(c, lst1))
Another base R solution
result <- do.call(rbind,
c(make.row.names = FALSE,
lapply(split(source_data,source_data$id),
function(v) with(v,data.frame(id = id, date = seq(start,end,by = 1))))))
which yields
> result
id date
1 1 2020-04-04
2 1 2020-04-05
3 1 2020-04-06
4 1 2020-04-07
5 1 2020-04-08
6 2 2020-04-02
7 2 2020-04-03
8 2 2020-04-04
9 2 2020-04-05
10 3 2020-04-03
11 3 2020-04-04
12 3 2020-04-05
additional option
library(dplyr)
source_data %>%
rowwise() %>%
mutate(out = list(seq.Date(start, end, "day"))) %>%
unnest(out) %>%
select(-c(start, end))

Efficient manipulation and extraction of data from multiple matrices - means and dates

I have a series of large matrices and I am just getting used to navigating them in this format and working with functions.
I have minute data for a number of parameters which i have been able to reduce to daily averages - i would like to align each mean output with a date sequence and from there extract the daily average for each year.
In the singular form i have done it like this
A <- matrix(c(1:3285),nrow=3)
AA <- sapply(1:1095, function(x) mean(A [,x], na.rm = TRUE))
D <- seq(from = as.Date("2013-01-01"), to = as.Date("2015-12-31"), by= 1)
df <- cbind.data.frame(D,AA)
Which gets me the means per column aligned to a date for 2013-2015
library(lubridate)
years <- year(as.Date(df$D, "%d-%b-%y"))
day <- yday(as.Date(df$D, "%d-%b-%y"))
#to get the average of DOY over three years
avg <- as.data.frame(tapply(df$AA,day, mean, na.rm=T)) #gives average value on day of year
#Average for specific DOY for each year
av <- as.data.frame(tapply(df$AA,list(day,years), mean, na.rm=T)) #gets the DOY average per year
#bind to get yearly averages and overall average in a data frame format
DF <- cbind(av,avg)
head(DF)
colnames(DF)[4] <- "avg" #rename ts average column
Now say i have multiple matrices (all the same dimension just different parameters) that i want to do this for... is there an efficient way to loop through this so i get a data frame (DF) output for each A-C?
#extra matrices to play with:
B <- matrix(c(3285:6570),nrow=3)
C <- matrix(c(6570:9855),nrow=3)
I have gotten thus far with some initial help on stackoverflow:
#column means for each matrices
vapply(list(A, B, C), colMeans, numeric(1095))
Here's a tidyverse solution. Let
dates <- seq(from = as.Date("2013-01-01"), to = as.Date("2015-12-31"), by = 1)
A <- data.frame(matrix(c(1:3285), ncol = 3, byrow = TRUE))
since I understand that dates are the same to all the matrices. Also, I made A long rather than wide, that's better when working with tidyverse. Then perhaps you would prefer the output in the form of
A %>% group_by(year = year(dates), day = yday(dates)) %>%
summarise(dayYearAvg = mean(c(X1, X2, X3))) %>%
group_by(day) %>% mutate(dayAvg = mean(dayYearAvg))
# A tibble: 1,095 x 4
# Groups: day [365]
# year day dayYearAvg dayAvg
# <dbl> <dbl> <dbl> <dbl>
# 1 2013 1 2 1097
# 2 2013 2 5 1100
# 3 2013 3 8 1103
# ...
If not, we get the same as in your example with
A %>% group_by(year = year(dates), day = yday(dates)) %>%
summarise(dayYearAvg = mean(c(X1, X2, X3))) %>%
group_by(day) %>% mutate(dayAvg = mean(dayYearAvg)) %>%
spread(year, dayYearAvg) %>% ungroup %>% select(-day)
# A tibble: 365 x 4
# dayAvg `2013` `2014` `2015`
# <dbl> <dbl> <dbl> <dbl>
# 1 1097 2 1097 2192
# 2 1100 5 1100 2195
# 3 1103 8 1103 2198
# 4 1106 11 1106 2201
# ...
Now let also
B <- data.frame(matrix(c(3285:6569), ncol = 3, byrow = TRUE))
C <- data.frame(matrix(c(6570:9854), ncol = 3, byrow = TRUE))
l <- list(A, B, C)
This gives
map(l, . %>% group_by(year = year(dates), day = yday(dates)) %>%
summarise(dayYearAvg = mean(c(X1, X2, X3))) %>%
group_by(day) %>% mutate(dayAvg = mean(dayYearAvg)) %>%
spread(year, dayYearAvg) %>% ungroup %>% select(-day))
# [[1]]
# A tibble: 365 x 4
# dayAvg `2013` `2014` `2015`
# <dbl> <dbl> <dbl> <dbl>
# 1 1097 2 1097 2192
# 2 1100 5 1100 2195
# ...
# [[2]]
# A tibble: 365 x 4
# dayAvg `2013` `2014` `2015`
# <dbl> <dbl> <dbl> <dbl>
# 1 4381 3286 4381 5476
# 2 4384 3289 4384 5479
# ...
# [[3]]
# A tibble: 365 x 4
# dayAvg `2013` `2014` `2015`
# <dbl> <dbl> <dbl> <dbl>
# 1 7666 6571 7666 8761
# 2 7669 6574 7669 8764
# ...
Here's a tinyverse solution (i.e., no third-party packages) that wraps your process in a function to receive a matrix as input and return data frame as output. Then run lapply on a list of matrices.
df_process <- function(mat) {
# CREATE DF AND ADD NEW COLUMNS
df <- within(data.frame(D=seq(from = as.Date("2013-01-01"),
to = as.Date("2015-12-31"), by= 1),
AA=sapply(1:1095, function(x) mean(mat[,x], na.rm=TRUE))),
{
year <- format(as.Date(df$D, origin="1970-01-01"), "%Y")
day <- format(as.Date(df$D, origin="1970-01-01"), "%d")
})
# CREATE DF WITH TAPPLY CALLS, RENAME COLUMNS
df <- setNames(data.frame(tapply(df$AA,list(day,years), mean, na.rm=T),
avg = c(tapply(df$AA, day, mean, na.rm=T))),
c("2013", "2014", "2015", "avg"))
}
A <- matrix(c(1:3285),nrow=3)
B <- matrix(c(3286:6570),nrow=3)
C <- matrix(c(6571:9855),nrow=3)
# NAMED LIST OF DATA FRAMES
DF_list <- setNames(lapply(list(A, B, C), df_process), c("A", "B", "C"))
all.equal(DF, DF_list$A)
# [1] TRUE
identical(DF, DF_list$A)
# [1] TRUE
Output
lapply(DF_list, head)
# $A
# 2013 2014 2015 avg
# 01 501.5 1596.5 2691.5 1596.5
# 02 504.5 1599.5 2694.5 1599.5
# 03 507.5 1602.5 2697.5 1602.5
# 04 510.5 1605.5 2700.5 1605.5
# 05 513.5 1608.5 2703.5 1608.5
# 06 516.5 1611.5 2706.5 1611.5
# $B
# 2013 2014 2015 avg
# 01 3786.5 4881.5 5976.5 4881.5
# 02 3789.5 4884.5 5979.5 4884.5
# 03 3792.5 4887.5 5982.5 4887.5
# 04 3795.5 4890.5 5985.5 4890.5
# 05 3798.5 4893.5 5988.5 4893.5
# 06 3801.5 4896.5 5991.5 4896.5
# $C
# 2013 2014 2015 avg
# 01 7071.5 8166.5 9261.5 8166.5
# 02 7074.5 8169.5 9264.5 8169.5
# 03 7077.5 8172.5 9267.5 8172.5
# 04 7080.5 8175.5 9270.5 8175.5
# 05 7083.5 8178.5 9273.5 8178.5
# 06 7086.5 8181.5 9276.5 8181.5

Expand start and end dates into a sequence of beginning and ending dates by calendar month

Given a table
id start end
1 22/03/2016 05/06/2016
2 17/08/2016 29/08/2016
3 22/09/2017 25/12/2017
I'm trying to split by Calendar month as the following table
id start end
1 22/03/2016 31/03/2016
1 01/04/2016 30/04/2016
1 01/05/2016 05/06/2016
2 17/08/2016 29/08/2016
3 22/09/2017 30/09/2017
3 01/10/2017 31/10/2017
3 01/11/2017 30/11/2017
3 01/12/2017 25/12/2017
I'm trying to modify a code extract from how to split rows of a dataframe in multiple rows based on start date and end date? , but I am not being able to modify correctly the code. The problem is generally in the months with 30 days, and maybe is easy but I am not still familiarized with regular expressions.
#sample data
df <- data.frame("starting_date" = as.Date(c("2016-03-22", "2016-08-17", "2017-09-12")),
"end_date" = as.Date(c("2016-06-05", "2016-08-29", "2017-12-25")),
col3=c('1','2', '3'))
df1 <- df[,1:2] %>%
rowwise() %>%
do(rbind(data.frame(matrix(as.character(c(
.$starting_date,
seq(.$starting_date, .$end_date, by=1)[grep("\\d{4}-\\d{2}-31|\\d{4}-\\d{2}-01", seq(.$starting_date, .$end_date, by=1))],
.$end_date)), ncol=2, byrow=T))
)
) %>%
data.frame() %>%
`colnames<-`(c("starting_date", "end_date")) %>%
mutate(starting_date= as.Date(starting_date, format= "%Y-%m-%d"),
end_date= as.Date(end_date, format= "%Y-%m-%d"))
#add temporary columns to the original and expanded date column dataframes
df$row_idx <- seq(1:nrow(df))
df$temp_col <- (year(df$end_date) - year(df$starting_date)) +1
df1 <- cbind(df1,row_idx = rep(df$row_idx,df$temp_col))
#join both dataframes to get the final result
final_df <- left_join(df1,df[,3:(ncol(df)-1)],by="row_idx") %>%
select(-row_idx)
final_df
If anyone knows how to modify the code or a better way to do it I will be very grateful.
We assume there is an error in the sample output in the question since the third row spans parts of two months and so should be split into two rows.
Define Seq which given one start and end Date variables produces a data.frame of start and end columns and then run it on each id using group_by:
library(dplyr)
library(zoo)
Seq <- function(start, end) {
ym <- seq(as.yearmon(start), as.yearmon(end), 1/12)
starts <- pmax(start, as.Date(ym, frac = 0))
ends <- pmin(end, as.Date(ym, frac = 1))
unique(data.frame(start = starts, end = ends))
}
fmt <- "%d/%m/%Y"
DF %>%
mutate(start = as.Date(start, fmt), end = as.Date(end, fmt)) %>%
group_by(id) %>%
do(Seq(.$start, .$end)) %>%
ungroup
giving:
# A tibble: 9 x 3
id start end
<int> <date> <date>
1 1 2016-03-22 2016-03-31
2 1 2016-04-01 2016-04-30
3 1 2016-05-01 2016-05-31
4 1 2016-06-01 2016-06-05
5 2 2016-08-17 2016-08-29
6 3 2017-09-22 2017-09-30
7 3 2017-10-01 2017-10-31
8 3 2017-11-01 2017-11-30
9 3 2017-12-01 2017-12-25
Note
The input DF in reproducible form:
Lines <- "
id start end
1 22/03/2016 05/06/2016
2 17/08/2016 29/08/2016
3 22/09/2017 25/12/2017"
DF <- read.table(text = Lines, header = TRUE)
So there's a probably a more elegant way to accomplish this and I feel like I've seen similar questions, but could not find a duplicate quickly, so here goes...
SETUP
library(tidyverse)
library(lubridate)
df <- data.frame(
id = c('1', '2', '3'),
starting_date = as.Date(c("2016-03-22", "2016-08-17", "2017-09-12")),
end_date = as.Date(c("2016-06-05", "2016-08-29", "2017-12-25")),
stringsAsFactors = FALSE
)
df
#> id starting_date end_date
#> 1 1 2016-03-22 2016-06-05
#> 2 2 2016-08-17 2016-08-29
#> 3 3 2017-09-12 2017-12-25
SOLUTION
df %>%
group_by(id) %>%
mutate(
date_seq = list(seq.Date(starting_date, end_date, by = "month") %>% ceiling_date("month") - 1)
) %>%
unnest() %>%
mutate(row = row_number()) %>%
mutate(
new_end_date = if_else(row == max(row), end_date, date_seq),
new_start_date = if_else(row == min(row), starting_date, floor_date(new_end_date, "month"))
) %>%
select(
id, new_start_date, new_end_date
)
#> # A tibble: 8 x 3
#> # Groups: id [3]
#> id new_start_date new_end_date
#> <chr> <date> <date>
#> 1 1 2016-03-22 2016-03-31
#> 2 1 2016-04-01 2016-04-30
#> 3 1 2016-06-01 2016-06-05
#> 4 2 2016-08-17 2016-08-29
#> 5 3 2017-09-12 2017-09-30
#> 6 3 2017-10-01 2017-10-31
#> 7 3 2017-11-01 2017-11-30
#> 8 3 2017-12-01 2017-12-25
EXPLANATION
Much of what's going on here takes place in the first mutate call which creates date_seq. To understand it, consider the following:
seq.Date(ymd("2016-03-22"), ymd("2016-06-05"), by = "month")
# [1] "2016-03-22" "2016-04-22" "2016-05-22"
seq.Date(ymd("2016-03-22"), ymd("2016-06-05"), by = "month") %>%
ceiling_date("month")
# [1] "2016-04-01" "2016-05-01" "2016-06-01"
seq.Date(ymd("2016-03-22"), ymd("2016-06-05"), by = "month") %>%
ceiling_date("month") - 1
# [1] "2016-03-31" "2016-04-30" "2016-05-31"
So basically, create a sequence of "end-of-month" dates between the original start and end dates. Putting this in a list-column allows us to organize by the id so that we unnest appropriately. Checkout the output after the end of the unnest():
df %>%
group_by(id) %>%
mutate(
date_seq = list(seq.Date(starting_date, end_date, by = "month") %>% ceiling_date("month") - 1)
) %>%
unnest()
From there I hope things are relatively straightforward. The row_number probably could have been replaced with something fancier like a first/last, but I thought this might be easier to follow.

Conditional sum with dates in column names

Want to calculate conditional sum based on specified dates in r. My sample df is
start_date = c("7/24/2017", "7/1/2017", "7/25/2017")
end_date = c("7/27/2017", "7/4/2017", "7/28/2017")
`7/23/2017` = c(1,5,1)
`7/24/2017` = c(2,0,2)
`7/25/2017` = c(0,0,10)
`7/26/2017` = c(2,2,2)
`7/27/2017` = c(0,0,0)
df = data.frame(start_date,end_date,`7/23/2017`,`7/24/2017`,`7/25/2017`,`7/26/2017`,`7/27/2017`)
In Excel it looks like:
I want to perform calculations as specified in Column H which is a conditional sum of columns C through G based on the dates specified in columns A and B.
Apparently, Excel allows columns to be dates but not R.
#wide to long format
dat <- reshape(df, direction="long", varying=list(names(df)[3:7]), v.names="Value",
idvar=c("start_date","end_date"), timevar="Date",
times=seq(as.Date("2017/07/23"),as.Date("2017/07/27"), "day"))
#convert from factor to date class
dat$end_date <- as.Date(dat$end_date, format = "%m/%d/%Y")
dat$start_date <- as.Date(dat$start_date, format = "%m/%d/%Y")
library(dplyr)
dat %>% group_by(start_date, end_date) %>%
mutate(mval = ifelse(between(Date, start_date, end_date), Value, 0)) %>%
summarise(conditional_sum=sum(mval))
# # A tibble: 3 x 3
# # Groups: start_date [?]
# start_date end_date conditional_sum
# <date> <date> <dbl>
# 1 2017-07-01 2017-07-04 0
# 2 2017-07-24 2017-07-27 4
# 3 2017-07-25 2017-07-28 12
You could achieve that as follows:
# number of trailing columns without numeric values
c = 2
# create a separate vector with the dates
dates = as.Date(gsub("X","",tail(colnames(df),-c)),format="%m.%d.%Y")
# convert date columns in dataframe
df$start_date = as.Date(df$start_date,format="%m/%d/%Y")
df$end_date = as.Date(df$end_date,format="%m/%d/%Y")
# calculate sum
sapply(1:nrow(df),function(x) {y = df[x,(c+1):ncol(df)][dates %in%
seq(df$start_date[x],df$end_date[x],by="day") ]; ifelse(length(y)>0,sum(y),0) })
returns:
[1] 4 0 12
Hope this helps!
Here's a solution all in one dplyr pipe:
library(dplyr)
library(lubridate)
library(tidyr)
df %>%
gather(date, value, -c(1, 2)) %>%
mutate(date = gsub('X', '', date)) %>%
mutate(date = gsub('\\.', '/', date)) %>%
mutate(date = mdy(date)) %>%
filter(date >= mdy(start_date) & date <=mdy(end_date)) %>%
group_by(start_date, end_date) %>%
summarize(Conditional_Sum = sum(value)) %>%
right_join(df) %>%
mutate(Conditional_Sum = ifelse(is.na(Conditional_Sum), 0, Conditional_Sum)) %>%
select(-one_of('Conditional_Sum'), one_of('Conditional_Sum'))
## start_date end_date X7.23.2017 X7.24.2017 X7.25.2017 X7.26.2017 X7.27.2017 Conditional_Sum
## <fctr> <fctr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 7/24/2017 7/27/2017 1 2 0 2 0 4
## 2 7/1/2017 7/4/2017 5 0 0 2 0 0
## 3 7/25/2017 7/28/2017 1 2 10 2 0 12

Resources