Sliding window with tidyr::nest in R

I am using tidyr::nest to deliver a grouped table to boot() and boot.ci() from the boot package, in order to calculate the mean and a confidence interval for a non-parametric statistic. This works fine for non-overlapping groups, as below:
library(dplyr)
library(tidyr)
library(purrr)
library(lubridate)
library(broom)
library(boot)
#toy example
set.seed(1)
Sys.setenv(TZ="America/Chicago")
df <- data.frame(
  date = mdy("01-01-2018") + ddays(sample(0:364, 100, replace = TRUE)),
  score = sample(0:10, 100, replace = TRUE,
                 prob = c(0.15, 0.15, rep(0.15/7, 7), 0.25, 0.3)))
# the statistic of interest
net_promoter_score <- function(data, col_name = 'score') {
  return(
    (sum(data[[col_name]] >= 9, na.rm = TRUE) -
       sum(data[[col_name]] <= 6, na.rm = TRUE)) / sum(!is.na(data[[col_name]])) * 100
  )
}
# boot needs to resample the statistic by index
nps_boot <- function(d, i) net_promoter_score(d[i, ])
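As a quick sanity check of the statistic, a hypothetical four-response example (scores of 9-10 count as promoters, 0-6 as detractors):
net_promoter_score(data.frame(score = c(10, 9, 7, 3)))
#> [1] 25  # (2 promoters - 1 detractor) / 4 responses * 100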
# do NPS confidence intervals by month - this works fine!
by_month <- df %>%
  mutate(month = lubridate::month(date, label = TRUE, abbr = TRUE)) %>%
  nest(-month) %>%
  mutate(boots = map(data, ~ boot::boot(.x, nps_boot, R = 4999)),
         CI = map(boots, ~ boot::boot.ci(.x, conf = 0.9)$bca),
         tidied_NPS = map(boots, broom::tidy),
         tidied_CI = map(CI, broom::tidy)) %>%
  unnest(tidied_NPS, tidied_CI, .drop = TRUE) %>%
  select(month, mean = statistic, CI10 = V4, CI90 = V5)
by_month %>% head
# A tibble: 6 x 4
month mean CI10 CI90
<ord> <dbl> <dbl> <dbl>
1 Apr 0 -100 33.3
2 May 6.67 -46.7 33.3
3 Jul 60 -100 60
4 Nov -20 -80 20
5 Mar -11.1 -66.7 33.3
6 Dec 0 -100 50
But I would like to do this over a sliding window - like a moving average, except it is a different statistic sliding over the data. I can do this with lapply, but I would like to use the tidyverse.
# do 50-sample sliding window. I would like to solve this with tidyverse
window_size <- 50
results <- lapply(1:(nrow(df) - window_size), function(x) {
  boot_df <- df %>% arrange(date) %>% slice(x:(x + window_size - 1))
  boot <- boot::boot(boot_df, nps_boot, R = 999)
  CI <- boot.ci(boot, conf = 0.9)$bca[4:5]
  return(c(x, mean(boot$t), CI))
})
by_slide <- as.data.frame(do.call(rbind, results)) %>%
  select(date = V1, mean = V2, CI10 = V3, CI90 = V4) %>%
  mutate(date = mdy("01-01-2018") + ddays((window_size %/% 2) + date))
by_slide %>% head
date mean CI10 CI90
1 2018-01-27 15.40541 -8.00000 38
2 2018-01-28 15.94194 -8.00000 36
3 2018-01-29 15.83383 -8.00000 36
4 2018-01-30 15.24525 -8.00000 38
5 2018-01-31 15.79780 -10.00000 36
6 2018-02-01 15.82583 -10.92218 36

You can use purrr::map_dfr():
results <- purrr::map_dfr(1:(nrow(df) - window_size), function(x) {
  boot_df <- df %>% arrange(date) %>% slice(x:(x + window_size - 1))
  boot <- boot::boot(boot_df, nps_boot, R = 999)
  CI <- boot.ci(boot, conf = 0.9)$bca[4:5]
  list(date = boot_df$date[1],
       mean = mean(boot$t),
       ci_lo = CI[1],
       ci_hi = CI[2])
})
results
# A tibble: 50 x 4
date mean ci_lo ci_hi
<date> <dbl> <dbl> <dbl>
1 2018-01-05 15.6 -8 38
2 2018-01-09 16.3 -8 36
3 2018-01-22 16.2 -10 36
4 2018-01-23 15.6 -10 36
5 2018-01-26 15.2 -10 36
6 2018-01-31 16.5 -10 36
7 2018-02-06 19.7 -4.75 40
8 2018-02-09 19.5 -8 40
9 2018-02-14 16.3 -10 36
10 2018-02-15 16.1 -10 36
# … with 40 more rows
Then you can use results directly in computing by_slide:
by_slide <- results %>%
  mutate(date = mdy("01-01-2018") + ddays(window_size %/% 2))
Although I admit I don't understand how adding date to the ddays() duration is meant to work here - it doesn't reproduce your shown output. But I'm assuming that's a syntax issue, separate from your question about how to replace lapply.
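If the intent was to reproduce the original index-based date labels (window start index plus half the window size, in days from 2018-01-01), here is a sketch - assuming the rows of results are still in window order:
by_slide <- results %>%
  mutate(date = mdy("01-01-2018") + ddays(row_number() + window_size %/% 2))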

Related

Interpolate and insert missing rows into a dataframe in R

I am working with a large time series of oceanographic data which needs a lot of manipulation.
I have several days of data missing and would like to interpolate them. Specifically date/depth/temperature.
Here is an example of my df:
> tibble(df)
# A tibble: 351,685 x 9
date time depthR SV temp salinity conduct density calcSV
<date> <times> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 2021-11-17 07:50:18 0.5 1524. 19.7 37.8 51.0 27 1524.
2 2021-11-17 07:50:22 0.5 1524. 19.9 37.6 50.9 26.8 1524.
3 2021-11-17 07:50:23 1.1 1524. 19.9 37.6 50.9 26.8 1524.
4 2021-11-17 07:50:24 1.5 1524. 19.9 37.6 50.9 26.8 1524.
5 2021-11-17 07:50:25 2 1524. 19.9 37.6 50.9 26.8 1524.
Each date contains over 1000 rows of data, so my idea was to find the max depth for each day and then interpolate reasonable max-depth values for the missing days in between.
So far, I have found the max depth per date:
group <- df %>% group_by(date) %>% summarise(max = max(depthR, na.rm = TRUE))
> tibble(group)
# A tibble: 40 x 2
date max
<date> <dbl>
1 2021-11-17 685.
2 2021-11-18 695.
3 2021-11-19 136.
4 2021-11-20 138.
5 2021-11-21 142.
6 2021-11-22 26
7 2021-11-23 136.
8 2021-11-24 297.
9 2021-11-25 613.
10 2021-11-26 81.1
# ... with 30 more rows
And then I managed to generate the missing dates with:
> group <- seq(min(group$date), max(group$date), by = "1 day")
> group <- data.frame(date=group)
> tibble(group)
# A tibble: 69 x 1
date
<date>
1 2021-11-17
2 2021-11-18
3 2021-11-19
4 2021-11-20
5 2021-11-21
6 2021-11-22
7 2021-11-23
8 2021-11-24
9 2021-11-25
10 2021-11-26
# ... with 59 more rows
As you can see, the previous query was overwritten.
So I tried creating a new df for the generated dates and merging the two together. I got this error:
> library(stringr)
> group$combined <- str_c(group$date, '', dateinterp$date)
Error: Assigned data `str_c(group$date, "", dateinterp$date)` must be compatible with existing data.
x Existing data has 40 rows.
x Assigned data has 69 rows.
i Only vectors of size 1 are recycled.
How can I merge these two objects of differing lengths into one dataframe in chronological order, without overwriting the original data or causing a conflict?
Following that, I'm not sure how I would proceed to interpolate the depths and temperatures for each date.
Perhaps starting with something like the following:
depth = seq(1, 200, length.out = 100)
Eventually the date variable will be exchanged for geo coords.
Any advice greatly appreciated.
EDIT: As requested by @AndreaM, an example of my data:
> dput(head(df))
structure(list(date = structure(c(18948, 18948, 18948, 18948,
18948, 18948), class = "Date"), time = structure(c(0.326597222222222,
0.326643518518519, 0.326655092592593, 0.326666666666667, 0.326678240740741,
0.326712962962963), format = "h:m:s", class = "times"), depth = c(0.5,
0.5, 1.1, 1.5, 2, 2.5), SV = c(1524.024, 1524.026, 1524.025,
1524.008, 1524.016, 1524.084), temp = c(19.697, 19.864, 19.852,
19.854, 19.856, 19.847), salinity = c(37.823, 37.561, 37.557,
37.568, 37.573, 37.704), conduct = c(51.012, 50.878, 50.86, 50.876,
50.884, 51.032), density = c(27, 26.755, 26.758, 26.768, 26.773,
26.877), calcSV = c(1523.811, 1523.978, 1523.949, 1523.975, 1523.993,
1524.124)), row.names = 100838:100843, class = "data.frame")
One approach; adapt to your case as appropriate:
library(dplyr)
library(lubridate) ## facilitates date-time manipulations
## example data:
patchy_data <- data.frame(date = as.Date('2021-11-01') + sample(1:10, 6),
                          value = rnorm(6)) %>%
  arrange(date)
## create vector of -only!- missing dates:
missing_dates <-
  setdiff(
    seq.Date(from = min(patchy_data$date),
             to = max(patchy_data$date),
             by = '1 day'),
    patchy_data$date
  ) %>%
  as.Date(origin = '1970-01-01')  ## setdiff drops the Date class
## extend initial dataframe with one row per missing date:
full_data <-
  patchy_data %>%
  bind_rows(data.frame(date = missing_dates,
                       value = NA)) %>%
  arrange(date)
## group by month and impute missing data from the monthwise statistic:
full_data %>%
  mutate(month = lubridate::month(date)) %>%
  group_by(month) %>%
  ## coalesce conveniently replaces ifelse-constructs for filling NAs;
  ## note: use bare column names here - .$value would ignore the grouping
  mutate(imputed = coalesce(value, mean(value, na.rm = TRUE)))
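For reference, coalesce() picks the first non-NA value elementwise, which is what makes it a tidy stand-in for ifelse(is.na(...), ...):
dplyr::coalesce(c(1, NA, 3), c(9, 9, 9))
#> [1] 1 9 3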
Edit
One possibility to expand the generated data (the missing dates) with additional parameters (e.g. measuring depths) is to use expand.grid, as follows. Assuming object names from the previous code:
## depths of daily measurements:
observation_depths <- c(0.5, 1.1, 1.5) ## example
## generate dataframe with missing dates x depths:
missing_dates_and_depths <-
  setNames(expand.grid(missing_dates, observation_depths),
           c('date', 'depthR'))
## stack both dataframes as above:
full_data <-
  patchy_data %>%
  bind_rows(missing_dates_and_depths) %>%
  arrange(date)
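From there, one way to fill the stacked NAs is linear interpolation over time with zoo::na.approx - a sketch, assuming full_data as built above with a numeric value column:
library(zoo)
full_data %>%
  arrange(date) %>%
  mutate(value_interp = na.approx(value, x = date, na.rm = FALSE))
## na.rm = FALSE keeps any leading/trailing NAs that cannot be interpolated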

Moving window regression on time series with defined starting point each day

I am trying to fit linear models to a time-series where the regression begins at midnight each day and uses all data until 0600 the following morning (covering a total of 30 hrs). I want to do this for every day in the time-series, and this also needs to be applied by a grouping factor. What I ultimately need is the regression coefficients added to the data frame for the day where the regression started. I am familiar with rolling and window regressions and how to apply functions across groups using dplyr. Where I am struggling is how to code that the regression needs to start at midnight each day. If I were to use a window function, after the first day it would be shifted ahead six hours from midnight and I am not sure how to shift the window back to midnight. Seems like I need to specify a window size and a lag/lead at each iteration but can't visualize how to implement that. Any insight is appreciated.
Here is some sample data. I would like to model dv ~ datetime, by grp:
df <- dplyr::arrange(
  data.frame(datetime = seq(as.POSIXct("2020-09-19 00:00:00"),
                            as.POSIXct("2020-09-30 00:00:00"), "hour"),
             grp = rep(c('a', 'b', 'c'), 265),
             dv = rnorm(795)),
  grp, datetime)
We assume that we want each regression to cover 30 rows (except for any stub at the end) and that we should move forward by 24 hours for each regression so that there is one regression per date within grp.
library(dplyr)
library(zoo)  # for rollapplyr

ans <- df %>%
  group_by(grp) %>%
  group_modify(~ {
    r <- rollapplyr(1:nrow(.), 30, by = 24,
                    function(ix) coef(lm(dv ~ datetime, ., subset = ix)),
                    align = "left", partial = TRUE)
    data.frame(date = head(unique(as.Date(.$datetime)), nrow(r)),
               coef1 = r[, 1], coef2 = r[, 2])
  }) %>%
  ungroup
giving:
> ans
# A tibble: 36 x 4
grp date coef1 coef2
<chr> <date> <dbl> <dbl>
1 a 2020-09-19 -7698. 0.00000481
2 a 2020-09-20 -2048. 0.00000128
3 a 2020-09-21 -82.0 0.0000000514
4 a 2020-09-22 963. -0.000000602
5 a 2020-09-23 2323. -0.00000145
6 a 2020-09-24 5886. -0.00000368
7 a 2020-09-25 7212. -0.00000450
8 a 2020-09-26 -17448. 0.0000109
9 a 2020-09-27 1704. -0.00000106
10 a 2020-09-28 15731. -0.00000982
# ... with 26 more rows
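A toy illustration of how width, by, align and partial interact in rollapply (hypothetical numbers, sums instead of regressions):
library(zoo)
## windows of 4 rows, advancing 3 rows at a time, left-aligned,
## with a truncated final window allowed:
rollapply(1:10, width = 4, by = 3, FUN = sum, align = "left", partial = TRUE)
#> [1] 10 22 34 10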
Old answer
After re-reading the question I replaced this with the above.
Within each group, create g, which increments at every 6 am row, and let width be the number of rows since the most recent 6 am row. Then run rollapplyr using the width vector to define the widths to regress over.
library(dplyr)
library(zoo)
ans <- df %>%
  group_by(grp) %>%
  group_modify(~ {
    g <- cumsum(format(.$datetime, "%H") == "06")
    width <- 1:nrow(.) - match(g, g) + 1
    r <- rollapplyr(1:nrow(.), width,
                    function(ix) coef(lm(dv ~ datetime, ., subset = ix)),
                    partial = TRUE, fill = NA)
    mutate(., coef1 = r[, 1], coef2 = r[, 2])
  }) %>%
  ungroup
giving:
> ans
# A tibble: 795 x 5
grp datetime dv coef1 coef2
<chr> <dttm> <dbl> <dbl> <dbl>
1 a 2020-09-19 00:00:00 -0.560 -0.560 NA
2 a 2020-09-19 01:00:00 -0.506 -24071. 0.0000150
3 a 2020-09-19 02:00:00 -1.76 265870. -0.000166
4 a 2020-09-19 03:00:00 0.0705 -28577. 0.0000179
5 a 2020-09-19 04:00:00 1.95 -248499. 0.000155
6 a 2020-09-19 05:00:00 0.845 -205918. 0.000129
7 a 2020-09-19 06:00:00 0.461 0.461 NA
8 a 2020-09-19 07:00:00 0.359 45375. -0.0000284
9 a 2020-09-19 08:00:00 -1.40 412619. -0.000258
10 a 2020-09-19 09:00:00 -0.446 198902. -0.000124
# ... with 785 more rows
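To see what g and width compute, a small illustration on a hypothetical six-hour stretch:
datetime <- as.POSIXct("2020-09-19 04:00:00") + 3600 * (0:5)  # 04:00 .. 09:00
g <- cumsum(format(datetime, "%H") == "06")   # 0 0 1 1 1 1
seq_along(datetime) - match(g, g) + 1         # 1 2 1 2 3 4 - width resets at the 6 am row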
Note
Input used
set.seed(123)
df <- dplyr::arrange(
  data.frame(datetime = seq(as.POSIXct("2020-09-19 00:00:00"),
                            as.POSIXct("2020-09-30 00:00:00"), "hour"),
             grp = rep(c('a', 'b', 'c'), 265),
             dv = rnorm(795)),
  grp, datetime)

Subtracting values from the value at a specific date

I have a dataset that looks similar to:
Date        Total_Nonfarm  Leisure_and_Hospitality
2020-01-01  300            50
2020-02-01  200            40
2020-03-01  100            15
2020-04-01  75             15
2020-05-01  150            10
I need to calculate the monthly losses of both Nonfarm and Leisure and Hospitality jobs (two new columns) relative to the totals on 2020-02-01. Rows before 2020-02-01 should be unaffected; only rows from that date onward get values.
I've tried to find a way to extract the jobs values for the row where the date equals 2020-02-01, but could not figure it out. I thought that if I could store that value (say Feb2020 <- *the correct function to find that value*), I could then use dplyr to mutate a new column, e.g. mutate(Total_jobs_lost = Feb2020 - Total_Nonfarm). But I think this would then also affect rows prior to 2020-02-01.
I would like the output to look as such:
Date        Total_Nonfarm  Leisure_and_Hospitality  Total_Nonfarm_Losses  LH_Losses
2020-01-01  300            50                       NA                    NA
2020-02-01  200            40                       0                     0
2020-03-01  100            15                       -100                  -25
2020-04-01  75             15                       -125                  -25
2020-05-01  150            10                       -50                   -30
Any help would be appreciated. Thanks.
require(tidyverse)
require(lubridate)

(df <- read_csv2(file = "data.csv"))
df$Date <- dmy(df$Date)
df$Total_Nonfarm_Losses <- NA
df$Leisure_and_Hospitality_Losses <- NA
timepoint <- ymd("2020/02/01")
(before <- df %>% filter(Date < timepoint))
(after <- df %>% filter(Date >= timepoint))
(
  after
  ## use $-indexing so the February values come out as scalars,
  ## not 1x1 tibbles as df[df$Date == timepoint, "Total_Nonfarm"] would return
  %>% mutate(Feb_Nonfarm = df$Total_Nonfarm[df$Date == timepoint],
             Feb_Leisure = df$Leisure_and_Hospitality[df$Date == timepoint],
             Total_Nonfarm_Losses = Feb_Nonfarm - Total_Nonfarm,
             Leisure_and_Hospitality_Losses = Feb_Leisure - Leisure_and_Hospitality)
  %>% select(1:5)
  %>% bind_rows(before, .)
)
And the output:
# A tibble: 5 x 5
  Date       Total_Nonfarm Leisure_and_Hospitality Total_Nonfarm_Losses Leisure_and_Hospitality_Losses
  <date>             <dbl>                   <dbl>                <dbl>                          <dbl>
1 2020-01-01           300                      50                   NA                             NA
2 2020-02-01           200                      40                    0                              0
3 2020-03-01           100                      15                  100                             25
4 2020-04-01            75                      15                  125                             25
5 2020-05-01           150                      10                   50                             30
Note the sign convention: this computes the February value minus the current month, so losses come out positive, whereas your desired output has them negative; flip the subtraction if you want the latter.
library(tidyverse)

data <- read.delim("clipboard")
## use NA, not the string "NA", so the columns stay numeric
data$Total_Nonfarm_Losses <- NA
data$LH_Losses <- NA
Feb2020 <- filter(data, Date == "2020-02-01")$Total_Nonfarm
Feb2020b <- filter(data, Date == "2020-02-01")$Leisure_and_Hospitality
data[2:5, ] <- data[2:5, ] %>%
  mutate(Total_Nonfarm_Losses = Total_Nonfarm - Feb2020,
         LH_Losses = Leisure_and_Hospitality - Feb2020b)
Date Total_Nonfarm Leisure_and_Hospitality Total_Nonfarm_Losses LH_Losses
1 2020-01-01 300 50 NA NA
2 2020-02-01 200 40 0 0
3 2020-03-01 100 15 -100 -25
4 2020-04-01 75 15 -125 -25
5 2020-05-01 150 10 -50 -30
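For completeness, a single-pipeline sketch that produces the asked-for signs (current month minus the February value) without splitting the data - assuming Date is already of class Date and df is the example data; baseline is a hypothetical helper name:
library(dplyr)
library(lubridate)
baseline <- ymd("2020-02-01")
df %>%
  mutate(Total_Nonfarm_Losses = if_else(Date >= baseline,
                                        as.numeric(Total_Nonfarm - Total_Nonfarm[Date == baseline]),
                                        NA_real_),
         LH_Losses = if_else(Date >= baseline,
                             as.numeric(Leisure_and_Hospitality - Leisure_and_Hospitality[Date == baseline]),
                             NA_real_))
## as.numeric() guards against integer columns, which if_else would refuse to mix with NA_real_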

Missing data in R - How to skip grouping of days with missing information?

I have hourly temperature measurements and I wish to calculate the average per day, but only for complete days (i.e. those with 24 measurements). Incomplete days should be summarized as NA.
I have grouped the values by year, month and day and called summarize().
I have three months of data missing, which appears as a gap in my ggplot output - and that gap is what I want to achieve for the rest. The problem is that when I call summarize() to calculate the mean of my values, days with only 1 or 2 measurements also get averaged. Only days where all 24 values are missing come out as NA.
Date TempUrb TempRur UHI
1 2011-03-21 22:00:00 10.1 11.67000 -1.570000
2 2011-03-21 23:00:00 9.9 11.67000 -1.770000
3 2011-03-22 00:00:00 10.9 11.11000 -0.210000
4 2011-03-22 01:00:00 10.7 10.56000 0.140000
5 2011-03-22 02:00:00 9.7 10.00000 -0.300000
6 2011-03-22 03:00:00 9.5 10.00000 -0.500000
7 2011-03-22 04:00:00 9.4 8.89000 0.510000
8 2011-03-22 05:00:00 8.4 8.33500 0.065000
9 2011-03-22 06:00:00 8.2 7.50000 0.700000
AvgUHI <- UHI %>%
  group_by(year(Date), add = TRUE) %>%
  group_by(month(Date), add = TRUE) %>%
  group_by(day(Date), add = TRUE, .drop = TRUE) %>%
  summarize(AvgUHI = mean(UHI, na.rm = TRUE))
# A tibble: 2,844 x 4
# Groups: year(Date), month(Date) [95]
`year(Date)` `month(Date)` `day(Date)` AvgUHI
<int> <int> <int> <dbl>
1476 2015 4 4 0.96625000
1477 2015 4 5 -0.11909722
1478 2015 4 6 -0.60416667
1479 2015 4 7 -0.92916667
1480 2015 4 8 NA
1481 2015 4 9 NA
AvgUHI <- AvgUHI %>%
  group_by(`year(Date)`, add = TRUE) %>%
  group_by(`month(Date)`, add = TRUE) %>%
  summarize(AvgUHI = mean(AvgUHI, na.rm = TRUE))
# A tibble: 95 x 3
# Groups: year(Date) [9]
`year(Date)` `month(Date)` AvgUHI
<int> <int> <dbl>
50 2015 4 0.580887346
51 2015 5 0.453815051
52 2015 6 0.008479618
As you can see in the final table above, I get an average for 04-2015 even though I am missing data in that month (2015-04-08 and 2015-04-09 in this example, shown as NA in the second table).
The same happens when I calculate AvgUHI and I'm missing hourly data.
I simply would like to see on the last table the AvgUHI for 04-2015 be NA.
The following will give a dataframe aggregated by day, where only the complete days, with 24 observations, are not NA. Then you can group by month to get the final dataframe.
UHI %>%
  mutate(Day = as.Date(Date)) %>%
  group_by(Day) %>%
  mutate(n = n(), tmpUHI = if_else(n == 24, UHI, NA_real_)) %>%
  summarize(AvgUHI = mean(tmpUHI)) %>%
  full_join(data.frame(Day = seq(min(.$Day), max(.$Day), by = "day"))) %>%
  arrange(Day) -> AvgUHI
For hours look at Rui Barradas' answer. For months the following code worked:
AvgUHI %>%
  group_by(year(Day), add = TRUE) %>%
  group_by(month(Day), add = TRUE) %>%
  mutate(sum = sum(is.na(AvgUHI)), tmpUHI = if_else(sum <= 10, AvgUHI, NA_real_)) %>%
  summarise(AvgUHI = mean(tmpUHI, na.rm = TRUE)) -> AvgUHI
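If instead any missing day should invalidate the whole month, a sketch: drop the threshold and let the NA propagate through mean():
## starting from the daily AvgUHI produced by the first pipeline above
AvgUHI %>%
  group_by(year(Day), month(Day)) %>%
  summarise(AvgUHI = mean(AvgUHI))  # no na.rm: a single NA day makes the month NA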

Sum between two weeks interval

Suppose I have a daily rain data.frame like this:
df.meteoro <- data.frame(
  Dates = seq(as.Date("2017/1/19"), as.Date("2018/1/18"), "days"),
  rain = rnorm(length(seq(as.Date("2017/1/19"), as.Date("2018/1/18"), "days"))))
I'm trying to sum the accumulated rain over 14-day intervals with this code:
library(tidyverse)
library(lubridate)
df.rain <- df.meteoro %>%
  mutate(TwoWeeks = round_date(Dates, "14 days")) %>%
  group_by(TwoWeeks) %>%
  summarise(sum_rain = sum(rain))
The problem is that it isn't starting on 2017-01-19 but on 2017-01-15 and I was expecting my output dates to be:
"2017-02-02" "2017-02-16" "2017-03-02" "2017-03-16" "2017-03-30" "2017-04-13"
"2017-04-27" "2017-05-11" "2017-05-25" "2017-06-08" "2017-06-22" "2017-07-06" "2017-07-20"
"2017-08-03" "2017-08-17" "2017-08-31" "2017-09-14" "2017-09-28" "2017-10-12" "2017-10-26"
"2017-11-09" "2017-11-23" "2017-12-07" "2017-12-21" "2018-01-04" "2018-01-18"
TL;DR: I have a year of daily rain data and want to sum the accumulated rain over the date intervals above.
Please help.
Using round_date in the way you have shown will not give you 14-day periods as you might expect. I have taken a different approach in this solution: generate a sequence of dates between your first and last dates, group these into 14-day periods, then join the dates to your observations.
startdate <- min(df.meteoro$Dates)
enddate <- max(df.meteoro$Dates)

dateseq <- data.frame(Dates = seq.Date(startdate, enddate, by = 1)) %>%
  mutate(group = as.numeric(Dates - startdate) %/% 14) %>%
  group_by(group) %>%
  mutate(starts = min(Dates))

df.rain <- df.meteoro %>%
  right_join(dateseq) %>%
  group_by(starts) %>%
  summarise(sum_rain = sum(rain))

head(df.rain)
> head(df.rain)
# A tibble: 6 x 2
starts sum_rain
<date> <dbl>
1 2017-01-19 6.09
2 2017-02-02 5.55
3 2017-02-16 -3.40
4 2017-03-02 2.55
5 2017-03-16 -0.12
6 2017-03-30 8.95
Using a right join to the date sequence ensures that if missing observation days ever spanned a complete period, that period would still be listed in the result (though in your case you have a complete year of dates anyway).
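The integer-division trick is what pins the periods to the series start; for example:
as.numeric(as.Date("2017-02-01") - as.Date("2017-01-19")) %/% 14  # 0: day 13, still in the first period
as.numeric(as.Date("2017-02-02") - as.Date("2017-01-19")) %/% 14  # 1: day 14, starts the second period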
round_date rounds to the nearest multiple of unit (here, 14 days) since some epoch (probably the Unix epoch of 1970-01-01 00:00:00), which doesn't line up with your purpose.
To get what you want, you can do the following:
df.rain <- df.meteoro %>%
  mutate(days_since_start = as.numeric(Dates - as.Date("2017/1/18")),
         TwoWeeks = as.Date("2017/1/18") + 14 * ceiling(days_since_start / 14)) %>%
  group_by(TwoWeeks) %>%
  summarise(sum_rain = sum(rain))
This computes days_since_start as the days since 2017/1/18 and then manually rounds to the next multiple of two weeks.
Assuming you want to round to the closest date among the ones you have specified, I guess the following will work:
library(plyr)      # for ddply
library(lubridate) # for ymd and interval

targetDates <- seq(ymd("2017-02-02"), ymd("2018-01-18"), by = '14 days')
df.meteoro$Dates <- targetDates[sapply(df.meteoro$Dates,
                                       function(x) which.min(abs(interval(targetDates, x))))]
sum_rain <- ddply(df.meteoro, .(Dates), summarize, sum_rain = sum(rain, na.rm = TRUE))
As you can see, not all dates have the same number of observations. The date "2017-02-02", for instance, collects all records from "2017-01-19" through "2017-02-09", which is 22 records. From "2017-02-10" onward, dates are rounded to "2017-02-16", and so on.
This may be a cheat, but assuming each row/observation is a separate day, why not just group by every 14 rows and sum?
# Assign interval groups, each 14 rows
df.meteoro$my_group <- rep(1:100, each = 14, length.out = nrow(df.meteoro))

# Grab interval names (first date of each group)
my_interval_names <- df.meteoro %>%
  select(-rain) %>%
  group_by(my_group) %>%
  slice(1)

# Summarise
df.meteoro %>%
  group_by(my_group) %>%
  summarise(rain = sum(rain)) %>%
  left_join(., my_interval_names)
#> Joining, by = "my_group"
#> # A tibble: 27 x 3
#> my_group rain Dates
#> <int> <dbl> <date>
#> 1 1 3.86 2017-01-19
#> 2 2 -0.581 2017-02-02
#> 3 3 -0.876 2017-02-16
#> 4 4 1.80 2017-03-02
#> 5 5 3.79 2017-03-16
#> 6 6 -3.50 2017-03-30
#> 7 7 5.31 2017-04-13
#> 8 8 2.57 2017-04-27
#> 9 9 -1.33 2017-05-11
#> 10 10 5.41 2017-05-25
#> # ... with 17 more rows
Created on 2018-03-01 by the reprex package (v0.2.0).
