generate seasonal plot, but with fiscal year start/end dates - r

Hello! Is there a way to index a chart to start and end at specific points
(which may be out of numeric order)?
I have data that begins October 1st and ends September 30th the following year. The series repeats over multiple past years, and I want to build a daily seasonality chart. The challenge is that the X axis does not run from low to high; it runs 10-11-12-1-2-3-4-5-6-7-8-9.
Question 1:
Can you order the index by month 10-11-12-1-2-3-4-5-6-7-8-9?
while, being compatible with %m-%d formatting, as the real problem is in
daily format, but for the sake of brevity, I am only using months.
the result should look something like this...sorry i had to use excel...
Question 2:
Can we remove the connected chart lines, or will the solution to 1, naturally fix
question 2? examples in the attempts below.
Question 3:
Can the final format of the solution allow taking a moving average, or other
mutations of the initial data? The table in attempt #2 would allow taking the average of each month across years. Since July 2017 is 6 and July 2018 is 12, we would plot a 9 in the chart, etc. for the entire plot.
Question 4:
Is there an xts equivalent to solve this problem?
THANK YOU, THANK YOU, THANK YOU!
library(ggplot2)
library(plotly)
library(tidyr)
library(reshape2)
Date <- seq(as.Date("2016-10-1"), as.Date("2018-09-01"), by="month")
values <- c(2,3,4,3,4,5,6,4,5,6,7,8,9,10,8,9,10,11,12,13,11,12,13,14)
YearEnd <-c(2017,2017,2017,2017,2017,2017,2017,2017,2017,2017,2017,2017,
2018,2018,2018,2018,2018,2018,2018,2018,2018,2018,2018,2018)
df <- data.frame(Date,values,YearEnd)
## PLOT THE TIMESERIES
plot_ly(df, x = ~Date, y = ~values, type = "scatter", mode = "lines")
## PLOT THE DATA BY MONTH: attempt 1
df$Month <- format(df$Date, format="%m")
df2 <- df %>%
select(values, Month, YearEnd)
plot_ly(df2, x = ~Month, y = ~values, type = "scatter", mode = "lines",
connectgaps = FALSE)
## Plot starts on the 10th month, which is good, but the index is
## in standard order, not 10-11-12-1-2-3-4-5-6-7-8-9
## It also still connects the gaps, bad.
## CREATE A PIVOTTABLE: attempt 2
## Pivot to one column per YearEnd, then melt back to long format.
table <- spread(df2,YearEnd, values)
## NOTE(review): melt() names its measure column `value` by default, so df3
## has columns Month / series / value and no `values` column.
df3 <- melt(table , id.vars = 'Month', variable.name = 'series')
## NOTE(review): because df3 has no `values` column, `~values` below
## presumably falls back to the global `values` vector (still in Oct -> Sep
## order) rather than the melted data -- confirm; use `y = ~value` to plot df3.
plot_ly(df3, x = ~Month, y = ~values, type = "scatter", mode = "lines",
connectgaps = FALSE)
## now the data are in the right order, but the index is still wrong
## I also do not understand how plotly is ordering it correctly, as 2
## is not the starting point in January.

You just need to set the desired levels for the Month inside factor
library(magrittr)
library(tidyverse)
library(lubridate)
library(plotly)
# Sample data: 24 monthly observations covering two Oct -> Sep fiscal years.
Date <- seq(
  from = as.Date("2016-10-1"),
  to = as.Date("2018-09-01"),
  by = "month"
)
values <- c(
  2, 3, 4, 3, 4, 5, 6, 4, 5, 6, 7, 8,
  9, 10, 8, 9, 10, 11, 12, 13, 11, 12, 13, 14
)
# Fiscal year label: the calendar year in which each fiscal year ends.
YearEnd <- rep(c(2017, 2018), each = 12)
df <- data.frame(Date = Date, values = values, YearEnd = YearEnd)
# Put the months in fiscal-year order (Oct first, Sep last) by making Month a
# factor with explicit levels; YearEnd becomes a factor so it maps to discrete
# colours/groups when plotted.
df <- df %>%
  mutate(
    Month = factor(
      month(Date),
      levels = c(10:12, 1:9),
      labels = month.abb[c(10:12, 1:9)]
    ),
    YearEnd = factor(YearEnd)
  )
df
#> Date values YearEnd Month
#> 1 2016-10-01 2 2017 Oct
#> 2 2016-11-01 3 2017 Nov
#> 3 2016-12-01 4 2017 Dec
#> 4 2017-01-01 3 2017 Jan
#> 5 2017-02-01 4 2017 Feb
#> 6 2017-03-01 5 2017 Mar
#> 7 2017-04-01 6 2017 Apr
#> 8 2017-05-01 4 2017 May
#> 9 2017-06-01 5 2017 Jun
#> 10 2017-07-01 6 2017 Jul
#> 11 2017-08-01 7 2017 Aug
#> 12 2017-09-01 8 2017 Sep
...
p1 <- ggplot(df, aes(
x = Month, y = values,
color = YearEnd,
group = YearEnd)) +
geom_line() +
theme_classic(base_size = 12)
ggplotly(p1)
Edit: to plot by Julian day, we use a similar method to the 3rd one from this answer
# Generate random data: two Oct 1 -> Sep 30 fiscal years of daily values
set.seed(2018)
date <- seq(
  from = as.Date("2016-10-01"), to = as.Date("2018-09-30"),
  by = "days"
)
values <- c(rnorm(length(date) / 2, 8, 1.5), rnorm(length(date) / 2, 16, 2))
dat <- data.frame(date, values)

df <- dat %>%
  # as_tibble() replaces the deprecated tbl_df()
  as_tibble() %>%
  mutate(
    jday = factor(yday(date)),
    Month = month(date),
    Year = year(date),
    # only create label for the 1st day of the month
    myLabel = case_when(
      day(date) == 1L ~ format(date, "%b-%d"),
      TRUE ~ NA_character_
    )
  ) %>%
  # create fiscal year column: Oct-Dec belong to the fiscal year that ends in
  # the next calendar year. Pick the numeric year first and convert to a factor
  # once, so the factor only carries the fiscal years actually present.
  mutate(fcyear = factor(if_else(Month > 9, Year + 1, Year))) %>%
  # order months Oct -> Sep so they sort in fiscal-year order
  mutate(Month = factor(
    Month,
    levels = c(10:12, 1:9),
    labels = month.abb[c(10:12, 1:9)]
  ))
df
#> # A tibble: 730 x 7
#> date values jday Month Year myLabel fcyear
#> <date> <dbl> <fct> <fct> <dbl> <chr> <fct>
#> 1 2016-10-01 7.37 275 Oct 2016 Oct-01 2017
#> 2 2016-10-02 5.68 276 Oct 2016 <NA> 2017
#> 3 2016-10-03 7.90 277 Oct 2016 <NA> 2017
#> 4 2016-10-04 8.41 278 Oct 2016 <NA> 2017
#> 5 2016-10-05 10.6 279 Oct 2016 <NA> 2017
#> 6 2016-10-06 7.60 280 Oct 2016 <NA> 2017
#> 7 2016-10-07 11.1 281 Oct 2016 <NA> 2017
#> 8 2016-10-08 9.30 282 Oct 2016 <NA> 2017
#> 9 2016-10-09 7.08 283 Oct 2016 <NA> 2017
#> 10 2016-10-10 8.96 284 Oct 2016 <NA> 2017
#> # ... with 720 more rows
# Number the rows 1, 2, 3, ... within each fiscal year. Using this counter as
# the x-axis makes ggplot draw every fiscal year in its own day order.
df1 <- df %>%
  group_by(fcyear) %>%
  mutate(order = seq_len(n())) %>%
  ungroup()
df1
#> # A tibble: 730 x 8
#> date values jday Month Year myLabel fcyear order
#> <date> <dbl> <fct> <fct> <dbl> <chr> <fct> <int>
#> 1 2016-10-01 7.37 275 Oct 2016 Oct-01 2017 1
#> 2 2016-10-02 5.68 276 Oct 2016 <NA> 2017 2
#> 3 2016-10-03 7.90 277 Oct 2016 <NA> 2017 3
#> 4 2016-10-04 8.41 278 Oct 2016 <NA> 2017 4
#> 5 2016-10-05 10.6 279 Oct 2016 <NA> 2017 5
#> 6 2016-10-06 7.60 280 Oct 2016 <NA> 2017 6
#> 7 2016-10-07 11.1 281 Oct 2016 <NA> 2017 7
#> 8 2016-10-08 9.30 282 Oct 2016 <NA> 2017 8
#> 9 2016-10-09 7.08 283 Oct 2016 <NA> 2017 9
#> 10 2016-10-10 8.96 284 Oct 2016 <NA> 2017 10
#> # ... with 720 more rows
# plot with `order` as x-axis
p2 <- ggplot(df1,
aes(x = order, y = values,
color = fcyear,
group = fcyear)) +
geom_line() +
theme_classic(base_size = 12) +
xlab(NULL)
# now replace `order` label with `myLabel` created above
# Take the first 12 labelled rows (the month starts of the first fiscal year)
# and read both the break position and its label from those same rows, so the
# labels do not depend on `order` happening to equal the row index of df1.
month_starts <- head(df1[!is.na(df1$myLabel), ], 12)
x_break <- month_starts$order
x_label <- month_starts$myLabel
x_label
#> [1] "Oct-01" "Nov-01" "Dec-01" "Jan-01" "Feb-01" "Mar-01" "Apr-01"
#> [8] "May-01" "Jun-01" "Jul-01" "Aug-01" "Sep-01"
p3 <- p2 +
scale_x_continuous(
breaks = x_break,
labels = x_label) +
theme(axis.text.x = element_text(angle = 90)) +
scale_color_brewer("Fiscal Year", palette = "Dark2") +
xlab(NULL)
p3
ggplotly(p3)
Created on 2018-09-09 by the reprex package (v0.2.0.9000).

Consider this an appendix to Tung's excellent answer. Here I've made it obvious how to alter the code for different start and end months of financial (or production) years, which vary by country (and industry), using the parameter EndMonth. I've also added an annual average, which seems like a pretty obvious thing to want as well (though outside the OP's request).
library(tidyverse)
library(lubridate)
## Generate random data
set.seed(2018)
date <- seq(
  from = as.Date("2016-06-01"), to = as.Date("2016-06-01") + 729,
  by = "days"
) # about 2 years, but even number of days
values <- c(rnorm(length(date) / 2, 8, 1.5), rnorm(length(date) / 2, 16, 2))
dat <- data.frame(date, values)

# Last month of the financial year as a calendar month number (1-12),
# i.e. if the last month of the financial year is May, use 5.
EndMonth <- 5
stopifnot(length(EndMonth) == 1L, EndMonth %in% 1:12)

# Calendar months in financial-year order. Built by dropping the first
# `EndMonth` months and appending them at the end, which also works for
# EndMonth = 12 (where `(EndMonth + 1):12` would wrongly produce 13:12).
fiscal_months <- c(seq_len(12)[-seq_len(EndMonth)], seq_len(EndMonth))

df <- dat %>%
  # as_tibble() replaces the deprecated tbl_df()
  as_tibble() %>%
  mutate(
    jday = factor(yday(date)),
    Month = month(date),
    Year = year(date),
    # only create label for the 1st day of the month
    myLabel = case_when(
      day(date) == 1L ~ format(date, "%b%e"),
      TRUE ~ NA_character_
    )
  ) %>%
  # create fiscal year column: months after EndMonth belong to the financial
  # year ending in the next calendar year. Convert to a factor once so only
  # the fiscal years actually present become levels.
  mutate(fcyear = factor(if_else(Month > EndMonth, Year + 1, Year))) %>%
  mutate(Month = factor(
    Month,
    levels = fiscal_months,
    labels = month.abb[fiscal_months]
  ))
df
# make 2 (or n) year average
# Fiscal year of the earliest date; its rows serve as the template for the
# "Average" series (replaces the previously hard-coded "2017").
first_fcyear <- as.character(df$fcyear[which.min(df$date)])

df_mean <- df %>%
  # average the values that share the same day-of-year across all years
  group_by(jday) %>%
  mutate(values = mean(values, na.rm = TRUE)) %>%
  ungroup() %>%
  # keep one fiscal year's worth of rows and relabel them as the average
  filter(fcyear == first_fcyear) %>%
  mutate(fcyear = "Average")

# Add average to data frame
df <- bind_rows(df, df_mean)
# Give every series (each fiscal year, plus "Average") its own 1..n counter.
# Plotting against this counter keeps each line in fiscal-year day order.
df1 <- df %>%
  group_by(fcyear) %>%
  mutate(order = seq_len(n())) %>%
  ungroup()
df1
# plot with `order` as x-axis
p2 <- ggplot(df1,
aes(x = order, y = values,
color = fcyear,
group = fcyear)) +
geom_line() +
theme_classic(base_size = 12) +
xlab(NULL)
p2
# now replace `order` label with `myLabel` created above
# Take the first 12 labelled rows (the month starts of the first fiscal year)
# and read both the break position and its label from those same rows, so the
# labels do not depend on `order` happening to equal the row index of df1.
month_starts <- head(df1[!is.na(df1$myLabel), ], 12)
x_break <- month_starts$order
x_label <- month_starts$myLabel
x_label
p3 <- p2 +
scale_x_continuous(
breaks = x_break,
labels = x_label) +
theme(axis.text.x = element_text(angle = 90)) +
scale_color_brewer("Fiscal Year", palette = "Dark2") +
xlab(NULL)
p3

Related

Show more digits, tsibble in R

I have read a number of ways to show more digits in R output (to R Studio display) but the methods I have reviewed do not work for a tsibble with a mixture of character and numeric columns. I am able to use the num() function to set the number of digits for an individual numeric column but I would like to show the full tsibble object with character columns intact.
In the example below, I would like the output to be the same as that shown except the x1 and x2 columns should have three digits after the decimal for all rows.
Thank you in advance for your help.
library(tidyverse)
library(tsibble)
data <- tibble(date = seq(as.Date("2022/1/1"), by = "month", length.out = 6),
region = c("A","C","A","B","C","C"),
x1 = c(7.3456, 123.4532, 106.1059, 17.1594, 175.3951, 62.9431),
x2 = c(12.12, 15.29, 27.92, 9.23, 16.29, 13.11))
data <- data %>%
mutate(month = yearmonth(date)) %>%
as_tsibble(index = month)
data
data
# A tsibble: 6 x 5 [1M]
date region x1 x2 month
<date> <chr> <dbl> <dbl> <mth>
1 2022-01-01 A 7.35 12.1 2022 Jan
2 2022-02-01 C 123. 15.3 2022 Feb
3 2022-03-01 A 106. 27.9 2022 Mar
4 2022-04-01 B 17.2 9.23 2022 Apr
5 2022-05-01 C 175. 16.3 2022 May
6 2022-06-01 C 62.9 13.1 2022 Jun
>
As Jon Spring mentioned in the comments, mutate(across(where(is.numeric), ~num(.,digits = 3))) does work, the same as it does for tibbles (see the section "Fixed number of digits").
Do note the column type printed under x1 and x2: it will show <num:.3!> instead of <dbl>. But this is just how tibbles are printed. The data in x1 and x2 is still a double.
In your code:
data %>%
mutate(month = yearmonth(date),
across(where(is.numeric), ~num(.,digits = 3))) %>%
as_tsibble(index = month)
# A tsibble: 6 x 5 [1M]
date region x1 x2 month
<date> <chr> <num:.3!> <num:.3!> <mth>
1 2022-01-01 A 7.346 12.120 2022 Jan
2 2022-02-01 C 123.453 15.290 2022 Feb
3 2022-03-01 A 106.106 27.920 2022 Mar
4 2022-04-01 B 17.159 9.230 2022 Apr
5 2022-05-01 C 175.395 16.290 2022 May
6 2022-06-01 C 62.943 13.110 2022 Jun

Is there a way to make aggregate vectors made from fable forecast easier to read? Or rename them?

I'm forecasting hierarchical data with fable that has 2 levels of aggregation (but will have more in the future), and am having trouble knowing which predictions correspond to which series. Here is a simplified version of what I have:
# A fable: 7 x 6 [12M]
# Key: type, name, .model [7]
type name .model date value .mean
<chr*> <chr*> <chr> <mth> <dist> <dbl>
1 x x1 mint 2021 Jan N(20, 0.82) 19.9
2 x x2 mint 2021 Jan N(20, 1.3) 19.9
3 x <aggregated> mint 2021 Jan N(40, 1) 39.8
4 y y1 mint 2021 Jan N(9.7, 1.9) 9.73
5 y y2 mint 2021 Jan N(9.9, 1.7) 9.92
6 y <aggregated> mint 2021 Jan N(20, 3.8) 19.6
7 <aggregated> <aggregated> mint 2021 Jan N(59, 5.9) 59.4
Is there a way to rename the aggregated vectors as I am pivoting and aggregating the table? So it would look something like this:
# A fable: 7 x 6 [12M]
# Key: type, name, .model [7]
type name .model date value .mean
<chr*> <chr*> <chr> <mth> <dist> <dbl>
1 x x1 mint 2021 Jan N(20, 0.82) 19.9
2 x x2 mint 2021 Jan N(20, 1.3) 19.9
3 x x mint 2021 Jan N(40, 1) 39.8
4 y y1 mint 2021 Jan N(9.7, 1.9) 9.73
5 y y2 mint 2021 Jan N(9.9, 1.7) 9.92
6 y y mint 2021 Jan N(20, 3.8) 19.6
7 xy xy mint 2021 Jan N(59, 5.9) 59.4
I can do it manually for one level of aggregation by just renaming all aggregate vectors to what I want, but for two (or more) I'm not sure how to do it. I have tried using the is_aggregated() function but when I have 20 series at the bottom level it becomes very weird to try and find what corresponds to what.
Thanks so much!
Here's a reprex
df <- tibble(
date = seq(from = as.Date("2011/1/1"), to = as.Date("2020/1/1"), by = "year"),
x1 = 11:20,
x2 = x1 + rnorm(10),
y1 = 1:10,
y2 = y1 - rnorm(10)
)
df %>%
mutate(date = yearmonth(date)) %>%
as_tsibble(index = date) %>%
pivot_longer(!date) %>%
group_by(name) %>%
mutate(type = case_when(
name %in% c("x1", "x2") ~ "x",
name %in% c("y1", "y2") ~ "y")) %>%
aggregate_key((type / name), value = sum(value)) %>%
model(arima = ARIMA(value)) %>%
reconcile(mint = min_trace(arima, method = "mint_shrink")) %>%
forecast(h = 1) %>%
filter(.model == "mint") %>%
print(n = 7)

How to generate a sequence of numbers increasing at a fixed percentage?

I would like to calculate the predicted value at 2% growth rate over 10 years.
My data looks like this
df <- structure(list(fin_year = c(2016, 2017, 2018, 2019, 2020, 2021
), Total = c(136661.9, 142748.25, 146580.77, 155486.07, 171115.58,
69265.01)), class = "data.frame", row.names = c(NA, -6L))
I would like to add a new column (two_percent) with the calculated amounts based on the 2016 Total value.
I expect the answers to look like this:
I've tried this but can't figure out how to code the script properly to do what I want
df1 <- df %>%
mutate(two_percent = rep(Total[1:1] *1.02))
Your help is much appreciated
The formula is 1.02^n where n is the number of periods. One may need to subtract 1 from n depending on whether the interest is at the beginning or end of the period.
# Compound the first Total at 2% per row: row k gets basevalue * 1.02^(k - 1),
# so the first row keeps the base value unchanged.
basevalue <- df[["Total"]][1]
df1 <- mutate(df, two_percent = basevalue * 1.02^(seq_len(n()) - 1))
We can use purrr::accumulate to calculate the 2% growth forecast. First, let's calculate this for the existing data.frame. We need to supply a vector of 1.02 values, one fewer than the total number of rows, as accumulate's .x argument. Further, we need the base value of Total as the .init argument (this is the value we want to base the forecast on). The function .f that we then use is just .x * .y.
library(dplyr)
library(purrr)
# Growth path for the rows already in the data.frame: start from the first
# Total and multiply by 1.02 once for every subsequent row.
df %>%
  mutate(
    two_percent = accumulate(
      .x = rep(1.02, times = n() - 1),
      .f = ~ .x * .y,
      .init = first(Total)
    )
  )
#> fin_year Total two_percent
#> 1 2016 136661.90 136661.9
#> 2 2017 142748.25 139395.1
#> 3 2018 146580.77 142183.0
#> 4 2019 155486.07 145026.7
#> 5 2020 171115.58 147927.2
#> 6 2021 69265.01 150885.8
While this works for the existing data.frame we need a new one, if we want to forecast values for years that the current df doesn't contain. Basically, we use the same approach as above and combine it with a right_join:
# Calculate the growth rate for a 10 year period, and then join
# new_df holds one row per projection year (1..10). two_percent starts out as
# the base Total repeated on every row and is then overwritten with the
# compounded series: base, base * 1.02, base * 1.02^2, ...
new_df <- tibble(Year = 1:10,
two_percent = df$Total[1]) %>%
mutate(two_percent = accumulate(rep(1.02, nrow(.)-1),
~ .x * .y,
.init = first(two_percent)))
# Number the observed rows 1..n so they line up with new_df$Year, then
# right_join to keep all 10 projection years; years beyond the observed data
# get NA for fin_year and Total. No `by` is given, so the join uses the shared
# column Year and prints a "Joining, by" message.
df %>%
mutate(Year = row_number()) %>%
right_join(new_df)
#> Joining, by = "Year"
#> fin_year Total Year two_percent
#> 1 2016 136661.90 1 136661.9
#> 2 2017 142748.25 2 139395.1
#> 3 2018 146580.77 3 142183.0
#> 4 2019 155486.07 4 145026.7
#> 5 2020 171115.58 5 147927.2
#> 6 2021 69265.01 6 150885.8
#> 7 NA NA 7 153903.5
#> 8 NA NA 8 156981.6
#> 9 NA NA 9 160121.2
#> 10 NA NA 10 163323.6
Created on 2022-01-13 by the reprex package (v2.0.1)
Here's another simple method that anchors the base of the Two_percent calculation to the value of Total in the first fin_year using which.min(fin_year)
library(tidyverse)
df <- structure(list(fin_year = c(2016, 2017, 2018, 2019, 2020, 2021
), Total = c(136661.9, 142748.25, 146580.77, 155486.07, 171115.58,
69265.01)), class = "data.frame", row.names = c(NA, -6L))
# Anchor the projection to Total in the earliest fin_year and compound 2% for
# every year elapsed since then. The exponent is 0 in the base year, so that
# row shows the unchanged base value (matching the other answers), and the
# result no longer depends on the rows being sorted by fin_year.
df %>%
  mutate(
    two_percent = Total[which.min(fin_year)] * 1.02^(fin_year - min(fin_year))
  )
#> fin_year Total two_percent
#> 1 2016 136661.90 136661.9
#> 2 2017 142748.25 139395.1
#> 3 2018 146580.77 142183.0
#> 4 2019 155486.07 145026.7
#> 5 2020 171115.58 147927.2
#> 6 2021 69265.01 150885.8
Created on 2022-01-13 by the reprex package (v2.0.1)

Plotting average monthly counts per decade on a plot

I have a data set that has monthly "flows" over 68 years. I am trying to make a comparison of flow distributions by decade by making a plot that has a seasonal distribution on the x-axis and displays a mean value for each decade on the plot.
Using your sample data, and the tidyverse packages, the following code will calculate the average per decade and month:
library(tidyverse)
x <- "Year Jan Feb Mar Apr May Jun Jul Aug Sep
1948 29550 47330 64940 61140 20320 17540 37850 29250 17100
1949 45700 53200 37870 36310 39200 23040 31170 23640 19720
1950 16050 17950 27040 21610 15510 16090 12010 11360 14390
1951 14280 13210 16260 24280 13570 9547 9921 8129 7304
1952 19030 29250 58860 31780 19940 16930 9268 9862 9708
1953 24340 28020 31830 29700 44980 15630 22660 14190 13430
1954 34660 23260 24390 21500 13250 10860 10700 8188 6092
1955 14050 19430 12780 19330 12210 7892 12450 10920 6850
1956 7262 20800 27680 24110 13560 8594 10150 7721 10540
1957 14470 13350 22720 39860 23980 12630 10230 7008 8567"
# Parse the whitespace-delimited sample, label each row with its decade,
# reshape to one row per (decade, month) observation and average the counts.
d <- read_table(x) %>%
  # decade = year rounded down to a multiple of 10; Year itself is dropped
  mutate(decade = 10 * (Year %/% 10), Year = NULL) %>%
  # convert to a 'tidy' (long) format
  pivot_longer(cols = Jan:Sep, names_to = "month", values_to = "count") %>%
  # ordered factor so months sort by calendar order, not alphabetically
  mutate(month = factor(month, levels = month.abb, ordered = TRUE)) %>%
  group_by(decade, month) %>%
  summarise(mean = mean(count))
If you print that dataframe, you get:
> d
# A tibble: 18 x 3
# Groups: decade [2]
decade month mean
<dbl> <ord> <dbl>
1 1940 Jan 37625
2 1940 Feb 50265
3 1940 Mar 51405
4 1940 Apr 48725
5 1940 May 29760
6 1940 Jun 20290
7 1940 Jul 34510
8 1940 Aug 26445
9 1940 Sep 18410
10 1950 Jan 18018.
11 1950 Feb 20659.
12 1950 Mar 27695
13 1950 Apr 26521.
14 1950 May 19625
15 1950 Jun 12272.
16 1950 Jul 12174.
17 1950 Aug 9672.
18 1950 Sep 9610.
If you need it back in wide format:
d2 <- d %>%
pivot_wider(
id_cols = decade,
names_from = month,
values_from = mean
)
> d2
# A tibble: 2 x 10
# Groups: decade [2]
decade Jan Feb Mar Apr May Jun Jul Aug Sep
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1940 37625 50265 51405 48725 29760 20290 34510 26445 18410
2 1950 18018. 20659. 27695 26521. 19625 12272. 12174. 9672. 9610.
(Edit: changed from line graph to dodged bar plot, to better align with OP code.)
Here's an approach using dplyr, tidyr, and ggplot2 from tidyverse.
library(tidyverse)
# NOTE(review): `M` is not defined in this snippet; presumably it is the
# asker's wide table with a Year column and monthly columns Jan..Sep -- confirm.
M %>%
  group_by(Decade = floor(Year / 10) * 10) %>%
  # Mean of every monthly column per decade. across() replaces the superseded
  # summarize_at(vars(...), ...) form.
  summarize(across(Jan:Sep, mean)) %>%
  # This uses tidyr::pivot_longer to reshape the data longer, which gives us the
  # ability to map decade to color.
  pivot_longer(-Decade, names_to = "Month", values_to = "Avg") %>%
  # This step makes Month a factor whose levels follow order of appearance,
  # which is necessary to avoid the months showing up in alphabetical order.
  mutate(Month = fct_inorder(Month)) %>%
  # Alternatively, we could have aligned these thusly
  # mutate(Month_order = match(Month, month.abb)) %>%
  # mutate(Month = fct_reorder(Month, Month_order)) %>%
  ggplot(aes(Month, Avg, fill = as.factor(Decade))) +
  geom_col(position = position_dodge()) +
  scale_fill_discrete(name = "Decade")

Use dplyr/tidyr to turn rows into columns in R data frame

I have a data frame like this:
year <-c(floor(runif(100,min=2015, max=2017)))
month <- c(floor(runif(100, min=1, max=13)))
inch <- c(floor(runif(100, min=0, max=10)))
mm <- c(floor(runif(100, min=0, max=100)))
df = data.frame(year, month, inch, mm);
year month inch mm
2016 11 0 10
2015 9 3 34
2016 6 3 33
2015 8 0 77
I only care about the columns year, month, and mm.
I need to re-arrange the data frame so that the first column is the name of the month and the rest of the columns is the value of mm.
Months 2015 2016
Jan # #
Feb
Mar
Apr
May
Jun
Jul
Aug
Sep
Oct
Nov
Dec
So two things needs to happen.
(1) The month needs to become a string of the first three letters of the month.
(2) I need to group by year, and then put the mm values in a column under that year.
So far I have this code, but I can't figure it out:
df %>%
select(-inch) %>%
group_by(month) %>%
summarize(mm = mm) %>%
ungroup()
To convert month to names, you can refer to month.abb; And then you can summarize by year and month, spread to wide format:
library(dplyr)
library(tidyr)
# Average mm per (year, month), then put one column per year and sort the rows
# in calendar order.
df %>%
  group_by(year, month = month.abb[month]) %>%
  # use mean as an example, could also be sum or other intended aggregation
  # methods; .groups = "drop" returns an ungrouped result so the reshape below
  # does not carry a leftover `year` grouping
  summarise(mm = mean(mm), .groups = "drop") %>%
  # pivot_wider() replaces the superseded spread(year, mm)
  pivot_wider(names_from = year, values_from = mm) %>%
  arrange(match(month, month.abb)) # rearrange month in chronological order
# A tibble: 12 x 3
# month `2015` `2016`
# <chr> <dbl> <dbl>
# 1 Jan 65.50000 28.14286
# 2 Feb 54.40000 30.00000
# 3 Mar 23.50000 95.00000
# 4 Apr 7.00000 43.60000
# 5 May 45.33333 44.50000
# 6 Jun 70.33333 63.16667
# 7 Jul 72.83333 52.00000
# 8 Aug 53.66667 66.50000
# 9 Sep 51.00000 64.40000
#10 Oct 74.00000 39.66667
#11 Nov 66.20000 58.71429
#12 Dec 38.25000 51.50000

Resources