Adding vline to time-series ggplot collapses X asis? - r

I have ts data that I draw with quarterly legend:
z <- as.zoo(my_data)
breaks <- seq(min(time(z)), max(time(z)), .25);
autoplot(z, geom="line",ylim=c(0,75)) + scale_x_yearqtr(breaks = breaks, format = "%yQ%q")
I'd like to draw a vertical line at a predetermined position (let's say at 1975-08-01).
The problem is, then I add "+ geom_vline", I get a really weird collapsed chart. Obviously, I have no true idea what I'm doing but I've tried this:
+ geom_vline(xintercept=as.Date("1975-08-01"))
+ geom_vline(xintercept=as.numeric(as.Date("1975-08-01")))
and as offsets (not sure how it goes):
+ geom_vline(xintercept=as.numeric(z[c(10,11)]))
+ geom_vline(xintercept=as.numeric(z[10]))
+ geom_vline(xintercept=3)
This is what happens (without geom_vline it is OK):
How can I put an offset ("draw vline at datapoint X") or data ("1975-08-01")?
What am I doing wrong?
Adding some data.
dput(z)
structure(c(NA, NA, NA, 56.0775, 58.53, 58.17, 61.5025, 57.71,
56.5075, 53.9375, 47.345, 48.6975, 53.15, 60.3125, 60.2, 65.1025,
63.445, 57.86, 62.1225, 62.19, 64.075, 71.7725, 69.565, 63.4575000000001,
59.2175, 53.8525, 53.4175, 50.1475, 50.9, 50.0675, 52.6925, 59.9325,
59.8625, 61.8375, 57.655, 50.23, 47.8775, 39.5475, 40.1375, 43.2075,
44.885, 48.115), index = structure(c(1974, 1974.08333333333,
1974.16666666667, 1974.25, 1974.33333333333, 1974.41666666667,
1974.5, 1974.58333333333, 1974.66666666667, 1974.75, 1974.83333333333,
1974.91666666667, 1975, 1975.08333333333, 1975.16666666667, 1975.25,
1975.33333333333, 1975.41666666667, 1975.5, 1975.58333333333,
1975.66666666667, 1975.75, 1975.83333333333, 1975.91666666667,
1976, 1976.08333333333, 1976.16666666667, 1976.25, 1976.33333333333,
1976.41666666667, 1976.5, 1976.58333333333, 1976.66666666667,
1976.75, 1976.83333333333, 1976.91666666667, 1977, 1977.08333333333,
1977.16666666667, 1977.25, 1977.33333333333, 1977.41666666667
), class = "yearmon"), frequency = 12, class = c("zooreg", "zoo"))

The index class of z has class `"yearmon"
class(index(z))
## [1] "yearmon"
so the xintercept= should be specified consistently, i.e. also as a "yearmon" object:
p <- autoplot(z, ylim=c(0,75)) +
scale_x_yearqtr(breaks = breaks, format = "%yQ%q")
p + geom_vline(xintercept = as.yearmon("1975-08"))
Any other valid specification of a "yearmon" object would work as well, e.g.
p + geom_vline(xintercept = as.yearmon(1975 + (8-1) / 12))
p + geom_vline(xintercept = as.yearmon(as.Date("1975-08-01")))

As bVa pointed out, I used dput to see the format of index.
As dates are stored in decimal, the solution is to use simple decimal value. 1975.67 for aug 1975.
geom_vline(xintercept=as.numeric(1975.67))

Related

Decimal digits in `Slope Graph` with `ggplot2`

Following a former question I opened few weeks ago:
Slope Chart - ggplot2
I face another issue, concerning the numeric values reported in the graph. Even specifying the decimal digits I need (exactly 3) with any of the two commands:
y=round(y, digit = 3) at the endof the code
or
options(digits=3) at the beginning of the whole code
The graphical output, doesn't give me the desired number of digits but only concerning 0. In the graph, I wanted to have 0.800 (not 0.8) and 0.940 (not 0.94). It looks like it removes 0. Below the graphical output from R, I circled in red the number I intended to change.
Below the whole code:
library(dplyr)
library(ggplot2)
#options(digits=3)
theme_set(theme_classic())
#### Data
df <- structure(list(group = c("Ups", "Ups", "Ups", "Ups", "Ups"),
yshift = c(0, 0, 0, 0, 0), x = structure(1:5, .Label = c("1 day",
"2 days", "3 days", "5 days", "7 days"), class = "factor"),
y = c(0.108, 0.8, 0.94, 1.511, 1.905), ypos = c(0.10754145,
0.8, 0.94, 1.5111111, 1.90544651164516)), row.names = c(1L,
3L, 5L, 7L, 9L), class = "data.frame")
# Define functions. Source: https://github.com/jkeirstead/r-slopegraph
plot_slopegraph <- function(df) {
ylabs <- subset(df, x==head(x,1))$group
yvals <- subset(df, x==head(x,1))$ypos
fontSize <- 3
gg <- ggplot(df,aes(x=x,y=ypos)) +
geom_line(aes(group=group),colour="grey80") +
geom_point(colour="white",size=8) +
geom_text(aes(label=y), size=fontSize, family="American Typewriter") +
scale_y_continuous(name="", breaks=yvals, labels=ylabs)
return(gg)
}
## Plot
plot_slopegraph(df) + labs(title="Monomer content after days of heating")
I am making any mistake or missing something? Is there any other way to force 0 digits?
Thank you in advance for every eventual reply or comment.
I like the scales package functions for things like this (though you could certainly use formatC or sprintf instead).
I've modified plot_slopegraph to use label=scales::label_number(accuracy = 0.001)(y)) in the geom_text():
plot_slopegraph <- function(df) {
ylabs <- subset(df, x==head(x,1))$group
yvals <- subset(df, x==head(x,1))$ypos
fontSize <- 3
gg <- ggplot(df,aes(x=x,y=ypos)) +
geom_line(aes(group=group),colour="grey80") +
geom_point(colour="white",size=8) +
geom_text(aes(label=scales::label_number(accuracy = 0.001)(y)), size=fontSize, family="American Typewriter") +
scale_y_continuous(name="", breaks=yvals, labels=ylabs)
return(gg)
}
plot_slopegraph(df)

Scatter plot with ggplot2 colored by specific dates interval in r

I'm trying to assign different colors to the scatterplot based on their dates, more specifically the year.
This is how my dataset looks like:
> dput(head(CORt_r100_stack_join_fspec,10))
structure(list(Date = structure(c(16779, 16834, 16884, 16924,
16973, 16997, 17031, 17184, 17214, 17254), class = "Date"), meanNDVIN_int = c(0.677501157246889,
0.632728796482024, 0.578636981692124, 0.547002029242488, 0.632635423362751,
NA, 0.699596252720458, 0.670059391804396, 0.643347941166436,
0.674034259709311), meanNDVIW_int = c(0.784142418592418, 0.652437451242156,
0.648319814752948, 0.593432266488189, 0.767890365415717, NA,
0.779249089832163, 0.71974944410843, 0.715777992826006, 0.685045115352089
), meanNDVIE_int = c(0.703614512017928, 0.701963337684803, 0.488628353756438,
0.631309466083632, 0.781589421376217, NA, 0.799663418920722,
0.78910564747191, 0.710962969930836, 0.715644011856453), meanNDVINr_int_f = c(0.677501157246889,
0.632728796482024, 0.578636981692124, 0.547002029242488, 0.632635423362751,
0.687343078509066, 0.699596252720458, 0.670059391804396, 0.643347941166436,
0.674034259709311), meanNDVIWr_int_f = c(0.784142418592418, 0.652437451242156,
0.648319814752948, 0.593432266488189, 0.767890365415717, 0.749505859407419,
0.779249089832163, 0.71974944410843, 0.715777992826006, 0.685045115352089
), meanNDVIEr_int_f = c(0.703614512017928, 0.701963337684803,
0.488628353756438, 0.631309466083632, 0.781589421376217, 0.625916155640988,
0.799663418920722, 0.78910564747191, 0.710962969930836, 0.715644011856453
), NDVI_N = c(0.17221248, 0.644239685, 0.57222623, 0.558666635,
0.51654034, 0.42053949, 0.396706695, 0.641767447, 0.641008268,
0.662841949), NDVI_W = c(0.08182944, 0.69112807, 0.637699375,
0.629429605, 0.658829525, 0.60621678, 0.57186129, 0.72636742,
0.724193596, 0.738424976), NDVI_E = c(0.17135712, 0.659222803,
0.58665977, 0.573081253, 0.533498035, 0.437643585, 0.412841468,
0.652057206, 0.651854988, 0.670345511), NDVI_U = c(0.40520304,
0.578414833, 0.455746833, 0.428289893, 0.208847548, 0, 0, 0.475193691,
0.478691084, 0.505043773)), row.names = c(NA, 10L), class = "data.frame")
I've been plotting meanNDVIN_int against NDVI_N using this code:
ggplot(CORt_r100_join_fspec_2NDVIday,aes(x=NDVI_N)) +
geom_point(aes(y=meanNDVIN_int), colour="red")
theme_bw()+
ylab("meanNDVIN_int")+
xlab("NDVI_N")
Now I want to color each point differently (no matter the color) based on their year, 2015, 2016, and 2017.
I've used the scale_color_manual function to introduce the dates but no success so far.
Any help will be much appreciated.
Here is an alternative where you substring the first 4 characters from Date in color
df
ggplot(df,aes(x=NDVI_N)) +
geom_point(aes(y=meanNDVIN_int, color=substring(Date,1,4))) +
labs(color="Year")+
theme_bw()+
ylab("meanNDVIN_int")+
xlab("NDVI_N")
I created a year variable with lubridate and stored it asfactor for discrete colouring. You were just missing moving color inside the aes() to color it by year.
# Add year Variable;
CORt_r100_stack_join_fspec <- CORt_r100_stack_join_fspec %>% mutate(
year = as.factor(lubridate::year(Date))
)
# Plot;
ggplot(CORt_r100_stack_join_fspec,aes(x=NDVI_N)) +
geom_point(aes(y=meanNDVIN_int, color = year)) +
theme_bw() +
ylab("meanNDVIN_int")+
xlab("NDVI_N")
Note: The data you provided, and named is not the same as in your plot-call. So I changed CORt_r100_join_fspec_2NDVIday to CORt_r100_join_fspec_2NDVIday to make the plot and mutate function properly.

ggplot2: plot seconds in %H%M%S format in y axi

I've a column in seconds that need to plot as "%H%M%S".
I've tried using lubridate pkg, but the column results in:
loadtime_dfs$avgPageLoadTime <- seconds_to_period(loadtime_df$avgPageLoadTime)
Formal class 'Period' [package "lubridate"]
that I can plot but doesn't show any format.
loadtime_df <- structure(list(date = structure(c(17766, 17767, 17768, 17769,
17770, 17771), class = "Date"), pagePath = c("/webapp/wcs/stores/servlet/CategoryDisplay?urlRequestType=Base&catalogId=3074457345616676668&categoryId=3074457345616676994&pageView=grid&urlLangId=-24&beginIndex=0&langId=-24&top_category=3074457345616676981&parent_category_rn=3074457345616720192&storeId=10151",
"/webapp/wcs/stores/servlet/CategoryDisplay?urlRequestType=Base&catalogId=3074457345616676668&categoryId=3074457345616676994&pageView=grid&urlLangId=-24&beginIndex=0&langId=-24&top_category=3074457345616676981&parent_category_rn=3074457345616720192&storeId=10151",
"/webapp/wcs/stores/servlet/CategoryDisplay?urlRequestType=Base&catalogId=3074457345616676668&categoryId=3074457345616676994&pageView=grid&urlLangId=-24&beginIndex=0&langId=-24&top_category=3074457345616676981&parent_category_rn=3074457345616720192&storeId=10151",
"/webapp/wcs/stores/servlet/CategoryDisplay?urlRequestType=Base&catalogId=3074457345616676668&categoryId=3074457345616676994&pageView=grid&urlLangId=-24&beginIndex=0&langId=-24&top_category=3074457345616676981&parent_category_rn=3074457345616720192&storeId=10151",
"/webapp/wcs/stores/servlet/CategoryDisplay?urlRequestType=Base&catalogId=3074457345616676668&categoryId=3074457345616676994&pageView=grid&urlLangId=-24&beginIndex=0&langId=-24&top_category=3074457345616676981&parent_category_rn=3074457345616720192&storeId=10151",
"/webapp/wcs/stores/servlet/CategoryDisplay?urlRequestType=Base&catalogId=3074457345616676668&categoryId=3074457345616676994&pageView=grid&urlLangId=-24&beginIndex=0&langId=-24&top_category=3074457345616676981&parent_category_rn=3074457345616720192&storeId=10151"
), pageviews = c(245L, 225L, 194L, 214L, 214L, 213L), pageLoadTime = c(18965L,
185834L, 31115L, 114561L, 88807L, 0L), avgPageLoadTime = c(6,
27, 16, 138, 144, 0), bouncerate = c(5.63380281690141, 3.48837209302326,
5.40540540540541, 7.01754385964912, 0, 5), mes = c("agosto",
"agosto", "agosto", "agosto", "agosto", "agosto")), .Names = c("date",
"pagePath", "pageviews", "pageLoadTime", "avgPageLoadTime", "bouncerate",
"mes"), row.names = c(NA, -6L), class = c("tbl_df", "tbl", "data.frame"
))
This is what I need to plot:
ggplot(loadtime_df, aes(date,avgPageLoadTime)) +
geom_point() +
geom_smooth()
But with the Y axis with breaks: "00:01:00", "00:02:00", "00:03:00", "00:04:00", "00:05:00".
You are going to have to provide strings for ggplot to assign as labels.
If you read ?scale_y_continuous(labels=...), you'll see that labels= takes either waiver(), character, or a function. If you want to specify the specific locations and representations, then you'll want to specific both breaks= and labels=. However, typically you want ggplot2 to determine where to put the axis labels, so we'll provide a function that takes a value and returns a string.
I'm guessing there's a helper function somewhere to do this, but here's a base-R version. (The origin of the function is unimportant, since we can replace our function with another with likely the same outcome.)
This formatting function cheats a little by temporarily converting the seconds of avgPageLoadTime to POSIXct and then to a string. Doing it this way means it honors options("digits.secs"), if set.
fmt_hms <- function(x, digits.secs=NULL) {
if (!is.null(digits.secs)) {
oopts <- options(digits.secs = digits.secs)
on.exit(options(oopts), add=TRUE)
}
format(as.POSIXct(x, origin="1970-01-01 00:00:00"), format="%H:%M:%OS", tz="UTC")
}
In order to demonstrate this, I'll change one of the values of your data:
loadtime_df$avgPageLoadTime[3] <- loadtime_df$avgPageLoadTime[3] + 0.123456
fmt_hms(loadtime_df$avgPageLoadTime)
# [1] "00:00:06" "00:00:27" "00:00:16" "00:02:18" "00:02:24" "00:00:00"
fmt_hms(loadtime_df$avgPageLoadTime, digits.secs=3)
# [1] "00:00:06.000" "00:00:27.000" "00:00:16.123" "00:02:18.000" "00:02:24.000"
# [6] "00:00:00.000"
So we can just provide this function:
library(ggplot2)
ggplot(loadtime_df, aes(date,avgPageLoadTime)) +
geom_point() +
geom_smooth() +
scale_y_continuous(labels=fmt_hms)
I think you need to convert the date values in seconds to a %H%m%s format and then try plotting. I think you need one of the below approaches -
library(ggplot2)
library(lubridate)
# convert seconds to periods
td <- seconds_to_period(loadtime_df$avgPageLoadTime)
# then apply the required format
avgPageLoadTime_vector <- sprintf('%02d:%02d:%02d', td#hour, minute(td),
second(td))
# plotting using %H%m%s we can use them as y-axis ticks
# this will give you the same plot as above but Y-axis is fuzzy
ggplot(loadtime_df, aes(date,avgPageLoadTime)) +
geom_point() +
geom_smooth() +
scale_y_continuous(breaks = loadtime_df$avgPageLoadTime,
labels = avgPageLoadTime_vector)
# if you just want to plot with points and not use geom_smooth
# convert the column avgPageLoadTime into %H%m%s date-time format
loadtime_df$avgPageLoadTime <- avgPageLoadTime_vector
# this gives you the right Y-axis values but no smoothing
ggplot(loadtime_df, aes(date,avgPageLoadTime)) +
geom_point()
]2

How draw a loess line in ts plot

I tried hours to figure out how I can make my loess line work. The problem is I do not know much (lets say near nothing). I only have to use R for one course in university.
I created a fake table the real table is for download here
I have to make a timeline plot that worked surprisingly well. But now I have to add two loess lines with different spans. My Problem is I don't know how the command really works. I mean I know it should be something like loess(..~.., data=..). The step where I'm stuck is marked with "WHAT BELONGS HERE" in the given code below.
table <- structure(list(
Months = c("1980-06", "1980-07", "1980-08", "1980-09",
"1980-10", "1980-11", "1980-12", "1981-01"),
Total = c(75000, 70000, 60000, 73000, 72000, 71000, 76000, 71000)),
.Names = c("Monts", "Total of Killed Pigs"),
row.names = c(NA, 4L), class = "data.frame")
ts.obj <- ts(table$`Total of Killed Pigs`, start = c(1980, 1), frequency = 2)
plot(ts.obj)
trend1 <- loess(# **WHAT BELONGS HERE?**, data = table, span =1)
predict1 <- predict(trend1)
lines(predict1, col ="blue")
That is my original code:
obj <- read.csv(file="PATH/monthly-total-number-of-pigs-sla.csv", header=TRUE, sep=",")
ts.obj <- ts(obj$Monthly.total.number.of.pigs.slaughtered.in.Victoria..Jan.1980...August.1995, start = c(1980, 1), frequency = 12)
plot(ts.obj)
trend1 <- loess (WHAT BELONGS HERE?, data = obj, span =1)
predict1 <- predict (trend1)
lines(predict1, col="blue")
We can do away with the data argument as the time series is univariate (just one variable).
The formula ts.obj ~ index(ts.obj) can be read as
value as a function of time
as ts.obj will give you the values, and index(ts.obj) will give you the time index for those values, and the tilde ~ specifies that the first is a function of, or dependent on, the other.
library(zoo) # for index()
plot(ts.obj)
trend1 <- loess(ts.obj ~ index(ts.obj), span=1)
trend2 <- loess(ts.obj ~ index(ts.obj), span=2)
trend3 <- loess(ts.obj ~ index(ts.obj), span=3)
pred <- sapply(list(trend1, trend2, trend3), predict)
matlines(index(ts.obj), pred, lty=1, col=c("blue", "red", "orange"))
zoo isn't strictly required. If you replace index(ts.obj) with as.numeric(time(ts.obj)) you should be fine, I think.
In case you were wanting to go with ggplot2:
library(ggplot2)
library(dplyr)
table <- structure(list(
Months = c("1980-06", "1980-07", "1980-08", "1980-09",
"1980-10", "1980-11", "1980-12", "1981-01"),
Total = c(75000, 70000, 60000, 73000, 72000, 71000, 76000, 71000)),
.Names = c("Months", "Total"),
row.names = c(NA, 8L), class = "data.frame")
Change to proper dates:
table <- table %>% mutate(Months = as.Date(paste0(Months,"-01")))
Plot:
ggplot(table, aes(x=Months, y=Total)) +
geom_line() +
geom_smooth(span=1, se= FALSE, color ="red") +
geom_smooth(span=2, se= FALSE, color ="green") +
geom_smooth(span=3, se= FALSE) +
theme_minimal()

calendar heat map tetris chart

So I was reading this post and I fell a little in love with the calendar heat map with Tetris-style month breaks.
However, the ggplot example doesn't implement the Tetris breaks, which are arguably the best part.
So, FTFY, gist here:
The procedure for this is:
create appropriate Tetris breaks for your data
left_join your data to the Tetris breaks created in (1)
pump the above through ggplot with some specially crafted geoms
The methodology for (1) is reasonably straightforward, implemented in the calendar_tetris_data(...) function in the gist, though it would be nice to make it a little more flexible.
My question is mainly around (3): how do I bundle up the 7 geoms necessary to make the breaks into a single procedure or geom?
If I do this:
calendar_tetris_geoms <- function() {
geom_segment(aes(x=x, xend=x, y=ymin, yend=ymax)) + # (a)
geom_segment(aes(x=xmin, xend=xmax, y=y, yend=y)) + # (b)
geom_segment(aes(x=dec.x, xend=dec.x, y=dec.ymin, yend=dec.ymax)) + # (c)
geom_segment(aes(x=nye.xmin, xend=nye.xmax, y=nye.y, yend=nye.y)) + # (d)
geom_segment(x=-0.5, xend=51.5, y=7.5, yend=7.5) + # put a line along the top
geom_segment(x=0.5, xend=52.5, y=0.5, yend=0.5) + # put a line along the bottom
geom_text(aes(x=month.x, y=month.y, label=month.l), hjust=0.25) # (e)
}
And then try to add that to my ggplot, it doesn't work:
> ggplot(data) + calendar_tetris_geoms()
Error in calendar_tetris_geoms() :
argument "plot" is missing, with no default
I clearly don't understand how this works. How does this work?
Modifying #baptiste's suggestion, if I do this:
calendar_tetris_geoms <- function() {
list(
geom_segment(aes(x=x, xend=x, y=ymin, yend=ymax)), # (a)
geom_segment(aes(x=xmin, xend=xmax, y=y, yend=y)), # (b)
geom_segment(aes(x=dec.x, xend=dec.x, y=dec.ymin, yend=dec.ymax)), # (c)
geom_segment(aes(x=nye.xmin, xend=nye.xmax, y=nye.y, yend=nye.y)), # (d)
geom_segment(x=-0.5, xend=51.5, y=7.5, yend=7.5), # put a line along the top
geom_segment(x=0.5, xend=52.5, y=0.5, yend=0.5), # put a line along the bottom
geom_text(aes(x=month.x, y=month.y, label=month.l), hjust=0.25) # (e)
)
}
Then this works a treat:
calendar_tetris_data(min(stock.data$date), max(stock.data$date)) %>%
left_join(stock.data) %>%
ggplot() +
geom_tile(aes(x=week, y=wday2factor(wday), fill = Adj.Close), colour = "white") +
calendar_tetris_geoms() +
facet_wrap(~ year, ncol = 1)
Update 2019-08-06 - Pulling everything into one post to make a Tetris Calendar Heat Map
Sample date data.
This is a stand in for your date data.
mydatedata<-as.Date(paste(sample(c(2018:2019), 3000, replace = TRUE), # year
sample(c(1:12), 3000, replace = TRUE), # month
sample(c(1:28), 3000, replace = TRUE), # day
sep="-"))
Create a data frame summarizing your data
Replace mydatedata with your df$date field.
newdf<-as.data.frame(table(mydatedata), stringsAsFactors = FALSE);
names(newdf)<-c("date", "n")
newdf$date<-as.Date(newdf$date, format='%Y-%m-%d')
Create Calendar Tetris Data Functions
Note: I created a weekday label, renamed several functions to avoid name collision, and moved the the helper functions inside the main function.
Original source links:
1) https://gist.github.com/dvmlls/5f46ad010bea890aaf17
2) calendar heat map tetris chart
calendar_tetris_data <- function(date_min, date_max) {
year2 <- function(d) as.integer(format(d, '%Y'))
wday2 <- function(d) {
n <- as.integer(format(d, '%u'))
ifelse(n==7, 0, n) + 1 # I want the week to start on Sunday=1, so turn 7 into 0.
}
wday2factor <- function(wd) factor(wd, levels=1:7, labels=c('Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday'))
week2 <- function(d, year) {
# If January 1st is a Sunday, my weeks will start from 1 instead of 0 like the rest of them.
nyd <- as.Date(ISOdate(year, 1, 1))
# So if that's the case, subtract 1.
as.integer(format(d, '%U')) - ifelse(wday2(nyd) == 1, 1, 0)
}
start <- as.Date(ISOdate(year2(min(date_min)),1,1))
end <- as.Date(ISOdate(year2(max(date_max)), 12, 31))
all.dates <- start + 0:as.integer(end - start, units='days')
data.frame(date=all.dates) %>% tbl_df %>%
mutate(
wday=wday2(date),
year=year2(date),
month=as.integer(format(date, '%m')),
week=week2(date, year),
day=as.integer(format(date, '%d')),
weekday=wday2factor(wday), #20190806, adding weekday label
# (a) put vertical lines to the left of the first week of each month
x=ifelse(day <= 7, week - 0.5, NA),
ymin=ifelse(day <= 7, wday - 0.5, NA),
ymax=ifelse(day <= 7, wday + 0.5, NA),
# (b) put a horizontal line at the bottom of the first of each month
y=ifelse(day == 1, wday - 0.5, NA),
xmin=ifelse(day == 1, week - 0.5, NA),
xmax=ifelse(day == 1, week + 0.5, NA),
# (c) in december, put vertical lines to the right of the last week
dec.x=ifelse(month==12 & day >= 25, week + 0.5, NA),
dec.ymin=ifelse(month==12 & day >= 25, wday - 0.5, NA),
dec.ymax=ifelse(month==12 & day >= 25, wday + 0.5, NA),
# (d) put a horizontal line at the top of New Years Eve
nye.y=ifelse(month==12 & day == 31, wday + 0.5, NA),
nye.xmin=ifelse(month==12 & day == 31, week - 0.5, NA),
nye.xmax=ifelse(month==12 & day == 31, week + 0.5, NA),
# (e) put the first letter of the month on the first day
month.x=ifelse(day == 1, week, NA),
month.y=ifelse(day == 1, wday, NA),
month.l=ifelse(day == 1, substr(format(date, '%B'), 1, 3), NA)
)
}
Create the ggplot2 geom:
calendar_tetris_geoms <- function() {
list(
geom_segment(aes(x=x, xend=x, y=ymin, yend=ymax)), # (a)
geom_segment(aes(x=xmin, xend=xmax, y=y, yend=y)), # (b)
geom_segment(aes(x=dec.x, xend=dec.x, y=dec.ymin, yend=dec.ymax)), # (c)
geom_segment(aes(x=nye.xmin, xend=nye.xmax, y=nye.y, yend=nye.y)), # (d)
geom_segment(x=-0.5, xend=51.5, y=7.5, yend=7.5), # put a line along the top
geom_segment(x=0.5, xend=52.5, y=0.5, yend=0.5), # put a line along the bottom
geom_text(aes(x=month.x, y=month.y, label=month.l), hjust=0.25) # (e)
)
}
Create the plot:
library(ggplot2)
library(dplyr) # for %>% pipe
calendar_tetris_data(min(newdf$date), max(newdf$date)) %>%
left_join(newdf) %>%
ggplot() +
geom_tile(aes(x=week, y=weekday, fill = n), colour = "white") +
calendar_tetris_geoms() +
facet_wrap(~ year, ncol = 1)

Resources