Force time series x labels on each year - r

Below I create a reproducible example chart spanning 10,000 days. As you can see, this chart is not very informative, but it will do for the example.
Instead of one x label every 10 years, I would like to force a label every year. How can this be achieved?
# Build a 10,000-day example series and plot it with the default date axis.
library(ggplot2)
library(lubridate) # as_date() and days() below come from lubridate
library(tidyr)     # supplies tibble() (re-exported from the tibble package)

exdays <- 1:10000
exdata <- sin(exdays)
# One calendar date per observation, counted in days after 2022-01-01.
exdate <- as_date("2022-01-01") + days(exdays)
exdat <- tibble(exdate, exdata)

# NOTE(review): ggplot2 >= 3.4.0 deprecates `size` for lines in favour of
# `linewidth`; `size` is kept here so the snippet also runs on older versions.
p1 <- ggplot(exdat, aes(x = exdate, y = exdata)) +
  geom_line(color = "darkred", size = 0.7) +
  ggtitle("example")
p1

You may want to use scale_x_date with date_breaks set to "1 year", specifying the label format via date_labels:
# Same example series, with one x-axis break and label per year.
library(ggplot2)
library(lubridate) # as_date() and days() below come from lubridate
library(tidyr)     # supplies tibble() (re-exported from the tibble package)

exdays <- 1:10000
exdata <- sin(exdays)
# One calendar date per observation, counted in days after 2022-01-01.
exdate <- as_date("2022-01-01") + days(exdays)
exdat <- tibble(exdate, exdata)

# NOTE(review): ggplot2 >= 3.4.0 deprecates `size` for lines in favour of
# `linewidth`; `size` is kept here so the snippet also runs on older versions.
p1 <- ggplot(exdat, aes(x = exdate, y = exdata)) +
  geom_line(color = "darkred", size = 0.7) +
  # date_breaks places a break every year; "%Y" prints only the 4-digit year.
  scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
  ggtitle("example")
p1
Output:

This works
# Yearly breaks with ggplot2's default date labels; expects `exdat` to exist.
yearly_axis <- scale_x_date(date_breaks = "1 year")
p1 <- ggplot(data = exdat, mapping = aes(x = exdate, y = exdata)) +
  geom_line(color = "darkred", size = 0.7) +
  ggtitle("example") +
  yearly_axis

Related

R ggplot2 - Plot year variable one over the other in same plot

How do I plot each year as a separate line in ggplot2? I tried the code below, but it plots the data along one continuous date axis instead of overlaying the years.
library(ggplot2)
library(lubridate) # year() below comes from lubridate

# Dummy data: the 14 days ending on 14 January, once for 2017 and once for 2016
data <- data.frame(
  Date = c(as.Date("2017-01-14") - 0:13, as.Date("2016-01-14") - 0:13),
  value = runif(28)
)
#data$Date <- strptime(data$Date, "%Y-%m-%d" )
# Year as a factor so it maps to a discrete colour and one line per year.
data$Year <- factor(as.character(year(data$Date)))

# Both years share one continuous date axis, so the two lines end up a year
# apart instead of on top of each other.
ggplot(data) +
  geom_line(aes(x = Date, y = value, group = Year, color = Year)) +
  scale_x_date(date_breaks = "1 day", date_labels = "%d-%m-%y") +
  theme(axis.text.x = element_text(angle = 90))
But I want each year to be a separate graph in the same plot.
something like below
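Try this approach, which formats the date as day and month only. Your plot is messy because the dates fall in different years, so they sit far apart on a continuous date axis; plotting a formatted month-day label instead puts both years on the same axis positions. Here is the code: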
library(ggplot2)
library(lubridate)

# Dummy data: the 14 days ending on 14 January, once for 2017 and once for 2016
data <- data.frame(
  Date = c(as.Date("2017-01-14") - 0:13, as.Date("2016-01-14") - 0:13),
  value = runif(28)
)
data$Year <- factor(as.character(year(data$Date)))

# Month-day label shared by both years. It is stored as a factor whose levels
# follow the calendar (the "%m-%d" key sorts chronologically); a plain
# character column would be laid out alphabetically on the discrete axis as
# soon as the data covers more than one month.
month_day <- format(data$Date, "%b-%d")
data$MonthDay <- factor(
  month_day,
  levels = unique(month_day[order(format(data$Date, "%m-%d"))])
)

# Plot: one line per year over the shared month-day axis
ggplot(data) +
  geom_line(aes(x = MonthDay, y = value, group = Year, color = Year)) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90))
Output:

Programatically display date range with R scale limits

I am currently doing this with ggplot2 and scales, but ideally the plot would show a date range derived from the data, e.g. +/- 1 year around it. I shouldn't really be hardcoding these dates, as that is hard to maintain.
library(scales) # date_breaks() and date_format() for the datetime scale
library(ggplot2) # Visualization
# Draws one thick horizontal segment per row, running from `From` to `To`.
# NOTE(review): `dataset` is not defined in this snippet; presumably a data
# frame with datetime_start, Product, Stage, From and To columns (confirm).
# NOTE(review): `dataset$Product` inside aes() bypasses the `data` argument;
# a bare `Product` is the usual form.
ggplot(dataset,aes(x=datetime_start, y=dataset$Product, color=Stage, order = - as.numeric(Stage))) +
geom_segment(aes(x=From,xend=To,yend=dataset$Product), size=10) +
scale_x_datetime(
# one break per month, labelled as abbreviated month + two-digit year
breaks = date_breaks("1 month"),
labels=date_format("%b%y"),
# hard-coded axis range; these are the values the question wants to derive
limits = c(
as.POSIXct("2016-03-01"),
as.POSIXct("2018-02-01")
)
) +
# (the snippet stops here; whatever follows the trailing `+` is not shown)
Expand the scale:
library(ggplot2)

# Quarterly dates covering the two years up to today, with a running index.
today <- Sys.Date()
quarter_dates <- seq(
  from = today - lubridate::years(2),
  to = today,
  by = "3 month"
)
df <- data.frame(x = quarter_dates)
df$y <- seq_len(nrow(df))

p <- ggplot(df, aes(x, y)) +
  geom_line()
# expand = c(multiplicative, additive): no proportional padding, 365 days
# of additive padding on each side of the data range.
p + scale_x_date(expand = c(0, 365))

Hide/Drop missing values in heat map with ggplot2

I have a data frame with continous missing values from 11 Jan to 14 Jan 2016 as
library(lubridate)
set.seed(123)

# Hourly timestamps for 1-10 Jan and 15-20 Jan 2016; 11-14 Jan are absent.
timestamp1 <- seq(
  from = as.POSIXct("2016-01-01"),
  to = as.POSIXct("2016-01-10 23:59:59"),
  by = "hour"
)
timestamp2 <- seq(
  from = as.POSIXct("2016-01-15"),
  to = as.POSIXct("2016-01-20 23:59:59"),
  by = "hour"
)

# Values are drawn for the first period, then for the second, in that order.
data_obj <- data.frame(
  value = c(
    rnorm(length(timestamp1), 150, 5),
    rnorm(length(timestamp2), 110, 3)
  ),
  timestamp = c(timestamp1, timestamp2)
)

# Split each timestamp into its calendar day and hour of day.
data_obj[["day"]] <- lubridate::date(data_obj[["timestamp"]])
data_obj[["hour"]] <- lubridate::hour(data_obj[["timestamp"]])
When I plot a heat map using
# Day-by-hour tile plot on a continuous date axis; expects `data_obj` to exist.
ggplot(data = data_obj, mapping = aes(x = day, y = hour, fill = value)) +
  geom_tile()
I get heat map like below one; red marked rectangular region corresponds to missing values
How should I entirely hide this blank area and make a continuous heat map?
Note that I do not want to change the format of x-axis date and I don't want to show missing values with some other color.
A slightly different answer from @Jacob's that preserves the date label format and order:
library(ggplot2)
library(lubridate)
set.seed(123)

# Hourly timestamps for 1-10 Jan and 15-20 Jan 2016; 11-14 Jan are absent.
timestamp1 <- seq(as.POSIXct("2016-01-01"), as.POSIXct("2016-01-10 23:59:59"), by = "hour")
timestamp2 <- seq(as.POSIXct("2016-01-15"), as.POSIXct("2016-01-20 23:59:59"), by = "hour")
data_obj <- data.frame(
  value = c(
    rnorm(length(timestamp1), 150, 5),
    rnorm(length(timestamp2), 110, 3)
  ),
  timestamp = c(timestamp1, timestamp2)
)
data_obj$day <- lubridate::date(data_obj$timestamp)
data_obj$hour <- lubridate::hour(data_obj$timestamp)

# Preserve the date order manually in a factor: a discrete axis has no slots
# for days that are not among the levels, so the missing days leave no gap.
# Base R is used here because the original `%>%` was never brought into scope.
data_obj$day_f <- format(data_obj$day, "%b %d")
day_f_order <- data.frame(
  day_f = unique(data_obj$day_f[order(data_obj$day)])
)
data_obj$day_f <- factor(data_obj$day_f, levels = day_f_order$day_f)

ggplot(data_obj, aes(day_f, hour, fill = value)) +
  geom_tile() +
  # only these two labels are drawn on the x axis
  scale_x_discrete(expand = c(0, 0), breaks = c("Jan 04", "Jan 18")) +
  scale_y_continuous(expand = c(0, 0)) +
  viridis::scale_fill_viridis(name = NULL) +
  coord_equal() +
  labs(x = NULL, y = NULL) +
  theme(panel.background = element_blank()) +
  theme(panel.grid = element_blank()) +
  theme(axis.ticks = element_blank()) +
  theme(legend.position = "bottom")
Note: you're still mis-truthing the data to your audience without an explicit, very visible note that explains that there is missing data.
If you change the day to a factor it ignores the gap:
# Treating the day as a factor makes the x axis discrete, so absent days get
# no slot; expects `data_obj` to exist.
ggplot(
  data = data_obj,
  mapping = aes(x = factor(day), y = hour, fill = value)
) +
  geom_tile()
Depending on what the real thing looks like you may or may not be happy with how the x axis looks.

Weekly boxplot from hourly data [duplicate]

This code produces a single boxplot:
library(ggplot2)

# 62 daily observations from 1 December 2013 to 31 January 2014.
df <- data.frame(
  value = rnorm(62),
  my.date = seq(as.Date("2013-12-01"), as.Date("2014-01-31"), by = "1 day")
)

# No grouping is supplied, so every value falls into one box.
# date_minor_breaks / date_labels are the string-based arguments of
# scale_x_date, so no helper from the scales package is needed.
ggplot(df, aes(as.Date(my.date), value)) +
  geom_boxplot() +
  scale_x_date(date_minor_breaks = "1 week", date_labels = "%W\n%b")
How can I produce a plot that has single boxplots for each week between 1 December and 31 January? So within the single plot, there should be about 8 boxplots. Would prefer solution that uses either ggplot() or scale_x_date().
One option is to transform your date before using ggplot
library(ggplot2)

# 62 daily observations from 1 December 2013 to 31 January 2014.
df <- data.frame(
  value = rnorm(62),
  my.date = seq(
    from = as.Date("2013-12-01"),
    to = as.Date("2014-01-31"),
    by = "1 day"
  )
)

# "year/week-of-year" label per day, with levels in order of first appearance
# so the boxes follow the calendar.
weeks <- local({
  week_label <- format(df$my.date, "%Y/%W")
  factor(week_label, levels = unique(week_label))
})

# A discrete x gives one box per week label.
ggplot(data = df, mapping = aes(x = weeks, y = value)) +
  geom_boxplot()
library(ggplot2)
# Format the date to a "week number / month" label directly inside aes();
# the resulting character x gives one box per label. Expects `df` to exist.
ggplot(
  data = df,
  mapping = aes(x = format(as.Date(my.date), "%W\n%b"), y = value)
) +
  geom_boxplot()
Edit:
To order the dates:
# reorder() sorts the week labels by date so the boxes appear
# chronologically. Expects `df` to exist.
ggplot(
  data = df,
  mapping = aes(
    x = reorder(format(as.Date(my.date), "%W\n%b"), as.Date(my.date)),
    y = value
  )
) +
  geom_boxplot()
This fulfils @luciano's request to retain the functionality of scale_x_date:
library(scales)
library(ggplot2)

# 62 daily observations from 1 December 2013 to 31 January 2014.
df <- data.frame(
  value = rnorm(62),
  my.date = seq(
    from = as.Date("2013-12-01"),
    to = as.Date("2014-01-31"),
    by = "1 day"
  )
)

# x stays a real Date, so scale_x_date still applies; the group aesthetic
# (week number plus month) is what splits the data into separate boxes.
ggplot(
  data = df,
  mapping = aes(
    x = as.Date(my.date),
    y = value,
    group = format(as.Date(my.date), "%W-%b")
  )
) +
  geom_boxplot() +
  scale_x_date(date_breaks = "1 week", date_labels = "%Y-%b-%d")
Alternatively, if you don't want the data grouped by week number (which splits the box around most New Years), you can group by weeks ending on Sunday, as in the plot at the end. Shifting the week end from Sunday to, say, Friday can be achieved with code along the lines of the first expression below:
# Illustrative expression: `x` is a placeholder that is not defined in this
# snippet (presumably a Date vector such as df$my.date; confirm).
# Rounds x up to the week boundary, then adds 5 days for Saturday/Sunday
# inputs and subtracts 2 days for every other weekday.
# NOTE(review): ceiling_date() needs lubridate attached, and weekdays()
# returns locale-dependent names, so this assumes an English locale.
ceiling_date(x, "week") + ifelse(weekdays(x) %in% c("Saturday", "Sunday"), 5, -2)
# Group by the week each date rounds up to, so a week is never split at the
# turn of the year. ceiling_date() is namespace-qualified because lubridate
# is not attached in this snippet. Expects `df` to exist.
ggplot(
  df,
  aes(
    x = as.Date(my.date),
    y = value,
    group = lubridate::ceiling_date(my.date, "week")
  )
) +
  geom_boxplot() +
  scale_x_date(date_breaks = "1 week", date_labels = "%Y-%b-%d")

ggplot2 boxplots by week

This code produces a single boxplot:
library(ggplot2)

# 62 daily observations from 1 December 2013 to 31 January 2014.
df <- data.frame(
  value = rnorm(62),
  my.date = seq(as.Date("2013-12-01"), as.Date("2014-01-31"), by = "1 day")
)

# No grouping is supplied, so every value falls into one box.
# date_minor_breaks / date_labels are the string-based arguments of
# scale_x_date, so no helper from the scales package is needed.
ggplot(df, aes(as.Date(my.date), value)) +
  geom_boxplot() +
  scale_x_date(date_minor_breaks = "1 week", date_labels = "%W\n%b")
How can I produce a plot that has single boxplots for each week between 1 December and 31 January? So within the single plot, there should be about 8 boxplots. Would prefer solution that uses either ggplot() or scale_x_date().
One option is to transform your date before using ggplot
library(ggplot2)

# 62 daily observations from 1 December 2013 to 31 January 2014.
df <- data.frame(
  value = rnorm(62),
  my.date = seq(
    from = as.Date("2013-12-01"),
    to = as.Date("2014-01-31"),
    by = "1 day"
  )
)

# "year/week-of-year" label per day, with levels in order of first appearance
# so the boxes follow the calendar.
weeks <- local({
  week_label <- format(df$my.date, "%Y/%W")
  factor(week_label, levels = unique(week_label))
})

# A discrete x gives one box per week label.
ggplot(data = df, mapping = aes(x = weeks, y = value)) +
  geom_boxplot()
library(ggplot2)
# Format the date to a "week number / month" label directly inside aes();
# the resulting character x gives one box per label. Expects `df` to exist.
ggplot(
  data = df,
  mapping = aes(x = format(as.Date(my.date), "%W\n%b"), y = value)
) +
  geom_boxplot()
Edit:
To order the dates:
# reorder() sorts the week labels by date so the boxes appear
# chronologically. Expects `df` to exist.
ggplot(
  data = df,
  mapping = aes(
    x = reorder(format(as.Date(my.date), "%W\n%b"), as.Date(my.date)),
    y = value
  )
) +
  geom_boxplot()
This fulfils @luciano's request to retain the functionality of scale_x_date:
library(scales)
library(ggplot2)

# 62 daily observations from 1 December 2013 to 31 January 2014.
df <- data.frame(
  value = rnorm(62),
  my.date = seq(
    from = as.Date("2013-12-01"),
    to = as.Date("2014-01-31"),
    by = "1 day"
  )
)

# x stays a real Date, so scale_x_date still applies; the group aesthetic
# (week number plus month) is what splits the data into separate boxes.
ggplot(
  data = df,
  mapping = aes(
    x = as.Date(my.date),
    y = value,
    group = format(as.Date(my.date), "%W-%b")
  )
) +
  geom_boxplot() +
  scale_x_date(date_breaks = "1 week", date_labels = "%Y-%b-%d")
Alternatively, if you don't want the data grouped by week number (which splits the box around most New Years), you can group by weeks ending on Sunday, as in the plot at the end. Shifting the week end from Sunday to, say, Friday can be achieved with code along the lines of the first expression below:
# Illustrative expression: `x` is a placeholder that is not defined in this
# snippet (presumably a Date vector such as df$my.date; confirm).
# Rounds x up to the week boundary, then adds 5 days for Saturday/Sunday
# inputs and subtracts 2 days for every other weekday.
# NOTE(review): ceiling_date() needs lubridate attached, and weekdays()
# returns locale-dependent names, so this assumes an English locale.
ceiling_date(x, "week") + ifelse(weekdays(x) %in% c("Saturday", "Sunday"), 5, -2)
# Group by the week each date rounds up to, so a week is never split at the
# turn of the year. ceiling_date() is namespace-qualified because lubridate
# is not attached in this snippet. Expects `df` to exist.
ggplot(
  df,
  aes(
    x = as.Date(my.date),
    y = value,
    group = lubridate::ceiling_date(my.date, "week")
  )
) +
  geom_boxplot() +
  scale_x_date(date_breaks = "1 week", date_labels = "%Y-%b-%d")

Resources