I want to make a seemingly trivial adjustment to the chart pictured below:
I would like the labels along the x-axis to be even years, rather than odd years. So instead of going from 2009 -> 2011 -> 2013, they should go from 2008 -> 2010 -> 2012, and so forth...
How do I go about doing this?
Here is the code:
germany_yields <- read.csv(file = "Germany 10-Year Yield Weekly (2007-2020).csv", stringsAsFactors = F)
italy_yields <- read.csv(file = "Italy 10-Year Yield Weekly (2007-2020).csv", stringsAsFactors = F)
germany_yields <- germany_yields[, -(3:6)]
italy_yields <- italy_yields[, -(3:6)]
colnames(germany_yields)[1] <- "Date"
colnames(germany_yields)[2] <- "Germany.Yield"
colnames(italy_yields)[1] <- "Date"
colnames(italy_yields)[2] <- "Italy.Yield"
combined <- join(germany_yields, italy_yields, by = "Date")
combined <- na.omit(combined)
combined$Date <- as.Date(combined$Date,format = "%B %d, %Y")
combined["Spread"] <- combined$Italy.Yield - combined$Germany.Yield
fl_dates <- c(tail(combined$Date, n=1), head(combined$Date, n=1))
ggplot(data=combined, aes(x = Date, y = Spread)) + geom_line() +
scale_x_date(limits = fl_dates,
expand = c(0, 0),
date_breaks = "2 years",
date_labels = "%Y")
A -- not very elegant -- way would be to put these arguments in your scale_x_date() :
scale_x_date(date_labels = "%Y",
breaks = ymd(unique(year(combined$fl_dates)[year(combined$fl_dates)%%2 == 0]), truncated = 2L)
(we define breaks manually, by subsetting the whole range of dates and keeping the even years)
That's actually fairly simple. Just set the lower limit to an even number, and set the upper limit to NA. As you haven't provided a reproducible example, here on some fake data.
library(tidyverse)
mydates <- seq(as.Date("2007/1/1"), by = "3 months", length.out =100)
df <- tibble(
myvalue = rnorm(length(mydates))
)
# without limits argument
ggplot(df ) +
aes(x = mydates, y = myvalue) +
geom_line(size = 1L, colour = "#0c4c8a") +
scale_x_date(date_breaks = "2 years",
date_labels = "%Y")
# with limits argument
ggplot(df ) +
aes(x = mydates, y = myvalue) +
geom_line(size = 1L, colour = "#0c4c8a") +
scale_x_date(date_breaks = "2 years",
date_labels = "%Y",
limits = c(as.Date("2006/1/1"), NA))
Created on 2020-04-29 by the reprex package (v0.3.0)
I am currently trying to generate NOAA tide prediction charts (x = datetime, y = water level) with the dawn/sunrise/dusk/sunset times as vertical lines along the x axis timeline.
The rnoaa package calls the data and gives me the prediction date times in POSIXct. The suncalc library provides me a data frame with each date in the range's sunrise, sunset, etc. in POSIXct format as well.
library(rnoaa)
library(tidyverse)
library(ggplot2)
library(suncalc)
march.tides <- as.data.frame(coops_search(station_name = 8551762,
begin_date = 20200301, end_date = 20200331,
datum = "mtl", product = "predictions"))
march.tides <- march.tides %>%
mutate(yyyy.mm.dd = as.Date(predictions.t))
dates <- unique(march.tides$yyyy.mm.dd)
sunlight.times <- getSunlightTimes(date = seq.Date(as.Date("2020/3/1"), as.Date("2020/3/31"), by = 1),
lat = 39.5817, lon = -75.5883, tz = "EST")
I then have a loop that spits out separate plots for each calendar date - which works hunky dory. The vertical lines are drawing on the graph without an error, but are definitely in the wrong spot (sunrise is being drawn around 11am when it should be 06:30).
for (i in seq_along(dates)) {
plot <- ggplot(subset(march.tides, march.tides$yyyy.mm.dd==dates[i])) +
aes(x = predictions.t, y = predictions.v) +
geom_line(size = 1L, colour = "#0c4c8a") +
theme_bw() +
geom_vline(xintercept = sunlight.times$sunrise) +
geom_vline(xintercept = sunlight.times$sunset) +
geom_vline(xintercept = sunlight.times$dawn, linetype="dotted") +
geom_vline(xintercept = sunlight.times$dusk, linetype="dotted") +
ggtitle(dates[i])
print(plot)
}
I could alternatively facet the separate dates instead of this looping approach. Even when I subset the data to a single date, the vertical lines still did not draw correctly.
I wondered if maybe the issue was a time zone one. If I try to stick a time zone argument onto the tide prediction data call, I get the error:
Error in if (!repeated && grepl("%[[:xdigit:]]{2}", URL, useBytes = TRUE)) return(URL) :
missing value where TRUE/FALSE needed
It looks like you want to use EST as your timezone, so you could include in your conversion of predictions.t.
I would be explicit in what you want labeled on your xaxis in ggplot using scale_x_datetime, including the timezone.
library(rnoaa)
library(tidyverse)
library(ggplot2)
library(suncalc)
library(scales)
march.tides <- as.data.frame(coops_search(station_name = 8551762,
begin_date = 20200301, end_date = 20200331,
datum = "mtl", product = "predictions"))
march.tides <- march.tides %>%
mutate(yyyy.mm.dd = as.Date(predictions.t, tz = "EST"))
dates <- unique(march.tides$yyyy.mm.dd)
sunlight.times <- getSunlightTimes(date = seq.Date(as.Date("2020/3/1"), as.Date("2020/3/31"), by = 1),
lat = 39.5817, lon = -75.5883, tz = "EST")
for (i in seq_along(dates)) {
plot <- ggplot(subset(march.tides, march.tides$yyyy.mm.dd==dates[i])) +
aes(x = predictions.t, y = predictions.v) +
geom_line(size = 1L, colour = "#0c4c8a") +
theme_bw() +
geom_vline(xintercept = sunlight.times$sunrise) +
geom_vline(xintercept = sunlight.times$sunset) +
geom_vline(xintercept = sunlight.times$dawn, linetype="dotted") +
geom_vline(xintercept = sunlight.times$dusk, linetype="dotted") +
ggtitle(dates[i]) +
scale_x_datetime(labels = date_format("%b %d %H:%M", tz = "EST"))
print(plot)
}
Plot
I have coded a heatmap using ggplot tiles and it has sequencial days on the x axis . The problem I am trying to solve is to remove weekends from the heatmap and show only weekdays. I have found that one solution would be to transform the dates into factors but if I do that how can I format the labels in scale_x_discrete to be in %d%m date format ? Is there a way to keep the dates as date format instead of turning it into factors ?
Below is an example:
randomString <- function(n=5,length=3) {
randomStringX <- c(1:n)
for(i in 1:n) {
randomStringX[i] <- paste(sample(c(LETTERS),length,replace = TRUE),collapse = "")
}
return(randomStringX)
}
randomString()
data.frame(client=randomString(),day=rep(seq.Date(Sys.Date()-10,length.out=10,by="1 day"),2)) %>% mutate(sales=round(rnorm(20,492,300),1)) %>% mutate(scale=cut(sales,breaks=c(0,100,200,300,max(sales)),labels = c("0-100","100-200","200-300","+300"))) %>% ggplot(.,aes(x=day,y=client,fill=scale)) + geom_tile() + scale_x_date(date_breaks = "1 day")
Thanks in advance
You can exclude data from weekends using the is.weekend function from chron
The weekend dates themselves can be excluded from an x-axis using the bdscale package
library(chron)
library(bdscale)
library(scales)
library(ggplot2)
library(dplyr)
df <- as.data.frame(client = randomString(), day = rep(seq.Date(
Sys.Date() - 10, length.out = 10, by = "1 day"), 2)) %>%
mutate(sales = round(rnorm(20, 492, 300), 1)) %>%
mutate(scale =
cut(
sales,
breaks = c(0, 100, 200, 300, max(sales)),
labels = c("0-100", "100-200", "200-300", "+300")
)) %>%
filter(is.weekend(day) == FALSE)
ggplot(df, aes(x = day, y = client, color = scale, fill = scale)) +
geom_tile() +
# scale_x_date(date_breaks = "1 day") +
theme(axis.text.x = element_text(angle = 45)) +
scale_x_bd(business.dates = sort(df$day), max.major.breaks = 30, labels=scales::date_format('%d %b'))
Removing data from weekends can also be done using lubridate and the wday function as
filter(!wday(day) %in% c(1,7))
Sun/Sat are stored as 1 and 7 respectively. - Credit to #AHart
Is there any way to shift the dates of a seasonal graph so that they match an arbitrary fiscal year (for example MARCH/FEB instead of DEC/JAN)?
I have this of far:
startMonth = 3 # march
startDate = as.Date(paste0(year(today()), '-', startMonth, '-1'))
dts = seq.Date(from = today() - 500, to = today(), by = 'day')
dat = data.frame(date = dts, value = runif(n = length(dts), min = 1, max = 10))
dat$month = month(dat$date)
dat$year = year(dat$date)
dat$yearPlot = ifelse(test = dat$month < startMonth, yes = (dat$year - 1), no = dat$year)
dat$year = as.character(dat$year)
dat$ydaydiff = yday(dat$date) - yday(startDate)
dat$datePlot1 = ifelse(dat$ydaydiff < 0, dat$ydaydiff + 365, dat$ydaydiff)
dat$datePlot1 = as.Date('0001-01-01') + days(dat$datePlot1)
dat$yearPlot = as.character(dat$yearPlot)
ggplot(dat) +
geom_path(aes(x = datePlot1, y = value, color = yearPlot)) +
scale_x_date(date_labels = '%b', )
Which makes this plot:
However I'd like the x-axis to start at March instead of Jan. Is there any way to adjust this? I thought of using the month column in dat but not sure how to implement.
Here is a not-too-pretty solution. The logic somewhat follows your own: find the starting date/time for each fiscal year (March 1 = time 1) and the last date/time (Feb 28 = time 365). Use this separate 'time' variable as your x-axis, then re-label the tick marks. You can change the scale_x_continuous() breaks and labels to get your desired dates along the x-axis.
t <- data.frame(date=seq.Date(as.Date('2018-03-01'),as.Date('2020-02-28'),by='days'),
fy=1)
t$fy[t$date>='2019-03-01'] <- 2
t <- t %>% group_by(fy) %>% mutate(time=seq(1:n()))
dat <- left_join(dat,t)
dat %>% ggplot(.) +
geom_path(aes(x = time, y = value, color = factor(fy),group=fy)) +
scale_x_continuous(breaks = c(1,100,200,300),labels=c('March 1','June 8','Sept 16','Dec 25'))
The breaks_width argument to scale_x_date() allows you to offset the breaks by a few months in a year.
The labels argument accepts a function to format the labels as a fiscal year, e.g. to convert a date 2019-03-01 to "19/20".
# Function to create fiscal year labels like "14/15" for the 2014/15 fiscal year
fiscal_year <- function(x) {
year_number <- lubridate::year(x)
paste(substr(year_number, 3, 4),
substr(year_number + 1, 3, 4),
sep = "/")
}
ggplot(dat) +
geom_path(aes(x = date, y = value, color = yearPlot)) +
scale_x_date(labels = fiscal_year, # Use the function to create the labels
breaks = scales::breaks_width("1 year", offset = 90)) # Offset by 90 days to March
My question is in the following.
here is my raw data:
type,com,year,month,value
A,CH,2016,1,0
A,CH,2016,2,0
A,CH,2016,3,0
A,CH,2016,4,0
A,CH,2016,5,0
A,CH,2016,6,0
A,CH,2016,7,0
A,CH,2016,8,0
A,CH,2016,9,453128
A,CH,2016,10,868256
A,CH,2016,11,1015080
A,CH,2016,12,650912
B,CH,2016,1,0
B,CH,2016,2,0
B,CH,2016,3,0
B,CH,2016,4,0
B,CH,2016,5,0
B,CH,2016,6,61273
B,CH,2016,7,27711
B,CH,2016,8,161780
B,CH,2016,9,48889
B,CH,2016,10,72805
B,CH,2016,11,131466
B,CH,2016,12,73756
C,CH,2016,1,0
C,CH,2016,2,0
C,CH,2016,3,0
C,CH,2016,4,0
C,CH,2016,5,0
C,CH,2016,6,0
C,CH,2016,7,0
C,CH,2016,8,2200
C,CH,2016,9,111384
C,CH,2016,10,28758
C,CH,2016,11,21161
C,CH,2016,12,0
I use it to plot a line graph with gglot2.
The code is:
test <- read.csv("test.csv", header = T)
test_list <- split(test, test$type)
Rplot <- ggplot(test_list$A, aes(x=month, y=value, col=com))+geom_line()
Rplot
My question:
how to let my x-axis to display like month(1,2,3....,12)?
and how can I combine year and month showing on x-axis at the same time.(Jan2016, Feb2016,.....) or (2016/1, 2016/2,.....)
very appreciate.
Create date_label column as Date class in the dataframe using ymd() from lubridate package.
library( 'lubridate' )
test_list <- lapply( test_list, function( x ) {
x$date_label <- ymd( paste( x$year, month.abb[ x$month ], 01, sep = "-"))
x
})
Manipulate x axis using scale_x_date()
# 1. date_labels = "%Y/%m" - 2016/01
# 2. date_labels = "%m" - 01
# 3. date_labels = "%b %Y" - Jan 2016
library( 'ggplot2' )
ggplot( data = test_list$A, aes( x = date_label, y = value, color = com, group = com ) ) +
geom_line( size = 3 ) +
scale_x_date(name = "month",
date_labels = "%m",
date_breaks = "1 month" )
ggplot( data = test_list$A, aes( x = date_label, y = value, group = com, color = com ) ) +
geom_line( size = 3 ) +
theme( axis.text.x = element_text( angle = 45, hjust = 1 )) +
scale_x_date(name = "month_year",
date_labels = "%b %Y",
date_breaks = "1 month" )
For the first case, you can convert month to be a factor. The following code would eliminate decimals in the month.
test$month <- as.factor(test$month)
test_list <- split(test, test$type)
Rplot <- ggplot(test_list$A, aes(x=month, y=value, col=com, group = 1))+geom_line()
Rplot
To get numeric months you have to have a new column. I will create a new one using dplyr package and change that.
library(dplyr)
# make a new column by joining month after converting it to character and year.
test <- test %>% mutate(exactDate = paste(month.abb[as.numeric(as.character(month))], year, sep=""))
# convert to factor
test$exactDate <- factor(test$exactDate, levels = c("Jan2016","Feb2016","Mar2016","Apr2016","May2016","Jun2016",
"Jul2016","Aug2016","Sep2016", "Oct2016", "Nov2016", "Dec2016"),
labels = c("Jan2016","Feb2016","Mar2016","Apr2016","May2016","Jun2016",
"Jul2016","Aug2016","Sep2016", "Oct2016", "Nov2016", "Dec2016"))
# Plot similarly
test_list <- split(test, test$type)
Rplot <- ggplot(test_list$A, aes(x=exactDate, y=value, col=com, group = 1))+geom_line()
# Rotate axis labels by 90 degrees
Rplot+ theme(axis.text.x=element_text(angle=90, hjust=1))