Related
My dataset contains 2 variables Y and X. Y was measured every 1.0 seconds.
My Data:
dput(Dataexample)
structure(list(X = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77,
78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93,
94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107,
108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120,
121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133,
134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146,
147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172,
173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185,
186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198,
199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211,
212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224,
225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237,
238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250,
251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263,
264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276,
277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289,
290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302,
303, 304, 305, 306), Y = c(71756.2344, 71745.85, 70882.42, 71025.61,
70539.02, 70602.3047, 70811.87, 70514.125, 69998.63, 70531.76,
70424.9141, 70663.51, 70075.375, 69731.0859, 70029.74, 70519.31,
69858.63, 69987.23, 70080.56, 69970.63, 69829.6, 69872.12, 69775.68,
69679.24, 69814.05, 69639.84, 69645.02, 69344.35, 69430.41, 70078.49,
69239.65, 69734.1953, 69736.27, 69549.63, 69506.0859, 69108,
69669.91, 69516.45, 69490.54, 69609.77, 69314.29, 69454.25, 69590.07,
69721.76, 69525.79, 69736.27, 69303.92, 69171.23, 69294.59, 69430.41,
69457.36, 69462.54, 69144.27, 69590.07, 69446.99, 70083.67, 69358.87,
69800.56, 69680.28, 69332.95, 69723.83, 69942.63, 69772.56, 69969.59,
69808.86, 70043.23, 70208.13, 70077.45, 69856.56, 70423.875,
69490.54, 69984.12, 70175.98, 70192.58, 70279.7, 70480.93, 70594,
70792.16, 70234.06, 70165.61, 70249.62, 70564.95, 70403.13, 70444.625,
70426.99, 69907.375, 70327.4141, 70686.3359, 70473.67, 71031.83,
70864.78, 70710.1953, 70691.52, 70703.97, 70826.39, 70708.12,
70595.04, 70946.75, 71319.27, 70977.875, 70475.74, 70612.68,
70680.11, 70527.61, 70461.22, 70877.2344, 70631.35, 70723.68,
70677, 70433.21, 70306.6641, 71246.63, 70375.125, 70416.62, 70150.0547,
70733.0156, 70583.63, 70866.86, 70580.5156, 70433.21, 70377.2,
70114.79, 70347.12, 70613.71, 70576.37, 70599.19, 70407.28, 70581.5547,
70650.02, 71122.11, 70909.4, 70694.63, 71076.45, 70650.02, 71133.52,
70810.83, 71240.41, 70630.31, 71144.94, 71493.63, 71117.95, 71374.28,
71143.9, 70805.64, 71349.375, 71208.2344, 71322.39, 71727.1641,
71060.88, 71546.56, 71569.4, 70984.1, 72032.37, 71573.55, 71787.375,
71469.76, 71398.15, 71683.57, 71709.52, 71637.9, 71556.9453,
71870.4141, 71612.99, 71953.47, 71515.43, 71315.125, 72007.4453,
72021.9844, 71549.68, 72001.22, 71359.75, 71775.95, 72327.23,
71949.31, 71844.47, 71857.96, 72128.9141, 72147.6, 71501.94,
72268.05, 72104, 72217.1641, 72253.51, 72198.48, 72908.78, 72084.27,
72653.29, 72431.06, 72858.92, 72512.0547, 72632.5156, 72700.02,
72335.53, 72713.52, 73065.62, 72818.42, 73004.3359, 72458.06,
73436.48, 73231.82, 73002.26, 73313.89, 73213.125, 72980.4453,
72948.25, 73106.13, 72931.625, 73409.47, 73057.31, 73141.4453,
73218.32, 73216.24, 73273.375, 73701.42, 73486.35, 72574.37,
73229.74, 73576.74, 73195.46, 73697.2656, 73115.48, 73065.62,
73062.5, 73111.32, 73988.23, 73619.3359, 73874.95, 73683.76,
73674.41, 73550.7656, 74166.9844, 73875.99, 74013.17, 74092.16,
73872.875, 74015.25, 73984.07, 73911.33, 73606.87, 74082.8, 73866.64,
74550.53, 74271.95, 73980.95, 74502.71, 74901.92, 74753.25, 74310.4141,
75178.51, 74748.05, 74756.37, 75194.1, 74797.95, 75531.0547,
75549.77, 75293.94, 75378.17, 75457.21, 75676.67, 76087.56, 76141.6641,
76008.5, 76241.55, 76585.96, 76091.73, 76880.4844, 76898.18,
77005.38, 77080.32, 77548.78, 77337.4453, 77000.18, 77448.8359,
76997.0547, 77314.54, 77919.47, 77185.46, 78127.75, 77464.45,
78349.59, 77824.71, 77465.49, 77818.46, 78140.25, 78547.51, 77850.74,
78236.06, 78341.2656, 78104.8359, 78464.17, 77888.23, 78392.3,
78686.0547, 78149.625, 78623.5547, 78672.5156, 78810.03, 78498.55,
78652.72, 78717.31, 78831.91, 78882.96, 78715.23, 78499.5859,
78892.3359, 78372.51)), row.names = c(NA, -306L), class = c("tbl_df",
"tbl", "data.frame"))
I have used ggplot to plot the data and used a loop to calculate the average slope within a moving 60-second-window for the entire duration of the dataset to find the 60 consecutive seconds where the slope is greatest.
Code:
library(readr)
library(ggplot2)
# Read the recording, skipping the first three lines of the file and dropping
# the last row (NOTE(review): presumably header/footer lines of the export --
# confirm against HF-6.csv).
Dataexample <- read_csv("HF-6.csv", skip = 3)
Dataexample <- head(Dataexample, -1)
Dataexample$X <- as.numeric(Dataexample$X)
df <- data.frame(Dataexample)

# Plot the raw series.
ggplot(data = df, aes(x = X, y = Y, group = 1)) +
  geom_line()

# Length (in rows) of the moving window; Y is sampled once per second, so
# 60 rows correspond to 60 seconds.
window <- 60

# Number of complete windows; 0 (rather than a negative count) when the data
# has fewer rows than one window.
n_windows <- max(nrow(Dataexample) - window + 1, 0)

# slopes[i] is the least-squares slope of Y on X over rows i..(i + window - 1).
# seq_len() yields an empty sequence when there are no complete windows.
slopes <- vapply(
  seq_len(n_windows),
  function(i) {
    rows <- i:(i + window - 1)
    # [[2]] extracts the X coefficient without its name.
    lm(Y ~ X, data = Dataexample[rows, ])$coefficients[[2]]
  },
  numeric(1)
)

print(slopes)
# First row of the steepest window, and the slope of that window.
which.max(slopes)
max(slopes)
My question is: how can I take the result of my loop, which identifies the 60 consecutive seconds where the slope is highest, and change the color of the line in the plot during those 60 seconds to highlight where the slope is greatest?
This should work:
# The question's loop stores the slope of the 60-row window starting at row i
# in `slopes[i]`, so which.max() gives the first row of the steepest window.
maxslope_ind <- which.max(slopes)

# Flag rows by position: the windows were built from row indices, so matching
# on row number stays correct even if X does not start at 1 or step by 1.
in_window <- seq_len(nrow(Dataexample)) %in% maxslope_ind:(maxslope_ind + 59)
Dataexample$highlight <- ifelse(in_window, 1, 0)

library(ggplot2)
# group = 1 keeps a single connected line while the colour changes along it.
ggplot(data = Dataexample, aes(x = X, y = Y, group = 1)) +
  geom_line(aes(colour = as.factor(highlight)), show.legend = FALSE) +
  scale_colour_manual(values = c("black", "red"))
I am trying to sort a data frame into quantiles based on a value, and then assign subgroups to each row. These subgroups are supposed to be randomly assigned, but in a way that results in each subgroup having as equal size as possible. So for example, if I have a dataframe of 300 rows, and want 5 quantiles each with 3 subgroups, I would expect 20 rows in quantile 1 subgroup 1, 20 rows in quantile 1 subgroup 2, etc.
Is there a nice and elegant way to do this, that preferably can be piped %>% using dplyr?
An example of my current code:
library(dplyr)
# Construct dataframe.
df <- structure(list(id = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92,
93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106,
107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132,
133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145,
146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158,
159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171,
172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184,
185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197,
198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210,
211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236,
237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249,
250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262,
263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275,
276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288,
289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300),
value = c(26550.87, 37212.39, 57285.34, 90820.78, 20168.19,
89838.97, 94467.53, 66079.78, 62911.4, 6178.63, 20597.46,
17655.68, 68702.28, 38410.37, 76984.14, 49769.92, 71761.85,
99190.61, 38003.52, 77744.52, 93470.52, 21214.25, 65167.38,
12555.51, 26722.07, 38611.41, 1339.03, 38238.8, 86969.08,
34034.9, 48208.01, 59956.58, 49354.13, 18621.76, 82737.33,
66846.67, 79423.99, 10794.36, 72371.09, 41127.44, 82094.63,
64706.02, 78293.28, 55303.63, 52971.96, 78935.62, 2333.12,
47723.01, 73231.37, 69273.16, 47761.96, 86120.95, 43809.71,
24479.73, 7067.9, 9946.62, 31627.17, 51863.43, 66200.51,
40683.02, 91287.59, 29360.34, 45906.57, 33239.47, 65087.05,
25801.68, 47854.52, 76631.07, 8424.69, 87532.13, 33907.29,
83944.04, 34668.35, 33377.49, 47635.12, 89219.83, 86433.95,
38998.95, 77732.07, 96061.8, 43465.95, 71251.47, 39999.44,
32535.22, 75708.71, 20269.23, 71112.12, 12169.19, 24548.85,
14330.44, 23962.94, 5893.44, 64228.83, 87626.92, 77891.47,
79730.88, 45527.45, 41008.41, 81087.02, 60493.33, 65472.39,
35319.73, 27026.01, 99268.41, 63349.33, 21320.81, 12937.23,
47811.8, 92407.45, 59876.1, 97617.07, 73179.25, 35672.69,
43147.37, 14821.16, 1307.76, 71556.61, 10318.42, 44628.43,
64010.1, 99183.86, 49559.36, 48434.95, 17344.23, 75482.09,
45389.55, 51116.98, 20754.51, 22865.81, 59571.2, 57487.22,
7706.44, 3554.06, 64279.55, 92861.52, 59809.24, 56090.07,
52602.77, 98509.52, 50764.18, 68278.81, 60154.12, 23886.87,
25816.59, 72930.96, 45257.08, 17512.68, 74669.83, 10498.76,
86454.49, 61464.5, 55715.95, 32877.73, 45313.14, 50044.1,
18086.64, 52963.06, 7527.57, 27775.59, 21269.95, 28479.05,
89509.41, 44623.53, 77998.49, 88061.9, 41312.42, 6380.85,
33548.75, 72372.59, 33761.53, 63041.41, 84061.46, 85613.17,
39135.93, 38049.39, 89544.54, 64431.58, 74107.86, 60530.34,
90308.16, 29373.02, 19126.01, 88645.09, 50333.95, 87705.75,
18919.36, 75810.31, 72449.89, 94372.48, 54764.66, 71174.39,
38890.51, 10087.31, 92730.21, 28323.25, 59057.32, 11036.06,
84050.7, 31796.37, 78285.13, 26750.82, 21864.53, 51679.68,
26895.06, 18116.83, 51857.61, 56278.29, 12915.69, 25636.76,
71793.53, 96140.99, 10014.08, 76322.27, 94796.64, 81863.47,
30829.23, 64957.95, 95335.55, 95373.27, 33997.92, 26247.41,
16545.39, 32216.81, 51012.52, 92396.85, 51095.97, 25762.13,
4646.09, 41785.63, 85400.15, 34723.07, 13144.23, 37448.69,
63142.02, 39007.89, 68962.78, 68941.34, 55490.06, 42962.44,
45272.01, 30644.33, 57835.39, 91037.03, 14260.41, 41504.76,
21092.58, 42875.04, 13269, 46009.64, 94295.71, 76197.39,
93290.98, 47067.85, 60358.81, 48498.97, 10880.63, 24772.68,
49851.45, 37286.67, 93469.14, 52398.61, 31714.47, 27796.6,
78754.05, 70246.25, 16502.76, 6445.75, 75470.56, 62041, 16957.68,
6221.41, 10902.93, 38171.64, 16931.09, 29865.25, 19220.95,
25717, 18123.18, 47731.37, 77073.7, 2778.71, 52731.08, 88031.91,
37306.34, 4795.91, 13862.82, 32149.21, 15483.16, 13222.82,
22130.59, 22638.08, 13141.65, 98156.35, 32701.37, 50693.95,
68144.25, 9916.91, 11890.26, 5043.97, 92925.39)), class = "data.frame", row.names = c(NA,
-300L))
Currently I am using this code:
# Number of quantile groups, and number of subgroups wanted inside each group.
groups <- 5
subgroups <- 3
# Fix the RNG seed so the random subgroup draws below are reproducible.
set.seed(30)
result.df <- df %>%
# `group`: quantile bucket (1..groups) of each row, based on `value`.
mutate(group = ntile(x = value, n = groups),
# `subgroup`: one independent draw from 1:subgroups per row. Because every row
# is drawn separately, nothing forces the subgroup counts within a group to be
# equal.
subgroup = replicate(nrow(df),
sample(1:subgroups, 1, replace = T))
)
But when I check the distribution of the subgroups, I find that it is not as equal as it could be.
# Cross-tabulate quantile group (rows) against subgroup (columns) to inspect
# how evenly the subgroups were assigned.
table(result.df$group, result.df$subgroup)
# Returns:
# 1 2 3
# 1 21 22 17
# 2 22 20 18
# 3 24 17 19
# 4 25 15 20
# 5 23 19 18
As there are 300 rows, 5 groups, and 3 subgroups, in this case I would like there to be 20 rows assigned to each subgroup within each group. How can this be achieved in a way that is consistently "random" but also makes the subgroup sizes as equal as possible, regardless of the number of quantiles and the number of subgroups?
Thank you.
Here is one way :
First shuffle the data set, then create group and sort by group. Finally create subgroup as row number modulo number of subgroups :
# Shuffle all rows first so that the later modulo assignment is random.
shuffled <- sample_frac(df)

# Bucket into quantile groups and bring each group's rows together.
by_group <- shuffled %>%
  mutate(group = ntile(x = value, n = groups)) %>%
  arrange(group)

# Deal subgroup labels out cyclically by row position (labels are
# 0..subgroups-1 because of the modulo).
result.df <- by_group %>%
  mutate(subgroup = row_number(group) %% subgroups)

table(result.df$group, result.df$subgroup)
0 1 2
1 20 20 20
2 20 20 20
3 20 20 20
4 20 20 20
5 20 20 20
The best approach is to systematically assign the random groupings.
First group by quantiles (as you have done), then use dplyr's group_by function to create 5 sets of 60 rows, then assign each of the subgroups to 20 rows in each parent group. (This probably does not describe the process well.)
# Number of quantile groups, and number of subgroups inside each group.
groups <- 5
subgroups <- 3

result.df <- df %>%
  mutate(group = ntile(x = value, n = groups)) %>%
  group_by(group) %>%
  # Within each group: repeat a random ordering of the subgroup labels until
  # there is exactly one label per row (so subgroup sizes differ by at most
  # one, and which subgroups get the extra rows is random), then shuffle the
  # labels over the rows. Indexing with sample.int(n()) avoids the sample(x)
  # pitfall when x has length one.
  mutate(subgroup = rep_len(sample.int(subgroups), n())[sample.int(n())]) %>%
  # Drop the grouping so later verbs operate on the whole data frame.
  ungroup()

table(result.df$group, result.df$subgroup)
1 2 3
1 20 20 20
2 20 20 20
3 20 20 20
4 20 20 20
5 20 20 20
My task is to plot the cumulative distribution function (CDF) of asymptotic kernels. For this purpose, I prepared the following R code for the CDF of the log-normal kernel, but my problem is that the CDF graph slopes downward. This happens after I used the transformation provided by the author of the kernel (log-normal kernel).
Kindly provide suggestions/correction in this problem.
R code:
# Kernel-smoothed CDF estimate of `y` on a grid of `k` points, using a
# log-normal kernel (normal CDF on the log scale).
k <- 200
y <- c(306, 455, 210, 883, 310, 361, 218, 166, 170, 654, 728, 71, 567, 144, 613, 707, 61, 88, 301, 81, 624, 371, 394, 520, 574, 118, 390, 12, 473, 26, 533, 107, 53, 122, 814, 93,731, 460, 153, 433, 145, 583, 95, 303, 519, 643, 765, 735, 189, 53, 246, 689, 65, 5,132, 687, 345, 444, 223, 175, 60, 163, 65, 208, 428, 230, 305, 11, 132, 226, 426, 705,363, 11, 176, 791, 95, 167, 284, 641, 147, 163, 655, 239, 88, 245, 30, 179, 310, 477,166, 450, 364, 107, 177, 156, 11, 429, 351, 15, 181, 283, 201, 524, 13, 212, 524, 288, 363, 442, 199, 550, 54, 558, 207, 92, 60, 293, 202, 353, 267, 371, 387, 457, 337, 201, 222, 62, 353, 163, 31, 340, 229, 182, 156, 329, 291, 179, 268, 142, 194, 320, 181, 285,348, 197, 180, 186, 145, 350, 285, 110, 286, 270, 81, 131, 269, 135, 79, 59, 105, 239, 13, 183, 116)
n <- length(y)
# Bandwidth: 0.79 * IQR * n^(-1/5).
h <- 0.79 * IQR(y) * n^(-1 / 5)
# Evaluation grid, starting just above the smallest observation.
x <- seq(min(y) + 0.05, max(y), length.out = k)
# Kernel standard deviation on the log scale.
sigma <- sqrt(4 * log(1 + h))
# PhiLN[i, j] = Phi((log(x[j]) - log(y[i])) / sigma): contribution of
# observation i at grid point j. The argument must increase with x; with the
# sign flipped (log(y) - log(x)) the result is the decreasing survival curve
# 1 - F instead of the CDF. The matrix is built once for all (i, j) pairs --
# re-creating it inside the inner loop wiped every entry but the last.
PhiLN <- pnorm(outer(log(y), log(x), function(ly, lx) lx - ly) / sigma)
# The CDF estimate at each grid point is the average over observations.
Fhat <- colMeans(PhiLN)
plot(x, Fhat, type = "l")
I would like to display months (in abbreviated form) along the horizontal axis, with the corresponding year printed once. I know how to display month-year:
The un-needed repetition of the year clutters the labels. Instead I would like something like this:
except that the year would be printed below the months.
I printed the year above the axis labels, because that's the best I could do. This follows from a limitation of the annotate() function, whose output gets clipped if it lies outside of the plot area. I am aware of possible workarounds based on annotation_custom(), but I couldn't make them work with date objects (I did not try to convert dates to numbers and back to dates again, as it seemed more complicated than hopefully necessary).
I'm wondering if the new dup_axis() could be hijacked for this purpose. If instead of sending the duplicated axis to the opposite side of the panel, it could send it a few lines below the duplicated axis, then perhaps it would just be a matter of setting up one axis with panel.grid.major blanked out and the labels set to %b, while the other axis would have panel.grid.minor blanked out and the labels set to %Y. (an added challenge is that the year labels would be shifted to October instead of January)
These questions are related. However, the annotation_custom() and textGrob() functions do not play well with dates, as far as I can tell.
how-can-i-add-annotations-below-the-x-axis-in-ggplot2
displaying-text-below-the-plot-generated-by-ggplot2
Data and basic code below:
library("ggplot2")
library("scales")
# Plot 1: every major break (every 2 months) is labelled "month year".
month_year_axis <- scale_x_date(
  date_breaks = "2 month",
  date_minor_breaks = "1 month",
  labels = date_format("%b %Y")
)
ggplot(data = df, aes(x = Date, y = value)) +
  geom_line() +
  month_year_axis +
  xlab(NULL)

# Plot 2: month-only labels, with each year written once as a text annotation
# at y = 0 (i.e. inside the panel).
year_marks <- list(
  annotate(geom = "text", x = as.Date("1719-10-01"), y = 0, label = "1719"),
  annotate(geom = "text", x = as.Date("1720-10-01"), y = 0, label = "1720")
)
ggplot(data = df, aes(x = Date, y = value)) +
  geom_line() +
  scale_x_date(date_minor_breaks = "2 month", labels = date_format("%b")) +
  year_marks +
  xlab(NULL)
# data
df <- structure(list(Date = structure(c(-91455, -91454, -91453, -91452,
-91451, -91450, -91448, -91447, -91446, -91445, -91444, -91443,
-91441, -91440, -91439, -91438, -91437, -91436, -91434, -91433,
-91431, -91430, -91429, -91427, -91426, -91425, -91424, -91423,
-91422, -91420, -91419, -91418, -91417, -91416, -91415, -91413,
-91412, -91411, -91410, -91409, -91408, -91406, -91405, -91404,
-91403, -91402, -91401, -91399, -91398, -91397, -91396, -91395,
-91394, -91392, -91391, -91390, -91389, -91388, -91387, -91385,
-91384, -91382, -91381, -91380, -91379, -91377, -91376, -91375,
-91374, -91373, -91372, -91371, -91370, -91369, -91368, -91367,
-91366, -91364, -91363, -91362, -91361, -91360, -91359, -91357,
-91356, -91355, -91354, -91353, -91352, -91350, -91349, -91348,
-91347, -91346, -91345, -91343, -91342, -91341, -91340, -91339,
-91338, -91336, -91335, -91334, -91333, -91332, -91331, -91329,
-91328, -91327, -91326, -91325, -91324, -91322, -91321, -91320,
-91319, -91315, -91314, -91313, -91312, -91311, -91310, -91308,
-91307, -91306, -91305, -91304, -91303, -91301, -91300, -91299,
-91298, -91297, -91296, -91294, -91293, -91292, -91291, -91290,
-91289, -91287, -91286, -91285, -91284, -91283, -91282, -91280,
-91279, -91278, -91277, -91276, -91275, -91273, -91272, -91271,
-91270, -91269, -91268, -91266, -91265, -91264, -91263, -91262,
-91261, -91259, -91258, -91257, -91256, -91255, -91254, -91252,
-91251, -91250, -91249, -91248, -91247, -91245, -91244, -91243,
-91242, -91241, -91240, -91238, -91237, -91236, -91235, -91234,
-91233, -91231, -91230, -91229, -91228, -91227, -91226, -91224,
-91223, -91222, -91221, -91220, -91219, -91217, -91216, -91215,
-91214, -91213, -91212, -91210, -91209, -91208, -91207, -91205,
-91201, -91200, -91199, -91198, -91196, -91195, -91194, -91193,
-91192, -91191, -91189, -91188, -91187, -91186, -91185, -91184,
-91182, -91181, -91180, -91179, -91178, -91177, -91175, -91174,
-91173, -91172, -91171, -91170, -91168, -91167, -91166, -91165,
-91164, -91163, -91161, -91160, -91159, -91158, -91157, -91156,
-91154, -91153, -91152, -91151, -91150, -91149, -91147, -91146,
-91145, -91144, -91143, -91142, -91140, -91139, -91138, -91131,
-91130, -91129, -91128, -91126, -91125, -91124, -91123, -91122,
-91121, -91119, -91118, -91117, -91116, -91115, -91114, -91112,
-91111, -91110, -91109, -91108, -91107, -91104, -91103, -91102,
-91101, -91100, -91099, -91097, -91096, -91095, -91094, -91093,
-91091, -91090, -91089, -91088, -91087, -91086, -91084, -91083,
-91082, -91081, -91080, -91079, -91077, -91076, -91075, -91074,
-91073, -91072, -91070, -91069, -91068, -91065, -91063, -91062,
-91061, -91060, -91059, -91058, -91056, -91055, -91054, -91053,
-91052, -91051, -91049, -91048, -91047, -91046, -91045, -91044,
-91042, -91041, -91040, -91039, -91038, -91037, -91035, -91034,
-91033, -91032, -91031, -91030, -91028, -91027, -91026, -91025,
-91024, -91023, -91021, -91020, -91019, -91018, -91017, -91016,
-91014, -91013, -91012, -91011, -91010, -91009, -91007, -91006,
-91005, -91004, -91003, -91002, -91000, -90999, -90998, -90997,
-90996, -90995, -90993, -90992, -90991, -90990, -90989, -90988,
-90986, -90985, -90984, -90983, -90982), class = "Date"), value = c(113,
113, 113, 113, 114, 114, 114, 115, 115, 115, 116, 116, 116, 116,
117, 117, 117, 117, 116, 117, 116, 116, 116, 117, 117, 117, 117,
117, 117, 117, 116, 117, 116, 116, 116, 117, 117, 117, 117, 117,
117, 117, 116, 116, 117, 117, 117, 117, 117, 117, 117, 117, 117,
117, 117, 118, 118, 118, 118, 117, 118, 117, 117, 117, 117, 117,
117, 118, 116, 116, 116, 116, 116, 116, 116, 117, 117, 118, 118,
118, 118, 118, 119, 120, 120, 119, 119, 120, 120, 121, 121, 122,
124, 124, 122, 123, 124, 123, 123, 123, 123, 123, 124, 124, 126,
126, 126, 126, 126, 125, 125, 126, 127, 126, 126, 125, 126, 126,
126, 128, 128, 128, 130, 133, 131, 133, 134, 134, 134, 136, 136,
136, 135, 135, 135, 136, 136, 136, 136, 135, 135, 135, 135, 130,
129, 129, 130, 131, 136, 138, 155, 157, 161, 170, 174, 168, 165,
169, 171, 181, 184, 182, 179, 181, 179, 175, 177, 177, 174, 170,
174, 173, 178, 173, 178, 179, 182, 184, 184, 180, 181, 182, 182,
184, 184, 188, 195, 198, 220, 255, 275, 350, 310, 315, 320, 320,
316, 300, 310, 310, 320, 317, 313, 312, 310, 297, 285, 285, 286,
288, 315, 328, 338, 344, 345, 352, 352, 342, 335, 343, 340, 342,
339, 337, 336, 336, 342, 347, 352, 352, 351, 352, 352, 351, 352,
352, 355, 375, 400, 452, 487, 476, 475, 473, 485, 500, 530, 595,
720, 720, 770, 750, 770, 750, 735, 740, 745, 735, 700, 700, 750,
760, 755, 755, 760, 760, 765, 950, 950, 950, 875, 875, 875, 880,
880, 880, 900, 900, 900, 880, 880, 890, 895, 890, 880, 870, 870,
870, 870, 870, 860, 860, 860, 860, 850, 840, 810, 820, 810, 810,
805, 810, 805, 820, 815, 820, 805, 790, 800, 780, 760, 765, 750,
740, 820, 810, 800, 800, 775, 750, 810, 750, 740, 700, 705, 660,
630, 640, 595, 590, 570, 565, 535, 440, 400, 410, 400, 405, 390,
370, 300, 300, 180, 200, 310, 290, 260, 260, 275, 260, 270, 265,
255, 250, 210, 210, 200, 195, 210, 215, 240, 240, 220, 220, 220,
220, 210, 212, 208, 220, 210, 212, 208, 220, 215, 220, 214, 214,
213, 212, 210, 210, 195, 195, 160, 160, 175, 205, 210, 208, 197,
181, 185)), .Names = c("Date", "value"), row.names = c(NA, 393L
), class = "data.frame")
The code below provides two potential options for adding year labels.
Option 1a: Faceting
You could use faceting to mark the years. For example:
library(ggplot2)
library(lubridate)
# One abbreviated-month label per month, with no padding at the panel edges.
month_axis <- scale_x_date(
  date_labels = "%b",
  date_breaks = "month",
  expand = c(0, 0)
)

# One facet per calendar year; panel widths follow each year's date range and
# the strip is moved to the bottom.
year_facets <- facet_grid(
  ~ year(Date),
  space = "free_x",
  scales = "free_x",
  switch = "x"
)

# Put the year strips outside the axis and butt the panels together.
strip_theme <- theme(
  strip.placement = "outside",
  strip.background = element_rect(fill = NA, colour = "grey50"),
  panel.spacing = unit(0, "cm")
)

ggplot(df, aes(Date, value)) +
  geom_line() +
  month_axis +
  year_facets +
  theme_bw() +
  strip_theme
Note that with this approach, if there are missing dates at the beginning or end of a year (by "missing", I mean rows for those dates are not even present in the data) then the x-axis will start/end at the first/last date in the data for that year, rather than go from Jan-1 to Dec-31. In that case, you'd need to add in rows for the missing dates and either NA for value or interpolate value. In addition, with this method there is no space or line between December 31 of one year and January 1 of the next year, so there's a discontinuity across each year.
Option 1b: Faceting + centered month labels
To address @AF7's comment: you can center the month labels by adding some spaces before each label. But you have to choose the number of spaces manually, depending on the physical size of the plot when you print it to a device. (There's probably a way to center the labels programmatically based on the internal grob measurements, but I'm not sure how to do it.) I've also removed the minor vertical gridlines and lightened the line between years.
# Left-pad the month format with spaces so each label sits between its ticks;
# the amount of padding has to be tuned by hand for the output size.
padded_month_format <- paste0(paste(rep(" ", 11), collapse = ""), "%b")

month_axis <- scale_x_date(
  date_labels = padded_month_format,
  date_breaks = "month",
  expand = c(0, 0)
)

# One facet per calendar year, strip at the bottom.
year_facets <- facet_grid(
  ~ year(Date),
  space = "free_x",
  scales = "free_x",
  switch = "x"
)

# No strip box, no minor vertical gridlines, lighter border between years.
strip_theme <- theme(
  strip.placement = "outside",
  strip.background = element_blank(),
  panel.grid.minor.x = element_blank(),
  panel.border = element_rect(colour = "grey70"),
  panel.spacing = unit(0, "cm")
)

ggplot(df, aes(Date, value)) +
  geom_line() +
  month_axis +
  year_facets +
  theme_bw() +
  strip_theme
Option 2a: Edit the x-axis label grob
Here's a more complex and finicky method (though it could likely be automated by someone who understands the structure and unit spacings of grid graphics better than I do) that avoids the pitfalls of the faceting method described above:
library(grid)
# Simulated daily series spanning four calendar years (a seeded random walk),
# used for illustration.
set.seed(2)
all_days <- seq(as.Date("1718-03-01"), as.Date("1721-09-20"), by = "1 day")
df <- data.frame(Date = all_days)
df$value <- cumsum(rnorm(nrow(df)))

# Numeric x positions of every January 1st, for the year-separator lines.
new_year_x <- as.numeric(df$Date[yday(df$Date) == 1])

# Base plot: grey vertical line at each new year, monthly abbreviated labels.
p <- ggplot(df, aes(Date, value)) +
  geom_vline(xintercept = new_year_x, colour = "grey60") +
  geom_line() +
  scale_x_date(date_labels = "%b", date_breaks = "month", expand = c(0, 0)) +
  theme_bw() +
  theme(panel.grid.minor.x = element_blank()) +
  labs(x = "")
Now we want to add the year values below and in between June and July of each year. The code below does that by modifying the x-axis label grob and is adapted from this SO answer by @SandyMuspratt.
# Convert the plot `p` into a table of grobs so the axis labels can be edited.
g <- ggplotGrob(p)
# Get the x axis (the layout entry named "axis-b", i.e. the bottom axis)
index <- which(g$layout$name == "axis-b") # Which grob
xaxis <- g$grobs[[index]]
# Get the ticks (labels and marks)
# NOTE(review): the positions [[2]] / [[1]] used below depend on how the
# installed ggplot2 version lays out the axis grob -- confirm with str(xaxis).
ticks <- xaxis$children[[2]]
# Get the labels
ticksB <- ticks$grobs[[2]]
# Edit x-axis label grob
# Find every index of Jun in the x-axis labels and add a newline and
# then a year label
junes = which(ticksB$children[[1]]$label == "Jun")
# The years are pasted on in order of first appearance in df$Date, so this
# pairs correctly only when every year in the data has exactly one "Jun" label.
ticksB$children[[1]]$label[junes] = paste0(ticksB$children[[1]]$label[junes],
"\n ", unique(year(df$Date)))
# Put the edited labels back into the plot
ticks$grobs[[2]] <- ticksB
xaxis$children[[2]] <- ticks
g$grobs[[index]] <- xaxis
# Draw the plot
grid.newpage()
grid.draw(g)
Option 2b: Edit the x-axis label grob and center the month labels
Below is the only change that needs to be made to Option 2a to center the month labels, but, once again, the number of spaces needs to be tweaked manually.
# Make the edit
# Shift every month label to the right with leading spaces so that it sits
# between two ticks (the amount of padding is tuned by hand).
pad <- paste(rep(" ", 7), collapse = "")
month_labels <- paste0(pad, ticksB$children[[1]]$label)
# Locate the (padded) "Jun" labels and append a newline plus the year to each.
junes <- grep("Jun", month_labels)
month_labels[junes] <- paste0(month_labels[junes], "\n ", unique(year(df$Date)))
# Store the edited labels back on the label grob.
ticksB$children[[1]]$label <- month_labels
I came upon this question and thought maybe I can add a solution. We can display both month and year in every year's first displayed month by using a simple condition. You can play with the date_breaks to remove January from the labels, and this will still work. I'm using month() and year() from lubridate.
library(tidyverse)
library(lubridate)
# Labeller for the date breaks: a break whose predecessor is missing or falls
# in a different year gets "month \n year"; every other break gets the month
# only.
label_month_with_first_year <- function(x) {
  previous <- lag(x)
  starts_year <- is.na(previous) | year(previous) != year(x)
  if_else(
    starts_year,
    paste(month(x, label = TRUE), "\n", year(x)),
    paste(month(x, label = TRUE))
  )
}

df %>%
  ggplot(aes(Date, value)) +
  geom_line() +
  scale_x_date(date_breaks = "2 months", labels = label_month_with_first_year)
If you want to try to hack together a sub-label, you could convert it to a grob. I edited this from the original post to create a function that adds the sublabels and returns a gtable object. Note that the sublabs input must be the same length as your x-axis breaks:
library(grid)
library(gtable)
library(gridExtra)
# Add a row of sub-labels (e.g. years) underneath the bottom axis of a ggplot.
#
# plot:    a ggplot object.
# sublabs: one sub-label per x-axis break; each run of identical values is
#          drawn as a single cell spanning that run.
# Returns the plot as a gtable with the extra row inserted; draw it with
# grid.draw().
add_sublabs <- function(plot, sublabs){
  built <- ggplotGrob(plot)
  axis_row <- which(built$layout[, "name"] == "axis-b")

  # x positions of the axis labels, read from the bottom-axis label grob.
  label_grob <- built[["grobs"]][[axis_row]][["children"]][[2]][["grobs"]][[2]][["children"]][[1]]
  xbreaks <- label_grob$x
  if (length(xbreaks) != length(sublabs)) {
    stop("Sub-labels must be the same length as the x-axis breaks")
  }

  # A cell ends at the break following the last occurrence of each sub-label;
  # the final cell ends at 1.
  is_last_of_run <- !duplicated(sublabs, fromLast = TRUE)
  cell_ends <- c(as.numeric(xbreaks), 1)[which(is_last_of_run) + 1]
  cell_widths <- diff(c(0, cell_ends))

  # One-row table holding the distinct sub-labels, sized to match the cells.
  label_table <- tableGrob(matrix(sublabs[is_last_of_run], nrow = 1))
  label_table$widths <- unit(cell_widths, attr(xbreaks, "unit"))

  # Insert a new row just below the axis and place the table in it.
  axis_pos <- built$layout[axis_row, c("t", "l")]
  row_height <- sum(label_table$heights) + unit(4, "mm")
  widened <- gtable_add_rows(built, heights = row_height, pos = axis_pos$t)
  gtable_add_grob(widened, label_table, t = axis_pos$t + 1, l = axis_pos$l)
}
# Plot with month-only labels on breaks every two months.
month_axis <- scale_x_date(
  date_breaks = "2 month",
  date_minor_breaks = "1 month",
  labels = date_format("%b")
)
p <- ggplot(data = df, aes(x = Date, y = value)) +
  geom_line() +
  month_axis +
  xlab(NULL)

# One sub-label per x-axis break: two breaks in 1719, six in 1720.
sublabs <- rep(c("1719", "1720"), times = c(2, 6))

# Draw the plot with the year row added.
with_years <- add_sublabs(p, sublabs)
grid.draw(with_years)
One way to avoid the complexities would be to change the required output so that January is replaced by the year.
The lab function returns the labels given the breaks. Unexpectedly, ggplot will pass NAs to it so in the first line of the function body we replace those with some date -- it does not matter which date since such values are not subsequently used by ggplot. Finally we format the date as a year or abbreviated month depending on whether the month is January (which corresponds to the POSIXlt component mon equalling 0) or not.
library(ggplot2)
library(scales)
# Build axis labels from date breaks: January breaks are shown as the
# four-digit year, all other breaks as the abbreviated month name.
#
# b: vector of Date breaks; may contain NA.
# Returns a character vector of labels, one per break.
lab <- function(b) {
  # NA breaks are replaced by an arbitrary date so that formatting succeeds
  missing_breaks <- is.na(b)
  b[missing_breaks] <- Sys.Date()
  # POSIXlt months are zero-based, so mon == 0 means January
  is_january <- as.POSIXlt(b)$mon == 0
  fmt <- ifelse(is_january, "%Y", "%b")
  format(b, fmt)
}
# Monthly breaks, labelled by lab()
ggplot(data = df, mapping = aes(x = Date, y = value)) +
  geom_line() +
  scale_x_date(labels = lab, date_breaks = "month")
Note: I have added Issue 2182 to the ggplot2 GitHub issues list regarding the NAs that are passed to the label function. If subsequent versions of ggplot2 no longer pass the NAs, then the first line of the body of lab could be omitted.
Update: fixed.
I would like to display months (in abbreviated form) along the horizontal axis, with the corresponding year printed once. I know how to display month-year:
The un-needed repetition of the year clutters the labels. Instead I would like something like this:
except that the year would be printed below the months.
I printed the year above the axis labels, because that's the best I could do. This follows from a limitation of the annotate() function, whose output gets clipped if it lies outside of the plot area. I am aware of possible workarounds based on annotation_custom(), but I couldn't make them work with date objects (I did not try to convert dates to numbers and back to dates again, as it seemed more complicated than hopefully necessary).
I'm wondering if the new dup_axis() could be hijacked for this purpose. If instead of sending the duplicated axis to the opposite side of the panel, it could send it a few lines below the duplicated axis, then perhaps it would just be a matter of setting up one axis with panel.grid.major blanked out and the labels set to %b, while the other axis would have panel.grid.minor blanked out and the labels set to %Y. (an added challenge is that the year labels would be shifted to October instead of January)
These questions are related. However, the annotation_custom() and textGrob() functions do not play well with dates, as far as I can tell.
how-can-i-add-annotations-below-the-x-axis-in-ggplot2
displaying-text-below-the-plot-generated-by-ggplot2
Data and basic code below:
library("ggplot2")
library("scales")
# Plot 1: every label shows month and year
ggplot(df, aes(Date, value)) +
  geom_line() +
  scale_x_date(
    date_breaks = "2 month",
    date_minor_breaks = "1 month",
    labels = date_format("%b %Y")
  ) +
  xlab(NULL)

# Plot 2: month-only labels, with each year written once as a text
# annotation at y = 0
ggplot(df, aes(Date, value)) +
  geom_line() +
  scale_x_date(date_minor_breaks = "2 month", labels = date_format("%b")) +
  annotate("text", x = as.Date("1719-10-01"), y = 0, label = "1719") +
  annotate("text", x = as.Date("1720-10-01"), y = 0, label = "1720") +
  xlab(NULL)
# Data: dput() output for df (393 rows; columns Date and value)
df <- structure(list(Date = structure(c(-91455, -91454, -91453, -91452,
-91451, -91450, -91448, -91447, -91446, -91445, -91444, -91443,
-91441, -91440, -91439, -91438, -91437, -91436, -91434, -91433,
-91431, -91430, -91429, -91427, -91426, -91425, -91424, -91423,
-91422, -91420, -91419, -91418, -91417, -91416, -91415, -91413,
-91412, -91411, -91410, -91409, -91408, -91406, -91405, -91404,
-91403, -91402, -91401, -91399, -91398, -91397, -91396, -91395,
-91394, -91392, -91391, -91390, -91389, -91388, -91387, -91385,
-91384, -91382, -91381, -91380, -91379, -91377, -91376, -91375,
-91374, -91373, -91372, -91371, -91370, -91369, -91368, -91367,
-91366, -91364, -91363, -91362, -91361, -91360, -91359, -91357,
-91356, -91355, -91354, -91353, -91352, -91350, -91349, -91348,
-91347, -91346, -91345, -91343, -91342, -91341, -91340, -91339,
-91338, -91336, -91335, -91334, -91333, -91332, -91331, -91329,
-91328, -91327, -91326, -91325, -91324, -91322, -91321, -91320,
-91319, -91315, -91314, -91313, -91312, -91311, -91310, -91308,
-91307, -91306, -91305, -91304, -91303, -91301, -91300, -91299,
-91298, -91297, -91296, -91294, -91293, -91292, -91291, -91290,
-91289, -91287, -91286, -91285, -91284, -91283, -91282, -91280,
-91279, -91278, -91277, -91276, -91275, -91273, -91272, -91271,
-91270, -91269, -91268, -91266, -91265, -91264, -91263, -91262,
-91261, -91259, -91258, -91257, -91256, -91255, -91254, -91252,
-91251, -91250, -91249, -91248, -91247, -91245, -91244, -91243,
-91242, -91241, -91240, -91238, -91237, -91236, -91235, -91234,
-91233, -91231, -91230, -91229, -91228, -91227, -91226, -91224,
-91223, -91222, -91221, -91220, -91219, -91217, -91216, -91215,
-91214, -91213, -91212, -91210, -91209, -91208, -91207, -91205,
-91201, -91200, -91199, -91198, -91196, -91195, -91194, -91193,
-91192, -91191, -91189, -91188, -91187, -91186, -91185, -91184,
-91182, -91181, -91180, -91179, -91178, -91177, -91175, -91174,
-91173, -91172, -91171, -91170, -91168, -91167, -91166, -91165,
-91164, -91163, -91161, -91160, -91159, -91158, -91157, -91156,
-91154, -91153, -91152, -91151, -91150, -91149, -91147, -91146,
-91145, -91144, -91143, -91142, -91140, -91139, -91138, -91131,
-91130, -91129, -91128, -91126, -91125, -91124, -91123, -91122,
-91121, -91119, -91118, -91117, -91116, -91115, -91114, -91112,
-91111, -91110, -91109, -91108, -91107, -91104, -91103, -91102,
-91101, -91100, -91099, -91097, -91096, -91095, -91094, -91093,
-91091, -91090, -91089, -91088, -91087, -91086, -91084, -91083,
-91082, -91081, -91080, -91079, -91077, -91076, -91075, -91074,
-91073, -91072, -91070, -91069, -91068, -91065, -91063, -91062,
-91061, -91060, -91059, -91058, -91056, -91055, -91054, -91053,
-91052, -91051, -91049, -91048, -91047, -91046, -91045, -91044,
-91042, -91041, -91040, -91039, -91038, -91037, -91035, -91034,
-91033, -91032, -91031, -91030, -91028, -91027, -91026, -91025,
-91024, -91023, -91021, -91020, -91019, -91018, -91017, -91016,
-91014, -91013, -91012, -91011, -91010, -91009, -91007, -91006,
-91005, -91004, -91003, -91002, -91000, -90999, -90998, -90997,
-90996, -90995, -90993, -90992, -90991, -90990, -90989, -90988,
-90986, -90985, -90984, -90983, -90982), class = "Date"), value = c(113,
113, 113, 113, 114, 114, 114, 115, 115, 115, 116, 116, 116, 116,
117, 117, 117, 117, 116, 117, 116, 116, 116, 117, 117, 117, 117,
117, 117, 117, 116, 117, 116, 116, 116, 117, 117, 117, 117, 117,
117, 117, 116, 116, 117, 117, 117, 117, 117, 117, 117, 117, 117,
117, 117, 118, 118, 118, 118, 117, 118, 117, 117, 117, 117, 117,
117, 118, 116, 116, 116, 116, 116, 116, 116, 117, 117, 118, 118,
118, 118, 118, 119, 120, 120, 119, 119, 120, 120, 121, 121, 122,
124, 124, 122, 123, 124, 123, 123, 123, 123, 123, 124, 124, 126,
126, 126, 126, 126, 125, 125, 126, 127, 126, 126, 125, 126, 126,
126, 128, 128, 128, 130, 133, 131, 133, 134, 134, 134, 136, 136,
136, 135, 135, 135, 136, 136, 136, 136, 135, 135, 135, 135, 130,
129, 129, 130, 131, 136, 138, 155, 157, 161, 170, 174, 168, 165,
169, 171, 181, 184, 182, 179, 181, 179, 175, 177, 177, 174, 170,
174, 173, 178, 173, 178, 179, 182, 184, 184, 180, 181, 182, 182,
184, 184, 188, 195, 198, 220, 255, 275, 350, 310, 315, 320, 320,
316, 300, 310, 310, 320, 317, 313, 312, 310, 297, 285, 285, 286,
288, 315, 328, 338, 344, 345, 352, 352, 342, 335, 343, 340, 342,
339, 337, 336, 336, 342, 347, 352, 352, 351, 352, 352, 351, 352,
352, 355, 375, 400, 452, 487, 476, 475, 473, 485, 500, 530, 595,
720, 720, 770, 750, 770, 750, 735, 740, 745, 735, 700, 700, 750,
760, 755, 755, 760, 760, 765, 950, 950, 950, 875, 875, 875, 880,
880, 880, 900, 900, 900, 880, 880, 890, 895, 890, 880, 870, 870,
870, 870, 870, 860, 860, 860, 860, 850, 840, 810, 820, 810, 810,
805, 810, 805, 820, 815, 820, 805, 790, 800, 780, 760, 765, 750,
740, 820, 810, 800, 800, 775, 750, 810, 750, 740, 700, 705, 660,
630, 640, 595, 590, 570, 565, 535, 440, 400, 410, 400, 405, 390,
370, 300, 300, 180, 200, 310, 290, 260, 260, 275, 260, 270, 265,
255, 250, 210, 210, 200, 195, 210, 215, 240, 240, 220, 220, 220,
220, 210, 212, 208, 220, 210, 212, 208, 220, 215, 220, 214, 214,
213, 212, 210, 210, 195, 195, 160, 160, 175, 205, 210, 208, 197,
181, 185)), .Names = c("Date", "value"), row.names = c(NA, 393L
), class = "data.frame")
The code below provides two potential options for adding year labels.
Option 1a: Faceting
You could use faceting to mark the years. For example:
library(ggplot2)
library(lubridate)
# One facet per year, with the facet strip moved below the axis so that it
# reads as a year label
ggplot(data = df, mapping = aes(x = Date, y = value)) +
  geom_line() +
  scale_x_date(date_breaks = "month", date_labels = "%b", expand = c(0, 0)) +
  facet_grid(~ year(Date), scales = "free_x", space = "free_x", switch = "x") +
  theme_bw() +
  theme(
    panel.spacing = unit(0, "cm"),
    strip.placement = "outside",
    strip.background = element_rect(fill = NA, colour = "grey50")
  )
Note that with this approach, if there are missing dates at the beginning or end of a year (by "missing", I mean rows for those dates are not even present in the data) then the x-axis will start/end at the first/last date in the data for that year, rather than go from Jan-1 to Dec-31. In that case, you'd need to add in rows for the missing dates and either NA for value or interpolate value. In addition, with this method there is no space or line between December 31 of one year and January 1 of the next year, so there's a discontinuity across each year.
Option 1b: Faceting + centered month labels
To address @AF7's comment: you can center the month labels by adding some spaces before each label. However, you have to choose the number of spaces manually, depending on the physical size of the plot when you print it to a device. (There's probably a way to center the labels programmatically based on the internal grob measurements, but I'm not sure how to do it.) I've also removed the minor vertical gridlines and lightened the line between years.
# As Option 1a, but each month label is prefixed with spaces to push it
# towards the middle of its month; the amount of padding is chosen by hand
ggplot(data = df, mapping = aes(x = Date, y = value)) +
  geom_line() +
  scale_x_date(
    date_breaks = "month",
    date_labels = paste0(strrep(" ", 11), "%b"),
    expand = c(0, 0)
  ) +
  facet_grid(~ year(Date), scales = "free_x", space = "free_x", switch = "x") +
  theme_bw() +
  theme(
    panel.spacing = unit(0, "cm"),
    panel.border = element_rect(colour = "grey70"),
    panel.grid.minor.x = element_blank(),
    strip.placement = "outside",
    strip.background = element_blank()
  )
Option 2a: Edit the x-axis label grob
Here's a more complex and finicky method (though it could likely be automated by someone who understands the structure and unit spacings of grid graphics better than I do) that avoids the pitfalls of the faceting method described above:
library(grid)
# Simulated daily series (with an extra year added for illustration)
set.seed(2)
df <- data.frame(
  Date = seq(as.Date("1718-03-01"), as.Date("1721-09-20"), by = "1 day")
)
df$value <- cumsum(rnorm(nrow(df)))

# Starting plot: monthly labels, plus a grey vertical line on the first day
# of each year
p <- ggplot(data = df, mapping = aes(x = Date, y = value)) +
  geom_vline(
    xintercept = as.numeric(df$Date[yday(df$Date) == 1]),
    colour = "grey60"
  ) +
  geom_line() +
  scale_x_date(date_breaks = "month", date_labels = "%b", expand = c(0, 0)) +
  theme_bw() +
  theme(panel.grid.minor.x = element_blank()) +
  labs(x = "")
Now we want to add the year values below and in between June and July of each year. The code below does that by modifying the x-axis label grob and is adapted from this SO answer by @SandyMuspratt.
# Convert the plot to a gtable so that its grobs can be edited
g <- ggplotGrob(p)

# Locate the bottom (x) axis grob
index <- which(g$layout$name == "axis-b")
xaxis <- g[["grobs"]][[index]]

# Second child of the axis: the ticks (labels and marks)
ticks <- xaxis[["children"]][[2]]
# Second grob of the ticks: the labels
ticksB <- ticks[["grobs"]][[2]]

# Append a newline and a year to every "Jun" label
# (assumes the "Jun" labels line up one-to-one with the years in df)
junes <- which(ticksB$children[[1]]$label == "Jun")
ticksB$children[[1]]$label[junes] <- paste0(
  ticksB$children[[1]]$label[junes], "\n ", unique(year(df$Date))
)

# Write the edited pieces back into the plot, innermost first
ticks$grobs[[2]] <- ticksB
xaxis$children[[2]] <- ticks
g$grobs[[index]] <- xaxis

# Render on a fresh page
grid.newpage()
grid.draw(g)
Option 2b: Edit the x-axis label grob and center the month labels
Below is the only change that needs to be made to Option 2a to center the month labels, but, once again, the number of spaces needs to be tweaked manually.
# Make the edit
# Prefix every label with spaces so the month names sit between ticks
ticksB$children[[1]]$label <- paste0(strrep(" ", 7), ticksB$children[[1]]$label)
# Append a newline and a year to every label containing "Jun"
junes <- grep("Jun", ticksB$children[[1]]$label)
ticksB$children[[1]]$label[junes] <- paste0(
  ticksB$children[[1]]$label[junes], "\n ", unique(year(df$Date))
)
I came upon this question and thought maybe I can add a solution. We can display both month and year in every year's first displayed month by using a simple condition. You can play with the date_breaks to remove January from the labels, and this will still work. I'm using month() and year() from lubridate.
library(tidyverse)
library(lubridate)
# Label each break with its abbreviated month; the first break, and every
# break whose year differs from the previous break's, also gets the year.
df %>%
  ggplot(aes(Date, value)) +
  geom_line() +
  scale_x_date(
    date_breaks = "2 months",
    labels = function(x) {
      previous <- lag(x)
      month_abbr <- month(x, label = TRUE)
      if_else(
        is.na(previous) | year(previous) != year(x),
        paste(month_abbr, "\n", year(x)),
        paste(month_abbr)
      )
    }
  )
If you want to try to hack together a sub-label, you could convert it to a grob. I edited this from the original post to create a function that adds the sublabels and returns a gtable object. Note that the sublabs input must be the same length as your x-axis breaks:
library(grid)
library(gtable)
library(gridExtra)
# Add a row of sub-labels (e.g. years) underneath the bottom x-axis of a plot.
#
# plot:    a ggplot object.
# sublabs: one sub-label per x-axis break; each distinct value gets a single
#          cell that ends after the value's last occurrence.
# Returns the modified gtable (draw it with grid.draw()).
add_sublabs <- function(plot, sublabs) {
  plot_table <- ggplotGrob(plot)
  axis_idx <- which(plot_table$layout[, "name"] == "axis-b")

  # x positions stored in the bottom-axis grob (presumably the tick labels)
  axis_grob <- plot_table[["grobs"]][[axis_idx]]
  label_grob <- axis_grob[["children"]][[2]][["grobs"]][[2]][["children"]][[1]]
  xbreaks <- label_grob$x
  if (length(xbreaks) != length(sublabs)) {
    stop("Sub-labels must be the same length as the x-axis breaks")
  }

  # The last occurrence of each sub-label closes that label's cell
  closes_cell <- !duplicated(sublabs, fromLast = TRUE)
  # Right edge of each cell: the break that follows it, or 1 after the last break
  right_edges <- c(as.numeric(xbreaks), 1)[which(closes_cell) + 1]
  cell_widths <- diff(c(0, right_edges))

  label_table <- tableGrob(matrix(sublabs[closes_cell], nrow = 1))
  label_table$widths <- unit(cell_widths, attr(xbreaks, "unit"))

  # Insert a new row directly below the axis and put the label table in it
  axis_pos <- plot_table$layout[axis_idx, c("t", "l")]
  row_height <- sum(label_table$heights) + unit(4, "mm")
  with_row <- gtable_add_rows(plot_table, heights = row_height, pos = axis_pos$t)
  gtable_add_grob(with_row, label_table, t = axis_pos$t + 1, l = axis_pos$l)
}
# Base plot: month abbreviations every second month, no x-axis title
p <- ggplot(data = df, aes(x = Date, y = value)) +
  geom_line() +
  scale_x_date(
    date_breaks = "2 month",
    date_minor_breaks = "1 month",
    labels = date_format("%b")
  ) +
  xlab(NULL)

# One sub-label per x-axis break: two for 1719, then six for 1720
sublabs <- rep(c("1719", "1720"), times = c(2, 6))

# Draw the plot with the sub-label row added
grid.draw(add_sublabs(p, sublabs))
One way to avoid the complexities would be to change the required output so that January is replaced by the year.
The lab function returns the labels given the breaks. Unexpectedly, ggplot will pass NAs to it so in the first line of the function body we replace those with some date -- it does not matter which date since such values are not subsequently used by ggplot. Finally we format the date as a year or abbreviated month depending on whether the month is January (which corresponds to the POSIXlt component mon equalling 0) or not.
library(ggplot2)
library(scales)
# Build axis labels from date breaks: January breaks are shown as the
# four-digit year, all other breaks as the abbreviated month name.
#
# b: vector of Date breaks; may contain NA.
# Returns a character vector of labels, one per break.
lab <- function(b) {
  # NA breaks are replaced by an arbitrary date so that formatting succeeds
  missing_breaks <- is.na(b)
  b[missing_breaks] <- Sys.Date()
  # POSIXlt months are zero-based, so mon == 0 means January
  is_january <- as.POSIXlt(b)$mon == 0
  fmt <- ifelse(is_january, "%Y", "%b")
  format(b, fmt)
}
# Monthly breaks, labelled by lab()
ggplot(data = df, mapping = aes(x = Date, y = value)) +
  geom_line() +
  scale_x_date(labels = lab, date_breaks = "month")
Note: I have added Issue 2182 to the ggplot2 GitHub issues list regarding the NAs that are passed to the label function. If subsequent versions of ggplot2 no longer pass the NAs, then the first line of the body of lab could be omitted.
Update: fixed.