Adding Consecutive Arrows to geom_point() in ggplot2 - r

I want to add a series of arrows connecting each observation in geom_point as in the graph:
I understand that geom_segment is meant to be used, but I am having issues, and have not found something quite like this after quite a bit of searching.
This is sample code that should satisfy the pattern:
Note: The labels are not important ; just the arrows
df <- data.frame(year = c(1935:1968),
y_axis_values = c( 2755,2696, 2646, 2701, 2654, 2766, 2832, 2964, 3041, 3010, 3018, 3374, 3545, 3441, 3456, 3455, 3503, 3641, 3721, 3828, 3831, 3858, 3925, 3880, 3935, 3895, 3840, 3756, 3669, 3502, 3145, 2812, 2586,2441),
x_axis_values = c(238, 240, 241, 242, 244, 245, 246, 268, 333, 335, 331, 253, 243, 241, 242, 237, 242, 240, 233, 232, 236, 245, 256, 261, 265, 278, 291, 290, 290, 307, 313, 325, 339, 338)
I have tried the general formula with many different argument variations, but cannot seem to find it.
ggplot(df, aes(x = x_axis_values, y = y_axis_values) +
geom_point() +
geom_segment()

You need the xend and yend values for each segment. Since your data frame is in order, the xend and yend value for each segment is just the next row's x and y values. You can get these by using dplyr::lead on the x and y aesthetics.
library(ggplot2)
library(dplyr)
ggplot(df, aes(x = x_axis_values, y = y_axis_values)) +
geom_point(color = "#69b3a2") +
geom_segment(aes(xend = after_stat(lead(x)), yend = after_stat(lead(y))),
arrow = arrow(length = unit(3, "mm")), color = "#69b3a2") +
geom_text(aes(label = year), size = 5, fontface = 2,
data = . %>% filter(year %in% c(1935, 1937, 1939, 1942, 1945, 1946,
1953, 1957, 1960, 1961)),
nudge_x = c(-3, -2, 4, 0, 0, -2, -5, 0, 3, 5),
nudge_y = c(30, -30, 10, -30, -40, -40, 0, -50, 30, 0)) +
labs(x = "partic", y = "tfr") +
theme_bw(base_size = 16)

Related

Create bar plot in ggplot2 - Place data frame values instead of count

I'd like to place this data onto a bar plot using ggplot2
where the column "Clades" would be placed on the X axis and the values from each column (such as the values of 19A, for example) would be place on Y axis
I'm trying something like this:
cols = as.vector(names(snv_data)[2:19])
ggplot(df, aes(x=cols)) + geom_bar()
But I keep getting this:
I'm new to ggplot2 so any help is very welcome!
I'm doing this to try and get 7 plots (one for each column such as 19A, 20A, 20B, etc) where each plot would have the Clades on the X-axis and each value from each column as the "counts" on the Y-axis
dput:
structure(list(Clades = c("C.T", "A.G", "G.A", "G.C", "T.C",
"C.A", "G.T", "A.T", "T.A", "T.G", "A.C", "C.G", "A.del", "TAT.del",
"TCTGGTTTT.del", "TACATG.del", "AGTTCA.del", "GATTTC.del"), `19A` = c(413,
93, 21, 0, 49, 9, 238, 13, 3, 1, 0, 4, 1, 0, 0, 0, 0, 0), `20A` = c(7929,
1920, 1100, 419, 1025, 124, 3730, 124, 22, 45, 64, 17, 8, 19,
23, 39, 0, 0), `20B` = c(5283, 1447, 2325, 1106, 336, 117, 946,
137, 35, 53, 123, 11, 9, 10, 21, 1, 0, 0), `20E (EU1)` = c(13086,
1927, 650, 1337, 1864, 96, 2967, 243, 69, 92, 115, 1486, 27,
5, 0, 1, 0, 0), `20I (Alpha, V1)` = c(71142, 12966, 12047, 15587,
14935, 15382, 11270, 12211, 5284, 4273, 430, 99, 5674, 4536,
4974, 4592, 0, 0), `20J (Gamma, V3)` = c(2822, 654, 883, 409,
501, 213, 843, 399, 203, 27, 429, 198, 1, 0, 197, 0, 0, 0), `21J (Delta)` = c(166003,
49195, 26713, 1399, 25824, 15644, 95967, 2011, 329, 11034, 716,
21087, 10532, 198, 0, 14, 9809, 10503)), class = "data.frame", row.names = c("C.T",
"A.G", "G.A", "G.C", "T.C", "C.A", "G.T", "A.T", "T.A", "T.G",
"A.C", "C.G", "A.del", "TAT.del", "TCTGGTTTT.del", "TACATG.del",
"AGTTCA.del", "GATTTC.del"))
To add to the previous answer, here is how you can get 7 plots (1 for each Clade, which is how I interpreted the question) using facet_wrap():
df <- df %>%
pivot_longer(-Clades)
ggplot(data = df,
aes(x = Clades,
y = value)) +
geom_bar(aes(fill = Clades),
stat = 'identity') +
facet_wrap(~name, scales = 'free_y') +
theme(axis.text.x = element_blank())
As cazman said in the comments, you need to get your data in long form for it to work with ggplot2 (efficiently).
First, use pivot_longer(), and then use ggplot2:
library(tidyverse)
dat %>%
pivot_longer(-Clades) %>%
ggplot(aes(x=Clades, y=value, fill=name)) +
geom_col()

Having trouble rearranging multiple GGPLOT2 graphs and moving/resizing scale

I am working with a dataset consisting of different plant genotypes, rates of fertilizer applications, and 5 different measurements. I am using ggplot2 to produce multiple bar graphs, and then using the gridExtra package to combine multiple graphs onto a single page. The trouble I am having involves moving and resizing the scale so that there is only one scale for each of my graphs, and I would like to move it to the lower right corner of the graph. The data/code below should better explain what I mean.
Packages/Dataset
#Open packages
library(dplyr)
library(ggplot2)
library(gridExtra)
#Dataset
plantdata <- data.frame(genotype = c(1,
1,
1,
1,
2,
2,
2,
2,
3,
3,
3,
3,
2,
2,
1,
3,
3,
3,
1,
3,
2,
2,
1,
1,
1,
2,
2,
1,
3,
3,
3,
3,
2,
1,
2,
1),
rate=c(1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
2,
4,
1,
1,
3,
2,
3,
4,
1,
3,
4,
2,
2,
1,
3,
3,
4,
1,
3,
2,
4,
1,
2,
4),
measure1=c(958,
309,
750,
43,
20,
868,
905,
674,
64,
151,
677,
144,
803,
485,
707,
881,
684,
222,
399,
507,
4,
690,
831,
574,
104,
238,
378,
897,
63,
154,
582,
641,
750,
855,
194,
55),
measure2=c(359,
728,
180,
614,
241,
989,
117,
101,
95,
156,
227,
355,
597,
50,
636,
912,
149,
862,
897,
601,
176,
7,
182,
214,
453,
569,
124,
113,
969,
781,
713,
613,
800,
334,
435,
748),
measure3=c(639,
304,
891,
317,
869,
901,
723,
267,
837,
923,
171,
991,
107,
309,
733,
705,
819,
807,
346,
447,
891,
957,
359,
323,
846,
944,
400,
548,
327,
48,
677,
624,
564,
854,
658,
343),
measure4=c(805,
24,
624,
675,
261,
437,
601,
129,
733,
172,
746,
586,
142,
243,
103,
779,
612,
870,
84,
881,
850,
456,
255,
52,
228,
492,
556,
66,
670,
682,
736,
178,
568,
501,
229,
500),
measure5=c(667,
105,
565,
724,
238,
861,
299,
13,
171,
759,
755,
557,
739,
228,
870,
595,
793,
790,
572,
590,
365,
974,
550,
766,
441,
265,
245,
909,
150,
88,
473,
245,
340,
378,
998,
121))
Function for standard error of the mean:
sem <- function(x) sd(x)/sqrt(length(x))
Generating the graphs:
#Measurement 1 graph
meas1 <- select(plantdata, genotype, rate, measure1)
#Aggregating data frame
meas1_mean <- aggregate(meas1, by=list(meas1$genotype, meas1$rate), mean)
meas1_sem <- aggregate(meas1, by=list(meas1$genotype, meas1$rate), sem)
g1 <- ggplot(meas1_mean, aes(x=Group.1, y=measure1, fill=factor(Group.2)))+
geom_bar(stat="identity",width=0.6, position="dodge", col="black")+
scale_fill_discrete(name= 'rate', labels=c("1","2","3","4"))+
xlab("Genotype")+ylab("Measurement")+
geom_errorbar(aes(ymin= meas1_mean$measure1, ymax=meas1_mean$measure1+meas1_sem$measure1), width=0.2, position = position_dodge(0.6))+
ggtitle("Plant Measurement 1")+
scale_fill_brewer(palette='PRGn', name= 'rate', labels=c("1","2","3","4"))+
theme(plot.title = element_text(hjust=0.5))
####################################################################
#Measurement 2 graph
meas2 <- select(plantdata, genotype, rate, measure2)
#Aggregating dataframe
meas2_mean <- aggregate(meas2, by=list(meas2$genotype, meas2$rate), mean)
meas2_sem <- aggregate(meas2, by=list(meas2$genotype, meas2$rate), sem)
#Generating graph
g2 <- ggplot(meas2_mean, aes(x=Group.1, y=measure2, fill=factor(Group.2)))+
geom_bar(stat="identity",width=0.6, position="dodge", col="black")+
scale_fill_discrete(name= 'rate', labels=c("1","2","3","4"))+
xlab("Genotype")+ylab("Measurement")+
geom_errorbar(aes(ymin= meas2_mean$measure2, ymax=meas2_mean$measure2+meas2_sem$measure2), width=0.2, position = position_dodge(0.6))+
ggtitle("Plant Measurement 2")+
scale_fill_brewer(palette='PRGn', name= 'rate', labels=c("1","2","3","4"))+
theme(plot.title = element_text(hjust=0.5))
####################################################################
#Measurement 3 graph
meas3 <- select(plantdata, genotype, rate, measure3)
#Aggregating dataframe
meas3_mean <- aggregate(meas3, by=list(meas3$genotype, meas3$rate), mean)
meas3_sem <- aggregate(meas3, by=list(meas3$genotype, meas3$rate), sem)
#Graph
g3 <- ggplot(meas3_mean, aes(x=Group.1, y=measure3, fill=factor(Group.2)))+
geom_bar(stat="identity",width=0.6, position="dodge", col="black")+
scale_fill_discrete(name= 'rate', labels=c("1","2","3","4"))+
xlab("Genotype")+ylab("Measurement")+
geom_errorbar(aes(ymin= meas3_mean$measure3, ymax=meas3_mean$measure3+meas3_sem$measure3), width=0.2, position = position_dodge(0.6))+
ggtitle("Plant Measurement 3")+
scale_fill_brewer(palette='PRGn', name= 'rate', labels=c("1","2","3","4"))+
theme(plot.title = element_text(hjust=0.5))
##############################################################
#Measurement 4 graph
meas4 <- select(plantdata, genotype, rate, measure4)
#Aggregating dataframe
meas4_mean <- aggregate(meas4, by=list(meas4$genotype, meas4$rate), mean)
meas4_sem <- aggregate(meas4, by=list(meas4$genotype, meas4$rate), sem)
#Graph
g4 <- ggplot(meas4_mean, aes(x=Group.1, y=measure4, fill=factor(Group.2)))+
geom_bar(stat="identity",width=0.6, position="dodge", col="black")+
scale_fill_discrete(name= 'rate', labels=c("1","2","3","4"))+
xlab("Genotype")+ylab("Measurement")+
geom_errorbar(aes(ymin= meas4_mean$measure4, ymax=meas4_mean$measure4+meas4_sem$measure4), width=0.2, position = position_dodge(0.6))+
ggtitle("Plant Measurement 4")+
scale_fill_brewer(palette='PRGn', name= 'rate', labels=c("1","2","3","4"))+
theme(plot.title = element_text(hjust=0.5))
################################################################
#Measurement 5 graph
meas5 <- select(plantdata, genotype, rate, measure5)
#Aggregate dataframe
meas5_mean <- aggregate(meas5, by=list(meas5$genotype, meas5$rate), mean)
meas5_sem <- aggregate(meas5, by=list(meas5$genotype, meas5$rate), sem)
#Graph
g5 <- ggplot(meas5_mean, aes(x=Group.1, y=measure5, fill=factor(Group.2)))+
geom_bar(stat="identity",width=0.6, position="dodge", col="black")+
scale_fill_discrete(name= 'rate', labels=c("1","2","3","4"))+
xlab("Genotype")+ylab("Measurement")+
geom_errorbar(aes(ymin= meas5_mean$measure5, ymax=meas5_mean$measure5+meas5_sem$measure5), width=0.2, position = position_dodge(0.6))+
ggtitle("Plant Measurement 5")+
scale_fill_brewer(palette='PRGn', name= 'rate', labels=c("1","2","3","4"))+
theme(plot.title = element_text(hjust=0.5))
Then, I arranged all the graphs onto one page, as follows:
grid.arrange(g1, g2, g3, g4, g5,
nrow=2, ncol=3)
The output:
Because the requirements for this graph are that it needs to be exported in a smaller format, I would like to re-arrange the scale so that there is only one scale for the whole combined image, and I would like to increase the size of it. See below:
How can I do this? If there is a better way to generate this than the way I did it, I am open to learning how to do it.
Consider the ggpubr package and in there the ggarrange function with the argument common.legend = TRUE.
a <- data.frame(x = rnorm(100), y=rnorm(100), group = gl(5,20))
b <- data.frame(x = rnorm(100), y=rnorm(100), group = gl(5,20))
c <- data.frame(x = rnorm(100), y=rnorm(100), group = gl(5,20))
library(ggplot2)
library(ggpubr)
p1 <- ggplot(a, aes(x = x, y = y, color = group)) + geom_point()
p2 <- ggplot(b, aes(x = x, y = y, color = group)) + geom_point()
p3 <- ggplot(c, aes(x = x, y = y, color = group)) + geom_point()
ggarrange(p1, p2, p3, common.legend = TRUE)
Documentation: https://rpkgs.datanovia.com/ggpubr/reference/ggarrange.html

Multiple groupings in ggplot2 [duplicate]

I would like to display months (in abbreviated form) along the horizontal axis, with the corresponding year printed once. I know how to display month-year:
The un-needed repetition of the year clutters the labels. Instead I would like something like this:
except that the year would be printed below the months.
I printed the year above the axis labels, because that's the best I could do. This follows a limitation of the annotate() function, which gets clipped if it lies outside of the plot area. I am aware of possible workarounds based on annotate_custom(), but I couldn't make them to work with date objects (I did not try to convert dates to numbers and back to dates again, as it seemed more complicated than hopefully necessary)
I'm wondering if the new dup_axis() could be hijacked for this purpose. If instead of sending the duplicated axis to the opposite side of the panel, it could send it a few lines below the duplicated axis, then perhaps it would just be a matter of setting up one axis with panel.grid.major blanked out and the labels set to %b, while the other axis would have panel.grid.minor blanked out and the labels set to %Y. (an added challenge is that the year labels would be shifted to October instead of January)
These questions are related. However, the annotate_custom() function and textGrob() functions do not play well with dates, as far as I can tell.
how-can-i-add-annotations-below-the-x-axis-in-ggplot2
displaying-text-below-the-plot-generated-by-ggplot2
Data and basic code below:
library("ggplot2")
library("scales")
ggplot(data = df, aes(x = Date, y = value)) + geom_line() +
scale_x_date(date_breaks = "2 month", date_minor_breaks = "1 month", labels = date_format("%b %Y")) +
xlab(NULL)
ggplot(data = df, aes(x = Date, y = value)) + geom_line() +
scale_x_date(date_minor_breaks = "2 month", labels = date_format("%b")) +
annotate(geom = "text", x = as.Date("1719-10-01"), y = 0, label = "1719") +
annotate(geom = "text", x = as.Date("1720-10-01"), y = 0, label = "1720") +
xlab(NULL)
# data
df <- structure(list(Date = structure(c(-91455, -91454, -91453, -91452,
-91451, -91450, -91448, -91447, -91446, -91445, -91444, -91443,
-91441, -91440, -91439, -91438, -91437, -91436, -91434, -91433,
-91431, -91430, -91429, -91427, -91426, -91425, -91424, -91423,
-91422, -91420, -91419, -91418, -91417, -91416, -91415, -91413,
-91412, -91411, -91410, -91409, -91408, -91406, -91405, -91404,
-91403, -91402, -91401, -91399, -91398, -91397, -91396, -91395,
-91394, -91392, -91391, -91390, -91389, -91388, -91387, -91385,
-91384, -91382, -91381, -91380, -91379, -91377, -91376, -91375,
-91374, -91373, -91372, -91371, -91370, -91369, -91368, -91367,
-91366, -91364, -91363, -91362, -91361, -91360, -91359, -91357,
-91356, -91355, -91354, -91353, -91352, -91350, -91349, -91348,
-91347, -91346, -91345, -91343, -91342, -91341, -91340, -91339,
-91338, -91336, -91335, -91334, -91333, -91332, -91331, -91329,
-91328, -91327, -91326, -91325, -91324, -91322, -91321, -91320,
-91319, -91315, -91314, -91313, -91312, -91311, -91310, -91308,
-91307, -91306, -91305, -91304, -91303, -91301, -91300, -91299,
-91298, -91297, -91296, -91294, -91293, -91292, -91291, -91290,
-91289, -91287, -91286, -91285, -91284, -91283, -91282, -91280,
-91279, -91278, -91277, -91276, -91275, -91273, -91272, -91271,
-91270, -91269, -91268, -91266, -91265, -91264, -91263, -91262,
-91261, -91259, -91258, -91257, -91256, -91255, -91254, -91252,
-91251, -91250, -91249, -91248, -91247, -91245, -91244, -91243,
-91242, -91241, -91240, -91238, -91237, -91236, -91235, -91234,
-91233, -91231, -91230, -91229, -91228, -91227, -91226, -91224,
-91223, -91222, -91221, -91220, -91219, -91217, -91216, -91215,
-91214, -91213, -91212, -91210, -91209, -91208, -91207, -91205,
-91201, -91200, -91199, -91198, -91196, -91195, -91194, -91193,
-91192, -91191, -91189, -91188, -91187, -91186, -91185, -91184,
-91182, -91181, -91180, -91179, -91178, -91177, -91175, -91174,
-91173, -91172, -91171, -91170, -91168, -91167, -91166, -91165,
-91164, -91163, -91161, -91160, -91159, -91158, -91157, -91156,
-91154, -91153, -91152, -91151, -91150, -91149, -91147, -91146,
-91145, -91144, -91143, -91142, -91140, -91139, -91138, -91131,
-91130, -91129, -91128, -91126, -91125, -91124, -91123, -91122,
-91121, -91119, -91118, -91117, -91116, -91115, -91114, -91112,
-91111, -91110, -91109, -91108, -91107, -91104, -91103, -91102,
-91101, -91100, -91099, -91097, -91096, -91095, -91094, -91093,
-91091, -91090, -91089, -91088, -91087, -91086, -91084, -91083,
-91082, -91081, -91080, -91079, -91077, -91076, -91075, -91074,
-91073, -91072, -91070, -91069, -91068, -91065, -91063, -91062,
-91061, -91060, -91059, -91058, -91056, -91055, -91054, -91053,
-91052, -91051, -91049, -91048, -91047, -91046, -91045, -91044,
-91042, -91041, -91040, -91039, -91038, -91037, -91035, -91034,
-91033, -91032, -91031, -91030, -91028, -91027, -91026, -91025,
-91024, -91023, -91021, -91020, -91019, -91018, -91017, -91016,
-91014, -91013, -91012, -91011, -91010, -91009, -91007, -91006,
-91005, -91004, -91003, -91002, -91000, -90999, -90998, -90997,
-90996, -90995, -90993, -90992, -90991, -90990, -90989, -90988,
-90986, -90985, -90984, -90983, -90982), class = "Date"), value = c(113,
113, 113, 113, 114, 114, 114, 115, 115, 115, 116, 116, 116, 116,
117, 117, 117, 117, 116, 117, 116, 116, 116, 117, 117, 117, 117,
117, 117, 117, 116, 117, 116, 116, 116, 117, 117, 117, 117, 117,
117, 117, 116, 116, 117, 117, 117, 117, 117, 117, 117, 117, 117,
117, 117, 118, 118, 118, 118, 117, 118, 117, 117, 117, 117, 117,
117, 118, 116, 116, 116, 116, 116, 116, 116, 117, 117, 118, 118,
118, 118, 118, 119, 120, 120, 119, 119, 120, 120, 121, 121, 122,
124, 124, 122, 123, 124, 123, 123, 123, 123, 123, 124, 124, 126,
126, 126, 126, 126, 125, 125, 126, 127, 126, 126, 125, 126, 126,
126, 128, 128, 128, 130, 133, 131, 133, 134, 134, 134, 136, 136,
136, 135, 135, 135, 136, 136, 136, 136, 135, 135, 135, 135, 130,
129, 129, 130, 131, 136, 138, 155, 157, 161, 170, 174, 168, 165,
169, 171, 181, 184, 182, 179, 181, 179, 175, 177, 177, 174, 170,
174, 173, 178, 173, 178, 179, 182, 184, 184, 180, 181, 182, 182,
184, 184, 188, 195, 198, 220, 255, 275, 350, 310, 315, 320, 320,
316, 300, 310, 310, 320, 317, 313, 312, 310, 297, 285, 285, 286,
288, 315, 328, 338, 344, 345, 352, 352, 342, 335, 343, 340, 342,
339, 337, 336, 336, 342, 347, 352, 352, 351, 352, 352, 351, 352,
352, 355, 375, 400, 452, 487, 476, 475, 473, 485, 500, 530, 595,
720, 720, 770, 750, 770, 750, 735, 740, 745, 735, 700, 700, 750,
760, 755, 755, 760, 760, 765, 950, 950, 950, 875, 875, 875, 880,
880, 880, 900, 900, 900, 880, 880, 890, 895, 890, 880, 870, 870,
870, 870, 870, 860, 860, 860, 860, 850, 840, 810, 820, 810, 810,
805, 810, 805, 820, 815, 820, 805, 790, 800, 780, 760, 765, 750,
740, 820, 810, 800, 800, 775, 750, 810, 750, 740, 700, 705, 660,
630, 640, 595, 590, 570, 565, 535, 440, 400, 410, 400, 405, 390,
370, 300, 300, 180, 200, 310, 290, 260, 260, 275, 260, 270, 265,
255, 250, 210, 210, 200, 195, 210, 215, 240, 240, 220, 220, 220,
220, 210, 212, 208, 220, 210, 212, 208, 220, 215, 220, 214, 214,
213, 212, 210, 210, 195, 195, 160, 160, 175, 205, 210, 208, 197,
181, 185)), .Names = c("Date", "value"), row.names = c(NA, 393L
), class = "data.frame")
The code below provides two potential options for adding year labels.
Option 1a: Faceting
You could use faceting to mark the years. For example:
library(ggplot2)
library(lubridate)
ggplot(df, aes(Date, value)) +
geom_line() +
scale_x_date(date_labels="%b", date_breaks="month", expand=c(0,0)) +
facet_grid(~ year(Date), space="free_x", scales="free_x", switch="x") +
theme_bw() +
theme(strip.placement = "outside",
strip.background = element_rect(fill=NA,colour="grey50"),
panel.spacing=unit(0,"cm"))
Note that with this approach, if there are missing dates at the beginning or end of a year (by "missing", I mean rows for those dates are not even present in the data) then the x-axis will start/end at the first/last date in the data for that year, rather than go from Jan-1 to Dec-31. In that case, you'd need to add in rows for the missing dates and either NA for value or interpolate value. In addition, with this method there is no space or line between December 31 of one year and January 1 of the next year, so there's a discontinuity across each year.
Option 1b: Faceting + centered month labels
To address #AF7's comment. You can center the month labels by adding some spaces before each label. But you have to choose the number of spaces manually, depending on the physical size of the plot when you print it to a device. (There's probably a way to center the labels programmatically based on the internal grob measurements, but I'm not sure how to do it.) I've also removed the minor vertical gridlines and lightened the line between years.
ggplot(df, aes(Date, value)) +
geom_line() +
scale_x_date(date_labels=paste(c(rep(" ",11), "%b"), collapse=""),
date_breaks="month", expand=c(0,0)) +
facet_grid(~ year(Date), space="free_x", scales="free_x", switch="x") +
theme_bw() +
theme(strip.placement = "outside",
strip.background = element_blank(),
panel.grid.minor.x = element_blank(),
panel.border = element_rect(colour="grey70"),
panel.spacing=unit(0,"cm"))
Option 2a: Edit the x-axis label grob
Here's a more complex and finicky method (though it could likely be automated by someone who understands the structure and unit spacings of grid graphics better than I do) that avoids the pitfalls of the faceting method described above:
library(grid)
# Fake data with an extra year added for illustration
set.seed(2)
df = data.frame(Date=seq(as.Date("1718-03-01"),as.Date("1721-09-20"), by="1 day"))
df$value = cumsum(rnorm(nrow(df)))
# The plot we'll start with
p = ggplot(df, aes(Date, value)) +
geom_vline(xintercept=as.numeric(df$Date[yday(df$Date)==1]), colour="grey60") +
geom_line() +
scale_x_date(date_labels="%b", date_breaks="month", expand=c(0,0)) +
theme_bw() +
theme(panel.grid.minor.x = element_blank()) +
labs(x="")
Now we want to add the year values below and in between June and July of each year. The code below does that by modifying the x-axis label grob and is adapted from this SO answer by #SandyMuspratt.
# Get the grob
g <- ggplotGrob(p)
# Get the y axis
index <- which(g$layout$name == "axis-b") # Which grob
xaxis <- g$grobs[[index]]
# Get the ticks (labels and marks)
ticks <- xaxis$children[[2]]
# Get the labels
ticksB <- ticks$grobs[[2]]
# Edit x-axis label grob
# Find every index of Jun in the x-axis labels and add a newline and
# then a year label
junes = which(ticksB$children[[1]]$label == "Jun")
ticksB$children[[1]]$label[junes] = paste0(ticksB$children[[1]]$label[junes],
"\n ", unique(year(df$Date)))
# Put the edited labels back into the plot
ticks$grobs[[2]] <- ticksB
xaxis$children[[2]] <- ticks
g$grobs[[index]] <- xaxis
# Draw the plot
grid.newpage()
grid.draw(g)
Option 2b: Edit the x-axis label grob and center the month labels
Below is the only change that needs to be made to Option 2a to center the month labels, but, once again, the number of spaces needs to be tweaked manually.
# Make the edit
# Center the month labels between ticks
ticksB$children[[1]]$label = paste0(paste(rep(" ",7),collapse=""), ticksB$children[[1]]$label)
# Find every index of Jun in the x-axis labels and a year label
junes = grep("Jun", ticksB$children[[1]]$label)
ticksB$children[[1]]$label[junes] = paste0(ticksB$children[[1]]$label[junes], "\n ", unique(year(df$Date)))
I came upon this question and thought maybe I can add a solution. We can display both month and year in every year's first displayed month by using a simple condition. You can play with the date_breaks to remove January from the labels, and this will still work. I'm using month() and year() from lubridate.
library(tidyverse)
library(lubridate)
df %>%
ggplot(aes(Date, value)) +
geom_line() +
scale_x_date(date_breaks = "2 months",
labels = function(x) if_else(is.na(lag(x)) | !year(lag(x)) == year(x),
paste(month(x, label = TRUE), "\n", year(x)),
paste(month(x, label = TRUE))))
If you want to try to hack together a sub-label, you could convert it to a grob. I edited this from the original post to create a function that adds the sublabels and returns a gtable object. Note that the sublabs input must be the same length as your x-axis breaks:
library(grid)
library(gtable)
library(gridExtra)
add_sublabs <- function(plot, sublabs){
gg <- ggplotGrob(plot)
axis_num <- which(gg$layout[,"name"] == "axis-b")
xbreaks <- gg[["grobs"]][[axis_num]][["children"]][[2]][["grobs"]][[2]][["children"]][[1]]$x
if(length(xbreaks) != length(sublabs)) stop("Sub-labels must be the same length as the x-axis breaks")
to_breaks <- c(as.numeric(xbreaks),1)[which(!duplicated(sublabs, fromLast = TRUE))+1]
sublabs_x <- diff(c(0,to_breaks))
sublabs_labels <- sublabs[!duplicated(sublabs, fromLast = TRUE)]
tg <- tableGrob(matrix(sublabs_labels, nrow = 1))
tg$widths = unit(sublabs_x, attr(xbreaks,"unit"))
pos <- gg$layout[axis_num,c("t","l")]
gg2 <- gtable_add_rows(gg, heights = sum(tg$heights)+unit(4,"mm"), pos = pos$t)
gg3 <- gtable_add_grob(gg2, tg, t = pos$t+1, l = pos$l)
return(gg3)
}
#Plot and sublabels
p <- ggplot(data = df, aes(x = Date, y = value)) + geom_line() +
scale_x_date(date_breaks = "2 month", date_minor_breaks = "1 month", labels = date_format("%b")) +
xlab(NULL)
sublabs <- c(rep("1719",2),rep("1720",6))
#Draw
grid.draw(add_sublabs(p, sublabs))
One way to avoid the complexities would be to change the required output so that January is replaced by the year.
The lab function returns the labels given the breaks. Unexpectedly, ggplot will pass NAs to it so in the first line of the function body we replace those with some date -- it does not matter which date since such values are not subsequently used by ggplot. Finally we format the date as a year or abbreviated month depending on whether the month is January (which corresponds to the POSIXlt component mon equalling 0) or not.
library(ggplot2)
library(scales)
lab <- function(b) {
b[is.na(b)] <- Sys.Date()
format(b, ifelse(as.POSIXlt(b)$mon == 0, "%Y", "%b"))
}
ggplot(df, aes(Date, value)) +
geom_line() +
scale_x_date(date_breaks = "month", labels = lab)
Note: I have added Issue 2182 to the ggplot2 github issues list regarding the NAs that are passed to the label function. If subsequent versions of ggplot2 no longer pass the NAs then the first line of the body of lab could be omitted .
Update: fixed.

Axis labels on two lines with nested x variables (year below months)

I would like to display months (in abbreviated form) along the horizontal axis, with the corresponding year printed once. I know how to display month-year:
The un-needed repetition of the year clutters the labels. Instead I would like something like this:
except that the year would be printed below the months.
I printed the year above the axis labels, because that's the best I could do. This follows a limitation of the annotate() function, which gets clipped if it lies outside of the plot area. I am aware of possible workarounds based on annotate_custom(), but I couldn't make them to work with date objects (I did not try to convert dates to numbers and back to dates again, as it seemed more complicated than hopefully necessary)
I'm wondering if the new dup_axis() could be hijacked for this purpose. If instead of sending the duplicated axis to the opposite side of the panel, it could send it a few lines below the duplicated axis, then perhaps it would just be a matter of setting up one axis with panel.grid.major blanked out and the labels set to %b, while the other axis would have panel.grid.minor blanked out and the labels set to %Y. (an added challenge is that the year labels would be shifted to October instead of January)
These questions are related. However, the annotate_custom() function and textGrob() functions do not play well with dates, as far as I can tell.
how-can-i-add-annotations-below-the-x-axis-in-ggplot2
displaying-text-below-the-plot-generated-by-ggplot2
Data and basic code below:
library("ggplot2")
library("scales")
ggplot(data = df, aes(x = Date, y = value)) + geom_line() +
scale_x_date(date_breaks = "2 month", date_minor_breaks = "1 month", labels = date_format("%b %Y")) +
xlab(NULL)
ggplot(data = df, aes(x = Date, y = value)) + geom_line() +
scale_x_date(date_minor_breaks = "2 month", labels = date_format("%b")) +
annotate(geom = "text", x = as.Date("1719-10-01"), y = 0, label = "1719") +
annotate(geom = "text", x = as.Date("1720-10-01"), y = 0, label = "1720") +
xlab(NULL)
# data
df <- structure(list(Date = structure(c(-91455, -91454, -91453, -91452,
-91451, -91450, -91448, -91447, -91446, -91445, -91444, -91443,
-91441, -91440, -91439, -91438, -91437, -91436, -91434, -91433,
-91431, -91430, -91429, -91427, -91426, -91425, -91424, -91423,
-91422, -91420, -91419, -91418, -91417, -91416, -91415, -91413,
-91412, -91411, -91410, -91409, -91408, -91406, -91405, -91404,
-91403, -91402, -91401, -91399, -91398, -91397, -91396, -91395,
-91394, -91392, -91391, -91390, -91389, -91388, -91387, -91385,
-91384, -91382, -91381, -91380, -91379, -91377, -91376, -91375,
-91374, -91373, -91372, -91371, -91370, -91369, -91368, -91367,
-91366, -91364, -91363, -91362, -91361, -91360, -91359, -91357,
-91356, -91355, -91354, -91353, -91352, -91350, -91349, -91348,
-91347, -91346, -91345, -91343, -91342, -91341, -91340, -91339,
-91338, -91336, -91335, -91334, -91333, -91332, -91331, -91329,
-91328, -91327, -91326, -91325, -91324, -91322, -91321, -91320,
-91319, -91315, -91314, -91313, -91312, -91311, -91310, -91308,
-91307, -91306, -91305, -91304, -91303, -91301, -91300, -91299,
-91298, -91297, -91296, -91294, -91293, -91292, -91291, -91290,
-91289, -91287, -91286, -91285, -91284, -91283, -91282, -91280,
-91279, -91278, -91277, -91276, -91275, -91273, -91272, -91271,
-91270, -91269, -91268, -91266, -91265, -91264, -91263, -91262,
-91261, -91259, -91258, -91257, -91256, -91255, -91254, -91252,
-91251, -91250, -91249, -91248, -91247, -91245, -91244, -91243,
-91242, -91241, -91240, -91238, -91237, -91236, -91235, -91234,
-91233, -91231, -91230, -91229, -91228, -91227, -91226, -91224,
-91223, -91222, -91221, -91220, -91219, -91217, -91216, -91215,
-91214, -91213, -91212, -91210, -91209, -91208, -91207, -91205,
-91201, -91200, -91199, -91198, -91196, -91195, -91194, -91193,
-91192, -91191, -91189, -91188, -91187, -91186, -91185, -91184,
-91182, -91181, -91180, -91179, -91178, -91177, -91175, -91174,
-91173, -91172, -91171, -91170, -91168, -91167, -91166, -91165,
-91164, -91163, -91161, -91160, -91159, -91158, -91157, -91156,
-91154, -91153, -91152, -91151, -91150, -91149, -91147, -91146,
-91145, -91144, -91143, -91142, -91140, -91139, -91138, -91131,
-91130, -91129, -91128, -91126, -91125, -91124, -91123, -91122,
-91121, -91119, -91118, -91117, -91116, -91115, -91114, -91112,
-91111, -91110, -91109, -91108, -91107, -91104, -91103, -91102,
-91101, -91100, -91099, -91097, -91096, -91095, -91094, -91093,
-91091, -91090, -91089, -91088, -91087, -91086, -91084, -91083,
-91082, -91081, -91080, -91079, -91077, -91076, -91075, -91074,
-91073, -91072, -91070, -91069, -91068, -91065, -91063, -91062,
-91061, -91060, -91059, -91058, -91056, -91055, -91054, -91053,
-91052, -91051, -91049, -91048, -91047, -91046, -91045, -91044,
-91042, -91041, -91040, -91039, -91038, -91037, -91035, -91034,
-91033, -91032, -91031, -91030, -91028, -91027, -91026, -91025,
-91024, -91023, -91021, -91020, -91019, -91018, -91017, -91016,
-91014, -91013, -91012, -91011, -91010, -91009, -91007, -91006,
-91005, -91004, -91003, -91002, -91000, -90999, -90998, -90997,
-90996, -90995, -90993, -90992, -90991, -90990, -90989, -90988,
-90986, -90985, -90984, -90983, -90982), class = "Date"), value = c(113,
113, 113, 113, 114, 114, 114, 115, 115, 115, 116, 116, 116, 116,
117, 117, 117, 117, 116, 117, 116, 116, 116, 117, 117, 117, 117,
117, 117, 117, 116, 117, 116, 116, 116, 117, 117, 117, 117, 117,
117, 117, 116, 116, 117, 117, 117, 117, 117, 117, 117, 117, 117,
117, 117, 118, 118, 118, 118, 117, 118, 117, 117, 117, 117, 117,
117, 118, 116, 116, 116, 116, 116, 116, 116, 117, 117, 118, 118,
118, 118, 118, 119, 120, 120, 119, 119, 120, 120, 121, 121, 122,
124, 124, 122, 123, 124, 123, 123, 123, 123, 123, 124, 124, 126,
126, 126, 126, 126, 125, 125, 126, 127, 126, 126, 125, 126, 126,
126, 128, 128, 128, 130, 133, 131, 133, 134, 134, 134, 136, 136,
136, 135, 135, 135, 136, 136, 136, 136, 135, 135, 135, 135, 130,
129, 129, 130, 131, 136, 138, 155, 157, 161, 170, 174, 168, 165,
169, 171, 181, 184, 182, 179, 181, 179, 175, 177, 177, 174, 170,
174, 173, 178, 173, 178, 179, 182, 184, 184, 180, 181, 182, 182,
184, 184, 188, 195, 198, 220, 255, 275, 350, 310, 315, 320, 320,
316, 300, 310, 310, 320, 317, 313, 312, 310, 297, 285, 285, 286,
288, 315, 328, 338, 344, 345, 352, 352, 342, 335, 343, 340, 342,
339, 337, 336, 336, 342, 347, 352, 352, 351, 352, 352, 351, 352,
352, 355, 375, 400, 452, 487, 476, 475, 473, 485, 500, 530, 595,
720, 720, 770, 750, 770, 750, 735, 740, 745, 735, 700, 700, 750,
760, 755, 755, 760, 760, 765, 950, 950, 950, 875, 875, 875, 880,
880, 880, 900, 900, 900, 880, 880, 890, 895, 890, 880, 870, 870,
870, 870, 870, 860, 860, 860, 860, 850, 840, 810, 820, 810, 810,
805, 810, 805, 820, 815, 820, 805, 790, 800, 780, 760, 765, 750,
740, 820, 810, 800, 800, 775, 750, 810, 750, 740, 700, 705, 660,
630, 640, 595, 590, 570, 565, 535, 440, 400, 410, 400, 405, 390,
370, 300, 300, 180, 200, 310, 290, 260, 260, 275, 260, 270, 265,
255, 250, 210, 210, 200, 195, 210, 215, 240, 240, 220, 220, 220,
220, 210, 212, 208, 220, 210, 212, 208, 220, 215, 220, 214, 214,
213, 212, 210, 210, 195, 195, 160, 160, 175, 205, 210, 208, 197,
181, 185)), .Names = c("Date", "value"), row.names = c(NA, 393L
), class = "data.frame")
The code below provides two potential options for adding year labels.
Option 1a: Faceting
You could use faceting to mark the years. For example:
library(ggplot2)
library(lubridate)
ggplot(df, aes(Date, value)) +
geom_line() +
scale_x_date(date_labels="%b", date_breaks="month", expand=c(0,0)) +
facet_grid(~ year(Date), space="free_x", scales="free_x", switch="x") +
theme_bw() +
theme(strip.placement = "outside",
strip.background = element_rect(fill=NA,colour="grey50"),
panel.spacing=unit(0,"cm"))
Note that with this approach, if there are missing dates at the beginning or end of a year (by "missing", I mean rows for those dates are not even present in the data) then the x-axis will start/end at the first/last date in the data for that year, rather than go from Jan-1 to Dec-31. In that case, you'd need to add in rows for the missing dates and either NA for value or interpolate value. In addition, with this method there is no space or line between December 31 of one year and January 1 of the next year, so there's a discontinuity across each year.
Option 1b: Faceting + centered month labels
To address #AF7's comment. You can center the month labels by adding some spaces before each label. But you have to choose the number of spaces manually, depending on the physical size of the plot when you print it to a device. (There's probably a way to center the labels programmatically based on the internal grob measurements, but I'm not sure how to do it.) I've also removed the minor vertical gridlines and lightened the line between years.
ggplot(df, aes(Date, value)) +
geom_line() +
scale_x_date(date_labels=paste(c(rep(" ",11), "%b"), collapse=""),
date_breaks="month", expand=c(0,0)) +
facet_grid(~ year(Date), space="free_x", scales="free_x", switch="x") +
theme_bw() +
theme(strip.placement = "outside",
strip.background = element_blank(),
panel.grid.minor.x = element_blank(),
panel.border = element_rect(colour="grey70"),
panel.spacing=unit(0,"cm"))
Option 2a: Edit the x-axis label grob
Here's a more complex and finicky method (though it could likely be automated by someone who understands the structure and unit spacings of grid graphics better than I do) that avoids the pitfalls of the faceting method described above:
library(grid)
# Fake data with an extra year added for illustration
set.seed(2)
df = data.frame(Date=seq(as.Date("1718-03-01"),as.Date("1721-09-20"), by="1 day"))
df$value = cumsum(rnorm(nrow(df)))
# The plot we'll start with
p = ggplot(df, aes(Date, value)) +
geom_vline(xintercept=as.numeric(df$Date[yday(df$Date)==1]), colour="grey60") +
geom_line() +
scale_x_date(date_labels="%b", date_breaks="month", expand=c(0,0)) +
theme_bw() +
theme(panel.grid.minor.x = element_blank()) +
labs(x="")
Now we want to add the year values below and in between June and July of each year. The code below does that by modifying the x-axis label grob and is adapted from this SO answer by #SandyMuspratt.
# Get the grob
g <- ggplotGrob(p)
# Get the y axis
index <- which(g$layout$name == "axis-b") # Which grob
xaxis <- g$grobs[[index]]
# Get the ticks (labels and marks)
ticks <- xaxis$children[[2]]
# Get the labels
ticksB <- ticks$grobs[[2]]
# Edit x-axis label grob
# Find every index of Jun in the x-axis labels and add a newline and
# then a year label
junes = which(ticksB$children[[1]]$label == "Jun")
ticksB$children[[1]]$label[junes] = paste0(ticksB$children[[1]]$label[junes],
"\n ", unique(year(df$Date)))
# Put the edited labels back into the plot
ticks$grobs[[2]] <- ticksB
xaxis$children[[2]] <- ticks
g$grobs[[index]] <- xaxis
# Draw the plot
grid.newpage()
grid.draw(g)
Option 2b: Edit the x-axis label grob and center the month labels
Below is the only change that needs to be made to Option 2a to center the month labels, but, once again, the number of spaces needs to be tweaked manually.
# Make the edit
# Center the month labels between ticks
ticksB$children[[1]]$label = paste0(paste(rep(" ",7),collapse=""), ticksB$children[[1]]$label)
# Find every index of Jun in the x-axis labels and a year label
junes = grep("Jun", ticksB$children[[1]]$label)
ticksB$children[[1]]$label[junes] = paste0(ticksB$children[[1]]$label[junes], "\n ", unique(year(df$Date)))
I came upon this question and thought maybe I can add a solution. We can display both month and year in every year's first displayed month by using a simple condition. You can play with the date_breaks to remove January from the labels, and this will still work. I'm using month() and year() from lubridate.
library(tidyverse)
library(lubridate)
df %>%
ggplot(aes(Date, value)) +
geom_line() +
scale_x_date(date_breaks = "2 months",
labels = function(x) if_else(is.na(lag(x)) | !year(lag(x)) == year(x),
paste(month(x, label = TRUE), "\n", year(x)),
paste(month(x, label = TRUE))))
If you want to try to hack together a sub-label, you could convert it to a grob. I edited this from the original post to create a function that adds the sublabels and returns a gtable object. Note that the sublabs input must be the same length as your x-axis breaks:
library(grid)
library(gtable)
library(gridExtra)
add_sublabs <- function(plot, sublabs){
gg <- ggplotGrob(plot)
axis_num <- which(gg$layout[,"name"] == "axis-b")
xbreaks <- gg[["grobs"]][[axis_num]][["children"]][[2]][["grobs"]][[2]][["children"]][[1]]$x
if(length(xbreaks) != length(sublabs)) stop("Sub-labels must be the same length as the x-axis breaks")
to_breaks <- c(as.numeric(xbreaks),1)[which(!duplicated(sublabs, fromLast = TRUE))+1]
sublabs_x <- diff(c(0,to_breaks))
sublabs_labels <- sublabs[!duplicated(sublabs, fromLast = TRUE)]
tg <- tableGrob(matrix(sublabs_labels, nrow = 1))
tg$widths = unit(sublabs_x, attr(xbreaks,"unit"))
pos <- gg$layout[axis_num,c("t","l")]
gg2 <- gtable_add_rows(gg, heights = sum(tg$heights)+unit(4,"mm"), pos = pos$t)
gg3 <- gtable_add_grob(gg2, tg, t = pos$t+1, l = pos$l)
return(gg3)
}
#Plot and sublabels
p <- ggplot(data = df, aes(x = Date, y = value)) + geom_line() +
scale_x_date(date_breaks = "2 month", date_minor_breaks = "1 month", labels = date_format("%b")) +
xlab(NULL)
sublabs <- c(rep("1719",2),rep("1720",6))
#Draw
grid.draw(add_sublabs(p, sublabs))
One way to avoid the complexities would be to change the required output so that January is replaced by the year.
The lab function returns the labels given the breaks. Unexpectedly, ggplot will pass NAs to it so in the first line of the function body we replace those with some date -- it does not matter which date since such values are not subsequently used by ggplot. Finally we format the date as a year or abbreviated month depending on whether the month is January (which corresponds to the POSIXlt component mon equalling 0) or not.
library(ggplot2)
library(scales)
lab <- function(b) {
b[is.na(b)] <- Sys.Date()
format(b, ifelse(as.POSIXlt(b)$mon == 0, "%Y", "%b"))
}
ggplot(df, aes(Date, value)) +
geom_line() +
scale_x_date(date_breaks = "month", labels = lab)
Note: I have added Issue 2182 to the ggplot2 github issues list regarding the NAs that are passed to the label function. If subsequent versions of ggplot2 no longer pass the NAs then the first line of the body of lab could be omitted .
Update: fixed.

ggplot visualization questions

for visualized my data, I used gplot.
Question: Why "colour" doesn't change, and is it possible to do type = "h" like in basic plot?
print(qplot(roundpop, Observation, data=roundpopus), shape = 5, colour = "blue") # i tryed with "" and without.
And if it's possible to change type to histogram, like on second picture, can I draw a line by the top of lines?
Like that:
and maybe to write labels (states) on the top of the lines. Because I know how to give a name only for dots on basic plot.
Thank you!
Here are some options, which you may want to tweak according to your needs:
library(ggplot2)
df <- structure(list(x = c(1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 5, 6,
6, 6, 7, 7, 7, 7, 8, 9, 10, 10, 10, 12, 13, 13, 20, 20, 27, 39
), y = c(33, 124, 45, 294, 160, 105, 276, 178, 377, 506, 176,
393, 247, 378, 221, 796, 503, 162, 801, 486, 268, 575, 828, 493,
252, 495, 836, 551, 413, 832, 1841, 1927), lab = c("i8g8Q", "oXlWk",
"NC2WO", "pYxBL", "Xfsy6", "FJcOl", "Ke98f", "K2mCW", "g4XYi",
"ICzWp", "7nqrK", "dzhlC", "JagAW", "0bObp", "8ljIW", "E8OZR",
"6Tuxz", "3Grbq", "xqsld", "BvuJT", "JXi2N", "eSDYS", "OYVWN",
"vyWzK", "6AKxk", "nCgPx", "8lHrq", "kWAGm", "E08Rd", "cmIYY",
"btoUm", "k6Iek")), .Names = c("x", "y", "lab"), row.names = c(NA,
-32L), class = "data.frame")
p <- ggplot(df, aes(x, y))
gridExtra::grid.arrange(
p + geom_point(),
p + geom_point() + geom_text(aes(label = lab), angle = 60, hjust = 0, size = 2),
p + geom_segment(aes(xend=x, yend=0)),
p + geom_segment(aes(xend=x, yend=0)) + geom_line(color = "red", size = 2) ,
p + geom_segment(aes(xend=x, yend=0)) + geom_smooth(span = .4, se = FALSE, color = "red", size = 2)
)

Resources