ggplot and two different geom_line(): the legend does not appear - r

I have the following code (dput data for the data sets is here):
ruz <- structure(list(date = structure(c(16617, 16618, 16619, 16622,
16623, 16624, 16625, 16626, 16629, 16630, 16631, 16632, 16633,
16636, 16637, 16638, 16639, 16640, 16643, 16644, 16645, 16646,
16647, 16650, 16651, 16652, 16653, 16654, 16657, 16658, 16659,
16660, 16661, 16664, 16665, 16666, 16667, 16668, 16671, 16672,
16673, 16674, 16675, 16678, 16679, 16680, 16681, 16682, 16685,
16686, 16687, 16688, 16689, 16692, 16693, 16694, 16695, 16696,
16699, 16700, 16701, 16702, 16703, 16706, 16707, 16708, 16709,
16710, 16713, 16714, 16715, 16716, 16717, 16720, 16721, 16722,
16723, 16724, 16727, 16728, 16729, 16730, 16731, 16734, 16735,
16736, 16737, 16738, 16741, 16742, 16743, 16744, 16745, 16748,
16749, 16750, 16751, 16752, 16755, 16756, 16757, 16758, 16759,
16762, 16763, 16764, 16765, 16766, 16769), class = "Date"), val1 = c(61.8954,
61.6297, 61.7859, 62.2135, 62.692, 63.026, 63.1511, 63.008, 62.7991,
62.5304, 62.3971, 62.1703, 61.9535, 61.7927, 61.8367, 62.1856,
62.7663, 63.5846, 64.859, 66.0745, 65.9327, 65.1387, 65.8362,
67.9171, 68.8917, 68.7714, 69.295, 69.9932, 70.0878, 70.0563,
71.0985, 71.7451, 71.9923, 72.3836, 72.6186, 72.7895, 74.1316,
76.3577, 79.6818, 80.4601, 79.637, 77.1905, 74.7982, 74.0868,
73.6844, 74.7815, 75.1829, 75.0874, 76.0362, 76.5334, 76.1729,
76.2661, 76.521, 76.5815, 76.1411, 74.7473, 74.229, 74.8073,
74.8083, 74.2189, 73.7976, 74.0765, 73.7323, 73.5319, 73.8853,
73.7351, 73.2462, 73.7254, 73.4657, 72.5227, 70.9683, 70.1357,
69.7459, 69.7823, 70.714, 71.5863, 71.3391, 70.2717, 70.1001,
70.3965, 70.964, 70.901, 69.6083, 69.0542, 70.325, 71.2619, 70.6912,
70.5258, 70.6195, 69.9786, 68.9845, 68.7403, 69.5909, 69.6324,
69.2801, 69.3884, 70.4129, 71.6024, 70.7705, 69.6673, 69.2706,
69.2517, 69.2788, 69.3983, 69.7819, 69.8404, 69.8002, 69.9816,
70.1287)), .Names = c("date", "val1"), row.names = c("2015-07-01",
"2015-07-02", "2015-07-03", "2015-07-06", "2015-07-07", "2015-07-08",
"2015-07-09", "2015-07-10", "2015-07-13", "2015-07-14", "2015-07-15",
"2015-07-16", "2015-07-17", "2015-07-20", "2015-07-21", "2015-07-22",
"2015-07-23", "2015-07-24", "2015-07-27", "2015-07-28", "2015-07-29",
"2015-07-30", "2015-07-31", "2015-08-03", "2015-08-04", "2015-08-05",
"2015-08-06", "2015-08-07", "2015-08-10", "2015-08-11", "2015-08-12",
"2015-08-13", "2015-08-14", "2015-08-17", "2015-08-18", "2015-08-19",
"2015-08-20", "2015-08-21", "2015-08-24", "2015-08-25", "2015-08-26",
"2015-08-27", "2015-08-28", "2015-08-31", "2015-09-01", "2015-09-02",
"2015-09-03", "2015-09-04", "2015-09-07", "2015-09-08", "2015-09-09",
"2015-09-10", "2015-09-11", "2015-09-14", "2015-09-15", "2015-09-16",
"2015-09-17", "2015-09-18", "2015-09-21", "2015-09-22", "2015-09-23",
"2015-09-24", "2015-09-25", "2015-09-28", "2015-09-29", "2015-09-30",
"2015-10-01", "2015-10-02", "2015-10-05", "2015-10-06", "2015-10-07",
"2015-10-08", "2015-10-09", "2015-10-12", "2015-10-13", "2015-10-14",
"2015-10-15", "2015-10-16", "2015-10-19", "2015-10-20", "2015-10-21",
"2015-10-22", "2015-10-23", "2015-10-26", "2015-10-27", "2015-10-28",
"2015-10-29", "2015-10-30", "2015-11-02", "2015-11-03", "2015-11-04",
"2015-11-05", "2015-11-06", "2015-11-09", "2015-11-10", "2015-11-11",
"2015-11-12", "2015-11-13", "2015-11-16", "2015-11-17", "2015-11-18",
"2015-11-19", "2015-11-20", "2015-11-23", "2015-11-24", "2015-11-25",
"2015-11-26", "2015-11-27", "2015-11-30"), class = "data.frame")
dfr <- structure(list(date = structure(c(16616, 16646, 16677, 16708,
16738, 16769), class = "Date"), val2 = c(0, 0.0920000000000001,
0.120392, 0.136077488, 0.0917704659680001, 0.0874033841041282
)), .Names = c("date", "val2"), row.names = c("2015-06", "2015-07",
"2015-08", "2015-09", "2015-10", "2015-11"), class = "data.frame")
ggplot:
# Draw two series from two separate data frames on one plot.
# NOTE: both colours are passed as constants outside aes(), so no colour scale
# is created and ggplot2 has nothing to build a legend from.
ggplot() +
geom_line(data = ruz, aes(date, val1), size = 1.5, color = "blue") +
# val2 is multiplied by 100 before plotting.
geom_line(data = dfr, aes(date, val2 * 100), size = 1.5, color = "red") +
# No layer maps a fill aesthetic, so this fill scale has nothing to act on.
scale_fill_manual(values = c("blue", "red"))
which produces the following graph:
My question is, how to modify it to show a legend after all?

If you want to avoid combining the data.frames, you can do this:
# Map each layer to a constant label inside aes() so that ggplot2 builds a
# colour legend, then assign the real colours to those labels in the manual
# scale.
ggplot() +
  geom_line(
    mapping = aes(x = date, y = val1, colour = "a"),
    data = ruz,
    size = 1.5
  ) +
  geom_line(
    mapping = aes(x = date, y = val2 * 100, colour = "b"),
    data = dfr,
    size = 1.5
  ) +
  scale_colour_manual(
    values = c(a = "blue", b = "red"),
    name = "Colors"
  )
In order to get a legend, you have to map something to color within aes. You can then use scale_color_manual to define the colors for the mapped character values. There are situations where this trick is easier and results in more readable code than reshaping/combining data.

You could rbind them and use color
# Stack both series into one long data frame so that colour can be mapped to
# a column; ggplot2 then draws the legend automatically.
ruz$type <- "ruz"
dfr$val2 <- dfr$val2 * 100  # same scaling as in the original plot
dfr$type <- "dfr"
# rbind() matches data-frame columns by name, so give ruz the same column
# names as dfr (date, val2, type).
names(ruz) <- names(dfr)
df <- rbind(ruz, dfr)
# The line width must be set on the layer: passed to ggplot() it is silently
# ignored and the lines are drawn with the default width.
ggplot(df, aes(date, val2, color = type)) +
  geom_line(size = 1.5)

Related

format dates as quarters in ggplot2 x axis

I have the following chart generated using ggplot2. Is there a way in which I can format my dates on the x axis as quarters (for example 2022-Q1) instead of the current date format which I have?
data <- structure(list(Date = structure(c(19083, 19086, 19087, 19088,
19089, 19090, 19093, 19094, 19095, 19096, 19101, 19102, 19103,
19104, 19107, 19108, 19109, 19110, 19111, 19114, 19115, 19116,
19117, 19118, 19121, 19122, 19123, 19124, 19125, 19128, 19129,
19130, 19131, 19132, 19135, 19136, 19137, 19138, 19139, 19143,
19144, 19145, 19146, 19149, 19150, 19151, 19152, 19153, 19156,
19157, 19158, 19159, 19160, 19164, 19165, 19166, 19167, 19170,
19171, 19172, 19173, 19174, 19178, 19179, 19180, 19181, 19184,
19185, 19186, 19187, 19188, 19191, 19192, 19193, 19194, 19195,
19198, 19199, 19200, 19201, 19202, 19205, 19206, 19207, 19208,
19209, 19212, 19213, 19214, 19215, 19216, 19219, 19220, 19221,
19222, 19223, 19226, 19227, 19228, 19229, 19230, 19233, 19234,
19235, 19236, 19237, 19241, 19242, 19243, 19244, 19247), class = "Date"),
US = c(-0.099, -0.082, -0.102, -0.276, -0.265, -0.214, -0.254,
-0.321, -0.263, -0.195, -0.189, -0.077, -0.025, -0.278, -0.543,
-0.595, -0.638, -0.587, -0.571, -0.754, -0.681, -0.597, -0.68,
-0.738, -0.942, -0.882, -0.657, -0.673, -0.468, -0.394, -0.323,
-0.357, -0.604, -0.533, -0.409, -0.529, -0.577, -0.496, -0.344,
-0.362, -0.326, -0.367, -0.365, -0.318, -0.43, -0.405, -0.515,
-0.658, -0.77, -1.033, -1.058, -1.217, -1.112, -1.096, -1.114,
-1.158, -1.013, -0.915, -0.863, -1.099, -1.192, -1.236, -1.395,
-1.144, -1.09, -1.167, -1.325, -1.378, -1.343, -1.492, -1.591,
-1.56, -1.254, -1.505, -1.327, -1.505, -1.542, -1.528, -1.526,
-1.591, -1.416, -1.513, -1.577, -1.389, -1.429, -1.486, -1.547,
-1.649, -1.61, -1.585, -1.475, -1.393, -1.307, -1.376, -1.571,
-1.744, -2.067, -2.294, -2.401, -2.423, -2.449, -2.69, -2.633,
-2.654, -2.812, -2.909, -3.037, -2.912, -2.456, -2.523, -2.51
)), row.names = c(NA, -111L), class = c("tbl_df", "tbl",
"data.frame"))
library(tidyverse)
# Line chart of US against Date with an axis break every three months.
# No `labels` argument is supplied, so the axis keeps the default date labels.
ggplot(data, aes(Date, US)) +
geom_line(size=2) +
scale_x_date(breaks = '3 months')
You could try making use of the zoo package:
# Same chart, but every break date is rendered as a year-quarter label such
# as "2022-Q1" via zoo's yearqtr formatter.
ggplot(data = data, mapping = aes(x = Date, y = US)) +
  geom_line(size = 2) +
  scale_x_date(
    labels = function(x) zoo::format.yearqtr(x, "%Y-Q%q"),
    breaks = "3 months"
  )

Different errors in using left_join in r

I am trying to interpolate two series, but I get different errors when I use left_join. These are the two series:
df1 = structure(list(Date = structure(c(11690, 11725, 11753, 11781,
11809, 11844, 11872, 11900, 11942, 11970, 11998, 12026, 12061,
12089, 12117, 12145, 12180, 12208, 12243, 12265, 12299, 12327,
12362, 12390, 12425, 12453, 12481, 12509, 12544, 12572, 12600,
12631, 12663, 12698, 12726, 12754, 12796, 12817, 12845, 12880,
12907, 12936, 12971, 12996, 13027, 13062, 13090, 13118, 13160,
13181, 13209, 13244, 13272, 13307, 13335, 13363, 13392, 13426,
13454, 13489, 13524, 13552, 13580, 13615, 13643, 13670, 13699,
13726, 13762, 13790, 13825, 13853, 13888, 13916, 13944, 13979,
14007, 14035, 14063, 14098, 14126, 14154, 14189, 14217, 14259,
14280, 14308, 14336, 14371, 14399, 14427, 14462, 14490, 14525,
14553, 14581, 14623, 14644, 14672, 14707, 14735, 14770, 14798,
14826, 14854, 14889, 14917, 14945, 14987, 15008, 15036, 15071,
15099, 15134, 15162, 15190, 15225, 15253, 15281, 15316, 15351,
15379, 15407, 15434, 15463, 15497, 15526, 15554, 15589, 15617,
15652, 15680, 15715, 15743, 15771, 15799, 15827, 15862, 15890,
15918, 15953, 15980, 16016, 16044, 16079, 16107, 16135, 16163,
16198, 16226, 16254, 16289, 16317, 16345, 16380, 16408, 16457,
16467, 16499, 16540, 16556, 16589, 16632, 16648, 16681, 16730,
16740, 16772, 16821, 16832, 16870, 16912, 16922, 16954, 17003,
17014, 17052, 17094, 17106, 17143, 17185, 17198, 17234, 17283,
17287, 17325, 17367, 17379, 17416, 17465, 17471, 17514, 17556,
17563, 17598, 17647, 17652, 17696, 17738, 17744, 17787, 17829,
17836, 17878, 17920, 17928, 17962, 17996, 18017, 18053, 18102,
18109, 18151, 18193, 18201, 18242), class = "Date"), Fit = c(-1.68038051095608,
-2.12317945962401, -2.71086209338424, -3.22489682411764, -2.51769032592554,
-1.33242532610804, -2.13564807610995, -2.13564807610995, -2.03415137348661,
-1.58909921518124, -0.68844714029518, -1.94691881575563, -1.16714425518695,
-2.15153420569546, -2.04779261960842, -0.867515299774483, -1.47986823637587,
-0.650513604798111, -1.61361732632524, -1.61361732632524, -1.48596960028163,
-2.20004804407501, -2.64689217553021, -2.67436545120372, -3.48049123019991,
-2.28510809912552, -2.32172665536549, -1.76823348887895, -2.763353378483,
-2.09381469041352, -2.08400217235893, -2.08400217235893, -1.59720187270177,
-2.06034560841579, -2.56317571167687, -1.32635640217861, -1.48729782102413,
-2.00732693090646, -3.40272319833461, -2.49810610074565, -3.32810591309226,
-1.92402348117091, -1.35397391665409, -1.35397391665409, -1.76640461987233,
-2.51735528772741, -2.35332514240503, -2.35272634525907, -2.68468172493552,
-2.92171051216825, -3.16925509035157, -2.23093489115309, -3.06530983495044,
-2.30445613039677, -3.30583207178147, -2.82188405397887, -2.82188405397887,
-2.51962207875813, -2.66982767393931, -2.98041595989062, -2.55306093500464,
-3.36871970472208, -2.6201699311654, -2.49494986723432, -2.78910296607766,
-2.57178346785915, -3.28445145020619, -3.28445145020619, -2.42213276484722,
-2.38709926337358, -2.99568739417641, -2.65138790450656, -1.58074660422518,
-1.91906056315361, -2.90337379582732, -2.20117735700684, -1.29303194740331,
-2.2494711668963, -1.53224235243069, -2.04481468859739, -0.929377541149286,
-0.0656900034556943, -2.20508088335906, -0.578502434372448, -0.858925093713048,
-2.96970181427872, -2.97862851965814, -3.31779605911778, -2.08207766441392,
-2.4848907725881, -1.99650762208841, -2.17076028347941, -1.27061305909,
-1.92537835567221, -1.49409886851971, -2.32667673830125, -1.90852299188928,
-2.88390697795999, -2.9172411130509, -3.5788915130035, -2.92151370364156,
-2.71478221054495, -3.40505963822921, -3.21129210245385, -2.66481268506047,
-3.27494238949828, -3.37230078139583, -3.53346393522174, -3.99626066800013,
-3.41713548837371, -3.67409843863548, -3.50182175058264, -3.37467436298585,
-3.40580625489191, -2.99003362011982, -3.66653724632765, -3.61588309823573,
-3.18167532125541, -3.35619362672467, -2.8806023910338, -2.23185392156432,
-0.626338062660262, -2.00482507082523, -1.7359344838577, -0.800609837957656,
-1.93997314433068, -0.93482911672218, -2.63652739226028, -1.93546315176661,
-1.17524044037369, -2.274143231604, -2.69679235052359, -2.23731851002543,
-1.86716852238077, -0.716926538642468, -1.30258784685856, -1.08194224250233,
-0.930472602419788, 1.13081617308243, 2.57874064965174, -0.388461792958877,
-0.340421132850094, 0.638071432169484, -1.57318833539501, 4.40961161388978,
1.62060735764472, -0.674369921428344, 1.76679629890753, -0.748857461264583,
0.520407646064921, -0.575555044309392, 4.34511452569889, 3.34194433067617,
3.21304624521961, 1.97147139745885, 3.45052500620869, -0.399783726365629,
-0.399783726365629, -0.519126753266423, -0.924795366280377, -0.924795366280377,
4.49023560078066, 2.40210157814194, 2.40210157814194, 3.48634734098189,
2.72330542145941, 2.72330542145941, 2.44175433903678, 1.83754103018167,
1.83754103018167, 2.00744190890702, 2.84495436947268, 2.84495436947268,
3.68678762972081, 3.16657087164265, 3.16657087164265, 2.54382298188246,
3.23525009614645, 3.23525009614645, 2.25680930181096, 1.44116917115332,
1.44116917115332, -0.510916286085479, -0.212859586548733, -0.212859586548733,
-0.740774827758169, -0.740774827758169, -0.740774827758169, -0.740774827758169,
-0.41701391913469, -0.41701391913469, -1.22601225946438, -0.632288989161026,
-0.632288989161026, -1.04309509051099, -1.17291896442253, -1.17291896442253,
-1.17291896442253, -1.17291896442253, -1.17291896442253, -1.11302828507398,
-2.78220449305609, -2.78220449305609, -2.86224137302201, -2.86224137302201,
-2.86224137302201, 1.34122649704384, 1.33515381941954, 1.33515381941954,
2.56866171677324, 0.963485086246604, 0.963485086246604, 4.22653525972732,
5.35095831479335, 5.35095831479335, 3.93448842430935)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -216L))
df2 = structure(list(Fit = c(-3.36981025066699, -2.83074686834444,
-3.4112766646918, -2.45754914212114, -2.16728424394746, -3.61860873481586,
-3.61860873481586, -2.2916834860219, -1.37942237747605, -3.95034004701435,
-1.46235520552567, -2.91367969639407, -3.95034004701435, -3.57714232079105,
-2.16728424394746, -2.20875065797227, -2.6648812122452, -2.74781404029482,
-2.00141858784822, -3.07954535249331, -3.07954535249331, -3.24541100859256,
-3.36981025066699, -2.49901555614595, -2.6648812122452, -3.95034004701435,
-3.0380789384685, -3.24541100859256, -3.12101176651812, -2.58194838419558,
-2.83074686834444, -3.4112766646918, -2.6648812122452, -2.99661252444369,
-3.61860873481586, -2.99661252444369, -3.57714232079105, -2.74781404029482,
-3.20394459456775, -2.25021707199708, -2.37461631407152, -3.61860873481586,
-3.36981025066699, -2.95514611041888, -3.61860873481586, -3.32834383664218,
-2.74781404029482, -3.20394459456775, -2.00141858784822, -3.12101176651812,
-3.66007514884067, -3.47347628572902, -3.95034004701435, -3.45274307871661,
-3.45274307871661, -2.83074686834444, -3.95034004701435, -2.99661252444369,
-3.61860873481586, -2.16728424394746, -3.70154156286548, -2.37461631407152,
-2.83074686834444, -2.12581782992265, -3.32834383664218, -2.6648812122452,
-3.0380789384685, -3.20394459456775, -3.4112766646918, -2.83074686834444,
-2.91367969639407, -3.95034004701435, -2.6648812122452, -3.36981025066699,
-2.2916834860219, -3.45274307871661, -2.37461631407152, -1.54528803357529,
-2.91367969639407, -3.49420949274142, -2.95514611041888, -3.32834383664218,
-2.45754914212114, -3.57714232079105, -3.36981025066699, -3.45274307871661,
-3.95034004701435, -3.16247818054293, -2.83074686834444, -3.36981025066699,
-2.6648812122452, -2.6648812122452, -2.45754914212114, -3.32834383664218,
-3.66007514884067, -3.36981025066699, -3.95034004701435, -2.83074686834444,
-2.91367969639407, -3.45274307871661, -2.20875065797227, -2.6648812122452,
-3.95034004701435, -3.95034004701435, -2.74781404029482, -2.83074686834444,
-2.99661252444369, -3.57714232079105, -3.95034004701435, -1.54528803357529,
-2.45754914212114, -2.91367969639407, -3.20394459456775, -2.83074686834444,
-3.70154156286548, -1.29648954942642, -3.36981025066699, -2.20875065797227,
-2.45754914212114, -3.24541100859256, -2.6648812122452, -1.87701934577378,
-2.83074686834444, -2.37461631407152, -3.95034004701435, -3.07954535249331,
-2.6648812122452, -2.99661252444369, -2.91367969639407, -3.49420949274142,
-3.61860873481586, -3.20394459456775, 0.44509983961565, 0.776831151814141,
-2.37461631407152, -3.61860873481586, -0.259829198806142, -2.37461631407152,
3.72094654757574, -1.37942237747605, -3.12101176651812, 0.196301355466782,
-2.20875065797227, -0.923291823203123, -2.58194838419558, 0.569499081690084,
0.486566253640461, 1.56469301828556, -1.62822086162492, 3.59654730550131,
-0.923291823203123, -2.2916834860219, -0.633026925029444, 0.486566253640461,
2.14522281463291, 1.15002887803744, 2.60135336890584, 2.97455109512914,
1.02562963596301, 1.89642433048405, 2.97455109512914, 3.38921523537725,
3.22334957927801, 3.47214806342688, 1.77202508840961, 4.05267785977424,
3.30628240732763, -0.0939635427068968, -0.342762026855765, -0.0939635427068968,
1.23296170608707, -0.342762026855765, -0.425694854905387, -3.36981025066699,
-0.674493339054255, -1.08915747930237, -1.37942237747605, -2.08435141589784,
-1.46235520552567, -1.46235520552567, -2.83074686834444, -2.25021707199708,
-3.0380789384685, 3.05748392317876, 1.15002887803744, 2.68428619695546,
0.196301355466782, 2.26962205670735, 2.85015185305471, 2.85015185305471
), Date = c("2002-01-03", "2002-02-07", "2002-03-07", "2002-04-04",
"2002-05-02", "2002-06-06", "2002-07-04", "2002-09-12", "2002-10-10",
"2002-11-07", "2002-12-05", "2003-01-09", "2003-02-06", "2003-03-06",
"2003-04-03", "2003-05-08", "2003-06-05", "2003-07-10", "2003-09-04",
"2003-10-02", "2003-11-06", "2003-12-04", "2004-01-08", "2004-02-05",
"2004-03-04", "2004-04-01", "2004-05-06", "2004-06-03", "2004-07-01",
"2004-09-02", "2004-10-07", "2004-11-04", "2004-12-02", "2005-01-13",
"2005-02-03", "2005-03-03", "2005-04-07", "2005-05-04", "2005-06-02",
"2005-07-07", "2005-09-01", "2005-10-06", "2005-11-03", "2005-12-01",
"2006-01-12", "2006-02-02", "2006-03-02", "2006-04-06", "2006-05-04",
"2006-06-08", "2006-07-06", "2006-08-03", "2006-10-05", "2006-11-02",
"2006-12-07", "2007-01-11", "2007-02-08", "2007-03-08", "2007-04-12",
"2007-05-10", "2007-06-06", "2007-07-05", "2007-09-06", "2007-10-04",
"2007-11-08", "2007-12-06", "2008-01-10", "2008-02-07", "2008-03-06",
"2008-04-10", "2008-05-08", "2008-06-05", "2008-07-03", "2008-08-07",
"2008-09-04", "2008-10-02", "2008-11-06", "2008-12-04", "2009-01-15",
"2009-02-05", "2009-03-05", "2009-04-02", "2009-05-07", "2009-06-04",
"2009-07-02", "2009-08-06", "2009-09-03", "2009-10-08", "2009-11-05",
"2009-12-03", "2010-01-14", "2010-02-04", "2010-03-04", "2010-04-08",
"2010-05-06", "2010-06-10", "2010-07-08", "2010-08-05", "2010-09-02",
"2010-10-07", "2010-11-04", "2010-12-02", "2011-01-13", "2011-02-03",
"2011-03-03", "2011-04-07", "2011-05-05", "2011-06-09", "2011-07-07",
"2011-08-04", "2011-09-08", "2011-10-06", "2011-11-03", "2011-12-08",
"2012-01-12", "2012-02-09", "2012-03-08", "2012-04-04", "2012-05-03",
"2012-06-06", "2012-07-05", "2012-08-02", "2012-09-06", "2012-10-04",
"2012-11-08", "2012-12-06", "2013-01-10", "2013-02-07", "2013-03-07",
"2013-04-04", "2013-05-02", "2013-06-06", "2013-07-04", "2013-08-01",
"2013-09-05", "2013-10-02", "2013-11-07", "2013-12-05", "2014-01-09",
"2014-02-06", "2014-03-06", "2014-04-03", "2014-05-08", "2014-06-05",
"2014-07-03", "2014-08-07", "2014-09-04", "2014-10-02", "2014-11-06",
"2014-12-04", "2015-01-22", "2015-03-05", "2015-04-15", "2015-06-03",
"2015-07-16", "2015-09-03", "2015-10-22", "2015-12-03", "2016-01-21",
"2016-03-10", "2016-04-21", "2016-06-02", "2016-07-21", "2016-09-08",
"2016-10-20", "2016-12-08", "2017-01-19", "2017-03-09", "2017-04-27",
"2017-06-08", "2017-07-20", "2017-09-07", "2017-10-26", "2017-12-14",
"2018-01-25", "2018-03-08", "2018-04-26", "2018-06-14", "2018-07-26",
"2018-09-13", "2018-10-25", "2018-12-13", "2019-01-24", "2019-03-07",
"2019-04-10", "2019-06-06", "2019-07-25", "2019-09-12", "2019-10-24",
"2019-12-12")), class = "data.frame", row.names = c(NA, -190L
))
I then used these codes to interpolate the series:
# first series
# Monthly sequence starting on 2002-01-03, so the generated dates fall on the
# 3rd of each month.
monthss <- data.frame(Date = seq(as.Date("2002-01-03"), as.Date("2019-12-12"), by = "month"), stringsAsFactors = F)
# Keeps every row of monthss; a row of df1 is attached only where its Date is
# exactly equal to one of the generated dates.
df1 <- left_join(x = monthss, y = df1, by = "Date")
# NOTE(review): 'as it comes from the previous step' looks like the asker's
# placeholder for the joined value column rather than a real column name.
df1 <- na.approx(object = df1$'as it comes from the previous step', rule = 2)
# second series
# NOTE: df2$Date is a character vector in the dput above, while monthss$Date
# is of class Date.
df2 <- left_join(x = monthss, y = df2, by = "Date")
df2 <- na.approx(object = df2$'as it comes from the previous step', rule = 2)
However, in the first case, I get a weird result when using left_join (I get too many NAs that don't correspond to the missing dates), while in the second case, when using left_join, I get Error: cannot join a Date object with an object that is not a Date object. For this error, I tried to look for some solutions online, but what I found doesn't help.
Can anyone help me fix this?
Thanks!
You get the left_join error in the second case because df2$Date is not of class Date. This fixes the second join:
library(lubridate)
# second series
# df2$Date is stored as character; turn it into a Date so that it has the
# same class as monthss$Date before joining.
df2$Date <- lubridate::date(df2$Date)
df2 <- left_join(monthss, df2, by = "Date")
df2 <- na.approx(df2$'as it comes from the previous step', rule = 2)
Changing this will fix the first join:
# first series
# Build a daily (not monthly) calendar so that every observation date in df1
# has a matching row to join on.
monthss <- data.frame(
  Date = seq(as.Date("2002-01-03"), as.Date("2019-12-12"), by = "day"),
  # Spelled out as FALSE: `F` is an ordinary variable that can be reassigned.
  stringsAsFactors = FALSE
)
You did a sequence by month and thus only got the dates for each month where the day is "3" e.g., 2002-01-03, 2002-02-03 etc. Thus you only got joins for Dates ending on "03" but not for other dates that you have Fit values for in df1.

How to establish if the dates in a column are unique?

I have a simple question to ask. I couldn't find a solution on SO.
I have a column vector of dates and I would like to see whether months are unique or not. I tried to use unique but I may have used it in the wrong way.
An example:
Date = structure(c(11690, 11725, 11753, 11781, 11809, 11844, 11872,
11900, 11942, 11970, 11998, 12026, 12061, 12089, 12117, 12145,
12180, 12208, 12243, 12264, 12265, 12299, 12327, 12362, 12390,
12425, 12453, 12481, 12509, 12544, 12572, 12600, 12635, 12663,
12698, 12726, 12754, 12796, 12817, 12845, 12880, 12907, 12936,
12971, 12999, 13027, 13062, 13090, 13118, 13160, 13181, 13209,
13244, 13272, 13307, 13335, 13363, 13392, 13426, 13454, 13489,
13524, 13552, 13580, 13615, 13643, 13670, 13699, 13727, 13762,
13790, 13825, 13853, 13888, 13916, 13944, 13979, 14007, 14035,
14063, 14098, 14126, 14154, 14160, 14189, 14217, 14259, 14280,
14308, 14336, 14371, 14399, 14427, 14462, 14490, 14525, 14553,
14581, 14623, 14644, 14672, 14707, 14735, 14770, 14798, 14826,
14854, 14889, 14917, 14945, 14987, 15008, 15036, 15071, 15099,
15134, 15162, 15190, 15225, 15253, 15281, 15316, 15351, 15379,
15407, 15434, 15463, 15497, 15526, 15554, 15589, 15617, 15652,
15680, 15715, 15743, 15771, 15799, 15827, 15862, 15890, 15918,
15953, 15980, 16016, 16044, 16079, 16107, 16135, 16163, 16198,
16226, 16254, 16289, 16317, 16345, 16380, 16408, 16457, 16467,
16499, 16540, 16556, 16589, 16632, 16648, 16681, 16730, 16740,
16772, 16821, 16832, 16870, 16912, 16922, 16954, 17003, 17014,
17052, 17094, 17106, 17143, 17185, 17198, 17234, 17283, 17287,
17325, 17367, 17379, 17416, 17465, 17471, 17514, 17556, 17563,
17598, 17647, 17652, 17696, 17738, 17744, 17787, 17829, 17836,
17878, 17920, 17928, 17962, 17996, 18017, 18053, 18102, 18109,
18151, 18193, 18201, 18242), class = "Date")
In this column vector I would like to see whether there are two observations in the same month (there are 2 for "2003-07" and "2008-10"). Can I check this with a single line of code?
Thanks!
In base R, we can format the Date to get only year and month, use table to count their occurrences, Filter to select only those month-year which occur more than once.
# Year-month labels ("YYYY-MM") that occur more than once in Date.
names(which(table(format(Date, "%Y-%m")) > 1))
#[1] "2003-07" "2008-10"
Same logic using zoo::as.yearmon.
# Same count, keyed by zoo's year-month class instead of a formatted string.
names(which(table(zoo::as.yearmon(Date)) > 1))
#[1] "Jul 2003" "Oct 2008"
library('lubridate')
library('tidyverse')
# Turn the Date vector into a tibble; the dates end up in the `value` column.
Date %>%
enframe() %>%
# Count the rows per calendar year and month.
count(year(value), month(value)) %>%
# Keep only the year/month combinations that occur more than once.
filter(n > 1)
One line with as.yearmon from zoo
library(zoo)
# duplicated() flags the second and later dates that fall in an already-seen
# year-month; the logical vector can index Date directly.
Date[duplicated(as.yearmon(Date))]
[1] "2003-07-31" "2008-10-08"
Use str_sub to get year-month and use table to count frequency of occurrence:
library(tidyverse)
year_month <- str_sub(Date, 1, 7) # extract characters 1-7 (the year-month part)
# Tabulate the year-month strings and convert the counts to a tibble.
result <- as_tibble(table(year_month))
# or pipe it all
year_month2 <- str_sub(Date, 1, 7) %>%
table() %>%
as_tibble()
# or filter to keep only those with more than one occurrence
year_month3 <- str_sub(Date, 1, 7) %>%
table() %>%
as_tibble() %>%
filter(n > 1)

"Breakpoints" don't display dates but a continuous measure

I'm trying to detect some structural breaks in my series. The problem is that it displays continuous numbers rather than dates, despite my series being a ts object.
I found this solution but it doesn't work in my case.
This is my dataset and code:
df = structure(list(Date = structure(c(11690, 11725, 11753, 11781,
11809, 11844, 11872, 11900, 11942, 11970, 11998, 12026, 12061,
12089, 12117, 12145, 12180, 12208, 12243, 12265, 12299, 12327,
12362, 12390, 12425, 12453, 12481, 12509, 12544, 12572, 12600,
12631, 12663, 12698, 12726, 12754, 12796, 12817, 12845, 12880,
12907, 12936, 12971, 12996, 13027, 13062, 13090, 13118, 13160,
13181, 13209, 13244, 13272, 13307, 13335, 13363, 13392, 13426,
13454, 13489, 13524, 13552, 13580, 13615, 13643, 13670, 13699,
13726, 13762, 13790, 13825, 13853, 13888, 13916, 13944, 13979,
14007, 14035, 14063, 14098, 14126, 14154, 14189, 14217, 14259,
14280, 14308, 14336, 14371, 14399, 14427, 14462, 14490, 14525,
14553, 14581, 14623, 14644, 14672, 14707, 14735, 14770, 14798,
14826, 14854, 14889, 14917, 14945, 14987, 15008, 15036, 15071,
15099, 15134, 15162, 15190, 15225, 15253, 15281, 15316, 15351,
15379, 15407, 15434, 15463, 15497, 15526, 15554, 15589, 15617,
15652, 15680, 15715, 15743, 15771, 15799, 15827, 15862, 15890,
15918, 15953, 15980, 16016, 16044, 16079, 16107, 16135, 16163,
16198, 16226, 16254, 16289, 16317, 16345, 16380, 16408, 16457,
16467, 16499, 16540, 16556, 16589, 16632, 16648, 16681, 16730,
16740, 16772, 16821, 16832, 16870, 16912, 16922, 16954, 17003,
17014, 17052, 17094, 17106, 17143, 17185, 17198, 17234, 17283,
17287, 17325, 17367, 17379, 17416, 17465, 17471, 17514, 17556,
17563, 17598, 17647, 17652, 17696, 17738, 17744, 17787, 17829,
17836, 17878, 17920, 17928, 17962, 17996, 18017, 18053, 18102,
18109, 18151, 18193, 18201, 18242), class = "Date"), Fit = c(-2.01864866574525,
-2.51081772611801, -3.07896216512166, -3.02724722640642, -0.764567739958455,
-1.81459657078637, -2.13093106123547, -2.13093106123547, -1.91543051022373,
-1.31418467170089, -1.86573850139921, -2.42539556395029, -1.26414303389104,
-2.5433900359616, -1.99767537794132, -1.34728409808229, -1.64315561542246,
-0.687106946387411, -2.48041219070826, -2.48041219070826, -1.78680159845671,
-2.13687301896279, -2.6123923387608, -2.84563515334999, -3.41506073833104,
-2.74565641471061, -2.3682788731863, -1.77410755661286, -2.46191758167165,
-2.34829604543204, -2.37030627525843, -2.37030627525843, -1.75944822651175,
-2.21875944722698, -2.60249841953241, -2.6758310533823, -1.99157259723667,
-2.34860918772813, -3.24977356678388, -3.1998805120359, -3.64471855523435,
-2.80762315792921, -1.46910836105049, -1.46910836105049, -2.24153954648439,
-2.64718944648088, -2.61088260257325, -2.45889016663966, -2.59732356608009,
-3.49037732690643, -2.75284369990193, -2.56284320115193, -3.01470163344929,
-2.24267403694233, -3.36759206183078, -2.65899770326269, -2.65899770326269,
-3.83487166356133, -2.30405890853423, -3.83487166356133, -2.91420930066836,
-2.92649062542454, -2.45288174087111, -2.59203353843301, -2.37211828478634,
-2.35485833573613, -3.28807932670479, -3.28807932670479, -2.69856893402308,
-2.4482421908289, -3.42965769805337, -2.4002640291758, -1.72498017056001,
-2.10246950134994, -2.75989530409431, -2.04609226712013, -1.50354129352453,
-1.721866774994, -1.42652131446034, -1.99149928941641, -0.924508173463412,
-0.34424720787331, -1.47956887747857, -0.699260660882747, -0.705970004477605,
-2.89615299118885, -2.87168709242964, -3.49698896688496, -1.80133944039088,
-2.3066390154612, -2.16578274820764, -1.62064416630292, -1.50034889686538,
-1.64551702528081, -1.54888542275039, -2.36526073757675, -2.17980843362752,
-2.61987658921009, -2.99580131757171, -3.27224528690084, -2.90968038360951,
-2.43786428440244, -3.53447897261775, -2.94164730632451, -2.67914051197011,
-3.08963971104142, -3.30489291781406, -3.16112222668117, -3.78875309229899,
-3.27799815735179, -3.27546357519604, -3.28715323339141, -3.277230212033,
-2.73537305926061, -2.63360778909794, -3.42285993586989, -3.02592822360864,
-2.80491835054881, -3.1610709896381, -2.69912996631718, -2.48975331263934,
-0.134524884114962, -2.3485759078928, -1.67019370390805, -1.30630530826772,
-2.2627030307026, -1.19967822767006, -2.18902328617136, -2.32822018421121,
-1.05335780233708, -2.32765305050142, -3.70136681094428, -2.47624061269887,
-2.2395891355029, -0.873612387550348, -2.52750186765166, -1.58254587448088,
-1.3519682697086, -2.67716755653968, -2.09120993997918, -2.83947106437091,
-1.59227436938979, -2.70393468772428, -3.07475393381032, -1.72535933812472,
-2.62864985613023, -2.1788856069182, -1.66072722296379, -2.02593106477748,
-0.236862069023111, -2.20046381510765, 2.67747589830398, 2.03103654671807,
-0.411843127888723, 0.15392859458, 3.15264600488878, -0.115883494946465,
-0.115883494946465, -2.48408112888983, -2.13179786204659, -2.13179786204659,
-0.421916196665926, -1.81454302259545, -1.81454302259545, -0.719344207794365,
2.30623888786222, 2.30623888786222, 0.233349485130917, 0.807655736612547,
0.807655736612547, 0.00810498434400109, 1.73561337499853, 1.73561337499853,
2.05294933680988, 2.52332617911213, 2.52332617911213, 1.6590362509139,
2.44897469036036, 2.44897469036036, 1.48162277916561, 0.109012820753664,
0.109012820753664, -0.552382527186447, 0.342735574558364, 0.342735574558364,
0.352860787128766, 0.352860787128766, 0.352860787128766, 0.352860787128766,
0.726520452040748, 0.726520452040748, 0.176144461112964, 2.28171712015304,
2.28171712015304, 0.256037205994603, 0.10686264754173, 0.10686264754173,
0.10686264754173, 0.10686264754173, 0.10686264754173, -0.871047910469186,
-1.61892724112359, -1.61892724112359, -2.04847571973674, -2.04847571973674,
-2.04847571973674, 1.22730660297267, 1.94291846403141, 1.94291846403141,
2.64766715573213, 1.66439852581802, 1.66439852581802, 3.92242045719081,
2.92445832371034, 2.92445832371034, 4.09796304281725)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -216L))
# Monthly ts object (frequency = 12) running from January 2002 to December 2019.
word = ts(df$Fit, start = c(2002, 1), end = c(2019, 12), frequency = 12)
# NOTE: the model is fitted on the plain column df$Fit, not on `word`, so the
# fit is not given the time index of the ts object.
breaks = breakpoints(df$Fit ~ 1, h = 0.1)
plot(breaks)
coef(breaks, breaks = 2)
word[breaks$breakpoints] # July 2014
plot(df[,2], type = "b")
lines(fitted(breaks), col = 4, lwd = 2)
# NOTE(review): df[breaks$breakpoints] selects columns of df by position, not
# dates; presumably the Date values at those row positions were intended.
abline(v = df[breaks$breakpoints], lty = 2)
Can anyone help me sort this out?
Thanks a lot!
You created a timeseries object but you did not use it:
# Fit the breakpoint model on the ts object itself so that the fitted values
# and the break positions can be placed on its time axis.
breaks <- breakpoints(word ~ 1, h = 0.1)
break_times <- time(word)[breaks$breakpoints]

plot(word)
lines(fitted(breaks), lwd = 2, col = "blue")
abline(v = break_times, lty = 2)
If I remember correctly, for irregularly spaced intervals you use zoo, you can try something like below:
library(strucchange)
library(zoo)
# Estimate the breakpoints of a constant-mean model on the raw series.
breaks <- breakpoints(df$Fit ~ 1, h = 0.1)
# Build the zoo series from plain vectors. `df` is a tibble, so df[, 2] and
# df[, 1] would stay one-column data frames instead of dropping to vectors;
# [[ ]] always extracts the column itself.
zoo_df <- zoo(df[[2]], order.by = df[[1]])
plot(zoo_df, type = "b")
# Breakpoints are observation numbers; look up the matching dates.
abline(v = time(zoo_df)[breaks$breakpoints], lty = 2)
This gets you the plot with date on x-axis. Now to get the x-axis values, you do time(zoo_df) and it's a matter of subsetting this according to the index. For the fitted line, if you don't want the lines to join, you need to split them into breakpoints + 1 groups, and draw each one separately:
# Group the observations into segments: each breakpoint index is the
# right-closed upper edge of a segment, and the last segment runs to the end.
bp <- breaks$breakpoints
bp <- bp[!is.na(bp)] # strucchange reports NA when no break is found
grps <- cut(
  seq_len(length(zoo_df)),
  breaks = c(0, bp, Inf),
  labels = seq_len(length(bp) + 1)
)

# Draw the fitted values one segment at a time so that consecutive segments
# are not joined by a connecting line.
fit <- fitted(breaks)
for (i in levels(grps)) {
  in_segment <- grps == i
  lines(time(zoo_df)[in_segment], fit[in_segment])
}

Add Date Tooltip to GGvis object R

I want to add a tooltip to a plot with a Date x-axis so that it shows the date of the hovered point. Currently the tooltip shows the numeric value of the date instead of a formatted date.
the Data:
> dput(for_plot)
structure(list(ext_install_date = structure(c(16638, 16660, 16700,
16710, 16712, 16729, 16730, 16736, 16752, 16768, 16717, 16755,
16756, 16757, 16758, 16662, 16750, 16769, 16785, 16665, 16699,
16632, 16682, 16683, 16684, 16698, 16634, 16679, 16720, 16754,
16701, 16718, 16685, 16687, 16774, 16775, 16647, 16680, 16596,
16630, 16721, 16725, 16703, 16706, 16723, 16726, 16650, 16651,
16686, 16707, 16771, 16722, 16739, 16760, 16794, 16724, 16742,
16777, 16648, 16653, 16670, 16744, 16676, 16636, 16671, 16654,
16740, 16746, 16763, 16642, 16728, 16733, 16743, 16765, 16692,
16689, 16709, 16711, 16735, 16639, 16695, 16780, 16784, 16640,
16644, 16645, 16731, 16643, 16714, 16659, 16753, 16786, 16657,
16737, 16663, 16749, 16702, 16719, 16770, 16751, 16776, 16681,
16631, 16789, 16674, 16727, 16772, 16762, 16795, 16778, 16649,
16672, 16766, 16655, 16741, 16764, 16693, 16747, 16781, 16732,
16715, 16803, 16787, 16658, 16696, 16664, 16646, 16626, 16629,
16790, 16773, 16652, 16796, 16667, 16673, 16759, 16656, 16748,
16637, 16708, 16691, 16641, 16799, 16697, 16782, 16688, 16716,
16804, 16788, 16627, 16633, 16791, 16797, 16668, 16800, 16783,
16734, 16677, 16666, 16704, 16628, 16792, 16798, 16669, 16675,
16761, 16767, 16678, 16661, 16738, 16635, 16793, 16745, 16694,
16779, 16801, 16802, 16590, 16591, 16593, 16617, 16618, 16619,
16589, 16592, 16608, 16610, 16611, 16624, 16623, 16595, 16713,
16594, 16615, 16609, 16601, 16616, 16597, 16602, 16598, 16599,
16605, 16600, 16620, 16606, 16622, 16588, 16603, 16604, 16607,
16705, 16625, 16690, NA, 16613, 16612, 16621, 16587, 16614), class = "Date"),
sum = c(42133989L, 55439830L, 12474088L, 16782989L, 12498558L,
23097594L, 18694459L, 18613591L, 17164639L, 10998479L, 17434372L,
16134588L, 15765093L, 14780377L, 12238314L, 60259367L, 14299153L,
9228893L, 7181921L, 57044570L, 12671147L, 34528083L, 28685034L,
27538625L, 23861138L, 12939897L, 34804466L, 27256411L, 15470425L,
17683150L, 13919150L, 17683908L, 24785274L, 17297336L, 10883551L,
10765214L, 52309168L, 32311372L, 33612420L, 30994837L, 17528116L,
17979427L, 12904065L, 14452388L, 16659048L, 24373216L, 50197552L,
64194782L, 18635738L, 12370060L, 10714545L, 17080052L, 17320119L,
11792389L, 4250804L, 17337770L, 16257833L, 10389087L, 55407448L,
60149494L, 52791093L, 16748038L, 26324948L, 35491474L, 39187635L,
58805623L, 17361957L, 15002820L, 11834933L, 42022359L, 18834611L,
18499567L, 15771667L, 12734880L, 9901353L, 11215141L, 12242264L,
13603119L, 17919976L, 41100805L, 14435765L, 9727462L, 7604216L,
41830337L, 48193262L, 53128495L, 19248325L, 43074450L, 11930683L,
57259190L, 17360447L, 6925452L, 49781307L, 17483336L, 60223307L,
14877194L, 11216973L, 17906140L, 11152617L, 15247289L, 9919111L,
30596442L, 31624492L, 6494032L, 29419861L, 22205115L, 11612651L,
12506364L, 3977433L, 9251065L, 52886830L, 30459500L, 10314486L,
62828525L, 16266340L, 11414242L, 11315183L, 14865891L, 9219453L,
19531171L, 12390920L, 554893L, 6871604L, 56267484L, 13478614L,
59179677L, 56843397L, 33077108L, 29693238L, 5709800L, 10984800L,
62188950L, 3424377L, 55865206L, 33250188L, 11883725L, 61208251L,
14620505L, 33824988L, 9605466L, 10086150L, 43150201L, 2434175L,
13636535L, 9149052L, 12859396L, 15342563L, 299795L, 6557079L,
29079786L, 35154155L, 5720813L, 3213738L, 54942898L, 2011266L,
7788028L, 17631115L, 26168243L, 55355445L, 12363848L, 32406026L,
5181923L, 3045645L, 55879245L, 27803689L, 12997556L, 9993556L,
24094397L, 61278488L, 16146261L, 30860019L, 4851695L, 16059845L,
9734641L, 9682186L, 1434076L, 787507L, 33627937L, 35299118L,
31196723L, 23152630L, 18430364L, 16404624L, 21956375L, 31145208L,
29812678L, 28715201L, 31587264L, 18888020L, 14560381L, 28866133L,
11402958L, 28100484L, 21889495L, 33006788L, 32671209L, 22527130L,
36680524L, 30021920L, 35047621L, 33187732L, 30610149L, 34511947L,
21200181L, 31763855L, 16891242L, 28242299L, 31096620L, 35093501L,
28600363L, 14257733L, 32070016L, 10522891L, 785L, 17111781L,
25138826L, 21459015L, 28940910L, 21906624L)), row.names = c(NA,
-219L), class = c("data.table", "data.frame"), .Names = c("ext_install_date",
"sum"), .internal.selfref = <pointer: 0x0000000001290788>)
The ggvis code:
library(ggvis)
# Line layer of `sum` against `ext_install_date`; the hover tooltip returns the
# raw `ext_install_date` value it is handed.
for_plot %>%
  ggvis(x = ~ext_install_date, y = ~sum, stroke := "red") %>%
  layer_lines(stroke = 2) %>%
  add_tooltip(
    function(hit) {
      hit$ext_install_date
    },
    "hover"
  ) # showing only numeric values
Thanks for any help on that!
Unfortunately the tooltip in ggvis doesn't handle the values underpinning layer_lines as separate data points. Just as in your example, it only displays the first value. The second problem is the way the tooltip displays date objects.
We can hack our way around both problems, with the same approach as outlined here. Please note that the tooltip will still erroneously show the first date in between data points.
# Convert the hovered x value back into a Date: the value is divided by
# 86400000 (the number of milliseconds in a day) and counted from 1970-01-01.
tooltip_date <- function(hit) {
  days_since_epoch <- hit$ext_install_date / 86400000
  as.Date(days_since_epoch, origin = "1970-01-01")
}

for_plot %>%
  ggvis(x = ~ext_install_date, y = ~sum) %>%
  # Fully transparent points: invisible, but they give the tooltip individual
  # data points to report on.
  layer_points(opacity := 0) %>%
  add_tooltip(tooltip_date) %>%
  layer_lines()
I believe the googleVis package doesn't suffer from the same issues, so you might want to consider using it for similar graphs in the future.
My case is slightly different. I have a column of Date format on the X axis and I want to display it on hover, but the Date input of add_tooltip() is automatically transformed.
I finally figured out how to display the date from the X axis in a ggvis tooltip, based on mtoto's code.
Here's my example for those who have the same case as I do:
library(ggvis)

# Example data: 20 rows of five random normal columns (V1..V5) plus one
# consecutive calendar date per row.
dat <- as.data.frame(matrix(rnorm(100), ncol = 5))
dat$Date <- seq(as.Date("2018-01-01"), as.Date("2018-01-20"), by = 'days')

# Build the tooltip text for a hovered point.
#
# x: the data for the hovered point as passed in by add_tooltip(), or NULL.
# Returns NULL when x is NULL, otherwise a single string with the date and the
# V1 value separated by an HTML line break.
myhover <- function(x) {
  if (is.null(x)) {
    return(NULL)
  }
  # The Date value is divided by 86400000 (milliseconds per day) and counted
  # from 1970-01-01 to recover a Date before formatting it.
  hovered_date <- as.Date(x$Date / 86400000, origin = '1970-01-01')
  # A literal "<br/>" is used instead of br(): br() comes from shiny/htmltools,
  # which this snippet does not load, and it renders to the same markup.
  paste('Date: ', format(hovered_date, '%Y-%m-%d'), "<br/>", 'Data:', format(x$V1))
}

dat %>%
  ggvis(~Date, ~V1) %>%
  layer_points() %>%
  add_tooltip(myhover, 'hover')

Resources