Different errors in using left_join in r - r

I am trying to interpolate two series, but I get different errors when using left_join. These are the two series:
df1 = structure(list(Date = structure(c(11690, 11725, 11753, 11781,
11809, 11844, 11872, 11900, 11942, 11970, 11998, 12026, 12061,
12089, 12117, 12145, 12180, 12208, 12243, 12265, 12299, 12327,
12362, 12390, 12425, 12453, 12481, 12509, 12544, 12572, 12600,
12631, 12663, 12698, 12726, 12754, 12796, 12817, 12845, 12880,
12907, 12936, 12971, 12996, 13027, 13062, 13090, 13118, 13160,
13181, 13209, 13244, 13272, 13307, 13335, 13363, 13392, 13426,
13454, 13489, 13524, 13552, 13580, 13615, 13643, 13670, 13699,
13726, 13762, 13790, 13825, 13853, 13888, 13916, 13944, 13979,
14007, 14035, 14063, 14098, 14126, 14154, 14189, 14217, 14259,
14280, 14308, 14336, 14371, 14399, 14427, 14462, 14490, 14525,
14553, 14581, 14623, 14644, 14672, 14707, 14735, 14770, 14798,
14826, 14854, 14889, 14917, 14945, 14987, 15008, 15036, 15071,
15099, 15134, 15162, 15190, 15225, 15253, 15281, 15316, 15351,
15379, 15407, 15434, 15463, 15497, 15526, 15554, 15589, 15617,
15652, 15680, 15715, 15743, 15771, 15799, 15827, 15862, 15890,
15918, 15953, 15980, 16016, 16044, 16079, 16107, 16135, 16163,
16198, 16226, 16254, 16289, 16317, 16345, 16380, 16408, 16457,
16467, 16499, 16540, 16556, 16589, 16632, 16648, 16681, 16730,
16740, 16772, 16821, 16832, 16870, 16912, 16922, 16954, 17003,
17014, 17052, 17094, 17106, 17143, 17185, 17198, 17234, 17283,
17287, 17325, 17367, 17379, 17416, 17465, 17471, 17514, 17556,
17563, 17598, 17647, 17652, 17696, 17738, 17744, 17787, 17829,
17836, 17878, 17920, 17928, 17962, 17996, 18017, 18053, 18102,
18109, 18151, 18193, 18201, 18242), class = "Date"), Fit = c(-1.68038051095608,
-2.12317945962401, -2.71086209338424, -3.22489682411764, -2.51769032592554,
-1.33242532610804, -2.13564807610995, -2.13564807610995, -2.03415137348661,
-1.58909921518124, -0.68844714029518, -1.94691881575563, -1.16714425518695,
-2.15153420569546, -2.04779261960842, -0.867515299774483, -1.47986823637587,
-0.650513604798111, -1.61361732632524, -1.61361732632524, -1.48596960028163,
-2.20004804407501, -2.64689217553021, -2.67436545120372, -3.48049123019991,
-2.28510809912552, -2.32172665536549, -1.76823348887895, -2.763353378483,
-2.09381469041352, -2.08400217235893, -2.08400217235893, -1.59720187270177,
-2.06034560841579, -2.56317571167687, -1.32635640217861, -1.48729782102413,
-2.00732693090646, -3.40272319833461, -2.49810610074565, -3.32810591309226,
-1.92402348117091, -1.35397391665409, -1.35397391665409, -1.76640461987233,
-2.51735528772741, -2.35332514240503, -2.35272634525907, -2.68468172493552,
-2.92171051216825, -3.16925509035157, -2.23093489115309, -3.06530983495044,
-2.30445613039677, -3.30583207178147, -2.82188405397887, -2.82188405397887,
-2.51962207875813, -2.66982767393931, -2.98041595989062, -2.55306093500464,
-3.36871970472208, -2.6201699311654, -2.49494986723432, -2.78910296607766,
-2.57178346785915, -3.28445145020619, -3.28445145020619, -2.42213276484722,
-2.38709926337358, -2.99568739417641, -2.65138790450656, -1.58074660422518,
-1.91906056315361, -2.90337379582732, -2.20117735700684, -1.29303194740331,
-2.2494711668963, -1.53224235243069, -2.04481468859739, -0.929377541149286,
-0.0656900034556943, -2.20508088335906, -0.578502434372448, -0.858925093713048,
-2.96970181427872, -2.97862851965814, -3.31779605911778, -2.08207766441392,
-2.4848907725881, -1.99650762208841, -2.17076028347941, -1.27061305909,
-1.92537835567221, -1.49409886851971, -2.32667673830125, -1.90852299188928,
-2.88390697795999, -2.9172411130509, -3.5788915130035, -2.92151370364156,
-2.71478221054495, -3.40505963822921, -3.21129210245385, -2.66481268506047,
-3.27494238949828, -3.37230078139583, -3.53346393522174, -3.99626066800013,
-3.41713548837371, -3.67409843863548, -3.50182175058264, -3.37467436298585,
-3.40580625489191, -2.99003362011982, -3.66653724632765, -3.61588309823573,
-3.18167532125541, -3.35619362672467, -2.8806023910338, -2.23185392156432,
-0.626338062660262, -2.00482507082523, -1.7359344838577, -0.800609837957656,
-1.93997314433068, -0.93482911672218, -2.63652739226028, -1.93546315176661,
-1.17524044037369, -2.274143231604, -2.69679235052359, -2.23731851002543,
-1.86716852238077, -0.716926538642468, -1.30258784685856, -1.08194224250233,
-0.930472602419788, 1.13081617308243, 2.57874064965174, -0.388461792958877,
-0.340421132850094, 0.638071432169484, -1.57318833539501, 4.40961161388978,
1.62060735764472, -0.674369921428344, 1.76679629890753, -0.748857461264583,
0.520407646064921, -0.575555044309392, 4.34511452569889, 3.34194433067617,
3.21304624521961, 1.97147139745885, 3.45052500620869, -0.399783726365629,
-0.399783726365629, -0.519126753266423, -0.924795366280377, -0.924795366280377,
4.49023560078066, 2.40210157814194, 2.40210157814194, 3.48634734098189,
2.72330542145941, 2.72330542145941, 2.44175433903678, 1.83754103018167,
1.83754103018167, 2.00744190890702, 2.84495436947268, 2.84495436947268,
3.68678762972081, 3.16657087164265, 3.16657087164265, 2.54382298188246,
3.23525009614645, 3.23525009614645, 2.25680930181096, 1.44116917115332,
1.44116917115332, -0.510916286085479, -0.212859586548733, -0.212859586548733,
-0.740774827758169, -0.740774827758169, -0.740774827758169, -0.740774827758169,
-0.41701391913469, -0.41701391913469, -1.22601225946438, -0.632288989161026,
-0.632288989161026, -1.04309509051099, -1.17291896442253, -1.17291896442253,
-1.17291896442253, -1.17291896442253, -1.17291896442253, -1.11302828507398,
-2.78220449305609, -2.78220449305609, -2.86224137302201, -2.86224137302201,
-2.86224137302201, 1.34122649704384, 1.33515381941954, 1.33515381941954,
2.56866171677324, 0.963485086246604, 0.963485086246604, 4.22653525972732,
5.35095831479335, 5.35095831479335, 3.93448842430935)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -216L))
df2 = structure(list(Fit = c(-3.36981025066699, -2.83074686834444,
-3.4112766646918, -2.45754914212114, -2.16728424394746, -3.61860873481586,
-3.61860873481586, -2.2916834860219, -1.37942237747605, -3.95034004701435,
-1.46235520552567, -2.91367969639407, -3.95034004701435, -3.57714232079105,
-2.16728424394746, -2.20875065797227, -2.6648812122452, -2.74781404029482,
-2.00141858784822, -3.07954535249331, -3.07954535249331, -3.24541100859256,
-3.36981025066699, -2.49901555614595, -2.6648812122452, -3.95034004701435,
-3.0380789384685, -3.24541100859256, -3.12101176651812, -2.58194838419558,
-2.83074686834444, -3.4112766646918, -2.6648812122452, -2.99661252444369,
-3.61860873481586, -2.99661252444369, -3.57714232079105, -2.74781404029482,
-3.20394459456775, -2.25021707199708, -2.37461631407152, -3.61860873481586,
-3.36981025066699, -2.95514611041888, -3.61860873481586, -3.32834383664218,
-2.74781404029482, -3.20394459456775, -2.00141858784822, -3.12101176651812,
-3.66007514884067, -3.47347628572902, -3.95034004701435, -3.45274307871661,
-3.45274307871661, -2.83074686834444, -3.95034004701435, -2.99661252444369,
-3.61860873481586, -2.16728424394746, -3.70154156286548, -2.37461631407152,
-2.83074686834444, -2.12581782992265, -3.32834383664218, -2.6648812122452,
-3.0380789384685, -3.20394459456775, -3.4112766646918, -2.83074686834444,
-2.91367969639407, -3.95034004701435, -2.6648812122452, -3.36981025066699,
-2.2916834860219, -3.45274307871661, -2.37461631407152, -1.54528803357529,
-2.91367969639407, -3.49420949274142, -2.95514611041888, -3.32834383664218,
-2.45754914212114, -3.57714232079105, -3.36981025066699, -3.45274307871661,
-3.95034004701435, -3.16247818054293, -2.83074686834444, -3.36981025066699,
-2.6648812122452, -2.6648812122452, -2.45754914212114, -3.32834383664218,
-3.66007514884067, -3.36981025066699, -3.95034004701435, -2.83074686834444,
-2.91367969639407, -3.45274307871661, -2.20875065797227, -2.6648812122452,
-3.95034004701435, -3.95034004701435, -2.74781404029482, -2.83074686834444,
-2.99661252444369, -3.57714232079105, -3.95034004701435, -1.54528803357529,
-2.45754914212114, -2.91367969639407, -3.20394459456775, -2.83074686834444,
-3.70154156286548, -1.29648954942642, -3.36981025066699, -2.20875065797227,
-2.45754914212114, -3.24541100859256, -2.6648812122452, -1.87701934577378,
-2.83074686834444, -2.37461631407152, -3.95034004701435, -3.07954535249331,
-2.6648812122452, -2.99661252444369, -2.91367969639407, -3.49420949274142,
-3.61860873481586, -3.20394459456775, 0.44509983961565, 0.776831151814141,
-2.37461631407152, -3.61860873481586, -0.259829198806142, -2.37461631407152,
3.72094654757574, -1.37942237747605, -3.12101176651812, 0.196301355466782,
-2.20875065797227, -0.923291823203123, -2.58194838419558, 0.569499081690084,
0.486566253640461, 1.56469301828556, -1.62822086162492, 3.59654730550131,
-0.923291823203123, -2.2916834860219, -0.633026925029444, 0.486566253640461,
2.14522281463291, 1.15002887803744, 2.60135336890584, 2.97455109512914,
1.02562963596301, 1.89642433048405, 2.97455109512914, 3.38921523537725,
3.22334957927801, 3.47214806342688, 1.77202508840961, 4.05267785977424,
3.30628240732763, -0.0939635427068968, -0.342762026855765, -0.0939635427068968,
1.23296170608707, -0.342762026855765, -0.425694854905387, -3.36981025066699,
-0.674493339054255, -1.08915747930237, -1.37942237747605, -2.08435141589784,
-1.46235520552567, -1.46235520552567, -2.83074686834444, -2.25021707199708,
-3.0380789384685, 3.05748392317876, 1.15002887803744, 2.68428619695546,
0.196301355466782, 2.26962205670735, 2.85015185305471, 2.85015185305471
), Date = c("2002-01-03", "2002-02-07", "2002-03-07", "2002-04-04",
"2002-05-02", "2002-06-06", "2002-07-04", "2002-09-12", "2002-10-10",
"2002-11-07", "2002-12-05", "2003-01-09", "2003-02-06", "2003-03-06",
"2003-04-03", "2003-05-08", "2003-06-05", "2003-07-10", "2003-09-04",
"2003-10-02", "2003-11-06", "2003-12-04", "2004-01-08", "2004-02-05",
"2004-03-04", "2004-04-01", "2004-05-06", "2004-06-03", "2004-07-01",
"2004-09-02", "2004-10-07", "2004-11-04", "2004-12-02", "2005-01-13",
"2005-02-03", "2005-03-03", "2005-04-07", "2005-05-04", "2005-06-02",
"2005-07-07", "2005-09-01", "2005-10-06", "2005-11-03", "2005-12-01",
"2006-01-12", "2006-02-02", "2006-03-02", "2006-04-06", "2006-05-04",
"2006-06-08", "2006-07-06", "2006-08-03", "2006-10-05", "2006-11-02",
"2006-12-07", "2007-01-11", "2007-02-08", "2007-03-08", "2007-04-12",
"2007-05-10", "2007-06-06", "2007-07-05", "2007-09-06", "2007-10-04",
"2007-11-08", "2007-12-06", "2008-01-10", "2008-02-07", "2008-03-06",
"2008-04-10", "2008-05-08", "2008-06-05", "2008-07-03", "2008-08-07",
"2008-09-04", "2008-10-02", "2008-11-06", "2008-12-04", "2009-01-15",
"2009-02-05", "2009-03-05", "2009-04-02", "2009-05-07", "2009-06-04",
"2009-07-02", "2009-08-06", "2009-09-03", "2009-10-08", "2009-11-05",
"2009-12-03", "2010-01-14", "2010-02-04", "2010-03-04", "2010-04-08",
"2010-05-06", "2010-06-10", "2010-07-08", "2010-08-05", "2010-09-02",
"2010-10-07", "2010-11-04", "2010-12-02", "2011-01-13", "2011-02-03",
"2011-03-03", "2011-04-07", "2011-05-05", "2011-06-09", "2011-07-07",
"2011-08-04", "2011-09-08", "2011-10-06", "2011-11-03", "2011-12-08",
"2012-01-12", "2012-02-09", "2012-03-08", "2012-04-04", "2012-05-03",
"2012-06-06", "2012-07-05", "2012-08-02", "2012-09-06", "2012-10-04",
"2012-11-08", "2012-12-06", "2013-01-10", "2013-02-07", "2013-03-07",
"2013-04-04", "2013-05-02", "2013-06-06", "2013-07-04", "2013-08-01",
"2013-09-05", "2013-10-02", "2013-11-07", "2013-12-05", "2014-01-09",
"2014-02-06", "2014-03-06", "2014-04-03", "2014-05-08", "2014-06-05",
"2014-07-03", "2014-08-07", "2014-09-04", "2014-10-02", "2014-11-06",
"2014-12-04", "2015-01-22", "2015-03-05", "2015-04-15", "2015-06-03",
"2015-07-16", "2015-09-03", "2015-10-22", "2015-12-03", "2016-01-21",
"2016-03-10", "2016-04-21", "2016-06-02", "2016-07-21", "2016-09-08",
"2016-10-20", "2016-12-08", "2017-01-19", "2017-03-09", "2017-04-27",
"2017-06-08", "2017-07-20", "2017-09-07", "2017-10-26", "2017-12-14",
"2018-01-25", "2018-03-08", "2018-04-26", "2018-06-14", "2018-07-26",
"2018-09-13", "2018-10-25", "2018-12-13", "2019-01-24", "2019-03-07",
"2019-04-10", "2019-06-06", "2019-07-25", "2019-09-12", "2019-10-24",
"2019-12-12")), class = "data.frame", row.names = c(NA, -190L
))
I then used these codes to interpolate the series:
# first series
monthss <- data.frame(Date = seq(as.Date("2002-01-03"), as.Date("2019-12-12"), by = "month"), stringsAsFactors = F)
df1 <- left_join(x = monthss, y = df1, by = "Date")
df1 <- na.approx(object = df1$'as it comes from the previous step', rule = 2)
# second series
df2 <- left_join(x = monthss, y = df2, by = "Date")
df2 <- na.approx(object = df2$'as it comes from the previous step', rule = 2)
However, in the first case, I get a weird result when using left_join (I get too many NAs that don't correspond to the missing dates), while in the second case, when using left_join, I get Error: cannot join a Date object with an object that is not a Date object. For this error, I tried to look for some solutions online, but what I found doesn't help.
Can anyone help me fix this?
Thanks!

You get the left_join error in the second case because df2$Date is not of class Date. This fixes the second join:
library(lubridate)
# second series
# df2$Date is stored as character ("YYYY-MM-DD"). Parse it into a Date so its
# type matches monthss$Date; left_join() refuses to join Date with non-Date.
df2$Date <- ymd(df2$Date)
df2 <- left_join(x = monthss, y = df2, by = "Date")
# After the join the values to interpolate live in the Fit column.
# rule = 2 is forwarded to approx(); presumably it fills leading/trailing NAs
# with the nearest observed value -- confirm against ?approx.
df2 <- na.approx(object = df2$Fit, rule = 2)
Changing this will fix the first join:
# first series
# Build a daily grid: the observations fall on varying days of the month, so
# every calendar day in the range must be present for the join to match them.
monthss <- data.frame(
  Date = seq(as.Date("2002-01-03"), as.Date("2019-12-12"), by = "day")
)
You generated the sequence by month, so it only contains the 3rd of each month (2002-01-03, 2002-02-03, etc.). The join therefore only matched observations dated on the 3rd; every other date for which df1 has a Fit value found no match, which is where the unexpected NAs come from.

Related

format dates as quarters in ggplot2 x axis

I have the following chart generated using ggplot2. Is there a way in which I can format my dates on the x axis as quarters (for example 2022-Q1) instead of the current date format which I have?
data <- structure(list(Date = structure(c(19083, 19086, 19087, 19088,
19089, 19090, 19093, 19094, 19095, 19096, 19101, 19102, 19103,
19104, 19107, 19108, 19109, 19110, 19111, 19114, 19115, 19116,
19117, 19118, 19121, 19122, 19123, 19124, 19125, 19128, 19129,
19130, 19131, 19132, 19135, 19136, 19137, 19138, 19139, 19143,
19144, 19145, 19146, 19149, 19150, 19151, 19152, 19153, 19156,
19157, 19158, 19159, 19160, 19164, 19165, 19166, 19167, 19170,
19171, 19172, 19173, 19174, 19178, 19179, 19180, 19181, 19184,
19185, 19186, 19187, 19188, 19191, 19192, 19193, 19194, 19195,
19198, 19199, 19200, 19201, 19202, 19205, 19206, 19207, 19208,
19209, 19212, 19213, 19214, 19215, 19216, 19219, 19220, 19221,
19222, 19223, 19226, 19227, 19228, 19229, 19230, 19233, 19234,
19235, 19236, 19237, 19241, 19242, 19243, 19244, 19247), class = "Date"),
US = c(-0.099, -0.082, -0.102, -0.276, -0.265, -0.214, -0.254,
-0.321, -0.263, -0.195, -0.189, -0.077, -0.025, -0.278, -0.543,
-0.595, -0.638, -0.587, -0.571, -0.754, -0.681, -0.597, -0.68,
-0.738, -0.942, -0.882, -0.657, -0.673, -0.468, -0.394, -0.323,
-0.357, -0.604, -0.533, -0.409, -0.529, -0.577, -0.496, -0.344,
-0.362, -0.326, -0.367, -0.365, -0.318, -0.43, -0.405, -0.515,
-0.658, -0.77, -1.033, -1.058, -1.217, -1.112, -1.096, -1.114,
-1.158, -1.013, -0.915, -0.863, -1.099, -1.192, -1.236, -1.395,
-1.144, -1.09, -1.167, -1.325, -1.378, -1.343, -1.492, -1.591,
-1.56, -1.254, -1.505, -1.327, -1.505, -1.542, -1.528, -1.526,
-1.591, -1.416, -1.513, -1.577, -1.389, -1.429, -1.486, -1.547,
-1.649, -1.61, -1.585, -1.475, -1.393, -1.307, -1.376, -1.571,
-1.744, -2.067, -2.294, -2.401, -2.423, -2.449, -2.69, -2.633,
-2.654, -2.812, -2.909, -3.037, -2.912, -2.456, -2.523, -2.51
)), row.names = c(NA, -111L), class = c("tbl_df", "tbl",
"data.frame"))
library(tidyverse)
ggplot(data, aes(Date, US)) +
geom_line(size=2) +
scale_x_date(breaks = '3 months')
You could try making use of the zoo package:
ggplot(data, aes(Date, US)) +
  geom_line(size = 2) +
  scale_x_date(
    breaks = "3 months",
    # Convert each break date to a zoo "yearqtr" explicitly and let the
    # format() generic dispatch on it; "%q" is the quarter number, giving
    # labels such as "2022-Q2".
    labels = function(x) format(zoo::as.yearqtr(x), "%Y-Q%q")
  )

How to establish if the dates in a column are unique?

I have a simple question to ask. I couldn't find a solution on SO.
I have a column vector of dates and I would like to see whether months are unique or not. I tried to use unique but I may have used it in the wrong way.
An example:
Date = structure(c(11690, 11725, 11753, 11781, 11809, 11844, 11872,
11900, 11942, 11970, 11998, 12026, 12061, 12089, 12117, 12145,
12180, 12208, 12243, 12264, 12265, 12299, 12327, 12362, 12390,
12425, 12453, 12481, 12509, 12544, 12572, 12600, 12635, 12663,
12698, 12726, 12754, 12796, 12817, 12845, 12880, 12907, 12936,
12971, 12999, 13027, 13062, 13090, 13118, 13160, 13181, 13209,
13244, 13272, 13307, 13335, 13363, 13392, 13426, 13454, 13489,
13524, 13552, 13580, 13615, 13643, 13670, 13699, 13727, 13762,
13790, 13825, 13853, 13888, 13916, 13944, 13979, 14007, 14035,
14063, 14098, 14126, 14154, 14160, 14189, 14217, 14259, 14280,
14308, 14336, 14371, 14399, 14427, 14462, 14490, 14525, 14553,
14581, 14623, 14644, 14672, 14707, 14735, 14770, 14798, 14826,
14854, 14889, 14917, 14945, 14987, 15008, 15036, 15071, 15099,
15134, 15162, 15190, 15225, 15253, 15281, 15316, 15351, 15379,
15407, 15434, 15463, 15497, 15526, 15554, 15589, 15617, 15652,
15680, 15715, 15743, 15771, 15799, 15827, 15862, 15890, 15918,
15953, 15980, 16016, 16044, 16079, 16107, 16135, 16163, 16198,
16226, 16254, 16289, 16317, 16345, 16380, 16408, 16457, 16467,
16499, 16540, 16556, 16589, 16632, 16648, 16681, 16730, 16740,
16772, 16821, 16832, 16870, 16912, 16922, 16954, 17003, 17014,
17052, 17094, 17106, 17143, 17185, 17198, 17234, 17283, 17287,
17325, 17367, 17379, 17416, 17465, 17471, 17514, 17556, 17563,
17598, 17647, 17652, 17696, 17738, 17744, 17787, 17829, 17836,
17878, 17920, 17928, 17962, 17996, 18017, 18053, 18102, 18109,
18151, 18193, 18201, 18242), class = "Date")
In this column vector I would like to see whether there are two observations in the same month (there are 2 for "2003-07" and "2008-10"). Can I check this with a single line of code?
Thanks!
In base R, we can format the Date to get only year and month, use table to count their occurrences, Filter to select only those month-year which occur more than once.
names(Filter(function(x) x > 1, table(format(Date, "%Y-%m"))))
#[1] "2003-07" "2008-10"
Same logic using zoo::as.yearmon.
names(Filter(function(x) x > 1, table(zoo::as.yearmon(Date))))
#[1] "Jul 2003" "Oct 2008"
library('lubridate')
library('tidyverse')
Date %>%
enframe() %>%
count(year(value), month(value)) %>%
filter(n > 1)
One line with as.yearmon from zoo
library(zoo)
# duplicated() already returns a logical vector, so it can index Date directly;
# this returns the second and later dates seen within an already-seen month.
Date[duplicated(as.yearmon(Date))]
[1] "2003-07-31" "2008-10-08"
Use str_sub to get the year-month and use table to count the frequency of occurrence:
library(tidyverse)
year_month <- str_sub(Date, 1, 7) # extract characters 1-7 of each date, i.e. the "YYYY-MM" part
result <- as_tibble(table(year_month))
# Or do the same in a single pipeline: count each year-month, then convert to a tibble.
year_month2 <- str_sub(Date, 1, 7) %>%
table() %>%
as_tibble()
# Or additionally filter on the count column n to keep only year-months with more than one occurrence.
year_month3 <- str_sub(Date, 1, 7) %>%
table() %>%
as_tibble() %>%
filter(n > 1)

"Breakpoints" don't display dates but a continuous measure

I'm trying to detect some structural breaks in my series. The problem is that it displays continuous numbers rather than dates, despite my series being a ts object.
I found this solution but it doesn't work in my case.
This is my dataset and code:
df = structure(list(Date = structure(c(11690, 11725, 11753, 11781,
11809, 11844, 11872, 11900, 11942, 11970, 11998, 12026, 12061,
12089, 12117, 12145, 12180, 12208, 12243, 12265, 12299, 12327,
12362, 12390, 12425, 12453, 12481, 12509, 12544, 12572, 12600,
12631, 12663, 12698, 12726, 12754, 12796, 12817, 12845, 12880,
12907, 12936, 12971, 12996, 13027, 13062, 13090, 13118, 13160,
13181, 13209, 13244, 13272, 13307, 13335, 13363, 13392, 13426,
13454, 13489, 13524, 13552, 13580, 13615, 13643, 13670, 13699,
13726, 13762, 13790, 13825, 13853, 13888, 13916, 13944, 13979,
14007, 14035, 14063, 14098, 14126, 14154, 14189, 14217, 14259,
14280, 14308, 14336, 14371, 14399, 14427, 14462, 14490, 14525,
14553, 14581, 14623, 14644, 14672, 14707, 14735, 14770, 14798,
14826, 14854, 14889, 14917, 14945, 14987, 15008, 15036, 15071,
15099, 15134, 15162, 15190, 15225, 15253, 15281, 15316, 15351,
15379, 15407, 15434, 15463, 15497, 15526, 15554, 15589, 15617,
15652, 15680, 15715, 15743, 15771, 15799, 15827, 15862, 15890,
15918, 15953, 15980, 16016, 16044, 16079, 16107, 16135, 16163,
16198, 16226, 16254, 16289, 16317, 16345, 16380, 16408, 16457,
16467, 16499, 16540, 16556, 16589, 16632, 16648, 16681, 16730,
16740, 16772, 16821, 16832, 16870, 16912, 16922, 16954, 17003,
17014, 17052, 17094, 17106, 17143, 17185, 17198, 17234, 17283,
17287, 17325, 17367, 17379, 17416, 17465, 17471, 17514, 17556,
17563, 17598, 17647, 17652, 17696, 17738, 17744, 17787, 17829,
17836, 17878, 17920, 17928, 17962, 17996, 18017, 18053, 18102,
18109, 18151, 18193, 18201, 18242), class = "Date"), Fit = c(-2.01864866574525,
-2.51081772611801, -3.07896216512166, -3.02724722640642, -0.764567739958455,
-1.81459657078637, -2.13093106123547, -2.13093106123547, -1.91543051022373,
-1.31418467170089, -1.86573850139921, -2.42539556395029, -1.26414303389104,
-2.5433900359616, -1.99767537794132, -1.34728409808229, -1.64315561542246,
-0.687106946387411, -2.48041219070826, -2.48041219070826, -1.78680159845671,
-2.13687301896279, -2.6123923387608, -2.84563515334999, -3.41506073833104,
-2.74565641471061, -2.3682788731863, -1.77410755661286, -2.46191758167165,
-2.34829604543204, -2.37030627525843, -2.37030627525843, -1.75944822651175,
-2.21875944722698, -2.60249841953241, -2.6758310533823, -1.99157259723667,
-2.34860918772813, -3.24977356678388, -3.1998805120359, -3.64471855523435,
-2.80762315792921, -1.46910836105049, -1.46910836105049, -2.24153954648439,
-2.64718944648088, -2.61088260257325, -2.45889016663966, -2.59732356608009,
-3.49037732690643, -2.75284369990193, -2.56284320115193, -3.01470163344929,
-2.24267403694233, -3.36759206183078, -2.65899770326269, -2.65899770326269,
-3.83487166356133, -2.30405890853423, -3.83487166356133, -2.91420930066836,
-2.92649062542454, -2.45288174087111, -2.59203353843301, -2.37211828478634,
-2.35485833573613, -3.28807932670479, -3.28807932670479, -2.69856893402308,
-2.4482421908289, -3.42965769805337, -2.4002640291758, -1.72498017056001,
-2.10246950134994, -2.75989530409431, -2.04609226712013, -1.50354129352453,
-1.721866774994, -1.42652131446034, -1.99149928941641, -0.924508173463412,
-0.34424720787331, -1.47956887747857, -0.699260660882747, -0.705970004477605,
-2.89615299118885, -2.87168709242964, -3.49698896688496, -1.80133944039088,
-2.3066390154612, -2.16578274820764, -1.62064416630292, -1.50034889686538,
-1.64551702528081, -1.54888542275039, -2.36526073757675, -2.17980843362752,
-2.61987658921009, -2.99580131757171, -3.27224528690084, -2.90968038360951,
-2.43786428440244, -3.53447897261775, -2.94164730632451, -2.67914051197011,
-3.08963971104142, -3.30489291781406, -3.16112222668117, -3.78875309229899,
-3.27799815735179, -3.27546357519604, -3.28715323339141, -3.277230212033,
-2.73537305926061, -2.63360778909794, -3.42285993586989, -3.02592822360864,
-2.80491835054881, -3.1610709896381, -2.69912996631718, -2.48975331263934,
-0.134524884114962, -2.3485759078928, -1.67019370390805, -1.30630530826772,
-2.2627030307026, -1.19967822767006, -2.18902328617136, -2.32822018421121,
-1.05335780233708, -2.32765305050142, -3.70136681094428, -2.47624061269887,
-2.2395891355029, -0.873612387550348, -2.52750186765166, -1.58254587448088,
-1.3519682697086, -2.67716755653968, -2.09120993997918, -2.83947106437091,
-1.59227436938979, -2.70393468772428, -3.07475393381032, -1.72535933812472,
-2.62864985613023, -2.1788856069182, -1.66072722296379, -2.02593106477748,
-0.236862069023111, -2.20046381510765, 2.67747589830398, 2.03103654671807,
-0.411843127888723, 0.15392859458, 3.15264600488878, -0.115883494946465,
-0.115883494946465, -2.48408112888983, -2.13179786204659, -2.13179786204659,
-0.421916196665926, -1.81454302259545, -1.81454302259545, -0.719344207794365,
2.30623888786222, 2.30623888786222, 0.233349485130917, 0.807655736612547,
0.807655736612547, 0.00810498434400109, 1.73561337499853, 1.73561337499853,
2.05294933680988, 2.52332617911213, 2.52332617911213, 1.6590362509139,
2.44897469036036, 2.44897469036036, 1.48162277916561, 0.109012820753664,
0.109012820753664, -0.552382527186447, 0.342735574558364, 0.342735574558364,
0.352860787128766, 0.352860787128766, 0.352860787128766, 0.352860787128766,
0.726520452040748, 0.726520452040748, 0.176144461112964, 2.28171712015304,
2.28171712015304, 0.256037205994603, 0.10686264754173, 0.10686264754173,
0.10686264754173, 0.10686264754173, 0.10686264754173, -0.871047910469186,
-1.61892724112359, -1.61892724112359, -2.04847571973674, -2.04847571973674,
-2.04847571973674, 1.22730660297267, 1.94291846403141, 1.94291846403141,
2.64766715573213, 1.66439852581802, 1.66439852581802, 3.92242045719081,
2.92445832371034, 2.92445832371034, 4.09796304281725)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -216L))
word = ts(df$Fit, start = c(2002, 1), end = c(2019, 12), frequency = 12)
breaks = breakpoints(df$Fit ~ 1, h = 0.1)
plot(breaks)
coef(breaks, breaks = 2)
word[breaks$breakpoints] # July 2014
plot(df[,2], type = "b")
lines(fitted(breaks), col = 4, lwd = 2)
abline(v = df[breaks$breakpoints], lty = 2)
Can anyone help me sort this out?
Thanks a lot!
You created a timeseries object but you did not use it:
plot(word)
breaks <- breakpoints(word ~ 1, h = 0.1)
lines(fitted(breaks), col="blue", lwd=2)
abline(v = time(word)[breaks$breakpoints], lty = 2)
If I remember correctly, for irregularly spaced intervals you use zoo, you can try something like below:
library(strucchange)
library(zoo)
# Estimate breakpoints in the mean of Fit (intercept-only model). h is the
# minimal segment size, given here as a fraction of the sample.
breaks <- breakpoints(df$Fit ~ 1, h = 0.1)
# Build the zoo series from the columns by name. Unlike df[, 2] / df[, 1],
# this also works when df is a tibble, where single-column `[` subsetting
# returns a one-column tibble instead of a plain vector.
zoo_df <- zoo(df$Fit, order.by = df$Date)
plot(zoo_df, type = "b")
# breakpoints are observation indices; map them to dates for the x-axis.
abline(v = time(zoo_df)[breaks$breakpoints], lty = 2)
This gets you the plot with date on x-axis. Now to get the x-axis values, you do time(zoo_df) and it's a matter of subsetting this according to the index. For the fitted line, if you don't want the lines to join, you need to split them into breakpoints + 1 groups, and draw each one separately:
# Assign every observation to a segment: the breakpoints split the series into
# length(breakpoints) + 1 consecutive groups.
n_obs <- length(zoo_df)
bp <- breaks$breakpoints
grps <- cut(
  seq_len(n_obs),
  breaks = c(0, bp, Inf),
  labels = seq_len(length(bp) + 1L)
)
# Draw the fitted values one segment at a time so the line is not connected
# across breakpoints.
for (g in levels(grps)) {
  in_seg <- grps == g
  lines(time(zoo_df)[in_seg], fitted(breaks)[in_seg])
}

ggplot and two different geom_line(): the legend does not appear

I have the following code (dput data for the data sets is here):
ruz <- structure(list(date = structure(c(16617, 16618, 16619, 16622,
16623, 16624, 16625, 16626, 16629, 16630, 16631, 16632, 16633,
16636, 16637, 16638, 16639, 16640, 16643, 16644, 16645, 16646,
16647, 16650, 16651, 16652, 16653, 16654, 16657, 16658, 16659,
16660, 16661, 16664, 16665, 16666, 16667, 16668, 16671, 16672,
16673, 16674, 16675, 16678, 16679, 16680, 16681, 16682, 16685,
16686, 16687, 16688, 16689, 16692, 16693, 16694, 16695, 16696,
16699, 16700, 16701, 16702, 16703, 16706, 16707, 16708, 16709,
16710, 16713, 16714, 16715, 16716, 16717, 16720, 16721, 16722,
16723, 16724, 16727, 16728, 16729, 16730, 16731, 16734, 16735,
16736, 16737, 16738, 16741, 16742, 16743, 16744, 16745, 16748,
16749, 16750, 16751, 16752, 16755, 16756, 16757, 16758, 16759,
16762, 16763, 16764, 16765, 16766, 16769), class = "Date"), val1 = c(61.8954,
61.6297, 61.7859, 62.2135, 62.692, 63.026, 63.1511, 63.008, 62.7991,
62.5304, 62.3971, 62.1703, 61.9535, 61.7927, 61.8367, 62.1856,
62.7663, 63.5846, 64.859, 66.0745, 65.9327, 65.1387, 65.8362,
67.9171, 68.8917, 68.7714, 69.295, 69.9932, 70.0878, 70.0563,
71.0985, 71.7451, 71.9923, 72.3836, 72.6186, 72.7895, 74.1316,
76.3577, 79.6818, 80.4601, 79.637, 77.1905, 74.7982, 74.0868,
73.6844, 74.7815, 75.1829, 75.0874, 76.0362, 76.5334, 76.1729,
76.2661, 76.521, 76.5815, 76.1411, 74.7473, 74.229, 74.8073,
74.8083, 74.2189, 73.7976, 74.0765, 73.7323, 73.5319, 73.8853,
73.7351, 73.2462, 73.7254, 73.4657, 72.5227, 70.9683, 70.1357,
69.7459, 69.7823, 70.714, 71.5863, 71.3391, 70.2717, 70.1001,
70.3965, 70.964, 70.901, 69.6083, 69.0542, 70.325, 71.2619, 70.6912,
70.5258, 70.6195, 69.9786, 68.9845, 68.7403, 69.5909, 69.6324,
69.2801, 69.3884, 70.4129, 71.6024, 70.7705, 69.6673, 69.2706,
69.2517, 69.2788, 69.3983, 69.7819, 69.8404, 69.8002, 69.9816,
70.1287)), .Names = c("date", "val1"), row.names = c("2015-07-01",
"2015-07-02", "2015-07-03", "2015-07-06", "2015-07-07", "2015-07-08",
"2015-07-09", "2015-07-10", "2015-07-13", "2015-07-14", "2015-07-15",
"2015-07-16", "2015-07-17", "2015-07-20", "2015-07-21", "2015-07-22",
"2015-07-23", "2015-07-24", "2015-07-27", "2015-07-28", "2015-07-29",
"2015-07-30", "2015-07-31", "2015-08-03", "2015-08-04", "2015-08-05",
"2015-08-06", "2015-08-07", "2015-08-10", "2015-08-11", "2015-08-12",
"2015-08-13", "2015-08-14", "2015-08-17", "2015-08-18", "2015-08-19",
"2015-08-20", "2015-08-21", "2015-08-24", "2015-08-25", "2015-08-26",
"2015-08-27", "2015-08-28", "2015-08-31", "2015-09-01", "2015-09-02",
"2015-09-03", "2015-09-04", "2015-09-07", "2015-09-08", "2015-09-09",
"2015-09-10", "2015-09-11", "2015-09-14", "2015-09-15", "2015-09-16",
"2015-09-17", "2015-09-18", "2015-09-21", "2015-09-22", "2015-09-23",
"2015-09-24", "2015-09-25", "2015-09-28", "2015-09-29", "2015-09-30",
"2015-10-01", "2015-10-02", "2015-10-05", "2015-10-06", "2015-10-07",
"2015-10-08", "2015-10-09", "2015-10-12", "2015-10-13", "2015-10-14",
"2015-10-15", "2015-10-16", "2015-10-19", "2015-10-20", "2015-10-21",
"2015-10-22", "2015-10-23", "2015-10-26", "2015-10-27", "2015-10-28",
"2015-10-29", "2015-10-30", "2015-11-02", "2015-11-03", "2015-11-04",
"2015-11-05", "2015-11-06", "2015-11-09", "2015-11-10", "2015-11-11",
"2015-11-12", "2015-11-13", "2015-11-16", "2015-11-17", "2015-11-18",
"2015-11-19", "2015-11-20", "2015-11-23", "2015-11-24", "2015-11-25",
"2015-11-26", "2015-11-27", "2015-11-30"), class = "data.frame")
dfr <- structure(list(date = structure(c(16616, 16646, 16677, 16708,
16738, 16769), class = "Date"), val2 = c(0, 0.0920000000000001,
0.120392, 0.136077488, 0.0917704659680001, 0.0874033841041282
)), .Names = c("date", "val2"), row.names = c("2015-06", "2015-07",
"2015-08", "2015-09", "2015-10", "2015-11"), class = "data.frame")
ggplot:
ggplot() +
geom_line(data = ruz, aes(date, val1), size = 1.5, color = "blue") +
geom_line(data = dfr, aes(date, val2 * 100), size = 1.5, color = "red") +
scale_fill_manual(values = c("blue", "red"))
which produces the following graph:
My question is, how to modify it to show a legend after all?
If you want to avoid combining the data.frames, you can do this:
ggplot() +
geom_line(data = ruz, aes(date, val1, color = "a"), size = 1.5) +
geom_line(data = dfr, aes(date, val2 * 100, color = "b"), size = 1.5) +
scale_color_manual(name = "Colors",
values = c("a" = "blue", "b" = "red"))
In order to get a legend, you have to map something to color within aes. You can then use scale_color_manual to define the colors for the mapped character values. There are situations where this trick is easier and results in more readable code than reshaping/combining the data.
You could rbind them and use color
# Tag each data set so the two series can be told apart after stacking.
ruz$type <- "ruz"
# Rescale val2 by 100, matching the val2 * 100 used in the original plot call.
dfr$val2 <- dfr$val2 * 100
dfr$type <- "dfr"
# rbind() needs matching column names, so ruz's val1 column is renamed to val2.
names(ruz) <- names(dfr)
df <- rbind(ruz, dfr)
# Mapping `type` to colour is what creates the legend. The line size belongs
# in geom_line(): extra arguments passed to ggplot() are ignored.
ggplot(df, aes(date, val2, color = type)) +
  geom_line(size = 1.5)

object of type 'builtin' is not subsettable [duplicate]

This question already has answers here:
Error in <my code> : object of type 'closure' is not subsettable
(6 answers)
Closed 8 years ago.
I wrote some code last week which worked perfectly.
I'm trying to use it again today. The part in trouble takes a subset of a data frame and applies diff and diffinv from column 2 to the length of the data frame/matrix.
This is my data frame:
dput(d)
structure(list(Tiempo = c(1158, 1158.2, 1158.4, 1158.6, 1158.8,
1159, 1159.2, 1159.4, 1159.6, 1159.8, 1160), UT1 = c(10500, 10500,
10499, 10499, 10499, 10499, 10498, 10498, 10498, 10498, 10497
), UT2 = c(8238, 8238, 8238, 8238, 8238, 8238, 8238, 8238, 8238,
8238, 8238), UT3 = c(16991, 16991, 16991, 16991, 16991, 16991,
16990, 16990, 16990, 16989, 16989), UT4 = c(17903, 17904, 17905,
17906, 17906, 17907, 17908, 17910, 17912, 17913, 17914), UT5 = c(16044,
16044, 16044, 16044, 16044, 16044, 16044, 16044, 16044, 16044,
16044), UT6 = c(16261, 16261, 16261, 16261, 16261, 16261, 16261,
16261, 16261, 16261, 16261), UT7 = c(17094, 17094, 17094, 17094,
17094, 17094, 17094, 17094, 17094, 17094, 17094), UT8 = c(19036,
19037, 19036, 19037, 19037, 19037, 19038, 19038, 19039, 19040,
19041), UT9 = c(15192, 15192, 15192, 15191, 15191, 15191, 15190,
15190, 15189, 15189, 15188), UT10 = c(7113, 7113, 7113, 7113,
7113, 7113, 7113, 7113, 7113, 7113, 7113), UT11 = c(3701, 3701,
3701, 3700, 3700, 3698, 3696, 3695, 3692, 3690, 3689), UT12 = c(11666,
11666, 11666, 11666, 11666, 11666, 11666, 11667, 11669, 11670,
11671), TR1 = c(15805, 15805, 15805, 15805, 15805, 15805, 15805,
15805, 15805, 15805, 15805), TR2 = c(13718, 13718, 13718, 13718,
13718, 13718, 13718, 13718, 13718, 13718, 13718), TR3 = c(19047,
19048, 19050, 19050, 19049, 19050, 19055, 19070, 19071, 19079,
19087), TR4 = c(15847, 15847, 15847, 15849, 15849, 15849, 15849,
15849, 15849, 15848, 15849), TR5 = c(21087, 21087, 21087, 21087,
21087, 21087, 21087, 21087, 21087, 21087, 21086), TR6 = c(11434,
11434, 11434, 11434, 11434, 11434, 11433, 11433, 11433, 11425,
11403), TR7 = c(15769, 15769, 15769, 15769, 15769, 15769, 15769,
15769, 15769, 15769, 15769), TR8 = c(16915, 16915, 16915, 16915,
16915, 16915, 16915, 16915, 16915, 16915, 16915), TR9 = c(15800,
15800, 15800, 15796, 15773, 15755, 15745, 15744, 15735, 15706,
15697), TR10 = c(14121, 14121, 14121, 14121, 14121, 14121, 14121,
14121, 14120, 14121, 14120), TR11 = c(18247, 18247, 18247, 18247,
18247, 18247, 18247, 18247, 18247, 18247, 18247), TR12 = c(10877,
10877, 10877, 10877, 10878, 10878, 10878, 10878, 10878, 10878,
10878)), .Names = c("Tiempo", "UT1", "UT2", "UT3", "UT4", "UT5",
"UT6", "UT7", "UT8", "UT9", "UT10", "UT11", "UT12", "TR1", "TR2",
"TR3", "TR4", "TR5", "TR6", "TR7", "TR8", "TR9", "TR10", "TR11",
"TR12"), row.names = 5791:5801, class = "data.frame")
The code that used to work, and that I'm trying to run again, is:
dd<-data.frame(cbind(d$Tiempo,(diffinv(abs(diff(as.matrix(c[,2:length(d)])))))))
And I get this error:
Error in c[, 2:length(d)] : object of type 'builtin' is not subsettable
Moreover, when I use:
q<-c(1:25)
length(q)
[1] 25
q[4:length(q)]
[1] 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
Works just as it's supposed to.
I don't have a clue about what's happening. I'd appreciate some help.
Thank you
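I think you intended to subset your data frame d rather than c. Here c refers to R's built-in c() function, which is why the error mentions an object of type 'builtin':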
d[,2:length(d)]

Resources