I'm trying to detect some structural breaks in my series. The problem is that it displays continuous numbers rather than dates, despite my series being a ts object.
I found this solution but it doesn't work in my case.
This is my dataset and code:
df = structure(list(Date = structure(c(11690, 11725, 11753, 11781,
11809, 11844, 11872, 11900, 11942, 11970, 11998, 12026, 12061,
12089, 12117, 12145, 12180, 12208, 12243, 12265, 12299, 12327,
12362, 12390, 12425, 12453, 12481, 12509, 12544, 12572, 12600,
12631, 12663, 12698, 12726, 12754, 12796, 12817, 12845, 12880,
12907, 12936, 12971, 12996, 13027, 13062, 13090, 13118, 13160,
13181, 13209, 13244, 13272, 13307, 13335, 13363, 13392, 13426,
13454, 13489, 13524, 13552, 13580, 13615, 13643, 13670, 13699,
13726, 13762, 13790, 13825, 13853, 13888, 13916, 13944, 13979,
14007, 14035, 14063, 14098, 14126, 14154, 14189, 14217, 14259,
14280, 14308, 14336, 14371, 14399, 14427, 14462, 14490, 14525,
14553, 14581, 14623, 14644, 14672, 14707, 14735, 14770, 14798,
14826, 14854, 14889, 14917, 14945, 14987, 15008, 15036, 15071,
15099, 15134, 15162, 15190, 15225, 15253, 15281, 15316, 15351,
15379, 15407, 15434, 15463, 15497, 15526, 15554, 15589, 15617,
15652, 15680, 15715, 15743, 15771, 15799, 15827, 15862, 15890,
15918, 15953, 15980, 16016, 16044, 16079, 16107, 16135, 16163,
16198, 16226, 16254, 16289, 16317, 16345, 16380, 16408, 16457,
16467, 16499, 16540, 16556, 16589, 16632, 16648, 16681, 16730,
16740, 16772, 16821, 16832, 16870, 16912, 16922, 16954, 17003,
17014, 17052, 17094, 17106, 17143, 17185, 17198, 17234, 17283,
17287, 17325, 17367, 17379, 17416, 17465, 17471, 17514, 17556,
17563, 17598, 17647, 17652, 17696, 17738, 17744, 17787, 17829,
17836, 17878, 17920, 17928, 17962, 17996, 18017, 18053, 18102,
18109, 18151, 18193, 18201, 18242), class = "Date"), Fit = c(-2.01864866574525,
-2.51081772611801, -3.07896216512166, -3.02724722640642, -0.764567739958455,
-1.81459657078637, -2.13093106123547, -2.13093106123547, -1.91543051022373,
-1.31418467170089, -1.86573850139921, -2.42539556395029, -1.26414303389104,
-2.5433900359616, -1.99767537794132, -1.34728409808229, -1.64315561542246,
-0.687106946387411, -2.48041219070826, -2.48041219070826, -1.78680159845671,
-2.13687301896279, -2.6123923387608, -2.84563515334999, -3.41506073833104,
-2.74565641471061, -2.3682788731863, -1.77410755661286, -2.46191758167165,
-2.34829604543204, -2.37030627525843, -2.37030627525843, -1.75944822651175,
-2.21875944722698, -2.60249841953241, -2.6758310533823, -1.99157259723667,
-2.34860918772813, -3.24977356678388, -3.1998805120359, -3.64471855523435,
-2.80762315792921, -1.46910836105049, -1.46910836105049, -2.24153954648439,
-2.64718944648088, -2.61088260257325, -2.45889016663966, -2.59732356608009,
-3.49037732690643, -2.75284369990193, -2.56284320115193, -3.01470163344929,
-2.24267403694233, -3.36759206183078, -2.65899770326269, -2.65899770326269,
-3.83487166356133, -2.30405890853423, -3.83487166356133, -2.91420930066836,
-2.92649062542454, -2.45288174087111, -2.59203353843301, -2.37211828478634,
-2.35485833573613, -3.28807932670479, -3.28807932670479, -2.69856893402308,
-2.4482421908289, -3.42965769805337, -2.4002640291758, -1.72498017056001,
-2.10246950134994, -2.75989530409431, -2.04609226712013, -1.50354129352453,
-1.721866774994, -1.42652131446034, -1.99149928941641, -0.924508173463412,
-0.34424720787331, -1.47956887747857, -0.699260660882747, -0.705970004477605,
-2.89615299118885, -2.87168709242964, -3.49698896688496, -1.80133944039088,
-2.3066390154612, -2.16578274820764, -1.62064416630292, -1.50034889686538,
-1.64551702528081, -1.54888542275039, -2.36526073757675, -2.17980843362752,
-2.61987658921009, -2.99580131757171, -3.27224528690084, -2.90968038360951,
-2.43786428440244, -3.53447897261775, -2.94164730632451, -2.67914051197011,
-3.08963971104142, -3.30489291781406, -3.16112222668117, -3.78875309229899,
-3.27799815735179, -3.27546357519604, -3.28715323339141, -3.277230212033,
-2.73537305926061, -2.63360778909794, -3.42285993586989, -3.02592822360864,
-2.80491835054881, -3.1610709896381, -2.69912996631718, -2.48975331263934,
-0.134524884114962, -2.3485759078928, -1.67019370390805, -1.30630530826772,
-2.2627030307026, -1.19967822767006, -2.18902328617136, -2.32822018421121,
-1.05335780233708, -2.32765305050142, -3.70136681094428, -2.47624061269887,
-2.2395891355029, -0.873612387550348, -2.52750186765166, -1.58254587448088,
-1.3519682697086, -2.67716755653968, -2.09120993997918, -2.83947106437091,
-1.59227436938979, -2.70393468772428, -3.07475393381032, -1.72535933812472,
-2.62864985613023, -2.1788856069182, -1.66072722296379, -2.02593106477748,
-0.236862069023111, -2.20046381510765, 2.67747589830398, 2.03103654671807,
-0.411843127888723, 0.15392859458, 3.15264600488878, -0.115883494946465,
-0.115883494946465, -2.48408112888983, -2.13179786204659, -2.13179786204659,
-0.421916196665926, -1.81454302259545, -1.81454302259545, -0.719344207794365,
2.30623888786222, 2.30623888786222, 0.233349485130917, 0.807655736612547,
0.807655736612547, 0.00810498434400109, 1.73561337499853, 1.73561337499853,
2.05294933680988, 2.52332617911213, 2.52332617911213, 1.6590362509139,
2.44897469036036, 2.44897469036036, 1.48162277916561, 0.109012820753664,
0.109012820753664, -0.552382527186447, 0.342735574558364, 0.342735574558364,
0.352860787128766, 0.352860787128766, 0.352860787128766, 0.352860787128766,
0.726520452040748, 0.726520452040748, 0.176144461112964, 2.28171712015304,
2.28171712015304, 0.256037205994603, 0.10686264754173, 0.10686264754173,
0.10686264754173, 0.10686264754173, 0.10686264754173, -0.871047910469186,
-1.61892724112359, -1.61892724112359, -2.04847571973674, -2.04847571973674,
-2.04847571973674, 1.22730660297267, 1.94291846403141, 1.94291846403141,
2.64766715573213, 1.66439852581802, 1.66439852581802, 3.92242045719081,
2.92445832371034, 2.92445832371034, 4.09796304281725)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -216L))
word = ts(df$Fit, start = c(2002, 1), end = c(2019, 12), frequency = 12)
breaks = breakpoints(df$Fit ~ 1, h = 0.1)
plot(breaks)
coef(breaks, breaks = 2)
word[breaks$breakpoints] # July 2014
plot(df[,2], type = "b")
lines(fitted(breaks), col = 4, lwd = 2)
abline(v = df[breaks$breakpoints], lty = 2)
Can anyone help me sort this out?
Thanks a lot!
You created a timeseries object but you did not use it:
plot(word)
breaks <- breakpoints(word ~ 1, h = 0.1)
lines(fitted(breaks), col="blue", lwd=2)
abline(v = time(word)[breaks$breakpoints], lty = 2)
If I remember correctly, for irregularly spaced intervals you use zoo, you can try something like below:
library(strucchange)
library(zoo)
breaks = breakpoints(df$Fit ~ 1, h = 0.1)
zoo_df = as.zoo(df[,2])
time(zoo_df) = df[,1]
plot(zoo_df, type = "b")
abline(v = time(zoo_df)[breaks$breakpoints], lty = 2)
This gets you the plot with date on x-axis. Now to get the x-axis values, you do time(zoo_df) and it's a matter of subsetting this according to the index. For the fitted line, if you don't want the lines to join, you need to split them into breakpoints + 1 groups, and draw each one separately:
#group your breaks
grps = cut(1:length(zoo_df),
breaks=c(0,breaks$breakpoints,+Inf),
labels=1:(length(breaks$breakpoints)+1))
for(i in unique(grps)){
lines(time(zoo_df)[grps==i],fitted(breaks)[grps==i])
}
Related
I am trying to write R code to summarize count and median amounts of a date variable, and a dollar amount variable in my data frame. The summarize part is not the issue, my issue is in trying to group_by time periods to then summarize. I understand the distinction between a time period and a time duration. I am interested in time periods, in this particular case I want to summarize in years from a specific date i.e. max(close_date) to the corresponding date in one year previous periods of time, as in 2022-02-6 to 2021-02-06 to 2020-02-06 and so on. The data frame goes back about 30 months in total, so there is two full years of data to summarize.
The code I wrote here groups by the calendar year, and not what I need;
> sum_closed_date_yr <- scrubbed_data01 %>%
+ group_by(time_period = year(close_date)) %>%
+ summarize(close_count = (close_date = n()), med_close_price = median(close_price, na.rm = TRUE))
> sum_closed_date_yr
# A tibble: 5 × 3
time_period close_count med_close_price
<dbl> <int> <dbl>
1 2019 31 570000
2 2020 80 661250
3 2021 104 930750
4 2022 9 1010000
5 NA 8 0
I am very new to coding in R, I am a real estate appraiser not a statwhizzician. I have taken 23 DataCamp tutorials in R, so I have a newbie working knowledge of R. I have searched through multiple SO posts on summarizing by date, but can not find what I am specifically looking for. Any help would be greatly appreciated, thank you - Joe
Second try, I selected just the two of many variables
scrubbed_data01 %>%
+ select(close_date, close_price) %>%
+ dput()
structure(list(close_date = structure(c(NA, NA, NA, 19039, 19038,
19034, 19024, 19020, 19016, 19013, 18999, 18989, 18976, 18969,
18969, 18968, 18955, 18955, 18954, 18953, 18953, 18949, 18948,
18943, 18940, 18936, 18934, 18933, 18929, 18922, 18921, 18921,
18921, 18918, 18915, 18912, 18908, 18908, 18907, 18906, 18905,
18900, 18900, 18899, 18897, 18897, 18897, 18891, 18891, 18890,
18887, 18880, 18879, 18878, 18878, 18873, 18873, 18873, 18869,
18866, 18866, 18851, 18850, 18844, 18836, 18836, 18831, 18830,
18822, 18821, 18821, 18815, 18810, 18806, 18802, 18796, 18795,
18789, 18786, 18782, 18781, 18781, 18780, 18779, 18775, 18775,
18774, 18761, 18761, 18753, 18752, 18747, 18746, 18746, 18740,
18739, 18737, 18729, 18718, 18715, 18705, 18704, 18701, 18695,
18689, 18683, 18677, 18655, 18652, 18648, 18646, 18640, 18634,
18633, 18631, 18619, 18613, 18611, 18590, 18585, 18579, 18576,
18569, 18569, 18569, 18563, 18558, 18557, 18557, 18556, 18554,
18549, 18544, 18540, 18540, 18533, 18519, 18519, 18519, 18514,
18514, 18513, 18507, 18502, 18502, 18501, 18501, 18499, 18495,
18492, 18492, 18491, 18488, 18484, 18472, 18466, 18464, 18459,
18459, 18453, 18451, 18450, 18445, 18443, 18442, 18423, 18422,
18411, 18401, 18400, 18397, 18397, 18397, 18396, 18387, 18386,
18366, 18361, 18360, 18340, 18338, 18331, 18317, 18313, 18302,
18297, 18289, 18283, 18283, 18277, 18274, 18271, 18271, 18269,
18263, 18261, 18261, 18261, 18260, 18250, 18247, 18239, 18208,
18200, 18199, 18197, 18194, 18190, 18185, 18185, 18180, 18179,
18177, 18177, 18176, 18170, 18169, 18156, 18155, 18152, 18151,
18142, 18142, 18138, 18137, 18136, NA, NA, NA, NA, NA, 19044), class = "Date"),
close_price = c(0, 0, 0, 1150001, 940000, 1253000, 979000,
881000, 1010000, 1060000, 1100000, 1070000, 1025000, 755000,
740000, 930000, 1250000, 990000, 930000, 931500, 975000,
950000, 850000, 865000, 921000, 790000, 778000, 935000, 1270000,
970000, 1061500, 960000, 1015000, 1100000, 1082000, 880000,
1000000, 1140000, 950000, 852000, 1045000, 795000, 950000,
950000, 880000, 850000, 945000, 949500, 1220000, 1015000,
899000, 1100000, 805000, 868000, 1102000, 1015000, 923000,
810000, 890000, 826000, 1140000, 970000, 830000, 790000,
1151000, 835500, 1080000, 870000, 1049000, 985000, 962000,
926000, 1008888, 950000, 810000, 760000, 955000, 930000,
985000, 1210000, 878000, 950000, 855000, 930000, 960000,
1180000, 980000, 960000, 898000, 1100000, 1215000, 885000,
985000, 880000, 1100000, 810000, 1210000, 810000, 970700,
1010000, 800000, 850000, 849000, 770000, 925000, 930000,
875000, 755000, 675000, 875500, 715000, 837000, 747000, 805000,
785000, 801200, 900000, 800000, 610000, 720000, 730000, 700000,
695000, 720000, 750000, 860000, 915000, 787000, 785000, 710000,
735000, 620000, 788000, 780000, 780000, 645000, 700000, 686000,
686000, 745000, 745000, 605000, 730000, 625000, 625000, 685000,
731000, 715000, 695000, 710000, 700000, 575000, 561000, 590000,
595000, 720500, 670000, 711000, 645000, 595000, 700000, 545000,
695000, 531000, 581000, 518000, 645000, 562500, 530000, 640000,
643000, 680000, 700000, 540000, 630000, 658000, 675000, 525000,
600000, 664500, 590000, 569595, 620500, 555000, 585000, 630000,
639900, 515000, 475000, 670000, 610000, 524888, 550000, 520000,
650000, 500000, 500000, 540000, 608000, 575000, 570000, 639900,
645000, 648500, 635000, 530000, 655000, 520000, 555000, 542500,
515000, 620000, 580140, 535000, 638888, 540000, 590000, 535000,
497500, 505000, 675000, 545000, 640000, 555000, 630000, 590000,
0, 0, 0, 0, 0, 985000)), row.names = c(NA, 232L), class = "data.frame")
>
You can create a new variable to assign the year to your desired period:
library(dplyr)
df %>% mutate(period_year = case_when(close_date < "2019-02-06" ~ 2018,
close_date >= "2019-02-06" & close_date < "2020-02-06" ~ 2019,
close_date >= "2020-02-06" & close_date < "2021-02-06" ~ 2020,
close_date >= "2021-02-06" & close_date < "2022-02-06" ~ 2021,
close_date >= "2022-02-06" & close_date < "2023-02-06" ~ 2022)) %>%
group_by(time_period = period_year) %>%
summarize(close_count = (close_date = n()), med_close_price = median(close_price, na.rm = TRUE))
I am trying to interpolate two series but I get different errors when it comes to use left_join. These are the two series:
df1 = structure(list(Date = structure(c(11690, 11725, 11753, 11781,
11809, 11844, 11872, 11900, 11942, 11970, 11998, 12026, 12061,
12089, 12117, 12145, 12180, 12208, 12243, 12265, 12299, 12327,
12362, 12390, 12425, 12453, 12481, 12509, 12544, 12572, 12600,
12631, 12663, 12698, 12726, 12754, 12796, 12817, 12845, 12880,
12907, 12936, 12971, 12996, 13027, 13062, 13090, 13118, 13160,
13181, 13209, 13244, 13272, 13307, 13335, 13363, 13392, 13426,
13454, 13489, 13524, 13552, 13580, 13615, 13643, 13670, 13699,
13726, 13762, 13790, 13825, 13853, 13888, 13916, 13944, 13979,
14007, 14035, 14063, 14098, 14126, 14154, 14189, 14217, 14259,
14280, 14308, 14336, 14371, 14399, 14427, 14462, 14490, 14525,
14553, 14581, 14623, 14644, 14672, 14707, 14735, 14770, 14798,
14826, 14854, 14889, 14917, 14945, 14987, 15008, 15036, 15071,
15099, 15134, 15162, 15190, 15225, 15253, 15281, 15316, 15351,
15379, 15407, 15434, 15463, 15497, 15526, 15554, 15589, 15617,
15652, 15680, 15715, 15743, 15771, 15799, 15827, 15862, 15890,
15918, 15953, 15980, 16016, 16044, 16079, 16107, 16135, 16163,
16198, 16226, 16254, 16289, 16317, 16345, 16380, 16408, 16457,
16467, 16499, 16540, 16556, 16589, 16632, 16648, 16681, 16730,
16740, 16772, 16821, 16832, 16870, 16912, 16922, 16954, 17003,
17014, 17052, 17094, 17106, 17143, 17185, 17198, 17234, 17283,
17287, 17325, 17367, 17379, 17416, 17465, 17471, 17514, 17556,
17563, 17598, 17647, 17652, 17696, 17738, 17744, 17787, 17829,
17836, 17878, 17920, 17928, 17962, 17996, 18017, 18053, 18102,
18109, 18151, 18193, 18201, 18242), class = "Date"), Fit = c(-1.68038051095608,
-2.12317945962401, -2.71086209338424, -3.22489682411764, -2.51769032592554,
-1.33242532610804, -2.13564807610995, -2.13564807610995, -2.03415137348661,
-1.58909921518124, -0.68844714029518, -1.94691881575563, -1.16714425518695,
-2.15153420569546, -2.04779261960842, -0.867515299774483, -1.47986823637587,
-0.650513604798111, -1.61361732632524, -1.61361732632524, -1.48596960028163,
-2.20004804407501, -2.64689217553021, -2.67436545120372, -3.48049123019991,
-2.28510809912552, -2.32172665536549, -1.76823348887895, -2.763353378483,
-2.09381469041352, -2.08400217235893, -2.08400217235893, -1.59720187270177,
-2.06034560841579, -2.56317571167687, -1.32635640217861, -1.48729782102413,
-2.00732693090646, -3.40272319833461, -2.49810610074565, -3.32810591309226,
-1.92402348117091, -1.35397391665409, -1.35397391665409, -1.76640461987233,
-2.51735528772741, -2.35332514240503, -2.35272634525907, -2.68468172493552,
-2.92171051216825, -3.16925509035157, -2.23093489115309, -3.06530983495044,
-2.30445613039677, -3.30583207178147, -2.82188405397887, -2.82188405397887,
-2.51962207875813, -2.66982767393931, -2.98041595989062, -2.55306093500464,
-3.36871970472208, -2.6201699311654, -2.49494986723432, -2.78910296607766,
-2.57178346785915, -3.28445145020619, -3.28445145020619, -2.42213276484722,
-2.38709926337358, -2.99568739417641, -2.65138790450656, -1.58074660422518,
-1.91906056315361, -2.90337379582732, -2.20117735700684, -1.29303194740331,
-2.2494711668963, -1.53224235243069, -2.04481468859739, -0.929377541149286,
-0.0656900034556943, -2.20508088335906, -0.578502434372448, -0.858925093713048,
-2.96970181427872, -2.97862851965814, -3.31779605911778, -2.08207766441392,
-2.4848907725881, -1.99650762208841, -2.17076028347941, -1.27061305909,
-1.92537835567221, -1.49409886851971, -2.32667673830125, -1.90852299188928,
-2.88390697795999, -2.9172411130509, -3.5788915130035, -2.92151370364156,
-2.71478221054495, -3.40505963822921, -3.21129210245385, -2.66481268506047,
-3.27494238949828, -3.37230078139583, -3.53346393522174, -3.99626066800013,
-3.41713548837371, -3.67409843863548, -3.50182175058264, -3.37467436298585,
-3.40580625489191, -2.99003362011982, -3.66653724632765, -3.61588309823573,
-3.18167532125541, -3.35619362672467, -2.8806023910338, -2.23185392156432,
-0.626338062660262, -2.00482507082523, -1.7359344838577, -0.800609837957656,
-1.93997314433068, -0.93482911672218, -2.63652739226028, -1.93546315176661,
-1.17524044037369, -2.274143231604, -2.69679235052359, -2.23731851002543,
-1.86716852238077, -0.716926538642468, -1.30258784685856, -1.08194224250233,
-0.930472602419788, 1.13081617308243, 2.57874064965174, -0.388461792958877,
-0.340421132850094, 0.638071432169484, -1.57318833539501, 4.40961161388978,
1.62060735764472, -0.674369921428344, 1.76679629890753, -0.748857461264583,
0.520407646064921, -0.575555044309392, 4.34511452569889, 3.34194433067617,
3.21304624521961, 1.97147139745885, 3.45052500620869, -0.399783726365629,
-0.399783726365629, -0.519126753266423, -0.924795366280377, -0.924795366280377,
4.49023560078066, 2.40210157814194, 2.40210157814194, 3.48634734098189,
2.72330542145941, 2.72330542145941, 2.44175433903678, 1.83754103018167,
1.83754103018167, 2.00744190890702, 2.84495436947268, 2.84495436947268,
3.68678762972081, 3.16657087164265, 3.16657087164265, 2.54382298188246,
3.23525009614645, 3.23525009614645, 2.25680930181096, 1.44116917115332,
1.44116917115332, -0.510916286085479, -0.212859586548733, -0.212859586548733,
-0.740774827758169, -0.740774827758169, -0.740774827758169, -0.740774827758169,
-0.41701391913469, -0.41701391913469, -1.22601225946438, -0.632288989161026,
-0.632288989161026, -1.04309509051099, -1.17291896442253, -1.17291896442253,
-1.17291896442253, -1.17291896442253, -1.17291896442253, -1.11302828507398,
-2.78220449305609, -2.78220449305609, -2.86224137302201, -2.86224137302201,
-2.86224137302201, 1.34122649704384, 1.33515381941954, 1.33515381941954,
2.56866171677324, 0.963485086246604, 0.963485086246604, 4.22653525972732,
5.35095831479335, 5.35095831479335, 3.93448842430935)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -216L))
df2 = structure(list(Fit = c(-3.36981025066699, -2.83074686834444,
-3.4112766646918, -2.45754914212114, -2.16728424394746, -3.61860873481586,
-3.61860873481586, -2.2916834860219, -1.37942237747605, -3.95034004701435,
-1.46235520552567, -2.91367969639407, -3.95034004701435, -3.57714232079105,
-2.16728424394746, -2.20875065797227, -2.6648812122452, -2.74781404029482,
-2.00141858784822, -3.07954535249331, -3.07954535249331, -3.24541100859256,
-3.36981025066699, -2.49901555614595, -2.6648812122452, -3.95034004701435,
-3.0380789384685, -3.24541100859256, -3.12101176651812, -2.58194838419558,
-2.83074686834444, -3.4112766646918, -2.6648812122452, -2.99661252444369,
-3.61860873481586, -2.99661252444369, -3.57714232079105, -2.74781404029482,
-3.20394459456775, -2.25021707199708, -2.37461631407152, -3.61860873481586,
-3.36981025066699, -2.95514611041888, -3.61860873481586, -3.32834383664218,
-2.74781404029482, -3.20394459456775, -2.00141858784822, -3.12101176651812,
-3.66007514884067, -3.47347628572902, -3.95034004701435, -3.45274307871661,
-3.45274307871661, -2.83074686834444, -3.95034004701435, -2.99661252444369,
-3.61860873481586, -2.16728424394746, -3.70154156286548, -2.37461631407152,
-2.83074686834444, -2.12581782992265, -3.32834383664218, -2.6648812122452,
-3.0380789384685, -3.20394459456775, -3.4112766646918, -2.83074686834444,
-2.91367969639407, -3.95034004701435, -2.6648812122452, -3.36981025066699,
-2.2916834860219, -3.45274307871661, -2.37461631407152, -1.54528803357529,
-2.91367969639407, -3.49420949274142, -2.95514611041888, -3.32834383664218,
-2.45754914212114, -3.57714232079105, -3.36981025066699, -3.45274307871661,
-3.95034004701435, -3.16247818054293, -2.83074686834444, -3.36981025066699,
-2.6648812122452, -2.6648812122452, -2.45754914212114, -3.32834383664218,
-3.66007514884067, -3.36981025066699, -3.95034004701435, -2.83074686834444,
-2.91367969639407, -3.45274307871661, -2.20875065797227, -2.6648812122452,
-3.95034004701435, -3.95034004701435, -2.74781404029482, -2.83074686834444,
-2.99661252444369, -3.57714232079105, -3.95034004701435, -1.54528803357529,
-2.45754914212114, -2.91367969639407, -3.20394459456775, -2.83074686834444,
-3.70154156286548, -1.29648954942642, -3.36981025066699, -2.20875065797227,
-2.45754914212114, -3.24541100859256, -2.6648812122452, -1.87701934577378,
-2.83074686834444, -2.37461631407152, -3.95034004701435, -3.07954535249331,
-2.6648812122452, -2.99661252444369, -2.91367969639407, -3.49420949274142,
-3.61860873481586, -3.20394459456775, 0.44509983961565, 0.776831151814141,
-2.37461631407152, -3.61860873481586, -0.259829198806142, -2.37461631407152,
3.72094654757574, -1.37942237747605, -3.12101176651812, 0.196301355466782,
-2.20875065797227, -0.923291823203123, -2.58194838419558, 0.569499081690084,
0.486566253640461, 1.56469301828556, -1.62822086162492, 3.59654730550131,
-0.923291823203123, -2.2916834860219, -0.633026925029444, 0.486566253640461,
2.14522281463291, 1.15002887803744, 2.60135336890584, 2.97455109512914,
1.02562963596301, 1.89642433048405, 2.97455109512914, 3.38921523537725,
3.22334957927801, 3.47214806342688, 1.77202508840961, 4.05267785977424,
3.30628240732763, -0.0939635427068968, -0.342762026855765, -0.0939635427068968,
1.23296170608707, -0.342762026855765, -0.425694854905387, -3.36981025066699,
-0.674493339054255, -1.08915747930237, -1.37942237747605, -2.08435141589784,
-1.46235520552567, -1.46235520552567, -2.83074686834444, -2.25021707199708,
-3.0380789384685, 3.05748392317876, 1.15002887803744, 2.68428619695546,
0.196301355466782, 2.26962205670735, 2.85015185305471, 2.85015185305471
), Date = c("2002-01-03", "2002-02-07", "2002-03-07", "2002-04-04",
"2002-05-02", "2002-06-06", "2002-07-04", "2002-09-12", "2002-10-10",
"2002-11-07", "2002-12-05", "2003-01-09", "2003-02-06", "2003-03-06",
"2003-04-03", "2003-05-08", "2003-06-05", "2003-07-10", "2003-09-04",
"2003-10-02", "2003-11-06", "2003-12-04", "2004-01-08", "2004-02-05",
"2004-03-04", "2004-04-01", "2004-05-06", "2004-06-03", "2004-07-01",
"2004-09-02", "2004-10-07", "2004-11-04", "2004-12-02", "2005-01-13",
"2005-02-03", "2005-03-03", "2005-04-07", "2005-05-04", "2005-06-02",
"2005-07-07", "2005-09-01", "2005-10-06", "2005-11-03", "2005-12-01",
"2006-01-12", "2006-02-02", "2006-03-02", "2006-04-06", "2006-05-04",
"2006-06-08", "2006-07-06", "2006-08-03", "2006-10-05", "2006-11-02",
"2006-12-07", "2007-01-11", "2007-02-08", "2007-03-08", "2007-04-12",
"2007-05-10", "2007-06-06", "2007-07-05", "2007-09-06", "2007-10-04",
"2007-11-08", "2007-12-06", "2008-01-10", "2008-02-07", "2008-03-06",
"2008-04-10", "2008-05-08", "2008-06-05", "2008-07-03", "2008-08-07",
"2008-09-04", "2008-10-02", "2008-11-06", "2008-12-04", "2009-01-15",
"2009-02-05", "2009-03-05", "2009-04-02", "2009-05-07", "2009-06-04",
"2009-07-02", "2009-08-06", "2009-09-03", "2009-10-08", "2009-11-05",
"2009-12-03", "2010-01-14", "2010-02-04", "2010-03-04", "2010-04-08",
"2010-05-06", "2010-06-10", "2010-07-08", "2010-08-05", "2010-09-02",
"2010-10-07", "2010-11-04", "2010-12-02", "2011-01-13", "2011-02-03",
"2011-03-03", "2011-04-07", "2011-05-05", "2011-06-09", "2011-07-07",
"2011-08-04", "2011-09-08", "2011-10-06", "2011-11-03", "2011-12-08",
"2012-01-12", "2012-02-09", "2012-03-08", "2012-04-04", "2012-05-03",
"2012-06-06", "2012-07-05", "2012-08-02", "2012-09-06", "2012-10-04",
"2012-11-08", "2012-12-06", "2013-01-10", "2013-02-07", "2013-03-07",
"2013-04-04", "2013-05-02", "2013-06-06", "2013-07-04", "2013-08-01",
"2013-09-05", "2013-10-02", "2013-11-07", "2013-12-05", "2014-01-09",
"2014-02-06", "2014-03-06", "2014-04-03", "2014-05-08", "2014-06-05",
"2014-07-03", "2014-08-07", "2014-09-04", "2014-10-02", "2014-11-06",
"2014-12-04", "2015-01-22", "2015-03-05", "2015-04-15", "2015-06-03",
"2015-07-16", "2015-09-03", "2015-10-22", "2015-12-03", "2016-01-21",
"2016-03-10", "2016-04-21", "2016-06-02", "2016-07-21", "2016-09-08",
"2016-10-20", "2016-12-08", "2017-01-19", "2017-03-09", "2017-04-27",
"2017-06-08", "2017-07-20", "2017-09-07", "2017-10-26", "2017-12-14",
"2018-01-25", "2018-03-08", "2018-04-26", "2018-06-14", "2018-07-26",
"2018-09-13", "2018-10-25", "2018-12-13", "2019-01-24", "2019-03-07",
"2019-04-10", "2019-06-06", "2019-07-25", "2019-09-12", "2019-10-24",
"2019-12-12")), class = "data.frame", row.names = c(NA, -190L
))
I then used these codes to interpolate the series:
# first series
monthss <- data.frame(Date = seq(as.Date("2002-01-03"), as.Date("2019-12-12"), by = "month"), stringsAsFactors = F)
df1 <- left_join(x = monthss, y = df1, by = "Date")
df1 <- na.approx(object = df1$'as it comes from the previous step', rule = 2)
# second series
df2 <- left_join(x = monthss, y = df2, by = "Date")
df2 <- na.approx(object = df2$'as it comes from the previous step', rule = 2)
However, in the first case, I get a weird result when using left_join (I get too many NAs that don't correspond to the missing dates), while in the second case, when using left_join, I get Error: cannot join a Date object with an object that is not a Date object. For this error, I tried to look for some solutions online, but what I found doesn't help.
Can anyone help me fix this?
Thanks!
You get the left_join error in the second case because df2$Date is not of class Date. This fixes the second join:
library(lubridate)
# second series
df2$Date <- lubridate::date(df2$Date) #convert Date variable to date
df2 <- left_join(x = monthss, y = df2, by = "Date")
df2 <- na.approx(object = df2$'as it comes from the previous step', rule = 2)
Changing this will fix the first join:
# first series
monthss <- data.frame(Date = seq(as.Date("2002-01-03"), as.Date("2019-12-12"), by = "day"), stringsAsFactors = F)
You did a sequence by month and thus only got the dates for each month where the day is "3" e.g., 2002-01-03, 2002-02-03 etc. Thus you only got joins for Dates ending on "03" but not for other dates that you have Fit values for in df1.
I have a simple question to ask. I couldn't find a solution on SO.
I have a column vector of dates and I would like to see whether months are unique or not. I tried to use unique but I may have used it in the wrong way.
An example:
Date = structure(c(11690, 11725, 11753, 11781, 11809, 11844, 11872,
11900, 11942, 11970, 11998, 12026, 12061, 12089, 12117, 12145,
12180, 12208, 12243, 12264, 12265, 12299, 12327, 12362, 12390,
12425, 12453, 12481, 12509, 12544, 12572, 12600, 12635, 12663,
12698, 12726, 12754, 12796, 12817, 12845, 12880, 12907, 12936,
12971, 12999, 13027, 13062, 13090, 13118, 13160, 13181, 13209,
13244, 13272, 13307, 13335, 13363, 13392, 13426, 13454, 13489,
13524, 13552, 13580, 13615, 13643, 13670, 13699, 13727, 13762,
13790, 13825, 13853, 13888, 13916, 13944, 13979, 14007, 14035,
14063, 14098, 14126, 14154, 14160, 14189, 14217, 14259, 14280,
14308, 14336, 14371, 14399, 14427, 14462, 14490, 14525, 14553,
14581, 14623, 14644, 14672, 14707, 14735, 14770, 14798, 14826,
14854, 14889, 14917, 14945, 14987, 15008, 15036, 15071, 15099,
15134, 15162, 15190, 15225, 15253, 15281, 15316, 15351, 15379,
15407, 15434, 15463, 15497, 15526, 15554, 15589, 15617, 15652,
15680, 15715, 15743, 15771, 15799, 15827, 15862, 15890, 15918,
15953, 15980, 16016, 16044, 16079, 16107, 16135, 16163, 16198,
16226, 16254, 16289, 16317, 16345, 16380, 16408, 16457, 16467,
16499, 16540, 16556, 16589, 16632, 16648, 16681, 16730, 16740,
16772, 16821, 16832, 16870, 16912, 16922, 16954, 17003, 17014,
17052, 17094, 17106, 17143, 17185, 17198, 17234, 17283, 17287,
17325, 17367, 17379, 17416, 17465, 17471, 17514, 17556, 17563,
17598, 17647, 17652, 17696, 17738, 17744, 17787, 17829, 17836,
17878, 17920, 17928, 17962, 17996, 18017, 18053, 18102, 18109,
18151, 18193, 18201, 18242), class = "Date")
In this column vector I would like to see whether there are two observations in the same month (there are 2 for "2003-07" and "2008-10"). I can I check it with one line of command?
Thanks!
In base R, we can format the Date to get only year and month, use table to count their occurrences, Filter to select only those month-year which occur more than once.
names(Filter(function(x) x > 1, table(format(Date, "%Y-%m"))))
#[1] "2003-07" "2008-10"
Same logic using zoo::as.yearmon.
names(Filter(function(x) x > 1, table(zoo::as.yearmon(Date))))
#[1] "Jul 2003" "Oct 2008"
library('lubridate')
library('tidyverse')
Date %>%
enframe() %>%
count(year(value), month(value)) %>%
filter(n > 1)
One line with as.yearmon from zoo
library(zoo)
Date[which(duplicated(as.yearmon(Date))==TRUE)]
[1] "2003-07-31" "2008-10-08"
Use str_sub to get year-month and use table to count frequency of occurence:
library(tidyverse)
year_month <- str_sub(Date, 1, 7) #this will extract characters 1-7 (year-mo)
result <- as_tibble(table(year_month))
#or pipe it all
year_month2 <- str_sub(Date, 1, 7) %>%
table() %>%
as_tibble()
#or filter to get only those with > 1 occurence
year_month3 <- str_sub(Date, 1, 7) %>%
table() %>%
as_tibble() %>%
filter(n > 1)
I want to add to a plot with Date axis (x axis) a tooltip that will include the current date, currently the tooltip shows the numeric value of the date, instead of the date format
the Data:
> dput(for_plot)
structure(list(ext_install_date = structure(c(16638, 16660, 16700,
16710, 16712, 16729, 16730, 16736, 16752, 16768, 16717, 16755,
16756, 16757, 16758, 16662, 16750, 16769, 16785, 16665, 16699,
16632, 16682, 16683, 16684, 16698, 16634, 16679, 16720, 16754,
16701, 16718, 16685, 16687, 16774, 16775, 16647, 16680, 16596,
16630, 16721, 16725, 16703, 16706, 16723, 16726, 16650, 16651,
16686, 16707, 16771, 16722, 16739, 16760, 16794, 16724, 16742,
16777, 16648, 16653, 16670, 16744, 16676, 16636, 16671, 16654,
16740, 16746, 16763, 16642, 16728, 16733, 16743, 16765, 16692,
16689, 16709, 16711, 16735, 16639, 16695, 16780, 16784, 16640,
16644, 16645, 16731, 16643, 16714, 16659, 16753, 16786, 16657,
16737, 16663, 16749, 16702, 16719, 16770, 16751, 16776, 16681,
16631, 16789, 16674, 16727, 16772, 16762, 16795, 16778, 16649,
16672, 16766, 16655, 16741, 16764, 16693, 16747, 16781, 16732,
16715, 16803, 16787, 16658, 16696, 16664, 16646, 16626, 16629,
16790, 16773, 16652, 16796, 16667, 16673, 16759, 16656, 16748,
16637, 16708, 16691, 16641, 16799, 16697, 16782, 16688, 16716,
16804, 16788, 16627, 16633, 16791, 16797, 16668, 16800, 16783,
16734, 16677, 16666, 16704, 16628, 16792, 16798, 16669, 16675,
16761, 16767, 16678, 16661, 16738, 16635, 16793, 16745, 16694,
16779, 16801, 16802, 16590, 16591, 16593, 16617, 16618, 16619,
16589, 16592, 16608, 16610, 16611, 16624, 16623, 16595, 16713,
16594, 16615, 16609, 16601, 16616, 16597, 16602, 16598, 16599,
16605, 16600, 16620, 16606, 16622, 16588, 16603, 16604, 16607,
16705, 16625, 16690, NA, 16613, 16612, 16621, 16587, 16614), class = "Date"),
sum = c(42133989L, 55439830L, 12474088L, 16782989L, 12498558L,
23097594L, 18694459L, 18613591L, 17164639L, 10998479L, 17434372L,
16134588L, 15765093L, 14780377L, 12238314L, 60259367L, 14299153L,
9228893L, 7181921L, 57044570L, 12671147L, 34528083L, 28685034L,
27538625L, 23861138L, 12939897L, 34804466L, 27256411L, 15470425L,
17683150L, 13919150L, 17683908L, 24785274L, 17297336L, 10883551L,
10765214L, 52309168L, 32311372L, 33612420L, 30994837L, 17528116L,
17979427L, 12904065L, 14452388L, 16659048L, 24373216L, 50197552L,
64194782L, 18635738L, 12370060L, 10714545L, 17080052L, 17320119L,
11792389L, 4250804L, 17337770L, 16257833L, 10389087L, 55407448L,
60149494L, 52791093L, 16748038L, 26324948L, 35491474L, 39187635L,
58805623L, 17361957L, 15002820L, 11834933L, 42022359L, 18834611L,
18499567L, 15771667L, 12734880L, 9901353L, 11215141L, 12242264L,
13603119L, 17919976L, 41100805L, 14435765L, 9727462L, 7604216L,
41830337L, 48193262L, 53128495L, 19248325L, 43074450L, 11930683L,
57259190L, 17360447L, 6925452L, 49781307L, 17483336L, 60223307L,
14877194L, 11216973L, 17906140L, 11152617L, 15247289L, 9919111L,
30596442L, 31624492L, 6494032L, 29419861L, 22205115L, 11612651L,
12506364L, 3977433L, 9251065L, 52886830L, 30459500L, 10314486L,
62828525L, 16266340L, 11414242L, 11315183L, 14865891L, 9219453L,
19531171L, 12390920L, 554893L, 6871604L, 56267484L, 13478614L,
59179677L, 56843397L, 33077108L, 29693238L, 5709800L, 10984800L,
62188950L, 3424377L, 55865206L, 33250188L, 11883725L, 61208251L,
14620505L, 33824988L, 9605466L, 10086150L, 43150201L, 2434175L,
13636535L, 9149052L, 12859396L, 15342563L, 299795L, 6557079L,
29079786L, 35154155L, 5720813L, 3213738L, 54942898L, 2011266L,
7788028L, 17631115L, 26168243L, 55355445L, 12363848L, 32406026L,
5181923L, 3045645L, 55879245L, 27803689L, 12997556L, 9993556L,
24094397L, 61278488L, 16146261L, 30860019L, 4851695L, 16059845L,
9734641L, 9682186L, 1434076L, 787507L, 33627937L, 35299118L,
31196723L, 23152630L, 18430364L, 16404624L, 21956375L, 31145208L,
29812678L, 28715201L, 31587264L, 18888020L, 14560381L, 28866133L,
11402958L, 28100484L, 21889495L, 33006788L, 32671209L, 22527130L,
36680524L, 30021920L, 35047621L, 33187732L, 30610149L, 34511947L,
21200181L, 31763855L, 16891242L, 28242299L, 31096620L, 35093501L,
28600363L, 14257733L, 32070016L, 10522891L, 785L, 17111781L,
25138826L, 21459015L, 28940910L, 21906624L)), row.names = c(NA,
-219L), class = c("data.table", "data.frame"), .Names = c("ext_install_date",
"sum"), .internal.selfref = <pointer: 0x0000000001290788>)
The ggvis code:
library(ggvis)
for_plot %>% ggvis(x = ~ext_install_date, y = ~sum, stroke := "red") %>% layer_lines(stroke=2) %>%
add_tooltip( function(data){(data$ext_install_date)}, "hover") #showing only numeric values
thanks or any help on that!
Unfortunately the tooltip in ggvis doesn't handle the values underpinning layer_lines as separate data points. Just as in your example, it only displays the first value. The second problem is the way the tooltip displays date objects.
We can hack our way around both problems, with the same approach as outlined here. Please note that the tooltip will still erroneously show the first date in between data points.
for_plot %>% ggvis(x = ~ext_install_date, y = ~sum) %>%
layer_points(opacity:=0) %>%
add_tooltip( function(data){(as.Date(data$ext_install_date/86400000,
origin='1970-01-01'))}) %>%
layer_lines()
I believe the googleVis package doesn't suffer from the same issues, so you might want to consider using it for similar graphs in the future.
My case is of little difference. I have a column of Date format on X axis, and I want to display it on the hover, but Date input of add_tooltip() is automatically transformed.
I finally figure out how to display date from X axis in ggvis tooltip, according to mtoto's code.
Here's my example for those who have the same case as I do:
dat<- as.data.frame(matrix(rnorm(100),ncol=5))
dat$Date=seq(as.Date("2018-01-01"), as.Date("2018-01-20"),by='days')
myhover<- function(x){
if(is.null(x)) return(NULL)
paste('Date: ',format(as.Date(x$Date/86400000,origin='1970-01-01'),'%Y-%m-%d'),br(),'Data:',format(x$V1))
}
dat %>% ggvis(~Date,~V1) %>% layer_points() %>% add_tooltip(myhover,'hover')
I have the following code (dput data for the data sets is here):
ruz <- structure(list(date = structure(c(16617, 16618, 16619, 16622,
16623, 16624, 16625, 16626, 16629, 16630, 16631, 16632, 16633,
16636, 16637, 16638, 16639, 16640, 16643, 16644, 16645, 16646,
16647, 16650, 16651, 16652, 16653, 16654, 16657, 16658, 16659,
16660, 16661, 16664, 16665, 16666, 16667, 16668, 16671, 16672,
16673, 16674, 16675, 16678, 16679, 16680, 16681, 16682, 16685,
16686, 16687, 16688, 16689, 16692, 16693, 16694, 16695, 16696,
16699, 16700, 16701, 16702, 16703, 16706, 16707, 16708, 16709,
16710, 16713, 16714, 16715, 16716, 16717, 16720, 16721, 16722,
16723, 16724, 16727, 16728, 16729, 16730, 16731, 16734, 16735,
16736, 16737, 16738, 16741, 16742, 16743, 16744, 16745, 16748,
16749, 16750, 16751, 16752, 16755, 16756, 16757, 16758, 16759,
16762, 16763, 16764, 16765, 16766, 16769), class = "Date"), val1 = c(61.8954,
61.6297, 61.7859, 62.2135, 62.692, 63.026, 63.1511, 63.008, 62.7991,
62.5304, 62.3971, 62.1703, 61.9535, 61.7927, 61.8367, 62.1856,
62.7663, 63.5846, 64.859, 66.0745, 65.9327, 65.1387, 65.8362,
67.9171, 68.8917, 68.7714, 69.295, 69.9932, 70.0878, 70.0563,
71.0985, 71.7451, 71.9923, 72.3836, 72.6186, 72.7895, 74.1316,
76.3577, 79.6818, 80.4601, 79.637, 77.1905, 74.7982, 74.0868,
73.6844, 74.7815, 75.1829, 75.0874, 76.0362, 76.5334, 76.1729,
76.2661, 76.521, 76.5815, 76.1411, 74.7473, 74.229, 74.8073,
74.8083, 74.2189, 73.7976, 74.0765, 73.7323, 73.5319, 73.8853,
73.7351, 73.2462, 73.7254, 73.4657, 72.5227, 70.9683, 70.1357,
69.7459, 69.7823, 70.714, 71.5863, 71.3391, 70.2717, 70.1001,
70.3965, 70.964, 70.901, 69.6083, 69.0542, 70.325, 71.2619, 70.6912,
70.5258, 70.6195, 69.9786, 68.9845, 68.7403, 69.5909, 69.6324,
69.2801, 69.3884, 70.4129, 71.6024, 70.7705, 69.6673, 69.2706,
69.2517, 69.2788, 69.3983, 69.7819, 69.8404, 69.8002, 69.9816,
70.1287)), .Names = c("date", "val1"), row.names = c("2015-07-01",
"2015-07-02", "2015-07-03", "2015-07-06", "2015-07-07", "2015-07-08",
"2015-07-09", "2015-07-10", "2015-07-13", "2015-07-14", "2015-07-15",
"2015-07-16", "2015-07-17", "2015-07-20", "2015-07-21", "2015-07-22",
"2015-07-23", "2015-07-24", "2015-07-27", "2015-07-28", "2015-07-29",
"2015-07-30", "2015-07-31", "2015-08-03", "2015-08-04", "2015-08-05",
"2015-08-06", "2015-08-07", "2015-08-10", "2015-08-11", "2015-08-12",
"2015-08-13", "2015-08-14", "2015-08-17", "2015-08-18", "2015-08-19",
"2015-08-20", "2015-08-21", "2015-08-24", "2015-08-25", "2015-08-26",
"2015-08-27", "2015-08-28", "2015-08-31", "2015-09-01", "2015-09-02",
"2015-09-03", "2015-09-04", "2015-09-07", "2015-09-08", "2015-09-09",
"2015-09-10", "2015-09-11", "2015-09-14", "2015-09-15", "2015-09-16",
"2015-09-17", "2015-09-18", "2015-09-21", "2015-09-22", "2015-09-23",
"2015-09-24", "2015-09-25", "2015-09-28", "2015-09-29", "2015-09-30",
"2015-10-01", "2015-10-02", "2015-10-05", "2015-10-06", "2015-10-07",
"2015-10-08", "2015-10-09", "2015-10-12", "2015-10-13", "2015-10-14",
"2015-10-15", "2015-10-16", "2015-10-19", "2015-10-20", "2015-10-21",
"2015-10-22", "2015-10-23", "2015-10-26", "2015-10-27", "2015-10-28",
"2015-10-29", "2015-10-30", "2015-11-02", "2015-11-03", "2015-11-04",
"2015-11-05", "2015-11-06", "2015-11-09", "2015-11-10", "2015-11-11",
"2015-11-12", "2015-11-13", "2015-11-16", "2015-11-17", "2015-11-18",
"2015-11-19", "2015-11-20", "2015-11-23", "2015-11-24", "2015-11-25",
"2015-11-26", "2015-11-27", "2015-11-30"), class = "data.frame")
dfr <- structure(list(date = structure(c(16616, 16646, 16677, 16708,
16738, 16769), class = "Date"), val2 = c(0, 0.0920000000000001,
0.120392, 0.136077488, 0.0917704659680001, 0.0874033841041282
)), .Names = c("date", "val2"), row.names = c("2015-06", "2015-07",
"2015-08", "2015-09", "2015-10", "2015-11"), class = "data.frame")
ggplot:
ggplot() +
geom_line(data = ruz, aes(date, val1), size = 1.5, color = "blue") +
geom_line(data = dfr, aes(date, val2 * 100), size = 1.5, color = "red") +
scale_fill_manual(values = c("blue", "red"))
which produces the following graph:
My question is, how to modify it to show a legend after all?
If you want to avoid combining the data.frames, you can do this:
ggplot() +
geom_line(data = ruz, aes(date, val1, color = "a"), size = 1.5) +
geom_line(data = dfr, aes(date, val2 * 100, color = "b"), size = 1.5) +
scale_color_manual(name = "Colors",
values = c("a" = "blue", "b" = "red"))
In order to get a legend, you have to map something to color within aes. You can then use scale_color_manual to define the colors for the mapped character values. There are situations where this trick is easier and results in more readable code then reshaping/combining data.
You could rbind them and use color
ruz$type <- "ruz"
dfr$val2 <- dfr$val2 * 100
dfr$type <- "dfr"
names(ruz) <- names(dfr)
df <- rbind(ruz, dfr)
ggplot(df, aes(date, val2, color = type), size = 1.5) + geom_line()