Add Date Tooltip to GGvis object R - r

I want to add to a plot with Date axis (x axis) a tooltip that will include the current date, currently the tooltip shows the numeric value of the date, instead of the date format
the Data:
> dput(for_plot)
structure(list(ext_install_date = structure(c(16638, 16660, 16700,
16710, 16712, 16729, 16730, 16736, 16752, 16768, 16717, 16755,
16756, 16757, 16758, 16662, 16750, 16769, 16785, 16665, 16699,
16632, 16682, 16683, 16684, 16698, 16634, 16679, 16720, 16754,
16701, 16718, 16685, 16687, 16774, 16775, 16647, 16680, 16596,
16630, 16721, 16725, 16703, 16706, 16723, 16726, 16650, 16651,
16686, 16707, 16771, 16722, 16739, 16760, 16794, 16724, 16742,
16777, 16648, 16653, 16670, 16744, 16676, 16636, 16671, 16654,
16740, 16746, 16763, 16642, 16728, 16733, 16743, 16765, 16692,
16689, 16709, 16711, 16735, 16639, 16695, 16780, 16784, 16640,
16644, 16645, 16731, 16643, 16714, 16659, 16753, 16786, 16657,
16737, 16663, 16749, 16702, 16719, 16770, 16751, 16776, 16681,
16631, 16789, 16674, 16727, 16772, 16762, 16795, 16778, 16649,
16672, 16766, 16655, 16741, 16764, 16693, 16747, 16781, 16732,
16715, 16803, 16787, 16658, 16696, 16664, 16646, 16626, 16629,
16790, 16773, 16652, 16796, 16667, 16673, 16759, 16656, 16748,
16637, 16708, 16691, 16641, 16799, 16697, 16782, 16688, 16716,
16804, 16788, 16627, 16633, 16791, 16797, 16668, 16800, 16783,
16734, 16677, 16666, 16704, 16628, 16792, 16798, 16669, 16675,
16761, 16767, 16678, 16661, 16738, 16635, 16793, 16745, 16694,
16779, 16801, 16802, 16590, 16591, 16593, 16617, 16618, 16619,
16589, 16592, 16608, 16610, 16611, 16624, 16623, 16595, 16713,
16594, 16615, 16609, 16601, 16616, 16597, 16602, 16598, 16599,
16605, 16600, 16620, 16606, 16622, 16588, 16603, 16604, 16607,
16705, 16625, 16690, NA, 16613, 16612, 16621, 16587, 16614), class = "Date"),
sum = c(42133989L, 55439830L, 12474088L, 16782989L, 12498558L,
23097594L, 18694459L, 18613591L, 17164639L, 10998479L, 17434372L,
16134588L, 15765093L, 14780377L, 12238314L, 60259367L, 14299153L,
9228893L, 7181921L, 57044570L, 12671147L, 34528083L, 28685034L,
27538625L, 23861138L, 12939897L, 34804466L, 27256411L, 15470425L,
17683150L, 13919150L, 17683908L, 24785274L, 17297336L, 10883551L,
10765214L, 52309168L, 32311372L, 33612420L, 30994837L, 17528116L,
17979427L, 12904065L, 14452388L, 16659048L, 24373216L, 50197552L,
64194782L, 18635738L, 12370060L, 10714545L, 17080052L, 17320119L,
11792389L, 4250804L, 17337770L, 16257833L, 10389087L, 55407448L,
60149494L, 52791093L, 16748038L, 26324948L, 35491474L, 39187635L,
58805623L, 17361957L, 15002820L, 11834933L, 42022359L, 18834611L,
18499567L, 15771667L, 12734880L, 9901353L, 11215141L, 12242264L,
13603119L, 17919976L, 41100805L, 14435765L, 9727462L, 7604216L,
41830337L, 48193262L, 53128495L, 19248325L, 43074450L, 11930683L,
57259190L, 17360447L, 6925452L, 49781307L, 17483336L, 60223307L,
14877194L, 11216973L, 17906140L, 11152617L, 15247289L, 9919111L,
30596442L, 31624492L, 6494032L, 29419861L, 22205115L, 11612651L,
12506364L, 3977433L, 9251065L, 52886830L, 30459500L, 10314486L,
62828525L, 16266340L, 11414242L, 11315183L, 14865891L, 9219453L,
19531171L, 12390920L, 554893L, 6871604L, 56267484L, 13478614L,
59179677L, 56843397L, 33077108L, 29693238L, 5709800L, 10984800L,
62188950L, 3424377L, 55865206L, 33250188L, 11883725L, 61208251L,
14620505L, 33824988L, 9605466L, 10086150L, 43150201L, 2434175L,
13636535L, 9149052L, 12859396L, 15342563L, 299795L, 6557079L,
29079786L, 35154155L, 5720813L, 3213738L, 54942898L, 2011266L,
7788028L, 17631115L, 26168243L, 55355445L, 12363848L, 32406026L,
5181923L, 3045645L, 55879245L, 27803689L, 12997556L, 9993556L,
24094397L, 61278488L, 16146261L, 30860019L, 4851695L, 16059845L,
9734641L, 9682186L, 1434076L, 787507L, 33627937L, 35299118L,
31196723L, 23152630L, 18430364L, 16404624L, 21956375L, 31145208L,
29812678L, 28715201L, 31587264L, 18888020L, 14560381L, 28866133L,
11402958L, 28100484L, 21889495L, 33006788L, 32671209L, 22527130L,
36680524L, 30021920L, 35047621L, 33187732L, 30610149L, 34511947L,
21200181L, 31763855L, 16891242L, 28242299L, 31096620L, 35093501L,
28600363L, 14257733L, 32070016L, 10522891L, 785L, 17111781L,
25138826L, 21459015L, 28940910L, 21906624L)), row.names = c(NA,
-219L), class = c("data.table", "data.frame"), .Names = c("ext_install_date",
"sum"), .internal.selfref = <pointer: 0x0000000001290788>)
The ggvis code:
library(ggvis)
for_plot %>% ggvis(x = ~ext_install_date, y = ~sum, stroke := "red") %>% layer_lines(stroke=2) %>%
add_tooltip( function(data){(data$ext_install_date)}, "hover") #showing only numeric values
thanks or any help on that!

Unfortunately the tooltip in ggvis doesn't handle the values underpinning layer_lines as separate data points. Just as in your example, it only displays the first value. The second problem is the way the tooltip displays date objects.
We can hack our way around both problems, with the same approach as outlined here. Please note that the tooltip will still erroneously show the first date in between data points.
for_plot %>% ggvis(x = ~ext_install_date, y = ~sum) %>%
layer_points(opacity:=0) %>%
add_tooltip( function(data){(as.Date(data$ext_install_date/86400000,
origin='1970-01-01'))}) %>%
layer_lines()
I believe the googleVis package doesn't suffer from the same issues, so you might want to consider using it for similar graphs in the future.

My case is of little difference. I have a column of Date format on X axis, and I want to display it on the hover, but Date input of add_tooltip() is automatically transformed.
I finally figure out how to display date from X axis in ggvis tooltip, according to mtoto's code.
Here's my example for those who have the same case as I do:
dat<- as.data.frame(matrix(rnorm(100),ncol=5))
dat$Date=seq(as.Date("2018-01-01"), as.Date("2018-01-20"),by='days')
myhover<- function(x){
if(is.null(x)) return(NULL)
paste('Date: ',format(as.Date(x$Date/86400000,origin='1970-01-01'),'%Y-%m-%d'),br(),'Data:',format(x$V1))
}
dat %>% ggvis(~Date,~V1) %>% layer_points() %>% add_tooltip(myhover,'hover')

Related

How do I summarise in years from a specific date i.e. max(close_date) to the corresponding date in one year previous periods of time in R

I am trying to write R code to summarize count and median amounts of a date variable, and a dollar amount variable in my data frame. The summarize part is not the issue, my issue is in trying to group_by time periods to then summarize. I understand the distinction between a time period and a time duration. I am interested in time periods, in this particular case I want to summarize in years from a specific date i.e. max(close_date) to the corresponding date in one year previous periods of time, as in 2022-02-6 to 2021-02-06 to 2020-02-06 and so on. The data frame goes back about 30 months in total, so there is two full years of data to summarize.
The code I wrote here groups by the calendar year, and not what I need;
> sum_closed_date_yr <- scrubbed_data01 %>%
+ group_by(time_period = year(close_date)) %>%
+ summarize(close_count = (close_date = n()), med_close_price = median(close_price, na.rm = TRUE))
> sum_closed_date_yr
# A tibble: 5 × 3
time_period close_count med_close_price
<dbl> <int> <dbl>
1 2019 31 570000
2 2020 80 661250
3 2021 104 930750
4 2022 9 1010000
5 NA 8 0
I am very new to coding in R, I am a real estate appraiser not a statwhizzician. I have taken 23 DataCamp tutorials in R, so I have a newbie working knowledge of R. I have searched through multiple SO posts on summarizing by date, but can not find what I am specifically looking for. Any help would be greatly appreciated, thank you - Joe
Second try, I selected just the two of many variables
scrubbed_data01 %>%
+ select(close_date, close_price) %>%
+ dput()
structure(list(close_date = structure(c(NA, NA, NA, 19039, 19038,
19034, 19024, 19020, 19016, 19013, 18999, 18989, 18976, 18969,
18969, 18968, 18955, 18955, 18954, 18953, 18953, 18949, 18948,
18943, 18940, 18936, 18934, 18933, 18929, 18922, 18921, 18921,
18921, 18918, 18915, 18912, 18908, 18908, 18907, 18906, 18905,
18900, 18900, 18899, 18897, 18897, 18897, 18891, 18891, 18890,
18887, 18880, 18879, 18878, 18878, 18873, 18873, 18873, 18869,
18866, 18866, 18851, 18850, 18844, 18836, 18836, 18831, 18830,
18822, 18821, 18821, 18815, 18810, 18806, 18802, 18796, 18795,
18789, 18786, 18782, 18781, 18781, 18780, 18779, 18775, 18775,
18774, 18761, 18761, 18753, 18752, 18747, 18746, 18746, 18740,
18739, 18737, 18729, 18718, 18715, 18705, 18704, 18701, 18695,
18689, 18683, 18677, 18655, 18652, 18648, 18646, 18640, 18634,
18633, 18631, 18619, 18613, 18611, 18590, 18585, 18579, 18576,
18569, 18569, 18569, 18563, 18558, 18557, 18557, 18556, 18554,
18549, 18544, 18540, 18540, 18533, 18519, 18519, 18519, 18514,
18514, 18513, 18507, 18502, 18502, 18501, 18501, 18499, 18495,
18492, 18492, 18491, 18488, 18484, 18472, 18466, 18464, 18459,
18459, 18453, 18451, 18450, 18445, 18443, 18442, 18423, 18422,
18411, 18401, 18400, 18397, 18397, 18397, 18396, 18387, 18386,
18366, 18361, 18360, 18340, 18338, 18331, 18317, 18313, 18302,
18297, 18289, 18283, 18283, 18277, 18274, 18271, 18271, 18269,
18263, 18261, 18261, 18261, 18260, 18250, 18247, 18239, 18208,
18200, 18199, 18197, 18194, 18190, 18185, 18185, 18180, 18179,
18177, 18177, 18176, 18170, 18169, 18156, 18155, 18152, 18151,
18142, 18142, 18138, 18137, 18136, NA, NA, NA, NA, NA, 19044), class = "Date"),
close_price = c(0, 0, 0, 1150001, 940000, 1253000, 979000,
881000, 1010000, 1060000, 1100000, 1070000, 1025000, 755000,
740000, 930000, 1250000, 990000, 930000, 931500, 975000,
950000, 850000, 865000, 921000, 790000, 778000, 935000, 1270000,
970000, 1061500, 960000, 1015000, 1100000, 1082000, 880000,
1000000, 1140000, 950000, 852000, 1045000, 795000, 950000,
950000, 880000, 850000, 945000, 949500, 1220000, 1015000,
899000, 1100000, 805000, 868000, 1102000, 1015000, 923000,
810000, 890000, 826000, 1140000, 970000, 830000, 790000,
1151000, 835500, 1080000, 870000, 1049000, 985000, 962000,
926000, 1008888, 950000, 810000, 760000, 955000, 930000,
985000, 1210000, 878000, 950000, 855000, 930000, 960000,
1180000, 980000, 960000, 898000, 1100000, 1215000, 885000,
985000, 880000, 1100000, 810000, 1210000, 810000, 970700,
1010000, 800000, 850000, 849000, 770000, 925000, 930000,
875000, 755000, 675000, 875500, 715000, 837000, 747000, 805000,
785000, 801200, 900000, 800000, 610000, 720000, 730000, 700000,
695000, 720000, 750000, 860000, 915000, 787000, 785000, 710000,
735000, 620000, 788000, 780000, 780000, 645000, 700000, 686000,
686000, 745000, 745000, 605000, 730000, 625000, 625000, 685000,
731000, 715000, 695000, 710000, 700000, 575000, 561000, 590000,
595000, 720500, 670000, 711000, 645000, 595000, 700000, 545000,
695000, 531000, 581000, 518000, 645000, 562500, 530000, 640000,
643000, 680000, 700000, 540000, 630000, 658000, 675000, 525000,
600000, 664500, 590000, 569595, 620500, 555000, 585000, 630000,
639900, 515000, 475000, 670000, 610000, 524888, 550000, 520000,
650000, 500000, 500000, 540000, 608000, 575000, 570000, 639900,
645000, 648500, 635000, 530000, 655000, 520000, 555000, 542500,
515000, 620000, 580140, 535000, 638888, 540000, 590000, 535000,
497500, 505000, 675000, 545000, 640000, 555000, 630000, 590000,
0, 0, 0, 0, 0, 985000)), row.names = c(NA, 232L), class = "data.frame")
>
You can create a new variable to assign the year to your desired period:
library(dplyr)
df %>% mutate(period_year = case_when(close_date < "2019-02-06" ~ 2018,
close_date >= "2019-02-06" & close_date < "2020-02-06" ~ 2019,
close_date >= "2020-02-06" & close_date < "2021-02-06" ~ 2020,
close_date >= "2021-02-06" & close_date < "2022-02-06" ~ 2021,
close_date >= "2022-02-06" & close_date < "2023-02-06" ~ 2022)) %>%
group_by(time_period = period_year) %>%
summarize(close_count = (close_date = n()), med_close_price = median(close_price, na.rm = TRUE))

Plotting density plots on Shiny in R; getting "Error: non-numeric argument to mathematical function"

I am building a shiny app and a feature of it involves using density plots to look at the distribution of numerical data. The problem is that when the app is running, I receive this error message when I try to plot:-
Error: non-numeric argument to mathematical function
Here is some mock data:-
outcome<-c("Answered","Abandoned","Engaged")
Duration_A<-floor(runif(300,min=1,max = 2000))
Duration_A
Duration_B<-floor(runif(300,min=200,max = 5000))
Duration_B
Duration_C<-floor(runif(300,min=400,max = 8000))
Duration_C
Center<-c("Center A","Center B","Center C","Center D","Center E","Center F")
df<-as.data.frame(cbind(outcome,Center,Duration_A,Duration_B,Duration_C))
df$Duration_A<-as.numeric(df$Duration_A)
df$Duration_B<-as.numeric(df$Duration_B)
df$Duration_C<-as.numeric(df$Duration_C)
df$Date<-structure(c(18191, 17659, 17592, 18392, 17799, 17781, 18398,
17636, 17719, 18085, 18505, 18494, 17580, 18507, 18270, 17613,
17559, 17731, 17619, 17578, 17678, 18434, 18265, 18210, 18380,
17696, 17617, 17755, 18246, 17684, 18424, 17701, 18236, 17567,
18416, 18222, 18293, 18136, 18315, 17762, 17999, 17824, 17715,
18487, 18398, 17702, 18428, 18478, 18178, 18110, 18042, 18029,
17559, 18281, 18025, 17769, 18160, 17665, 17589, 17867, 18065,
18357, 18154, 18483, 18021, 17902, 18527, 17966, 18249, 17751,
17971, 18177, 17959, 18522, 18068, 17566, 17973, 18127, 18265,
17552, 17889, 18395, 17812, 17898, 18357, 18442, 18454, 17659,
17644, 18519, 18547, 17591, 18095, 17847, 17700, 17584, 17745,
18182, 18246, 18102, 17689, 17822, 17804, 17597, 18159, 17555,
17566, 17770, 17572, 17720, 17753, 18237, 18308, 18410, 17805,
17579, 17857, 17698, 17865, 18342, 18338, 17998, 17817, 18128,
18249, 18104, 17731, 18418, 17796, 18477, 17824, 17939, 17546,
18059, 18496, 18056, 18488, 18394, 18526, 17945, 17998, 18402,
17882, 18325, 17744, 18422, 18300, 17601, 18451, 17852, 17560,
18238, 17923, 18267, 17593, 18342, 18021, 18075, 18243, 17688,
18519, 17737, 17786, 17648, 17705, 18221, 17680, 17631, 17601,
17612, 18043, 17534, 17881, 17783, 17716, 17665, 18260, 17918,
17805, 18155, 18423, 17688, 17642, 17890, 17953, 17772, 18315,
18389, 18167, 18463, 18294, 17773, 18215, 17999, 18520, 17687,
17961, 17955, 17955, 17758, 18029, 18055, 17769, 17975, 18556,
17745, 17883, 18500, 17812, 18448, 18121, 17740, 18082, 17613,
17761, 18030, 18321, 17755, 18047, 18180, 17848, 17756, 17784,
18401, 17634, 17823, 18458, 18159, 17753, 17813, 17822, 17685,
18374, 18505, 18104, 18501, 17856, 17932, 18246, 18465, 18098,
17934, 17737, 17731, 18471, 17899, 18342, 18077, 18317, 17639,
17540, 17971, 17798, 17827, 18121, 17641, 18469, 17686, 18239,
18309, 18081, 17908, 17897, 18092, 18510, 18551, 17822, 18557,
17904, 18170, 18246, 18382, 17766, 18511, 18105, 18381, 18369,
17952, 18274, 17709, 18403, 17686, 18401, 17700, 17954, 18217,
17935, 17811, 17719, 17885, 17771, 17735, 17555, 17555, 18446,
17928, 17748, 17547, 18487, 18221), class = "Date")
Here is the UI:-
ui<-fluidPage(
titlePanel('Minimal example'),
tabsetPanel(
tabPanel("Duration analysis",
sidebarLayout(
sidebarPanel(width = 4,
dateRangeInput("duration_daterange","Select date range", format="yyyy-mm-dd",
start=min(df$Date),
end=max(df$Date)),
pickerInput("duration_col", "Select duration column",
choices = c("Duration_A",
"Duration_B",
"Duration_C")),
pickerInput("contactoutcome", "Select contact outcome",
choices=c("Answered", "Abandoned", "Engaged")),
pickerInput("center_choice", "Select centers",
choices = c("Do not specify",
"Center A",
"Center B",
"Center C",
"Center D",
"Center E",
"Center F"),options = list('actions-box'=TRUE, 'live-search'=TRUE), multiple = T),
actionButton("runduration", "Run analysis")
),
mainPanel(
column(width = 8, box("Plot", plotOutput("densityplot"), width = "100%"))
)
)
)
))#end of fluidpage
And my server:-
server<-function(input,output,session){
observeEvent(input$runduration,{
outcome<-as.character(input$contactoutcome)
durationcolumn<-as.character(input$duration_col)
reactiveDF<-reactive({
if(input$center_choice=="Do not specify"){
df%>%
filter(Date>=input$duration_daterange[1] & Date<=input$duration_daterange[2])
} else
if(input$center_choice!="Do not specify"){
df%>%
filter(Date>=input$duration_daterange[1] & Date<=input$duration_daterange[2])%>%
filter(Center %in% input$center_choice)
}
})
output$densityplot<-renderPlot({
ggplot(reactiveDF(), aes(x=input$duration_col))+
geom_density()+scale_x_log10()
})
})
}
shinyApp(ui,server)
Note: if I want to select all centers for analysis, I want it to be executed by stating "Do not specify" in the pickerInput. I'm sure I've made a simple error somewhere but it's not so obvious to me. Any help would be greatly appreciated :)
Perhaps you should try this
server<-function(input,output,session){
observeEvent(input$runduration,{
outcome<-as.character(input$contactoutcome)
durationcolumn<-as.character(input$duration_col)
reactiveDF <- reactive({
if (is.null(input$center_choice)) {
data <- NULL
}else if(length(input$center_choice)==1 & input$center_choice=="Do not specify") {
data <- df %>% filter(Date>=input$duration_daterange[1] & Date<=input$duration_daterange[2])
}else {
data <- df %>% filter(Date>=input$duration_daterange[1] & Date<=input$duration_daterange[2])%>%
filter(Center %in% input$center_choice)
}
return(data)
})
output$densityplot<-renderPlot({
ggplot(data=req(reactiveDF()), aes(x=as.numeric(.data[[as.name(input$duration_col)]]))) +
geom_density()+scale_x_log10()
})
})
}

How to establish if the dates in a column are unique?

I have a simple question to ask. I couldn't find a solution on SO.
I have a column vector of dates and I would like to see whether months are unique or not. I tried to use unique but I may have used it in the wrong way.
An example:
Date = structure(c(11690, 11725, 11753, 11781, 11809, 11844, 11872,
11900, 11942, 11970, 11998, 12026, 12061, 12089, 12117, 12145,
12180, 12208, 12243, 12264, 12265, 12299, 12327, 12362, 12390,
12425, 12453, 12481, 12509, 12544, 12572, 12600, 12635, 12663,
12698, 12726, 12754, 12796, 12817, 12845, 12880, 12907, 12936,
12971, 12999, 13027, 13062, 13090, 13118, 13160, 13181, 13209,
13244, 13272, 13307, 13335, 13363, 13392, 13426, 13454, 13489,
13524, 13552, 13580, 13615, 13643, 13670, 13699, 13727, 13762,
13790, 13825, 13853, 13888, 13916, 13944, 13979, 14007, 14035,
14063, 14098, 14126, 14154, 14160, 14189, 14217, 14259, 14280,
14308, 14336, 14371, 14399, 14427, 14462, 14490, 14525, 14553,
14581, 14623, 14644, 14672, 14707, 14735, 14770, 14798, 14826,
14854, 14889, 14917, 14945, 14987, 15008, 15036, 15071, 15099,
15134, 15162, 15190, 15225, 15253, 15281, 15316, 15351, 15379,
15407, 15434, 15463, 15497, 15526, 15554, 15589, 15617, 15652,
15680, 15715, 15743, 15771, 15799, 15827, 15862, 15890, 15918,
15953, 15980, 16016, 16044, 16079, 16107, 16135, 16163, 16198,
16226, 16254, 16289, 16317, 16345, 16380, 16408, 16457, 16467,
16499, 16540, 16556, 16589, 16632, 16648, 16681, 16730, 16740,
16772, 16821, 16832, 16870, 16912, 16922, 16954, 17003, 17014,
17052, 17094, 17106, 17143, 17185, 17198, 17234, 17283, 17287,
17325, 17367, 17379, 17416, 17465, 17471, 17514, 17556, 17563,
17598, 17647, 17652, 17696, 17738, 17744, 17787, 17829, 17836,
17878, 17920, 17928, 17962, 17996, 18017, 18053, 18102, 18109,
18151, 18193, 18201, 18242), class = "Date")
In this column vector I would like to see whether there are two observations in the same month (there are 2 for "2003-07" and "2008-10"). I can I check it with one line of command?
Thanks!
In base R, we can format the Date to get only year and month, use table to count their occurrences, Filter to select only those month-year which occur more than once.
names(Filter(function(x) x > 1, table(format(Date, "%Y-%m"))))
#[1] "2003-07" "2008-10"
Same logic using zoo::as.yearmon.
names(Filter(function(x) x > 1, table(zoo::as.yearmon(Date))))
#[1] "Jul 2003" "Oct 2008"
library('lubridate')
library('tidyverse')
Date %>%
enframe() %>%
count(year(value), month(value)) %>%
filter(n > 1)
One line with as.yearmon from zoo
library(zoo)
Date[which(duplicated(as.yearmon(Date))==TRUE)]
[1] "2003-07-31" "2008-10-08"
Use str_sub to get year-month and use table to count frequency of occurence:
library(tidyverse)
year_month <- str_sub(Date, 1, 7) #this will extract characters 1-7 (year-mo)
result <- as_tibble(table(year_month))
#or pipe it all
year_month2 <- str_sub(Date, 1, 7) %>%
table() %>%
as_tibble()
#or filter to get only those with > 1 occurence
year_month3 <- str_sub(Date, 1, 7) %>%
table() %>%
as_tibble() %>%
filter(n > 1)

"Breakpoints" don't display dates but a continuous measure

I'm trying to detect some structural breaks in my series. The problem is that it displays continuous numbers rather than dates, despite my series being a ts object.
I found this solution but it doesn't work in my case.
This is my dataset and code:
df = structure(list(Date = structure(c(11690, 11725, 11753, 11781,
11809, 11844, 11872, 11900, 11942, 11970, 11998, 12026, 12061,
12089, 12117, 12145, 12180, 12208, 12243, 12265, 12299, 12327,
12362, 12390, 12425, 12453, 12481, 12509, 12544, 12572, 12600,
12631, 12663, 12698, 12726, 12754, 12796, 12817, 12845, 12880,
12907, 12936, 12971, 12996, 13027, 13062, 13090, 13118, 13160,
13181, 13209, 13244, 13272, 13307, 13335, 13363, 13392, 13426,
13454, 13489, 13524, 13552, 13580, 13615, 13643, 13670, 13699,
13726, 13762, 13790, 13825, 13853, 13888, 13916, 13944, 13979,
14007, 14035, 14063, 14098, 14126, 14154, 14189, 14217, 14259,
14280, 14308, 14336, 14371, 14399, 14427, 14462, 14490, 14525,
14553, 14581, 14623, 14644, 14672, 14707, 14735, 14770, 14798,
14826, 14854, 14889, 14917, 14945, 14987, 15008, 15036, 15071,
15099, 15134, 15162, 15190, 15225, 15253, 15281, 15316, 15351,
15379, 15407, 15434, 15463, 15497, 15526, 15554, 15589, 15617,
15652, 15680, 15715, 15743, 15771, 15799, 15827, 15862, 15890,
15918, 15953, 15980, 16016, 16044, 16079, 16107, 16135, 16163,
16198, 16226, 16254, 16289, 16317, 16345, 16380, 16408, 16457,
16467, 16499, 16540, 16556, 16589, 16632, 16648, 16681, 16730,
16740, 16772, 16821, 16832, 16870, 16912, 16922, 16954, 17003,
17014, 17052, 17094, 17106, 17143, 17185, 17198, 17234, 17283,
17287, 17325, 17367, 17379, 17416, 17465, 17471, 17514, 17556,
17563, 17598, 17647, 17652, 17696, 17738, 17744, 17787, 17829,
17836, 17878, 17920, 17928, 17962, 17996, 18017, 18053, 18102,
18109, 18151, 18193, 18201, 18242), class = "Date"), Fit = c(-2.01864866574525,
-2.51081772611801, -3.07896216512166, -3.02724722640642, -0.764567739958455,
-1.81459657078637, -2.13093106123547, -2.13093106123547, -1.91543051022373,
-1.31418467170089, -1.86573850139921, -2.42539556395029, -1.26414303389104,
-2.5433900359616, -1.99767537794132, -1.34728409808229, -1.64315561542246,
-0.687106946387411, -2.48041219070826, -2.48041219070826, -1.78680159845671,
-2.13687301896279, -2.6123923387608, -2.84563515334999, -3.41506073833104,
-2.74565641471061, -2.3682788731863, -1.77410755661286, -2.46191758167165,
-2.34829604543204, -2.37030627525843, -2.37030627525843, -1.75944822651175,
-2.21875944722698, -2.60249841953241, -2.6758310533823, -1.99157259723667,
-2.34860918772813, -3.24977356678388, -3.1998805120359, -3.64471855523435,
-2.80762315792921, -1.46910836105049, -1.46910836105049, -2.24153954648439,
-2.64718944648088, -2.61088260257325, -2.45889016663966, -2.59732356608009,
-3.49037732690643, -2.75284369990193, -2.56284320115193, -3.01470163344929,
-2.24267403694233, -3.36759206183078, -2.65899770326269, -2.65899770326269,
-3.83487166356133, -2.30405890853423, -3.83487166356133, -2.91420930066836,
-2.92649062542454, -2.45288174087111, -2.59203353843301, -2.37211828478634,
-2.35485833573613, -3.28807932670479, -3.28807932670479, -2.69856893402308,
-2.4482421908289, -3.42965769805337, -2.4002640291758, -1.72498017056001,
-2.10246950134994, -2.75989530409431, -2.04609226712013, -1.50354129352453,
-1.721866774994, -1.42652131446034, -1.99149928941641, -0.924508173463412,
-0.34424720787331, -1.47956887747857, -0.699260660882747, -0.705970004477605,
-2.89615299118885, -2.87168709242964, -3.49698896688496, -1.80133944039088,
-2.3066390154612, -2.16578274820764, -1.62064416630292, -1.50034889686538,
-1.64551702528081, -1.54888542275039, -2.36526073757675, -2.17980843362752,
-2.61987658921009, -2.99580131757171, -3.27224528690084, -2.90968038360951,
-2.43786428440244, -3.53447897261775, -2.94164730632451, -2.67914051197011,
-3.08963971104142, -3.30489291781406, -3.16112222668117, -3.78875309229899,
-3.27799815735179, -3.27546357519604, -3.28715323339141, -3.277230212033,
-2.73537305926061, -2.63360778909794, -3.42285993586989, -3.02592822360864,
-2.80491835054881, -3.1610709896381, -2.69912996631718, -2.48975331263934,
-0.134524884114962, -2.3485759078928, -1.67019370390805, -1.30630530826772,
-2.2627030307026, -1.19967822767006, -2.18902328617136, -2.32822018421121,
-1.05335780233708, -2.32765305050142, -3.70136681094428, -2.47624061269887,
-2.2395891355029, -0.873612387550348, -2.52750186765166, -1.58254587448088,
-1.3519682697086, -2.67716755653968, -2.09120993997918, -2.83947106437091,
-1.59227436938979, -2.70393468772428, -3.07475393381032, -1.72535933812472,
-2.62864985613023, -2.1788856069182, -1.66072722296379, -2.02593106477748,
-0.236862069023111, -2.20046381510765, 2.67747589830398, 2.03103654671807,
-0.411843127888723, 0.15392859458, 3.15264600488878, -0.115883494946465,
-0.115883494946465, -2.48408112888983, -2.13179786204659, -2.13179786204659,
-0.421916196665926, -1.81454302259545, -1.81454302259545, -0.719344207794365,
2.30623888786222, 2.30623888786222, 0.233349485130917, 0.807655736612547,
0.807655736612547, 0.00810498434400109, 1.73561337499853, 1.73561337499853,
2.05294933680988, 2.52332617911213, 2.52332617911213, 1.6590362509139,
2.44897469036036, 2.44897469036036, 1.48162277916561, 0.109012820753664,
0.109012820753664, -0.552382527186447, 0.342735574558364, 0.342735574558364,
0.352860787128766, 0.352860787128766, 0.352860787128766, 0.352860787128766,
0.726520452040748, 0.726520452040748, 0.176144461112964, 2.28171712015304,
2.28171712015304, 0.256037205994603, 0.10686264754173, 0.10686264754173,
0.10686264754173, 0.10686264754173, 0.10686264754173, -0.871047910469186,
-1.61892724112359, -1.61892724112359, -2.04847571973674, -2.04847571973674,
-2.04847571973674, 1.22730660297267, 1.94291846403141, 1.94291846403141,
2.64766715573213, 1.66439852581802, 1.66439852581802, 3.92242045719081,
2.92445832371034, 2.92445832371034, 4.09796304281725)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -216L))
word = ts(df$Fit, start = c(2002, 1), end = c(2019, 12), frequency = 12)
breaks = breakpoints(df$Fit ~ 1, h = 0.1)
plot(breaks)
coef(breaks, breaks = 2)
word[breaks$breakpoints] # July 2014
plot(df[,2], type = "b")
lines(fitted(breaks), col = 4, lwd = 2)
abline(v = df[breaks$breakpoints], lty = 2)
Can anyone help me sort this out?
Thanks a lot!
You created a timeseries object but you did not use it:
plot(word)
breaks <- breakpoints(word ~ 1, h = 0.1)
lines(fitted(breaks), col="blue", lwd=2)
abline(v = time(word)[breaks$breakpoints], lty = 2)
If I remember correctly, for irregularly spaced intervals you use zoo, you can try something like below:
library(strucchange)
library(zoo)
breaks = breakpoints(df$Fit ~ 1, h = 0.1)
zoo_df = as.zoo(df[,2])
time(zoo_df) = df[,1]
plot(zoo_df, type = "b")
abline(v = time(zoo_df)[breaks$breakpoints], lty = 2)
This gets you the plot with date on x-axis. Now to get the x-axis values, you do time(zoo_df) and it's a matter of subsetting this according to the index. For the fitted line, if you don't want the lines to join, you need to split them into breakpoints + 1 groups, and draw each one separately:
#group your breaks
grps = cut(1:length(zoo_df),
breaks=c(0,breaks$breakpoints,+Inf),
labels=1:(length(breaks$breakpoints)+1))
for(i in unique(grps)){
lines(time(zoo_df)[grps==i],fitted(breaks)[grps==i])
}

ggplot and two different geom_line(): the legend does not appear

I have the following code (dput data for the data sets is here):
ruz <- structure(list(date = structure(c(16617, 16618, 16619, 16622,
16623, 16624, 16625, 16626, 16629, 16630, 16631, 16632, 16633,
16636, 16637, 16638, 16639, 16640, 16643, 16644, 16645, 16646,
16647, 16650, 16651, 16652, 16653, 16654, 16657, 16658, 16659,
16660, 16661, 16664, 16665, 16666, 16667, 16668, 16671, 16672,
16673, 16674, 16675, 16678, 16679, 16680, 16681, 16682, 16685,
16686, 16687, 16688, 16689, 16692, 16693, 16694, 16695, 16696,
16699, 16700, 16701, 16702, 16703, 16706, 16707, 16708, 16709,
16710, 16713, 16714, 16715, 16716, 16717, 16720, 16721, 16722,
16723, 16724, 16727, 16728, 16729, 16730, 16731, 16734, 16735,
16736, 16737, 16738, 16741, 16742, 16743, 16744, 16745, 16748,
16749, 16750, 16751, 16752, 16755, 16756, 16757, 16758, 16759,
16762, 16763, 16764, 16765, 16766, 16769), class = "Date"), val1 = c(61.8954,
61.6297, 61.7859, 62.2135, 62.692, 63.026, 63.1511, 63.008, 62.7991,
62.5304, 62.3971, 62.1703, 61.9535, 61.7927, 61.8367, 62.1856,
62.7663, 63.5846, 64.859, 66.0745, 65.9327, 65.1387, 65.8362,
67.9171, 68.8917, 68.7714, 69.295, 69.9932, 70.0878, 70.0563,
71.0985, 71.7451, 71.9923, 72.3836, 72.6186, 72.7895, 74.1316,
76.3577, 79.6818, 80.4601, 79.637, 77.1905, 74.7982, 74.0868,
73.6844, 74.7815, 75.1829, 75.0874, 76.0362, 76.5334, 76.1729,
76.2661, 76.521, 76.5815, 76.1411, 74.7473, 74.229, 74.8073,
74.8083, 74.2189, 73.7976, 74.0765, 73.7323, 73.5319, 73.8853,
73.7351, 73.2462, 73.7254, 73.4657, 72.5227, 70.9683, 70.1357,
69.7459, 69.7823, 70.714, 71.5863, 71.3391, 70.2717, 70.1001,
70.3965, 70.964, 70.901, 69.6083, 69.0542, 70.325, 71.2619, 70.6912,
70.5258, 70.6195, 69.9786, 68.9845, 68.7403, 69.5909, 69.6324,
69.2801, 69.3884, 70.4129, 71.6024, 70.7705, 69.6673, 69.2706,
69.2517, 69.2788, 69.3983, 69.7819, 69.8404, 69.8002, 69.9816,
70.1287)), .Names = c("date", "val1"), row.names = c("2015-07-01",
"2015-07-02", "2015-07-03", "2015-07-06", "2015-07-07", "2015-07-08",
"2015-07-09", "2015-07-10", "2015-07-13", "2015-07-14", "2015-07-15",
"2015-07-16", "2015-07-17", "2015-07-20", "2015-07-21", "2015-07-22",
"2015-07-23", "2015-07-24", "2015-07-27", "2015-07-28", "2015-07-29",
"2015-07-30", "2015-07-31", "2015-08-03", "2015-08-04", "2015-08-05",
"2015-08-06", "2015-08-07", "2015-08-10", "2015-08-11", "2015-08-12",
"2015-08-13", "2015-08-14", "2015-08-17", "2015-08-18", "2015-08-19",
"2015-08-20", "2015-08-21", "2015-08-24", "2015-08-25", "2015-08-26",
"2015-08-27", "2015-08-28", "2015-08-31", "2015-09-01", "2015-09-02",
"2015-09-03", "2015-09-04", "2015-09-07", "2015-09-08", "2015-09-09",
"2015-09-10", "2015-09-11", "2015-09-14", "2015-09-15", "2015-09-16",
"2015-09-17", "2015-09-18", "2015-09-21", "2015-09-22", "2015-09-23",
"2015-09-24", "2015-09-25", "2015-09-28", "2015-09-29", "2015-09-30",
"2015-10-01", "2015-10-02", "2015-10-05", "2015-10-06", "2015-10-07",
"2015-10-08", "2015-10-09", "2015-10-12", "2015-10-13", "2015-10-14",
"2015-10-15", "2015-10-16", "2015-10-19", "2015-10-20", "2015-10-21",
"2015-10-22", "2015-10-23", "2015-10-26", "2015-10-27", "2015-10-28",
"2015-10-29", "2015-10-30", "2015-11-02", "2015-11-03", "2015-11-04",
"2015-11-05", "2015-11-06", "2015-11-09", "2015-11-10", "2015-11-11",
"2015-11-12", "2015-11-13", "2015-11-16", "2015-11-17", "2015-11-18",
"2015-11-19", "2015-11-20", "2015-11-23", "2015-11-24", "2015-11-25",
"2015-11-26", "2015-11-27", "2015-11-30"), class = "data.frame")
dfr <- structure(list(date = structure(c(16616, 16646, 16677, 16708,
16738, 16769), class = "Date"), val2 = c(0, 0.0920000000000001,
0.120392, 0.136077488, 0.0917704659680001, 0.0874033841041282
)), .Names = c("date", "val2"), row.names = c("2015-06", "2015-07",
"2015-08", "2015-09", "2015-10", "2015-11"), class = "data.frame")
ggplot:
ggplot() +
geom_line(data = ruz, aes(date, val1), size = 1.5, color = "blue") +
geom_line(data = dfr, aes(date, val2 * 100), size = 1.5, color = "red") +
scale_fill_manual(values = c("blue", "red"))
which produces the following graph:
My question is, how to modify it to show a legend after all?
If you want to avoid combining the data.frames, you can do this:
ggplot() +
geom_line(data = ruz, aes(date, val1, color = "a"), size = 1.5) +
geom_line(data = dfr, aes(date, val2 * 100, color = "b"), size = 1.5) +
scale_color_manual(name = "Colors",
values = c("a" = "blue", "b" = "red"))
In order to get a legend, you have to map something to color within aes. You can then use scale_color_manual to define the colors for the mapped character values. There are situations where this trick is easier and results in more readable code then reshaping/combining data.
You could rbind them and use color
ruz$type <- "ruz"
dfr$val2 <- dfr$val2 * 100
dfr$type <- "dfr"
names(ruz) <- names(dfr)
df <- rbind(ruz, dfr)
ggplot(df, aes(date, val2, color = type), size = 1.5) + geom_line()

Resources