Merge multiple xts objects with matching or nearest dates - r

I have two xts files with daily data (The data is only one date for a month).
The first file is this: - The dates in this xts are typically end of the month trading dates in a given month.
structure(c(-0.0329199999999997, 0.0874901766141374, 0.0545883292605231,
0.0687945180777207, 0.0550784545301166, 0.074678777314922, -0.0866534235058661,
0.161206236457536, 0.0704023794825748, 0.074691325661258), class = c("xts",
"zoo"), ret_type = "discrete", coredata_content = "discreteReturn", index = structure(c(1114732800,
1117497600, 1120089600, 1122595200, 1125446400, 1128038400, 1130716800,
1133308800, 1135900800, 1138665600), tzone = "UTC", tclass = "Date"), dim = c(10L,
1L), dimnames = list(NULL, "xts_left"))
The second xts file is:
structure(c(0.0052512320343876, 0.00540733325225928, 0.00580017750416384,
0.005701283061746, 0.00556285472234541, 0.00561113650865441,
0.00580424365658105, 0.005816988308881, 0.00571552920344676,
0.00574088497469671, 0.00574737930337577, 0.00589584054618375,
0.00592325487612455), class = c("xts", "zoo"), .CLASS = "double", index = structure(c(1107216000,
1109635200, 1112313600, 1114905600, 1117584000, 1120176000, 1122854400,
1125532800, 1128124800, 1130803200, 1133395200, 1136073600, 1138752000
), tzone = "UTC", tclass = "Date"), dim = c(13L, 1L))
This is how I want the output of the merge: To pick the value from right xts that corresponds to the closest date value in the left xts. For example, the value on 29-04-2005 be matched with the nearest, i.e. 01-05-2005 (dd-mm-yyyy format).
I have seen a possible way to do this using data.table with rolling joins, but I wanted to know if there is a way to do this within the xts (or similar) framework.

Using x1 and x2 from the Note at the end, define a function near which, given a date tt, finds the nearest date in x2 and returns the corresponding data value. Then apply that function to each date in x1.
# Look up the observation of `x` whose timestamp lies closest to `tt`.
#
# tt: a single date/time that can be subtracted from time(x).
# x:  the series to search. Defaults to the x2 defined in the Note, so the
#     original one-argument call near(tt) keeps working unchanged.
#
# Returns a bare numeric value rather than a 1 x 1 xts object, so results can
# be collected with vapply(). If two timestamps are equally close, the first
# one in `x` wins because which.min() returns the first minimum.
near <- function(tt, x = x2) {
  gap <- abs(time(x) - tt)
  as.numeric(x[which.min(gap)])
}
# Add a column xts_right to x1 holding, for every x1 date, the value that
# near() returns for it. vapply() is used instead of sapply() so the result is
# guaranteed to be one numeric per date (it errors instead of silently
# returning a list when near() yields anything else).
x12 <- transform(
  x1,
  xts_right = vapply(time(x1), near, numeric(1))
)
x12
giving:
xts_left xts_right
2005-04-29 -0.03292000 0.005701283
2005-05-31 0.08749018 0.005562855
2005-06-30 0.05458833 0.005611137
2005-07-29 0.06879452 0.005804244
2005-08-31 0.05507845 0.005816988
2005-09-30 0.07467878 0.005715529
2005-10-31 -0.08665342 0.005740885
2005-11-30 0.16120624 0.005747379
2005-12-30 0.07040238 0.005895841
2006-01-31 0.07469133 0.005923255
In the example shown in the question the nearest x2 is always at a strictly later date than x1 and x2 starts before x1. If those are general features of the problem it could alternately be expressed as:
# findInterval() gives, for each x1 date, the position of the last x2 date that
# is <= it; adding 1 steps to the next x2 date. This relies on the assumptions
# stated above (x2 starts before x1 and the nearest x2 date is always later).
transform(x1, xts_right = coredata(x2)[findInterval(time(x1), time(x2)) + 1])
Note
x1 <-
structure(c(-0.0329199999999997, 0.0874901766141374, 0.0545883292605231,
0.0687945180777207, 0.0550784545301166, 0.074678777314922, -0.0866534235058661,
0.161206236457536, 0.0704023794825748, 0.074691325661258), .Dim = c(10L,
1L), class = c("xts", "zoo"), ret_type = "discrete",
coredata_content = "discreteReturn", index = structure(c(1114732800,
1117497600, 1120089600, 1122595200, 1125446400, 1128038400, 1130716800,
1133308800, 1135900800, 1138665600), tzone = "UTC", tclass = "Date"),
.Dimnames = list(NULL, "xts_left"))
x2 <-
structure(c(0.0052512320343876, 0.00540733325225928, 0.00580017750416384,
0.005701283061746, 0.00556285472234541, 0.00561113650865441,
0.00580424365658105, 0.005816988308881, 0.00571552920344676,
0.00574088497469671, 0.00574737930337577, 0.00589584054618375,
0.00592325487612455), .Dim = c(13L, 1L), class = c("xts", "zoo"
), .CLASS = "double", index = structure(c(1107216000, 1109635200,
1112313600, 1114905600, 1117584000, 1120176000, 1122854400, 1125532800,
1128124800, 1130803200, 1133395200, 1136073600, 1138752000),
tzone = "UTC", tclass = "Date"))

Related

time average for specific time range in r

I am trying to extract average values of all variables between 0 to 40 minutes every hour.
dput(head(df))
structure(list(DateTime = structure(c(1563467460, 1563468060,
1563468660, 1563469260, 1563469860, 1563470460), class = c("POSIXct",
"POSIXt"), tzone = "GMT"), date = structure(c(1563467460, 1563468060,
1563468660, 1563469260, 1563469860, 1563470460), class = c("POSIXct",
"POSIXt"), tzone = "GMT"), Date = structure(c(18095, 18095, 18095,
18095, 18095, 18095), class = "Date"), TimeCtr = structure(c(1563467460,
1563468060, 1563468660, 1563469260, 1563469860, 1563470460), class = c("POSIXct",
"POSIXt"), tzone = "GMT"), MassConc = c(0.397627, 0.539531, 0.571902,
0.608715, 0.670382, 0.835773), VolConc = c(175.038, 160.534,
174.386, 183.004, 191.074, 174.468), NumbConc = c(234.456, 326.186,
335.653, 348.996, 376.018, 488.279), MassD = c(101.426, 102.462,
101.645, 102.145, 101.255, 101.433)), .Names = c("DateTime",
"date", "Date", "TimeCtr", "MassConc", "VolConc", "NumbConc",
"MassD"), row.names = c(NA, 6L), class = "data.frame")
What I've tried so far..
hourly_mean<-mydata %>%
filter(between(as.numeric(format(DateTime, "%M")), 0, 40)) %>%
group_by(DateTime=format(DateTime, "%Y-%m-%d %H")) %>%
summarise(variable1_mean=mean(variable1))
But it gives me a single average value for the whole period. Any help is very much welcomed.
We can convert DateTime to a date-time with ymd_hm, use ceiling_date with an hourly unit to round DateTime, extract the minutes from DateTime and filter to keep the rows whose minute is at most 40, then group_by hour and take the mean of the values.
library(lubridate)
library(dplyr)
# Mean of every "*Conc" column over minutes 0-40 of each clock hour.
df %>%
  dplyr::mutate(
    # Parse the DateTime text into date-times.
    DateTime = ymd_hm(DateTime),
    # floor_date() is used here rather than ceiling_date(): ceiling_date()
    # leaves an instant that is exactly on the hour unchanged, so a reading at
    # hh:00 would be grouped with minutes 1-40 of the *previous* hour.
    # Flooring labels every reading with the start of its own hour.
    hour = floor_date(DateTime, "hour"),
    minutes = minute(DateTime)
  ) %>%
  # Keep only readings taken in minutes 0..40 of their hour.
  filter(minutes <= 40) %>%
  group_by(hour) %>%
  summarise_at(vars(ends_with("Conc")), mean)
data
df <- structure(list(DateTime = structure(1:7, .Label = c("2019-08-0810:07",
"2019-08-0810:17", "2019-08-0810:27", "2019-08-0810:37", "2019-08-0810:47",
"2019-08-0810:57", "2019-08-0811:07"), class = "factor"), MassConc = c(0.556398,
1.06868, 0.777654, 0.87289, 0.789704, 0.51948, 0.416676), NumbConc = c(588.069,
984.018, 964.634, 997.678, 1013.52, 924.271, 916.357), VolConc = c(582.887,
979.685, 963.3, 994.178, 1009.52, 922.104, 916.856), Conc = c(281.665,
486.176, 420.058, 422.101, 429.841, 346.539, 330.282)), class =
"data.frame", row.names = c(NA, -7L))

Merging xts in R - Converting Characters to NA

I have 3 xts objects
logged <- xts::xts(x = loggedInUsers$loggedInUsers, order.by = Sys.time())
loadValue <- xts::xts(x = loadAvg, order.by = Sys.time())
hostname <- xts::xts(x = loadHost, order.by = Sys.time())
dput(hostname)
dput(loadValue)
dput(logged)
dput gives the following result
structure("deliverforgoodportal", .Dim = c(1L, 1L), index = structure(1551088127.27724, tzone = "", tclass = c("POSIXct",
"POSIXt")), class = c("xts", "zoo"), .indexCLASS = c("POSIXct",
"POSIXt"), tclass = c("POSIXct", "POSIXt"), .indexTZ = "", tzone = "")
structure(0, .Dim = c(1L, 1L), .Dimnames = list(NULL, "load"), index = structure(1551088127.27676, tzone = "", tclass = c("POSIXct",
"POSIXt")), .indexCLASS = c("POSIXct", "POSIXt"), tclass = c("POSIXct",
"POSIXt"), .indexTZ = "", tzone = "", class = c("xts", "zoo"))
structure(1, .Dim = c(1L, 1L), index = structure(1551088127.27637, tzone = "", tclass = c("POSIXct",
"POSIXt")), class = c("xts", "zoo"), .indexCLASS = c("POSIXct",
"POSIXt"), tclass = c("POSIXct", "POSIXt"), .indexTZ = "", tzone = "")
When I merge these three and print the result, the hostname is converted to NA
tmp <- merge.xts(hostname, logged, loadValue, all = TRUE)
print(tmp)
Output is: (hostname is NA)
hostname logged load
2019-02-25 09:48:47 NA 1 NA
2019-02-25 09:48:47 NA NA 0
2019-02-25 09:48:47 NA NA NA
Why is this coming as NA?
You should realise that an xts object is a timeseries and a matrix. Now a matrix can only contain one type of values, either character or numeric. But not both. Your merge is trying to combine a character value matrix (hostname) with numeric values (logged and load). This results in the hostname values being coerced to NA.
If you want to join this data, you have to use a data.frame (or data.table). Also note that your time values are not equal; they are off by fractions of a millisecond. So if you want to join on whole seconds, first use floor_date from the lubridate package. See below two examples, without and with lubridate. I use the package timetk to convert the xts objects to a tibble, but depending on your source data that might not be necessary.
with full_join, no lubridate
library(timetk)
library(dplyr)
# Convert each xts object to a tibble; a tibble can hold character and numeric
# columns side by side, which a single xts matrix cannot.
hostname <- tk_tbl(hostname)
loadValue <- tk_tbl(loadValue)
logged <- tk_tbl(logged)
# Full joins keep every timestamp from all three tables. The first join has no
# `by`, so dplyr joins on the common column and reports it in a message.
hostname %>%
full_join(loadValue) %>%
full_join(logged,
by = "index",
# hostname and logged both carry a column named "value"; the suffixes tell
# them apart in the result.
suffix = c("_hostname", "_logged"))
Joining, by = "index"
# A tibble: 3 x 4
index value_hostname load value_logged
<dttm> <chr> <dbl> <dbl>
1 2019-02-25 10:48:47 deliverforgoodportal NA NA
2 2019-02-25 10:48:47 NA 0 NA
3 2019-02-25 10:48:47 NA NA 1
with lubridate and left join:
# Truncate the `index` column of a table to whole seconds so that timestamps
# differing only in their fractional part become equal join keys.
floor_index <- function(tbl) {
  mutate(tbl, index = lubridate::floor_date(index, unit = "seconds"))
}

# Left-join the load and logged tables onto hostname using the floored index.
floor_index(hostname) %>%
  left_join(floor_index(loadValue)) %>%
  left_join(
    floor_index(logged),
    by = "index",
    suffix = c("_hostname", "_logged")
  )
Joining, by = "index"
# A tibble: 1 x 4
index value_hostname load value_logged
<dttm> <chr> <dbl> <dbl>
1 2019-02-25 10:48:47 deliverforgoodportal 0 1

Legend for plot.zoo

The code below uses plot.zoo function to plot two variables (in xts format) on the same panel with different colors. I need to generate a legend for variables, showing variable name and associated color (to distinguish one from the other). I could not find any examples. How can this be achieved?
library(xts)
XTS1 <- structure(c(0.228369333217119, 0.228289904084397, 0.228447230658951, 0.228791576808238, 0.230747528582736, 0.231930951391005, 0.23218095279688, 0.232655671733178, 0.232780332790686, 0.232737419368931, 0.232662430253128, 0.232742568778743, 0.233149068581523, 0.233801848401534, 0.233772492034511, 0.234624948258082, 0.23483556890299, 0.234796321794256, 0.23604671397228, 0.236133475815726, 0.236165665606426, 0.236028593613328, 0.23653404809854, 0.236405363926972, 0.236289501951192, 0.23595748354442, 0.235856148573546, 0.236430492268466, 0.236509873541663, 0.236320952959145, 0.236324663117125, 0.236230849130264, 0.236256142711785, 0.236443262637705, 0.236474635240435, 0.236478152698594, 0.236446010557742, 0.236433585738065, 0.236318823709326, 0.236343709733496, 0.236353484497234, 0.236293047785645, 0.236299398621113, 0.23632602130195, 0.236415592036867, 0.23644183016732, 0.236410860513062, 0.236407738860613, 0.236488620207848, 0.236521885178186, 0.236524525976254, 0.236543261190294, 0.236545852954505, 0.236554043549182, 0.236558510962661, 0.236500857781485, 0.236456811183667, 0.236429475415132, 0.23640238691203, 0.236406136067832, 0.236417470442709, 0.236338677460503, 0.236460106937516, 0.236499259900878, 0.236586474063617, 0.236543271644404, 0.236513672740977, 0.236369639034221, 0.236362697687712, 0.236797843111073, 0.237345016578261, 0.23783668813706, 0.237923216897962, 0.238248672738757, 0.23827277432708, 0.238353796658431, 0.238432929684165, 0.238426329912625, 0.238345183139418, 0.238458037645863, 0.238479196815153, 0.238658887053061, 0.238465939669501, 0.238523649852224, 0.238510811967291, 0.238618072611544, 0.238704138338676, 0.238707447338575, 0.23996516914318, 0.239424589962875, 0.239561518553478, 0.239421256904757, 0.239816852104079, 0.239445276547969, 0.239609107802299, 0.240012364436436, 0.24002458654273, 0.240130590848021, 0.239736081751086, 0.239857898122929, 0.249230991607954, 0.239805445609213, 0.220333558462471, 0.210410600760776, 
0.210378078591145, 0.240399004222477, 0.239804814201628, 0.240033060931268, 0.240089397482534, 0.240197041264942, 0.239940687229403, 0.239983219836939, 0.240022917769706, 0.240337756345468, 0.240638638953238, 0.240145924499555, 0.240402820873626, 0.240634154733532, 0.240611140050359, 0.240534865617682, 0.240951950137048, 0.241127845406939, 0.241125700147753, 0.241126235785769, 0.24107570794763, 0.241088551839332, 0.241092988182315, 0.241086472173767, 0.241083390403282, 0.241091888348645, 0.241083158087253, 0.241064751836135, 0.241064210485496, 0.241091022867438, 0.240953810187424, 0.240481188224338, 0.240621074487305, 0.240541023392083, 0.240396295416436, 0.240434646218793, 0.240610526667198, 0.240808207542551, 0.240631718256586, 0.24064699063015, 0.24068954416015, 0.240711128194114, 0.240723728654195, 0.240775499747258, 0.24076580240029, 0.240772143433637, 0.240793451595961, 0.240796153528682, 0.240819395590463, 0.240807928484687, 0.241070198676456, 0.241015031342511, 0.240992277437459, 0.241067975400449, 0.241065024303712, 0.241163875314065, 0.241210015023487, 0.24111104460957, 0.241143389170502, 0.241126406233165, 0.241236385470386, 0.241216017627354, 0.241242224901225, 0.241204855537627, 0.241161113202215, 0.241188009018793, 0.241155655393916, 0.241104796660299, 0.241229168370518, 0.241187912977691, 0.241205571324508, 0.241275488693839, 0.241201890756208, 0.241062150030789, 0.241056667454678, 0.241020382598737, 0.241011174472952, 0.241075153997628, 0.240860609085096, 0.241671867834864, 0.242222875244792, 0.242105277990476, 0.242789197437053, 0.24262651942461, 0.242835209836078, 0.242065722773181, 0.242128191776504, 0.241866750527435, 0.242222755781399, 0.242405920724485, 0.242704642796114, 0.243681731065406, 0.243823985845211, 0.244075416453679, 0.244168166839201), .indexCLASS = c("POSIXct", "POSIXt"), .indexTZ = "", tclass = c("POSIXct", "POSIXt"), tzone = "", class = c("xts", "zoo"), .CLASS = structure("double", class = "CLASS"), formattable = structure(list(
formatter = "formatC", format = structure(list(format = "f", digits = 2), .Names = c("format", "digits")), preproc = "percent_preproc", postproc = "percent_postproc"), .Names = c("formatter", "format", "preproc", "postproc")), index = structure(c(1413981900, 1413982800, 1413983700, 1413984600, 1413985500, 1413986400, 1413987300, 1413988200, 1413989100, 1413990000, 1413990900, 1413991800, 1413992700, 1413993600, 1413994500, 1413995400, 1413996300, 1413997200, 1413998100, 1413999000, 1413999900, 1414000800, 1414001700, 1414002600, 1414003500, 1414004400, 1414005300, 1414006200, 1414007100, 1414008000, 1414009800, 1414010700, 1414011600, 1414015200, 1414016100, 1414017000, 1414017900, 1414018800, 1414019700, 1414020600, 1414021500, 1414022400, 1414023300, 1414024200, 1414025100, 1414026000, 1414026900, 1414027800, 1414028700, 1414029600, 1414030500, 1414031400, 1414032300, 1414033200, 1414034100, 1414035000, 1414035900, 1414036800, 1414037700, 1414038600, 1414039500, 1414040400, 1414041300, 1414042200, 1414043100, 1414044000, 1414044900, 1414045800, 1414046700, 1414047600, 1414048500, 1414049400, 1414050300, 1414051200, 1414052100, 1414053000, 1414053900, 1414054800, 1414055700, 1414056600, 1414057500, 1414058400, 1414059300, 1414060200, 1414061100, 1414062000, 1414062900, 1414063800, 1414064700, 1414065600, 1414066500, 1414067400, 1414068300, 1414069200, 1414070100, 1414071000, 1414071900, 1414072800, 1414073700, 1414074600, 1414075500, 1414076400, 1414077300, 1414078200, 1414079100, 1414080000, 1414080900, 1414081800, 1414082700, 1414083600, 1414084500, 1414085400, 1414086300, 1414087200, 1414088100, 1414089000, 1414089900, 1414090800, 1414091700, 1414092600, 1414093500, 1414094400, 1414096200, 1414097100, 1414098000, 1414101600, 1414102500, 1414103400, 1414104300, 1414105200, 1414106100, 1414107000, 1414107900, 1414108800, 1414109700, 1414110600, 1414111500, 1414112400, 1414113300, 1414114200, 1414115100, 1414116000, 1414116900, 1414117800, 1414118700, 1414119600, 
1414120500, 1414121400, 1414122300, 1414123200, 1414124100, 1414125000, 1414125900, 1414126800, 1414127700, 1414128600, 1414129500, 1414130400, 1414131300, 1414132200, 1414133100, 1414134000, 1414134900, 1414135800, 1414136700, 1414137600, 1414138500, 1414139400, 1414140300, 1414141200, 1414142100, 1414143000, 1414143900, 1414144800, 1414145700, 1414146600, 1414147500, 1414148400, 1414149300, 1414150200, 1414151100, 1414152000, 1414152900, 1414153800, 1414154700, 1414155600, 1414156500, 1414157400, 1414158300, 1414159200, 1414160100, 1414161000, 1414161900, 1414162800, 1414163700, 1414164600, 1414165500, 1414166400, 1414167300), tzone = "", tclass = c("POSIXct", "POSIXt")), .Dim = c(199L, 1L))
XTS2 <- structure(c(0.238369333217119, 0.238289904084397, 0.238447230658951, 0.238791576808238, 0.230747528582736, 0.231930951391005, 0.23218095279688, 0.232655671733178, 0.232780332790686, 0.232737419368931, 0.232662430253128, 0.232742568778743, 0.233149068581523, 0.233801848401534, 0.233772492034511, 0.234624948258082, 0.23483556890299, 0.234796321794256, 0.23604671397238, 0.236133475815726, 0.236165665606426, 0.236028593613328, 0.23653404809854, 0.236405363926972, 0.236289501951192, 0.23595748354442, 0.235856148573546, 0.236430492368466, 0.236509873541663, 0.236320952959145, 0.236324663117125, 0.236230849130264, 0.236256142711785, 0.236443262637705, 0.236474635240435, 0.236478152698594, 0.236446010557742, 0.236433585738065, 0.236318823709326, 0.236343709733496, 0.236353484497234, 0.236293047785645, 0.236299398621113, 0.23632602130195, 0.236415592036867, 0.23644183016732, 0.236410860513062, 0.236407738860613, 0.236488620207848, 0.236521885178186, 0.236524525976254, 0.236543261190294, 0.236545852954505, 0.236554043549182, 0.236558510962661, 0.236500857781485, 0.236456811183667, 0.236429475415132, 0.23640238691203, 0.236406136067832, 0.236417470442709, 0.236338677460503, 0.236460106937516, 0.236499259900878, 0.236586474063617, 0.236543271644404, 0.216513672740977, 0.216369639034221, 0.216362697687712, 0.216797843111073, 0.217345016578261, 0.21783668813706, 0.217921216897962, 0.218248672738757, 0.21827277432708, 0.218353796658431, 0.218432929684165, 0.218426329912625, 0.218345183139418, 0.218458037645863, 0.218479196815153, 0.238658887053061, 0.238465939669501, 0.238523649852224, 0.238510811967291, 0.238618072611544, 0.238704138338676, 0.238707447338575, 0.23996516914318, 0.239424589962875, 0.239561518553478, 0.239421256904757, 0.239816852104079, 0.239445276547969, 0.239609107802299, 0.240012364436436, 0.24002458654273, 0.240130590848021, 0.239736081751086, 0.239857898122929, 0.239230991607954, 0.239805445609213, 0.240333558462471, 0.240410600760776, 
0.240378078591145, 0.240399004222477, 0.239804814201628, 0.240033060931268, 0.240089397482534, 0.240197041264942, 0.239940687229403, 0.239983219836939, 0.240022917769706, 0.240337756345468, 0.240638638953238, 0.240145924499555, 0.240402820873626, 0.240634154733532, 0.240611140050359, 0.240534865617682, 0.240951950137048, 0.241127845406939, 0.241125700147753, 0.241126235785769, 0.24107570794763, 0.241088551839332, 0.241092988182315, 0.241086472173767, 0.241083390403282, 0.241091888348645, 0.241083158087253, 0.241064751836135, 0.241064210485496, 0.241091022867438, 0.240953810187424, 0.240481188224338, 0.240621074487305, 0.240541023392083, 0.240396295416436, 0.240434646218793, 0.240610526667198, 0.240808207542551, 0.240631718256586, 0.24064699063015, 0.24068954416015, 0.240711128194114, 0.240723728654195, 0.240775499747258, 0.24076580240029, 0.240772143433637, 0.240793451595961, 0.240796153528682, 0.240819395590463, 0.240807928484687, 0.241070198676456, 0.241015031342511, 0.240992277437459, 0.241067975400449, 0.241065024303712, 0.241163875314065, 0.241210015023487, 0.24111104460957, 0.241143389170502, 0.241126406233165, 0.241236385470386, 0.241216017627354, 0.241242224901225, 0.241204855537627, 0.241161113202215, 0.241188009018793, 0.241155655393916, 0.241104796660299, 0.241229168370518, 0.241187912977691, 0.241205571324508, 0.241275488693839, 0.241201890756208, 0.241062150030789, 0.241056667454678, 0.241020382598737, 0.241011174472952, 0.241075153997628, 0.240860609085096, 0.241671867834864, 0.242222875244792, 0.242105277990476, 0.242789197437053, 0.24262651942461, 0.242835209836078, 0.242065722773181, 0.242128191776504, 0.241866750527435, 0.242222755781399, 0.242405920724485, 0.242704642796114, 0.243681731065406, 0.243823985845211, 0.244075416453679, 0.244168166839201), .indexCLASS = c("POSIXct", "POSIXt"), .indexTZ = "", tclass = c("POSIXct", "POSIXt"), tzone = "", class = c("xts", "zoo"), .CLASS = structure("double", class = "CLASS"), formattable = structure(list(
formatter = "formatC", format = structure(list(format = "f", digits = 2), .Names = c("format", "digits")), preproc = "percent_preproc", postproc = "percent_postproc"), .Names = c("formatter", "format", "preproc", "postproc")), index = structure(c(1413981900, 1413982800, 1413983700, 1413984600, 1413985500, 1413986400, 1413987300, 1413988200, 1413989100, 1413990000, 1413990900, 1413991800, 1413992700, 1413993600, 1413994500, 1413995400, 1413996300, 1413997200, 1413998100, 1413999000, 1413999900, 1414000800, 1414001700, 1414002600, 1414003500, 1414004400, 1414005300, 1414006200, 1414007100, 1414008000, 1414009800, 1414010700, 1414011600, 1414015200, 1414016100, 1414017000, 1414017900, 1414018800, 1414019700, 1414020600, 1414021500, 1414022400, 1414023300, 1414024200, 1414025100, 1414026000, 1414026900, 1414027800, 1414028700, 1414029600, 1414030500, 1414031400, 1414032300, 1414033200, 1414034100, 1414035000, 1414035900, 1414036800, 1414037700, 1414038600, 1414039500, 1414040400, 1414041300, 1414042200, 1414043100, 1414044000, 1414044900, 1414045800, 1414046700, 1414047600, 1414048500, 1414049400, 1414050300, 1414051200, 1414052100, 1414053000, 1414053900, 1414054800, 1414055700, 1414056600, 1414057500, 1414058400, 1414059300, 1414060200, 1414061100, 1414062000, 1414062900, 1414063800, 1414064700, 1414065600, 1414066500, 1414067400, 1414068300, 1414069200, 1414070100, 1414071000, 1414071900, 1414072800, 1414073700, 1414074600, 1414075500, 1414076400, 1414077300, 1414078200, 1414079100, 1414080000, 1414080900, 1414081800, 1414082700, 1414083600, 1414084500, 1414085400, 1414086300, 1414087200, 1414088100, 1414089000, 1414089900, 1414090800, 1414091700, 1414092600, 1414093500, 1414094400, 1414096200, 1414097100, 1414098000, 1414101600, 1414102500, 1414103400, 1414104300, 1414105200, 1414106100, 1414107000, 1414107900, 1414108800, 1414109700, 1414110600, 1414111500, 1414112400, 1414113300, 1414114200, 1414115100, 1414116000, 1414116900, 1414117800, 1414118700, 1414119600, 
1414120500, 1414121400, 1414122300, 1414123200, 1414124100, 1414125000, 1414125900, 1414126800, 1414127700, 1414128600, 1414129500, 1414130400, 1414131300, 1414132200, 1414133100, 1414134000, 1414134900, 1414135800, 1414136700, 1414137600, 1414138500, 1414139400, 1414140300, 1414141200, 1414142100, 1414143000, 1414143900, 1414144800, 1414145700, 1414146600, 1414147500, 1414148400, 1414149300, 1414150200, 1414151100, 1414152000, 1414152900, 1414153800, 1414154700, 1414155600, 1414156500, 1414157400, 1414158300, 1414159200, 1414160100, 1414161000, 1414161900, 1414162800, 1414163700, 1414164600, 1414165500, 1414166400, 1414167300), tzone = "", tclass = c("POSIXct", "POSIXt")), .Dim = c(199L, 1L))
z <- cbind(XTS1, XTS2)
colnames(z) <- c("XTS1", "XTS2")
plot.zoo(z, plot.type = "single", col = 1:2, xy.labels = "text")
Please note I need to use plot.zoo rather than plot.
Solution was provided in one of the comments. Here's full solution plus some explanatory notes:
# Put both series in one object, one named column per series.
z <- cbind(XTS1, XTS2)
colnames(z) <- c("XTS1", "XTS2")

# Draw both columns in a single panel, coloured 1 (black) and 2 (red).
plot.zoo(z, plot.type = "single", col = 1:2)

# Add a legend whose entries reuse the colours passed to plot.zoo().
legend(
  "topright",
  legend = c("XTS1", "XTS2"),
  col = 1:2,       # col: colour of each legend entry
  lty = 1,         # lty: line type per entry (e.g. lty = c(1, 2) => solid, dash)
  cex = .5,        # cex: font size
  bty = "n",       # bty: box around the legend ("n" = none)
  y.intersp = 1,   # y.intersp: vertical spacing between entries
  inset = c(0, 0)  # inset: offset from the keyword position, e.g. "topright"
                   #        (handy for xts, whose x axis is in POSIXct units)
)
# For point symbols instead of lines use pch (e.g. pch = c(1, 2) => circle,
# triangle).

Change data to numeric type to determine which distribution fits better

I am trying to figure out which distribution fits best logarithmic stock returns. Here is my code:
library(TTR)
sign="^GSPC"
start=19900101
end=20160101
x <- getYahooData(sign, start = start, end = end, freq = "daily")
x$logret <- log(x$Close) - lag(log(x$Close))
x=x[,6]
I want to use the function descdist(x, discrete = FALSE) which I got from this amazing post https://stats.stackexchange.com/questions/132652/how-to-determine-which-distribution-fits-my-data-best Nonetheless r gives me this error: Error in descdist(x, discrete = FALSE) : data must be a numeric vector How do I transform my data to numeric vector??
The output from dput(head(x)) is:
structure(c(NA, -0.00258888580664607, -0.00865029791190164, -0.00980414107803274,
0.00450431207515223, -0.011856706127011), class = c("xts", "zoo"
), .indexCLASS = "Date", .indexTZ = "UTC", tclass = "Date", tzone = "UTC", index = structure(c(631238400,
631324800, 631411200, 631497600, 631756800, 631843200), tzone = "UTC", tclass = "Date"), .Dim = c(6L,
1L), .Dimnames = list(NULL, "logret"))
Pre-process x using as.numeric(na.omit(x)), or simply run
descdist(as.numeric(na.omit(x)), discrete = FALSE)

How to remove a row from zoo/xts object, given a timestamp

I was happily running with this code:
z=lapply(filename_list, function(fname){
read.zoo(file=fname,header=TRUE,sep = ",",tz = "")
})
xts( do.call(rbind,z) )
until Dirty Data came along with this at the end of one file:
Open High Low Close Volume
2011-09-20 21:00:00 1.370105 1.370105 1.370105 1.370105 1
and this at the start of the next file:
Open High Low Close Volume
2011-09-20 21:00:00 1.370105 1.371045 1.369685 1.3702 2230
So rbind.zoo complains about a duplicate.
I can't use something like:
y <- x[ ! duplicated( index(x) ), ]
as they are in different zoo objects, inside a list. And I cannot use aggregate, as suggested here because they are a list of zoo objects, not one big zoo object. And I can't get one big object 'cos of the duplicates. Catch-22.
So, when the going gets tough, the tough hack together some for loops (excuse the prints and a stop, as this isn't working code yet):
indexes <- do.call("c", unname(lapply(z, index)))
dups=duplicated(indexes)
if(any(dups)){
duplicate_timestamps=indexes[dups]
for(tix in 1:length(duplicate_timestamps)){
t=duplicate_timestamps[tix]
print("We have a duplicate:");print(t)
for(zix in 1:length(z)){
if(t %in% index(z[[zix]])){
print(z[[zix]][t])
if(z[[zix]][t]$Volume==1){
print("-->Deleting this one");
z[[zix]][t]=NULL #<-- PROBLEM
}
}
}
}
stop("There are duplicate bars!!")
}
The bit I've got stumped on is assigning NULL to a zoo row doesn't delete it (Error in NextMethod("[<-") : replacement has length zero). OK, so I do a filter-copy, without the offending item... but I'm tripping up on these:
> z[[zix]][!t,]
Error in Ops.POSIXt(t) : unary '!' not defined for "POSIXt" objects
> z[[zix]][-t,]
Error in `-.POSIXt`(t) : unary '-' is not defined for "POSIXt" objects
P.S. While high-level solutions to my real problem of "duplicates rows across a list of zoo objects" are very welcome, the question here is specifically about how to delete a row from a zoo object given a POSIXt index object.
A small bit of test data:
list(structure(c(1.36864, 1.367045, 1.370105, 1.36928, 1.37039,
1.370105, 1.36604, 1.36676, 1.370105, 1.367065, 1.37009, 1.370105,
5498, 3244, 1), .Dim = c(3L, 5L), .Dimnames = list(NULL, c("Open",
"High", "Low", "Close", "Volume")), index = structure(c(1316512800,
1316516400, 1316520000), class = c("POSIXct", "POSIXt"), tzone = ""), class = "zoo"),
structure(c(1.370105, 1.370115, 1.36913, 1.371045, 1.37023,
1.37075, 1.369685, 1.36847, 1.367885, 1.3702, 1.36917, 1.37061,
2230, 2909, 2782), .Dim = c(3L, 5L), .Dimnames = list(NULL,
c("Open", "High", "Low", "Close", "Volume")), index = structure(c(1316520000,
1316523600, 1316527200), class = c("POSIXct", "POSIXt"), tzone = ""), class = "zoo"))
UPDATE: Thanks to G. Grothendieck for the row-deleting solution. In the actual code I followed the advice of Joshua and GSee to get a list of xts objects instead of a list of zoo objects. So my code became:
z=lapply(filename_list, function(fname){
xts(read.zoo(file=fname,header=TRUE,sep = ",",tz = ""))
})
x=do.call.rbind(z)
(As an aside, please note the call to do.call.rbind. This is because rbind.xts has some serious memory issues. See https://stackoverflow.com/a/12029366/841830 )
Then I remove duplicates as a post-process step:
# Post-process step: drop the low-volume bar wherever a timestamp occurs more
# than once in x.
dups <- duplicated(index(x))
if (any(dups)) {
  duplicate_timestamps <- index(x)[dups]
  # Rows that share a duplicated timestamp and have a volume of at most 1.
  to_delete <- x[(index(x) %in% duplicate_timestamps) & x$Volume <= 1]
  if (nrow(to_delete) == 0) {
    stop("Duplicate timestamps, and we cannot easily remove them just based on low volume.")
  }
  print("Will delete the volume=1 entry")
  # Keep every row that is not in the duplicate-timestamp group, OR that is in
  # the group but has a volume greater than 1.
  x <- x[!(index(x) %in% duplicate_timestamps) | x$Volume > 1]
}
If z1 and z2 are your zoo objects then to rbind while removing any duplicates in z2:
rbind( z1, z2[ ! time(z2) %in% time(z1) ] )
Regarding deleting points in a zoo object having specified times, the above already illustrates this but in general if tt is a vector of times to delete:
z[ ! time(z) %in% tt ]
or if we knew there were a single element in tt then z[ time(z) != tt ] .
rbind.xts will allow duplicate index values, so it could work if you convert to xts first.
# Convert every zoo object in the list z to xts, because rbind on xts objects
# accepts repeated index values.
x <- lapply(z, as.xts)
# Stack all pieces into one series.
y <- do.call(rbind, x)
# keep last value of any duplicates
y <- y[!duplicated(index(y),fromLast=TRUE),]
I think you'll have better luck if you convert to xts first.
a <- structure(c(1.370105, 1.370105, 1.370105, 1.370105, 1), .Dim = c(1L,
5L), index = structure(1316570400, tzone = "", tclass = c("POSIXct",
"POSIXt")), .indexCLASS = c("POSIXct", "POSIXt"), tclass = c("POSIXct",
"POSIXt"), .indexTZ = "", tzone = "", .Dimnames = list(NULL,
c("Open", "High", "Low", "Close", "Volume")), class = c("xts",
"zoo"))
b <- structure(c(1.370105, 1.371045, 1.369685, 1.3702, 2230), .Dim = c(1L,
5L), index = structure(1316570400, tzone = "", tclass = c("POSIXct",
"POSIXt")), .indexCLASS = c("POSIXct", "POSIXt"), tclass = c("POSIXct",
"POSIXt"), .indexTZ = "", tzone = "", .Dimnames = list(NULL,
c("Open", "High", "Low", "Close", "Volume")), class = c("xts",
"zoo"))
(comb <- rbind(a, b))
# Open High Low Close Volume
#2011-09-20 21:00:00 1.370105 1.370105 1.370105 1.370105 1
#2011-09-20 21:00:00 1.370105 1.371045 1.369685 1.370200 2230
# Timestamps that occur more than once in comb (kept for inspection).
dupidx <- index(comb)[duplicated(index(comb))]
# Keep exactly one row per timestamp: the last one carrying it.
# duplicated(..., fromLast = TRUE) flags every row that is followed by another
# row with the same timestamp, so negating it retains the final occurrence of
# each. Taking tail(comb[dupidx], 1) instead would keep a single row in total
# and lose the data of every other duplicated timestamp when more than one
# timestamp is repeated.
comb[!duplicated(index(comb), fromLast = TRUE), ]

Resources