Building and analysing trends in time series - r

I need advice about building time series. I have a bunch of files with monthly data for sea surface temperature for a number of locations across 408 months. I have aggregated the monthly values in a data frame with the following structure:
longitude, latitude, SST for month 1, SST for month 2, .... SST for month n
This is just a small piece of the data frame so you can see
dput(sst_subset)
structure(list(lon = c(-19.875, -19.625, -19.375, -19.125), lat = c(30.125,
30.125, 30.125, 30.125), sst = c(293.197412803228, 293.092251515256,
292.999348291526, 293.013219258958), sst.1 = c(292.490350607051,
292.504279178168, 292.502850606771, 292.438922036772), sst.2 = c(291.994832184947,
291.887412832509, 291.832896704695, 291.810638640677), sst.3 = c(292.095993473008,
292.066660140331, 292.091993473098, 292.110326806021), sst.4 = c(293.071606354427,
293.095799902274, 293.106445063326, 293.116122482465), sst.5 = c(294.981993408501,
294.996326741514, 295.004660074661, 295.018993407674), sst.6 = c(295.568703072806,
295.600315975326, 295.597735330222, 295.49418694544), sst.7 = c(296.250961122073,
296.175154672154, 296.079348222683, 296.052251449095)), .Names = c("lon",
"lat", "sst", "sst.1", "sst.2", "sst.3", "sst.4", "sst.5", "sst.6",
"sst.7"), row.names = c(NA, 4L), class = "data.frame")
To build a time series I have extracted a row of the data frame, that corresponds to all the monthly values in a location (defined by longitude and latitude), transposed to a column and created a new data frame
# Pull the monthly SST values for one location (the first row of sst_all).
# Columns 1 and 2 hold lon and lat, so the monthly series starts at column 3.
ncolumnes <- ncol(sst_all)
sst_point1 <- sst_all[1, 3:ncolumnes]
# Transpose the 1 x n row into an n x 1 column: one row per month.
sst1_df <- as.data.frame(t(sst_point1))
dput(sst1_ts)
structure(c(293.197412803228, 292.490350607051, 291.994832184947,
292.095993473008, 293.071606354427, 294.981993408501, 295.568703072806,
296.250961122073, 296.73166003606, 296.385154667461, 294.611660083445,
293.484186990367, 292.372896692626, 291.348207775437, 291.627090257683,
291.957326809441, 292.71063862056, 293.545326773947, 295.897412742879,
296.671928854599, 296.681326703851, 296.483864342674, 294.934660076226,
293.76709020985, 292.45870314232, 291.399993488565, 291.446767681068,
291.918993476964, 292.889025713347, 293.71099343691, 294.01418697852,
296.219025638916, 296.90166003226, 296.119993383065, 294.936326742855,
293.405154734069, 291.834509607885, 291.638564911804, 291.527412840556,
292.055326807251, 292.020961216621, 294.573660084295, 295.850315969738,
295.978380483004, 296.863660033109, 297.228380455065, 296.00866005222,
294.711606317771, 293.067735386772, 291.577136341748, 291.426445100877,
291.602993484028, 292.42096120768, 293.742993436195, 294.709348253305,
295.973219192797, 296.913993365318, 296.213219187433, 294.494326752735,
293.59225150408, 292.492251528667, 291.838207764485, 292.225477341082,
292.385993466526, 294.063864396765, 295.407326732328, 295.98386435385,
297.471928836718, 297.880660010378, 297.070638523107, 294.419993421063,
293.154509578381, 292.307735403759, 291.263441767479, 291.197412847932,
292.566660129155, 293.590316020253, 294.627660083088, 295.085477277156,
296.166122414292, 296.608660038809, 296.143864350273, 294.568660084407,
293.292251510786, 292.269670888481, 291.425350630855, 291.424832197687,
291.351326822986, 292.945799905626, 296.319660045269, 297.158380456629,
297.712251411991, 297.68699334804, 296.391928860858, 294.519660085502,
292.856445068914, 291.953864443927, 291.813922050742, 291.561606388179,
291.680660148958, 293.242574092542, 294.903326743593, 295.748057907507,
297.715799799009, 298.00999334082, 297.161606263009, 295.690326726002,
294.133541814562, 292.727412813734, 292.312493468169, 291.931928960546,
291.646326816392, 291.639670902563, 293.339326778551, 295.357090174311,
297.108703038385, 298.576993328147, 296.577735308317, 295.347660066995,
293.425154733622, 292.446445078078, 291.951027959007, 291.967735411359,
291.957993476093, 292.77838055453, 294.320326756624, 295.738703069007,
296.466122407586, 296.747993369028, 296.3506385392, 294.958326742363,
293.579348278562, 292.182574116234, 291.279279205549, 291.659993482754,
291.872993477993, 292.670316040816, 294.635326749583, 295.305477272238,
296.348057894096, 297.221993358433, 296.08612241608, 294.042993429489,
292.95160635711, 292.009670894293, 291.243207777784, 290.859025758721,
291.319993490353, 292.587412816863, 294.628660083066, 294.788057928965,
296.454832085258, 296.454326708925, 296.265477250781, 295.604326727924,
294.013219236607, 293.043541838926, 292.523922034872, 292.038703151708,
292.477326797818, 294.406122453631, 295.478993397392, 296.886122398199,
297.362251419814, 297.879993343726, 296.978703041291, 295.939326720436,
293.980638592173, 293.048703129133, 291.979993475601, 291.462896712966,
292.266326802534, 293.046445064667, 294.074993428774, 295.435477269333,
296.886122398199, 297.262660024191, 296.517090148383, 295.193326737111,
293.43967086233, 292.486122496546, 292.043564902752, 291.806767673021,
292.480660131077, 293.707735372467, 295.127326738586, 295.877735323964,
296.78192885214, 297.788326679108, 297.02450949188, 295.75766005783,
294.890315991195, 293.371606347722, 292.426422037051, 292.379670886022,
292.746993458457, 293.078057967186, 294.512993418984, 295.54612242815,
296.109348222013, 297.133660027074, 296.816767561039, 295.519326729824,
294.220638586809, 292.947412808816, 291.781422051468, 291.450638648723,
292.118660139168, 293.846122466148, 294.885993410647, 295.964832096211,
297.745154637062, 298.001326674347, 297.287735292448, 295.068993406557,
293.324509574581, 291.593864451974, 291.534821071758, 291.633219289804,
292.017993474752, 292.164187019871, 293.516660107921, 295.506122429044,
296.33321918475, 297.117660027432, 296.34741273282, 294.993660074907,
293.8032192413, 293.077735386549, 292.511779178, 292.344832177124,
292.459326798221, 293.437412797864, 295.860326722202, 296.416444989342,
297.083864329263, 298.678993325867, 297.782251410427, 295.657993393391,
293.652251502739, 293.274186995061, 292.307136325432, 291.922251541408,
291.564993484877, 292.452574110199, 293.996326763866, 294.823219218502,
296.541283696229, 297.421660020637, 296.747735304518, 295.771993390843,
294.041928913384, 293.317090219908, 292.421422037163, 292.680316040593,
292.577660128909, 293.240316028076, 295.254993402399, 296.815477238487,
297.524186900066, 298.126326671553, 297.598380446795, 295.563326728841,
294.207735361291, 293.43805795914, 293.115855519178, 292.753864426046,
292.466993464716, 292.925154744798, 296.035326718291, 296.538380470487,
298.612573972513, 298.241993335634, 297.065154652261, 295.770993390866,
293.72934827521, 292.379670886022, 291.370350632085, 291.601928967922,
292.473326797908, 293.597412794288, 294.678993415274, 296.042896610595,
297.383541741919, 297.729326680427, 296.714186918171, 295.008993407898,
293.465154732728, 292.365154757315, 292.279993468896, 291.722896707154,
292.651993460581, 293.469670861659, 295.145993404835, 296.262896605677,
297.257090131842, 297.550326684428, 297.544832060895, 296.194326714737,
294.499670838637, 293.095799902274, 292.836064885038, 292.445799916802,
292.78566012426, 293.216445060867, 294.3869934218, 295.256767595908,
296.333864346026, 296.692993370257, 296.250315960797, 295.23466006952,
293.713864404588, 292.874187004001, 292.378614156346, 291.931606379908,
292.099326806267, 293.999348269175, 295.055660073521, 296.170638543223,
296.729670788792, 297.024993362837, 296.646444984201, 294.817993412167,
293.368057960704, 292.39579991792, 291.174279207896, 291.343541876924,
291.974660142387, 292.742574103717, 294.785993412882, 296.685477241393,
297.067735297365, 297.318326689613, 297.265154647791, 296.419993376359,
294.439993420616, 293.224509576816, 293.140707735371, 292.928057970539,
293.028326785502, 293.116767643741, 294.067993428931, 295.034832116997,
296.24192886421, 297.204660025487, 297.0212836855, 295.618993394263,
294.195477297049, 293.26644505975, 292.1507077575, 291.842574123834,
292.212326803741, 292.898380551848, 293.698660103853, 294.868057927177,
296.104832093081, 297.440660020212, 296.802574012969, 295.234993402846,
293.692574082483, 292.617090235554, 291.535510726915, 291.344832199475,
292.175660137894, 293.799025693007, 295.795993390307, 296.195799832983,
297.432573998888, 298.643659993323, 297.612251414226, 296.027326718469,
294.692896640769, 293.446122475089, 292.611779175765, 292.494832173771,
293.027326785525, 293.948380528378, 294.144326760558, 295.259670821649,
296.524509503055, 297.014660029734, 296.854832076317, 295.413326732193,
294.306122455866, 292.857735391466, 291.982493475545, 291.549025743299,
292.710993459262, 293.044832161478, 294.210660092408, 296.063864352061,
296.959993364289, 298.161660004097, 297.040315943139, 295.179326737424,
293.474509571228, 292.265799920826, 291.409993488342, 291.042574141715,
291.81732681257, 293.374186992826, 294.908993410133, 296.215799832536,
297.686767541593, 298.667326659461, 297.63999334909, 295.589993394911,
294.077412783559), .Dim = c(408L, 1L), .Dimnames = list(NULL,
"1"), .Tsp = c(1982, 2015.91666666667, 12), class = "ts")
and then decompose in its additive trend, seasonal and random components and remove seasonal component from original data
sst1_dec<-decompose(sst1_ts)
sst1_noseason<-sst1_ts - sst1_dec$seasonal
Now, how do I get a linear regression for this data (sst1_noseason)? I have tried lm(), but as there is only a single variable in the data frame I think I can't. Should I build a new date column (time) with monthly dates and then run lm(sst ~ time)?
Is there any other R package that deals with time series that can do better? I have looked at ggseas and tidyr; they seem promising, but maybe I need to build that date column to run this analysis in any case.
My final objective is to have a single value for the trend in each longitude and latitude point and plot a map to look for the areas with the highest climatic trend for sea surface temperature.
Maybe there is a better procedure and you could point me to another R package running spatio-temporal analysis. Any help would be appreciated.
Thanks in advance for your help

I am not a fan of specialised classes in R, since they are usually not as intuitive and require additional vocabulary to deal with. Here's an attempt to convert the time series you made into a data.frame, using the zoo package:
library(zoo)
# Convert the ts into a data frame: the series values plus a yearmon time index.
df1 <- data.frame(zoo(sst1_ts), time=as.yearmon(time(sst1_ts)))
# Date version of the time index, used below as the regressor.
df1$jday <- as.Date(df1$time)
# Linear trend of the series (column X1) against date; the outer parentheses
# print the fitted model.
# NOTE(review): with a Date regressor the slope should be in units per day --
# confirm before converting it to a per-year trend.
(fit1<-lm(X1 ~ jday, df1))
Call:
lm(formula = X1 ~ jday, data = df1)
Coefficients:
(Intercept) jday
2.937e+02 6.025e-05
Plotting is more intuitive with a data.frame as well:
library(ggplot2)
base <- ggplot(df1, aes(jday, X1)) + geom_line() + stat_smooth(method="lm")
p<-base + scale_x_date(date_labels = "%Y")
You can further use an interactive package such as plotly to navigate the plot created with ggplotly.
library(plotly)
ggplotly(p)

Related

Add exponential fit to time series in ggplot

I've searched around StackOverflow for the issue I'm facing and can't quite find something similar.
I'm working with a large time series, with a portion of the dataset below. With that, I'm trying to find a way to add an exponential fit to it using ggplot. Others have used geom_smooth(method = "lm", formula = (y ~ exp(x))) but that doesn't work with time series data or POSIXct class variables and returns the error "Computation failed in stat_smooth(): NA/NaN/Inf in 'x'". Previously, I simply used method = "loess", span = 0.1, but it doesn't capture the nature of the data very well.
Any help you could provide would be greatly appreciated!
data<-structure(list(avg_time = structure(c(1551420000, 1551506400,
1551592800, 1551679200, 1551765600, 1551852000, 1551938400, 1552024800,
1552111200, 1552197600, 1552280400, 1552366800, 1552453200, 1552539600,
1552626000, 1552712400, 1552798800, 1552885200, 1552971600, 1553058000,
1553144400, 1553230800, 1553317200, 1553403600, 1553490000, 1553576400,
1553662800, 1553749200, 1553835600, 1553922000, 1554008400, 1554094800,
1554181200, 1554267600, 1554354000, 1554440400, 1554526800, 1554613200,
1554699600, 1554786000, 1554872400, 1554958800, 1555045200, 1555131600,
1555218000, 1555304400, 1555390800, 1555477200, 1555563600, 1555650000,
1555736400, 1555822800, 1555909200, 1555995600, 1556082000, 1556168400,
1556254800, 1556341200, 1556427600, 1556514000, 1556600400, 1556686800,
1556773200, 1556859600, 1556946000, 1557032400, 1557118800, 1557205200,
1557291600, 1557378000, 1557464400, 1557550800, 1557637200, 1557723600,
1557810000, 1557896400, 1557982800, 1558069200, 1558155600, 1558242000,
1558328400, 1558414800, 1558501200, 1558587600, 1558674000, 1558760400,
1558846800, 1558933200, 1559019600, 1559106000, 1559192400, 1559278800,
1559365200, 1559451600, 1559538000, 1559624400, 1559710800), tzone = "", class = c("POSIXct",
"POSIXt")), ChlaMed = c(7.49786224129294, 6.33265484668835, 8.02891354394607,
8.36583527788548, 7.21848200004542, 3.87836804380364, 6.12041645730209,
6.11129053757413, 3.82314913061958, 6.66935722139803, 10.5846145945807,
1.3922819262622, 2.46397555374784, 3.5387541991258, 9.4377648342203,
3.8359888625491, 9.92938437268906, 9.84931346445947, 7.61136832417625,
10.422317215878, 9.92795625389519, 10.2145441518957, 9.87188069822321,
6.75768698400432, 7.50045495545547, 7.3979513362914, 12.0524471187313,
11.0031790178811, 9.23929610466274, 12.2253404703908, 10.8260865574934,
5.79312487695101, 7.86859910828088, 13.9784098169617, 13.3707820039944,
8.11038273190177, 13.852156279962, 6.94197529427832, 10.1752314872054,
10.3435349795235, 14.4105077850521, 12.3100928225917, 11.4965118440029,
13.5176883961026, 10.4577799463301, 11.8074169933709, 13.245655700942,
13.5716513275785, 14.0549071116729, 14.6034112846714, 13.8998981372714,
11.0290734663967, 12.7725741301044, 14.0037640681163, 12.99276716795,
12.9177278644427, 15.6103759408624, 11.4159351143177, 14.7053508114725,
14.3380030612979, 14.846661975045, 14.1918024501013, 14.1478311220769,
15.4169566103641, 14.1251696199414, 13.4057098254015, 15.0936022765442,
14.94796281727, 11.9943525040373, 15.6886181916423, 15.7057435474498,
16.1855936444667, 17.4195546581076, 16.977113306558, 16.4826655395595,
14.273959862613, 18.6570604979906, 15.2969835201503, 15.6502935625097,
16.4619111787213, 17.8995674961064, 16.9938925321631, 17.409705465615,
19.7838080835222, 18.7386731671602, 19.6515930205419, 20.4308399460097,
18.787235170191, 18.758368516805, 19.2927499812326, 19.4763785903839,
20.4249755976496, 19.0471858942877, 20.0134726662527, 20.9237871993584,
20.0967875761179, 20.7116516016657)), row.names = c(NA, -97L), class = c("tbl_df",
"tbl", "data.frame"))
You could use nls() to get an exponential fit, make predictions, and plot those in addition to the raw data points:
# Fit an exponential curve with nls() and overlay the fitted values on the raw points.
data %>%
mutate(
# d: days elapsed since the earliest date in avg_time.
d = as.numeric(difftime(as.Date(avg_time),min(as.Date(avg_time)),units = "days")),
# preds: fitted values of ChlaMed = a * exp(r * d), with the search started at a = 0.5, r = 0.1.
preds =predict(nls(ChlaMed~a*exp(r*d), start = list(a=0.5, r=0.1), data=data))
) %>%
# Raw observations as points, fitted curve as a red line.
ggplot(aes(x=avg_time)) +
geom_point(aes(y=ChlaMed)) +
geom_line(aes(y=preds),color="red", linewidth=1.5)
You can also give the timetk package a try, using the natural log of the response variable.
library(timetk)
data %>%
plot_time_series_regression(
.date_var = avg_time,
.formula = log(ChlaMed) ~ avg_time,
.interactive =FALSE
)

Use lag(x,1) or lag(x,-1) for dynamic regression?

I have a simple yet somehow confusing question about dynamic regressions and lagged independent variables. I have 3 time series and I want to study the effect of 3 independent variables (namely PSVI, NSVI, and BTC_Ret) from the previous week on the current week's bitcoin log returns. I want to analyse, for example, whether a negative change in PSVI (Positive Sentiment Index) from the previous week can tell us something about the direction of the BTC returns in the following week.
I came across the lag function, which can do exactly that.
If I understand the function correctly, I would use the lag function in combination with the dyn$lm function from the package dyn to get the results I want.
My code would then look as follows:
test1 <- dyn$lm(BTC_Ret~lag(PSVI,1)+lag(NSVI,1)+lag(BTC_Ret,1))
summary(test1)
Am I right to assume that I need to use lag(x,1) and not lag(x,-1)?
And should I use dyn$lm to study the effect or is there a better way to do all of this?
My data looks as follows:
structure(c(0.151825062532955, -0.179352391776254, -0.171610266403897,
0.0159227765884022, -0.353420091085592, -0.0179223189753976,
0.260710954985742, -0.0878045204765083, 0.17494222283881, -0.183889954532262,
-0.15249960475038, 0.0325479482522972, -0.216135243885031, 0.0258548317723122,
0.170469815313808, 0.0552681180119521, 0.0676987678252168, 0.0247151614282206,
-0.101373110320685, -0.0244444101458825, -0.363995910827583,
-0.819549195465083, -0.311532754839479, -0.661660753934884, -0.036159476713393,
-0.0116417252109642, -0.219357256430676, -0.386169350367107,
-0.468384245564164, 0.226420789220966, -0.2366560332375, 0.2425676656972,
-0.351430535471613, -0.287492079068963, 0.548071569094531, -0.228973857164721,
-0.139490538928287, 0.247548840497568, -0.361502742177194, 0.0604938285432965,
0.619445016304069, 0.0947076213861557, -0.887137767470338, 0.0485516007581502,
0.0429273907756451, -0.701341407090506, 0.34191134646093, -0.428167056300805,
-0.298917079322128, 0.517537828051947, 0.0474069010338689, -0.118044838446349,
-0.414289228784203, 0.143198527419672, 0.0733053148180489, 0.0131259707878403,
-0.106103445964187, 0.107827719520595, -0.604074345624302, 0.444400965939648
), .Dim = c(20L, 3L), .Dimnames = list(NULL, c("BTC_Ret", "PSVI",
"NSVI")), .Tsp = c(2018, 2018.36538461538, 52), class = c("mts",
"ts", "matrix"))
Many thanks!
Assuming tt defined in the Note at the end (copied from the question) we use the following.
ts class is normally used with R's lag. The -1 in that means move the series 1 forward so that the previous value lines up with the current row. There is more information in ?lag.
Do not use dplyr's lag: it does not work with the ts class and, furthermore, uses the opposite convention. If you want to load dplyr, use library(dplyr, exclude = c("filter", "lag")) to ensure that you are using R's lag.
library(dyn)
test1 <- dyn$lm(BTC_Ret ~ lag(PSVI,-1) + lag(NSVI,-1) + lag(BTC_Ret,-1), tt)
These alternatives also work:
# Wrapper around lag() with the sign of the shift flipped, so that a positive k
# lines each observation up with the value k periods earlier.
Lag <- function(x, k = 1) {
  shift <- -k
  lag(x, shift)
}
test2 <- dyn$lm(BTC_Ret ~ Lag(PSVI) + Lag(NSVI) + Lag(BTC_Ret), tt)
test3 <- dyn$lm(BTC_Ret ~ lag(tt, -1), tt)
Note
tt <- structure(c(0.151825062532955, -0.179352391776254, -0.171610266403897, 0.0159227765884022, -0.353420091085592, -0.0179223189753976, 0.260710954985742, -0.0878045204765083, 0.17494222283881, -0.183889954532262, -0.15249960475038, 0.0325479482522972, -0.216135243885031, 0.0258548317723122, 0.170469815313808, 0.0552681180119521, 0.0676987678252168, 0.0247151614282206, -0.101373110320685, -0.0244444101458825, -0.363995910827583, -0.819549195465083, -0.311532754839479, -0.661660753934884, -0.036159476713393, -0.0116417252109642, -0.219357256430676, -0.386169350367107, -0.468384245564164, 0.226420789220966, -0.2366560332375, 0.2425676656972, -0.351430535471613, -0.287492079068963, 0.548071569094531, -0.228973857164721, -0.139490538928287, 0.247548840497568, -0.361502742177194, 0.0604938285432965, 0.619445016304069, 0.0947076213861557, -0.887137767470338, 0.0485516007581502, 0.0429273907756451, -0.701341407090506, 0.34191134646093, -0.428167056300805, -0.298917079322128, 0.517537828051947, 0.0474069010338689, -0.118044838446349, -0.414289228784203, 0.143198527419672, 0.0733053148180489, 0.0131259707878403, -0.106103445964187, 0.107827719520595, -0.604074345624302, 0.444400965939648 ), .Dim = c(20L, 3L), .Dimnames = list(NULL, c("BTC_Ret", "PSVI", "NSVI")), .Tsp = c(2018, 2018.36538461538, 52), class = c("mts", "ts", "matrix"))

How to extract time series from a raster stack in R

I have created a RasterStack from NDVI layers of the MODIS data. Now I want to extract time series data from different locations of this data so that I can use the BFAST/greenbrown package to estimate trend and breakpoints.
Here is how I have created the stack:
#runGdal(Job="testJob","MYD13Q1",begin = "2018.01.09", end = "2018.12.27",
# tileH = 26:29, tileV = 4:7
# , SDSstring = "1000000000000000000000")
###NDVI files path
NDVI_files_path <- "/media/MyData/Data/MODIS/PROCESSED/MYD13Q1.006_20190527193158"
# List every GeoTIFF in the NDVI directory, with full paths.
# `pattern` is a regular expression, so the dot is escaped to match only a
# literal ".tif" extension (an unescaped "." would match any character).
all_NDVI_files <- list.files(
  NDVI_files_path,
  full.names = TRUE,
  pattern = "\\.tif$"
)
all_NDVI_files
### Raster Stack
NDVI_stack <- stack(all_NDVI_files)
How can I extract time series data for any specific area in the raster stack?
You can use lubridate and rts to create a RasterStackTS object as follows:
#Load libraries
library(rts)
library(lubridate)
#Reproducible example (use your files here)
all_NDVI_files = c('MOD13Q1.006__250m_16_days_EVI_doy2000049_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2000065_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2000081_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2000097_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2000113_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2000129_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2000145_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2000161_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2000177_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2000193_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2000209_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2000225_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2000241_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2000257_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2000273_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2000289_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2000305_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2000321_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2000337_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2000353_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2001001_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2001017_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2001033_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2001049_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2001065_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2001081_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2001097_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2001113_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2001129_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2001145_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2001161_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2001177_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2001193_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2001209_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2001225_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2001241_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2001257_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2001273_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2001289_aid0001.tif',
'MOD13Q1.006__250m_16_days_EVI_doy2001305_aid0001.tif')
#Depending on file format, extract dates (this example uses MODIS 16-day composite NDVI)
# Cut the year (characters 34-37) and the day of year (characters 38-40) out of
# each file name.
ndvi.time <- data.frame(
  year = substr(basename(all_NDVI_files), 34, 37),
  julD = substr(basename(all_NDVI_files), 38, 40)
)
# Join both pieces as "year-day" and parse that string into a date-time,
# overwriting the character julD column with the parsed value.
ndvi.time[["dateJ"]] <- paste(ndvi.time[["year"]], ndvi.time[["julD"]], sep = "-")
ndvi.time[["julD"]] <- parse_date_time(ndvi.time[["dateJ"]], "y-j")
# create your RasterStackTS object
# Use your actual rasterstack here i.e. it's not reproducible with above code
ndvi.rts = rts(NDVI_stack ,ndvi.time$julD)
ndvi.rts
plot(ndvi.rts)
Hope this helps!

ARIMA forecasts are way off

I am using ARIMA (auto.arima) to forecast for 52 weeks. The time series model fits the data well (see plot below, red line is the fitted value). The input data has a decreasing trend.
The forecasts (highlighted area), however, seem to just take off after the actual values end.
How can the forecasts be tamed?
dput of the input
> dput(baseTs)
structure(c(5.41951956469523, 5.49312499014084, 5.56299025716832,
5.64442852110163, 5.71385023974044, 5.77578632033402, 5.82985917237953,
5.86346591034374, 5.89626165157029, 5.92013286862512, 5.94200331713403,
5.93996840759539, 5.93917517855891, 5.90355191030718, 5.87180377346416,
5.83190030607801, 5.79624428055153, 5.75377043604686, 5.71445345904649,
5.70025269940165, 5.69789272204017, 5.73728731204876, 5.77015169357394,
5.78936321107329, 5.80113284575595, 5.79449448552444, 5.78193215198878,
5.74003482344406, 5.71694163930612, 5.66689345413153, 5.614357635737,
5.58578389962286, 5.55824727570498, 5.58495146060423, 5.61344117957187,
5.63637441850401, 5.65948408172102, 5.65558124383951, 5.64909390802285,
5.6664546352889, 5.68205689033408, 5.69991437586231, 5.72273650369514,
5.72006065065194, 5.71556512542993, 5.6717608006789, 5.64610326418084,
5.57193975508467, 5.49406607804055, 5.40126523530993, 5.31513540386482,
5.238437956722, 5.15362077920702, 5.11960611878249, 5.08498887979172,
5.08408134201562, 5.07361213981111, 5.04830559379816, 5.01401413448689,
5.0418662607737, 5.06947584464062, 5.08771495309317, 5.10587165060358,
5.1438369937098, 5.1815251206981, 5.2318657906363, 5.29385492077065,
5.29652029253008, 5.29998067741868, 5.28242409629194, 5.2722770646788,
5.24927444462166, 5.22226735874711, 5.16555064465208, 5.10956459841778,
5.09439240612378, 5.07617974794969, 5.04418337811006, 5.0075619037348,
4.99108423417745, 4.9874504485194, 4.99135285004736, 4.99217791657733,
4.94874445528885, 4.90320874819525, 4.84508278068469, 4.79086127023963,
4.75236840849279, 4.71431573721527, 4.71936529020481, 4.72422850167074,
4.72203091743033, 4.71732868614755, 4.71175323610448, 4.70566162766782,
4.71165837247331, 4.71767529028615, 4.75129316683193, 4.7863855803437,
4.85248191548789, 4.91865394024373, 4.9590849617955, 4.99960686851895,
5.02020678181827, 5.04201201976595, 5.02025906892952, 4.99735920720967,
4.92520279823639, 4.84822505567723, 4.81118504683572, 4.77330440072099,
4.72636395544651, 4.6861111959621, 4.64912520396312, 4.61348981514599,
4.58517820348434, 4.56378688913207, 4.549011597464, 4.52900600122321,
4.56028365470815, 4.60248987909752, 4.65628990381626, 4.70496326660038,
4.73779351647955, 4.76616725791407, 4.79569018347378, 4.83185281078024,
4.85177852259102, 4.87488251014986, 4.89468916229158, 4.9077984323135,
4.92375782591088, 4.96363767543938, 5.05416277704822, 5.1426680212522,
5.232495043331, 5.32153608753653, 5.41780853915163, 5.51131526881126,
5.62791210324026), .Tsp = c(2015.05769230769, 2017.73076923077,
52), class = "ts")
The code used
# Fit an ARIMA model selected by AIC. The literal FALSE is used instead of F,
# which is an ordinary variable that can be reassigned.
# try() lets the script continue if the fit errors; fc is then a "try-error".
fc <- try(auto.arima(baseTs, ic = "aic", approximation = FALSE))
# Forecast weeks_forecasted steps ahead and keep the point forecasts.
baseFc <- forecast(fc, h = weeks_forecasted)
baseVolume_forecast_new <- baseFc$mean
What could be the reason behind the forecasts exploding?

transform "mFilter" object (list of Time-Series) to plot with ggplot2

I'm working with the hpfilter from the mFilter package and I can't seem to find a simple way to convert the list of Time-Series objects by hpfilter to a format I can use with ggplot2. I realize I can take it all apart and put it back together, but I imagine there's some simple way I have overlooked? I tried the code suggested in the SO discussion R list to data frame. However I couldn't find a way to convert the list of Time-Series objects to a data.frame in any simple way. The final goal is to reproduce the default plot produced by the mFilter object (see below)
Here's some example code
# install.packages(c("mFilter"), dependencies = TRUE)
library(mFilter)
data(unemp)
unemp.hp <- hpfilter(unemp, type=c("lambda"), freq = 1606)
# str(unemp.hp)
class(unemp.hp)
# [1] "mFilter"
plot(unemp.hp)
Hit <Return> to see next plot:
Also, why am I asked to " Hit <Return>" to see the plot?
The plot function calls plot.mFilter which has parameter ask=interactive() and it is set as TRUE for interactive sessions,
you could disable this by ask=FALSE in call for plot
plot(unemp.hp,ask=FALSE)
Data:
library(mFilter)
library(ggplot2)
library(gridExtra)
# library(zoo)
data(unemp)
unemp.hp <- hpfilter(unemp, type=c("lambda"), freq = 1606)
# str(unemp.hp)
class(unemp.hp)
# [1] "mFilter"
plot(unemp.hp,ask=FALSE)
To check for slots of object unemp.hp
names(unemp.hp)
# [1] "cycle" "trend" "fmatrix" "title" "xname" "call" "type" "lambda" "method"
#[10] "x"
The relevant objects are x (the main unemp series) , trend and cycle. All three objects are of class ts, we first convert them to
data.frame using custom function and plot using ggplot and gridExtra (for grid.arrange)
objectList = list(unemp.hp$x,unemp.hp$trend,unemp.hp$cycle)
names(objectList) = c("unemp","trend","cycle")
sapply(objectList,class)
#unemp trend cycle
# "ts" "ts" "ts"
Conversion from ts to data.frame:
# Convert one element of a named list of ts objects into a two-column data frame.
#
# x        Position of the series within `objects`.
# objects  Named list of ts objects; defaults to the script-level `objectList`,
#          so existing calls such as lapply(seq_along(objectList), fn_ts_to_DF)
#          keep working.
#
# Returns a data frame with a `date` column and one value column named after
# the list element.
fn_ts_to_DF <- function(x, objects = objectList) {
  series <- objects[[x]]
  DF <- data.frame(
    date = zoo::as.Date(time(series)),
    tseries = as.matrix(series)
  )
  # Rename the value column to the series name (e.g. "unemp", "trend", "cycle").
  colnames(DF)[2] <- names(objects)[x]
  DF
}
DFList=lapply(seq_along(objectList),fn_ts_to_DF)
names(DFList) = c("unemp","trend","cycle")
seriesTrend = merge(DFList$unemp,DFList$trend,by="date")
cycleSeries = DFList$cycle
Plots:
# Reshape seriesTrend to long format (date, variable, value) with base R.
# The original called melt() although none of the loaded packages
# (mFilter, ggplot2, gridExtra) provides it.
seriesCols <- setdiff(names(seriesTrend), "date")
seriesLong <- data.frame(
  date = rep(seriesTrend$date, times = length(seriesCols)),
  variable = factor(rep(seriesCols, each = nrow(seriesTrend)), levels = seriesCols),
  value = unlist(seriesTrend[seriesCols], use.names = FALSE)
)
# Top panel: original series and trend, one coloured line per variable.
gSeries <- ggplot(seriesLong, aes(x = date, y = value, color = variable)) +
  geom_line() +
  ggtitle('Hodrick-Prescot Filter for unemp') +
  theme(
    legend.title = element_blank(),
    legend.justification = c(0.1, 0.8),
    legend.position = c(0, 1),
    legend.direction = "horizontal",
    legend.background = element_rect(fill = "transparent", size = .5, linetype = "dotted")
  )
# Bottom panel: cyclical component.
gCycle <- ggplot(cycleSeries, aes(x = date, y = cycle)) +
  geom_line(color = "#619CFF") +
  ggtitle("Cyclical component (deviations from trend)")
# Stack the two panels vertically.
gComb <- grid.arrange(gSeries, gCycle, nrow = 2)
I tried to use the prior answer, but it didn't work for me.
I was getting the trend and cycle from a GDP quarterly series.
This data was a time series, so I did this, and worked for me:
# Gather the original series, its trend and its cycle under descriptive names.
list <- setNames(
  list(gdp_ln$x, gdp_ln$trend, gdp_ln$cycle),
  c("gdp", "trend", "cycle")
)
# Bind the three series column-wise into one data frame.
gdp <- data.frame(sapply(list, c))
Data:
> dput(gdp_ln)
structure(c(16.0275785360442, 16.0477176062761, 16.0718936895007,
16.0899963371452, 16.0875707712141, 16.0981391378223, 16.0988601288276,
16.1110815092797, 16.1244321329861, 16.1384685077996, 16.1451472350838,
16.148178781735, 16.161163569502, 16.1418894206861, 16.1634877625667,
16.1965372621761, 16.2216815829736, 16.2387677536829, 16.249412380526,
16.2690521777631, 16.2812185880068, 16.2951024427095, 16.2964024092233,
16.3127733881018, 16.3233290487177, 16.3369922768377, 16.3486515031696,
16.3489275708763, 16.3451264371757, 16.3524856433069, 16.3666338513045,
16.3801691039135, 16.3959993202765, 16.4135937981601, 16.4321203154987,
16.4488104165345, 16.4344524213544, 16.4302554348621, 16.4240722287677,
16.425087582257, 16.4350803035092, 16.4507216431126, 16.4670532627455,
16.4985227751756, 16.5094864456079, 16.5352746165004, 16.5504689966469,
16.5594976247513, 16.5754312535087, 16.592641573353, 16.6003340665324,
16.6063100774853, 16.6163655606058, 16.6370227688187, 16.6564363783854,
16.6577160570216, 16.6543595214556, 16.6773721241902, 16.6911082706925,
16.6935398489076, 16.6956102943815, 16.6798673418354, 16.6772670544553,
16.6678707780266, 16.6606889172344, 16.6678398460835, 16.6668473810049,
16.676020524389, 16.6775934319312, 16.6882821147755, 16.6957985899994,
16.7032334217472, 16.6926036544774, 16.7027214366522, 16.7103625977254,
16.7105344224572, 16.7042504851486, 16.7063913529457, 16.7100598555556,
16.6960591147037, 16.686477079594, 16.5740423808036, 16.6181175035946
), .Tsp = c(2000, 2020.5, 4), class = "ts")

Resources