Add a 3d surface at point 0 in plotly - r

I have a 3D plot using plotly which can take on negative and positive values. I would like to add a see through 3D flat surface at point 0 (see through here means setting alpha = 0.3 as in ggplot2) in order to emphasize the positive/negative values a little more. Alternatively adding a "grid" as in the background through the point 0 would be useful.
Code:
library(plotly)
library(dplyr)
df %>%
select(date, everything()) %>%
plot_ly(
x = colnames(subset(., select = c(2:4))), # probably not the most efficient method
y = ~date,
z = data.matrix(subset(., select = c(2:4))),
type = "surface",
colors = c("darkblue", "yellow", "darkred")
)
Data:
df <- structure(list(wind = c(0.938535690307617, 0.976551234722137,
0.954551994800568, 0.920722126960754, 0.889751732349396, 0.701366603374481,
0.718878328800201, 0.685763895511627, 0.677822828292847, 0.876205325126648,
0.67054146528244, 0.738650739192963, 0.725420415401459, 0.851324200630188,
0.6589714884758, 0.880357265472412, 0.997677683830261, 0.968335390090942,
0.882899045944214, 1.09453165531158, 1.06842839717865, 0.819347560405731,
0.995919525623322, 0.695173263549805, 0.860199570655823, 0.977508246898651,
0.807886302471161, 0.778182446956635, 0.886279463768005, 0.879809498786926,
0.903579652309418, 1.09579062461853, 1.03768181800842, 0.802022874355316,
0.928451955318451, 0.744936227798462, 0.621560990810394, 0.706887602806091,
0.749234974384308, 0.746754884719849, 0.65381270647049, 0.710927248001099,
0.517793655395508, 0.477172255516052, 0.631662607192993, 0.55011123418808,
0.584086775779724, 0.617783904075623, 0.438043504953384, 0.577566087245941,
0.684398949146271, 1.00848543643951, 0.957233726978302, 0.84071296453476,
0.90149587392807, 0.888661623001099, 0.923080563545227, 0.945727407932281,
0.965020060539246, 1.07903909683228, 0.870955109596252, 0.934546709060669,
0.93314516544342, 0.926109850406647, 0.981102645397186, 0.770903468132019,
0.829349219799042, 0.980291783809662, 1.00809383392334, 0.821328639984131,
0.89130437374115, 0.886775732040405, 0.896964132785797, 1.08184945583344,
1.17395043373108, 1.11537420749664, 1.15379846096039, 1.20203125476837,
1.12583827972412, 1.10014677047729, 0.891100168228149, 1.01484513282776,
1.01270127296448, 0.895487844944, 0.877909421920776, 1.11780989170074,
1.18047833442688, 1.14531397819519, 1.28634309768677, 1.23702371120453,
1.01186645030975, 1.15563869476318, 1.0918824672699, 1.2023059129715,
1.11806273460388, 1.11690294742584, 1.13390302658081, 1.10064888000488,
1.1418149471283, 1.06309700012207), holiday = c(-0.323977619409561,
-0.32814821600914, -0.335260361433029, -0.355545252561569, -0.347469061613083,
-0.349574476480484, -0.331554502248764, -0.351303607225418, -0.342486947774887,
-0.34791961312294, -0.342795491218567, -0.344868391752243, -0.344075173139572,
-0.342763870954514, -0.365125387907028, -0.301046937704086, -0.303365021944046,
-0.301353365182877, -0.316689401865005, -0.312132269144058, -0.317412197589874,
-0.331865310668945, -0.306569844484329, -0.306523144245148, -0.328399240970612,
-0.328934848308563, -0.340233236551285, -0.310919582843781, -0.352815061807632,
-0.324335247278214, -0.333071410655975, -0.25605234503746, -0.28141376376152,
-0.267955660820007, -0.261935144662857, -0.23365119099617, -0.204256281256676,
-0.271868377923965, -0.266900211572647, -0.268673747777939, -0.246791422367096,
-0.261210560798645, -0.275096118450165, -0.272033154964447, -0.307673662900925,
-0.316463977098465, -0.315838783979416, -0.278884381055832, -0.29575651884079,
-0.300951153039932, -0.275152236223221, -0.292507320642471, -0.283683449029922,
-0.333169460296631, -0.323910266160965, -0.317758291959763, -0.318425863981247,
-0.318803340196609, -0.316879868507385, -0.321441829204559, -0.342198520898819,
-0.336640536785126, -0.334549427032471, -0.304668426513672, -0.281678169965744,
-0.287939876317978, -0.278673946857452, -0.299236595630646, -0.295697629451752,
-0.297617554664612, -0.300621718168259, -0.294943511486053, -0.291149199008942,
-0.263380706310272, -0.264828890562057, -0.260132044553757, -0.294039487838745,
-0.284087061882019, -0.27564924955368, -0.275646597146988, -0.294898957014084,
-0.290615618228912, -0.264040410518646, -0.267250239849091, -0.27454400062561,
-0.22650308907032, -0.220207497477531, -0.233633011579514, -0.205283910036087,
-0.201104089617729, -0.235431581735611, -0.252072185277939, -0.257701843976974,
-0.233586445450783, -0.239779070019722, -0.233227252960205, -0.26639312505722,
-0.256356805562973, -0.248022571206093, -0.285306513309479),
month_10 = c(0.109010718762875, 0.112969301640987, 0.110368527472019,
0.107052445411682, 0.0947703272104263, 0.109930463135242,
0.106371931731701, 0.114236004650593, 0.102411419153214,
0.0184143912047148, 0.0301767271012068, 0.0376978516578674,
0.0472327470779419, 0.0622735135257244, 0.043902475386858,
0.0611664243042469, 0.0618763938546181, 0.0555795393884182,
0.0623081848025322, 0.0636096075177193, 0.0675770491361618,
0.0629641935229301, 0.0288578178733587, 0.025121470913291,
0.0300221722573042, 0.0611664243042469, 0.0659252777695656,
0.0159557648003101, 0.0376978516578674, 0.032174538820982,
0.0317839048802853, 0.0626140907406807, 0.0577763170003891,
0.0571180321276188, 0.0571180321276188, 0.0590739175677299,
0.0760120898485184, 0.0670360922813416, 0.0699110627174377,
0.0717969089746475, 0.0640148967504501, 0.0670360922813416,
0.164964601397514, 0.152724280953407, 0.115071900188923,
0.129654854536057, 0.122471310198307, 0.0768138542771339,
0.0400041155517101, 0.0573667995631695, 0.0685276389122009,
0.0337212830781937, 0.0616994015872478, 0.0589617975056171,
0.059433214366436, 0.0567467175424099, 0.0661386772990227,
0.0704409778118134, 0.0611028559505939, 0.0579087659716606,
0.0622764676809311, 0.0538184903562069, 0.0553129874169827,
0.058912742882967, 0.059433214366436, 0.0575473643839359,
0.0575473643839359, 0.058104183524847, 0.055678017437458,
0.0575473643839359, 0.059433214366436, 0.0603105537593365,
0.0589617975056171, 0.0601625964045525, 0.0577935017645359,
0.0574705749750137, 0.0550192892551422, 0.0475285314023495,
0.0577935017645359, 0.0543489865958691, 0.0526755712926388,
0.0553129874169827, 0.0589617975056171, 0.0593133755028248,
0.0547068528831005, 0.0676216259598732, 0.0511428378522396,
0.0564789660274982, 0.0641900449991226, 0.0666491389274597,
0.0390096306800842, 0.058005329221487, 0.0449355207383633,
0.062143836170435, 0.0591179206967354, 0.0637796893715858,
0.0633300691843033, 0.0702810436487198, 0.0647093132138252,
0.0427459664642811), month_12 = c(0.0733551606535912, 0.0707022771239281,
0.0707631036639214, 0.0718683376908302, 0.071540854871273,
0.0993443354964256, 0.0973133370280266, 0.0668289735913277,
0.0731536969542503, 0.0717423185706139, 0.0750747397542,
0.0754749700427055, 0.0746353641152382, 0.0912141725420952,
0.100288398563862, 0.07671108096838, 0.0708827450871468,
0.0708827450871468, 0.0741974636912346, 0.0761196836829185,
0.0648202076554298, 0.114817388355732, 0.100519739091396,
0.100288398563862, 0.102664910256863, 0.0566539540886879,
0.0648743882775307, 0.0706770494580269, 0.0746353641152382,
0.0750747397542, 0.0750747397542, 0.113439425826073, 0.127338454127312,
0.114467553794384, 0.112322382628918, 0.136203452944756,
0.131634846329689, 0.144591823220253, 0.139709115028381,
0.140504062175751, 0.145617410540581, 0.166768744587898,
0.151759415864944, 0.151587069034576, 0.156320676207542,
0.131974145770073, 0.135847419500351, 0.159762278199196,
0.163790658116341, 0.158604919910431, 0.127949997782707,
0.109744042158127, 0.101787634193897, 0.0582009926438332,
0.0671374276280403, 0.0669510439038277, 0.074992410838604,
0.074992410838604, 0.0787725821137428, 0.0696783438324928,
0.0965322777628899, 0.0666131302714348, 0.0666131302714348,
0.0925824269652367, 0.0875711515545845, 0.0873847678303719,
0.0873847678303719, 0.0711322501301765, 0.0696783438324928,
0.0667495802044868, 0.0667495802044868, 0.0685289725661278,
0.0582009926438332, 0.0599979534745216, 0.0702869072556496,
0.068938173353672, 0.0610831864178181, 0.074992410838604,
0.0702869072556496, 0.0683296099305153, 0.0671374276280403,
0.0666131302714348, 0.0815844461321831, 0.070037417113781,
0.068938173353672, 0.0862899348139763, 0.0824234709143639,
0.0761196836829185, 0.0663527771830559, 0.0751885995268822,
0.072908379137516, 0.0599117167294025, 0.0743952021002769,
0.0666681602597237, 0.0667495802044868, 0.0748060271143913,
0.0800538137555122, 0.0787725821137428, 0.0741974636912346,
0.0741937384009361), date = structure(c(14610, 14611, 14612,
14613, 14614, 14615, 14616, 14617, 14618, 14619, 14620, 14621,
14622, 14623, 14624, 14625, 14626, 14627, 14628, 14629, 14630,
14631, 14632, 14633, 14634, 14635, 14636, 14637, 14638, 14639,
14640, 14641, 14642, 14643, 14644, 14645, 14646, 14647, 14648,
14649, 14650, 14651, 14652, 14653, 14654, 14655, 14656, 14657,
14658, 14659, 14660, 14661, 14662, 14663, 14664, 14665, 14666,
14667, 14668, 14669, 14670, 14671, 14672, 14673, 14674, 14675,
14676, 14677, 14678, 14679, 14680, 14681, 14682, 14683, 14684,
14685, 14686, 14687, 14688, 14689, 14690, 14691, 14692, 14693,
14694, 14695, 14696, 14697, 14698, 14699, 14700, 14701, 14702,
14703, 14704, 14705, 14706, 14707, 14708, 14709), class = "Date")), row.names = c(NA,
100L), class = "data.frame")

Adding a plane at the value of 0 for z:
library(plotly)
library(dplyr)
# I separated your df wrangling to better understand the data
df <- df %>%
select(date, everything())
# Code you posted
df %>%
plot_ly(
x = colnames(subset(., select = c(2:4))), # probably not the most efficient method
y = ~date,
z = data.matrix(subset(., select = c(2:4))),
type = "surface",
colors = c("darkblue", "yellow", "darkred")
) %>%
# The surface added by using y & x from df
# and making z a matrix of zeros 3 rows by 100 columns.
add_surface(z = matrix(0, ncol = 3, nrow = 100),
y = df$date,
x = colnames(subset(df, select = c(2:4))),
opacity = .8)
Your plot:
With added zero z plane:

Related

Plot time series without straight lines

I am trying to plot some time series but since I have data only for the summer I get these straight lines. Any idea how to fix that? The code I used: Any idea would be helpful!
ggplot(ba, aes(x=date1, y=pc1)) +
geom_line(color="turquoise4") +
theme_minimal() +
labs(x="", y="Loading", title="Correlation of PC1 and original series") +
theme(plot.title = element_text(hjust=0.5, size=20, face="bold"))+
scale_x_date(date_labels= ("%Y"))
And the plot:
The data: structure(list(date1 = structure(c(10712, 10713, 10714, 10715,
10716, 10717, 10718, 10719, 10720, 10721, 10722, 10723, 10724,
10725, 10726, 10727, 10728, 10729, 10730, 10731, 10732, 10733,
10734, 10735, 10736, 10737, 10738, 10739, 10740, 10741, 10742,
10743, 10744, 10745, 10746, 10747, 10748, 10749, 10750, 10751,
10752, 10753, 10754, 10755, 10756, 10757, 10758, 10759, 10760,
10761, 10762, 10763, 10764, 10765, 10766, 10767, 10768, 10769,
10770, 10771, 10772, 10773, 10774, 10775, 10776, 10777, 10778,
10779, 10780, 10781, 10782, 10783, 10784, 10785, 10786, 10787,
10788, 10789, 10790, 10791, 10792, 10793, 10794, 10795, 10796,
10797, 10798, 10799, 10800, 10801, 10802, 10803, 10804, 10805,
10806, 10807, 10808, 10809, 10810, 10811, 10812, 10813, 10814,
10815, 10816, 10817, 10818, 10819, 10820, 10821, 10822, 10823,
10824, 10825, 10826, 10827, 10828, 10829, 10830, 10831, 10832,
10833, 10834, 10835, 10836, 10837, 10838, 10839, 10840, 10841,
10842, 10843, 10844, 10845, 10846, 10847, 10848, 10849, 10850,
10851, 10852, 10853, 10854, 10855, 10856, 10857, 10858, 10859,
10860, 10861, 10862, 10863, 10864, 11078, 11079, 11080, 11081,
11082, 11083, 11084, 11085, 11086, 11087, 11088, 11089, 11090,
11091, 11092, 11093, 11094, 11095, 11096, 11097, 11098, 11099,
11100, 11101, 11102, 11103, 11104, 11105, 11106, 11107, 11108,
11109, 11110, 11111, 11112, 11113, 11114, 11115, 11116, 11117,
11118, 11119, 11120, 11121, 11122, 11123, 11124), class = "Date"),
pc1 = c(2.64462123197862, 2.4380313244096, 2.21417935009087,
2.02249236956036, 1.75829175459456, 1.58770371446918, 1.62230139615394,
1.73502227021784, 1.75083678213192, 1.64509065138032, 1.57921033180313,
1.70228767677341, 1.77303175099386, 1.78384290706931, 1.86580160595479,
1.9106874120324, 1.73936455049801, 1.50577059168685, 1.24226003967481,
1.07813468676617, 1.06276891964951, 1.09622663209529, 1.07692457712675,
0.978692818737612, 1.06365064520783, 1.2525349982313, 1.08237838015766,
0.645239033194787, 0.479482241789711, 0.683701830568681,
0.792197472275541, 0.631531270886538, 0.520337262457156,
0.667200695099021, 0.767559380073353, 0.7856163663635, 0.737745147101418,
0.654712633988225, 0.440140874164089, 0.111631055132755,
-0.22450806112272, -0.444238159039355, -0.584576558346287,
-0.444097467542865, -0.227821057355029, -0.120935149111578,
-0.0932195161137341, 0.037283855810637, 0.206479031035409,
0.173515424607062, 0.234536409515456, 0.317957256707112,
0.290090191780606, 0.0607339338833623, -0.27556992053308,
-0.3586166955826, -0.3534130521313, -0.501651666926942, -0.571570071652576,
-0.79110428934397, -0.985635595643097, -0.994138228085185,
-0.839909782593256, -0.699274458194957, -0.580683825031177,
-0.530811870371419, -0.4746353951302, -0.489386570992314,
-0.787222651887671, -1.1059671054324, -1.17983265148469,
-1.1058432515423, -0.970485807735322, -0.679713450749357,
-0.516950863200668, -0.495312393712548, -0.673645368786615,
-0.792675131421433, -0.692021409445821, -0.611096320716252,
-0.676712376641795, -0.723244566814595, -0.621986199057006,
-0.563969216349158, -0.649311354664407, -0.679237194242732,
-0.624476984795223, -0.738344795218295, -0.877867797047079,
-0.879375052767018, -0.84262582765393, -0.845707036138972,
-0.959691974084994, -1.06904324062176, -0.97905489332525,
-0.847145240762566, -0.86837819324592, -0.935323976060101,
-0.796486491787169, -0.461073031709012, -0.275818888900351,
-0.513613296467615, -0.786611502858454, -0.799843667083875,
-0.632676241199403, -0.468611824279096, -0.534017599627378,
-0.501551518704511, -0.239313348556757, -0.208935210151003,
-0.510483950549102, -0.62974750963569, -0.399113422985878,
-0.072812659658845, 0.0377885597304766, -0.0102829082610216,
-0.0571349366394233, -0.101917027852624, -0.202941574141862,
-0.22849727264844, -0.125157862652187, 0.168703915373856,
0.43626132948925, 0.446099489882147, 0.435379929023588, 0.236210503991287,
-0.122289033919648, -0.288101855449495, -0.186400543130663,
0.0316721901308679, 0.121240481805255, -0.0753698973566349,
-0.384779730900963, -0.531179497125517, -0.373632181420806,
-0.0148926315001478, 0.146040939981569, 0.13371186468668,
0.200262938351445, 0.465073170745138, 0.506805629621484,
0.345398737766814, 0.171110245173291, 0.176555396235594,
0.262743070740985, 0.398601589660576, 0.433248104072272,
0.453883432665361, 0.604637145172226, 0.843278371818699,
1.13506306230201, 1.42652005730684, 1.63221068108998, 1.86442509826484,
1.97067279998339, 2.0139860665512, 2.13720187260212, 2.31355711206366,
2.32477728002809, 2.36236228869303, 2.24108767618426, 2.12991693141636,
2.11677885248848, 2.01466853738993, 1.77967782944265, 1.48938981000699,
1.34042958586002, 1.33016846412245, 1.31770813627339, 1.26104969519401,
1.37385446004522, 1.61517275597383, 1.84510291043685, 1.91280500843462,
1.84897419443657, 1.52674793906846, 1.29429812528379, 1.06717755247561,
0.910500917679731, 0.904461327314293, 1.05380123048097, 1.08631739987863,
1.04843964584885, 1.10153891962662, 1.15936307711726, 1.20129772010444,
1.18746954945955, 1.00056619329093, 0.725225823060771, 0.573790799694267,
0.655776789864271, 0.780033607405981, 0.664875593837605,
0.452000300833336, 0.394589410057676, 0.402170545544567,
0.403979206396259, 0.395485848597801, 0.433314713756909,
0.437960603442615)), row.names = c(NA, 200L), class = "data.frame")strong text
1) Using the input in the Note at the end expand the dates to include the missing ones using NA's for them. Then plot.
library(ggplot2)
library(zoo)
z <- read.zoo(ba)
zz <- merge(z, zoo(, seq(start(z), end(z), 1)))
autoplot(zz) + xlab("")
2) Another approach is to use distinct facets for each year.
library(ggplot2)
breaks <- unique(as.Date(cut(ba$date1, "month")))
ba2 <- transform(ba, year = as.integer(format(date1, "%Y")))
p <- ggplot(ba2, aes(date1, pc1)) +
geom_line() +
facet_grid(cols = vars(year), scales = "free_x", space = "free_x")
p + scale_x_date(breaks = breaks, date_labels = "%b")
(continued after image)
or to remove strip text and only place the year below each facet use p from above with
breaks <- as.Date(tapply(format(ba$date1), format(ba$date1, "%Y"), min))
p +
scale_x_date(breaks = breaks, date_labels = "%Y") +
theme(strip.text.x = element_blank())
3) The facet idea could also be implemented in lattice.
library(lattice)
ba3 <- transform(ba, year = format(date1, "%Y"))
xyplot(pc1 ~ date1 | year, ba3, type = "l",
scales = list(x = list(relation = "free")), layout = c(NA, 1))
Note
There was a problem with the dput output in the question so the following was used.
ba <-
structure(list(date1 = structure(c(10712, 10713, 10714, 10715,
10716, 10717, 10718, 10719, 10720, 10721, 10722, 10723, 10724,
10725, 10726, 10727, 10728, 10729, 10730, 10731, 10732, 10733,
10734, 10735, 10736, 10737, 10738, 10739, 10740, 10741, 10742,
10743, 10744, 10745, 10746, 10747, 10748, 10749, 10750, 10751,
10752, 10753, 10754, 10755, 10756, 10757, 10758, 10759, 10760,
10761, 10762, 10763, 10764, 10765, 10766, 10767, 10768, 10769,
10770, 10771, 10772, 10773, 10774, 10775, 10776, 10777, 10778,
10779, 10780, 10781, 10782, 10783, 10784, 10785, 10786, 10787,
10788, 10789, 10790, 10791, 10792, 10793, 10794, 10795, 10796,
10797, 10798, 10799, 10800, 10801, 10802, 10803, 10804, 10805,
10806, 10807, 10808, 10809, 10810, 10811, 10812, 10813, 10814,
10815, 10816, 10817, 10818, 10819, 10820, 10821, 10822, 10823,
10824, 10825, 10826, 10827, 10828, 10829, 10830, 10831, 10832,
10833, 10834, 10835, 10836, 10837, 10838, 10839, 10840, 10841,
10842, 10843, 10844, 10845, 10846, 10847, 10848, 10849, 10850,
10851, 10852, 10853, 10854, 10855, 10856, 10857, 10858, 10859,
10860, 10861, 10862, 10863, 10864, 11078, 11079, 11080, 11081,
11082, 11083, 11084, 11085, 11086, 11087, 11088, 11089, 11090,
11091, 11092, 11093, 11094, 11095, 11096, 11097, 11098, 11099,
11100, 11101, 11102, 11103, 11104, 11105, 11106, 11107, 11108,
11109, 11110, 11111, 11112, 11113, 11114, 11115, 11116, 11117,
11118, 11119, 11120, 11121, 11122, 11123, 11124), class = "Date"),
pc1 = c(2.64462123197862, 2.4380313244096, 2.21417935009087,
2.02249236956036, 1.75829175459456, 1.58770371446918, 1.62230139615394,
1.73502227021784, 1.75083678213192, 1.64509065138032, 1.57921033180313,
1.70228767677341, 1.77303175099386, 1.78384290706931, 1.86580160595479,
1.9106874120324, 1.73936455049801, 1.50577059168685, 1.24226003967481,
1.07813468676617, 1.06276891964951, 1.09622663209529, 1.07692457712675,
0.978692818737612, 1.06365064520783, 1.2525349982313, 1.08237838015766,
0.645239033194787, 0.479482241789711, 0.683701830568681,
0.792197472275541, 0.631531270886538, 0.520337262457156,
0.667200695099021, 0.767559380073353, 0.7856163663635, 0.737745147101418,
0.654712633988225, 0.440140874164089, 0.111631055132755,
-0.22450806112272, -0.444238159039355, -0.584576558346287,
-0.444097467542865, -0.227821057355029, -0.120935149111578,
-0.0932195161137341, 0.037283855810637, 0.206479031035409,
0.173515424607062, 0.234536409515456, 0.317957256707112,
0.290090191780606, 0.0607339338833623, -0.27556992053308,
-0.3586166955826, -0.3534130521313, -0.501651666926942, -0.571570071652576,
-0.79110428934397, -0.985635595643097, -0.994138228085185,
-0.839909782593256, -0.699274458194957, -0.580683825031177,
-0.530811870371419, -0.4746353951302, -0.489386570992314,
-0.787222651887671, -1.1059671054324, -1.17983265148469,
-1.1058432515423, -0.970485807735322, -0.679713450749357,
-0.516950863200668, -0.495312393712548, -0.673645368786615,
-0.792675131421433, -0.692021409445821, -0.611096320716252,
-0.676712376641795, -0.723244566814595, -0.621986199057006,
-0.563969216349158, -0.649311354664407, -0.679237194242732,
-0.624476984795223, -0.738344795218295, -0.877867797047079,
-0.879375052767018, -0.84262582765393, -0.845707036138972,
-0.959691974084994, -1.06904324062176, -0.97905489332525,
-0.847145240762566, -0.86837819324592, -0.935323976060101,
-0.796486491787169, -0.461073031709012, -0.275818888900351,
-0.513613296467615, -0.786611502858454, -0.799843667083875,
-0.632676241199403, -0.468611824279096, -0.534017599627378,
-0.501551518704511, -0.239313348556757, -0.208935210151003,
-0.510483950549102, -0.62974750963569, -0.399113422985878,
-0.072812659658845, 0.0377885597304766, -0.0102829082610216,
-0.0571349366394233, -0.101917027852624, -0.202941574141862,
-0.22849727264844, -0.125157862652187, 0.168703915373856,
0.43626132948925, 0.446099489882147, 0.435379929023588, 0.236210503991287,
-0.122289033919648, -0.288101855449495, -0.186400543130663,
0.0316721901308679, 0.121240481805255, -0.0753698973566349,
-0.384779730900963, -0.531179497125517, -0.373632181420806,
-0.0148926315001478, 0.146040939981569, 0.13371186468668,
0.200262938351445, 0.465073170745138, 0.506805629621484,
0.345398737766814, 0.171110245173291, 0.176555396235594,
0.262743070740985, 0.398601589660576, 0.433248104072272,
0.453883432665361, 0.604637145172226, 0.843278371818699,
1.13506306230201, 1.42652005730684, 1.63221068108998, 1.86442509826484,
1.97067279998339, 2.0139860665512, 2.13720187260212, 2.31355711206366,
2.32477728002809, 2.36236228869303, 2.24108767618426, 2.12991693141636,
2.11677885248848, 2.01466853738993, 1.77967782944265, 1.48938981000699,
1.34042958586002, 1.33016846412245, 1.31770813627339, 1.26104969519401,
1.37385446004522, 1.61517275597383, 1.84510291043685, 1.91280500843462,
1.84897419443657, 1.52674793906846, 1.29429812528379, 1.06717755247561,
0.910500917679731, 0.904461327314293, 1.05380123048097, 1.08631739987863,
1.04843964584885, 1.10153891962662, 1.15936307711726, 1.20129772010444,
1.18746954945955, 1.00056619329093, 0.725225823060771, 0.573790799694267,
0.655776789864271, 0.780033607405981, 0.664875593837605,
0.452000300833336, 0.394589410057676, 0.402170545544567,
0.403979206396259, 0.395485848597801, 0.433314713756909,
0.437960603442615)), row.names = c(NA, -200L), class = "data.frame")

Finding statistics after grouping in data.table

I had a small question in regards to data.table. Since i'm not so good at it i'm not quite sure how I can do this in data.table.
Basically I have 3 columns and want to group by the first two columns ( key and date ) and then for each key and each date, find the maximum and minimum that occurred in the third column ( fare)
I tried doing this but it gives me an error
flights[, c("max_day", "min_day") := unlist(lapply(gross_fare, findr)), by = c("key", "created_date")]
Error in `[.data.table`(flights, , `:=`(c("max_day", "min_day"), unlist(lapply(gross_fare, :
Supplied 18 items to be assigned to group 1 of size 9 in column 'max_day'. The RHS length must either be 1 (single values are ok) or match the LHS length exactly. If you wish to 'recycle' the RHS please use rep() explicitly to make this intent clear to readers of your code.
findr is a function which just finds the max and min i.e.
findr <- function(x) {list(max = max(x), min = min(x)}
I've done what I want to do in dplyr and I'll attach the code for that, but since i have millions of rows, dplyr eats up my ram so data.table would help
test <- flights %>%
select(key, created_date, gross_fare) %>%
group_by(key, created_date) %>%
summarise(
max_day = max(gross_fare),
min_day = min(gross_fare),
diff = max_day - min_day) %>%
arrange(created_date)
I've put the dput output if anyone wants to use that
If anyone can help that'd be great, thank you :)
data.table::setDT(structure(list(key = c("LHE_KHI_LHE+KHI_PA-405_15.0_1", "KHI_ISB_KHI+ISB_PK-370_20.0_0",
"LHE_KHI_LHE+KHI_PK-307_20.0_0", "ISB_KHI_ISB+KHI_PF-124_20.0_1",
"LHE_KHI_LHE+KHI_PK-307_20.0_0", "LHE_KHI_LHE+KHI_PA-405_15.0_1",
"KHI_LHE_KHI+LHE_PK-304_20.0_0", "KHI_ISB_KHI+ISB_PA-204_15.0_1",
"ISB_KHI_ISB+KHI_PA-207_15.0_1", "KHI_ISB_KHI+ISB_PA-200_20.0_1",
"KHI_LHE_KHI+LHE_PK-304_40.0_0", "ISB_KHI_ISB+KHI_PA-201_35.0_1",
"ISB_KHI_ISB+KHI_ER-501_20.0_1", "KHI_LHE_KHI+LHE_PF-145_20.0_2",
"KHI_ISB_KHI+ISB_PA-204_20.0_1", "LHE_KHI_LHE+KHI_PA-401_0.0_0",
"ISB_KHI_ISB+KHI_PK-309_40.0_0", "KHI_ISB_KHI+ISB_PF-123_20.0_2",
"ISB_KHI_ISB+KHI_PA-205_15.0_1", "LHE_KHI_LHE+KHI_PF-142_0.0_0",
"ISB_KHI_ISB+KHI_PA-223_15.0_1", "ISB_KHI_ISB+KHI_PF-126_20.0_2",
"ISB_KHI_ISB+KHI_PK-309_20.0_0", "KHI_ISB_KHI+ISB_PF-121_20.0_2",
"ISB_KHI_ISB+KHI_PK-373_20.0_0", "KHI_LHE_KHI+LHE_PF-145_20.0_2",
"KHI_LHE_KHI+LHE_PA-402_15.0_1", "LHE_KHI_LHE+KHI_PA-407_20.0_1",
"KHI_ISB_KHI+ISB_PK-308_40.0_0", "KHI_LHE_KHI+LHE_PF-145_20.0_2",
"LHE_KHI_LHE+KHI_PF-144_0.0_0", "ISB_KHI_ISB+KHI_PK-369_40.0_0",
"ISB_KHI_ISB+KHI_PF-124_20.0_2", "KHI_ISB_KHI+ISB_PA-204_15.0_1",
"KHI_ISB_KHI+ISB_PA-200_15.0_1", "ISB_KHI_ISB+KHI_PF-124_20.0_1",
"KHI_ISB_KHI+ISB_PK-300_20.0_0", "ISB_KHI_ISB+KHI_PF-122_20.0_2",
"KHI_ISB_KHI+ISB_PK-368_20.0_0", "KHI_ISB_KHI+ISB_PA-204_15.0_1",
"ISB_KHI_ISB+KHI_ER-503_20.0_1", "ISB_KHI_ISB+KHI_PA-209_15.0_1",
"KHI_ISB_KHI+ISB_PK-308_40.0_0", "ISB_KHI_ISB+KHI_PF-124_20.0_1",
"ISB_KHI_ISB+KHI_PK-301_40.0_0", "KHI_LHE_KHI+LHE_PA-408_35.0_1",
"LHE_KHI_LHE+KHI_PF-144_20.0_2", "KHI_ISB_KHI+ISB_PF-121_20.0_2",
"KHI_ISB_KHI+ISB_PA-204_35.0_1", "ISB_KHI_ISB+KHI_PK-309_40.0_0",
"ISB_KHI_ISB+KHI_PA-223_20.0_1", "KHI_ISB_KHI+ISB_PA-206_35.0_1",
"LHE_KHI_LHE+KHI_PF-142_32.0_1", "LHE_KHI_LHE+KHI_PF-142_20.0_1",
"KHI_ISB_KHI+ISB_PF-123_20.0_2", "ISB_KHI_ISB+KHI_PA-209_15.0_1",
"KHI_ISB_KHI+ISB_PA-204_35.0_1", "ISB_KHI_ISB+KHI_PA-201_20.0_1",
"KHI_ISB_KHI+ISB_PK-368_20.0_0", "ISB_KHI_ISB+KHI_PA-205_20.0_1",
"KHI_ISB_KHI+ISB_PF-121_20.0_1", "ISB_KHI_ISB+KHI_PF-124_20.0_1",
"ISB_KHI_ISB+KHI_PA-205_15.0_1", "KHI_LHE_KHI+LHE_PF-145_20.0_2",
"KHI_LHE_KHI+LHE_PA-406_35.0_1", "KHI_ISB_KHI+ISB_PK-308_20.0_0",
"LHE_KHI_LHE+KHI_PA-401_20.0_1", "LHE_KHI_LHE+KHI_PA-401_15.0_1",
"KHI_ISB_KHI+ISB_PA-204_35.0_1", "KHI_LHE_KHI+LHE_PA-406_35.0_1",
"KHI_ISB_KHI+ISB_PA-206_35.0_1", "KHI_ISB_KHI+ISB_PF-121_20.0_1",
"ISB_KHI_ISB+KHI_PA-205_20.0_1", "LHE_KHI_LHE+KHI_PF-142_20.0_1",
"LHE_KHI_LHE+KHI_PF-146_20.0_2", "LHE_KHI_LHE+KHI_PA-401_35.0_1",
"ISB_KHI_ISB+KHI_PA-209_15.0_1", "ISB_KHI_ISB+KHI_PK-301_40.0_0",
"ISB_KHI_ISB+KHI_PA-205_35.0_1", "KHI_LHE_KHI+LHE_PA-406_15.0_1",
"KHI_ISB_KHI+ISB_PF-123_20.0_1", "ISB_KHI_ISB+KHI_PA-201_35.0_1",
"KHI_ISB_KHI+ISB_PK-300_40.0_0", "KHI_LHE_KHI+LHE_PA-402_35.0_1",
"ISB_KHI_ISB+KHI_ER-505_20.0_1", "ISB_KHI_ISB+KHI_PF-122_20.0_2",
"ISB_KHI_ISB+KHI_PA-207_15.0_1", "KHI_LHE_KHI+LHE_PA-404_35.0_1",
"KHI_ISB_KHI+ISB_PF-123_20.0_1", "ISB_KHI_ISB+KHI_ER-503_20.0_1",
"ISB_GIL_ISB+GIL_PK-605_20.0_0", "KHI_ISB_KHI+ISB_PF-123_20.0_1",
"KHI_ISB_KHI+ISB_PA-200_15.0_1", "ISB_KHI_ISB+KHI_PF-122_20.0_2",
"KHI_LHE_KHI+LHE_PA-404_35.0_1", "ISB_KHI_ISB+KHI_PF-122_20.0_2",
"PEW_KHI_PEW+KHI_PF-152_20.0_1", "LHE_KHI_LHE+KHI_PK-303_20.0_0",
"KHI_ISB_KHI+ISB_PA-222_35.0_1", "ISB_KHI_ISB+KHI_PF-124_20.0_1"
), created_date = c("2021-04-20", "2021-05-27", "2021-02-13",
"2021-08-14", "2021-08-11", "2021-08-21", "2021-01-26", "2021-08-21",
"2021-05-24", "2021-09-15", "2021-06-05", "2021-07-19", "2021-09-29",
"2021-07-02", "2021-08-10", "2021-01-04", "2021-07-15", "2021-07-14",
"2021-08-13", "2021-01-11", "2021-09-13", "2021-09-20", "2021-05-27",
"2021-02-20", "2021-08-15", "2021-07-27", "2021-08-26", "2021-09-15",
"2021-08-02", "2021-06-25", "2021-05-15", "2021-08-26", "2021-07-30",
"2021-06-27", "2021-08-07", "2021-03-19", "2021-03-02", "2021-06-06",
"2021-08-15", "2021-06-27", "2021-09-19", "2021-07-28", "2021-08-09",
"2021-08-16", "2021-09-09", "2021-06-04", "2021-08-12", "2021-05-15",
"2021-07-26", "2021-05-27", "2021-08-12", "2021-08-02", "2021-01-26",
"2021-04-20", "2021-08-26", "2021-08-26", "2021-03-21", "2021-01-09",
"2021-04-23", "2021-01-04", "2021-08-13", "2021-06-22", "2021-05-31",
"2021-08-18", "2021-06-16", "2021-08-14", "2021-08-10", "2021-06-16",
"2021-04-08", "2021-05-20", "2021-06-22", "2021-04-20", "2021-01-05",
"2021-02-27", "2021-07-07", "2021-03-26", "2021-08-16", "2021-05-01",
"2021-07-31", "2021-06-14", "2021-06-16", "2021-03-25", "2021-09-14",
"2021-06-06", "2021-09-02", "2021-08-06", "2021-07-18", "2021-02-28",
"2021-04-28", "2021-09-19", "2021-08-25", "2021-06-17", "2021-06-07",
"2021-06-17", "2021-07-07", "2021-08-23", "2021-07-09", "2021-07-19",
"2021-07-14", "2021-05-21"), gross_fare = c(7796, 7427, 11504,
6870, 6580, 14945, 8697, 7524, 7124, 6785, 11858, 7524, 11500,
9525, 6785, 8739, 8200, 13560, 9045, 7400, 7524, 12500, 7458,
14000, 6570, 9525, 6220, 10545, 8310, 7900, 7820, 8410, 11285,
19892, 6810, 9800, 11441, 11900, 6570, 13592, 11500, 8300, 20380,
8525, 7340, 9707, 7870, 10655, 10545, 11798, 14645, 10545, 8650,
8650, 7870, 12945, 10799, 10227, 6765, 10227, 20120, 11045, 9403,
7870, 7124, 6570, 6810, 6531, 8605, 7124, 11072, 7390, 10227,
13435, 10530, 12280, 18945, 11147, 10545, 6531, 6620, 10799,
18480, 32702, 5606, 13560, 23895, 8027, 9655, 11500, 11990, 6620,
9403, 7620, 14645, 19105, 9000, 6440, 12645, 8025)), row.names = c(NA,
-100L), class = c("data.table", "data.frame")))
I guess this line of code should do the job:
library(data.table)
flights[, .(min_day = min(gross_fare), max_day = max(gross_fare), diff = max(gross_fare) - min(gross_fare)), by = .(key, created_date)][]
Since the function findr returns a list, there's no need to complicate things:
findr <- function(x) {list(max = max(x), min = min(x))}
flights[, c("max_day", "min_day") := findr(gross_fare), by = list(key, created_date)][]
To also return the difference between max and min, use
findr2 <- function(x) {
list(max = max(x), min = min(x), diff = diff(range(x)))
}
flights[, c("max_day", "min_day", "diff_day") := findr2(gross_fare), by = list(key, created_date)][]

How to format date on x-axis to month and year in R

I've seen several questions posted about formatting the date and haven't been able to find one to resolve the issue I'm having here.
My data ranges from May-November from 2008-2015 and then drops May and goes from June-November from 2016-2018.
My goal is to create a plot to show the months sampled for each year and eliminate the months that were not sampled.
I asked a similar question here: How to plot mean density by year and month
But this answer does not resolve the date issue.
I've tried this code
MeanCPUE <- BD %>%
group_by(date) %>%
summarise(mean_cpue = mean (CPUE)) %>%
ungroup()
MeanCPUE
p <- ggplot(MeanCPUE, aes(x=date, y=mean_cpue))+
geom_line(aes()) +
geom_point(aes())+
labs(title = "Mean Density", y = "Mean Density (# fish/100m2)", x = "Date")+
theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"),
axis.text.x = element_text(angle = 90))
Which gives me this plot.. This is close, but it doesn't label the months and it's connected the sampled months with a line through the un-sampled months.
I'm trying to make something like this with the way the dates are displayed on the x-axis with all the sampled months vertical and the corresponding year horizontal below.
Is this possible with the gaps caused by un-sampled months that I have? Is there a better way to display data like this?
I'm using this data
structure(list(date = structure(c(14012, 14021, 14050, 14056,
14076, 14083, 14110, 14118, 14124, 14139, 14141, 14168, 14174,
14201, 14208, 14371, 14372, 14398, 14405, 14439, 14442, 14468,
14469, 14498, 14512, 14525, 14547, 14562, 14565, 14733, 14736,
14764, 14770, 14811, 14813, 14826, 14848, 14854, 14862, 14895,
14896, 14905, 14924, 14931, 15105, 15106, 15135, 15147, 15161,
15168, 15189, 15191, 15231, 15233, 15261, 15272, 15300, 15301,
15469, 15476, 15504, 15506, 15527, 15539, 15567, 15576, 15604,
15605, 15631, 15637, 15665, 15671, 15835, 15841, 15868, 15869,
15896, 15902, 15924, 15926, 15961, 15966, 15988, 15995, 16017,
16028, 16211, 16213, 16238, 16247, 16267, 16270, 16274, 16302,
16304, 16332, 16333, 16366, 16367, 16387, 16395, 16563, 16564,
16592, 16596, 16637, 16638, 16666, 16668, 16693, 16696, 16723,
16731, 16756, 16759, 16961, 16962, 16991, 16996, 17017, 17025,
17051, 17060, 17093, 17094, 17109, 17116, 17334, 17345, 17366,
17367, 17396, 17402, 17430, 17437, 17451, 17457, 17479, 17480,
17695, 17696, 17725, 17726, 17758, 17760, 17786, 17788, 17821,
17845, 17848), class = "Date"), mean_cpue = c(1.34147348124682,
1.86964964075215, 1.81794067339267, 0.665667643204071, 0.58288080140313,
1.21618562008796, 0.915544650452313, 2.08485242966155, 1.03716945493794,
1.32653950869479, 1.30387017192198, 0.818696953343472, 1.80344708020158,
0.462802005064552, 0.500089944234575, 0.468688145292134, 3.77629506069873,
2.06756426400247, 2.60938827682934, 0.936798217363299, 1.50723642250895,
1.72839753752143, 1.58423249977652, 1.85978443365114, 1.22923787251904,
1.86003452037686, 1.10727777349472, 1.4293872174252, 1.50010309934371,
3.06022031046633, 1.49412683285299, 2.14503371546005, 1.06420000907848,
1.1748728446591, 1.10022108873144, 1.44768795978073, 1.25036934931668,
2.13048237961316, 1.06834543832824, 1.8752166760057, 0.758904479222882,
0.961385073330007, 1.8663428674222, 0.745490766022096, 1.15427953668245,
1.98858189404051, 2.27561614501712, 1.71143613797583, 1.49915849827066,
0.841476235553629, 2.31692888870439, 1.35038151346165, 1.18518826020175,
1.22169567368232, 1.10179032565335, 0.694614793373563, 0.983229683310633,
1.12892772438535, 1.53364097932751, 0.787500879889251, 1.33114386888348,
0.75361846092619, 1.71920865710658, 1.35852187828517, 1.689863533171,
1.28266297991069, 1.86158062664132, 1.5293414824795, 1.0998008820738,
1.94141263432714, 0.342436942826791, 0.714994071540338, 5.62005947683777,
1.62440270638761, 3.82262996941896, 3.25889497807134, 1.54143260146352,
1.86769619502575, 2.8059505280639, 3.58304933614583, 1.20470762672598,
2.29674008364814, 0.689037082722997, 0.959492687511224, 0.567406828456012,
0.844493420881774, 0.513055729898873, 0.986197068455855, 1.86584317723012,
0.229688101851428, 1.25022486058644, 1.1701621335822, 0.742587041289875,
0.625081199656213, 1.29798096608674, 0.396058527329973, 1.16988984259697,
0.739772339659098, 0.39805108067041, 0.606186664268154, 0.300984338326838,
1.27916344040608, 5.75511524259318, 3.58466880968582, 1.02559789553853,
1.26985222650329, 1.54851107697843, 1.10318926209412, 1.3337665961789,
1.21571684379214, 1.18341136267535, 1.44015621029248, 0.669940304071688,
1.80318080838661, 0.919979963710556, 0.555260577151843, 2.32733965810763,
1.08046383335355, 0.621234946065283, 1.79207073432654, 1.3398375035128,
1.67582836446281, 1.3687859338262, 1.64687245759481, 0.880872177773362,
0.972535713988714, 0.83891596232892, 2.09933443707235, 1.24879096699177,
1.99677155577555, 1.67638127852795, 1.50710438838265, 2.27326799319906,
1.83813683299605, 2.34551745405533, 1.93879794527393, 1.74635992056851,
0.966061489637619, 1.1709324985094, 2.77527932681737, 4.63212808059003,
2.50472488366812, 2.19392784029982, 3.64096784095526, 1.91684726897895,
1.6466312330028, 2.84951794706046, 1.6841559743533, 1.62105139974506,
1.46007933985039)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-150L), .internal.selfref = <pointer: 0x000001d8f6d11ef0>, sorted = "NODCCODE")
Thanks in advance for any help!!
In the following, I have added group = year(date) to aesthetics so that line will connect only those points in the same year.
The
scale_x_date(date_labels = "%b-%Y") is to have monthly ticks - not sure if you still want this.
library(ggplot2)
library(dplyr)
library(lubridate)
ggplot(MeanCPUE, aes(x=date, y=mean_cpue, group = year(date)))+
geom_line() +
geom_point()+
labs(title = "Mean Density", y = "Mean Density (# fish/100m2)", x = "Date") +
theme_bw() +
theme(panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"),
axis.text.x = element_text(angle = 90)) +
scale_x_date(date_breaks = '1 year', date_labels = "%b-%Y")

maintain date class for x-axis with geom_smooth

I am trying to smooth three lines and shade the area between the min and max lines using this approach. My x-axis is a date class at the start, but I'm getting an error in the last step.
Invalid input: date_trans works with objects of class Date only
I'm losing the date class when smoothing into p1. How can I prevent this or otherwise pass a date to geom_ribbon in the final step?
library(tidyverse)
mmr <- structure(list(year = structure(c(3834, 3926, 3987, 4108, 4169,
4230, 4352, 4442, 4503, 4595, 4687, 4748, 4929, 4991, 5052, 5204,
5265, 5326, 5448, 5538, 5599, 5752, 5813, 5844, 6025, 6087, 6148,
6299, 6360, 6574, 6634, 6695, 6848, 6909, 7091, 7183, 7213, 7395,
7456, 7517, 7639, 7729, 7790, 7943, 8035, 8187, 8248, 8309, 8460,
8552, 8613, 8735, 8825, 8886, 9009, 9100, 9131, 9282, 9343, 9404,
9556, 9617, 9678, 9831, 9862, 9952, 10105, 10135, 10196, 10347,
10408, 10470, 10592, 10682, 10896, 10957, 11078, 11170, 11231,
11382, 11474, 11596, 11657, 11688, 11869, 11931, 11961, 12112,
12173, 12234, 12357, 12418, 12478, 12600, 12692, 12874, 12935,
12996, 13118, 13208, 13239, 13392, 13483, 13665, 13757, 13787,
13939), class = "Date"), mmrU = c(13.8231596, 13.7529708, 13.7148858,
13.6118164, 13.5638772, 13.5478975, 13.519933, 13.5012899, 13.474657,
13.4014166, 13.4173963, 13.3787786, 13.2882268, 13.2589306, 13.2354049,
13.1763686, 13.1470724, 13.1255442, 13.0618471, 13.032551, 13.0108008,
13.0139079, 13.0232294, 13.0352143, 13.0498623, 13.0498623, 13.0376556,
13.0232294, 13.0205662, 12.987275, 12.9739586, 12.9033814, 12.9792852,
12.99127, 13.0059181, 13.0059181, 13.0059181, 13.0205662, 13.0130202,
13.0034767, 12.9606421, 12.9406675, 12.9277949, 12.8727536, 12.8203756,
12.7728802, 12.7582321, 12.7569005, 12.7422524, 12.7382575, 12.7171414,
12.6397157, 12.601098, 12.5737993, 12.6583588, 12.7382575, 12.7979151,
12.9473257, 12.9965966, 13.0327729, 13.1191079, 13.1617205, 13.2061087,
13.332171, 13.39609, 13.4586773, 13.6477709, 13.7676189, 13.8408594,
14.0619124, 14.1604541, 14.2246394, 14.3815071, 14.4851535, 14.7996436,
14.9234866, 15.0436305, 15.3022444, 15.4277361, 15.7198102, 15.9745982,
16.2684478, 16.4362351, 16.5409911, 16.8277386, 16.9728879, 17.0634398,
17.3204472, 17.4616016, 17.5578129, 17.819814, 17.9623, 18.0631721,
18.3045327, 18.4784455, 18.6694034, 18.7612869, 18.8153517, 18.9250792,
18.9770134, 19.0189602, 19.0675652, 19.07955, 19.1008563, 19.0582437,
19.0292138, 18.9410589), mmr = c(12.1431454, 12.11401197, 12.05086241,
12.02612341, 11.96590348, 11.93823486, 11.8828976, 11.8601117,
11.8275604, 11.8047745, 11.7784079, 11.7461821, 11.6955796, 11.7256748,
11.6689467, 11.6396505, 11.5997012, 11.6033632, 11.5384455, 11.5144759,
11.4532202, 11.4532202, 11.5411088, 11.5198025, 11.4971645, 11.5118126,
11.5118126, 11.5345985, 11.5586865, 11.4532202, 11.4532202, 11.4692,
11.5144759, 11.5201829, 11.5494791, 11.525129, 11.5662198, 11.570405,
11.4665367, 11.4984962, 11.4345772, 11.4692, 11.4092759, 11.3893013,
11.3653317, 11.3799798, 11.4406647, 11.3014127, 11.2721165, 11.2833024,
11.2720179, 11.2161875, 11.1675824, 11.1749065, 11.1928097, 11.2546572,
11.3295252, 11.397883, 11.568574, 11.5020472, 11.5437721, 11.5792826,
11.6846601, 11.7494372, 11.8112847, 11.8868038, 12.0684401, 12.1749717,
12.2844995, 12.3747184, 12.4756571, 12.5664753, 12.7422524, 12.8274777,
13.114314, 13.2216445, 13.3164576, 13.575862, 13.7156847, 13.8555074,
14.1513989, 14.2922869, 14.4147983, 14.5559526, 14.8196183, 14.9421296,
15.0779574, 15.203132, 15.3283066, 15.4521496, 15.686519, 15.7970456,
15.9248835, 16.0292844, 16.21678, 16.3945102, 16.4693782, 16.5442463,
16.6028386, 16.661431, 16.7265336, 16.7734075, 16.8304019, 16.8407888,
16.781464, 16.7411817, 16.6532932), mmrL = c(10.68619282, 10.63692196,
10.5963068, 10.49976255, 10.45848156, 10.44250182, 10.4092107,
10.39189932, 10.38390945, 10.31732721, 10.28270445, 10.25695931,
10.20813234, 10.21612221, 10.22278043, 10.25207662, 10.26139813,
10.24719392, 10.21079563, 10.19481589, 10.17883615, 10.17883615,
10.17883615, 10.17883615, 10.1921526, 10.19481589, 10.20813234,
10.20946398, 10.22011714, 10.19481589, 10.18283109, 10.17883615,
10.17883615, 10.17883615, 10.17883615, 10.17484122, 10.16418806,
10.14554503, 10.13356023, 10.12024378, 10.09893747, 10.08828431,
10.0762995, 10.06165141, 10.04700332, 10.02436536, 9.999064105,
9.9810869, 9.936476799, 9.911175548, 9.890756995, 9.840598374,
9.815297123, 9.79798574, 9.880547718, 9.947129958, 9.997199802,
10.1149172, 10.14021845, 10.14953997, 10.18948931, 10.21345892,
10.24963527, 10.36659807, 10.42519044, 10.47179801, 10.63958525,
10.74478519, 10.82335224, 10.99261909, 11.11726527, 11.15293432,
11.27611147, 11.33869877, 11.52113411, 11.58638471, 11.6377862,
11.82075419, 11.96190854, 12.31346277, 12.45195383, 12.52985505,
12.74491568, 12.85810549, 12.93633962, 13.16971038, 13.29887992,
13.38676848, 13.61581138, 13.71834803, 13.78226698, 13.95005423,
14.03927443, 14.10159541, 14.27497556, 14.42971269, 14.57725893,
14.60788676, 14.63185637, 14.69843861, 14.72373986, 14.73439302,
14.73439302, 14.73439302, 14.73683436, 14.674469, 14.60744288
)), row.names = c(NA, -107L), class = "data.frame")
# smooth lines
p1 <-
mmr %>%
ggplot(.) +
geom_smooth(aes(x=year, y=mmrL)) +
geom_smooth(aes(x=year, y=mmr)) +
geom_smooth(aes(x=year, y=mmrU)) +
ylim(0, 25) +
scale_x_date(date_breaks="2 years", date_labels = "%Y")
# build plot object for rendering
pp1 <- ggplot_build(p1)
# extract data from the upper and lower lines
df2 <- data.frame(x = pp1$data[[1]]$x,
ymin = pp1$data[[2]]$y,
ymax = pp1$data[[3]]$y)
# use the lm data to add the ribbon to the plot
p1 + geom_ribbon(data = df2, aes(x = x, ymin = ymin, ymax = ymax), fill = "grey", alpha = 0.4)
The issue is that pp1$data[[1]]$x is no longer of date format. We may instead use
df2 <- data.frame(x = as.Date(pp1$data[[1]]$x, origin = "1970-01-01"),
ymin = pp1$data[[1]]$y,
ymax = pp1$data[[3]]$y)
where I've also adjusted ymin. This gives

R prediction and visualization

I have fitted a polynomial to my data and visualized the results. I'm trying to extend my plot to future and predict the x value (date) when y is lower than 70. My data is HERE to replicate. My current code is below.
data <- read.table("data.txt", sep="\t", header=T)
data$date<- as.Date(data$date)
data$y <- as.numeric(data$y)
attach(data)
x <- 1:88 # vector for formula coordinates. I haven't found a way to plot polynomial formula with dates..
p <- qplot(date, y, data=data , geom="line", xlab="Time", ylab="y")
p+ geom_smooth(method = "lm", formula = y ~ poly(x, 3))
fit <- lm(y~poly(x,3))
summary(fit) #Fit is adequate
Which results to this plot:
The third order polynomial was made with numeric x vector because I didn't know how to use dates as "coordinates" for the formula. What I would like is to forecast i.e. extend this plot to the future and find out at what date is y lower than 70 using this formula.
A bit hacky, but gets the job done:
Code
# Define timeframe to predict, convert dates to numeric
days <- as.numeric(seq.Date(max(df$date) + 1, max(df$date) + 120, by = "days"))
# Build model
model <- loess(y ~ as.numeric(date), df, control = loess.control(surface = "direct"))
# Apply model to timeframe
p <- predict(model, days)
# Convert date back to Date format, build result dataframe
result <- data.frame(date = as.Date(days, origin = "1970-01-01"),
y = p)
# Plot three elements: original data, model, prediction
ggplot() +
geom_line(data = df, aes(date, y)) +
geom_smooth(data = df, aes(date, y), method = "loess", se = FALSE) +
geom_line(data = result, aes(date, y), linetype = "dashed", color = "red", size = 1)
Data
df <- structure(list(date = structure(c(16166, 16167, 16168, 16169, 16170, 16171, 16172, 16173, 16174, 16175, 16176, 16177, 16178, 16179, 16180, 16186, 16187, 16188, 16189, 16190, 16191, 16205, 16206, 16207, 16208, 16209, 16210, 16211, 16212, 16216, 16217, 16218, 16219, 16261, 16262, 16263, 16264, 16265, 16266, 16267, 16268, 16269, 16270, 16271, 16272, 16273, 16274, 16275, 16282, 16283, 16284, 16285, 16286, 16287, 16288, 16289, 16290, 16291, 16292, 16293, 16294, 16295, 16296, 16297, 16298, 16299, 16300, 16301, 16302, 16303, 16304, 16305, 16306, 16307, 16308, 16309, 16310, 16311, 16312, 16313, 16315, 16316, 16317, 16318, 16319, 16320, 16321, 16322), class = "Date"), y = c(95.543962, 95.573412, 95.589183, 95.500536, 95.563371, 95.579541, 94.979131, 95.56979, 95.545374, 95.912162, 95.687874, 95.564335, 95.538733, 95.579036, 95.539545, 94.068515, 94.584192, 95.479851, 95.554502, 95.517236, 95.514891, 95.541116, 95.52134, 95.545067, 95.551372, 95.520105, 95.535395, 95.494109, 95.501609, 95.544039, 95.545912, 95.560667, 95.435162, 94.934045, 95.072639, 95.050748, 94.676876, 94.68793, 95.068279, 95.038642, 94.408982, 94.429949, 94.990296, 94.75853, 95.1649, 95.095966, 93.945934, 93.934546, 92.71179, 92.757176, 93.429478, 93.730306, 93.840446, 93.769516, 93.958374, 93.94293, 93.940904, 93.776711, 93.474757, 92.255233, 92.779808, 92.508432, 92.869858, 92.846158, 93.533357, 93.233847, 93.392017, 93.613915, 93.520494, 93.761786, 93.562945, 93.584771, 93.650417, 93.091347, 92.813293, 92.650896, 92.577961, 92.468491, 93.269589, 93.242729, 91.626408, 91.157243, 90.486782, 90.989062, 91.766393, 91.477911, 90.463049, 91.182974)), row.names = c(NA, -88L), class = "data.frame")

Resources