R prediction and visualization - r

I have fitted a polynomial to my data and visualized the results. I'm trying to extend my plot to future and predict the x value (date) when y is lower than 70. My data is HERE to replicate. My current code is below.
data <- read.table("data.txt", sep="\t", header=T)
data$date<- as.Date(data$date)
data$y <- as.numeric(data$y)
attach(data)
x <- 1:88 # vector for formula coordinates. I haven't found a way to plot polynomial formula with dates..
p <- qplot(date, y, data=data , geom="line", xlab="Time", ylab="y")
p+ geom_smooth(method = "lm", formula = y ~ poly(x, 3))
fit <- lm(y~poly(x,3))
summary(fit) #Fit is adequate
Which results to this plot:
The third order polynomial was made with numeric x vector because I didn't know how to use dates as "coordinates" for the formula. What I would like is to forecast i.e. extend this plot to the future and find out at what date is y lower than 70 using this formula.

A bit hacky, but gets the job done:
Code
# Define timeframe to predict, convert dates to numeric
days <- as.numeric(seq.Date(max(df$date) + 1, max(df$date) + 120, by = "days"))
# Build model
model <- loess(y ~ as.numeric(date), df, control = loess.control(surface = "direct"))
# Apply model to timeframe
p <- predict(model, days)
# Convert date back to Date format, build result dataframe
result <- data.frame(date = as.Date(days, origin = "1970-01-01"),
y = p)
# Plot three elements: original data, model, prediction
ggplot() +
geom_line(data = df, aes(date, y)) +
geom_smooth(data = df, aes(date, y), method = "loess", se = FALSE) +
geom_line(data = result, aes(date, y), linetype = "dashed", color = "red", size = 1)
Data
df <- structure(list(date = structure(c(16166, 16167, 16168, 16169, 16170, 16171, 16172, 16173, 16174, 16175, 16176, 16177, 16178, 16179, 16180, 16186, 16187, 16188, 16189, 16190, 16191, 16205, 16206, 16207, 16208, 16209, 16210, 16211, 16212, 16216, 16217, 16218, 16219, 16261, 16262, 16263, 16264, 16265, 16266, 16267, 16268, 16269, 16270, 16271, 16272, 16273, 16274, 16275, 16282, 16283, 16284, 16285, 16286, 16287, 16288, 16289, 16290, 16291, 16292, 16293, 16294, 16295, 16296, 16297, 16298, 16299, 16300, 16301, 16302, 16303, 16304, 16305, 16306, 16307, 16308, 16309, 16310, 16311, 16312, 16313, 16315, 16316, 16317, 16318, 16319, 16320, 16321, 16322), class = "Date"), y = c(95.543962, 95.573412, 95.589183, 95.500536, 95.563371, 95.579541, 94.979131, 95.56979, 95.545374, 95.912162, 95.687874, 95.564335, 95.538733, 95.579036, 95.539545, 94.068515, 94.584192, 95.479851, 95.554502, 95.517236, 95.514891, 95.541116, 95.52134, 95.545067, 95.551372, 95.520105, 95.535395, 95.494109, 95.501609, 95.544039, 95.545912, 95.560667, 95.435162, 94.934045, 95.072639, 95.050748, 94.676876, 94.68793, 95.068279, 95.038642, 94.408982, 94.429949, 94.990296, 94.75853, 95.1649, 95.095966, 93.945934, 93.934546, 92.71179, 92.757176, 93.429478, 93.730306, 93.840446, 93.769516, 93.958374, 93.94293, 93.940904, 93.776711, 93.474757, 92.255233, 92.779808, 92.508432, 92.869858, 92.846158, 93.533357, 93.233847, 93.392017, 93.613915, 93.520494, 93.761786, 93.562945, 93.584771, 93.650417, 93.091347, 92.813293, 92.650896, 92.577961, 92.468491, 93.269589, 93.242729, 91.626408, 91.157243, 90.486782, 90.989062, 91.766393, 91.477911, 90.463049, 91.182974)), row.names = c(NA, -88L), class = "data.frame")

Related

How can I smooth lines in ggplot?

I want to reproduce the following graph but with smoother lines:
Such that the lines are similar to the following graph:
So far, I've tried the following, but I only get a trend instead of smoothing the two series:
plot_fig4 <- ggplot(fig4, aes(x=dias))+
geom_line(aes(y=complete_preds_means), color="#9a6584", size=0.5)+
geom_line(aes(y=contrafact), colour="#000000", size=0.5) +
geom_line(aes(y=complete_preds_means), method = "lm", formula=y~spline(x,21))+
geom_ribbon(aes(ymin=complete_preds_lower, ymax=complete_preds_upper), fill="#9a6584", alpha=0.2)
My data:
structure(list(dias = structure(c(19052, 19053, 19054, 19055,
19056, 19057, 19058, 19059, 19060, 19061, 19062, 19063, 19064,
19065, 19066, 19067, 19068, 19069, 19070, 19071), class = "Date"),
complete_preds_means = c(341.07434, 381.59167, 455.47815,
485.05597, 527.60876, 562.63965, 602.48975, 624.663, 626.5637,
527.2239, 420.71643, 389.30804, 378.74396, 366.61548, 361.36566,
363.37253, 319.31824, 314.39688, 303.60342, 294.8934), contrafact = c(364.5,
358.89, 466.64, 470.11, 464.25, 487.27, 591.2, 715.33, 628.02,
505.98, 402.9, 316.81, 323.35, 358.61, 354.26, 369.5, 317.01,
336.5, 285.33, 270.91), complete_preds_lower = c(320.6368042,
361.7870895, 432.4487762, 461.2275833, 503.2255051, 535.7108551,
576.3850006, 597.9762146, 601.4407013, 504.0448837, 398.7777023,
368.0046799, 356.3603165, 345.5847885, 339.9679932, 342.7514801,
298.3247482, 293.4419693, 282.5286865, 275.4635284), complete_preds_upper = c(359.9897186,
402.5708664, 477.4746765, 508.7775711, 550.3326447, 587.6521027,
628.5320251, 649.9691833, 649.4831665, 547.9886108, 442.046402,
410.8121475, 399.0208908, 389.8615128, 387.4929993, 386.2935928,
340.140834, 336.3622116, 324.793483, 315.4606934)), row.names = c(NA,
-20L), class = c("tbl_df", "tbl", "data.frame"))
Like this?
df %>%
pivot_longer(-dias) %>%
ggplot() +
aes(x = dias, y = value, col = name) +
geom_smooth(se = FALSE)
You can use the function smooth, with the folowing parameters
Add one geom_smooth(...) line for each of your columns.
If you want the interval confidence for one serie, you switch the "se = FALSE" to True.
> ggplot()+ geom_smooth(data=data, aes(x=dias,
> y=complete_preds_means), method = loess, se=FALSE)

Plot time series without straight lines

I am trying to plot some time series but since I have data only for the summer I get these straight lines. Any idea how to fix that? The code I used: Any idea would be helpful!
ggplot(ba, aes(x=date1, y=pc1)) +
geom_line(color="turquoise4") +
theme_minimal() +
labs(x="", y="Loading", title="Correlation of PC1 and original series") +
theme(plot.title = element_text(hjust=0.5, size=20, face="bold"))+
scale_x_date(date_labels= ("%Y"))
And the plot:
The data: structure(list(date1 = structure(c(10712, 10713, 10714, 10715,
10716, 10717, 10718, 10719, 10720, 10721, 10722, 10723, 10724,
10725, 10726, 10727, 10728, 10729, 10730, 10731, 10732, 10733,
10734, 10735, 10736, 10737, 10738, 10739, 10740, 10741, 10742,
10743, 10744, 10745, 10746, 10747, 10748, 10749, 10750, 10751,
10752, 10753, 10754, 10755, 10756, 10757, 10758, 10759, 10760,
10761, 10762, 10763, 10764, 10765, 10766, 10767, 10768, 10769,
10770, 10771, 10772, 10773, 10774, 10775, 10776, 10777, 10778,
10779, 10780, 10781, 10782, 10783, 10784, 10785, 10786, 10787,
10788, 10789, 10790, 10791, 10792, 10793, 10794, 10795, 10796,
10797, 10798, 10799, 10800, 10801, 10802, 10803, 10804, 10805,
10806, 10807, 10808, 10809, 10810, 10811, 10812, 10813, 10814,
10815, 10816, 10817, 10818, 10819, 10820, 10821, 10822, 10823,
10824, 10825, 10826, 10827, 10828, 10829, 10830, 10831, 10832,
10833, 10834, 10835, 10836, 10837, 10838, 10839, 10840, 10841,
10842, 10843, 10844, 10845, 10846, 10847, 10848, 10849, 10850,
10851, 10852, 10853, 10854, 10855, 10856, 10857, 10858, 10859,
10860, 10861, 10862, 10863, 10864, 11078, 11079, 11080, 11081,
11082, 11083, 11084, 11085, 11086, 11087, 11088, 11089, 11090,
11091, 11092, 11093, 11094, 11095, 11096, 11097, 11098, 11099,
11100, 11101, 11102, 11103, 11104, 11105, 11106, 11107, 11108,
11109, 11110, 11111, 11112, 11113, 11114, 11115, 11116, 11117,
11118, 11119, 11120, 11121, 11122, 11123, 11124), class = "Date"),
pc1 = c(2.64462123197862, 2.4380313244096, 2.21417935009087,
2.02249236956036, 1.75829175459456, 1.58770371446918, 1.62230139615394,
1.73502227021784, 1.75083678213192, 1.64509065138032, 1.57921033180313,
1.70228767677341, 1.77303175099386, 1.78384290706931, 1.86580160595479,
1.9106874120324, 1.73936455049801, 1.50577059168685, 1.24226003967481,
1.07813468676617, 1.06276891964951, 1.09622663209529, 1.07692457712675,
0.978692818737612, 1.06365064520783, 1.2525349982313, 1.08237838015766,
0.645239033194787, 0.479482241789711, 0.683701830568681,
0.792197472275541, 0.631531270886538, 0.520337262457156,
0.667200695099021, 0.767559380073353, 0.7856163663635, 0.737745147101418,
0.654712633988225, 0.440140874164089, 0.111631055132755,
-0.22450806112272, -0.444238159039355, -0.584576558346287,
-0.444097467542865, -0.227821057355029, -0.120935149111578,
-0.0932195161137341, 0.037283855810637, 0.206479031035409,
0.173515424607062, 0.234536409515456, 0.317957256707112,
0.290090191780606, 0.0607339338833623, -0.27556992053308,
-0.3586166955826, -0.3534130521313, -0.501651666926942, -0.571570071652576,
-0.79110428934397, -0.985635595643097, -0.994138228085185,
-0.839909782593256, -0.699274458194957, -0.580683825031177,
-0.530811870371419, -0.4746353951302, -0.489386570992314,
-0.787222651887671, -1.1059671054324, -1.17983265148469,
-1.1058432515423, -0.970485807735322, -0.679713450749357,
-0.516950863200668, -0.495312393712548, -0.673645368786615,
-0.792675131421433, -0.692021409445821, -0.611096320716252,
-0.676712376641795, -0.723244566814595, -0.621986199057006,
-0.563969216349158, -0.649311354664407, -0.679237194242732,
-0.624476984795223, -0.738344795218295, -0.877867797047079,
-0.879375052767018, -0.84262582765393, -0.845707036138972,
-0.959691974084994, -1.06904324062176, -0.97905489332525,
-0.847145240762566, -0.86837819324592, -0.935323976060101,
-0.796486491787169, -0.461073031709012, -0.275818888900351,
-0.513613296467615, -0.786611502858454, -0.799843667083875,
-0.632676241199403, -0.468611824279096, -0.534017599627378,
-0.501551518704511, -0.239313348556757, -0.208935210151003,
-0.510483950549102, -0.62974750963569, -0.399113422985878,
-0.072812659658845, 0.0377885597304766, -0.0102829082610216,
-0.0571349366394233, -0.101917027852624, -0.202941574141862,
-0.22849727264844, -0.125157862652187, 0.168703915373856,
0.43626132948925, 0.446099489882147, 0.435379929023588, 0.236210503991287,
-0.122289033919648, -0.288101855449495, -0.186400543130663,
0.0316721901308679, 0.121240481805255, -0.0753698973566349,
-0.384779730900963, -0.531179497125517, -0.373632181420806,
-0.0148926315001478, 0.146040939981569, 0.13371186468668,
0.200262938351445, 0.465073170745138, 0.506805629621484,
0.345398737766814, 0.171110245173291, 0.176555396235594,
0.262743070740985, 0.398601589660576, 0.433248104072272,
0.453883432665361, 0.604637145172226, 0.843278371818699,
1.13506306230201, 1.42652005730684, 1.63221068108998, 1.86442509826484,
1.97067279998339, 2.0139860665512, 2.13720187260212, 2.31355711206366,
2.32477728002809, 2.36236228869303, 2.24108767618426, 2.12991693141636,
2.11677885248848, 2.01466853738993, 1.77967782944265, 1.48938981000699,
1.34042958586002, 1.33016846412245, 1.31770813627339, 1.26104969519401,
1.37385446004522, 1.61517275597383, 1.84510291043685, 1.91280500843462,
1.84897419443657, 1.52674793906846, 1.29429812528379, 1.06717755247561,
0.910500917679731, 0.904461327314293, 1.05380123048097, 1.08631739987863,
1.04843964584885, 1.10153891962662, 1.15936307711726, 1.20129772010444,
1.18746954945955, 1.00056619329093, 0.725225823060771, 0.573790799694267,
0.655776789864271, 0.780033607405981, 0.664875593837605,
0.452000300833336, 0.394589410057676, 0.402170545544567,
0.403979206396259, 0.395485848597801, 0.433314713756909,
0.437960603442615)), row.names = c(NA, 200L), class = "data.frame")strong text
1) Using the input in the Note at the end expand the dates to include the missing ones using NA's for them. Then plot.
library(ggplot2)
library(zoo)
z <- read.zoo(ba)
zz <- merge(z, zoo(, seq(start(z), end(z), 1)))
autoplot(zz) + xlab("")
2) Another approach is to use distinct facets for each year.
library(ggplot2)
breaks <- unique(as.Date(cut(ba$date1, "month")))
ba2 <- transform(ba, year = as.integer(format(date1, "%Y")))
p <- ggplot(ba2, aes(date1, pc1)) +
geom_line() +
facet_grid(cols = vars(year), scales = "free_x", space = "free_x")
p + scale_x_date(breaks = breaks, date_labels = "%b")
(continued after image)
or to remove strip text and only place the year below each facet use p from above with
breaks <- as.Date(tapply(format(ba$date1), format(ba$date1, "%Y"), min))
p +
scale_x_date(breaks = breaks, date_labels = "%Y") +
theme(strip.text.x = element_blank())
3) The facet idea could also be implemented in lattice.
library(lattice)
ba3 <- transform(ba, year = format(date1, "%Y"))
xyplot(pc1 ~ date1 | year, ba3, type = "l",
scales = list(x = list(relation = "free")), layout = c(NA, 1))
Note
There was a problem with the dput output in the question so the following was used.
ba <-
structure(list(date1 = structure(c(10712, 10713, 10714, 10715,
10716, 10717, 10718, 10719, 10720, 10721, 10722, 10723, 10724,
10725, 10726, 10727, 10728, 10729, 10730, 10731, 10732, 10733,
10734, 10735, 10736, 10737, 10738, 10739, 10740, 10741, 10742,
10743, 10744, 10745, 10746, 10747, 10748, 10749, 10750, 10751,
10752, 10753, 10754, 10755, 10756, 10757, 10758, 10759, 10760,
10761, 10762, 10763, 10764, 10765, 10766, 10767, 10768, 10769,
10770, 10771, 10772, 10773, 10774, 10775, 10776, 10777, 10778,
10779, 10780, 10781, 10782, 10783, 10784, 10785, 10786, 10787,
10788, 10789, 10790, 10791, 10792, 10793, 10794, 10795, 10796,
10797, 10798, 10799, 10800, 10801, 10802, 10803, 10804, 10805,
10806, 10807, 10808, 10809, 10810, 10811, 10812, 10813, 10814,
10815, 10816, 10817, 10818, 10819, 10820, 10821, 10822, 10823,
10824, 10825, 10826, 10827, 10828, 10829, 10830, 10831, 10832,
10833, 10834, 10835, 10836, 10837, 10838, 10839, 10840, 10841,
10842, 10843, 10844, 10845, 10846, 10847, 10848, 10849, 10850,
10851, 10852, 10853, 10854, 10855, 10856, 10857, 10858, 10859,
10860, 10861, 10862, 10863, 10864, 11078, 11079, 11080, 11081,
11082, 11083, 11084, 11085, 11086, 11087, 11088, 11089, 11090,
11091, 11092, 11093, 11094, 11095, 11096, 11097, 11098, 11099,
11100, 11101, 11102, 11103, 11104, 11105, 11106, 11107, 11108,
11109, 11110, 11111, 11112, 11113, 11114, 11115, 11116, 11117,
11118, 11119, 11120, 11121, 11122, 11123, 11124), class = "Date"),
pc1 = c(2.64462123197862, 2.4380313244096, 2.21417935009087,
2.02249236956036, 1.75829175459456, 1.58770371446918, 1.62230139615394,
1.73502227021784, 1.75083678213192, 1.64509065138032, 1.57921033180313,
1.70228767677341, 1.77303175099386, 1.78384290706931, 1.86580160595479,
1.9106874120324, 1.73936455049801, 1.50577059168685, 1.24226003967481,
1.07813468676617, 1.06276891964951, 1.09622663209529, 1.07692457712675,
0.978692818737612, 1.06365064520783, 1.2525349982313, 1.08237838015766,
0.645239033194787, 0.479482241789711, 0.683701830568681,
0.792197472275541, 0.631531270886538, 0.520337262457156,
0.667200695099021, 0.767559380073353, 0.7856163663635, 0.737745147101418,
0.654712633988225, 0.440140874164089, 0.111631055132755,
-0.22450806112272, -0.444238159039355, -0.584576558346287,
-0.444097467542865, -0.227821057355029, -0.120935149111578,
-0.0932195161137341, 0.037283855810637, 0.206479031035409,
0.173515424607062, 0.234536409515456, 0.317957256707112,
0.290090191780606, 0.0607339338833623, -0.27556992053308,
-0.3586166955826, -0.3534130521313, -0.501651666926942, -0.571570071652576,
-0.79110428934397, -0.985635595643097, -0.994138228085185,
-0.839909782593256, -0.699274458194957, -0.580683825031177,
-0.530811870371419, -0.4746353951302, -0.489386570992314,
-0.787222651887671, -1.1059671054324, -1.17983265148469,
-1.1058432515423, -0.970485807735322, -0.679713450749357,
-0.516950863200668, -0.495312393712548, -0.673645368786615,
-0.792675131421433, -0.692021409445821, -0.611096320716252,
-0.676712376641795, -0.723244566814595, -0.621986199057006,
-0.563969216349158, -0.649311354664407, -0.679237194242732,
-0.624476984795223, -0.738344795218295, -0.877867797047079,
-0.879375052767018, -0.84262582765393, -0.845707036138972,
-0.959691974084994, -1.06904324062176, -0.97905489332525,
-0.847145240762566, -0.86837819324592, -0.935323976060101,
-0.796486491787169, -0.461073031709012, -0.275818888900351,
-0.513613296467615, -0.786611502858454, -0.799843667083875,
-0.632676241199403, -0.468611824279096, -0.534017599627378,
-0.501551518704511, -0.239313348556757, -0.208935210151003,
-0.510483950549102, -0.62974750963569, -0.399113422985878,
-0.072812659658845, 0.0377885597304766, -0.0102829082610216,
-0.0571349366394233, -0.101917027852624, -0.202941574141862,
-0.22849727264844, -0.125157862652187, 0.168703915373856,
0.43626132948925, 0.446099489882147, 0.435379929023588, 0.236210503991287,
-0.122289033919648, -0.288101855449495, -0.186400543130663,
0.0316721901308679, 0.121240481805255, -0.0753698973566349,
-0.384779730900963, -0.531179497125517, -0.373632181420806,
-0.0148926315001478, 0.146040939981569, 0.13371186468668,
0.200262938351445, 0.465073170745138, 0.506805629621484,
0.345398737766814, 0.171110245173291, 0.176555396235594,
0.262743070740985, 0.398601589660576, 0.433248104072272,
0.453883432665361, 0.604637145172226, 0.843278371818699,
1.13506306230201, 1.42652005730684, 1.63221068108998, 1.86442509826484,
1.97067279998339, 2.0139860665512, 2.13720187260212, 2.31355711206366,
2.32477728002809, 2.36236228869303, 2.24108767618426, 2.12991693141636,
2.11677885248848, 2.01466853738993, 1.77967782944265, 1.48938981000699,
1.34042958586002, 1.33016846412245, 1.31770813627339, 1.26104969519401,
1.37385446004522, 1.61517275597383, 1.84510291043685, 1.91280500843462,
1.84897419443657, 1.52674793906846, 1.29429812528379, 1.06717755247561,
0.910500917679731, 0.904461327314293, 1.05380123048097, 1.08631739987863,
1.04843964584885, 1.10153891962662, 1.15936307711726, 1.20129772010444,
1.18746954945955, 1.00056619329093, 0.725225823060771, 0.573790799694267,
0.655776789864271, 0.780033607405981, 0.664875593837605,
0.452000300833336, 0.394589410057676, 0.402170545544567,
0.403979206396259, 0.395485848597801, 0.433314713756909,
0.437960603442615)), row.names = c(NA, -200L), class = "data.frame")

Finding the turning point (inflection point) of COVID-19 infections using inflection package

I am not sure if I have done my inflection point calculation correctly. Based on lab confirmed cumulative case data in the epicenter of the current epidemic, we have tried to identify the inflection point. I used the inflection package and calculated the inflection point as "08 Feb 2020". I have also tried to calculate the first and second directives as estimated increase each and changing rate.
Are those results from the following graphs consistent?
df<-structure(list(date = structure(c(18277, 18278, 18279, 18280,
18281, 18282, 18283, 18284, 18285, 18286, 18287, 18288, 18289,
18290, 18291, 18292, 18293, 18294, 18295, 18296, 18297, 18298,
18299, 18300, 18301, 18302, 18303, 18304, 18305, 18306, 18307),
class = "Date"),
cases = c(45, 62, 121, 198, 258, 363, 425,
495, 572, 618, 698, 1590, 1905, 2261, 2639, 3125, 4109, 5142,
6384, 8351, 10117, 11618, 13603, 14982, 16903, 18454, 19558,
20630, 21960, 22961, 23621)),
class = "data.frame", row.names = c(NA, -31L))
xlb_0<- structure(c(18281, 18285, 18289, 18293,
18297, 18301, 18305,
18309), class = "Date")
library(tidyverse)
# Smooth cumulative cases over time
df$x = as.numeric(df$date)
fit_1<- loess(cases ~ x, span = 1/3, data = df)
df$case_sm <-fit_1$fitted
# use inflection to obtain inflection point
library(inflection)
guai_0 <- check_curve(df$x, df$case_sm)
check_curve(df$x, df$cases)
#> $ctype
#> [1] "convex_concave"
#>
#> $index
#> [1] 0
guai_1 <- bese(df$x, df$cases, guai_0$index)
structure(guai_1$iplast, class = "Date")
#> [1] "2020-02-08"
# Plot cumulativew numbers of cases
df %>%
ggplot(aes(x = date, y = cases ))+
geom_line(aes(y = case_sm), color = "red") +
geom_point() +
geom_vline(xintercept = guai_1$iplast) +
labs(y = "Cumulative lab confirmed infections")
# Daily new cases (first derivative) and changing rate (second derivative)
df$dt1 = c(0, diff(df$case_sm)/diff(df$x))
fit_2<- loess(dt1 ~ x, span = 1/3, data = df)
df$change_sm <-fit_2$fitted
df$dt2 <- c(NA, diff(df$change_sm)/diff(df$x))
df %>%
ggplot(aes(x = date, y = dt1))+
geom_line(aes(y = dt1,
color = "Estimated number of new cases")) +
geom_point(aes(y = dt2*2, color = "Changing rate")) +
geom_line(aes(y = dt2*2, color = "Changing rate"))+
geom_vline(xintercept = guai_1$iplast) +
labs(y = "Estimatede number of new cases") +
scale_x_date(breaks = xlb_0,
date_labels = "%b%d") +
theme(legend.title = element_blank())
#> Warning: Removed 1 rows containing missing values (geom_point).
#> Warning: Removed 1 row(s) containing missing values (geom_path).
Created on 2020-02-17 by the reprex package (v0.3.0)
I was gonna write a comment, but I was pushing the character limit.
I am not familiar with the inflection package so I am not one to judge if the 2020-02-08 is the true inflection. However, I will say this is difficult to answer with R because R is not necessarily good at calculating derivatives. If you had an estimated line equation - then you could potentially use this to plot first and second derivatives. Calculating rough delta's by doing the difference in (Y_n+1-Y_n)/(X_n+1-X_n) is never optimal because a derivate in theory is the delta of two points infinitesimally close to each other. You fundamentally cannot get a great estimate of the derivative. You can even see this because you are forced to shift this estimate to either n or n+1. Furthermore, you would expect the inflection point of x_0 to be a local min/max in the first derivative and equal to zero in the second derivative. So I don't think your second plot helps. But this could just be due to the delta's calculated.
What I would do is first fit your data to some type of model.
In this example I'm going to use the package dr4pl to model your data to the 4 parameter logistic model.
Since the function of the 4 parameter model is well known, I can write what the first and second derivative functions should be, then plot those values using stat_function in the ggplot2 package.
library(ggplot2)
library(dr4pl)
df<-structure(list(date = structure(c(18277, 18278, 18279, 18280,
18281, 18282, 18283, 18284, 18285, 18286, 18287, 18288, 18289,
18290, 18291, 18292, 18293, 18294, 18295, 18296, 18297, 18298,
18299, 18300, 18301, 18302, 18303, 18304, 18305, 18306, 18307),
class = "Date"),
cases = c(45, 62, 121, 198, 258, 363, 425,
495, 572, 618, 698, 1590, 1905, 2261, 2639, 3125, 4109, 5142,
6384, 8351, 10117, 11618, 13603, 14982, 16903, 18454, 19558,
20630, 21960, 22961, 23621)),
class = "data.frame", row.names = c(NA, -31L))
xlb_0<- structure(c(18281, 18285, 18289, 18293,
18297, 18301, 18305,
18309), class = "Date")
df$dat_as_num <- as.numeric(df$date)
dr4pl_obj <- dr4pl(cases~dat_as_num, data = df, init.parm = c(30000, 18300, 2, 0))
#first derivative derivation
d1_dr4pl <- function(x, theta, scale = F){
if (any(is.na(theta))) {
stop("One of the parameter values is NA.")
}
if (theta[2] <= 0) {
stop("An IC50 estimate should always be positive.")
}
f <- -theta[3]*((theta[4]-theta[1])/((1+(x/theta[2])^theta[3])^2))*((x/theta[2])^(theta[3]-1))
if(scale) {
f <- scales::rescale(x = f, to = c(theta[4],theta[1]))
}
return(f)
}
#Second derivative derivation
d2_dr4pl <- function(x, theta, scale = F){
if (any(is.na(theta))) {
stop("One of the parameter values is NA.")
}
if (theta[2] <= 0) {
stop("An IC50 estimate should always be positive.")
}
f <- 2*((theta[3]*(x/theta[2])^(theta[3]-1))^2)*((theta[4]-theta[1])/((1+(x/theta[2])^(theta[3]))^3))-theta[3]*(theta[3]-1)*((x/theta[2])^(theta[3]-2))*((theta[4]-theta[1])/((1+(x/theta[2])^theta[3])^2))
if(scale) {
f <- scales::rescale(x = f, to = c(theta[4],theta[1]))
f <- f - f[1]
}
return(f)
}
ggplot(df, aes(x = dat_as_num)) +
geom_hline(yintercept = 0) +
[![enter image description here][1]][1]geom_point(aes(y = cases), color = "grey", alpha = .6, size = 5) +
stat_function(fun = d1_dr4pl, args = list(theta = dr4pl_obj$parameters, scale = T), color = "red") +
stat_function(fun = d2_dr4pl, args = list(theta = dr4pl_obj$parameters, scale = T), color = "blue") +
stat_function(fun = dr4pl::MeanResponse, args = list(theta = dr4pl_obj$parameters), color = "gold") +
geom_vline(xintercept = dr4pl_obj$parameters[2], linetype = "dotted") +
theme_classic()
As you can see, the inflection point, which is the IC50 value (theta 2) of the 4 parameter logistic model, lines up well when we approach it this way.
summary(dr4pl_obj)
#$call
#dr4pl.formula(formula = cases ~ dat_as_num, data = df, init.parm = c(30000, 18300, 2, 0))
#
#$coefficients
# Estimate StdErr 2.5 % 97.5 %
#Upper limit 25750.61451 4.301008e-05 25750.59681 25750.63221
#Log10(IC50) 18298.75347 4.301008e-09 18298.67889 18298.82806
#Slope 5154.35449 4.301008e-05 5154.33678 5154.37219
#Lower limit 58.48732 4.301008e-05 58.46962 58.50503
#
#attr(,"class")
#[1] "summary.dr4pl"
Furthermore, using dr4pl, it says the IC50 value is roughly 18298.8, which is late 2020-02-06. Not too far off from the inflection value. I'm sure there may be a better model to use than the 4pl, but it was just the one that I knew I could write first and second derivatives for the purposes of answering this question.
I'm sure other coding languages are more specialized when it comes to derivatives, and can even calculate them for you so long as you start with an initial function. I think one of these languages is mathematica.
A disclaimer, I ended up scaling the first and second derivatives so that they could be plotted together. Their actual values are much larger than shown here.
A very quick back-of-the-envelope plot based on your data shows
calc_d <- function(x) c(0, diff(x))
df %>%
mutate(
first_deriv_cases = calc_d(cases),
second_deriv_cases = calc_d(calc_d(cases))) %>%
pivot_longer(-date) %>%
ggplot(aes(date, value)) +
geom_line() +
facet_wrap(~name, scale = "free_y", ncol = 1) +
geom_smooth()
So the inflection point at 8 February is consistent with the first derivative (i.e. the density function) having a maximum at that point.

Add a 3d surface at point 0 in plotly

I have a 3D plot using plotly which can take on negative and positive values. I would like to add a see through 3D flat surface at point 0 (see through here means setting alpha = 0.3 as in ggplot2) in order to emphasize the positive/negative values a little more. Alternatively adding a "grid" as in the background through the point 0 would be useful.
Code:
library(plotly)
library(dplyr)
df %>%
select(date, everything()) %>%
plot_ly(
x = colnames(subset(., select = c(2:4))), # probably not the most efficient method
y = ~date,
z = data.matrix(subset(., select = c(2:4))),
type = "surface",
colors = c("darkblue", "yellow", "darkred")
)
Data:
df <- structure(list(wind = c(0.938535690307617, 0.976551234722137,
0.954551994800568, 0.920722126960754, 0.889751732349396, 0.701366603374481,
0.718878328800201, 0.685763895511627, 0.677822828292847, 0.876205325126648,
0.67054146528244, 0.738650739192963, 0.725420415401459, 0.851324200630188,
0.6589714884758, 0.880357265472412, 0.997677683830261, 0.968335390090942,
0.882899045944214, 1.09453165531158, 1.06842839717865, 0.819347560405731,
0.995919525623322, 0.695173263549805, 0.860199570655823, 0.977508246898651,
0.807886302471161, 0.778182446956635, 0.886279463768005, 0.879809498786926,
0.903579652309418, 1.09579062461853, 1.03768181800842, 0.802022874355316,
0.928451955318451, 0.744936227798462, 0.621560990810394, 0.706887602806091,
0.749234974384308, 0.746754884719849, 0.65381270647049, 0.710927248001099,
0.517793655395508, 0.477172255516052, 0.631662607192993, 0.55011123418808,
0.584086775779724, 0.617783904075623, 0.438043504953384, 0.577566087245941,
0.684398949146271, 1.00848543643951, 0.957233726978302, 0.84071296453476,
0.90149587392807, 0.888661623001099, 0.923080563545227, 0.945727407932281,
0.965020060539246, 1.07903909683228, 0.870955109596252, 0.934546709060669,
0.93314516544342, 0.926109850406647, 0.981102645397186, 0.770903468132019,
0.829349219799042, 0.980291783809662, 1.00809383392334, 0.821328639984131,
0.89130437374115, 0.886775732040405, 0.896964132785797, 1.08184945583344,
1.17395043373108, 1.11537420749664, 1.15379846096039, 1.20203125476837,
1.12583827972412, 1.10014677047729, 0.891100168228149, 1.01484513282776,
1.01270127296448, 0.895487844944, 0.877909421920776, 1.11780989170074,
1.18047833442688, 1.14531397819519, 1.28634309768677, 1.23702371120453,
1.01186645030975, 1.15563869476318, 1.0918824672699, 1.2023059129715,
1.11806273460388, 1.11690294742584, 1.13390302658081, 1.10064888000488,
1.1418149471283, 1.06309700012207), holiday = c(-0.323977619409561,
-0.32814821600914, -0.335260361433029, -0.355545252561569, -0.347469061613083,
-0.349574476480484, -0.331554502248764, -0.351303607225418, -0.342486947774887,
-0.34791961312294, -0.342795491218567, -0.344868391752243, -0.344075173139572,
-0.342763870954514, -0.365125387907028, -0.301046937704086, -0.303365021944046,
-0.301353365182877, -0.316689401865005, -0.312132269144058, -0.317412197589874,
-0.331865310668945, -0.306569844484329, -0.306523144245148, -0.328399240970612,
-0.328934848308563, -0.340233236551285, -0.310919582843781, -0.352815061807632,
-0.324335247278214, -0.333071410655975, -0.25605234503746, -0.28141376376152,
-0.267955660820007, -0.261935144662857, -0.23365119099617, -0.204256281256676,
-0.271868377923965, -0.266900211572647, -0.268673747777939, -0.246791422367096,
-0.261210560798645, -0.275096118450165, -0.272033154964447, -0.307673662900925,
-0.316463977098465, -0.315838783979416, -0.278884381055832, -0.29575651884079,
-0.300951153039932, -0.275152236223221, -0.292507320642471, -0.283683449029922,
-0.333169460296631, -0.323910266160965, -0.317758291959763, -0.318425863981247,
-0.318803340196609, -0.316879868507385, -0.321441829204559, -0.342198520898819,
-0.336640536785126, -0.334549427032471, -0.304668426513672, -0.281678169965744,
-0.287939876317978, -0.278673946857452, -0.299236595630646, -0.295697629451752,
-0.297617554664612, -0.300621718168259, -0.294943511486053, -0.291149199008942,
-0.263380706310272, -0.264828890562057, -0.260132044553757, -0.294039487838745,
-0.284087061882019, -0.27564924955368, -0.275646597146988, -0.294898957014084,
-0.290615618228912, -0.264040410518646, -0.267250239849091, -0.27454400062561,
-0.22650308907032, -0.220207497477531, -0.233633011579514, -0.205283910036087,
-0.201104089617729, -0.235431581735611, -0.252072185277939, -0.257701843976974,
-0.233586445450783, -0.239779070019722, -0.233227252960205, -0.26639312505722,
-0.256356805562973, -0.248022571206093, -0.285306513309479),
month_10 = c(0.109010718762875, 0.112969301640987, 0.110368527472019,
0.107052445411682, 0.0947703272104263, 0.109930463135242,
0.106371931731701, 0.114236004650593, 0.102411419153214,
0.0184143912047148, 0.0301767271012068, 0.0376978516578674,
0.0472327470779419, 0.0622735135257244, 0.043902475386858,
0.0611664243042469, 0.0618763938546181, 0.0555795393884182,
0.0623081848025322, 0.0636096075177193, 0.0675770491361618,
0.0629641935229301, 0.0288578178733587, 0.025121470913291,
0.0300221722573042, 0.0611664243042469, 0.0659252777695656,
0.0159557648003101, 0.0376978516578674, 0.032174538820982,
0.0317839048802853, 0.0626140907406807, 0.0577763170003891,
0.0571180321276188, 0.0571180321276188, 0.0590739175677299,
0.0760120898485184, 0.0670360922813416, 0.0699110627174377,
0.0717969089746475, 0.0640148967504501, 0.0670360922813416,
0.164964601397514, 0.152724280953407, 0.115071900188923,
0.129654854536057, 0.122471310198307, 0.0768138542771339,
0.0400041155517101, 0.0573667995631695, 0.0685276389122009,
0.0337212830781937, 0.0616994015872478, 0.0589617975056171,
0.059433214366436, 0.0567467175424099, 0.0661386772990227,
0.0704409778118134, 0.0611028559505939, 0.0579087659716606,
0.0622764676809311, 0.0538184903562069, 0.0553129874169827,
0.058912742882967, 0.059433214366436, 0.0575473643839359,
0.0575473643839359, 0.058104183524847, 0.055678017437458,
0.0575473643839359, 0.059433214366436, 0.0603105537593365,
0.0589617975056171, 0.0601625964045525, 0.0577935017645359,
0.0574705749750137, 0.0550192892551422, 0.0475285314023495,
0.0577935017645359, 0.0543489865958691, 0.0526755712926388,
0.0553129874169827, 0.0589617975056171, 0.0593133755028248,
0.0547068528831005, 0.0676216259598732, 0.0511428378522396,
0.0564789660274982, 0.0641900449991226, 0.0666491389274597,
0.0390096306800842, 0.058005329221487, 0.0449355207383633,
0.062143836170435, 0.0591179206967354, 0.0637796893715858,
0.0633300691843033, 0.0702810436487198, 0.0647093132138252,
0.0427459664642811), month_12 = c(0.0733551606535912, 0.0707022771239281,
0.0707631036639214, 0.0718683376908302, 0.071540854871273,
0.0993443354964256, 0.0973133370280266, 0.0668289735913277,
0.0731536969542503, 0.0717423185706139, 0.0750747397542,
0.0754749700427055, 0.0746353641152382, 0.0912141725420952,
0.100288398563862, 0.07671108096838, 0.0708827450871468,
0.0708827450871468, 0.0741974636912346, 0.0761196836829185,
0.0648202076554298, 0.114817388355732, 0.100519739091396,
0.100288398563862, 0.102664910256863, 0.0566539540886879,
0.0648743882775307, 0.0706770494580269, 0.0746353641152382,
0.0750747397542, 0.0750747397542, 0.113439425826073, 0.127338454127312,
0.114467553794384, 0.112322382628918, 0.136203452944756,
0.131634846329689, 0.144591823220253, 0.139709115028381,
0.140504062175751, 0.145617410540581, 0.166768744587898,
0.151759415864944, 0.151587069034576, 0.156320676207542,
0.131974145770073, 0.135847419500351, 0.159762278199196,
0.163790658116341, 0.158604919910431, 0.127949997782707,
0.109744042158127, 0.101787634193897, 0.0582009926438332,
0.0671374276280403, 0.0669510439038277, 0.074992410838604,
0.074992410838604, 0.0787725821137428, 0.0696783438324928,
0.0965322777628899, 0.0666131302714348, 0.0666131302714348,
0.0925824269652367, 0.0875711515545845, 0.0873847678303719,
0.0873847678303719, 0.0711322501301765, 0.0696783438324928,
0.0667495802044868, 0.0667495802044868, 0.0685289725661278,
0.0582009926438332, 0.0599979534745216, 0.0702869072556496,
0.068938173353672, 0.0610831864178181, 0.074992410838604,
0.0702869072556496, 0.0683296099305153, 0.0671374276280403,
0.0666131302714348, 0.0815844461321831, 0.070037417113781,
0.068938173353672, 0.0862899348139763, 0.0824234709143639,
0.0761196836829185, 0.0663527771830559, 0.0751885995268822,
0.072908379137516, 0.0599117167294025, 0.0743952021002769,
0.0666681602597237, 0.0667495802044868, 0.0748060271143913,
0.0800538137555122, 0.0787725821137428, 0.0741974636912346,
0.0741937384009361), date = structure(c(14610, 14611, 14612,
14613, 14614, 14615, 14616, 14617, 14618, 14619, 14620, 14621,
14622, 14623, 14624, 14625, 14626, 14627, 14628, 14629, 14630,
14631, 14632, 14633, 14634, 14635, 14636, 14637, 14638, 14639,
14640, 14641, 14642, 14643, 14644, 14645, 14646, 14647, 14648,
14649, 14650, 14651, 14652, 14653, 14654, 14655, 14656, 14657,
14658, 14659, 14660, 14661, 14662, 14663, 14664, 14665, 14666,
14667, 14668, 14669, 14670, 14671, 14672, 14673, 14674, 14675,
14676, 14677, 14678, 14679, 14680, 14681, 14682, 14683, 14684,
14685, 14686, 14687, 14688, 14689, 14690, 14691, 14692, 14693,
14694, 14695, 14696, 14697, 14698, 14699, 14700, 14701, 14702,
14703, 14704, 14705, 14706, 14707, 14708, 14709), class = "Date")), row.names = c(NA,
100L), class = "data.frame")
Adding a plane at the value of 0 for z:
library(plotly)
library(dplyr)
# I separated your df wrangling to better understand the data
df <- df %>%
select(date, everything())
# Code you posted
df %>%
plot_ly(
x = colnames(subset(., select = c(2:4))), # probably not the most efficient method
y = ~date,
z = data.matrix(subset(., select = c(2:4))),
type = "surface",
colors = c("darkblue", "yellow", "darkred")
) %>%
# The surface added by using y & x from df
# and making z a matrix of zeros 3 rows by 100 columns.
add_surface(z = matrix(0, ncol = 3, nrow = 100),
y = df$date,
x = colnames(subset(df, select = c(2:4))),
opacity = .8)
Your plot:
With added zero z plane:

axis ticks with scale_x_break: more ticks with only some labeled

So I have a plot where I want the x-axis to show specific dates starting at the 16-06-2016 through to the 04-08-2016, with week dates labeled on the x-axis. This so far I have managed to do - however, I would also like there to be blank tick marks per day, as well as the week labels - but I am not sure I can apply multiple scale_x_date(breaks =) conditions.
Any help on how to add the additional tick marks would be appreciated!
Dummy data set to play with:
library(ggplot2)
library(reshape2)
#some data
df <- structure(list(Date = structure(c(16968, 16969, 16970, 16971,
16972, 16973, 16974, 16975, 16976, 16977, 16978, 16979, 16980,
16981, 16982, 16983, 16984, 16985, 16986, 16987, 16988, 16989,
16990, 16991, 16992, 16993, 16994, 16995, 16996, 16997, 16998,
16999, 17000, 17001, 17002, 17003, 17004, 17005, 17006, 17007,
17008, 17009, 17010, 17011, 17012, 17013, 17014, 17015, 17016
), class = "Date"), Tc = c("0.0964", "0.0780", "0.1265", "0.1503",
"0.1548", "0.1028", "0.1112", "0.1283", "0.0956", "0.0847", "0.0785",
"0.0859", "0.0879", "0.1203", "0.1677", "0.2174", "", "", "0.1496",
"0.1080", "0.1101", "0.1289", "0.0942", "0.0835", "0.0851", "0.0881",
"0.1216", "0.0766", "0.0744", "0.0626", "", "0.1116", "", "0.0862",
"", "0.1210", "", "", "0.1074", "", "0.1527", "", "0.1513", "",
"0.1246", "", "0.1415", "", "0.0827")), .Names = c("Date", "Tc"
), class = "data.frame", row.names = 3:51)
# melt data frame
df <- melt(df, id.vars = c("Date"))
#basic plot
plot1 <- ggplot(df[!is.na(df$value), ],
aes(x=Date, y=value, color=variable, group = variable,shape
= variable, linetype = variable, fill = variable))
# points
plot1 <- plot1 + geom_line(lwd =3)+geom_point(size=17, stroke =2)
break.vec <- c(as.Date("2016-06-16"),
seq(from=as.Date("2016-06-16"), to=as.Date("2016-08-04"),
by="week"))
plot1 <- plot1 + scale_x_date(breaks = break.vec, date_labels = "%d-%m", limits=range(break.vec))
Ok thank you for the input! This is what I went with from the comments, although I am sure there might be a more concise way to do so - but it does the job!
break.vec <- seq(from=as.Date("2016-06-16"), to=as.Date("2016-08-04"),
by="day")
plot1 <- plot1 + scale_x_date(breaks = break.vec,
labels=c("16-06","","","","","","","23-06","","","","","","", "30-06",
"","","","","","", "07-07", "","","","","","", "14-07",
"","","","","","","21-07", "","","","","","", "28-07",
"","","","","","", "04-08"),expand = c(0.05,0))

Resources