Problem:
I have quarterly level data. I need to perform a month over month analysis. Is there a function, or a ggplot feature that will interpolate the quarterly data and fill in monthly data?
Reference Data:
dput(HPF[1:25, ])
structure(list(region = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), path = c(1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
), date = structure(c(16116, 16205, 16297, 16389, 16481, 16570,
16662, 16754, 16846, 16936, 17028, 17120, 17212, 17301, 17393,
17485, 17577, 17666, 17758, 17850, 17942, 18031, 18123, 18215,
18307), class = "Date"), index_value = c(1, 1.033852765, 1.041697122,
1.038876363, 1.041043093, 1.060900982, 1.073728928, 1.075879441,
1.080898915, 1.10368893, 1.119240863, 1.122827602, 1.128639801,
1.15275796, 1.169021733, 1.172707492, 1.178666441, 1.203634882,
1.220348482, 1.223890323, 1.229770019, 1.255791539, 1.273560554,
1.278236959, 1.285508086), index = c(0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
24), counter = 1:25, BaseQoQ = c(0, 0.033852765, 0.00758749917354051,
-0.00270784947028013, 0.00208564760655761, 0.0190749923163842,
0.0120915582298895, 0.00200284535874973, 0.00466546139717505,
0.0210843166587877, 0.0140908661646175, 0.00320461762840418,
0.00517639483536669, 0.0213692260175751, 0.0141085757499344,
0.00315285755256367, 0.00508136004984272, 0.0211836361259394,
0.0138859385432799, 0.00290231933930496, 0.00480410367620832,
0.021159663675294, 0.0141496533844698, 0.00367191413499146, 0.00568840303732765
), fdate = structure(c(16116, 16205, 16297, 16389, 16481, 16570,
16662, 16754, 16846, 16936, 17028, 17120, 17212, 17301, 17393,
17485, 17577, 17666, 17758, 17850, 17942, 18031, 18123, 18215,
18307), class = "Date"), StressC = c(0.99749, 1.031342765, 1.039187122,
1.036366363, 1.038533093, 1.058390982, 1.071218928, 1.073369441,
1.078388915, 1.10117893, 1.116730863, 1.120317602, 1.126129801,
1.15024796, 1.166511733, 1.170197492, 1.176156441, 1.201124882,
1.217838482, 1.221380323, 1.229770019, 1.255791539, 1.273560554,
1.278236959, 1.285508086), StressQoQ = c(0, 0.0339379492526242,
0.00760596502560418, -0.0027143898728953, 0.00209069888540969,
0.0191210941026796, 0.0121202336548254, 0.00200753827606026,
0.00467637125510434, 0.0211333913794913, 0.0141229845362187,
0.00321182042946733, 0.00518799221722843, 0.021416855302633,
0.0141393626118667, 0.00315964160130755, 0.00509225924746737,
0.021228843485116, 0.0139149560969629, 0.00290830110260876, 0.0068690282969297,
0.021159663675294, 0.0141496533844698, 0.00367191413499146, 0.00568840303732765
)), .Names = c("region", "path", "date", "index_value", "index",
"counter", "BaseQoQ", "fdate", "StressC", "StressQoQ"), row.names = c(NA,
-25L), class = c("grouped_df", "tbl_df", "tbl", "data.frame"), vars = "region", drop = TRUE, indices = list(
0:24), group_sizes = 25L, biggest_group_size = 25L, labels = structure(list(
region = 1), row.names = c(NA, -1L), class = "data.frame", vars = "region", drop = TRUE, .Names = "region"))
Any insight would be much appreciated! I hope the data I have provided is enough to offer suggestions.
Goal:
I only need to plot/graph the monthly information; I do not need it stored in a data.frame.
So this is what you can do:
1) create a vector of dates to interpolate
# Expand every quarterly date into three monthly dates (the date itself plus
# the following two months) and stack them into a one-column data.frame.
months <- data.frame(
  date = do.call(
    c,
    lapply(data$date, function(quarter_start) {
      seq.Date(from = quarter_start, by = "month", length.out = 3)
    })
  )
)
2) left join your data.frame to the months data.frame to create NAs for interpolation
library(dplyr)
# Keep every monthly date; the columns coming from `data` are NA for months
# that have no quarterly observation.
monthly_data <- months %>%
  left_join(data, by = "date")
3) use one of na.locf(), na.approx(), or na.spline() to interpolate, e.g., StressC
library(zoo)
# Fill the NA months of StressC with spline-interpolated values.
monthly_data[["StressC"]] <- na.spline(monthly_data[["StressC"]])
Note: the interpolation methods above are
na.locf() - last observation carried forward (most recent point)
na.approx() - linear
na.spline() - spline (often used in graphics)
Run the following (before interpolation) to see the difference:
# Plot the raw series as points, then overlay one line per fill method so
# the three can be compared on the same axes.
local({
  stress <- monthly_data$StressC
  plot(x = stress, ylab = "StressC", xlab = "", xaxt = "n")
  # line colour -> fill function
  fill_methods <- list(red = na.locf, green = na.approx, blue = na.spline)
  for (line_colour in names(fill_methods)) {
    lines(x = fill_methods[[line_colour]](stress), col = line_colour)
  }
})
You can also do something like this to get a ggplot:
# ggplot2 version of the comparison: observed points plus one line per fill
# method. zoo must be attached for na.locf() / na.spline().
library(ggplot2)
ggplot(monthly_data, aes(x = date)) +
  # The NA months are expected here, so drop them without a warning.
  geom_point(aes(y = StressC), colour = "black", na.rm = TRUE) +
  # na.rm = FALSE keeps the filled vector the same length as `date`, even if
  # the series starts with NA.
  geom_line(aes(y = na.locf(StressC, na.rm = FALSE)), colour = "red") +
  # Blue rather than a second red line, so the spline can be told apart from
  # the carried-forward line.
  geom_line(aes(y = na.spline(StressC)), colour = "blue")
Related
I would like to create the following plots in parallel
I have used the following code using the wide format dataset:
# Summary statistics drawn as vertical reference lines on the histogram.
# Missing scores are ignored everywhere, so the reference lines, the
# histogram breaks and the fitted normal curve all use the same values.
score_mean <- mean(data$score, na.rm = TRUE)
score_sd <- sd(data$score, na.rm = TRUE)
sumstatz_1 <- data.frame(
  whichstat = c("mean", "sd upr", "sd lwr", "median"),
  value = c(
    score_mean,
    score_mean + score_sd,
    score_mean - score_sd,
    median(data$score, na.rm = TRUE)
  )
)

# Density-scaled histogram of `score` with the fitted normal curve and the
# summary-statistic lines overlaid.
plot2 <- ggplot(data, aes(x = score)) +
  geom_histogram(
    # after_stat(density) replaces the deprecated ..density.. notation
    aes(y = after_stat(density)),
    breaks = seq(0, max(data$score, na.rm = TRUE), by = 5),
    colour = "black",
    fill = "white"
  ) +
  stat_function(
    fun = dnorm,
    args = list(mean = score_mean, sd = score_sd),
    colour = "black",
    size = 1
  ) +
  labs(title = "score", x = "score", y = "Distribution") +
  geom_vline(
    data = sumstatz_1,
    aes(xintercept = value, linetype = whichstat, col = whichstat),
    size = 1
  )
I have taken it by changing just the variable of interest to create the second graph. Anyway, I would like to create the same result by using an interactive graph. Here I have set up the following code that I have converted into a long format for convenience and then I have coded the following for loop:
# Asker's attempted loop; it does not parse as written (see notes below).
# NOTE(review): `i` is never used in the loop body, so every iteration
# rebuilds the same plot `p`.
for (i in 101:ncol(long)) {
p <- ggplot(long, aes(x = points)) +
geom_histogram(aes(y =..density..),
breaks = seq(0, 50, by = 3),
colour = "black",
fill = "white") + facet_grid(.~ score)
# NOTE(review): a second `for` directly after `}` on the same line is a
# syntax error; it would have to start on its own line.
} for (j in seq_along(long$score)){
# NOTE(review): the result of `p + stat_function(...)` is not assigned back
# to `p`, so the print(p) below would not include this layer.
p +
# `dnorm[???]` is the asker's placeholder, not valid R.
stat_function(fun = dnorm[???],
# NOTE(review): 'j' in quotes is the literal string "j", not the loop variable.
args = list(mean = mean(long$points[long$score == 'j'], na.rm = TRUE),
# NOTE(review): `sd` is computed with mean() here; sd() was presumably intended.
sd = mean(long$points[long$score == 'j'], na.rm = TRUE)),
colour = 'black', size = 1)
}
print(p)
But I have no clue how to set the parameters in stat_function(), nor whether it is possible to use it in a for loop or another iterative method. Would you have any suggestions?
Here the dataset
structure(list(ID = c(1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7,
7, 8, 8, 9, 9, 10, 10), score = structure(list(MM_score = c("score_2",
"score_1", "score_2", "score_1", "score_2", "score_1", "score_2",
"score_1", "score_2", "score_1", "score_2", "score_1", "score_2",
"score_1", "score_2", "score_1", "score_2", "score_1", "score_2",
"score_1")), row.names = c(NA, -20L), class = c("tbl_df", "tbl",
"data.frame")), points = c(53, 13.25, 17.5, 1.59090909090909,
48.5, 6.92857142857143, 40, 3.63636363636364, 46, 7.07692307692308,
38, 4.47058823529412, 14.5, 1.61111111111111, 19.5, 3.54545454545455,
37.5, 3.40909090909091, 5.5, 0.916666666666667)), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -20L), groups = structure(list(
ID = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), .rows = structure(list(
1:2, 3:4, 5:6, 7:8, 9:10, 11:12, 13:14, 15:16, 17:18,
19:20), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -10L), .drop = TRUE))
I have a dataframe with approximately 3 million rows. Each row is assigned a unique ID and has up to 4 dates. I wish to create a set of new columns for month and year (i.e. Jan-21, Feb-21, Mar-21, etc) and assign a value of "0" for each month/year prior to the first date, and then a value of "1" for the month/year containing the date for each ID, and maintain the value of "1" in each subsequent month/year column until the next column that matches the 2nd date.
I understand that it's easier to help me with examples, so I have put together this dput output with an example of what my current data looks like:
structure(list(id = c(1, 2, 3, 4, 5), date1 = structure(c(1623801600,
1615420800, 1654560000, 1620259200, 1615248000), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), date2 = structure(c(1629158400, 1621987200,
1658448000, 1623974400, NA), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
date3 = structure(c(NA, 1630454400, 1662076800, 1647907200,
NA), class = c("POSIXct", "POSIXt"), tzone = "UTC"), date4 = structure(c(NA,
1639008000, NA, NA, NA), class = c("POSIXct", "POSIXt"), tzone = "UTC")), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -5L))
And this is what I would like it to look like:
structure(list(id = c(1, 2, 3, 4, 5), `Mar-21` = c(0, 1, 0, 0,
1), `Apr-21` = c(0, 1, 0, 0, 1), `May-21` = c(0, 2, 0, 1, 1),
`Jun-21` = c(1, 2, 0, 2, 1), `Jul-21` = c(1, 2, 0, 2, 1),
`Aug-21` = c(2, 2, 0, 2, 1), `Sep-21` = c(2, 3, 0, 2, 1),
`Oct-21` = c(2, 3, 0, 2, 1), `Nov-21` = c(2, 3, 0, 2, 1),
`Dec-21` = c(2, 4, 0, 2, 1), `Jan-22` = c(2, 4, 0, 2, 1),
`Feb-22` = c(2, 4, 0, 2, 1), `Mar-22` = c(2, 4, 0, 3, 1),
`Apr-22` = c(2, 4, 0, 3, 1), `May-22` = c(2, 4, 0, 3, 1),
`Jun-22` = c(2, 4, 1, 3, 1), `Jul-22` = c(2, 4, 2, 3, 1),
`Aug-22` = c(2, 4, 2, 3, 1), `Sep-22` = c(2, 4, 3, 3, 1)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -5L))
Just a note that I have this dataset in both wide and long format, in case using it in a long format makes more sense.
Thank you!
This was a fun exercise! I'm sure there are a billion ways to do this more efficiently, but I think this works and was a fun puzzle for me. I first put the dates into long format to get a min and max. Then I made a sequence of those dates by month. I then used expand.grid to make all combinations of the months with each ID and joined it to the original data frame. Then, for each ID and month, I counted how many of date1 to date4 fall on or before that month. I had to use floor_date to change date1 to date4 to the first of the month so the comparison is done at month resolution. Hopefully this helps!
library(dplyr)
library(lubridate)
library(tidyr)
# Build a wide table with one column per calendar month (e.g. "Mar-21")
# holding, for each id, how many of date1..date4 fall in or before that month.

# Long format is only used to find the earliest and latest non-missing date.
dat2 <- dat %>%
  tidyr::pivot_longer(cols = -id, values_drop_na = TRUE)
dat_min_max <- data.frame("Min" = min(dat2$value), "Max" = max(dat2$value))

# One entry per month, anchored on the first of the month. Flooring both ends
# makes the sequence cover exactly the months from the earliest to the latest
# date, whatever day of the month those dates fall on (no missing or extra
# trailing month, and no period arithmetic on month-end dates).
month_seq <- seq(
  floor_date(dat_min_max$Min, "month"),
  floor_date(dat_min_max$Max, "month"),
  by = "month"
)

dat3 <- dat %>%
  # Compare at month resolution: move every date to the first of its month.
  mutate(
    date1 = floor_date(date1, "month"),
    date2 = floor_date(date2, "month"),
    date3 = floor_date(date3, "month"),
    date4 = floor_date(date4, "month")
  ) %>%
  # Pair every id with every month (expand.grid names its columns Var1, Var2).
  left_join(expand.grid(dat$id, month_seq), by = c("id" = "Var1")) %>%
  # Vectorised count of dates reached by each month; missing dates compare as
  # NA and are skipped. This replaces a row-by-row sum(), which is slow on
  # millions of rows.
  mutate(
    n_dates = as.integer(rowSums(
      cbind(date1 <= Var2, date2 <= Var2, date3 <= Var2, date4 <= Var2),
      na.rm = TRUE
    ))
  ) %>%
  mutate(Var2 = format(Var2, "%b-%y")) %>%
  select(-date1, -date2, -date3, -date4) %>%
  tidyr::pivot_wider(names_from = Var2, values_from = n_dates)
I have a dataset comprised of leaves which I've weighed individually in order of emergence (first emerged through final emergence), and I'd like to combine these masses so that I have the entire mass of all the leaves for each individual plant.
How would I add these up using R programming language, or what would I need to google to get started on figuring this out?
structure(list(Tray = c(1, 1, 1, 1, 1, 1), Plant = c(2, 2, 2,
2, 3, 3), Treatment = structure(c(4L, 4L, 4L, 4L, 4L, 4L), .Label = c("2TLH",
"E2TL", "EH", "WL"), class = "factor"), PreSwitch = c("Soil",
"Soil", "Soil", "Soil", "Soil", "Soil"), PostSwitch = c("Soil",
"Soil", "Soil", "Soil", "Soil", "Soil"), Pellet = c(1, 1, 1,
1, 1, 1), Rep = c(1, 1, 1, 1, 1, 1), Date = structure(c(1618963200,
1618963200, 1618963200, 1618963200, 1618963200, 1618963200), tzone = "UTC", class = c("POSIXct",
"POSIXt")), DAP = c(60, 60, 60, 60, 60, 60), Position = c(2,
1, 3, 4, 4, 3), Whorl = structure(c(1L, 1L, 2L, 2L, 2L, 2L), .Label = c("1",
"2", "3", "4", "5"), class = "factor"), PetioleLength = c(1.229,
1.365, 1.713, 1.02, 0, 1.408), BladeLength = c(1.604, 1.755,
2.466, 2.672, 0.267, 2.662), BladeWidth = c(1.023, 1.185, 1.803,
1.805, 0.077, 1.771), BladeArea = c(1.289, 1.634, 3.492, 3.789,
0.016, 3.704), BladePerimeter = c(6.721, 7.812, 11.61, 12.958,
1.019, 14.863), BladeCircularity = c(0.359, 0.336, 0.326, 0.284,
0.196, 0.211), BPR = c(1.30512611879577, 1.28571428571429, 1.43957968476357,
2.61960784313725, NA, 1.890625), Leaf.Mass = c(9, 11, 31, 33,
32, 33), BladeAR = c(1.56793743890518, 1.48101265822785, 1.36772046589018,
1.4803324099723, 3.46753246753247, 1.50310559006211), Subirrigation = c(0,
0, 0, 0, 0, 0), Genotype = c(1, 1, 1, 1, 1, 1), Location = c(0,
0, 0, 0, 0, 0)), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"))
I may be missing something but isn't this a sum by Plant?
One solution below sums it for each plant into a separate table with just the totals and the second summarizes and adds it back to the main data set in a single step.
library(tidyverse)
# Summary table: one row per plant with the total mass of all its leaves.
# NOTE(review): if plant numbers repeat across trays, group by Tray and Plant
# instead -- confirm against the full data set.
plant_total <- df %>%
  group_by(Plant) %>%
  summarize(plant_weight = sum(Leaf.Mass, na.rm = TRUE))

# Add the per-plant total as a plant_weight column on every leaf row of df.
# The result is assigned to df (not plant_total) so it no longer overwrites
# the summary table above, and it is ungrouped so later verbs are not
# silently applied per plant.
df <- df %>%
  group_by(Plant) %>%
  mutate(plant_weight = sum(Leaf.Mass, na.rm = TRUE)) %>%
  ungroup()
I have the following graph made with plot. I basically plotted the outcome of an arima model. The problem, as you can see, is the y-axis. I want to rescale it so that it shows values as integers and not in scientific notation. I already tried with ylim = c(a,b) but it didn't work.
This is the data to plot:
structure(list(method = "ARIMA(1,2,0)", model = structure(list(
coef = c(ar1 = 0.165440211592995), sigma2 = 314372.871343033,
var.coef = structure(0.0387588365491072, .Dim = c(1L, 1L), .Dimnames = list(
"ar1", "ar1")), mask = TRUE, loglik = -201.464633423226,
aic = 406.929266846451, arma = c(1L, 0L, 0L, 0L, 1L, 2L,
0L), residuals = structure(c(0.144002762945477, -0.257594259049227,
169.62992413163, -40.455716409227, 3.98528254071288, 325.669119576814,
-277.933508979317, 161.058607396831, 100.485413762468, 161.981734397248,
-21.1101185099251, 467.511038095663, 167.408540762885, 264.467148159716,
-870.459264535865, 1471.66097350626, 116.971877311758, -159.918791518434,
967.205782005673, -64.1682010133445, -372.385939678148, 352.062155538701,
632.526018003249, 1002.33521590517, 479.534164073812, 461.147699502253,
-1091.4663608196, -614.056109041783), .Tsp = c(1, 28, 1), class = "ts"),
call = arima(x = corona_total$Total_Cases, order = c(1, 2,
0)), series = "corona_total$Total_Cases", code = 0L,
n.cond = 0L, nobs = 26L, model = list(phi = 0.165440211592995,
theta = numeric(0), Delta = c(2, -1), Z = c(1, 2, -1),
a = c(-779, 59138, 53578), P = structure(c(-2.22044604925031e-16,
2.86887593857152e-17, -5.56124814802562e-17, 2.86887593857152e-17,
-3.31423141286073e-17, -1.61722928090181e-32, -5.56124814802562e-17,
-3.75958688714994e-17, -5.56124814802562e-17), .Dim = c(3L,
3L)), T = structure(c(0.165440211592995, 1, 0, 0, 2,
1, 0, -1, 0), .Dim = c(3L, 3L)), V = structure(c(1, 0,
0, 0, 0, 0, 0, 0, 0), .Dim = c(3L, 3L)), h = 0, Pn = structure(c(1,
-5.4830714621183e-18, 1.21812129054869e-17, -5.48307146211831e-18,
-3.31423141286073e-17, -1.84889274661175e-32, 1.21812129054869e-17,
-3.75958688714994e-17, -5.56124814802562e-17), .Dim = c(3L,
3L))), x = structure(c(322, 400, 650, 888, 1128, 1694,
2036, 2502, 3089, 3858, 4636, 5883, 7375, 9172, 10149, 12462,
15113, 17660, 21157, 24747, 27980, 31506, 35713, 41035, 47021,
53578, 59138, 63919), .Tsp = c(1, 28, 1), class = "ts")), class = "Arima"),
level = c(80, 95), mean = structure(c(68571.1220751691, 73201.9225591844,
77829.1955946478, 82455.8850482763, 87082.4779540027, 91709.0548868236,
96335.6291770837, 100962.203030158, 105588.776810904, 110215.350579684,
114841.924346485, 119468.498112958, 124095.071879377, 128721.645645786,
133348.219412195, 137974.793178603, 142601.366945011, 147227.940711419,
151854.514477827, 156481.088244235, 161107.662010643, 165734.235777051,
170360.80954346, 174987.383309868, 179613.957076276, 184240.530842684,
188867.104609092, 193493.6783755, 198120.252141908, 202746.825908316,
207373.399674724, 211999.973441132, 216626.54720754, 221253.120973948,
225879.694740356, 230506.268506765, 235132.842273173, 239759.416039581,
244385.989805989, 249012.563572397, 253639.137338805, 258265.711105213,
262892.284871621, 267518.858638029, 272145.432404437, 276772.006170845,
281398.579937253, 286025.153703662, 290651.72747007, 295278.301236478,
299904.875002886, 304531.448769294, 309158.022535702, 313784.59630211,
318411.170068518, 323037.743834926, 327664.317601334, 332290.891367742,
336917.46513415, 341544.038900558), .Tsp = c(29, 88, 1), class = "ts"),
lower = structure(c(67852.5693904542, 71488.0378850631, 74869.4056219101,
78042.7559156995, 81035.3037876344, 83865.5016552685, 86546.988586515,
89090.4113186268, 91504.3946218833, 93796.1160212266, 95971.6728237902,
98036.3298321095, 99994.6937502293, 101850.840164951, 103608.408563905,
105270.675078771, 106840.60926587, 108320.919172912, 109714.087632186,
111022.401864165, 112247.977899771, 113392.780933102, 114458.642437768,
115447.274680314, 116360.283118863, 117199.177067588, 117965.378927058,
118660.232219392, 119285.008620154, 119840.914142592, 120329.094601246,
120750.640459457, 121106.591147333, 121397.938922313, 121625.632332819,
121790.579335963, 121893.650112513, 121935.679615911, 121917.46988679,
121839.792160025, 121703.388787634, 121508.974997728, 121257.240507039,
120948.851002351, 120584.449504222, 120164.657624752, 119690.076729762,
119161.289014511, 118578.858501069, 117943.331964511, 117255.239794357,
116515.096796942, 115723.402943843, 114880.644070922, 113987.29253211,
113043.807811626, 112050.637097962, 111008.215822666, 109916.968166633,
108777.307536393, 67472.1905761175, 70580.7621429779, 73302.5874546909,
75706.5864702675, 77834.1231526988, 79713.3753855171, 81365.1952663977,
82805.8644073931, 84048.5730632326, 85104.2982790952, 85982.3650762524,
86690.8252744766, 87236.7242194817, 87626.2949833746, 87865.1036821527,
87958.1607268483, 87910.0076619409, 87724.7860890043, 87406.2931723477,
86958.0269138267, 86383.2235034666, 85684.8884462828, 84865.8227394482,
83928.6450686659, 82875.8107702521, 81709.6281410368, 80432.2725549711,
79045.7987518185, 77552.151591525, 75953.1755121868, 74250.6228859222,
72446.1614324952, 70541.3808230744, 68537.798584461, 66436.8653962784,
64239.9698590982, 61948.4427995644, 59563.561168772, 57086.5515820169,
54518.5935412548, 51860.8223759273, 49114.3319330342, 46280.1770432903,
43359.3757867774, 40352.9115785747, 37261.7350923526, 34086.7660377659,
30828.8948056289, 27488.983993257, 24067.8698209688, 20566.3634495346,
16985.2522073028, 13325.3007348137, 9587.25205390016, 5771.82856756041,
1879.73299626436, -2088.35074420535, -6131.75671876397, -10249.8361987147,
-14441.9569333302), .Dim = c(60L, 2L), .Dimnames = list(NULL,
c("80%", "95%")), .Tsp = c(29, 88, 1), class = c("mts",
"ts", "matrix")), upper = structure(c(69289.674759884, 74915.8072333057,
80788.9855673855, 86869.0141808532, 93129.6521203709, 99552.6081183786,
106124.269767652, 112833.994741689, 119673.158999925, 126634.585138142,
133712.175869179, 140900.666393806, 148195.450008524, 155592.451126622,
163088.030260485, 170678.911278435, 178362.124624152, 186134.962249926,
193994.941323469, 201939.774624305, 209967.346121516, 218075.690621001,
226262.976649151, 234527.491939421, 242867.631033688, 251281.88461778,
259768.830291125, 268327.124531608, 276955.495663662, 285652.73767404,
294417.704748202, 303249.306422807, 312146.503267748, 321108.303025584,
330133.757147894, 339221.957677567, 348372.034433833, 357583.15246325,
366854.509725187, 376185.334984769, 385574.885889976, 395022.447212698,
404527.329236203, 414088.866273707, 423706.415304653, 433379.354716939,
443107.083144745, 452889.018392812, 462724.59643907, 472613.270508444,
482554.510211415, 492547.800741645, 502592.64212756, 512688.548533298,
522835.047604926, 533031.679858227, 543277.998104707, 553573.566912819,
563917.962101668, 574310.770264724, 69670.0535742206, 75823.0829753909,
82355.8037346047, 89205.1836262851, 96330.8327553065, 103704.73438813,
111306.06308777, 119118.541652923, 127128.980558576, 135326.402880273,
143701.483616717, 152246.170951439, 160953.419539271, 169816.996308198,
178831.335142237, 187991.425630358, 197292.726228081, 206731.095333834,
216302.735783307, 226004.149574644, 235832.10051782, 245783.58310782,
255855.796347471, 266046.121551069, 276352.103382299, 286771.433544331,
297301.936663213, 307941.557999181, 318688.352692291, 329540.476304445,
340496.176463526, 351553.785449769, 362711.713592006, 373968.443363436,
385322.524084435, 396772.567154431, 408317.241746781, 419955.270910389,
431685.428029961, 443506.533603539, 455417.452301683, 467417.090277392,
479504.392699952, 491678.341489281, 503937.9532303, 516282.277249338,
528710.393836741, 541221.412601694, 553814.470946882, 566488.732651987,
579243.386556237, 592077.645331285, 604990.74433659, 617981.94055032,
631050.511569476, 644195.754673588, 657416.985946874, 670713.539454249,
684084.766467015, 697530.034734447), .Dim = c(60L, 2L), .Dimnames = list(
NULL, c("80%", "95%")), .Tsp = c(29, 88, 1), class = c("mts",
"ts", "matrix")), x = structure(c(322, 400, 650, 888, 1128,
1694, 2036, 2502, 3089, 3858, 4636, 5883, 7375, 9172, 10149,
12462, 15113, 17660, 21157, 24747, 27980, 31506, 35713, 41035,
47021, 53578, 59138, 63919), .Tsp = c(1, 28, 1), class = "ts"),
series = "corona_total$Total_Cases", fitted = structure(c(321.855997237055,
400.257594259049, 480.37007586837, 928.455716409227, 1124.01471745929,
1368.33088042319, 2313.93350897932, 2340.94139260317, 2988.51458623753,
3696.01826560275, 4657.11011850993, 5415.48896190434, 7207.59145923711,
8907.53285184028, 11019.4592645359, 10990.3390264937, 14996.0281226882,
17819.9187915184, 20189.7942179943, 24811.1682010133, 28352.3859396781,
31153.9378444613, 35080.4739819968, 40032.6647840948, 46541.4658359262,
53116.8523004977, 60229.4663608196, 64533.0561090418), .Tsp = c(1,
28, 1), class = "ts"), residuals = structure(c(0.144002762945477,
-0.257594259049227, 169.62992413163, -40.455716409227, 3.98528254071288,
325.669119576814, -277.933508979317, 161.058607396831, 100.485413762468,
161.981734397248, -21.1101185099251, 467.511038095663, 167.408540762885,
264.467148159716, -870.459264535865, 1471.66097350626, 116.971877311758,
-159.918791518434, 967.205782005673, -64.1682010133445, -372.385939678148,
352.062155538701, 632.526018003249, 1002.33521590517, 479.534164073812,
461.147699502253, -1091.4663608196, -614.056109041783), .Tsp = c(1,
28, 1), class = "ts")), class = "forecast")
This is the code I used to make the plot (ignore the dotted exponential curve):
# Draw the forecast, then mark the boundary between the last observed day
# (28) and the first forecast day (29) with translucent red lines.
# The two calls are separate statements: base-graphics calls draw as a side
# effect and are not layers to be combined with `+`.
plot(forecast, shaded = TRUE, shadecols = NULL, lambda = NULL, col = 1,
     fcol = 4, pi.col = 1, pi.lty = 2, ylim = NULL,
     main = "Out-of-Sample Forecast", ylab = "Number of Cases",
     xlab = "Days (since 23/03/2020)")
abline(v = 28:29, col = "#FF000033", lty = 1, lwd = 5)
Output:
Can anyone please help me with this?
I couldn't load your object in my R session, so I'm assuming your plot works like a regular one.
You have 2 options.
Option 1: set options(scipen = 10) (or some other high value). This is a quick fix, but it will not work if you need some plots with scientific notation and others without in the same graphics window.
Option 2: define the axis yourself, with the format you need.
You can use axTicks(2) to get the position of default ticks and then format the labels as you need.
I recommend option 2. Here's a quick example :
# Minimal example: the same data drawn with a hand-made y-axis (left panel)
# and with the default y-axis (right panel).
x <- seq(1, 10, length.out = 100)
y <- x * 1e5
# par() returns the previous settings; keep them so the layout can be restored.
old_par <- par(mfrow = c(1, 2))
plot(x, y, main = "custom axis", yaxt = "n") # yaxt = "n": no default y-axis
ticks <- axTicks(2) # positions of the default y-axis ticks
# Fixed-point labels with no decimals, so large values are never printed in
# scientific notation.
axis(2, at = ticks, labels = formatC(ticks, format = "f", digits = 0))
plot(x, y, main = "default axis")
par(old_par)
Outputs :
You can take a look at other potential options in the answers to this post
To draw arrows in ggplot, I use geom_segment and arrow=arrow().
I would like the arrow head size to match the segment width (or size).
However, arrow() does not recognize variables taken directly from the data argument of ggplot, so one must specify the data.frame containing the variable using the $ operator. This causes a disconnect between the values used for plotting the line and those used for plotting the arrow head (the largest arrow head can end up on the thinnest segment).
Example:
d <- structure(list(Process = structure(c(2L, 1L, 1L, 1L, 2L, 2L,
1L, 1L, 2L, 1L, 2L), .Label = c("First", "Second"), class = "factor"),
x.sink = c(1, 3, 1, 2, 2, 3, 3, 2, 2, 2, 2), y.sink = c(1,
1, 1, 2, 2, 1, 1, 1, 1, 2, 2), x.source = c(2, 2, 2, 2, 2,
2, 2, 1, 1, 1, 3), y.source = c(2, 2, 2, 1, 1, 1, 1, 1, 1,
2, 1), offset = c(1, 1, 1, -1, -1, -1, -1, -1, -1, 1, -1),
Std.Flux = c(0.179487179487179, 0.170940170940171, 0.944444444444444,
0.0854700854700855, 0.726495726495726, 0.128205128205128,
0.213675213675214, 0.213675213675214, 0.128205128205128,
0.106837606837607, 1)), .Names = c("Process", "x.sink", "y.sink",
"x.source", "y.source", "offset", "Std.Flux"), class = "data.frame", row.names = c(NA,
-11L))
# Asker's original attempt: draw each row of `d` as a segment from its
# source point to its sink point, with an arrow head.
p <- qplot(data=d,
#alpha=I(0.4),
colour=Process,
# segment thickness is mapped to Std.Flux
size=Std.Flux,
xlim=c(0,4),
ylim=c(0,3),
# all four coordinates are shifted by (numeric factor code of Process) / 10
x=x.source+as.numeric(Process)/10,
y=y.source+as.numeric(Process)/10,
xend=x.sink+as.numeric(Process)/10,
yend=y.sink+as.numeric(Process)/10,
geom="segment",
# arrow head lengths are read from d$Std.Flux directly rather than through
# the aesthetic mapping; the question reports that they do not line up with
# the segment widths
arrow = arrow(type="closed",
length = unit(d$Std.Flux,"cm")))
print(p)
Any suggestions?
Here's one way:
# library() fails loudly if ggplot2 is missing; require() would only return
# FALSE and let the script fail later.
library(ggplot2)
df <- mtcars
# Anchor height shared by both arrow layers.
arrow_pos <- data.frame(y = 250)
ggplot(df, aes(x = factor(cyl), y = mpg)) +
  # geom_col() is the direct form of geom_bar(stat = "identity")
  geom_col(width = .4, fill = "darkblue") +
  # A slightly larger black arrow drawn first ...
  geom_segment(
    data = arrow_pos,
    aes(x = 1.526, xend = 1.01, y = y + 90.02, yend = y + 0.25),
    arrow = arrow(length = unit(4.2, "mm")), lwd = 2,
    color = "black"
  ) +
  # ... with a thinner gold arrow drawn over it.
  geom_segment(
    data = arrow_pos,
    aes(x = 1.525, xend = 1.01, y = y + 90, yend = y + 0.25),
    arrow = arrow(length = unit(4, "mm")), lwd = 1,
    color = "gold2"
  ) +
  annotate("text", x = 2.39, y = 360,
           label = "This arrow points to the highest MPG.") +
  scale_y_continuous(limits = c(0, 400)) +
  xlab("CYL") + ylab("MPG")
Output:
Must have been fixed in the last 8 years :)
Here translated into a call to ggplot()
library(ggplot2)
# The qplot() call expressed as ggplot() layers: every aesthetic is declared
# once at the top level and inherited by the single segment layer.
ggplot(
  data = d,
  mapping = aes(
    x = x.source + as.numeric(Process) / 10,
    y = y.source + as.numeric(Process) / 10,
    xend = x.sink + as.numeric(Process) / 10,
    yend = y.sink + as.numeric(Process) / 10,
    colour = Process,
    size = Std.Flux
  )
) +
  geom_segment(
    # arrow head length taken row by row from d$Std.Flux, in cm
    arrow = arrow(length = unit(d$Std.Flux, "cm"), type = "closed")
  )