overlapping the predicted time series on the original series in R

overlapping the predicted time series on the original series in R - r

I perform forecast
w=read.csv("C:/Users/admin/Documents/aggrmonth.csv", sep=";",dec=",")
w
#create time series object
w=ts(w$new,frequency = 12,start=c(2015,1))
w
#timeplot
plot.ts(w)
#forecast for the next months
library("forecast")
m <- stats::HoltWinters(w)
test=forecast:::forecast.HoltWinters(m,h=4) #h is how much month do you want to predict
test
now i want get forecast for 4 months ahead.
From 01.2017-04.2017. I this know original values.
1-Jan-17 1020
1-Feb-17 800
1-Mar-17 1130
1-Apr-17 600
But i need get plot where displayed predicted values with CI are overlapped with original value.
Of course if i don't clearly exlplain, i attached the plot.
The green curve is the initial value of the series(my 4 months)
and green dotted line is predictied values are overlapped on original values.
Dashes on the predicted dotted curve are confidence intervals.
How to create such plot
w=
structure(list(yearMon = structure(c(9L, 7L, 15L, 1L, 17L, 13L,
11L, 3L, 23L, 21L, 19L, 5L, 10L, 8L, 16L, 2L, 18L, 14L, 12L,
4L, 24L, 22L, 20L, 6L), .Label = c("1-Apr-15", "1-Apr-16", "1-Aug-15",
"1-Aug-16", "1-Dec-15", "1-Dec-16", "1-Feb-15", "1-Feb-16", "1-Jan-15",
"1-Jan-16", "1-Jul-15", "1-Jul-16", "1-Jun-15", "1-Jun-16", "1-Mar-15",
"1-Mar-16", "1-May-15", "1-May-16", "1-Nov-15", "1-Nov-16", "1-Oct-15",
"1-Oct-16", "1-Sep-15", "1-Sep-16"), class = "factor"), new = c(8575L,
8215L, 16399L, 16415L, 15704L, 19805L, 17484L, 18116L, 19977L,
14439L, 9258L, 12259L, 4909L, 9539L, 8802L, 11253L, 11971L, 7838L,
2095L, 4157L, 3910L, 1306L, 3429L, 1390L)), .Names = c("yearMon",
"new"), class = "data.frame", row.names = c(NA, -24L))

We can use ggfortify to create a data frame then plot both timeseries with ggplot2
# Load required libraries
library(lubridate)
library(magrittr)
library(tidyverse)
library(scales)
library(forecast)
library(ggfortify)
w <- structure(list(yearMon = structure(c(9L, 7L, 15L, 1L, 17L, 13L,
11L, 3L, 23L, 21L, 19L, 5L, 10L, 8L, 16L, 2L, 18L, 14L, 12L,
4L, 24L, 22L, 20L, 6L), .Label = c("1-Apr-15", "1-Apr-16", "1-Aug-15",
"1-Aug-16", "1-Dec-15", "1-Dec-16", "1-Feb-15", "1-Feb-16", "1-Jan-15",
"1-Jan-16", "1-Jul-15", "1-Jul-16", "1-Jun-15", "1-Jun-16", "1-Mar-15",
"1-Mar-16", "1-May-15", "1-May-16", "1-Nov-15", "1-Nov-16", "1-Oct-15",
"1-Oct-16", "1-Sep-15", "1-Sep-16"), class = "factor"), new = c(8575L,
8215L, 16399L, 16415L, 15704L, 19805L, 17484L, 18116L, 19977L,
14439L, 9258L, 12259L, 4909L, 9539L, 8802L, 11253L, 11971L, 7838L,
2095L, 4157L, 3910L, 1306L, 3429L, 1390L)), .Names = c("yearMon",
"new"), class = "data.frame", row.names = c(NA, -24L))
# create time series object
w = ts(w$new, frequency = 12, start=c(2015, 1))
w
#> Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov
#> 2015 8575 8215 16399 16415 15704 19805 17484 18116 19977 14439 9258
#> 2016 4909 9539 8802 11253 11971 7838 2095 4157 3910 1306 3429
#> Dec
#> 2015 12259
#> 2016 1390
# forecast for the next months
m <- stats::HoltWinters(w)
# h is how much month do you want to predict
pred = forecast:::forecast.HoltWinters(m, h=4)
pred
#> Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
#> Jan 2017 -5049.00381 -9644.003 -454.0045 -12076.449 1978.441
#> Feb 2017 37.44605 -5599.592 5674.4843 -8583.660 8658.552
#> Mar 2017 -256.41474 -6770.890 6258.0601 -10219.444 9706.615
#> Apr 2017 2593.09445 -4693.919 9880.1079 -8551.431 13737.620
# plot
plot(pred, include = 24, showgap = FALSE)
# Convert pred from list to data frame object
df1 <- fortify(pred) %>% as_tibble()
# Create Date column, remove Index column and rename other columns
df1 %<>%
mutate(Date = as.Date(Index, "%Y-%m-%d")) %>%
select(-Index) %>%
rename("Low95" = "Lo 95",
"Low80" = "Lo 80",
"High95" = "Hi 95",
"High80" = "Hi 80",
"Forecast" = "Point Forecast")
df1
#> # A tibble: 28 x 8
#> Data Fitted Forecast Low80 High80 Low95 High95 Date
#> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <date>
#> 1 8575 NA NA NA NA NA NA 2015-01-01
#> 2 8215 NA NA NA NA NA NA 2015-02-01
#> 3 16399 NA NA NA NA NA NA 2015-03-01
#> 4 16415 NA NA NA NA NA NA 2015-04-01
#> 5 15704 NA NA NA NA NA NA 2015-05-01
#> 6 19805 NA NA NA NA NA NA 2015-06-01
#> 7 17484 NA NA NA NA NA NA 2015-07-01
#> 8 18116 NA NA NA NA NA NA 2015-08-01
#> 9 19977 NA NA NA NA NA NA 2015-09-01
#> 10 14439 NA NA NA NA NA NA 2015-10-01
#> # ... with 18 more rows
### Avoid the gap between data and forcast
# Find the last non missing NA values in obs then use that
# one to initialize all forecast columns
lastNonNAinData <- max(which(complete.cases(df1$Data)))
df1[lastNonNAinData,
!(colnames(df1) %in% c("Data", "Fitted", "Date"))] <- df1$Data[lastNonNAinData]
ggplot(df1, aes(x = Date)) +
geom_ribbon(aes(ymin = Low95, ymax = High95, fill = "95%")) +
geom_ribbon(aes(ymin = Low80, ymax = High80, fill = "80%")) +
geom_point(aes(y = Data, colour = "Data"), size = 4) +
geom_line(aes(y = Data, group = 1, colour = "Data"),
linetype = "dotted", size = 0.75) +
geom_line(aes(y = Fitted, group = 2, colour = "Fitted"), size = 0.75) +
geom_line(aes(y = Forecast, group = 3, colour = "Forecast"), size = 0.75) +
scale_x_date(breaks = scales::pretty_breaks(), date_labels = "%b %y") +
scale_colour_brewer(name = "Legend", type = "qual", palette = "Dark2") +
scale_fill_brewer(name = "Intervals") +
guides(colour = guide_legend(order = 1), fill = guide_legend(order = 2)) +
theme_bw(base_size = 14)
Edit: To included known values from "2017-01-01" to "2017-04-01"
# Create new column which has known values
df1$Obs <- NA
df1$Obs[(nrow(df1)-3):(nrow(df1))] <- c(1020, 800, 1130, 600)
ggplot(df1, aes(x = Date)) +
geom_ribbon(aes(ymin = Low95, ymax = High95, fill = "95%")) +
geom_ribbon(aes(ymin = Low80, ymax = High80, fill = "80%")) +
geom_point(aes(y = Data, colour = "Data"), size = 4) +
geom_line(aes(y = Data, group = 1, colour = "Data"),
linetype = "dotted", size = 0.75) +
geom_line(aes(y = Fitted, group = 2, colour = "Fitted"), size = 0.75) +
geom_line(aes(y = Forecast, group = 3, colour = "Forecast"), size = 0.75) +
scale_x_date(breaks = scales::pretty_breaks(), date_labels = "%b %y") +
scale_colour_brewer(name = "Legend", type = "qual", palette = "Dark2") +
scale_fill_brewer(name = "Intervals") +
guides(colour = guide_legend(order = 1), fill = guide_legend(order = 2)) +
theme_bw(base_size = 14) +
geom_line(aes(y = Obs, group = 4, colour = "Obs"), linetype = "dotted", size = 0.75)
Or put those values directly into Data column
df1$Data[(nrow(df1)-3):(nrow(df1))] <- c(1020, 800, 1130, 600)
ggplot(df1, aes(x = Date)) +
geom_ribbon(aes(ymin = Low95, ymax = High95, fill = "95%")) +
geom_ribbon(aes(ymin = Low80, ymax = High80, fill = "80%")) +
geom_point(aes(y = Data, colour = "Data"), size = 3) +
geom_line(aes(y = Data, group = 1, colour = "Data"),
linetype = "dotted", size = 0.75) +
geom_line(aes(y = Fitted, group = 2, colour = "Fitted"), size = 0.75) +
geom_line(aes(y = Forecast, group = 3, colour = "Forecast"), size = 0.75) +
scale_x_date(breaks = scales::pretty_breaks(), date_labels = "%b %y") +
scale_colour_brewer(name = "Legend", type = "qual", palette = "Dark2") +
scale_fill_brewer(name = "Intervals") +
guides(colour = guide_legend(order = 1), fill = guide_legend(order = 2)) +
theme_bw(base_size = 14)
Created on 2018-04-21 by the reprex package (v0.2.0).

Related

Convert data frame to time series for plotting

I have the dataframe:
year sum1
<chr> <dbl>
1 1979 4
2 1980 14
3 1981 14
4 1982 13
5 1983 6
6 1984 15
7 1985 15
8 1986 7
9 1987 10
10 1988 12
and I would like to convert it to time series in order to make a plot. Any idea?

You only need to convert the year to numeric rather than character
DF2 <- transform(DF, year = as.numeric(year))
plot(DF2, type = "l", lwd = 2, lty = 2, col = "blue",
xlab = "year", ylab = "value", main = "my title")
If you do want to convert it to a time series then convert it to a zoo series and use classic graphics or ggplot2 (autoplot).
library(zoo)
z <- read.zoo(DF, FUN = as.numeric)
plot(z, type = "l", lwd = 2, lty = 2, col = "blue",
xlab = "year", ylab = "value", main = "my title")
library(ggplot2)
autoplot(z, linetype = I("dashed"), lwd = I(1.5), col = I("blue")) +
xlab("year") +
ylab("value") +
ggtitle("my title")
Note
Lines <- "year sum1
1 1979 4
2 1980 14
3 1981 14
4 1982 13
5 1983 6
6 1984 15
7 1985 15
8 1986 7
9 1987 10
10 1988 12"
DF <- read.table(text = Lines, colClasses = c(year = "character"))

Convert as.xts and plot. First complete the dates with January 1.
my_ts <- xts::as.xts(dat$sum1, order.by=as.Date(paste0(dat$year, '-01-01')))
plot(my_ts)
Data:
dat <- structure(list(year = 1979:1988, sum1 = c(4L, 14L, 14L, 13L,
6L, 15L, 15L, 7L, 10L, 12L)), class = "data.frame", row.names = 1979:1988)

We could use tsibble package to transfrom dataframe to times series obejct:
library(tsibble)
library(fable)
df %>%
as_tsibble(index = year) %>%
autoplot(vars(sum1))

Proper x axis scale with years only

I have a grid with two plots, each one consist of two time series of mean values: one come from an elaboration with R df5 the other one mmzep is not (I received this dataset already calculated).
library(dplyr)
library(lubridate)
df5 <- data.frame(df$Date, df$Price)
colnames(df5)<- c("date","price")
df5$date <- as.Date(df5$date,"%Y/%m/%d")
df5$price<- as.numeric(gsub(",",".",df5$price))
colnames(mmzep)<- c("date","Mar","Apr")
Then, I created other two dfs from df5 , I tried to group in only one df, but I was not able to do it.
meanM <- df5 %>%
mutate(Month = month(date), Year = year(date)) %>%
filter(month(df5$date) %in% 3 & year(df5$date) %in% 2010:2019) %>%
group_by(Year, Month) %>%
summarise_all(list(mean=mean, sd=sd), na.rm=TRUE) %>%
na.omit()
Year Month date_mean price_mean date_sd price_sd
<dbl> <dbl> <date> <dbl> <dbl> <dbl>
1 2010 3 2010-03-23 1082. 5.48 685.
2 2012 3 2012-03-27 858. 2.74 333.
3 2015 3 2015-03-16 603. 8.86 411.
4 2017 3 2017-03-15 674. 9.65 512.
5 2018 3 2018-03-16 318. 9.09 202.
6 2019 3 2019-03-14 840. 9.42 329.
meanA <- df5 %>%
mutate(Month = month(date), Year = year(date)) %>%
filter(month(df5$date) %in% 4 & year(df5$date) %in% 2010:2019) %>%
group_by(Year, Month) %>%
summarise_all(list(mean=mean, sd=sd), na.rm=TRUE) %>%
na.omit()
Year Month date_mean price_mean date_sd price_sd
<dbl> <dbl> <date> <dbl> <dbl> <dbl>
1 2010 4 2010-04-18 361. 9.00 334.
2 2011 4 2011-04-14 527. 8.36 312.
3 2012 4 2012-04-15 726. 8.80 435.
4 2013 4 2013-04-16 872. 8.50 521.
5 2014 4 2014-04-09 668. 5.34 354.
6 2015 4 2015-04-15 689. 8.80 436.
7 2017 4 2017-04-15 806. 8.80 531.
8 2018 4 2018-04-15 727. 8.80 291.
9 2019 4 2019-04-15 600. 8.94 690.
#mmzep
date Mar Apr
<dbl> <dbl> <dbl>
1 2010 793. 540
2 2011 650 378.
3 2012 813. 612.
4 2013 755. 717
5 2014 432. 634
6 2015 474. 782.
7 2016 590 743.
8 2017 544. 628
9 2018 249. 781
10 2019 547. 393
I plot the dfs
g5 = ggplot() +
geom_point(data=meanM, aes(x = (Year), y = (price_mean)),size = 3, colour="gray40") +
geom_point(data=mmzep, aes(x= (date), y=(Mar)), size =3, colour = "red") +
geom_line(data=meanM, aes(group = 1, x = (Year), y = (price_mean)), colour="gray40") +
geom_line(data=mmzep, aes(x = (date), y = (Mar)), colour="red") +
stat_smooth(data=meanM,aes(group = 1, x = (Year), y = (price_mean)),
method = "lm", size = 1, se = FALSE, formula = y ~ x,
colour = "black") +
stat_smooth(data=mmzep, aes(x = (date), y = (Mar)),
method = "lm", size = 1, se = FALSE, formula = y ~ x,
colour = "red3") +
scale_y_continuous(expand = c(0, 0), limits = c(0, 1500)) +
theme(panel.background = element_rect(fill = 'white', colour = 'black'),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.ticks.length = unit(-0.25, "lines"),
plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
axis.text.x = element_text(margin = margin(t = 0.25, unit = "cm")),
axis.text.y = element_text(margin = margin(r = 0.25, unit = "cm"))) +
labs(y = expression(March),
x = NULL) +
theme(axis.text.x = element_text(size=10),
axis.title = element_text(size=10))
I plot g5 and g6 in the same way, than the grid, to obtain this:
enter image description here
As you can see the x axis is not correct, I tried scale_x_date(breaks="year", labels=date_format("%Y")) , scale_x_discrete(labels=c("2010","2011","2012","2013","2014","2015","2016","2017","2018","2019")), scale_x_continuous in different ways.
I also tried mmzep$date <- as.Date(mmzep$date,"%Y") but I saw the R needs a day (in my case a day and a month?) mmzep$date <- as.Date(paste("01", mmzep$date, sep="/"), "%d/%m/%Y") , but R substitutes the years with NA. I think that the errors is in the the way R see the date in mmzep, but I don't understand how can I made R recognized the correct object.
Anyone have any suggestion? Thanks in advance!

There are a few ways to do this. In your data, your year values are stored as type double. This tells ggplot that you have a continuous variable. If you want to leave your data as is, then the solution is
+ scale_x_continuous(breaks = seq(2010, 2020, 2))
# or something else that expressly lists the years you want to see on the axis.
You cannot use scale_x_date without your year data being converted to a date. You can do that with, for example
MeanM$Year <- as.Date(paste(MeanM$Year, "01", "01", sep = "/"))
Then you can use
+ scale_x_date(date_labels = "%Y")
Or you can convert your years into discrete data with factor. You cannot use scale_x_discrete on a continuous variable.
MeanM$Year <- factor(MeanM$Year)
And then use
+ scale_x_discrete()

Try this approach tested on MeanM without using mmzep which we do not have data. The issue is that as you are using multiple geom the functions are adding strange labels to axis. Changing all x-axis variables to factor can alleviate the issue. In the case of mmzep with aes(x= (date),..) also be careful on formating the date as year with a code like this aes(x= factor(format(date,'%Y')) so that all labels fit well into axis. Here the code:
#Code
ggplot() +
geom_point(data=meanM, aes(x = factor(Year), y = (price_mean)),size = 3, colour="gray40") +
geom_line(data=meanM, aes(group = 1, x = factor(Year), y = (price_mean)), colour="gray40") +
stat_smooth(data=meanM,aes(group = 1, x = factor(Year), y = (price_mean)),
method = "lm", size = 1, se = FALSE, formula = y ~ x,
colour = "black") +
scale_y_continuous(expand = c(0, 0), limits = c(0, 1500)) +
theme(panel.background = element_rect(fill = 'white', colour = 'black'),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.ticks.length = unit(-0.25, "lines"),
plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
axis.text.x = element_text(margin = margin(t = 0.25, unit = "cm")),
axis.text.y = element_text(margin = margin(r = 0.25, unit = "cm"))) +
labs(y = expression(March),
x = NULL) +
theme(axis.text.x = element_text(size=10),
axis.title = element_text(size=10))
Output:
Some data used:
#Data
meanM <- structure(list(Year = c(2010L, 2012L, 2015L, 2017L, 2018L, 2019L
), Month = c(3L, 3L, 3L, 3L, 3L, 3L), date_mean = c("23/03/2010",
"27/03/2012", "16/03/2015", "15/03/2017", "16/03/2018", "14/03/2019"
), price_mean = c(1082L, 858L, 603L, 674L, 318L, 840L), date_sd = c(5.48,
2.74, 8.86, 9.65, 9.09, 9.42), price_sd = c(685L, 333L, 411L,
512L, 202L, 329L), Year2 = structure(1:6, .Label = c("2010",
"2012", "2015", "2017", "2018", "2019"), class = "factor")), row.names = c(NA,
-6L), class = "data.frame")

Can't display lines and points with ggplot2

I have a dataframe df as follows:
structure(list(date = structure(c(1L, 13L, 16L, 19L, 22L, 25L,
28L, 31L, 34L, 4L, 7L, 10L, 2L, 14L, 17L, 20L, 23L, 26L, 29L,
32L, 35L, 5L, 8L, 11L, 3L, 15L, 18L, 21L, 24L, 27L, 30L, 33L,
36L, 6L, 9L, 12L), .Label = c("1/1/2010", "1/1/2011", "1/1/2012",
"10/1/2010", "10/1/2011", "10/1/2012", "11/1/2010", "11/1/2011",
"11/1/2012", "12/1/2010", "12/1/2011", "12/1/2012", "2/1/2010",
"2/1/2011", "2/1/2012", "3/1/2010", "3/1/2011", "3/1/2012", "4/1/2010",
"4/1/2011", "4/1/2012", "5/1/2010", "5/1/2011", "5/1/2012", "6/1/2010",
"6/1/2011", "6/1/2012", "7/1/2010", "7/1/2011", "7/1/2012", "8/1/2010",
"8/1/2011", "8/1/2012", "9/1/2010", "9/1/2011", "9/1/2012"), class = "factor"),
a = c(NA, 365.07, 653.19, 980.72, 1455.6, 1867.07, 2036.92,
2372.84, 2693.96, 2973.04, 3227.23, 3678.01, NA, 555.51,
1058.18, 1539.01, 2102.23, 2769.65, 3146.88, 3604.71, 4043.18,
4438.55, 4860.76, 5360.94, NA, 594.67, 1287.05, 1666.5, 2362.27,
2818.16, 3226, 3924.67, 4295.79, 4751.97, 5410.37, 5986.46
), b = c(NA, 158.18, 268.53, 331.81, 434.19, 538.49, 606.62,
651.46, 736.55, 890.81, 981.65, 1748.44, NA, 227.68, 366.95,
486.41, 614.75, 729.44, 836.46, 929.72, 1092.73, 1222.48,
1409.07, 2179.42, NA, 172.99, 359.8, 478.05, 597.88, 660.4,
823.61, 924.57, 1020.33, 1189.15, 1347.44, 2315.36), ratio_a = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 52.17, 62,
56.93, 44.42, 48.34, 54.49, 51.92, 50.08, 49.29, 50.62, 45.76,
NA, 7.05, 21.63, 8.28, 12.37, 1.75, 2.51, 8.88, 6.25, 7.06,
11.31, 11.67), ratio_b = c(NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, 43.94, 36.65, 46.59, 41.59, 35.46, 37.89,
42.71, 48.36, 37.23, 43.54, 24.65, NA, -24.02, -1.95, -1.72,
-2.74, -9.46, -1.54, -0.55, -6.63, -2.73, -4.37, 6.24)), class = "data.frame", row.names = c(NA,
-36L))
Out:
date a b ratio_a ratio_b
0 1/1/2010 NaN NaN NaN NaN
1 2/1/2010 365.07 158.18 NaN NaN
2 3/1/2010 653.19 268.53 NaN NaN
3 4/1/2010 980.72 331.81 NaN NaN
4 5/1/2010 1455.60 434.19 NaN NaN
5 6/1/2010 1867.07 538.49 NaN NaN
6 7/1/2010 2036.92 606.62 NaN NaN
7 8/1/2010 2372.84 651.46 NaN NaN
8 9/1/2010 2693.96 736.55 NaN NaN
9 10/1/2010 2973.04 890.81 NaN NaN
10 11/1/2010 3227.23 981.65 NaN NaN
11 12/1/2010 3678.01 1748.44 NaN NaN
12 1/1/2011 NaN NaN NaN NaN
13 2/1/2011 555.51 227.68 52.17 43.94
14 3/1/2011 1058.18 366.95 62.00 36.65
15 4/1/2011 1539.01 486.41 56.93 46.59
16 5/1/2011 2102.23 614.75 44.42 41.59
17 6/1/2011 2769.65 729.44 48.34 35.46
18 7/1/2011 3146.88 836.46 54.49 37.89
19 8/1/2011 3604.71 929.72 51.92 42.71
20 9/1/2011 4043.18 1092.73 50.08 48.36
21 10/1/2011 4438.55 1222.48 49.29 37.23
22 11/1/2011 4860.76 1409.07 50.62 43.54
23 12/1/2011 5360.94 2179.42 45.76 24.65
24 1/1/2012 NaN NaN NaN NaN
25 2/1/2012 594.67 172.99 7.05 -24.02
26 3/1/2012 1287.05 359.80 21.63 -1.95
27 4/1/2012 1666.50 478.05 8.28 -1.72
28 5/1/2012 2362.27 597.88 12.37 -2.74
29 6/1/2012 2818.16 660.40 1.75 -9.46
30 7/1/2012 3226.00 823.61 2.51 -1.54
31 8/1/2012 3924.67 924.57 8.88 -0.55
32 9/1/2012 4295.79 1020.33 6.25 -6.63
33 10/1/2012 4751.97 1189.15 7.06 -2.73
34 11/1/2012 5410.37 1347.44 11.31 -4.37
35 12/1/2012 5986.46 2315.36 11.67 6.24
I'm trying to use the code below to plot a and b for y axis left with barchart, ratio_a and ratio_b for y axis right with lines and point:
library(ggplot2)
library(dplyr)
df$date <- as.Date(df$date, format = "%m/%d/%Y")
df_m <- melt(df, id.vars='date')
df_m_x <- df_m %>%
filter(variable %in% c("a", 'b'))
df_m_ratio_x <- df_m %>%
filter(variable %in% c("ratio_a", 'ratio_b')) %>%
mutate(value = value * 80)
coeff = 1/80
ggplot() +
geom_bar(data = df_m_x, aes(x = date, y = value, fill = variable), alpha = 0.6, position = 'dodge', stat = 'identity') +
geom_line(data = df_m_ratio_x, aes(x = date, y = value, linetype = variable, col = variable), alpha = 1, size = 1.5) +
geom_point(data = df_m_ratio_x, aes(x = date, y = value, col = variable), size = 3) +
scale_y_continuous(
name = "㎡",
sec.axis = sec_axis(~.*coeff, name = "%")) +
scale_color_manual(values = c("a" = "#E7B800", "b" = "#FC4E07")) +
theme(
legend.title = element_blank(),
legend.position = "bottom",
panel.grid.major = element_line(colour = "grey99"),
panel.border = element_rect(colour = "grey95", fill=NA),
panel.background = element_blank(),
legend.text = element_text(size = 18),
) +
scale_x_date(breaks = date_breaks("6 months"), date_labels = "%Y-%m")
Out:
Removed 6 rows containing missing values (geom_bar).
Removed 72 row(s) containing missing values (geom_path).
Removed 72 rows containing missing values (geom_point).
It works for bar charts, however it doesn't display lines and points for ratio_a and ratio_b? Does anyone can help me find out why this happens? Thanks a lot.

The issue is that you used the wrong labels in scale_color_manual. Instead of a and b you have to use ratio_a/b as these are the values of variable in your dataset. Put differently ggplot2 finds no values for ratio_a/b in the color scale and hence they are removed:
library(ggplot2)
library(reshape2)
library(dplyr)
library(scales)
df$date <- as.Date(df$date, format = "%m/%d/%Y")
df_m <- melt(df, id.vars = "date")
df_m_x <- df_m %>%
filter(variable %in% c("a", "b"))
df_m_ratio_x <- df_m %>%
filter(variable %in% c("ratio_a", "ratio_b")) %>%
mutate(value = value * 80)
coeff <- 1 / 80
ggplot() +
geom_bar(data = df_m_x, aes(x = date, y = value, fill = variable), alpha = 0.6, position = "dodge", stat = "identity") +
geom_line(data = df_m_ratio_x, aes(x = date, y = value, linetype = variable, col = variable), alpha = 1, size = 1.5) +
geom_point(data = df_m_ratio_x, aes(x = date, y = value, col = variable), size = 3) +
scale_y_continuous(
name = "<U+33A1>",
sec.axis = sec_axis(~ . * coeff, name = "%")
) +
scale_color_manual(values = c("ratio_a" = "#E7B800", "ratio_b" = "#FC4E07")) +
theme(
legend.title = element_blank(),
legend.position = "bottom",
panel.grid.major = element_line(colour = "grey99"),
panel.border = element_rect(colour = "grey95", fill = NA),
panel.background = element_blank(),
legend.text = element_text(size = 18),
) +
scale_x_date(breaks = date_breaks("6 months"), date_labels = "%Y-%m")
#> Warning: Removed 6 rows containing missing values (geom_bar).
#> Warning: Removed 26 row(s) containing missing values (geom_path).
#> Warning: Removed 28 rows containing missing values (geom_point).

Nested donut chart for comparison

I am trying to make two donut plots to compare some metrics. The data frame is as below,
new_sum var `1` `2`
<dbl> <chr> <dbl> <dbl>
1 98.7 cnt_alerts 45.1 NA
2 98.7 cnt_incidents_total 15.6 NA
3 98.7 sum_of_events 100 NA
4 100 cnt_alerts NA 44.4
5 100 cnt_incidents_total NA 16.2
6 100 sum_of_events NA 100
So the two plots should represent the 1 and 2 columns, but the row sum_of_events should be taken from new_sum column. So in the end the two plots will look as follows (Trying to replicate with paint)
DATA
structure(list(new_sum = c(98.7093505166464, 98.7093505166464,
98.7093505166464, 100, 100, 100), var = c("cnt_alerts", "cnt_incidents_total",
"sum_of_events", "cnt_alerts", "cnt_incidents_total", "sum_of_events"
), `1` = c(45.0519047096481, 15.6423424701131, 100, NA, NA, NA
), `2` = c(NA, NA, NA, 44.4483592005942, 16.201786624667, 100
)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-6L))

Something like this?
library(tidyverse)
df1 %>%
mutate(
id = rep(1:2, each = 3),
value = coalesce(`1`, `2`),
value = ifelse(var == "sum_of_events", new_sum, value)
) %>%
ggplot(aes(var)) +
geom_col(aes(y = 100), position = 'identity', fill = 'white', col = 1, width = 0.5) +
geom_col(aes(y = value), position = 'identity', fill = 'grey60', col = 1, width = 0.5) +
facet_grid(~id) +
coord_polar(theta = 'y') +
theme_minimal()

Issue with ggplot with discrete x-axis in date format (yyyy-mm-dd)

I have a data frame like below. I need to plot a line plot using ggplot each line showing for each year (i.e. 2014, 2015 etc) with different colors for each year.
I cannot connect the points by a line with my code showing below. But, it does plot the points only with different colors for each year. This may be a simple thing but I just cannot figure this out.
library(reshape2)
library(ggplot2)
plot.data <- melt(Table_1, id.vars = 'Day Obs')
ggplot(plot.data, aes(x = `Day Obs`, y = value)) +
geom_line(mapping = aes(x = `Day Obs`, y = value, colour = variable),size=1.0) +
geom_point(mapping = aes(x = `Day Obs`, y = value, colour = variable),size=2.3)
Table_1:
Day Obs 2014 2015 2016 2017 2018
2018-08-01 NA NA NA NA 1.002
2018-08-03 NA 0.85 NA NA NA
2018-08-06 NA NA NA NA 0.9
2018-08-07 NA NA 0.78 0.88 NA
.
.

The issue is that you have missing values i between observations and geom_line then doesn't connect the points (as information is missing). This can be seenfor the year 2018:
library(reshape2)
library(ggplot2)
plot.data <- melt(Table_1, id.vars = 'Day Obs')
plot.data[plot.data$variable == 2018, ]
# Day Obs variable value
# 17 2018-08-01 2018 1.002
# 18 2018-08-03 2018 NA
# 19 2018-08-06 2018 0.900
# 20 2018-08-07 2018 NA
Here the information for 2018-08-03 is missing explicitly. Therfore there is no connection between the points, if we plot them.
ggplot(plot.data, aes(x = `Day Obs`, y = value, colour = variable)) +
geom_line(size = 1.0) +
geom_point(size = 2.3)
You can remove the explicit missing values and it works:
ggplot(plot.data[!is.na(plot.data$value), ],
aes(x = `Day Obs`, y = value, colour = variable)) +
geom_line(size = 1.0) +
geom_point(size = 2.3)
Data
Table_1 <- structure(list(`Day Obs` = structure(c(17744, 17746, 17749, 17750), class = "Date"),
`2014` = c(NA, NA, NA, NA),
`2015` = c(NA, 0.85, NA, NA),
`2016` = c(NA, NA, NA, 0.78),
`2017` = c(NA, NA, NA, 0.88),
`2018` = c(1.002, NA, 0.9, NA)),
row.names = c(NA, -4L), class = "data.frame")

Develop Reference

r css asp.net wordpress firebase qt symfony nginx http apache-flex

overlapping the predicted time series on the original series in R - r

Related

Convert data frame to time series for plotting

Proper x axis scale with years only

Can't display lines and points with ggplot2

Nested donut chart for comparison

Issue with ggplot with discrete x-axis in date format (yyyy-mm-dd)

Categories

Resources