Proper x axis scale with years only - r

I have a grid with two plots, each one consist of two time series of mean values: one come from an elaboration with R df5 the other one mmzep is not (I received this dataset already calculated).
library(dplyr)
library(lubridate)
df5 <- data.frame(df$Date, df$Price)
colnames(df5)<- c("date","price")
df5$date <- as.Date(df5$date,"%Y/%m/%d")
df5$price<- as.numeric(gsub(",",".",df5$price))
colnames(mmzep)<- c("date","Mar","Apr")
Then, I created other two dfs from df5 , I tried to group in only one df, but I was not able to do it.
meanM <- df5 %>%
mutate(Month = month(date), Year = year(date)) %>%
filter(month(df5$date) %in% 3 & year(df5$date) %in% 2010:2019) %>%
group_by(Year, Month) %>%
summarise_all(list(mean=mean, sd=sd), na.rm=TRUE) %>%
na.omit()
Year Month date_mean price_mean date_sd price_sd
<dbl> <dbl> <date> <dbl> <dbl> <dbl>
1 2010 3 2010-03-23 1082. 5.48 685.
2 2012 3 2012-03-27 858. 2.74 333.
3 2015 3 2015-03-16 603. 8.86 411.
4 2017 3 2017-03-15 674. 9.65 512.
5 2018 3 2018-03-16 318. 9.09 202.
6 2019 3 2019-03-14 840. 9.42 329.
meanA <- df5 %>%
mutate(Month = month(date), Year = year(date)) %>%
filter(month(df5$date) %in% 4 & year(df5$date) %in% 2010:2019) %>%
group_by(Year, Month) %>%
summarise_all(list(mean=mean, sd=sd), na.rm=TRUE) %>%
na.omit()
Year Month date_mean price_mean date_sd price_sd
<dbl> <dbl> <date> <dbl> <dbl> <dbl>
1 2010 4 2010-04-18 361. 9.00 334.
2 2011 4 2011-04-14 527. 8.36 312.
3 2012 4 2012-04-15 726. 8.80 435.
4 2013 4 2013-04-16 872. 8.50 521.
5 2014 4 2014-04-09 668. 5.34 354.
6 2015 4 2015-04-15 689. 8.80 436.
7 2017 4 2017-04-15 806. 8.80 531.
8 2018 4 2018-04-15 727. 8.80 291.
9 2019 4 2019-04-15 600. 8.94 690.
#mmzep
date Mar Apr
<dbl> <dbl> <dbl>
1 2010 793. 540
2 2011 650 378.
3 2012 813. 612.
4 2013 755. 717
5 2014 432. 634
6 2015 474. 782.
7 2016 590 743.
8 2017 544. 628
9 2018 249. 781
10 2019 547. 393
I plot the dfs
g5 = ggplot() +
geom_point(data=meanM, aes(x = (Year), y = (price_mean)),size = 3, colour="gray40") +
geom_point(data=mmzep, aes(x= (date), y=(Mar)), size =3, colour = "red") +
geom_line(data=meanM, aes(group = 1, x = (Year), y = (price_mean)), colour="gray40") +
geom_line(data=mmzep, aes(x = (date), y = (Mar)), colour="red") +
stat_smooth(data=meanM,aes(group = 1, x = (Year), y = (price_mean)),
method = "lm", size = 1, se = FALSE, formula = y ~ x,
colour = "black") +
stat_smooth(data=mmzep, aes(x = (date), y = (Mar)),
method = "lm", size = 1, se = FALSE, formula = y ~ x,
colour = "red3") +
scale_y_continuous(expand = c(0, 0), limits = c(0, 1500)) +
theme(panel.background = element_rect(fill = 'white', colour = 'black'),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.ticks.length = unit(-0.25, "lines"),
plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
axis.text.x = element_text(margin = margin(t = 0.25, unit = "cm")),
axis.text.y = element_text(margin = margin(r = 0.25, unit = "cm"))) +
labs(y = expression(March),
x = NULL) +
theme(axis.text.x = element_text(size=10),
axis.title = element_text(size=10))
I plot g5 and g6 in the same way, than the grid, to obtain this:
enter image description here
As you can see the x axis is not correct, I tried scale_x_date(breaks="year", labels=date_format("%Y")) , scale_x_discrete(labels=c("2010","2011","2012","2013","2014","2015","2016","2017","2018","2019")), scale_x_continuous in different ways.
I also tried mmzep$date <- as.Date(mmzep$date,"%Y") but I saw the R needs a day (in my case a day and a month?) mmzep$date <- as.Date(paste("01", mmzep$date, sep="/"), "%d/%m/%Y") , but R substitutes the years with NA. I think that the errors is in the the way R see the date in mmzep, but I don't understand how can I made R recognized the correct object.
Anyone have any suggestion? Thanks in advance!

There are a few ways to do this. In your data, your year values are stored as type double. This tells ggplot that you have a continuous variable. If you want to leave your data as is, then the solution is
+ scale_x_continuous(breaks = seq(2010, 2020, 2))
# or something else that expressly lists the years you want to see on the axis.
You cannot use scale_x_date without your year data being converted to a date. You can do that with, for example
MeanM$Year <- as.Date(paste(MeanM$Year, "01", "01", sep = "/"))
Then you can use
+ scale_x_date(date_labels = "%Y")
Or you can convert your years into discrete data with factor. You cannot use scale_x_discrete on a continuous variable.
MeanM$Year <- factor(MeanM$Year)
And then use
+ scale_x_discrete()

Try this approach tested on MeanM without using mmzep which we do not have data. The issue is that as you are using multiple geom the functions are adding strange labels to axis. Changing all x-axis variables to factor can alleviate the issue. In the case of mmzep with aes(x= (date),..) also be careful on formating the date as year with a code like this aes(x= factor(format(date,'%Y')) so that all labels fit well into axis. Here the code:
#Code
ggplot() +
geom_point(data=meanM, aes(x = factor(Year), y = (price_mean)),size = 3, colour="gray40") +
geom_line(data=meanM, aes(group = 1, x = factor(Year), y = (price_mean)), colour="gray40") +
stat_smooth(data=meanM,aes(group = 1, x = factor(Year), y = (price_mean)),
method = "lm", size = 1, se = FALSE, formula = y ~ x,
colour = "black") +
scale_y_continuous(expand = c(0, 0), limits = c(0, 1500)) +
theme(panel.background = element_rect(fill = 'white', colour = 'black'),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.ticks.length = unit(-0.25, "lines"),
plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
axis.text.x = element_text(margin = margin(t = 0.25, unit = "cm")),
axis.text.y = element_text(margin = margin(r = 0.25, unit = "cm"))) +
labs(y = expression(March),
x = NULL) +
theme(axis.text.x = element_text(size=10),
axis.title = element_text(size=10))
Output:
Some data used:
#Data
meanM <- structure(list(Year = c(2010L, 2012L, 2015L, 2017L, 2018L, 2019L
), Month = c(3L, 3L, 3L, 3L, 3L, 3L), date_mean = c("23/03/2010",
"27/03/2012", "16/03/2015", "15/03/2017", "16/03/2018", "14/03/2019"
), price_mean = c(1082L, 858L, 603L, 674L, 318L, 840L), date_sd = c(5.48,
2.74, 8.86, 9.65, 9.09, 9.42), price_sd = c(685L, 333L, 411L,
512L, 202L, 329L), Year2 = structure(1:6, .Label = c("2010",
"2012", "2015", "2017", "2018", "2019"), class = "factor")), row.names = c(NA,
-6L), class = "data.frame")

Related

ggplot in R define breaks for character column in x axis

graf4<-ggplot(DF,
aes(x=variable,y=value,color=paese)) +
geom_line(aes(linetype=paese,group=paese),size=2.5)+
theme_bw()+
labs(x="",y="")+
scale_colour_manual(values=c("darkorange1","darkorchid1","darkgreen","steelblue3","maroon2","grey2","yellow4",
"burlywood4", "chocolate"))+
scale_y_continuous(labels=fmt_dcimals(1))+
theme(panel.grid.major = element_line(colour = "grey",size=0.1)) +
theme(panel.grid.minor = element_line(colour = "white",linetype="dashed",size=0.1)) +
theme(strip.text.y=element_text(angle=0))+
theme(axis.text.x = element_text(angle=90,vjust=0.5,size=12),
axis.text.y = element_text(size=12),
legend.position = "bottom",
legend.box="horizontal",
legend.box.background=element_rect(),
legend.title = element_blank(),
legend.text=element_text(size=12))+
guides(col=guide_legend(nrow=1,byrow=TRUE))
print(graf4)
The code above work but my problem is that the tick (label) on x axis are too many and I would like to have less tick on the graph. For example have a breaks every 3 observation.
Note that VARIABLE isn't date format but character format. I dont want, for some other reason date format.
I tried with scale_x_discrete, scale_x_continuous and other instruction with parameters or not but I cant have on the x axis every 3 observation tick, for example.
I know that for numeric type on the x axis I cant set a breaks, but I don't know hot to set for character value type.
Variable * value * paese
#1 1999 q1 12 UK
#2 1999 q2 15 UK
#3 1999 q3 55 UK
#4 1999 q4 67 UK
#5 1999 q1 12 DE
#6 1999 q2 15 DE
#7 1999 q3 55 DE
#8 1999 q4 67 DE
#9 2000 q1 33 UK
#10 2000 q2 23 UK
#11 2000 q3 65 UK
#12 2000 q4 34 UK
#13 2000 q1 33 DE
#14 2000 q2 23 DE
#15 2000 q3 65 DE
#16 2000 q4 34 DE
# .... . ..
#45 2023 q1 23 UK
#46 2023 q2 11 UK
#47 2023 q3 23 UK
#48 2023 q1 23 DE
#49 2023 q2 11 DE
#50 2023 q3 23 DE
Something like this perhaps?
library(tidyverse)
df <- data.frame(variable = rep(LETTERS[1:21], 2, each = 2),
value = round(rnorm(84, 10, 3), 0),
paese = c("UK", "US"))
df |>
ggplot(aes(variable, value)) +
geom_line(aes(linetype = paese, group = paese)) +
scale_x_discrete(breaks = unique(df$variable)[seq(1, length(unique(df$variable)), 3)])
Created on 2023-02-17 with reprex v2.0.2
You can pass a custom labelling function in scale_x_discrete via the labels = parameter. This allows you to make blank labels for any of the points on the x axis.
For example, suppose I wanted to only show every year instead of every quarter. I could look for the string "q1" inside each label, and if it is present, return just the year component. If "q1" is not present, we would return an empty string:
ggplot(DF, aes(Variable, value, color = paese)) +
geom_line(aes(linetype = paese, group = paese), size = 2.5) +
scale_x_discrete(labels = ~ifelse(grepl('q1', .x), sub(' q1', '', .x), ''),
name = NULL) +
scale_colour_manual(values = c("darkorange1", "darkorchid1")) +
scale_y_continuous(NULL, labels = fmt_dcimals(1)) +
guides(col = guide_legend(nrow = 1, byrow = TRUE)) +
theme_bw() +
theme(panel.grid.major = element_line(colour = "grey", size = 0.1),
panel.grid.minor = element_blank(),
strip.text.y = element_text(angle = 0),
axis.text.x = element_text(size = 12, vjust = -1),
axis.text.y = element_text(size = 12),
legend.position = "bottom",
legend.box = "horizontal",
legend.box.background = element_rect(),
legend.title = element_blank(),
legend.text = element_text(size = 12))

Can't display lines and points with ggplot2

I have a dataframe df as follows:
structure(list(date = structure(c(1L, 13L, 16L, 19L, 22L, 25L,
28L, 31L, 34L, 4L, 7L, 10L, 2L, 14L, 17L, 20L, 23L, 26L, 29L,
32L, 35L, 5L, 8L, 11L, 3L, 15L, 18L, 21L, 24L, 27L, 30L, 33L,
36L, 6L, 9L, 12L), .Label = c("1/1/2010", "1/1/2011", "1/1/2012",
"10/1/2010", "10/1/2011", "10/1/2012", "11/1/2010", "11/1/2011",
"11/1/2012", "12/1/2010", "12/1/2011", "12/1/2012", "2/1/2010",
"2/1/2011", "2/1/2012", "3/1/2010", "3/1/2011", "3/1/2012", "4/1/2010",
"4/1/2011", "4/1/2012", "5/1/2010", "5/1/2011", "5/1/2012", "6/1/2010",
"6/1/2011", "6/1/2012", "7/1/2010", "7/1/2011", "7/1/2012", "8/1/2010",
"8/1/2011", "8/1/2012", "9/1/2010", "9/1/2011", "9/1/2012"), class = "factor"),
a = c(NA, 365.07, 653.19, 980.72, 1455.6, 1867.07, 2036.92,
2372.84, 2693.96, 2973.04, 3227.23, 3678.01, NA, 555.51,
1058.18, 1539.01, 2102.23, 2769.65, 3146.88, 3604.71, 4043.18,
4438.55, 4860.76, 5360.94, NA, 594.67, 1287.05, 1666.5, 2362.27,
2818.16, 3226, 3924.67, 4295.79, 4751.97, 5410.37, 5986.46
), b = c(NA, 158.18, 268.53, 331.81, 434.19, 538.49, 606.62,
651.46, 736.55, 890.81, 981.65, 1748.44, NA, 227.68, 366.95,
486.41, 614.75, 729.44, 836.46, 929.72, 1092.73, 1222.48,
1409.07, 2179.42, NA, 172.99, 359.8, 478.05, 597.88, 660.4,
823.61, 924.57, 1020.33, 1189.15, 1347.44, 2315.36), ratio_a = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 52.17, 62,
56.93, 44.42, 48.34, 54.49, 51.92, 50.08, 49.29, 50.62, 45.76,
NA, 7.05, 21.63, 8.28, 12.37, 1.75, 2.51, 8.88, 6.25, 7.06,
11.31, 11.67), ratio_b = c(NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, 43.94, 36.65, 46.59, 41.59, 35.46, 37.89,
42.71, 48.36, 37.23, 43.54, 24.65, NA, -24.02, -1.95, -1.72,
-2.74, -9.46, -1.54, -0.55, -6.63, -2.73, -4.37, 6.24)), class = "data.frame", row.names = c(NA,
-36L))
Out:
date a b ratio_a ratio_b
0 1/1/2010 NaN NaN NaN NaN
1 2/1/2010 365.07 158.18 NaN NaN
2 3/1/2010 653.19 268.53 NaN NaN
3 4/1/2010 980.72 331.81 NaN NaN
4 5/1/2010 1455.60 434.19 NaN NaN
5 6/1/2010 1867.07 538.49 NaN NaN
6 7/1/2010 2036.92 606.62 NaN NaN
7 8/1/2010 2372.84 651.46 NaN NaN
8 9/1/2010 2693.96 736.55 NaN NaN
9 10/1/2010 2973.04 890.81 NaN NaN
10 11/1/2010 3227.23 981.65 NaN NaN
11 12/1/2010 3678.01 1748.44 NaN NaN
12 1/1/2011 NaN NaN NaN NaN
13 2/1/2011 555.51 227.68 52.17 43.94
14 3/1/2011 1058.18 366.95 62.00 36.65
15 4/1/2011 1539.01 486.41 56.93 46.59
16 5/1/2011 2102.23 614.75 44.42 41.59
17 6/1/2011 2769.65 729.44 48.34 35.46
18 7/1/2011 3146.88 836.46 54.49 37.89
19 8/1/2011 3604.71 929.72 51.92 42.71
20 9/1/2011 4043.18 1092.73 50.08 48.36
21 10/1/2011 4438.55 1222.48 49.29 37.23
22 11/1/2011 4860.76 1409.07 50.62 43.54
23 12/1/2011 5360.94 2179.42 45.76 24.65
24 1/1/2012 NaN NaN NaN NaN
25 2/1/2012 594.67 172.99 7.05 -24.02
26 3/1/2012 1287.05 359.80 21.63 -1.95
27 4/1/2012 1666.50 478.05 8.28 -1.72
28 5/1/2012 2362.27 597.88 12.37 -2.74
29 6/1/2012 2818.16 660.40 1.75 -9.46
30 7/1/2012 3226.00 823.61 2.51 -1.54
31 8/1/2012 3924.67 924.57 8.88 -0.55
32 9/1/2012 4295.79 1020.33 6.25 -6.63
33 10/1/2012 4751.97 1189.15 7.06 -2.73
34 11/1/2012 5410.37 1347.44 11.31 -4.37
35 12/1/2012 5986.46 2315.36 11.67 6.24
I'm trying to use the code below to plot a and b for y axis left with barchart, ratio_a and ratio_b for y axis right with lines and point:
library(ggplot2)
library(dplyr)
df$date <- as.Date(df$date, format = "%m/%d/%Y")
df_m <- melt(df, id.vars='date')
df_m_x <- df_m %>%
filter(variable %in% c("a", 'b'))
df_m_ratio_x <- df_m %>%
filter(variable %in% c("ratio_a", 'ratio_b')) %>%
mutate(value = value * 80)
coeff = 1/80
ggplot() +
geom_bar(data = df_m_x, aes(x = date, y = value, fill = variable), alpha = 0.6, position = 'dodge', stat = 'identity') +
geom_line(data = df_m_ratio_x, aes(x = date, y = value, linetype = variable, col = variable), alpha = 1, size = 1.5) +
geom_point(data = df_m_ratio_x, aes(x = date, y = value, col = variable), size = 3) +
scale_y_continuous(
name = "㎡",
sec.axis = sec_axis(~.*coeff, name = "%")) +
scale_color_manual(values = c("a" = "#E7B800", "b" = "#FC4E07")) +
theme(
legend.title = element_blank(),
legend.position = "bottom",
panel.grid.major = element_line(colour = "grey99"),
panel.border = element_rect(colour = "grey95", fill=NA),
panel.background = element_blank(),
legend.text = element_text(size = 18),
) +
scale_x_date(breaks = date_breaks("6 months"), date_labels = "%Y-%m")
Out:
Removed 6 rows containing missing values (geom_bar).
Removed 72 row(s) containing missing values (geom_path).
Removed 72 rows containing missing values (geom_point).
It works for bar charts, however it doesn't display lines and points for ratio_a and ratio_b? Does anyone can help me find out why this happens? Thanks a lot.
The issue is that you used the wrong labels in scale_color_manual. Instead of a and b you have to use ratio_a/b as these are the values of variable in your dataset. Put differently ggplot2 finds no values for ratio_a/b in the color scale and hence they are removed:
library(ggplot2)
library(reshape2)
library(dplyr)
library(scales)
df$date <- as.Date(df$date, format = "%m/%d/%Y")
df_m <- melt(df, id.vars = "date")
df_m_x <- df_m %>%
filter(variable %in% c("a", "b"))
df_m_ratio_x <- df_m %>%
filter(variable %in% c("ratio_a", "ratio_b")) %>%
mutate(value = value * 80)
coeff <- 1 / 80
ggplot() +
geom_bar(data = df_m_x, aes(x = date, y = value, fill = variable), alpha = 0.6, position = "dodge", stat = "identity") +
geom_line(data = df_m_ratio_x, aes(x = date, y = value, linetype = variable, col = variable), alpha = 1, size = 1.5) +
geom_point(data = df_m_ratio_x, aes(x = date, y = value, col = variable), size = 3) +
scale_y_continuous(
name = "<U+33A1>",
sec.axis = sec_axis(~ . * coeff, name = "%")
) +
scale_color_manual(values = c("ratio_a" = "#E7B800", "ratio_b" = "#FC4E07")) +
theme(
legend.title = element_blank(),
legend.position = "bottom",
panel.grid.major = element_line(colour = "grey99"),
panel.border = element_rect(colour = "grey95", fill = NA),
panel.background = element_blank(),
legend.text = element_text(size = 18),
) +
scale_x_date(breaks = date_breaks("6 months"), date_labels = "%Y-%m")
#> Warning: Removed 6 rows containing missing values (geom_bar).
#> Warning: Removed 26 row(s) containing missing values (geom_path).
#> Warning: Removed 28 rows containing missing values (geom_point).

How to fix my geom_text labels so that they fit inside the bars of my column chart?

I have the following R codes running in RStudio:
df2<-df1 %>%
gather(year, value, X2016:X2019) %>%
mutate(Mth = Mth %>% fct_rev() %>% fct_relevel('January')) %>%
group_by(Mth) %>%
mutate(y_pos = min(value) / 2)
df2$Mth <- as.character(df2$Mth)
df2$Mth <- factor(df2$Mth, levels=unique(df2$Mth))
df2$AsAt2 = factor(df2$AsAt, levels=c("D-150", "D-120", "D-90", "D-60", "D-30"))
g1<-df2 %>% filter(value!=0)%>%
ggplot(aes(
x = Mth,
y = value,
fill = Mth,
group = year
)) +
geom_col(
position = position_dodge(.65),
width = .5
) +
geom_text(aes(
y = value + max(value) * .03,
label = round(value * 100) %>% str_c('%')
),
position = position_dodge(.65), size=3.5
) +
geom_text(aes(
y = y_pos,
label = str_remove(year, 'X')
),
color = 'white',
angle = 90,
fontface = 'bold',
position = position_dodge(0.65), size=3.5
) +
scale_y_continuous(
breaks = seq(0, .9, .1),
labels = function(x) round(x * 100) %>% str_c('%')
) +
scale_fill_manual(values = c(
rgb(47, 85, 151, maxColorValue = 255),
rgb(255, 51, 51, maxColorValue = 255),
rgb(84, 130, 53, maxColorValue = 255),
rgb(244, 177, 131, maxColorValue = 255),
rgb(112, 48, 160, maxColorValue = 255)
)) +
theme(
plot.title = element_text(hjust = .5),
panel.background = element_blank(),
panel.grid.major.y = element_line(color = rgb(.9, .9, .9)),
axis.ticks = element_blank(),
legend.position = 'none'
) +
xlab('') +
ylab('') +
ggtitle('January to May')
g1 + facet_grid(rows = vars(AsAt2))
The output is shown below:
My issue is that the Year labels inside the bar disappear in some cases. I have tried reducing the font size to 3.5 but I am getting the same issue.
Is there a way to ensure that the labels fit inside all the bars?
Note: I am also adding an extract of the tibble df2.
>df2
# A tibble: 100 x 6
# Groups: Mth [5]
Mth AsAt year value y_pos AsAt2
<fct> <chr> <chr> <dbl> <dbl> <fct>
1 January D-150 X2016 0.26 0.12 D-150
2 February D-150 X2016 0.25 0 D-150
3 March D-150 X2016 0.27 0 D-150
4 April D-150 X2016 0.290 0 D-150
5 May D-150 X2016 0.27 0 D-150
6 January D-120 X2016 0.38 0.12 D-120
7 February D-120 X2016 0.25 0 D-120
8 March D-120 X2016 0.36 0 D-120
9 April D-120 X2016 0.35 0 D-120
10 May D-120 X2016 0.31 0 D-120
I will answer my own question here. I tried fiddling with the following codes:
df2<-df1 %>%
gather(year, value, X2016:X2019) %>%
mutate(Mth = Mth %>% fct_rev() %>% fct_relevel('January')) %>%
group_by(Mth) %>%
mutate(y_pos = min(value) / 2)
and replaced the last line with the following:
mutate(y_pos = max(value) / 6.0)
The 6.0 was after some trial and error starting with 3.0.

Issue with ggplot with discrete x-axis in date format (yyyy-mm-dd)

I have a data frame like below. I need to plot a line plot using ggplot each line showing for each year (i.e. 2014, 2015 etc) with different colors for each year.
I cannot connect the points by a line with my code showing below. But, it does plot the points only with different colors for each year. This may be a simple thing but I just cannot figure this out.
library(reshape2)
library(ggplot2)
plot.data <- melt(Table_1, id.vars = 'Day Obs')
ggplot(plot.data, aes(x = `Day Obs`, y = value)) +
geom_line(mapping = aes(x = `Day Obs`, y = value, colour = variable),size=1.0) +
geom_point(mapping = aes(x = `Day Obs`, y = value, colour = variable),size=2.3)
Table_1:
Day Obs 2014 2015 2016 2017 2018
2018-08-01 NA NA NA NA 1.002
2018-08-03 NA 0.85 NA NA NA
2018-08-06 NA NA NA NA 0.9
2018-08-07 NA NA 0.78 0.88 NA
.
.
The issue is that you have missing values i between observations and geom_line then doesn't connect the points (as information is missing). This can be seenfor the year 2018:
library(reshape2)
library(ggplot2)
plot.data <- melt(Table_1, id.vars = 'Day Obs')
plot.data[plot.data$variable == 2018, ]
# Day Obs variable value
# 17 2018-08-01 2018 1.002
# 18 2018-08-03 2018 NA
# 19 2018-08-06 2018 0.900
# 20 2018-08-07 2018 NA
Here the information for 2018-08-03 is missing explicitly. Therfore there is no connection between the points, if we plot them.
ggplot(plot.data, aes(x = `Day Obs`, y = value, colour = variable)) +
geom_line(size = 1.0) +
geom_point(size = 2.3)
You can remove the explicit missing values and it works:
ggplot(plot.data[!is.na(plot.data$value), ],
aes(x = `Day Obs`, y = value, colour = variable)) +
geom_line(size = 1.0) +
geom_point(size = 2.3)
Data
Table_1 <- structure(list(`Day Obs` = structure(c(17744, 17746, 17749, 17750), class = "Date"),
`2014` = c(NA, NA, NA, NA),
`2015` = c(NA, 0.85, NA, NA),
`2016` = c(NA, NA, NA, 0.78),
`2017` = c(NA, NA, NA, 0.88),
`2018` = c(1.002, NA, 0.9, NA)),
row.names = c(NA, -4L), class = "data.frame")

overlapping the predicted time series on the original series in R

I perform forecast
w=read.csv("C:/Users/admin/Documents/aggrmonth.csv", sep=";",dec=",")
w
#create time series object
w=ts(w$new,frequency = 12,start=c(2015,1))
w
#timeplot
plot.ts(w)
#forecast for the next months
library("forecast")
m <- stats::HoltWinters(w)
test=forecast:::forecast.HoltWinters(m,h=4) #h is how much month do you want to predict
test
now i want get forecast for 4 months ahead.
From 01.2017-04.2017. I this know original values.
1-Jan-17 1020
1-Feb-17 800
1-Mar-17 1130
1-Apr-17 600
But i need get plot where displayed predicted values with CI are overlapped with original value.
Of course if i don't clearly exlplain, i attached the plot.
The green curve is the initial value of the series(my 4 months)
and green dotted line is predictied values are overlapped on original values.
Dashes on the predicted dotted curve are confidence intervals.
How to create such plot
w=
structure(list(yearMon = structure(c(9L, 7L, 15L, 1L, 17L, 13L,
11L, 3L, 23L, 21L, 19L, 5L, 10L, 8L, 16L, 2L, 18L, 14L, 12L,
4L, 24L, 22L, 20L, 6L), .Label = c("1-Apr-15", "1-Apr-16", "1-Aug-15",
"1-Aug-16", "1-Dec-15", "1-Dec-16", "1-Feb-15", "1-Feb-16", "1-Jan-15",
"1-Jan-16", "1-Jul-15", "1-Jul-16", "1-Jun-15", "1-Jun-16", "1-Mar-15",
"1-Mar-16", "1-May-15", "1-May-16", "1-Nov-15", "1-Nov-16", "1-Oct-15",
"1-Oct-16", "1-Sep-15", "1-Sep-16"), class = "factor"), new = c(8575L,
8215L, 16399L, 16415L, 15704L, 19805L, 17484L, 18116L, 19977L,
14439L, 9258L, 12259L, 4909L, 9539L, 8802L, 11253L, 11971L, 7838L,
2095L, 4157L, 3910L, 1306L, 3429L, 1390L)), .Names = c("yearMon",
"new"), class = "data.frame", row.names = c(NA, -24L))
We can use ggfortify to create a data frame then plot both timeseries with ggplot2
# Load required libraries
library(lubridate)
library(magrittr)
library(tidyverse)
library(scales)
library(forecast)
library(ggfortify)
w <- structure(list(yearMon = structure(c(9L, 7L, 15L, 1L, 17L, 13L,
11L, 3L, 23L, 21L, 19L, 5L, 10L, 8L, 16L, 2L, 18L, 14L, 12L,
4L, 24L, 22L, 20L, 6L), .Label = c("1-Apr-15", "1-Apr-16", "1-Aug-15",
"1-Aug-16", "1-Dec-15", "1-Dec-16", "1-Feb-15", "1-Feb-16", "1-Jan-15",
"1-Jan-16", "1-Jul-15", "1-Jul-16", "1-Jun-15", "1-Jun-16", "1-Mar-15",
"1-Mar-16", "1-May-15", "1-May-16", "1-Nov-15", "1-Nov-16", "1-Oct-15",
"1-Oct-16", "1-Sep-15", "1-Sep-16"), class = "factor"), new = c(8575L,
8215L, 16399L, 16415L, 15704L, 19805L, 17484L, 18116L, 19977L,
14439L, 9258L, 12259L, 4909L, 9539L, 8802L, 11253L, 11971L, 7838L,
2095L, 4157L, 3910L, 1306L, 3429L, 1390L)), .Names = c("yearMon",
"new"), class = "data.frame", row.names = c(NA, -24L))
# create time series object
w = ts(w$new, frequency = 12, start=c(2015, 1))
w
#> Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov
#> 2015 8575 8215 16399 16415 15704 19805 17484 18116 19977 14439 9258
#> 2016 4909 9539 8802 11253 11971 7838 2095 4157 3910 1306 3429
#> Dec
#> 2015 12259
#> 2016 1390
# forecast for the next months
m <- stats::HoltWinters(w)
# h is how much month do you want to predict
pred = forecast:::forecast.HoltWinters(m, h=4)
pred
#> Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
#> Jan 2017 -5049.00381 -9644.003 -454.0045 -12076.449 1978.441
#> Feb 2017 37.44605 -5599.592 5674.4843 -8583.660 8658.552
#> Mar 2017 -256.41474 -6770.890 6258.0601 -10219.444 9706.615
#> Apr 2017 2593.09445 -4693.919 9880.1079 -8551.431 13737.620
# plot
plot(pred, include = 24, showgap = FALSE)
# Convert pred from list to data frame object
df1 <- fortify(pred) %>% as_tibble()
# Create Date column, remove Index column and rename other columns
df1 %<>%
mutate(Date = as.Date(Index, "%Y-%m-%d")) %>%
select(-Index) %>%
rename("Low95" = "Lo 95",
"Low80" = "Lo 80",
"High95" = "Hi 95",
"High80" = "Hi 80",
"Forecast" = "Point Forecast")
df1
#> # A tibble: 28 x 8
#> Data Fitted Forecast Low80 High80 Low95 High95 Date
#> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <date>
#> 1 8575 NA NA NA NA NA NA 2015-01-01
#> 2 8215 NA NA NA NA NA NA 2015-02-01
#> 3 16399 NA NA NA NA NA NA 2015-03-01
#> 4 16415 NA NA NA NA NA NA 2015-04-01
#> 5 15704 NA NA NA NA NA NA 2015-05-01
#> 6 19805 NA NA NA NA NA NA 2015-06-01
#> 7 17484 NA NA NA NA NA NA 2015-07-01
#> 8 18116 NA NA NA NA NA NA 2015-08-01
#> 9 19977 NA NA NA NA NA NA 2015-09-01
#> 10 14439 NA NA NA NA NA NA 2015-10-01
#> # ... with 18 more rows
### Avoid the gap between data and forcast
# Find the last non missing NA values in obs then use that
# one to initialize all forecast columns
lastNonNAinData <- max(which(complete.cases(df1$Data)))
df1[lastNonNAinData,
!(colnames(df1) %in% c("Data", "Fitted", "Date"))] <- df1$Data[lastNonNAinData]
ggplot(df1, aes(x = Date)) +
geom_ribbon(aes(ymin = Low95, ymax = High95, fill = "95%")) +
geom_ribbon(aes(ymin = Low80, ymax = High80, fill = "80%")) +
geom_point(aes(y = Data, colour = "Data"), size = 4) +
geom_line(aes(y = Data, group = 1, colour = "Data"),
linetype = "dotted", size = 0.75) +
geom_line(aes(y = Fitted, group = 2, colour = "Fitted"), size = 0.75) +
geom_line(aes(y = Forecast, group = 3, colour = "Forecast"), size = 0.75) +
scale_x_date(breaks = scales::pretty_breaks(), date_labels = "%b %y") +
scale_colour_brewer(name = "Legend", type = "qual", palette = "Dark2") +
scale_fill_brewer(name = "Intervals") +
guides(colour = guide_legend(order = 1), fill = guide_legend(order = 2)) +
theme_bw(base_size = 14)
Edit: To included known values from "2017-01-01" to "2017-04-01"
# Create new column which has known values
df1$Obs <- NA
df1$Obs[(nrow(df1)-3):(nrow(df1))] <- c(1020, 800, 1130, 600)
ggplot(df1, aes(x = Date)) +
geom_ribbon(aes(ymin = Low95, ymax = High95, fill = "95%")) +
geom_ribbon(aes(ymin = Low80, ymax = High80, fill = "80%")) +
geom_point(aes(y = Data, colour = "Data"), size = 4) +
geom_line(aes(y = Data, group = 1, colour = "Data"),
linetype = "dotted", size = 0.75) +
geom_line(aes(y = Fitted, group = 2, colour = "Fitted"), size = 0.75) +
geom_line(aes(y = Forecast, group = 3, colour = "Forecast"), size = 0.75) +
scale_x_date(breaks = scales::pretty_breaks(), date_labels = "%b %y") +
scale_colour_brewer(name = "Legend", type = "qual", palette = "Dark2") +
scale_fill_brewer(name = "Intervals") +
guides(colour = guide_legend(order = 1), fill = guide_legend(order = 2)) +
theme_bw(base_size = 14) +
geom_line(aes(y = Obs, group = 4, colour = "Obs"), linetype = "dotted", size = 0.75)
Or put those values directly into Data column
df1$Data[(nrow(df1)-3):(nrow(df1))] <- c(1020, 800, 1130, 600)
ggplot(df1, aes(x = Date)) +
geom_ribbon(aes(ymin = Low95, ymax = High95, fill = "95%")) +
geom_ribbon(aes(ymin = Low80, ymax = High80, fill = "80%")) +
geom_point(aes(y = Data, colour = "Data"), size = 3) +
geom_line(aes(y = Data, group = 1, colour = "Data"),
linetype = "dotted", size = 0.75) +
geom_line(aes(y = Fitted, group = 2, colour = "Fitted"), size = 0.75) +
geom_line(aes(y = Forecast, group = 3, colour = "Forecast"), size = 0.75) +
scale_x_date(breaks = scales::pretty_breaks(), date_labels = "%b %y") +
scale_colour_brewer(name = "Legend", type = "qual", palette = "Dark2") +
scale_fill_brewer(name = "Intervals") +
guides(colour = guide_legend(order = 1), fill = guide_legend(order = 2)) +
theme_bw(base_size = 14)
Created on 2018-04-21 by the reprex package (v0.2.0).

Resources