Plot time series and forecast simultaneously using ggplot2 - r

I have a time series with forecast and confidence interval data, I wanted to plot them simultaneously using ggplot2. I'm doing it by the code below:
set.seed(321)
library(ggplot2)
#create some dummy data similar to mine
sample<-rnorm(350)
forecast<-rnorm(24)
upper<-forecast+2*sd(forecast)
lower<-forecast-2*sd(forecast)
## wrap data into a data.frame
df1 = data.frame(time = seq(325,350,length=26), M = sample[325:350], isin = "observations")
df2 = data.frame(time = seq(351,374,length=24), M = forecast , isin = "my_forecast")
df3 = data.frame(time = seq(351,374,length=24), M = upper ,isin = "upper_bound")
df4 = data.frame(time = seq(351,374,length=24), M = lower, isin = "lower_bound")
df = rbind(df1, df2, df3, df4)
## ggplot object
ggplot(df, aes(x = time, y = M, color = isin)) + geom_line()
How can I join upper and lower lines in one color? and also how can I set specific colors to forecast and sample?

Use scale_colour_manual:
ggplot(df, aes(x = time, y = M, color = isin)) + geom_line() +
scale_colour_manual(values=c(observations='blue', my_forecast='red', upper_bound='black', lower_bound='black'))
edit
This is another option, inspired by #rnso answer.
ggplot(df1, aes(x = time, y = M)) + geom_line(colour='blue') +
geom_smooth(aes(x=time, y=M, ymax=upper_bound, ymin=lower_bound),
colour='red', data=df5, stat='identity')

Following may be useful:
ggplot() +
geom_line(data=df1, aes(x = time, y = M, color = isin)) +
stat_smooth(data=df2, aes(x = time, y = M, color = isin))
'method' option can also be used in stat_smooth()

Related

Dodging vertical lines for median_hilow in ggplot

I need to plot lines that show median and IQR for 3 replicates, across multiple samples.
Data:
sampleid <- rep(1:20, each = 3)
replicate <- rep(1:3, 20)
sample1 <- seq(120,197, length.out = 60)
sample2 <- seq(113, 167, length.out = 60)
sample3 <- seq(90,180, length.out = 60)
What I have done so far?
df <- as.data.frame(cbind(sampleid,replicate,sample1, sample2, sample3))
library(reshape2)
long <- melt(df,id.vars = c('sampleid', 'replicate'))
ggplot(data = long, aes(x = variable, y = value, colour = factor(replicate))) + stat_summary(fun.data=median_hilow, conf.int=.5)
However, the plot of the IQR for replicates that I am getting are overlapped with each other for each sample. I would like to find out a way to "dodge" these 3 lines so that they are visible next to each other, without changing other parameters of the plot that I have achieved. Is this achievable?
You have to introduce jitter to the lines:
ggplot(data = long, aes(x = variable, y = value, colour = factor(replicate))) +
stat_summary(fun.data=median_hilow, fun.args = (conf.int=.5), position = "jitter")
Please note you also need to have your conf.int=5 wrapped in the fun.args.
Alternatively, change your x to factor(replicate) and add facet_wrap:
ggplot(data = long, aes(x = factor(replicate), y = value, colour = factor(replicate))) +
stat_summary(fun.data=median_hilow, fun.args = (conf.int=.5)) +
facet_wrap(~variable)

ggplot change line color specified by x axis values

Code to reproduce:
myDat <- data.frame(Event = rep(c("Arrival", "Departure"), 3),
AtNode = c("StationA", "StationA", "Track", "Track", "StationB", "StationB"),
Lane = c("Lane1", "Lane1", "Lane2", "Lane2", "Lane1", "Lane1"),
atTime = c(10, 12, 18, 20, 34, 36),
Type = c("Station", "Station", "Track", "Track", "Station", "Station"),
Train = 1 )
ggplot(data =myDat, aes(x = atTime, y=factor(AtNode, levels = unique(paste(myDat[order(myDat$atTime),"AtNode"]))), group = Train, colour = Lane ))+
geom_point(data = myDat)+
geom_path(data = myDat[which(!grepl(pattern = "Track", myDat$Type)),])
Now i need to project the two green points (Y = "Track") on the orange line and color the line between the projected points the same color as the points.
Expected result: (without the points (Y ="Track")
Thanks in advance for every hint or trick!
Cheers
I don't think your output is the right way of showing what you want. You have factors on your y-axis, which means it ranges between 1 and 3.
Therefore, projecting a line there means nothing in terms of y-axis values.
For me, the correct way of showing your data would be like this
ggplot(data =myDat,
aes(x = atTime, y=factor(AtNode, levels = unique(paste(myDat[order(myDat$atTime),"AtNode"]))),
group = AtNode, colour = Lane ))+
geom_point()+
geom_line() +
labs(y = 'AtNode')
However, to do it how you asked, you can do some simple trigonometry to project your line segment
x1 = 1 + tan(asin(2/sqrt(484)))*6 #y projection given x = 18
x2 = 1 + tan(asin(2/sqrt(484)))*8 #y projection given x = 20
foo = data.frame(x = c(18,20), y = c(x1, x2), Lane = "Lane2")
ggplot(data = myDat, aes(x = atTime, y=factor(AtNode, levels = unique(paste(myDat[order(myDat$atTime),"AtNode"]))), group = 1, colour = Lane ))+
geom_path(data = myDat[which(!grepl(pattern = "Track", myDat$Type)),]) +
geom_line(data = foo, aes(x = x, y = y, color = Lane), size = 1) +
scale_y_discrete(drop = FALSE)
I don't think there is a quick solution to this, but you could do something like this:
myDat$AtNode <- factor(myDat$AtNode, levels = unique(paste(myDat[order(myDat$atTime),"AtNode"]))) #Generate factor here so we can use in imputation calculation
impute_rows <- which(myDat$Type == "Track") #Select rows to impute
slope_df <- myDat[impute_rows + c(-1,1), ] #Select rows before and after imputation to calculate slope
line <- lm(as.numeric(AtNode) ~ atTime, data = slope_df) #Get slope of line so we can do the calculations
df <- data.frame(x = myDat[impute_rows, "atTime"], y = myDat[impute_rows, "atTime"]*line$coefficients[["atTime"]] + line$coefficients[["(Intercept)"]], Lane = myDat[impute_rows,"Lane"], Train = myDat[impute_rows,"Train"])
ggplot(data =myDat, aes(x = atTime, y=AtNode, group = Train, colour = Lane ))+
geom_path(data = myDat[which(!grepl(pattern = "Track", myDat$Type)),]) +
geom_path(data = df, aes(x = x, y = y), size = 2) +
scale_y_discrete(drop = FALSE)
The idea is as follows:
Identify the rows you want to impute: which()
Identify the rows before and after the ones to impute slope_df
Using the rows before and after the desired values to impute generate equation of line you want to impute along (using the slope_df)
Generate data based on the line df <- data.frame(...)
Note that you also need the scale_y_discrete(drop = FALSE) so that the Track level isn't removed from the plot.

facet_grid() causing crash

I can not figure out what I'm missing. I keep crashing r or causing it to give very weird plots.
> head(vData)
vix.Close vstoxx vxfxi.Close Date
2011-03-16 29.40 35.2293 35.84 2011-03-16
2011-03-17 26.37 30.6133 31.77 2011-03-17
2011-03-18 24.44 28.5337 29.31 2011-03-18
2011-03-21 20.61 25.2355 25.95 2011-03-21
2011-03-22 20.21 24.3914 24.52 2011-03-22
2011-03-23 19.17 23.9226 24.03 2011-03-23
The below works:
p1.1<-ggplot(data = vData, aes(x = Date, y = vix.Close)) + geom_line(col= "red")
p1.1
p2<-p1.1 + geom_line(data = vData[!is.na(vData$vstoxx),], aes(x = Date, y = vstoxx), col="blue")
p2
p3<-p2 + geom_line(data = vData[!is.na(vData$vxfxi.Close),], aes(x = Date, y = vxfxi.Close), col="green")
p3
p4<-p3 + labs(title = "Volatility Indexes", x = "Time", y = "Index")
p4
But this is the part that is giving me trouble:
p5<- p4 + facet_grid(Date~., scales = Date)
p5
I echo what baptiste said: what is it you're trying to do? The code you've provided suggests that you're trying to create a separate line chart for each date in the dataset, which doesn't make much sense. For this demonstration, I'll show you how to facet the data by year to see the correlations between the different measurements of volatility over time. If you provide more detail as a comment, I'll revisit the code.
First let's take a look at what you've already done.
library(tidyverse)
library(gridExtra)
library(lubridate)
library(reshape2)
#Generate dummy data
vData <- tibble(
vix.Close = rnorm(1000, mean = 12, sd = 5),
vstoxx = rnorm(1000, mean = 12, sd = 5),
vxfxi.Close = rnorm(1000, mean = 12, sd = 5),
Date = as.Date(1:1000, origin = '2011-01-01')
)
# Generate individual plots per your question
p1.1 <-
ggplot(data = vData, aes(x = Date, y = vix.Close)) + geom_line(col = "red")
p1.1
p2 <-
p1.1 + geom_line(data = vData[!is.na(vData$vstoxx), ], aes(x = Date, y = vstoxx), col =
"blue")
p2
p3 <-
p2 + geom_line(data = vData[!is.na(vData$vxfxi.Close), ], aes(x = Date, y = vxfxi.Close), col =
"green")
p3
p4 <-
p3 + labs(title = "Volatility Indexes", x = "Time", y = "Index")
p4
You're creating four different plots and then layering them on top of each other. This approach works here, but it's cumbersome to make changes to each of the calls to ggplot or if you want to add/remove variables. Let's move your data to a "long" format and simplify the ggplot call.
# Melt the data into three columns and remove NAs
vData <- melt(vData, id = "Date") %>%
filter(!is.na(value)) %>%
tbl_df()
# Create one ggplot for all three indexes
ggplot(data = vData, aes(x = Date, y = value, color = variable)) +
geom_line() +
labs(title = "Volatility Indexes", x = "Time", y = "Index")
Now back to the big problem: you shouldn't be faceting by date because that would give you a huge number of tiny unreadable line charts. There are a number of other facets that might make sense. For example, you could look at the distribution of the three indexes by year.
ggplot(data = vData, aes(x = variable, y = value, color = variable)) +
geom_boxplot() +
labs(title = "Volatility Indexes", x = "", y = "") +
facet_grid(year(Date) ~ .)
So put some thought into what exactly you want to show.

Bar plot of group means with lines of individual results overlaid

this is my first stack overflow post and I am a relatively new R user, so please go gently!
I have a data frame with three columns, a participant identifier, a condition (factor with 2 levels either Placebo or Experimental), and an outcome score.
set.seed(1)
dat <- data.frame(Condition = c(rep("Placebo",10),rep("Experimental",10)),
Outcome = rnorm(20,15,2),
ID = factor(rep(1:10,2)))
I would like to construct a bar plot with two bars with the mean outcome score for each condition and the standard deviation as an error bar. I would like to then overlay lines connecting points for each participant's score in each condition. So the plot displays the individual response as well as the group mean.If it is also possible I would like to include an axis break.
I don't seem to be able to find any advice in other threads, apologies if I am repeating a question.
Many Thanks.
p.s. I realise that presenting data in this way will not be to everyones tastes. It is for a specific requirement!
This ought to work:
library(ggplot2)
library(dplyr)
dat.summ <- dat %>% group_by(Condition) %>%
summarize(mean.outcome = mean(Outcome),
sd.outcome = sd(Outcome))
ggplot(dat.summ, aes(x = Condition, y = mean.outcome)) +
geom_bar(stat = "identity") +
geom_errorbar(aes(ymin = mean.outcome - sd.outcome,
ymax = mean.outcome + sd.outcome),
color = "dodgerblue", width = 0.3) +
geom_point(data = dat, aes(x = Condition, y = Outcome),
color = "firebrick", size = 1.2) +
geom_line(data = dat, aes(x = Condition, y = Outcome, group = ID),
color = "firebrick", size = 1.2, alpha = 0.5) +
scale_y_continuous(limits = c(0, max(dat$Outcome)))
Some people are better with ggplot's stat functions and arguments than I am and might do it differently. I prefer to just transform my data first.
set.seed(1)
dat <- data.frame(Condition = c(rep("Placebo",10),rep("Experimental",10)),
Outcome = rnorm(20,15,2),
ID = factor(rep(1:10,2)))
dat.w <- reshape(dat, direction = 'wide', idvar = 'ID', timevar = 'Condition')
means <- colMeans(dat.w[, 2:3])
sds <- apply(dat.w[, 2:3], 2, sd)
ci.l <- means - sds
ci.u <- means + sds
ci.width <- .25
bp <- barplot(means, ylim = c(0,20))
segments(bp, ci.l, bp, ci.u)
segments(bp - ci.width, ci.u, bp + ci.width, ci.u)
segments(bp - ci.width, ci.l, bp + ci.width, ci.l)
segments(x0 = bp[1], x1 = bp[2], y0 = dat.w[, 2], y1 = dat.w[, 3], col = 1:10)
points(c(rep(bp[1], 10), rep(bp[2], 10)), dat$Outcome, col = 1:10, pch = 19)
Here is a method using the transfomations inside ggplot2
ggplot(dat) +
stat_summary(aes(x=Condition, y=Outcome, group=Condition), fun.y="mean", geom="bar") +
stat_summary(aes(x=Condition, y=Outcome, group=Condition), fun.data="mean_se", geom="errorbar", col="green", width=.8, size=2) +
geom_line(aes(x=Condition, y=Outcome, group=ID), col="red")

ggplot2 histogram binwidth [duplicate]

This question already has answers here:
Different breaks per facet in ggplot2 histogram
(4 answers)
Closed 8 years ago.
I would like to create multiple histograms within one plot (using facet_wrap).
This could be an example code:
df <- data.frame(p1 = rnorm(100,5,2), p2 = rnorm(100,80,20), group = rep(LETTERS[1:4],25))
library(ggplot2)
library(reshape)
plotData <- melt(df, id.vars = "group", measure.vars = c("p1","p2") )
m <- ggplot(plotData, aes(x = value, color = group, fill = group, group = group))
m <- m + geom_bar(position=position_dodge())
m <- m + facet_wrap( ~ variable,scales = "free_x")
print(m)
Now, I would like to modify the plot that it creates per parameter ("p1,"p2") let's say 10 bins.
Up to now, I could not find a way to do this as binwidth/breaks calculation should be dependent on a subset of data.
Is it possible at all?
I want to share my solution (taken from the answered question linked above) extended by the possibility to overlay the histograms with density curves scaled to histogram counts:
df <- data.frame(p1 = rnorm(1000,5,2), p2 = rnorm(1000,80,20), group = rep(LETTERS[1:4],25))
library(ggplot2)
library(reshape)
library(plyr)
plotData <- melt(df, id.vars = "group", measure.vars = c("p1","p2") )
nBins <- 10
groupedData <- dlply(plotData, .(variable))
groupedBinWidth <- llply(groupedData, .fun = function(data, nBins) {
r <- range(data$value, na.rm = TRUE, finite = TRUE)
widthOfBins = (r[2] - r[1])/nBins
if (is.na(widthOfBins) || is.infinite(widthOfBins) || (widthOfBins <= 0)) widthOfBins <- NULL
widthOfBins
}, nBins = nBins)
densData <- dlply(plotData, .(variable, group), .fun = function(subData){
param <- subData$variable[1]
group <- subData$group[1]
d <- density(subData$value)
bw <- groupedBinWidth[[param]]
data.frame(x = d$x, y = d$y * nrow(subData) * bw , group = group, variable = param)
})
hls <- mapply(function(x, b) geom_bar(aes(x = value), position = position_dodge(), data = x, binwidth = b),
groupedData, groupedBinWidth)
dLay <- mapply(function(data) geom_density(data = data, aes(x = x, y = y), stat = "identity", fill = NA, size = 1),
densData)
m <- ggplot(plotData, aes(x = value, color = group, fill = group, group = group))
m <- m + hls
m <- m + dLay
m <- m + facet_wrap( ~ variable,scales = "free")
print(m)
Try this - really ugly code, but works if I understand you correctly. You might want to play with geom_density and maybe remove fill to make it more readable.
nbin<- 5
m <- ggplot(plotData, aes(x = value, color = group, fill = group, group = group))
m <- m + geom_histogram(data = subset(plotData, variable == "p1"), binwidth=diff(range(subset(plotData, variable == "p1")$value))/nbin)
m <- m + geom_histogram(data = subset(plotData, variable == "p2"), binwidth=diff(range(subset(plotData, variable == "p2")$value))/nbin)
m <- m + facet_wrap( ~ variable,scales = "free_x")
print(m)

Resources