library(fpp)
library(forecast)
# Split the series: everything up to 1999 Q4 is used for fitting, the rest
# is held out for comparison.
ausbeer.train <- window(ausbeer, end = c(1999, 4))
ausbeer.test <- window(ausbeer, start = c(2000, 1))

# Benchmark forecasts (32 periods ahead), fitted on the training window only.
fc_snaive <- snaive(ausbeer.train, h = 32)
fc_meanf <- meanf(ausbeer.train, h = 32)

# Training data, both point forecasts (no prediction intervals) and the
# held-out observations on one plot.
autoplot(ausbeer.train, xlab = "Rok", ylab = "beer") +
  autolayer(fc_snaive, PI = FALSE, series = "snaive") +
  autolayer(fc_meanf, PI = FALSE, series = "meanf") +
  autolayer(ausbeer.test)
produces
What if I wanted to plot only data from 1995 up to 2008? Can I somehow limit the range on the x axis? I don't want to subset my data (as snaive and meanf and probably other methods will need the entire train data), I only need to limit what I draw on the plot.
If p is the value of the autoplot statement in the question then this will plot only 1995 to the end of the series.
library(ggplot2)
# Lower x limit fixed at 1995; NA leaves the upper limit to be computed from
# the data.
p + scale_x_continuous(limits = c(1995, NA))
Related
My question is about the representation of time series analysis from tslm with ggplot2.
I have used the forecast package to decompose SST time series in the Mediterranean into trend, seasonal and remainder components. Then I looked for the slope (trend) of the linear regression for the trend component with tslm. But I can't figure out how to plot the tslm fit with ggplot2. Should I plot the SST trend component with geom_smooth(method = "lm")? Would lm provide the same result (slope) as tslm?
This is the code used to build and decompose SST time series
library(forecast)
# Extract the STL trend component for every grid-point column of `data`
# (columns 2..n; `data$fecha` is used as the date column), then average the
# trends across grid points.
n_series <- length(data) - 1

# Preallocate one column per analysed series so the loop never has to rely on
# (or grow) an `output` object created elsewhere.
output <- matrix(NA_real_, nrow = nrow(data), ncol = n_series)

# seq_len() keeps the loop empty when there is no column to analyse, where
# 2:length(data) would count backwards.
for (i in seq_len(n_series) + 1) {
  # Use the column directly: binding it to `fecha` with cbind() would coerce
  # both to a common type (character when the dates are strings).
  valor <- data[[i]]

  # Build the time series (365 observations per period) and decompose it
  datos.ts <- ts(valor, frequency = 365)
  datos.stl <- stl(datos.ts, s.window = 365)

  # Linear trend fit; it is overwritten on every iteration, so only the fit
  # for the last column is available after the loop.
  datos.tslm <- tslm(datos.ts ~ trend)

  # Save the STL trend component of this column
  output[, i - 1] <- datos.stl$time.series[, "trend"]
}

# Summarize trends for the whole Mediterranean (mean value to be plotted).
# The columns are named `fecha` and `trend` so the result can be mapped
# directly with aes(x = fecha, y = trend).
trend <- data.frame(fecha = data$fecha, trend = rowMeans(output))
And the code to plot with geom_smooth
# Scatter of the averaged trend with a linear fit computed on the first
# 12784 rows only.
trend.plot <- ggplot(data = trend, mapping = aes(x = fecha, y = trend)) +
  geom_point(size = 0.1) +
  geom_smooth(data = trend[1:12784, ], method = 'lm')
EDIT 1
As SST data consists of a bunch of files, I've uploaded trend data to Dropbox and made available in this csv file
I am trying to understand your question and, as a first try, I have revised your code as follows (the attached data only contains 2 columns, so I removed the for loop, but generalizing it should not be hard)
library(forecast)
library(ggplot2)
library(zoo)
data <- read.csv('../Downloads/trend_data.csv', header = TRUE)
data$fecha <- as.Date(data$fecha)

# Column of `data` to analyse
i <- 2
valor <- data[[i]]

# Build the time series (365 observations per period) and decompose it
datos.ts <- ts(valor, frequency = 365)
datos.stl <- stl(datos.ts, s.window = 365)

# Linear trend fitted with tslm
datos.tslm <- tslm(datos.ts ~ trend)

# STL trend component
output <- datos.stl$time.series[, 2]

# Summarize trends for the whole Mediterranean (mean value to be plotted)
# trend<-as.data.frame(rowMeans(output[,1:length(output)]))

# Put the STL trend in a data frame with the same column names as `data`, so
# the smoother inherits aes(x = fecha, y = trend) and uses this data frame's
# own `trend` column. Mapping y = output instead would bypass the layer data
# and pull the ts object from the global environment.
stl_trend <- data.frame(fecha = data$fecha, trend = as.numeric(output))

ggplot(data = data, aes(x = fecha, y = trend)) +
  geom_point(size = 0.1) +
  geom_smooth(method = 'lm', data = stl_trend)
Let me know if I misinterpret your intention here.
UPDATE: I feel like what you want might be just line plot of the output trend of tslm?
# Raw points with the STL trend drawn as a line. The line layer gets its own
# data frame whose columns match the inherited aes(x = fecha, y = trend), so
# no aesthetic has to reach for the global `output` object.
ggplot(data = data, aes(x = fecha, y = trend)) +
  geom_point(size = 0.1) +
  geom_line(data = data.frame(fecha = data$fecha, trend = as.numeric(output)))
If you want a smoothed version of the trend,
# Raw points in red with a smoothed version of the STL trend in blue. The
# smoother's data frame uses the same column names as the inherited
# aes(x = fecha, y = trend), instead of mapping y to the global `output`.
ggplot(data = data, aes(x = fecha, y = trend)) +
  geom_point(size = 0.1, col = "red") +
  geom_smooth(
    data = data.frame(fecha = data$fecha, trend = as.numeric(output)),
    col = "blue",
    size = 0.1
  )
The data you provided, plotted as a linegraph with one dot per day. Does this solve your problem?
library(dplyr)
library(ggplot2)
# Read the comma-separated file; columns are converted explicitly below.
trend_data <- read.csv2(
  "../trend_data.csv",
  sep = ",",
  stringsAsFactors = FALSE
)

# Parse the date column and make sure the trend column is numeric.
df <- mutate(
  trend_data,
  fecha = as.Date(fecha),
  trend = as.numeric(trend)
)

# Line graph with one dot per observation.
ggplot(df, aes(x = fecha, y = trend)) + geom_line() + geom_point()
I have time-series data of four years. Now I want to plot the same data year-wise and do comparative analysis. The dummy data is as
library(xts)
library(ggplot2)
# One timestamp per minute for the whole of 2016.
timeindex <- seq(
  from = as.POSIXct("2016-01-01"),
  to = as.POSIXct("2016-12-31 23:59:59"),
  by = "1 mins"
)
n_obs <- length(timeindex)

# Four random series, one per "year", each with its own mean and spread.
dataframe <- data.frame(
  year1 = rnorm(n_obs, mean = 100, sd = 10),
  year2 = rnorm(n_obs, mean = 150, sd = 7),
  year3 = rnorm(n_obs, mean = 200, sd = 3),
  year4 = rnorm(n_obs, mean = 350, sd = 4)
)

xts_df <- xts(dataframe, order.by = timeindex)
Now, when I use ggplot it takes too long to plot all the series using following lines
# Plot every column of the dummy xts object.
visualize_dataframe_all_columns(xts_data = xts_df)
The above function is defined as:
#' Plot every column of an xts object as an interactive line chart
#'
#' Each column of `xts_data` is drawn as one coloured line against the time
#' index, and the ggplot is converted to a plotly widget.
#'
#' @param xts_data An xts object; its index becomes the x axis and every
#'   column becomes one series.
#' @return The object returned by `plotly::ggplotly()`.
visualize_dataframe_all_columns <- function(xts_data) {
  # RColorBrewer and plotly are called through `::` rather than library(), so
  # calling this function does not attach packages to the search path.
  dframe <- data.frame(timeindex = index(xts_data), coredata(xts_data))
  df_long <- reshape2::melt(dframe, id.vars = "timeindex")

  # Interpolate the 8-colour "Dark2" palette to one colour per series
  # (brewer.pal(9, "Set1") is an alternative base palette).
  colour_count <- length(unique(df_long$variable))
  palette_values <- colorRampPalette(
    RColorBrewer::brewer.pal(8, "Dark2")
  )(colour_count)

  g <- ggplot(
    df_long,
    aes(timeindex, value, col = variable, group = variable)
  ) +
    geom_line() +
    scale_colour_manual(values = palette_values)

  plotly::ggplotly(g)
}
Problems with above approach are:
It takes long time to plot. Can I reduce the plot time?
It is very difficult to zoom into the plot using plotly. Is there a better way?
Are there any better approaches to visualize this data?
I faced more or less the same problem with data at a 10-minute frequency. However, the question is: does it make sense to plot minute data for a whole year? The human eye cannot recognize the difference.
I would create a daily xts from that data and plot it for the year, and modify the function so that it plots the minute data only for a selected period of time.
How would one use the smooth.spline() method in a ggplot2 scatterplot?
If my data is in the data frame called data, with two columns, x and y.
The smooth.spline would be sm <- smooth.spline(data$x, data$y). I believe I should use geom_line(), with sm$x and sm$y as the xy coordinates. However, how would one plot a scatterplot and a lineplot on the same graph that are completely unrelated? I suspect it has something to do with the aes() but I am getting a little confused.
You can use different data(frames) in different geoms and call the relevant variables using aes or you could combine the relevant variables from the output of smooth.spline
# example data: 20 x values, y is a noisy quadratic in x
set.seed(1)
dat <- data.frame(x = rnorm(20, mean = 10, sd = 2))
dat <- transform(dat, y = x^2 - 20 * x + rnorm(20, mean = 10, sd = 2))

# spline fitted to the (x, y) columns of dat
s <- smooth.spline(dat)
# plot - combine the original x & y with the fitted values into one
# data.frame. The fit is evaluated at the original x values with predict(),
# so the fitted column always has one value per data row. (s$x and s$y only
# hold the distinct x values, so binding them to the raw data fails as soon
# as x contains ties.)
library(ggplot2)
fit_df <- data.frame(dat, yfit = predict(s, dat$x)$y)
ggplot(fit_df, aes(x, y)) +
  geom_point() +
  geom_line(aes(y = yfit))
# or you could use geom_smooth
ggplot(dat, aes(x, y)) + geom_point() + geom_smooth()
To practise with ggplot and to improve my skills in writing R functions, I decided to build a series of functions that produce survival plots with all kinds of extras. I managed to build a well-working function for the basic survival plot; now I am getting to the extras. One thing I would like to add is an option that stacks an area plot of the number at risk at a given time point on top of the survival plot. I would like it to look just like the facet_grid option of ggplot, but I did not manage to do it with this function. I do not want the two plots merely bound together, as we can do with grid.arrange, but rather to have them share the same x-axis.
The following code produces the two (simplified) plots that I would like to stack on top of each other. I tried to do this with facet_grid, but I don't think the solution lies in this
library(survival)
library(ggplot2)
data(lung)
# Overall survival fit for the lung data (no grouping variable)
s <- survfit(Surv(time, status) ~ 1, data = lung)

# Prepend the starting point: time 0, survival 1, everyone still at risk
dat <- data.frame(
  time = c(0, s$time),
  surv = c(1, s$surv),
  nr = c(s$n, s$n.risk)
)

# Survival curve as a step function, and number at risk as an area
pl1 <- ggplot(dat, aes(time, surv)) +
  geom_step()
pl2 <- ggplot(dat, aes(time, nr)) +
  geom_area()
First, melt your data to long format.
library(reshape2)
# Long format: `time` stays as the id column, every other column is stacked
# into variable/value pairs.
dat.long <- melt(data = dat, id.vars = "time")
head(dat.long)
time variable value
1 0 surv 1.0000000
2 5 surv 0.9956140
3 11 surv 0.9824561
4 12 surv 0.9780702
5 13 surv 0.9692982
6 15 surv 0.9649123
Then use subset() to use only surv data in geom_step() and nr data in geom_area() and with facet_grid() you will get each plot in separate facet as variable is used to divide data for facetting and for subsetting. scales="free_y" will make pretty axis.
# One facet per variable: the survival rows feed the step layer, the
# number-at-risk rows feed the area layer; each facet gets its own y scale.
ggplot() +
  geom_step(
    mapping = aes(time, value),
    data = subset(dat.long, variable == "surv")
  ) +
  geom_area(
    mapping = aes(time, value),
    data = subset(dat.long, variable == "nr")
  ) +
  facet_grid(variable ~ ., scales = "free_y")
I am trying to add a regression line to this facet_grid, but I seem to be having problems.
# Bar chart of new users per date, one panel per region/role combination.
# weight = count makes each bar the sum of `count` rather than a row count.
# Written with ggplot() + theme(element_text()) because opts() and
# theme_text() are defunct in current ggplot2 and qplot() is deprecated.
ggplot(ua, aes(x = date, weight = count)) +
  geom_bar(colour = "lightgreen", fill = "grey") +
  labs(
    x = "Date",
    y = "New User Count",
    title = "New Users by Type Last 3 Months"
  ) +
  facet_grid(un_region ~ role, scales = "free", space = "free_y") +
  theme(axis.text.x = element_text(angle = 45, size = 5))
Here's a sample of the data I am working with. Note that the counts need to be summed, which is why I am using weight=count; I am not sure if there is a better way.
date role name un_region un_subregion us_state count
1 2012-06-21 ENTREPRENEUR Australia Oceania Australia and New Zealand 2
2 2012-06-21 ENTREPRENEUR Belgium Europe Western Europe 1
Thanks.
I'm not sure what you're drawing the slope of since it seems that you're using a bar plot. There are three ways you can do it, I'll illustrate two. If you have actual xy data and you want to fit individual regression lines by facet, just use stat_smooth(method="lm"). Here's some code:
library(ggplot2)
# 100 points with y roughly 0.7 * x plus noise
x <- rnorm(100)
y <- 0.7 * x + rnorm(100)

# Two grouping factors: f1 splits the rows 50/50 into A and B, f2 splits
# each half into 25 C rows followed by 25 D rows.
f1 <- factor(rep(c("A", "B"), each = 50))
f2 <- factor(rep(rep(c("C", "D"), each = 25), times = 2))

df <- data.frame(x = x, y = y, f1 = f1, f2 = f2)
# Scatterplot per f1 x f2 panel with a separate linear fit (no confidence
# band) in each panel.
ggplot(df, aes(x = x, y = y)) +
  geom_point() +
  facet_grid(f1 ~ f2) +
  stat_smooth(method = "lm", se = FALSE)
This yields:
Or you can use the geom_abline() command to set your own intercept and slope. Here's an example using the diamonds data set overlaid on a bar plot, maybe what you're looking for.
You can see an example with just the scatterplots with this snippet
# Same faceted scatterplot, with a fixed reference line (intercept 0,
# slope 1) drawn in every panel.
ggplot(df, aes(x = x, y = y)) +
  geom_point() +
  facet_grid(f1 ~ f2) +
  geom_abline(intercept = 0, slope = 1)