With acf we can make ACF plot in base R graph.
x <- lh
acf(x)
The following code can be used to get the ACF plot in ggplot2.
conf.level <- 0.95
ciline <- qnorm((1 - conf.level)/2)/sqrt(length(x))
bacf <- acf(x, plot = FALSE)
bacfdf <- with(bacf, data.frame(lag, acf))
library(ggplot2)
q <- ggplot(data=bacfdf, mapping=aes(x=lag, y=acf)) +
geom_bar(stat = "identity", position = "identity")
q
Question
How to get lines rather than bars or how to set the width of bars so that they look like lines? Thanks
You're probably better off plotting with line segments via geom_segment()
library(ggplot2)
set.seed(123)
x <- arima.sim(n = 200, model = list(ar = 0.6))
bacf <- acf(x, plot = FALSE)
bacfdf <- with(bacf, data.frame(lag, acf))
q <- ggplot(data = bacfdf, mapping = aes(x = lag, y = acf)) +
geom_hline(aes(yintercept = 0)) +
geom_segment(mapping = aes(xend = lag, yend = 0))
q
How about using geom_errorbar with width=0?
ggplot(data=bacfdf, aes(x=lag, y=acf)) +
geom_errorbar(aes(x=lag, ymax=acf, ymin=0), width=0)
#konrad; try the following code:
library(ggfortify)
p1 <- autoplot(acf(AirPassengers, plot = FALSE), conf.int.fill = '#0000FF', conf.int.value = 0.8, conf.int.type = 'ma')
print(p1)
library(cowplot)
ggdraw(switch_axis_position(p1, axis = 'xy', keep = 'xy'))
From the forecast package comes a function ggtsdisplay that plots both ACF and PACF with ggplot. x is the residuals from the model fit (fit$residuals).
forecast::ggtsdisplay(x,lag.max=30)
From your answers, I synthesized a ggplot ACF / PACF plotting method :
require(zoo)
require(tseries)
require(ggplot2)
require(cowplot)
ts= zoo(data[[2]]) # data[[2]] because my time series data was the second column
# Plot ACP / ACF with IC
# How to compute IC for ACF and PACF :
# https://stats.stackexchange.com/questions/211628/how-is-the-confidence-interval-calculated-for-the-acf-function
ic_alpha= function(alpha, acf_res){
return(qnorm((1 + (1 - alpha))/2)/sqrt(acf_res$n.used))
}
ggplot_acf_pacf= function(res_, lag, label, alpha= 0.05){
df_= with(res_, data.frame(lag, acf))
# IC alpha
lim1= ic_alpha(alpha, res_)
lim0= -lim1
ggplot(data = df_, mapping = aes(x = lag, y = acf)) +
geom_hline(aes(yintercept = 0)) +
geom_segment(mapping = aes(xend = lag, yend = 0)) +
labs(y= label) +
geom_hline(aes(yintercept = lim1), linetype = 2, color = 'blue') +
geom_hline(aes(yintercept = lim0), linetype = 2, color = 'blue')
}
acf_ts= ggplot_acf_pacf(res_= acf(ts, plot= F)
, 20
, label= "ACF")
pacf_ts= ggplot_acf_pacf(res_= pacf(ts, plot= F)
, 20
, label= "PACF")
# Concat our plots
acf_pacf= plot_grid(acf_ts, pacf_ts, ncol = 2, nrow = 1)
acf_pacf
Results:
forecast::ggAcf() is another option:
library(ggplot2)
library(forecast)
ggAcf(wineind,lag.max=24)+
labs(title='wineind')
Related
I was trying to recreate this plot:
using the following code -
library(tidyverse)
set.seed(0); r <- rnorm(10000);
df <- as.data.frame(r)
avg <- round(mean(r),2)
SD <- round(sd(r),2)
x.scale <- seq(from = avg - 3*SD, to = avg + 3*SD, by = SD)
x.lab <- c("-3SD", "-2SD", "-1SD", "Mean", "1SD", "2SD", "3SD")
df %>% ggplot(aes(r)) +
geom_histogram(aes(y=..density..), bins = 20,
colour="black", fill="lightblue") +
geom_density(alpha=.2, fill="darkblue") +
scale_x_continuous(breaks = x.scale, labels = x.lab) +
labs(x = "")
Using the code I plotted this:
,
but this isn't near to the plot that I am trying to create. How do I make an additional axis with the X axis? How do I add the lines to automatically show the percentage of observations? Is there any way, that I can create the plot as nearly identical as possible using ggplot2?
Welcome to SO. Excellent first question!
It's actually quite tricky. You'd need to create a second plot (the second x axis) but it's not the most straight forward to align both perfectly.
I will be using Z.lin's amazing modification of the cowplot package.
I am not using the reprex package, because I think I'd need to define every single function (and I don't know how to use trace within reprex.)
library(tidyverse)
library(cowplot)
set.seed(0); r <- rnorm(10000);
foodf <- as.data.frame(r)
avg <- round(mean(r),2)
SD <- round(sd(r),2)
x.scale <- round(seq(from = avg - 3*SD, to = avg + 3*SD, by = SD), 1)
x.lab <- c("-3SD", "-2SD", "-1SD", "Mean", "1SD", "2SD", "3SD")
x2lab <- -3:3
# calculate the density manually
dens_r <- density(r)
# for each x value, calculate the closest x value in the density object and get the respective y values
y_dens <- dens_r$y[sapply(x.scale, function(x) which.min(abs(dens_r$x - x)))]
# added annotation for segments and labels.
# Arrow segments can be added in a similar way.
p1 <-
ggplot(foodf, aes(r)) +
geom_histogram(aes(y=..density..), bins = 20,
colour="black", fill="lightblue") +
geom_density(alpha=.2, fill="darkblue") +
scale_x_continuous(breaks = x.scale, labels = x.lab) +
labs(x = NULL) +# use NULL here
annotate(geom = "segment", x = x.scale, xend = x.scale,
yend = 1.1 * max(dens_r$y), y = y_dens, lty = 2 ) +
annotate(geom = "text", label = x.lab,
x = x.scale, y = 1.2 * max(dens_r$y))
p2 <-
ggplot(foodf, aes(r)) +
scale_x_continuous(breaks = x.scale, labels = x2lab) +
labs(x = NULL) +
theme_classic() +
theme(axis.line.y = element_blank())
# This is with the modified plot_grid() / align_plot() function!!!
plot_grid(p1, p2, ncol = 1, align = "v", rel_heights = c(1, 0.1))
I'm trying to write my own Central Limit Theorem demonstration using ggplot2 and am unable to get my stat_function to display a changing normal distribution.
below is my code, I want the normal distribution in stat_function to transition through different states; specifically, I'm hoping for it to change the standard deviation to correspond with each value in dataset. Any help would be greatly appreciated.
#library defs
library(gganimate)
library(ggplot2)
library(transformr)
#initialization for distribution, rolls, and vectors
k = 2
meanr = 1/k
sdr = 1/k
br = sdr/10
rolls <- 200
avg <- 1
dataset <- 1
s <- 1
#loop through to create vectors of sample statistics from 200 samples of size i
#avg is sample average, s is standard deviations of sample means, and dataset is the indexes to run the transition states
for (i in c(1:40)){
for (j in 1:rolls){
avg <- c(avg,mean(rexp(i,k)))
}
dataset <- c(dataset, rep(i,rolls))
s <- c(s,rep(sdr/sqrt(i),rolls))
}
#remove initialized vector information as it was only created to start loops
avg <- avg[-1]
rn <- rn[-1]
dataset <- dataset[-1]
s <- s[-1]
#dataframe
a <- data.frame(avgf=avg, rnf = rn,datasetf = dataset,sf = s)
#plot histogram, density function, and normal distribution
ggplot(a,aes(x=avg,y=s))+
geom_histogram(aes(y = ..density..), binwidth = br,fill='beige',col='black')+
geom_line(aes(y = ..density..,colour = 'Empirical'),lwd=2, stat = 'density') +
stat_function(fun = dnorm, aes(colour = 'Normal', y = s),lwd=2,args=list(mean=meanr,sd = mean(s)))+
scale_y_continuous(labels = scales::percent_format()) +
scale_color_discrete(name = "Densities", labels = c("Empirical", "Normal"))+
labs(x = 'Sample Average',title = 'Sample Size: {closest_state}')+
transition_states(dataset,4,4)+ view_follow(fixed_x = TRUE)
I think it's difficult to use stat_function here because the dnorm function that you are passing includes a grouped variable (mean(s)). There is no way to indicate that you wish to group s by the dataset column, and the transition_states function doesn't filter the whole data frame. You could use transition_filter to filter the whole data frame, but this would be laborious.
It's not much work to just add a dnorm to your input data frame and plot it as a line, particularly since the rest of your code can be simplified substantially. Here's a fully reproducible example:
library(gganimate)
library(ggplot2)
library(transformr)
k <- 2
meanr <- sdr <- 1/k
br <- sdr/10
rolls <- 200
a <- do.call(rbind, lapply(1:40, function(i){
data.frame(avg = replicate(rolls, mean(rexp(i, k))),
dataset = rep(i, rolls),
x = seq(0, 2, length.out = rolls),
s = dnorm(seq(0, 2, length.out = rolls),
meanr, sdr/sqrt(i))) }))
ggplot(a, aes(x = avg, group = dataset)) +
geom_histogram(aes(y = ..density..), fill = 'beige',
colour = "black", binwidth = br) +
geom_line(aes(y = ..density.., colour = 'Empirical'),
lwd = 2, stat = 'density', alpha = 0.5) +
geom_line(aes(x = x, y = s, colour = "Normal"), size = 2, alpha = 0.5) +
scale_y_continuous(labels = scales::percent_format()) +
coord_cartesian(xlim = c(0, 2)) +
scale_color_discrete(name = "Densities", labels = c("Empirical", "Normal")) +
labs(x = 'Sample Average', title = 'Sample Size: {closest_state}') +
transition_states(dataset, 4, 4) +
view_follow(fixed_x = TRUE, fixed_y = TRUE)
I'd like to plot histogram and density on the same plot. What I would like to add to the following is custom y-axis label which would be something like sprintf("[%s] %s", ..density.., ..count..) - two numbers at one tick value. Is it possible to obtain this with scale_y_continuous or do I need to work this around somehow?
Below current progress using scales::trans_new and sec_axis. sec_axis is kind of acceptable but the most desirable output is as on the image below.
set.seed(1)
var <- rnorm(4000)
binwidth <- 2 * IQR(var) / length(var) ^ (1 / 3)
count_and_proportion_label <- function(x) {
sprintf("%s [%.2f%%]", x, x/sum(x) * 100)
}
ggplot(data = data.frame(var = var), aes(x = var, y = ..count..)) +
geom_histogram(binwidth = binwidth) +
geom_density(aes(y = ..count.. * binwidth)) +
scale_y_continuous(
# this way
trans = trans_new(name = "count_and_proportion",
format = count_and_proportion_label,
transform = function(x) x,
inverse = function(x) x),
# or this way
sec.axis = sec_axis(trans = ~./sum(.),
labels = percent,
name = "proportion (in %)")
)
I've tried to create object with breaks before basing on the graphics::hist output - but these two histogram differs.
bins <- (max(var) - min(var))/binwidth
hdata <- hist(var, breaks = bins, right = FALSE)
# hist generates different bins than `ggplot2`
At the end I would like to get something like this:
Would it be acceptable to add percentage as a secondary axis? E.g.
your_plot + scale_y_continuous(sec.axis = sec_axis(~.*2, name = "[%]"))
Perhaps it would be possible to overlay the secondary axis on the primary one, but I'm not sure how you would go about doing that.
You can achieve your desired output by creating a custom set of labels, and adding it to the plot:
library(tidyverse)
library(ggplot2)
set.seed(1)
var <- rnorm(400)
bins <- .1
df <- data.frame(yvals = seq(0, 20, 5), labels = c("[0%]", "[10%]", "[20%]", "[30%]", "[40%]"))
df <- df %>% tidyr::unite("custom_labels", labels, yvals, sep = " ", remove = TRUE)
ggplot(data = data.frame(var = var), aes(x = var, y = ..count..)) +
geom_histogram(aes(y = ..count..), binwidth = bins) +
geom_density(aes(y = ..count.. * bins), color = "black", alpha = 0.7) +
ylab("[density] count") +
scale_y_continuous(breaks = seq(0, 20, 5), labels = df$custom_labels)
I'm trying to re-create a plot like this in ggplot:.
This graph takes the residuals from a regression output, and plots them in order (with the X-axis being a rank of residuals).
My best attempt at this was something like the following:
library(ggplot2)
library(modelr)
d <- d %>% add_residuals(mod1, var = "resid")
d$resid_rank <- rank(d$resid)
ggplot(data = d, aes(x = resid_rank, y = resid)) +
geom_bar(stat="identity") +
theme_bw()
However, this yields a completely blank graph. I tried something like this:
ggplot(data = d, aes(x = resid_rank, y = resid)) +
geom_segment(yend = 0, aes(xend=resid)) +
theme_bw()
But this yields the segments that go in the wrong direction. What is the right way to do this, and to color those lines by a third factor?
FAKE DATASET:
library(estimatr)
library(fabricatr)
#simulation
dat <- fabricate(
N = 10000,
y = runif(N, 0, 10),
x = runif(N, 0, 100)
)
#add an outlier
dat <- rbind(dat, c(300, 5))
dat <- rbind(dat, c(500, 3))
dat$y_log <- log(dat$y)
dat$x_log <- log(dat$x)
dat$y_log_s <- scale(log(dat$y))
dat$x_log_s <- scale(log(dat$x))
mod1 <- lm(y_log ~ x_log, data = dat))
I used the build in dataset from the help page on lm() to create this example. I also just directly used resid() to get the residuals. It's unclear where / why the colored bars would be different, but basically you'd need to add a column to your data.frame that specificies why they are red or blue, then pass that to fill.
library(ggplot2)
#> Warning: package 'ggplot2' was built under R version 3.4.4
#example from lm
ctl <- c(4.17,5.58,5.18,6.11,4.50,4.61,5.17,4.53,5.33,5.14)
trt <- c(4.81,4.17,4.41,3.59,5.87,3.83,6.03,4.89,4.32,4.69)
group <- gl(2, 10, 20, labels = c("Ctl","Trt"))
weight <- c(ctl, trt)
lm.D9 <- lm(weight ~ group)
resids <- data.frame(resid = resid(lm.D9))
#why are some bars red and some blue? No clue - so I'll pick randomly
resids$group <- sample(c("group 1", "group 2"), nrow(resids), replace = TRUE)
#rank
resids$rank <- rank(-1 * resids$resid)
ggplot(resids, aes(rank, resid, fill = group)) +
geom_bar(stat = "identity", width = 1) +
geom_hline(yintercept = c(-1,1), colour = "darkgray", linetype = 2) +
geom_hline(yintercept = c(-2,2), colour = "lightgray", linetype = 1) +
theme_bw() +
theme(panel.grid = element_blank()) +
scale_fill_manual(values = c("group 1" = "red", "group 2" = "blue"))
Created on 2019-01-24 by the reprex package (v0.2.1)
I am creating animated plotly graph for my assignment in r, where I am comparing several models with various number of observations. I would like to add annotation showing what is the RMSE of the current model - this means I would like to have text that changes together with slider. Is there any easy way how to do that?
Here is my dataset stored on GitHub. There already is created variable with RMSE: data
The base ggplot graphic is as follows:
library(tidyverse)
library(plotly)
p <- ggplot(values_predictions, aes(x = x)) +
geom_line(aes(y = preds_BLR, frame = n, colour = "BLR")) +
geom_line(aes(y = preds_RLS, frame = n, colour = "RLS")) +
geom_point(aes(x = x, y = target, frame = n, colour = "target"), alpha = 0.3) +
geom_line(aes(x = x, y = sin(2 * pi * x), colour = "sin(2*pi*x)"), alpha = 0.3) +
ggtitle("Comparison of performance) +
labs(y = "predictions and targets", colour = "colours")
This is converted to plotly, and I have added an animation to the Plotly graph:
plot <- ggplotly(p) %>%
animation_opts(easing = "linear",redraw = FALSE)
plot
Thanks!
You can add annotations to a ggplot graph using the annotate function: http://ggplot2.tidyverse.org/reference/annotate.html
df <- data.frame(x = rnorm(100, mean = 10), y = rnorm(100, mean = 10))
# Build model
fit <- lm(x ~ y, data = df)
# function finds RMSE
RMSE <- function(error) { sqrt(mean(error^2)) }
library(ggplot2)
ggplot(df, aes(x, y)) +
geom_point() +
annotate("text", x = Inf, y = Inf, hjust = 1.1, vjust = 2,
label = paste("RMSE", RMSE(fit$residuals)) )
There seems to be a bit of a problem converting between ggplot and plotly. However this workaround here shows a workaround which can be used:
ggplotly(plot) %>%
layout(annotations = list(x = 12, y = 13, text = paste("RMSE",
RMSE(fit$residuals)), showarrow = F))
Here's an example of adding data dependent text using the built in iris dataset with correlation as text to ggplotly.
library(plotly)
library(ggplot2)
library(dplyr)
mydata = iris %>% rename(variable1=Sepal.Length, variable2= Sepal.Width)
shift_right = 0.1 # number from 0-1 where higher = more right
shift_down = 0.02 # number from 0-1 where higher = more down
p = ggplot(mydata, aes(variable1,variable2))+
annotate(geom = "text",
label = paste0("Cor = ",as.character(round(cor.test(mydata$variable1,mydata$variable2)$estimate,2))),
x = min(mydata$variable1)+abs(shift_right*(min(mydata$variable1)-max(mydata$variable1))),
y = max(mydata$variable2)-abs(shift_down*(min(mydata$variable2)-max(mydata$variable2))), size=4)+
geom_point()
ggplotly(p) %>% style(hoverinfo = "none", traces = 1) # remove hover on text