Faceted time series with mean profile in ggplot2 - r

Using the following simulated time series:
n=70
m1 = matrix(rnorm(n), ncol=7)
m2 = matrix(rnorm(n, 0,4), ncol=7)
d = data.frame(rbind(m1,m2), cl=rep(c(1,2), each=5))
(first 7 columns represent the time point, last column the class)
Is it possible to construct a faceted time series that includes the mean curve in each plot, using ggplot2?
The results should look something like this:

It might not be the most beautiful code, but I believe it gets you what you are looking for,
n=70
m1 = matrix(rnorm(n), ncol=7)
m2 = matrix(rnorm(n, 0,4), ncol=7)
d = data.frame(rbind(m1,m2), cl=rep(c(1,2), each=5))
d <- cbind(paste("d", 1:NROW(d), sep = ""), d)
names(d)[1] <- "id.var"
library(reshape)
longDF <- melt(d, id=c("cl", "id.var"))
library(ggplot2)
p <- ggplot(data = longDF, aes(x = variable, y = value, group = id.var))
p + geom_line() + stat_smooth(aes(group = 1), method = "lm",
se = FALSE, colour="red") + facet_grid(cl ~ .)
Please don't hesitate to improve my code.

Related

Plotting the theoretical distribution of exponential with a minimum on facets (ggh4x)

Users of a Shiny app can test data sets for Poisson, normality, and exponentiality. I am returning the results of the statistical test they chose. In addition, I thought it would be nice to plot the density from the data along with the theoretical distribution. They could be testing multiple sets of data at once, so I am faceting the plot.
From ggplot add Normal Distribution while using `facet_wrap` I found the really great ggh4x package. However, since this could be industry data, there may be a minimum that is not zero.
The problem is that theodensity(distri="exp") uses dexp which doesn't account for a minimum number, so the theoretical distribution plot doesn't match the data.
How can I tell the stat_theodensity that there is an xmin for each facet, which is the min of the data in the facet? I see that fitdistrplus can use different methods to fit an exponential curve, and that, for example, method="mse" would work. Is there a way to pass this through stat_theodensity?
library(ggh4x)
#generate 2 exponential distributions with xmin > 0
data1 <- rexp(n = 500,rate = 1/100)+100
data2 <- rexp(n = 500,rate = 1/250)+500
data <- c(data1,data2)
#generate a code for facets
ID1 <- c(rep("Set 1",times=500))
ID2 <- c(rep("Set 2",times=500))
ID <- c(ID1,ID2)
#make the data for plotting
plot_dat <- data.frame(ID,data)
#make the graph
p <- ggplot(data = plot_dat, aes(x=data))+
geom_density()+
stat_theodensity(distri = "exp")+
facet_wrap(facets = ~ID,scales = "free")
p
#what the first point of the graphs should be
dexp(x = 100-100,rate = 1/100)
#[1] 0.01
dexp(x = 500-500,rate = 1/250)
#[1] 0.004
********EDIT
OK I am getting closer. The following code works, but only for the second pass through the loop. If I change the numbers around for data1 and data2, it is always only the second one that plots the theoretical distribution.
I did ggplot_build after the first loop and it gives an error in fitdist(), which is code 100. I don't know why it would always fail on the first one but not on the second one, even with the same data.
Any ideas?
#generate 2 exponential distributions with xmin > 0
data1 <- rexp(n = 500,rate = 1/250)+500
data2 <- rexp(n = 500,rate = 1/100)+250
data <- c(data1,data2)
#generate a code for facets
ID1 <- c(rep("Set 1",times=500))
ID2 <- c(rep("Set 2",times=500))
ID <- c(ID1,ID2)
#make the data for plotting
plot_dat <- data.frame(ID,data)
#make the graph
p <- ggplot(data = plot_dat, aes(x=data))+
geom_density(color="red")
#loop through sets and add facets
for (set in unique(plot_dat$ID)){
xmin <- min(plot_dat$data[ID == set])
p<-p+
stat_theodensity(
data = ~subset(.x, ID == set),
aes(x = stage(data - xmin, after_stat = x + xmin)),
distri = "exp"
)
}
#stat_theodensity(distri = "exp")+
p<-p+
facet_wrap(facets = ~ID,scales = "free")
p
I don't know about the statistics of your problem, but if the issue is subtracting a number before calculating the density and afterwards adding it, you might do that with stage(). I couldn't find a more elegant way than hardcoding these values for each set separately, but I'd be happy to hear about more creative solutions.
library(ggh4x)
#> Loading required package: ggplot2
#generate 2 exponential distributions with xmin > 0
data1 <- rexp(n = 500,rate = 1/100)+100
data2 <- rexp(n = 500,rate = 1/250)+500
data <- c(data1,data2)
#generate a code for facets
ID1 <- c(rep("Set 1",times=500))
ID2 <- c(rep("Set 2",times=500))
ID <- c(ID1,ID2)
#make the data for plotting
plot_dat <- data.frame(ID,data)
#make the graph
ggplot(data = plot_dat, aes(x=data))+
geom_density() +
stat_theodensity(
data = ~ subset(.x, ID == "Set 1"),
aes(x = stage(data - 100, after_stat = x + 100)),
distri = "exp"
) +
stat_theodensity(
data = ~ subset(.x, ID == "Set 2"),
aes(x = stage(data - 500, after_stat = x + 500)),
distri = "exp"
) +
facet_wrap(facets = ~ID,scales = "free")
Created on 2022-09-26 by the reprex package (v2.0.1)
EDIT
I think OP's update had a problem with non-standard evaluation. It should work when you use a lapply() loop instead of a for-loop because then xmin is not a global variable that might be mistakingly looked up.
library(ggh4x)
#> Loading required package: ggplot2
library(ggplot2)
#generate 2 exponential distributions with xmin > 0
data1 <- rexp(n = 500,rate = 1/250)+500
data2 <- rexp(n = 500,rate = 1/100)+250
data <- c(data1,data2)
#generate a code for facets
ID1 <- c(rep("Set 1",times=500))
ID2 <- c(rep("Set 2",times=500))
ID <- c(ID1,ID2)
#make the data for plotting
plot_dat <- data.frame(ID,data)
#make the graph
p <- ggplot(data = plot_dat, aes(x=data))+
geom_density(color="red") +
facet_wrap(facets = ~ ID, scales = "free")
#loop through sets and add facets
p + lapply(unique(plot_dat$ID), function(i) {
xmin <- min(plot_dat$data[plot_dat$ID == i])
stat_theodensity(
data = ~ subset(.x, ID == i),
aes(x = stage(data - xmin, after_stat = x + xmin)),
distri = "exp"
)
})
Created on 2022-09-27 by the reprex package (v2.0.1)

How to do a non-linear regression using geom_smooth when start values are separated by category in a different data frame?

I have 2 data frames: one with experimental data that need to be fitted to a non-linear model and another with the starting values for fitting by the nls method. Both experimental data and starting values are separated into categories a and b. I want to make a graph using ggplot2 that shows the curve fitted to the points and separated by category, but I can't indicate the starting values, which are in another data frame, for each category.
In MWE, I present the data frame with the starting values in two ways: 1. each column is a category, or 2. each row is a category. (see Constants1 and Constants2 objects). I thought this organization was relevant to call the values in ggplot
library(ggplot2)
Category <- c("a", "b")
k1 <- c(10, 20)
k2 <- c(0.01, 0.02)
Constants1 <- data.frame(Category, k1, k2)
Constants2 <- data.frame(rbind(k1, k2))
colnames(Constants2) <- Category
x <- seq(0,100,20)
y <- c(0, 2, 3.5, 4.5, 5.5, 6,
0, 7, 11, 14, 16, 17)
df <- expand.grid(x = x,
Category = Category)
df$y <- y
ggplot(data = df,
aes(x = x,
y = y)) +
geom_point(aes(shape = Category)) +
geom_smooth(aes(linetype = Category),
formula = y ~ k1*(1-exp((-k2)*x)),
method.args = list(start = list(k1 = ??, #Help here
k2 = ??)),
se = FALSE,
method = "nls")
Maybe this is what you are looking for. Instead of making use of just one geom_smooth you could add one for each combination of starting values. To this end I make use of purrr::pmap to loop over the data frame with the starting values to create a list of geom_smooth layers which could then be added to the ggplot:
library(ggplot2)
library(purrr)
layer_smooth <- pmap(Constants1, function(...) {
args <- list(...)
geom_smooth(aes(linetype = Category),
formula = y ~ k1*(1-exp((-k2)*x)),
method.args = list(start = list(k1 = args$k1, #Help here
k2 = args$k2)),
se = FALSE,
method = "nls")
})
ggplot(data = df,
aes(x = x,
y = y)) +
geom_point(aes(shape = Category)) +
layer_smooth

Countor plot of bivariate normal functions

Could someone explain me why such function doesn't produce a countor plot as I expected.
I've a bivariate normal function whit:
means = c(5,1)
var_cov = matrix(c(2,1,1,1),2)
I'd like to plot its contour plot; I'm able to reach the result but I'd like to ask why in one case I don't get expected result.
Working Example:
library(MASS)
library(ggplot2)
N <- 100
set.seed(123)
var_cov_matrix <- matrix(c(2,1,1,1),2)
mean <- c(5,1)
bivariate_points <- expand.grid(s.1 = seq(-25, 25, length.out=N), s.2 = seq(-25, 25, length.out=N))
z <- mvtnorm::dmvnorm(bivariate_points, mean = mean, sigma = var_cov_matrix)
data <- cbind(bivariate_points,z)
colnames(data) <- c("X1","X2","Z")
data.df <- as.data.frame(data)
ggplot() +
geom_contour(data=data.df,aes(x=X1,y=X2,z=Z))
Non Working Example:
library(MASS)
library(ggplot2)
N <- 100
set.seed(123)
var_cov_matrix <- matrix(c(2,1,1,1),2)
mean <- c(5,1)
bivariate_points <- mvrnorm(N, mu = mean, Sigma = var_cov_matrix ) # <---- EDITED
z <- mvtnorm::dmvnorm(bivariate_points, mean = mean, sigma = var_cov_matrix)
data <- cbind(bivariate_points,z)
colnames(data) <- c("X1","X2","Z")
data.df <- as.data.frame(data)
ggplot() +
geom_contour(data=data.df,aes(x=X1,y=X2,z=Z))
In your non-working example, since you don't have regular grid for contour plot, you can use stat_density2d instead, i.e.,
ggplot(data.df, aes(x = X1, y = X2, z = Z)) +
geom_point(aes(colour = z)) +
stat_density2d()

gam plots with ggplot

I need to create some gam plots in ggplot. I can do them with the general plot function, but am unsure how to do with ggplot. Here is my code and plots with the regular plot function. I'm using the College data set from the ISLR package.
train.2 <- sample(dim(College)[1],2*dim(College)[1]/3)
train.college <- College[train.2,]
test.college <- College[-train.2,]
gam.college <- gam(Outstate~Private+s(Room.Board)+s(Personal)+s(PhD)+s(perc.alumni)+s(Expend)+s(Grad.Rate), data=train.college)
par(mfrow=c(2,2))
plot(gam.college, se=TRUE,col="blue")
See update below old answer.
Old answer:
There is an implementation of GAM plotting using ggplot2 in voxel library. Here is how you would go about it:
library(ISLR)
library(mgcv)
library(voxel)
library(tidyverse)
library(gridExtra)
data(College)
set.seed(1)
train.2 <- sample(dim(College)[1],2*dim(College)[1]/3)
train.college <- College[train.2,]
test.college <- College[-train.2,]
gam.college <- gam(Outstate~Private+s(Room.Board)+s(Personal)+s(PhD)+s(perc.alumni)+s(Expend)+s(Grad.Rate), data=train.college)
vars <- c("Room.Board", "Personal", "PhD", "perc.alumni","Expend", "Grad.Rate")
map(vars, function(x){
p <- plotGAM(gam.college, smooth.cov = x) #plot customization goes here
g <- ggplotGrob(p)
}) %>%
{grid.arrange(grobs = (.), ncol = 2, nrow = 3)}
after a bunch of errors: In plotGAM(gam.college, smooth.cov = x) :
There are one or more factors in the model fit, please consider plotting by group since plot might be unprecise
To compare to the plot.gam:
par(mfrow=c(2,3))
plot(gam.college, se=TRUE,col="blue")
You might also want to plot the observed values:
map(vars, function(x){
p <- plotGAM(gam.college, smooth.cov = x) +
geom_point(data = train.college, aes_string(y = "Outstate", x = x ), alpha = 0.2) +
geom_rug(data = train.college, aes_string(y = "Outstate", x = x ), alpha = 0.2)
g <- ggplotGrob(p)
}) %>%
{grid.arrange(grobs = (.), ncol = 3, nrow = 2)}
or per group (especially important if you used the by argument (interaction in gam).
map(vars, function(x){
p <- plotGAM(gam.college, smooth.cov = x, groupCovs = "Private") +
geom_point(data = train.college, aes_string(y = "Outstate", x = x, color= "Private"), alpha = 0.2) +
geom_rug(data = train.college, aes_string(y = "Outstate", x = x, color= "Private" ), alpha = 0.2) +
scale_color_manual("Private", values = c("#868686FF", "#0073C2FF")) +
theme(legend.position="none")
g <- ggplotGrob(p)
}) %>%
{grid.arrange(grobs = (.), ncol = 3, nrow = 2)}
Update, 08. Jan. 2020.
I currently think the package mgcViz offers superior functionality compared to the voxel::plotGAMfunction. An example using the above data set and models:
library(mgcViz)
viz <- getViz(gam.college)
print(plot(viz, allTerms = T), pages = 1)
plot customization is similar go ggplot2 syntax:
trt <- plot(viz, allTerms = T) +
l_points() +
l_fitLine(linetype = 1) +
l_ciLine(linetype = 3) +
l_ciBar() +
l_rug() +
theme_grey()
print(trt, pages = 1)
This vignette shows many more examples.

Partially fill density plot for area of interest [duplicate]

I frequently use kernel density plots to illustrate distributions. These are easy and fast to create in R like so:
set.seed(1)
draws <- rnorm(100)^2
dens <- density(draws)
plot(dens)
#or in one line like this: plot(density(rnorm(100)^2))
Which gives me this nice little PDF:
I'd like to shade the area under the PDF from the 75th to 95th percentiles. It's easy to calculate the points using the quantile function:
q75 <- quantile(draws, .75)
q95 <- quantile(draws, .95)
But how do I shade the the area between q75 and q95?
With the polygon() function, see its help page and I believe we had similar questions here too.
You need to find the index of the quantile values to get the actual (x,y) pairs.
Edit: Here you go:
x1 <- min(which(dens$x >= q75))
x2 <- max(which(dens$x < q95))
with(dens, polygon(x=c(x[c(x1,x1:x2,x2)]), y= c(0, y[x1:x2], 0), col="gray"))
Output (added by JDL)
Another solution:
dd <- with(dens,data.frame(x,y))
library(ggplot2)
qplot(x,y,data=dd,geom="line")+
geom_ribbon(data=subset(dd,x>q75 & x<q95),aes(ymax=y),ymin=0,
fill="red",colour=NA,alpha=0.5)
Result:
An expanded solution:
If you wanted to shade both tails (copy & paste of Dirk's code) and use known x values:
set.seed(1)
draws <- rnorm(100)^2
dens <- density(draws)
plot(dens)
q2 <- 2
q65 <- 6.5
qn08 <- -0.8
qn02 <- -0.2
x1 <- min(which(dens$x >= q2))
x2 <- max(which(dens$x < q65))
x3 <- min(which(dens$x >= qn08))
x4 <- max(which(dens$x < qn02))
with(dens, polygon(x=c(x[c(x1,x1:x2,x2)]), y= c(0, y[x1:x2], 0), col="gray"))
with(dens, polygon(x=c(x[c(x3,x3:x4,x4)]), y= c(0, y[x3:x4], 0), col="gray"))
Result:
This question needs a lattice answer. Here's a very basic one, simply adapting the method employed by Dirk and others:
#Set up the data
set.seed(1)
draws <- rnorm(100)^2
dens <- density(draws)
#Put in a simple data frame
d <- data.frame(x = dens$x, y = dens$y)
#Define a custom panel function;
# Options like color don't need to be hard coded
shadePanel <- function(x,y,shadeLims){
panel.lines(x,y)
m1 <- min(which(x >= shadeLims[1]))
m2 <- max(which(x <= shadeLims[2]))
tmp <- data.frame(x1 = x[c(m1,m1:m2,m2)], y1 = c(0,y[m1:m2],0))
panel.polygon(tmp$x1,tmp$y1,col = "blue")
}
#Plot
xyplot(y~x,data = d, panel = shadePanel, shadeLims = c(1,3))
Here's another ggplot2 variant based on a function that approximates the kernel density at the original data values:
approxdens <- function(x) {
dens <- density(x)
f <- with(dens, approxfun(x, y))
f(x)
}
Using the original data (rather than producing a new data frame with the density estimate's x and y values) has the benefit of also working in faceted plots where the quantile values depend on the variable by which the data is being grouped:
Code used
library(tidyverse)
library(RColorBrewer)
# dummy data
set.seed(1)
n <- 1e2
dt <- tibble(value = rnorm(n)^2)
# function that approximates the density at the provided values
approxdens <- function(x) {
dens <- density(x)
f <- with(dens, approxfun(x, y))
f(x)
}
probs <- c(0.75, 0.95)
dt <- dt %>%
mutate(dy = approxdens(value), # calculate density
p = percent_rank(value), # percentile rank
pcat = as.factor(cut(p, breaks = probs, # percentile category based on probs
include.lowest = TRUE)))
ggplot(dt, aes(value, dy)) +
geom_ribbon(aes(ymin = 0, ymax = dy, fill = pcat)) +
geom_line() +
scale_fill_brewer(guide = "none") +
theme_bw()
# dummy data with 2 groups
dt2 <- tibble(category = c(rep("A", n), rep("B", n)),
value = c(rnorm(n)^2, rnorm(n, mean = 2)))
dt2 <- dt2 %>%
group_by(category) %>%
mutate(dy = approxdens(value),
p = percent_rank(value),
pcat = as.factor(cut(p, breaks = probs,
include.lowest = TRUE)))
# faceted plot
ggplot(dt2, aes(value, dy)) +
geom_ribbon(aes(ymin = 0, ymax = dy, fill = pcat)) +
geom_line() +
facet_wrap(~ category, nrow = 2, scales = "fixed") +
scale_fill_brewer(guide = "none") +
theme_bw()
Created on 2018-07-13 by the reprex package (v0.2.0).

Resources