Countor plot of bivariate normal functions - r

Could someone explain me why such function doesn't produce a countor plot as I expected.
I've a bivariate normal function whit:
means = c(5,1)
var_cov = matrix(c(2,1,1,1),2)
I'd like to plot its contour plot; I'm able to reach the result but I'd like to ask why in one case I don't get expected result.
Working Example:
library(MASS)
library(ggplot2)
N <- 100
set.seed(123)
var_cov_matrix <- matrix(c(2,1,1,1),2)
mean <- c(5,1)
bivariate_points <- expand.grid(s.1 = seq(-25, 25, length.out=N), s.2 = seq(-25, 25, length.out=N))
z <- mvtnorm::dmvnorm(bivariate_points, mean = mean, sigma = var_cov_matrix)
data <- cbind(bivariate_points,z)
colnames(data) <- c("X1","X2","Z")
data.df <- as.data.frame(data)
ggplot() +
geom_contour(data=data.df,aes(x=X1,y=X2,z=Z))
Non Working Example:
library(MASS)
library(ggplot2)
N <- 100
set.seed(123)
var_cov_matrix <- matrix(c(2,1,1,1),2)
mean <- c(5,1)
bivariate_points <- mvrnorm(N, mu = mean, Sigma = var_cov_matrix ) # <---- EDITED
z <- mvtnorm::dmvnorm(bivariate_points, mean = mean, sigma = var_cov_matrix)
data <- cbind(bivariate_points,z)
colnames(data) <- c("X1","X2","Z")
data.df <- as.data.frame(data)
ggplot() +
geom_contour(data=data.df,aes(x=X1,y=X2,z=Z))

In your non-working example, since you don't have regular grid for contour plot, you can use stat_density2d instead, i.e.,
ggplot(data.df, aes(x = X1, y = X2, z = Z)) +
geom_point(aes(colour = z)) +
stat_density2d()

Related

Q-Q plot with ggplot2::stat_qq, colours, multiple groups with Q-Q lines

I need to do something similar to what's shown in this excellent question:
Q-Q plot with ggplot2::stat_qq, colours, single group
but unfortunately there's a slight difference which is blocking me. Unlike the original question, I do want to separate the quantile computations by group, but I also want to add a QQ-line for each group. Following the OP's code, I can create the quantile-quantile plots by group:
library(dplyr)
library(ggplot2)
library(broom) ## for augment()
set.seed(1001)
N <- 1000
G <- 10
dd <- data_frame(x = runif(N),
group = factor(sample(LETTERS[1:G], size=N, replace=TRUE)),
y = rnorm(N) + 2*x + as.numeric(group))
m1 <- lm(y~x, data=dd)
dda <- cbind(augment(m1), group=dd$group)
sample_var <- "y"
group_var <- "group"
p <- ggplot(dda)+stat_qq(aes_string(sample=sample_var, colour=group_var))
p
How can I add the quantile-quantile lines for each group? NOTE: ideally I would like to specify the sample column and the group column at runtime. That's why I used aes_string.
EDIT to better clarify my problem, I add code to compute quantile-quantile lines when there's only one group. I need to generalize the code to multiple groups.
library(dplyr)
library(ggplot2)
library(broom) ## for augment()
# this section of the code is the same as before, EXCEPT G = 1, because for
# now the code only works for 1 group
set.seed(1001)
N <- 1000
G <- 1
dd <- data_frame(x = runif(N),
group = factor(sample(LETTERS[1:G], size=N, replace=TRUE)),
y = rnorm(N) + 2*x + as.numeric(group))
m1 <- lm(y~x, data=dd)
dda <- cbind(augment(m1), group=dd$group)
sample_var <- "y"
group_var <- "group"
# code to compute the slope and the intercept of the qq-line: basically,
# I would need to compute the slopes and the intercepts of the qq-lines
# for each group
vec <- dda[, sample_var]
y <- quantile(vec[!is.na(vec)], c(0.25, 0.75))
x <- qnorm(c(0.25, 0.75))
slope <- diff(y)/diff(x)
int <- y[1] - slope * x[1]
# now plot with ggplot2
p <- ggplot(dda)+stat_qq(aes_string(sample=sample_var, colour=group_var))+geom_abline(slope = slope, intercept = int)
p
Turning the code to calculate the qqlines into a function and then using lapply to create a separate data.frame for your qqlines is one approach.
library(dplyr)
library(ggplot2)
library(broom) ## for augment()
set.seed(1001)
N <- 1000
G <- 3
dd <- data_frame(x = runif(N),
group = factor(sample(LETTERS[1:G], size=N, replace=TRUE)),
y = rnorm(N) + 2*x + as.numeric(group))
m1 <- lm(y~x, data=dd)
dda <- cbind(augment(m1), group=dd$group)
sample_var <- "y"
group_var <- "group"
# code to compute the slope and the intercept of the qq-line
qqlines <- function(vec, group) {
x <- qnorm(c(0.25, 0.75))
y <- quantile(vec[!is.na(vec)], c(0.25, 0.75))
slope <- diff(y)/diff(x)
int <- y[1] - slope * x[1]
data.frame(slope, int, group)
}
slopedf <- do.call(rbind,lapply(unique(dda$group), function(grp) qqlines(dda[dda$group == grp,sample_var], grp)))
# now plot with ggplot2
p <- ggplot(dda)+stat_qq(aes_string(sample=sample_var, colour=group_var)) +
geom_abline(data = slopedf, aes(slope = slope, intercept = int, colour = group))
p
A more concise alternative. In ggplot2 v.3.0.0 and above you can use stat_qq_line:
ggplot(dda, aes(sample = y, colour = group)) +
stat_qq() +
stat_qq_line()
Output:
Data, from Jeremy Voisey's answer:
library(ggplot2)
library(broom)
set.seed(1001)
N <- 1000
G <- 3
dd <- data_frame(
x = runif(N),
group = factor(sample(LETTERS[1:G], size = N, replace = TRUE)),
y = rnorm(N) + 2 * x + as.numeric(group)
)
m1 <- lm(y ~ x, data = dd)
dda <- cbind(augment(m1), group = dd$group)

Cumulative Density Plots with ggplot and plotly

When we take the following example from ggplot2 docs
df <- data.frame(x = c(rnorm(100, 0, 3), rnorm(100, 0, 10)),
g = gl(2, 100))
library(ggplot2)
p <- ggplot(df, aes(x, colour = g)) +
stat_ecdf(geom = "step", na.rm = T) + # interchange point and step
theme_bw()
p
We can create a standard cdf plot. Now if we want to play with the plot in plotly, I obtain a very confusing image when I use the step command. See below. However, when I use the point command plotly behaves like it should. What is happening with the step command? Why can't I recreate the image from using ggplot only?
library(plotly)
ggplotly(p)
I found the solution here https://community.plotly.com/t/bug-with-ggplot2-stat-ecdf-function/1187/3.
You should reorder the dataframe along x.
df <- dplyr::arrange(df, x)
library(ggplot2)
p <- ggplot(df, aes(x, colour = g)) +
stat_ecdf(geom = "step", na.rm = T) +
theme_bw()
p
library(plotly)
ggplotly(p)
This can be solved using ecdf() function.
## ecdf function to get y and 1-y
rcdf <- function (x) {
cdf <- ecdf(x)
y1 <- cdf(x)
y <- unique(y1)
# xrcdf <- 1-y ## to get reverse cdf
xrcdf <- y ## to get cdf
}
ug <- unique(df$g)
ng <- length(ug)
xll <- min(df$x)
xul <- max(df$x)
adr <- data.frame(myxx=c(), myyy=c(), mygg=c())
lapply(1:ng, function(i){
ad2r <- subset(df, g==ug[i])
myx1 <- unique(ad2r$x)
myxx <- c(xll,myx1,xul) ## add lowest value - dummy to assign 100%
myy1 <- rcdf(ad2r$x)
# myyy <- c(1.0,myy1,0.0) ## add 100% to get reverse cdf
myyy <- c(0.0,myy1,1.0) ## add 0% to get cdf
mygg <- ug[i]
ad2rf <- data.frame(myxx,myyy,mygg)
adr <<- rbind(adr,ad2rf)
})
adf <- adr[order(adr$myxx),]
pp <- ggplot(data=adf,
aes_(x=adf$myxx, y=100*adf$myyy, col=adf$mygg, group=adf$mygg)) +
geom_step() +
labs(title="CDF", y = "Y", x = "X", col=NULL)
ppp <- ggplotly(pp, tooltip=c("x","y"))
ppp
This gives the following output:
CDF

Fill negative value area below geom_line [duplicate]

I frequently use kernel density plots to illustrate distributions. These are easy and fast to create in R like so:
set.seed(1)
draws <- rnorm(100)^2
dens <- density(draws)
plot(dens)
#or in one line like this: plot(density(rnorm(100)^2))
Which gives me this nice little PDF:
I'd like to shade the area under the PDF from the 75th to 95th percentiles. It's easy to calculate the points using the quantile function:
q75 <- quantile(draws, .75)
q95 <- quantile(draws, .95)
But how do I shade the the area between q75 and q95?
With the polygon() function, see its help page and I believe we had similar questions here too.
You need to find the index of the quantile values to get the actual (x,y) pairs.
Edit: Here you go:
x1 <- min(which(dens$x >= q75))
x2 <- max(which(dens$x < q95))
with(dens, polygon(x=c(x[c(x1,x1:x2,x2)]), y= c(0, y[x1:x2], 0), col="gray"))
Output (added by JDL)
Another solution:
dd <- with(dens,data.frame(x,y))
library(ggplot2)
qplot(x,y,data=dd,geom="line")+
geom_ribbon(data=subset(dd,x>q75 & x<q95),aes(ymax=y),ymin=0,
fill="red",colour=NA,alpha=0.5)
Result:
An expanded solution:
If you wanted to shade both tails (copy & paste of Dirk's code) and use known x values:
set.seed(1)
draws <- rnorm(100)^2
dens <- density(draws)
plot(dens)
q2 <- 2
q65 <- 6.5
qn08 <- -0.8
qn02 <- -0.2
x1 <- min(which(dens$x >= q2))
x2 <- max(which(dens$x < q65))
x3 <- min(which(dens$x >= qn08))
x4 <- max(which(dens$x < qn02))
with(dens, polygon(x=c(x[c(x1,x1:x2,x2)]), y= c(0, y[x1:x2], 0), col="gray"))
with(dens, polygon(x=c(x[c(x3,x3:x4,x4)]), y= c(0, y[x3:x4], 0), col="gray"))
Result:
This question needs a lattice answer. Here's a very basic one, simply adapting the method employed by Dirk and others:
#Set up the data
set.seed(1)
draws <- rnorm(100)^2
dens <- density(draws)
#Put in a simple data frame
d <- data.frame(x = dens$x, y = dens$y)
#Define a custom panel function;
# Options like color don't need to be hard coded
shadePanel <- function(x,y,shadeLims){
panel.lines(x,y)
m1 <- min(which(x >= shadeLims[1]))
m2 <- max(which(x <= shadeLims[2]))
tmp <- data.frame(x1 = x[c(m1,m1:m2,m2)], y1 = c(0,y[m1:m2],0))
panel.polygon(tmp$x1,tmp$y1,col = "blue")
}
#Plot
xyplot(y~x,data = d, panel = shadePanel, shadeLims = c(1,3))
Here's another ggplot2 variant based on a function that approximates the kernel density at the original data values:
approxdens <- function(x) {
dens <- density(x)
f <- with(dens, approxfun(x, y))
f(x)
}
Using the original data (rather than producing a new data frame with the density estimate's x and y values) has the benefit of also working in faceted plots where the quantile values depend on the variable by which the data is being grouped:
Code used
library(tidyverse)
library(RColorBrewer)
# dummy data
set.seed(1)
n <- 1e2
dt <- tibble(value = rnorm(n)^2)
# function that approximates the density at the provided values
approxdens <- function(x) {
dens <- density(x)
f <- with(dens, approxfun(x, y))
f(x)
}
probs <- c(0.75, 0.95)
dt <- dt %>%
mutate(dy = approxdens(value), # calculate density
p = percent_rank(value), # percentile rank
pcat = as.factor(cut(p, breaks = probs, # percentile category based on probs
include.lowest = TRUE)))
ggplot(dt, aes(value, dy)) +
geom_ribbon(aes(ymin = 0, ymax = dy, fill = pcat)) +
geom_line() +
scale_fill_brewer(guide = "none") +
theme_bw()
# dummy data with 2 groups
dt2 <- tibble(category = c(rep("A", n), rep("B", n)),
value = c(rnorm(n)^2, rnorm(n, mean = 2)))
dt2 <- dt2 %>%
group_by(category) %>%
mutate(dy = approxdens(value),
p = percent_rank(value),
pcat = as.factor(cut(p, breaks = probs,
include.lowest = TRUE)))
# faceted plot
ggplot(dt2, aes(value, dy)) +
geom_ribbon(aes(ymin = 0, ymax = dy, fill = pcat)) +
geom_line() +
facet_wrap(~ category, nrow = 2, scales = "fixed") +
scale_fill_brewer(guide = "none") +
theme_bw()
Created on 2018-07-13 by the reprex package (v0.2.0).

Partially fill density plot for area of interest [duplicate]

I frequently use kernel density plots to illustrate distributions. These are easy and fast to create in R like so:
set.seed(1)
draws <- rnorm(100)^2
dens <- density(draws)
plot(dens)
#or in one line like this: plot(density(rnorm(100)^2))
Which gives me this nice little PDF:
I'd like to shade the area under the PDF from the 75th to 95th percentiles. It's easy to calculate the points using the quantile function:
q75 <- quantile(draws, .75)
q95 <- quantile(draws, .95)
But how do I shade the the area between q75 and q95?
With the polygon() function, see its help page and I believe we had similar questions here too.
You need to find the index of the quantile values to get the actual (x,y) pairs.
Edit: Here you go:
x1 <- min(which(dens$x >= q75))
x2 <- max(which(dens$x < q95))
with(dens, polygon(x=c(x[c(x1,x1:x2,x2)]), y= c(0, y[x1:x2], 0), col="gray"))
Output (added by JDL)
Another solution:
dd <- with(dens,data.frame(x,y))
library(ggplot2)
qplot(x,y,data=dd,geom="line")+
geom_ribbon(data=subset(dd,x>q75 & x<q95),aes(ymax=y),ymin=0,
fill="red",colour=NA,alpha=0.5)
Result:
An expanded solution:
If you wanted to shade both tails (copy & paste of Dirk's code) and use known x values:
set.seed(1)
draws <- rnorm(100)^2
dens <- density(draws)
plot(dens)
q2 <- 2
q65 <- 6.5
qn08 <- -0.8
qn02 <- -0.2
x1 <- min(which(dens$x >= q2))
x2 <- max(which(dens$x < q65))
x3 <- min(which(dens$x >= qn08))
x4 <- max(which(dens$x < qn02))
with(dens, polygon(x=c(x[c(x1,x1:x2,x2)]), y= c(0, y[x1:x2], 0), col="gray"))
with(dens, polygon(x=c(x[c(x3,x3:x4,x4)]), y= c(0, y[x3:x4], 0), col="gray"))
Result:
This question needs a lattice answer. Here's a very basic one, simply adapting the method employed by Dirk and others:
#Set up the data
set.seed(1)
draws <- rnorm(100)^2
dens <- density(draws)
#Put in a simple data frame
d <- data.frame(x = dens$x, y = dens$y)
#Define a custom panel function;
# Options like color don't need to be hard coded
shadePanel <- function(x,y,shadeLims){
panel.lines(x,y)
m1 <- min(which(x >= shadeLims[1]))
m2 <- max(which(x <= shadeLims[2]))
tmp <- data.frame(x1 = x[c(m1,m1:m2,m2)], y1 = c(0,y[m1:m2],0))
panel.polygon(tmp$x1,tmp$y1,col = "blue")
}
#Plot
xyplot(y~x,data = d, panel = shadePanel, shadeLims = c(1,3))
Here's another ggplot2 variant based on a function that approximates the kernel density at the original data values:
approxdens <- function(x) {
dens <- density(x)
f <- with(dens, approxfun(x, y))
f(x)
}
Using the original data (rather than producing a new data frame with the density estimate's x and y values) has the benefit of also working in faceted plots where the quantile values depend on the variable by which the data is being grouped:
Code used
library(tidyverse)
library(RColorBrewer)
# dummy data
set.seed(1)
n <- 1e2
dt <- tibble(value = rnorm(n)^2)
# function that approximates the density at the provided values
approxdens <- function(x) {
dens <- density(x)
f <- with(dens, approxfun(x, y))
f(x)
}
probs <- c(0.75, 0.95)
dt <- dt %>%
mutate(dy = approxdens(value), # calculate density
p = percent_rank(value), # percentile rank
pcat = as.factor(cut(p, breaks = probs, # percentile category based on probs
include.lowest = TRUE)))
ggplot(dt, aes(value, dy)) +
geom_ribbon(aes(ymin = 0, ymax = dy, fill = pcat)) +
geom_line() +
scale_fill_brewer(guide = "none") +
theme_bw()
# dummy data with 2 groups
dt2 <- tibble(category = c(rep("A", n), rep("B", n)),
value = c(rnorm(n)^2, rnorm(n, mean = 2)))
dt2 <- dt2 %>%
group_by(category) %>%
mutate(dy = approxdens(value),
p = percent_rank(value),
pcat = as.factor(cut(p, breaks = probs,
include.lowest = TRUE)))
# faceted plot
ggplot(dt2, aes(value, dy)) +
geom_ribbon(aes(ymin = 0, ymax = dy, fill = pcat)) +
geom_line() +
facet_wrap(~ category, nrow = 2, scales = "fixed") +
scale_fill_brewer(guide = "none") +
theme_bw()
Created on 2018-07-13 by the reprex package (v0.2.0).

Shading a kernel density plot between two points.

I frequently use kernel density plots to illustrate distributions. These are easy and fast to create in R like so:
set.seed(1)
draws <- rnorm(100)^2
dens <- density(draws)
plot(dens)
#or in one line like this: plot(density(rnorm(100)^2))
Which gives me this nice little PDF:
I'd like to shade the area under the PDF from the 75th to 95th percentiles. It's easy to calculate the points using the quantile function:
q75 <- quantile(draws, .75)
q95 <- quantile(draws, .95)
But how do I shade the the area between q75 and q95?
With the polygon() function, see its help page and I believe we had similar questions here too.
You need to find the index of the quantile values to get the actual (x,y) pairs.
Edit: Here you go:
x1 <- min(which(dens$x >= q75))
x2 <- max(which(dens$x < q95))
with(dens, polygon(x=c(x[c(x1,x1:x2,x2)]), y= c(0, y[x1:x2], 0), col="gray"))
Output (added by JDL)
Another solution:
dd <- with(dens,data.frame(x,y))
library(ggplot2)
qplot(x,y,data=dd,geom="line")+
geom_ribbon(data=subset(dd,x>q75 & x<q95),aes(ymax=y),ymin=0,
fill="red",colour=NA,alpha=0.5)
Result:
An expanded solution:
If you wanted to shade both tails (copy & paste of Dirk's code) and use known x values:
set.seed(1)
draws <- rnorm(100)^2
dens <- density(draws)
plot(dens)
q2 <- 2
q65 <- 6.5
qn08 <- -0.8
qn02 <- -0.2
x1 <- min(which(dens$x >= q2))
x2 <- max(which(dens$x < q65))
x3 <- min(which(dens$x >= qn08))
x4 <- max(which(dens$x < qn02))
with(dens, polygon(x=c(x[c(x1,x1:x2,x2)]), y= c(0, y[x1:x2], 0), col="gray"))
with(dens, polygon(x=c(x[c(x3,x3:x4,x4)]), y= c(0, y[x3:x4], 0), col="gray"))
Result:
This question needs a lattice answer. Here's a very basic one, simply adapting the method employed by Dirk and others:
#Set up the data
set.seed(1)
draws <- rnorm(100)^2
dens <- density(draws)
#Put in a simple data frame
d <- data.frame(x = dens$x, y = dens$y)
#Define a custom panel function;
# Options like color don't need to be hard coded
shadePanel <- function(x,y,shadeLims){
panel.lines(x,y)
m1 <- min(which(x >= shadeLims[1]))
m2 <- max(which(x <= shadeLims[2]))
tmp <- data.frame(x1 = x[c(m1,m1:m2,m2)], y1 = c(0,y[m1:m2],0))
panel.polygon(tmp$x1,tmp$y1,col = "blue")
}
#Plot
xyplot(y~x,data = d, panel = shadePanel, shadeLims = c(1,3))
Here's another ggplot2 variant based on a function that approximates the kernel density at the original data values:
approxdens <- function(x) {
dens <- density(x)
f <- with(dens, approxfun(x, y))
f(x)
}
Using the original data (rather than producing a new data frame with the density estimate's x and y values) has the benefit of also working in faceted plots where the quantile values depend on the variable by which the data is being grouped:
Code used
library(tidyverse)
library(RColorBrewer)
# dummy data
set.seed(1)
n <- 1e2
dt <- tibble(value = rnorm(n)^2)
# function that approximates the density at the provided values
approxdens <- function(x) {
dens <- density(x)
f <- with(dens, approxfun(x, y))
f(x)
}
probs <- c(0.75, 0.95)
dt <- dt %>%
mutate(dy = approxdens(value), # calculate density
p = percent_rank(value), # percentile rank
pcat = as.factor(cut(p, breaks = probs, # percentile category based on probs
include.lowest = TRUE)))
ggplot(dt, aes(value, dy)) +
geom_ribbon(aes(ymin = 0, ymax = dy, fill = pcat)) +
geom_line() +
scale_fill_brewer(guide = "none") +
theme_bw()
# dummy data with 2 groups
dt2 <- tibble(category = c(rep("A", n), rep("B", n)),
value = c(rnorm(n)^2, rnorm(n, mean = 2)))
dt2 <- dt2 %>%
group_by(category) %>%
mutate(dy = approxdens(value),
p = percent_rank(value),
pcat = as.factor(cut(p, breaks = probs,
include.lowest = TRUE)))
# faceted plot
ggplot(dt2, aes(value, dy)) +
geom_ribbon(aes(ymin = 0, ymax = dy, fill = pcat)) +
geom_line() +
facet_wrap(~ category, nrow = 2, scales = "fixed") +
scale_fill_brewer(guide = "none") +
theme_bw()
Created on 2018-07-13 by the reprex package (v0.2.0).

Resources