How to plot multiple Poisson distribution in one plot - r

I would like to plot multiple Poisson (with different lambdas (1:10))
I found the following function to draw a plot
plot_pois = function(lambda = 5)
{
plot(0:20, dpois( x=0:20, lambda=lambda ), xlim=c(-2,20))
normden <- function(x){dnorm(x, mean= lambda, sd=sqrt(lambda))}
curve(normden, from=-4, to=20, add=TRUE, col=lambda)
}
plot.new()
plot_pois(2)
But I can't plot another Poisson over it. I tried to change plot to points or lines but it totally changes the plot. I would also like to add a legends containing different colors for different values of lambda.
If I could plot it using ggplot, it would be a better option.

Another possible tidyverse solution:
library(tidyverse)
# Build Poisson distributions
p_dat <- map_df(1:10, ~ tibble(
l = paste(.),
x = 0:20,
y = dpois(0:20, .)
))
# Build Normal distributions
n_dat <- map_df(1:10, ~ tibble(
l = paste(.),
x = seq(0, 20, by = 0.001),
y = dnorm(seq(0, 20, by = 0.001), ., sqrt(.))
))
# Use ggplot2 to plot
ggplot(n_dat, aes(x, y, color = factor(l, levels = 1:10))) +
geom_line() +
geom_point(data = p_dat, aes(x, y, color = factor(l, levels = 1:10))) +
labs(color = "Lambda:") +
theme_minimal()
Created on 2019-05-06 by the reprex package (v0.2.1)

In ggplot2 you can use lapply to loop over different lambdas:
library(ggplot2)
lambdas <- c(5, 2)
ggplot(data = data.frame(x = 0:20)) +
lapply(lambdas, function(l) geom_point(aes(x = x, y = dpois(x, lambda = l), col = factor(l)))) +
lapply(lambdas, function(l) stat_function(fun = dnorm, args = list(mean = l, sd = sqrt(l)),
aes(x = x, col = factor(l))))
Axes titles and limits, the legend title etc. can then be customized as usual in ggplot2.

Related

How to put plotmath labels in ggplot facets

We often want individual regression equations in ggplot facets. The best way to do this is build the labels in a dataframe and then add them manually. But what if the labels contain plotmath, e.g., superscripts?
Here is a way to do it. The plotmath is converted to a string and then parsed by ggplot. The test_eqn function is taken from another Stackoverflow post, I'll link it when I find it again. Sorry about that.
library(ggplot2)
library(dplyr)
test_eqn <- function(y, x){
m <- lm(log(y) ~ log(x)) # fit y = a * x ^ b in log space
p <- exp(predict(m)) # model prediction of y
eq <- substitute(expression(Y==a~X^~b),
list(
a = format(unname(exp(coef(m)[1])), digits = 3),
b = format(unname(coef(m)[2]), digits = 3)
))
list(eq = as.character(eq)[2], pred = p)
}
set.seed(123)
x <- runif(20)
y <- runif(20)
test_eqn(x,y)$eq
#> [1] "Y == \"0.57\" ~ X^~\"0.413\""
data <- data.frame(x = x,
y = y,
f = sample(c("A","B"), 20, replace = TRUE)) %>%
group_by(f) %>%
mutate(
label = test_eqn(y,x)$eq, # add label
labelx = mean(x),
labely = mean(y),
pred = test_eqn(y,x)$pred # add prediction
)
# plot fits (use slice(1) to avoid multiple copies of labels)
ggplot(data) +
geom_point(aes(x = x, y = y)) +
geom_line(aes(x = x, y = pred), colour = "red") +
geom_text(data = slice(data, 1), aes(x = labelx, y = labely, label = label), parse = TRUE) +
facet_wrap("f")
Created on 2021-10-20 by the reprex package (v2.0.1)

Match palette of persp graph to contour graph ggplot2

I want to make contour levels of bivariate normal density plotted in base persp function. Here is the code:
###############
library(pacman)
p_load(tidyverse)
p_load(mvtnorm)
p_load(GA)
my_mean<-c(25,65)
mycors<-seq(-1,1,by=.25)
sd_vec<-c(5,7)
i<-3
temp_cor<-matrix(c(1,mycors[i],
mycors[i],1),
byrow = T,ncol=2)
V<-sd_vec %*% t(sd_vec) *temp_cor
my_x<-seq(my_mean[1]-3*sd_vec[1], my_mean[1]+3*sd_vec[1], length.out=20)
my_y<-seq(my_mean[2]-3*sd_vec[2], my_mean[2]+3*sd_vec[2], length.out=20)
temp_f<-function(a,b){dmvnorm(cbind(a,b), my_mean,V)}
my_z<-outer(my_x, my_y,temp_f)
nlevels<-20
my_zlim <- range(my_z, finite = TRUE)
my_levels <- pretty(my_zlim, nlevels)
zz <- (my_z[-1, -1] + my_z[-1, -ncol(my_z)] + my_z[-nrow(my_z), -1] + my_z[-nrow(my_z),
-ncol(my_z)])/4
cols <- jet.colors(length(my_levels) - 1)
zzz <- cut(zz, breaks = my_levels, labels = cols)
persp(my_x, my_y, my_z, theta = -25, phi = 45, expand = 0.5,xlab="x",ylab="y",zlab="f(x,y)",col = as.character(zzz))
data.grid <- expand.grid(x = seq(my_mean[1]-3*sd_vec[1], my_mean[1]+3*sd_vec[1], length.out=200),
y = seq(my_mean[2]-3*sd_vec[2], my_mean[2]+3*sd_vec[2], length.out=200))
q.samp <- cbind(data.grid, prob = dmvnorm(data.grid, mean = my_mean, sigma = V))
ggplot(q.samp, aes(x=x, y=y, z=prob)) +
geom_contour( aes(z=prob, color=..level..)) +
#scale_color_gradient(level = jet.colors(length(my_levels) - 1))+
theme_bw()
Created on 2020-10-31 by the reprex package (v0.3.0)
Since the color palette of the persp graph seems to be discrete, I want to give colors to the contours which approximately resembles the colors in persp graph.
Are you looking for geom_contour_fill with a scale_fill_discrete according to the interpolated values of cols?
ggplot(q.samp, aes(x, y, z = prob)) +
geom_contour_filled( aes(fill = ..level..), col = "black", bins = 11) +
scale_fill_discrete(type = jet.colors(11)) +
theme_bw()
Or if you are looking for colored lines instead of fills you can use scale_gradientn
ggplot(q.samp, aes(x, y, z = prob)) +
geom_contour(aes(color = ..level..), bins = 11, size = 1) +
scale_color_gradientn(colours = jet.colors(11)) +
theme_bw()

gam plots with ggplot

I need to create some gam plots in ggplot. I can do them with the general plot function, but am unsure how to do with ggplot. Here is my code and plots with the regular plot function. I'm using the College data set from the ISLR package.
train.2 <- sample(dim(College)[1],2*dim(College)[1]/3)
train.college <- College[train.2,]
test.college <- College[-train.2,]
gam.college <- gam(Outstate~Private+s(Room.Board)+s(Personal)+s(PhD)+s(perc.alumni)+s(Expend)+s(Grad.Rate), data=train.college)
par(mfrow=c(2,2))
plot(gam.college, se=TRUE,col="blue")
See update below old answer.
Old answer:
There is an implementation of GAM plotting using ggplot2 in voxel library. Here is how you would go about it:
library(ISLR)
library(mgcv)
library(voxel)
library(tidyverse)
library(gridExtra)
data(College)
set.seed(1)
train.2 <- sample(dim(College)[1],2*dim(College)[1]/3)
train.college <- College[train.2,]
test.college <- College[-train.2,]
gam.college <- gam(Outstate~Private+s(Room.Board)+s(Personal)+s(PhD)+s(perc.alumni)+s(Expend)+s(Grad.Rate), data=train.college)
vars <- c("Room.Board", "Personal", "PhD", "perc.alumni","Expend", "Grad.Rate")
map(vars, function(x){
p <- plotGAM(gam.college, smooth.cov = x) #plot customization goes here
g <- ggplotGrob(p)
}) %>%
{grid.arrange(grobs = (.), ncol = 2, nrow = 3)}
after a bunch of errors: In plotGAM(gam.college, smooth.cov = x) :
There are one or more factors in the model fit, please consider plotting by group since plot might be unprecise
To compare to the plot.gam:
par(mfrow=c(2,3))
plot(gam.college, se=TRUE,col="blue")
You might also want to plot the observed values:
map(vars, function(x){
p <- plotGAM(gam.college, smooth.cov = x) +
geom_point(data = train.college, aes_string(y = "Outstate", x = x ), alpha = 0.2) +
geom_rug(data = train.college, aes_string(y = "Outstate", x = x ), alpha = 0.2)
g <- ggplotGrob(p)
}) %>%
{grid.arrange(grobs = (.), ncol = 3, nrow = 2)}
or per group (especially important if you used the by argument (interaction in gam).
map(vars, function(x){
p <- plotGAM(gam.college, smooth.cov = x, groupCovs = "Private") +
geom_point(data = train.college, aes_string(y = "Outstate", x = x, color= "Private"), alpha = 0.2) +
geom_rug(data = train.college, aes_string(y = "Outstate", x = x, color= "Private" ), alpha = 0.2) +
scale_color_manual("Private", values = c("#868686FF", "#0073C2FF")) +
theme(legend.position="none")
g <- ggplotGrob(p)
}) %>%
{grid.arrange(grobs = (.), ncol = 3, nrow = 2)}
Update, 08. Jan. 2020.
I currently think the package mgcViz offers superior functionality compared to the voxel::plotGAMfunction. An example using the above data set and models:
library(mgcViz)
viz <- getViz(gam.college)
print(plot(viz, allTerms = T), pages = 1)
plot customization is similar go ggplot2 syntax:
trt <- plot(viz, allTerms = T) +
l_points() +
l_fitLine(linetype = 1) +
l_ciLine(linetype = 3) +
l_ciBar() +
l_rug() +
theme_grey()
print(trt, pages = 1)
This vignette shows many more examples.

Nonparametric regression ggplot

I'm trying to plot some nonparametric regression curves with ggplot2. I achieved It with the base plot()function:
library(KernSmooth)
set.seed(1995)
X <- runif(100, -1, 1)
G <- X[which (X > 0)]
L <- X[which (X < 0)]
u <- rnorm(100, 0 , 0.02)
Y <- -exp(-20*L^2)-exp(-20*G^2)/(X+1)+u
m <- lm(Y~X)
plot(Y~X)
abline(m, col="red")
m2 <- locpoly(X, Y, bandwidth = 0.05, degree = 0)
lines(m2$x, m2$y, col = "red")
m3 <- locpoly(X, Y, bandwidth = 0.15, degree = 0)
lines(m3$x, m3$y, col = "black")
m4 <- locpoly(X, Y, bandwidth = 0.3, degree = 0)
lines(m4$x, m4$y, col = "green")
legend("bottomright", legend = c("NW(bw=0.05)", "NW(bw=0.15)", "NW(bw=0.3)"),
lty = 1, col = c("red", "black", "green"), cex = 0.5)
With ggplot2 have achieved plotting the linear regression:
With this code:
ggplot(m, aes(x = X, y = Y)) +
geom_point(shape = 1) +
geom_smooth(method = lm, se = FALSE) +
theme(axis.line = element_line(colour = "black", size = 0.25))
But I dont't know how to add the other lines to this plot, as in the base R plot. Any suggestions? Thanks in advance.
Solution
The shortest solution (though not the most beautiful one) is to add the lines using the data= argument of the geom_line function:
ggplot(m, aes(x = X, y = Y)) +
geom_point(shape = 1) +
geom_smooth(method = lm, se = FALSE) +
theme(axis.line = element_line(colour = "black", size = 0.25)) +
geom_line(data = as.data.frame(m2), mapping = aes(x=x,y=y))
Beautiful solution
To get beautiful colors and legend, use
# Need to convert lists to data.frames, ggplot2 needs data.frames
m2 <- as.data.frame(m2)
m3 <- as.data.frame(m3)
m4 <- as.data.frame(m4)
# Colnames are used as names in ggplot legend. Theres nothing wrong in using
# column names which contain symbols or whitespace, you just have to use
# backticks, e.g. m2$`NW(bw=0.05)` if you want to work with them
colnames(m2) <- c("x","NW(bw=0.05)")
colnames(m3) <- c("x","NW(bw=0.15)")
colnames(m4) <- c("x","NW(bw=0.3)")
# To give the different kernel density estimates different colors, they must all be in one data frame.
# For merging to work, all x columns of m2-m4 must be the same!
# the merge function will automatically detec columns of same name
# (that is, x) in m2-m4 and use it to identify y values which belong
# together (to the same x value)
mm <- Reduce(x=list(m2,m3,m4), f=function(a,b) merge(a,b))
# The above line is the same as:
# mm <- merge(m2,m3)
# mm <- merge(mm,m4)
# ggplot needs data in long (tidy) format
mm <- tidyr::gather(mm, kernel, y, -x)
ggplot(m, aes(x = X, y = Y)) +
geom_point(shape = 1) +
geom_smooth(method = lm, se = FALSE) +
theme(axis.line = element_line(colour = "black", size = 0.25)) +
geom_line(data = mm, mapping = aes(x=x,y=y,color=kernel))
Solution which will settle this for everyone and for eternity
The most beautiful and reproducable way though will be to create a custom stat in ggplot2 (see the included stats in ggplot).
There is this vignette of the ggplot2 team to this topic: Extending ggplot2. I have never undertaken such a heroic endeavour though.

Transform color scale to probability-transformed color distribution with scale_fill_gradientn()

I am trying to visualize heavily tailed raster data, and I would like a non-linear mapping of colors to the range of the values. There are a couple of similar questions, but they don't really solve my specific problem (see links below).
library(ggplot2)
library(scales)
set.seed(42)
dat <- data.frame(
x = floor(runif(10000, min=1, max=100)),
y = floor(runif(10000, min=2, max=1000)),
z = rlnorm(10000, 1, 1) )
# colors for the colour scale:
col.pal <- colorRampPalette(c("#00007F", "blue", "#007FFF", "cyan", "#7FFF7F", "yellow", "#FF7F00", "red", "#7F0000"))
fill.colors <- col.pal(64)
This is how the data look like if not transformed in some way:
ggplot(dat, aes(x = x, y = y, fill = z)) +
geom_tile(width=2, height=30) +
scale_fill_gradientn(colours=fill.colors)
My question is sort of a follow-up question related to
this one or this one , and the solution given here actually yields exactly the plot I want, except for the legend:
qn <- rescale(quantile(dat$z, probs=seq(0, 1, length.out=length(fill.colors))))
ggplot(dat, aes(x = x, y = y, fill = z)) +
geom_tile(width=2, height=30) +
scale_fill_gradientn(colours=fill.colors, values = qn)
Now I want the colour scale in the legend to represent the non-linear distribution of the values (now only the red part of the scale is visible), i.e. the legend should as well be based on quantiles. Is there a way to accomplish this?
I thought the trans argument within the colour scale might do the trick, as suggested here , but that throws an error, I think because qnorm(pnorm(dat$z)) results in some infinite values (I don't completely understand the function though..).
norm_trans <- function(){
trans_new('norm', function(x) pnorm(x), function(x) qnorm(x))
}
ggplot(dat, aes(x = x, y = y, fill = z)) +
geom_tile(width=2, height=30) +
scale_fill_gradientn(colours=fill.colors, trans = 'norm')
> Error in seq.default(from = best$lmin, to = best$lmax, by = best$lstep) : 'from' must be of length 1
So, does anybody know how to have a quantile-based colour distribution in the plot and in the legend?
This code will make manual breaks with a pnorm transformation. Is this what you are after?
ggplot(dat, aes(x = x, y = y, fill = z)) +
geom_tile(width=2, height=30) +
scale_fill_gradientn(colours=fill.colors,
trans = 'norm',
breaks = quantile(dat$z, probs = c(0, 0.25, 1))
)
I believe you have been looking for a quantile transform. Unfortunately there is none in scales, but it is not to hard to build one yourself (on the fly):
make_quantile_trans <- function(x, format = scales::label_number()) {
name <- paste0("quantiles_of_", deparse1(substitute(x)))
xs <- sort(x)
N <- length(xs)
transform <- function(x) findInterval(x, xs)/N # find the last element that is smaller
inverse <- function(q) xs[1+floor(q*(N-1))]
scales::trans_new(
name = name,
transform = transform,
inverse = inverse,
breaks = function(x, n = 5) inverse(scales::extended_breaks()(transform(x), n)),
minor_breaks = function(x, n = 5) inverse(scales::regular_minor_breaks()(transform(x), n)),
format = format,
domain = xs[c(1, N)]
)
}
ggplot(dat, aes(x = x, y = y, fill = z)) +
geom_tile(width=2, height=30) +
scale_fill_gradientn(colours=fill.colors, trans = make_quantile_trans(dat$z))
Created on 2021-11-12 by the reprex package (v2.0.1)

Resources