I want to make contour levels of bivariate normal density plotted in base persp function. Here is the code:
###############
library(pacman)
p_load(tidyverse)
p_load(mvtnorm)
p_load(GA)
my_mean<-c(25,65)
mycors<-seq(-1,1,by=.25)
sd_vec<-c(5,7)
i<-3
temp_cor<-matrix(c(1,mycors[i],
mycors[i],1),
byrow = T,ncol=2)
V<-sd_vec %*% t(sd_vec) *temp_cor
my_x<-seq(my_mean[1]-3*sd_vec[1], my_mean[1]+3*sd_vec[1], length.out=20)
my_y<-seq(my_mean[2]-3*sd_vec[2], my_mean[2]+3*sd_vec[2], length.out=20)
temp_f<-function(a,b){dmvnorm(cbind(a,b), my_mean,V)}
my_z<-outer(my_x, my_y,temp_f)
nlevels<-20
my_zlim <- range(my_z, finite = TRUE)
my_levels <- pretty(my_zlim, nlevels)
zz <- (my_z[-1, -1] + my_z[-1, -ncol(my_z)] + my_z[-nrow(my_z), -1] + my_z[-nrow(my_z),
-ncol(my_z)])/4
cols <- jet.colors(length(my_levels) - 1)
zzz <- cut(zz, breaks = my_levels, labels = cols)
persp(my_x, my_y, my_z, theta = -25, phi = 45, expand = 0.5,xlab="x",ylab="y",zlab="f(x,y)",col = as.character(zzz))
data.grid <- expand.grid(x = seq(my_mean[1]-3*sd_vec[1], my_mean[1]+3*sd_vec[1], length.out=200),
y = seq(my_mean[2]-3*sd_vec[2], my_mean[2]+3*sd_vec[2], length.out=200))
q.samp <- cbind(data.grid, prob = dmvnorm(data.grid, mean = my_mean, sigma = V))
ggplot(q.samp, aes(x=x, y=y, z=prob)) +
geom_contour( aes(z=prob, color=..level..)) +
#scale_color_gradient(level = jet.colors(length(my_levels) - 1))+
theme_bw()
Created on 2020-10-31 by the reprex package (v0.3.0)
Since the color palette of the persp graph seems to be discrete, I want to give colors to the contours which approximately resembles the colors in persp graph.
Are you looking for geom_contour_fill with a scale_fill_discrete according to the interpolated values of cols?
ggplot(q.samp, aes(x, y, z = prob)) +
geom_contour_filled( aes(fill = ..level..), col = "black", bins = 11) +
scale_fill_discrete(type = jet.colors(11)) +
theme_bw()
Or if you are looking for colored lines instead of fills you can use scale_gradientn
ggplot(q.samp, aes(x, y, z = prob)) +
geom_contour(aes(color = ..level..), bins = 11, size = 1) +
scale_color_gradientn(colours = jet.colors(11)) +
theme_bw()
Related
I would like to plot multiple Poisson (with different lambdas (1:10))
I found the following function to draw a plot
plot_pois = function(lambda = 5)
{
plot(0:20, dpois( x=0:20, lambda=lambda ), xlim=c(-2,20))
normden <- function(x){dnorm(x, mean= lambda, sd=sqrt(lambda))}
curve(normden, from=-4, to=20, add=TRUE, col=lambda)
}
plot.new()
plot_pois(2)
But I can't plot another Poisson over it. I tried to change plot to points or lines but it totally changes the plot. I would also like to add a legends containing different colors for different values of lambda.
If I could plot it using ggplot, it would be a better option.
Another possible tidyverse solution:
library(tidyverse)
# Build Poisson distributions
p_dat <- map_df(1:10, ~ tibble(
l = paste(.),
x = 0:20,
y = dpois(0:20, .)
))
# Build Normal distributions
n_dat <- map_df(1:10, ~ tibble(
l = paste(.),
x = seq(0, 20, by = 0.001),
y = dnorm(seq(0, 20, by = 0.001), ., sqrt(.))
))
# Use ggplot2 to plot
ggplot(n_dat, aes(x, y, color = factor(l, levels = 1:10))) +
geom_line() +
geom_point(data = p_dat, aes(x, y, color = factor(l, levels = 1:10))) +
labs(color = "Lambda:") +
theme_minimal()
Created on 2019-05-06 by the reprex package (v0.2.1)
In ggplot2 you can use lapply to loop over different lambdas:
library(ggplot2)
lambdas <- c(5, 2)
ggplot(data = data.frame(x = 0:20)) +
lapply(lambdas, function(l) geom_point(aes(x = x, y = dpois(x, lambda = l), col = factor(l)))) +
lapply(lambdas, function(l) stat_function(fun = dnorm, args = list(mean = l, sd = sqrt(l)),
aes(x = x, col = factor(l))))
Axes titles and limits, the legend title etc. can then be customized as usual in ggplot2.
I'm trying to plot some nonparametric regression curves with ggplot2. I achieved It with the base plot()function:
library(KernSmooth)
set.seed(1995)
X <- runif(100, -1, 1)
G <- X[which (X > 0)]
L <- X[which (X < 0)]
u <- rnorm(100, 0 , 0.02)
Y <- -exp(-20*L^2)-exp(-20*G^2)/(X+1)+u
m <- lm(Y~X)
plot(Y~X)
abline(m, col="red")
m2 <- locpoly(X, Y, bandwidth = 0.05, degree = 0)
lines(m2$x, m2$y, col = "red")
m3 <- locpoly(X, Y, bandwidth = 0.15, degree = 0)
lines(m3$x, m3$y, col = "black")
m4 <- locpoly(X, Y, bandwidth = 0.3, degree = 0)
lines(m4$x, m4$y, col = "green")
legend("bottomright", legend = c("NW(bw=0.05)", "NW(bw=0.15)", "NW(bw=0.3)"),
lty = 1, col = c("red", "black", "green"), cex = 0.5)
With ggplot2 have achieved plotting the linear regression:
With this code:
ggplot(m, aes(x = X, y = Y)) +
geom_point(shape = 1) +
geom_smooth(method = lm, se = FALSE) +
theme(axis.line = element_line(colour = "black", size = 0.25))
But I dont't know how to add the other lines to this plot, as in the base R plot. Any suggestions? Thanks in advance.
Solution
The shortest solution (though not the most beautiful one) is to add the lines using the data= argument of the geom_line function:
ggplot(m, aes(x = X, y = Y)) +
geom_point(shape = 1) +
geom_smooth(method = lm, se = FALSE) +
theme(axis.line = element_line(colour = "black", size = 0.25)) +
geom_line(data = as.data.frame(m2), mapping = aes(x=x,y=y))
Beautiful solution
To get beautiful colors and legend, use
# Need to convert lists to data.frames, ggplot2 needs data.frames
m2 <- as.data.frame(m2)
m3 <- as.data.frame(m3)
m4 <- as.data.frame(m4)
# Colnames are used as names in ggplot legend. Theres nothing wrong in using
# column names which contain symbols or whitespace, you just have to use
# backticks, e.g. m2$`NW(bw=0.05)` if you want to work with them
colnames(m2) <- c("x","NW(bw=0.05)")
colnames(m3) <- c("x","NW(bw=0.15)")
colnames(m4) <- c("x","NW(bw=0.3)")
# To give the different kernel density estimates different colors, they must all be in one data frame.
# For merging to work, all x columns of m2-m4 must be the same!
# the merge function will automatically detec columns of same name
# (that is, x) in m2-m4 and use it to identify y values which belong
# together (to the same x value)
mm <- Reduce(x=list(m2,m3,m4), f=function(a,b) merge(a,b))
# The above line is the same as:
# mm <- merge(m2,m3)
# mm <- merge(mm,m4)
# ggplot needs data in long (tidy) format
mm <- tidyr::gather(mm, kernel, y, -x)
ggplot(m, aes(x = X, y = Y)) +
geom_point(shape = 1) +
geom_smooth(method = lm, se = FALSE) +
theme(axis.line = element_line(colour = "black", size = 0.25)) +
geom_line(data = mm, mapping = aes(x=x,y=y,color=kernel))
Solution which will settle this for everyone and for eternity
The most beautiful and reproducable way though will be to create a custom stat in ggplot2 (see the included stats in ggplot).
There is this vignette of the ggplot2 team to this topic: Extending ggplot2. I have never undertaken such a heroic endeavour though.
I am trying to create a scatterplot that is summarized by hexagon bins of counts. I would like the user to be able to define the count breaks for the color scale. I have this working, using scale_fill_manual(). Oddly, however, it only works sometimes. In the MWE below, using the given seed value, if xbins=10, there are issues resulting in a plot as follows:
However, if xbins=20 or 40, for example, the plot doesn't seem to have problems:
My MWE is as follows:
library(ggplot2)
library(hexbin)
library(RColorBrewer)
set.seed(1)
xbins <- 20
x <- abs(rnorm(10000))
y <- abs(rnorm(10000))
minVal <- min(x, y)
maxVal <- max(x, y)
maxRange <- c(minVal, maxVal)
buffer <- (maxRange[2] - maxRange[1]) / (xbins / 2)
h <- hexbin(x = x, y = y, xbins = xbins, shape = 1, IDs = TRUE,
xbnds = maxRange, ybnds = maxRange)
hexdf <- data.frame (hcell2xy(h), hexID = h#cell, counts = h#count)
my_breaks <- c(2, 4, 6, 8, 20, 1000)
clrs <- brewer.pal(length(my_breaks) + 3, "Blues")
clrs <- clrs[3:length(clrs)]
hexdf$countColor <- cut(hexdf$counts, breaks = c(0, my_breaks, Inf),
labels = rev(clrs))
ggplot(hexdf, aes(x = x, y = y, hexID = hexID, fill = countColor)) +
scale_fill_manual(values = levels(hexdf$countColor)) +
geom_hex(stat = "identity") +
geom_abline(intercept = 0, color = "red", size = 0.25) +
coord_fixed(xlim = c(-0.5, (maxRange[2] + buffer)),
ylim = c(-0.5, (maxRange[2] + buffer))) +
theme(aspect.ratio=1)
My goal is to tweak this code so that the plot does not have problems (where suddenly certain hexagons are different sizes and shapes than the rest) regardless of the value assigned to xbins. However, I am puzzled what may be causing this problem for certain xbins values. Any advice would be greatly appreciated.
EDIT:
I am updating the example code after taking into account comments by #bdemarest and #Axeman. I followed the most popular answer in the link #Axeman recommends, and believe it is more useful when you are working with scale_fill_continuous() on an integer vector. Here, I am working on scale_fill_manual() on a factor vector. As a result, I am still unable to get this goal to work. Thank you.
library(ggplot2)
library(hexbin)
library(RColorBrewer)
set.seed(1)
xbins <- 10
x <- abs(rnorm(10000))
y <- abs(rnorm(10000))
minVal <- min(x, y)
maxVal <- max(x, y)
maxRange <- c(minVal, maxVal)
buffer <- (maxRange[2] - maxRange[1]) / (xbins / 2)
bindata = data.frame(x=x,y=y,factor=as.factor(1))
h <- hexbin(bindata, xbins = xbins, IDs = TRUE, xbnds = maxRange, ybnds = maxRange)
counts <- hexTapply (h, bindata$factor, table)
counts <- t (simplify2array (counts))
counts <- melt (counts)
colnames (counts) <- c ("factor", "ID", "counts")
counts$factor =as.factor(counts$factor)
hexdf <- data.frame (hcell2xy (h), ID = h#cell)
hexdf <- merge (counts, hexdf)
my_breaks <- c(2, 4, 6, 8, 20, 1000)
clrs <- brewer.pal(length(my_breaks) + 3, "Blues")
clrs <- clrs[3:length(clrs)]
hexdf$countColor <- cut(hexdf$counts, breaks = c(0, my_breaks, Inf), labels = rev(clrs))
ggplot(hexdf, aes(x = x, y = y, fill = countColor)) +
scale_fill_manual(values = levels(hexdf$countColor)) +
geom_hex(stat = "identity") +
geom_abline(intercept = 0, color = "red", size = 0.25) +
coord_cartesian(xlim = c(-0.5, maxRange[2]+buffer), ylim = c(-0.5, maxRange[2]+ buffer)) + theme(aspect.ratio=1)
you can define colors in 'geom' instead of 'scale' that modifies the scale of plot:
ggplot(hexdf, aes(x = x, y = y)) +
geom_hex(stat = "identity",fill =hexdf$countColor)
I have a parameterized contour that I'm plotting in R. What I'm trying to do is add arrows along the curve to show the viewer which direction the curve is going in.
Here's the code I'm using to generate the curve:
library(ggplot2)
library(grid)
set.seed(9)
T<-sort(runif(2^12,min=2^-5, max=16))
U<-function(t) exp(4*log(t) - 4*t)*(cos(log(t) + 3*t))
#Re(t^(4+1i)*t)*exp(-(4-3i)*t))
V<-function(t) exp(4*log(t) - 4*t)*(sin(log(t) + 3*t))
#Im(t^(4+1i)*t)*exp(-(4-3i)*t))
X<-sapply(T,U)
Y<-sapply(T,V)
df<-data.frame(X=X,Y=Y)
p<-ggplot(data=df,aes(x = df$X, y = df$Y))
p+theme_bw()+
geom_path(size=1,color='blue',linetype=1) #+
#geom_segment(aes(xend=c(tail(X, n=-1), NA), yend=c(tail(Y, n=-1), NA)),
#arrow=arrow(length=unit(0.2,"cm")),color='blue')
dev.off()
The last part I commented out:
#+
#geom_segment(aes(xend=c(tail(X, n=-1), NA), yend=c(tail(Y, n=-1), NA)),
#arrow=arrow(length=unit(0.2,"cm")),color='blue')
does something similar to what I want, but the arrows are very close together and the curve ends up looking "fuzzy" rather than directed.
Here's the fuzzy and non-fuzzy version of the curve:
Thank you!
It might look better if the arrows were more equally spaced along the curved path, e.g.
library(ggplot2)
library(grid)
set.seed(9)
T <- sort(runif(2^12,min=2^-5, max=16))
U <- function(t) exp(4*log(t) - 4*t)*(cos(log(t) + 3*t))
V <- function(t) exp(4*log(t) - 4*t)*(sin(log(t) + 3*t))
drough <- data.frame(x=sapply(T,U), y=sapply(T,V))
p <- ggplot(data = drough, aes(x = x, y = y)) +
geom_path()
## because the parametric curve was generated with uneven spacing
## we can try to resample more evenly along the path
parametric_smoothie <- function(x, y, N=1e2, phase=1, offset=0) {
lengths <- c(0, sqrt(diff(x)^2 + diff(y)^2))
l <- cumsum(lengths)
lmax <- max(l)
newpos <- seq(phase*lmax/N, lmax-phase*lmax/N, length.out = N) + offset*lmax/N
xx <- approx(l, x, newpos)$y
yy <- approx(l, y, newpos)$y
data.frame(x = xx, y = yy)
}
## this is a finer set of points
dfine <- parametric_smoothie(X, Y, 20)
gridExtra::grid.arrange(p + geom_point(data = drough, col="grey"),
p + geom_point(data = dfine, col="grey"), ncol=2)
## now we use this function to create N start points for the arrows
## and another N end points slightly apart to give a sense of direction
relay_arrow <- function(x, y, N=10, phase = 0.8, offset = 1e-2, ...){
start <- parametric_smoothie(x, y, N, phase)
end <- parametric_smoothie(x, y, N, phase, offset)
data.frame(xstart = start$x, xend = end$x,
ystart = start$y, yend = end$y)
}
breaks <- relay_arrow(drough$x, drough$y, N=20)
p + geom_point(data = breaks, aes(xstart, ystart), col="grey98", size=2) +
geom_segment(data = breaks, aes(xstart, ystart, xend = xend, yend = yend),
arrow = arrow(length = unit(0.5, "line")),
col="red", lwd=1)
One way to do it is to draw them on after. You can probably get the direction better by using the angle aesthetic (if it's easy enough to work out):
p<-ggplot(data=df,aes(x = X, y = Y))
p +
geom_path(size=1,color='blue',linetype=1)+
geom_segment(data = df[seq(1, nrow(df), 20), ], aes(x = X, y = Y, xend=c(tail(X, n=-1), NA), yend=c(tail(Y, n=-1), NA)),
arrow=arrow(length=unit(0.2,"cm"), type = "closed"), color="blue", linetype = 0, inherit.aes = FALSE)
Note the closed arrow type. I had to do that so they weren't interpreted as lines and hence disappear when linetype = 0.
Try this with slight modification of your code (you don't want to compromise the quality of the curve by having smaller number of points and at the same time you want to have smaller number of segments to draw the arrows for better quality of the arrows):
library(ggplot2)
library(grid)
set.seed(9)
T<-sort(runif(2^12,min=2^-5, max=16))
U<-function(t) exp(4*log(t) - 4*t)*(cos(log(t) + 3*t))
#Re(t^(4+1i)*t)*exp(-(4-3i)*t))
V<-function(t) exp(4*log(t) - 4*t)*(sin(log(t) + 3*t))
#Im(t^(4+1i)*t)*exp(-(4-3i)*t))
X<-sapply(T,U)
Y<-sapply(T,V)
df<-data.frame(X=X,Y=Y)
df1 <- df[seq(1,length(X), 8),]
p<-ggplot(data=df,aes(x = df$X, y = df$Y))
p+theme_bw()+
geom_path(size=1,color='blue',linetype=1) +
geom_segment(data=df1,aes(x=X, y=Y, xend=c(tail(X, n=-1), NA), yend=c(tail(Y, n=-1), NA)),
arrow=arrow(length=unit(0.3,"cm"),type='closed'),color='blue')
#dev.off()
Suppose I want to plot the following data:
# First set of X coordinates
x <- seq(0, 10, by = 0.2)
# Angles from 0 to 90 degrees
angles <- seq(0, 90, length.out = 10)
# Convert to radian
angles <- deg2rad(angles)
# Create an empty data frame
my.df <- data.frame()
# For each angle, populate the data frame
for (theta in angles) {
y <- sin(x + theta)
tmp <- data.frame(x = x, y = y, theta = as.factor(theta))
my.df <- rbind(my.df, tmp)
}
x1 <- seq(0, 12, by = 0.3)
y1 <- sin(x1 - 0.5)
tmp <- data.frame(x = x1, y = y1, theta = as.factor(-0.5))
my.df <- rbind(my.df, tmp)
ggplot(my.df, aes(x, y, color = theta)) + geom_line()
That gives me a nice plot:
Now I want to draw a heat map out of this data set. There are tutorials here and there that do it using geom_tile to do it.
So, let's try:
# Convert the angle values from factors to numerics
my.df$theta <- as.numeric(levels(my.df$theta))[my.df$theta]
ggplot(my.df, aes(theta, x)) + geom_tile(aes(fill = y)) + scale_fill_gradient(low = "blue", high = "red")
That does not work, and the reason is that my x coordinates do not have the same step:
x <- seq(0, 10, by = 0.2) vs x1 <- seq(0, 12, by = 0.3)
But as soon as I use the same step x1 <- seq(0, 12, by = 0.2), it works:
I real life, my data sets are not regularly spaced (these are experimental data), but I still need to display them as a heat map. How can I do?
You can use akima to interpolate the function into a form suitable for heat map plots.
library(akima)
library(ggplot2)
my.df.interp <- interp(x = my.df$theta, y = my.df$x, z = my.df$y, nx = 30, ny = 30)
my.df.interp.xyz <- as.data.frame(interp2xyz(my.df.interp))
names(my.df.interp.xyz) <- c("theta", "x", "y")
ggplot(my.df.interp.xyz, aes(x = theta, y = x, fill = y)) + geom_tile() +
scale_fill_gradient(low = "blue", high = "red")
If you wish to use a different resolution you can change the nx and ny arguments to interp.
Another way to do it with just ggplot2 is to use stat_summary_2d.
library(ggplot2)
ggplot(my.df, aes(x = theta, y = x, z = y)) + stat_summary_2d(binwidth = 0.3) +
scale_fill_gradient(low = "blue", high = "red")