How to convert lattice-based graphics to ggplot2? - r

The attached script performs equivalence tests on sample variables x, y and z.
equivalence.xyplot() is really handy, although the base lattice graphics are a pain to work with. How can I use ggplot2 to plot these data rather than the base lattice graphics?
Edit:
For example, using ggplot(plot1) returns the following error:
Error: ggplot2 doesn't know how to deal with data of class trellis
I'm not sure where to begin converting the trellis class of data to ggplot2 format. Any specific advice on converting trellis-based graphics to ggplot2 would be appreciated.
require(equivalence)
require(gridExtra)
require(lattice)
x = c(1,4,3,5,3,7,8,6,7,8,9)
y = c(1,5,4,5,3,6,7,6,7,2,8)
z = c(2,4,3,5,4,7,8,5,6,6,9)
mydata = data.frame(x,y,z)
plot1 = equivalence.xyplot(mydata$x~mydata$y,alpha=0.05, b0.ii=0.25, b1.ii=0.25)
plot2 = equivalence.xyplot(mydata$x~mydata$z,alpha=0.05, b0.ii=0.25, b1.ii=0.25)
plot3 = equivalence.xyplot(mydata$y~mydata$z,alpha=0.05, b0.ii=0.25, b1.ii=0.25)
# Combine plots into one figure
grid.arrange(plot1, plot2, plot3, ncol=2)

This is not a final solution but a good start . I just go through lattice panel function and replace :
xyplot ----------> geom_point
panel.abline ----------> geom_abline
grid.polygon ----------> geom_polygon
panel.loess ----------> stat_smooth
panel.arrows ----------> geom_errobar
For each geom, I create a data.frame which components are the data passed to the lattice function. For example :
panel.arrows(x.bar, ybar.hat$fit + ybar.hat$se.fit *
t.quant, x.bar, ybar.hat$fit - ybar.hat$se.fit *
t.quant, col = "darkgrey", length = 0.05, angle = 90,
code = 3)
becomes :
dat.arrow <- data.frame(x=x.bar, ymax= ybar.hat$fit + ybar.hat$se.fit *
t.quant, ymin= ybar.hat$fit - ybar.hat$se.fit *
t.quant)
pl <- pl + geom_errorbar(data=dat.arrow, aes(x,ymin=ymin,ymax=ymax),
col = "darkgrey", width = 0.10)
The final result is a new function equivalence.ggplot that take the same parameters as equivalence.xyplot:
equivalence.ggplot <- function(x,y, alpha, b0.ii, b1.ii,
b0.absolute = FALSE,add.smooth=FALSE){
x.bar <- mean(x, na.rm = TRUE)
min.x <- min(x, na.rm = TRUE)
max.x <- max(x, na.rm = TRUE)
the.model <- lm(y ~ x)
if (b0.absolute)
y.poly <- x.bar + b0.ii * c(-1, 1, 1, -1)
else y.poly <- x.bar * (1 + b0.ii * c(-1, 1, 1, -1))
dat.poly <- data.frame(x = c(min.x, min.x, max.x, max.x),
y = y.poly)
dat <- data.frame(x,y)
p <- function(dat,dat.poly){
h <- ggplot(dat) +
geom_polygon(data=dat.poly,aes(x,y),col = "light gray", fill = gray(0.9)) +
geom_point(aes(x,y)) +
stat_smooth(data=dat,col='black',
aes(x=x,y=y),method="lm", se=FALSE,
fullrange =TRUE)+
theme_bw()
if (add.smooth)
h <- h + geom_smooth(aes(x,y),method='loess')
h
}
pl <- p(dat,dat.poly)
n <- sum(complete.cases(cbind(x, y)))
ybar.hat <- predict(the.model, newdata = data.frame(x = x.bar),
se = TRUE)
t.quant <- qt(1 - alpha/2, df.residual(the.model))
dat.arrow <- data.frame(x=x.bar, ymax= ybar.hat$fit + ybar.hat$se.fit *
t.quant, ymin= ybar.hat$fit - ybar.hat$se.fit *
t.quant)
pl <- pl +
geom_errorbar(data=dat.arrow, aes(x,ymin=ymin,ymax=ymax),
col = "darkgrey", width = 0.10)
pl
se.slope <- coef(summary(the.model))[2, 2]
dat.arrow1 <- data.frame(x=x.bar, ymax= ybar.hat$fit + se.slope * t.quant *
x.bar, ymin=ybar.hat$fit - se.slope * t.quant *
x.bar)
pl <- pl +
geom_errorbar(data=dat.arrow1, aes(x,ymin=ymin,ymax=ymax),
col = "black", width = 0.10)
addLines <- function(pl,the.model){
pl <- pl + geom_abline(intercept = coef(summary(the.model))[1, 1], slope = 1 -
b1.ii, col = "darkgrey", lty = 2) +
geom_abline(intercept = coef(summary(the.model))[1, 1], slope = 1 +
b1.ii, col = "darkgrey", lty = 2)
}
pl <- addLines(pl,the.model)
pl
}
Comparing the lattice and the ggplot2 result :
library(gridExtra)
p.gg <- equivalence.ggplot(mydata$x,mydata$y,alpha=0.05, b0.ii=0.25, b1.ii=0.25)
p.lat <- equivalence.xyplot(mydata$y~mydata$x,alpha=0.05, b0.ii=0.25, b1.ii=0.25)
grid.arrange(p.gg,p.lat)

Related

Match palette of persp graph to contour graph ggplot2

I want to make contour levels of bivariate normal density plotted in base persp function. Here is the code:
###############
library(pacman)
p_load(tidyverse)
p_load(mvtnorm)
p_load(GA)
my_mean<-c(25,65)
mycors<-seq(-1,1,by=.25)
sd_vec<-c(5,7)
i<-3
temp_cor<-matrix(c(1,mycors[i],
mycors[i],1),
byrow = T,ncol=2)
V<-sd_vec %*% t(sd_vec) *temp_cor
my_x<-seq(my_mean[1]-3*sd_vec[1], my_mean[1]+3*sd_vec[1], length.out=20)
my_y<-seq(my_mean[2]-3*sd_vec[2], my_mean[2]+3*sd_vec[2], length.out=20)
temp_f<-function(a,b){dmvnorm(cbind(a,b), my_mean,V)}
my_z<-outer(my_x, my_y,temp_f)
nlevels<-20
my_zlim <- range(my_z, finite = TRUE)
my_levels <- pretty(my_zlim, nlevels)
zz <- (my_z[-1, -1] + my_z[-1, -ncol(my_z)] + my_z[-nrow(my_z), -1] + my_z[-nrow(my_z),
-ncol(my_z)])/4
cols <- jet.colors(length(my_levels) - 1)
zzz <- cut(zz, breaks = my_levels, labels = cols)
persp(my_x, my_y, my_z, theta = -25, phi = 45, expand = 0.5,xlab="x",ylab="y",zlab="f(x,y)",col = as.character(zzz))
data.grid <- expand.grid(x = seq(my_mean[1]-3*sd_vec[1], my_mean[1]+3*sd_vec[1], length.out=200),
y = seq(my_mean[2]-3*sd_vec[2], my_mean[2]+3*sd_vec[2], length.out=200))
q.samp <- cbind(data.grid, prob = dmvnorm(data.grid, mean = my_mean, sigma = V))
ggplot(q.samp, aes(x=x, y=y, z=prob)) +
geom_contour( aes(z=prob, color=..level..)) +
#scale_color_gradient(level = jet.colors(length(my_levels) - 1))+
theme_bw()
Created on 2020-10-31 by the reprex package (v0.3.0)
Since the color palette of the persp graph seems to be discrete, I want to give colors to the contours which approximately resembles the colors in persp graph.
Are you looking for geom_contour_fill with a scale_fill_discrete according to the interpolated values of cols?
ggplot(q.samp, aes(x, y, z = prob)) +
geom_contour_filled( aes(fill = ..level..), col = "black", bins = 11) +
scale_fill_discrete(type = jet.colors(11)) +
theme_bw()
Or if you are looking for colored lines instead of fills you can use scale_gradientn
ggplot(q.samp, aes(x, y, z = prob)) +
geom_contour(aes(color = ..level..), bins = 11, size = 1) +
scale_color_gradientn(colours = jet.colors(11)) +
theme_bw()

How to automatically define the height of a horizontal line from a stat_function on ggplot2?

I am trying to add a horizontal line above this density plot using the geom_segment, however the arguments of this function request y and yend. Since - depending on the SD - the height of the curve can vary, I would like to know how to obtain the stat_function maximum height and use it as a reference to add the line.
Density plot to add a line above the normal curve
The base code:
all_mean <- mean(mtcars$wt,na.rm = T)%>% round(2)
all_sd <- sd(mtcars$wt,na.rm = T)%>% round(2)
my_score <- mtcars[1,"wt"]
dd <- function(x) { dnorm(x, mean=all_mean, sd=all_sd) }
z <- (my_score - all_mean)/all_sd
pc <- round(100*(pnorm(z)), digits=0)
t1 <- paste0(as.character(pc),"th percentile")
p33 <- all_mean + (qnorm(0.3333) * all_sd)
p67 <- all_mean + (qnorm(0.6667) * all_sd)
funcShaded <- function(x, lower_bound) {
y = dnorm(x, mean = all_mean, sd = all_sd)
y[x < lower_bound] <- NA
return(y)
}
greenShaded <- function(x, lower_bound) {
y = dnorm(x, mean = all_mean, sd = all_sd)
y[x > (all_mean*2)] <- NA
return(y)
}
ggplot(data.frame(x=c(min(mtcars$wt-2), max(mtcars$wt+2))), aes(x=x)) +
stat_function(fun=dd, colour="black") +
stat_function(fun = greenShaded, args = list(lower_bound = pc),
geom = "area", fill = "green", alpha = 1)+
stat_function(fun = funcShaded, args = list(lower_bound = my_score),
geom = "area", fill = "white", alpha = .9)+
geom_vline(aes(xintercept=my_score), colour="black")
I added the line:
geom_segment(aes(x=0, y=max(dd(x)*1.05), xend=6, yend=max(dd(x))*1.05), colour="firebrick3")
And result is:

saving ggplot in a list gives me the same graph

I am trying to plot 12 different plots on a 3 by 4 grid. But,it only plots the last one 12 times. Can any one help me? I am so fed up with it. Thanks
library(ggplot2)
library(gridExtra)
pmax=0.85
K_min = 0.0017
T = seq(100,1200,by=100) ## ISIs
lambda =1/T
p=list()
for(i in (1:length(lambda))){
p[[i]]<-ggplot(data.frame(x = c(0, 1)), aes(x = x)) +
stat_function(fun = function (x) (lambda[i]*(1-(1-pmax))/K_min)*(1-x)^((lambda[i]/K_min)-1)*
(1-(1-pmax)*x)^-((lambda[i]/K_min)+1),colour = "dodgerblue3")+
scale_x_continuous(name = "Probability") +
scale_y_continuous(name = "Frequency") + theme_bw()
main <- grid.arrange(grobs=p,ncol=4)
}
This code produces the correct picture but I need to use ggplot since my other figures are in ggplot.
par( mfrow = c( 3, 4 ) )
for (i in (1:length(lambda))){
f <- function (x) ((lambda[i]*(1-(1-pmax))/K_min)*(1-x)^((lambda[i]/K_min)-1)*
(1-(1-pmax)*x)^-((lambda[i]/K_min)+1) )
curve(f,from=0, to=1, col = "violet",lwd=2,sub = paste0("ISI = ",round(1/lambda[i],3), ""),ylab="PDF",xlab="R")
}
Correct plot using curve:
ggplot objects created in a loop are evaluated at the end of the loop. Since all the ggplot objects in this case use data calculated with lambda[i], they get the same result based on the last i value (12). Here are two possible workarounds:
Workaround 1. Convert each ggplot object into a grob within the loop, & save that to the list:
for(i in (1:length(lambda))){
# code for generating each plot is unchanged
g <- ggplot(data.frame(x = c(0, 1)), aes(x = x)) +
stat_function(fun = function (x) (lambda[i]*(1-(1-pmax))/K_min)*(1-x)^((lambda[i]/K_min)-1)*
(1-(1-pmax)*x)^-((lambda[i]/K_min)+1),colour = "dodgerblue3")+
scale_x_continuous(name = "Probability") +
scale_y_continuous(name = "Frequency") + theme_bw()
p[[i]] <- ggplotGrob(g)
}
main <- grid.arrange(grobs=p, ncol=4)
Workaround 2. Put all the data in a data frame, & create a single ggplot with a facet for each ISI:
library(dplyr)
pmax = 0.85
K_min = 0.0017
ISI = seq(100, 1200, by = 100) # I changed this; using `T` as a name clashes with T from TRUE/FALSE
lambda = 1/ISI
df <- data.frame(
x = rep(seq(0, 1, length.out = 101), length(ISI)),
ISI = rep(ISI, each = 101),
l = rep(lambda, each = 101)
) %>%
mutate(y = (l * pmax / K_min) * (1-x) ^ ((l / K_min) - 1) *
(1 - (1 - pmax) * x)^-((l / K_min) + 1))
ggplot(data,
aes(x = x, y = y, group = 1)) +
geom_line(colour = "dodgerblue3") +
facet_wrap(~ISI, nrow = 3, scales = "free_y") +
labs(x = "Probability", y = "Frequency") +
theme_bw()

Manually assigning colors with scale_fill_manual only works for certain hexagon sizes

I am trying to create a scatterplot that is summarized by hexagon bins of counts. I would like the user to be able to define the count breaks for the color scale. I have this working, using scale_fill_manual(). Oddly, however, it only works sometimes. In the MWE below, using the given seed value, if xbins=10, there are issues resulting in a plot as follows:
However, if xbins=20 or 40, for example, the plot doesn't seem to have problems:
My MWE is as follows:
library(ggplot2)
library(hexbin)
library(RColorBrewer)
set.seed(1)
xbins <- 20
x <- abs(rnorm(10000))
y <- abs(rnorm(10000))
minVal <- min(x, y)
maxVal <- max(x, y)
maxRange <- c(minVal, maxVal)
buffer <- (maxRange[2] - maxRange[1]) / (xbins / 2)
h <- hexbin(x = x, y = y, xbins = xbins, shape = 1, IDs = TRUE,
xbnds = maxRange, ybnds = maxRange)
hexdf <- data.frame (hcell2xy(h), hexID = h#cell, counts = h#count)
my_breaks <- c(2, 4, 6, 8, 20, 1000)
clrs <- brewer.pal(length(my_breaks) + 3, "Blues")
clrs <- clrs[3:length(clrs)]
hexdf$countColor <- cut(hexdf$counts, breaks = c(0, my_breaks, Inf),
labels = rev(clrs))
ggplot(hexdf, aes(x = x, y = y, hexID = hexID, fill = countColor)) +
scale_fill_manual(values = levels(hexdf$countColor)) +
geom_hex(stat = "identity") +
geom_abline(intercept = 0, color = "red", size = 0.25) +
coord_fixed(xlim = c(-0.5, (maxRange[2] + buffer)),
ylim = c(-0.5, (maxRange[2] + buffer))) +
theme(aspect.ratio=1)
My goal is to tweak this code so that the plot does not have problems (where suddenly certain hexagons are different sizes and shapes than the rest) regardless of the value assigned to xbins. However, I am puzzled what may be causing this problem for certain xbins values. Any advice would be greatly appreciated.
EDIT:
I am updating the example code after taking into account comments by #bdemarest and #Axeman. I followed the most popular answer in the link #Axeman recommends, and believe it is more useful when you are working with scale_fill_continuous() on an integer vector. Here, I am working on scale_fill_manual() on a factor vector. As a result, I am still unable to get this goal to work. Thank you.
library(ggplot2)
library(hexbin)
library(RColorBrewer)
set.seed(1)
xbins <- 10
x <- abs(rnorm(10000))
y <- abs(rnorm(10000))
minVal <- min(x, y)
maxVal <- max(x, y)
maxRange <- c(minVal, maxVal)
buffer <- (maxRange[2] - maxRange[1]) / (xbins / 2)
bindata = data.frame(x=x,y=y,factor=as.factor(1))
h <- hexbin(bindata, xbins = xbins, IDs = TRUE, xbnds = maxRange, ybnds = maxRange)
counts <- hexTapply (h, bindata$factor, table)
counts <- t (simplify2array (counts))
counts <- melt (counts)
colnames (counts) <- c ("factor", "ID", "counts")
counts$factor =as.factor(counts$factor)
hexdf <- data.frame (hcell2xy (h), ID = h#cell)
hexdf <- merge (counts, hexdf)
my_breaks <- c(2, 4, 6, 8, 20, 1000)
clrs <- brewer.pal(length(my_breaks) + 3, "Blues")
clrs <- clrs[3:length(clrs)]
hexdf$countColor <- cut(hexdf$counts, breaks = c(0, my_breaks, Inf), labels = rev(clrs))
ggplot(hexdf, aes(x = x, y = y, fill = countColor)) +
scale_fill_manual(values = levels(hexdf$countColor)) +
geom_hex(stat = "identity") +
geom_abline(intercept = 0, color = "red", size = 0.25) +
coord_cartesian(xlim = c(-0.5, maxRange[2]+buffer), ylim = c(-0.5, maxRange[2]+ buffer)) + theme(aspect.ratio=1)
you can define colors in 'geom' instead of 'scale' that modifies the scale of plot:
ggplot(hexdf, aes(x = x, y = y)) +
geom_hex(stat = "identity",fill =hexdf$countColor)

filled.contour vs. ggplot2 + stat_contour

I am new to ggplot2, and I am trying to replicate a graph that I created using filled.contour with ggplot2.
below is my code:
require(ggplot2)
require(reshape2)
#data prep
scale <- 10
xs <- scale * c(0, 0.5, 0.8, 0.9, 0.95, 0.99, 1)
ys <- scale * c(0, 0.01, 0.05, 0.1, 0.2, 0.5, 1)
df <- data.frame(expand.grid(xs,ys))
colnames(df) <- c('x','y')
df$z <- ((scale-df$x) * df$y) / ((scale-df$x) * df$y + 1)
#filled contour looks good
filled.contour(xs, ys, acast(df, x~y, value.var='z'))
#ggplot contour looks bad
p <- ggplot(df, aes(x=x, y=y, z=z))
p + stat_contour(geom='polygon', aes(fill=..level..))
I can't figure out how to get ggplot contour to fill the polygons all the way up to the upper left hand side (there's a point at (0,10) with z= 0.99) ...all I get are these weird triangles
To create a ggplot version of the filled.contour plot you'll need to have a larger data.frame than the df object in your example and using geom_tile will produce the plot you are looking for. Consider the following:
# a larger data set
scl <- 10
dat <- expand.grid(x = scl * seq(0, 1, by = 0.01),
y = scl * seq(0, 1, by = 0.01))
dat$z <- ((scl - dat$x) * dat$y) / ((scl - dat$x) * dat$y + 1)
# create the plot, the geom_contour may not be needed, but I find it helpful
ggplot(dat) +
aes(x = x, y = y, z = z, fill = z) +
geom_tile() +
geom_contour(color = "white", alpha = 0.5) +
scale_fill_gradient(low = "lightblue", high = "magenta") +
theme_bw()

Resources