filled.contour vs. ggplot2 + stat_contour - r

I am new to ggplot2, and I am trying to replicate a graph that I created using filled.contour with ggplot2.
below is my code:
require(ggplot2)
require(reshape2)
#data prep
scale <- 10
xs <- scale * c(0, 0.5, 0.8, 0.9, 0.95, 0.99, 1)
ys <- scale * c(0, 0.01, 0.05, 0.1, 0.2, 0.5, 1)
df <- data.frame(expand.grid(xs,ys))
colnames(df) <- c('x','y')
df$z <- ((scale-df$x) * df$y) / ((scale-df$x) * df$y + 1)
#filled contour looks good
filled.contour(xs, ys, acast(df, x~y, value.var='z'))
#ggplot contour looks bad
p <- ggplot(df, aes(x=x, y=y, z=z))
p + stat_contour(geom='polygon', aes(fill=..level..))
I can't figure out how to get ggplot contour to fill the polygons all the way up to the upper left hand side (there's a point at (0,10) with z= 0.99) ...all I get are these weird triangles

To create a ggplot version of the filled.contour plot you'll need to have a larger data.frame than the df object in your example and using geom_tile will produce the plot you are looking for. Consider the following:
# a larger data set
scl <- 10
dat <- expand.grid(x = scl * seq(0, 1, by = 0.01),
y = scl * seq(0, 1, by = 0.01))
dat$z <- ((scl - dat$x) * dat$y) / ((scl - dat$x) * dat$y + 1)
# create the plot, the geom_contour may not be needed, but I find it helpful
ggplot(dat) +
aes(x = x, y = y, z = z, fill = z) +
geom_tile() +
geom_contour(color = "white", alpha = 0.5) +
scale_fill_gradient(low = "lightblue", high = "magenta") +
theme_bw()

Related

not centred concentric circles plot

I have spent sometimes doing this but I could not reach the solution. I have this code to plot concentric circles over 25 by 25 grids using ggplot2 in R. I do not know how to be able to manipulate the center of the concentric circles to be not at the origin(0,0), but at the center of the grid (5,5). I also would like to keep the scale of the grid from 25 to 25. Thank you very much in advance
require(ggplot2)
require(grid)
x <- rep(seq(25), 25)
y <- rep(seq(25), each=25)
circ_rads <- seq(1,5,2)
qplot(x, y) +
lapply(circ_rads, FUN = function(x)
annotation_custom(circleGrob(gp = gpar(fill = "transparent", color = "black")),
-x, x, -x, x)) +
geom_text(aes(x = 0, y = circ_rads + 0.1, label = circ_rads)) +
coord_fixed(ratio = 1)
We can use ggforce::geom_circle like this:
library(ggplot2)
library(ggforce)
x <- rep(seq(25), 25)
y <- rep(seq(25), each=25)
circ_rads <- seq(1,5,2)
xy <- data.frame(x=x, y=y)
circles <- data.frame(
x0 = 5, # You say circles should be a 'centre of the grid' and 5, 5
y0 = 5, # not sure what you really mean, so going with 5, 5 here
r = circ_rads
)
ggplot() +
geom_point(data = xy,
aes(x,
y)) +
geom_circle(data = circles,
aes(x0 = x0,
y0 = y0,
r = r)) +
coord_fixed()

How do I show the orientation of a curve in ggplot2?

I have a parameterized contour that I'm plotting in R. What I'm trying to do is add arrows along the curve to show the viewer which direction the curve is going in.
Here's the code I'm using to generate the curve:
library(ggplot2)
library(grid)
set.seed(9)
T<-sort(runif(2^12,min=2^-5, max=16))
U<-function(t) exp(4*log(t) - 4*t)*(cos(log(t) + 3*t))
#Re(t^(4+1i)*t)*exp(-(4-3i)*t))
V<-function(t) exp(4*log(t) - 4*t)*(sin(log(t) + 3*t))
#Im(t^(4+1i)*t)*exp(-(4-3i)*t))
X<-sapply(T,U)
Y<-sapply(T,V)
df<-data.frame(X=X,Y=Y)
p<-ggplot(data=df,aes(x = df$X, y = df$Y))
p+theme_bw()+
geom_path(size=1,color='blue',linetype=1) #+
#geom_segment(aes(xend=c(tail(X, n=-1), NA), yend=c(tail(Y, n=-1), NA)),
#arrow=arrow(length=unit(0.2,"cm")),color='blue')
dev.off()
The last part I commented out:
#+
#geom_segment(aes(xend=c(tail(X, n=-1), NA), yend=c(tail(Y, n=-1), NA)),
#arrow=arrow(length=unit(0.2,"cm")),color='blue')
does something similar to what I want, but the arrows are very close together and the curve ends up looking "fuzzy" rather than directed.
Here's the fuzzy and non-fuzzy version of the curve:
Thank you!
It might look better if the arrows were more equally spaced along the curved path, e.g.
library(ggplot2)
library(grid)
set.seed(9)
T <- sort(runif(2^12,min=2^-5, max=16))
U <- function(t) exp(4*log(t) - 4*t)*(cos(log(t) + 3*t))
V <- function(t) exp(4*log(t) - 4*t)*(sin(log(t) + 3*t))
drough <- data.frame(x=sapply(T,U), y=sapply(T,V))
p <- ggplot(data = drough, aes(x = x, y = y)) +
geom_path()
## because the parametric curve was generated with uneven spacing
## we can try to resample more evenly along the path
parametric_smoothie <- function(x, y, N=1e2, phase=1, offset=0) {
lengths <- c(0, sqrt(diff(x)^2 + diff(y)^2))
l <- cumsum(lengths)
lmax <- max(l)
newpos <- seq(phase*lmax/N, lmax-phase*lmax/N, length.out = N) + offset*lmax/N
xx <- approx(l, x, newpos)$y
yy <- approx(l, y, newpos)$y
data.frame(x = xx, y = yy)
}
## this is a finer set of points
dfine <- parametric_smoothie(X, Y, 20)
gridExtra::grid.arrange(p + geom_point(data = drough, col="grey"),
p + geom_point(data = dfine, col="grey"), ncol=2)
## now we use this function to create N start points for the arrows
## and another N end points slightly apart to give a sense of direction
relay_arrow <- function(x, y, N=10, phase = 0.8, offset = 1e-2, ...){
start <- parametric_smoothie(x, y, N, phase)
end <- parametric_smoothie(x, y, N, phase, offset)
data.frame(xstart = start$x, xend = end$x,
ystart = start$y, yend = end$y)
}
breaks <- relay_arrow(drough$x, drough$y, N=20)
p + geom_point(data = breaks, aes(xstart, ystart), col="grey98", size=2) +
geom_segment(data = breaks, aes(xstart, ystart, xend = xend, yend = yend),
arrow = arrow(length = unit(0.5, "line")),
col="red", lwd=1)
One way to do it is to draw them on after. You can probably get the direction better by using the angle aesthetic (if it's easy enough to work out):
p<-ggplot(data=df,aes(x = X, y = Y))
p +
geom_path(size=1,color='blue',linetype=1)+
geom_segment(data = df[seq(1, nrow(df), 20), ], aes(x = X, y = Y, xend=c(tail(X, n=-1), NA), yend=c(tail(Y, n=-1), NA)),
arrow=arrow(length=unit(0.2,"cm"), type = "closed"), color="blue", linetype = 0, inherit.aes = FALSE)
Note the closed arrow type. I had to do that so they weren't interpreted as lines and hence disappear when linetype = 0.
Try this with slight modification of your code (you don't want to compromise the quality of the curve by having smaller number of points and at the same time you want to have smaller number of segments to draw the arrows for better quality of the arrows):
library(ggplot2)
library(grid)
set.seed(9)
T<-sort(runif(2^12,min=2^-5, max=16))
U<-function(t) exp(4*log(t) - 4*t)*(cos(log(t) + 3*t))
#Re(t^(4+1i)*t)*exp(-(4-3i)*t))
V<-function(t) exp(4*log(t) - 4*t)*(sin(log(t) + 3*t))
#Im(t^(4+1i)*t)*exp(-(4-3i)*t))
X<-sapply(T,U)
Y<-sapply(T,V)
df<-data.frame(X=X,Y=Y)
df1 <- df[seq(1,length(X), 8),]
p<-ggplot(data=df,aes(x = df$X, y = df$Y))
p+theme_bw()+
geom_path(size=1,color='blue',linetype=1) +
geom_segment(data=df1,aes(x=X, y=Y, xend=c(tail(X, n=-1), NA), yend=c(tail(Y, n=-1), NA)),
arrow=arrow(length=unit(0.3,"cm"),type='closed'),color='blue')
#dev.off()

Add segments of circles to ggplot based on product of x & y

I want to add shaded areas to a chart to help people understand where bad, ok, and good points can fit.
Good = x*y>=.66
Ok = x*y>=.34
Bad = x*y<.34
Generating the right sequence of data to correctly apply the curved boundaries to the chart is proving tough.
What is the most elegant way to generate the curves?
Bonus Q: How would you do this to produce non-overlapping areas so that different colours could be used?
Updates
I've managed to do in a rather hacky way the drawing of the circle segments. I updated the MRE to use the revised segMaker function.
MRE
library(ggplot2)
pts<-seq(0,1,.02)
x<-sample(pts,50,replace=TRUE)
y<-sample(pts,50,replace=TRUE)
# What function will generate correct sequence of values as these are linear?
segMaker<-function(x,by){
# Original
# data.frame(x=c(seq(0,x,by),0)
# ,y=c(seq(x,0,-by),0)
# )
zero <- data.frame(x = 0, y = 0)
rs <- seq(0, pi, by)
xc <- x * cos(rs)
yc <- x * sin(rs)
gr <- data.frame(x = xc, y = yc)
gr <- rbind(gr[gr$x >= 0, ], zero)
return(gr)
}
firstSeg <-segMaker(.34,0.02)
secondSeg <-segMaker(.66,0.02)
thirdSeg <-segMaker(1,0.02)
ggplot(data.frame(x,y),aes(x,y, colour=x*y))+
geom_point() +
geom_polygon(data=firstSeg, fill="blue", alpha=.25)+
geom_polygon(data=secondSeg, fill="blue", alpha=.25)+
geom_polygon(data=thirdSeg, fill="blue", alpha=.25)
Current & desired shadings
You can create a data frame with the boundaries between each region and then use geom_ribbon to plot it. Here's an example using the conditions you supplied (which result in boundaries that are the reciprocal function, rather than circles, but the idea is the same, whichever function you use for the boundaries):
library(ggplot2)
# Fake data
pts<-seq(0,1,.02)
set.seed(19485)
x<-sample(pts,50,replace=TRUE)
y<-sample(pts,50,replace=TRUE)
df = data.frame(x,y)
# Region boundaries
x = seq(0.001,1.1,0.01)
bounds = data.frame(x, ymin=c(-100/x, 0.34/x, 0.66/x),
ymax=c(0.34/x, 0.66/x, 100/x),
g=rep(c("Bad","OK","Good"), each=length(x)))
bounds$g = factor(bounds$g, levels=c("Bad","OK","Good"))
ggplot() +
coord_cartesian(ylim=0:1, xlim=0:1) +
geom_ribbon(data=bounds, aes(x, ymin=ymin, ymax=ymax, fill=g), colour="grey50", lwd=0.2) +
geom_point(data=df, aes(x,y), colour="grey20") +
scale_fill_manual(values=hcl(c(15, 40, 240), 100, 80)) +
#scale_fill_manual(values=hcl(c(15, 40, 240), 100, 80, alpha=0.25)) + # If you want the fill colors to be transparent
labs(fill="") +
guides(fill=guide_legend(reverse=TRUE))
For circular boundaries, assuming we want boundaries at r=1/3 and r=2/3:
# Calculate y for circle, given r and x
cy = function(r, x) {sqrt(r^2 - x^2)}
n = 200
x = unlist(lapply(c(1/3,2/3,1), function(to) seq(0, to, len=n)))
bounds = data.frame(x, ymin = c(rep(0, n),
cy(1/3, seq(0, 1/3, len=n/2)), rep(0, n/2),
cy(2/3, seq(0, 2/3, len=2*n/3)), rep(0, n/3)),
ymax = c(cy(1/3, seq(0,1/3,len=n)),
cy(2/3, seq(0,2/3,len=n)),
rep(1,n)),
g=rep(c("Bad","OK","Good"), each=n))
bounds$g = factor(bounds$g, levels=c("Bad","OK","Good"))
If you can use a github package, ggforce adds geom_arc_bar():
# devtools::install_github('thomasp85/ggforce')
library(ggplot2)
library(ggforce)
pts<-seq(0,1,.02)
x<-sample(pts,50,replace=TRUE)
y<-sample(pts,50,replace=TRUE)
arcs <- data.frame(
x0 = 0,
y0 = 0,
start = 0,
end = pi / 2,
r0 = c(0, 1/3, 2/3),
r = c(1/3, 2/3, 1),
fill = c("bad", "ok", "good")
)
ggplot() +
geom_arc_bar(data = arcs,
aes(x0 = x0, y0 = y0, start = start, end = end, r0 = r0, r = r,
fill = fill), alpha = 0.6) +
geom_point(data = data.frame(x = x, y = y),
aes(x = x, y = y))
Based on #eipi10's great answer, to do the product component (basically ends up with the same thing) I did:
library(ggplot2)
library(data.table)
set.seed(19485)
pts <- seq(0, 1, .001)
x <- sample(pts, 50, replace = TRUE)
y <- sample(pts, 50, replace = TRUE)
df <- data.frame(x,y)
myRibbon<-CJ(pts,pts)
myRibbon[,prod:=V1 * V2]
myRibbon[,cat:=ifelse(prod<=1/3,"bad",
ifelse(prod<=2/3,"ok","good"))]
myRibbon<-myRibbon[
,.(ymin=min(V2),ymax=max(V2))
,.(cat,V1)]
ggplot() +
geom_ribbon(data=myRibbon
, aes(x=V1, ymin=ymin,ymax=ymax
, group=cat, fill=cat),
colour="grey90", lwd=0.2, alpha=.5)+
geom_point(data=df, aes(x,y), colour="grey20") +
theme_minimal()
This doesn't do anything fancy but works out for each value of x, what the smallest and largest values were that could give rise to a specific banding.
If I had just wanted arcs, the use of ggforce (#GregF) would be really great- it tucks away all the complexity.

3D plot of bivariate distribution using R or Matlab

i would like to know if someone could tell me how you plot something similar to this
with histograms of the sample generates from the code below under the two curves. Using R or Matlab but preferably R.
# bivariate normal with a gibbs sampler...
gibbs<-function (n, rho)
{
mat <- matrix(ncol = 2, nrow = n)
x <- 0
y <- 0
mat[1, ] <- c(x, y)
for (i in 2:n) {
x <- rnorm(1, rho * y, (1 - rho^2))
y <- rnorm(1, rho * x,(1 - rho^2))
mat[i, ] <- c(x, y)
}
mat
}
bvn<-gibbs(10000,0.98)
par(mfrow=c(3,2))
plot(bvn,col=1:10000,main="bivariate normal distribution",xlab="X",ylab="Y")
plot(bvn,type="l",main="bivariate normal distribution",xlab="X",ylab="Y")
hist(bvn[,1],40,main="bivariate normal distribution",xlab="X",ylab="")
hist(bvn[,2],40,main="bivariate normal distribution",xlab="Y",ylab="")
par(mfrow=c(1,1))`
Thanks in advance
Best regards,
JC T.
You could do it in Matlab programmatically.
This is the result:
Code:
% Generate some data.
data = randn(10000, 2);
% Scale and rotate the data (for demonstration purposes).
data(:,1) = data(:,1) * 2;
theta = deg2rad(130);
data = ([cos(theta) -sin(theta); sin(theta) cos(theta)] * data')';
% Get some info.
m = mean(data);
s = std(data);
axisMin = m - 4 * s;
axisMax = m + 4 * s;
% Plot data points on (X=data(x), Y=data(y), Z=0)
plot3(data(:,1), data(:,2), zeros(size(data,1),1), 'k.', 'MarkerSize', 1);
% Turn on hold to allow subsequent plots.
hold on
% Plot the ellipse using Eigenvectors and Eigenvalues.
data_zeroMean = bsxfun(#minus, data, m);
[V,D] = eig(data_zeroMean' * data_zeroMean / (size(data_zeroMean, 1)));
[D, order] = sort(diag(D), 'descend');
D = diag(D);
V = V(:, order);
V = V * sqrt(D);
t = linspace(0, 2 * pi);
e = bsxfun(#plus, 2*V * [cos(t); sin(t)], m');
plot3(...
e(1,:), e(2,:), ...
zeros(1, nPointsEllipse), 'g-', 'LineWidth', 2);
maxP = 0;
for side = 1:2
% Calculate the histogram.
p = [0 hist(data(:,side), 20) 0];
p = p / sum(p);
maxP = max([maxP p]);
dx = (axisMax(side) - axisMin(side)) / numel(p) / 2.3;
p2 = [zeros(1,numel(p)); p; p; zeros(1,numel(p))]; p2 = p2(:);
x = linspace(axisMin(side), axisMax(side), numel(p));
x2 = [x-dx; x-dx; x+dx; x+dx]; x2 = max(min(x2(:), axisMax(side)), axisMin(side));
% Calculate the curve.
nPtsCurve = numel(p) * 10;
xx = linspace(axisMin(side), axisMax(side), nPtsCurve);
% Plot the curve and the histogram.
if side == 1
plot3(xx, ones(1, nPtsCurve) * axisMax(3 - side), spline(x,p,xx), 'r-', 'LineWidth', 2);
plot3(x2, ones(numel(p2), 1) * axisMax(3 - side), p2, 'k-', 'LineWidth', 1);
else
plot3(ones(1, nPtsCurve) * axisMax(3 - side), xx, spline(x,p,xx), 'b-', 'LineWidth', 2);
plot3(ones(numel(p2), 1) * axisMax(3 - side), x2, p2, 'k-', 'LineWidth', 1);
end
end
% Turn off hold.
hold off
% Axis labels.
xlabel('x');
ylabel('y');
zlabel('p(.)');
axis([axisMin(1) axisMax(1) axisMin(2) axisMax(2) 0 maxP * 1.05]);
grid on;
I must admit, I took this on as a challenge because I was looking for different ways to show other datasets. I have normally done something along the lines of the scatterhist 2D graphs shown in other answers, but I've wanted to try my hand at rgl for a while.
I use your function to generate the data
gibbs<-function (n, rho) {
mat <- matrix(ncol = 2, nrow = n)
x <- 0
y <- 0
mat[1, ] <- c(x, y)
for (i in 2:n) {
x <- rnorm(1, rho * y, (1 - rho^2))
y <- rnorm(1, rho * x, (1 - rho^2))
mat[i, ] <- c(x, y)
}
mat
}
bvn <- gibbs(10000, 0.98)
Setup
I use rgl for the hard lifting, but I didn't know how to get the confidence ellipse without going to car. I'm guessing there are other ways to attack this.
library(rgl) # plot3d, quads3d, lines3d, grid3d, par3d, axes3d, box3d, mtext3d
library(car) # dataEllipse
Process the data
Getting the histogram data without plotting it, I then extract the densities and normalize them into probabilities. The *max variables are to simplify future plotting.
hx <- hist(bvn[,2], plot=FALSE)
hxs <- hx$density / sum(hx$density)
hy <- hist(bvn[,1], plot=FALSE)
hys <- hy$density / sum(hy$density)
## [xy]max: so that there's no overlap in the adjoining corner
xmax <- tail(hx$breaks, n=1) + diff(tail(hx$breaks, n=2))
ymax <- tail(hy$breaks, n=1) + diff(tail(hy$breaks, n=2))
zmax <- max(hxs, hys)
Basic scatterplot on the floor
The scale should be set to whatever is appropriate based on the distributions. Admittedly, the X and Y labels aren't placed beautifully, but that shouldn't be too hard to reposition based on the data.
## the base scatterplot
plot3d(bvn[,2], bvn[,1], 0, zlim=c(0, zmax), pch='.',
xlab='X', ylab='Y', zlab='', axes=FALSE)
par3d(scale=c(1,1,3))
Histograms on the back walls
I couldn't figure out how to get them automatically plotted on a plane in the overall 3D render, so I had to make each rect manually.
## manually create each histogram
for (ii in seq_along(hx$counts)) {
quads3d(hx$breaks[ii]*c(.9,.9,.1,.1) + hx$breaks[ii+1]*c(.1,.1,.9,.9),
rep(ymax, 4),
hxs[ii]*c(0,1,1,0), color='gray80')
}
for (ii in seq_along(hy$counts)) {
quads3d(rep(xmax, 4),
hy$breaks[ii]*c(.9,.9,.1,.1) + hy$breaks[ii+1]*c(.1,.1,.9,.9),
hys[ii]*c(0,1,1,0), color='gray80')
}
Summary Lines
## I use these to ensure the lines are plotted "in front of" the
## respective dot/hist
bb <- par3d('bbox')
inset <- 0.02 # percent off of the floor/wall for lines
x1 <- bb[1] + (1-inset)*diff(bb[1:2])
y1 <- bb[3] + (1-inset)*diff(bb[3:4])
z1 <- bb[5] + inset*diff(bb[5:6])
## even with draw=FALSE, dataEllipse still pops up a dev, so I create
## a dummy dev and destroy it ... better way to do this?
dev.new()
de <- dataEllipse(bvn[,1], bvn[,2], draw=FALSE, levels=0.95)
dev.off()
## the ellipse
lines3d(de[,2], de[,1], z1, color='green', lwd=3)
## the two density curves, probability-style
denx <- density(bvn[,2])
lines3d(denx$x, rep(y1, length(denx$x)), denx$y / sum(hx$density), col='red', lwd=3)
deny <- density(bvn[,1])
lines3d(rep(x1, length(deny$x)), deny$x, deny$y / sum(hy$density), col='blue', lwd=3)
Beautifications
grid3d(c('x+', 'y+', 'z-'), n=10)
box3d()
axes3d(edges=c('x-', 'y-', 'z+'))
outset <- 1.2 # place text outside of bbox *this* percentage
mtext3d('P(X)', edge='x+', pos=c(0, ymax, outset * zmax))
mtext3d('P(Y)', edge='y+', pos=c(xmax, 0, outset * zmax))
Final Product
One bonus of using rgl is that you can spin it around with your mouse and find the best perspective. Lacking making an animation for this SO page, doing all of the above should allow you the play-time. (If you spin it, you'll be able to see that the lines are slightly in front of the histograms and slightly above the scatterplot; otherwise I found intersections, so it looked noncontinuous at places.)
In the end, I find this a bit distracting (the 2D variants sufficed): showing the z-axis implies that there is a third dimension to the data; Tufte specifically discourages this behavior (Tufte, "Envisioning Information," 1990). However, with higher dimensionality, this technique of using RGL will allow significant perspective on patterns.
(For the record, Win7 x64, tested with R-3.0.3 in 32-bit and 64-bit, rgl v0.93.996, car v2.0-19.)
Create the dataframe with bvn <- as.data.frame(gibbs(10000,0.98)). Several 2d solutions in R:
1: A quick & dirty solution with the psych package:
library(psych)
scatter.hist(x=bvn$V1, y=bvn$V2, density=TRUE, ellipse=TRUE)
which results in:
2: A nice & pretty solution with ggplot2:
library(ggplot2)
library(gridExtra)
library(devtools)
source_url("https://raw.github.com/low-decarie/FAAV/master/r/stat-ellipse.R") # needed to create the 95% confidence ellipse
htop <- ggplot(data=bvn, aes(x=V1)) +
geom_histogram(aes(y=..density..), fill = "white", color = "black", binwidth = 2) +
stat_density(colour = "blue", geom="line", size = 1.5, position="identity", show_guide=FALSE) +
scale_x_continuous("V1", limits = c(-40,40), breaks = c(-40,-20,0,20,40)) +
scale_y_continuous("Count", breaks=c(0.0,0.01,0.02,0.03,0.04), labels=c(0,100,200,300,400)) +
theme_bw() + theme(axis.title.x = element_blank())
blank <- ggplot() + geom_point(aes(1,1), colour="white") +
theme(axis.ticks=element_blank(), panel.background=element_blank(), panel.grid=element_blank(),
axis.text.x=element_blank(), axis.text.y=element_blank(), axis.title.x=element_blank(), axis.title.y=element_blank())
scatter <- ggplot(data=bvn, aes(x=V1, y=V2)) +
geom_point(size = 0.6) + stat_ellipse(level = 0.95, size = 1, color="green") +
scale_x_continuous("label V1", limits = c(-40,40), breaks = c(-40,-20,0,20,40)) +
scale_y_continuous("label V2", limits = c(-20,20), breaks = c(-20,-10,0,10,20)) +
theme_bw()
hright <- ggplot(data=bvn, aes(x=V2)) +
geom_histogram(aes(y=..density..), fill = "white", color = "black", binwidth = 1) +
stat_density(colour = "red", geom="line", size = 1, position="identity", show_guide=FALSE) +
scale_x_continuous("V2", limits = c(-20,20), breaks = c(-20,-10,0,10,20)) +
scale_y_continuous("Count", breaks=c(0.0,0.02,0.04,0.06,0.08), labels=c(0,200,400,600,800)) +
coord_flip() + theme_bw() + theme(axis.title.y = element_blank())
grid.arrange(htop, blank, scatter, hright, ncol=2, nrow=2, widths=c(4, 1), heights=c(1, 4))
which results in:
3: A compact solution with ggplot2:
library(ggplot2)
library(devtools)
source_url("https://raw.github.com/low-decarie/FAAV/master/r/stat-ellipse.R") # needed to create the 95% confidence ellipse
ggplot(data=bvn, aes(x=V1, y=V2)) +
geom_point(size = 0.6) +
geom_rug(sides="t", size=0.05, col=rgb(.8,0,0,alpha=.3)) +
geom_rug(sides="r", size=0.05, col=rgb(0,0,.8,alpha=.3)) +
stat_ellipse(level = 0.95, size = 1, color="green") +
scale_x_continuous("label V1", limits = c(-40,40), breaks = c(-40,-20,0,20,40)) +
scale_y_continuous("label V2", limits = c(-20,20), breaks = c(-20,-10,0,10,20)) +
theme_bw()
which results in:
Matlab's implementation is called scatterhist and requires the Statistics Toolbox. Unfortunately it is not 3D, it is an extended 2D plot.
% some example data
x = randn(1000,1);
y = randn(1000,1);
h = scatterhist(x,y,'Location','SouthEast',...
'Direction','out',...
'Color','k',...
'Marker','o',...
'MarkerSize',4);
legend('data')
legend boxoff
grid on
It also allows grouping of datasets:
load fisheriris.mat;
x = meas(:,1); %// x-data
y = meas(:,2); %// y-data
gnames = species; %// assigning of names to certain elements of x and y
scatterhist(x,y,'Group',gnames,'Location','SouthEast',...
'Direction','out',...
'Color','kbr',...
'LineStyle',{'-','-.',':'},...
'LineWidth',[2,2,2],...
'Marker','+od',...
'MarkerSize',[4,5,6]);
R Implementation
Load library "car". We use only dataEllipse function to draw ellipse based on the percent of data (0.95 means 95% data falls within the ellipse).
library("car")
gibbs<-function (n, rho)
{
mat <- matrix(ncol = 2, nrow = n)
x <- 0
y <- 0
mat[1, ] <- c(x, y)
for (i in 2:n) {
x <- rnorm(1, rho * y, (1 - rho^2))
y <- rnorm(1, rho * x,(1 - rho^2))
mat[i, ] <- c(x, y)
}
mat
}
bvn<-gibbs(10000,0.98)
Open a PDF Device:
OUTFILE <- "bivar_dist.pdf"
pdf(OUTFILE)
Set up the layout first
layout(matrix(c(2,0,1,3),2,2,byrow=TRUE), widths=c(3,1), heights=c(1,3), TRUE)
Make Scatterplot
par(mar=c(5.1,4.1,0.1,0))
The commented lines can be used to plot a scatter diagram without "car" package from where we use dataEllipse function
# plot(bvn[,2], bvn[,1],
# pch=".",cex = 1, col=1:length(bvn[,2]),
# xlim=c(-0.6, 0.6),
# ylim=c(-0.6,0.6),
# xlab="X",
# ylab="Y")
#
# grid(NULL, NULL, lwd = 2)
dataEllipse(bvn[,2], bvn[,1],
levels = c(0.95),
pch=".",
col=1:length(bvn[,2]),
xlim=c(-0.6, 0.6),
ylim=c(-0.6,0.6),
xlab="X",
ylab="Y",
center.cex = 1
)
Plot histogram of X variable in the top row
par(mar=c(0,4.1,3,0))
hist(bvn[,2],
ann=FALSE,axes=FALSE,
col="light blue",border="black",
)
title(main = "Bivariate Normal Distribution")
Plot histogram of Y variable to the right of the scatterplot
yhist <- hist(bvn[,1],
plot=FALSE
)
par(mar=c(5.1,0,0.1,1))
barplot(yhist$density,
horiz=TRUE,
space=0,
axes=FALSE,
col="light blue",
border="black"
)
dev.off(which = dev.cur())
dataEllipse(bvn[,2], bvn[,1],
levels = c(0.5, 0.95),
pch=".",
col= 1:length(bvn[,2]),
xlim=c(-0.6, 0.6),
ylim=c(-0.6,0.6),
xlab="X",
ylab="Y",
center.cex = 1
)
I took #jaap's code above and turned it into a slightly more generalized function. The code can be sourced here. Note: I am not adding anything new to #jaap's code, just a few minor changes and wrapped it in a function. Hopefully it is helpful.
density.hist <- function(df, x=NULL, y=NULL) {
require(ggplot2)
require(gridExtra)
require(devtools)
htop <- ggplot(data=df, aes_string(x=x)) +
geom_histogram(aes(y=..density..), fill = "white", color = "black", bins=100) +
stat_density(colour = "blue", geom="line", size = 1, position="identity", show.legend=FALSE) +
theme_bw() + theme(axis.title.x = element_blank())
blank <- ggplot() + geom_point(aes(1,1), colour="white") +
theme(axis.ticks=element_blank(), panel.background=element_blank(), panel.grid=element_blank(),
axis.text.x=element_blank(), axis.text.y=element_blank(), axis.title.x=element_blank(),
axis.title.y=element_blank())
scatter <- ggplot(data=df, aes_string(x=x, y=y)) +
geom_point(size = 0.6) + stat_ellipse(type = "norm", linetype = 2, color="green",size=1) +
stat_ellipse(type = "t",color="green",size=1) +
theme_bw() + labs(x=x, y=y)
hright <- ggplot(data=df, aes_string(x=x)) +
geom_histogram(aes(y=..density..), fill = "white", color = "black", bins=100) +
stat_density(colour = "red", geom="line", size = 1, position="identity", show.legend=FALSE) +
coord_flip() + theme_bw() + theme(axis.title.y = element_blank())
grid.arrange(htop, blank, scatter, hright, ncol=2, nrow=2, widths=c(4, 1), heights=c(1, 4))
}

How to convert lattice-based graphics to ggplot2?

The attached script performs equivalence tests on sample variables x, y and z.
equivalence.xyplot() is really handy, although the base lattice graphics are a pain to work with. How can I use ggplot2 to plot these data rather than the base lattice graphics?
Edit:
For example, using ggplot(plot1) returns the following error:
Error: ggplot2 doesn't know how to deal with data of class trellis
I'm not sure where to begin converting the trellis class of data to ggplot2 format. Any specific advice on converting trellis-based graphics to ggplot2 would be appreciated.
require(equivalence)
require(gridExtra)
require(lattice)
x = c(1,4,3,5,3,7,8,6,7,8,9)
y = c(1,5,4,5,3,6,7,6,7,2,8)
z = c(2,4,3,5,4,7,8,5,6,6,9)
mydata = data.frame(x,y,z)
plot1 = equivalence.xyplot(mydata$x~mydata$y,alpha=0.05, b0.ii=0.25, b1.ii=0.25)
plot2 = equivalence.xyplot(mydata$x~mydata$z,alpha=0.05, b0.ii=0.25, b1.ii=0.25)
plot3 = equivalence.xyplot(mydata$y~mydata$z,alpha=0.05, b0.ii=0.25, b1.ii=0.25)
# Combine plots into one figure
grid.arrange(plot1, plot2, plot3, ncol=2)
This is not a final solution but a good start . I just go through lattice panel function and replace :
xyplot ----------> geom_point
panel.abline ----------> geom_abline
grid.polygon ----------> geom_polygon
panel.loess ----------> stat_smooth
panel.arrows ----------> geom_errobar
For each geom, I create a data.frame which components are the data passed to the lattice function. For example :
panel.arrows(x.bar, ybar.hat$fit + ybar.hat$se.fit *
t.quant, x.bar, ybar.hat$fit - ybar.hat$se.fit *
t.quant, col = "darkgrey", length = 0.05, angle = 90,
code = 3)
becomes :
dat.arrow <- data.frame(x=x.bar, ymax= ybar.hat$fit + ybar.hat$se.fit *
t.quant, ymin= ybar.hat$fit - ybar.hat$se.fit *
t.quant)
pl <- pl + geom_errorbar(data=dat.arrow, aes(x,ymin=ymin,ymax=ymax),
col = "darkgrey", width = 0.10)
The final result is a new function equivalence.ggplot that take the same parameters as equivalence.xyplot:
equivalence.ggplot <- function(x,y, alpha, b0.ii, b1.ii,
b0.absolute = FALSE,add.smooth=FALSE){
x.bar <- mean(x, na.rm = TRUE)
min.x <- min(x, na.rm = TRUE)
max.x <- max(x, na.rm = TRUE)
the.model <- lm(y ~ x)
if (b0.absolute)
y.poly <- x.bar + b0.ii * c(-1, 1, 1, -1)
else y.poly <- x.bar * (1 + b0.ii * c(-1, 1, 1, -1))
dat.poly <- data.frame(x = c(min.x, min.x, max.x, max.x),
y = y.poly)
dat <- data.frame(x,y)
p <- function(dat,dat.poly){
h <- ggplot(dat) +
geom_polygon(data=dat.poly,aes(x,y),col = "light gray", fill = gray(0.9)) +
geom_point(aes(x,y)) +
stat_smooth(data=dat,col='black',
aes(x=x,y=y),method="lm", se=FALSE,
fullrange =TRUE)+
theme_bw()
if (add.smooth)
h <- h + geom_smooth(aes(x,y),method='loess')
h
}
pl <- p(dat,dat.poly)
n <- sum(complete.cases(cbind(x, y)))
ybar.hat <- predict(the.model, newdata = data.frame(x = x.bar),
se = TRUE)
t.quant <- qt(1 - alpha/2, df.residual(the.model))
dat.arrow <- data.frame(x=x.bar, ymax= ybar.hat$fit + ybar.hat$se.fit *
t.quant, ymin= ybar.hat$fit - ybar.hat$se.fit *
t.quant)
pl <- pl +
geom_errorbar(data=dat.arrow, aes(x,ymin=ymin,ymax=ymax),
col = "darkgrey", width = 0.10)
pl
se.slope <- coef(summary(the.model))[2, 2]
dat.arrow1 <- data.frame(x=x.bar, ymax= ybar.hat$fit + se.slope * t.quant *
x.bar, ymin=ybar.hat$fit - se.slope * t.quant *
x.bar)
pl <- pl +
geom_errorbar(data=dat.arrow1, aes(x,ymin=ymin,ymax=ymax),
col = "black", width = 0.10)
addLines <- function(pl,the.model){
pl <- pl + geom_abline(intercept = coef(summary(the.model))[1, 1], slope = 1 -
b1.ii, col = "darkgrey", lty = 2) +
geom_abline(intercept = coef(summary(the.model))[1, 1], slope = 1 +
b1.ii, col = "darkgrey", lty = 2)
}
pl <- addLines(pl,the.model)
pl
}
Comparing the lattice and the ggplot2 result :
library(gridExtra)
p.gg <- equivalence.ggplot(mydata$x,mydata$y,alpha=0.05, b0.ii=0.25, b1.ii=0.25)
p.lat <- equivalence.xyplot(mydata$y~mydata$x,alpha=0.05, b0.ii=0.25, b1.ii=0.25)
grid.arrange(p.gg,p.lat)

Resources