Is there a better way to plot data with duplicated points than this one? The duplication is not clear in this plot.
library(ggplot2)
p <- ggplot(output, aes(output$Longitudes, output$Latitudes))
p + geom_text(aes(x = jitter(output$Longitudes), y = jitter(output$Latitudes)),
check_overlap = FALSE, size = 5)
p + geom_point(position = "jitter")
The purpose of showing the duplication at a particular point is to convey how many occurrences there are.
To visualize duplicate points you can:
Add jitter (e.g., use geom_jitter)
Lower the alpha (e.g., alpha = 0.1)
Decrease the size of the points (e.g., size = 1)
Change the shape of the points (e.g., shape = 21)
Code:
# Generate data
df <- reshape2::melt(data.frame(A = rep(0, 1e3), B = rep(1, 1e3)))
# Plot data
library(ggplot2)
ggplot(df, aes(variable, value)) +
geom_jitter(alpha = 0.5, size = 2, shape = 21) +
theme_classic()
Plot:
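Since the stated goal is to convey how many observations fall on the same point, another option (not used in the answer above) is geom_count(), which counts the overlapping points at each location and maps that count to point size; a minimal sketch on the same toy data:
library(ggplot2)
# geom_count() tallies the observations at each distinct (x, y) and maps the count to size
ggplot(df, aes(variable, value)) +
  geom_count() +
  scale_size_area() +
  theme_classic()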
This is actually one of my biggest gripes about ggplot.
So much so that I wrote my own solution (other than jitter / alpha).
Solution
Essentially it is a new "position" called position_bunch, which distributes the points at each unique (X,Y) according to a pattern. It can be used like:
g = ggplot(...) +
geom_point(
position = position_bunch(
shape = 'hex',
width = .7,
sort = TRUE
)
)
yielding stuff like:
Implementation
position_bunch = function(shape='hex',width=0.5,sort=1) {
if (shape == 'hex') {
n.layer.fun = n.layer.hex
delta.fun = delta.hex
}
if (shape == 'square') {
n.layer.fun = n.layer.square
delta.fun = delta.square
}
if (shape == 'spiral') {
n.layer.fun = n.layer.spiral
delta.fun = delta.spiral
}
if (sort) {
sort.fun = sorting.fun
} else {
sort.fun = function(delta, dir) delta  # base identity() takes a single argument, so use a no-op that also accepts the direction
}
cols = c('x','y')
return(ggproto('PositionBunch',Position,
required_aes = cols,
compute_layer = function(self,data,params,layout) {
select = function(x,y) {
return((data$x==x) & (data$y==y))
}
u = unique(data[,cols])
n = mapply(function(x,y) {sum(select(x,y))},u$x,u$y)
l.max = n.layer.fun(max(n))
delta = sort.fun(delta.fun(l.max),1)
for (i in 1:nrow(u)) {
rows = select(u$x[i],u$y[i])
delta.i = sort.fun(delta[1:sum(rows),cols],sort) * (width/2/l.max)
data[rows,cols] = data[rows,cols] + delta.i
}
return(data)
})
)
}
sorting.fun = function(delta,dir) {
distance = apply(delta,1,function(d){sum(d^2)})
return(delta[order(distance,decreasing=(dir==-1)),])
}
# -----------------------------------------------------------------------------
# hex
n.layer.hex = function(n) {
return(floor(1+(-3+sqrt(9+12*(n-1)))/6))
}
delta.hex = function(layers) {
yv = sqrt(3)/2; yh = 0; xv = 0.5; xh = 1;
rep.steps = function(steps,layer,dim) {
steps = rep(steps,each=layer)
steps[1] = steps[1] + xv*(dim=='x') - yv*(dim=='y')
return(steps)
}
dx = 0; dy = 0;
for (layer in 1:layers) {
dx = c(dx,rep.steps(c(+xv,-xv,-xh,-xv,+xv,+xh),layer,dim='x'))
dy = c(dy,rep.steps(c(+yv,+yv, yh,-yv,-yv, yh),layer,dim='y'))
}
return(data.frame(x=cumsum(dx),y=cumsum(dy)))
}
# -----------------------------------------------------------------------------
# square
n.layer.square = function(n) {
return(floor(1+(-2+sqrt(4+8*(n-1)))/4))
}
delta.square = function(layers) {
yv = 1; yh = 0; xv = 0; xh = 1;
rep.steps = function(steps,layer,dim) {
steps = rep(steps,each=2*layer)
steps[1] = steps[1] + xh*(dim=='x') - yv*(dim=='y')
return(steps)
}
dx = 0; dy = 0;
for (layer in 1:layers) {
dx = c(dx,rep.steps(c( xv,-xh, xv,+xh),layer,dim='x'))
dy = c(dy,rep.steps(c(+yv, yh,-yv, yh),layer,dim='y'))
}
return(data.frame(x=cumsum(dx),y=cumsum(dy)))
}
# -----------------------------------------------------------------------------
# spiral
f.spiral = pi*(1+sqrt(5))
n.layer.spiral = function(n) {
return(ceiling(n/f.spiral))
}
delta.spiral = function(layers){
i = 0:ceiling(layers*f.spiral)
r = layers/2*sqrt(i/layers)
t = pi*(1+sqrt(5))*i
dx = r * cos(t)
dy = r * sin(t)
return(data.frame(x=cumsum(dx),y=cumsum(dy)))
}
Test Code
library('ggplot2')
library('gridExtra')
library('viridis')
source('ggpositions.r')
set.seed(1234)
g.list = list()
for (N in c(10,100,500)){
data = data.frame(
x = factor(floor(runif(N,1,3+1)),labels=c('A','B','C')),
y = factor(floor(runif(N,1,3+1))),
z = rev(sort(runif(N,1,N)))
)
for (shape in c('hex','square','spiral')){
g = ggplot(data,aes(x=x,y=y,color=z)) +
geom_point(position=position_bunch(
shape = shape,
width = .7
),size=sqrt(2)/log10(N)) +
scale_color_viridis() +
xlab(NULL) + ylab(NULL) +
theme(legend.position='none')
g.list[[length(g.list)+1]] = g
}
}
G = do.call(arrangeGrob,g.list)
ggsave('test.png',G)
Notes
It's a work in progress -- feedback welcome!
I've only tested it with geom_point using aes(x= ,y= ) so far
Point sizes are hard to scale reliably, so you may have to tinker manually
After cleaning & testing, I plan to upload to the ggplot2 extensions library
I have the following task statement:
In this task we want to simulate random variables with density f(x) = exp(-|x|) / 2.
To do this, write a function r_density(n) that simulates n of such random variables.
Then use this function to simulate N = 1000 of such random variables. Using geom_density() you can now estimate the density from the simulated random variables. We can compare this estimate with the real density. To do this, create a graph that looks like this:
Problem is, however, that I don't understand why my output looks like this:
Why is the estimated density plotted in such a weird way? Can someone explain why it looks like that and how I can get an estimated density that matches the expected image?
This is the corresponding code I wrote for the above plot:
library(tidyverse)
N <- 1000
r_density <- function(n){
exp(-abs(n))/2
}
x <- runif(N)
tb <- tibble(
x = x,
density_fkt = r_density(x)
)
ggplot() +
geom_density(
data = tb,
mapping = aes(
x = density_fkt,
y = ..scaled..
)
) +
geom_function(
fun = r_density,
xlim = c(-6,6),
color = "red",
size = 1
) +
theme_minimal() +
labs(
x = "x",
y = "Dichtefunktion f(x)",
title = "Geschätzte (schwarz) vs echte (rot) Dichte"
)
The problem is that your code never draws samples from the target density: runif(N) gives uniform values on [0, 1] and r_density(x) only evaluates the density at those values, so geom_density() ends up estimating the distribution of density values rather than the distribution with density exp(-|x|)/2. To actually simulate, you may use inverse transform sampling or rejection sampling. I chose rejection sampling.
library(tidyverse)
N <- 1000
r_density <- function(n){
exp(-abs(n))/2
}
x = c()
while (length(x) < N) {
y = rnorm(1)
while (y > 6 | y < -6) {
y = rnorm(1)
}
u = runif(1)
if (u < r_density(y)/(dnorm(y) * 3)) {
x=append(x, y)
}
}
tb <- tibble(
x = x,
density_fkt = r_density(x)
)
ggplot() +
geom_density(
data = tb,
mapping = aes(
x = x,
y = ..density..
)
) +
geom_function(
fun = r_density,
xlim = c(-6,6),
color = "red",
size = 1
) +
theme_minimal() +
labs(
x = "x",
y = "Dichtefunktion f(x)",
title = "Geschätzte (schwarz) vs echte (rot) Dichte"
)
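As a quick sanity check (not part of the original answer), the standard Laplace density exp(-|x|)/2 has mean 0 and variance 2, so the simulated values can be compared against those:
# moments of the simulated sample vs. the theoretical Laplace(0, 1) values
mean(x)  # should be close to 0
var(x)   # should be close to 2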
Here's the inverse transform sampling method (this involves some difficult integration, so perhaps not what your teacher intended)
r_density <- function(n) {
cdf <- function(x) {
1/4 * exp(-x) * (-1 + 2 * exp(x) + exp(2*x) - (-1 + exp(x))^2 * sign(x))
}
sapply(runif(n), function(i) {
uniroot(function(x) cdf(x) - i, c(-30, 20))$root
})
}
Plotting gives:
ggplot() +
geom_density(aes(r_density(1000))) +
geom_function(
fun = function(x) exp(-abs(x))/2,
xlim = c(-6,6),
color = "red",
size = 1
) +
theme_minimal() +
labs(
x = "x",
y = "Dichtefunktion f(x)",
title = "Geschätzte (schwarz) vs echte (rot) Dichte"
)
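As an aside, exp(-|x|)/2 is the standard Laplace density, whose CDF inverts in closed form, so the numeric uniroot() step can be avoided entirely. A minimal sketch of that variant (the function name r_density_laplace is just an illustrative label, not from the answer above):
# closed-form inverse CDF (quantile function) of the standard Laplace distribution:
# Q(u) = -sign(u - 1/2) * log(1 - 2 * |u - 1/2|)
r_density_laplace <- function(n) {
  u <- runif(n)
  -sign(u - 0.5) * log(1 - 2 * abs(u - 0.5))
}
It can be dropped into the same plotting code in place of r_density(1000).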
I have been attempting to solve this issue for a considerable amount of time with no success. I am creating multiple partial dependence plots (PDPs) and using a package called zenplots to lay them out. However, the issue I am having is that I cannot figure out a way to have a common legend for the multiple plots. I have tried placing them on a grid and plotting, and tried changing the positioning of the grobs... but I can't figure it out. For example:
In the above plot, all PDPs are on the same scale and I would like a single legend. Currently, when I produce the image, it plots a legend for each individual plot, whereas what I want is something like the image below (which I made in Photoshop):
The code I'm providing to produce the plots is somewhat long (which I hope won't deter people)... but essentially it's only the ggplot part of the code that I need to manipulate. That is, I'm creating the actual ggplot on lines 103-105 and, more generally, between lines 103-125, where I use gtable to build the plots. For example, changing the color argument on line 115 to guides(fill = FALSE, color = "colour bar") will create the legend for each plot... setting color = FALSE will remove the legends.
Below is the code used to make the plots and its application to the airquality data:
library(randomForest)
library(ggplot2)
library(dplyr)
pdpLayout <- function(data,
fit,
response,
pal = rev(RColorBrewer::brewer.pal(11, "RdYlBu")),
gridSize = 10,
nmax = 500,
class = 1,
rug = TRUE,
...) {
data <- na.omit(data)
# if (is.numeric(nmax) && nmax < nrow(data)) {
# data <- data[sample(nrow(data), nmax), , drop = FALSE]
# }
gridSize <- min(gridSize, nmax)
predData <- predict(fit, data)
vars <- names(data)
vars <- vars[-match(response, vars)]
datap <- data[,vars]
zpath <- 1:length(vars)
zdata <- datap
zpairs <- t(sapply(1:(length(zpath)-1), function(i){
z <- zpath[i:(i+1)]
if (i %% 2 == 0) rev(z) else z
}))
zpairs <- cbind(vars[zpairs[, 1]], vars[zpairs[, 2]])
# loop through vars and create a list of pdps for each pair
pdplist <- vector("list", nrow(zpairs))
for (i in 1:nrow(zpairs)) {
ind <- zpairs[i, ]
if (!is.na(ind[1])) {
px <- pdp_data(data, ind, gridsize = gridSize)
px$.pid <- i
pdplist[[i]] <- px
} else {
pdplist[[i]] <- NULL
}
}
pdplist <- bind_rows(pdplist)
pdplist$fit <- predict(fit, pdplist)
pdplist <- split(pdplist, pdplist$.pid)
pdplist0 <- vector("list", nrow(zpairs))
j <- 1
for (i in 1:nrow(zpairs)) {
ind <- zpairs[i, ]
if (!is.na(ind[1])) {
pdplist0[[i]] <- pdplist[[j]] %>%
group_by(.data[[ind[1]]], .data[[ind[2]]]) %>%
summarise(fit = mean(fit))
j <- j + 1
} else {
pdplist0[[i]] <- NULL
}
}
pdplist <- pdplist0
pdplist0 <- NULL
names(pdplist) <- paste(zpairs[, 2], zpairs[, 1], sep = "pp")
message("Finished ice/pdp")
# Set limits for pairs
pdplist0 <- pdplist[!sapply(pdplist, is.null)]
r <- range(sapply(pdplist0, function(x) range(x$fit)))
limits <- range(labeling::rpretty(r[1], r[2]))
# Zenplot graphing function
data$pred <- predData
z2index <- 0
pdpnn <- function(zargs) {
z2index <<- z2index + 1
vars <- zpairs[z2index, ]
pdp <- pdplist[[z2index]]
if (!is.null(pdp)) {
if (is.factor(pdp[[vars[1]]]) + is.factor(pdp[[vars[2]]]) == 1) {
if (is.factor(pdp[[vars[1]]])) vars <- rev(vars)
p <- ggplot(data = pdp, aes(x = .data[[vars[1]]], y = fit, color = .data[[vars[2]]])) +
geom_line() +
geom_rug(data = data, sides = "b", aes(y = .data[["pred"]]))
} else {
if (is.factor(pdp[[vars[1]]])) posx <- "jitter" else posx <- "identity"
if (is.factor(pdp[[vars[2]]])) posy <- "jitter" else posy <- "identity"
p <- ggplot(data = pdp, aes(x = .data[[vars[1]]], y = .data[[vars[2]]])) +
geom_tile(aes(fill = fit)) +
scale_fill_gradientn(name = "y-hat", colors = pal, limits = limits, oob = scales::squish)
if (rug) {
p <- p +
geom_rug(data = data, sides = "b", position = posx, aes(color = .data[["pred"]])) +
geom_rug(data = data, sides = "l", position = posy, aes(color = .data[["pred"]])) +
scale_color_gradientn(name = "y-hat", colors = pal, limits = limits, oob = scales::squish)
}
}
p <- p +
guides(fill = FALSE, color = FALSE) +
theme_bw() +
theme(
axis.line = element_blank(),
axis.ticks = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
panel.border = element_rect(colour = "gray", fill = NA, size = 1.5)
)
} else {
p <- ggplot() +
theme(panel.background = element_blank())
}
ggplot_gtable(ggplot_build(p))
}
suppressMessages({
zenplots::zenplot(zdata,
pkg = "grid", labs = list(group = NULL),
plot2d = pdpnn, ...
)
})
}
pdp_data <- function(d, var, gridsize = 30) {
if (length(var) == 1) {
pdpvar <- d[[var]]
if (is.factor(pdpvar)) {
gridvals <- levels(pdpvar)
} else {
gridvals <- seq(min(pdpvar, na.rm = T), max(pdpvar, na.rm = T), length.out = gridsize)
}
dnew <- do.call(rbind, lapply(gridvals, function(i) {
d1 <- d
d1[[var]] <- i
d1
}))
if (is.factor(pdpvar)) dnew[[var]] <- factor(dnew[[var]], levels = levels(pdpvar), ordered = is.ordered(pdpvar))
}
else {
pdpvar1 <- d[[var[1]]]
pdpvar2 <- d[[var[2]]]
if (is.factor(pdpvar1)) {
gridvals1 <- levels(pdpvar1)
} else {
gridvals1 <- seq(min(pdpvar1, na.rm = T), max(pdpvar1, na.rm = T), length.out = gridsize)
}
if (is.factor(pdpvar2)) {
gridvals2 <- levels(pdpvar2)
} else {
gridvals2 <- seq(min(pdpvar2, na.rm = T), max(pdpvar2, na.rm = T), length.out = gridsize)
}
gridvals <- expand.grid(gridvals1, gridvals2)
dnew <- do.call(rbind, lapply(1:nrow(gridvals), function(i) {
d1 <- d
d1[[var[1]]] <- gridvals[i, 1]
d1[[var[2]]] <- gridvals[i, 2]
d1
}))
if (is.factor(pdpvar1)) dnew[[var[1]]] <- factor(dnew[[var[1]]], levels = levels(pdpvar1), ordered = is.ordered(pdpvar1))
if (is.factor(pdpvar2)) dnew[[var[2]]] <- factor(dnew[[var[2]]], levels = levels(pdpvar2), ordered = is.ordered(pdpvar2))
}
dnew$.id <- 1:nrow(d)
rownames(dnew) <- NULL
dnew
}
Now use some data to create the plots:
aq <- na.omit(airquality)
rf <- randomForest(Ozone~., data = aq)
pdpLayout(aq, rf, "Ozone")
Any help or suggestions are greatly appreciated.
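One common pattern for getting a single shared legend across a grid of ggplots (a sketch only, under the assumption that the panels are available as ordinary ggplot objects sharing the same scale limits; the zenplots route above converts them to gtables, so it would need adapting) is to suppress the per-panel legends with theme(legend.position = "none"), extract one legend grob, and arrange it next to the panels, e.g. with cowplot:
library(ggplot2)
library(cowplot)

# two hypothetical panels that share the same colour scale and limits
pal <- rev(RColorBrewer::brewer.pal(11, "RdYlBu"))
p1 <- ggplot(mtcars, aes(wt, mpg, color = disp)) + geom_point() +
  scale_color_gradientn(name = "y-hat", colors = pal, limits = c(70, 480))
p2 <- ggplot(mtcars, aes(hp, mpg, color = disp)) + geom_point() +
  scale_color_gradientn(name = "y-hat", colors = pal, limits = c(70, 480))

shared_legend <- get_legend(p1)                       # keep one legend grob
panels <- plot_grid(p1 + theme(legend.position = "none"),
                    p2 + theme(legend.position = "none"))
plot_grid(panels, shared_legend, rel_widths = c(1, 0.2))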
I'm making a plot of a 2D random walk using R and the ggplot2 library. It works, but I would like to show where the starting point and ending point are in my random walk plot.
I tried to create another geom_point and append it to the existing ggplot, but it did not work. Any suggestions? Thanks!
library(ggplot2)

x = 0
y = 0
vec1 <- vector()
xcor <- vector()
ycor <- vector()
number = 1000
list_num = c(1,2,3,4)
move = sample(list_num, size = number, replace = TRUE)
for (i in 1:number) {
if (move[i] == 1) {
x = x + 1
}
else if (move[i] == 2) {
x = x - 1
}
else if (move[i] == 3) {
y = y + 1
}
else if (move[i] == 4) {
y = y - 1
}
vec1 <- c(vec1, i)
xcor <- c(xcor, x)
ycor <- c(ycor, y)
}
df_randomwalk = data.frame(vec1, xcor, ycor)
ggplot(df_randomwalk, aes(x = xcor, y = ycor)) +
geom_point(alpha = 0.1, size = 0.3) + geom_path() +
theme_minimal()
This should do it:
library(dplyr)
start <- df_randomwalk %>% filter(vec1 == min(df_randomwalk$vec1))
end <- df_randomwalk %>% filter(vec1 == max(df_randomwalk$vec1))
ggplot(df_randomwalk, aes(x = xcor, y = ycor)) +
geom_point(alpha = 0.1, size = 0.3) + geom_path() +
theme_minimal() +
geom_point(start, mapping = aes(x = xcor, y = ycor), colour = "red", size = 1) +
geom_point(end, mapping = aes(x = xcor, y = ycor), colour = "blue", size = 1)
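Equivalently, since vec1 runs from 1 to number, the start and end rows can be taken without dplyr:
# first and last positions of the walk
start <- head(df_randomwalk, 1)
end <- tail(df_randomwalk, 1)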
I have a fairly large dataframe (df) with pathing information in the form of continuous x,y coordinates:
df$x
df$y
With these data, I would like to:
1. Calculate a set of continuous vectors
2. Determine the angle between each of these vectors (in degrees)
3. Count the number of angles in the dataframe that meet a certain threshold (i.e. <90°)
Thank you!
Please see the post here for reference
require("ggplot2")
Hypocycloid <- function(num_points) {
r = 1
k = 3
theta = seq(from = 0, to = 2*pi, length.out = num_points)
x = r*(k - 1)*cos(theta) + r*cos((k - 1)*theta)
y = r*(k - 1)*sin(theta) - r*sin((k - 1)*theta)
df = data.frame(x = x, y = y)
gg1 = ggplot(df,
aes(x = x, y = y),
size = 1) +
geom_path()
print(gg1)
return(df)
}
ComputeUnitVectors <- function(points_df) {
npoints = nrow(points_df)
vx = points_df$x[2:npoints] - points_df$x[1:(npoints-1)]
vy = points_df$y[2:npoints] - points_df$y[1:(npoints-1)]
length = sqrt(vx^2 + vy^2)
return(data.frame(vx = vx/length, vy = vy/length))
}
ComputeAngles <- function(vectors_df) {
Angle <- function(v1, v2) {
return(acos(as.numeric(v1) %*% as.numeric(v2))*180/pi)
}
nvectors = nrow(vectors_df)
v1 = vectors_df[1:(nvectors-1),]
v2 = vectors_df[2:nvectors,]
v_df = cbind(v1, v2)
angle = apply(v_df, 1, function(row) {Angle(row[1:2], row[3:4])})
return(data.frame(angle))
}
points.df = Hypocycloid(20)
vectors.df = ComputeUnitVectors(points.df)
print(vectors.df)
angles.df = ComputeAngles(vectors.df)
print(angles.df)
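For step 3 of the question (counting how many angles fall below a given threshold, e.g. 90°), a one-liner on the resulting data frame is enough:
# number of angles sharper than 90 degrees
sum(angles.df$angle < 90, na.rm = TRUE)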
I'm trying to plot a principal component analysis using prcomp and ggbiplot. I'm getting data values outside of the unit circle, and haven't been able to rescale the data prior to calling prcomp in such a way that I can constrain the data to the unit circle.
require(ggbiplot)
data(wine)
wine.pca = prcomp(wine[,1:3], scale. = TRUE)
ggbiplot(wine.pca, obs.scale = 1,
var.scale = 1, groups = wine.class, ellipse = TRUE, circle = TRUE)
I tried scaling by subtracting mean and dividing by standard deviation before calling prcomp:
wine2 = wine[,1:3]
mean = apply(wine2, 2, mean)
sd = apply(wine2, 2, sd)
for (i in 1:ncol(wine2)) {
wine2[,i] = (wine2[,i] - mean[i]) / sd[i]
}
wine2.pca = prcomp(wine2, scale. = TRUE)
ggbiplot(wine2.pca, obs.scale = 1,
var.scale = 1, groups = wine.class, ellipse = TRUE, circle = TRUE)
ggbiplot package installed as follows:
require(devtools)
install_github('vqv/ggbiplot')
Output of either code chunk:
Per @Brian Hanson's comment below, I'm adding an additional image reflecting the output I'm trying to get.
I edited the code for the plot function and was able to get the functionality I wanted.
ggbiplot2=function(pcobj, choices = 1:2, scale = 1, pc.biplot = TRUE,
obs.scale = 1 - scale, var.scale = scale,
groups = NULL, ellipse = FALSE, ellipse.prob = 0.68,
labels = NULL, labels.size = 3, alpha = 1,
var.axes = TRUE,
circle = FALSE, circle.prob = 0.69,
varname.size = 3, varname.adjust = 1.5,
varname.abbrev = FALSE, ...)
{
library(ggplot2)
library(plyr)
library(scales)
library(grid)
stopifnot(length(choices) == 2)
# Recover the SVD
if(inherits(pcobj, 'prcomp')){
nobs.factor <- sqrt(nrow(pcobj$x) - 1)
d <- pcobj$sdev
u <- sweep(pcobj$x, 2, 1 / (d * nobs.factor), FUN = '*')
v <- pcobj$rotation
} else if(inherits(pcobj, 'princomp')) {
nobs.factor <- sqrt(pcobj$n.obs)
d <- pcobj$sdev
u <- sweep(pcobj$scores, 2, 1 / (d * nobs.factor), FUN = '*')
v <- pcobj$loadings
} else if(inherits(pcobj, 'PCA')) {
nobs.factor <- sqrt(nrow(pcobj$call$X))
d <- unlist(sqrt(pcobj$eig)[1])
u <- sweep(pcobj$ind$coord, 2, 1 / (d * nobs.factor), FUN = '*')
v <- sweep(pcobj$var$coord,2,sqrt(pcobj$eig[1:ncol(pcobj$var$coord),1]),FUN="/")
} else {
stop('Expected a object of class prcomp, princomp or PCA')
}
# Scores
df.u <- as.data.frame(sweep(u[,choices], 2, d[choices]^obs.scale, FUN='*'))
# Directions
v <- sweep(v, 2, d^var.scale, FUN='*')
df.v <- as.data.frame(v[, choices])
names(df.u) <- c('xvar', 'yvar')
names(df.v) <- names(df.u)
if(pc.biplot) {
df.u <- df.u * nobs.factor
}
# Scale the radius of the correlation circle so that it corresponds to
# a data ellipse for the standardized PC scores
r <- 1
# Scale directions
v.scale <- rowSums(v^2)
df.v <- df.v / sqrt(max(v.scale))
## Scale scores so the furthest observation lies just inside the unit circle
r.scale = sqrt(max(df.u[,1]^2 + df.u[,2]^2))
df.u = .99 * df.u / r.scale
# Change the labels for the axes
if(obs.scale == 0) {
u.axis.labs <- paste('standardized PC', choices, sep='')
} else {
u.axis.labs <- paste('PC', choices, sep='')
}
# Append the proportion of explained variance to the axis labels
u.axis.labs <- paste(u.axis.labs,
sprintf('(%0.1f%% explained var.)',
100 * pcobj$sdev[choices]^2/sum(pcobj$sdev^2)))
# Score Labels
if(!is.null(labels)) {
df.u$labels <- labels
}
# Grouping variable
if(!is.null(groups)) {
df.u$groups <- groups
}
# Variable Names
if(varname.abbrev) {
df.v$varname <- abbreviate(rownames(v))
} else {
df.v$varname <- rownames(v)
}
# Variables for text label placement
df.v$angle <- with(df.v, (180/pi) * atan(yvar / xvar))
df.v$hjust = with(df.v, (1 - varname.adjust * sign(xvar)) / 2)
# Base plot
g <- ggplot(data = df.u, aes(x = xvar, y = yvar)) +
xlab(u.axis.labs[1]) + ylab(u.axis.labs[2]) + coord_equal()
if(var.axes) {
# Draw circle
if(circle)
{
theta <- c(seq(-pi, pi, length = 50), seq(pi, -pi, length = 50))
circle <- data.frame(xvar = r * cos(theta), yvar = r * sin(theta))
g <- g + geom_path(data = circle, color = muted('white'),
size = 1/2, alpha = 1/3)
}
# Draw directions
g <- g +
geom_segment(data = df.v,
aes(x = 0, y = 0, xend = xvar, yend = yvar),
arrow = arrow(length = unit(1/2, 'picas')),
color = muted('red'))
}
# Draw either labels or points
if(!is.null(df.u$labels)) {
if(!is.null(df.u$groups)) {
g <- g + geom_text(aes(label = labels, color = groups),
size = labels.size)
} else {
g <- g + geom_text(aes(label = labels), size = labels.size)
}
} else {
if(!is.null(df.u$groups)) {
g <- g + geom_point(aes(color = groups), alpha = alpha)
} else {
g <- g + geom_point(alpha = alpha)
}
}
# Overlay a concentration ellipse if there are groups
if(!is.null(df.u$groups) && ellipse) {
theta <- c(seq(-pi, pi, length = 50), seq(pi, -pi, length = 50))
circle <- cbind(cos(theta), sin(theta))
ell <- ddply(df.u, 'groups', function(x) {
if(nrow(x) < 2) {
return(NULL)
} else if(nrow(x) == 2) {
sigma <- var(cbind(x$xvar, x$yvar))
} else {
sigma <- diag(c(var(x$xvar), var(x$yvar)))
}
mu <- c(mean(x$xvar), mean(x$yvar))
ed <- sqrt(qchisq(ellipse.prob, df = 2))
data.frame(sweep(circle %*% chol(sigma) * ed, 2, mu, FUN = '+'),
groups = x$groups[1])
})
names(ell)[1:2] <- c('xvar', 'yvar')
g <- g + geom_path(data = ell, aes(color = groups, group = groups))
}
# Label the variable axes
if(var.axes) {
g <- g +
geom_text(data = df.v,
aes(label = varname, x = xvar, y = yvar,
angle = angle, hjust = hjust),
color = 'darkred', size = varname.size)
}
# Change the name of the legend for groups
# if(!is.null(groups)) {
# g <- g + scale_color_brewer(name = deparse(substitute(groups)),
# palette = 'Dark2')
# }
# TODO: Add a second set of axes
return(g)
}
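It can then be called exactly like the original, for example (assuming ggbiplot2 has been sourced and the wine data loaded as above):
wine.pca = prcomp(wine[,1:3], scale. = TRUE)
ggbiplot2(wine.pca, obs.scale = 1,
var.scale = 1, groups = wine.class, ellipse = TRUE, circle = TRUE)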