Based on the help below, I tried this script for plotting a PCA with convex hulls, without success. Any idea how I can solve it?
# Attempt to overlay one convex hull per group on a ggbiplot PCA plot.
library(ggbiplot)
library(plyr)
data <-read.csv("C:/Users/AAA.csv")
# PCA on the first nine columns, each scaled to unit variance.
my.pca <- prcomp(data[,1:9] , scale. = TRUE)
# Returns the rows of its argument that lie on the convex hull of the first
# two columns of `$x`.
# NOTE(review): the body indexes the argument like a data frame (`[rows, ]`)
# while also reading `$x` as on a prcomp result -- confirm the intended input.
find_hull <- function(my.pca) my.pca[chull(my.pca$x[,1], my.pca$x[,2]), ]
# NOTE(review): ddply() receives the prcomp object created above rather than a
# data frame containing a "Group" column; presumably this is where it fails.
hulls <- ddply(my.pca , "Group", find_hull)
# Biplot coloured by group, with the hull polygons added as an extra layer.
ggbiplot(my.pca, obs.scale = 1, var.scale = 1,groups = data$Group) +
scale_color_discrete(name = '') + geom_polygon(data=hulls, alpha=.2) +
theme_bw() + theme(legend.direction = 'horizontal', legend.position = 'top')
Thanks.
The script below plots a PCA with ellipses (a slightly modified example from https://github.com/vqv/ggbiplot, as 'opts' is deprecated):
library(ggbiplot)
data(wine)

# PCA of the wine data on standardised variables.
wine.pca <- prcomp(wine, scale. = TRUE)

# Biplot with one normal-data ellipse per wine class and a correlation
# circle, an unnamed colour legend, and the legend laid out across the top.
g <- ggbiplot(
  wine.pca,
  obs.scale = 1,
  var.scale = 1,
  groups = wine.class,
  ellipse = TRUE,
  circle = TRUE
) +
  scale_color_discrete(name = '') +
  theme(legend.direction = 'horizontal', legend.position = 'top')
print(g)
Removing the ellipses is easy, but I am trying to replace them with convex hulls without any success. Any idea how to do it?
Thanks
Yes, we can design a new geom for ggplot, and then use that with ggbiplot. Here's a new geom that will do convex hulls:
library(ggplot2)
# Stat that reduces each group of points to the vertices of its "bag": the
# convex hull obtained by repeatedly peeling away the outermost hull until
# fewer than `prop` of the group's points remain strictly inside it. With
# `prop = 1` this is the ordinary convex hull of all points in the group.
StatBag <- ggproto("StatBag", Stat,
  # data:   data for a single group, with numeric columns `x` and `y`.
  # scales: not used; part of the signature ggplot2 calls.
  # prop:   proportion of the group's points the bag should enclose.
  # Returns a data frame with columns `x` and `y` holding the hull vertices.
  compute_group = function(data, scales, prop = 0.5) {
    # Hull peeling, originally from aplpack::plothulls(); the plotting code,
    # the line-style arguments and the multi-hull branch (never reachable
    # from here because `fraction` is always supplied) have been removed.
    peel_hull <- function(x, y, fraction) {
      n <- length(x)
      idx <- chull(x, y)
      hull <- cbind(x = x[idx], y = y[idx])
      repeat {
        # Drop the vertices of the current hull and check how many points
        # are left inside it.
        x <- x[-idx]
        y <- y[-idx]
        # Stop once the interior holds less than the requested fraction.
        # Also stop when no points are left, so that very small `fraction`
        # values still yield a single hull instead of falling through.
        if (length(x) == 0 || length(x) / n < fraction) {
          return(hull)
        }
        idx <- chull(x, y)
        hull <- cbind(x = x[idx], y = y[idx])
      }
    }

    as.data.frame(peel_hull(data$x, data$y, fraction = prop))
    # TODO: how can we get the hull and loop vertices passed on also?
  },
  required_aes = c("x", "y")
)
#' Layer that applies StatBag to each group and draws the result
#'
#' @inheritParams ggplot2::stat_identity
#' @param prop Proportion of all the points to be included in the bag
#'   (default is 0.5)
#' @param alpha Opacity handed to the layer (default is 0.3)
stat_bag <- function(mapping = NULL, data = NULL, geom = "polygon",
                     position = "identity", na.rm = FALSE, show.legend = NA,
                     inherit.aes = TRUE, prop = 0.5, alpha = 0.3, ...) {
  layer_params <- list(na.rm = na.rm, prop = prop, alpha = alpha, ...)
  layer(
    data = data,
    mapping = mapping,
    stat = StatBag,
    geom = geom,
    position = position,
    show.legend = show.legend,
    inherit.aes = inherit.aes,
    params = layer_params
  )
}
# Layer constructor pairing StatBag with GeomBag.
#
# mapping, data, position, show.legend, inherit.aes: forwarded to layer().
# stat:  accepted for signature compatibility; the layer always uses StatBag.
# prop:  proportion of points to enclose, forwarded as a layer parameter.
# alpha: polygon opacity, forwarded as a layer parameter.
# ...:   further parameters forwarded to the layer.
geom_bag <- function(mapping = NULL, data = NULL,
                     stat = "identity", position = "identity",
                     prop = 0.5,
                     alpha = 0.3,
                     ...,
                     na.rm = FALSE,
                     show.legend = NA,
                     inherit.aes = TRUE) {
  bag_params <- list(na.rm = na.rm, alpha = alpha, prop = prop, ...)
  layer(
    geom = GeomBag,
    stat = StatBag,
    data = data,
    mapping = mapping,
    position = position,
    show.legend = show.legend,
    inherit.aes = inherit.aes,
    params = bag_params
  )
}
#' Geom that draws each group's rows as one filled polygon
#'
#' @rdname ggplot2-ggproto
#' @format NULL
#' @usage NULL
#' @export
GeomBag <- ggproto("GeomBag", Geom,
  # data:         rows for one group, holding the polygon vertices in order.
  # panel_scales: panel scale information handed to coord_munch().
  # coord:        the plot's coordinate system.
  # Returns a polygon grob, or zeroGrob() when there is only a single row.
  draw_group = function(data, panel_scales, coord) {
    n <- nrow(data)
    if (n == 1) return(zeroGrob())

    munched <- coord_munch(coord, data, panel_scales)
    # Sort by group to make sure that colors, fill, etc. come in same order
    munched <- munched[order(munched$group), ]

    # For gpar(), there is one entry per polygon (not one entry per point).
    # We'll pull the first value from each group, and assume all these values
    # are the same within each group.
    first_idx <- !duplicated(munched$group)
    first_rows <- munched[first_idx, ]

    # polygonGrob() is exported from grid, so `::` is sufficient; ggname() is
    # internal to ggplot2 and still needs `:::`.
    ggplot2:::ggname(
      "geom_bag",
      grid::polygonGrob(
        munched$x, munched$y,
        default.units = "native",
        id = munched$group,
        gp = grid::gpar(
          col = first_rows$colour,
          fill = alpha(first_rows$fill, first_rows$alpha),
          lwd = first_rows$size * .pt,
          lty = first_rows$linetype
        )
      )
    )
  },
  default_aes = aes(colour = "NA", fill = "grey20", size = 0.5, linetype = 1,
                    alpha = NA, prop = 0.5),
  # Rows are passed through unchanged.
  handle_na = function(data, params) {
    data
  },
  required_aes = c("x", "y"),
  draw_key = draw_key_polygon
)
And here it is in use with ggbiplot, we set prop to 1 to indicate that we want to draw a polygon that encloses all the points:
library(ggbiplot)
data(wine)

# PCA of the wine data on standardised variables.
wine.pca <- prcomp(wine, scale. = TRUE)

# Biplot without ellipses, with an unnamed colour legend across the top.
g <- ggbiplot(
  wine.pca,
  obs.scale = 1,
  var.scale = 1,
  groups = wine.class,
  ellipse = FALSE,
  circle = TRUE
) +
  scale_color_discrete(name = '') +
  theme(legend.direction = 'horizontal', legend.position = 'top')

# One bag per wine class; prop = 1 is requested so each polygon encloses all
# of the class's points.
g + geom_bag(aes(group = wine.class, fill = wine.class), prop = 1)
We can also do it with ggbiplot and a newer pkg called ggpubr:
library(ggpubr)
library(ggbiplot)
data(wine)

# PCA of the wine data on standardised variables.
wine.pca <- prcomp(wine, scale. = TRUE)

# Biplot with a ggpubr convex hull per wine class drawn as a translucent
# polygon; the colour legend is hidden so only the fill legend is shown.
ggbiplot(wine.pca, obs.scale = 1, var.scale = 1,
         groups = wine.class, ellipse = FALSE, circle = TRUE) +
  stat_chull(aes(color = wine.class, fill = wine.class),
             alpha = 0.1, geom = "polygon") +
  scale_colour_brewer(palette = "Set1", name = '', guide = 'none') +
  scale_fill_brewer(palette = "Set1", name = '') +
  theme_minimal()
I have used scale_colour_brewer and scale_fill_brewer to control the colours of the hulls and points, and suppress one of the legends.
To keep things the same colour across multiple plots, I think converting the category to an ordered factor and ensuring that every level of the factor is present in all of the plotted datasets should do it.
Related
Based on the example in Master Software Development in R, I wrote a new geom_my_point(), adapting the alpha depending on the number of data points.
This works fine, but the alpha value of the label is not correct if alpha is explicitly set.
Here the code for the figures:
# library() fails loudly if ggplot2 is missing, whereas require() would only
# return FALSE and let the script fail later with a less helpful error.
library("ggplot2")

# 200 points scattered around the line y = x, randomly split into two groups.
d <- data.frame(x = runif(200))
d$y <- 1 * d$x + rnorm(200, 0, 0.2)
d$z <- factor(sample(c("group1", "group2"), size = 200, replace = TRUE))

# gg1: alpha left to the geom; gg2: alpha set explicitly; gg3: alpha mapped.
gg1 <- ggplot(d) + geom_my_point(aes(x, y, colour = z)) + ggtitle("gg1")
gg2 <- ggplot(d) + geom_my_point(aes(x, y, colour = z), alpha = 1) + ggtitle("gg2")
gg3 <- ggplot(d) + geom_my_point(aes(x, y, colour = z, alpha = z)) + ggtitle("gg3")
Here the code for the geom_*():
# Layer constructor for GeomMyPoint. All arguments are handed to
# ggplot2::layer(); `na.rm` and anything in `...` travel as layer parameters.
geom_my_point <- function(mapping = NULL, data = NULL, stat = "identity",
                          position = "identity", na.rm = FALSE,
                          show.legend = NA, inherit.aes = TRUE, ...) {
  point_params <- list(na.rm = na.rm, ...)
  ggplot2::layer(
    data = data,
    mapping = mapping,
    stat = stat,
    geom = GeomMyPoint,
    position = position,
    show.legend = show.legend,
    inherit.aes = inherit.aes,
    params = point_params
  )
}
# Point geom whose default opacity depends on the number of rows in the
# layer data: 1 up to 100 rows, 0.3 for 101-200 rows, 0.15 above 200 rows.
# An alpha that is set explicitly or mapped in aes() takes precedence, both
# for the points and for the legend keys.
GeomMyPoint <- ggplot2::ggproto("GeomMyPoint", ggplot2::Geom,
  required_aes = c("x", "y"),
  non_missing_aes = c("size", "shape", "colour"),
  default_aes = ggplot2::aes(
    shape = 19, colour = "black", size = 2,
    fill = NA, alpha = NA, stroke = 0.5
  ),

  # Stores the size-dependent fallback alpha in the layer parameters.
  setup_params = function(data, params) {
    n <- nrow(data)
    params$alpha <- if (n > 200) {
      0.15
    } else if (n > 100) {
      0.3
    } else {
      1
    }
    params
  },

  # data:         layer data for one panel.
  # panel_scales: panel scale information used by coord$transform().
  # coord:        the plot's coordinate system.
  # alpha:        fallback alpha computed in setup_params().
  # Returns a grid points grob.
  draw_panel = function(data, panel_scales, coord, alpha) {
    if (is.character(data$shape)) {
      # translate_shape_string() is not exported by every ggplot2 version,
      # so reach it through the namespace instead of relying on the search
      # path.
      data$shape <- ggplot2:::translate_shape_string(data$shape)
    }

    ## Transform the data first
    coords <- coord$transform(data, panel_scales)

    ## Fall back to the size-dependent alpha only for rows that have none,
    ## instead of overwriting every row as soon as a single value is NA.
    coords$alpha[is.na(coords$alpha)] <- alpha

    ## Construct a grid grob
    grid::pointsGrob(
      x = coords$x,
      y = coords$y,
      pch = coords$shape,
      gp = grid::gpar(
        col = ggplot2::alpha(coords$colour, coords$alpha),
        fill = ggplot2::alpha(coords$fill, coords$alpha),
        fontsize = coords$size * ggplot2::.pt +
          coords$stroke * ggplot2::.stroke / 2,
        lwd = coords$stroke * ggplot2::.stroke / 2
      )
    )
  },

  # Legend key: use the fallback alpha only where the key carries none, so an
  # explicit `alpha = ...` or a mapped alpha is not overwritten.
  draw_key = function(data, params, size) {
    if (is.null(data$alpha)) {
      data$alpha <- NA
    }
    data$alpha[is.na(data$alpha)] <- params$alpha
    ggplot2::draw_key_point(data, params, size)
  }
)
EDIT:
According to the comment by @teunbrand, the problem with the plot gg2 can be solved by the following adaptation of the draw_key() function:
# Replacement for the draw_key member of GeomMyPoint: the legend key takes the
# alpha stored in the layer parameters only when the key data has no alpha of
# its own (NA); otherwise the key's alpha is kept.
draw_key = function(data, params, size) {
if (is.na(data$alpha)) {
data$alpha <- params$alpha
}
ggplot2::draw_key_point(data, params, size)
}
But this still does not solve the problem with the plot gg3, so the underlying question is why alpha is not correctly represented by the data argument of the draw_key() function. Compare also the following plot gg4, in which the size is correctly displayed in the legend (set a browser() within draw_key()):
# Colour, alpha and size are all mapped to the grouping variable z.
gg4 <- ggplot(d) +
  geom_my_point(aes(x, y, colour = z, alpha = z, size = z)) +
  ggtitle("gg4")
I am generating split violin plots using the geom_split_violin function created here: Split violin plot with ggplot2.
Then, I add labels for sample sizes (n = ...) for each split violin. However, unfortunately the labels overlap. How could I please move them slightly to the left and right, so that they do not overlap?
Here is the full code that I am using and below it the result with overlapping "n = ..." labels.
# Simulated example data: 1400 normal draws in four blocks with increasing
# means, an outer factor x (800 "a", 600 "b") and a split variable m.
set.seed(20160229)
my_data <- data.frame(
  y = c(
    rnorm(500),
    rnorm(300, mean = 0.5),
    rnorm(400, mean = 1),
    rnorm(200, mean = 1.5)
  ),
  x = rep(c('a', 'b'), times = c(800, 600)),
  m = rep(c('i', 'j', 'i'), times = c(300, 700, 400))
)
# Code to create geom_split_violin function from link above
library('ggplot2')
# Violin geom that draws only one half of each violin: groups with an odd
# group number get the left half, the others the right half, so that two
# groups sharing an x position form one split violin.
GeomSplitViolin <- ggproto("GeomSplitViolin", GeomViolin,
  # data:           violin density data for one group.
  # ...:            passed on to GeomPolygon$draw_panel / GeomPath$draw_panel.
  # draw_quantiles: optional probabilities in [0, 1] at which to draw lines.
  draw_group = function(self, data, ..., draw_quantiles = NULL) {
    data <- transform(data,
      xminv = x - violinwidth * (x - xmin),
      xmaxv = x + violinwidth * (xmax - x)
    )
    grp <- data[1, "group"]

    # Keep one side of the outline, ordered so the polygon path is traced
    # in a consistent direction.
    newdata <- plyr::arrange(
      transform(data, x = if (grp %% 2 == 1) xminv else xmaxv),
      if (grp %% 2 == 1) y else -y
    )
    # Close the polygon along the violin's centre line.
    newdata <- rbind(newdata[1, ], newdata, newdata[nrow(newdata), ], newdata[1, ])
    newdata[c(1, nrow(newdata) - 1, nrow(newdata)), "x"] <- round(newdata[1, "x"])

    # Both operands are scalars, so use the short-circuiting `&&` (an
    # elementwise `&` inside `if` is the wrong operator for a scalar test).
    if (length(draw_quantiles) > 0 && !scales::zero_range(range(data$y))) {
      stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <= 1))
      quantiles <- ggplot2:::create_quantile_segment_frame(data, draw_quantiles)
      aesthetics <- data[rep(1, nrow(quantiles)), setdiff(names(data), c("x", "y")), drop = FALSE]
      aesthetics$alpha <- rep(1, nrow(quantiles))
      both <- cbind(quantiles, aesthetics)
      quantile_grob <- GeomPath$draw_panel(both, ...)
      ggplot2:::ggname(
        "geom_split_violin",
        grid::grobTree(GeomPolygon$draw_panel(newdata, ...), quantile_grob)
      )
    } else {
      ggplot2:::ggname("geom_split_violin", GeomPolygon$draw_panel(newdata, ...))
    }
  }
)
# Layer constructor for GeomSplitViolin; by default the data go through the
# "ydensity" stat. `trim`, `scale`, `draw_quantiles`, `na.rm` and anything in
# `...` are forwarded as layer parameters.
geom_split_violin <- function(mapping = NULL, data = NULL, stat = "ydensity", position = "identity", ...,
                              draw_quantiles = NULL, trim = TRUE, scale = "area", na.rm = FALSE,
                              show.legend = NA, inherit.aes = TRUE) {
  violin_params <- list(
    trim = trim,
    scale = scale,
    draw_quantiles = draw_quantiles,
    na.rm = na.rm,
    ...
  )
  layer(
    geom = GeomSplitViolin,
    stat = stat,
    data = data,
    mapping = mapping,
    position = position,
    show.legend = show.legend,
    inherit.aes = inherit.aes,
    params = violin_params
  )
}
# Summary function for stat_summary(): builds the 'n = ...' label for one
# group.
#   x:    the group's values; only their count is used.
#   y_lo: reference height; the label is placed at 1.06 times this value
#         (defaults to the minimum of my_data$y).
# Returns a one-row data frame with columns `y` and `label`.
give_n <- function(x, y_lo = min(my_data$y)) {
  n_label <- paste("n =", length(x))
  data.frame(y = y_lo * 1.06, label = n_label)
}
# Split violins per x, filled by m, with the 'n = ...' label of every
# x/m combination added as text.
ggplot(my_data, aes(x, y, fill = m)) +
  geom_split_violin() +
  stat_summary(
    aes(x = as.factor(x)),
    fun.data = give_n,
    geom = "text"
  )
Result (note overlapping 'n = ...' labels):
Does adding position_nudge() solve your problem?
# Same plot, with the labels nudged by -0.25 / +0.25 along x so that the two
# labels at each x position no longer sit on top of each other.
ggplot(my_data, aes(x, y, fill = m)) +
  geom_split_violin() +
  stat_summary(
    aes(x = as.factor(x)),
    fun.data = give_n,
    geom = "text",
    position = position_nudge(x = c(-0.25, 0.25))
  )
I'm writing a package that extends ggplot2. One of those extensions is a geom_arrow() that takes aesthetics mag and angle to plot vector fields by magnitude and direction. I also created a scale_mag() to manipulate the length of the arrows with the prospect of creating also a new guide. Right now both geom and scale work as expected when added together.
# Vector field drawn with the magnitude scale added explicitly.
ggplot(geo, aes(lon, lat)) +
  geom_arrow(
    aes(mag = mag, angle = angle)
  ) +
  scale_mag()
But if I don't add scale_mag(), it doesn't work at all. What I want is for this scale to work like scale_color(), which is added by default when the color aesthetic is present.
Here is the code as it is right now:
# Layer constructor for GeomArrow; by default the data go through the "arrow"
# stat.
#
# start, direction:       forwarded to the geom as layer parameters.
# min.mag:                forwarded as a layer parameter (StatArrow drops
#                         rows whose magnitude is below it).
# skip, skip.x, skip.y:   thinning along x and y; `skip` is only the default
#                         for skip.x and skip.y.
# arrow.angle, arrow.length, arrow.ends, arrow.type: used only to build the
#                         default `arrow`.
# arrow:                  arrow specification handed to the geom.
# lineend, na.rm, ...:    forwarded as layer parameters.
geom_arrow <- function(mapping = NULL, data = NULL,
                       stat = "arrow",
                       position = "identity", ...,
                       start = 0,
                       direction = 1,
                       # scale = 1,
                       min.mag = 0,
                       skip = 0,
                       skip.x = skip,
                       skip.y = skip,
                       arrow.angle = 15,
                       arrow.length = 0.5,
                       arrow.ends = "last",
                       arrow.type = "closed",
                       arrow = grid::arrow(arrow.angle, unit(arrow.length, "lines"),
                                           ends = arrow.ends, type = arrow.type),
                       lineend = "butt",
                       na.rm = FALSE,
                       show.legend = NA,
                       inherit.aes = TRUE) {
  arrow_params <- list(
    start = start,
    direction = direction,
    arrow = arrow,
    lineend = lineend,
    na.rm = na.rm,
    # scale = scale,
    skip.x = skip.x,
    skip.y = skip.y,
    min.mag = min.mag,
    ...
  )
  layer(
    data = data,
    mapping = mapping,
    stat = stat,
    geom = GeomArrow,
    position = position,
    show.legend = show.legend,
    inherit.aes = inherit.aes,
    params = arrow_params
  )
}
# Geom that draws one arrow per row, starting at (x, y), with length `mag`
# and direction `angle` (in degrees). Arrow heads are scaled by each row's
# magnitude relative to the largest magnitude in the panel.
GeomArrow <- ggplot2::ggproto("GeomArrow", ggplot2::Geom,
  required_aes = c("x", "y"),
  default_aes = ggplot2::aes(color = "black", size = 0.5, min.mag = 0,
                             linetype = 1, alpha = NA),
  draw_key = ggplot2::draw_key_path,

  # data:             layer data for one panel; must carry `mag` and `angle`
  #                   (and `xend`/`yend` when preserve.dir is FALSE).
  # panel_scales:     panel scale information used by coord$transform().
  # coord:            the plot's coordinate system.
  # arrow:            grid::arrow() specification, or NULL for no heads.
  # lineend:          line end style handed to gpar().
  # start, direction: accepted as layer parameters; not used while drawing.
  # preserve.dir:     if FALSE, angles are recomputed from the transformed
  #                   segment end points and offsets use "npc" units.
  # Returns a polyline grob with one two-point line per row.
  #
  # The defaults are real values rather than self-references such as
  # `arrow = arrow`, which fail with a recursive default-argument error when
  # the parameter is not supplied.
  draw_panel = function(data, panel_scales, coord,
                        arrow = NULL, lineend = "butt",
                        start = 0, direction = 1,
                        preserve.dir = TRUE) {
    coords <- coord$transform(data, panel_scales)

    unit.delta <- "snpc"
    if (preserve.dir == FALSE) {
      coords$angle <- with(coords, atan2(yend - y, xend - x) * 180 / pi)
      unit.delta <- "npc"
    }

    coords$dx <- with(coords, mag * cos(angle * pi / 180))
    coords$dy <- with(coords, mag * sin(angle * pi / 180))

    # from https://stackoverflow.com/questions/47814998/how-to-make-segments-that-preserve-angles-in-different-aspect-ratios-in-ggplot2
    x_start <- grid::unit(coords$x, "npc")
    y_start <- grid::unit(coords$y, "npc")
    xx <- grid::unit.c(x_start, x_start + grid::unit(coords$dx, unit.delta))
    yy <- grid::unit.c(y_start, y_start + grid::unit(coords$dy, unit.delta))

    if (!is.null(arrow)) {
      rel_mag <- coords$mag / max(coords$mag, na.rm = TRUE)
      # Scale the heads with unit arithmetic instead of rebuilding the unit
      # from its "unit" attribute, which is an internal detail of grid's
      # unit representation.
      arrow$length <- arrow$length * rel_mag
    }

    grid::polylineGrob(
      x = xx, y = yy,
      default.units = "npc",
      arrow = arrow,
      gp = grid::gpar(
        col = coords$colour,
        fill = scales::alpha(coords$colour, coords$alpha),
        alpha = ifelse(is.na(coords$alpha), 1, coords$alpha),
        lwd = coords$size * ggplot2::.pt,
        lty = coords$linetype,
        lineend = lineend
      ),
      # Start and end point of row i share id i.
      id = rep(seq_len(nrow(coords)), 2)
    )
  }
)
# Stat that prepares vector-field data for GeomArrow: it completes the
# (mag, angle) / (dx, dy) pair from whichever one is supplied, thins the grid
# and drops vectors whose magnitude is below `min.mag`.
StatArrow <- ggplot2::ggproto("StatArrow", ggplot2::Stat,
  required_aes = c("x", "y"),
  default_aes = ggplot2::aes(min.mag = 0, dx = NULL, dy = NULL,
                             mag = NULL, angle = NULL),

  # data:           data for one group; needs `x`, `y` and either `mag` and
  #                 `angle` (degrees) or `dx` and `dy`.
  # scales:         not used; part of the signature ggplot2 calls.
  # skip.x, skip.y: passed (plus one) to JumpBy() to thin the unique x / y
  #                 values.
  # min.mag:        minimum magnitude to keep; a `min.mag` column in `data`
  #                 takes precedence.
  # Returns `data` with `mag`, `angle`, `dx`, `dy`, `xend` and `yend` filled.
  #
  # The defaults are real values rather than self-references such as
  # `skip.x = skip.x`, which fail with a recursive default-argument error
  # when the parameter is not supplied.
  compute_group = function(data, scales,
                           skip.x = 0, skip.y = 0,
                           min.mag = 0) {
    if (!is.null(data$min.mag)) {
      min.mag <- data$min.mag
    }

    if (is.null(data$mag) || is.null(data$angle)) {
      if (is.null(data$dx) || is.null(data$dy)) {
        stop("stat_arrow needs either `dx` and `dy` or `mag` and `angle`",
             call. = FALSE)
      }
      data$mag <- Mag(data$dx, data$dy)
      data$angle <- atan2(data$dy, data$dx) * 180 / pi
    } else {
      data$dx <- data$mag * cos(data$angle * pi / 180)
      data$dy <- data$mag * sin(data$angle * pi / 180)
    }

    keep <- data$x %in% JumpBy(unique(data$x), skip.x + 1) &
      data$y %in% JumpBy(unique(data$y), skip.y + 1) &
      data$mag >= min.mag
    # which() drops rows where the condition is NA, as subset() would.
    data <- data[which(keep), , drop = FALSE]

    data$xend <- data$x + data$dx
    data$yend <- data$y + data$dy
    data
  }
)
# Continuous scale for the `mag` aesthetic, without a guide.
#   length:       upper end of the output range handed to rescale_mag().
#   max:          magnitude mapped to `length`; waiver() leaves the choice to
#                 rescale_mag().
#   default_unit: currently unused (see the commented-out line below).
scale_mag <- function(length = 0.1,
                      max = waiver(),
                      default_unit = "lines") {
  # if (!is.unit(length)) length <- ggplot2::unit(length, default_unit)
  mag_rescaler <- rescale_mag(length, max)
  continuous_scale(
    "mag",
    "mag",
    identity,
    rescaler = mag_rescaler,
    guide = "none"
  )
}
# scale_type.mag <- function(x) "vector"
# Builds the rescaler used by scale_mag().
#   length: upper end of the output range.
#   max:    magnitude mapped to `length`; a waiver() means "use the largest
#           value of the data being rescaled".
# Returns a function(x, from) that linearly maps [0, max] onto [0, length];
# `from` is accepted for the rescaler interface but not used.
rescale_mag <- function(length, max) {
  function(x, from) {
    # inherits(, "waiver") is what ggplot2's internal is.waive() tests, so the
    # unexported helper is not needed.
    upper <- if (inherits(max, "waiver")) base::max(x, na.rm = TRUE) else max
    scales::rescale(x, c(0, length), c(0, upper))
  }
}
Finally, I find the answer!
Based on the code in ggplot2/R/scale-type.R, there should be a scale named scale_mag_continuous in the parent environment of the find_scale function. Then this scale can be found automatically.
# Ten example vectors along the diagonal.
geo <- tibble(
  lon = 1:10,
  lat = 1:10,
  mag = 1:10,
  angle = 1:10
)

# Alias under the name scale_mag_continuous so the scale can be looked up by
# that name.
scale_mag_continuous <- scale_mag

# No explicit scale_mag() call here.
ggplot(geo, aes(lon, lat)) +
  geom_arrow(aes(mag = mag, angle = angle))
I added a default theme to ggplot for a work package by overloading the ggplot function, basically like this:
# Masks ggplot(): builds the plot with ggplot2::ggplot() and appends the
# extra component to every plot.
ggplot <- function(...) {
  base_plot <- ggplot2::ggplot(...)
  base_plot + your_added_thing()
}
If you want it to be less obtrusive, rename your version of ggplot:
# Same idea under a separate name, leaving ggplot() itself untouched.
jjplot <- function(...) {
  base_plot <- ggplot2::ggplot(...)
  base_plot + my_added_thing()
}
this page will be helpful for you.
https://gist.github.com/wch/3250485
especially, the code below:
# scale_type method for class "yearmon": tells ggplot2 to look for scales of
# type "yearmon" for such values. The argument itself is not inspected.
scale_type.yearmon <- function(x) {
  "yearmon"
}
I want to draw a graph similar to the enterotype plot in the research. But my new multiple-ggproto geom looks terrible, as shown in p1, owing to the missing background colour of the label. I've tried multiple variations of this, for example modifying GeomLabel$draw_panel in order to reset the default arguments of the geom in ggplot2::ggproto. However, I could not find the labelGrob() function, which has been removed from the ggplot2 and grid packages, so that modification didn't work. How can I modify the background colour of the label in the multiple-ggproto geom? Any ideas? Thanks in advance. Here is my code and two pictures.
p1: the background color of label should be white or the text color should be black.
p2: displays the wrong point colour, line colour and legend.
# Composite "enterotype"-style layer for a grouped scatterplot. Returns a list
# of four layers built from the same data and mapping:
#   1. the points themselves (GeomPoint),
#   2. one ellipse per group (StatEllipse + GeomEllipse),
#   3. paths built by StatConline, which interleaves each group's robust
#      centre between its points (GeomPath),
#   4. one label per group placed at the group's robust centre (GeomLabel).
# Arguments are forwarded to the individual layers as shown in the layer()
# calls below; `...` is passed to all four.
# NOTE(review): the library() calls inside this function attach ggplot2, MASS
# and miscTools as a side effect of calling it.
geom_enterotype <- function(mapping = NULL, data = NULL, stat = "identity", position = "identity",
alpha = 0.3, prop = 0.5, ..., lineend = "butt", linejoin = "round",
linemitre = 1, arrow = NULL, na.rm = FALSE, parse = FALSE,
nudge_x = 0, nudge_y = 0, label.padding = unit(0.15, "lines"),
label.r = unit(0.15, "lines"), label.size = 0.1,
show.legend = TRUE, inherit.aes = TRUE) {
library(ggplot2)
# create new stat and geom for PCA scatterplot with ellipses
# StatEllipse: per group, returns the outline (segments + 1 points) of an
# ellipse centred on the cov.trob() centre and shaped by its covariance,
# with the radius taken from the F quantile at `level`. Groups with fewer
# than four points yield a single NA row.
StatEllipse <- ggproto("StatEllipse", Stat,
required_aes = c("x", "y"),
compute_group = function(., data, scales, level = 0.75, segments = 51, ...) {
library(MASS)
dfn <- 2
dfd <- length(data$x) - 1
if (dfd < 3) {
ellipse <- rbind(c(NA, NA))
} else {
v <- cov.trob(cbind(data$x, data$y))
shape <- v$cov
center <- v$center
radius <- sqrt(dfn * qf(level, dfn, dfd))
angles <- (0:segments) * 2 * pi/segments
unit.circle <- cbind(cos(angles), sin(angles))
ellipse <- t(center + radius * t(unit.circle %*% chol(shape)))
}
ellipse <- as.data.frame(ellipse)
colnames(ellipse) <- c("x", "y")
return(ellipse)
})
# write new ggproto
# GeomEllipse: draws each group's rows as one path grob, taking colour, fill,
# line width and line type from the first row of the group.
GeomEllipse <- ggproto("GeomEllipse", Geom,
draw_group = function(data, panel_scales, coord) {
n <- nrow(data)
if (n == 1)
return(zeroGrob())
munched <- coord_munch(coord, data, panel_scales)
munched <- munched[order(munched$group), ]
first_idx <- !duplicated(munched$group)
first_rows <- munched[first_idx, ]
grid::pathGrob(munched$x, munched$y, default.units = "native",
id = munched$group,
gp = grid::gpar(col = first_rows$colour,
fill = alpha(first_rows$fill, first_rows$alpha), lwd = first_rows$size * .pt, lty = first_rows$linetype))
},
default_aes = aes(colour = "NA", fill = "grey20", size = 0.5, linetype = 1, alpha = NA, prop = 0.5),
handle_na = function(data, params) {
data
},
required_aes = c("x", "y"),
draw_key = draw_key_path
)
# create a new stat for PCA scatterplot with lines which totally directs to the center
# StatConline: inserts the group's cov.trob() centre as extra rows between the
# group's points, so that a path drawn through the result keeps returning to
# the centre.
StatConline <- ggproto("StatConline", Stat,
compute_group = function(data, scales) {
library(miscTools)
library(MASS)
df <- data.frame(data$x,data$y)
mat <- as.matrix(df)
center <- cov.trob(df)$center
names(center)<- NULL
mat_insert <- insertRow(mat, 2, center )
for(i in 1:nrow(mat)) {
mat_insert <- insertRow( mat_insert, 2*i, center )
next
}
# Drop rows 2 and 3 of the interleaved matrix.
mat_insert <- mat_insert[-c(2:3),]
rownames(mat_insert) <- NULL
# NOTE(review): `center` is passed as the second positional argument of
# as.data.frame() here -- confirm this is intended.
mat_insert <- as.data.frame(mat_insert,center)
colnames(mat_insert) =c("x","y")
return(mat_insert)
},
required_aes = c("x", "y")
)
# create a new stat for PCA scatterplot with center labels
# StatLabel: reduces each group to a single row holding its cov.trob() centre.
StatLabel <- ggproto("StatLabel" ,Stat,
compute_group = function(data, scales) {
library(MASS)
df <- data.frame(data$x,data$y)
center <- cov.trob(df)$center
names(center)<- NULL
center <- t(as.data.frame(center))
center <- as.data.frame(cbind(center))
colnames(center) <- c("x","y")
rownames(center) <- NULL
return(center)
},
required_aes = c("x", "y")
)
# Layers 1-3 are created with the `position` passed by the caller.
layer1 <- layer(data = data, mapping = mapping, stat = stat, geom = GeomPoint,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(na.rm = na.rm, ...))
layer2 <- layer(stat = StatEllipse, data = data, mapping = mapping, geom = GeomEllipse, position = position, show.legend = FALSE,
inherit.aes = inherit.aes, params = list(na.rm = na.rm, prop = prop, alpha = alpha, ...))
layer3 <- layer(data = data, mapping = mapping, stat = StatConline, geom = GeomPath,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(lineend = lineend, linejoin = linejoin,
linemitre = linemitre, arrow = arrow, na.rm = na.rm, ...))
# Nudging replaces `position` only from here on, so it affects the label
# layer (layer4) but not the three layers already built above. Supplying
# both a position and a nudge is an error.
if (!missing(nudge_x) || !missing(nudge_y)) {
if (!missing(position)) {
stop("Specify either `position` or `nudge_x`/`nudge_y`",
call. = FALSE)
}
position <- position_nudge(nudge_x, nudge_y)
}
layer4 <- layer(data = data, mapping = mapping, stat = StatLabel, geom = GeomLabel,
position = position, show.legend = FALSE, inherit.aes = inherit.aes,
params = list(parse = parse, label.padding = label.padding,
label.r = label.r, label.size = label.size, na.rm = na.rm, ...))
return(list(layer1,layer2,layer3,layer4))
}
# data: Cars93 restricted to three car types and to the numeric columns used
# for the PCA (column 1 of car_df is Type).
data(Cars93, package = "MASS")
car_df <- Cars93[, c(3, 5, 13:15, 17, 19:25)]
car_df <- subset(car_df, Type == "Large" | Type == "Midsize" | Type == "Small")

# Keep only cars whose price lies within two standard deviations of the mean.
# Both bounds must hold at once, hence `&`; with `|` every price satisfies at
# least one of the two conditions and nothing is filtered out.
x1 <- mean(car_df$Price) + 2 * sd(car_df$Price)
x2 <- mean(car_df$Price) - 2 * sd(car_df$Price)
car_df <- subset(car_df, Price > x2 & Price < x1)
car_df <- na.omit(car_df)

# Principal Component Analysis on the standardised numeric columns
# (`scale.` spelled out instead of relying on partial argument matching).
car.pca <- prcomp(car_df[, -1], scale. = TRUE)
car.pca_pre <- cbind(as.data.frame(predict(car.pca)[, 1:2]), car_df[, 1])
colnames(car.pca_pre) <- c("PC1", "PC2", "Type")

# Axis labels with the share of variance explained by each component.
var_explained <- car.pca$sdev^2 / sum(car.pca$sdev^2)
xlab <- paste0("PC1(", round(var_explained[1], 2) * 100, "%)")
ylab <- paste0("PC2(", round(var_explained[2], 2) * 100, "%)")
head(car.pca_pre)
# plot
library(ggplot2)

# p1: fill, colour and label all mapped to Type.
p1 <- ggplot(
  car.pca_pre,
  aes(PC1, PC2, fill = Type, color = Type, label = Type)
) +
  geom_enterotype()

# p2: as p1, but without the colour mapping.
p2 <- ggplot(
  car.pca_pre,
  aes(PC1, PC2, fill = Type, label = Type)
) +
  geom_enterotype()
You can manually change the colour scale to give it more emphasis against the background fill colour:
# p3: manual, darker colour scale on top of the default fill colours.
p3 <- ggplot(
  car.pca_pre,
  aes(PC1, PC2, fill = Type, color = Type, label = Type)
) +
  geom_enterotype() +
  scale_colour_manual(values = c("red4", "green4", "blue4"))
p3
You can additionally adjust your fill colours by changing the alpha values, or assigning different colour values to give better contrast to your labels.
# p4: manual fill and colour scales, with shape also mapped to Type.
p4 <- ggplot(
  car.pca_pre,
  aes(PC1, PC2, label = Type, shape = Type, fill = Type, colour = Type)
) +
  geom_enterotype() +
  scale_fill_manual(
    values = alpha(c("pink", "lightgreen", "skyblue"), 1)
  ) +
  scale_colour_manual(values = c("red4", "green4", "blue4"))
p4
Finally, if you want a background white colour to your labels, you have to remove the fill option. You can also additionally assign a shape value.
As you can observe, the label background colour is associated with the shape fill colour, while the label text colour is associated with the line colour and the shape border colour.
# p5: no fill mapping; colour and shape mapped to Type.
p5 <- ggplot(
  car.pca_pre,
  aes(PC1, PC2, label = Type, shape = Type, colour = Type)
) +
  geom_enterotype() +
  scale_colour_manual(values = c("red4", "green4", "blue4"))
p5
I've been reading the vignette on extending ggplot2, but I'm a bit stuck on how I can make a single geom that can add multiple geometries to the plot. Multiple geometries already exist in ggplot2 geoms, for example, we have things like geom_contour (multiple paths), and geom_boxplot (multiple paths and points). But I can't quite see how to extend those into new geoms.
Let's say I'm trying to make a geom_manythings that will draw two polygons and one point by computing on a single dataset. One polygon will be a convex hull for all the points, the second polygon will be a convex hull for a subset of the points, and the single point will represent the centre of the data. I want all of these to appear with a call to one geom, rather than three separate calls, as we see here:
# example data set: 1000 points from two independent standard normals
set.seed(9)
n <- 1000
x <- data.frame(x = rnorm(n),
                y = rnorm(n))

# computations for the geometries
# convex hull of all the points
hull <- x[chull(x), ]
# convex hull of the points in the positive quadrant
subset_of_x <- x[x$x > 0 & x$y > 0, ]
hull_of_subset <- subset_of_x[chull(subset_of_x), ]
# a point in the centre of the data
centre_point <- data.frame(x = mean(x$x), y = mean(x$y))

# plot: points, both hulls as translucent polygons, and the centre in green
library(ggplot2)
ggplot(x, aes(x, y)) +
  geom_point() +
  geom_polygon(data = hull, alpha = 0.1) +
  geom_polygon(data = hull_of_subset, alpha = 0.3) +
  geom_point(data = centre_point, colour = "green", size = 3)
I want to have a geom_manythings to replace the three geom_* in the code above.
In an attempt to make a custom geom, I started with code in geom_tufteboxplot and geom_boxplot as templates, along with the 'extending ggplot2' vignette:
library(ggplot2)
library(proto)
# First attempt at a geom meant to draw, per group, a convex hull of all
# points, a convex hull of the points in the positive quadrant, and a single
# point at the centre of the data. The review notes below mark the places
# that look wrong in this attempt.
GeomManythings <- ggproto(
"GeomManythings",
GeomPolygon,
# Delegates to GeomPolygon's setup_data and returns its result unchanged.
setup_data = function(self, data, params) {
data <- ggproto_parent(GeomPolygon, self)$setup_data(data, params)
data
},
draw_group = function(data, panel_scales, coord) {
n <- nrow(data)
# Groups with two or fewer rows draw nothing.
if (n <= 2)
return(grid::nullGrob())
# Aesthetics copied row by row from the group's data (n rows).
common <- data.frame(
colour = data$colour,
size = data$size,
linetype = data$linetype,
fill = alpha(data$fill, data$alpha),
group = data$group,
stringsAsFactors = FALSE
)
# custom bits...
# polygon hull for all points
# NOTE(review): chull() is given the whole data frame here, not just the x
# and y columns.
hull <- data[chull(data), ]
# NOTE(review): `common` has one row per data point while `hull` only has
# the hull rows, so the lengths passed to data.frame() may not match.
hull_df <- data.frame(x = hull$x,
y = hull$y,
common,
stringsAsFactors = FALSE)
hull_grob <-
GeomPolygon$draw_panel(hull_df, panel_scales, coord)
# polygon hull for subset
subset_of_x <-
data[data$x > 0 & data$y > 0 ,]
hull_of_subset <-
subset_of_x[chull(subset_of_x),]
hull_of_subset_df <- data.frame(x = hull_of_subset$x,
y = hull_of_subset$y,
common,
stringsAsFactors = FALSE)
hull_of_subset_grob <-
GeomPolygon$draw_panel(hull_of_subset_df, panel_scales, coord)
# point for centre point
# NOTE(review): `coords` is never defined in this function, and it is used
# both as a data frame (`coords$x`) and as a function (`coords(data$y)`).
centre_point <-
data.frame(x = mean(coords$x),
y = coords(data$y),
common,
stringsAsFactors = FALSE)
centre_point_grob <-
GeomPoint$draw_panel(centre_point, panel_scales, coord)
# end of custom bits
# NOTE(review): ggname() and grobTree() are called unqualified; presumably
# ggplot2:::ggname and grid::grobTree are meant -- confirm they are in scope.
ggname("geom_mypolygon",
grobTree(hull_grob,
hull_of_subset_grob,
centre_point_grob))
},
required_aes = c("x", "y"),
draw_key = draw_key_polygon,
# NOTE(review): there is a trailing comma after `alpha = 1` in aes() below.
default_aes = aes(
colour = "grey20",
fill = "grey20",
size = 0.5,
linetype = 1,
alpha = 1,
)
)
# Layer constructor for GeomManythings. All arguments are handed to layer();
# `na.rm` and anything in `...` travel as layer parameters.
geom_manythings <- function(mapping = NULL, data = NULL, stat = "identity",
                            position = "identity", na.rm = FALSE,
                            show.legend = NA, inherit.aes = TRUE, ...) {
  extra_params <- list(na.rm = na.rm, ...)
  layer(
    data = data,
    mapping = mapping,
    stat = stat,
    geom = GeomManythings,
    position = position,
    show.legend = show.legend,
    inherit.aes = inherit.aes,
    params = extra_params
  )
}
But clearly there are quite a few things not right in this geom, I must be missing some fundamental details...
# Points plus the custom composite geom in a single plot.
ggplot(
  x,
  aes(x, y)
) +
  geom_point() +
  geom_manythings()
How can I write this geom to get the desired result?
There are quite a few issues in your code, so I suggest you try a simplified case first. In particular, the chull calculation was problematic. Try this:
library(ggplot2)
library(proto)
library(grid)
# Geom that draws three things per group from one data set: the convex hull
# of all points, the convex hull of the points in the positive quadrant
# (filled red for testing), and a green point at the mean of the transformed
# coordinates.
GeomManythings <- ggproto(
  "GeomManythings",
  Geom,
  # Delegates to Geom's setup_data and returns its result unchanged.
  setup_data = function(self, data, params) {
    ggproto_parent(Geom, self)$setup_data(data, params)
  },
  # data:         rows for one group, with `x`, `y` and polygon aesthetics.
  # panel_scales: panel scale information handed on to the drawing code.
  # coord:        the plot's coordinate system.
  # Returns a grob tree, or a null grob for groups with two or fewer rows.
  draw_group = function(data, panel_scales, coord) {
    if (nrow(data) <= 2) {
      return(grid::nullGrob())
    }

    # Rows of `df` on the convex hull of its x/y columns, in hull order.
    hull_rows <- function(df) {
      df[chull(df$x, df$y), , drop = FALSE]
    }

    # polygon hull for all points
    hull_grob <- GeomPolygon$draw_panel(hull_rows(data), panel_scales, coord)

    # polygon hull for the positive-quadrant subset; which() keeps NA
    # comparisons from producing all-NA rows
    in_quadrant <- data[which(data$x > 0 & data$y > 0), , drop = FALSE]
    if (nrow(in_quadrant) <= 2) {
      # Same rule as for the whole group: too few points to form a hull
      # (this also covers an empty subset).
      hull_of_subset_grob <- grid::nullGrob()
    } else {
      hull_of_subset_df <- hull_rows(in_quadrant)
      hull_of_subset_df$fill <- "red" # testing
      hull_of_subset_grob <-
        GeomPolygon$draw_panel(hull_of_subset_df, panel_scales, coord)
    }

    # centre point, computed on the transformed coordinates
    coords <- coord$transform(data, panel_scales)
    centre_grob <- grid::pointsGrob(
      x = mean(coords$x), y = mean(coords$y),
      default.units = "npc",
      gp = grid::gpar(col = "green", cex = 3)
    )

    ggplot2:::ggname(
      "geom_mypolygon",
      grid::grobTree(hull_grob, hull_of_subset_grob, centre_grob)
    )
  },
  required_aes = c("x", "y"),
  draw_key = draw_key_polygon,
  default_aes = aes(
    colour = "grey20",
    fill = "grey50",
    size = 0.5,
    linetype = 1,
    alpha = 0.5
  )
)
# Layer constructor for GeomManythings. All arguments are handed to layer();
# `na.rm` and anything in `...` travel as layer parameters.
geom_manythings <- function(mapping = NULL, data = NULL, stat = "identity",
                            position = "identity", na.rm = FALSE,
                            show.legend = NA, inherit.aes = TRUE, ...) {
  extra_params <- list(na.rm = na.rm, ...)
  layer(
    data = data,
    mapping = mapping,
    stat = stat,
    geom = GeomManythings,
    position = position,
    show.legend = show.legend,
    inherit.aes = inherit.aes,
    params = extra_params
  )
}
# Small reproducible demo: 20 points from two independent standard normals,
# with the composite geom drawn underneath the points.
set.seed(9)
n <- 20
d <- data.frame(
  x = rnorm(n),
  y = rnorm(n)
)
ggplot(d, aes(x, y)) +
  geom_manythings() +
  geom_point()
(disclaimer: I haven't tried to write a geom in 5 years, so I don't know how it works nowadays)