Filling alphahull with ggplot2 - r

I used alphahull package to delineate dots on a map.
I plot the contours with a geom_segment.
My question is : how to fill the delineation given by the segment with a color ?
Here is a reproducible example :
set.seed(2)
dat <- data.frame(x = rnorm(20, 10, 5), y = rnorm(20, 20, 5), z = c(rep(1, 6), rep(2, 4)))
library(ggplot2)
library(alphahull)
alpha <- 100
alphashape1 <- ashape(dat[which(dat$z==1), c("x", "y")], alpha = alpha)
alphashape2 <- ashape(dat[which(dat$z==2), c("x", "y")], alpha = alpha)
map <- ggplot(dat, aes(x = x, y = y)) +
geom_point(data = dat, aes(x = x, y = y, colour = as.factor(dat$z))) +
geom_segment(data = data.frame(alphashape1$edges), aes(x = x1, y = y1, xend = x2, yend = y2, colour = levels(as.factor(dat$z))[1])) +
geom_segment(data = data.frame(alphashape2$edges), aes(x = x1, y = y1, xend = x2, yend = y2, colour = levels(as.factor(dat$z))[2]))
map

I believe this works w/o the need for graph ops:
fortify.ashape <- function(ashape_res) {
xdf <- data.frame(ashape_res$edges)
xdf <- do.call(
rbind,
lapply(1:nrow(xdf), function(i) {
rbind(
data.frame(x=xdf$x1[i], y=xdf$y1[i]),
data.frame(x=xdf$x2[i], y=xdf$y2[i])
)
})
)
xdf <- xdf[order(-1 * atan2(
xdf$y - mean(range(xdf$y)),
xdf$x - mean(range(xdf$x)))), c("x", "y")]
xdf <- rbind.data.frame(xdf[nrow(xdf),], xdf[1:(nrow(xdf)-1),])
xdf
}
alphashape1 <- ashape(dat[which(dat$z == 1), c("x", "y")], alpha = 15)
alphashape2 <- ashape(dat[which(dat$z == 2), c("x", "y")], alpha = 15)
ggplot() +
geom_point(data = dat, aes(x = x, y = y, colour = as.factor(dat$z))) +
geom_polygon(data=alphashape1, aes(x, y), fill="red", alpha=2/3) +
geom_polygon(data=alphashape2, aes(x, y), fill="blue", alpha=2/3)

This is because the ashape function only returns segments, not in any order.
The only way I found to reconstruct the order was by using the node information to form a graph and then find the shortest path along that graph.
A detailed example is here: https://rpubs.com/geospacedman/alphasimple - the code needs wrapping into a single function, which should be fairly easy to do. Once you have that order sorted, geom_polygon will draw it with filled shading in ggplot2.

Based on Spacedman's answer, I ordered separately the two sets of points and came up with this solution.
It could be optimized with a function that does it for each group automatically.
set.seed(2)
dat <- data.frame(x = rnorm(20, 10, 5), y = rnorm(20, 20, 5), z = c(rep(1, 6), rep(2, 4)))
library(ggplot2)
library(alphahull)
alpha <- 100
alphashape1 <- ashape(dat[which(dat$z==1), c("x", "y")], alpha = alpha)
alphashape2 <- ashape(dat[which(dat$z==2), c("x", "y")], alpha = alpha)
map <- ggplot(dat, aes(x = x, y = y)) +
geom_point(data = dat, aes(x = x, y = y, colour = as.factor(dat$z))) +
geom_segment(data = data.frame(alphashape1$edges), aes(x = x1, y = y1, xend = x2, yend = y2, colour = levels(as.factor(dat$z))[1])) +
geom_segment(data = data.frame(alphashape2$edges), aes(x = x1, y = y1, xend = x2, yend = y2, colour = levels(as.factor(dat$z))[2]))
map
alpha <- 15 # transparency argument
# First contour
alphashape1 <- ashape(dat[which(dat$z == 1), c("x", "y")], alpha = alpha)
alphashape1_ind <- alphashape1$edges[, c("ind1", "ind2")]
class(alphashape1_ind) = "character"
alphashape1_graph <- graph.edgelist(alphashape1_ind, directed = FALSE)
cut_graph1 <- alphashape1_graph - E(alphashape1_graph)[1] # Cut the first edge
ends1 <- names(which(degree(cut_graph1) == 1)) # Get two nodes with degree = 1
path1 <- get.shortest.paths(cut_graph1, ends1[1], ends1[2])$vpath[[1]]
path_nodes1 <- as.numeric(V(alphashape1_graph)[path1]$name)
# Second contour
alphashape2 <- ashape(dat[which(dat$z == 2), c("x", "y")], alpha = alpha)
alphashape2_ind <- alphashape2$edges[, c("ind1", "ind2")]
class(alphashape2_ind) = "character"
alphashape2_graph <- graph.edgelist(alphashape2_ind, directed = FALSE)
cut_graph2 <- alphashape2_graph - E(alphashape2_graph)[1] # Cut the first edge
ends2 <- names(which(degree(cut_graph2) == 1)) # Get two nodes with degree = 1
path2 <- get.shortest.paths(cut_graph2, ends2[1], ends2[2])$vpath[[1]]
path_nodes2 <- as.numeric(V(alphashape2_graph)[path2]$name)
# Updating of previous plot (see question)
map +
geom_polygon(data = dat[which(dat$z == 1), c("x", "y")][path_nodes1, ], aes(x = x, y = y),
fill = "red", colour = "red", size = 0.5, alpha = 0.3) +
geom_polygon(data = dat[which(dat$z == 2), c("x", "y")][path_nodes2, ],
aes(x = x, y = y), colour = "blue", fill = "blue", size = 0.5, alpha = 0.3)

Related

How to pass break values to stat_contour by facet or group

I am trying to use the ks library to calculate the 95% home range for groups within a data set. The problem is that the "break" values which define the cut-off for the 95% contours differ between groups. So far, I have been able to get my plots, but I have to manually add the break values for each group/level and I would really like to find a solution where I can create figures in ggplot where the break values are imported automatically.
require(ks)
require(dplyr)
require(ggplot2)
# define the ks function to pass to a grouped_df
ksFUN = function(data){
H = Hpi(data[,c("x","y")], binned = TRUE) * 1
fhata = kde(data[,c("x","y")], H = H, compute.cont = TRUE,
xmin = c(minX, minY), xmax = c(maxX, maxY))
res95 = data.frame(HR = contourSizes(fhata, cont = 95, approx = TRUE))
dimnames(fhata[['estimate']]) = list(fhata[["eval.points"]][[1]],
fhata[["eval.points"]][[2]])
dat = reshape2::melt(fhata[['estimate']])
dat$breaks50 = fhata[["cont"]]["50%"]
dat$breaks95 = fhata[["cont"]]["5%"]
return(dat)
}
set.seed(100)
# create some data
df1 = data.frame(x = rnorm(100, 0, 5),
y = rnorm(100, 0, 5),
Group = "Test1")
df2 = data.frame(x = rnorm(100, 10, 5),
y = rnorm(100, 10, 5),
Group = "Test2")
df = rbind(df1, df2)
# Set the minimum and maximum x and y values outside
# of the ksFUN to keep the data on the same scale
minX = min(df$x, na.rm = T); maxX = max(df$x, na.rm = T)
minY = min(df$y, na.rm = T); maxY = max(df$y, na.rm = T)
xx = df %>%
group_by(Group) %>%
do(as.data.frame(ksFUN(.)))
# extract the break value for the 95% contour (home range) and 50% (core area)
breaks = xx %>%
group_by(Group) %>%
summarize(breaks95 = mean(breaks95),
breaks50 = mean(breaks50))
breaks
# The only way I have been able to add the breaks is to manually add them
ggplot(data = xx, aes(x = Var1, y = Var2, fill = Group)) +
geom_point(data = df, aes(x = x, y = y, col = Group)) +
stat_contour(data = xx[xx$Group == "Test1",], aes(z = value),
breaks = 0.000587, alpha = 0.3, geom = "polygon") +
stat_contour(data = xx[xx$Group == "Test2",], aes(z = value),
breaks = 0.000527, alpha = 0.3, geom = "polygon")
I would really like to find a solution where I don't have to explicitly pass the break values to the stat_contour functions
Is there a problem with using the breaks column in breaks? e.g.
# base plot
pl <- ggplot(data = xx, aes(x = Var1, y = Var2, fill = Group)) +
geom_point(data = df, aes(x = x, y = y, col = Group))
groups <- unique(xx$Group)
# loop and add for each group
for(i in groups){
pl <- pl + stat_contour(data = xx[xx$Group == i,], aes(z = value),
breaks = breaks[breaks$Group == i, ]$breaks,
alpha = 0.3, geom = "polygon")
}
pl
I get some weird plots, at the edges, especially when I remove the breaks part from stat_contour, which leads me to think there might be a bug in ksFUN

Are there easy way to set limit of vertical or horizontal line in ggplot?

I want to plot part of geom_vline or geom_hline,codes as follows:
df <- data.frame(x=1:10,y=1:10)
plt <- ggplot(data=df)+
geom_point(aes(x=x,y=y))+
geom_vline(aes(xintercept=x[2]))+
geom_hline(aes(yintercept=y[2]))
I expect to display the left and lower part line as the cross point.
but there is no argument such as xlim or ylim in geom_vline
You can use geom_line.
library(ggplot2)
df <- data.frame(x=1:10,y=1:10)
plt <- ggplot(data=df)+
geom_point(aes(x = x, y = y))+
geom_line(data = data.frame(x = c(2, Inf), y = c(2, 2)), aes(x = x , y = y)) +
geom_line(data = data.frame(x = c(2, 2), y = c(2, Inf)), aes(x = x, y = y))
Or geom_segment. They resulted in the same plot.
plt <- ggplot(data=df)+
geom_point(aes(x = x, y = y))+
geom_segment(aes(x = 2 , y = 2, xend = Inf, yend = 2)) +
geom_segment(aes(x = 2 , y = 2, xend = 2, yend = Inf))

Generating multiple geom_smooth lines of data samples

Attempting to build a new geom function here that will take a sample of points from a dataset by group, and fit a number of local regressions through the individual subsets. This would generate multiple local regression lines as samples of a full dataset. In the end generating something akin to this:
Though I'm continuing to get errors with the function I've built below (with reprex). Any assistance is appreciated. Thank you!
library(ggplot2)
library(dplyr)
geom_mline <- function(mapping = NULL, data = NULL, stat = "mline",
position = "identity", show.legend = NA,
inherit.aes = TRUE, na.rm = TRUE,
SPAN = .9, N_size = 50, N_LOESS = 50, ...) {
layer(
geom = geomMline,
mapping = mapping,
data = data,
stat = stat,
position = position,
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(SPAN=SPAN,
N_size=N_size,
N_LOESS=N_LOESS,
...)
)
}
geomMline <- ggproto("geomMline", GeomLine,
required_aes = c("x", "y"),
default_aes = aes(colour = "black", size = 0.5, linetype = 1, alpha = NA)
)
stat_mline <- function(mapping = NULL, data = NULL, geom = "line",
position = "identity", show.legend = NA, inherit.aes = TRUE,
SPAN = .9, N_size = 50, N_LOESS = 50, ...) {
layer(
stat = StatMline,
data = data,
mapping = mapping,
geom = geom,
position = position,
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(SPAN=SPAN,
N_size=N_size,
N_LOESS=N_LOESS,
...
)
)
}
StatMline <- ggproto("StatMline", Stat,
required_aes = c("x", "y"),
compute_group = function(self, data, scales, params,
SPAN = .9, N_size = 50, N_LOESS = 50) {
tf <- tempfile(fileext=".png")
png(tf)
plot.new()
colnames(data) <- c("x", "variable", "y")
LOESS_DF <- data.frame(y = seq(min(data$x),
max(data$x),
length.out = 50))
for(i in 1:N_LOESS){
# sample N_size points
df_sample <- sample_n(data, N_size)
# fit a loess
xx <- df_sample$x
yy <- df_sample$y
tp_est <- loess(yy ~ xx , span = SPAN)
# predict accross range of x using loess model
loess_vec <- data.frame(
predict(tp_est, newdata =
data.frame(xx = seq(min(data$x), max(data$x), length.out = 500))))
colnames(loess_vec) <- as.character(i)
# repeat x times
LOESS_DF <- cbind(LOESS_DF,loess_vec)
#str(LOESS_DF)
}
invisible(dev.off())
unlink(tf)
data.frame(reshape2::melt(LOESS_DF, id = "y"))
}
)
# dummy data
library(reshape2)
x <- seq(1,1000,1)
y1 <- rnorm(n = 1000,mean = x*2^1.1, sd = 200)
y2 <- rnorm(n = 1000,mean = x*1, sd = 287.3)
y3 <- rnorm(n = 1000,mean = x*1.1, sd = 100.1)
data <- data.frame(x , y1, y2, y3)
data <- melt(data, id.vars = "x")
str(data)
ggplot(data,aes(x,value,group = variable, color = va
riable))+geom_point()
ggplot(data = data, aes(x = x, y = value, group=variable, color = variable)) +
#geom_point(color="black") +
#geom_smooth(se=FALSE, linetype="dashed", size=0.5) +
#stat_mline(SPAN = .2, N_size = 50, N_LOESS = 5)
geom_mline(SPAN = .2, N_size = 50, N_LOESS = 5)
#data <- subset(data, variable == "y2")
You could use the existing geom_smooth geom and use lapply to generate geom_smooth calls from multiple random samples from the original data frame. For example:
# Fake data
set.seed(2)
dat = data.frame(x = runif(100, 0, 10))
dat$y = 2*dat$x - 0.5*dat$x^2 - 5 + rnorm(100, 0, 5)
ggplot(dat, aes(x, y)) +
geom_point() +
lapply(1:10, function(i) {
geom_smooth(data=dat[sample(1:nrow(dat), 20), ], se=FALSE)
})
Or, keeping it all in the tidyverse:
library(tidyverse)
ggplot(dat, aes(x, y)) +
geom_point() +
map(1:10, ~geom_smooth(data=dat[sample(1:nrow(dat), 20), ], se=FALSE))
Here's a way to plot the quantiles within ggplot. I'm not sure if it's possible to get stat_quantile to plot a ribbon. To get that, you might have to calculate the quantile regression outside of ggplot and add use geom_ribbon to add the values.
ggplot(dat, aes(x, y)) +
geom_point() +
geom_quantile(quantiles=c(0.1, 0.5, 0.9), formula=y ~ poly(x, 2),
aes(color=factor(..quantile..), size=factor(..quantile..))) +
scale_color_manual(values=c("red","blue","red")) +
scale_size_manual(values=c(1,2,1)) +
labs(colour="Quantile") +
guides(colour=guide_legend(reverse=TRUE), size=FALSE) +
theme_classic()

operation between stat_summary_hex plots made in ggplot2

I have two populations A and B distributed spatially with one character Z, I want to be able to make an hexbin substracting the proportion of the character in each hexbin. Here I have the code for two theoretical populations A and B
library(hexbin)
library(ggplot2)
set.seed(2)
xA <- rnorm(1000)
set.seed(3)
yA <- rnorm(1000)
set.seed(4)
zA <- sample(c(1, 0), 20, replace = TRUE, prob = c(0.2, 0.8))
hbinA <- hexbin(xA, yA, xbins = 40, IDs = TRUE)
A <- data.frame(x = xA, y = yA, z = zA)
set.seed(5)
xB <- rnorm(1000)
set.seed(6)
yB <- rnorm(1000)
set.seed(7)
zB <- sample(c(1, 0), 20, replace = TRUE, prob = c(0.4, 0.6))
hbinB <- hexbin(xB, yB, xbins = 40, IDs = TRUE)
B <- data.frame(x = xB, y = yB, z = zB)
ggplot(A, aes(x, y, z = z)) + stat_summary_hex(fun = function(z) sum(z)/length(z), alpha = 0.8) +
scale_fill_gradientn(colours = c("blue","red")) +
guides(alpha = FALSE, size = FALSE)
ggplot(B, aes(x, y, z = z)) + stat_summary_hex(fun = function(z) sum(z)/length(z), alpha = 0.8) +
scale_fill_gradientn (colours = c("blue","red")) +
guides(alpha = FALSE, size = FALSE)
here is the two resulting graphs
My goal is to make a third graph with hexbins with the values of the difference between hexbins at the same coordinates but I don't even know how to start to do it, I have done something similar in the raster Package, but I need it as hexbins
Thanks a lot
You need to make sure that both plots use the exact same binning. In order to achieve this, I think it is best to do the binning beforehand and then plot the results with stat_identity / geom_hex. With the variables from your code sample you ca do:
## find the bounds for the complete data
xbnds <- range(c(A$x, B$x))
ybnds <- range(c(A$y, B$y))
nbins <- 30
# function to make a data.frame for geom_hex that can be used with stat_identity
makeHexData <- function(df) {
h <- hexbin(df$x, df$y, nbins, xbnds = xbnds, ybnds = ybnds, IDs = TRUE)
data.frame(hcell2xy(h),
z = tapply(df$z, h#cID, FUN = function(z) sum(z)/length(z)),
cid = h#cell)
}
Ahex <- makeHexData(A)
Bhex <- makeHexData(B)
## not all cells are present in each binning, we need to merge by cellID
byCell <- merge(Ahex, Bhex, by = "cid", all = T)
## when calculating the difference empty cells should count as 0
byCell$z.x[is.na(byCell$z.x)] <- 0
byCell$z.y[is.na(byCell$z.y)] <- 0
## make a "difference" data.frame
Diff <- data.frame(x = ifelse(is.na(byCell$x.x), byCell$x.y, byCell$x.x),
y = ifelse(is.na(byCell$y.x), byCell$y.y, byCell$y.x),
z = byCell$z.x - byCell$z.y)
## plot the results
ggplot(Ahex) +
geom_hex(aes(x = x, y = y, fill = z),
stat = "identity", alpha = 0.8) +
scale_fill_gradientn (colours = c("blue","red")) +
guides(alpha = FALSE, size = FALSE)
ggplot(Bhex) +
geom_hex(aes(x = x, y = y, fill = z),
stat = "identity", alpha = 0.8) +
scale_fill_gradientn (colours = c("blue","red")) +
guides(alpha = FALSE, size = FALSE)
ggplot(Diff) +
geom_hex(aes(x = x, y = y, fill = z),
stat = "identity", alpha = 0.8) +
scale_fill_gradientn (colours = c("blue","red")) +
guides(alpha = FALSE, size = FALSE)

How to draw boxes around "groups" in a heatmap?

I made a graph like this:
library(reshape2)
library(ggplot2)
m <- matrix(1:64 - 32, 8)
rownames(m) <- colnames(m) <-
c(paste0("a", 1:3), paste0("b", 1:2), paste0("c", 1:3))
d <- melt(m)
gg <- ggplot(d) +
geom_tile(aes(x = Var1, y = Var2, fill = value)) +
scale_fill_gradient2()
How can I programmatically draw boxes around the "a", "b", and "c" groups?
The matrix m will always be square. colnames(m) and rownames(m) will always be the same. Therefore the boxes will be cover the entire grid and will never overlap. The group sizes will vary, in general.
I'm also not married to ggplot2. I'm open to a solution in base graphics with image if it's not fussier than the ggplot2/grid version.
I got as far as
d$group <- substr(d$Var1, 1, 1)
before I realized I had no clue how to proceed.
What I have:
What I want:
Or geom_segment
library('reshape2')
library('ggplot2')
m <- matrix(1:64 - 32, 8)
rownames(m) <- colnames(m) <-
c(paste0("a", 1:3), paste0("b", 1:2), paste0("c", 1:3))
gg <- ggplot(melt(m)) +
geom_tile(aes(x = Var1, y = Var2, fill = value)) +
scale_fill_gradient2()
tt <- table(gsub('\\d+', '', colnames(m)))
ll <- unname(c(0, cumsum(tt)) + .5)
gg + geom_segment(aes(x = ll, xend = ll, y = head(ll, 1), yend = tail(ll, 1))) +
geom_segment(aes(y = ll, yend = ll, x = head(ll, 1), xend = tail(ll, 1)))
This may be considered cheating, but very easy:
# Depending on your data you may be able to generate `d2` directly
# here we need to re-order a bit
d2 <- transform(
d, V1 = substr(Var1, 1, 1),
V2=factor(substr(Var2, 1, 1), levels=letters[3:1])
)
ggplot(d2) +
geom_tile(aes(x = Var1, y = Var2, fill = value)) +
scale_fill_gradient2() +
facet_grid(V2 ~ V1, scales="free", space="free")
i.e. something like this:
xmin <- c(0.5,3.5,5.5)
xmax <- c(3.5,5.5,8.5)
ymin <- c(0.5,3.5,5.5)
ymax <- c(3.5,5.5,8.5)
box1 <- data.frame(xmin = rep(xmin,times = 3),
xmax = rep(xmax,each = 3),
ymin = rep(ymin,times = 3),
ymax = rep(ymax,each = 3))
ggplot(melt(m)) +
geom_tile(aes(x = Var1, y = Var2, fill = value)) +
geom_rect(data = box1,aes(xmin = xmin,xmax = xmax,ymin = ymin,ymax= ymax),
fill = NA,color = "black") +
scale_fill_gradient2()
Another option would be to simply use geom_hline and geom_vline, although you might have a hard time removing the little bits that extend past the edges of the colored rectangles.

Resources