extract points/data within ellipse ggplot2 - r

I am using geom_ellipse to create an ellipse on faithful dataset.
Here is the code:
t <- position_nudge(x=-1,y=-0.5)
obj <- ggplot(faithful, aes(waiting, eruptions))+
geom_point()+ geom_ellipse(aes(x0 = 70, y0 = 3, a= 3, b = 10,angle = pi/3),color="red",
position = t)
The graph looks like this:
I want to extract the points/data that are present within the ellipse? How can I do that? I tried what was done here , but it does not work for geom_ellipse.

The thing with geom_ellipse is that when you look at the data of the layer with ggplot_build, you can see that the ellipse is shown multiple times in a data loop. So what you could do is get a first copy of the values like 272 values (same as your data). Based on the answer you can do the following:
library(ggplot2)
library(ggforce)
library(sp)
t <- position_nudge(x=-1,y=-0.5)
obj <- ggplot(faithful, aes(waiting, eruptions))+
geom_point()+ geom_ellipse(aes(x0 = 70, y0 = 3, a= 3, b = 10,angle = pi/3),color="red",
position = t)
obj
#> Warning: Using the `size` aesthetic in this geom was deprecated in ggplot2 3.4.0.
#> ℹ Please use `linewidth` in the `default_aes` field and elsewhere instead.
# Extract components
build <- ggplot_build(obj)$data
points <- build[[1]]
ell <- build[[2]]
# Find which points are inside the ellipse, and add this to the data
dat <- data.frame(
points[1:2],
in.ell = as.logical(point.in.polygon(points$x, points$y, ell$x[1:272], ell$y[1:272]))
)
# Plot the result
ggplot(dat, aes(x, y)) +
geom_point(aes(col = in.ell)) +
geom_ellipse(aes(x0 = 70, y0 = 3, a= 3, b = 10,angle = pi/3),color="red",
position = t)
Created on 2022-12-17 with reprex v2.0.2
When you check the dat dataframe, you can see which points are in the ellipse.

Related

Convert an vector to a specific color for `plot()`

Below is a minimal working example.
library(ggplot2)
set.seed(926)
df <- data.frame(x. = rnorm(100),
y. = rnorm(100),
color. = rnorm(100))
library(ggplot2)
p <- ggplot(df, aes(x = x., y = y., color = color.)) +
geom_point() +
viridis::scale_color_viridis(option = "C")
p
p_build <- ggplot_build(p)
# The desired vector is below somehow I feel there must have an easier way to get it
p_build[["data"]][[1]][["colour"]]
df$color_converted <- p_build[["data"]][[1]][["colour"]]
Specifically, I like to use viridis::viridis(option = "C") color scheme. Could anyone help with this? Thanks.
*Modify*
Sorry, my question wasn't clear enough. Let me put it this way, I couldn't utilize ggplot2 package and had to use the pure plot() function that comes with R, in my specific project.
My goal is to try to reproduce the above plot with the base R package.
plot(df$x., df$y., color = df$color_converted)
If possible, could anyone also direct me on how to customize a gradient legend that is similar to ggplot2, with base legend()?
First of all you can assign the colors to a vector called "color2" and use scale_colour_gradientn to assign these colors to your plot. The problem is that the colors are not sorted right so you have to do that first by using the TSP package. In the output below you can see that you can recreate the plot without using scale_color_viridis:
set.seed(926)
df <- data.frame(x. = rnorm(100),
y. = rnorm(100),
color. = rnorm(100))
library(ggplot2)
library(TSP)
p <- ggplot(df, aes(x = x., y = y., color = color.)) +
geom_point() +
viridis::scale_color_viridis(option = "C")
p
p_build <- ggplot_build(p)
# The desired vector is below somehow I feel there must have an easier way to get it
color2 <- p_build[["data"]][[1]][["colour"]]
rgb <- col2rgb(color2)
lab <- convertColor(t(rgb), 'sRGB', 'Lab')
ordered_cols2 <- color2[order(lab[, 'L'])]
ggplot(df, aes(x = x., y = y.)) +
geom_point(aes(colour = color.)) +
scale_colour_gradientn(colours = ordered_cols2, guide = "colourbar")
#viridis::scale_color_viridis(option = "C")
Created on 2022-08-17 with reprex v2.0.2
Base r
You can use the following code:
color2 <- p_build[["data"]][[1]][["colour"]]
rgb <- col2rgb(color2)
lab <- convertColor(t(rgb), 'sRGB', 'Lab')
ordered_cols2 <- color2[order(lab[, 'L'])]
layout(matrix(1:2,ncol=2), width = c(2,1),height = c(1,1))
plot(df$x., df$y., col = df$color_converted)
legend_image <- as.raster(matrix(ordered_cols2, ncol=1))
plot(c(0,2),c(0,1),type = 'n', axes = F,xlab = '', ylab = '', main = 'legend title')
text(x=1.5, y = seq(0,1,l=5), labels = seq(-3,3,l=5))
rasterImage(legend_image, 0, 0, 1,1)
Output:

Position geom_label on the outside of a network using ggplot?

I am creating a network style plot using ggnet and ggplot. At the moment im just using geom_label's nudge_y argument to position the labels. But I was wondering if it's possible to position the labels so they are always on the outside of the circle (my network is always circular). A toy example is shown below.
library(ggplot2)
library(igraph)
library(GGally) # contains ggnet2
nam <- c("A", "B", "C", "D", "E") # Node name
g <- sample_pa(5, m = 5) # generate graph with x nodes
g <- igraph::as_data_frame(g) # create df
g <- rbind(g$to,g$from) # create matrix
net.bg <- make_graph(g, 5, directed = FALSE) #make graph
E(net.bg)$weight <- sample(1:3, 5,replace=T)
V(net.bg)$size <- sample(1:5, 5,replace=T)
p <- ggnet2(net.bg,
mode = "circle",
size = V(net.bg)$size,
node.color = "red",
edge.size = E(net.bg)$weight,
edge.alpha = 0.5,
edge.color = "blue") +
theme(legend.text = element_text(size = 10)) +
geom_label(aes(label = nam),nudge_y = 0.05)
p
The above code produces something like this:
As can be seen, the labels are all nudged in the y direction. But I was hoping to make something like this (which I made in powerpoint):
Is it possible to do such a thing?
It is possible, though not particularly easy or portable. The object p is a ggplot object, so contains all the information required to build the plot in terms of co-ordinates, geoms, mapping, data, etc.
This means you can directly change the labels layer so that its x, y co-ordinates are a small multiple above their previous values. So you could do:
geoms <- sapply(p$layers, function(x) class(x$geom)[1])
segments <- p$layers[[which(geoms == "GeomSegment")]]
labels <- p$layers[[which(geoms == "GeomLabel")]]
segments$data <- segments$data - 0.5
p$data$x <- p$data$x - 0.5
p$data$y <- p$data$y - 0.5
labels$position$y <- 0
labels$data <- p$data
labels$data$x <- labels$data$x * 1.1
labels$data$y <- labels$data$y * 1.1
p$scales$scales <- lapply(p$scales$scales, function(x) {
if(class(x)[1] == "ScaleContinuousPosition") ScaleContinuousPosition else x })
p <- p + theme(axis.text = element_blank())
p

created a nested cdf that doesn't reach 1

Here is some workable example of data I wish to plot:
set.seed(123)
x <- rweibull(n = 2000, shape = 2, scale = 10)
x <- round(x, digits = 0)
x <- sort(x, decreasing = FALSE)
y <- c(rep(0.1, times = 500),rep(0.25, times = 500),rep(0.4, times = 500),rep(0.85, times = 500))
z <- rbinom(n=2000, size=1, prob=y)
df1 <- data.frame(x,z)
I want to plot the overal fequency of z across x.
unlike a typical cdf, the function should not reach 1.0, but instead
sum(df1$z)/length(df1$z)
a ymax of 0.36 (721/2000).
using ggplot2 we can create a cdf of x with the following command:
library(ggplot2)
ggplot(df1, aes(x)) + stat_ecdf()
But i want to extend this plot to show the cumulative percentage of z (as a function of 'x')
The end result should like like
EDIT
with some very poor data manipulation I am able to generate the something similiar to a cdf plot, but there must be a more beautiful and easy method using various packages and ggplot
mytable <- table(df1$x, df1$z)
mydf <- as.data.frame.matrix(mytable)
colnames(mydf) <- c("z_no", "z_yes")
mydf$A <- 1:length(mydf$z_no)
mydf$sum <- cumsum(mydf$z_yes)
mydf$dis <- mydf$sum/length(z)
plot(mydf$A, mydf$dis)
You can use the package dplyr to process the data as follows:
library(dplyr)
plot_data <- group_by(df1, x) %>%
summarise(z_num = sum(z)) %>%
mutate(cum_perc_z = cumsum(z_num)/nrow(df1))
This gives the same result as the data processing that you describe in your edit. Note, however, that I get sum(df1$z) = 796 and the maximal y value is thus 796/2000 = 0.398.
For the plot, you can use geom_step() to have a step function and add the horizontal line with geom_hline():
ggplot(plot_data, aes(x = x, y = cum_perc_z)) +
geom_step(colour = "red", size = 0.8) +
geom_hline(yintercept = max(plot_data$cum_perc_z))

inheritance of aesthetics in ggplot2 0.9.3 & the behavior of annotation_custom

Following up on a recent question of mine, this one is a bit different and illustrates the problem more fully using simpler examples. Below are two data sets and three functions. The first one draws some points and a circle as expected:
library("ggplot2")
library("grid")
td1 <- data.frame(x = rnorm(10), y = rnorm(10))
tf1 <- function(df) { # works as expected
p <- ggplot(aes(x = x, y = y), data = df)
p <- p + geom_point(color = "red")
p <- p + annotation_custom(circleGrob())
print(p)
}
tf1(td1)
This next one seems to ask for the exact sample plot but the code is slightly different. It does not give an error but does not draw the circle:
tf2 <- function(df) { # circle isn't draw, but no error either
p <- ggplot()
p <- p + geom_point(data = df, aes(x = x, y = y), color = "red")
p <- p + annotation_custom(circleGrob())
print(p)
}
tf2(td1)
Finally, this one involves a more complex aesthetic and gives an empty layer when you try to create the circle:
td3 <- data.frame(r = c(rnorm(5, 5, 1.5), rnorm(5, 8, 2)),
f1 = c(rep("L", 5), rep("H", 5)), f2 = rep(c("A", "B"), 5))
tf3 <- function(df) {
p <- ggplot()
p <- p + geom_point(data = df,
aes(x = f1, y = r, color = f2, group = f2))
# p <- p + annotation_custom(circleGrob()) # comment out and it works
print(p)
}
tf3(td3)
Now, I suspect the problem here is not the code but my failure to grasp the inner workings of ggplot2. I could sure use an explanation of why the circle is not drawn in the 2nd case and why the layer is empty in the third case. I looked at the code for annotation_custom and it has a hard-wired inherit.aes = TRUE which I think is the problem. I don't see why this function needs any aesthetic at all (see the docs on it). I did try several ways to override it and set inherit.aes = FALSE but I was unable to fully penetrate the namespace and make it stick. I tried to example the objects created by ggplot2 but these proto objects are nested very deeply and hard to decipher.
To answer this :
"I don't see why this function needs any aesthetic at all".
In fact annotation_custom need x and y aes to scale its grob, and to use after the native units.
Basically it did this :
x_rng <- range(df$x, na.rm = TRUE) ## ranges of x :aes x
y_rng <- range(df$y, na.rm = TRUE) ## ranges of y :aes y
vp <- viewport(x = mean(x_rng), y = mean(y_rng), ## create a viewport
width = diff(x_rng), height = diff(y_rng),
just = c("center","center"))
dd <- editGrob(grod =circleGrob(), vp = vp) ##plot the grob in this vp
To illustrate this I add a grob to a dummy plot used as a scale for my grob. The first is a big scale and the second is a small one.
base.big <- ggplot(aes(x = x1, y = y1), data = data.frame(x1=1:100,y1=1:100))
base.small <- ggplot(aes(x = x1, y = y1), data = data.frame(x1=1:20,y1=1:1))
I define my grob, see I use the native scales for xmin,xmax,ymin,ymax
annot <- annotation_custom(grob = circleGrob(), xmin = 0,
xmax = 20,
ymin = 0,
ymax = 1)
Now see the scales difference(small point / big circle) between (base.big +annot) and (base.small + annot).
library(gridExtra)
grid.arrange(base.big+annot,
base.small+annot)

PCA FactoMineR plot data

I'm running an R script generating plots of the PCA analysis using FactorMineR.
I'd like to output the coordinates for the generated PCA plots but I'm having trouble finding the right coordinates. I found results1$ind$coord and results1$var$coord but neither look like the default plot.
I found
http://www.statistik.tuwien.ac.at/public/filz/students/seminar/ws1011/hoffmann_ausarbeitung.pdf
and
http://factominer.free.fr/classical-methods/principal-components-analysis.html
but neither describe the contents of the variable created by the PCA
library(FactoMineR)
data1 <- read.table(file=args[1], sep='\t', header=T, row.names=1)
result1 <- PCA(data1,ncp = 4, graph=TRUE) # graphs generated automatically
plot(result1)
I found that $ind$coord[,1] and $ind$coord[,2] are the first two pca coords in the PCA object. Here's a worked example that includes a few other things you might want to do with the PCA output...
# Plotting the output of FactoMineR's PCA using ggplot2
#
# load libraries
library(FactoMineR)
library(ggplot2)
library(scales)
library(grid)
library(plyr)
library(gridExtra)
#
# start with a clean slate
rm(list=ls(all=TRUE))
#
# load example data
data(decathlon)
#
# compute PCA
res.pca <- PCA(decathlon, quanti.sup = 11:12, quali.sup=13, graph = FALSE)
#
# extract some parts for plotting
PC1 <- res.pca$ind$coord[,1]
PC2 <- res.pca$ind$coord[,2]
labs <- rownames(res.pca$ind$coord)
PCs <- data.frame(cbind(PC1,PC2))
rownames(PCs) <- labs
#
# Just showing the individual samples...
ggplot(PCs, aes(PC1,PC2, label=rownames(PCs))) +
geom_text()
# Now get supplementary categorical variables
cPC1 <- res.pca$quali.sup$coor[,1]
cPC2 <- res.pca$quali.sup$coor[,2]
clabs <- rownames(res.pca$quali.sup$coor)
cPCs <- data.frame(cbind(cPC1,cPC2))
rownames(cPCs) <- clabs
colnames(cPCs) <- colnames(PCs)
#
# Put samples and categorical variables (ie. grouping
# of samples) all together
p <- ggplot() + theme(aspect.ratio=1) + theme_bw(base_size = 20)
# no data so there's nothing to plot...
# add on data
p <- p + geom_text(data=PCs, aes(x=PC1,y=PC2,label=rownames(PCs)), size=4)
p <- p + geom_text(data=cPCs, aes(x=cPC1,y=cPC2,label=rownames(cPCs)),size=10)
p # show plot with both layers
# Now extract the variables
#
vPC1 <- res.pca$var$coord[,1]
vPC2 <- res.pca$var$coord[,2]
vlabs <- rownames(res.pca$var$coord)
vPCs <- data.frame(cbind(vPC1,vPC2))
rownames(vPCs) <- vlabs
colnames(vPCs) <- colnames(PCs)
#
# and plot them
#
pv <- ggplot() + theme(aspect.ratio=1) + theme_bw(base_size = 20)
# no data so there's nothing to plot
# put a faint circle there, as is customary
angle <- seq(-pi, pi, length = 50)
df <- data.frame(x = sin(angle), y = cos(angle))
pv <- pv + geom_path(aes(x, y), data = df, colour="grey70")
#
# add on arrows and variable labels
pv <- pv + geom_text(data=vPCs, aes(x=vPC1,y=vPC2,label=rownames(vPCs)), size=4) + xlab("PC1") + ylab("PC2")
pv <- pv + geom_segment(data=vPCs, aes(x = 0, y = 0, xend = vPC1*0.9, yend = vPC2*0.9), arrow = arrow(length = unit(1/2, 'picas')), color = "grey30")
pv # show plot
# Now put them side by side in a single image
#
grid.arrange(p,pv,nrow=1)
#
# Now they can be saved or exported...
Adding something extra to Ben's answer. You'll note in the first chart in Ben's response that the labels overlap somewhat. The pointLabel() function in the maptools package attempts to find locations for the labels without overlap. It's not perfect, but you can adjust the positions in the new dataframe (see below) to fine tune if you want. (Also, when you load maptools you get a note about gpclibPermit(). You can ignore it if you're concerned about the restricted licence). The first part of the script below is Ben's script.
# load libraries
library(FactoMineR)
library(ggplot2)
library(scales)
library(grid)
library(plyr)
library(gridExtra)
#
# start with a clean slate
# rm(list=ls(all=TRUE))
#
# load example data
data(decathlon)
#
# compute PCA
res.pca <- PCA(decathlon, quanti.sup = 11:12, quali.sup=13, graph = FALSE)
#
# extract some parts for plotting
PC1 <- res.pca$ind$coord[,1]
PC2 <- res.pca$ind$coord[,2]
labs <- rownames(res.pca$ind$coord)
PCs <- data.frame(cbind(PC1,PC2))
rownames(PCs) <- labs
#
# Now, the code to produce Ben's first chart but with less overlap of the labels.
library(maptools)
PCs$label=rownames(PCs)
# Base plot first for pointLabels() to get locations
plot(PCs$PC1, PCs$PC2, pch = 20, col = "red")
new = pointLabel(PCs$PC1, PCs$PC2, PCs$label, cex = .7)
new = as.data.frame(new)
new$label = PCs$label
# Then plot using ggplot2
(p = ggplot(data = PCs) +
geom_hline(yintercept = 0, linetype = 3, colour = "grey20") +
geom_vline(xintercept = 0, linetype = 3, colour = "grey20") +
geom_point(aes(PC1, PC2), shape = 20, col = "red") +
theme_bw())
(p = p + geom_text(data = new, aes(x, y, label = label), size = 3))
The result is:
An alternative is to use the biplot function from CoreR or biplot.psych from the psych package. This will put the components and the data onto the same figure.
For the decathlon data set, use principal and biplot from the psych package:
library(FactoMineR) #needed to get the example data
library(psych) #needed for principal
data(decathlon) #the data set
pc2 <- principal(decathlon[1:10],2) #just the first 10 columns
biplot(pc2,labels = rownames(decathlon),cex=.5, main="Biplot of Decathlon results")
#this is a call to biplot.psych which in turn calls biplot.
#adjust the cex parameter to change the type size of the labels.
This looks like:
!a biplot http://personality-project.org/r/images/olympic.biplot.pdf
Bill

Resources