Plot unconnected graph in igraph (R)

I have an unconnected graph that I plot with the Fruchterman-Reingold layout in igraph:
require(igraph)
er_graph <- erdos.renyi.game(100, 5/20)+erdos.renyi.game(100, 5/20)
coords<-layout.fruchterman.reingold(er_graph)
plot(er_graph,layout=coords, vertex.label=NA)
Example plot: the result is two distant clusters.
I would like to decrease the white area in my plot.
Is there a way to scale the coordinates in order to decrease the space between the clusters?

There may be an easy way to do this in one of the layout functions, but you can also directly change the node coordinates after creating the layout. If you look at coords, you can see it's just a matrix of node coordinates. You can use the cluster labels to move the two node clusters closer together programmatically:
require(igraph)
require(dplyr)
er_graph <- erdos.renyi.game(100, 5/20)+erdos.renyi.game(100, 5/20)
# Make layout reproducible
set.seed(40)
coords <- layout.fruchterman.reingold(er_graph)
# Original graph
plot(er_graph,layout=coords, vertex.label=NA)
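At this point coords is just a plain numeric matrix of vertex positions, which is what makes the direct manipulation below possible. A quick sanity check (nothing specific to this example):
head(coords)   # two columns of (x, y) coordinates, one row per vertex
dim(coords)    # 200 rows (one per vertex) by 2 columns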
Move clusters closer together: First, we add the cluster labels to the coordinates and set a parameter f for the fraction of the distance between clusters we want to eliminate. Then we subtract from each node f times the difference between the mean coordinates of that node's cluster and the mean coordinates over both clusters.
# Add cluster labels to coords
coords = data.frame(coords, clust=clusters(er_graph)$membership)
# Move closer by a fraction "f" of mean distance between clusters
f = 0.6
# Shift each node toward the overall center of mass of the graph
coords = coords %>%
  mutate(X1 = ifelse(clust==1, X1 - f*(mean(X1[clust==1]) - mean(X1)), X1 - f*(mean(X1[clust==2]) - mean(X1))),
         X2 = ifelse(clust==1, X2 - f*(mean(X2[clust==1]) - mean(X2)), X2 - f*(mean(X2[clust==2]) - mean(X2))))
# Convert coords back to original matrix form
coords = as.matrix(coords[,1:2])
# Re-plot graph
plot(er_graph,layout=coords, vertex.label=NA)

Related

How can I extract a sublayout to match a subgraph in R

I'm using igraph in R, trying to extract a subgraph that comprises only the largest cliques in a graph. I want to plot the graph and the subgraph (1) without moving the vertices and (2) while maintaining numbering. I can't seem to make it work. I tried storing the layout coordinates directly in the vertices but plot seems to rescale things. Setting rescale=FALSE didn't work either. Here's what I have, first plotting the random graph, then highlighting the largest cliques, and finally displaying only the largest cliques:
# plot random graph
g <- sample_gnp(n=30, p=.1)
l=layout_with_fr(g)
V(g)$x <- l[,1]
V(g)$y <- l[,2]
V(g)$id <- 1:vcount(g)
plot(g,vertex.size=6,vertex.label.dist=1,vertex.label=V(g)$id,main="a random network",sub="where are the cliques?")
# highlight largest cliques
lc=unlist(largest_cliques(g))
vcol <- rep("grey80", vcount(g))
vcol[unlist(lc)] <- "gold"
plot(g, vertex.size=6,vertex.color=vcol,vertex.label.dist=1,vertex.label=V(g)$id,main="here they are!",
layout=l)
# plot only the largest cliques, without changing position or vertex numbers
sg = induced_subgraph(g,lc)
sl = cbind(V(sg)$x,V(sg)$y)
plot(sg,vertex.size=6,vertex.label.dist=1,vertex.label=V(sg)$id,vertex.color="gold",
layout=sl)
First of all, you did not set the random seed, so each time you run this code you will get a different graph and layout. I am setting the seed for reproducibility.
# plot random graph
set.seed(2021)
g <- sample_gnp(n=30, p=.1)
l=layout_with_fr(g)
V(g)$x <- l[,1]
V(g)$y <- l[,2]
V(g)$id <- 1:vcount(g)
# highlight largest cliques
lc=unlist(largest_cliques(g))
vcol <- rep("grey80", vcount(g))
vcol[unlist(lc)] <- "gold"
There are other ways to do this, but I will do it using rescale=F.
When you do that, you have to adjust xlim and ylim to the ranges of the layout. Also, it changes the scale, so you need to change the vertex size.
Putting all of that together, I get:
Rx =range(l[,1])
Ry =range(l[,2])
par(mfrow=c(1,2))
plot(g, vertex.size=45,vertex.color=vcol,vertex.label.dist=1,
vertex.label=V(g)$id,main="here they are!", layout=l,
rescale=F, xlim=Rx, ylim=Ry)
# plot only the largest cliques, without changing position or vertex numbers
sg = induced_subgraph(g,lc)
sl = cbind(V(sg)$x,V(sg)$y)
plot(sg,vertex.size=45, vertex.label.dist=1,vertex.label=V(sg)$id,vertex.color="gold",
layout=sl, rescale=F, xlim=Rx, ylim=Ry, main="Unmoved")
You can use set_vertex_attr() to set the name attribute of graph g, and then subset the layout l like below:
# plot random graph
set.seed(2021)
g <- sample_gnp(n = 30, p = .1)
l <- layout_with_fr(g)
# highlight largest cliques
lc <- unlist(largest_cliques(g))
g %>%
  set_vertex_attr(name = "name", value = seq(vcount(.))) %>%
  induced.subgraph(lc) %>%
  plot(layout = l[as.integer(names(V(.))), ], vertex.color = "gold")
and you will get the largest cliques plotted at their original coordinates with their original vertex labels.

Plot two igraph networks using the same coordinates and same placement in the plot frame

I am trying to plot a network that changes in time. The network starts with a certain number of nodes and edges and each time step some of the nodes and edges are removed.
I want to be able to plot the network so that the nodes are in the same place in each plot. However, when I try this, the nodes sometimes shift position in the plot frame even though their positions relative to each other are unchanged.
I am turning the changing network into a GIF, so even small shifts are annoying. I think the shift may occur when a large fraction of the nodes is removed, but I am not sure.
The code below illustrates this using an ER graph.
library(igraph); library(dplyr)
#generate random graph
set.seed(500)
RandomGraph <- sample_gnm(1000, 2500)
#name nodes
V(RandomGraph)$name <- paste0("Node", 1:1000)
#Get the coordinates of the Nodes
Coords <- layout_with_fr(RandomGraph) %>%
as_tibble %>%
bind_cols(data_frame(names = names(V(RandomGraph))))
#Delete random vertices
deletevertex <-sample( V(RandomGraph)$name, 400)
RandomGraph2 <-delete.vertices(RandomGraph, deletevertex)
#get the coordinates of the remaining Nodes
NetCoords <- data_frame(names = names(V(RandomGraph2))) %>%
left_join(Coords, by= "names")
#plot both graphs
RandomGraph%>%
plot(.,vertex.size=.8, edge.arrow.size=.4, vertex.label = NA, layout = as.matrix(Coords[,1:2]))
RandomGraph2%>%
plot(.,vertex.size=.8, edge.arrow.size=.4, vertex.label = NA, layout = as.matrix(NetCoords[,2:3]))
#The nodes have the same relationship to each other but are not laid out in the same position in the frame
As you can see, the plots place the nodes in the same positions relative to each other, but not relative to the frame.
How can I keep the plot positions fixed?
plot.igraph rescales each axis by default (from -1 to +1 on both x and y).
You just need to turn that off: rescale = F and then explicitly set appropriate xlim and ylim values.
For your example code:
RandomGraph%>%
plot(.,vertex.size=.8, edge.arrow.size=.4, vertex.label = NA, layout = as.matrix(Coords[,1:2]),rescale=F,xlim=c(-25,30),ylim=c(-20,35))
RandomGraph2%>%
plot(.,vertex.size=.8, edge.arrow.size=.4, vertex.label = NA, layout = as.matrix(NetCoords[,2:3]),rescale=F,xlim=c(-25,30),ylim=c(-20,35))
The problem is that
identical(range(Coords[1]), range(NetCoords[2]))
# [1] FALSE
Since igraph normalizes the coordinates on a range between -1 and 1 before plotting, this leads to slightly different coordinates for NetCoords compared to Coords. I'd just calculate the normalized coordinates for all nodes beforehand:
coords_rescaled <- sapply(Coords[-3], function(x) -1+((x-min(x))*2)/diff(range(x)))
rownames(coords_rescaled) <- Coords$names
Then assign the normalized coordinates (or the required subset) and set rescale=FALSE, as @jul already suggested:
par(mfrow=c(1,2), mar=c(1,.5,1,.5))
RandomGraph%>%
plot(.,edge.arrow.size=.4, layout = coords_rescaled, rescale=F);box()
RandomGraph2%>%
plot(.,edge.arrow.size=.4, layout = coords_rescaled[NetCoords$names, ], rescale=F);box()

R Surface Plot from List of X,Y,Z points

I am trying to make a surface plot for data that is in a very long list of x,y,z points. To do this, I am dividing the data into a grid of 10k squares and finding the max value of z within each square. From my understanding, each z value should be stored in a matrix where each element of the matrix corresponds to a square on the grid. Is there an easier way to do this than the code below? That last line is already pretty long and it is only one square.
x<-(sequence(101)-1)*max(eff$CFaR)/100
y<-(sequence(101)-1)*max(eff$EaR)/100
effmap<-matrix(ncol=length(x)-1, nrow=length(y)-1)
someMatrix <- max(eff$Cost[which(eff$EaR[which(eff$CFaR >= x[50] & eff$CFaR <x[51], arr.ind=TRUE)]>=y[20] & eff$EaR[which(eff$CFaR >= x[50] & eff$CFaR <x[51], arr.ind=TRUE)]< y[91])])
So this is my interpretation of what you are trying to accomplish...
df <- read.csv("effSample.csv") # downloaded from your link
df <- df[c("CFaR","EaR","Cost")] # remove unnecessary columns
df$x <- cut(df$CFaR,breaks=100,labels=FALSE) # establish bins: CFaR
df$y <- cut(df$EaR,breaks=100,labels=FALSE) # establish bins: EaR
df.max <- expand.grid(x=1:100,y=1:100) # template; 10,000 grid cells
# maximum cost in each grid cell - NOTE: most of the cells are *empty*
df.max <- merge(df.max,aggregate(Cost~x+y,df,max),all.x=TRUE)
z <- matrix(df.max$Cost,nr=100,nc=100) # Cost vector -> matrix
# colors based on z-value
palette <- rev(rainbow(20)) # palette of 20 colors
zlim <- range(z[!is.na(z)])
colors <- palette[19*(z-zlim[1])/diff(zlim) + 1]
# create the plot
library(rgl)
open3d(scale=c(1,1,10)) # CFaR and EaR range ~ 10 X Cost range
x.values <- min(df$CFaR)+(0:99)*diff(range(df$CFaR))/100
y.values <- min(df$EaR)+(0:99)*diff(range(df$EaR))/100
surface3d(x.values,y.values,z,col=colors)
axes3d()
title3d(xlab="CFaR",ylab="EaR",zlab="Cost")
The code above generates a rotatable 3D plot, so the image is just a screenshot. Notice how there are lots of "holes". This is (partially) because you provided only part of your data. However, it is important to realize that just because you imagine 10,000 grid cells (e.g., a 100 x 100 grid) does not mean that there will be data in every cell.
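One quick way to see how sparse the grid really is (a minimal check, reusing the z matrix built above):
sum(!is.na(z))          # number of the 10,000 grid cells that actually contain data
mean(is.na(z)) * 100    # percentage of grid cells that are empty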

How to estimate the area of 95% contour of a kde object from ks R package

I'm trying to estimate the area of the 95% contour of a kde object from the ks package in R.
If I use the example data set from the ks package, I would create the kernel object as follow:
library(ks)
data(unicef)
H.scv <- Hscv(x=unicef)
fhat <- kde(x=unicef, H=H.scv)
I can easily plot the 25, 50, 75% contour using the plot function:
plot(fhat)
But I want to estimate the area within the contour.
I saw a similar question here, but the answer proposed does not solve the problem.
In my real application, my dataset is a time series of coordinates of an animal, and I want to measure the home range size of this animal using a bivariate normal kernel. I'm using the ks package because it allows estimating the bandwidth of a kernel distribution with methods such as plug-in and smoothed cross-validation.
Any help would be really appreciated!
Here are two ways to do it. They are both fairly complex conceptually, but actually very simple in code.
fhat <- kde(x=unicef, H=H.scv,compute.cont=TRUE)
contour.95 <- with(fhat,contourLines(x=eval.points[[1]],y=eval.points[[2]],
z=estimate,levels=cont["95%"])[[1]])
library(pracma)
with(contour.95,polyarea(x,y))
# [1] -113.677
library(sp)
library(rgeos)
poly <- with(contour.95,data.frame(x,y))
poly <- rbind(poly,poly[1,]) # polygon needs to be closed...
spPoly <- SpatialPolygons(list(Polygons(list(Polygon(poly)),ID=1)))
gArea(spPoly)
# [1] 113.677
Explanation
First, the kde(...) function returns a kde object, which is a list with 9 elements. You can read about this in the documentation, or you can type str(fhat) at the command line, or, if you're using RStudio (highly recommended), you can see this by expanding the fhat object in the Environment tab.
One of the elements is $eval.points, the points at which the kernel density estimates are evaluated. The default is to evaluate at 151 equally spaced points. $eval.points is itself a list of (in your case) two vectors. So, fhat$eval.points[[1]] represents the points along "Under-5" and fhat$eval.points[[2]] represents the points along "Ave life exp".
Another element is $estimate, which has the z-values for the kernel density, evaluated at every combination of x and y. So $estimate is a 151 X 151 matrix.
If you call kde(...) with compute.cont=TRUE, you get an additional element in the result: $cont, which contains the z-value in $estimate corresponding to every percentile from 1% to 99%.
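A quick way to see all of this at the console (a minimal check, assuming fhat was created with compute.cont=TRUE and the default 151-point evaluation grid described above):
str(fhat, max.level = 1)         # the list elements, including eval.points, estimate, cont
length(fhat$eval.points[[1]])    # 151 evaluation points along the first variable
dim(fhat$estimate)               # 151 x 151 matrix of density values
fhat$cont["95%"]                 # density level corresponding to the 95% contour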
So, you need to extract the x- and y-values corresponding to the 95% contour, and use that to calculate the area. You would do that as follows:
fhat <- kde(x=unicef, H=H.scv,compute.cont=TRUE)
contour.95 <- with(fhat,contourLines(x=eval.points[[1]],y=eval.points[[2]],
z=estimate,levels=cont["95%"])[[1]])
Now, contour.95 has the x- and y-values corresponding to the 95% contour of fhat. There are (at least) two ways to get the area. One uses the pracma package and calculates it directly.
library(pracma)
with(contour.95,polyarea(x,y))
# [1] -113.677
The reason for the negative value has to do with the ordering of x and y: polyarea(...) is interpreting the polygon as a "hole", so it has negative area.
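If you only care about the magnitude, take the absolute value (or reverse the vertex order before calling polyarea):
abs(with(contour.95, polyarea(x, y)))
# [1] 113.677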
An alternative uses the area calculation routines in rgeos (a GIS package). Unfortunately, this requires you to first turn your coordinates into a "SpatialPolygon" object, which is a bit of a bear. Nevertheless, it is also straightforward.
library(sp)
library(rgeos)
poly <- with(contour.95,data.frame(x,y))
poly <- rbind(poly,poly[1,]) # polygon needs to be closed...
spPoly <- SpatialPolygons(list(Polygons(list(Polygon(poly)),ID=1)))
gArea(spPoly)
# [1] 113.677
Another method would be to use the contourSizes() function within the ks package. I've also been interested in using this package to compare both 2D and 3D space use in ecology, but I wasn't sure how to extract the 2D density estimates. I tested this method by estimating the area of an "animal" which was limited to the area of a circle with a known radius. Below is the code:
set.seed(123)
require(GEOmap)   # needed for the inpoly function
require(ks)       # kde, Hpi and contourSizes
# Create a data frame centered at coordinates 0,0
data = data.frame(x=0,y=0)
# Create a vector of radians from 0 to 2*pi for making a circle to
# test the area
circle = seq(0,2*pi,length=100)
# Select a radius for your circle
radius = 10
# Create a buffer for when you simulate points (this will be more clear below)
buffer = radius+2
# Simulate x and y coordinates from uniform distribution and combine
# values into a dataframe
createPointsX = runif(1000,min = data$x-buffer, max = data$x+buffer)
createPointsY = runif(1000,min = data$y-buffer, max = data$y+buffer)
data1 = data.frame(x=createPointsX,y=createPointsY)
# Plot the raw data
plot(data1$x,data1$y)
# Calculate the coordinates used to create a circle with center 0,0 and
# with radius specified above
coords = as.data.frame(t(rbind(data$x+sin(circle)*radius,
data$y+cos(circle)*radius)))
names(coords) = c("x","y")
# Add circle to plot with red line
lines(coords$x,coords$y,col=2,lwd=2)
# Use the inpoly function to calculate whether points lie within
# the circle or not.
inp = inpoly(data1$x, data1$y, coords)
data1 = data1[inp == 1,]
# Finally add points that lie with the circle as blue filled dots
points(data1$x,data1$y,pch=19,col="blue")
# Known area of a circle with this radius
pi * radius^2
#[1] 314.1593
# Sub in your own data here to calculate 95% homerange or 50% core area usage
H.pi = Hpi(data1,binned=T)
fhat = kde(data1,H=H.pi)
ct1 = contourSizes(fhat, cont = 95, approx=TRUE)
# Compare the known area of the circle to the 95% contour size
ct1
# 5%
# 291.466
I've also tried creating two unconnected circles and testing the contourSizes() function, and it seems to work really well on disjoint distributions.
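For reference, here is a rough sketch of that kind of two-circle test (hypothetical data, following the same pattern as the code above: two discs of radius 10, the second shifted far from the first, so the 95% contour size should come out near the combined disc area of roughly 628):
set.seed(123)
r <- 10
# Simulate points uniformly in a square and keep only those inside a disc of radius r
pts1 <- data.frame(x = runif(2000, -r, r), y = runif(2000, -r, r))
pts1 <- pts1[pts1$x^2 + pts1$y^2 <= r^2, ]
# Second disc, shifted far enough away that the two clusters of points are disjoint
pts2 <- pts1
pts2$x <- pts2$x + 50
pts <- rbind(pts1, pts2)
H.pi2 <- Hpi(pts, binned = TRUE)
fhat2 <- kde(pts, H = H.pi2)
contourSizes(fhat2, cont = 95, approx = TRUE)   # estimated area of the 95% contour
2 * pi * r^2                                    # known combined area of the two discs, ~628.3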

Finding euclidean distance in R{spatstat} between points, confined by an irregular polygon window

I'm trying to find the Euclidean distance between two points, confined by an irregular polygon (i.e., the distance has to be calculated as a route through the given window).
Here is a reproducible example:
library(spatstat)
#Simple example of a polygon and points.
ex.poly <- data.frame(x=c(0,5,5,2.5,0), y=c(0,0,5,2.5,5))
points <- data.frame(x=c(0.5, 2.5, 4.5), y=c(4,1,4))
bound <- owin(poly=data.frame(x=ex.poly$x, y=ex.poly$y))
test.ppp <- ppp(x=points$x, y=points$y, window=bound)
pairdist.ppp(test.ppp)#distance between every point
#The distance result from this function between point 1 and point 3, is given as 4.0
However, we know just from plotting the points
plot(test.ppp)
that the distance when the route is confined to the polygon should be greater (in this case, 5.00).
Is there another function that I am not aware of in {spatstat} that would do this? Or does anybody have any other suggestions for another package that could do this?
I'm trying to find the distance between two points in a water body, so the irregular polygon in my actual data is more complex.
Any help is greatly appreciated!
Cheers
OK, here's the gdistance-based approach I mentioned in comments yesterday. It's not perfect, since the segments of the paths it computes are all constrained to occur in one of 16 directions on a chessboard (king's moves plus knight's moves). That said, it gets within 2% of the correct values (always slightly overestimating) for each of the three pairwise distances in your example.
library(maptools) ## To convert spatstat objects to sp objects
library(gdistance) ## Loads raster and provides cost-surface functions
## Convert *.ppp points to SpatialPoints object
Pts <- as(test.ppp, "SpatialPoints")
## Convert the lake's boundary to a raster, with values of 1 for
## cells within the lake and values of 0 for cells on land
Poly <- as(bound, "SpatialPolygons") ## 1st to SpatialPolygons-object
R <- raster(extent(Poly), nrow=100, ncol=100) ## 2nd to RasterLayer ...
RR <- rasterize(Poly, R) ## ...
RR[is.na(RR)]<-0 ## Set cells on land to "0"
## gdistance requires that you 1st prepare a sparse "transition matrix"
## whose values give the "conductance" of movement between pairs of
## adjacent and next-to-adjacent cells (when using directions=16)
tr1 <- transition(RR, transitionFunction=mean, directions=16)
tr1 <- geoCorrection(tr1,type="c")
## Compute a matrix of pairwise distances between points
## (These should be 5.00 and 3.605; all are within 2% of actual value).
costDistance(tr1, Pts)
## 1 2
## 2 3.650282
## 3 5.005259 3.650282
## View the selected paths
plot(RR)
plot(Pts, pch=16, col="gold", cex=1.5, add=TRUE)
SL12 <- shortestPath(tr1, Pts[1,], Pts[2,], output="SpatialLines")
SL13 <- shortestPath(tr1, Pts[1,], Pts[3,], output="SpatialLines")
SL23 <- shortestPath(tr1, Pts[2,], Pts[3,], output="SpatialLines")
lapply(list(SL12, SL13, SL23), function(X) plot(X, col="red", add=TRUE, lwd=2))
