Strahler stream order using igraph or sfnetwork in R - r

I cannot fathom how to derive strahler order in R. Here's an example in postgres and neo4j. An attempt in R
There are three rules (from the GRASS 7.8 Manual):
if the node has no children, it's Strahler order is 1.
if the node has one and only one tributary with Strahler greatest order i, and all other tributaries have order less than i, then the order remains
i.
if the node has two or more tributaries with greatest order i, then the Strahler order of the node is i + 1.
Here's what I would expect
library(sfnetworks)
library(igraph)
library(sf)
library(dplyr)
library(tidygraph)
library(RColorBrewer)
# Create an example network.
n01 = st_sfc(st_point(c(0, 0)))
n02 = st_sfc(st_point(c(1, 2)))
n03 = st_sfc(st_point(c(1, 3)))
n04 = st_sfc(st_point(c(1, 4)))
n05 = st_sfc(st_point(c(2, 1)))
n06 = st_sfc(st_point(c(2, 3)))
n07 = st_sfc(st_point(c(2, 4)))
n08 = st_sfc(st_point(c(3, 2)))
n09 = st_sfc(st_point(c(3, 3)))
n10 = st_sfc(st_point(c(3, 4)))
n11 = st_sfc(st_point(c(4, 2)))
n12 = st_sfc(st_point(c(4, 4)))
from = c(1, 2, 2, 3, 3, 5, 5, 8, 8, 9, 9)
to = c(5, 3, 6, 4, 7, 2, 8, 9, 11, 10, 12)
nodes = st_as_sf(c(n01, n02, n03, n04, n05, n06, n07, n08, n09, n10, n11, n12))
edges = data.frame(from = from, to = to)
G = sfnetwork(nodes, edges) %>%
convert(to_spatial_explicit, .clean = TRUE)
nodes = st_as_sf(G, "nodes")
edges = st_as_sf(G, "edges")
# expected order
edges$expected_order = c(4,2,1,1,1,3,3,2,1,1,1)
cols = brewer.pal(4, "Blues")
pal = colorRampPalette(cols)
plot(st_geometry(edges))
plot(edges["expected_order"],
lwd = 4, ,
add = TRUE,
col = pal(4)[edges$expected_order])
legend(x = "topright",
legend = c("4","3","2","1"),
lwd = 4,
col = pal(4)[edges$expected_order],
title = "strahler order")
plot(nodes, pch = 20, add = TRUE)
Here's what I tried curtesy of jsta/streamnet/stream_order.R, which I can't load due to missing packages
stream_order_igraph <- function(tree){
tree <- as.igraph(tree)
leaf_nodes <- which(degree(tree,
v = igraph::V(tree),
mode = "in") == 0,
useNames = TRUE)
base_order <- 1
edgelist <- data.frame(as_edgelist(tree))
edgelist$order <- NA
names(edgelist)[c(1,2)] <- c("from", "to")
edgelist$order[edgelist$from %in% leaf_nodes] <- base_order
tree <- igraph::delete.vertices(tree, leaf_nodes)
while(igraph::vcount(tree) >= 1){
base_order <- max(edgelist$order, na.rm = TRUE) + 1
leaf_nodes <- which(degree(tree, v = igraph::V(tree),
mode = "in") == 0,
useNames = TRUE)
raised_nodes <- sapply(leaf_nodes,
function(x) all(edgelist$order[edgelist$to == x] == base_order - 1))
raised_nodes <- which(raised_nodes)
flat_nodes <- leaf_nodes[!(leaf_nodes %in% raised_nodes)]
edgelist$order[edgelist$from %in% raised_nodes] <- base_order
edgelist$order[edgelist$from %in% flat_nodes] <- base_order - 1
tree <- igraph::delete.vertices(tree, leaf_nodes)
}
edgelist$order
}
stream_order_igraph(G)
> stream_order_igraph(G)
[1] 4 3 3 3 3 2 2 NA NA NA NA

I have found a solution that converts class igraph to class phylo then uses phytools::StrahlerNumber. I had to modify phytools::igraph_to_phylo and reverse the order of my edges to get it to work.
library(phytools)
library(igraph)
library(sfnetworks)
library(sf)
library(dplyr)
library(RColorBrewer)
# reverse the edge direction
transposeGraph <- function(g) {
g %>% get.edgelist %>%
{cbind(.[, 2], .[, 1])} %>%
graph.edgelist
}
# convert igraph class to phylo class
# and calculate strahler number
igraphStrahler <- function(g){
if (!igraph::is_simple(g) |
!igraph::is_connected(g) |
!igraph::is_dag(g)) {
stop("Taxon graph is not a simple, connected, directed acylic graph")
}
root = which(sapply(V(g),
function(x) length(neighbors(g, x, mode = "in"))) == 0)
leaves = which(sapply(V(g),
function(x) length(neighbors(g, x, mode = "out"))) == 0)
g <- g %>%
set_vertex_attr("leaf", index = leaves, TRUE) %>%
set_vertex_attr("root", index = root, TRUE)
traverse <- igraph::dfs(g, root)
is_leaf <- igraph::vertex_attr(g, "leaf", traverse$order)
is_leaf[which(is.na(is_leaf))] <- FALSE
n_leaf <- sum(is_leaf)
n_node <- sum(!is_leaf)
node_id <- ifelse(is_leaf, cumsum(is_leaf), cumsum(!is_leaf) + n_leaf)
# Store the node ids on the graph
g <- igraph::set_vertex_attr(g, "node_id", index = traverse$order,
value = node_id)
# Extract the edge and vertex data
vertex_data <- igraph::as_data_frame(g, "vertices") %>%
mutate(name = row_number())
edge_data <- igraph::as_data_frame(g, "edges")
edge_data$geom <- NULL
# Substitute the node id numbers into the edge list
edge_data <- unlist(edge_data)
edge_data <- vertex_data$node_id[match(edge_data, vertex_data$name)]
edge_data <- matrix(edge_data, ncol = 2)
# lookup the tip and node labels
tip_labels <- 1:n_leaf
tip_labels <- vertex_data$name[match(tip_labels, vertex_data$node_id)]
node_labels <- (n_leaf + 1):(n_node + n_leaf)
node_labels <- vertex_data$name[match(node_labels, vertex_data$node_id)]
# Build the phylogeny
phy <- structure(list(edge = edge_data,
edge.length = rep(1, nrow(edge_data)),
tip.labels = tip_labels,
node.labels = node_labels,
Nnode = n_node),
class = "phylo")
stra <- as.data.frame(strahlerNumber(phy)) %>%
rename(strahler_order = `strahlerNumber(phy)`) %>%
mutate(node_id = row_number()) %>%
left_join(vertex_data, by = "node_id") %>%
rename(to = name)
return(stra)
}
ln <- st_read("streams.gpkg") %>%
st_cast("LINESTRING")
net <- as_sfnetwork(ln)
g <- net %>%
transposeGraph()
stra <- igraphStrahler(g)
edges = st_as_sf(net, "edges") %>%
left_join(stra, by = c("from" = "to"))
cols = brewer.pal(3, "Blues")
pal = colorRampPalette(cols)
plot(st_geometry(edges))
plot(edges["strahler_order"],
lwd = 3, ,
add = TRUE,
col = pal(3)[edges$strahler_order])
legend(x = "topright",
legend = c("1","2","3"),
lwd = 3,
col = cols,
title = "Strahler order")

Related

How to plot the igraph subgraphs with saving the nodes' positions and ids?

I have a igraph G, I need to sample two overlapping subgraphs G1, G2 and to plot them on the graph with the same layout.
My attempt is below:
library(igraph)
set.seed(1)
n <- 10
A <- matrix(sample(0:1, n * n, rep=TRUE), n, n)
diag(A) = 0
g <- graph_from_adjacency_matrix(A)
V(g)$names <- c(1:n)
id1 = sample(V(g), size = n %/% 2, replace = FALSE)
id2 = sample(V(g), size = n %/% 2, replace = FALSE)
g1 <- induced_subgraph(g, vids = id1)
g2 <- induced_subgraph(g, vids = id2)
V(g1)$names <- c(id1)
V(g2)$names <- c(id2)
#V(g)[id1]$color <- "red"
#V(g)[id2]$color <- "green"
par(mfrow=c(1,3))
layout <- layout.fruchterman.reingold(g)
plot(g, layout=layout, main="G")
plot(g1, layout = layout[-c(setdiff(1:n, id1)),], vertex.label=V(g)[id1], main="G1")
plot(g2, layout = layout[-c(setdiff(1:n, id2)),], vertex.label=V(g)[id2], main="G2")
My problem with ids and labels.
Question. How to plot the igraph subgraphs with saving the nodes' positions and ids?
Try the code below
library(igraph)
set.seed(1)
n <- 10
A <- matrix(sample(0:1, n * n, rep = TRUE), n, n)
diag(A) <- 0
g <- graph_from_adjacency_matrix(A)
id1 <- sort(as.integer(sample(V(g), size = n %/% 2, replace = FALSE)))
id2 <- sort(as.integer(sample(V(g), size = n %/% 2, replace = FALSE)))
g1 <- induced_subgraph(g, vids = id1)
g2 <- induced_subgraph(g, vids = id2)
par(mfrow = c(1,3))
layout <- layout.fruchterman.reingold(g)
layout2 <- layout[id2, ]
plot(g, layout = layout, main = "G")
plot(g1, layout = layout[id1, ], main = "G1")
plot(g2, layout = layout[id2, ], main = "G2")

How to compute nearest distance between points?

This is a tmp set of points with (x, y) coordinates and 0 or 1 categories.
tmp <- structure(list(cx = c(146.60916, 140.31737, 145.92917, 167.57799,
166.77618, 137.64381, 172.12157, 175.32881, 175.06154, 135.50566,
177.46696, 148.06731), cy = c(186.29814, 180.55231, 210.6084,
210.34111, 185.48505, 218.89375, 219.69554, 180.67421, 188.15775,
209.27205, 209.27203, 178.00151), category = c(1, 0, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0)), class = "data.frame", row.names = c(NA,
-12L))
I need to find the minimum spanning tree for category = 1 points, then to join (add edge) each point with category = 0 to its nearest category = 1 point.
The minimum spanning tree is built on points with the category = 1.
ones <- tmp[tmp$category == 1,]
n <- dim(ones)[1]
d <- matrix(0, n, n)
d <- as.matrix(dist(cbind(ones$cx, ones$cy)))
g1 <- graph.adjacency(d, weighted=TRUE, mode="undirected")
V(g1)$name <- tmp[tmp$category == 1,]$Name
mylayout = as.matrix(cbind(ones$cx, -ones$cy))
mst <- minimum.spanning.tree(g1) # Find a minimum spanning tree
plot(mst, layout=mylayout,
vertex.size = 10,
vertex.label = V(g1)$name,
vertex.label.cex =.75,
edge.label.cex = .7,
)
Expected result is in center of figure.
My current attempt is:
n <- dim(tmp)[1]
d <- matrix(0, n, n)
d <- as.matrix(dist(cbind(tmp$cx, tmp$cy)))
d[tmp$category %*% t(tmp$category) == 1] = Inf
d[!sweep(d, 2, apply(d, 2, min), `==`)] <- 0
g2 <- graph.adjacency(d, weighted=TRUE, mode="undirected")
mylayout = as.matrix(cbind(tmp$cx, -tmp$cy))
V(g2)$name <- tmp$Name
plot(g2, layout=mylayout,
vertex.size = 10,
vertex.label = V(g2)$name,
vertex.label.cex =.75,
edge.label = round(E(g2)$weight, 3),
edge.label.cex = .7,
)
One can see that I have found the minimum dist and add one edge only.
Question. How to define condition for all possible points?
You can try the code below
# two categories of point data frames
pts1 <- subset(tmp, category == 1)
pts0 <- subset(tmp, category == 0)
# generate minimum spanning tree `gmst`
gmst <- mst(graph_from_adjacency_matrix(as.matrix(dist(pts1[1:2])), mode = "undirected", weighted = TRUE))
# distance matrix between `pts0` and `pts1`
pts0_pts1 <- as.matrix(dist(tmp[1:2]))[row.names(pts0), row.names(pts1)]
# minimum distances of `pts0` to `pts1`
idx <- max.col(-pts0_pts1)
df0 <- data.frame(
from = row.names(pts0),
to = row.names(pts1)[idx],
weight = pts0_pts1[cbind(1:nrow(pts0), idx)]
)
# aggregate edges lists and produce final result
g <- graph_from_data_frame(rbind(get.data.frame(gmst), df0), directed = FALSE) %>%
set_vertex_attr(name = "color", value = names(V(.)) %in% names(V(gmst)))
mylayout <- as.matrix(tmp[names(V(g)), 1:2]) %*% diag(c(1, -1))
plot(g, edge.label = round(E(g)$weight, 1), layout = mylayout)
and you will get

Plot heatmaps of multiple data frames using a slider in R

I have multiple data.frames and each one of them represent the pairwise interactions of individuals at different time points.
Here is an example of how my data.frames look.
df1 <- matrix(data = rexp(9, rate = 10), nrow = 3, ncol = 3)
df2 <- matrix(data = rexp(16, rate = 10), nrow = 4, ncol = 4)
df3 <- matrix(data = rexp(4, rate = 10), nrow = 2, ncol = 2)
I would like to plot them as it is pointed in this page (https://plotly.com/r/sliders/)
where with a slider I can move from one heatmap to the other.
I have tried so far with plotly but I have not succeeded. Any help is highly appreciated.
I am struggling for long with this issue. I might be a bit blind at this point so please forgive me if the question is stupid.
Following the Sine Wave Slider example on https://plotly.com/r/sliders/ this can be achieved like so. The first step of my approach involves converting the matrices to dataframes with columns x, y, z. Second instead of lines we plot heatmaps.
df1 <- matrix(data = rexp(9, rate = 10), nrow = 3, ncol = 3)
df2 <- matrix(data = rexp(16, rate = 10), nrow = 4, ncol = 4)
df3 <- matrix(data = rexp(4, rate = 10), nrow = 2, ncol = 2)
library(tibble)
library(tidyr)
library(plotly)
# Make dataframes
d <- lapply(list(df1, df2, df3), function(d) {
d %>%
as_tibble(.colnames = seq(ncol(.))) %>%
rowid_to_column("x") %>%
pivot_longer(-x, names_to = "y", values_to = "z") %>%
mutate(y = stringr::str_extract(y, "\\d"),
y = as.numeric(y))
})
aval <- list()
for(step in seq_along(d)){
aval[[step]] <-list(visible = FALSE,
name = paste0('v = ', step),
x = d[[step]]$x,
y = d[[step]]$y,
z = d[[step]]$z)
}
aval[1][[1]]$visible = TRUE
steps <- list()
fig <- plot_ly()
for (i in seq_along(aval)) {
fig <- add_trace(fig, x = aval[i][[1]]$x, y = aval[i][[1]]$y, z = aval[i][[1]]$z, visible = aval[i][[1]]$visible,
name = aval[i][[1]]$name, type = "heatmap")
fig
step <- list(args = list('visible', rep(FALSE, length(aval))), method = 'restyle')
step$args[[2]][i] = TRUE
steps[[i]] = step
}
fig <- fig %>%
layout(sliders = list(list(active = 0,
currentvalue = list(prefix = "Heatmap: "),
steps = steps)))
fig

Creating a multi-plot figure with layout() in R

I am having difficulty finding a way to produce a multi-plot figure in R that has two rows but on the second row, I want the plot to be in the middle of the two plots that are side by side on the first row... Attached is my code and the graph I'm getting vs. the graph that I want. Any suggestions would be greatly appreciated! Thanks!
pie_chart <- function(parameter, title=parameter) {
parameter_df <- parameter_results %>%
select(results = parameter) %>%
filter(results != "Not Applicable") %>%
count(results) %>% #
mutate(prop = prop.table(n), perc = paste0(round(prop * 100),"%"))
color_code <- c("Attaining" = "#99FF99", "Insufficient Information" = "#FFFF99", "Non Attaining" = "#FF9999")
parameter_df <- parameter_results %>%
select(results = parameter) %>% #keep only column for the parameter you want to plot
filter(results != "Not Applicable") %>%
count(results) %>% #
mutate(prop = prop.table(n), perc = paste0(round(prop * 100),"%"))
values <- vector(mode = "numeric", length = nrow(parameter_df))
labs <- vector(mode = "character", length = nrow(parameter_df))
colors <- vector(mode = "character", length = nrow(parameter_df))
for (i in seq_along(1:nrow(parameter_df))) {
values[[i]] <- parameter_df$prop[[i]] * 100
labs[[i]] <- parameter_df$perc[i]
colors[[i]] <- color_code[[parameter_df$results[[i]]]]
}
pie(x = values, labels = labs, col = colors, main = title,font=2,font.main=2)
CairoPDF(file = "Recreation Use", width = 10, height = 7)
m <- matrix(c(1,2,
3,4
), nrow = 2, ncol = 2, byrow = TRUE)
layout(mat = m, heights = c(0.4, 0.4, 0.1))
par(oma = c(4, 0, 4, 0))
par(mar = c(2,2,2,2))
pie_chart('e.Coli', "E.coli(freshwater\n217 AUs")
pie_chart('Enterococcus',"Enterococcus(marine water)\n132 AUs")
pie_chart('Beach Closing (Enterococcus)', "Enterococcus(beach closings)\n45AUs")
legend(x=1,y=-1,inset = 0, text.width=c(1.4,.5,.9),legend = c("Attaining","Insufficient Information","Non Attaining"), fill = c("#99FF99", "#FFFF99", "#FF9999"),border="#000000",cex=1.2,bty="n",xpd = NA,horiz=TRUE)
plot.new()
title("Figure 2.8:Assessment Results for Key Parameters Associated with Recreation Use,\nPercent(%) of AUs",line = 1, outer = TRUE,font=2)
dev.off()
Graph I have:
Graph I want:

IDW parameters in R

I want to perform IDW interpolation using R using the idw command from the gstat package. I have this data:
#settings
library(gstat)
library(dplyr)
library(sp)
library(tidyr)
id_rep <- rep(c(1,2), 20)
f <- rep(c(930,930.2), each=20)
perc <- rep(c(90, 80), each=10)
x <- sample(1:50, 40)
y <- sample(50:100, 40)
E <- runif(40)
df <- data.frame(id_rep, perc, x,y, f, E)
df_split <- split(df, list(df$id_rep, df$perc, df$f), drop = TRUE, sep="_")
#grid
x.range <- range(df$x)
y.range <- range(df$y)
grid <- expand.grid(x = seq(x.range[1], x.range[2], by=1),
y = seq(y.range[1], y.range[2], by=1))
coordinates(grid) <- ~x + y
#interpolation
lst_interp_idw <- lapply(df_split, function(X) {
coordinates(X) <- ~x + y
E_idw <- idw(E~ 1, X, grid, idp=1, nmax=3) %>% as.data.frame()
df_interp <- select(E_idw, x,y,E_pred=var1.pred)
df_interp
})
df_interp_idw <- bind_rows(lst_interp_idw, .id = "interact") %>%
separate(interact, c("id_rep", "perc", "f"), sep = "\\_")
Now I want to perform each run with different idp and nmax parameters within certain values​ (idp from 1 to 3 by 0.5, and nmax 3 to 6 by 1) and get out a data frame with columns for each combination of idp and nmax values. I try with two for loops but it doesn't work.
EDIT
the code that doesn't work is:
idp = seq(from = 1, to = 3, by = 0.5)
nmax = seq(from = 3, to = 6, by = 1)
...
for(i in idp) {
for(j in nmax)
{ E_idw= idw(E ~ 1, X, grid, nmax = i, idp = j)
}
}
...
Here is a way how to store the result of every iteration in a list.
#settings
#install.packages("gstat")
library(gstat)
library(dplyr)
library(sp)
library(tidyr)
id_rep <- rep(c(1,2), 20)
f <- rep(c(930,930.2), each=20)
perc <- rep(c(90, 80), each=10)
x <- sample(1:50, 40)
y <- sample(50:100, 40)
E <- runif(40)
df <- data.frame(id_rep, perc, x,y, f, E)
df_split <- split(df, list(df$id_rep, df$perc, df$f), drop = TRUE, sep="_")
#grid
x.range <- range(df$x)
y.range <- range(df$y)
grid <- expand.grid(x = seq(x.range[1], x.range[2], by=1),
y = seq(y.range[1], y.range[2], by=1))
coordinates(grid) <- ~x + y
# ==============================================
# NEW function
# ==============================================
idp = seq(from = 1, to = 3, by = 0.5)
nmax = seq(from = 3, to = 6, by = 1)
#interpolation
lst_interp_idw <- lapply(df_split, function(X) {
coordinates(X) <- ~x + y
df_interp <- vector(length(idp)*length(nmax), mode = "list" )
k <- 0
for(i in idp) {
for(j in nmax) {
print(paste(i, j))
# Iterator
k <- k + 1
E_idw= idw(E ~ 1, X, grid, nmax = i, idp = j) %>% as.data.frame()
df_interp[[k]] <- select(E_idw, x,y,E_pred=var1.pred)
}
}
return(df_interp)
})
# ==============================================
Some plausibility checks (lapply is applied to 8 list elements and 20 variations are calculated):
length(lst_interp_idw) # 8
length(lst_interp_idw[[1]]) #20
length(lst_interp_idw[[1]]) #20
It should be easy for you to adapt the last line of your code
df_interp_idw <- bind_rows(lst_interp_idw, .id = "interact") %>%
separate(interact, c("id_rep", "perc", "f"), sep = "\\_")
to format the output in the desired format. This highly depends on how you want to present the different interpolation alternatives.

Resources