How to compute nearest distance between points? - r

This is a tmp set of points with (x, y) coordinates and 0 or 1 categories.
tmp <- structure(list(cx = c(146.60916, 140.31737, 145.92917, 167.57799,
166.77618, 137.64381, 172.12157, 175.32881, 175.06154, 135.50566,
177.46696, 148.06731), cy = c(186.29814, 180.55231, 210.6084,
210.34111, 185.48505, 218.89375, 219.69554, 180.67421, 188.15775,
209.27205, 209.27203, 178.00151), category = c(1, 0, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0)), class = "data.frame", row.names = c(NA,
-12L))
I need to find the minimum spanning tree for category = 1 points, then to join (add edge) each point with category = 0 to its nearest category = 1 point.
The minimum spanning tree is built on points with the category = 1.
ones <- tmp[tmp$category == 1,]
n <- dim(ones)[1]
d <- matrix(0, n, n)
d <- as.matrix(dist(cbind(ones$cx, ones$cy)))
g1 <- graph.adjacency(d, weighted=TRUE, mode="undirected")
V(g1)$name <- tmp[tmp$category == 1,]$Name
mylayout = as.matrix(cbind(ones$cx, -ones$cy))
mst <- minimum.spanning.tree(g1) # Find a minimum spanning tree
plot(mst, layout=mylayout,
vertex.size = 10,
vertex.label = V(g1)$name,
vertex.label.cex =.75,
edge.label.cex = .7,
)
Expected result is in center of figure.
My current attempt is:
n <- dim(tmp)[1]
d <- matrix(0, n, n)
d <- as.matrix(dist(cbind(tmp$cx, tmp$cy)))
d[tmp$category %*% t(tmp$category) == 1] = Inf
d[!sweep(d, 2, apply(d, 2, min), `==`)] <- 0
g2 <- graph.adjacency(d, weighted=TRUE, mode="undirected")
mylayout = as.matrix(cbind(tmp$cx, -tmp$cy))
V(g2)$name <- tmp$Name
plot(g2, layout=mylayout,
vertex.size = 10,
vertex.label = V(g2)$name,
vertex.label.cex =.75,
edge.label = round(E(g2)$weight, 3),
edge.label.cex = .7,
)
One can see that I have found the minimum dist and add one edge only.
Question. How to define condition for all possible points?

You can try the code below
# two categories of point data frames
pts1 <- subset(tmp, category == 1)
pts0 <- subset(tmp, category == 0)
# generate minimum spanning tree `gmst`
gmst <- mst(graph_from_adjacency_matrix(as.matrix(dist(pts1[1:2])), mode = "undirected", weighted = TRUE))
# distance matrix between `pts0` and `pts1`
pts0_pts1 <- as.matrix(dist(tmp[1:2]))[row.names(pts0), row.names(pts1)]
# minimum distances of `pts0` to `pts1`
idx <- max.col(-pts0_pts1)
df0 <- data.frame(
from = row.names(pts0),
to = row.names(pts1)[idx],
weight = pts0_pts1[cbind(1:nrow(pts0), idx)]
)
# aggregate edges lists and produce final result
g <- graph_from_data_frame(rbind(get.data.frame(gmst), df0), directed = FALSE) %>%
set_vertex_attr(name = "color", value = names(V(.)) %in% names(V(gmst)))
mylayout <- as.matrix(tmp[names(V(g)), 1:2]) %*% diag(c(1, -1))
plot(g, edge.label = round(E(g)$weight, 1), layout = mylayout)
and you will get

Related

How to rotate nodes of a time-calibrated phylogenetic tree to match a particular order in R?

I have a time-calibrated phylogenetic tree from BEAST and I would like to make a figure in which its nodes are rotated to match an arbitrary ordering. The following code works perfectly to plot the tree with the nodes in the order they are in the input file.
library("phytools")
library("phyloch")
library("strap")
library("coda")
t <- read.beast("mcctree.tre") # I couldn't upload the file here
t$root.time <- t$height[1]
num_taxa <- length(t$tip.label)
display_all_node_bars <- TRUE
names_list <-vector()
for (name in t$tip){
v <- strsplit(name, "_")[[1]]
if(display_all_node_bars){
names_list = c(names_list, name)
}
else if(v[length(v)]=="0"){
names_list = c(names_list, name)
}
}
nids <- vector()
pos <- 1
len_nl <- length(names_list)
for(n in names_list){
for(nn in names_list[pos:len_nl]){
if(n != nn){
m <- getMRCA(t,c(n, nn))
if(m %in% nids == FALSE){
nids <- c(nids, m)
}
}
}
pos <- pos+1
}
pdf("tree.pdf", width = 20, height = 20)
geoscalePhylo(tree = t,
x.lim = c(-2,21),
units = c("Epoch"),
tick.scale = "myr",
boxes = FALSE,
width = 1,
cex.tip = 2,
cex.age = 3,
cex.ts = 2,
erotate = 0,
label.offset = 0.1)
lastPP <- get("last_plot.phylo", envir = .PlotPhyloEnv)
for(nv in nids){
bar_xx_a <- c(lastPP$xx[nv]+t$height[nv-num_taxa]-t$"height_95%_HPD_MIN"[nv-num_taxa],
lastPP$xx[nv]-(t$"height_95%_HPD_MAX"[nv-num_taxa]-t$height[nv-num_taxa]))
lines(bar_xx_a, c(lastPP$yy[nv], lastPP$yy[nv]), col = rgb(0, 0, 1, alpha = 0.3), lwd = 12)
}
t$node.label <- t$posterior
p <- character(length(t$node.label))
p[t$node.label >= 0.95] <- "black"
p[t$node.label < 0.95 & t$node.label >= 0.75] <- "gray"
p[t$node.label < 0.75] <- "white"
nodelabels(pch = 21, cex = 1.5, bg = p)
dev.off()
The following code is my attempt to rotate the nodes in the way I want (following this tutorial: http://blog.phytools.org/2015/04/finding-closest-set-of-node-rotations.html). And it works for rotating the nodes. However, the blue bars indicating the confidence intervals of the divergence time estimates get out of their correct place - this is what I would like help to correct. This will be used in much larger files with hundreds of branches - the example here is simplified.
new.order <- c("Sp8","Sp9","Sp10","Sp7","Sp6","Sp5","Sp4","Sp2","Sp3","Ou1","Ou2","Sp1")
t2 <- setNames(1:Ntip(t), new.order)
new.order.tree <- minRotate(t, t2)
new.order.tree$root.time <- t$root.time
new.order.tree$height <- t$height
new.order.tree$"height_95%_HPD_MIN" <- t$"height_95%_HPD_MIN"
new.order.tree$"height_95%_HPD_MAX" <- t$"height_95%_HPD_MAX"
pdf("reordered_tree.pdf", width = 20, height = 20)
geoscalePhylo(tree = new.order.tree,
x.lim = c(-2,21),
units = c("Epoch"),
tick.scale = "myr",
boxes = FALSE,
width = 1,
cex.tip = 2,
cex.age = 3,
cex.ts = 2,
erotate = 0,
label.offset = 0.1)
lastPP <- get("last_plot.phylo", envir = .PlotPhyloEnv)
for(nv in nids){
bar_xx_a <- c(lastPP$xx[nv]+new.order.tree$height[nv-num_taxa]-new.order.tree$"height_95%_HPD_MIN"[nv-num_taxa],
lastPP$xx[nv]-(new.order.tree$"height_95%_HPD_MAX"[nv-num_taxa]-new.order.tree$height[nv-num_taxa]))
lines(bar_xx_a, c(lastPP$yy[nv], lastPP$yy[nv]), col = rgb(0, 0, 1, alpha = 0.3), lwd = 12)
}
new.order.tree$node.label <- t$posterior
p <- character(length(new.order.tree$node.label))
p[new.order.tree$node.label >= 0.95] <- "black"
p[new.order.tree$node.label < 0.95 & new.order.tree$node.label >= 0.75] <- "gray"
p[new.order.tree$node.label < 0.75] <- "white"
nodelabels(pch = 21, cex = 1.5, bg = p)
dev.off()
I've found several similar questions here and in other forums, but none dealing specifically with time-calibrated trees - which is the core of the problem described above.
The short answer is that phyTools::minRotate() doesn't recognize the confidence intervals as associated with nodes. If you contact the phyTools maintainers, they may well be able to add this functionality quite easily.
Meanwhile, you can correct this yourself.
I don't know how read.beast() saves confidence intervals – let's say they're saved in t$conf.int. (Type unclass(t) at the R command line to see the full structure; you should be able to identify the appropriate property.)
If the tree's node labels are unique, then you can infer the new sequence of nodes using match():
library("phytools")
new.order <- c("Sp8","Sp9","Sp10","Sp7","Sp6","Sp5","Sp4","Sp2","Sp3","Ou1","Ou2","Sp1")
# Set up a fake initial tree -- you would load the tree from a file
tree <- rtree(length(new.order))
tree$tip.label <- sort(new.order)
tree$node.label <- seq_len(tree$Nnode)
tree$conf.int <- seq_len(tree$Nnode) * 10
# Plot tree
par(mfrow = c(1, 2), mar = rep(0, 4), cex = 0.9) # Create space
plot(tree, show.node.label = TRUE)
nodelabels(tree$conf.int, adj = 1) # Annotate "correct" intervals
# Re-order nodes with minRotate
noTree <- minRotate(tree, setNames(seq_along(new.order), new.order))
plot(noTree, show.node.label = TRUE)
# Move confidence intervals to correct node
tree$conf.int <- tree$conf.int[match(noTree$node.label, tree$node.label)]
nodelabels(tree$conf.int, adj = 1)
If you can't guarantee that the node labels are unique, you can always overwrite them in a temporary object:
# Find node order
treeCopy <- tree
treeCopy$node.label <- seq_len(tree$Nnode)
nodeOrder <- match(minRotate(treeCopy)$node.label, treeCopy$node.label)
# Apply node order
tree$conf.int <- tree$conf.int[nodeOrder]

Strahler stream order using igraph or sfnetwork in R

I cannot fathom how to derive strahler order in R. Here's an example in postgres and neo4j. An attempt in R
There are three rules (from the GRASS 7.8 Manual):
if the node has no children, it's Strahler order is 1.
if the node has one and only one tributary with Strahler greatest order i, and all other tributaries have order less than i, then the order remains
i.
if the node has two or more tributaries with greatest order i, then the Strahler order of the node is i + 1.
Here's what I would expect
library(sfnetworks)
library(igraph)
library(sf)
library(dplyr)
library(tidygraph)
library(RColorBrewer)
# Create an example network.
n01 = st_sfc(st_point(c(0, 0)))
n02 = st_sfc(st_point(c(1, 2)))
n03 = st_sfc(st_point(c(1, 3)))
n04 = st_sfc(st_point(c(1, 4)))
n05 = st_sfc(st_point(c(2, 1)))
n06 = st_sfc(st_point(c(2, 3)))
n07 = st_sfc(st_point(c(2, 4)))
n08 = st_sfc(st_point(c(3, 2)))
n09 = st_sfc(st_point(c(3, 3)))
n10 = st_sfc(st_point(c(3, 4)))
n11 = st_sfc(st_point(c(4, 2)))
n12 = st_sfc(st_point(c(4, 4)))
from = c(1, 2, 2, 3, 3, 5, 5, 8, 8, 9, 9)
to = c(5, 3, 6, 4, 7, 2, 8, 9, 11, 10, 12)
nodes = st_as_sf(c(n01, n02, n03, n04, n05, n06, n07, n08, n09, n10, n11, n12))
edges = data.frame(from = from, to = to)
G = sfnetwork(nodes, edges) %>%
convert(to_spatial_explicit, .clean = TRUE)
nodes = st_as_sf(G, "nodes")
edges = st_as_sf(G, "edges")
# expected order
edges$expected_order = c(4,2,1,1,1,3,3,2,1,1,1)
cols = brewer.pal(4, "Blues")
pal = colorRampPalette(cols)
plot(st_geometry(edges))
plot(edges["expected_order"],
lwd = 4, ,
add = TRUE,
col = pal(4)[edges$expected_order])
legend(x = "topright",
legend = c("4","3","2","1"),
lwd = 4,
col = pal(4)[edges$expected_order],
title = "strahler order")
plot(nodes, pch = 20, add = TRUE)
Here's what I tried curtesy of jsta/streamnet/stream_order.R, which I can't load due to missing packages
stream_order_igraph <- function(tree){
tree <- as.igraph(tree)
leaf_nodes <- which(degree(tree,
v = igraph::V(tree),
mode = "in") == 0,
useNames = TRUE)
base_order <- 1
edgelist <- data.frame(as_edgelist(tree))
edgelist$order <- NA
names(edgelist)[c(1,2)] <- c("from", "to")
edgelist$order[edgelist$from %in% leaf_nodes] <- base_order
tree <- igraph::delete.vertices(tree, leaf_nodes)
while(igraph::vcount(tree) >= 1){
base_order <- max(edgelist$order, na.rm = TRUE) + 1
leaf_nodes <- which(degree(tree, v = igraph::V(tree),
mode = "in") == 0,
useNames = TRUE)
raised_nodes <- sapply(leaf_nodes,
function(x) all(edgelist$order[edgelist$to == x] == base_order - 1))
raised_nodes <- which(raised_nodes)
flat_nodes <- leaf_nodes[!(leaf_nodes %in% raised_nodes)]
edgelist$order[edgelist$from %in% raised_nodes] <- base_order
edgelist$order[edgelist$from %in% flat_nodes] <- base_order - 1
tree <- igraph::delete.vertices(tree, leaf_nodes)
}
edgelist$order
}
stream_order_igraph(G)
> stream_order_igraph(G)
[1] 4 3 3 3 3 2 2 NA NA NA NA
I have found a solution that converts class igraph to class phylo then uses phytools::StrahlerNumber. I had to modify phytools::igraph_to_phylo and reverse the order of my edges to get it to work.
library(phytools)
library(igraph)
library(sfnetworks)
library(sf)
library(dplyr)
library(RColorBrewer)
# reverse the edge direction
transposeGraph <- function(g) {
g %>% get.edgelist %>%
{cbind(.[, 2], .[, 1])} %>%
graph.edgelist
}
# convert igraph class to phylo class
# and calculate strahler number
igraphStrahler <- function(g){
if (!igraph::is_simple(g) |
!igraph::is_connected(g) |
!igraph::is_dag(g)) {
stop("Taxon graph is not a simple, connected, directed acylic graph")
}
root = which(sapply(V(g),
function(x) length(neighbors(g, x, mode = "in"))) == 0)
leaves = which(sapply(V(g),
function(x) length(neighbors(g, x, mode = "out"))) == 0)
g <- g %>%
set_vertex_attr("leaf", index = leaves, TRUE) %>%
set_vertex_attr("root", index = root, TRUE)
traverse <- igraph::dfs(g, root)
is_leaf <- igraph::vertex_attr(g, "leaf", traverse$order)
is_leaf[which(is.na(is_leaf))] <- FALSE
n_leaf <- sum(is_leaf)
n_node <- sum(!is_leaf)
node_id <- ifelse(is_leaf, cumsum(is_leaf), cumsum(!is_leaf) + n_leaf)
# Store the node ids on the graph
g <- igraph::set_vertex_attr(g, "node_id", index = traverse$order,
value = node_id)
# Extract the edge and vertex data
vertex_data <- igraph::as_data_frame(g, "vertices") %>%
mutate(name = row_number())
edge_data <- igraph::as_data_frame(g, "edges")
edge_data$geom <- NULL
# Substitute the node id numbers into the edge list
edge_data <- unlist(edge_data)
edge_data <- vertex_data$node_id[match(edge_data, vertex_data$name)]
edge_data <- matrix(edge_data, ncol = 2)
# lookup the tip and node labels
tip_labels <- 1:n_leaf
tip_labels <- vertex_data$name[match(tip_labels, vertex_data$node_id)]
node_labels <- (n_leaf + 1):(n_node + n_leaf)
node_labels <- vertex_data$name[match(node_labels, vertex_data$node_id)]
# Build the phylogeny
phy <- structure(list(edge = edge_data,
edge.length = rep(1, nrow(edge_data)),
tip.labels = tip_labels,
node.labels = node_labels,
Nnode = n_node),
class = "phylo")
stra <- as.data.frame(strahlerNumber(phy)) %>%
rename(strahler_order = `strahlerNumber(phy)`) %>%
mutate(node_id = row_number()) %>%
left_join(vertex_data, by = "node_id") %>%
rename(to = name)
return(stra)
}
ln <- st_read("streams.gpkg") %>%
st_cast("LINESTRING")
net <- as_sfnetwork(ln)
g <- net %>%
transposeGraph()
stra <- igraphStrahler(g)
edges = st_as_sf(net, "edges") %>%
left_join(stra, by = c("from" = "to"))
cols = brewer.pal(3, "Blues")
pal = colorRampPalette(cols)
plot(st_geometry(edges))
plot(edges["strahler_order"],
lwd = 3, ,
add = TRUE,
col = pal(3)[edges$strahler_order])
legend(x = "topright",
legend = c("1","2","3"),
lwd = 3,
col = cols,
title = "Strahler order")

How to plot the igraph subgraphs with saving the nodes' positions and ids?

I have a igraph G, I need to sample two overlapping subgraphs G1, G2 and to plot them on the graph with the same layout.
My attempt is below:
library(igraph)
set.seed(1)
n <- 10
A <- matrix(sample(0:1, n * n, rep=TRUE), n, n)
diag(A) = 0
g <- graph_from_adjacency_matrix(A)
V(g)$names <- c(1:n)
id1 = sample(V(g), size = n %/% 2, replace = FALSE)
id2 = sample(V(g), size = n %/% 2, replace = FALSE)
g1 <- induced_subgraph(g, vids = id1)
g2 <- induced_subgraph(g, vids = id2)
V(g1)$names <- c(id1)
V(g2)$names <- c(id2)
#V(g)[id1]$color <- "red"
#V(g)[id2]$color <- "green"
par(mfrow=c(1,3))
layout <- layout.fruchterman.reingold(g)
plot(g, layout=layout, main="G")
plot(g1, layout = layout[-c(setdiff(1:n, id1)),], vertex.label=V(g)[id1], main="G1")
plot(g2, layout = layout[-c(setdiff(1:n, id2)),], vertex.label=V(g)[id2], main="G2")
My problem with ids and labels.
Question. How to plot the igraph subgraphs with saving the nodes' positions and ids?
Try the code below
library(igraph)
set.seed(1)
n <- 10
A <- matrix(sample(0:1, n * n, rep = TRUE), n, n)
diag(A) <- 0
g <- graph_from_adjacency_matrix(A)
id1 <- sort(as.integer(sample(V(g), size = n %/% 2, replace = FALSE)))
id2 <- sort(as.integer(sample(V(g), size = n %/% 2, replace = FALSE)))
g1 <- induced_subgraph(g, vids = id1)
g2 <- induced_subgraph(g, vids = id2)
par(mfrow = c(1,3))
layout <- layout.fruchterman.reingold(g)
layout2 <- layout[id2, ]
plot(g, layout = layout, main = "G")
plot(g1, layout = layout[id1, ], main = "G1")
plot(g2, layout = layout[id2, ], main = "G2")

How to create a graph from an adjacency matrix by also specifying nodes coordinates in igraph?

I have the following R code:
library('igraph')
nodes <- c('a','b','c','d')
x <- c(0,1,2,3)
y <- c(0,1,2,3)
from <- c('a','b','c')
to <- c('b','c','d')
NodeList <- data.frame(nodes, x ,y)
EdgeList <- data.frame(from, to)
plot(graph_from_data_frame(vertices = NodeList, d= EdgeList, directed = FALSE))
Which emits the graph I want. However I need to be able to use the adjacency matrix instead of from and to vectors. Function graph_from_adjacency_matrix does not include a parameter to specify the coordinates of nodes. How to achieve this?
[As #user20650 mentioned above], you can specify the coordinates of the vertices latter, using vertex_attr():
library('igraph')
adjm <- matrix(
c(0, rep(c(1, 0, 0, 0, 0), times = 3)), 4, , F, list(letters[1:4], letters[1:4])
)
g01 <- graph_from_adjacency_matrix(adjm, 'undirected') -> g02
vertex_attr(g02, name = 'x') <- c(0, 0, 1, 1)
vertex_attr(g02, name = 'y') <- c(0, 1, 1, 0)
par(mfrow = 1:2)
plot(g01)
plot(g02)

R Visualization of markov chains | change values in transition matrix by hand

I run a markov model in R, primaly to get the markov graph.
I want to exclude all lines with a probability < 0,4 from transistion matrix (In this case the line from start to c2 should be deleted.). I tried this by setting these values to 0. But changing values in transition matrix results in an Error: Please see below: I marked the position of interrest with "#######################" (line 76)
# creating a data sample
df1 <- data.frame(path = c('c1 > c2 > c3', 'c1', 'c2 > c3'), conv = c(1, 0, 0), conv_null = c(0, 1, 1)) # original
df1
# calculating the models
mod1 <- markov_model(df1,
var_path = 'path',
var_conv = 'conv',
var_null = 'conv_null',
out_more = TRUE)
mod1
# extracting the results of attribution:
df_res1 <- mod1$result
df_res1
# extracting a transition matrix:
df_trans1 <- mod1$transition_matrix
df_trans1
df_trans1 <- dcast(df_trans1, channel_from ~ channel_to, value.var = 'transition_probability')
df_trans1
### plotting the Markov graph ###
df_trans <- mod1$transition_matrix
df_trans
# adding dummies in order to plot the graph
df_dummy <- data.frame(channel_from = c('(start)', '(conversion)', '(null)'),
channel_to = c('(start)', '(conversion)', '(null)'),
transition_probability = c(
0,
1,
1
)) # die Ãœbergangswarhscheinlichkeit von zu sich selber eintragen
df_dummy
df_trans <- rbind(df_trans, df_dummy)
df_trans
# ordering channels
df_trans$channel_from <- factor(df_trans$channel_from,
levels = c('(start)', '(conversion)', '(null)',
'c1',
'c2',
'c3'
))
df_trans$channel_from
df_trans$channel_to <- factor(df_trans$channel_to,
levels = c('(start)', '(conversion)', '(null)',
'c1',
'c2',
'c3'
))
df_trans$channel_to
df_trans <- dcast(df_trans, channel_from ~ channel_to, value.var = 'transition_probability')
df_trans
# creating the markovchain object
trans_matrix <- matrix(data = as.matrix(df_trans[, -1]),
nrow = nrow(df_trans[, -1]), ncol = ncol(df_trans[, -1]),
dimnames = list(c(as.character(df_trans[, 1])), c(colnames(df_trans[, -1]))))
trans_matrix[is.na(trans_matrix)] <- 0
trans_matrix
####################### I want to delete transition-propabilities < 0.4 from markov graph by setting these values to 0.
trans_matrix[trans_matrix < 0.4] <- 0 #
####################### After doing this, the following querie gives me an error: Error! Row sums not equal to one check positions: 1
trans_matrix1 <- new("markovchain", transitionMatrix = trans_matrix)
trans_matrix1
# plotting the graph
plot(trans_matrix1, edge.arrow.size = 0.5, size = 100, cex.main = 0.11, cex.lab = 0.5, cex.axis = 0.5)
The transition matrix is no longer a transition matrix if you set some positive entries to 0, because the row sums must equal one. So new("markovchain", ....) does not work with such a matrix.
But if you want the plot only, this is possible by modifying the slot transitionMatrix:
library(markovchain)
tm <- rbind(c(0.3, 0.5, 0.2), c(0.1, 0.1, 0.8), c(0.6, 0.2, 0.2))
states <- c("a", "b", "c")
mc <- new("markovchain", states=states, transitionMatrix=tm, name="X")
tm[tm<0.4] <- 0
dimnames(tm) <- list(states, states)
mc#transitionMatrix <- tm
plot(mc)

Resources