I have a network that looks like this
library(igraph)
library(igraphdata)
# Load the "kite" example network and draw it.
data(list = "kite")
plot(x = kite)
I run a community detection and the result looks like this
# Detect communities with the fast-greedy algorithm, then draw the network
# with the detected communities highlighted.
community <- cluster_fast_greedy(graph = kite)
plot(x = community, y = kite)
Now I want to extract a network based on the communities. The edge weight should be the number of ties between communities (how strong are communities connected to each other), the vertex attribute should be the number of nodes in the community (called numnodes).
# Hand-built illustration of the desired result: one vertex per community,
# edge weight = number of ties between two communities, and a vertex
# attribute `numnodes` = number of nodes in the community.
d <- data.frame(
  E = c(1, 2, 3),
  A = c(2, 3, 1)
)
# The first two columns of `d` are used as the edge endpoints.
g2 <- graph_from_data_frame(d, directed = FALSE)
E(g2)$weight <- c(5, 1, 1)
V(g2)$numnodes <- c(4, 3, 3)
plot(
  g2,
  vertex.label = V(g2)$name,
  edge.color = "black",
  edge.width = E(g2)$weight,
  vertex.size = V(g2)$numnodes
)
The graph should look like this
One node is larger than the others, one edge has a lot of weight in comparison to the others.
As far as I know, igraph doesn't have a single method that counts the edges connecting groups of vertices. Therefore, to count the edges connecting communities you need to tally them for each pair of communities. To count the members of each community, you can use the sizes method.
library(igraph)
library(igraphdata)
data("kite")
plot(kite)

community <- cluster_fast_greedy(kite)
plot(community, kite)

# Community id of every vertex, in vertex-id order.
memb <- as.integer(membership(community))
n_comm <- max(memb)

# Look up the community of both endpoints of every edge in one pass,
# instead of testing every pair of vertices for adjacency.
edge_ends <- ends(kite, E(kite), names = FALSE)
comm_a <- memb[edge_ends[, 1]]
comm_b <- memb[edge_ends[, 2]]

# Keep only the ties that cross a community boundary. Each pair is stored as
# (smaller id, larger id) so that i-j and j-i are counted together.
crossing <- comm_a != comm_b
cedges <- as.data.frame(
  table(
    from = pmin(comm_a, comm_b)[crossing],
    to = pmax(comm_a, comm_b)[crossing]
  ),
  responseName = "weight",
  stringsAsFactors = FALSE
)
# Drop pairs of communities that share no tie, so no zero-weight edge is drawn.
cedges <- cedges[cedges$weight > 0, , drop = FALSE]

# One vertex per community, listed explicitly so that a community without any
# outside tie still appears. `numnodes` is the community size; this assumes
# community ids are 1..n_comm, which is the order sizes() reports them in.
cvertices <- data.frame(
  name = seq_len(n_comm),
  numnodes = as.vector(sizes(community))
)
cgraph <- graph_from_data_frame(cedges, directed = FALSE, vertices = cvertices)

plot(
  cgraph,
  vertex.label = V(cgraph)$name,
  edge.color = "black",
  edge.width = E(cgraph)$weight,
  vertex.size = V(cgraph)$numnodes
)
Related
I'm out of my depth when it comes to network graphs, but I have a table of ~6300 From/To links similar to the data frame df given below. Each vertex has a binary property called status.
What I would like to do is determine all of the vertices that are upstream of a vertex where status = 1. How would I do this in igraph? I've looked at data.tree, but my data are not necessarily a single-root tree and "loops" are possible.
In the example below, this would mean that vertices Z, R, S, M, and K should have status = 1 (i.e. be orange in the plot), as they are upstream of Q, L, I, respectively.
library(igraph)
# Directed From/To links; every row is one edge.
df <- data.frame(
  from = c("D", "B", "A", "Q", "Z", "L", "M", "R", "S", "T", "U", "H", "I", "K"),
  to = c("O", "D", "B", "B", "Q", "O", "L", "Q", "R", "O", "T", "J", "J", "I"),
  stringsAsFactors = FALSE
)
# One row per distinct vertex, in order of first appearance (all `from`
# values, then `to`), with its binary `status` flag.
vertices <- data.frame(
  vertex = unique(c(df$from, df$to)),
  status = c(0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0)
)
g <- graph_from_data_frame(df, vertices = vertices, directed = TRUE)
# Colour the vertices by their status attribute.
plot(g, vertex.color = vertex_attr(g, "status"))
You can use subcomponent with mode='in'.
I have created my igraph from my dataset "allgenes", and found community modules based on the louvain method.
# Build an undirected graph from the `allgenes` edge table and simplify it
# before community detection.
gD <- igraph::simplify(
  igraph::graph_from_data_frame(allgenes, directed = FALSE)
)
# Louvain community detection on the simplified graph.
lou <- cluster_louvain(gD)
Plotting the modules, I note that there are several small communities that I wish to remove. How would I remove communities containing 5 nodes or less?
# Draw the graph with its Louvain communities: no labels, small vertices.
plot(
  lou,
  gD,
  vertex.label = NA,
  vertex.size = 5,
  edge.arrow.size = 0.2
)
Plot with distinguished modules:
Since you do not provide an example, I will illustrate with randomly generated data.
## First create an example like yours
library(igraph)
set.seed(123)
gD <- erdos.renyi.game(50, 0.05)
lou <- cluster_louvain(gD)
LO <- layout_with_fr(gD)
plot(lou, gD, vertex.label = NA, vertex.size = 5,
     edge.arrow.size = .2, layout = LO)

## Identify the communities with 5 or fewer members (the ones to remove).
## The ids are taken from the table names, not from positions in the table.
comm_sizes <- table(lou$membership)
Small <- as.integer(names(comm_sizes)[comm_sizes <= 5])

## Which nodes should be kept?
keep_idx <- which(!(lou$membership %in% Small))
Keep <- V(gD)[keep_idx]

## Get subgraph & plot
gD2 <- induced_subgraph(gD, Keep)
## NOTE: the communities are detected again on the subgraph here, so they are
## not guaranteed to match the communities found on the full graph.
lou2 <- cluster_louvain(gD2)
## Reuse the original coordinates of the kept vertices; drop = FALSE keeps
## the layout a matrix even if a single vertex remains.
LO2 <- LO[keep_idx, , drop = FALSE]
plot(lou2, gD2, vertex.label = NA, vertex.size = 5,
     edge.arrow.size = .2, layout = LO2)
The small communities have been removed
If you want to remove communities while maintaining the other existing communities you cannot create an induced subgraph with vertices you want to keep and cluster on the subgraph because the resulting communities can very likely change.
A workable approach would be to manually subset the communities object.
Also, if you want to plot the original graph and communities and new ones and maintain the same colors everywhere you have to do a couple additional steps.
suppressPackageStartupMessages(library(igraph))
set.seed(123)
g <- erdos.renyi.game(50, 0.05)
c <- cluster_louvain(g)
l <- layout_with_fr(g)

# Ids of the communities to keep: more than 5 members, i.e. communities with
# "5 nodes or less" are removed.
c_sizes <- sizes(c)
c_keep_ids <- as.numeric(names(c_sizes)[c_sizes > 5])
# Indices of the vertices that belong to a kept community.
c_keep_v_idxs <- which(c$membership %in% c_keep_ids)

g_sub <- induced_subgraph(g, V(g)[c_keep_v_idxs])

# igraph has no direct functionality to subset community objects so hack it
c_sub <- c
c_sub$names <- c$names[c_keep_v_idxs]
c_sub$membership <- c$membership[c_keep_v_idxs]
# Count vertices from the membership vector: `names` is NULL for an unnamed
# graph, so its length would give 0.
c_sub$vcount <- length(c_sub$membership)
# Keep the per-level membership matrix (one column per vertex), if present,
# aligned with the kept vertices.
if (is.matrix(c_sub$memberships)) {
  c_sub$memberships <- c_sub$memberships[, c_keep_v_idxs, drop = FALSE]
}
c_sub$modularity <- modularity(g_sub, c_sub$membership, E(g_sub)$weight)

# Draw the original and the reduced graph side by side, then restore the
# previous graphics settings.
old_par <- par(mfrow = c(1, 2))
plot(c, g,
  layout = l,
  vertex.label = NA,
  vertex.size = 5
)
# Reuse the original membership ids, layout rows and rainbow colours so each
# community is coloured the same way in both panels.
plot(c_sub, g_sub,
  col = membership(c)[c_keep_v_idxs],
  layout = l[c_keep_v_idxs, , drop = FALSE],
  mark.border = rainbow(length(communities(c)), alpha = 1)[c_keep_ids],
  mark.col = rainbow(length(communities(c)), alpha = 0.3)[c_keep_ids],
  vertex.label = NA,
  vertex.size = 5
)
par(old_par)
Allow me to add to this. I want to "remove" the color from small communities when visualizing, but keep them in the graph. e.g. I have a lot of isolates and that makes for some visual clutter while I have a very interesting core component, where looking at them gives a good representation.
I am starting with the code above. Not an issue, because I do not want subgraphs:
# Ids of the communities with fewer than 2 members (singletons). The ids are
# read from the table names, so this also works when the ids are not 1..k.
community_sizes <- table(g_community$membership)
Small <- as.numeric(names(community_sizes)[community_sizes < 2])
# Move every vertex of a small community into one catch-all community, 999.
g_community$membership[g_community$membership %in% Small] <- 999
This works well enough, but is there a smarter way to do this?
I have a graph net with two different types (1 and 2) of vertices, appearing n1 and n2 times, respectively:
# Label the first n1 vertices as type "1" and the following n2 as type "2".
net %v% "type" <- rep(c("1", "2"), times = c(n1, n2))
We have some edges which were generated randomly with probabilities ps and pd, where ps is the probability of an edge between vertices of the same type (1-1 or 2-2) and pd is the probability of an edge between vertices of different types (1-2).
I would like to plot this graph such that the edges between same types (i.e. 1-1 or 2-2) have a different color than edges between different types (1-2).
How do I do this?
I tried playing around with the %e% operator of the network package, but I'm confused about how to grab the type of the end node of each edge.
Thank you!
Do you want that?
# Ten random edges; each endpoint is vertex 1 or vertex 2.
from <- sample(1:2, 10, replace = TRUE)
to <- sample(1:2, 10, replace = TRUE)
# Two-column edge list: one row per edge.
node <- cbind(from, to)
library(igraph)
net <- graph_from_edgelist(node, directed = FALSE)
#' Choose the colour of one edge from its two endpoints.
#'
#' @param from_to A length-2 vector: the two endpoints of an edge.
#' @return "red" when both endpoints are equal, "blue" when they differ,
#'   and NA when the comparison itself is NA.
edge_color <- function(from_to) {
  same_endpoint <- from_to[[1]] == from_to[[2]]
  # A missing endpoint cannot be classified; keep the result missing.
  if (is.na(same_endpoint)) {
    return(NA_character_)
  }
  if (same_endpoint) "red" else "blue"
}
# One colour per row of the edge list, in edge order.
color <- vapply(
  seq_len(nrow(node)),
  function(row_id) edge_color(node[row_id, ]),
  character(1)
)
plot(net, edge.color = color)
I am doing some analysis using iGraph in R, and I am currently doing a calculation that is very expensive. I need to do it across all of the nodes in my graph, so if someone knows a more efficient way to do it, I would appreciate it.
I start out with a graph, g. I first do some community detection on the graph
library(igraph)
# Number of vertices in the example graph. NOTE: an n_nodes x n_nodes matrix
# of this size is large; lower n_nodes to experiment.
n_nodes <- 8000
# Draw one Bernoulli(0.5) value per matrix cell. Drawing fewer values than
# cells would silently recycle them and produce a repeating pattern.
adj_matrix <- matrix(
  rbinom(n_nodes * n_nodes, 1, 0.5),
  nrow = n_nodes,
  ncol = n_nodes
)
g <- graph_from_adjacency_matrix(adj_matrix, mode = "undirected", diag = FALSE)
c <- cluster_louvain(g)
Then, I basically assign each cluster to 1 of 2 groups
# Number of detected communities.
nc <- length(c)
# Randomly put every community into group 0 or group 1 (fair coin flip).
assignments <- rbinom(n = nc, size = 1, prob = 0.5)
Now, for each node, I want to find out what percentage of its neighbors are in a given group (as defined by the cluster assignments). I currently do this in the following way:
#' Share of a vertex's neighbours whose community is assigned to group 1.
#'
#' @param g An igraph graph.
#' @param vertex The vertex whose neighbours are inspected.
#' @param c A communities object for `g`.
#' @param assignments Vector with one 0/1 group label per community,
#'   indexed by community id.
#' @return The fraction of neighbours in group 1; NaN when the vertex has
#'   no neighbours.
pct_neighbors_1 <- function(g, vertex, c, assignments) {
  # Look the neighbours up once instead of twice.
  neigh <- neighbors(g, vertex)
  # Community of each neighbour -> group label of that community.
  mean(assignments[membership(c)[neigh]] == 1)
}
And then, given that I have a dataframe with each row corresponding to one vertex in the graph, I do this for all vertices with
# Evaluate pct_neighbors_1() for every row/vertex. The extra arguments must
# use the function's own parameter names (`g`, `c`); the row index is then
# matched to `vertex`. vapply() guarantees one numeric value per vertex.
data$pct_neighbors_1 <- vapply(
  seq_len(nrow(data)),
  pct_neighbors_1,
  numeric(1),
  g = g, c = c,
  assignments = assignments
)
Is there somewhere in here that I can make things more efficient? Thanks!
This should be faster :
library(igraph)
# for reproducibility's sake
set.seed(1234)
# create a random 1000 vertices graph
nverts <- 1000
g <- igraph::random.graph.game(nverts, 0.1, type = "gnp", directed = FALSE)
# clustering
c <- cluster_louvain(g)
# assignments
nc <- length(c)
assignments <- rbinom(nc, 1, .5)
# precalculate if a vertex belongs to the assigned communities
vertsInAssignments <- membership(c) %in% which(assignments == 1)
# compute probabilities: for each vertex, the share of its neighbours that
# belong to an assigned community (NaN for a vertex without neighbours).
# seq_len() keeps the loop empty for an empty graph, and vapply() enforces
# one numeric value per vertex.
probs <- vapply(
  seq_len(vcount(g)),
  function(i) {
    neigh <- neighbors(g, i)
    sum(vertsInAssignments[neigh]) / length(neigh)
  },
  numeric(1)
)
I have a graph of 6 million nodes such as
# library() stops with an error if igraph is missing, whereas require() only
# returns FALSE and lets the script fail later.
library(igraph)
# Graph of 1000 nodes
g <- ba.game(1000)
with the following four attributes defined for each node
# Attributes
# Four logical vertex attributes, each TRUE for 20 randomly sampled vertices.
for (attr_name in paste0("attribute", 1:4)) {
  g <- set_vertex_attr(g, attr_name, value = V(g) %in% sample(V(g), 20))
}
Among the nodes I have a subset of 12,000 that are of particular interest:
# Subset of 100 nodes
# Flag 100 randomly sampled vertices as the subset of interest.
nodes_of_interest <- sample(V(g), 100)
V(g)$subset <- V(g) %in% nodes_of_interest
What I want to obtain is an analysis (count) of the neighbourhood of my subset. That is, I want to define
# Create the four result attributes, filled with NA for every vertex.
for (result_name in paste0("neigh.attr", 1:4)) {
  g <- set_vertex_attr(g, result_name, value = rep(NA, vcount(g)))
}
such that NA is replaced for every node in the subset with the corresponding count of neighbouring nodes with V(g)$attribute{1..4}==TRUE.
I can easily create a list of the neighbourhood of interest with
# Order-1 outgoing neighbourhood of every vertex flagged in `subset`.
neighbours <- neighborhood(
  g,
  order = 1,
  nodes = V(g)[V(g)$subset],
  mode = "out"
)
but I can't think of an efficient way to iterate over all the neighbourhoods and compute the statistics for each of the four attributes. Indeed, the only way I've come up with is a loop which, given the size of my original graph, takes far too long:
# Vertex ids of the subset, in the same order as the entries of `neighbours`.
subset_indices <- as.numeric(V(g)[V(g)$subset == TRUE])
# Nothing to count when the subset is empty.
if (length(neighbours) > 0) {
  for (k in 1:4) {
    # Read the attribute vector once per attribute rather than once per vertex.
    attr_values <- vertex_attr(g, paste0("attribute", k))
    # Number of TRUE values in each neighbourhood.
    counts <- vapply(
      neighbours,
      function(nb) sum(attr_values[as.integer(nb)]),
      numeric(1),
      USE.NAMES = FALSE
    )
    # Write all counts for this attribute in a single assignment; vertices
    # outside the subset keep their existing (NA) value.
    g <- set_vertex_attr(
      g,
      paste0("neigh.attr", k),
      index = subset_indices,
      value = counts
    )
  }
}