Bipartite graph projection with nodes as edge attributes - r

I have a bipartite graph and I want the projections of this graph to have edge attributes that record via which nodes they were connected. For example:
require(igraph)
set.seed(123)
g <- sample_bipartite(5, 5, p =.5)
V(g)$name <- c(letters[1:5], 1:5)
g1 <- bipartite_projection(g)[[1]]
g2 <- bipartite_projection(g)[[2]]
par(mfrow = c(1, 3))
plot(g,
vertex.shape = ifelse(V(g)$type == FALSE, "square", "circle"),
vertex.color = ifelse(V(g)$type == FALSE, "gold", "tomato"),
main = "Bipartite")
plot(g1,
main = "Projection 1")
plot(g2,
main = "Projection 2")
par(mfrow = c(1, 1))
I want the information that I added by hand to the plot to be in the network object. It it easily done in igraph? Thanks.

With bipartite_projection
If you really want ot use bipartite_projection, you can try to define your custom function f like below:
f <- function(gp) {
df <- get.data.frame(gp)[1:2]
df$lbl <- apply(
df,
1,
function(v) {
max(do.call(intersect, unname(lapply(v, function(x) names(neighbors(g, x))))))
}
)
res <- graph_from_data_frame(df, directed = FALSE)
plot(res, edge.label = E(res)$lbl)
}
f(g1)
f(g2)
which gives
Without bipartite_projection
Below is an option without using bipartite_projection (take g1 as the an example, and g2 can be obtained in a similar way)
g1 <- simplify(
graph_from_data_frame(
do.call(
rbind,
lapply(
Filter(
function(x) nrow(x) > 1,
split(get.data.frame(g), ~to)
),
function(d) {
with(
d,
cbind(data.frame(t(combn(from, 2))), weight = unique(to))
)
}
)
),
directed = FALSE
),
edge.attr.comb = "max"
)
and plot(g1, edge.label = E(g1)$weight) gives

First, I made a dataframe of the as.edgelist results, then computed a label with paste0. Next, I used the edge_attr command to write the labels to the graph object.
el<-igraph::as_edgelist(g);el<-as.data.frame(el)
el$lab<-paste0(el$V1,"_",el$V2)
edge_attr(g,"label")<-el$lab
E(g)$label
set.seed(232)
plot(g,
edge.label.dist=.3,
edge.label.color="blue",
margin=-0.4,
layout=layout.fruchterman.reingold)

Related

How to plot the igraph subgraphs with saving the nodes' positions and ids?

I have a igraph G, I need to sample two overlapping subgraphs G1, G2 and to plot them on the graph with the same layout.
My attempt is below:
library(igraph)
set.seed(1)
n <- 10
A <- matrix(sample(0:1, n * n, rep=TRUE), n, n)
diag(A) = 0
g <- graph_from_adjacency_matrix(A)
V(g)$names <- c(1:n)
id1 = sample(V(g), size = n %/% 2, replace = FALSE)
id2 = sample(V(g), size = n %/% 2, replace = FALSE)
g1 <- induced_subgraph(g, vids = id1)
g2 <- induced_subgraph(g, vids = id2)
V(g1)$names <- c(id1)
V(g2)$names <- c(id2)
#V(g)[id1]$color <- "red"
#V(g)[id2]$color <- "green"
par(mfrow=c(1,3))
layout <- layout.fruchterman.reingold(g)
plot(g, layout=layout, main="G")
plot(g1, layout = layout[-c(setdiff(1:n, id1)),], vertex.label=V(g)[id1], main="G1")
plot(g2, layout = layout[-c(setdiff(1:n, id2)),], vertex.label=V(g)[id2], main="G2")
My problem with ids and labels.
Question. How to plot the igraph subgraphs with saving the nodes' positions and ids?
Try the code below
library(igraph)
set.seed(1)
n <- 10
A <- matrix(sample(0:1, n * n, rep = TRUE), n, n)
diag(A) <- 0
g <- graph_from_adjacency_matrix(A)
id1 <- sort(as.integer(sample(V(g), size = n %/% 2, replace = FALSE)))
id2 <- sort(as.integer(sample(V(g), size = n %/% 2, replace = FALSE)))
g1 <- induced_subgraph(g, vids = id1)
g2 <- induced_subgraph(g, vids = id2)
par(mfrow = c(1,3))
layout <- layout.fruchterman.reingold(g)
layout2 <- layout[id2, ]
plot(g, layout = layout, main = "G")
plot(g1, layout = layout[id1, ], main = "G1")
plot(g2, layout = layout[id2, ], main = "G2")

Subset graph based on edges weight

I have a graph, G=(V,E) with several attributes including an edge weight attribute. I'm trying to create a subgraph based on a condition where weights are higher than x.
I've tried the standard R subsetting option with g <- E(g)[weight > max(weight)*.10], but I always get a vector.
I'm not sure what I'm doing wrong here.
Maybe you want something like this
library(igraph)
set.seed(1)
m <- matrix(sample(c(.5, 2, 5), 100, replace=T, prob = c(.6,.3,.1)), nc=10, dimnames = rep(list(letters[1:10]), 2))
g <- graph_from_adjacency_matrix(m, weighted=T, diag=F, mode="undirected")
coords <- layout.auto(g)
par(mfrow = c(1,3))
plot(g, layout=coords, edge.width = E(g)$weight)
s1 <- subgraph.edges(g, E(g)[E(g)$weight>2], del=F)
plot(s1, layout=coords, edge.width = E(s1)$weight)
s2 <- delete_vertices(s1, degree(s1, mode = "in")==0)
plot(s2, layout=coords[V(g)$name%in%V(s2)$name,], edge.width = E(s2)$weight)
That would be because you replaced your graph g with just subsetted edges. If you want to remove edges below the threshold weight, you can use:
g_sub <- delete.edges(g, E(g)[weight <= max(weight)*.10])

How to colourise some cell borders in R corrplot?

I would like to keep some cells in attention by making their borders clearly distinct from anything else.
The parameter rect.col is used to colorise all borders but I want to colorise only borders of the cells (3,3) and (7,7), for instance, by any halo color etc heat.colors(100) or rainbow(12).
Code:
library("corrplot")
library("psych")
ids <- seq(1,11)
M.cor <- cor(mtcars)
colnames(M.cor) <- ids
rownames(M.cor) <- ids
p.mat <- psych::corr.test(M.cor, adjust = "none", ci = F)
p.mat <- p.mat[["r"]]
corrplot(M.cor,
method = "color",
type = "upper",
tl.col = 'black',
diag = TRUE,
p.mat = p.mat,
sig.level = 0.0000005
)
Fig. 1 Output of the top code without cell bordering,
Fig. 2 Output after manually converting all coordinates to upper triangle but artifact at (10,1),
Fig. 3 Output with window size fix
Input: locations by ids (3,3) and (7,7)
Expected output: two cells where borders marked on upper triangle
Pseudocode
# ids must be id.pairs
# or just a list of two lists
createBorders <- function(id.pairs) {
labbly(id.pairs,function(z){
x <- z$V1
y <- z$V2
rect(x+0.5, y+0.5, x+1.5, y+1.5) # user20650
})
}
corrplot(...)
# TODO Which datastructure to use there in the function as the paired list of ids?
createBorders(ids.pairs)
Testing user20650's proposal
rect(2+0.5, 9+0.5, 3+0.5, 10+0.5, border="white", lwd=2)
Output in Fig. 2.
It would be great to have a function for this.
Assume you have a list of IDs.
I think there is something wrong with the placement because (2,3),(9,10) leads to the point in (2,3),(2,3).
Iterating user20650's Proposal in Chat
library("corrplot")
library("psych")
ids <- seq(1,11)
M.cor <- cor(mtcars)
colnames(M.cor) <- ids
rownames(M.cor) <- ids
p.mat <- psych::corr.test(M.cor, adjust = "none", ci = F)
p.mat <- p.mat[["r"]]
# Chat of http://stackoverflow.com/q/40538304/54964 user20650
cb <- function(corrPlot, ..., rectArgs = list() ){
lst <- list(...)
n <- ncol(corrPlot)
nms <- colnames(corrPlot)
colnames(corrPlot) <- if(is.null(nms)) 1:ncol(corrPlot) else nms
xleft <- match(lst$x, colnames(corrPlot)) - 0.5
ybottom <- n - match(lst$y, colnames(corrPlot)) + 0.5
lst <- list(xleft=xleft, ybottom=ybottom, xright=xleft+1, ytop=ybottom+1)
do.call(rect, c(lst, rectArgs))
}
plt <- corrplot(M.cor,
method = "color",
type = "upper",
tl.col = 'black',
diag = TRUE,
p.mat = p.mat,
sig.level = 0.0000005
)
cb(plt, x=c(1, 3, 5), y=c(10, 7, 4), rectArgs=list(border="white", lwd=3))
Output where only one cell border marked in Fig. 3.
Expected output: three cell borders marked
Restriction in Fig. 2 approach
You have to work all coordinates first to upper triangle.
So you can now call only the following where output has an artifact at (10,1) in Fig. 2
cb(plt, x=c(10, 7, 5), y=c(1, 3, 4), rectArgs=list(border="white", lwd=3))
Expected output: no artifact at (10,1)
The cause of the artifact can be white background, but it occurs also if the border color is red so most probably it is not the cause.
Solution - fix the window size and its output in Fig. 3
pdf("Rplots.pdf", height=10, width=10)
plt <- corrplot(M.cor,
method = "color",
type = "upper",
tl.col = 'black',
diag = TRUE,
p.mat = p.mat,
sig.level = 0.0000005
)
cb(plt, x=c(10, 7, 5), y=c(1, 3, 4), rectArgs=list(border="red", lwd=3))
dev.off()
R: 3.3.1
OS: Debian 8.5
Docs corrplot: here
My proposal where still pseudocode mark.ids. I found best to have plt and mark.ids as the options of corrplotCellBorders which creates corrplot with bordered wanted cells
mark.ids <- {x <- c(1), y <- c(2)} # TODO pseudocode
corrplotCellBorders(plt, mark.ids)
cb(plt, x, y, rectArgs=list(border="red", lwd=3))
# Chat of https://stackoverflow.com/q/40538304/54964 user20650
# createBorders.r, test.createBorders.
cb <- function(corrPlot, ..., rectArgs = list() ){
# ... pass named vector of x and y names
# for upper x > y, lower x < y
lst <- list(...)
n <- ncol(corrPlot)
nms <- colnames(corrPlot)
colnames(corrPlot) <- if(is.null(nms)) 1:ncol(corrPlot) else nms
xleft <- match(lst$x, colnames(corrPlot)) - 0.5
ybottom <- n - match(lst$y, colnames(corrPlot)) + 0.5
lst <- list(xleft=xleft, ybottom=ybottom, xright=xleft+1, ytop=ybottom+1)
do.call(rect, c(lst, rectArgs))
}
corrplotCellBorders <- function(plt, mark.ids) {
x <- mark.ids$x
y <- mark.ids$y
cb(plt, x, y, rectArgs=list(border="red", lwd=3))
}
Open
How to create mark.ids such that you can call its items by mark.ids$x and mark.ids$y?
Integrate point order neutrality for the upper triangle here

R: how to define layout position of nodes

I have a random graph g and need to split this graph into two separated graphs g1 and g2 with a rule. The split rule is binary matrix E: if (E[i,j]=1) then move the corresponding node to the graph g1, else move the corresponding node to the graph g2. After separation I need to plot three graphs on the screen. I have use the 1s from matrix E in order to define position of nodes from graph g1 on the plot (i.e. mylayout1). My code is shown below.
library(igraph)
set.seed(42)
n <- m <- 5
B <- matrix(sample(0:255, (n*m)^2, replace=T), nrow = n*n, ncol = m*m)
g <- graph.adjacency(B, weighted=TRUE, mode="undirected", diag=FALSE)
V(g)$name <- as.character(1:(n*m))
E <- matrix(sample(0:1, n*m, replace=T), nrow = m, ncol = n)
# split into two graphs, if (E[i,j]=1) then the node move to g1, else to g2
vsubgraph <- c(1:length(E))*E
vsubgraph <- vsubgraph[vsubgraph != 0]
g1 <- induced_subgraph(g, vsubgraph)
g2 <- induced_subgraph(g, setdiff(V(g), vsubgraph))
V(g)[vsubgraph]$color <- "green"
V(g)[setdiff(V(g), vsubgraph)]$color <- "yellow"
V(g1)$name <- vsubgraph
V(g2)$name <- setdiff(V(g), vsubgraph)
V(g1)$color <- "green"
V(g2)$color <- "yellow"
par(mfrow=c(1,3))
# create layout
cx <-rep(1:n, each = m)
cy <-rep(c(1:m), times = n)
mylayout <- as.matrix(cbind(cx, -cy))
plot(g, layout=mylayout,
vertex.shape = "square",
vertex.label = V(g)$name,
edge.label.cex=.75,
xlab='Original graph'
)
cx <- cx * E
cy <- cy * E
cx <- cx[cx != 0]
cy <- cy[cy != 0]
mylayout1 <- as.matrix(cbind(cx, -cy))
plot(g1, layout=mylayout1,
vertex.shape = "square",
vertex.label = V(g)$name,
edge.label.cex=.75,
xlab='1st graph'
)
plot(g2, #layout=mylayout2,
vertex.shape = "square",
vertex.label = V(g)$name,
edge.label.cex=.75,
xlab='2nd graph'
)
Could someone please give an idea how to define mylayout2 for the second graph g2? I would like to use the original position of nodes from the mylayout. One of solution may be the using matrix E again. Unfortunately, I can't figure out how to use 0s from matrix E.
One of possible way is:
opE <- ifelse(E == 0, 1, 0)
cx <-rep(1:n, each = m)
cy <-rep(c(1:m), times = n)
cx <- cx * opE
cy <- cy * opE
cx <- cx[cx != 0]
cy <- cy[cy != 0]
mylayout2 <- as.matrix(cbind(cx, -cy))
plot(g2, layout=mylayout2,
vertex.shape = "square",
vertex.label = V(g)$name,
edge.label.cex=.75,
xlab='2nd graph'
)

Simplest way to plot changes in ranking between two ordered lists in R?

I'm wondering if there is an easy way to plot the changes in position of elements between 2 lists in the form of a directed bipartite graph in R. For example, list 1 and 2 are vectors of character strings, not necessarily containing the same elements:
list.1 <- c("a","b","c","d","e","f","g")
list.2 <- c("b","x","e","c","z","d","a")
I would like to generate something similar to:
I've had a slight bash at using the igraph package, but couldn't easily construct what I would like, which I imagine and hope shouldn't be too hard.
Cheers.
Here is a simple function to do what you want. Essentially it uses match to match elements from one vector to another and arrows to draw arrows.
plotRanks <- function(a, b, labels.offset=0.1, arrow.len=0.1)
{
old.par <- par(mar=c(1,1,1,1))
# Find the length of the vectors
len.1 <- length(a)
len.2 <- length(b)
# Plot two columns of equidistant points
plot(rep(1, len.1), 1:len.1, pch=20, cex=0.8,
xlim=c(0, 3), ylim=c(0, max(len.1, len.2)),
axes=F, xlab="", ylab="") # Remove axes and labels
points(rep(2, len.2), 1:len.2, pch=20, cex=0.8)
# Put labels next to each observation
text(rep(1-labels.offset, len.1), 1:len.1, a)
text(rep(2+labels.offset, len.2), 1:len.2, b)
# Now we need to map where the elements of a are in b
# We use the match function for this job
a.to.b <- match(a, b)
# Now we can draw arrows from the first column to the second
arrows(rep(1.02, len.1), 1:len.1, rep(1.98, len.2), a.to.b,
length=arrow.len, angle=20)
par(old.par)
}
A few example plots
par(mfrow=c(2,2))
plotRanks(c("a","b","c","d","e","f","g"),
c("b","x","e","c","z","d","a"))
plotRanks(sample(LETTERS, 20), sample(LETTERS, 5))
plotRanks(c("a","b","c","d","e","f","g"), 1:10) # No matches
plotRanks(c("a", "b", "c", 1:5), c("a", "b", "c", 1:5)) # All matches
par(mfrow=c(1,1))
Here's a solution using igraph functions.
rankchange <- function(list.1, list.2){
grp = c(rep(0,length(list.1)),rep(1,length(list.2)))
m = match(list.1, list.2)
m = m + length(list.1)
pairs = cbind(1:length(list.1), m)
pairs = pairs[!is.na(pairs[,1]),]
pairs = pairs[!is.na(pairs[,2]),]
g = graph.bipartite(grp, as.vector(t(pairs)), directed=TRUE)
V(g)$color = c("red","green")[grp+1]
V(g)$label = c(list.1, list.2)
V(g)$x = grp
V(g)$y = c(length(list.1):1, length(list.2):1)
g
}
This builds and then plots the graph from your vectors:
g = rankchange(list.1, list.2)
plot(g)
Adjust the colour scheme and symbolism to suit using options detailed in the igraph docs.
Note this is not thoroughly tested (only tried on your sample data) but you can see how it builds a bipartite graph from the code.
With ggplot2:
v1 <- c("a","b","c","d","e","f","g")
v2 <- c("b","x","e","c","z","d","a")
o <- 0.05
DF <- data.frame(x = c(rep(1, length(v1)), rep(2, length(v2))),
x1 = c(rep(1 + o, length(v1)), rep(2 - o, length(v2))),
y = c(rev(seq_along(v1)), rev(seq_along(v2))),
g = c(v1, v2))
library(ggplot2)
library(grid)
ggplot(DF, aes(x=x, y=y, group=g, label=g)) +
geom_path(aes(x=x1), arrow = arrow(length = unit(0.02,"npc")),
size=1, color="green") +
geom_text(size=10) +
theme_minimal() +
theme(axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
panel.grid = element_blank())
This can of course be wrapped in a function easily.
Here's a generalization of nico's result for use with data frames:
plotRanks <- function(df, rank_col, time_col, data_col, color_col = NA, labels_offset=0.1, arrow_len=0.1, ...){
time_vec <- df[ ,time_col]
unique_dates <- unique(time_vec)
unique_dates <- unique_dates[order(unique_dates)]
rank_ls <- lapply(unique_dates, function(d){
temp_df <- df[time_vec == d, ]
temp_df <- temp_df[order(temp_df[ ,data_col], temp_df[ ,rank_col]), ]
temp_d <- temp_df[ ,data_col]
temp_rank <- temp_df[ ,rank_col]
if(is.na(color_col)){
temp_color = rep("blue", length(temp_d))
}else{
temp_color = temp_df[ ,color_col]
}
temp_rank <- temp_df[ ,rank_col]
temp_ls <- list(temp_rank, temp_d, temp_color)
names(temp_ls) <- c("ranking", "data", "color")
temp_ls
})
first_rank <- rank_ls[[1]]$ranking
first_data <- rank_ls[[1]]$data
first_length <- length(first_rank)
y_max <- max(sapply(rank_ls, function(l) length(l$ranking)))
plot(rep(1, first_length), 1:first_length, pch=20, cex=0.8,
xlim=c(0, length(rank_ls) + 1), ylim = c(1, y_max), xaxt = "n", xlab = NA, ylab="Ranking", ...)
text_paste <- paste(first_rank, "\n", "(", first_data, ")", sep = "")
text(rep(1 - labels_offset, first_length), 1:first_length, text_paste)
axis(1, at = 1:(length(rank_ls)), labels = unique_dates)
for(i in 2:length(rank_ls)){
j = i - 1
ith_rank <- rank_ls[[i]]$ranking
ith_data <- rank_ls[[i]]$data
jth_color <- rank_ls[[j]]$color
jth_rank <- rank_ls[[j]]$ranking
ith_length <- length(ith_rank)
jth_length <- length(jth_rank)
points(rep(i, ith_length), 1:ith_length, pch = 20, cex = 0.8)
i_to_j <- match(jth_rank, ith_rank)
arrows(rep(i - 0.98, jth_length), 1:jth_length, rep(i - 0.02, ith_length), i_to_j
, length = 0.1, angle = 10, col = jth_color)
offset_choice <- ifelse(length(rank_ls) == 2, i + labels_offset, i - labels_offset)
text_paste <- paste(ith_rank, "\n", "(", ith_data, ")", sep = "")
text(rep(offset_choice, ith_length), 1:ith_length, text_paste)
}
}
Here's an example using a haphazard reshape of the presidents dataset:
data(presidents)
years <- rep(1945:1974, 4)
n <- length(presidents)
q1 <- presidents[seq(1, n, 4)]
q2 <- presidents[seq(2, n, 4)]
q3 <- presidents[seq(3, n, 4)]
q4 <- presidents[seq(4, n, 4)]
quarters <- c(q1, q2, q3, q4)
q_label <- c(rep("Q1", n / 4), rep("Q2", n / 4), rep("Q3", n / 4), rep("Q4", n / 4))
q_colors <- c(Q1 = "blue", Q2 = "red", Q3 = "green", Q4 = "orange")
q_colors <- q_colors[match(q_label, names(q_colors))]
new_prez <- data.frame(years, quarters, q_label, q_colors)
new_prez <- na.omit(new_prez)
png("C:/users/fasdfsdhkeos/desktop/prez.png", width = 15, height = 10, units = "in", res = 300)
plotRanks(new_prez[new_prez$years %in% 1960:1970, ], "q_label", "years", "quarters", "q_colors")
dev.off()
This produces a time series ranking plot, and it introduces color if tracking a certain observation is desired:

Resources