How to get the true node value in igraph - r

So I have read in a network data in iGraph(R) and would like to store the nodes into a list. Here's what I have done:
G = read_graph("somegraph.graphml",format="graphml")
x = list(V(G))
> x
+ 15/15 vertices, from ecb3920:
[1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
My question is, how do I get the true value, i.e. the actually node id in my data, from V(G). Thanks.
> dput(G)
structure(list(15, FALSE, c(13, 7, 9, 14, 10, 5, 4, 11, 6, 7,
14, 4, 13, 9, 10, 5, 5, 13, 9, 6, 7, 14, 12, 10, 14, 10, 11,
13, 9, 10, 12, 14, 8, 7, 11, 12, 8, 13, 14, 9, 11, 13, 13, 12,
14, 10, 13, 12, 14, 12, 13, 13, 14, 14), c(0, 0, 2, 2, 2, 2,
2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 6,
6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 9, 9, 9, 9, 10,
10, 10, 11, 11, 12, 12, 13), c(6, 11, 5, 15, 16, 8, 19, 1, 9,
20, 33, 32, 36, 2, 13, 18, 28, 39, 4, 14, 23, 25, 29, 45, 7,
26, 34, 40, 22, 30, 35, 43, 47, 49, 0, 12, 17, 27, 37, 41, 42,
46, 50, 51, 3, 10, 21, 24, 31, 38, 44, 48, 52, 53), c(1, 0, 6,
5, 2, 4, 3, 11, 15, 8, 9, 13, 14, 7, 12, 10, 16, 19, 20, 18,
23, 22, 17, 21, 25, 24, 33, 32, 28, 29, 26, 30, 27, 31, 36, 39,
34, 35, 37, 38, 40, 41, 45, 43, 42, 44, 47, 46, 48, 49, 50, 51,
52, 53), c(0, 0, 0, 0, 0, 2, 5, 7, 11, 13, 18, 24, 28, 34, 44,
54), c(0, 2, 2, 7, 16, 24, 26, 34, 40, 42, 46, 49, 51, 53, 54,
54), list(c(1, 0, 1), structure(list(), .Names = character(0)),
structure(list(id = c("1351920706", "500102244", "1454425532",
"1625050630", "510838353", "1262640078", "681721364", "1351920717",
"1260750116", "1524975171", "1070293410", "727198538", "715215233",
"1351920666", "500920034")), .Names = "id"), list()), <environment>), class = "igraph")

Just for closure (and to summarise from our chat): Based on the sample data you give, you can extract additional data for every vertex by indexing the corresponding element.
So
V(g)$id
returns
#[1] "1351920706" "500102244" "1454425532" "1625050630" "510838353"
#[6] "1262640078" "681721364" "1351920717" "1260750116" "1524975171"
#[11] "1070293410" "727198538" "715215233" "1351920666" "500920034"

Related

how to classify data inside a list file

I have a data looks like this
df<- structure(list(14, FALSE, c(1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12,
13, 6), c(0, 0, 0, 0, 0, 6, 6, 6, 6, 6, 6, 6, 0), c(0, 1, 2,
3, 4, 12, 5, 6, 7, 8, 9, 10, 11), c(0, 1, 2, 3, 4, 12, 5, 6,
7, 8, 9, 10, 11), c(0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13), c(0, 6, 6, 6, 6, 6, 6, 13, 13, 13, 13, 13, 13, 13, 13
), list(c(1, 0, 1), structure(list(), names = character(0)),
list(name = c("Bestman", "Tera1", "Tera2", "Tera3", "Tera4",
"Tera5", "Tetra", "Brownie1", "Brownie2", "Brownie3", "Brownie4",
"Brownie5", "Brownie6", "Brownie7")), list()), <environment>), class = "igraph")
I am trying to make a list and assign the two core as root
I can easily do this
as_tbl_graph(df) %>%
activate(nodes) %>%
mutate(type = ifelse(name %in% c("Bestman", "Tetra"), "root", "branch")) %>%
mutate(group = ifelse(name == "Bestman" | grepl("Tera", name),
"Bestman", "Tera"))
when the number of core grows, this method does not work, for example if I have more and I do the following
for example when my data becomes like this
df2<-structure(list(28, FALSE, c(1, 2, 3, 4, 5, 6, 1, 2, 8, 7, 9,
10, 11, 7, 7, 13, 14, 15, 16, 17, 19, 20, 21, 22, 23, 24, 26,
27, 7, 12, 18, 25, 12, 18, 25, 18, 25, 25), c(0, 0, 0, 0, 0,
0, 0, 0, 7, 6, 7, 7, 7, 2, 1, 12, 12, 12, 12, 12, 18, 18, 18,
18, 18, 18, 25, 25, 0, 0, 0, 0, 7, 7, 7, 12, 12, 18), c(6, 0,
7, 1, 2, 3, 4, 5, 28, 14, 13, 9, 8, 10, 11, 12, 29, 32, 15, 16,
17, 18, 19, 30, 33, 35, 20, 21, 22, 23, 24, 25, 31, 34, 36, 37,
26, 27), c(6, 0, 7, 1, 2, 3, 4, 5, 28, 29, 30, 31, 14, 13, 9,
8, 10, 11, 12, 32, 33, 34, 15, 16, 17, 18, 19, 35, 36, 20, 21,
22, 23, 24, 25, 37, 26, 27), c(0, 0, 2, 4, 5, 6, 7, 8, 12, 13,
14, 15, 16, 18, 19, 20, 21, 22, 23, 26, 27, 28, 29, 30, 31, 32,
36, 37, 38), c(0, 12, 13, 14, 14, 14, 14, 15, 22, 22, 22, 22,
22, 29, 29, 29, 29, 29, 29, 36, 36, 36, 36, 36, 36, 36, 38, 38,
38), list(c(1, 0, 1), structure(list(), names = character(0)),
list(name = c("Bestman", "Tera1", "Tera2", "Tera3", "Tera4",
"Tera5", "Brownie2", "Tetra", "Brownie1", "Brownie3", "Brownie4",
"Brownie5", "trueG", "ckage1", "ckage2", "ckage3", "ckage4",
"ckage5", "Carowner", "Hoghet1", "Hoghet2", "Hoghet3", "Hoghet4",
"Hoghet5", "Hoghet6", "Bestwomen", "Esme2", "Esme3")), list()),
<environment>), class = "igraph")
as_tbl_graph(df2) %>%
activate(nodes) %>%
mutate(type = ifelse(name %in% c("Bestman", "Tetra", "trueG", "Carowner","Bestwomen"), "root", "branch")) %>%
mutate(group = ifelse(name == "Bestman" | grepl("Tetra", name) | grepl("trueG",name) | grepl("Carowner", name) | grepl("Bestwomen", name) , "Bestman", "Tetra","trueG","Carowner","Bestwomen" ))
I get error, I want to know what I am doing wrong here ?
Your second graph is more complex than your first. Some of the 'peripheral' nodes join more than one central node, so it is not clear how they should be labelled / colored. However, tidygraph has various grouping functions which can be used to assign the nodes to groups based on their connectivity, and the centrality of a node can be calculated automatically to help with labelling and sizing.
library(tidygraph)
library(ggraph)
df2 %>%
as_tbl_graph() %>%
activate(nodes) %>%
mutate(is_central = centrality_hub() > 0.6) %>%
mutate(group = factor(group_label_prop())) %>%
ggraph(layout = "igraph", algorithm = "nicely") +
geom_edge_link(width = 2, alpha = 0.1) +
geom_node_circle(aes(r = ifelse(is_central, nchar(name)/12, 0.1), fill = group),
color = NA) +
geom_node_text(aes(label = ifelse(is_central, name, '')), size = 5,
color = "gray40", family = "Roboto Condensed", fontface = 2) +
theme_graph() +
coord_equal() +
scale_fill_brewer(palette = "Pastel2", guide = "none")
ifelse only allows for two options, try using dplyr::case_when instead.
https://dplyr.tidyverse.org/reference/case_when.html
Update to add requested code:
mutate(group = dplyr::case_when(name == "Bestman" ~ "Bestman",
grepl("Tetra", name) ~ "Tetra",
grepl("trueG",name) ~ "trueG",
grepl("Carowner", name) ~ "Carowner",
grepl("Bestwomen", name) ~ "Bestwomen"))

Storing objects of igraph

I have three different graphs that were produced by igraph package and graph_from_adjacency_matrix function. How can I store all three of these (g1, g2, g3) in one object so that later I can use this object as an input for another function. I want to preserve all the attributes of the graphs.
Here's the structure of the three graphs:
dput(g1)
structure(list(11, FALSE, c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 2,
3, 4, 5, 6, 7, 8, 9, 10, 3, 4, 5, 6, 7, 8, 9, 10, 4, 5, 6, 7,
8, 9, 10, 5, 6, 7, 8, 9, 10, 6, 7, 8, 9, 10, 7, 8, 9, 10, 8,
9, 10, 9, 10, 10), c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4,
4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 8, 8, 9),
c(0, 1, 10, 2, 11, 19, 3, 12, 20, 27, 4, 13, 21, 28, 34,
5, 14, 22, 29, 35, 40, 6, 15, 23, 30, 36, 41, 45, 7, 16,
24, 31, 37, 42, 46, 49, 8, 17, 25, 32, 38, 43, 47, 50, 52,
9, 18, 26, 33, 39, 44, 48, 51, 53, 54), c(0, 1, 2, 3, 4,
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
51, 52, 53, 54), c(0, 0, 1, 3, 6, 10, 15, 21, 28, 36, 45,
55), c(0, 10, 19, 27, 34, 40, 45, 49, 52, 54, 55, 55), list(
c(1, 0, 1), structure(list(), .Names = character(0)),
list(name = c("jpm", "gs", "ms", "bofa", "schwab", "brk",
"wf", "citi", "amex", "spgl", "pnc")), list(wt = c(10000,
3.16222797634994, 10000, 10000, 6.2838498029626, 1.93361060894155,
10000, 10000, 5.84323225364297, 7.44026659903325, 1.31111055012301,
10000, 10000, 4.30459269702548, 2.20457094344212, 3.49673898163627,
3.09239540712491, 3.43107254995375, 10000, 5.64499596383733,
10000, 10000, 3.72116985462354, 2.70273403225818, 2.35839869470134,
10000, 10000, 10000, 1.83130016032325, 1.99399002493476,
1.7644293974645, 1.88708226743269, 7.73257077502946,
10000, 10000, 10000, 10000, 10000, 10000, 10000, 6.94406536133693,
3.32018490900407, 2.0759886748923, 4.11734201102576,
6.193275571549, 2.85404877010956, 10000, 3.01093189825944,
10000, 10000, 7.07193471387249, 10000, 5.19453928016632,
10000, 10000))), <environment>), class = "igraph")
dput(g2)
structure(list(11, FALSE, c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 2,
3, 4, 5, 6, 7, 8, 9, 10, 3, 4, 5, 6, 7, 8, 9, 10, 4, 5, 6, 7,
8, 9, 10, 5, 6, 7, 8, 9, 10, 6, 7, 8, 9, 10, 7, 8, 9, 10, 8,
9, 10, 9, 10, 10), c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4,
4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 8, 8, 9),
c(0, 1, 10, 2, 11, 19, 3, 12, 20, 27, 4, 13, 21, 28, 34,
5, 14, 22, 29, 35, 40, 6, 15, 23, 30, 36, 41, 45, 7, 16,
24, 31, 37, 42, 46, 49, 8, 17, 25, 32, 38, 43, 47, 50, 52,
9, 18, 26, 33, 39, 44, 48, 51, 53, 54), c(0, 1, 2, 3, 4,
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
51, 52, 53, 54), c(0, 0, 1, 3, 6, 10, 15, 21, 28, 36, 45,
55), c(0, 10, 19, 27, 34, 40, 45, 49, 52, 54, 55, 55), list(
c(1, 0, 1), structure(list(), .Names = character(0)),
list(name = c("jpm", "gs", "ms", "bofa", "schwab", "brk",
"wf", "citi", "amex", "spgl", "pnc")), list(wt = c(1.72565213162016,
10000, 10000, 10000, 2.60988018061569, 3.37529546067647,
5.68789870362681, 2.44357606642214, 4.38114633403004,
10000, 10000, 2.49869325166531, 10000, 10000, 4.56956459390346,
3.52409742807134, 2.97961673322383, 3.42809851201881,
3.15481552530237, 7.32112737506667, 10000, 7.0852416616783,
3.99494740752879, 2.65955867194822, 10000, 10000, 10000,
10000, 5.73934520134914, 1.80740569361977, 1.5783164909029,
2.84567417160359, 10000, 10000, 10000, 10000, 5.30260309989479,
10000, 10000, 10000, 10000, 7.06161817483184, 6.9222112543713,
4.63691541477454, 3.48797079504012, 6.38029319494032,
10000, 2.48116694808653, 10000, 2.12352867446693, 3.04335319291233,
10000, 10000, 5.22409020671212, 10000))), <environment>), class = "igraph")
dput(g3)
structure(list(11, FALSE, c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 2,
3, 4, 5, 6, 7, 8, 9, 10, 3, 4, 5, 6, 7, 8, 9, 10, 4, 5, 6, 7,
8, 9, 10, 5, 6, 7, 8, 9, 10, 6, 7, 8, 9, 10, 7, 8, 9, 10, 8,
9, 10, 9, 10, 10), c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4,
4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 8, 8, 9),
c(0, 1, 10, 2, 11, 19, 3, 12, 20, 27, 4, 13, 21, 28, 34,
5, 14, 22, 29, 35, 40, 6, 15, 23, 30, 36, 41, 45, 7, 16,
24, 31, 37, 42, 46, 49, 8, 17, 25, 32, 38, 43, 47, 50, 52,
9, 18, 26, 33, 39, 44, 48, 51, 53, 54), c(0, 1, 2, 3, 4,
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
51, 52, 53, 54), c(0, 0, 1, 3, 6, 10, 15, 21, 28, 36, 45,
55), c(0, 10, 19, 27, 34, 40, 45, 49, 52, 54, 55, 55), list(
c(1, 0, 1), structure(list(), .Names = character(0)),
list(name = c("jpm", "gs", "ms", "bofa", "schwab", "brk",
"wf", "citi", "amex", "spgl", "pnc")), list(wt = c(10000,
4.14221420842333, 2.69857209553848, 5.77115055524614,
1.95672007809809, 2.98690863617922, 1.92161847347613,
2.34571882319417, 10000, 10000, 1.97201563662035, 5.4078452590091,
10000, 6.85345421615961, 3.51453278996926, 10000, 10000,
2.08964950396744, 10000, 2.78868220464486, 10000, 3.41857460835555,
4.57693796722718, 1.96044036389548, 10000, 6.69365386837721,
2.61525679780493, 7.34195637377719, 2.57334862699097,
3.54317409176484, 10000, 2.33889236077345, 2.49271973693215,
5.47858809426897, 10000, 5.25238753114071, 10000, 10000,
10000, 10000, 10000, 2.68400716970295, 2.49075030691088,
2.59993683645561, 10000, 10000, 2.49345951327313, 5.7338881554994,
1.73687483250752, 4.24032760636804, 3.11756167665892,
5.07827243244947, 10000, 1.69643890905687, 10000))),
<environment>), class = "igraph")
I think you can try list like below
g <- list(g1,g2,g3)

function somersD returns NaN

I have the following dataframe:
> dput(master_credit)
structure(c(10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12,
11, 11, 11, 11, 11, 12, 11, 11, 12, 11, 11, 11, 11, 11, 12, 12,
12, 11, 12, 12, 12, 11, 11, 11, 12, 11, 12, 12, 12, 12, 13, 12,
12, 12, 12, 12, 12, 11, 12, 12, 11, 12, 12, 14, 13, 12, 13, 13,
14, 13, 13, 12, 25, 26, 3, 21, 5, 9, 43, 15, 2, 6, 4, 27, 44,
1, 10, 31, 16, 12, 37, 7, 23, 54, 18, 19, 20, 14, 35, 52, 36,
32, 29, 50, 51, 30, 42, 24, 17, 63, 8, 62, 38, 34, 33, 49, 59,
58, 57, 60, 28, 61, 40, 41, 22, 11, 47, 13, 48, 45, 46, 65, 64,
53, 39, 56, 55), .Dim = c(65L, 2L), .Dimnames = list(NULL, c("master",
"credit")))
on which I am running the code:
library(InformationValue)
> somersD(master_credit[,"master"], master_credit[,"credit"])
[1] NaN
why does it return NaN?
The documentation of somersD says somersD(actuals, predictedScores), where actuals are binary flags which are either 1 or 0.
actuals: The actual binary flags for the response variable. It can take a numeric vector containing values of either 1 or 0, where 1 represents the 'Good' or 'Events' while 0 represents 'Bad' or 'Non-Events'.
Internally while calculating Somers D statistics, somersD function tries to find the number of rows containing, 1 and 0 in actuals column. This count is used in division. And since in your dataframe there is no such row, so you are basically dividing by zero, hence it returns NaN.

sorting columns from lowest to highest values (i.e. 1, 2, 3 etc, not 1, 10, 11...2, 20, 21... etc)

I have a dataset with 50 thousand rows that I want to sort according the the values in one of the columns. The numbers in the column go from 1-30, and when I do the following
data=data[order(data$columnname),]
it gets sorted so that the order of the columns is like this
1, 10, 11 12, 13, 14, 15, 16, 17, 18, 19, 2, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 3, 30, 4, 5, 6, 7, 8, 9
how could I sort it so that it is like this
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30
For me it seems, that your format is not numeric. Try this:
data$columnname<-as.numeric(data$columnname)
data=data[order(data$columnname),]

R circular LOESS function over 24 hours (a day)

I have data for free parking slots over hours and days.
Here's a random sample of 100.
sl <- list(EmptySlots = c(7, 6, 20, 5, 16, 20, 24, 5, 24, 24, 15, 11,
8, 6, 13, 2, 21, 6, 1, 6, 9, 1, 8, 0, 20, 9, 20, 11, 22, 24,
1, 2, 12, 6, 8, 2, 23, 18, 8, 3, 20, 2, 1, 0, 5, 21, 1, 4, 20,
15, 24, 12, 4, 14, 2, 4, 20, 16, 2, 10, 2, 1, 24, 9, 22, 7, 6,
3, 20, 13, 1, 16, 12, 5, 2, 7, 4, 1, 6, 1, 1, 2, 0, 13, 24, 6,
13, 7, 24, 24, 15, 6, 10, 1, 2, 9, 5, 2, 11, 15), hour = c(8,
16, 23, 14, 18, 7, 17, 15, 19, 19, 17, 17, 16, 14, 17, 12, 19,
10, 10, 13, 16, 10, 16, 11, 12, 9, 0, 15, 16, 21, 10, 11, 17,
11, 16, 15, 23, 7, 16, 14, 18, 14, 14, 9, 15, 2, 10, 9, 19, 17,
20, 16, 12, 17, 12, 9, 23, 9, 15, 17, 10, 12, 18, 17, 18, 17,
13, 10, 7, 8, 10, 18, 11, 11, 12, 17, 12, 9, 14, 15, 10, 11,
10, 10, 20, 16, 18, 15, 21, 18, 17, 13, 8, 11, 15, 16, 11, 9,
12, 18))
A quick way to calculate a LOESS function via ggplot2.
sl <- as.data.frame(sl)
library(ggplot2)
qplot(hour, EmptySlots, data=sl, geom="jitter") + theme_bw() + stat_smooth(size = 2)
What is the best way to tell the LOESS function that 0 and 24 are neighbours? I.e. the line on the left and the right should be the same value if we were to estimate it this way.
Pointers on where to start will do fine.
I'd be tempted just to replicate the data on either side:
library(ggplot2)
empty <- c(7, 6, 20, 5, 16, 20, 24, 5, 24, 24, 15, 11, 8, 6, 13, 2, 21, 6, 1, 6, 9, 1, 8, 0, 20, 9, 20, 11, 22, 24, 1, 2, 12, 6, 8, 2, 23, 18, 8, 3, 20, 2, 1, 0, 5, 21, 1, 4, 20, 15, 24, 12, 4, 14, 2, 4, 20, 16, 2, 10, 2, 1, 24, 9, 22, 7, 6, 3, 20, 13, 1, 16, 12, 5, 2, 7, 4, 1, 6, 1, 1, 2, 0, 13, 24, 6, 13, 7, 24, 24, 15, 6, 10, 1, 2, 9, 5, 2, 11, 15)
hour <- c(8, 16, 23, 14, 18, 7, 17, 15, 19, 19, 17, 17, 16, 14, 17, 12, 19, 10, 10, 13, 16, 10, 16, 11, 12, 9, 0, 15, 16, 21, 10, 11, 17, 11, 16, 15, 23, 7, 16, 14, 18, 14, 14, 9, 15, 2, 10, 9, 19, 17, 20, 16, 12, 17, 12, 9, 23, 9, 15, 17, 10, 12, 18, 17, 18, 17, 13, 10, 7, 8, 10, 18, 11, 11, 12, 17, 12, 9, 14, 15, 10, 11, 10, 10, 20, 16, 18, 15, 21, 18, 17, 13, 8, 11, 15, 16, 11, 9, 12, 18)
emptyrep <- rep.int(empty,3)
hourrep <- c(hour,hour+24,hour-24)
sl <- data.frame(empty=emptyrep, hour=hourrep)
qplot(hour, empty, data=sl, geom="jitter") + theme_bw() + geom_smooth(method="loess",size = 1.5,span=0.2) + coord_cartesian(xlim=c(0,24))
... just like joran said a few minutes earlier (woops)

Resources