Related
I have data that looks like this:
# dput() dump of an igraph object with 14 named nodes.
# NOTE(review): `<environment>` near the end is the placeholder dput() prints
# for an environment, so this literal presumably cannot be evaluated as-is.
df<- structure(list(14, FALSE, c(1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12,
13, 6), c(0, 0, 0, 0, 0, 6, 6, 6, 6, 6, 6, 6, 0), c(0, 1, 2,
3, 4, 12, 5, 6, 7, 8, 9, 10, 11), c(0, 1, 2, 3, 4, 12, 5, 6,
7, 8, 9, 10, 11), c(0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13), c(0, 6, 6, 6, 6, 6, 6, 13, 13, 13, 13, 13, 13, 13, 13
), list(c(1, 0, 1), structure(list(), names = character(0)),
list(name = c("Bestman", "Tera1", "Tera2", "Tera3", "Tera4",
"Tera5", "Tetra", "Brownie1", "Brownie2", "Brownie3", "Brownie4",
"Brownie5", "Brownie6", "Brownie7")), list()), <environment>), class = "igraph")
I am trying to make a list and assign the two core nodes as roots.
I can easily do this:
# Mark the two core nodes as "root" (everything else is a "branch") and
# split the nodes into two groups based on their names.
df %>%
  as_tbl_graph() %>%
  activate(nodes) %>%
  mutate(
    type = ifelse(name %in% c("Bestman", "Tetra"), "root", "branch"),
    group = ifelse(
      grepl("Tera", name) | name == "Bestman",
      "Bestman",
      "Tera"
    )
  )
When the number of core nodes grows, this method does not work, for example if I have more of them and do the following.
For example, when my data becomes like this:
# dput() dump of a larger igraph object with 28 named nodes.
# NOTE(review): `<environment>` near the end is the placeholder dput() prints
# for an environment, so this literal presumably cannot be evaluated as-is.
df2<-structure(list(28, FALSE, c(1, 2, 3, 4, 5, 6, 1, 2, 8, 7, 9,
10, 11, 7, 7, 13, 14, 15, 16, 17, 19, 20, 21, 22, 23, 24, 26,
27, 7, 12, 18, 25, 12, 18, 25, 18, 25, 25), c(0, 0, 0, 0, 0,
0, 0, 0, 7, 6, 7, 7, 7, 2, 1, 12, 12, 12, 12, 12, 18, 18, 18,
18, 18, 18, 25, 25, 0, 0, 0, 0, 7, 7, 7, 12, 12, 18), c(6, 0,
7, 1, 2, 3, 4, 5, 28, 14, 13, 9, 8, 10, 11, 12, 29, 32, 15, 16,
17, 18, 19, 30, 33, 35, 20, 21, 22, 23, 24, 25, 31, 34, 36, 37,
26, 27), c(6, 0, 7, 1, 2, 3, 4, 5, 28, 29, 30, 31, 14, 13, 9,
8, 10, 11, 12, 32, 33, 34, 15, 16, 17, 18, 19, 35, 36, 20, 21,
22, 23, 24, 25, 37, 26, 27), c(0, 0, 2, 4, 5, 6, 7, 8, 12, 13,
14, 15, 16, 18, 19, 20, 21, 22, 23, 26, 27, 28, 29, 30, 31, 32,
36, 37, 38), c(0, 12, 13, 14, 14, 14, 14, 15, 22, 22, 22, 22,
22, 29, 29, 29, 29, 29, 29, 36, 36, 36, 36, 36, 36, 36, 38, 38,
38), list(c(1, 0, 1), structure(list(), names = character(0)),
list(name = c("Bestman", "Tera1", "Tera2", "Tera3", "Tera4",
"Tera5", "Brownie2", "Tetra", "Brownie1", "Brownie3", "Brownie4",
"Brownie5", "trueG", "ckage1", "ckage2", "ckage3", "ckage4",
"ckage5", "Carowner", "Hoghet1", "Hoghet2", "Hoghet3", "Hoghet4",
"Hoghet5", "Hoghet6", "Bestwomen", "Esme2", "Esme3")), list()),
<environment>), class = "igraph")
# Attempt to extend the two-core labelling to five core nodes.
# NOTE(review): the second mutate() is the call that fails. ifelse() takes
# only three arguments (test, yes, no), so the extra group labels passed
# after "Tetra" are not accepted.
as_tbl_graph(df2) %>%
activate(nodes) %>%
mutate(type = ifelse(name %in% c("Bestman", "Tetra", "trueG", "Carowner","Bestwomen"), "root", "branch")) %>%
mutate(group = ifelse(name == "Bestman" | grepl("Tetra", name) | grepl("trueG",name) | grepl("Carowner", name) | grepl("Bestwomen", name) , "Bestman", "Tetra","trueG","Carowner","Bestwomen" ))
I get an error. What am I doing wrong here?
Your second graph is more complex than your first. Some of the 'peripheral' nodes join more than one central node, so it is not clear how they should be labelled / colored. However, tidygraph has various grouping functions which can be used to assign the nodes to groups based on their connectivity, and the centrality of a node can be calculated automatically to help with labelling and sizing.
library(tidygraph)
library(ggraph)
# Flag hub-like nodes, assign every node to a label-propagation group, and
# draw the graph so that only the central nodes are labelled and drawn large.
as_tbl_graph(df2) %>%
  activate(nodes) %>%
  mutate(
    is_central = centrality_hub() > 0.6,
    group = factor(group_label_prop())
  ) %>%
  ggraph(layout = "igraph", algorithm = "nicely") +
  geom_edge_link(width = 2, alpha = 0.1) +
  # Central nodes get a radius scaled to their label length; others stay small.
  geom_node_circle(
    aes(r = ifelse(is_central, nchar(name) / 12, 0.1), fill = group),
    color = NA
  ) +
  # Only central nodes carry a visible label.
  geom_node_text(
    aes(label = ifelse(is_central, name, "")),
    size = 5,
    color = "gray40",
    family = "Roboto Condensed",
    fontface = 2
  ) +
  theme_graph() +
  coord_equal() +
  scale_fill_brewer(palette = "Pastel2", guide = "none")
ifelse() only allows two outcomes (its `yes` and `no` values); try using dplyr::case_when() instead.
https://dplyr.tidyverse.org/reference/case_when.html
Update to add requested code:
# Give each core node its own group label, one condition per core.
# Conditions are checked top to bottom; names matching none of them get NA
# because no fallback branch is supplied.
mutate(
  group = dplyr::case_when(
    name == "Bestman" ~ "Bestman",
    grepl("Tetra", name) ~ "Tetra",
    grepl("trueG", name) ~ "trueG",
    grepl("Carowner", name) ~ "Carowner",
    grepl("Bestwomen", name) ~ "Bestwomen"
  )
)
I have three different graphs that were produced with the igraph package's graph_from_adjacency_matrix function. How can I store all three of these (g1, g2, g3) in one object so that I can later use this object as an input for another function? I want to preserve all the attributes of the graphs.
Here's the structure of the three graphs:
dput(g1)
structure(list(11, FALSE, c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 2,
3, 4, 5, 6, 7, 8, 9, 10, 3, 4, 5, 6, 7, 8, 9, 10, 4, 5, 6, 7,
8, 9, 10, 5, 6, 7, 8, 9, 10, 6, 7, 8, 9, 10, 7, 8, 9, 10, 8,
9, 10, 9, 10, 10), c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4,
4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 8, 8, 9),
c(0, 1, 10, 2, 11, 19, 3, 12, 20, 27, 4, 13, 21, 28, 34,
5, 14, 22, 29, 35, 40, 6, 15, 23, 30, 36, 41, 45, 7, 16,
24, 31, 37, 42, 46, 49, 8, 17, 25, 32, 38, 43, 47, 50, 52,
9, 18, 26, 33, 39, 44, 48, 51, 53, 54), c(0, 1, 2, 3, 4,
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
51, 52, 53, 54), c(0, 0, 1, 3, 6, 10, 15, 21, 28, 36, 45,
55), c(0, 10, 19, 27, 34, 40, 45, 49, 52, 54, 55, 55), list(
c(1, 0, 1), structure(list(), .Names = character(0)),
list(name = c("jpm", "gs", "ms", "bofa", "schwab", "brk",
"wf", "citi", "amex", "spgl", "pnc")), list(wt = c(10000,
3.16222797634994, 10000, 10000, 6.2838498029626, 1.93361060894155,
10000, 10000, 5.84323225364297, 7.44026659903325, 1.31111055012301,
10000, 10000, 4.30459269702548, 2.20457094344212, 3.49673898163627,
3.09239540712491, 3.43107254995375, 10000, 5.64499596383733,
10000, 10000, 3.72116985462354, 2.70273403225818, 2.35839869470134,
10000, 10000, 10000, 1.83130016032325, 1.99399002493476,
1.7644293974645, 1.88708226743269, 7.73257077502946,
10000, 10000, 10000, 10000, 10000, 10000, 10000, 6.94406536133693,
3.32018490900407, 2.0759886748923, 4.11734201102576,
6.193275571549, 2.85404877010956, 10000, 3.01093189825944,
10000, 10000, 7.07193471387249, 10000, 5.19453928016632,
10000, 10000))), <environment>), class = "igraph")
dput(g2)
structure(list(11, FALSE, c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 2,
3, 4, 5, 6, 7, 8, 9, 10, 3, 4, 5, 6, 7, 8, 9, 10, 4, 5, 6, 7,
8, 9, 10, 5, 6, 7, 8, 9, 10, 6, 7, 8, 9, 10, 7, 8, 9, 10, 8,
9, 10, 9, 10, 10), c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4,
4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 8, 8, 9),
c(0, 1, 10, 2, 11, 19, 3, 12, 20, 27, 4, 13, 21, 28, 34,
5, 14, 22, 29, 35, 40, 6, 15, 23, 30, 36, 41, 45, 7, 16,
24, 31, 37, 42, 46, 49, 8, 17, 25, 32, 38, 43, 47, 50, 52,
9, 18, 26, 33, 39, 44, 48, 51, 53, 54), c(0, 1, 2, 3, 4,
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
51, 52, 53, 54), c(0, 0, 1, 3, 6, 10, 15, 21, 28, 36, 45,
55), c(0, 10, 19, 27, 34, 40, 45, 49, 52, 54, 55, 55), list(
c(1, 0, 1), structure(list(), .Names = character(0)),
list(name = c("jpm", "gs", "ms", "bofa", "schwab", "brk",
"wf", "citi", "amex", "spgl", "pnc")), list(wt = c(1.72565213162016,
10000, 10000, 10000, 2.60988018061569, 3.37529546067647,
5.68789870362681, 2.44357606642214, 4.38114633403004,
10000, 10000, 2.49869325166531, 10000, 10000, 4.56956459390346,
3.52409742807134, 2.97961673322383, 3.42809851201881,
3.15481552530237, 7.32112737506667, 10000, 7.0852416616783,
3.99494740752879, 2.65955867194822, 10000, 10000, 10000,
10000, 5.73934520134914, 1.80740569361977, 1.5783164909029,
2.84567417160359, 10000, 10000, 10000, 10000, 5.30260309989479,
10000, 10000, 10000, 10000, 7.06161817483184, 6.9222112543713,
4.63691541477454, 3.48797079504012, 6.38029319494032,
10000, 2.48116694808653, 10000, 2.12352867446693, 3.04335319291233,
10000, 10000, 5.22409020671212, 10000))), <environment>), class = "igraph")
dput(g3)
structure(list(11, FALSE, c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 2,
3, 4, 5, 6, 7, 8, 9, 10, 3, 4, 5, 6, 7, 8, 9, 10, 4, 5, 6, 7,
8, 9, 10, 5, 6, 7, 8, 9, 10, 6, 7, 8, 9, 10, 7, 8, 9, 10, 8,
9, 10, 9, 10, 10), c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4,
4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 8, 8, 9),
c(0, 1, 10, 2, 11, 19, 3, 12, 20, 27, 4, 13, 21, 28, 34,
5, 14, 22, 29, 35, 40, 6, 15, 23, 30, 36, 41, 45, 7, 16,
24, 31, 37, 42, 46, 49, 8, 17, 25, 32, 38, 43, 47, 50, 52,
9, 18, 26, 33, 39, 44, 48, 51, 53, 54), c(0, 1, 2, 3, 4,
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
51, 52, 53, 54), c(0, 0, 1, 3, 6, 10, 15, 21, 28, 36, 45,
55), c(0, 10, 19, 27, 34, 40, 45, 49, 52, 54, 55, 55), list(
c(1, 0, 1), structure(list(), .Names = character(0)),
list(name = c("jpm", "gs", "ms", "bofa", "schwab", "brk",
"wf", "citi", "amex", "spgl", "pnc")), list(wt = c(10000,
4.14221420842333, 2.69857209553848, 5.77115055524614,
1.95672007809809, 2.98690863617922, 1.92161847347613,
2.34571882319417, 10000, 10000, 1.97201563662035, 5.4078452590091,
10000, 6.85345421615961, 3.51453278996926, 10000, 10000,
2.08964950396744, 10000, 2.78868220464486, 10000, 3.41857460835555,
4.57693796722718, 1.96044036389548, 10000, 6.69365386837721,
2.61525679780493, 7.34195637377719, 2.57334862699097,
3.54317409176484, 10000, 2.33889236077345, 2.49271973693215,
5.47858809426897, 10000, 5.25238753114071, 10000, 10000,
10000, 10000, 10000, 2.68400716970295, 2.49075030691088,
2.59993683645561, 10000, 10000, 2.49345951327313, 5.7338881554994,
1.73687483250752, 4.24032760636804, 3.11756167665892,
5.07827243244947, 10000, 1.69643890905687, 10000))),
<environment>), class = "igraph")
I think you can store them in a list, like below:
# Bundle the three graphs into a single list; each element is stored as-is
# and can be pulled back out with g[[1]], g[[2]] and g[[3]].
g <- list(g1, g2, g3)
I have the following dataframe:
> dput(master_credit)
structure(c(10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12,
11, 11, 11, 11, 11, 12, 11, 11, 12, 11, 11, 11, 11, 11, 12, 12,
12, 11, 12, 12, 12, 11, 11, 11, 12, 11, 12, 12, 12, 12, 13, 12,
12, 12, 12, 12, 12, 11, 12, 12, 11, 12, 12, 14, 13, 12, 13, 13,
14, 13, 13, 12, 25, 26, 3, 21, 5, 9, 43, 15, 2, 6, 4, 27, 44,
1, 10, 31, 16, 12, 37, 7, 23, 54, 18, 19, 20, 14, 35, 52, 36,
32, 29, 50, 51, 30, 42, 24, 17, 63, 8, 62, 38, 34, 33, 49, 59,
58, 57, 60, 28, 61, 40, 41, 22, 11, 47, 13, 48, 45, 46, 65, 64,
53, 39, 56, 55), .Dim = c(65L, 2L), .Dimnames = list(NULL, c("master",
"credit")))
on which I am running the code:
library(InformationValue)
> somersD(master_credit[,"master"], master_credit[,"credit"])
[1] NaN
why does it return NaN?
The documentation of somersD says somersD(actuals, predictedScores), where actuals are binary flags which are either 1 or 0.
actuals: The actual binary flags for the response variable. It can take a numeric vector containing values of either 1 or 0, where 1 represents the 'Good' or 'Events' while 0 represents 'Bad' or 'Non-Events'.
Internally, while calculating the Somers' D statistic, the somersD function counts the rows whose actuals value is 1 and the rows whose actuals value is 0. These counts are used in a division. Since your data contains no such rows, you are effectively dividing by zero, and hence it returns NaN.
I have a dataset with 50 thousand rows that I want to sort according to the values in one of the columns. The numbers in the column go from 1 to 30, and when I do the following
# Reorder the rows of `data` by the values in `columnname`.
data <- data[order(data$columnname), ]
it gets sorted so that the order of the values is like this
1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 2, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 3, 30, 4, 5, 6, 7, 8, 9
how could I sort it so that it is like this
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30
It seems to me that your column is not stored as numeric. Try this:
# Convert the column to numeric before sorting so the values are ordered as
# numbers (1, 2, 3, ...) rather than as text (1, 10, 11, ...).
# Going through as.character() first matters when the column is a factor:
# as.numeric() applied directly to a factor returns the internal level codes,
# not the numbers that are printed, which would leave the order unchanged
# and overwrite the real values. For a character column the extra step is a
# no-op.
data$columnname <- as.numeric(as.character(data$columnname))
# Reorder the rows by the now-numeric column.
data <- data[order(data$columnname), ]
I have data for free parking slots over hours and days.
Here's a random sample of 100.
# Sample data as a list of two parallel numeric vectors:
#   EmptySlots - number of free parking slots observed
#   hour       - hour at which the observation was made
sl <- list(EmptySlots = c(7, 6, 20, 5, 16, 20, 24, 5, 24, 24, 15, 11,
8, 6, 13, 2, 21, 6, 1, 6, 9, 1, 8, 0, 20, 9, 20, 11, 22, 24,
1, 2, 12, 6, 8, 2, 23, 18, 8, 3, 20, 2, 1, 0, 5, 21, 1, 4, 20,
15, 24, 12, 4, 14, 2, 4, 20, 16, 2, 10, 2, 1, 24, 9, 22, 7, 6,
3, 20, 13, 1, 16, 12, 5, 2, 7, 4, 1, 6, 1, 1, 2, 0, 13, 24, 6,
13, 7, 24, 24, 15, 6, 10, 1, 2, 9, 5, 2, 11, 15), hour = c(8,
16, 23, 14, 18, 7, 17, 15, 19, 19, 17, 17, 16, 14, 17, 12, 19,
10, 10, 13, 16, 10, 16, 11, 12, 9, 0, 15, 16, 21, 10, 11, 17,
11, 16, 15, 23, 7, 16, 14, 18, 14, 14, 9, 15, 2, 10, 9, 19, 17,
20, 16, 12, 17, 12, 9, 23, 9, 15, 17, 10, 12, 18, 17, 18, 17,
13, 10, 7, 8, 10, 18, 11, 11, 12, 17, 12, 9, 14, 15, 10, 11,
10, 10, 20, 16, 18, 15, 21, 18, 17, 13, 8, 11, 15, 16, 11, 9,
12, 18))
A quick way to calculate a LOESS function via ggplot2.
# Turn the sample list into a data frame with one row per observation.
sl <- as.data.frame(sl)
library(ggplot2)
# Jittered scatter of free slots against hour, with ggplot2's default smoother
# drawn on top. ggplot() + geom_jitter() is used instead of qplot(), which
# ggplot2 has deprecated since version 3.4.0; the resulting plot is the same.
# NOTE(review): `size` on the smoother is kept so older ggplot2 versions still
# work; newer versions prefer `linewidth` for line thickness.
ggplot(sl, aes(x = hour, y = EmptySlots)) +
  geom_jitter() +
  theme_bw() +
  stat_smooth(size = 2)
What is the best way to tell the LOESS function that 0 and 24 are neighbours? I.e. the line on the left and the right should be the same value if we were to estimate it this way.
Pointers on where to start will do fine.
I'd be tempted just to replicate the data on either side:
library(ggplot2)
# Sample data as two parallel vectors: `empty` holds the free-slot counts and
# `hour` the hour at which each count was observed.
empty <- c(7, 6, 20, 5, 16, 20, 24, 5, 24, 24, 15, 11, 8, 6, 13, 2, 21, 6, 1, 6, 9, 1, 8, 0, 20, 9, 20, 11, 22, 24, 1, 2, 12, 6, 8, 2, 23, 18, 8, 3, 20, 2, 1, 0, 5, 21, 1, 4, 20, 15, 24, 12, 4, 14, 2, 4, 20, 16, 2, 10, 2, 1, 24, 9, 22, 7, 6, 3, 20, 13, 1, 16, 12, 5, 2, 7, 4, 1, 6, 1, 1, 2, 0, 13, 24, 6, 13, 7, 24, 24, 15, 6, 10, 1, 2, 9, 5, 2, 11, 15)
hour <- c(8, 16, 23, 14, 18, 7, 17, 15, 19, 19, 17, 17, 16, 14, 17, 12, 19, 10, 10, 13, 16, 10, 16, 11, 12, 9, 0, 15, 16, 21, 10, 11, 17, 11, 16, 15, 23, 7, 16, 14, 18, 14, 14, 9, 15, 2, 10, 9, 19, 17, 20, 16, 12, 17, 12, 9, 23, 9, 15, 17, 10, 12, 18, 17, 18, 17, 13, 10, 7, 8, 10, 18, 11, 11, 12, 17, 12, 9, 14, 15, 10, 11, 10, 10, 20, 16, 18, 15, 21, 18, 17, 13, 8, 11, 15, 16, 11, 9, 12, 18)
# Replicate the observations one day later (+24) and one day earlier (-24) so
# the smoother has data on both sides of hour 0 and hour 24 and therefore
# treats them as neighbours.
emptyrep <- rep.int(empty, 3)
hourrep <- c(hour, hour + 24, hour - 24)
sl <- data.frame(empty = emptyrep, hour = hourrep)
# Fit a LOESS curve over the tripled data, then zoom the axes back to the
# original 0-24 window; coord_cartesian() crops the view without dropping the
# replicated points from the fit.
# ggplot() + geom_jitter() is used instead of qplot(), which ggplot2 has
# deprecated since version 3.4.0; the resulting plot is the same.
ggplot(sl, aes(x = hour, y = empty)) +
  geom_jitter() +
  theme_bw() +
  geom_smooth(method = "loess", size = 1.5, span = 0.2) +
  coord_cartesian(xlim = c(0, 24))
... just like joran said a few minutes earlier (woops)