Related
I have three different graphs that were produced by igraph package and graph_from_adjacency_matrix function. How can I store all three of these (g1, g2, g3) in one object so that later I can use this object as an input for another function. I want to preserve all the attributes of the graphs.
Here's the structure of the three graphs:
dput(g1)
structure(list(11, FALSE, c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 2,
3, 4, 5, 6, 7, 8, 9, 10, 3, 4, 5, 6, 7, 8, 9, 10, 4, 5, 6, 7,
8, 9, 10, 5, 6, 7, 8, 9, 10, 6, 7, 8, 9, 10, 7, 8, 9, 10, 8,
9, 10, 9, 10, 10), c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4,
4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 8, 8, 9),
c(0, 1, 10, 2, 11, 19, 3, 12, 20, 27, 4, 13, 21, 28, 34,
5, 14, 22, 29, 35, 40, 6, 15, 23, 30, 36, 41, 45, 7, 16,
24, 31, 37, 42, 46, 49, 8, 17, 25, 32, 38, 43, 47, 50, 52,
9, 18, 26, 33, 39, 44, 48, 51, 53, 54), c(0, 1, 2, 3, 4,
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
51, 52, 53, 54), c(0, 0, 1, 3, 6, 10, 15, 21, 28, 36, 45,
55), c(0, 10, 19, 27, 34, 40, 45, 49, 52, 54, 55, 55), list(
c(1, 0, 1), structure(list(), .Names = character(0)),
list(name = c("jpm", "gs", "ms", "bofa", "schwab", "brk",
"wf", "citi", "amex", "spgl", "pnc")), list(wt = c(10000,
3.16222797634994, 10000, 10000, 6.2838498029626, 1.93361060894155,
10000, 10000, 5.84323225364297, 7.44026659903325, 1.31111055012301,
10000, 10000, 4.30459269702548, 2.20457094344212, 3.49673898163627,
3.09239540712491, 3.43107254995375, 10000, 5.64499596383733,
10000, 10000, 3.72116985462354, 2.70273403225818, 2.35839869470134,
10000, 10000, 10000, 1.83130016032325, 1.99399002493476,
1.7644293974645, 1.88708226743269, 7.73257077502946,
10000, 10000, 10000, 10000, 10000, 10000, 10000, 6.94406536133693,
3.32018490900407, 2.0759886748923, 4.11734201102576,
6.193275571549, 2.85404877010956, 10000, 3.01093189825944,
10000, 10000, 7.07193471387249, 10000, 5.19453928016632,
10000, 10000))), <environment>), class = "igraph")
dput(g2)
structure(list(11, FALSE, c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 2,
3, 4, 5, 6, 7, 8, 9, 10, 3, 4, 5, 6, 7, 8, 9, 10, 4, 5, 6, 7,
8, 9, 10, 5, 6, 7, 8, 9, 10, 6, 7, 8, 9, 10, 7, 8, 9, 10, 8,
9, 10, 9, 10, 10), c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4,
4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 8, 8, 9),
c(0, 1, 10, 2, 11, 19, 3, 12, 20, 27, 4, 13, 21, 28, 34,
5, 14, 22, 29, 35, 40, 6, 15, 23, 30, 36, 41, 45, 7, 16,
24, 31, 37, 42, 46, 49, 8, 17, 25, 32, 38, 43, 47, 50, 52,
9, 18, 26, 33, 39, 44, 48, 51, 53, 54), c(0, 1, 2, 3, 4,
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
51, 52, 53, 54), c(0, 0, 1, 3, 6, 10, 15, 21, 28, 36, 45,
55), c(0, 10, 19, 27, 34, 40, 45, 49, 52, 54, 55, 55), list(
c(1, 0, 1), structure(list(), .Names = character(0)),
list(name = c("jpm", "gs", "ms", "bofa", "schwab", "brk",
"wf", "citi", "amex", "spgl", "pnc")), list(wt = c(1.72565213162016,
10000, 10000, 10000, 2.60988018061569, 3.37529546067647,
5.68789870362681, 2.44357606642214, 4.38114633403004,
10000, 10000, 2.49869325166531, 10000, 10000, 4.56956459390346,
3.52409742807134, 2.97961673322383, 3.42809851201881,
3.15481552530237, 7.32112737506667, 10000, 7.0852416616783,
3.99494740752879, 2.65955867194822, 10000, 10000, 10000,
10000, 5.73934520134914, 1.80740569361977, 1.5783164909029,
2.84567417160359, 10000, 10000, 10000, 10000, 5.30260309989479,
10000, 10000, 10000, 10000, 7.06161817483184, 6.9222112543713,
4.63691541477454, 3.48797079504012, 6.38029319494032,
10000, 2.48116694808653, 10000, 2.12352867446693, 3.04335319291233,
10000, 10000, 5.22409020671212, 10000))), <environment>), class = "igraph")
dput(g3)
structure(list(11, FALSE, c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 2,
3, 4, 5, 6, 7, 8, 9, 10, 3, 4, 5, 6, 7, 8, 9, 10, 4, 5, 6, 7,
8, 9, 10, 5, 6, 7, 8, 9, 10, 6, 7, 8, 9, 10, 7, 8, 9, 10, 8,
9, 10, 9, 10, 10), c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4,
4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 8, 8, 9),
c(0, 1, 10, 2, 11, 19, 3, 12, 20, 27, 4, 13, 21, 28, 34,
5, 14, 22, 29, 35, 40, 6, 15, 23, 30, 36, 41, 45, 7, 16,
24, 31, 37, 42, 46, 49, 8, 17, 25, 32, 38, 43, 47, 50, 52,
9, 18, 26, 33, 39, 44, 48, 51, 53, 54), c(0, 1, 2, 3, 4,
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
51, 52, 53, 54), c(0, 0, 1, 3, 6, 10, 15, 21, 28, 36, 45,
55), c(0, 10, 19, 27, 34, 40, 45, 49, 52, 54, 55, 55), list(
c(1, 0, 1), structure(list(), .Names = character(0)),
list(name = c("jpm", "gs", "ms", "bofa", "schwab", "brk",
"wf", "citi", "amex", "spgl", "pnc")), list(wt = c(10000,
4.14221420842333, 2.69857209553848, 5.77115055524614,
1.95672007809809, 2.98690863617922, 1.92161847347613,
2.34571882319417, 10000, 10000, 1.97201563662035, 5.4078452590091,
10000, 6.85345421615961, 3.51453278996926, 10000, 10000,
2.08964950396744, 10000, 2.78868220464486, 10000, 3.41857460835555,
4.57693796722718, 1.96044036389548, 10000, 6.69365386837721,
2.61525679780493, 7.34195637377719, 2.57334862699097,
3.54317409176484, 10000, 2.33889236077345, 2.49271973693215,
5.47858809426897, 10000, 5.25238753114071, 10000, 10000,
10000, 10000, 10000, 2.68400716970295, 2.49075030691088,
2.59993683645561, 10000, 10000, 2.49345951327313, 5.7338881554994,
1.73687483250752, 4.24032760636804, 3.11756167665892,
5.07827243244947, 10000, 1.69643890905687, 10000))),
<environment>), class = "igraph")
I think you can try list like below
g <- list(g1,g2,g3)
I have a lot of financial trading data with around a million rows and I want to be able to condense this into a new data frame with a list of Unique UserIDs. I then want to be able to add up the "trades" for their account, with some conditions, ie if TransactionTypeId == 2 & AC_Type== 19. I would use a sumifs in excel for this but the size of the file means its pretty much impossible to run on my computer.
df<- structure(list(UserId = c(1, 1, 1, 1, 2,
2, 2, 3, 3, 3, 4, 5, 6,
6, 6, 7, 7, 7, 8, 8, 8,
8, 8, 9, 9, 9, 10, 11, 12,
12, 13, 13, 13, 14, 14, 15, 15,
16, 16, 16), TransactionTypeId = c(14, 1, 1, 70,
15, 1, 1, 14, 1, 1, 70, 14, 14, 1, 1, 14, 1, 1, 14, 1, 1, 1,
1, 14, 1, 1, 14, 14, 1, 1, 14, 1, 1, 1, 1, 70, 70, 14, 1, 1),
AC_Type = c(21, 21, 21, 21, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 21, 21, 21, 19, 19, 19, 19, 19, 19, 19, 19, 20,
19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20), Trades = c(30,
30, 0.00067116, 0.00067115, 249, 249, 0.00533033, 48.75,
48.75, 0.00101298, 0.00533, 24.37, 146.25, 146.25, 0.00309109,
100.01, 100.01, 0.00233551, 97.5, 90, 0.00189134, 5, 0.00245851,
234, 234, 0.00500802, 100.01, 48.75, 48.5, 0.0275474, 24,
24, 0.00051975, 100, 0.00223998, 0.00051975, 0.00205, 9.75,
8.75, 0.00017811)), row.names = c(NA, -40L), class = c("tbl_df",
"tbl", "data.frame"))
You can take sum of the logical condition that you want to count.
library(dplyr)
df %>%
group_by(UserId) %>%
summarise(count = sum(Trades[TransactionTypeId == 2 & AC_Type== 19]))
Not quite sure what you want ...
libary(dplyr)
df %>%
group_by(UserId) %>%
filter(TransactionTypeId == 1 & AC_Type == 19) %>%
summarise(sum = sum(Trades))
# A tibble: 6 x 2
UserId sum
<dbl> <dbl>
1 2 249.
2 3 48.8
3 6 146.
4 8 95.0
5 9 234.
6 12 48.5
Here you first group_by UserId, then filterthose rows that meet your conditions (NB: I've changed 2to 1 as there aren't any 2s in the sample data), and finally summarise by summing up the values in Trades.
Using data.table
library(data.table)
setDT(df)[, .(count = sum(Trades[TransactionTypeId == 2 &
AC_Type== 19], na.rm = TRUE)), UserId]
So I have read in a network data in iGraph(R) and would like to store the nodes into a list. Here's what I have done:
G = read_graph("somegraph.graphml",format="graphml")
x = list(V(G))
> x
+ 15/15 vertices, from ecb3920:
[1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
My question is, how do I get the true value, i.e. the actually node id in my data, from V(G). Thanks.
> dput(G)
structure(list(15, FALSE, c(13, 7, 9, 14, 10, 5, 4, 11, 6, 7,
14, 4, 13, 9, 10, 5, 5, 13, 9, 6, 7, 14, 12, 10, 14, 10, 11,
13, 9, 10, 12, 14, 8, 7, 11, 12, 8, 13, 14, 9, 11, 13, 13, 12,
14, 10, 13, 12, 14, 12, 13, 13, 14, 14), c(0, 0, 2, 2, 2, 2,
2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 6,
6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 9, 9, 9, 9, 10,
10, 10, 11, 11, 12, 12, 13), c(6, 11, 5, 15, 16, 8, 19, 1, 9,
20, 33, 32, 36, 2, 13, 18, 28, 39, 4, 14, 23, 25, 29, 45, 7,
26, 34, 40, 22, 30, 35, 43, 47, 49, 0, 12, 17, 27, 37, 41, 42,
46, 50, 51, 3, 10, 21, 24, 31, 38, 44, 48, 52, 53), c(1, 0, 6,
5, 2, 4, 3, 11, 15, 8, 9, 13, 14, 7, 12, 10, 16, 19, 20, 18,
23, 22, 17, 21, 25, 24, 33, 32, 28, 29, 26, 30, 27, 31, 36, 39,
34, 35, 37, 38, 40, 41, 45, 43, 42, 44, 47, 46, 48, 49, 50, 51,
52, 53), c(0, 0, 0, 0, 0, 2, 5, 7, 11, 13, 18, 24, 28, 34, 44,
54), c(0, 2, 2, 7, 16, 24, 26, 34, 40, 42, 46, 49, 51, 53, 54,
54), list(c(1, 0, 1), structure(list(), .Names = character(0)),
structure(list(id = c("1351920706", "500102244", "1454425532",
"1625050630", "510838353", "1262640078", "681721364", "1351920717",
"1260750116", "1524975171", "1070293410", "727198538", "715215233",
"1351920666", "500920034")), .Names = "id"), list()), <environment>), class = "igraph")
Just for closure (and to summarise from our chat): Based on the sample data you give, you can extract additional data for every vertex by indexing the corresponding element.
So
V(g)$id
returns
#[1] "1351920706" "500102244" "1454425532" "1625050630" "510838353"
#[6] "1262640078" "681721364" "1351920717" "1260750116" "1524975171"
#[11] "1070293410" "727198538" "715215233" "1351920666" "500920034"
I have the same question as this how to merge two weighted graph and sum weigths.
But here ist my R code for better understanding:
g1 <- graph.full(10)
V(g1)$name <- letters[1:vcount(g1)]
E(g1)$weight <- 1
g3 <- graph.full(5)
V(g3)$name <- c("a", "b", "x", "y", "z")
E(g3)$weight <- 1
graph.union.by.name(g1, g3)
The weights in merged graph should be a 2 on same edges in g1 and g3 (a - b)
And the dput of graphs is:
> dput(g1)
structure(list(10, FALSE, c(1, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3,
4, 5, 6, 7, 8, 9, 3, 4, 5, 6, 7, 8, 9, 4, 5, 6, 7, 8, 9, 5, 6,
7, 8, 9, 6, 7, 8, 9, 7, 8, 9, 8, 9, 9), c(0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3,
3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 7, 7, 8), c(0, 1, 9,
2, 10, 17, 3, 11, 18, 24, 4, 12, 19, 25, 30, 5, 13, 20, 26, 31,
35, 6, 14, 21, 27, 32, 36, 39, 7, 15, 22, 28, 33, 37, 40, 42,
8, 16, 23, 29, 34, 38, 41, 43, 44), c(0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
40, 41, 42, 43, 44), c(0, 0, 1, 3, 6, 10, 15, 21, 28, 36, 45),
c(0, 9, 17, 24, 30, 35, 39, 42, 44, 45, 45), list(c(1, 0,
1), structure(list(name = "Full graph", loops = FALSE), .Names = c("name",
"loops")), structure(list(name = c("a", "b", "c", "d", "e",
"f", "g", "h", "i", "j")), .Names = "name"), structure(list(
weight = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)), .Names = "weight"))), class = "igraph")
> dput(g2)
structure(list(10, FALSE, c(1, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3,
4, 5, 6, 7, 8, 9, 3, 4, 5, 6, 7, 8, 9, 4, 5, 6, 7, 8, 9, 5, 6,
7, 8, 9, 6, 7, 8, 9, 7, 8, 9, 8, 9, 9), c(0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3,
3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 7, 7, 8), c(0, 1, 9,
2, 10, 17, 3, 11, 18, 24, 4, 12, 19, 25, 30, 5, 13, 20, 26, 31,
35, 6, 14, 21, 27, 32, 36, 39, 7, 15, 22, 28, 33, 37, 40, 42,
8, 16, 23, 29, 34, 38, 41, 43, 44), c(0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
40, 41, 42, 43, 44), c(0, 0, 1, 3, 6, 10, 15, 21, 28, 36, 45),
c(0, 9, 17, 24, 30, 35, 39, 42, 44, 45, 45), list(c(1, 0,
1), structure(list(name = "Full graph", loops = FALSE), .Names = c("name",
"loops")), structure(list(name = c("a", "b", "c", "d", "e",
"f", "g", "h", "i", "j")), .Names = "name"), structure(list(
weight = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)), .Names = "weight"))), class = "igraph")
Is it possible with igraph or do i need some workaround?
This will be supported in the next version, until then here is a workaround:
mymerge <- function(g1, g2) {
e1 <- get.data.frame(g1, what="edges")
e2 <- get.data.frame(g2, what="edges")
e <- merge(e1, e2, by=c("from", "to"), all=TRUE)
newe <- data.frame(e[,c("from", "to"), drop=FALSE],
weight=rowSums(e[, c("weight.x", "weight.y")], na.rm=TRUE))
graph.data.frame(newe, directed=is.directed(g1))
}
mymerge(g1, g3)
# IGRAPH UNW- 13 54 --
# + attr: name (v/c), weight (e/n)
mymerge(g1, g3)["a", "b"]
# [1] 2
I have data for free parking slots over hours and days.
Here's a random sample of 100.
sl <- list(EmptySlots = c(7, 6, 20, 5, 16, 20, 24, 5, 24, 24, 15, 11,
8, 6, 13, 2, 21, 6, 1, 6, 9, 1, 8, 0, 20, 9, 20, 11, 22, 24,
1, 2, 12, 6, 8, 2, 23, 18, 8, 3, 20, 2, 1, 0, 5, 21, 1, 4, 20,
15, 24, 12, 4, 14, 2, 4, 20, 16, 2, 10, 2, 1, 24, 9, 22, 7, 6,
3, 20, 13, 1, 16, 12, 5, 2, 7, 4, 1, 6, 1, 1, 2, 0, 13, 24, 6,
13, 7, 24, 24, 15, 6, 10, 1, 2, 9, 5, 2, 11, 15), hour = c(8,
16, 23, 14, 18, 7, 17, 15, 19, 19, 17, 17, 16, 14, 17, 12, 19,
10, 10, 13, 16, 10, 16, 11, 12, 9, 0, 15, 16, 21, 10, 11, 17,
11, 16, 15, 23, 7, 16, 14, 18, 14, 14, 9, 15, 2, 10, 9, 19, 17,
20, 16, 12, 17, 12, 9, 23, 9, 15, 17, 10, 12, 18, 17, 18, 17,
13, 10, 7, 8, 10, 18, 11, 11, 12, 17, 12, 9, 14, 15, 10, 11,
10, 10, 20, 16, 18, 15, 21, 18, 17, 13, 8, 11, 15, 16, 11, 9,
12, 18))
A quick way to calculate a LOESS function via ggplot2.
sl <- as.data.frame(sl)
library(ggplot2)
qplot(hour, EmptySlots, data=sl, geom="jitter") + theme_bw() + stat_smooth(size = 2)
What is the best way to tell the LOESS function that 0 and 24 are neighbours? I.e. the line on the left and the right should be the same value if we were to estimate it this way.
Pointers on where to start will do fine.
I'd be tempted just to replicate the data on either side:
library(ggplot2)
empty <- c(7, 6, 20, 5, 16, 20, 24, 5, 24, 24, 15, 11, 8, 6, 13, 2, 21, 6, 1, 6, 9, 1, 8, 0, 20, 9, 20, 11, 22, 24, 1, 2, 12, 6, 8, 2, 23, 18, 8, 3, 20, 2, 1, 0, 5, 21, 1, 4, 20, 15, 24, 12, 4, 14, 2, 4, 20, 16, 2, 10, 2, 1, 24, 9, 22, 7, 6, 3, 20, 13, 1, 16, 12, 5, 2, 7, 4, 1, 6, 1, 1, 2, 0, 13, 24, 6, 13, 7, 24, 24, 15, 6, 10, 1, 2, 9, 5, 2, 11, 15)
hour <- c(8, 16, 23, 14, 18, 7, 17, 15, 19, 19, 17, 17, 16, 14, 17, 12, 19, 10, 10, 13, 16, 10, 16, 11, 12, 9, 0, 15, 16, 21, 10, 11, 17, 11, 16, 15, 23, 7, 16, 14, 18, 14, 14, 9, 15, 2, 10, 9, 19, 17, 20, 16, 12, 17, 12, 9, 23, 9, 15, 17, 10, 12, 18, 17, 18, 17, 13, 10, 7, 8, 10, 18, 11, 11, 12, 17, 12, 9, 14, 15, 10, 11, 10, 10, 20, 16, 18, 15, 21, 18, 17, 13, 8, 11, 15, 16, 11, 9, 12, 18)
emptyrep <- rep.int(empty,3)
hourrep <- c(hour,hour+24,hour-24)
sl <- data.frame(empty=emptyrep, hour=hourrep)
qplot(hour, empty, data=sl, geom="jitter") + theme_bw() + geom_smooth(method="loess",size = 1.5,span=0.2) + coord_cartesian(xlim=c(0,24))
... just like joran said a few minutes earlier (woops)