Rename list of lists using a named list - r

So I'm working with a list that contains other lists inside, with this structure:
library(graph)
library(RBGL)
library(Rgraphviz)
show(tree)
$`SO:0001968`
$`SO:0001968`$`SO:0001622`
$`SO:0001968`$`SO:0001622`$`SO:0001624`
$`SO:0001968`$`SO:0001622`$`SO:0001624`$`SO:0002090`
[1] 1
$`SO:0001968`$`SO:0001622`$`SO:0001623`
$`SO:0001968`$`SO:0001622`$`SO:0001623`$`SO:0002091`
[1] 1
$`SO:0001968`$`SO:0001969`
$`SO:0001968`$`SO:0001969`$`SO:0002090`
[1] 1
$`SO:0001968`$`SO:0001969`$`SO:0002091`
[1] 1
dput(tree)
list(`SO:0001968` = list(`SO:0001622` = list(`SO:0001624` = list(
`SO:0002090` = 1), `SO:0001623` = list(`SO:0002091` = 1)),
`SO:0001969` = list(`SO:0002090` = 1, `SO:0002091` = 1)))
The data I use to build the list comes from an object called g:
show(g)
A graphNEL graph with directed edges
Number of Nodes = 7
Number of Edges = 8
dput(g)
new("graphNEL",
nodes = c("SO:0001968", "SO:0001969", "SO:0001622",
"SO:0001623", "SO:0001624", "SO:0002090", "SO:0002091"), edgeL = list(
`SO:0001968` = list(edges = 3:2), `SO:0001969` = list(edges = 6:7),
`SO:0001622` = list(edges = 5:4), `SO:0001623` = list(edges = 7L),
`SO:0001624` = list(edges = 6L), `SO:0002090` = list(edges = integer(0)),
`SO:0002091` = list(edges = integer(0))), edgeData = new("attrData",
data = list(`SO:0001968|SO:0001622` = list(weight = 1), `SO:0001968|SO:0001969` = list(
weight = 1), `SO:0001969|SO:0002090` = list(weight = 1),
`SO:0001969|SO:0002091` = list(weight = 1), `SO:0001622|SO:0001624` = list(
weight = 1), `SO:0001622|SO:0001623` = list(weight = 1),
`SO:0001623|SO:0002091` = list(weight = 1), `SO:0001624|SO:0002090` = list(
weight = 1)), defaults = list(weight = 1)), nodeData = new("attrData",
data = list(`SO:0001968` = list(label = "coding_transcript_variant"),
`SO:0001969` = list(label = "coding_transcript_intron_variant"),
`SO:0001622` = list(label = "UTR_variant"), `SO:0001623` = list(
label = "5_prime_UTR_variant"), `SO:0001624` = list(
label = "3_prime_UTR_variant"), `SO:0002090` = list(
label = "3_prime_UTR_intron_variant"), `SO:0002091` = list(
label = "5_prime_UTR_intron_variant")), defaults = list(
label = NA_character_)), renderInfo = new("renderInfo",
nodes = list(), edges = list(), graph = list(), pars = list()),
graphData = list(edgemode = "directed"))
Each SO:000XXX corresponds to a name, and I can find the names using the function nodeData, that returns a named list:
nodeData(g, nodes(g), "label")
$`SO:0001968`
[1] "coding_transcript_variant"
$`SO:0001969`
[1] "coding_transcript_intron_variant"
$`SO:0001622`
[1] "UTR_variant"
$`SO:0001623`
[1] "5_prime_UTR_variant"
$`SO:0001624`
[1] "3_prime_UTR_variant"
$`SO:0002090`
[1] "3_prime_UTR_intron_variant"
$`SO:0002091`
[1] "5_prime_UTR_intron_variant"
What I need is to replace (or rename) the data in the tree list with the corresponding string of the nodeData function.
For example, replace the 'SO:0001968' in the tree list with coding_transcript_variant from the nodeData function.

This recursive function should do the trick :
# you will do this but I couldn't install your packages
# nodeD <- nodeData(g, nodes(g), "label")
nodeD <- list(`SO:0001968` = "coding_transcript_variant",
`SO:0001969` = "coding_transcript_intron_variant",
`SO:0001622` = "UTR_variant",
`SO:0001623` = "5_prime_UTR_variant",
`SO:0001624` = "3_prime_UTR_variant",
`SO:0002090` = "3_prime_UTR_intron_variant",
`SO:0002091` = "5_prime_UTR_intron_variant")
#' Recursively rename the elements of a nested list using a lookup table
#'
#' @param item A (possibly nested) named list. Anything that is not a list is
#'   returned unchanged.
#' @param labels Named list or named character vector mapping current element
#'   names to their replacement labels. Defaults to the `nodeD` lookup defined
#'   above, so existing calls `rename_items(tree)` keep working.
#' @return `item` with every name that has a non-missing entry in `labels`
#'   replaced by that entry. Names without a usable entry are kept as they are
#'   (previously they were turned into the literal string "NULL").
rename_items <- function(item, labels = nodeD) {
  # Leaves carry no names to translate.
  if (!is.list(item)) {
    return(item)
  }

  # Rename the children first, then this level.
  item <- lapply(item, rename_items, labels = labels)

  old_names <- names(item)
  if (is.null(old_names)) {
    return(item)
  }

  hit <- match(old_names, names(labels))
  found <- which(!is.na(hit))
  replacement <- vapply(
    labels[hit[found]],
    function(label) as.character(label)[1L],
    character(1),
    USE.NAMES = FALSE
  )

  # Only overwrite where the lookup produced a real label.
  usable <- !is.na(replacement)
  new_names <- old_names
  new_names[found[usable]] <- replacement[usable]
  names(item) <- new_names

  item
}
tree2 <- rename_items(tree)
Result
# $coding_transcript_variant
# $coding_transcript_variant$UTR_variant
# $coding_transcript_variant$UTR_variant$`3_prime_UTR_variant`
# $coding_transcript_variant$UTR_variant$`3_prime_UTR_variant`$`3_prime_UTR_intron_variant`
# [1] 1
#
#
# $coding_transcript_variant$UTR_variant$`5_prime_UTR_variant`
# $coding_transcript_variant$UTR_variant$`5_prime_UTR_variant`$`5_prime_UTR_intron_variant`
# [1] 1
#
#
#
# $coding_transcript_variant$coding_transcript_intron_variant
# $coding_transcript_variant$coding_transcript_intron_variant$`3_prime_UTR_intron_variant`
# [1] 1
#
# $coding_transcript_variant$coding_transcript_intron_variant$`5_prime_UTR_intron_variant`
# [1] 1

If you save the output from nodeData() to a vector, you can use the names() function to assign the names to a list().
An example of assigning names to list elements:
x <- 1:5
y <- 11:20
z <- 21:25
theList <- list(x,y,z)
listNames <- c("element1","element2","element3")
names(theList) <- listNames
# access first element by name, using $ form of extract operator
theList$element1
...and the output:
> theList$element1
[1] 1 2 3 4 5
>
You may need to unlist() the output of nodeData() as follows:
theNames <- unlist(nodeData(g, nodes(g), "label"))
names(g) <- theNames

Related

how to make two networks connected with selected members

I have a data like this
df<- structure(list(Core = c("Bestman", "Tetra"), member1 = c("Tera1",
"Brownie1"), member2 = c("Tera2", "Brownie2"), member3 = c("Tera3",
"Brownie3"), member4 = c("Tera4", "Brownie4"), member5 = c("Tera5",
"Brownie5"), member6 = c("", "Brownie6"), member7 = c("", "Brownie7"
)), class = "data.frame", row.names = c(NA, -2L))
I want to connect all the members to their Core. For example, if you look at the first row, you can see there are 5 members; I want to connect them to their Core.
The same for the second row
Then I connect both Core together
Here is what I have done
mydf <- crossprod(table(cbind(df[1], stack(df[-1]))[-3]))
graph_from_adjacency_matrix(mydf, diag = F, weighted = T, mode = "undirected") %>%
plot(edge.width = E(.)$weight)
If I understood correctly, what you want is:
library(igraph)
df<- data.frame(Core = c("Bestman", "Tetra"), member1 = c("Tera1",
"Brownie1"), member2 = c("Tera2", "Brownie2"), member3 = c("Tera3",
"Brownie3"), member4 = c("Tera4", "Brownie4"), member5 = c("Tera5",
"Brownie5"), member6 = c("", "Brownie6"), member7 = c("", "Brownie7"))
# Build the (core, member) edge list row by row.
# lapply() always returns a list; apply() over rows collapses to a matrix when
# every row has the same number of non-empty cells, which broke the previous
# do.call(rbind, ...) construction.
member_edges <- lapply(seq_len(nrow(df)), function(i) {
  core <- as.character(df$Core[i])
  members <- as.character(unlist(df[i, -1], use.names = FALSE))
  members <- members[members != ""]
  # Interleave as core, member, core, member, ... (flat vertex-pair format).
  as.vector(rbind(rep(core, length(members)), members))
})
edges <- unlist(member_edges)

# Connect every pair of cores; with a single core there is nothing to connect.
core_edges <- if (nrow(df) > 1) combn(df$Core, 2) else NULL

g <- make_graph(c(edges, core_edges), directed = FALSE)
plot(g, edge.width = E(g)$weight)
EDIT
To colorize and resize nodes:
V(g)$color <- apply(df, 1, \(x) names(V(g)) %in% x) |> apply(1,which)
V(g)$size <- 15
V(g)[df$Core]$size <- degree(g, V(g)[df$Core]) + 15
plot(g)

Adding a new value into a nested list in R

I am using Highcharts for visualization and Highcharter generates charts as a nested list similar to the example_list below. I'm summarizing it here because the original list is much longer and complex.
example_list <- list(
x = list(
hc_opts = list(
series = list(
list(group = "group_a", data = list(0,2,4,6)),
list(group = "group_b", data = list(0,3,6,9)),
list(group = "group_c", data = list(9,4,8,12))))))
Just like the group and data nodes, I would like to add an element called type at the level of group and data, but only to the 1st and 3rd elements under series. I'm basically looking for an output same as the following:
example_list <- list(
x = list(
hc_opts = list(
series = list(
list(group = "group_a", data = list(0,2,4,6), type = "type_X"),
list(group = "group_b", data = list(0,3,6,9)),
list(group = "group_c", data = list(9,4,8,12), type = "type_Y")))))
I can do this with a for-loop (because I have vectors for locations and types), but
there should be an elegant way of coding it. My best attempt so far.
# Positions (within `series`) that receive a `type`, and the matching values.
locations <- c(1, 3)
types <- c("type_X", "type_Y")
# seq_along() yields an empty sequence when `locations` is empty, whereas
# 1:length(locations) would iterate over c(1, 0).
for (i in seq_along(locations)) {
  example_list[["x"]][["hc_opts"]][["series"]][[locations[i]]][["type"]] <- types[i]
}
We can use Map :
example_list$x$hc_opts$series[c(1, 3)] <- Map(c,
example_list$x$hc_opts$series[c(1, 3)], type = types)
#$x
#$x$hc_opts
#$x$hc_opts$series
#$x$hc_opts$series[[1]]
#$x$hc_opts$series[[1]]$group
#[1] "group_a"
#$x$hc_opts$series[[1]]$data
#$x$hc_opts$series[[1]]$data[[1]]
#[1] 0
#$x$hc_opts$series[[1]]$data[[2]]
#[1] 2
#$x$hc_opts$series[[1]]$data[[3]]
#[1] 4
#$x$hc_opts$series[[1]]$data[[4]]
#[1] 6
#$x$hc_opts$series[[1]]$type
#[1] "type_X"
#...
#...

Create a script with 2 vectors in R

I'm using Heatmap from the package complexheatmap
in the script, I need to create a variable ha_column that I will incorporate into my script.
ha_column = HeatmapAnnotation (df = data.frame(type1=c(rep("name1",5), rep("name2",5),rep("name3",5), col = list(type1=c("name1" = "#DCDCDC", "name2" = "#DC928B", "name2"="#BA72D3")))))
I have 2 vectors:
vectors1=c("name1","name2","name3)
vectors2=c("#DCDCDC","#DC928B","#BA72D3")
and the idea is to reproduce the above script with these two vectors.
I tried:
paste0("ha_column = HeatmapAnnotation(df = data.frame(type1 = c(rep(",vectors1,", 5),col = list(type1 = c(",vectors1,"=",vectors2,")))")
but it only pastes line by line, such as:
[1] "ha_column = HeatmapAnnotation(df = data.frame(type1 = c(rep(name1, 5),col = list(type1 = c(name1=#DCDCDC)))"
[2] "ha_column = HeatmapAnnotation(df = data.frame(type1 = c(rep(name2, 5),col = list(type1 = c(name2=#DC928B)))"
[3] "ha_column = HeatmapAnnotation(df = data.frame(type1 = c(rep(name3, 5),col = list(type1 = c(name3=#BA72D3)))"
instead of doing what I want ...
Does anyone have an idea?
Thanks for your time.
It's generally not a good idea to build code as a string. Instead think of building a function to do what you want.
You could do something like
#' Build a column annotation from parallel vectors of names and colors
#'
#' @param names Character vector of annotation levels.
#' @param colors Vector of colors, one per entry of `names` (same order).
#' @param each Number of consecutive columns assigned to each level.
#'   Defaults to 5, the value that was previously hard-coded.
#' @return The value of `HeatmapAnnotation()` called with a one-column data
#'   frame (`type1`) and a color mapping named by `names`.
ha_column_fun <- function(names, colors, each = 5) {
  # A length mismatch would silently produce NA-named or dropped colors.
  stopifnot(length(names) == length(colors))

  annotation_df <- data.frame(type1 = rep(names, each = each))
  color_map <- list(type1 = setNames(colors, names))

  HeatmapAnnotation(
    df = annotation_df,
    col = color_map
  )
}
And then you could call it with
ha_column = ha_column_fun(vectors1, vectors2)

Remove all rows tha contain values below 0.10 (in all row) in R

This is my matrix:
x<-structure(list(Sample_250 = list(`ITUB4~time+ITSA4` = 0.0189772705000679,
`ITSA4~time+ITUB4` = 0.0172247829378391, `KROT3~time+ESTC3` = 0.362976295896543,
`ESTC3~time+KROT3` = 0.919654541750147, `ELET6~time+ELET3` = 0.563149047013394,
`ELET3~time+ELET6` = 0.938978962441099, `VALE5~time+BRAP4` = 0.00879735041567956,
`BRAP4~time+VALE5` = 0.00327639807633581, `RSID3~time+PDGR3` = 0.537991430220927,
`PDGR3~time+RSID3` = 0.246554103682342, `PDGR3~time+BISA3` = 0.559254391144534,
`BISA3~time+PDGR3` = 0.61031816244403, `VALE5~time+VALE3` = 0.180842743583616,
`VALE3~time+VALE5` = 0.66647273985911, `BRPR3~time+BRML3` = 0.338499489464644,
`BRML3~time+BRPR3` = 0.319063657443075, `PETR4~time+PETR3` = 0.125540460125629,
`PETR3~time+PETR4` = 0.124801328997536, `DTEX3~time+CSAN3` = 0.93868928574058,
`CSAN3~time+DTEX3` = 0.237699406950144, `RSID3~time+BISA3` = 0.449718913669525,
`BISA3~time+RSID3` = 0.7561632200477, `ELPL4~time+ELET3` = 0.174294574975377,
`ELET3~time+ELPL4` = 0.300066723578605, `EVEN3~time+CSAN3` = 0.734452997271797,
`CSAN3~time+EVEN3` = 0.104402290451259, `KROT3~time+CIEL3` = 0.93683315998679,
`CIEL3~time+KROT3` = 0.936544198858508, `MRFG3~time+BISA3` = 0.588077047082012,
`BISA3~time+MRFG3` = 0.241408284405396), Sample_220 = list(
`ITUB4~time+ITSA4` = 0.0173697888550166, `ITSA4~time+ITUB4` = 0.0149942952128483,
`KROT3~time+ESTC3` = 0.482794731209648, `ESTC3~time+KROT3` = 0.890472799194387,
`ELET6~time+ELET3` = 0.289262231792853, `ELET3~time+ELET6` = 0.583772170805346,
`VALE5~time+BRAP4` = 0.0115132699560557, `BRAP4~time+VALE5` = 0.00454387128721931,
`RSID3~time+PDGR3` = 0.701361295124465, `PDGR3~time+RSID3` = 0.276392398580336,
`PDGR3~time+BISA3` = 0.459917895151059, `BISA3~time+PDGR3` = 0.932334809205404,
`VALE5~time+VALE3` = 0.228621489426817, `VALE3~time+VALE5` = 0.599616896543261,
`BRPR3~time+BRML3` = 0.423214373690621, `BRML3~time+BRPR3` = 0.43367402957197,
`PETR4~time+PETR3` = 0.0726218638061883, `PETR3~time+PETR4` = 0.0684556705423691,
`DTEX3~time+CSAN3` = 0.957213428702438, `CSAN3~time+DTEX3` = 0.643249328242026,
`RSID3~time+BISA3` = 0.140702283930701, `BISA3~time+RSID3` = 0.438759561659429,
`ELPL4~time+ELET3` = 0.108415504373493, `ELET3~time+ELPL4` = 0.259235741006097,
`EVEN3~time+CSAN3` = 0.995097190780355, `CSAN3~time+EVEN3` = 0.35833286961364,
`KROT3~time+CIEL3` = 0.883381800410008, `CIEL3~time+KROT3` = 0.58096328992918,
`MRFG3~time+BISA3` = 0.811273794794714, `BISA3~time+MRFG3` = 0.162511686203042),
Sample_200 = list(`ITUB4~time+ITSA4` = 0.0269410475431228,
`ITSA4~time+ITUB4` = 0.0268281043283851, `KROT3~time+ESTC3` = 0.648973944293657,
`ESTC3~time+KROT3` = 0.843925839073412, `ELET6~time+ELET3` = 0.85074648265282,
`ELET3~time+ELET6` = 0.926090646237098, `VALE5~time+BRAP4` = 0.0298988391464108,
`BRAP4~time+VALE5` = 0.0210534678726486, `RSID3~time+PDGR3` = 0.913261323047721,
`PDGR3~time+RSID3` = 0.460744060168818, `PDGR3~time+BISA3` = 0.681848278084124,
`BISA3~time+PDGR3` = 0.700508228924671, `VALE5~time+VALE3` = 0.404824931817606,
`VALE3~time+VALE5` = 0.858492744479535, `BRPR3~time+BRML3` = 0.282313695830455,
`BRML3~time+BRPR3` = 0.421361074266136, `PETR4~time+PETR3` = 0.0389941410401918,
`PETR3~time+PETR4` = 0.0366363568643157, `DTEX3~time+CSAN3` = 0.593381022274927,
`CSAN3~time+DTEX3` = 0.296186622367649, `RSID3~time+BISA3` = 0.136337062156413,
`BISA3~time+RSID3` = 0.253647313739565, `ELPL4~time+ELET3` = 0.0404140463603602,
`ELET3~time+ELPL4` = 0.0584026420525388, `EVEN3~time+CSAN3` = 0.992224496682121,
`CSAN3~time+EVEN3` = 0.364016491282029, `KROT3~time+CIEL3` = 0.923443434909376,
`CIEL3~time+KROT3` = 0.492267643047159, `MRFG3~time+BISA3` = 0.505439622239642,
`BISA3~time+MRFG3` = 0.433741779126583), Sample_180 = list(
`ITUB4~time+ITSA4` = 0.0709729806619366, `ITSA4~time+ITUB4` = 0.0703318148854131,
`KROT3~time+ESTC3` = 0.714222637099451, `ESTC3~time+KROT3` = 0.983192555139107,
`ELET6~time+ELET3` = 0.651446390753224, `ELET3~time+ELET6` = 0.504251519490735,
`VALE5~time+BRAP4` = 0.0655201102796135, `BRAP4~time+VALE5` = 0.064459649024225,
`RSID3~time+PDGR3` = 0.966515813873172, `PDGR3~time+RSID3` = 0.353225059948276,
`PDGR3~time+BISA3` = 0.819582167704402, `BISA3~time+PDGR3` = 0.457403474593761,
`VALE5~time+VALE3` = 0.834891076683459, `VALE3~time+VALE5` = 0.624305154223115,
`BRPR3~time+BRML3` = 0.338684631277372, `BRML3~time+BRPR3` = 0.645983354906404,
`PETR4~time+PETR3` = 0.016615774081754, `PETR3~time+PETR4` = 0.0165629129043023,
`DTEX3~time+CSAN3` = 0.642061011299162, `CSAN3~time+DTEX3` = 0.424690135396935,
`RSID3~time+BISA3` = 0.101897354576195, `BISA3~time+RSID3` = 0.204241392846169,
`ELPL4~time+ELET3` = 0.0729734425567139, `ELET3~time+ELPL4` = 0.128996393897499,
`EVEN3~time+CSAN3` = 0.899884399768484, `CSAN3~time+EVEN3` = 0.146722568327017,
`KROT3~time+CIEL3` = 0.830125914939971, `CIEL3~time+KROT3` = 0.567087012782755,
`MRFG3~time+BISA3` = 0.122725171728208, `BISA3~time+MRFG3` = 0.459448430490008)), row.names = c("ITUB4~time+ITSA4",
"ITSA4~time+ITUB4", "KROT3~time+ESTC3", "ESTC3~time+KROT3", "ELET6~time+ELET3",
"ELET3~time+ELET6", "VALE5~time+BRAP4", "BRAP4~time+VALE5", "RSID3~time+PDGR3",
"PDGR3~time+RSID3", "PDGR3~time+BISA3", "BISA3~time+PDGR3", "VALE5~time+VALE3",
"VALE3~time+VALE5", "BRPR3~time+BRML3", "BRML3~time+BRPR3", "PETR4~time+PETR3",
"PETR3~time+PETR4", "DTEX3~time+CSAN3", "CSAN3~time+DTEX3", "RSID3~time+BISA3",
"BISA3~time+RSID3", "ELPL4~time+ELET3", "ELET3~time+ELPL4", "EVEN3~time+CSAN3",
"CSAN3~time+EVEN3", "KROT3~time+CIEL3", "CIEL3~time+KROT3", "MRFG3~time+BISA3",
"BISA3~time+MRFG3"), class = "data.frame")
1st question) I want to remove all rows that contain values below 0.10. The values below 0.10 must occur in all 4 columns.
2nd question) I want to remove all rows that contain values below 0.10 in the first 3 columns.
I tried this:
x[x[1:nrow(x),]<.10,]
Is it possible to do this with a basic function in R?
Any help ?
Thanks
Try for question 1 x[!apply(x, 1, function(x) any(x < .10)), ]
Sample_250 Sample_220 Sample_200 Sample_180
KROT3~time+ESTC3 0.3629763 0.4827947 0.6489739 0.7142226
ESTC3~time+KROT3 0.9196545 0.8904728 0.8439258 0.9831926
ELET6~time+ELET3 0.563149 0.2892622 0.8507465 0.6514464
ELET3~time+ELET6 0.938979 0.5837722 0.9260906 0.5042515
RSID3~time+PDGR3 0.5379914 0.7013613 0.9132613 0.9665158
PDGR3~time+RSID3 0.2465541 0.2763924 0.4607441 0.3532251
PDGR3~time+BISA3 0.5592544 0.4599179 0.6818483 0.8195822
BISA3~time+PDGR3 0.6103182 0.9323348 0.7005082 0.4574035
VALE5~time+VALE3 0.1808427 0.2286215 0.4048249 0.8348911
VALE3~time+VALE5 0.6664727 0.5996169 0.8584927 0.6243052
BRPR3~time+BRML3 0.3384995 0.4232144 0.2823137 0.3386846
BRML3~time+BRPR3 0.3190637 0.433674 0.4213611 0.6459834
DTEX3~time+CSAN3 0.9386893 0.9572134 0.593381 0.642061
CSAN3~time+DTEX3 0.2376994 0.6432493 0.2961866 0.4246901
RSID3~time+BISA3 0.4497189 0.1407023 0.1363371 0.1018974
BISA3~time+RSID3 0.7561632 0.4387596 0.2536473 0.2042414
EVEN3~time+CSAN3 0.734453 0.9950972 0.9922245 0.8998844
CSAN3~time+EVEN3 0.1044023 0.3583329 0.3640165 0.1467226
KROT3~time+CIEL3 0.9368332 0.8833818 0.9234434 0.8301259
CIEL3~time+KROT3 0.9365442 0.5809633 0.4922676 0.567087
MRFG3~time+BISA3 0.588077 0.8112738 0.5054396 0.1227252
BISA3~time+MRFG3 0.2414083 0.1625117 0.4337418 0.4594484
For question 2: x[!apply(x[, 1:3], 1, function(x) any(x < .10)), ]
Sample_250 Sample_220 Sample_200 Sample_180
KROT3~time+ESTC3 0.3629763 0.4827947 0.6489739 0.7142226
ESTC3~time+KROT3 0.9196545 0.8904728 0.8439258 0.9831926
ELET6~time+ELET3 0.563149 0.2892622 0.8507465 0.6514464
ELET3~time+ELET6 0.938979 0.5837722 0.9260906 0.5042515
RSID3~time+PDGR3 0.5379914 0.7013613 0.9132613 0.9665158
PDGR3~time+RSID3 0.2465541 0.2763924 0.4607441 0.3532251
PDGR3~time+BISA3 0.5592544 0.4599179 0.6818483 0.8195822
BISA3~time+PDGR3 0.6103182 0.9323348 0.7005082 0.4574035
VALE5~time+VALE3 0.1808427 0.2286215 0.4048249 0.8348911
VALE3~time+VALE5 0.6664727 0.5996169 0.8584927 0.6243052
BRPR3~time+BRML3 0.3384995 0.4232144 0.2823137 0.3386846
BRML3~time+BRPR3 0.3190637 0.433674 0.4213611 0.6459834
DTEX3~time+CSAN3 0.9386893 0.9572134 0.593381 0.642061
CSAN3~time+DTEX3 0.2376994 0.6432493 0.2961866 0.4246901
RSID3~time+BISA3 0.4497189 0.1407023 0.1363371 0.1018974
BISA3~time+RSID3 0.7561632 0.4387596 0.2536473 0.2042414
EVEN3~time+CSAN3 0.734453 0.9950972 0.9922245 0.8998844
CSAN3~time+EVEN3 0.1044023 0.3583329 0.3640165 0.1467226
KROT3~time+CIEL3 0.9368332 0.8833818 0.9234434 0.8301259
CIEL3~time+KROT3 0.9365442 0.5809633 0.4922676 0.567087
MRFG3~time+BISA3 0.588077 0.8112738 0.5054396 0.1227252
BISA3~time+MRFG3 0.2414083 0.1625117 0.4337418 0.4594484
Does this do what you want?
In regards to question 1:
cond1 <- apply(x[,1:3] < 0.1, 1, any)
y <- x[!cond1, ]
head(x)
# Sample_250 Sample_220 Sample_200 Sample_180
#ITUB4~time+ITSA4 0.01897727 0.01736979 0.02694105 0.07097298
#ITSA4~time+ITUB4 0.01722478 0.0149943 0.0268281 0.07033181
#KROT3~time+ESTC3 0.3629763 0.4827947 0.6489739 0.7142226
#ESTC3~time+KROT3 0.9196545 0.8904728 0.8439258 0.9831926
#ELET6~time+ELET3 0.563149 0.2892622 0.8507465 0.6514464
#ELET3~time+ELET6 0.938979 0.5837722 0.9260906 0.5042515
In regards to question 2:
cond2 <- apply(x < 0.1, 1, all)
z <- x[!cond2, ]
head(y)
# Sample_250 Sample_220 Sample_200 Sample_180
#ITUB4~time+ITSA4 0.01897727 0.01736979 0.02694105 0.07097298
#ITSA4~time+ITUB4 0.01722478 0.0149943 0.0268281 0.07033181
#KROT3~time+ESTC3 0.3629763 0.4827947 0.6489739 0.7142226
#ESTC3~time+KROT3 0.9196545 0.8904728 0.8439258 0.9831926
#ELET6~time+ELET3 0.563149 0.2892622 0.8507465 0.6514464
#ELET3~time+ELET6 0.938979 0.5837722 0.9260906 0.5042515
For the first question:
subset(x, apply(x, 1, function(x) all(x > 0.1)) == TRUE)
For the second one:
subset(x, apply(x[, 1:3], 1, function(x) all(x > 0.1)) == TRUE)

How to prepare input data for a sankey diagrams in R?

I am trying to produce a Sankey diagram in R, also referred to as a river plot. I've seen the question "Sankey Diagrams in R?", where a broad variety of packages producing Sankey diagrams are listed. I have input data and know the different tools/packages that can produce such a diagram, BUT my question is: how can I prepare the input data for it?
Let's assume we would like to present how users have migrated between various states over 10 days and have start data set like the one below:
data.frame(userID = 1:100,
day1_state = sample(letters[1:8], replace = TRUE, size = 100),
day2_state = sample(letters[1:8], replace = TRUE, size = 100),
day3_state = sample(letters[1:8], replace = TRUE, size = 100),
day4_state = sample(letters[1:8], replace = TRUE, size = 100),
day5_state = sample(letters[1:8], replace = TRUE, size = 100),
day6_state = sample(letters[1:8], replace = TRUE, size = 100),
day7_state = sample(letters[1:8], replace = TRUE, size = 100),
day8_state = sample(letters[1:8], replace = TRUE, size = 100),
day9_state = sample(letters[1:8], replace = TRUE, size = 100),
day10_state = sample(letters[1:8], replace = TRUE, size = 100)
) -> dt
Now, if one would like to create a Sankey diagram with the networkD3 package, how should one transform this dt data.frame into the required input
so that we would have input like from this example
library(networkD3)
URL <- paste0(
"https://cdn.rawgit.com/christophergandrud/networkD3/",
"master/JSONdata/energy.json")
Energy <- jsonlite::fromJSON(URL)
# Plot
sankeyNetwork(Links = Energy$links, Nodes = Energy$nodes, Source = "source",
Target = "target", Value = "value", NodeID = "name",
units = "TWh", fontSize = 12, nodeWidth = 30)
EDIT
I have found such script which prepares data in other situation and reproduced it so I assume it might be closed now:
https://github.com/mi2-warsaw/JakOniGlosowali/blob/master/sankey/sankey.R
I have found such script which prepares data in other situation and reproduced it so I assume it might be closed now:
https://github.com/mi2-warsaw/JakOniGlosowali/blob/master/sankey/sankey.R
Then this code generates such sankey diagram for mentioned in question data.frame
#' Cross-tabulate the arguments and rebalance a "_" placeholder category
#'
#' Builds `table(...)`. When the first row label and the first column label
#' both start with "_", the counts in that first row/column are redistributed
#' (see the inline comments); otherwise the plain table is returned.
#'
#' @param ... Passed on to `table()`.
#' @return A table with the same dimensions and dimnames as `table(...)`.
fixtable <- function(...) {
  tab <- table(...)

  # Scalar test: isTRUE() with && copes with an empty table, where the first
  # label is missing and the previous `&` condition made if() fail.
  first_col_marked <- isTRUE(substr(colnames(tab)[1], 1, 1) == "_")
  first_row_marked <- isTRUE(substr(rownames(tab)[1], 1, 1) == "_")

  if (first_col_marked && first_row_marked) {
    # Keep only the part of a label that precedes the first space.
    first_token <- function(labels) {
      vapply(strsplit(labels, split = " "), function(parts) parts[1], character(1))
    }

    tab2 <- tab
    colnames(tab2) <- first_token(colnames(tab2))
    rownames(tab2) <- first_token(rownames(tab2))
    tab2[1, 1] <- 0

    # Seat stays within the club: move the count shared by [par, 1] and
    # [1, par] onto the diagonal cell [par, par].
    for (par in names(which(tab2[1, ] > 0))) {
      delta <- min(tab2[par, 1], tab2[1, par])
      tab2[par, par] <- tab2[par, par] + delta
      tab2[1, par] <- tab2[1, par] - delta
      tab2[par, 1] <- tab2[par, 1] - delta
    }

    # Passes through the independents ("niez."): whatever is left in the
    # first row is moved to the "niez." row ...
    for (par in names(which(tab2[1, ] > 0))) {
      tab2["niez.", par] <- tab2["niez.", par] + tab2[1, par]
      tab2[1, par] <- 0
    }
    # ... and whatever is left in the first column to the "niez." column.
    for (par in names(which(tab2[, 1] > 0))) {
      tab2[par, "niez."] <- tab2[par, "niez."] + tab2[par, 1]
      tab2[par, 1] <- 0
    }

    # Copy the adjusted counts back while keeping the original dimnames.
    tab[] <- tab2[]
  }

  tab
}
# Tabulate the transitions between each pair of consecutive days and stack
# them into one long data frame (columns z, do, Freq).
# The data frame from the question is `dt` (the previous code read an
# undefined `dat`).
n_days <- 10L
flow2 <- do.call(rbind, lapply(seq_len(n_days - 1L), function(day) {
  from_state <- dt[[paste0("day", day, "_state")]]
  to_state <- dt[[paste0("day", day + 1L, "_state")]]
  data.frame(fixtable(
    z = paste0(from_state, " day", day),
    do = paste0(to_state, " day", day + 1L)
  ))
}))

# Drop transitions that never occur.
flow2 <- flow2[flow2[, 3] > 0, ]

# Node table: every distinct "state dayN" label, sources first.
nodes2 <- data.frame(
  name = unique(c(levels(factor(flow2[, 1])), levels(factor(flow2[, 2]))))
)

# Zero-based node index keyed by node label.
nam2 <- seq_along(nodes2[, 1]) - 1
names(nam2) <- nodes2[, 1]

links2 <- data.frame(
  source = nam2[as.character(flow2[, 1])],
  target = nam2[as.character(flow2[, 2])],
  value = flow2[, 3]
)

# Plot the objects built above (`links2` / `nodes2`); the previous call passed
# the undefined names `links` / `nodes`.
# NOTE(review): "d3.scale.category20()" looks like D3 v3 syntax; confirm it
# still renders with the installed networkD3 version.
sankeyNetwork(
  Links = links2, Nodes = nodes2,
  Source = "source", Target = "target",
  Value = "value", NodeID = "name",
  fontFamily = "Arial", fontSize = 12, nodeWidth = 40,
  colourScale = "d3.scale.category20()"
)
I asked a similar question a while ago, and I guess I should post here how it can be done with the tidyverse magic.
library(ggplot2)
library(ggalluvial)
library(tidyr)
library(dplyr)
library(stringr)
# The actual data preparation happens here: reshape the wide per-day table
# into one row per (userID, day) pair.
# NOTE(review): gather() is superseded in tidyr; pivot_longer() is the current
# equivalent but may order rows differently — confirm before switching.
dt_new <- dt %>%
gather(day, state, -userID) %>% # Long format: columns userID, day, state
mutate(day = str_match(day, "[0-9]+")[,1]) %>% # Keep only the digits of the column name
mutate(day = as.integer(day), # Convert to proper data types
state = as.factor(state))
Here is what the data dt_new looks like
userID day state
1 1 1 d
2 2 1 d
3 3 1 g
4 4 1 a
5 5 1 a
6 6 1 d
7 7 1 d
8 8 1 b
9 9 1 d
10 10 1 e
...
Now plotting the Sankey plot:
ggplot(dt_new,
aes(x = day, stratum = state, alluvium = userID, fill = state, label = state)) +
geom_stratum() +
geom_text(stat = "stratum") +
geom_flow()
Here is the output

Resources