R create Random Networks according data frame - r

I have a data frame "ref.df" that has info about 12 networks. I want to create 100 random networks for each subject according their node and edge numbers.
I've tried this code but it didn't work well:
library(igraph)
random.networks <- list()
for(i in ref.df$subject){
cat("...")
for( j in 1:100){
random.networks[[j]] <- sample_gnm(n=ref.df$node,m=ref.df$edge, directed = TRUE, loops = FALSE)
}
cat(i,"\n")
}
This code generate 100 random networks only for the first subject.
Thanks for your time and advice in advance.
You can reproduce my data frame:
ref.df <- data.frame(subject=c("Civil.Liberties","Foreign.Policy","Women.s.Rights","Workers..Rights",
"Political.Polarisation","Kurdish.Peace.Process","Parallel.State",
"HDP.Election.Slogans","Related.With.Election","CHP.Election.Slogans",
"AKP.Election.Slogans","MHP.Election.Slogans"),
group=c(298,1150,474,2522,0,2570,718,2736,0,1661,2175,1460),
mod=c(0.77,0.73,0.84,0.78,0,0.72,0.66,0.62,0,0.68,0.76,0.66),
node=c(13524,68792,21925,87094,195678,98008,28499,93024,201342,61539,91640,63035),
edge=c(18694,183932,27120,143032,710044,249267,108352,255615,579919,17590,3313147,213367))

If the problem is that you want 12 x 100 networks and you are only getting a list of 100, with a minimal modification to your code, you can do as follows:
random.networks <- list()
for (subj in ref.df$subject){
cat("...")
for (i in 1:100) {
tmp <- sample_gnm(n=ref.df$node[ref.df$subject == subj],
m=ref.df$edge[ref.df$subject == subj],
directed = TRUE, loops = FALSE)
random.networks[[(length(random.networks) + 1)]] <- tmp
names(random.networks)[length(random.networks)] <- paste(as.vector(subj), i, sep = "_")
}
cat(as.vector(subj),"\n")
}
random.networks
If you want to make sure that the random networks you generate are different, you may want trying the following approach, but because of the cross comparisons, this will be very slow.
random.networks <- list()
look.up <- list()
for (subj in ref.df$subject){
cat("...")
for (i in 1:100) {
tmp <- NA
# enforce uniqueness
while(is.na(tmp)|
as.character(tmp)[4] %in% look.up) {
tmp <- sample_gnm(n=ref.df$node[ref.df$subject == subj],
m=ref.df$edge[ref.df$subject == subj],
directed = TRUE, loops = FALSE)
}
random.networks[[(length(random.networks) + 1)]] <- tmp
look.up[[(length(look.up) + 1)]] <- as.character(tmp)[4]
names(random.networks)[length(random.networks)] <- paste(as.vector(subj), i, sep = "_")
}
cat(as.vector(subj),"\n")
}

Related

Loop over a list in R

I want to do an operation if each data frame of a list. I want to perform the Kolmogorov–Smirnov (KS) test for one column in each data frame. I am using the code below but it is not working:
PDF_mean <- matrix(nrow = length(siteNumber), ncol = 4)
PDF_mean <- data.frame(PDF_mean)
names(PDF_mean) <- c("station","normal","gamma","gev")
listDF <- mget(ls(pattern="DSF_moments_"))
length(listDF)
i <- 1
for (i in length(listDF)) {
PDF_mean$station[i] <- siteNumber[i]
PDF_mean$normal[i] <- ks.test(list[i]$mean,"pnorm")$p.value
PDF_mean$gev[i] <- ks.test(list[i]$mean,"pgev")$p.value
PDF_mean$gamma[i] <- ks.test(list[i]$mean,"gamma")$p.value
}
Any help?
It is not length(listDF) instead, it would be seq_along(listDF) or 1:length(listDF) (however, it is more appropriate with seq_along) because length is a single value and it is not doing any loop
for(i in seq_along(listDF)) {
PDF_mean$station[i] <- listDF[[i]]$siteNumber
PDF_mean$normal[i] <- ks.test(listDF[[i]]$mean,"pnorm")$p.value
PDF_mean$gev[i] <- ks.test(listDF[[i]]$mean,"pgev")$p.value
PDF_mean$gamma[i] <- ks.test(listDF[[i]]$mean,"gamma")$p.value
}

Vectorized calculation of adjacency matrix

I have the following function:
CFC_GLM <- function(data, frequency_bins){
adj_mat <- matrix(0, nrow = dim(data)[1], ncol = dim(data)[1])
bf_filters <- list()
combs <- combinations(length(frequency_bins), 2, repeats.allowed = T)
all_adj_mat <- list()
for(z in 1:length(frequency_bins)){
bf_filters[[z]] <- butter(3, c(frequency_bins[[z]][1]/1200,
frequency_bins[[z]][2]/1200), type = "pass")
}
for(f in 1:nrow(combs)){
for(i in 1:dim(data)[1]){
for(j in 1:dim(data)[1]){
sensor_1 <- data[i,]
sensor_2 <- data[j,]
sensor_1_filt = filtfilt(bf_filters[[combs[f,1]]], sensor_1)
sensor_2_filt = filtfilt(bf_filters[[combs[f,2]]], sensor_2)
a_y <- abs(hilbert(sensor_2_filt, 1200))
a_x <- abs(hilbert(sensor_1_filt, 1200))
theta_x <- angle(hilbert(sensor_1_filt, 1200)) %% 2*pi
a_x_norm <- (a_x - mean(a_x))/std(a_x)
a_y_norm <- (a_y - mean(a_y))/std(a_y)
theta_x_norm <- (theta_x - mean(theta_x))/std(theta_x)
fit <- lm(a_y_norm ~ sin(theta_x_norm) + cos(theta_x_norm) +
a_x_norm)
summ <- summary(fit)
r <- sqrt(summ$r.squared)
adj_mat[i,j] <- r
}
}
all_adj_mat[[f]] <- adj_mat
}
return(all_adj_mat)
}
Just to summarize, the function takes a matrix of signals (246 sensors by 2400 samples), performs some signal processing, and then performs a GLM between every possible pairs of sensors. This process is repeated for 4 frequency bandwidths and their combinations (within and cross-frequency coupling). Right now, this code seems terribly inefficient and takes a really long time to run. Is there a way to vectorize/parallelize this function? I have researched this question extensively and cannot seem to find an answer.
I am not sure whether to make some of the tasks within the function parallel or just make the whole function able to be called by parApply (vectorized). My intuition is the latter but I am not sure how to approach this. Any help is greatly appreciated.
Reproducible Example
test_data <- c(-347627.104358097, 821947.421444641, 496824.676355433,
-178091.364312102, -358842.250713998, 234666.210462063,
-1274153.04141668,
1017066.42839987, -158388.137875357, 191691.279588641,
-16231.2106151229,
378249.600546794, 1080850.88212858, -688841.640871254,
-616713.991288002,
639401.465180969, -1625802.44142751, 472370.867686569,
-631863.239075449,
-598755.248911174, 276422.966753179, -44010.9403226763,
1569374.08537143,
-1138797.2585617, -824232.849278583, 955783.332556046,
-1943384.98409094,
-54443.829280377, -1040354.44654998, -1207674.05255178,
496481.331429747,
-417435.356472725, 1886817.1254085, -1477199.59091112,
-947353.716505171,
1116336.49812969, -2173805.84111182, -574875.152250742,
-1343996.2219146,
-1492260.06197604, 626856.67540728, -713761.48191904, 1987730.27341334,
-1673384.77863935, -968522.886481198, 1089458.71433614,
-2274932.19262517,
-1096749.79392427, -1520842.86946059, -1390794.61065106,
669864.477272507,
-906096.822125892, 1863506.59188299, -1720956.06310511,
-889359.420058576,
885300.628410276, -2224340.54992297, -1619386.88041896,
-1570131.07127786,
-934848.556063722, 644671.113108699, -973418.329437102,
1541962.53750178,
-1636863.31666018, -728992.972371437, 551297.997356909,
-2026413.5471505,
-2129730.49230266, -1511423.25789691, -236962.889589694,
580683.399845852,
-906261.700784793, 1080101.95011954, -1455931.89179814,
-518630.187846405,
158846.288141661, -1715610.22092989, -2601349.5081924,
-1380068.64260811,
541310.557194977, 509125.333244057, -711696.682554995,
551748.792106809,
-1222430.29467688, -293847.487823853, -215078.751157158,
-1354005.89576504,
-2997647.23289805, -1220136.14918605, 1231169.98678596,
455388.081391798,
-415489.975542684, 32724.7895795912, -980848.930757441,
-86618.5594163355,
-506333.915891838, -1022235.58829567, -3279232.01820961,
-1076344.95091665,
1696655.88400158), .Dim = c(10L, 10L))
frequency_bins <- list(band1 = c(2,4), band2 = c(4,12), band3 =
c(12,30), band4 = c(30,100))
system.time(test_result <- CFC_GLM(test_data, frequency_bins))
user system elapsed
1.839 0.009 1.849
I'm not sure how to include the result in a manageable way. Sorry for the naivety. This is only with 10 sensors by 10 samples, to have a manageable test set.
Right off the bat I would suggest predeclaring the length of your lists.
bf_filters <- rep(list(NA), length(frequency_bins))
all_adj_mat <- rep(list(NA), nrow(combos))
#this is your function to be applied
i_j_fun <- function ( perms ) {
sensor_1_filt = filtfilt(bf_filters[[combos[f,1]]], data[perms[1],])
sensor_2_filt = filtfilt(bf_filters[[combos[f,2]]], data[persm[2],])
a_y <- abs(hilbert(sensor_2_filt, 1200))
a_x <- abs(hilbert(sensor_1_filt, 1200))
theta_x <- angle(hilbert(sensor_1_filt, 1200)) %% 2*pi
a_x_norm <- (a_x - mean(a_x))/std(a_x)
a_y_norm <- (a_y - mean(a_y))/std(a_y)
theta_x_norm <- (theta_x - mean(theta_x))/std(theta_x)
fit <- lm(a_y_norm ~ sin(theta_x_norm) + cos(theta_x_norm) +
a_x_norm)
summ <- summary(fit)
r <- sqrt(summ$r.squared)
return(r)
}
Your i and j for loops can be turned into a function and used with apply.
#perms acts like the for loop
perms <- permuations(dim(data)[1], 2, seq_len(dim(data)[1]))
for(f in 1:nrow(combs)){
all_adj_mat[[f]] <- matrix(apply(perms, 1, i_j_fun),
nrow = dim(data)[1], ncol = dim(data[2]), byrow = TRUE)
}
That should do it.

R loop to create data frames with 2 counters

What I want is to create 60 data frames with 500 rows in each. I tried the below code and, while I get no errors, I am not getting the data frames. However, when I do a View on the as.data.frame, I get the view, but no data frame in my environment. I've been trying for three days with various versions of this code:
getDS <- function(x){
for(i in 1:3){
for(j in 1:30000){
ID_i <- data.table(x$ID[j: (j+500)])
}
}
as.data.frame(ID_i)
}
getDS(DATASETNAME)
We can use outer (on a small example)
out1 <- c(outer(1:3, 1:3, Vectorize(function(i, j) list(x$ID[j:(j + 5)]))))
lapply(out1, as.data.table)
--
The issue in the OP's function is that inside the loop, the ID_i gets updated each time i.e. it is not stored. Inorder to do that we can initialize a list and then store it
getDS <- function(x) {
ID_i <- vector('list', 3)
for(i in 1:3) {
for(j in 1:3) {
ID_i[[i]][[j]] <- data.table(x$ID[j:(j + 5)])
}
}
ID_i
}
do.call(c, getDS(x))
data
x <- data.table(ID = 1:50)
I'm not sure the description matches the code, so I'm a little unsure what the desired result is. That said, it is usually not helpful to split a data.table because the built-in by-processing makes it unnecessary. If for some reason you do want to split into a list of data.tables you might consider something along the lines of
getDS <- function(x, n=5, size = nrow(x)/n, column = "ID", reps = 3) {
x <- x[1:(n*size), ..column]
index <- rep(1:n, each = size)
replicate(reps, split(x, index),
simplify = FALSE)
}
getDS(data.table(ID = 1:20), n = 5)

Running multiple iterations of K-Means with different values for number of centroids

I have a large dataset and I am trying to run a K-means cluster analysis. However, I want to repeat this with multiple iterations by changing the number of centroids. Here's what I've done so far:
# import data
week1 <- read.csv("WEEK1.csv", header = TRUE)
week2 <- read.csv("WEEK2.csv", header = TRUE)
week3 <- read.csv("WEEK3.csv", header = TRUE)
week4 <- read.csv("WEEK4.csv", header = TRUE)
data <- rbind(week1, week2, week3, week4)
# variable names
for(i in 1:50){
assign(paste("cluster", i, sep = ""), i)
}
I've spent a long time trying to figure out how to "recall" my cluster variables in a for loop so that I can do something like this:
for (i in 1:50){
cluster[i] <- kmeans(data, i, nstart = 1)
}
Any thoughts?
Maybe this could help, put the various numbers of clusters in a vector, and store the result in a list. My example is with 3 max centroids, and I'm using the mtcars dataset, due you have not posted your data.
vector <- c() # an empty vector
for(i in 1:3){ # a loop that creates the
# various n of clusters
vector[i] <- assign(paste("cluster", i, sep = ""), i)
}
Now we can create the list of kmeans:
list_k <- list() # an empty list
for (i in vector){ # fill it with the kmeans
list_k[[i]] <- kmeans(mtcars, i, nstart = 1)
}
To have access to each kmeans, you can use this:
list_k[[3]]
To have access to each element of each list, this:
list_k[[3]][1]

representing a mosaic plot as a tree plot

I want to visualize a mosaic plot in form of a tree. For example
mosaicplot(~ Sex + Age + Survived, data = Titanic, color = TRUE)
Now what I want is to represent this in a tree form where the first node
for example be sex the second node be age and at the terminal node be number of people survived. May be it should something like http://addictedtor.free.fr/graphiques/RGraphGallery.php?graph=84 where instead of p giving the number of counts.
Is there an function in R to do this or should I write it on my own by taking at a look
at the party:::plot.BinaryTree function
Here is how I managed to get what I wanted with the lovely igraph package. The code is an ugly hack. It will be great to have you suggestions
library(igraph)
rm(list=ls())
req.data <- as.data.frame(Titanic)
lookup <- c("M","F","C","A","N","Y")
names(lookup) <- c("Male","Female","Child","Adult","Yes","No")
req.data$board <- "board"
req.data$Class.m <- paste(req.data$board,req.data$Class,sep="_")
req.data$Sex.m <- paste(req.data$board,req.data$Class,req.data$Sex,
sep="_")
req.data$Age.m <- paste(req.data$board,req.data$Class,req.data$Sex,
req.data$Age,sep="_")
req.data$Survived.m <- paste(req.data$board,req.data$Class,req.data$Sex,
req.data$Age,req.data$Survived,sep="_")
tmp <- data.frame(from=
do.call("c",lapply(req.data[,c("board",
"Class.m",
"Sex.m",
"Age.m")],as.character)),
to=do.call("c",lapply(req.data[,c("Class.m",
"Sex.m",
"Age.m",
"Survived.m")],as.character)),
stringsAsFactors=FALSE)
tmp <- tmp [!duplicated(tmp ),];rownames(tmp) <- NULL
tmp$num <- unlist(lapply(strsplit(tmp$to,"_"),
FUN=function(x){
check1 <- req.data$Class==x[2]
check2 <- req.data$Sex == x[3]
check3 <- req.data$Age == x[4]
check4 <- req.data$Survived == x[5]
sum(req.data$Freq[ifelse(is.na(check1),TRUE,check1) &
ifelse(is.na(check2),TRUE,check2) &
ifelse(is.na(check3),TRUE,check3) &
ifelse(is.na(check4),TRUE,check4)])}))
g <- graph.data.frame(tmp, directed=TRUE)
V(g)$label <- unlist(lapply(strsplit(V(g)$name,"_"),
FUN=function(y){ifelse(y[length(y)] %in% names(lookup),
lookup[y[length(y)]],y[length(y)])}))
E(g)$label <- tmp$num
plot(g,layout=layout.reingold.tilford,ylim=c(1,-1),edge.arrow.size=0.5,vertex.size=7)
legend("topleft", paste(lookup ,names(lookup),sep=" : "),ncol=2,bty="n",cex=0.7)
### To find the case for crew members
tmp1 <- tmp [grepl("Crew",tmp$from),];rownames(tmp1) <- NULL
g <- graph.data.frame(tmp1, directed=TRUE)
V(g)$label <- unlist(lapply(strsplit(V(g)$name,"_"),
FUN=function(y){ifelse(y[length(y)] %in% names(lookup),
lookup[y[length(y)]],y[length(y)])}))
E(g)$label <- tmp1$num
plot(g,layout=layout.reingold.tilford,ylim=c(1,-1),edge.arrow.size=0.5)
legend("topleft", paste(lookup ,names(lookup),sep=" : "),ncol=2,bty="n",cex=0.7)
Here is the plot I generate. You can modify the vertex/edge colors/size as you want
This is pretty close and looks a lot easier to me.. I post it here in case it may be of use. First I convert the ftable to a more traditional long data frame using expand.dft https://stat.ethz.ch/pipermail/r-help/2009-January/185561.html Then I just use the plot.dendrite function from the plotrix package.
expand.dft <- function(x, var.names = NULL, freq = "Freq", ...)
{
# allow: a table object, or a data frame in frequency form
if(inherits(x, "table"))
x <- as.data.frame.table(x, responseName = freq)
freq.col <- which(colnames(x) == freq)
if (length(freq.col) == 0)
stop(paste(sQuote("freq"), "not found in column names"))
DF <- sapply(1:nrow(x),
function(i) x[rep(i, each = x[i, freq.col]), ],
simplify = FALSE)
DF <- do.call("rbind", DF)[, -freq.col]
for (i in 1:ncol(DF))
{
DF[[i]] <- type.convert(as.character(DF[[i]]), ...)
}
rownames(DF) <- NULL
if (!is.null(var.names))
{
if (length(var.names) < dim(DF)[2])
{
stop(paste("Too few", sQuote("var.names"), "given."))
} else if (length(var.names) > dim(DF)[2]) {
stop(paste("Too many", sQuote("var.names"), "given."))
} else {
names(DF) <- var.names
}
}
DF
}
library(plotrix)
r = ftable(Titanic)
plot.dendrite(makeDendrite(expand.dft(data.frame(r))))

Resources