Graph Layout in igraph - r

I am trying to gerate gexf file using igraph but unfortunatly I have a problem with layout. How can I solve it to get a good graph like second one.
First image created with 1000 nodes but second one with 500
gD <- simplify(graph.data.frame(dataSet, directed=FALSE))
# Print number of nodes and edges
#vcount(gD)
#ecount(gD)
############################################################################################
# Calculate some node properties and node similarities that will be used to illustrate
# different plotting abilities
# Calculate degree for all nodes
degAll <- igraph::degree(gD, v = V(gD), mode = "all")
# Calculate betweenness for all nodes
#tnet::betweenness_w(data.frame(V1 = dataSet$V1,V2=dataSet$V2, V3 =dataSet$V3 ))
betAll <- igraph::betweenness(gD, v = V(gD),normalized=TRUE)
betAll <- betAll
#options("scipen"=100000000)
#betweenness()
#betAll.norm <- betAll / 100000000000000
betAll.norm <-betAll
#betAll.norm <- (betAll - min(betAll))/(max(betAll) - min(betAll))
rm(betAll)
# Calculate Dice similarities between all pairs of nodes
dsAll <- similarity.dice(gD, vids = V(gD), mode = "all")
############################################################################################
# Add new node/edge attributes based on the calculated node properties/similarities
newdataSet1 <- data.frame(username = dataSet$V1,gender = dataSet$V4)
newdataSet2 <- data.frame(username = dataSet$V2,gender = dataSet$V4)
newdataSet<-rbind(newdataSet1,newdataSet2)
genderdata<-subset(newdataSet,!duplicated(newdataSet$username))
gD <- set.vertex.attribute(gD, "degree", index = V(gD), value = degAll)
gD <- set.vertex.attribute(gD, "betweenness", index = V(gD), value = betAll.norm)
gD <- set.vertex.attribute(gD, "gender", index = V(gD), value = genderdata$gender)
newdataSet1 <- data.frame(username = dataSet$V1,sentiment = dataSet$V5)
newdataSet2 <- data.frame(username = dataSet$V2,sentiment = dataSet$V5)
newdataSet<-rbind(newdataSet1,newdataSet2)
sentimentdata<-subset(newdataSet,!duplicated(newdataSet$username))
gD <- set.vertex.attribute(gD, "sentiment", index = V(gD), value = sentimentdata$sentiment)
# Check the attributes
# summary(gD)
F1 <- function(x) {data.frame(V4 = dsAll[which(V(gD)$name == as.character(x$V1)), which(V(gD)$name == as.character(x$V2))])}
cl = createCluster(6, export = list("F1","dsAll","gD"), lib = list("igraph","plyr"))
system.time(dataSet.ext <- ddply(dataSet, .variables=c("V1", "V2", "V3"), function(x) data.frame(F1(x)),.parallel = TRUE))
#res = ddply(dat, .(category), bla, .parallel = TRUE)
stopCluster(cl)
gD <- set.edge.attribute(gD, "weight", index = E(gD), value = 0)
gD <- set.edge.attribute(gD, "similarity", index = E(gD), value = 0)
# The order of interactions in gD is not the same as it is in dataSet or as it is in the edge list,
# and for that reason these values cannot be assigned directly
#length(E(gD)[as.character(dataSet.ext$V1) %--% as.character(dataSet.ext$V2)]$weight )
E(gD)[as.character(dataSet.ext$V1) %--% as.character(dataSet.ext$V2)]$weight <- as.numeric(dataSet.ext$V3)
E(gD)[as.character(dataSet.ext$V1) %--% as.character(dataSet.ext$V2)]$similarity <- as.numeric(dataSet.ext$V4)
# Check the attributes
# summary(gD)
####################################
# Print network in the file format ready for Gephi
# This requires rgexf package
# Create a dataframe nodes: 1st column - node ID, 2nd column -node name
nodes_df <- data.frame(ID = c(1:vcount(gD)), NAME = V(gD)$name)
# Create a dataframe edges: 1st column - source node ID, 2nd column -target node ID
edges_df <- as.data.frame(get.edges(gD, c(1:ecount(gD))))
# Define node and edge attributes - these attributes won't be directly used for network visualization, but they
# may be useful for other network manipulations in Gephi
#
# Create a dataframe with node attributes: 1st column - attribute 1 (degree), 2nd column - attribute 2 (betweenness)
nodes_att <- data.frame(DEG = V(gD)$degree, BET = V(gD)$betweenness,gender = V(gD)$gender,sentement = V(gD)$sentiment)
#
# Create a dataframe with edge attributes: 1st column - attribute 1 (weight), 2nd column - attribute 2 (similarity)
edges_att <- data.frame(WGH = E(gD)$weight, SIM = E(gD)$similarity)
# Define node/edge visual attributes - these attributes are the ones used for network visualization
#
# Calculate node coordinate - needs to be 3D
#nodes_coord <- as.data.frame(layout.fruchterman.reingold(gD, weights = E(gD)$similarity, dim = 3, niter = 10000))
# We'll cheat here, as 2D coordinates result in a better (2D) plot than 3D coordinates
nodes_coord <- as.data.frame(layout.fruchterman.reingold(gD))
nodes_coord <- cbind(nodes_coord, rep(0, times = nrow(nodes_coord)))
#
# Calculate node size
# We'll interpolate node size based on the node betweenness centrality, using the "approx" function
uniqueNess<-unique(V(gD)$betweenness)
approxVals <- approx(c(1, 5), n = length(unique(V(gD)$betweenness)))
# And we will assign a node size for each node based on its betweenness centrality
nodes_size <- sapply(V(gD)$betweenness, function(x) approxVals$y[which(sort(unique(V(gD)$betweenness)) == x)])
#
# Define node color
# We'll interpolate node colors based on the node degree using the "colorRampPalette" function from the "grDevices" library
# This function returns a function corresponding to a collor palete of "bias" number of elements
F2 <- colorRampPalette(c("#F5DEB3", "#FF0000"), bias = length(unique(V(gD)$degree)), space = "rgb", interpolate = "linear")
# Now we'll create a color for each degree
colCodes <- F2(length(unique(V(gD)$degree)))
#################test parallel####################
cl = createCluster(6, export = list("F2","dsAll","gD","colCodes"), lib = list("igraph","plyr"))
system.time(nodes_col<-parSapply(cl,V(gD)$degree, function(x) colCodes[which(sort(unique(V(gD)$degree)) == x)]))
#res = ddply(dat, .(category), bla, .parallel = TRUE)
stopCluster(cl)
#############################
# And we will assign a color for each node based on its degree
#nodes_col <- sapply(V(gD)$degree, function(x) colCodes[which(sort(unique(V(gD)$degree)) == x)])
# Transform it into a data frame (we have to transpose it first)
nodes_col_df <- as.data.frame(t(col2rgb(nodes_col, alpha = FALSE)))
# And add alpha (between 0 and 1). The alpha from "col2rgb" function takes values from 0-255, so we cannot use it
nodes_col_df <- cbind(nodes_col_df, alpha = rep(1, times = nrow(nodes_col_df)))
# Assign visual attributes to nodes (colors have to be 4dimensional - RGBA)
nodes_att_viz <- list(color = nodes_col_df, position = nodes_coord, size = nodes_size)
# Assign visual attributes to edges using the same approach as we did for nodes
F2 <- colorRampPalette(c("#FFFF00", "#006400"), bias = length(unique(E(gD)$weight)), space = "rgb", interpolate = "linear")
colCodes <- F2(length(unique(E(gD)$weight)))
#################test parallel####################
cl = createCluster(12, export = list("F2","dsAll","gD","colCodes"), lib = list("igraph","plyr"))
system.time(edges_col<-parSapply(cl,E(gD)$weight, function(x) colCodes[which(sort(unique(E(gD)$weight)) == x)]))
stopCluster(cl)
#############################
#edges_col <- sapply(E(gD)$weight, function(x) colCodes[which(sort(unique(E(gD)$weight)) == x)])
edges_col_df <- as.data.frame(t(col2rgb(edges_col, alpha = FALSE)))
edges_col_df <- cbind(edges_col_df, alpha = rep(1, times = nrow(edges_col_df)))
edges_att_viz <-list(color = edges_col_df)
# Write the network into a gexf (Gephi) file
#write.gexf(nodes = nodes_df, edges = edges_df, nodesAtt = nodes_att, edgesWeight = E(gD)$weight, edgesAtt = edges_att, nodesVizAtt = nodes_att_viz, edgesVizAtt = edges_att_viz, defaultedgetype = "undirected", output = "lesmis.gexf")
# And without edge weights
write.gexf(nodes = nodes_df, edges = edges_df, nodesAtt = nodes_att, edgesAtt = edges_att, nodesVizAtt = nodes_att_viz, edgesVizAtt = edges_att_viz, defaultedgetype = "undirected", output = "arctic.gexf")

Related

R: Create function that iteratively performs some analysis to pairs of rasters, based on their names

I am having 2 sets of raster data and their names are:
ntl_'a number'.tif
pop_'a number'.tif
My goal is to create a function that reads the first pair of rasters (e.g., ntl_1.tif and pop_1.tif), then executes the below code and then repeats the process with the next pair:
library(raster)
library(DescTools)
#create a data.frame of values from the NTL and pop raster data
ntl = raster("path/ntl_1.tif")
vals_ntl <- as.data.frame(values(ntl))
ntl_coords = as.data.frame(xyFromCell(ntl, 1:ncell(ntl)))
combine <- as.data.frame(cbind(ntl_coords,vals_ntl))
pop<-raster("path/pop_1.tif")
pop = resample(pop, ntl, method = 'bilinear')
vals_pop <- as.data.frame(values(pop))
block.data <- as.data.frame(cbind(combine, vals_pop))
names(block.data)[3] <- "ntl"
names(block.data)[4] <- "pop"
block.data <- na.omit(block.data)
block.data = subset(block.data, select = -c(x, y))
# sort by ntl
block.data <-block.data[order(block.data$ntl),]
ntl_vector <- block.data[ , "ntl"]
pop_vector <- block.data[ , "pop"]
#compute gini index
Gini(ntl_vector, pop_vector, unbiased = FALSE)
My issue is with the code inside the function, I do not know how to properly make the syntax (the above code is for a pair of raster while I have hundreds of pairs). Hopefully I can get the results (i.e., the gini coefficient) of every pair in my console or, even better, in a data.frame. The data are here.
library(purrr)
library(fs)
raster_gini <- function(
.ntl = "ntl_1.tif",
.pop = "pop_1.tif",
.rdgal = TRUE
) {
if(.rdgal) {
ntl = raster(.ntl)
vals_ntl <- as.data.frame(values(ntl))
ntl_coords = as.data.frame(xyFromCell(ntl, 1:ncell(ntl)))
combine <- as.data.frame(cbind(ntl_coords,vals_ntl))
pop<-raster(.pop)
pop = resample(pop, ntl, method = 'bilinear')
vals_pop <- as.data.frame(values(pop))
block.data <- as.data.frame(cbind(combine, vals_pop))
#rename the columns
names(block.data)[3] <- "ntl"
names(block.data)[4] <- "pop"
#remove NA values
block.data <- na.omit(block.data)
#remove the columns x & y
block.data = subset(block.data, select = -c(x, y))
# sort by ntl
block.data <-block.data[order(block.data$ntl),]
ntl_vector <- block.data[ , "ntl"]
pop_vector <- block.data[ , "pop"]
#compute gini index
gini <- Gini(ntl_vector, pop_vector, unbiased = FALSE)
c(ntl = .ntl, pop = .pop, gini = gini)
} else {
c(ntl = .ntl, pop = .pop)
}
}
doc_paths_ntl <- fs::dir_ls("path_to_ntl_raster", glob = "*tif*")
doc_paths_pop <- fs::dir_ls("path_to_pop_raster", glob = "*tif*")
result_df <- purrr::map2_dfr(.x = doc_paths_ntl, .y = doc_paths_pop, .f = raster_gini)
result_df <- result_df |>
dplyr::mutate(ntl = basename(ntl)) |>
dplyr::mutate(pop = basename(pop))
result_df

How can I check for convergence in Sobol' sensitivity indices, using sensobol?

I would like to check the convergence of Sobol' sensitivity indices, using the sensobol library, by re-computing the sensitivity indices using sub-samples of decreasing size extracted from the original sample.
Here, I present an example code using the Ishigami function as model. Since computing the model output takes very long with the model I actually use, I want to avoid recomputing the model output for different sample sizes, but want to use sub-samples of my overall sample for this check.
I have written code that runs through, however, it seems that the result is 'not correct', as soon as the sample size is not equal the initial sample size.
Inital set-up
library(sensobol)
# Define settings
matrices <- c("A", "B", "AB", "BA")
N <- 1000
params <- paste("X", 1:3, sep = "")
first <- total <- "azzini"
order <- "first"
R <- 10
type <- "percent"
conf <- 0.95
# Create sample matrix using Sobol' (1967) quasi-random numbers
mat <- sobol_matrices(matrices = matrices, N = N, params = params, order = order, type = "QRN")
# Compute model output using Ishigami function as model
Y <- ishigami_Fun(mat)
Correct Sobol' indices as benchmark result
# Compute and bootstrap Sobol' indices for entire sample N
ind <- sobol_indices(matrices = c("A", "B", "AB", "BA"),
Y = Y,
N = N,
params = params,
boot = TRUE,
first = "azzini",
total = "azzini",
order = "first",
R = R,
type = type,
conf = conf)
cols <- colnames(ind)[1:length(params)]
ind[ , (cols):= round(.SD, 3), .SDcols = (cols)]
Check for convergence
Now, to analyze whether convergence is reached, I want to re-compute the sensitivity indices using sub-samples of decreasing size extracted from the original sample
# function to compute sensitivity indices, depending on the sample size and the model output vector
fct_conv <- function(N, Y) {
# compute how many model runs are performed in the case of the Azzini estimator
nr_model_runs <- 2*N*(length(params)+1) # length(params) = k
# extract sub-sample of model output
y_sub <- Y[1:nr_model_runs]
# compute and bootstrap Sobol' indices
ind_sub <- sobol_indices(matrices = c("A", "B", "AB", "BA"),
Y = y_sub,
N = N,
params = params,
boot = TRUE,
first = "azzini",
total = "azzini",
order = "first",
R = R,
type = type,
conf = conf)
cols <- colnames(ind_sub)[1:length(params)]
ind_sub[ , (cols):= round(.SD, 3), .SDcols = (cols)]
return(ind_sub)
}
Let's compare the benchmark result (ind) to two other outputs: Running fct_conv with the full sample (ind_full_sample) and running fct_conv with a very slightly reduced sample (ind_red_sample).
ind_full_sample <- fct_conv(1000, Y)
ind_red_sample <- fct_conv(999, Y)
ind
ind_full_sample
ind_red_sample
It seems that as soon as the sample size is reduced, the result doesn't make sense. Why is that? I'd be glad for any hints or ideas!
The results do not make sense because you are sampling without considering the ordering of the sample matrix. Try the following
# Load the required packages:
library(sensobol)
library(data.table)
library(ggplot2)
# Create function to swiftly check convergence (you do not need bootstrap)
sobol_convergence <- function(Y, N, sample.size, seed = 666) {
dt <- data.table(matrix(Y, nrow = N))
set.seed(seed) # To permit replication
subsample <- unlist(dt[sample(.N, sample.size)], use.names = FALSE)
ind <- sobol_indices(matrices = matrices,
Y = subsample,
N = sample.size,
params = params,
first = first,
total = total,
order = order)
return(ind)
}
# Define sequence of sub-samples at which you want to check convergence
sample.size <- seq(100, 1000, 50) # every 50
# Run function
ind.list <- lapply(sample.size, function(n)
sobol_convergence(Y = Y, N = N, sample.size = n))
# Extract total number of model runs C and results in each run
Cost <- indices <- list()
for(i in 1:length(ind.list)) {
Cost[[i]] <- ind.list[[i]]$C
indices[[i]] <- ind.list[[i]]$results
}
names(indices) <- Cost
# Final dataset
final.dt <- rbindlist(indices, idcol = "Cost")[, Cost:= as.numeric(Cost)]
# Plot results
ggplot(final.dt, aes(Cost, original, color = sensitivity)) +
geom_line() +
labs(x = "Total number of model runs", y = "Sobol' indices") +
facet_wrap(~parameters) +
theme_bw()

Writing Custom Layer (3D RoI Pooling) using Keras in R

Using R, I would like to produce a Keras custom layer that performs 3D Region of Interest (RoI) Pooling. My 3D RoI Pooling function works as expected outside of an R6Class KerasLayer, but I am having trouble integrating it into a Custom Layer. I am not sure if I am using autograph correctly in the below example. I am under the impressing that for me to use the R functionalities (i.e. for loop) in the "call" section of the custom layer I am required to wrap the function in autograph.
Is the below custom layer for applying a 3D RoI Pooling (without training weights) appropriate syntex for an R user of Keras?
The input for the 3d_RoI_Pool layer includes:
(i) output layer from a VNet model,
shape=(None, 16, 16, 40, 1)
(ii) tensor defining the "index" for cropping the input layer to the extent of each RoI,
- tf.Variable 'Variable:0' shape=(1, 1, 6) dtype=float64, numpy=array([[[ 5., 5., 4., 5., 10., 20.]]])
(iii) output dimension for the pooled 3D RoIs.
In this case its c(1, 1, 5, 5, 5, 1) for c(Batch_Size, RoI_Size, X, Y, Z, Channels)
Below are some questions that I am unclear about:
I initially apply layer_cropping_3d to crop each RoI but am not sure if custom layer is allowed to use available R Keras layers? For this reason, in code I performed my own cropping.
Is it the correct use of autograph within custom layer?
Should I instead use the layer_lambda() for such a function (i.e. Wraps arbitrary expression as a layer), given that I do not have trainable weights in the 3D_RoI_Pooling layer?
Below is the Code that gets the output layer from a V-Net CNN and prepares inputs for the custom layer:
# FINAL LAYERS OF V_Net
Output_shortcut = up1_DeConv
Final_Conv <- up1_Concat %>%
# 1st
layer_conv_3d(filters = Filter_Start, kernel_size = Kernel_Conv_Size, padding = "same", name = "Final_Conv") %>%
layer_batch_normalization() %>%
layer_activation("relu")
Final_Conv <- layer_add(c(Final_Conv, Output_shortcut), name = "Final_Conv_ResNet")
# PREPARE INPUT FOR CUSTOM LAYER
n_RoI <- 1
Batch_Size <- 1
Table_RoI <- list(list(c(5, 5), c(4, 5), c(10, 20)))
Table_RoI <- array_reshape(unlist(Table_RoI), c(Batch_Size,n_RoI,6), order="F")
Table_RoI <- tf$Variable(Table_RoI, tf$int16)
output_dim <- c(Batch_Size, n_RoI, 5,5,5, as.numeric(dim(Final_Conv)[5]))
# FEED CUSTOM LAYER TO CNN
Final_Conv2 <- Final_Conv %>% layer_3D_ROI_pooled(Table_RoI=Table_RoI, output_dim= output_dim)
Below is the actual 3D RoI Pooling Custom Layer that generates an error:
# 3D ROIpooled_Layer (R KERAS )
ROIpooled_Layer <- R6::R6Class("KerasLayer",
inherit = KerasLayer,
public = list(
Table_RoI = NULL,
output_dim = NULL,
###############
# INITIALISE
###############
initialize = function(Table_RoI, output_dim) {
self$Table_RoI = Table_RoI
self$output_dim = output_dim
},
###############
# CALL FUNCTION ROIpooled_Function <-
###############
call = autograph(function(x, mask = NULL,
Table_RoI,
output_dim) {
## Input_L ROI_Table
n_Batch <- output_dim[1]
n_RoI <- output_dim[2]
Channels <- output_dim[6]
for(r in 1:n_RoI){
# layer_cropping_3d(Input_L, cropping = list(list(as.numeric(Table_RoI[,r,1]), as.numeric(Table_RoI[,r,2])),
# list(as.numeric(Table_RoI[,r,3]), as.numeric(Table_RoI[,r,4])),
# list(as.numeric(Table_RoI[,r,5]), as.numeric(Table_RoI[,r,6]))))
RoI_Cropped <-x[,(as.numeric(Table_RoI[,r,1])+1):(dim(feature_map_T)[2]-as.numeric(Table_RoI[,r,2])),
(as.numeric(Table_RoI[,r,3])+1):(dim(feature_map_T)[3]-as.numeric(Table_RoI[,r,4])),
(as.numeric(Table_RoI[,r,5])+1):(dim(feature_map_T)[4]-as.numeric(Table_RoI[,r,6])),]
RoI_X_Res <- as.array(k_shape(RoI_Cropped)[2])
RoI_Y_Res <- as.array(k_shape(RoI_Cropped)[3])
RoI_Z_Res <- as.array(k_shape(RoI_Cropped)[4])
New_X_Res <- as.array(output_dim[3])
New_Y_Res <- as.array(output_dim[4])
New_Z_Res <- as.array(output_dim[5])
X_step = RoI_X_Res / New_X_Res
Y_step = RoI_Y_Res / New_Y_Res
Z_step = RoI_Z_Res / New_Z_Res
for(ch in 1:Channels) {
print(paste("ch", ch))
for (k in 1:New_Z_Res) {
print(paste("k", k))
for (j in 1:New_Y_Res) {
print(paste("j", j, "k", k))
for (i in 1:New_X_Res) {
# INDEX X
Index_Xstart <- floor((i-1)*X_step+1)
if(i+1 <= RoI_X_Res){
Index_Xend <- floor((i)*X_step)
}else{
Index_Xend <- RoI_X_Res
}
# INDEX Y
Index_Ystart <- floor((j-1)*Y_step+1)
if(j+1 <= RoI_Y_Res){
Index_Yend <- floor((j)*Y_step)
}else{
Index_Yend <-RoI_Y_Res
}
# INDEX Z
Index_Zstart <- floor((k-1)*Z_step+1)
if(k+1 <= RoI_Z_Res){
Index_Zend <- floor((k)*Z_step)
}else{
Index_Zend <-RoI_Z_Res
}
Max_Pool_X_Value <- as.array(k_max(RoI_Cropped[n_Batch,Index_Xstart:Index_Xend, Index_Ystart:Index_Yend, Index_Zstart:Index_Zend,ch])) # ADD BATCH AND CHANNEL LAYERS
RoI_Pooled_Array[,r,i,j,k,ch] <- Max_Pool_X_Value
}# i LOOP
} # j LOOP
} # k Loop
} #Ch LOOP
} # r LOOP (ROI)
feature_map_ROIpooled <- tf$Variable(RoI_Pooled_Array, tf$int16) # ??? NOT SURE IF RETURN NEEDS TO BE A VARIABLE
return (feature_map_ROIpooled)
}), # END OF AUTO, # END OF CALL ... AUTOGRAPH FUNCTION
##############
# OUTPUT SHAPE
##############
compute_output_shape = function(input_shape) {
list(self$output_dim)
}
)
)
#
###############################
# Create layer wrapper function
###############################
layer_3D_ROI_pooled <- function(object, Table_RoI, output_dim, name = NULL, trainable = TRUE) {
create_layer(ROIpooled_Layer, object, list(Table_RoI = Table_RoI,
output_dim = as.integer(output_dim),
name = name,
trainable = FALSE
))
}
ERROR:
Error in value[[3L]](cond) :
The R function's signature must not contains esoteric Python-incompatible constructs. Detailed traceback: SyntaxError: non-default argument follows default argument (<string>, line 3)
Any help/insight/clarity would be appreciated.
Kind Regards,
Dom
I'm not sure if this is the correct protocol to answer own question but I think I have a working 3D RoI pooling Custom Layer to share. Many mistakes in above but most notable change is in the for loop.... I think I needed to first generate a list of tensors that represent each pooled RoI and then re-shape it to the desired output shape.
################################################################################################
# ROI_3D_pooled_Layer (Custom layer class)
##########################################
ROI_3D_pooled_Layer <- R6::R6Class("KerasLayer",
inherit = KerasLayer,
public = list(
List_RoI = NULL,
output_dim = NULL,
initialize = function(List_RoI, output_dim) {
self$List_RoI = List_RoI
self$output_dim = output_dim
},
call = function(x, mask = NULL) {
List_RoI <- self$List_RoI
output_dim <- self$output_dim
# EXTRACT INFORMATION ON OUTPUT DIMENSION
n_Batch <- as.integer(output_dim[1])
n_RoIs <- as.integer(output_dim[2])
n_Channels <- as.integer(output_dim[6])
New_X_Res <- as.numeric(output_dim[3])
New_Y_Res <- as.numeric(output_dim[4])
New_Z_Res <- as.numeric(output_dim[5])
input_shape <- dim(x)
# EMPTY LIST TO STORE TENSORS
output_list = list()
for(r in 1:n_RoIs){ # LOOP RoIs
# GET one RoI AND CROP INPUT LAYER
if(n_RoIs > 1){
oneList_RoI <- List_RoI[[r]]
}else{
oneList_RoI <- List_RoI
}
RoI_Cropped <-x[,(oneList_RoI[[1]][1]+1):(as.numeric(input_shape[2])-oneList_RoI[[1]][2]),
(oneList_RoI[[2]][1]+1):(as.numeric(input_shape[3])-oneList_RoI[[2]][2]),
(oneList_RoI[[3]][1]+1):(as.numeric(input_shape[4])-oneList_RoI[[3]][1]),]
# GET RoI Dimensions for XYZ
RoI_X_Res <- as.numeric(dim(RoI_Cropped)[2])
RoI_Y_Res <- as.numeric(dim(RoI_Cropped)[3])
RoI_Z_Res <- as.numeric(dim(RoI_Cropped)[4])
# CALCULATE STEPS IN ALL DIMENSIONS FOR POOLING
X_step = RoI_X_Res / New_X_Res
Y_step = RoI_Y_Res / New_Y_Res
Z_step = RoI_Z_Res / New_Z_Res
for(ch in 1:n_Channels) { # LOOP CHANNEL
for (k in 1:New_Z_Res) { # LOOP Z
for (j in 1:New_Y_Res) { # LOOP Y
for (i in 1:New_X_Res) { # LOOP X
# INDEX X
Index_Xstart <- floor((i-1)*X_step+1)
if(i+1 <= RoI_X_Res){
Index_Xend <- floor((i)*X_step)
}else{
Index_Xend <- RoI_X_Res
}
# INDEX Y
Index_Ystart <- floor((j-1)*Y_step+1)
if(j+1 <= RoI_Y_Res){
Index_Yend <- floor((j)*Y_step)
}else{
Index_Yend <-RoI_Y_Res
}
# INDEX Z
Index_Zstart <- floor((k-1)*Z_step+1)
if(k+1 <= RoI_Z_Res){
Index_Zend <- floor((k)*Z_step)
}else{
Index_Zend <-RoI_Z_Res
}
# MAX POOL VOLUME FOR EACH ELEMENT IN FINAL LAYER AND PUT IN EMPTY ARRAY
Max_Pool_X_Value <-k_max(RoI_Cropped[,Index_Xstart:Index_Xend, Index_Ystart:Index_Yend, Index_Zstart:Index_Zend,ch])
# APPEND EACH RoI_Pooled element into a list
output_list <- list.append(output_list, Max_Pool_X_Value)
}# i LOOP
} # j LOOP
} # k Loop
} #Ch LOOP
} # r LOOP (ROI)
# STACK THE OUTPUT LIST AND RESHAPE TO THE DESIRED OUTPUT SIZE
output_Stack <- k_stack(output_list, axis = 1)
feature_map_ROIpooled <- k_reshape(output_Stack, shape = c(n_Batch, n_RoIs, New_X_Res, New_Y_Res, New_Z_Res, n_Channels))
return (feature_map_ROIpooled)
},
compute_output_shape = function(input_shape) {
return(self$output_dim)
}
)
)
# Create layer wrapper function
layer_3D_ROI_pooled <- function(object, List_RoI, output_dim) {
create_layer(ROI_3D_pooled_Layer, object, list(List_RoI = List_RoI,
output_dim = as.integer(output_dim)
))
}
This is my first attempt with a Keras Custom Layer (using R) so please provide suggestions for improvement.

add clusters and nodes from SOMbrero package to training data

I am playing a bit with the SOMbrero package. I would like to attach the cluster numbers created like so (taken from here):
my.sc <- superClass(iris.som, k=3)
and X and Y coordinates of the SOM nodes to the training dataset.
In some code, where I use the kohonen package, I create clusters like this:
range01 <- function(x){(x-min(x))/(max(x)-min(x))}
ind <- sapply(SubsetData, is.numeric)
SubsetData[ind] <- lapply(SubsetData[ind], range01)
TrainingMatrix <- as.matrix(SubsetData)
GridDefinition <- somgrid(xdim = 4, ydim = 4, topo = "rectangular", toroidal = FALSE)
SomModel <- som(
data = TrainingMatrix,
grid = GridDefinition,
rlen = 10000,
alpha = c(0.05, 0.01),
keep.data = TRUE
)
nb <- table(SomModel$unit.classif)
groups = 5
tree.hc = cutree(hclust(d=dist(SomModel$codes[[1]]),method="ward.D2",members=nb),groups)
plot(SomModel, type="codes", bgcol=rainbow(groups)[tree.hc])
add.cluster.boundaries(SomModel, tree.hc)
result <- OrginalData
result$Cluster <- tree.hc[SomModel$unit.classif]
result$X <- SomModel$grid$pts[SomModel$unit.classif,"x"]
result$Y <- SomModel$grid$pts[SomModel$unit.classif,"y"]
write.table(result, file = "FinalData.csv", sep = ",", col.names = NA, quote = FALSE)
PS:
Some example code using the iris dataset can be found here.
PPS:
I played a bit with the code iris code quoted above and think I have managed to extract the clusters, node ids and prototypes (see code below). What is missing are the coordinates X and Y. I think they are in here:
iris.som$parameters$the.grid$coord
Code:
library(SOMbrero)
set.seed(100)
setwd("D:\\RProjects\Clustering")
#iris.som <- trainSOM(x.data=iris[,1:4],dimension=c(10,10), maxit=100000, scaling="unitvar", radius.type="gaussian")
iris.som <- trainSOM(x.data=iris[,1:4],dimension=c(3,3), maxit=100000, scaling="unitvar", radius.type="gaussian")
# perform a hierarchical clustering
## with 3 super clusters
iris.sc <- superClass(iris.som, k=3)
summary(iris.sc)
# compute the projection quality indicators
quality(iris.som)
iris1 <- iris
iris1$Cluster = iris.sc$cluster[iris.sc$som$clustering]
iris1$Node = iris.sc$som$clustering
iris1$Pt1Sepal.Length = iris.sc$som$prototypes[iris.sc$som$clustering,1]
iris1$Pt2Sepal.Width = iris.sc$som$prototypes[iris.sc$som$clustering,2]
iris1$Pt3Petal.Length = iris.sc$som$prototypes[iris.sc$som$clustering,3]
iris1$Pt4Petal.Width = iris.sc$som$prototypes[iris.sc$som$clustering,4]
write.table(iris1, file = "Iris.csv", sep = ",", col.names = NA, quote = FALSE)
I think I have figured it out using the iris example (please correct/improve code! - I am not fluent in R):
library(SOMbrero)
set.seed(100)
setwd("D:\\RProjects\\SomBreroClustering")
iris.som <- trainSOM(x.data=iris[,1:4],dimension=c(5,5), maxit=10000, scaling="unitvar", radius.type="letremy")
# perform a hierarchical clustering
# with 3 super clusters
iris.sc <- superClass(iris.som, k=3)
summary(iris.sc)
# compute the projection quality indicators
quality(iris.som)
iris1 <- iris
iris1$Cluster = iris.sc$cluster[iris.sc$som$clustering]
iris1$Node = iris.sc$som$clustering
iris1$Pt1Sepal.Length = iris.sc$som$prototypes[iris.sc$som$clustering,1]
iris1$Pt2Sepal.Width = iris.sc$som$prototypes[iris.sc$som$clustering,2]
iris1$Pt3Petal.Length = iris.sc$som$prototypes[iris.sc$som$clustering,3]
iris1$Pt4Petal.Width = iris.sc$som$prototypes[iris.sc$som$clustering,4]
iris1$X = iris.som$parameters$the.grid$coord[iris.sc$som$clustering,1]
iris1$Y = iris.som$parameters$the.grid$coord[iris.sc$som$clustering,2]
write.table(iris1, file = "Iris.csv", sep = ",", col.names = NA, quote = FALSE)
I am not sure that I got it right but:
iris.som$parameters$the.grid contains coordinates of the clusters (it is a two column array with x and y coordinates in the mapping space)
so I think that what you want to do is
out.grid <- iris.som$parameters$the.grid$coord
out.grid$sc <- iris.sc$clustering
and export out.grid (a three column array). iris.sc$som$prototypes contains the coordinate of the prototypes of the clusters but in the original space (the four dimensional space in which the iris dataset takes its values.
I think my answer captures the requirements. Adding the node ids, x +
y coordinates, cluster and prototypes to the original data. Would you
agree.
yes :)

SpatialLines to iGraph conversion simplifies topology

I'm trying to convert from spatialLinesDataFrame to igraph object, and think I may be losing information I want to keep. Fairly new to igraph so please bear with me. The example below illustrates:
# create sldf object
require(sp); require(igraph); require(shp2graph)
d = data.frame(x = c(0,80,100,0,-20,-8,0,3,-10,-5,80,75),
y = c(0,-10,5,0,14,33,0,-4,-10,-12,-10,5),
grp = c(1,1,1,2,2,2,3,3,3,3,4,4))
sl = SpatialLines(list(
Lines(list(Line(d[d$grp == 1,1:2]),
Line(d[d$grp == 4,1:2])), ID=1),
Lines(Line(d[d$grp == 2,1:2]), ID=2),
Lines(Line(d[d$grp == 3,1:2]), ID=3))
)
sldf = SpatialLinesDataFrame(sl, iris[1:3,])
plot(sldf)
Now convert to igraph and plot:
read_sldf = readshpnw(sldf, ELComputed = T)
g = nel2igraph(read_sldf[[2]], read_sldf[[3]], weight=read_sldf[[4]])
plot(g)
Am I right that the off-branch of the first spdf row (sldf[1,]) has been lost? Calling as_edgelist(g) returns 3 rows not 4.
Just change those options in readshpnw:
# create sldf object
require(sp); require(igraph); require(shp2graph)
d = data.frame(x = c(0,80,100,0,-20,-8,0,3,-10,-5,80,75),
y = c(0,-10,5,0,14,33,0,-4,-10,-12,-10,5),
grp = c(1,1,1,2,2,2,3,3,3,3,4,4))
sl = SpatialLines(list(
Lines(list(Line(d[d$grp == 1,1:2]),
Line(d[d$grp == 4,1:2])), ID=1),
Lines(Line(d[d$grp == 2,1:2]), ID=2),
Lines(Line(d[d$grp == 3,1:2]), ID=3))
)
sldf = SpatialLinesDataFrame(sl, iris[1:3,])
plot(sldf)
nodes = readshpnw(sldf, ELComputed = TRUE, Detailed = TRUE, ea.prop = names(sldf))
g = nel2igraph(nodes[[2]], nodes[[3]])
plot(g)

Resources