Related
I have a dataframe that has two columns, x and y (both populated with numbers). I am trying to look at a moving window within the data, and I've done it like this (source):
# Extract just x and y from the original data frame
df <- dat_fin %>% select(x, y)
# Moving window creation
nr <- nrow(df)
windowSize <- 10
windfs <- lapply(seq_len(nr - windowSize + 1), function(i) df[i:(i + windowSize - 1), ])
This lapply creates a list of tibbles that are each 10 (x, y) pairs. At this point, I am trying to compute a single quantity using each of the sets of 10 pairs; my current (not working) code looks like this:
library(shotGroups)
for (f in 1:length(windfs)) {
tsceps[f] = getCEP(windfs[f], accuracy = TRUE)
}
When I run this, I get the error:
Error in getCEP.default(windfs, accuracy = TRUE) : xy must be numeric
My goal is that the variable that I've called tsceps should be a 1 x length(windfs) data frame, each value in which comes from the getCEP calculation for each of the windowed subsets.
I've tried various things with unnest and unlist, all of which were unsuccessful.
What am I missing?
Working code:
df <- dat_fin %>% select(x, y)
nr <- nrow(df)
windowSize <- 10
windfs <- lapply(seq_len(nr - windowSize + 1), function(i) df[i:(i + windowSize - 1), ])
tsceps <- vector(mode = "numeric", length = length(windfs))
library(shotGroups)
for (j in 1:length(windfs)) {
tsceps[j] <- getCEP(windfs[[j]], type = "CorrNormal", CEPlevel = 0.50, accuracy = TRUE)
}
ults <- unlist(tsceps)
ults_cep <- vector(mode = "numeric", length = length(ults))
for (k in 1:length(ults)) {
ults_cep[k] <- ults[[k]]
}
To get this working with multiple type arguments to getCEP, just use additional code blocks for each type required.
Using R, I would like to produce a Keras custom layer that performs 3D Region of Interest (RoI) Pooling. My 3D RoI Pooling function works as expected outside of an R6Class KerasLayer, but I am having trouble integrating it into a Custom Layer. I am not sure if I am using autograph correctly in the below example. I am under the impressing that for me to use the R functionalities (i.e. for loop) in the "call" section of the custom layer I am required to wrap the function in autograph.
Is the below custom layer for applying a 3D RoI Pooling (without training weights) appropriate syntex for an R user of Keras?
The input for the 3d_RoI_Pool layer includes:
(i) output layer from a VNet model,
shape=(None, 16, 16, 40, 1)
(ii) tensor defining the "index" for cropping the input layer to the extent of each RoI,
- tf.Variable 'Variable:0' shape=(1, 1, 6) dtype=float64, numpy=array([[[ 5., 5., 4., 5., 10., 20.]]])
(iii) output dimension for the pooled 3D RoIs.
In this case its c(1, 1, 5, 5, 5, 1) for c(Batch_Size, RoI_Size, X, Y, Z, Channels)
Below are some questions that I am unclear about:
I initially apply layer_cropping_3d to crop each RoI but am not sure if custom layer is allowed to use available R Keras layers? For this reason, in code I performed my own cropping.
Is it the correct use of autograph within custom layer?
Should I instead use the layer_lambda() for such a function (i.e. Wraps arbitrary expression as a layer), given that I do not have trainable weights in the 3D_RoI_Pooling layer?
Below is the Code that gets the output layer from a V-Net CNN and prepares inputs for the custom layer:
# FINAL LAYERS OF V_Net
Output_shortcut = up1_DeConv
Final_Conv <- up1_Concat %>%
# 1st
layer_conv_3d(filters = Filter_Start, kernel_size = Kernel_Conv_Size, padding = "same", name = "Final_Conv") %>%
layer_batch_normalization() %>%
layer_activation("relu")
Final_Conv <- layer_add(c(Final_Conv, Output_shortcut), name = "Final_Conv_ResNet")
# PREPARE INPUT FOR CUSTOM LAYER
n_RoI <- 1
Batch_Size <- 1
Table_RoI <- list(list(c(5, 5), c(4, 5), c(10, 20)))
Table_RoI <- array_reshape(unlist(Table_RoI), c(Batch_Size,n_RoI,6), order="F")
Table_RoI <- tf$Variable(Table_RoI, tf$int16)
output_dim <- c(Batch_Size, n_RoI, 5,5,5, as.numeric(dim(Final_Conv)[5]))
# FEED CUSTOM LAYER TO CNN
Final_Conv2 <- Final_Conv %>% layer_3D_ROI_pooled(Table_RoI=Table_RoI, output_dim= output_dim)
Below is the actual 3D RoI Pooling Custom Layer that generates an error:
# 3D ROIpooled_Layer (R KERAS )
ROIpooled_Layer <- R6::R6Class("KerasLayer",
inherit = KerasLayer,
public = list(
Table_RoI = NULL,
output_dim = NULL,
###############
# INITIALISE
###############
initialize = function(Table_RoI, output_dim) {
self$Table_RoI = Table_RoI
self$output_dim = output_dim
},
###############
# CALL FUNCTION ROIpooled_Function <-
###############
call = autograph(function(x, mask = NULL,
Table_RoI,
output_dim) {
## Input_L ROI_Table
n_Batch <- output_dim[1]
n_RoI <- output_dim[2]
Channels <- output_dim[6]
for(r in 1:n_RoI){
# layer_cropping_3d(Input_L, cropping = list(list(as.numeric(Table_RoI[,r,1]), as.numeric(Table_RoI[,r,2])),
# list(as.numeric(Table_RoI[,r,3]), as.numeric(Table_RoI[,r,4])),
# list(as.numeric(Table_RoI[,r,5]), as.numeric(Table_RoI[,r,6]))))
RoI_Cropped <-x[,(as.numeric(Table_RoI[,r,1])+1):(dim(feature_map_T)[2]-as.numeric(Table_RoI[,r,2])),
(as.numeric(Table_RoI[,r,3])+1):(dim(feature_map_T)[3]-as.numeric(Table_RoI[,r,4])),
(as.numeric(Table_RoI[,r,5])+1):(dim(feature_map_T)[4]-as.numeric(Table_RoI[,r,6])),]
RoI_X_Res <- as.array(k_shape(RoI_Cropped)[2])
RoI_Y_Res <- as.array(k_shape(RoI_Cropped)[3])
RoI_Z_Res <- as.array(k_shape(RoI_Cropped)[4])
New_X_Res <- as.array(output_dim[3])
New_Y_Res <- as.array(output_dim[4])
New_Z_Res <- as.array(output_dim[5])
X_step = RoI_X_Res / New_X_Res
Y_step = RoI_Y_Res / New_Y_Res
Z_step = RoI_Z_Res / New_Z_Res
for(ch in 1:Channels) {
print(paste("ch", ch))
for (k in 1:New_Z_Res) {
print(paste("k", k))
for (j in 1:New_Y_Res) {
print(paste("j", j, "k", k))
for (i in 1:New_X_Res) {
# INDEX X
Index_Xstart <- floor((i-1)*X_step+1)
if(i+1 <= RoI_X_Res){
Index_Xend <- floor((i)*X_step)
}else{
Index_Xend <- RoI_X_Res
}
# INDEX Y
Index_Ystart <- floor((j-1)*Y_step+1)
if(j+1 <= RoI_Y_Res){
Index_Yend <- floor((j)*Y_step)
}else{
Index_Yend <-RoI_Y_Res
}
# INDEX Z
Index_Zstart <- floor((k-1)*Z_step+1)
if(k+1 <= RoI_Z_Res){
Index_Zend <- floor((k)*Z_step)
}else{
Index_Zend <-RoI_Z_Res
}
Max_Pool_X_Value <- as.array(k_max(RoI_Cropped[n_Batch,Index_Xstart:Index_Xend, Index_Ystart:Index_Yend, Index_Zstart:Index_Zend,ch])) # ADD BATCH AND CHANNEL LAYERS
RoI_Pooled_Array[,r,i,j,k,ch] <- Max_Pool_X_Value
}# i LOOP
} # j LOOP
} # k Loop
} #Ch LOOP
} # r LOOP (ROI)
feature_map_ROIpooled <- tf$Variable(RoI_Pooled_Array, tf$int16) # ??? NOT SURE IF RETURN NEEDS TO BE A VARIABLE
return (feature_map_ROIpooled)
}), # END OF AUTO, # END OF CALL ... AUTOGRAPH FUNCTION
##############
# OUTPUT SHAPE
##############
compute_output_shape = function(input_shape) {
list(self$output_dim)
}
)
)
#
###############################
# Create layer wrapper function
###############################
layer_3D_ROI_pooled <- function(object, Table_RoI, output_dim, name = NULL, trainable = TRUE) {
create_layer(ROIpooled_Layer, object, list(Table_RoI = Table_RoI,
output_dim = as.integer(output_dim),
name = name,
trainable = FALSE
))
}
ERROR:
Error in value[[3L]](cond) :
The R function's signature must not contains esoteric Python-incompatible constructs. Detailed traceback: SyntaxError: non-default argument follows default argument (<string>, line 3)
Any help/insight/clarity would be appreciated.
Kind Regards,
Dom
I'm not sure if this is the correct protocol to answer own question but I think I have a working 3D RoI pooling Custom Layer to share. Many mistakes in above but most notable change is in the for loop.... I think I needed to first generate a list of tensors that represent each pooled RoI and then re-shape it to the desired output shape.
################################################################################################
# ROI_3D_pooled_Layer (Custom layer class)
##########################################
ROI_3D_pooled_Layer <- R6::R6Class("KerasLayer",
inherit = KerasLayer,
public = list(
List_RoI = NULL,
output_dim = NULL,
initialize = function(List_RoI, output_dim) {
self$List_RoI = List_RoI
self$output_dim = output_dim
},
call = function(x, mask = NULL) {
List_RoI <- self$List_RoI
output_dim <- self$output_dim
# EXTRACT INFORMATION ON OUTPUT DIMENSION
n_Batch <- as.integer(output_dim[1])
n_RoIs <- as.integer(output_dim[2])
n_Channels <- as.integer(output_dim[6])
New_X_Res <- as.numeric(output_dim[3])
New_Y_Res <- as.numeric(output_dim[4])
New_Z_Res <- as.numeric(output_dim[5])
input_shape <- dim(x)
# EMPTY LIST TO STORE TENSORS
output_list = list()
for(r in 1:n_RoIs){ # LOOP RoIs
# GET one RoI AND CROP INPUT LAYER
if(n_RoIs > 1){
oneList_RoI <- List_RoI[[r]]
}else{
oneList_RoI <- List_RoI
}
RoI_Cropped <-x[,(oneList_RoI[[1]][1]+1):(as.numeric(input_shape[2])-oneList_RoI[[1]][2]),
(oneList_RoI[[2]][1]+1):(as.numeric(input_shape[3])-oneList_RoI[[2]][2]),
(oneList_RoI[[3]][1]+1):(as.numeric(input_shape[4])-oneList_RoI[[3]][1]),]
# GET RoI Dimensions for XYZ
RoI_X_Res <- as.numeric(dim(RoI_Cropped)[2])
RoI_Y_Res <- as.numeric(dim(RoI_Cropped)[3])
RoI_Z_Res <- as.numeric(dim(RoI_Cropped)[4])
# CALCULATE STEPS IN ALL DIMENSIONS FOR POOLING
X_step = RoI_X_Res / New_X_Res
Y_step = RoI_Y_Res / New_Y_Res
Z_step = RoI_Z_Res / New_Z_Res
for(ch in 1:n_Channels) { # LOOP CHANNEL
for (k in 1:New_Z_Res) { # LOOP Z
for (j in 1:New_Y_Res) { # LOOP Y
for (i in 1:New_X_Res) { # LOOP X
# INDEX X
Index_Xstart <- floor((i-1)*X_step+1)
if(i+1 <= RoI_X_Res){
Index_Xend <- floor((i)*X_step)
}else{
Index_Xend <- RoI_X_Res
}
# INDEX Y
Index_Ystart <- floor((j-1)*Y_step+1)
if(j+1 <= RoI_Y_Res){
Index_Yend <- floor((j)*Y_step)
}else{
Index_Yend <-RoI_Y_Res
}
# INDEX Z
Index_Zstart <- floor((k-1)*Z_step+1)
if(k+1 <= RoI_Z_Res){
Index_Zend <- floor((k)*Z_step)
}else{
Index_Zend <-RoI_Z_Res
}
# MAX POOL VOLUME FOR EACH ELEMENT IN FINAL LAYER AND PUT IN EMPTY ARRAY
Max_Pool_X_Value <-k_max(RoI_Cropped[,Index_Xstart:Index_Xend, Index_Ystart:Index_Yend, Index_Zstart:Index_Zend,ch])
# APPEND EACH RoI_Pooled element into a list
output_list <- list.append(output_list, Max_Pool_X_Value)
}# i LOOP
} # j LOOP
} # k Loop
} #Ch LOOP
} # r LOOP (ROI)
# STACK THE OUTPUT LIST AND RESHAPE TO THE DESIRED OUTPUT SIZE
output_Stack <- k_stack(output_list, axis = 1)
feature_map_ROIpooled <- k_reshape(output_Stack, shape = c(n_Batch, n_RoIs, New_X_Res, New_Y_Res, New_Z_Res, n_Channels))
return (feature_map_ROIpooled)
},
compute_output_shape = function(input_shape) {
return(self$output_dim)
}
)
)
# Create layer wrapper function
layer_3D_ROI_pooled <- function(object, List_RoI, output_dim) {
create_layer(ROI_3D_pooled_Layer, object, list(List_RoI = List_RoI,
output_dim = as.integer(output_dim)
))
}
This is my first attempt with a Keras Custom Layer (using R) so please provide suggestions for improvement.
I have a list of lists, containing data.frames, from which I want to select only a few rows. I can achieve it in a for-loop, where I create a sequence based on the amount of rows and select only row indices according to that sequence.
But if I have deeper nested lists it doesn't work anymore. I am also sure, that there is a better way of doing that without a loop.
What would be an efficient and generic approach to sample from nested lists, that vary in their dimensions and contain data.frames or matrices?
## Dummy Data
n1=100;n2=300;n3=100
crdOrig <- list(
list(data.frame(x = runif(n1,10,20), y = runif(n1,40,60))),
list(data.frame(x = runif(n2,10,20), y = runif(n2,40,60))),
list(data.frame(x = runif(n3,10,20), y = runif(n3,40,60)))
)
## Code to opimize
FiltRef <- list()
filterBy = 10
for (r in 1:length(crdOrig)) {
tmp <- do.call(rbind, crdOrig[[r]])
filterInd <- seq(1,nrow(tmp), by = filterBy)
FiltRef[[r]] <- tmp[filterInd,]
}
crdResult <- do.call(rbind, FiltRef)
# Plotting
crdOrigPl <- do.call(rbind, unlist(crdOrig, recursive = F))
plot(crdOrigPl[,1], crdOrigPl[,2], col="red", pch=20)
points(crdResult[,1], crdResult[,2], col="green", pch=20)
The code above works also if a list contains several data.frames (data below).
## Dummy Data (Multiple DF)
crdOrig <- list(
list(data.frame(x = runif(n1,10,20), y = runif(n1,40,60)),
data.frame(x = runif(n1,10,20), y = runif(n1,40,60))),
list(data.frame(x = runif(n2,10,20), y = runif(n2,40,60))),
list(data.frame(x = runif(n3,10,20), y = runif(n3,40,60)))
)
But if a list contains multiple lists, it throws an error trying to bind the result (FiltRef) together.
The result can be a data.frame with 2 columns (x,y) - like crdResult or a one dimensional list like FiltRef (from the first example)
## Dummy Data (Multiple Lists)
crdOrig <- list(
list(list(data.frame(x = runif(n1,10,20), y = runif(n1,40,60))),
list(data.frame(x = runif(n1,10,20), y = runif(n1,40,60)))),
list(data.frame(x = runif(n2,10,20), y = runif(n2,40,60))),
list(data.frame(x = runif(n3,10,20), y = runif(n3,40,60)))
)
+1 and Thank you all for your brilliant answers! They all work and there is a lot to learn from each one of them. I will give this one to #Gwang-Jin Kim as his solution is the most flexible and extensive, although they all deserve to be checked!
Preparation and implementation of flatten
Well, there are many other answers which are in principle the same.
I meanwhile implemented for fun the flattening of nested lists.
Since I am thinking in Lisp:
Implemented first car and cdr from lisp.
car <- function(l) {
if(is.list(l)) {
if (null(l)) {
list()
} else {
l[[1]]
}
} else {
error("Not a list.")
}
}
cdr <- function(l) {
if (is.list(l)) {
if (null(l) || length(l) == 1) {
list()
} else {
l[2:length(l)]
}
} else {
error("Not a list.")
}
}
Some predicate functions:
null <- function(l) length(l) == 0
# this is Lisp's `null` checking whether list is empty (`length(l) == 0`)
# R's `is.null()` checks for the value NULL and not `length(obj) == 0`
# upon #Martin Morgan's comment removed other predicate functions
# thank you #Martin Morgan!
# instead using `is.data.frame()` and `is.list()`, since they are
# not only already there but also safer.
Which are necessary to build flatten (for data frame lists)
flatten <- function(nested.list.construct) {
# Implemented Lisp's flatten tail call recursively. (`..flatten()`)
# Instead of (atom l) (is.df l).
..flatten <- function(l, acc.l) {
if (null(l)) {
acc.l
} else if (is.data.frame(l)) { # originally one checks here for is.atom(l)
acc.l[[length(acc.l) + 1]] <- l
acc.l # kind of (list* l acc.l)
} else {
..flatten(car(l), ..flatten(cdr(l), acc.l))
}
}
..flatten(nested.list.construct, list())
}
# an atom is in the widest sence a non-list object
After this, the actual function is defined using a sampling function.
Defining sampling function
# helper function
nrow <- function(df) dim(df)[1L]
# sampling function
sample.one.nth.of.rows <- function(df, fraction = 1/10) {
# Randomly selects a fraction of the rows of a data frame
nr <- nrow(df)
df[sample(nr, fraction * nr), , drop = FALSE]
}
The actual collector function (from nested data-frame-lists)
collect.df.samples <- function(df.list.construct, fraction = 1/10) {
do.call(rbind,
lapply(flatten(df.list.construct),
function(df) sample.one.nth.of.rows(df, fraction)
)
)
}
# thanks for the improvement with `do.call(rbind, [list])` #Ryan!
# and the hint that `require(data.table)`
# `data.table::rbindlist([list])` would be even faster.
collect.df.samples first flattens the nested list construct of data frames df.list.construct to a flat list of data frames. It applies the function sample.one.nth.of.rows to each elements of the list (lapply). There by it produces a list of sampled data frames (which contain the fraction - here 1/10th of the original data frame rows). These sampled data frames are rbinded across the list. The resulting data frame is returned. It consists of the sampled rows of each of the data frames.
Testing on example
## Dummy Data (Multiple Lists)
n1=100;n2=300;n3=100
crdOrig <- list(
list(list(data.frame(x = runif(n1,10,20), y = runif(n1,40,60))),
list(data.frame(x = runif(n1,10,20), y = runif(n1,40,60)))),
list(data.frame(x = runif(n2,10,20), y = runif(n2,40,60))),
list(data.frame(x = runif(n3,10,20), y = runif(n3,40,60)))
)
collect.df.samples(crdOrig, fraction = 1/10)
Refactoring for later modifications
By writing the collect.df.samples function to:
# sampler function
sample.10th.fraction <- function(df) sample.one.nth.of.rows(df, fraction = 1/10)
# refactored:
collect.df.samples <-
function(df.list.construct,
df.sampler.fun = sample.10th.fraction) {
do.call(rbind,
lapply(flatten(df.list.construct), df.sampler.fun))
}
One can make the sampler function replace-able.
(And if not: By changing the fraction parameter, one can enhance or reduce amount of rows collected from each data frame.)
The sampler function is in this definition easily exchangable
For choosing every nth (e.g. every 10th) row in the data frame, instead of a random sampling,
you could e.g. use the sampler function:
df[seq(from=1, to=nrow(df), by = nth), , drop = FALSE]
and input it as df.sampler.fun = in collect.df.samples. Then, this function will be applied to every data frame in the nested df list object and collected to one data frame.
every.10th.rows <- function(df, nth = 10) {
df[seq(from=1, to=nrow(df), by = nth), , drop = FALSE]
}
a.10th.of.all.rows <- function(df, fraction = 1/10) {
sample.one.nth.of.rows(df, fraction)
}
collect.df.samples(crdOrig, a.10th.of.all.rows)
collect.df.samples(crdOrig, every.10th.rows)
I would just flatten the whole darn thing and work on a clean list.
library(rlist)
out <- list.flatten(y)
# prepare a vector for which columns belong together
vc <- rep(1:(length(out)/2), each = 2)
vc <- split(1:length(vc), vc)
# prepare the final list
ll <- vector("list", length(unique(vc)))
for (i in 1:length(vc)) {
ll[[i]] <- as.data.frame(out[vc[[i]]])
}
result <- lapply(ll, FUN = function(x) {
x[sample(1:nrow(x), size = 10, replace = FALSE), ]
})
do.call(rbind, result)
x y
98 10.32912 52.87113
52 16.42912 46.07026
92 18.85397 46.26403
90 12.04884 57.79290
23 18.20997 40.57904
27 18.98340 52.55919
...
Here's an answer in base borrowing from a custom "rapply" function mentioned here rapply to nested list of data frames in R
df_samples<-list()
i=1
f<-function(x) {
i<<-i+1
df_samples[[i]]<<-x[sample(rownames(x),10),]
}
recurse <- function (L, f) {
if (inherits(L, "data.frame")) {
f(L) }
else lapply(L, recurse, f)
}
recurse(crdOrig, f)
res<-do.call("rbind", df_samples)
I too would flatten the list-of-lists into a standard representation (and do all analysis on the flattened representation, not just the subseting), but keep track of relevant indexing information, e.g.,
flatten_recursive = function(x) {
i <- 0L
.f = function(x, depth) {
if (is.data.frame(x)) {
i <<- i + 1L
cbind(i, depth, x)
} else {
x = lapply(x, .f, depth + 1L)
do.call(rbind, x)
}
}
.f(x, 0L)
}
The internal function .f() visits each element of a list. If the element is a data.frame, it adds a unique identifier to index it. If it's a list, then it calls itself on each element of the list (incrementing a depth counter, in case this is useful, one could also add a 'group' counter) and then row-binds the elements. I use an internal function so that I can have a variable i to increment across function calls. The end result is a single data frame with a index to use for referencing the original results.
> tbl <- flatten_recursive(crdOrig) %>% as_tibble()
> tbl %>% group_by(i, depth) %>% summarize(n())
# A tibble: 4 x 3
# Groups: i [?]
i depth `n()`
<int> <int> <int>
1 1 3 100
2 2 3 100
3 3 2 300
4 4 2 100
> tbl %>% group_by(i) %>% slice(seq(1, n(), by = 10)) %>% summarize(n())
# A tibble: 4 x 2
i `n()`
<int> <int>
1 1 10
2 2 10
3 3 30
4 4 10
The overall pattern of .f() can be adjusted for additional data types, e.g., (some details omitted)
.f <- function(x) {
if (is.data.frame(x)) {
x
} else if (is.matrix(x)) {
x <- as.data.frame(x)
setNames(x, c("x", "y"))
} else {
do.call(rbind, lapply(x, .f))
}
}
Consider a recursive call conditionally checking if first item is a data.frame or list class.
stack_process <- function(lst){
if(class(lst[[1]]) == "data.frame") {
tmp <- lst[[1]]
}
if(class(lst[[1]]) == "list") {
inner <- lapply(lst, stack_process)
tmp <- do.call(rbind, inner)
}
return(tmp)
}
new_crdOrig <- lapply(crdOrig, function(x) {
df <- stack_process(x)
filterInd <- seq(1, nrow(df), by = filterBy)
return(df[filterInd,])
})
final_df <- do.call(rbind, new_crdOrig)
I need to execute this code many times in order to get 45 different matrices at the end: mat[j], j=1:45.
Not sure how to use "for-loop" to achieve that, will be grateful for any tips.
Data files are stored here, year-by-year https://intl-atlas-downloads.s3.amazonaws.com/index.html
library(readstata13)
library(diverse)
library(plyr)
for (j in 1:45) {
dat <- read.dta13(file.choose())
data = aggregate(dat$export_value, by = list(dat$exporter,dat$commoditycode), FUN = sum)
colnames(data) = c("land","product","value")
dt = split(data, f = data$product)
land = as.data.frame(sort(unique(data[, 1])))
nds = seq(1, nrow(land), by = 1)
texmat = cbind(nds, land)
colnames(texmat) = c("num", "land")
for (i in 1:length(unique(data[, 2]))) {
(join(texmat, dt[[i]], by = "land", type = "left")$value)
}
mt = sapply(1:length(unique(data[, 2])), function(i) join(texmat, dt[[i]], by = "land", type = "left")$value)
colnames(mt) = unique(data[, 2])
rownames(mt) = sort(unique(data[, 1]))
mt[is.na(mt)] = 0
rcamat=values(mt, category_row = FALSE, norm = "rca",filter = 1, binary = TRUE)
rcamat[is.na(rcamat)] = 0
tmat = rcamat[rowSums(rcamat) != 0, , drop = TRUE]
mat = t(tmat)
}
It looks like you're almost there with the for loop. You just need to add 2 concepts:
1) Creating a list of matrices to read at the start. A construction like:
filenames <- paste0('H0_',1995:2016,'.dta')
filenames <- c(filenames,paste0('S2_final_',1962:2016,'.dta'))
that creates a vector of the files you want to read will allow you to replace file.choose with something like the following (inside the loop):
dat <- read.dta13(paste0('/path/to/directory/with/files/',filenames[i]))
This way you can grab a new file with each loop iteration.
2) Storing the output matrices at the end of the loop. You can do this either by putting them all in a list, or by using assign to create a collection of objects. I prefer the list approach:
#before the for loop initialize a NULL list:
mats <- NULL
#at the end of the loop, (after mat = t(tmat) but before the close bracket) add this line to add it to the list
mats[[i]] <- mat
This will create a list mats with mats[[1]] holding the first matrix, mats[[2]] holding the second, and so on.
You could alternatively create a bunch of objects like so:
#at the end of the for loop add
assign(paste0('mat_',i),mat)
Which will create mat_1, mat_2, and so on as separate objects. A full implementation would look something like this:
library(readstata13)
library(diverse)
library(plyr)
setwd('/path/to/files/')
filenames <- paste0('H0_',1995:2016,'.dta')
filenames <- c(filenames,paste0('S2_final_',1962:2016,'.dta'))
#you'll have to prune this to the files you actually want, as this list is more than 45
finished_matrices <- NULL
for (j in 1:45) {
dat <- read.dta13(filenames[i]) #pickup
data = aggregate(dat$export_value, by = list(dat$exporter,dat$commoditycode), FUN = sum)
colnames(data) = c("land","product","value")
dt = split(data, f = data$product)
land = as.data.frame(sort(unique(data[, 1])))
nds = seq(1, nrow(land), by = 1)
texmat = cbind(nds, land)
colnames(texmat) = c("num", "land")
for (i in 1:length(unique(data[, 2]))) {
(join(texmat, dt[[i]], by = "land", type = "left")$value)
}
mt = sapply(1:length(unique(data[, 2])), function(i) join(texmat, dt[[i]], by = "land", type = "left")$value)
colnames(mt) = unique(data[, 2])
rownames(mt) = sort(unique(data[, 1]))
mt[is.na(mt)] = 0
rcamat=values(mt, category_row = FALSE, norm = "rca",filter = 1, binary = TRUE)
rcamat[is.na(rcamat)] = 0
tmat = rcamat[rowSums(rcamat) != 0, , drop = TRUE]
mat = t(tmat)
finished_matrices[[i]] <- mat
}
I have to admit I am new to coding functions, wherefore I need your help.
This code shall provide a Bayesian criterion (pBIC) following an ANOVA and automatically read the necessary information from the ANOVA table.
I have two functions
## This is function 1
test_pBIC1 <- function(name,c){ ## name is the name of the ANOVA table, e.g. "ANOVA_ALL_wake" and c is the number of conditions
c = c
data = get(name)
i = length(data$ANOVA$Effect)
result1 = data.frame(name,c,i)
return(result1)
}
## ----------------------------------------------------
## I now run and save the result of Function 1
result1 <- test_pBIC1("ANOVA_ALL_wake",3) ## for test
## ----------------------------------------------------
## This is function 2
test_pBIC2 <- function(result1){
name1 <- as.character(result1$name)
data = get(name1)
count <- as.vector(result1$i)
for (i in 1:count){
s = (data$ANOVA$DFd[i]/data$ANOVA$DFn[i])+1
n = s*(result1[2]-1)
SSE1 = data$ANOVA$SSd[i]
SSE0 = data$ANOVA$SSd[i]+data$ANOVA$SSn[i]
deltaBIC = (n * log(SSE1/SSE0))+(data$ANOVA$DFn[i]*log(n))
BF01 = exp(deltaBIC/2)
pH0_D = (BF01/(1+BF01))
pH1_D = (1-pH0_D)
result = data.frame(pH0_D, pH1_D)
colnames(result) <- c("pH0_D", "pH1_D")
rownames(result) <- c(data$ANOVA$Effect[i])
if (i == 1){
result_all <- result
} else {
result_all <- rbind (result_all, result)
}
}
return(result_all)
}
## ------------------------------------------------------
Now I run function 2 and receive the result
test_pBIC2(result1)
Now while this does it's job, I would like to link the two functions so I just have to give the name and the parameter c and still get result_all in the end, i.e. without having to run the two functions after each other.
I have tried to come up with this solution:
test_pBIC <- function(name,c){ ## pass arguments as: test_pBIC(name = "ANOVA_all_wake", c = 3)
c = c
name = name
result1 = data.frame(name,c)
# return(result1)
test_pBIC1 <- function(result1){
c = as.vector(result1$c)
name1 <- as.character(result1$name)
data = get(name)
i = length(data$ANOVA$Effect)
result2 = data.frame(name,c,i)
# return(result2)
test_pBIC2 <- function(result2){
name1 <- as.character(result2$name)
data = get(name1)
count <- as.numeric(integer$i)
for (i in 1:count){
s = (data$ANOVA$DFd[i]/data$ANOVA$DFn[i])+1
n = s*(result1[2]-1)
SSE1 = data$ANOVA$SSd[i]
SSE0 = data$ANOVA$SSd[i]+data$ANOVA$SSn[i]
deltaBIC = (n * log(SSE1/SSE0))+(data$ANOVA$DFn[i]*log(n))
BF01 = exp(deltaBIC/2)
pH0_D = (BF01/(1+BF01))
pH1_D = (1-pH0_D)
result = data.frame(pH0_D, pH1_D)
colnames(result) <- c("pH0_D", "pH1_D")
rownames(result) <- c(data$ANOVA$Effect[i])
if (i == 1){
result_all <- result
} else {
result_all <- rbind (result_all, result)
}
}
return(result_all)
}
}
}
test_pBIC("ANOVA_all_wake", 3)
However, I just get NOTHING...and I cannot find the mistake :(.
Thanks!!
Not entirely sure what the issue is, a reproducible example would help a lot. If you want to just combine it into one function you could do...
test_overall <- function(name,c) {
c = c
data = get(name)
i = length(data$ANOVA$Effect)
result1 = data.frame(name,c,i)
name1 <- as.character(result1$name)
data = get(name1)
count <- as.vector(result1$i)
for (i in 1:count){
s = (data$ANOVA$DFd[i]/data$ANOVA$DFn[i])+1
n = s*(result1[2]-1)
SSE1 = data$ANOVA$SSd[i]
SSE0 = data$ANOVA$SSd[i]+data$ANOVA$SSn[i]
deltaBIC = (n * log(SSE1/SSE0))+(data$ANOVA$DFn[i]*log(n))
BF01 = exp(deltaBIC/2)
pH0_D = (BF01/(1+BF01))
pH1_D = (1-pH0_D)
result = data.frame(pH0_D, pH1_D)
colnames(result) <- c("pH0_D", "pH1_D")
rownames(result) <- c(data$ANOVA$Effect[i])
if (i == 1){
result_all <- result
} else {
result_all <- rbind (result_all, result)
}
}
return(result_all)
}
In your first code example you've created functions test_pBIC1 and test_pBIC2. If you want to create a function test_pBIC that calls both, you can just define a function that calls both:
test_pBIC <- function(name, c) test_pBIC2(test_pBIC1(name, c))