Alternative to torch.nn.functional.fold when working with 5D inputs - patch

I'm currently working on MRI data where each batch is of dimension:
(Batch Size, Channels, Height, Width, Depth)
and I'm trying to make patches of 6x6x2.
I'm able to do the unfold part, but the torch.nn.functional.fold documentation explicitly indicates that the input needs to be (BS, C, H, W), and I wonder whether a regular view or reshape will preserve the information I computed patch-wise.
Here is a code snippet to illustrate the issue.
# Normalise |inputs / alpha| inside non-overlapping 6x6x2 patches of a 5-D
# (batch, channel, height, width, depth) tensor, so each patch sums to 1.
alpha = 0.5
inputs = torch.randn(1, 1, 96, 96, 32)
# Fixed: this name was misspelt `heigth`, which left `height` undefined below.
bs, channels, height, width, depth = inputs.size()
# Sum over the channel axis, then restore a singleton channel dimension.
ratio = torch.abs(inputs / alpha).sum(dim=1).reshape(bs, 1, height, width, depth)
pSize = (6, 6, 2)
# Window size == step on every spatial axis, so the patches do not overlap.
ratioPatches = (
    ratio.unfold(2, pSize[0], pSize[0])
    .unfold(3, pSize[1], pSize[1])
    .unfold(4, pSize[2], pSize[2])
)  # size ([1, 1, 16, 16, 16, 6, 6, 2])
# Per-patch total, broadcast back over the three trailing in-patch axes.
ratioSum = (
    ratioPatches.sum((5, 6, 7))
    .reshape(bs, 1, height // pSize[0], width // pSize[1], depth // pSize[2], 1, 1, 1)
    .expand_as(ratioPatches)
)
probsPatches = ratioPatches / ratioSum
# Flatten each patch to a vector and lay the result out as
# (batch, elements per patch, number of patches).
probsPatches = (
    probsPatches.reshape(
        bs,
        1,
        height // pSize[0],
        width // pSize[1],
        depth // pSize[2],
        pSize[0] * pSize[1] * pSize[2],
    )
    .permute(0, 1, 5, 2, 3, 4)
    .squeeze(1)
    .reshape(bs, pSize[0] * pSize[1] * pSize[2], -1)
)  # size ([1, 72, 4096])
# NOTE(review): this reshape restores the SHAPE only. The memory order here is
# (patch element, patch index), so values do not land back at their original
# (h, w, d) locations. To invert the unfold, interleave the grid and in-patch
# axes of the 8-D patch tensor instead, e.g.
#   (ratioPatches / ratioSum).permute(0, 1, 2, 5, 3, 6, 4, 7)
#                            .reshape(bs, 1, height, width, depth)
probs = probsPatches.reshape(bs, 1, height, width, depth)  # size ([1, 1, 96, 96, 32])

Related

Plot tree-like data vs time (including 1-child nodes)

I have some simple tree-like edge-data (e.g. data below) with the following characteristics:
there is a root node (0)
all non-root nodes have exactly one parent, but 0-to-many children (including 1)
there is a time t associated with each edge (or equivalently each unique node in i.fr)
we can compute dt as below, if helpful
I want to plot these data as a tree, with time along one dimension, so that edge lengths are proportional to dt (e.g. sketch below). How can I do this in R?
I explored ape and data.tree packages, and ggtree, but none seem to provide interface for creating tree objects from edge lists, and I think my data (with 1-child nodes) are rejected as some types of trees?
Sample Data
# Example edge list: one row per edge, running from node i.fr to node i.to.
# t is the time attached to the edge; dt is NA on the first row.
tree <- data.frame(
  t    = c(0, 1, 1, 4, 5, 7),
  i.fr = c(0, 1, 1, 2, 3, 5),
  i.to = c(1, 2, 3, 4, 5, 6),
  dt   = c(NA, 1, 1, 3, 4, 2)
)
Fake phylo
# Hand-assembled object of class "phylo", built straight from the edge list in
# `tree` (columns i.fr, i.to, dt) rather than through a package constructor.
fake.phylo <- list(
  edge = cbind(tree$i.fr, tree$i.to), # two-column matrix: from-node, to-node
  tip.label = c("4", "6"),
  Nnode = 5,
  edge.length = tree$dt
)
class(fake.phylo) <- "phylo"
phylo.tree <- as.phylo(fake.phylo) # works 😈
# Plot the phylo object, not the `tree` data frame: the error quoted below is
# about the tree's edge matrix, which only the phylo object has.
plot(phylo.tree) # (!) tree badly conformed; cannot plot. Check the edge matrix.
Desired Result
Here is an option using ggraph; I have no idea how well this generalises for trees with more than one split.
library(ggraph)
# Draw the edge list in `tree` as a directed graph with a labelled time axis.
# The axis extent is the largest non-missing dt; tick labels run in reverse.
t_max <- max(tree$dt, na.rm = TRUE)
time_ticks <- c(0:t_max)
edge_graph <- graph_from_data_frame(select(tree, i.fr, i.to), directed = TRUE)
ggraph(edge_graph) +
  geom_node_point() +
  geom_edge_link(
    arrow = arrow(length = unit(4, "mm")),
    end_cap = circle(3, "mm")
  ) +
  geom_node_label(aes(label = name)) +
  theme_minimal() +
  scale_y_continuous(
    limits = c(0, t_max),
    breaks = time_ticks,
    labels = rev(time_ticks),
    minor_breaks = NULL,
    position = "left"
  ) +
  scale_x_continuous(expand = c(0.1, 0.1), breaks = NULL) +
  labs(x = "", y = "Time")
You can also force coord_fixed() which gives you a narrower version
# ... Same as before +
coord_fixed()
With help from here, a reasonably full-featured solution is:
library('ggplot2')
# Legacy global tip counter. Kept so that code which still reads `.tip.pos`
# finds it; .recurse.tree() below no longer reads or writes it.
.tip.pos <- 0
.recurse.tree <- function(ii, par = 0, gen = 0) {
  # Walk the tree depth-first from `par` and return a 5-row matrix with one
  # column per visited node (a node always precedes its descendants). Rows:
  #   1: node index    2: generation    3: position
  #   4: number of direct children      5: number of descendants
  # Args:
  #   ii:  two-column matrix of edges (column 1 = parent, column 2 = child)
  #   par: index of the node to start from
  #   gen: generation assigned to `par`
  # Tips receive positions 1, 2, ... in visiting order; an internal node is
  # placed at the midpoint of the range spanned by its direct children.
  #
  # The counter is local to each top-level call. It used to be a global, so a
  # second call continued numbering where the first one had stopped.
  tip.count <- 0
  walk <- function(ii, par, gen) {
    b.par <- ii[, 1] == par
    i.chi <- ii[b.par, 2]
    n.chi <- length(i.chi)
    if (n.chi == 0) {
      # `<<-` updates the counter in the enclosing call, not a global
      tip.count <<- tip.count + 1
      return(matrix(c(par, gen, tip.count, 0, 0), nrow = 5))
    }
    # the edges leaving `par` are used up here; recurse on the remaining ones
    ii.rest <- ii[!b.par, , drop = FALSE]
    mat.chi <- matrix(nrow = 5, unlist(lapply(i.chi, function(i) {
      walk(ii.rest, par = i, gen = gen + 1)
    })))
    # midpoint of the direct children only, not of all descendants
    par.pos <- mean(range(mat.chi[3, mat.chi[1, ] %in% i.chi]))
    matrix(c(par, gen, par.pos, n.chi, ncol(mat.chi), mat.chi), nrow = 5)
  }
  walk(ii, par, gen)
}
plot.tree <- function(tree, ...) {
  # Build a ggplot of a transmission tree against time.
  # Args:
  #   tree: data frame with (at least) columns par, chi and t
  #   ...:  further aesthetics, given as strings, forwarded to the point
  #         layer's aes_string() (e.g. color = 'gen')
  # Returns the ggplot object invisibly.
  layout <- .recurse.tree(as.matrix(tree[c("par", "chi")]))

  # Add a dummy row in front; with the column order t, par, chi, dt used by
  # the example data this is a root row with chi = 0.
  tree <- rbind(c(-1, -1, 0, NA), tree)
  # Put the rows into the order in which the nodes were visited.
  tree <- tree[match(layout[1, ], tree$chi), ]

  tree$gen <- factor(layout[2, ]) # generation
  tree$pos <- layout[3, ] # position
  tree$child.direct <- layout[4, ] # n direct children
  tree$child.total <- layout[5, ] # n total children

  # Row of each node's parent, used to look up where an edge starts.
  parent.row <- match(tree$par, tree$chi)
  tree$pos.par <- tree$pos[parent.row]
  tree$t.par <- tree$t[parent.row]

  edge.layer <- geom_segment(
    aes_string(y = "t.par", x = "pos.par", xend = "pos", yend = "t"),
    alpha = .5
  )
  node.layer <- geom_point(aes_string(x = "pos", y = "t", ...))
  g <- ggplot(tree) +
    edge.layer +
    node.layer +
    scale_x_continuous(labels = NULL, breaks = NULL) +
    labs(x = "")
  invisible(g)
}
# Example data: one row per edge par -> chi, with the time t of the edge.
tree <- data.frame(
  t   = c(0, 1, 1, 4, 5, 7),
  par = c(0, 1, 1, 2, 3, 5),
  chi = c(1, 2, 3, 4, 5, 6),
  dt  = c(NA, 1, 1, 3, 4, 2)
)
g <- plot.tree(tree, color = "gen")
# Pass the plot explicitly and spell the argument names out in full rather
# than relying on the implicit "last plot" and on partial matching of w / h.
ggsave("Rplots.png", plot = g, width = 4, height = 4)
Result
Extending
The code above can be easily modified to give some nice results, e.g. with bigger data, custom aes, and ggMarginal:

alternative of for loop in R

cell_support_xyz <- function(level, zero) {
  # Print, for every cell of a grid, its support set: a 0/1 flag per dimension
  # that is 0 where the cell's index equals `zero` in that dimension, else 1.
  # Args:
  #   level: number of levels per dimension (any number of dimensions)
  #   zero:  the "zero" level of each dimension; same length as `level`
  # Cells are printed with the LAST index varying fastest. Returns NULL
  # invisibly; the output is the printed text.
  stopifnot(length(level) == length(zero))
  # seq_len() rather than 1:n, so a dimension with 0 levels yields no cells
  axes <- lapply(level, seq_len)
  # expand.grid() varies its FIRST factor fastest, so reverse the dimensions
  # going in and reverse the columns coming out.
  grid <- expand.grid(rev(axes))
  cells <- as.matrix(grid[rev(seq_along(grid))])
  if (nrow(cells) == 0L) {
    return(invisible(NULL))
  }
  zero_mat <- matrix(zero, nrow = nrow(cells), ncol = ncol(cells), byrow = TRUE)
  support <- +(cells != zero_mat)
  join_rows <- function(m) apply(m, 1L, paste, collapse = ", ")
  cat(
    paste0(
      "cell (", join_rows(cells), ") --> support set = (",
      join_rows(support), ")\n"
    ),
    sep = ""
  )
  invisible(NULL)
}
#Example 1
l<-c(2,3,2)
z<-c(1,1,1)
> cell_support_xyz(l,z)
cell (1, 1, 1) --> support set = (0, 0, 0)
cell (1, 1, 2) --> support set = (0, 0, 1)
cell (1, 2, 1) --> support set = (0, 1, 0)
cell (1, 2, 2) --> support set = (0, 1, 1)
cell (1, 3, 1) --> support set = (0, 1, 0)
cell (1, 3, 2) --> support set = (0, 1, 1)
cell (2, 1, 1) --> support set = (1, 0, 0)
cell (2, 1, 2) --> support set = (1, 0, 1)
cell (2, 2, 1) --> support set = (1, 1, 0)
cell (2, 2, 2) --> support set = (1, 1, 1)
cell (2, 3, 1) --> support set = (1, 1, 0)
cell (2, 3, 2) --> support set = (1, 1, 1)
The above code works just fine. But I want to avoid for loop. Here I used 3 for loops (because the length of both argument vectors is 3). If the length of vectors increases or decreases the function won't work (I need to adjust for loops accordingly); which is why I want to replace for-loop with some efficient alternative that works for any length. Any suggestion?
Here is one way to remove the for loops and make the solution flexible enough for input of any length.
We use expand.grid to create all possible combinations of level and use apply rowwise to create a string to print.
cell_support_xyz <- function(level, zero) {
  # Print every cell of the grid described by `level` together with its
  # support set: 0 where the cell's index equals `zero` in that dimension,
  # 1 otherwise. Works for any number of dimensions.
  # Cells are printed in expand.grid() order, i.e. FIRST index fastest.
  # Returns NULL invisibly; the output is the printed text.
  #
  # seq_len() rather than seq(), so a dimension with 0 levels gives no cells
  # instead of the indices 1, 0.
  cells <- as.matrix(expand.grid(lapply(level, seq_len)))
  if (nrow(cells) == 0L) {
    return(invisible(NULL))
  }
  # Compare on a matrix: apply() over a data frame converts it row by row,
  # and apply() over zero rows would still call the function once.
  zero_mat <- matrix(
    rep_len(zero, ncol(cells)),
    nrow = nrow(cells), ncol = ncol(cells), byrow = TRUE
  )
  support <- +(cells != zero_mat)
  join_rows <- function(m) apply(m, 1L, toString)
  cat(
    sprintf(
      'cell (%s) --> support set = (%s)\n',
      join_rows(cells), join_rows(support)
    ),
    sep = ""
  )
  invisible(NULL)
}
l<-c(2,3,2)
z<-c(1,1,1)
cell_support_xyz(l, z)
#cell (1, 1, 1) --> support set = (0, 0, 0)
#cell (2, 1, 1) --> support set = (1, 0, 0)
#cell (1, 2, 1) --> support set = (0, 1, 0)
#cell (2, 2, 1) --> support set = (1, 1, 0)
#cell (1, 3, 1) --> support set = (0, 1, 0)
#cell (2, 3, 1) --> support set = (1, 1, 0)
#cell (1, 1, 2) --> support set = (0, 0, 1)
#cell (2, 1, 2) --> support set = (1, 0, 1)
#cell (1, 2, 2) --> support set = (0, 1, 1)
#cell (2, 2, 2) --> support set = (1, 1, 1)
#cell (1, 3, 2) --> support set = (0, 1, 1)
#cell (2, 3, 2) --> support set = (1, 1, 1)
You can do that in 2 steps:
# Example inputs: levels per dimension (l) and the "zero" level of each (z).
l<-c(2,3,2)
z<-c(1,1,1)
# One row per cell: every combination of the indices 1..l[k].
cells <- expand.grid(lapply(l, seq))
# Row by row, !(x == z) is TRUE where the cell differs from z, and 1L* turns
# that into 0/1 integers. apply() returns one column per input row, hence t().
t(apply(cells, 1, function(x) 1L*!(x == z)))
cells contains all the combinations. If the order matters, you can simply reorder it:
cells <- dplyr::arrange(cells, Var1, Var2, Var3)
Then, for each row (apply(,1,)) you can use == which is already vectorized to compare the entire row to the entire z vector.
Multiplying by 1L makes it integer, same as +.

Multichannel Convolutional Neural Network in Tensorflow

I have a signal with dimensions (samples, height, width, channels) and a kernel with dimensions (filter_height, filter_width, in_channels, out_channels), which I use in a convolution operation as:
tf.nn.conv2d(signal, kernel, strides=[1, 1, 1, 1], padding="VALID")
However, width = filter_width and channels = in_channels because I'm using text as the signal. What I want to check is whether convolving a multichannel signal is the same as concatenating its channels along the width dimension and convolving the result as a single-channel signal (as in the next figure). The outputs differ, however, even though the kernel values are shared. To explain better, here is some code:
import tensorflow as tf
# Compare two ways of convolving a pair of embeddings with a shared pair of
# kernels: stacked as channels ("multi") versus concatenated along the width
# axis ("concat").
sess = tf.InteractiveSession()
samples = 2
n = 10  # signal height (second axis of the embeddings below)
We = 5  # signal width; also the width of each kernel
filter_size = 4  # kernel height
num_filters = 20  # output channels
embedding1 = tf.Variable(tf.truncated_normal([samples,n,We,1], stddev=0.1))
embedding2 = tf.Variable(tf.truncated_normal([samples,n,We,1], stddev=0.1))
# Same two embeddings, joined along the channel axis (3) or the width axis (2).
multi_X = tf.concat([embedding1, embedding2], 3)#shape=(2, 10, 5, 2)
concat_X = tf.concat([embedding1, embedding2], 2)#shape=(2, 10, 10, 1)
W1 = tf.Variable(tf.truncated_normal([filter_size, We, 1, num_filters], stddev=0.1))
W2 = tf.Variable(tf.truncated_normal([filter_size, We, 1, num_filters], stddev=0.1))
# Same two kernels, joined along in_channels (2) or filter_width (1), so each
# layout pairs embedding1 with W1 and embedding2 with W2.
multi_W = tf.concat([W1, W2], 2)#shape=(4, 5, 2, 20)
concat_W = tf.concat([W1, W2], 1)#shape=(4, 10, 1, 20)
multi_conv = tf.nn.conv2d(multi_X, multi_W, strides=[1, 1, 1, 1], padding="VALID")#shape=(2, 7, 1, 20)
concat_conv = tf.nn.conv2d(concat_X, concat_W, strides=[1, 1, 1, 1], padding="VALID")#shape=(2, 7, 1, 20)
init_op = tf.global_variables_initializer()
sess.run(init_op)
#sess.run(multi_conv) == sess.run(concat_conv) may be True
# NOTE(review): both layouts sum the same products, so the results should
# agree up to floating-point rounding only; an element-wise == can still be
# False. Presumably a tolerance check (e.g. numpy.allclose) is wanted; confirm.

Optimizing an optimization

I have a discrete data set with multiple peaks. I am trying to generate an automatic method for fitting a Gaussian curve to an unknown number of data points. The ultimate goal is to provide a measure of uncertainty on the location (x-axis) of the peak in the y-axis, using the sigma value of a best-fit Gaussian curve. The full data set has a half dozen or so unique peaks of various shapes.
Here is a sample data set.
# Sample data: one likelihood value per age, with a single peak of 20 at
# age 12. The original call was syntactically broken (the likelihood vector
# sat outside data.frame()) and paired 50 ages with 30 values; data.frame()
# needs columns of matching length, so age now runs over the 30 values given.
working <- data.frame(
  age = seq(1, 30),
  likelihood = c(
    10, 10, 10, 10, 10, 12, 14, 16, 17, 18,
    19, 20, 19, 18, 17, 16, 14, 12, 11, 10,
    10, 9, 8, 8, 8, 8, 7, 6, 6, 6
  )
)
Here is the Gaussian fitting procedure. I found it on SO, but I can't find the page I took it from again, so please forgive the lack of link and citation.
fitG <- function(x, y, mu, sig, scale) {
  # Least-squares fit of a scaled Gaussian, scale * dnorm(x, mu, sig), to the
  # points (x, y).
  # Args:
  #   x, y:  numeric vectors of equal length (the data to fit)
  #   mu, sig, scale: starting values for the mean, standard deviation and
  #          height multiplier
  # Returns the list produced by optim(); $par holds the fitted
  # c(mean, sd, scale) and $value the residual sum of squares.
  #
  # The opening brace of this function was missing, so the definition did not
  # parse.
  sse <- function(p) {
    d <- p[3] * dnorm(x, mean = p[1], sd = p[2])
    sum((d - y)^2)
  }
  optim(c(mu, sig, scale), sse)
}
This works well if I pre-define the area to fit. For instance taking only the area around the peak and using input mean = 10, sigma = 5, and scale = 1:
# Fit only the rows around the peak. The sample data frame is called
# `working`; the original indexed an undefined object `work`.
work2 <- working[5:20, ]
fit1 <- fitG(work2$age, work2$likelihood, 10, 5, 1)
fitpar1 <- fit1$par # fitted c(mean, sd, scale)
plot(work2$age, work2$likelihood, pch = 20)
# Overlay the fitted curve: scale * dnorm(age, mean, sd).
lines(work2$age, fitpar1[3] * dnorm(work2$age, fitpar1[1], fitpar1[2]))
However, I am interested in automating the procedure in some way, where I define the peak centers for the whole data set using peakwindow from the cardidates package. The ideal function would then iterate the number of data points used in the fit around a given peak in order to optimize the Gaussian parameters. Here is my attempt:
# Attempt at choosing the fitting window automatically.
#   x: data frame whose columns age and likelihood are read below
#   y: row index the window is centred on (presumably the peak's row; TODO confirm)
fitG.2 <- function (x, y) {
# g(z): fit a Gaussian to rows (y - 1 - z) .. (y + 1 + z) of x, always
# starting fitG from mean 10, sd 5, scale 1.
g <- function (z) {
# NOTE(review): nothing keeps this range inside 1..nrow(x); confirm how
# out-of-range rows should be handled.
newdata <- x[(y - 1 - z) : (y + 1 + z), ]
# NOTE(review): this assignment is the last expression, so g returns the
# whole fitG result rather than a single number.
newfit <- fitG( newdata$age, newdata$likelihood, 10, 5, 1)
}
# NOTE(review): optimize() minimises a numeric-valued function over a
# continuous interval. g would need to return one number (some goodness-of-fit
# measure), and z will generally not be a whole number. This is likely the
# source of the reported error; confirm.
optimize( f = g, interval = c(seq(1, 100)))
}
However, I can't get this type of function to actually work (an error I can't solve). I have also tried creating a function with a for loop and setting break parameters but this method does not work consistently for peaks with widely varying shape parameters. There are likely many other R functions unknown to me that do exactly this.

Sort a vector where the largest is at the center in r

I know this is a simple question, but I have searched everywhere and I am pretty sure that there is no answer to my question.
I want to sort a vector so that the largest value is in the middle and the values decrease towards both tails.
For example:
c( 20, 30, 40, 50, 60)
I want to have:
c(20, 40, 60, 50, 30 ) or c(30, 50, 60, 40, 20 )
It does not matter.
Can anyone offer me a quick solution?
Thanks!
This is much easier to solve if you assume that you have 2n (n is a natural number) distinct observations. Here is one solution:
# Draw 30 distinct integers and arrange them as a "tent": ascending up to the
# maximum, then descending. Sorted values in odd slots form the rising side;
# those in even slots, reversed, form the falling side.
ints <- sample.int(100, size = 30, replace = FALSE)
ints_o <- sort(ints)
in_odd_slot <- seq_along(ints_o) %% 2L == 1L
ints_tent <- c(ints_o[in_odd_slot], rev(ints_o[!in_odd_slot]))
Edit:
Here is a function that deals with both cases:
makeTent <- function(ints) {
  # Reorder a vector into a "tent": values rise to the maximum and then fall.
  # Args:
  #   ints: vector to rearrange (any length, including 0 and 1)
  # Returns a vector of the same length with the largest value in the middle.
  #
  # After sorting, the slots that share the parity of the last slot (the one
  # holding the maximum) form the falling side; the others form the rising
  # side. This covers even and odd lengths in one rule, and also lengths 0
  # and 1, where the earlier seq.int(from, to, by = 2) calls raised an error.
  ints_o <- ints[order(ints)]
  n <- length(ints_o)
  falling <- (seq_len(n) %% 2L) == (n %% 2L)
  c(ints_o[!falling], rev(ints_o[falling]))
}
# test the function
ints_even = sample.int(100, size = 30, replace = FALSE)
ints_odd = sample.int(100, size = 31, replace = FALSE)
makeTent(ints_odd)
makeTent(ints_even)

Resources