I am trying to use scipy.integrate.quad with TensorFlow as follows.
time and Lambda are two Tensors with shape (None, 1).
def f_t(self, time, Lambda):
    h = Lambda * self.shape * time ** (self.shape - 1)
    S = tf.exp(-1 * Lambda * time ** self.shape)
    return h * S
def left_censoring(self, time, Lambda):
    return tf.map_fn(lambda x: integrate.quad(self.f_t,
                                              0.0,
                                              x[0], # it is not a float before evaluation
                                              args=(x[1],)),
                     tf.concat([time, Lambda], 1))
However, I get an error as below:
File "J:\Workspace\Distributions.py", line 30, in <lambda>
args=(x[1],)),
File "I:\Anaconda3\envs\tensorflow\lib\site-packages\scipy\integrate\quadpack.py", line 323, in quad
points)
File "I:\Anaconda3\envs\tensorflow\lib\site-packages\scipy\integrate\quadpack.py", line 388, in _quad
return _quadpack._qagse(func,a,b,args,full_output,epsabs,epsrel,limit)
TypeError: a float is required
x[0] is a Tensor with shape=(), so it is not a float value before evaluation. Is it possible to solve this problem? How should I calculate an integral in TensorFlow?
If you have at least TensorFlow 1.8.0, you're probably best off using tf.contrib.integrate.odeint_fixed(), like this (tested) code:
from __future__ import print_function
import tensorflow as tf
assert tf.VERSION >= "1.8.0", "This code only works with TensorFlow 1.8.0 or later."
def f( y, a ):
    return a * a

x = tf.constant( [ 0.0, 1.0, 2, 3, 4 ], dtype = tf.float32 )
i = tf.contrib.integrate.odeint_fixed( f, 0.0, x, method = "rk4" )

with tf.Session() as sess:
    res = sess.run( i )
    print( res )
will output:
[ 0. 0.33333334 2.6666667 9. 21.333334 ]
properly integrating x² over the intervals of [ 0, 0 ], [ 0, 1 ], [ 0, 2 ], [ 0, 3 ], and [ 0, 4 ] as per x = [ 0, 1, 2, 3, 4 ] above. (The primitive function of x² is ⅓ x³, so for example 4³ / 3 = 64 / 3 = 21 ⅓.)
Otherwise, for earlier TensorFlow versions, here's how to fix your code.
So the main issue is that you have to use tf.py_func() to map a Python function (scipy.integrate.quad() in this case) onto a tensor. tf.map_fn() maps other TensorFlow operations and both passes and expects tensors as operands. Therefore x[ 0 ] will never be a simple float; it will be a scalar tensor, and scipy.integrate.quad() will not know what to do with that.
You can't completely get rid of tf.map_fn() either, unless you want to manually loop over numpy arrays.
Furthermore, scipy.integrate.quad() returns a double (float64), whereas your tensors are float32.
I've simplified your code a lot, because I don't have access to the rest of it and it looks too complicated compared to the core of this question. The following code (tested):
from __future__ import print_function
import tensorflow as tf
from scipy import integrate
def f( a ):
    return a * a

def integrated( f, x ):
    return tf.map_fn( lambda y: tf.py_func(
        lambda z: integrate.quad( f, 0.0, z )[ 0 ], [ y ], tf.float64 ),
        x )
x = tf.constant( [ 1.0, 2, 3, 4 ], dtype = tf.float64 )
i = integrated( f, x )
with tf.Session() as sess:
    res = sess.run( i )
    print( res )
will also output:
[ 0.33333333 2.66666667 9. 21.33333333]
from torchvision_starter.engine import train_one_epoch, evaluate
from torchvision_starter import utils
import multiprocessing
import time
n_cpu = multiprocessing.cpu_count()
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
_ = model.to(device)
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(model.parameters(), lr=0.00001)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                               step_size=3,
                                               gamma=0.2,
                                               verbose=True
                                               )
# Let's train for 10 epochs
num_epochs = 1
start = time.time()
for epoch in range(10, 10 + num_epochs):
    # train for one epoch, printing every 10 iterations
    train_one_epoch(model, optimizer, data_loaders['train'], device, epoch, print_freq=10)
    # update the learning rate
    lr_scheduler.step()
    # evaluate on the validation dataset
    evaluate(model, data_loaders['valid'], device=device)
stop = time.time()
print(f"\n\n{num_epochs} epochs in {stop - start} s ({(stop-start) / 3600:.2f} hrs)")
Everything is OK before this part, but when I run it, I get the error below:
I have tried adding drop_last to the DataLoader created in helper.py, like this:
data_loaders["train"] = torch.utils.data.DataLoader(
train_data,
batch_size=batch_size,
sampler=train_sampler,
num_workers=num_workers,
collate_fn=utils.collate_fn,
drop_last=True
)
But it doesn't work. By the way, torch and torchvision are compatible versions and CUDA is available. How can I fix this?
The get_data_loaders function:
def get_data_loaders(
    folder, batch_size: int = 2, valid_size: float = 0.2, num_workers: int = -1, limit: int = -1, thinning: int = None
):
    """
    Create and return the train, validation and test data loaders.

    :param folder: folder containing the dataset
    :param batch_size: size of the mini-batches
    :param valid_size: fraction of the dataset to use for validation. For example 0.2
                       means that 20% of the dataset will be used for validation
    :param num_workers: number of workers to use in the data loaders. Use -1 to mean
                        "use all my cores"
    :param limit: maximum number of data points to consider
    :param thinning: take every n-th frame, instead of all frames
    :return: a dictionary with 3 keys: 'train', 'valid' and 'test', containing respectively the
             train, validation and test data loaders
    """
    if num_workers == -1:
        # Use all cores
        num_workers = multiprocessing.cpu_count()

    # We will fill this up later
    data_loaders = {"train": None, "valid": None, "test": None}

    # Create 3 sets of data transforms: one for the training dataset,
    # containing data augmentation, one for the validation dataset
    # (without data augmentation) and one for the test set (again
    # without augmentation)
    data_transforms = {
        "train": get_transform(UdacitySelfDrivingDataset.mean, UdacitySelfDrivingDataset.std, train=True),
        "valid": get_transform(UdacitySelfDrivingDataset.mean, UdacitySelfDrivingDataset.std, train=False),
        "test": get_transform(UdacitySelfDrivingDataset.mean, UdacitySelfDrivingDataset.std, train=False),
    }
    # Create train and validation datasets
    train_data = UdacitySelfDrivingDataset(
        folder,
        transform=data_transforms["train"],
        train=True,
        thinning=thinning
    )

    # The validation dataset is a split from the train dataset, so we read
    # from the same folder, but we apply the transforms for validation
    valid_data = UdacitySelfDrivingDataset(
        folder,
        transform=data_transforms["valid"],
        train=True,
        thinning=thinning
    )

    # Obtain training indices that will be used for validation
    n_tot = len(train_data)
    indices = torch.randperm(n_tot)

    # If requested, limit the number of data points to consider
    if limit > 0:
        indices = indices[:limit]
        n_tot = limit

    split = int(math.ceil(valid_size * n_tot))
    train_idx, valid_idx = indices[split:], indices[:split]

    # Define samplers for obtaining training and validation batches
    train_sampler = torch.utils.data.SubsetRandomSampler(train_idx)
    valid_sampler = torch.utils.data.SubsetRandomSampler(valid_idx)
    # Prepare data loaders
    data_loaders["train"] = torch.utils.data.DataLoader(
        train_data,
        batch_size=batch_size,
        sampler=train_sampler,
        num_workers=num_workers,
        collate_fn=utils.collate_fn,
        drop_last=True
    )

    data_loaders["valid"] = torch.utils.data.DataLoader(
        valid_data,
        batch_size=batch_size,
        sampler=valid_sampler,
        num_workers=num_workers,
        collate_fn=utils.collate_fn,
        drop_last=True
    )
    # Now create the test data loader
    test_data = UdacitySelfDrivingDataset(
        folder,
        transform=data_transforms["test"],
        train=False,
        thinning=thinning
    )

    if limit > 0:
        indices = torch.arange(limit)
        test_sampler = torch.utils.data.SubsetRandomSampler(indices)
    else:
        test_sampler = None

    data_loaders["test"] = torch.utils.data.DataLoader(
        test_data,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
        sampler=test_sampler,
        collate_fn=utils.collate_fn,
        drop_last=True
    )

    return data_loaders
class UdacitySelfDrivingDataset(torch.utils.data.Dataset):

    # Mean and std of the dataset to be used in nn.Normalize
    mean = torch.tensor([0.3680, 0.3788, 0.3892])
    std = torch.tensor([0.2902, 0.3069, 0.3242])

    def __init__(self, root, transform, train=True, thinning=None):
        super().__init__()

        self.root = os.path.abspath(os.path.expandvars(os.path.expanduser(root)))
        self.transform = transform

        # Load datasets
        if train:
            self.df = pd.read_csv(os.path.join(self.root, "labels_train.csv"))
        else:
            self.df = pd.read_csv(os.path.join(self.root, "labels_test.csv"))

        # Index by file id (i.e., a sequence of the same length as the number of images)
        codes, uniques = pd.factorize(self.df['frame'])

        if thinning:
            # Take every n-th row. This makes sense because the images are
            # frames of videos from the car, so we are essentially reducing
            # the frame rate
            thinned = uniques[::thinning]
            idx = self.df['frame'].isin(thinned)
            print(f"Keeping {thinned.shape[0]} of {uniques.shape[0]} images")
            print(f"Keeping {idx.sum()} objects out of {self.df.shape[0]}")
            self.df = self.df[idx].reset_index(drop=True)

            # Recompute codes
            codes, uniques = pd.factorize(self.df['frame'])

        self.n_images = len(uniques)
        self.df['image_id'] = codes
        self.df.set_index("image_id", inplace=True)

        self.classes = ['car', 'truck', 'pedestrian', 'bicyclist', 'light']
        self.colors = ['cyan', 'blue', 'red', 'purple', 'orange']

    @property
    def n_classes(self):
        return len(self.classes)
    def __getitem__(self, idx):

        if idx in self.df.index:
            row = self.df.loc[[idx]]
        else:
            raise KeyError(f"Element {idx} not in dataframe")

        # Load image from file
        img_path = os.path.join(self.root, "images", row['frame'].iloc[0])
        img = Image.open(img_path).convert("RGB")

        # Exclude bogus boxes with 0 height or width
        h = row['ymax'] - row['ymin']
        w = row['xmax'] - row['xmin']
        filter_idx = (h > 0) & (w > 0)
        row = row[filter_idx]

        # Get bounding box coordinates for each mask
        boxes = row[['xmin', 'ymin', 'xmax', 'ymax']].values

        # Convert everything into a torch.Tensor
        boxes = torch.as_tensor(boxes, dtype=torch.float32)

        # Get the labels
        labels = torch.as_tensor(row['class_id'].values, dtype=int)

        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        # Assume no crowd for everything
        iscrowd = torch.zeros((row.shape[0],), dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd

        if self.transform is not None:
            img, target = self.transform(img, target)

        return img, target

    def __len__(self):
        return self.n_images
    def plot(self, idx, renormalize=True, predictions=None, threshold=0.5, ax=None):

        image, label_js = self[idx]

        if renormalize:
            # Invert the T.Normalize transform
            unnormalize = T.Compose(
                [
                    T.Normalize(mean=[0., 0., 0.], std=1 / type(self).std),
                    T.Normalize(mean=-type(self).mean, std=[1., 1., 1.])
                ]
            )
            image, label_js = unnormalize(image, label_js)

        if ax is None:
            fig, ax = plt.subplots(figsize=(8, 8))

        _ = ax.imshow(torch.permute(image, [1, 2, 0]))

        for i, box in enumerate(label_js['boxes']):
            xy = (box[0], box[1])
            h, w = (box[2] - box[0]), (box[3] - box[1])
            r = patches.Rectangle(xy, h, w, fill=False, color=self.colors[label_js['labels'][i]-1], lw=2, alpha=0.5)
            ax.add_patch(r)

        if predictions is not None:
            # Make sure the predictions are on the CPU
            for k in predictions:
                predictions[k] = predictions[k].detach().cpu().numpy()

            for i, box in enumerate(predictions['boxes']):
                if predictions['scores'][i] > threshold:
                    xy = (box[0], box[1])
                    h, w = (box[2] - box[0]), (box[3] - box[1])
                    r = patches.Rectangle(xy, h, w, fill=False, color=self.colors[predictions['labels'][i]-1], lw=2, linestyle=':')
                    ax.add_patch(r)

        _ = ax.axis("off")

        return ax
I am trying to write an explicit Successive Overrelaxation Function over a 2D matrix. In this case for an electrostatic potential.
When trying to optimize this in Cython I seem to get an error that I am not quite sure I understand.
%%cython
cimport cython
import numpy as np
cimport numpy as np
from libc.math cimport pi
#SOR function
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.initializedcheck(False)
@cython.nonecheck(False)
def SOR_potential(np.float64_t[:, :] potential, mask, int max_iter, float error_threshold, float alpha):
    # the ints
    cdef int height = potential.shape[0]
    cdef int width = potential.shape[1]  # more general, non-quadratic
    cdef int it = 0
    # the floats
    cdef float error = 0.0
    cdef float sor_adjustment
    # the copy array we will iterate over and return
    cdef np.ndarray[np.float64_t, ndim=2] input_matrix = potential.copy()
    # set the ideal alpha if user input is 0.0
    if alpha == 0.0:
        alpha = 2/(1+(pi/((height+width)*0.5)))
    # start the SOR loop. The for loops omit the 0 and -1 index
    # because they are *shadow points* used for Neumann boundary conditions
    cdef int row, col
    # iteration loop
    while True:
        # 2-stencil loop
        for row in range(1, height-1):
            for col in range(1, width-1):
                if not(mask[row][col]):
                    potential[row][col] = 0.25*(input_matrix[row-1][col] + \
                                                input_matrix[row+1][col] + \
                                                input_matrix[row][col-1] + \
                                                input_matrix[row][col+1])
                    sor_adjustment = alpha * (potential[row][col] - input_matrix[row][col])
                    input_matrix[row][col] = sor_adjustment + input_matrix[row][col]
                    error += np.abs(input_matrix[row][col] - potential[row][col])
                    # by the end of this loop input_matrix and potential have diff values
        if error < error_threshold:
            break
        elif it > max_iter:
            break
        else:
            error = 0
            it = it + 1
    return input_matrix, error, it
and I used a very simple example for an array to see if it would give an error output.
test = [[True, False], [True, False]]
pot = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float64)
SOR_potential(pot, test, 50, 0.1, 0.0)
Gives out this error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In [30], line 1
----> 1 SOR_potential(pot, test, 50, 0.1, 0.0)
File _cython_magic_6c09a5060df996862b8e35adacc0e25c.pyx:21, in _cython_magic_6c09a5060df996862b8e35adacc0e25c.SOR_potential()
TypeError: Cannot convert _cython_magic_6c09a5060df996862b8e35adacc0e25c._memoryviewslice to numpy.ndarray
But when I delete the np.float64_t[:, :] part from
def SOR_potential(np.float64_t[:, :] potential,...)
the code works. Of course, the simple 2x2 matrix will not converge but it gives no errors. Where is the mistake here?
I also tried importing the modules differently as suggested here
Cython: how to resolve TypeError: Cannot convert memoryviewslice to numpy.ndarray?
but I got 2 errors instead of 1 where there were type mismatches.
Note: I would also like to ask, how would I define a numpy array of booleans to put in front of the "mask" input in the function?
A minimal reproducible example of your error message would look like this:
def foo(np.float64_t[:, :] A):
    cdef np.ndarray[np.float64_t, ndim=2] B = A.copy()
    # ... do something with B ...
    return B
The problem is that A is a memoryview while B is an np.ndarray. If both A and B are memoryviews, i.e.
def foo(np.float64_t[:, :] A):
    cdef np.float64_t[:, :] B = A.copy()
    # ... do something with B ...
    return np.asarray(B)
your example will compile without errors. Note that you then need to call np.asarray if you want to return a np.ndarray.
Regarding your second question: You could use a memoryview with dtype np.uint8_t
def foo(np.float64_t[:, :] A, np.uint8_t[:, :] mask):
    cdef np.float64_t[:, :] B = A.copy()
    # ... do something with B and mask ...
    return np.asarray(B)
and call it like this from Python:
mask = np.array([[True, True], [False, False]], dtype=bool)
A = np.ones((2,2), dtype=np.float64)
foo(A, mask)
PS: If your array's buffers are guaranteed to be C-Contiguous, you can use contiguous memoryviews for better performance:
def foo(np.float64_t[:, ::1] A, np.uint8_t[:, ::1] mask):
    cdef np.float64_t[:, ::1] B = A.copy()
    # ... do something with B and mask ...
    return np.asarray(B)
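Putting the two pieces together for the function in the question, here is a minimal sketch (not a drop-in replacement for your full loop; SOR_step is a hypothetical name, and the boolean mask is passed as a np.uint8_t memoryview as described above) showing one sweep of the update with memoryviews used throughout:
%%cython
cimport cython
import numpy as np
cimport numpy as np

def SOR_step(np.float64_t[:, :] potential, np.uint8_t[:, :] mask, float alpha):
    # One SOR sweep over the interior points; the working copy is a memoryview,
    # not an np.ndarray, so no conversion error is raised
    cdef np.float64_t[:, :] input_matrix = potential.copy()
    cdef int height = potential.shape[0]
    cdef int width = potential.shape[1]
    cdef int row, col
    cdef float error = 0.0, sor_adjustment
    for row in range(1, height - 1):
        for col in range(1, width - 1):
            if not mask[row, col]:
                potential[row, col] = 0.25 * (input_matrix[row - 1, col] + input_matrix[row + 1, col] +
                                              input_matrix[row, col - 1] + input_matrix[row, col + 1])
                sor_adjustment = alpha * (potential[row, col] - input_matrix[row, col])
                input_matrix[row, col] = sor_adjustment + input_matrix[row, col]
                error += abs(input_matrix[row, col] - potential[row, col])
    # convert back to an ndarray only when returning
    return np.asarray(input_matrix), error
It would be called from Python with mask = np.array([[True, False], [True, False]], dtype=bool), as in the example above.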
I am using three-dimensional convolution links (with ConvolutionND) in my chain.
The forward computation run smoothly (I checked intermediate result shapes to be sure I understood correctly the meaning of the parameters of convolution_nd), but during the backward a CuDNNError is raised with the message CUDNN_STATUS_NOT_SUPPORTED.
The cover_all parameter of ConvolutionND has its default value of False, so from the doc I don't see what could be the cause of the error.
Here is how I defined one of the convolution layers:
self.conv1 = chainer.links.ConvolutionND(3, 1, 4, (3, 3, 3)).to_gpu(self.GPU_1_ID)
And the call stack is
File "chainer/function_node.py", line 548, in backward_accumulate
gxs = self.backward(target_input_indexes, grad_outputs)
File "chainer/functions/connection/convolution_nd.py", line 118, in backward
gy, W, stride=self.stride, pad=self.pad, outsize=x_shape)
File "chainer/functions/connection/deconvolution_nd.py", line 310, in deconvolution_nd
y, = func.apply(args)
File "chainer/function_node.py", line 258, in apply
outputs = self.forward(in_data)
File "chainer/functions/connection/deconvolution_nd.py", line 128, in forward
return self._forward_cudnn(x, W, b)
File "chainer/functions/connection/deconvolution_nd.py", line 105, in _forward_cudnn
tensor_core=tensor_core)
File "cupy/cudnn.pyx", line 881, in cupy.cudnn.convolution_backward_data
File "cupy/cuda/cudnn.pyx", line 975, in cupy.cuda.cudnn.convolutionBackwardData_v3
File "cupy/cuda/cudnn.pyx", line 461, in cupy.cuda.cudnn.check_status
cupy.cuda.cudnn.CuDNNError: CUDNN_STATUS_NOT_SUPPORTED
So are there special points to take care of when using ConvolutionND?
A failing code is for instance :
import chainer
from chainer import functions as F
from chainer import links as L
from chainer.backends import cuda
import numpy as np
import cupy as cp
chainer.global_config.cudnn_deterministic = False
NB_MASKS = 60
NB_FCN = 3
NB_CLASS = 17
class MFEChain(chainer.Chain):
    """docstring for Wavelphasenet."""

    def __init__(self,
                 FCN_Dim,
                 gpu_ids=None):
        super(MFEChain, self).__init__()
        self.GPU_0_ID, self.GPU_1_ID = (0, 1) if gpu_ids is None else gpu_ids
        with self.init_scope():
            self.conv1 = chainer.links.ConvolutionND(3, 1, 4, (3, 3, 3)).to_gpu(
                self.GPU_1_ID
            )

    def __call__(self, inputs):
        ### Pad input ###
        processed_sequences = []
        for convolved in inputs:
            ## Transform to sequences
            copy = convolved if self.GPU_0_ID == self.GPU_1_ID else F.copy(convolved, self.GPU_1_ID)
            processed_sequences.append(copy)

        reprocessed_sequences = []
        with cuda.get_device(self.GPU_1_ID):
            for convolved in processed_sequences:
                convolved = F.expand_dims(convolved, 0)
                convolved = F.expand_dims(convolved, 0)
                convolved = self.conv1(convolved)
                reprocessed_sequences.append(convolved)

        states = F.vstack(reprocessed_sequences)
        logits = states
        ret_logits = logits if self.GPU_0_ID == self.GPU_1_ID else F.copy(logits, self.GPU_0_ID)
        return ret_logits

def mfe_test():
    mfe = MFEChain(150)
    inputs = list(
        chainer.Variable(
            cp.random.randn(
                NB_MASKS,
                11,
                in_len,
                dtype=cp.float32
            )
        ) for in_len in [53248]
    )
    val = mfe(inputs)
    grad = cp.ones(val.shape, dtype=cp.float32)
    val.grad = grad
    val.backward()
    for i in inputs:
        print(i.grad)

if __name__ == "__main__":
    mfe_test()
cupy.cuda.cudnn.convolutionBackwardData_v3 is incompatible with some specific parameters, as described in an issue on the official GitHub.
Unfortunately, that issue only dealt with deconvolution_2d.py (not deconvolution_nd.py), so I guess the decision about whether cuDNN is used or not failed in your case.
You can check your parameters by confirming the following: check whether a dilation parameter (!=1) or a group parameter (!=1) is passed to the convolution, and print chainer.config.cudnn_deterministic, configuration.config.autotune, and configuration.config.use_cudnn_tensor_core (a sketch of how to do this is below).
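As a quick diagnostic, a minimal sketch (assuming the mfe, inputs and grad variables from your test function; attribute availability depends on your Chainer version) that prints those settings and re-runs the backward pass with cuDNN disabled:
import chainer

print(chainer.config.cudnn_deterministic)
print(chainer.config.autotune)
print(chainer.config.use_cudnn_tensor_core)

# If the backward pass succeeds with cuDNN disabled, the unsupported cuDNN path is the culprit
with chainer.using_config('use_cudnn', 'never'):
    val = mfe(inputs)
    val.grad = grad
    val.backward()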
Further support may be obtained by raising an issue in the official github.
The code you showed is quite complicated.
To clarify the problem, the simplified code below would help.
from chainer import Variable, Chain
from chainer import links as L
from chainer import functions as F
import numpy as np
from six import print_
batch_size = 1
in_channel = 1
out_channel = 1
class MyLink(Chain):
    def __init__(self):
        super(MyLink, self).__init__()
        with self.init_scope():
            self.conv = L.ConvolutionND(3, 1, 1, (3, 3, 3), nobias=True, initialW=np.ones((in_channel, out_channel, 3, 3, 3)))

    def __call__(self, x):
        return F.sum(self.conv(x))

if __name__ == "__main__":
    my_link = MyLink()
    my_link.to_gpu(0)
    batch = Variable(np.ones((batch_size, in_channel, 3, 3, 3)))
    batch.to_gpu(0)
    loss = my_link(batch)
    loss.backward()
    print_(batch.grad)
Would it be possible to write in FFL a version of filter that stops filtering after the first negative match, i.e. the remaining items are assumed to be positive matches? Or, more generally, a filter that can stop early.
Example:
removeMaxOf1([1,2,3,4], value>=2)
Expected Result:
[1,3,4]
This seems like something very difficult to write in a pure functional style. Maybe recursion or let could achieve it?
Note: the whole motivation for this question was hypothesizing about micro-optimizations, so performance is very relevant. I am also looking for something that is generally applicable to any data type, not just int.
I have recently added find_index to the engine which allows this to be done easily:
if(n = -1, [], list[:n] + list[n+1:])
    where n = find_index(list, value<2)
    where list = [1,2,3,4]
find_index will return the index of the first match, or -1 if no match is found. There is also find_index_or_die, which returns the index of the first match and asserts if none is found, for when you're absolutely certain there is an instance in the list.
You could also implement something like this using recursion:
def filterMaxOf1(list ls, function(list)->bool pred, list result=[]) -> list
    base ls = []: result
    base not pred(ls[0]): result + ls[1:]
    recursive: filterMaxOf1(ls[1:], pred, result + [ls[0]])
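For readers less familiar with FFL's base/recursive syntax, a rough Python analogue of the same recursion (filter_max_of_1 is a hypothetical name, not part of FFL) behaves like this:
def filter_max_of_1(ls, pred, result=None):
    # accumulate items until the first one that fails pred;
    # drop that item and keep everything after it unchanged
    result = [] if result is None else result
    if not ls:
        return result
    if not pred(ls[0]):
        return result + ls[1:]
    return filter_max_of_1(ls[1:], pred, result + [ls[0]])

print(filter_max_of_1([1, 2, 3, 4], lambda i: i < 2))  # [1, 3, 4]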
Of course recursion can! :D
filterMaxOf1(input, target)
    where filterMaxOf1 = def
        ([int] l, function f) -> [int]
        if(size(l) = 0,
            [],
            if(not f(l[0]),
                l[1:],
                flatten([
                    l[0],
                    recurse(l[1:], f)
                ])
            )
        )
    where input = [
        1, 2, 3, 4, ]
    where target = def
        (int i) -> bool
        i < 2
Some checks:
--> filterOfMax1([1, ]) where filterOfMax1 = [...]
[1]
--> filterOfMax1([2, ]) where filterOfMax1 = [...]
[]
--> filterOfMax1([1, 2, ]) where filterOfMax1 = [...]
[1]
--> filterOfMax1([1, 2, 3, 4, ]) where filterOfMax1 = [...]
[1, 3, 4]
This flavor loses some strong type safety, but is nearer to tail recursion:
filterMaxOf1(input, target)
    where filterMaxOf1 = def
        ([int] l, function f) -> [int]
        flatten(filterMaxOf1i(l, f))
    where filterMaxOf1i = def
        ([int] l, function f) -> [any]
        if(size(l) = 0,
            [],
            if(not f(l[0]),
                l[1:],
                [
                    l[0],
                    recurse(l[1:], f)
                ]
            )
        )
    where input = [
        1, 2, 3, 4, ]
    where target = def
        (int i) -> bool
        i < 2
Using R, it is very easy to approximate basic functions through a neural network:
library(nnet)
x <- sort(10*runif(50))
y <- sin(x)
nn <- nnet(x, y, size=4, maxit=10000, linout=TRUE, abstol=1.0e-8, reltol = 1.0e-9, Wts = seq(0, 1, by=1/12) )
plot(x, y)
x1 <- seq(0, 10, by=0.1)
lines(x1, predict(nn, data.frame(x=x1)), col="green")
predict( nn , data.frame(x=pi/2) )
A simple neural network with one hidden layer of a mere 4 neurons is sufficient to approximate a sine. (As per stackoverflow question Approximating function with Neural Network.)
But I cannot obtain the same in PyTorch.
In fact, the neural network created by R contains not only an input, four hidden and an output, but also two "bias" neurons - the first connected towards the hidden layer, the second towards the output.
The plot above is obtained through the following:
library(devtools)
library(scales)
library(reshape)
source_url('https://gist.github.com/fawda123/7471137/raw/cd6e6a0b0bdb4e065c597e52165e5ac887f5fe95/nnet_plot_update.r')
plot.nnet(nn$wts,struct=nn$n, pos.col='#007700',neg.col='#FF7777') ### this plots the graph
plot.nnet(nn$wts,struct=nn$n, pos.col='#007700',neg.col='#FF7777', wts.only=1) ### this prints the weights
Attempting the same with PyTorch produces a different network: the bias neurons are missing.
Following is an attempt to do in PyTorch what was done previously in R. The results are not satisfactory: the function is not approximated. The most evident difference is the absence of the bias neurons.
import torch
from torch.autograd import Variable
import random
import math
N, D_in, H, D_out = 1000, 1, 4, 1
l_x = []
l_y = []
for a in range(1000):
    r = random.random()*10
    l_x.append( [r] )
    l_y.append( [math.sin(r)] )
tx = torch.cuda.FloatTensor(l_x)
ty = torch.cuda.FloatTensor(l_y)
x = Variable(tx, requires_grad=False)
y = Variable(ty, requires_grad=False)
w1 = Variable(torch.randn(D_in, H ).type(torch.cuda.FloatTensor), requires_grad=True)
w2 = Variable(torch.randn(H, D_out).type(torch.cuda.FloatTensor), requires_grad=True)
learning_rate = 1e-5
for t in range(1000):
    y_pred = x.mm(w1).clamp(min=0).mm(w2)
    loss = (y_pred - y).pow(2).sum()
    if t<10 or t%100==1: print(t, loss.data[0])
    loss.backward()
    w1.data -= learning_rate * w1.grad.data
    w2.data -= learning_rate * w2.grad.data
    w1.grad.data.zero_()
    w2.grad.data.zero_()
t = [ [math.pi] ]
print( str(t) +" -> "+ str( (Variable(torch.cuda.FloatTensor( t ))).mm(w1).clamp(min=0).mm(w2).data ) )
t = [ [math.pi/2] ]
print( str(t) +" -> "+ str( (Variable(torch.cuda.FloatTensor( t ))).mm(w1).clamp(min=0).mm(w2).data ) )
How can I make the network approximate the given function (sine in this case), either by inserting the "bias" neurons or by adding whatever other detail is missing?
Moreover, I have difficulty understanding why R inserts the "bias". I found information that the bias could be akin to the intercept in a regression model, but I still find it unclear. Any information would be appreciated.
EDIT: an excellent explanation turned out to be at stackoverflow question Role of Bias in Neural Networks
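For reference, in PyTorch the bias is simply the additive term of the affine map a Linear layer computes; a minimal sketch illustrating that it plays the role of the intercept:
import torch

lin = torch.nn.Linear(1, 4, bias=True)   # weight shape (4, 1), bias shape (4,)
x = torch.ones(2, 1)
# lin(x) is x @ W^T + b, so the bias acts like the intercept in a regression
print(torch.allclose(lin(x), x @ lin.weight.t() + lin.bias))  # True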
EDIT:
An example to obtain the result, though using the "fuller" framework ("not reinventing the wheel") is as follows:
import torch
from torch.autograd import Variable
import torch.nn.functional as F
import math
N, D_in, H, D_out = 1000, 1, 4, 1
l_x = []
l_y = []
for a in range(1000):
    t = (a/1000.0)*10
    l_x.append( [t] )
    l_y.append( [math.sin(t)] )
x = Variable( torch.FloatTensor(l_x) )
y = Variable( torch.FloatTensor(l_y) )
class Net(torch.nn.Module):
    def __init__(self, n_feature, n_hidden, n_output):
        super(Net, self).__init__()
        self.to_hidden = torch.nn.Linear(n_feature, n_hidden)
        self.to_output = torch.nn.Linear(n_hidden, n_output)

    def forward(self, x):
        x = self.to_hidden(x)
        x = F.tanh(x)  # activation function
        x = self.to_output(x)
        return x
net = Net(n_feature = D_in, n_hidden = H, n_output = D_out)
learning_rate = 0.01
optimizer = torch.optim.Adam( net.parameters() , lr=learning_rate )
for t in range(1000):
    y_pred = net(x)
    loss = (y_pred - y).pow(2).sum()
    if t<10 or t%100==1: print(t, loss.data[0])
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()
t = [ [math.pi] ]
print( str(t) +" -> "+ str( net( Variable(torch.FloatTensor( t )) ) ) )
t = [ [math.pi/2] ]
print( str(t) +" -> "+ str( net( Variable(torch.FloatTensor( t )) ) ) )
Unfortunately, while this code works properly, it does not solve the matter of making the original, more "low level" code work as expected (e.g. introducing the bias).
Following up on @jdhao's comment, this is a super-simple PyTorch model that computes exactly what you want:
class LinearWithInputBias(nn.Linear):
    def __init__(self, in_features, out_features, out_bias=True, in_bias=True):
        nn.Linear.__init__(self, in_features, out_features, out_bias)
        if in_bias:
            in_bias = torch.zeros(1, out_features)
            # in_bias.normal_()  # if you want it to be randomly initialized
            self._out_bias = nn.Parameter(in_bias)

    def forward(self, x):
        out = nn.Linear.forward(self, x)
        try:
            out = out + self._out_bias
        except AttributeError:
            pass
        return out
However, there's an additional bug in your code: from what I can see, you don't train it properly, i.e. you do not call an optimizer (like torch.optim.SGD(mod.parameters())) before you delete the gradient information by calling grad.data.zero_(); a rough sketch of the missing piece is below.
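A minimal sketch of that missing piece, assuming the tensors x, y, w1, w2 and learning_rate from the low-level code in the question (torch.optim.SGD is used here purely as an example optimizer):
optimizer = torch.optim.SGD([w1, w2], lr=learning_rate)

for t in range(1000):
    y_pred = x.mm(w1).clamp(min=0).mm(w2)
    loss = (y_pred - y).pow(2).sum()

    optimizer.zero_grad()   # clear old gradients
    loss.backward()         # compute new gradients
    optimizer.step()        # update w1 and w2 in place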