how to build a custom bidirectional encoder for seq2seq with tf2? - recurrent-neural-network

class Encoder(tf.keras.Model):
def __init__(self, vocab_size, embedding_dim, enc_units, batch_sz):
super(Encoder, self).__init__()
self.batch_sz = batch_sz
self.enc_units = enc_units
self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
self.gru = tf.keras.layers.GRU(self.enc_units,
return_sequences=True,
return_state=True,
recurrent_initializer='glorot_uniform')
self.bigru=tf.keras.layers.Bidirectional(tf.keras.layers.GRU(self.enc_units,
return_sequences=True,
return_state=True, recurrent_initializer='glorot_uniform'))
def call(self, x):
x = self.embedding(x)
# output, state = self.gru(x)
output, state = self.bigru(x)
return output, state
For the above code, when I used the gru layer, it worked. But when I used the bigru layer, I got the following error:
ValueError: in converted code:
<ipython-input-51-3ba1fe0beb05>:8 train_step_seq2seq *
enc_output, enc_hidden = encoder(inp)
/tensorflow-2.0.0/python3.6/tensorflow_core/python/keras/engine/base_layer.py:847 __call__
outputs = call_fn(cast_inputs, *args, **kwargs)
<ipython-input-53-4f1b00e47a9a>:22 call *
output, state = self.bidir(x)
ValueError: too many values to unpack (expected 2)
So I'm now wondering what is going on here?

It is not well-documented, but the bidirectional layer (unlike the single direction RNN layer) returns a three-tuple:
concatenated states of the forward and backward RNN (shape: batch × length × 2 GRU dimentsion)
final state of the forward RNN (shape: batch × batch dimension)
final state of the backward RNN (shape: batch × batch dimension)

Related

Type error when fine-tuning a bert-large-uncased-whole-word-masking model by Huggingface

I am trying to fine-tune a Huggingface bert-large-uncased-whole-word-masking model and i get a type error like this when training:
"TypeError: only integer tensors of a single element can be converted to an index"
Here is the code:
train_inputs = tokenizer(text_list[0:457], return_tensors='pt', max_length=512, truncation=True, padding='max_length')
train_inputs['labels']= train_inputs.input_ids.detach().clone()
Then i mask randomly about 15% of the words in the input-ids,
and define a class for the dataset, and then the mistake happens in the training loop:
class MeditationsDataset(torch.utils.data.Dataset):
def __init__(self, encodings):
self.encodings= encodings
def __getitem__(self, idx):
return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
def __len__(self):
return self.encodings.input_ids
train_dataset = MeditationsDataset(train_inputs)
train_dataloader = torch.utils.data.DataLoader(dataset= train_dataset, batch_size=8, shuffle=False)
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
from transformers import BertModel, AdamW
model = BertModel.from_pretrained("bert-large-uncased-whole-word-masking")
model.to(device)
model.train()
optim = AdamW(model.parameters(), lr=1e-5)
num_epochs = 2
from tqdm.auto import tqdm
for epoch in range(num_epochs):
loop = tqdm(train_dataloader, leave=True)
for batch in loop:
batch = {k: v.to(device) for k, v in batch.items()}
outputs = model(**batch)
loss = outputs.loss
loss.backward()
optimizer.step()
optimizer.zero_grad()
The mistake happens in "for batch in loop"
Does anybody understand it and know how to solve this? Thanks in advance for your help
In the class MeditationsDataset in function __getitem__ torch.tensor(val[idx]) is deprecated by PyTorch you should use instead val[idx].clone().detach()

Are these normal speed of Bert Pretrained Model Inference in PyTorch

I am testing Bert base and Bert distilled model in Huggingface with 4 scenarios of speeds, batch_size = 1:
1) bert-base-uncased: 154ms per request
2) bert-base-uncased with quantifization: 94ms per request
3) distilbert-base-uncased: 86ms per request
4) distilbert-base-uncased with quantifization: 69ms per request
I am using the IMDB text as experimental data and set the max_length=512, so it's quite long. The cpu on Ubuntu 18.04 info is below:
cat /proc/cpuinfo | grep 'name'| uniq
model name : Intel(R) Xeon(R) Platinum 8163 CPU # 2.50GHz
The machine has 3 GPU available for use:
Tesla V100-SXM2
It seems quite slow for realtime application. Are those speeds normal for bert base model?
The testing code is below:
import pandas as pd
import torch.quantization
from transformers import AutoTokenizer, AutoModel, DistilBertTokenizer, DistilBertModel
def get_embedding(model, tokenizer, text):
inputs = tokenizer(text, return_tensors="pt", max_length=512, truncation=True)
outputs = model(**inputs)
output_tensors = outputs[0][0]
output_numpy = output_tensors.detach().numpy()
embedding = output_numpy.tolist()[0]
def process_text(model, tokenizer, text_lines):
for index, line in enumerate(text_lines):
embedding = get_embedding(model, tokenizer, line)
if index % 100 == 0:
print('Current index: {}'.format(index))
import time
from datetime import timedelta
if __name__ == "__main__":
df = pd.read_csv('../data/train.csv', sep='\t')
df = df.head(1000)
text_lines = df['review']
text_line_count = len(text_lines)
print('Text size: {}'.format(text_line_count))
start = time.time()
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModel.from_pretrained("bert-base-uncased")
process_text(model, tokenizer, text_lines)
end = time.time()
print('Total time spent with bert base: {}'.format(str(timedelta(seconds=end - start))))
model = torch.quantization.quantize_dynamic(model, {torch.nn.Linear}, dtype=torch.qint8)
process_text(model, tokenizer, text_lines)
end2 = time.time()
print('Total time spent with bert base quantization: {}'.format(str(timedelta(seconds=end2 - end))))
tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")
model = DistilBertModel.from_pretrained("distilbert-base-uncased")
process_text(model, tokenizer, text_lines)
end3 = time.time()
print('Total time spent with distilbert: {}'.format(str(timedelta(seconds=end3 - end2))))
model = DistilBertModel.from_pretrained("distilbert-base-uncased")
model = torch.quantization.quantize_dynamic(model, {torch.nn.Linear}, dtype=torch.qint8)
process_text(model, tokenizer, text_lines)
end4 = time.time()
print('Total time spent with distilbert quantization: {}'.format(str(timedelta(seconds=end4 - end3))))
EDIT: based on suggestion I changed to the following:
inputs = tokenizer(text_batch, padding=True, return_tensors="pt")
outputs = model(**inputs)
Where text_batch is a list of text as input.
No, you can speed it up.
First, why are you testing it with batch size 1?
Both tokenizer and model accept batched inputs. Basically, you can pass a 2D array/list that contains a single sample at each row. See the documentation for tokenizer: https://huggingface.co/transformers/main_classes/tokenizer.html#transformers.PreTrainedTokenizer.__call__ The same applies for the models.
Also, your for loop is sequential even if you use batch size larger than 1. You can create a test data and then use Trainer class with trainer.predict()
Also see this discussion of mine at the HF forums: https://discuss.huggingface.co/t/urgent-trainer-predict-and-model-generate-creates-totally-different-predictions/3426

Error when implementing RBF kernel bandwidth differentiation in Pytorch

I'm implementing an RBF network by using some beginer examples from Pytorch Website. I have a problem when implementing the kernel bandwidth differentiation for the network. Also, Iwould like to know whether my attempt ti implement the idea is fine. This is a code sample to reproduce the issue. Thanks
# -*- coding: utf-8 -*-
import torch
from torch.autograd import Variable
def kernel_product(x,y, mode = "gaussian", s = 1.):
x_i = x.unsqueeze(1)
y_j = y.unsqueeze(0)
xmy = ((x_i-y_j)**2).sum(2)
if mode == "gaussian" : K = torch.exp( - xmy/s**2) )
elif mode == "laplace" : K = torch.exp( - torch.sqrt(xmy + (s**2)))
elif mode == "energy" : K = torch.pow( xmy + (s**2), -.25 )
return torch.t(K)
class MyReLU(torch.autograd.Function):
"""
We can implement our own custom autograd Functions by subclassing
torch.autograd.Function and implementing the forward and backward passes
which operate on Tensors.
"""
#staticmethod
def forward(ctx, input):
"""
In the forward pass we receive a Tensor containing the input and return
a Tensor containing the output. ctx is a context object that can be used
to stash information for backward computation. You can cache arbitrary
objects for use in the backward pass using the ctx.save_for_backward method.
"""
ctx.save_for_backward(input)
return input.clamp(min=0)
#staticmethod
def backward(ctx, grad_output):
"""
In the backward pass we receive a Tensor containing the gradient of the loss
with respect to the output, and we need to compute the gradient of the loss
with respect to the input.
"""
input, = ctx.saved_tensors
grad_input = grad_output.clone()
grad_input[input < 0] = 0
return grad_input
dtype = torch.cuda.FloatTensor
N, D_in, H, D_out = 64, 1000, 100, 10
# Create random Tensors to hold input and outputs, and wrap them in Variables.
x = Variable(torch.randn(N, D_in).type(dtype), requires_grad=False)
y = Variable(torch.randn(N, D_out).type(dtype), requires_grad=False)
# Create random Tensors for weights, and wrap them in Variables.
w1 = Variable(torch.randn(H, D_in).type(dtype), requires_grad=True)
w2 = Variable(torch.randn(H, D_out).type(dtype), requires_grad=True)
# I've created this scalar variable (the kernel bandwidth)
s = Variable(torch.randn(1).type(dtype), requires_grad=True)
learning_rate = 1e-6
for t in range(500):
# To apply our Function, we use Function.apply method. We alias this as 'relu'.
relu = MyReLU.apply
# Forward pass: compute predicted y using operations on Variables; we compute
# ReLU using our custom autograd operation.
# y_pred = relu(x.mm(w1)).mm(w2)
y_pred = relu(kernel_product(w1, x, s)).mm(w2)
# Compute and print loss
loss = (y_pred - y).pow(2).sum()
print(t, loss.data[0])
# Use autograd to compute the backward pass.
loss.backward()
# Update weights using gradient descent
w1.data -= learning_rate * w1.grad.data
w2.data -= learning_rate * w2.grad.data
# Manually zero the gradients after updating weights
w1.grad.data.zero_()
w2.grad.data.zero_()
However I get this error, which dissapears when I simply use a fixed scalar in the default input parameter of kernel_product():
RuntimeError: eq() received an invalid combination of arguments - got (str), but expected one of:
* (float other)
didn't match because some of the arguments have invalid types: (str)
* (Variable other)
didn't match because some of the arguments have invalid types: (str)
Well, you are calling kernel_product(w1, x, s) where w1, x and s are torch Variable while the definition of the function is: kernel_product(x,y, mode = "gaussian", s = 1.). Seems like s should be a string specifying the mode.

Tensorflow : how to insert custom input to existing graph?

I have downloaded a tensorflow GraphDef that implements a VGG16 ConvNet, which I use doing this :
Pl['images'] = tf.placeholder(tf.float32,
[None, 448, 448, 3],
name="images") #batch x width x height x channels
with open("tensorflow-vgg16/vgg16.tfmodel", mode='rb') as f:
fileContent = f.read()
graph_def = tf.GraphDef()
graph_def.ParseFromString(fileContent)
tf.import_graph_def(graph_def, input_map={"images": Pl['images']})
Besides, I have image features that are homogeneous to the output of the "import/pool5/".
How can I tell my graph that don't want to use his input "images", but the tensor "import/pool5/" as input ?
Thank's !
EDIT
OK I realize I haven't been very clear. Here is the situation:
I am trying to use this implementation of ROI pooling, using a pre-trained VGG16, which I have in the GraphDef format. So here is what I do:
First of all, I load the model:
tf.reset_default_graph()
with open("tensorflow-vgg16/vgg16.tfmodel",
mode='rb') as f:
fileContent = f.read()
graph_def = tf.GraphDef()
graph_def.ParseFromString(fileContent)
graph = tf.get_default_graph()
Then, I create my placeholders
images = tf.placeholder(tf.float32,
[None, 448, 448, 3],
name="images") #batch x width x height x channels
boxes = tf.placeholder(tf.float32,
[None,5], # 5 = [batch_id,x1,y1,x2,y2]
name = "boxes")
And I define the output of the first part of the graph to be conv5_3/Relu
tf.import_graph_def(graph_def,
input_map={'images':images})
out_tensor = graph.get_tensor_by_name("import/conv5_3/Relu:0")
So, out_tensor is of shape [None,14,14,512]
Then, I do the ROI pooling:
[out_pool,argmax] = module.roi_pool(out_tensor,
boxes,
7,7,1.0/1)
With out_pool.shape = N_Boxes_in_batch x 7 x 7 x 512, which is homogeneous to pool5. I would then like to feed out_pool as an input to the op that comes just after pool5, so it would look like
tf.import_graph_def(graph.as_graph_def(),
input_map={'import/pool5':out_pool})
But it doesn't work, I have this error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-89-527398d7344b> in <module>()
5
6 tf.import_graph_def(graph.as_graph_def(),
----> 7 input_map={'import/pool5':out_pool})
8
9 final_out = graph.get_tensor_by_name("import/Relu_1:0")
/usr/local/lib/python3.4/dist-packages/tensorflow/python/framework/importer.py in import_graph_def(graph_def, input_map, return_elements, name, op_dict)
333 # NOTE(mrry): If the graph contains a cycle, the full shape information
334 # may not be available for this op's inputs.
--> 335 ops.set_shapes_for_outputs(op)
336
337 # Apply device functions for this op.
/usr/local/lib/python3.4/dist-packages/tensorflow/python/framework/ops.py in set_shapes_for_outputs(op)
1610 raise RuntimeError("No shape function registered for standard op: %s"
1611 % op.type)
-> 1612 shapes = shape_func(op)
1613 if len(op.outputs) != len(shapes):
1614 raise RuntimeError(
/home/hbenyounes/vqa/roi_pooling_op_grad.py in _roi_pool_shape(op)
13 channels = dims_data[3]
14 print(op.inputs[1].name, op.inputs[1].get_shape())
---> 15 dims_rois = op.inputs[1].get_shape().as_list()
16 num_rois = dims_rois[0]
17
/usr/local/lib/python3.4/dist-packages/tensorflow/python/framework/tensor_shape.py in as_list(self)
745 A list of integers or None for each dimension.
746 """
--> 747 return [dim.value for dim in self._dims]
748
749 def as_proto(self):
TypeError: 'NoneType' object is not iterable
Any clue ?
It is usually very convenient to use tf.train.export_meta_graph to store the whole MetaGraph. Then, upon restoring you can use tf.train.import_meta_graph, because it turns out that it passes all additional arguments to the underlying import_scoped_meta_graph which has the input_map argument and utilizes it when it gets to it's own invocation of import_graph_def.
It is not documented, and took me waaaay toooo much time to find it, but it works!
What I would do is something along those lines:
-First retrieve the names of the tensors representing the weights and biases of the 3 fully connected layers coming after pool5 in VGG16.
To do that I would inspect [n.name for n in graph.as_graph_def().node].
(They probably look something like import/locali/weight:0, import/locali/bias:0, etc.)
-Put them in a python list:
weights_names=["import/local1/weight:0" ,"import/local2/weight:0" ,"import/local3/weight:0"]
biases_names=["import/local1/bias:0" ,"import/local2/bias:0" ,"import/local3/bias:0"]
-Define a function that look something like:
def pool5_tofcX(input_tensor, layer_number=3):
flatten=tf.reshape(input_tensor,(-1,7*7*512))
tmp=flatten
for i in xrange(layer_number):
tmp=tf.matmul(tmp, graph.get_tensor_by_name(weights_name[i]))
tmp=tf.nn.bias_add(tmp, graph.get_tensor_by_name(biases_name[i]))
tmp=tf.nn.relu(tmp)
return tmp
Then define the tensor using the function:
wanted_output=pool5_tofcX(out_pool)
Then you are done !
Jonan Georgiev provided an excellent answer here. The same approach was also described with little fanfare at the end of this git issue: https://github.com/tensorflow/tensorflow/issues/3389
Below is a copy/paste runnable example of using this approach to switch out a placeholder for a tf.data.Dataset get_next tensor.
import tensorflow as tf
my_placeholder = tf.placeholder(dtype=tf.float32, shape=1, name='my_placeholder')
my_op = tf.square(my_placeholder, name='my_op')
# Save the graph to memory
graph_def = tf.get_default_graph().as_graph_def()
print('----- my_op before any remapping -----')
print([n for n in graph_def.node if n.name == 'my_op'])
tf.reset_default_graph()
ds = tf.data.Dataset.from_tensors(1.0)
next_tensor = tf.data.make_one_shot_iterator(ds).get_next(name='my_next_tensor')
# Restore the graph with a custom input mapping
tf.graph_util.import_graph_def(graph_def, input_map={'my_placeholder': next_tensor}, name='')
print('----- my_op after remapping -----')
print([n for n in tf.get_default_graph().as_graph_def().node if n.name == 'my_op'])
Output, where we can clearly see that the input to the square operation has changed.
----- my_op before any remapping -----
[name: "my_op"
op: "Square"
input: "my_placeholder"
attr {
key: "T"
value {
type: DT_FLOAT
}
}
]
----- my_op after remapping -----
[name: "my_op"
op: "Square"
input: "my_next_tensor"
attr {
key: "T"
value {
type: DT_FLOAT
}
}
]

How can I use functools.partial on multiple methods on an object, and freeze parameters out of order?

I find functools.partial to be extremely useful, but I would like to be able to freeze arguments out of order (the argument you want to freeze is not always the first one) and I'd like to be able to apply it to several methods on a class at once, to make a proxy object that has the same methods as the underlying object except with some of its methods parameters being frozen (think of it as generalizing partial to apply to classes). And I'd prefer to do this without editing the original object, just like partial doesn't change its original function.
I've managed to scrap together a version of functools.partial called 'bind' that lets me specify parameters out of order by passing them by keyword argument. That part works:
>>> def foo(x, y):
... print x, y
...
>>> bar = bind(foo, y=3)
>>> bar(2)
2 3
But my proxy class does not work, and I'm not sure why:
>>> class Foo(object):
... def bar(self, x, y):
... print x, y
...
>>> a = Foo()
>>> b = PureProxy(a, bar=bind(Foo.bar, y=3))
>>> b.bar(2)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: bar() takes exactly 3 arguments (2 given)
I'm probably doing this all sorts of wrong because I'm just going by what I've pieced together from random documentation, blogs, and running dir() on all the pieces. Suggestions both on how to make this work and better ways to implement it would be appreciated ;) One detail I'm unsure about is how this should all interact with descriptors. Code follows.
from types import MethodType
class PureProxy(object):
def __init__(self, underlying, **substitutions):
self.underlying = underlying
for name in substitutions:
subst_attr = substitutions[name]
if hasattr(subst_attr, "underlying"):
setattr(self, name, MethodType(subst_attr, self, PureProxy))
def __getattribute__(self, name):
return getattr(object.__getattribute__(self, "underlying"), name)
def bind(f, *args, **kwargs):
""" Lets you freeze arguments of a function be certain values. Unlike
functools.partial, you can freeze arguments by name, which has the bonus
of letting you freeze them out of order. args will be treated just like
partial, but kwargs will properly take into account if you are specifying
a regular argument by name. """
argspec = inspect.getargspec(f)
argdict = copy(kwargs)
if hasattr(f, "im_func"):
f = f.im_func
args_idx = 0
for arg in argspec.args:
if args_idx >= len(args):
break
argdict[arg] = args[args_idx]
args_idx += 1
num_plugged = args_idx
def new_func(*inner_args, **inner_kwargs):
args_idx = 0
for arg in argspec.args[num_plugged:]:
if arg in argdict:
continue
if args_idx >= len(inner_args):
# We can't raise an error here because some remaining arguments
# may have been passed in by keyword.
break
argdict[arg] = inner_args[args_idx]
args_idx += 1
f(**dict(argdict, **inner_kwargs))
new_func.underlying = f
return new_func
Update: In case anyone can benefit, here's the final implementation I went with:
from types import MethodType
class PureProxy(object):
""" Intended usage:
>>> class Foo(object):
... def bar(self, x, y):
... print x, y
...
>>> a = Foo()
>>> b = PureProxy(a, bar=FreezeArgs(y=3))
>>> b.bar(1)
1 3
"""
def __init__(self, underlying, **substitutions):
self.underlying = underlying
for name in substitutions:
subst_attr = substitutions[name]
if isinstance(subst_attr, FreezeArgs):
underlying_func = getattr(underlying, name)
new_method_func = bind(underlying_func, *subst_attr.args, **subst_attr.kwargs)
setattr(self, name, MethodType(new_method_func, self, PureProxy))
def __getattr__(self, name):
return getattr(self.underlying, name)
class FreezeArgs(object):
def __init__(self, *args, **kwargs):
self.args = args
self.kwargs = kwargs
def bind(f, *args, **kwargs):
""" Lets you freeze arguments of a function be certain values. Unlike
functools.partial, you can freeze arguments by name, which has the bonus
of letting you freeze them out of order. args will be treated just like
partial, but kwargs will properly take into account if you are specifying
a regular argument by name. """
argspec = inspect.getargspec(f)
argdict = copy(kwargs)
if hasattr(f, "im_func"):
f = f.im_func
args_idx = 0
for arg in argspec.args:
if args_idx >= len(args):
break
argdict[arg] = args[args_idx]
args_idx += 1
num_plugged = args_idx
def new_func(*inner_args, **inner_kwargs):
args_idx = 0
for arg in argspec.args[num_plugged:]:
if arg in argdict:
continue
if args_idx >= len(inner_args):
# We can't raise an error here because some remaining arguments
# may have been passed in by keyword.
break
argdict[arg] = inner_args[args_idx]
args_idx += 1
f(**dict(argdict, **inner_kwargs))
return new_func
You're "binding too deep": change def __getattribute__(self, name): to def __getattr__(self, name): in class PureProxy. __getattribute__ intercepts every attribute access and so bypasses everything that you've set with setattr(self, name, ... making those setattr bereft of any effect, which obviously's not what you want; __getattr__ is called only for access to attributes not otherwise defined so those setattr calls become "operative" & useful.
In the body of that override, you can and should also change object.__getattribute__(self, "underlying") to self.underlying (since you're not overriding __getattribute__ any more). There are other changes I'd suggest (enumerate in lieu of the low-level logic you're using for counters, etc) but they wouldn't change the semantics.
With the change I suggest, your sample code works (you'll have to keep testing with more subtle cases of course). BTW, the way I debugged this was simply to stick in print statements in the appropriate places (a jurassic=era approach but still my favorite;-).

Resources