TypeError: Caught TypeError in DataLoader worker process 0. TypeError: 'KeyError' object is not iterable - torch

from torchvision_starter.engine import train_one_epoch, evaluate
from torchvision_starter import utils
import multiprocessing
import time
n_cpu = multiprocessing.cpu_count()
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
_ = model.to(device)
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(model.parameters(), lr=0.00001)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
step_size=3,
gamma=0.2,
verbose=True
)
# Let's train for 10 epochs
num_epochs = 1
start = time.time()
for epoch in range(10, 10 + num_epochs):
# train for one epoch, printing every 10 iterations
train_one_epoch(model, optimizer, data_loaders['train'], device, epoch, print_freq=10)
# update the learning rate
lr_scheduler.step()
# evaluate on the validation dataset
evaluate(model, data_loaders['valid'], device=device)
stop = time.time()
print(f"\n\n{num_epochs} epochs in {stop - start} s ({(stop-start) / 3600:.2f} hrs)")
Everything runs fine before this part, but as soon as I run this cell I get the error shown in the title (TypeError: Caught TypeError in DataLoader worker process 0 ... 'KeyError' object is not iterable).
I have tried adding drop_last to the DataLoader created in helper.py, like this:
data_loaders["train"] = torch.utils.data.DataLoader(
train_data,
batch_size=batch_size,
sampler=train_sampler,
num_workers=num_workers,
collate_fn=utils.collate_fn,
drop_last=True
)
But that doesn't help. For what it's worth, my torch and torchvision versions are compatible and CUDA is available.
How can I fix this?
The get_data_loaders function:
def get_data_loaders(
folder, batch_size: int = 2, valid_size: float = 0.2, num_workers: int = -1, limit: int = -1, thinning: int = None
):
"""
Create and return the train, validation and test data loaders.
:param folder: folder containing the dataset
:param batch_size: size of the mini-batches
:param valid_size: fraction of the dataset to use for validation. For example 0.2
means that 20% of the dataset will be used for validation
:param num_workers: number of workers to use in the data loaders. Use -1 to mean
"use all my cores"
:param limit: maximum number of data points to consider
:param thinning: take every n-th frame, instead of all frames
:return a dictionary with 3 keys: 'train', 'valid' and 'test', containing respectively the
train, validation and test data loaders
"""
if num_workers == -1:
# Use all cores
num_workers = multiprocessing.cpu_count()
# We will fill this up later
data_loaders = {"train": None, "valid": None, "test": None}
# create 3 sets of data transforms: one for the training dataset,
# containing data augmentation, one for the validation dataset
# (without data augmentation) and one for the test set (again
# without augmentation)
data_transforms = {
"train": get_transform(UdacitySelfDrivingDataset.mean, UdacitySelfDrivingDataset.std, train=True),
"valid": get_transform(UdacitySelfDrivingDataset.mean, UdacitySelfDrivingDataset.std, train=False),
"test": get_transform(UdacitySelfDrivingDataset.mean, UdacitySelfDrivingDataset.std, train=False),
}
# Create train and validation datasets
train_data = UdacitySelfDrivingDataset(
folder,
transform=data_transforms["train"],
train=True,
thinning=thinning
)
# The validation dataset is a split from the train dataset, so we read
# from the same folder, but we apply the transforms for validation
valid_data = UdacitySelfDrivingDataset(
folder,
transform=data_transforms["valid"],
train=True,
thinning=thinning
)
# obtain training indices that will be used for validation
n_tot = len(train_data)
indices = torch.randperm(n_tot)
# If requested, limit the number of data points to consider
if limit > 0:
indices = indices[:limit]
n_tot = limit
split = int(math.ceil(valid_size * n_tot))
train_idx, valid_idx = indices[split:], indices[:split]
# define samplers for obtaining training and validation batches
train_sampler = torch.utils.data.SubsetRandomSampler(train_idx)
valid_sampler = torch.utils.data.SubsetRandomSampler(valid_idx)
# prepare data loaders
data_loaders["train"] = torch.utils.data.DataLoader(
train_data,
batch_size=batch_size,
sampler=train_sampler,
num_workers=num_workers,
collate_fn=utils.collate_fn,
drop_last=True
)
data_loaders["valid"] = torch.utils.data.DataLoader(
valid_data,
batch_size=batch_size,
sampler=valid_sampler,
num_workers=num_workers,
collate_fn=utils.collate_fn,
drop_last=True
)
# Now create the test data loader
test_data = UdacitySelfDrivingDataset(
folder,
transform=data_transforms["test"],
train=False,
thinning=thinning
)
if limit > 0:
indices = torch.arange(limit)
test_sampler = torch.utils.data.SubsetRandomSampler(indices)
else:
test_sampler = None
data_loaders["test"] = torch.utils.data.DataLoader(
test_data,
batch_size=batch_size,
shuffle=False,
num_workers=num_workers,
sampler=test_sampler,
collate_fn=utils.collate_fn,
drop_last=True
)
return data_loaders
class UdacitySelfDrivingDataset(torch.utils.data.Dataset):
# Mean and std of the dataset to be used in nn.Normalize
mean = torch.tensor([0.3680, 0.3788, 0.3892])
std = torch.tensor([0.2902, 0.3069, 0.3242])
def __init__(self, root, transform, train=True, thinning=None):
super().__init__()
self.root = os.path.abspath(os.path.expandvars(os.path.expanduser(root)))
self.transform = transform
# load datasets
if train:
self.df = pd.read_csv(os.path.join(self.root, "labels_train.csv"))
else:
self.df = pd.read_csv(os.path.join(self.root, "labels_test.csv"))
# Index by file id (i.e., a sequence of the same length as the number of images)
codes, uniques = pd.factorize(self.df['frame'])
if thinning:
# Take every n-th row. This makes sense because the images are
# frames of videos from the car, so we are essentially reducing
# the frame rate
thinned = uniques[::thinning]
idx = self.df['frame'].isin(thinned)
print(f"Keeping {thinned.shape[0]} of {uniques.shape[0]} images")
print(f"Keeping {idx.sum()} objects out of {self.df.shape[0]}")
self.df = self.df[idx].reset_index(drop=True)
# Recompute codes
codes, uniques = pd.factorize(self.df['frame'])
self.n_images = len(uniques)
self.df['image_id'] = codes
self.df.set_index("image_id", inplace=True)
self.classes = ['car', 'truck', 'pedestrian', 'bicyclist', 'light']
self.colors = ['cyan', 'blue', 'red', 'purple', 'orange']
@property
def n_classes(self):
return len(self.classes)
def __getitem__(self, idx):
if idx in self.df.index:
row = self.df.loc[[idx]]
else:
return KeyError(f"Element {idx} not in dataframe")
# load image from file
img_path = os.path.join(self.root, "images", row['frame'].iloc[0])
img = Image.open(img_path).convert("RGB")
# Exclude bogus boxes with 0 height or width
h = row['ymax'] - row['ymin']
w = row['xmax'] - row['xmin']
filter_idx = (h > 0) & (w > 0)
row = row[filter_idx]
# get bounding box coordinates for each mask
boxes = row[['xmin', 'ymin', 'xmax', 'ymax']].values
# convert everything into a torch.Tensor
boxes = torch.as_tensor(boxes, dtype=torch.float32)
# get the labels
labels = torch.as_tensor(row['class_id'].values, dtype=int)
image_id = torch.tensor([idx])
area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
# assume no crowd for everything
iscrowd = torch.zeros((row.shape[0],), dtype=torch.int64)
target = {}
target["boxes"] = boxes
target["labels"] = labels
target["image_id"] = image_id
target["area"] = area
target["iscrowd"] = iscrowd
if self.transform is not None:
img, target = self.transform(img, target)
return img, target
def __len__(self):
return self.n_images
def plot(self, idx, renormalize=True, predictions=None, threshold=0.5, ax=None):
image, label_js = self[idx]
if renormalize:
# Invert the T.Normalize transform
unnormalize = T.Compose(
[
T.Normalize(mean = [ 0., 0., 0. ], std = 1 / type(self).std),
T.Normalize(mean = -type(self).mean, std = [ 1., 1., 1. ])
]
)
image, label_js = unnormalize(image, label_js)
if ax is None:
fig, ax = plt.subplots(figsize=(8, 8))
_ = ax.imshow(torch.permute(image, [1, 2, 0]))
for i, box in enumerate(label_js['boxes']):
xy = (box[0], box[1])
h, w = (box[2] - box[0]), (box[3] - box[1])
r = patches.Rectangle(xy, h, w, fill=False, color=self.colors[label_js['labels'][i]-1], lw=2, alpha=0.5)
ax.add_patch(r)
if predictions is not None:
# Make sure the predictions are on the CPU
for k in predictions:
predictions[k] = predictions[k].detach().cpu().numpy()
for i, box in enumerate(predictions['boxes']):
if predictions['scores'][i] > threshold:
xy = (box[0], box[1])
h, w = (box[2] - box[0]), (box[3] - box[1])
r = patches.Rectangle(xy, h, w, fill=False, color=self.colors[predictions['labels'][i]-1], lw=2, linestyle=':')
ax.add_patch(r)
_ = ax.axis("off")
return ax
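A likely culprit, for reference: in __getitem__ above, the missing-index branch returns a KeyError instance instead of raising it, so the exception object itself is handed onward inside the DataLoader worker, and whatever later tries to iterate or unpack the sample fails with the TypeError in the title. A hedged sketch of the change to try first (the int(idx) cast is an extra safeguard in case the sampler yields 0-dim tensors rather than plain ints; the rest of the method stays unchanged):
def __getitem__(self, idx):
    # Work with a plain int in case the sampler hands over a 0-dim tensor
    idx = int(idx)
    if idx not in self.df.index:
        # Raise instead of return: returning the exception object sends it
        # downstream, which is what produces the confusing TypeError
        raise KeyError(f"Element {idx} not in dataframe")
    row = self.df.loc[[idx]]
    # ... rest of the method unchanged ...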

Related

Error message when running the codes in Jupyter notebook

I am trying to test the accuracy on the images without using image augmentation. When I run the two code blocks below, I get the error shown here:
TypeError: 'NoneType' object is not callable
I found that the error occurs in the second block. I would like to know what causes this error message and how to resolve it. Attached below is my code, which has to be run as two blocks in the same session. I am using a Jupyter notebook for this. Thanks!
Code 1:
import torch
from torch import nn
from torch.autograd import Variable
import torch.nn.functional as F
import math
class CrossEntropyLabelSmooth(nn.Module):
"""Cross entropy loss with label smoothing regularizer.
Reference:
Szegedy et al. Rethinking the Inception Architecture for Computer Vision. CVPR 2016.
Equation: y = (1 - epsilon) * y + epsilon / K.
Args:
num_classes (int): number of classes.
epsilon (float): weight.
"""
def __init__(self, num_classes, epsilon=0.1, device='cpu'):
super(CrossEntropyLabelSmooth, self).__init__()
self.num_classes = num_classes
self.epsilon = epsilon
self.device = device
self.logsoftmax = nn.LogSoftmax(dim=1)
def forward(self, inputs, targets):
"""
Args:
inputs: prediction matrix (before softmax) with shape (batch_size, num_classes)
targets: ground truth labels with shape (num_classes)
"""
log_probs = self.logsoftmax(inputs)
# targets = torch.zeros(log_probs.size()).scatter_(1, targets.unsqueeze(1).data, 1)# for mldg da
targets = torch.zeros(log_probs.size()).scatter_(1, targets.unsqueeze(1).data.cpu(), 1)#for zzd
targets = targets.to(self.device)
targets = (1 - self.epsilon) * targets + self.epsilon / self.num_classes
loss = (-Variable(targets) * log_probs).mean(0).sum()
return loss
class TripletLoss(nn.Module):
"""Triplet loss with hard positive/negative mining.
Reference:
Hermans et al. In Defense of the Triplet Loss for Person Re-Identification. arXiv:1703.07737.
Code imported from https://github.com/Cysu/open-reid/blob/master/reid/loss/triplet.py.
Args:
margin (float): margin for triplet.
"""
def __init__(self, margin=0.3):
super(TripletLoss, self).__init__()
self.margin = margin
self.ranking_loss = nn.MarginRankingLoss(margin=margin)
def forward(self, inputs, targets):
"""
Args:
inputs: feature matrix with shape (batch_size, feat_dim)
targets: ground truth labels with shape (num_classes)
"""
n = inputs.size(0)
# Compute pairwise distance, replace by the official when merged
dist = torch.pow(inputs, 2).sum(dim=1, keepdim=True).expand(n, n)
dist = dist + dist.t()
dist.addmm_(1, -2, inputs, inputs.t())
dist = dist.clamp(min=1e-12).sqrt() # for numerical stability
# For each anchor, find the hardest positive and negative
mask = targets.expand(n, n).eq(targets.expand(n, n).t())
dist_ap, dist_an = [], []
for i in range(n):
dist_ap.append(dist[i][mask[i]].max().unsqueeze(0))
dist_an.append(dist[i][mask[i] == 0].min().unsqueeze(0))
dist_ap = torch.cat(dist_ap)
dist_an = torch.cat(dist_an)
# Compute ranking hinge loss
y = torch.ones_like(dist_an)
loss = self.ranking_loss(dist_an, dist_ap, y)
return loss
class CenterLoss(nn.Module):
"""Center loss.
Reference:
Wen et al. A Discriminative Feature Learning Approach for Deep Face Recognition. ECCV 2016.
Args:
num_classes (int): number of classes.
feat_dim (int): feature dimension.
"""
def __init__(self, num_classes=10, feat_dim=2048, device='cpu'):
super(CenterLoss, self).__init__()
self.num_classes = num_classes
self.feat_dim = feat_dim
self.device = device
self.centers = nn.Parameter(torch.randn(self.num_classes, self.feat_dim)).to(self.device)
def forward(self, x, labels):
"""
Args:
x: feature matrix with shape (batch_size, feat_dim).
labels: ground truth labels with shape (num_classes).
"""
batch_size = x.size(0)
distmat = torch.pow(x, 2).sum(dim=1, keepdim=True).expand(batch_size, self.num_classes) + \
torch.pow(self.centers, 2).sum(dim=1, keepdim=True).expand(self.num_classes, batch_size).t()
distmat.addmm_(1, -2, x, self.centers.t())
classes = torch.arange(self.num_classes).long()
classes = classes.to(self.device)
labels = labels.unsqueeze(1).expand(batch_size, self.num_classes)
mask = labels.data.eq(classes.expand(batch_size, self.num_classes))
dist = []
for i in range(batch_size):
value = distmat[i][mask[i]]
value = value.clamp(min=1e-12, max=1e+12) # for numerical stability
dist.append(value)
dist = torch.cat(dist)
loss = dist.mean()
return loss
Code 2:
# Code without data augmentation
import torch
import torch.nn as nn
from torchvision.datasets import ImageFolder
from torchvision import transforms
import torchvision.models as models
from torch.utils.data import Dataset, DataLoader
import os
import numpy as np
from tqdm import tqdm
from PIL import Image
class FoodDataset(Dataset):
def __init__(self, file, transform=None, mode='train'):
self.transforms = transform
self.mode = mode
with open(file, 'r') as f:
self.image_list = f.readlines()
def __len__(self):
return len(self.image_list)
def __getitem__(self, index):
label = None
if self.mode == 'train':
image, label = self.image_list[index].split('\n')[0].split('\t')
label = int(label)
else:
image = self.image_list[index].split('\n')[0]
image = Image.open(image).convert('RGB')
image = self.transforms(image)
if self.mode == 'train':
return image, label
else:
return image
#transforms_train = transforms.Compose([
# transforms.Resize((224, 224)),
# transforms.RandomHorizontalFlip(p=0.5),
# transforms.RandomVerticalFlip(p=0.5),
# transforms.Pad(10, 10),
# transforms.RandomRotation(45),
# transforms.RandomCrop((224, 224)),
# transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5),
# transforms.ToTensor(),
# transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
# ])
#transforms_test = transforms.Compose([
# transforms.Resize((224, 224)),
# transforms.ToTensor(),
# transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
# ])
def evaluate(prediction, ground_truth):
num_correct = (np.array(prediction) == np.array(ground_truth)).sum()
return num_correct / len(prediction)
train_ds = FoodDataset('data/train.txt')
val_ds = FoodDataset('data/val.txt')
test_ds = FoodDataset('data/test.txt')
train_dl = DataLoader(train_ds, batch_size=32, shuffle=True)
val_dl = DataLoader(val_ds, batch_size=32, shuffle=True)
test_dl = DataLoader(test_ds, batch_size=32, shuffle=True)
num_classes = 5
train_model = models.resnet50(pretrained=True)
train_model.fc = nn.Linear(2048, num_classes)
output_dir = 'checkpoint'
if output_dir and not os.path.exists(output_dir):
os.makedirs(output_dir)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
ce_loss = CrossEntropyLabelSmooth(num_classes = num_classes, device = device)
optimizer = torch.optim.Adam(train_model.parameters(), lr=0.001)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)
for param in train_model.parameters():
param.requires_grad = False
for param in train_model.fc.parameters():
param.requires_grad = True
for i in range(5):
train_model.train()
train_model.to(device)
for img, label in tqdm(train_dl):
img = img.to(device)
label = label.to(device)
optimizer.zero_grad()
output= train_model(img)
loss = ce_loss(output, label)
loss.backward()
optimizer.step()
for param in train_model.parameters():
param.requires_grad = True
epoch = 100
highest_acc = {'epoch': 0, 'accuracy': 0}
for ep in range(epoch):
train_model.train()
train_model.to(device)
count = 0
running_loss = 0.0
validation_loss = 0.0
output_list = []
ground_truth_list = []
for img, label in tqdm(train_dl):
img = img.to(device)
label = label.to(device)
optimizer.zero_grad()
output= train_model(img)
loss = ce_loss(output, label)
count += 1
prediction = torch.argmax(output, dim=1)
output_list.extend(prediction.detach().cpu())
ground_truth_list.extend(label.cpu())
running_loss += loss.item()
loss.backward()
optimizer.step()
scheduler.step()
if ep % 10 == 0:
torch.save(train_model.state_dict(), output_dir + '/resnet50_' + str(ep) + '.pth')
accuracy = evaluate(output_list, ground_truth_list)
print(f'Epoch[{ep}] training accuracy: {accuracy} '
f'training loss: {running_loss / count:.3e} Base Lr: {optimizer.param_groups[0]["lr"]:.5e}')
if ep % 10 == 0:
train_model.eval()
count = 0
output_list = []
ground_truth_list = []
for img, label in tqdm(val_dl):
with torch.no_grad():
img = img.to(device)
lbl = label.to(device)
output= train_model(img)
val_loss = ce_loss(output, lbl)
validation_loss += val_loss.item()
count += 1
prediction = torch.argmax(output, dim=1)
output_list.extend(prediction.detach().cpu())
ground_truth_list.extend(label)
accuracy = evaluate(output_list, ground_truth_list)
if accuracy > highest_acc['accuracy']:
highest_acc['accuracy'] = accuracy
highest_acc['epoch'] = ep
print(f'Accuracy: {accuracy} Epoch:{ep}')
torch.save(train_model.state_dict(), output_dir + '/resnet50_' + 'final' + '.pth')
print('highest_acc: {} epoch: {}'.format(highest_acc['accuracy'], highest_acc['epoch']))
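One observation that may explain the error, for reference: both transforms.Compose definitions are commented out, and every FoodDataset above is constructed without a transform, so self.transforms is None and the call self.transforms(image) in __getitem__ raises exactly "TypeError: 'NoneType' object is not callable". A minimal sketch of a no-augmentation setup that still converts the images to tensors (ToTensor and Normalize are preprocessing, not augmentation); it reuses the paths and the FoodDataset class from the code above:
from torchvision import transforms

# Plain preprocessing only: resize, tensor conversion, normalization - no augmentation
transforms_test = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Pass the transform explicitly so self.transforms is not None inside __getitem__
train_ds = FoodDataset('data/train.txt', transform=transforms_test)
val_ds = FoodDataset('data/val.txt', transform=transforms_test)
test_ds = FoodDataset('data/test.txt', transform=transforms_test)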

How to create a DataSet of 1000 graphs in python

I need to create a dataset of 1000 graphs. I used the following code:
data_list = []
ngraphs = 1000
for i in range(ngraphs):
num_nodes = randint(10,500)
num_edges = randint(10,num_nodes*(num_nodes - 1))
f1 = np.random.randint(10, size=(num_nodes))
f2 = np.random.randint(10,20, size=(num_nodes))
f3 = np.random.randint(20,30, size=(num_nodes))
f_final = np.stack((f1,f2,f3), axis=1)
capital = 2*f1 + f2 - f3
f1_t = torch.from_numpy(f1)
f2_t = torch.from_numpy(f2)
f3_t = torch.from_numpy(f3)
capital_t = torch.from_numpy(capital)
capital_t = capital_t.type(torch.LongTensor)
x = torch.from_numpy(f_final)
x = x.type(torch.LongTensor)
edge_index = torch.randint(low=0, high=num_nodes, size=(num_edges,2), dtype=torch.long)
edge_attr = torch.randint(low=0, high=50, size=(num_edges,1), dtype=torch.long)
data = Data(x = x, edge_index = edge_index.t().contiguous(), y = capital_t, edge_attr=edge_attr )
data_list.append(data)
This works. But when I run my training function as follows:
for epoch in range(1, 500):
loss = train()
print(f'Loss: {loss:.4f}')
I keep getting the following error:
RuntimeError                              Traceback (most recent call last)
<ipython-input> in <module>()
      1 for epoch in range(1, 500):
----> 2     loss = train()
      3     print(f'Loss: {loss:.4f}')

5 frames
/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py in linear(input, weight, bias)
   1845     if has_torch_function_variadic(input, weight):
   1846         return handle_torch_function(linear, (input, weight), input, weight, bias=bias)
-> 1847     return torch._C._nn.linear(input, weight, bias)
   1848
   1849
RuntimeError: expected scalar type Float but found Long
Can someone help me troubleshoot this, or show how to build a 1000-graph dataset that doesn't throw this error?
Change your x and y tensors into FloatTensor, since the Linear layer in PyTorch only accepts float inputs.
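A minimal sketch of that conversion, using stand-in sizes and assuming torch_geometric.data.Data is the Data class used in the loop above:
import numpy as np
import torch
from torch_geometric.data import Data  # assumed to be the Data used in the question

num_nodes, num_edges = 50, 200
f_final = np.random.randint(0, 10, size=(num_nodes, 3))
capital = 2 * f_final[:, 0] + f_final[:, 1] - f_final[:, 2]

edge_index = torch.randint(low=0, high=num_nodes, size=(num_edges, 2), dtype=torch.long)
edge_attr = torch.randint(low=0, high=50, size=(num_edges, 1), dtype=torch.long)

# The fix: node features (and a regression target) must be float for nn.Linear
x = torch.from_numpy(f_final).float()
y = torch.from_numpy(capital).float()  # keep .long() instead if y is a class index
                                       # fed to CrossEntropyLoss

data = Data(x=x, edge_index=edge_index.t().contiguous(), y=y, edge_attr=edge_attr)
print(data)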

Unable to understand dimension mismatch error in Julia

I’m a beginner with Julia and ML. I’m attempting to re-use code from the Flux Model Zoo, specifically this, to classify images from this dataset. Below is my version of the code - I modified the data loading and the params in build_model to account for the difference in image size and the number of character types to be classified. The original had 28x28 images and 10 digits; the Arabic character set has 32x32 images and 28 characters.
function getimages(filename)
filepath = pwd() * "/images/" * filename
mtrx = Matrix(DataFrame(CSV.File(filepath)))
r, _ = size(mtrx)
v = Vector{Matrix{Int64}}()
for i = 1:r
push!(v, reshape(mtrx[i, :], 32, 32))
end
v
end
function getlabels(filename)
filepath = pwd() * "/images/" * filename
vec(Matrix(DataFrame(CSV.File(filepath))))
end
function load_data(args)
train_data_file = "csvTrainImages.csv"
test_data_file = "csvTestImages.csv"
train_label_file = "csvTrainLabel.csv"
test_label_file = "csvTestLabel.csv"
train_data = getimages(train_data_file)
test_data = getimages(test_data_file)
train_labels = getlabels(train_label_file)
test_labels = getlabels(test_label_file)
xtrain = Flux.flatten(train_data)
xtest = Flux.flatten(test_data)
ytrain, ytest = onehotbatch(train_labels, 1:28), onehotbatch(test_labels, 1:28)
train_loader = DataLoader((xtrain, ytrain), batchsize=args.batchsize, shuffle=true)
test_loader = DataLoader((xtest, ytest), batchsize=args.batchsize)
return train_loader, test_loader
end
function build_model(; imgsize=(32,32,1), nclasses=28)
return Chain(
Dense(prod(imgsize), 32, relu),
Dense(32, nclasses))
end
function loss_and_accuracy(data_loader, model, device)
acc = 0
ls = 0.0f0
num = 0
for (x, y) in data_loader
x, y = device(x), device(y)
ŷ = model(x)
ls += logitcrossentropy(model(x), y, agg=sum)
acc += sum(onecold(cpu(model(x))) .== onecold(cpu(y)))
num += size(x, 2)
end
return ls / num, acc / num
end
@kwdef mutable struct Args
η::Float64 = 3e-4 # learning rate
batchsize::Int = 256 # batch size
epochs::Int = 10 # number of epochs
use_cuda::Bool = true # use gpu (if cuda available)
end
function train(; kws...)
args = Args(; kws...) # collect options in a struct for convenience
if CUDA.functional() && args.use_cuda
@info "Training on CUDA GPU"
CUDA.allowscalar(false)
device = gpu
else
@info "Training on CPU"
device = cpu
end
# Create test and train dataloaders
train_loader, test_loader = load_data(args)
# Construct model
model = build_model() |> device
ps = Flux.params(model) # model's trainable parameters
## Optimizer
opt = ADAM(args.η)
## Training
for epoch in 1:args.epochs
for (x, y) in train_loader
x, y = device(x), device(y) # transfer data to device
gs = gradient(() -> logitcrossentropy(model(x), y), ps) # compute gradient
Flux.Optimise.update!(opt, ps, gs) # update parameters
end
# Report on train and test
train_loss, train_acc = loss_and_accuracy(train_loader, model, device)
test_loss, test_acc = loss_and_accuracy(test_loader, model, device)
println("Epoch=$epoch")
println(" train_loss = $train_loss, train_accuracy = $train_acc")
println(" test_loss = $test_loss, test_accuracy = $test_acc")
end
end
I get the following error when I train the model. Specifically, during the gradient computation. Could you help me understand which two matrices the error refers to and point me towards a solution? My guess is that it has to do with the build_model params, but I’m not quite sure what needs to change and how.
DimensionMismatch("matrix A has dimensions (32,1024), matrix B has dimensions (1,256)")
macro expansion @ interface2.jl:0 [inlined]
_pullback(::Zygote.Context, ::typeof(throw), ::DimensionMismatch) @ interface2.jl:9
_pullback @ matmul.jl:814 [inlined]
_pullback(::Zygote.Context, ::typeof(LinearAlgebra._generic_matmatmul!), ::Matrix{Matrix{Float32}}, ::Char, ::Char, ::Matrix{Float32}, ::Matrix{Matrix{Int64}}, ::LinearAlgebra.MulAddMul{true, true, Bool, Bool}) @ interface2.jl:0
_pullback @ matmul.jl:802 [inlined]
_pullback(::Zygote.Context, ::typeof(LinearAlgebra.generic_matmatmul!), ::Matrix{Matrix{Float32}}, ::Char, ::Char, ::Matrix{Float32}, ::Matrix{Matrix{Int64}}, ::LinearAlgebra.MulAddMul{true, true, Bool, Bool}) @ interface2.jl:0
_pullback @ matmul.jl:302 [inlined]
_pullback @ matmul.jl:275 [inlined]
_pullback(::Zygote.Context, ::typeof(LinearAlgebra.mul!), ::Matrix{Matrix{Float32}}, ::Matrix{Float32}, ::Matrix{Matrix{Int64}}) @ interface2.jl:0
_pullback @ matmul.jl:153 [inlined]
_pullback @ basic.jl:147 [inlined]
...
Solved by fixing the getimages method as below.
function getimages(filename)
filepath = pwd() * "/images/" * filename
mtrx = Matrix(DataFrame(CSV.File(filepath)))
return mtrx'
end

Using bias in PyTorch for basic function approximation

Using R, it is very easy to approximate basic functions through a neural network:
library(nnet)
x <- sort(10*runif(50))
y <- sin(x)
nn <- nnet(x, y, size=4, maxit=10000, linout=TRUE, abstol=1.0e-8, reltol = 1.0e-9, Wts = seq(0, 1, by=1/12) )
plot(x, y)
x1 <- seq(0, 10, by=0.1)
lines(x1, predict(nn, data.frame(x=x1)), col="green")
predict( nn , data.frame(x=pi/2) )
A simple neural network with one hidden layer of a mere 4 neurons is sufficient to approximate a sine. (As per stackoverflow question Approximating function with Neural Network.)
But I cannot obtain the same in PyTorch.
In fact, the neural network created by R contains not only an input neuron, four hidden neurons and an output neuron, but also two "bias" neurons - the first connected to the hidden layer, the second to the output.
The plot above is obtained through the following:
library(devtools)
library(scales)
library(reshape)
source_url('https://gist.github.com/fawda123/7471137/raw/cd6e6a0b0bdb4e065c597e52165e5ac887f5fe95/nnet_plot_update.r')
plot.nnet(nn$wts,struct=nn$n, pos.col='#007700',neg.col='#FF7777') ### this plots the graph
plot.nnet(nn$wts,struct=nn$n, pos.col='#007700',neg.col='#FF7777', wts.only=1) ### this prints the weights
Attempting the same with PyTorch produces a different network: the bias neurons are missing.
Following is an attempt to do in PyTorch what was done previously in R. The results are not satisfactory: the function is not approximated. The most evident difference is the absence of the bias neurons.
import torch
from torch.autograd import Variable
import random
import math
N, D_in, H, D_out = 1000, 1, 4, 1
l_x = []
l_y = []
for a in range(1000):
r = random.random()*10
l_x.append( [r] )
l_y.append( [math.sin(r)] )
tx = torch.cuda.FloatTensor(l_x)
ty = torch.cuda.FloatTensor(l_y)
x = Variable(tx, requires_grad=False)
y = Variable(ty, requires_grad=False)
w1 = Variable(torch.randn(D_in, H ).type(torch.cuda.FloatTensor), requires_grad=True)
w2 = Variable(torch.randn(H, D_out).type(torch.cuda.FloatTensor), requires_grad=True)
learning_rate = 1e-5
for t in range(1000):
y_pred = x.mm(w1).clamp(min=0).mm(w2)
loss = (y_pred - y).pow(2).sum()
if t<10 or t%100==1: print(t, loss.data[0])
loss.backward()
w1.data -= learning_rate * w1.grad.data
w2.data -= learning_rate * w2.grad.data
w1.grad.data.zero_()
w2.grad.data.zero_()
t = [ [math.pi] ]
print( str(t) +" -> "+ str( (Variable(torch.cuda.FloatTensor( t ))).mm(w1).clamp(min=0).mm(w2).data ) )
t = [ [math.pi/2] ]
print( str(t) +" -> "+ str( (Variable(torch.cuda.FloatTensor( t ))).mm(w1).clamp(min=0).mm(w2).data ) )
How can I make the network approximate the given function (a sine in this case), either by inserting the "bias" neurons or by fixing whatever other detail is missing?
Moreover, I have difficulty understanding why R inserts the "bias". I found information suggesting that the bias is akin to the "intercept in a regression model", but I still find it unclear. Any information would be appreciated.
EDIT: an excellent explanation turned out to be at stackoverflow question Role of Bias in Neural Networks
EDIT:
An example that obtains the result, though using the "fuller" framework ("not reinventing the wheel"), is as follows:
import torch
from torch.autograd import Variable
import torch.nn.functional as F
import math
N, D_in, H, D_out = 1000, 1, 4, 1
l_x = []
l_y = []
for a in range(1000):
t = (a/1000.0)*10
l_x.append( [t] )
l_y.append( [math.sin(t)] )
x = Variable( torch.FloatTensor(l_x) )
y = Variable( torch.FloatTensor(l_y) )
class Net(torch.nn.Module):
def __init__(self, n_feature, n_hidden, n_output):
super(Net, self).__init__()
self.to_hidden = torch.nn.Linear(n_feature, n_hidden)
self.to_output = torch.nn.Linear(n_hidden, n_output)
def forward(self, x):
x = self.to_hidden(x)
x = F.tanh(x) # activation function
x = self.to_output(x)
return x
net = Net(n_feature = D_in, n_hidden = H, n_output = D_out)
learning_rate = 0.01
optimizer = torch.optim.Adam( net.parameters() , lr=learning_rate )
for t in range(1000):
y_pred = net(x)
loss = (y_pred - y).pow(2).sum()
if t<10 or t%100==1: print(t, loss.data[0])
loss.backward()
optimizer.step()
optimizer.zero_grad()
t = [ [math.pi] ]
print( str(t) +" -> "+ str( net( Variable(torch.FloatTensor( t )) ) ) )
t = [ [math.pi/2] ]
print( str(t) +" -> "+ str( net( Variable(torch.FloatTensor( t )) ) ) )
Unfortunately, while this code works properly, it does not solve the matter of making the original, more "low level" code work as expected (e.g. introducing the bias).
Following up on @jdhao's comment - this is a super-simple PyTorch model that computes exactly what you want:
class LinearWithInputBias(nn.Linear):
def __init__(self, in_features, out_features, out_bias=True, in_bias=True):
nn.Linear.__init__(self, in_features, out_features, out_bias)
if in_bias:
in_bias = torch.zeros(1, out_features)
# in_bias.normal_() # if you want it to be randomly initialized
self._out_bias = nn.Parameter(in_bias)
def forward(self, x):
out = nn.Linear.forward(self, x)
try:
out = out + self._out_bias
except AttributeError:
pass
return out
However, there's an additional bug in your code: from what I can see, you don't actually train it - i.e. you never step an optimizer (like torch.optim.SGD(mod.parameters())) before you delete the gradient information by calling grad.data.zero_().
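As a follow-up on the training point, here is a minimal, hedged usage sketch (variable names are illustrative) that pairs the LinearWithInputBias module above with an optimizer on the same sine-fitting task, so that the gradients are actually applied before being zeroed:
import math
import torch
from torch import nn

x = torch.linspace(0, 10, 1000).unsqueeze(1)
y = torch.sin(x)

model = nn.Sequential(
    LinearWithInputBias(1, 4),  # hidden layer, with the extra input-side bias from above
    nn.Tanh(),
    nn.Linear(4, 1),            # nn.Linear already carries its own bias term
)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

for step in range(2000):
    y_pred = model(x)
    loss = (y_pred - y).pow(2).mean()
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()            # apply the gradients before they are cleared

print(model(torch.tensor([[math.pi / 2]])))  # ideally close to sin(pi/2) = 1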

Tensorflow: 6 layer CNN: OOM (use 10Gb GPU memory)

I am using the following code to run a 6-layer CNN with 2 FC layers on top (on a Tesla K-80 GPU).
Somehow it consumes the entire 10 GB of GPU memory and dies with an out-of-memory error. I know that I can reduce the batch_size and then run it, but I also want to run it with 15 or 20 CNN layers. What is wrong with the following code, and why does it take all the memory? How should I run the code for a 15-layer CNN?
Code:
import model
with tf.Graph().as_default() as g_train:
filenames = tf.train.match_filenames_once(FLAGS.train_dir+'*.tfrecords')
filename_queue = tf.train.string_input_producer(filenames, shuffle=True, num_epochs=FLAGS.num_epochs)
feats,labels = get_batch_input(filename_queue, batch_size=FLAGS.batch_size)
### feats size=(batch_size, 100, 50)
logits = model.inference(feats, FLAGS.batch_size)
loss = model.loss(logits, labels, feats)
tvars = tf.trainable_variables()
global_step = tf.Variable(0, name='global_step', trainable=False)
# Add to the Graph operations that train the model.
train_op = model.training(loss, tvars, global_step, FLAGS.learning_rate, FLAGS.clip_gradients)
# Add the Op to compare the logits to the labels during evaluation.
eval_correct = model.evaluation(logits, labels, feats)
summary_op = tf.merge_all_summaries()
saver = tf.train.Saver(tf.all_variables(), max_to_keep=15)
# The op for initializing the variables.
init_op = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init_op)
summary_writer = tf.train.SummaryWriter(FLAGS.model_dir,
graph=sess.graph)
# Start input enqueue threads.
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
try:
step = 0
while not coord.should_stop():
_, loss_value = sess.run([train_op, loss])
if step % 100 == 0:
print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value))
# Update the events file.
summary_str = sess.run(summary_op)
summary_writer.add_summary(summary_str, step)
if (step == 0) or (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
ckpt_model = os.path.join(FLAGS.model_dir, 'model.ckpt')
saver.save(sess, ckpt_model, global_step=step)
#saver.save(sess, FLAGS.model_dir, global_step=step)
step += 1
except tf.errors.OutOfRangeError:
print('Done training for %d epochs, %d steps.' % (FLAGS.num_epochs, step))
finally:
coord.join(threads)
sess.close()
###################### File model.py ####################
def conv2d(x, W, b, strides=1):
# Conv2D wrapper, with bias and relu activation
x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1],
padding='SAME')
x = tf.nn.bias_add(x, b)
return tf.nn.relu(x)
def maxpool2d(x, k=2,s=2):
# MaxPool2D wrapper
return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, s,
s,1],padding='SAME')
def inference(feats,batch_size):
#feats size (batch_size,100,50,1) #batch_size=256
conv1_w=tf.get_variable("conv1_w", [filter_size,filter_size,1,256],initializer=tf.uniform_unit_scaling_initializer())
conv1_b=tf.get_variable("conv1_b",[256])
conv1 = conv2d(feats, conv1_w, conv1_b,2)
conv1 = maxpool2d(conv1, k=2,s=2)
### This was replicated for 6 layers and the 2 FC connected layers are added
return logits
def training(loss, train_vars, global_step, learning_rate, clip_gradients):
# Add a scalar summary for the snapshot loss.
tf.scalar_summary(loss.op.name, loss)
grads, _ = tf.clip_by_global_norm(tf.gradients(loss, train_vars,aggregation_method=1), clip_gradients)
optimizer = tf.train.AdamOptimizer(learning_rate)
train_op = optimizer.apply_gradients(zip(grads, train_vars), global_step=global_step)
return train_op
I am not too sure what the model Python module is. If it is something you wrote and you can change the settings in the optimizer, I would suggest the following, which I use in my own code:
train_step = tf.train.AdamOptimizer(learning_rate).minimize(cost, aggregation_method = tf.AggregationMethod.EXPERIMENTAL_ACCUMULATE_N)
By default the aggregation_method is ADD_N, but if you change it to EXPERIMENTAL_ACCUMULATE_N or EXPERIMENTAL_TREE it will save a lot of memory. The main memory hog in these programs is that TensorFlow must save the output values at every neuron so that it can compute the gradients. Changing the aggregation_method helps a lot in my experience.
Also, BTW, I don't think there is anything wrong with your code. I can run out of memory on small conv-nets as well.
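For instance, keeping the gradient clipping from model.training, the same option can be passed to tf.gradients directly. This is only a sketch against the old TF 1.x-style API that the question already uses:
import tensorflow as tf  # same TF 1.x-style API as in the question

def training(loss, train_vars, global_step, learning_rate, clip_gradients):
    # Add a scalar summary for the snapshot loss, as before
    tf.scalar_summary(loss.op.name, loss)
    # Accumulate gradients as they are computed instead of materializing a large AddN
    grads = tf.gradients(loss, train_vars,
                         aggregation_method=tf.AggregationMethod.EXPERIMENTAL_ACCUMULATE_N)
    grads, _ = tf.clip_by_global_norm(grads, clip_gradients)
    optimizer = tf.train.AdamOptimizer(learning_rate)
    return optimizer.apply_gradients(zip(grads, train_vars), global_step=global_step)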

Resources