Why does my model predict the same label? - graph

I am training a graph convolution neural network to classify EEG signals into emotion classes. The input of my data is an array of size [12803216]-->[number of subjects * numbers of channels (nodes) * features of each node]. The output should be class 0(Negative) or class 1(Positive).The data is slightly imbalanced (45% class 0 and 55% class 1). The problem is that my model always predict label 0 as output for all inputs in the training stage regardless of the convolution function.
What is wrong with my code and how can I fix it? Any comments are welcome.
connectivity at the below code is predefined based at the connections of the 32 electrodes(nodes)
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, SAGEConv, ResGatedGraphConv, global_mean_pool, BatchNorm
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, f1_score, accuracy_score
labels = np.load("/content/drive/MyDrive/ValenceLabels_thres_5.npy")
labels = np.array(labels, dtype='int64')
labels.shape
class EEGraph(nn.Module):
def __init__(self, embedding_dim, first_conv, n_layers, conv_layer):
super(EEGraph, self).__init__()
self.n_layers = n_layers
self.convs = []
self.bns = []
d_in = embedding_dim
d_out = first_conv
for i in range(n_layers):
self.convs.append(conv_layer(d_in, d_out))
self.bns.append(BatchNorm(d_out, eps=1e-5, momentum=0.1, affine=True, track_running_stats=True))
if i < n_layers - 1:
d_in, d_out = d_out, 2*d_out
self.convs = torch.nn.ModuleList(self.convs)
self.bns = torch.nn.ModuleList(self.bns)
self.project = nn.Linear(d_out, 3) # d_in beacu
self.project.apply(lambda x: nn.init.xavier_normal_(x.weight, gain=1) if type(x) == nn.Linear else None)
def forward(self, x, edge_index):
for i, (conv, bn) in enumerate(zip(self.convs, self.bns)):
x = conv(x, edge_index).permute(0, 2, 1)
x = bn(x)
x = F.dropout(F.leaky_relu(x, negative_slope=0.01), p=0.5, training=self.training).permute(0, 2, 1)
out = x.mean(dim=1).squeeze(dim=-1)
out = self.project(out)
return F.softmax(out, dim=-1)
device = torch.device("cuda")
connectivity = [[channel_order.index(e[0]), channel_order.index(e[1])] for e in edges]
connectivity = torch.tensor(connectivity).t().contiguous().to(device)
best_f1_score = -1
best_trial_name = None
n_epochs = 500
lr = 1e-3
weight_decay = 1e-5
batch_size = 63
criterion = nn.CrossEntropyLoss()
for node_dim in [16]:
node_features = np.load(f"/content/deap_graph_valence{node_dim}_1.npy")
A, Xte, yA, yte = train_test_split(node_features, labels, test_size=0.2, shuffle=True, stratify=labels, random_state=0)
Xtr, Xtr_valid, ytr, ytr_valid = train_test_split(A, yA, test_size=0.2, shuffle=True, stratify=yA, random_state=0)
Xtr = torch.tensor(Xtr).float().to(device)
Xtr_valid = torch.tensor(Xtr_valid).float().to(device)
Xte = torch.tensor(Xte).float().to(device)
ytr = torch.tensor(ytr).to(device)
#ytr_valid = torch.tensor(ytr_valid).to(device)
#yte = torch.tensor(yte).to(device)
for conv_fn in [GCNConv, SAGEConv, ResGatedGraphConv]:
for n_layers in range(1, 4):
for conv_dim in [32, 64, 128,256]:
trial_name = f"node_dim_{node_dim}-conv_fn_{conv_fn.__name__}-conv_layers_{n_layers}-conv_dim_{conv_dim}"
print(f"#: {trial_name}")
model = EEGraph(embedding_dim=Xtr.shape[-1],
first_conv=conv_dim,
n_layers=n_layers,
conv_layer=conv_fn).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
for epoch in range(n_epochs):
model.train()
indices = torch.randperm(len(Xtr))
for j, batch in enumerate(indices.view(-1, 63)):
optimizer.zero_grad()
batch_input = Xtr[batch]
outputs = model(batch_input, connectivity)
loss = criterion(outputs, ytr[batch])
loss.backward()
optimizer.step()
with torch.no_grad():
model.eval()
outputs = model(Xtr_valid, connectivity)
output_classes = torch.argmax(outputs, dim=-1).cpu().numpy()
f1 = f1_score(ytr_valid, output_classes, average="macro")
if f1 > best_f1_score:
best_trial_name = trial_name
best_f1_score = f1
print("-"*100)
print(f"Best model so far: {best_trial_name}")
print(f"Best F1 Score: %{100*best_f1_score:.2f}")
test_outputs = model(Xte, connectivity)
test_output_classes = torch.argmax(test_outputs, dim=-1).cpu().numpy()
print(classification_report(yte, test_output_classes, target_names=["Negative", "Positive"]))
print("-"*100)
print()

Related

TypeError: Caught TypeError in DataLoader worker process 0. TypeError: 'KeyError' object is not iterable

from torchvision_starter.engine import train_one_epoch, evaluate
from torchvision_starter import utils
import multiprocessing
import time
n_cpu = multiprocessing.cpu_count()
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
_ = model.to(device)
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(model.parameters(), lr=0.00001)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
step_size=3,
gamma=0.2,
verbose=True
)
# Let's train for 10 epochs
num_epochs = 1
start = time.time()
for epoch in range(10, 10 + num_epochs):
# train for one epoch, printing every 10 iterations
train_one_epoch(model, optimizer, data_loaders['train'], device, epoch, print_freq=10)
# update the learning rate
lr_scheduler.step()
# evaluate on the validation dataset
evaluate(model, data_loaders['valid'], device=device)
stop = time.time()
print(f"\n\n{num_epochs} epochs in {stop - start} s ({(stop-start) / 3600:.2f} hrs)")
Before I move on to this part, everything is OK. But after I run the part, the error is like below:
I have tried to add drop_last to the helper.py's function like:
data_loaders["train"] = torch.utils.data.DataLoader(
train_data,
batch_size=batch_size,
sampler=train_sampler,
num_workers=num_workers,
collate_fn=utils.collate_fn,
drop_last=True
)
But it doesn't work. By the way, the torch and torchvision are compatible and Cuda is available.
I wonder how to fix it.
The get_data_loaders function:
def get_data_loaders(
folder, batch_size: int = 2, valid_size: float = 0.2, num_workers: int = -1, limit: int = -1, thinning: int = None
):
"""
Create and returns the train_one_epoch, validation and test data loaders.
:param foder: folder containing the dataset
:param batch_size: size of the mini-batches
:param valid_size: fraction of the dataset to use for validation. For example 0.2
means that 20% of the dataset will be used for validation
:param num_workers: number of workers to use in the data loaders. Use -1 to mean
"use all my cores"
:param limit: maximum number of data points to consider
:param thinning: take every n-th frame, instead of all frames
:return a dictionary with 3 keys: 'train_one_epoch', 'valid' and 'test' containing respectively the
train_one_epoch, validation and test data loaders
"""
if num_workers == -1:
# Use all cores
num_workers = multiprocessing.cpu_count()
# We will fill this up later
data_loaders = {"train": None, "valid": None, "test": None}
# create 3 sets of data transforms: one for the training dataset,
# containing data augmentation, one for the validation dataset
# (without data augmentation) and one for the test set (again
# without augmentation)
data_transforms = {
"train": get_transform(UdacitySelfDrivingDataset.mean, UdacitySelfDrivingDataset.std, train=True),
"valid": get_transform(UdacitySelfDrivingDataset.mean, UdacitySelfDrivingDataset.std, train=False),
"test": get_transform(UdacitySelfDrivingDataset.mean, UdacitySelfDrivingDataset.std, train=False),
}
# Create train and validation datasets
train_data = UdacitySelfDrivingDataset(
folder,
transform=data_transforms["train"],
train=True,
thinning=thinning
)
# The validation dataset is a split from the train_one_epoch dataset, so we read
# from the same folder, but we apply the transforms for validation
valid_data = UdacitySelfDrivingDataset(
folder,
transform=data_transforms["valid"],
train=True,
thinning=thinning
)
# obtain training indices that will be used for validation
n_tot = len(train_data)
indices = torch.randperm(n_tot)
# If requested, limit the number of data points to consider
if limit > 0:
indices = indices[:limit]
n_tot = limit
split = int(math.ceil(valid_size * n_tot))
train_idx, valid_idx = indices[split:], indices[:split]
# define samplers for obtaining training and validation batches
train_sampler = torch.utils.data.SubsetRandomSampler(train_idx)
valid_sampler = torch.utils.data.SubsetRandomSampler(valid_idx) # =
# prepare data loaders
data_loaders["train"] = torch.utils.data.DataLoader(
train_data,
batch_size=batch_size,
sampler=train_sampler,
num_workers=num_workers,
collate_fn=utils.collate_fn,
drop_last=True
)
data_loaders["valid"] = torch.utils.data.DataLoader(
valid_data, # -
batch_size=batch_size, # -
sampler=valid_sampler, # -
num_workers=num_workers, # -
collate_fn=utils.collate_fn,
drop_last=True
)
# Now create the test data loader
test_data = UdacitySelfDrivingDataset(
folder,
transform=data_transforms["test"],
train=False,
thinning=thinning
)
if limit > 0:
indices = torch.arange(limit)
test_sampler = torch.utils.data.SubsetRandomSampler(indices)
else:
test_sampler = None
data_loaders["test"] = torch.utils.data.DataLoader(
test_data,
batch_size=batch_size,
shuffle=False,
num_workers=num_workers,
sampler=test_sampler,
collate_fn=utils.collate_fn,
drop_last=True
# -
)
return data_loaders
class UdacitySelfDrivingDataset(torch.utils.data.Dataset):
# Mean and std of the dataset to be used in nn.Normalize
mean = torch.tensor([0.3680, 0.3788, 0.3892])
std = torch.tensor([0.2902, 0.3069, 0.3242])
def __init__(self, root, transform, train=True, thinning=None):
super().__init__()
self.root = os.path.abspath(os.path.expandvars(os.path.expanduser(root)))
self.transform = transform
# load datasets
if train:
self.df = pd.read_csv(os.path.join(self.root, "labels_train.csv"))
else:
self.df = pd.read_csv(os.path.join(self.root, "labels_test.csv"))
# Index by file id (i.e., a sequence of the same length as the number of images)
codes, uniques = pd.factorize(self.df['frame'])
if thinning:
# Take every n-th rows. This makes sense because the images are
# frames of videos from the car, so we are essentially reducing
# the frame rate
thinned = uniques[::thinning]
idx = self.df['frame'].isin(thinned)
print(f"Keeping {thinned.shape[0]} of {uniques.shape[0]} images")
print(f"Keeping {idx.sum()} objects out of {self.df.shape[0]}")
self.df = self.df[idx].reset_index(drop=True)
# Recompute codes
codes, uniques = pd.factorize(self.df['frame'])
self.n_images = len(uniques)
self.df['image_id'] = codes
self.df.set_index("image_id", inplace=True)
self.classes = ['car', 'truck', 'pedestrian', 'bicyclist', 'light']
self.colors = ['cyan', 'blue', 'red', 'purple', 'orange']
#property
def n_classes(self):
return len(self.classes)
def __getitem__(self, idx):
if idx in self.df.index:
row = self.df.loc[[idx]]
else:
return KeyError(f"Element {idx} not in dataframe")
# load images fromm file
img_path = os.path.join(self.root, "images", row['frame'].iloc[0])
img = Image.open(img_path).convert("RGB")
# Exclude bogus boxes with 0 height or width
h = row['ymax'] - row['ymin']
w = row['xmax'] - row['xmin']
filter_idx = (h > 0) & (w > 0)
row = row[filter_idx]
# get bounding box coordinates for each mask
boxes = row[['xmin', 'ymin', 'xmax', 'ymax']].values
# convert everything into a torch.Tensor
boxes = torch.as_tensor(boxes, dtype=torch.float32)
# get the labels
labels = torch.as_tensor(row['class_id'].values, dtype=int)
image_id = torch.tensor([idx])
area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
# assume no crowd for everything
iscrowd = torch.zeros((row.shape[0],), dtype=torch.int64)
target = {}
target["boxes"] = boxes
target["labels"] = labels
target["image_id"] = image_id
target["area"] = area
target["iscrowd"] = iscrowd
if self.transform is not None:
img, target = self.transform(img, target)
return img, target
def __len__(self):
return self.n_images
def plot(self, idx, renormalize=True, predictions=None, threshold=0.5, ax=None):
image, label_js = self[idx]
if renormalize:
# Invert the T.Normalize transform
unnormalize = T.Compose(
[
T.Normalize(mean = [ 0., 0., 0. ], std = 1 / type(self).std),
T.Normalize(mean = -type(self).mean, std = [ 1., 1., 1. ])
]
)
image, label_js = unnormalize(image, label_js)
if ax is None:
fig, ax = plt.subplots(figsize=(8, 8))
_ = ax.imshow(torch.permute(image, [1, 2, 0]))
for i, box in enumerate(label_js['boxes']):
xy = (box[0], box[1])
h, w = (box[2] - box[0]), (box[3] - box[1])
r = patches.Rectangle(xy, h, w, fill=False, color=self.colors[label_js['labels'][i]-1], lw=2, alpha=0.5)
ax.add_patch(r)
if predictions is not None:
# Make sure the predictions are on the CPU
for k in predictions:
predictions[k] = predictions[k].detach().cpu().numpy()
for i, box in enumerate(predictions['boxes']):
if predictions['scores'][i] > threshold:
xy = (box[0], box[1])
h, w = (box[2] - box[0]), (box[3] - box[1])
r = patches.Rectangle(xy, h, w, fill=False, color=self.colors[predictions['labels'][i]-1], lw=2, linestyle=':')
ax.add_patch(r)
_ = ax.axis("off")
return ax

ValueError: shapes (2,1000) and (2,2,1000) not aligned: 1000 (dim 1) != 2 (dim 1)

I'm implementing a MLP to test a simple NN architecture, hoping to scale up to a bigger network with a larger dataset. My end goal is making a working phone recognizer for TIMIT data, as part of my internship.
To build the MLP, I used the suggestions of this video: https://www.youtube.com/watch?v=Z97XGNUUx9o.
And the proposal of my teacher to use the following inputs:
X = np.random.rand(5,1000)
y = X[4:5,:]
The error message is the following:
ValueError Traceback (most recent call last)
Cell In [63], line 7
5 build_model()
6 mlp = MLP(1000, [1000], 1000)
----> 7 mlp.train(inputs,targets, 50, 0.1)
8 output = mlp.forward_propagate(input)
Cell In [62], line 117, in MLP.train(self, inputs, targets, epochs, learning_rate)
115 output = self.forward_propagate(input)
116 error = target - output
--> 117 self.back_propagate(error)
118 self.gradient_descent(learning_rate=1)
119 sum_error += self._mse(target,output)
Cell In [62], line 96, in MLP.back_propagate(self, error)
94 current_activations = self.activations[i]
95 current_activations_reshaped = current_activations.reshape(current_activations.shape[0], -1)
---> 96 self.derivatives[i] = np.dot(current_activations, delta)
97 error = np.dot(delta, self.weights[i].T)
98 return error
File <__array_function__ internals>:180, in dot(*args, **kwargs)
ValueError: shapes (2,1000) and (2,2,1000) not aligned: 1000 (dim 1) != 2 (dim 1)
This is the relevant code:
class MLP(object):
def __init__(self, num_inputs=3, hidden_layers=[3,3], num_outputs=2):
self.num_inputs = num_inputs
self.hidden_layers = hidden_layers
self.num_outputs = num_outputs
layers = [num_inputs] + hidden_layers + [num_outputs]
weights = []
for i in range(len(layers) - 1):
w = np.random.rand(layers[i], layers[i + 1])
weights.append(w)
self.weights = weights
activations = []
for i in range(len(layers)):
a = np.zeros(layers[i])
activations.append(a)
self.activations = activations
derivatives = []
for i in range(len(layers) - 1):
d = np.zeros((layers[i], layers[i+1]))
derivatives.append(d)
self.derivatives = derivatives
def forward_propagate(self,inputs):
activations = inputs
self.activations[0] = inputs
for i in range(len(self.weights)):
net_inputs = np.dot(activations,self.weights)
activations = self._sigmoid(net_inputs)
self.activations[i+1] = activations
return activations
def back_propagate(self, error):
for i in reversed(range(len(self.derivatives))):
activations = self.activations[i+1]
delta = error * self._sigmoid_derivative(activations)
delta_reshaped = delta.reshape(delta.shape[0], -1).T
current_activations = self.activations[i]
current_activations_reshaped = current_activations.reshape(current_activations.shape[0], -1)
self.derivatives[i] = np.dot(current_activations, delta)
error = np.dot(delta, self.weights[i].T)
return error
def _sigmoid_derivative(self,x):
return x * (1.0 - x)
def _sigmoid(self,x):
y = 1.0 / (1+np.exp(-x))
return y
def gradient_descent(self, learning_rate):
for i in range(len(self.weights)):
weights = self.weights[i]
derivatives = self.derivatives[i]
weights += derivatives + learning_rate
def _mse(self,target,output):
return np.average((target-output)**2)
def train(self,inputs,targets,epochs,learning_rate):
for i in range(epochs):
sum_error = 0
for input,target in zip(inputs,targets):
output = self.forward_propagate(input)
error = target - output
self.back_propagate(error)
self.gradient_descent(learning_rate=1)
sum_error += self._mse(target,output)
print("Error: {} at epoch {}".format(sum_error/len(inputs), i))
And this is how I ran it:
if __name__ == "__main__":
X, y = load_dataset()
inputs = X
targets = y
build_model()
mlp = MLP(1000, [1000], 1000)
mlp.train(inputs,targets, 50, 0.1)
output = mlp.forward_propagate(input)
Thanks in advance!
I tried to do what the video said, to set up an MLP, as was the suggestion of the teacher, but I don't know how to solve the shape error.

Optimization failing after very few iterations for nonlinear constraints calculated in a blackbox wrapped in an explicit component

I have a blackbox solver which is wrapped as explicit component and the objective function and constraints are calculated in the blackbox solver and output. These are taken to a constraint components that has an equality constraint defined such that at any iteration, these constraints are satisifed. I am using finite difference to approximate the partial derivatives. However, I get this SLSQP error "Positive directional derivative for linesearch". From S.O., I understand that this error translates - optimizer could not find a direction to move to and also couldn't verify if the results are minimum. I found that for some iterations derivative is 'None' and it was 'None' at least a few times before it threw this error. Is it because the constraints are calculated in the black box solver? or is it because 'fd' for approximation is not working for non linear constraints? or both? A problem summary is attached for reference.
from PowerHarvest import *
from HydroDynamics import *
from SatelliteComms import *
from Propulsion import *
from Constraints import *
from SystemCost import *
class MDA(Group):
"""Multidisciplinary Analysis Group"""
def __init__(self, derivative_method='fd', **kwargs):
super(MDA, self).__init__(**kwargs)
self.derivative_method = 'fd'
def setup(self):
cycle = self.add_subsystem('cycle',Group(), promotes = ["*"])
cycle.nonlinear_solver = om.NewtonSolver(solve_subsystems = True)
cycle.nonlinear_solver.options['atol'] = 1e-6
cycle.add_subsystem('Hydro', Hydro(),promotes = ["*"]) #This is a blackbox explicit component!
cycle.add_subsystem('Propulsion_system', Propulsion(),promotes = ["*"])
cycle.add_subsystem('PowerHarvest_system',PowerHarvest(),promotes = ["*"])
cycle.add_subsystem("SatelitteComs_system", SatelitteComs(),promotes = ["*"])
cycle.nonlinear_solver.options['atol'] = 1.0e-5
cycle.nonlinear_solver.options['maxiter'] = 500
cycle.nonlinear_solver.options['iprint'] = 2
#Add constraint on the each subsytem if possible
#cycle.add_constraint('',om.ex)
self.add_subsystem('PowerConstraints_system', PowerConstraints(), promotes=["*"])
self.add_subsystem('BodyConstraints_system', BodyConstraint(),promotes = ["*"])
self.add_subsystem('SystemCost_system',SystemCost(), promotes = ['*'])
self.add_constraint('A_PV', upper = 100, units = 'm**2')
#these constraints are output of the blackbox solver!
self.add_constraint('AreaCon', upper = 0)
self.add_constraint('massCon',equals = 0)
self.add_constraint('P_Load', upper = 0) # Solar generates just enough for everything no storing!
self.add_constraint('DraughtCon', lower = 0.5 )
self.add_constraint('GMCon', lower = 0.01) #should be positive
#self.add_constraint('theta', upper = 0.14, lower = 0.1)
self.add_constraint('Amplitude_Con',upper = -0.1) #amplitude differenc
Added. Run script
import openmdao.api as om
from geom_utils import *
from openmdao.api import Problem, Group, ExplicitComponent,ImplicitComponent, IndepVarComp, ExecComp,\
NonlinearBlockGS, ScipyOptimizeDriver,NewtonSolver,DirectSolver,ScipyKrylov
import os
import numpy as np
from types import FunctionType
from geom_utils import *
from capytaine.meshes.meshes import Mesh
from pprint import pprint
from PowerHarvest import *
from HydroDynamics import *
from SatelliteComms import *
from Propulsion import *
from Constraints import *
from SystemCost import *
from PEARLMDA import *
if __name__ == '__main__':
prob = Problem()
model = prob.model = MDA()
prob.driver = ScipyOptimizeDriver(optimizer = 'SLSQP')
# prob.model.nonlinear_solver = om.NonlinearBlockGS()
#prob.driver.options['optimizer'] = 'COBYLA'
prob.driver.options['tol'] = 1e-5
prob.model.add_design_var('Df', lower= 6.0, upper=20.0, units = "m")
prob.model.add_design_var('tf', lower=1.0, upper=4.0, units = "m")
#prob.model.add_design_var('submergence', upper = -0.9)
prob.model.add_design_var('Vs', lower=1, upper=2, units = "m/s") #make sure the lower, upper are according to their units.
prob.model.add_design_var('ld', lower = 3, upper = 7, units = 'm' )
prob.model.add_objective('cost_per_byte' )
newton = om.NewtonSolver(solve_subsystems=True)
newton.linesearch = om.BoundsEnforceLS()
prob.model.nonlinear_solver = newton
prob.model.linear_solver = om.DirectSolver()
# sqlite file to record the intermediate calculations and derivatives
r = om.SqliteRecorder("pearl_computations.sql")
prob.add_recorder(r)
prob.driver.add_recorder(r)
prob.driver.recording_options["record_derivatives"] = True
# Attach recorder to a subsystem
model.nonlinear_solver.add_recorder(r)
model.add_recorder(r)
prob.driver.recording_options["includes"] = ["*"]
# Attach recorder to a solver
model.nonlinear_solver.add_recorder(r)
prob.setup()
prob.set_solver_print(level=2)
# For gradients across the model this will do the finite difference method
prob.model.approx_totals(method="fd", step=0.1, form="forward", step_calc="abs")
prob.run_model()
prob.run_driver()
prob.record("final_state")
print('minimum objective found at')
print(prob['cost_per_byte'][0])
print(prob['A_PV'])
print(f"tf: {prob['tf'][0]}")
results = dict()
results['tf'] = prob['tf'][0]
results['Df'] = prob['Df'][0]
results['ld'] = prob['ld'][0]
results['mass'] = prob['Payloadmass'][0]
results['DraughCon'] = prob['DraughtCon'][0]
results['AmplitudeCon'] = prob['AmplitudeCon'][0]
print(results)
Scaling report

Error message when running the codes in Jupyter notebook

I am trying to test out the accuracy of the images without using image augmentation. When I run both of the codes, I got an error shown below:
TypeError: 'NoneType' object is not callable
I found that the error occurs in the second code. I would like to know the reason on the cause of this error message, and how to resolve it. Attached below are my codes, which have to be run simultaneously. I am using Jupyter notebook for that. Thanks!
Code 1:
import torch
from torch import nn
from torch.autograd import Variable
import torch.nn.functional as F
import math
class CrossEntropyLabelSmooth(nn.Module):
"""Cross entropy loss with label smoothing regularizer.
Reference:
Szegedy et al. Rethinking the Inception Architecture for Computer Vision. CVPR 2016.
Equation: y = (1 - epsilon) * y + epsilon / K.
Args:
num_classes (int): number of classes.
epsilon (float): weight.
"""
def __init__(self, num_classes, epsilon=0.1, device='cpu'):
super(CrossEntropyLabelSmooth, self).__init__()
self.num_classes = num_classes
self.epsilon = epsilon
self.device = device
self.logsoftmax = nn.LogSoftmax(dim=1)
def forward(self, inputs, targets):
"""
Args:
inputs: prediction matrix (before softmax) with shape (batch_size, num_classes)
targets: ground truth labels with shape (num_classes)
"""
log_probs = self.logsoftmax(inputs)
# targets = torch.zeros(log_probs.size()).scatter_(1, targets.unsqueeze(1).data, 1)# for mldg da
targets = torch.zeros(log_probs.size()).scatter_(1, targets.unsqueeze(1).data.cpu(), 1)#for zzd
targets = targets.to(self.device)
targets = (1 - self.epsilon) * targets + self.epsilon / self.num_classes
loss = (-Variable(targets) * log_probs).mean(0).sum()
return loss
class TripletLoss(nn.Module):
"""Triplet loss with hard positive/negative mining.
Reference:
Hermans et al. In Defense of the Triplet Loss for Person Re-Identification. arXiv:1703.07737.
Code imported from https://github.com/Cysu/open-reid/blob/master/reid/loss/triplet.py.
Args:
margin (float): margin for triplet.
"""
def __init__(self, margin=0.3):
super(TripletLoss, self).__init__()
self.margin = margin
self.ranking_loss = nn.MarginRankingLoss(margin=margin)
def forward(self, inputs, targets):
"""
Args:
inputs: feature matrix with shape (batch_size, feat_dim)
targets: ground truth labels with shape (num_classes)
"""
n = inputs.size(0)
# Compute pairwise distance, replace by the official when merged
dist = torch.pow(inputs, 2).sum(dim=1, keepdim=True).expand(n, n)
dist = dist + dist.t()
dist.addmm_(1, -2, inputs, inputs.t())
dist = dist.clamp(min=1e-12).sqrt() # for numerical stability
# For each anchor, find the hardest positive and negative
mask = targets.expand(n, n).eq(targets.expand(n, n).t())
dist_ap, dist_an = [], []
for i in range(n):
dist_ap.append(dist[i][mask[i]].max().unsqueeze(0))
dist_an.append(dist[i][mask[i] == 0].min().unsqueeze(0))
dist_ap = torch.cat(dist_ap)
dist_an = torch.cat(dist_an)
# Compute ranking hinge loss
y = torch.ones_like(dist_an)
loss = self.ranking_loss(dist_an, dist_ap, y)
return loss
class CenterLoss(nn.Module):
"""Center loss.
Reference:
Wen et al. A Discriminative Feature Learning Approach for Deep Face Recognition. ECCV 2016.
Args:
num_classes (int): number of classes.
feat_dim (int): feature dimension.
"""
def __init__(self, num_classes=10, feat_dim=2048, device='cpu'):
super(CenterLoss, self).__init__()
self.num_classes = num_classes
self.feat_dim = feat_dim
self.device = device
self.centers = nn.Parameter(torch.randn(self.num_classes, self.feat_dim)).to(self.device)
def forward(self, x, labels):
"""
Args:
x: feature matrix with shape (batch_size, feat_dim).
labels: ground truth labels with shape (num_classes).
"""
batch_size = x.size(0)
distmat = torch.pow(x, 2).sum(dim=1, keepdim=True).expand(batch_size, self.num_classes) + \
torch.pow(self.centers, 2).sum(dim=1, keepdim=True).expand(self.num_classes, batch_size).t()
distmat.addmm_(1, -2, x, self.centers.t())
classes = torch.arange(self.num_classes).long()
classes = classes.to(self.device)
labels = labels.unsqueeze(1).expand(batch_size, self.num_classes)
mask = labels.data.eq(classes.expand(batch_size, self.num_classes))
dist = []
for i in range(batch_size):
value = distmat[i][mask[i]]
value = value.clamp(min=1e-12, max=1e+12) # for numerical stability
dist.append(value)
dist = torch.cat(dist)
loss = dist.mean()
return loss
Code 2:
# Code without data augmentation
import torch
import torch.nn as nn
from torchvision.datasets import ImageFolder
from torchvision import transforms
import torchvision.models as models
from torch.utils.data import Dataset, DataLoader
import os
import numpy as np
from tqdm import tqdm
from PIL import Image
class FoodDataset(Dataset):
def __init__(self, file, transform=None, mode='train'):
self.transforms = transform
self.mode = mode
with open(file, 'r') as f:
self.image_list = f.readlines()
def __len__(self):
return len(self.image_list)
def __getitem__(self, index):
label = None
if self.mode == 'train':
image, label = self.image_list[index].split('\n')[0].split('\t')
label = int(label)
else:
image = self.image_list[index].split('\n')[0]
image = Image.open(image).convert('RGB')
image = self.transforms(image)
if self.mode == 'train':
return image, label
else:
return image
#transforms_train = transforms.Compose([
# transforms.Resize((224, 224)),
# transforms.RandomHorizontalFlip(p=0.5),
# transforms.RandomVerticalFlip(p=0.5),
# transforms.Pad(10, 10),
# transforms.RandomRotation(45),
# transforms.RandomCrop((224, 224)),
# transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5),
# transforms.ToTensor(),
# transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
# ])
#transforms_test = transforms.Compose([
# transforms.Resize((224, 224)),
# transforms.ToTensor(),
# transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
# ])
def evaluate(prediction, ground_truth):
num_correct = (np.array(prediction) == np.array(ground_truth)).sum()
return num_correct / len(prediction)
train_ds = FoodDataset('data/train.txt')
val_ds = FoodDataset('data/val.txt')
test_ds = FoodDataset('data/test.txt')
train_dl = DataLoader(train_ds, batch_size=32, shuffle=True)
val_dl = DataLoader(val_ds, batch_size=32, shuffle=True)
test_dl = DataLoader(test_ds, batch_size=32, shuffle=True)
num_classes = 5
train_model = models.resnet50(pretrained=True)
train_model.fc = nn.Linear(2048, num_classes)
output_dir = 'checkpoint'
if output_dir and not os.path.exists(output_dir):
os.makedirs(output_dir)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
ce_loss = CrossEntropyLabelSmooth(num_classes = num_classes, device = device)
optimizer = torch.optim.Adam(train_model.parameters(), lr=0.001)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)
for param in train_model.parameters():
param.requires_grad = False
for param in train_model.fc.parameters():
param.requires_grad = True
for i in range(5):
train_model.train()
train_model.to(device)
for img, label in tqdm(train_dl):
img = img.to(device)
label = label.to(device)
optimizer.zero_grad()
output= train_model(img)
loss = ce_loss(output, label)
loss.backward()
optimizer.step()
for param in train_model.parameters():
param.requires_grad = True
epoch = 100
highest_acc = {'epoch': 0, 'accuracy': 0}
for ep in range(epoch):
train_model.train()
train_model.to(device)
count = 0
running_loss = 0.0
validation_loss = 0.0
output_list = []
ground_truth_list = []
for img, label in tqdm(train_dl):
img = img.to(device)
label = label.to(device)
optimizer.zero_grad()
output= train_model(img)
loss = ce_loss(output, label)
count += 1
prediction = torch.argmax(output, dim=1)
output_list.extend(prediction.detach().cpu())
ground_truth_list.extend(label.cpu())
running_loss += loss.item()
loss.backward()
optimizer.step()
scheduler.step()
if ep % 10 == 0:
torch.save(train_model.state_dict(), output_dir + '/resnet50_' + str(ep) + '.pth')
accuracy = evaluate(output_list, ground_truth_list)
print(f'Epoch[{ep}] training accuracy: {accuracy} '
f'training loss: {running_loss / count:.3e} Base Lr: {optimizer.param_groups[0]["lr"]:.5e}')
if ep % 10 == 0:
train_model.eval()
count = 0
output_list = []
ground_truth_list = []
for img, label in tqdm(val_dl):
with torch.no_grad():
img = img.to(device)
lbl = label.to(device)
output= train_model(img)
val_loss = ce_loss(output, lbl)
validation_loss += val_loss.item()
count += 1
prediction = torch.argmax(output, dim=1)
output_list.extend(prediction.detach().cpu())
ground_truth_list.extend(label)
accuracy = evaluate(output_list, ground_truth_list)
if accuracy > highest_acc['accuracy']:
highest_acc['accuracy'] = accuracy
highest_acc['epoch'] = ep
print(f'Accuracy: {accuracy} Epoch:{ep}')
torch.save(train_model.state_dict(), output_dir + '/resnet50_' + 'final' + '.pth')
print('highest_acc: {} epoch: {}'.format(highest_acc['accuracy'], highest_acc['epoch']))

Using bias in PyTorch for basic function approximation

Using R, it is very easy to approximate basic functions through a neural network:
library(nnet)
x <- sort(10*runif(50))
y <- sin(x)
nn <- nnet(x, y, size=4, maxit=10000, linout=TRUE, abstol=1.0e-8, reltol = 1.0e-9, Wts = seq(0, 1, by=1/12) )
plot(x, y)
x1 <- seq(0, 10, by=0.1)
lines(x1, predict(nn, data.frame(x=x1)), col="green")
predict( nn , data.frame(x=pi/2) )
A simple neural network with one hidden layer of a mere 4 neurons is sufficient to approximate a sine. (As per stackoverflow question Approximating function with Neural Network.)
But I cannot obtain the same in PyTorch.
In fact, the neural network created by R contains not only an input, four hidden and an output, but also two "bias" neurons - the first connected towards the hidden layer, the second towards the output.
The plot above is obtained through the following:
library(devtools)
library(scales)
library(reshape)
source_url('https://gist.github.com/fawda123/7471137/raw/cd6e6a0b0bdb4e065c597e52165e5ac887f5fe95/nnet_plot_update.r')
plot.nnet(nn$wts,struct=nn$n, pos.col='#007700',neg.col='#FF7777') ### this plots the graph
plot.nnet(nn$wts,struct=nn$n, pos.col='#007700',neg.col='#FF7777', wts.only=1) ### this prints the weights
Attempting the same with PyTorch produces a different network: the bias neurons are missing.
Following is an attempt to do in PyTorch what was done previously in R. The results will not be satisfactory: the function is not approximated. The most evident difference is that absence of the bias neurons.
import torch
from torch.autograd import Variable
import random
import math
N, D_in, H, D_out = 1000, 1, 4, 1
l_x = []
l_y = []
for a in range(1000):
r = random.random()*10
l_x.append( [r] )
l_y.append( [math.sin(r)] )
tx = torch.cuda.FloatTensor(l_x)
ty = torch.cuda.FloatTensor(l_y)
x = Variable(tx, requires_grad=False)
y = Variable(ty, requires_grad=False)
w1 = Variable(torch.randn(D_in, H ).type(torch.cuda.FloatTensor), requires_grad=True)
w2 = Variable(torch.randn(H, D_out).type(torch.cuda.FloatTensor), requires_grad=True)
learning_rate = 1e-5
for t in range(1000):
y_pred = x.mm(w1).clamp(min=0).mm(w2)
loss = (y_pred - y).pow(2).sum()
if t<10 or t%100==1: print(t, loss.data[0])
loss.backward()
w1.data -= learning_rate * w1.grad.data
w2.data -= learning_rate * w2.grad.data
w1.grad.data.zero_()
w2.grad.data.zero_()
t = [ [math.pi] ]
print( str(t) +" -> "+ str( (Variable(torch.cuda.FloatTensor( t ))).mm(w1).clamp(min=0).mm(w2).data ) )
t = [ [math.pi/2] ]
print( str(t) +" -> "+ str( (Variable(torch.cuda.FloatTensor( t ))).mm(w1).clamp(min=0).mm(w2).data ) )
How to make the network approximate to the given function (sine in this case), through either inserting the "bias" neurons or other missing detail?
Moreover: I have difficulties in understanding why R inserts the "bias". I found information that the bias could be akin to the "Intercept in a Regression Model" - I still find it not clear. Any information would be appreciated.
EDIT: an excellent explanation turned out to be at stackoverflow question Role of Bias in Neural Networks
EDIT:
An example to obtain the result, though using the "fuller" framework ("not reinventing the wheel") is as follows:
import torch
from torch.autograd import Variable
import torch.nn.functional as F
import math
N, D_in, H, D_out = 1000, 1, 4, 1
l_x = []
l_y = []
for a in range(1000):
t = (a/1000.0)*10
l_x.append( [t] )
l_y.append( [math.sin(t)] )
x = Variable( torch.FloatTensor(l_x) )
y = Variable( torch.FloatTensor(l_y) )
class Net(torch.nn.Module):
def __init__(self, n_feature, n_hidden, n_output):
super(Net, self).__init__()
self.to_hidden = torch.nn.Linear(n_feature, n_hidden)
self.to_output = torch.nn.Linear(n_hidden, n_output)
def forward(self, x):
x = self.to_hidden(x)
x = F.tanh(x) # activation function
x = self.to_output(x)
return x
net = Net(n_feature = D_in, n_hidden = H, n_output = D_out)
learning_rate = 0.01
optimizer = torch.optim.Adam( net.parameters() , lr=learning_rate )
for t in range(1000):
y_pred = net(x)
loss = (y_pred - y).pow(2).sum()
if t<10 or t%100==1: print(t, loss.data[0])
loss.backward()
optimizer.step()
optimizer.zero_grad()
t = [ [math.pi] ]
print( str(t) +" -> "+ str( net( Variable(torch.FloatTensor( t )) ) ) )
t = [ [math.pi/2] ]
print( str(t) +" -> "+ str( net( Variable(torch.FloatTensor( t )) ) ) )
Unfortunately, while this code works properly, it does not solve the matter of making the original, more "low level" code work as expected (e.g. introducing the bias).
Following up on #jdhao's comment - this is a super-simple PyTorch model that computes exactly what you want:
class LinearWithInputBias(nn.Linear):
def __init__(self, in_features, out_features, out_bias=True, in_bias=True):
nn.Linear.__init__(self, in_features, out_features, out_bias)
if in_bias:
in_bias = torch.zeros(1, out_features)
# in_bias.normal_() # if you want it to be randomly initialized
self._out_bias = nn.Parameter(in_bias)
def forward(self, x):
out = nn.Linear.forward(self, x)
try:
out = out + self._out_bias
except AttributeError:
pass
return out
However, there's an additional bug in your code: from what I can see, you don't train it - i.e. you do not call an optimizer (like torch.optim.SGD(mod.parameters()) before you delete the gradient information by calling grad.data.zero_().

Resources