Error message when running the codes in Jupyter notebook - jupyter-notebook

I am trying to test out the accuracy of the images without using image augmentation. When I run both of the codes, I got an error shown below:
TypeError: 'NoneType' object is not callable
I found that the error occurs in the second code. I would like to know the reason on the cause of this error message, and how to resolve it. Attached below are my codes, which have to be run simultaneously. I am using Jupyter notebook for that. Thanks!
Code 1:
import torch
from torch import nn
from torch.autograd import Variable
import torch.nn.functional as F
import math
class CrossEntropyLabelSmooth(nn.Module):
"""Cross entropy loss with label smoothing regularizer.
Reference:
Szegedy et al. Rethinking the Inception Architecture for Computer Vision. CVPR 2016.
Equation: y = (1 - epsilon) * y + epsilon / K.
Args:
num_classes (int): number of classes.
epsilon (float): weight.
"""
def __init__(self, num_classes, epsilon=0.1, device='cpu'):
super(CrossEntropyLabelSmooth, self).__init__()
self.num_classes = num_classes
self.epsilon = epsilon
self.device = device
self.logsoftmax = nn.LogSoftmax(dim=1)
def forward(self, inputs, targets):
"""
Args:
inputs: prediction matrix (before softmax) with shape (batch_size, num_classes)
targets: ground truth labels with shape (num_classes)
"""
log_probs = self.logsoftmax(inputs)
# targets = torch.zeros(log_probs.size()).scatter_(1, targets.unsqueeze(1).data, 1)# for mldg da
targets = torch.zeros(log_probs.size()).scatter_(1, targets.unsqueeze(1).data.cpu(), 1)#for zzd
targets = targets.to(self.device)
targets = (1 - self.epsilon) * targets + self.epsilon / self.num_classes
loss = (-Variable(targets) * log_probs).mean(0).sum()
return loss
class TripletLoss(nn.Module):
"""Triplet loss with hard positive/negative mining.
Reference:
Hermans et al. In Defense of the Triplet Loss for Person Re-Identification. arXiv:1703.07737.
Code imported from https://github.com/Cysu/open-reid/blob/master/reid/loss/triplet.py.
Args:
margin (float): margin for triplet.
"""
def __init__(self, margin=0.3):
super(TripletLoss, self).__init__()
self.margin = margin
self.ranking_loss = nn.MarginRankingLoss(margin=margin)
def forward(self, inputs, targets):
"""
Args:
inputs: feature matrix with shape (batch_size, feat_dim)
targets: ground truth labels with shape (num_classes)
"""
n = inputs.size(0)
# Compute pairwise distance, replace by the official when merged
dist = torch.pow(inputs, 2).sum(dim=1, keepdim=True).expand(n, n)
dist = dist + dist.t()
dist.addmm_(1, -2, inputs, inputs.t())
dist = dist.clamp(min=1e-12).sqrt() # for numerical stability
# For each anchor, find the hardest positive and negative
mask = targets.expand(n, n).eq(targets.expand(n, n).t())
dist_ap, dist_an = [], []
for i in range(n):
dist_ap.append(dist[i][mask[i]].max().unsqueeze(0))
dist_an.append(dist[i][mask[i] == 0].min().unsqueeze(0))
dist_ap = torch.cat(dist_ap)
dist_an = torch.cat(dist_an)
# Compute ranking hinge loss
y = torch.ones_like(dist_an)
loss = self.ranking_loss(dist_an, dist_ap, y)
return loss
class CenterLoss(nn.Module):
"""Center loss.
Reference:
Wen et al. A Discriminative Feature Learning Approach for Deep Face Recognition. ECCV 2016.
Args:
num_classes (int): number of classes.
feat_dim (int): feature dimension.
"""
def __init__(self, num_classes=10, feat_dim=2048, device='cpu'):
super(CenterLoss, self).__init__()
self.num_classes = num_classes
self.feat_dim = feat_dim
self.device = device
self.centers = nn.Parameter(torch.randn(self.num_classes, self.feat_dim)).to(self.device)
def forward(self, x, labels):
"""
Args:
x: feature matrix with shape (batch_size, feat_dim).
labels: ground truth labels with shape (num_classes).
"""
batch_size = x.size(0)
distmat = torch.pow(x, 2).sum(dim=1, keepdim=True).expand(batch_size, self.num_classes) + \
torch.pow(self.centers, 2).sum(dim=1, keepdim=True).expand(self.num_classes, batch_size).t()
distmat.addmm_(1, -2, x, self.centers.t())
classes = torch.arange(self.num_classes).long()
classes = classes.to(self.device)
labels = labels.unsqueeze(1).expand(batch_size, self.num_classes)
mask = labels.data.eq(classes.expand(batch_size, self.num_classes))
dist = []
for i in range(batch_size):
value = distmat[i][mask[i]]
value = value.clamp(min=1e-12, max=1e+12) # for numerical stability
dist.append(value)
dist = torch.cat(dist)
loss = dist.mean()
return loss
Code 2:
# Code without data augmentation
import torch
import torch.nn as nn
from torchvision.datasets import ImageFolder
from torchvision import transforms
import torchvision.models as models
from torch.utils.data import Dataset, DataLoader
import os
import numpy as np
from tqdm import tqdm
from PIL import Image
class FoodDataset(Dataset):
def __init__(self, file, transform=None, mode='train'):
self.transforms = transform
self.mode = mode
with open(file, 'r') as f:
self.image_list = f.readlines()
def __len__(self):
return len(self.image_list)
def __getitem__(self, index):
label = None
if self.mode == 'train':
image, label = self.image_list[index].split('\n')[0].split('\t')
label = int(label)
else:
image = self.image_list[index].split('\n')[0]
image = Image.open(image).convert('RGB')
image = self.transforms(image)
if self.mode == 'train':
return image, label
else:
return image
#transforms_train = transforms.Compose([
# transforms.Resize((224, 224)),
# transforms.RandomHorizontalFlip(p=0.5),
# transforms.RandomVerticalFlip(p=0.5),
# transforms.Pad(10, 10),
# transforms.RandomRotation(45),
# transforms.RandomCrop((224, 224)),
# transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5),
# transforms.ToTensor(),
# transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
# ])
#transforms_test = transforms.Compose([
# transforms.Resize((224, 224)),
# transforms.ToTensor(),
# transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
# ])
def evaluate(prediction, ground_truth):
num_correct = (np.array(prediction) == np.array(ground_truth)).sum()
return num_correct / len(prediction)
train_ds = FoodDataset('data/train.txt')
val_ds = FoodDataset('data/val.txt')
test_ds = FoodDataset('data/test.txt')
train_dl = DataLoader(train_ds, batch_size=32, shuffle=True)
val_dl = DataLoader(val_ds, batch_size=32, shuffle=True)
test_dl = DataLoader(test_ds, batch_size=32, shuffle=True)
num_classes = 5
train_model = models.resnet50(pretrained=True)
train_model.fc = nn.Linear(2048, num_classes)
output_dir = 'checkpoint'
if output_dir and not os.path.exists(output_dir):
os.makedirs(output_dir)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
ce_loss = CrossEntropyLabelSmooth(num_classes = num_classes, device = device)
optimizer = torch.optim.Adam(train_model.parameters(), lr=0.001)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)
for param in train_model.parameters():
param.requires_grad = False
for param in train_model.fc.parameters():
param.requires_grad = True
for i in range(5):
train_model.train()
train_model.to(device)
for img, label in tqdm(train_dl):
img = img.to(device)
label = label.to(device)
optimizer.zero_grad()
output= train_model(img)
loss = ce_loss(output, label)
loss.backward()
optimizer.step()
for param in train_model.parameters():
param.requires_grad = True
epoch = 100
highest_acc = {'epoch': 0, 'accuracy': 0}
for ep in range(epoch):
train_model.train()
train_model.to(device)
count = 0
running_loss = 0.0
validation_loss = 0.0
output_list = []
ground_truth_list = []
for img, label in tqdm(train_dl):
img = img.to(device)
label = label.to(device)
optimizer.zero_grad()
output= train_model(img)
loss = ce_loss(output, label)
count += 1
prediction = torch.argmax(output, dim=1)
output_list.extend(prediction.detach().cpu())
ground_truth_list.extend(label.cpu())
running_loss += loss.item()
loss.backward()
optimizer.step()
scheduler.step()
if ep % 10 == 0:
torch.save(train_model.state_dict(), output_dir + '/resnet50_' + str(ep) + '.pth')
accuracy = evaluate(output_list, ground_truth_list)
print(f'Epoch[{ep}] training accuracy: {accuracy} '
f'training loss: {running_loss / count:.3e} Base Lr: {optimizer.param_groups[0]["lr"]:.5e}')
if ep % 10 == 0:
train_model.eval()
count = 0
output_list = []
ground_truth_list = []
for img, label in tqdm(val_dl):
with torch.no_grad():
img = img.to(device)
lbl = label.to(device)
output= train_model(img)
val_loss = ce_loss(output, lbl)
validation_loss += val_loss.item()
count += 1
prediction = torch.argmax(output, dim=1)
output_list.extend(prediction.detach().cpu())
ground_truth_list.extend(label)
accuracy = evaluate(output_list, ground_truth_list)
if accuracy > highest_acc['accuracy']:
highest_acc['accuracy'] = accuracy
highest_acc['epoch'] = ep
print(f'Accuracy: {accuracy} Epoch:{ep}')
torch.save(train_model.state_dict(), output_dir + '/resnet50_' + 'final' + '.pth')
print('highest_acc: {} epoch: {}'.format(highest_acc['accuracy'], highest_acc['epoch']))

Related

TypeError: Caught TypeError in DataLoader worker process 0. TypeError: 'KeyError' object is not iterable

from torchvision_starter.engine import train_one_epoch, evaluate
from torchvision_starter import utils
import multiprocessing
import time
n_cpu = multiprocessing.cpu_count()
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
_ = model.to(device)
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(model.parameters(), lr=0.00001)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
step_size=3,
gamma=0.2,
verbose=True
)
# Let's train for 10 epochs
num_epochs = 1
start = time.time()
for epoch in range(10, 10 + num_epochs):
# train for one epoch, printing every 10 iterations
train_one_epoch(model, optimizer, data_loaders['train'], device, epoch, print_freq=10)
# update the learning rate
lr_scheduler.step()
# evaluate on the validation dataset
evaluate(model, data_loaders['valid'], device=device)
stop = time.time()
print(f"\n\n{num_epochs} epochs in {stop - start} s ({(stop-start) / 3600:.2f} hrs)")
Before I move on to this part, everything is OK. But after I run the part, the error is like below:
I have tried to add drop_last to the helper.py's function like:
data_loaders["train"] = torch.utils.data.DataLoader(
train_data,
batch_size=batch_size,
sampler=train_sampler,
num_workers=num_workers,
collate_fn=utils.collate_fn,
drop_last=True
)
But it doesn't work. By the way, the torch and torchvision are compatible and Cuda is available.
I wonder how to fix it.
The get_data_loaders function:
def get_data_loaders(
folder, batch_size: int = 2, valid_size: float = 0.2, num_workers: int = -1, limit: int = -1, thinning: int = None
):
"""
Create and returns the train_one_epoch, validation and test data loaders.
:param foder: folder containing the dataset
:param batch_size: size of the mini-batches
:param valid_size: fraction of the dataset to use for validation. For example 0.2
means that 20% of the dataset will be used for validation
:param num_workers: number of workers to use in the data loaders. Use -1 to mean
"use all my cores"
:param limit: maximum number of data points to consider
:param thinning: take every n-th frame, instead of all frames
:return a dictionary with 3 keys: 'train_one_epoch', 'valid' and 'test' containing respectively the
train_one_epoch, validation and test data loaders
"""
if num_workers == -1:
# Use all cores
num_workers = multiprocessing.cpu_count()
# We will fill this up later
data_loaders = {"train": None, "valid": None, "test": None}
# create 3 sets of data transforms: one for the training dataset,
# containing data augmentation, one for the validation dataset
# (without data augmentation) and one for the test set (again
# without augmentation)
data_transforms = {
"train": get_transform(UdacitySelfDrivingDataset.mean, UdacitySelfDrivingDataset.std, train=True),
"valid": get_transform(UdacitySelfDrivingDataset.mean, UdacitySelfDrivingDataset.std, train=False),
"test": get_transform(UdacitySelfDrivingDataset.mean, UdacitySelfDrivingDataset.std, train=False),
}
# Create train and validation datasets
train_data = UdacitySelfDrivingDataset(
folder,
transform=data_transforms["train"],
train=True,
thinning=thinning
)
# The validation dataset is a split from the train_one_epoch dataset, so we read
# from the same folder, but we apply the transforms for validation
valid_data = UdacitySelfDrivingDataset(
folder,
transform=data_transforms["valid"],
train=True,
thinning=thinning
)
# obtain training indices that will be used for validation
n_tot = len(train_data)
indices = torch.randperm(n_tot)
# If requested, limit the number of data points to consider
if limit > 0:
indices = indices[:limit]
n_tot = limit
split = int(math.ceil(valid_size * n_tot))
train_idx, valid_idx = indices[split:], indices[:split]
# define samplers for obtaining training and validation batches
train_sampler = torch.utils.data.SubsetRandomSampler(train_idx)
valid_sampler = torch.utils.data.SubsetRandomSampler(valid_idx) # =
# prepare data loaders
data_loaders["train"] = torch.utils.data.DataLoader(
train_data,
batch_size=batch_size,
sampler=train_sampler,
num_workers=num_workers,
collate_fn=utils.collate_fn,
drop_last=True
)
data_loaders["valid"] = torch.utils.data.DataLoader(
valid_data, # -
batch_size=batch_size, # -
sampler=valid_sampler, # -
num_workers=num_workers, # -
collate_fn=utils.collate_fn,
drop_last=True
)
# Now create the test data loader
test_data = UdacitySelfDrivingDataset(
folder,
transform=data_transforms["test"],
train=False,
thinning=thinning
)
if limit > 0:
indices = torch.arange(limit)
test_sampler = torch.utils.data.SubsetRandomSampler(indices)
else:
test_sampler = None
data_loaders["test"] = torch.utils.data.DataLoader(
test_data,
batch_size=batch_size,
shuffle=False,
num_workers=num_workers,
sampler=test_sampler,
collate_fn=utils.collate_fn,
drop_last=True
# -
)
return data_loaders
class UdacitySelfDrivingDataset(torch.utils.data.Dataset):
# Mean and std of the dataset to be used in nn.Normalize
mean = torch.tensor([0.3680, 0.3788, 0.3892])
std = torch.tensor([0.2902, 0.3069, 0.3242])
def __init__(self, root, transform, train=True, thinning=None):
super().__init__()
self.root = os.path.abspath(os.path.expandvars(os.path.expanduser(root)))
self.transform = transform
# load datasets
if train:
self.df = pd.read_csv(os.path.join(self.root, "labels_train.csv"))
else:
self.df = pd.read_csv(os.path.join(self.root, "labels_test.csv"))
# Index by file id (i.e., a sequence of the same length as the number of images)
codes, uniques = pd.factorize(self.df['frame'])
if thinning:
# Take every n-th rows. This makes sense because the images are
# frames of videos from the car, so we are essentially reducing
# the frame rate
thinned = uniques[::thinning]
idx = self.df['frame'].isin(thinned)
print(f"Keeping {thinned.shape[0]} of {uniques.shape[0]} images")
print(f"Keeping {idx.sum()} objects out of {self.df.shape[0]}")
self.df = self.df[idx].reset_index(drop=True)
# Recompute codes
codes, uniques = pd.factorize(self.df['frame'])
self.n_images = len(uniques)
self.df['image_id'] = codes
self.df.set_index("image_id", inplace=True)
self.classes = ['car', 'truck', 'pedestrian', 'bicyclist', 'light']
self.colors = ['cyan', 'blue', 'red', 'purple', 'orange']
#property
def n_classes(self):
return len(self.classes)
def __getitem__(self, idx):
if idx in self.df.index:
row = self.df.loc[[idx]]
else:
return KeyError(f"Element {idx} not in dataframe")
# load images fromm file
img_path = os.path.join(self.root, "images", row['frame'].iloc[0])
img = Image.open(img_path).convert("RGB")
# Exclude bogus boxes with 0 height or width
h = row['ymax'] - row['ymin']
w = row['xmax'] - row['xmin']
filter_idx = (h > 0) & (w > 0)
row = row[filter_idx]
# get bounding box coordinates for each mask
boxes = row[['xmin', 'ymin', 'xmax', 'ymax']].values
# convert everything into a torch.Tensor
boxes = torch.as_tensor(boxes, dtype=torch.float32)
# get the labels
labels = torch.as_tensor(row['class_id'].values, dtype=int)
image_id = torch.tensor([idx])
area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
# assume no crowd for everything
iscrowd = torch.zeros((row.shape[0],), dtype=torch.int64)
target = {}
target["boxes"] = boxes
target["labels"] = labels
target["image_id"] = image_id
target["area"] = area
target["iscrowd"] = iscrowd
if self.transform is not None:
img, target = self.transform(img, target)
return img, target
def __len__(self):
return self.n_images
def plot(self, idx, renormalize=True, predictions=None, threshold=0.5, ax=None):
image, label_js = self[idx]
if renormalize:
# Invert the T.Normalize transform
unnormalize = T.Compose(
[
T.Normalize(mean = [ 0., 0., 0. ], std = 1 / type(self).std),
T.Normalize(mean = -type(self).mean, std = [ 1., 1., 1. ])
]
)
image, label_js = unnormalize(image, label_js)
if ax is None:
fig, ax = plt.subplots(figsize=(8, 8))
_ = ax.imshow(torch.permute(image, [1, 2, 0]))
for i, box in enumerate(label_js['boxes']):
xy = (box[0], box[1])
h, w = (box[2] - box[0]), (box[3] - box[1])
r = patches.Rectangle(xy, h, w, fill=False, color=self.colors[label_js['labels'][i]-1], lw=2, alpha=0.5)
ax.add_patch(r)
if predictions is not None:
# Make sure the predictions are on the CPU
for k in predictions:
predictions[k] = predictions[k].detach().cpu().numpy()
for i, box in enumerate(predictions['boxes']):
if predictions['scores'][i] > threshold:
xy = (box[0], box[1])
h, w = (box[2] - box[0]), (box[3] - box[1])
r = patches.Rectangle(xy, h, w, fill=False, color=self.colors[predictions['labels'][i]-1], lw=2, linestyle=':')
ax.add_patch(r)
_ = ax.axis("off")
return ax

grads is showing none after loss.backward() in fgsm attack

import torch
from torch import nn
from transformers import BertTokenizer, BertForSequenceClassification
# Load pre-trained model and tokenizer
model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
model.cuda()
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# Set up device (CPU or GPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Define PGD/FGSM attack functions
def fgsm_attack(model, loss_fn, input_ids, attention_mask, labels, epsilon=0.1):
input_ids = input_ids.float()
attention_mask = attention_mask.float()
input_ids.requires_grad = True
attention_mask.requires_grad = True
logits = model(input_ids=input_ids.long(), attention_mask=attention_mask.long())[0]
loss = loss_fn(logits, labels)
loss.backward()
# Create perturbation tensor based on sign of gradients
perturbation = epsilon * input_ids.grad.sign()
# Add perturbation to input tensor and clamp values
perturbed_input = input_ids + perturbation
perturbed_input = torch.clamp(perturbed_input, 0, 1)
# Detach input from computation graph and return perturbed input
return perturbed_input.detach()
from tqdm import tqdm
for batch in tqdm(validation_dataloader):
input_ids = batch[0].to(device)
token_type_ids = batch[1].to(device)
attention_mask = batch[2].to(device)
labels = batch[3].to(device)
peterbed_inputs = fgsm_attack(model, nn.CrossEntropyLoss(), input_ids, attention_mask, labels, epsilon=0.1)
#outputs = model(input_ids, attention_mask=attention_mask, token_type_ids = token_type_ids)
I imported the BERT model and tried to perform fgsm attack on it. But it throws the error while getting sign of input_ids.grad.sign() showing that the value is None
Can some please help me why the input_ids.grad showing None
I expected input_ids.grads not to be None

Why does my model predict the same label?

I am training a graph convolution neural network to classify EEG signals into emotion classes. The input of my data is an array of size [12803216]-->[number of subjects * numbers of channels (nodes) * features of each node]. The output should be class 0(Negative) or class 1(Positive).The data is slightly imbalanced (45% class 0 and 55% class 1). The problem is that my model always predict label 0 as output for all inputs in the training stage regardless of the convolution function.
What is wrong with my code and how can I fix it? Any comments are welcome.
connectivity at the below code is predefined based at the connections of the 32 electrodes(nodes)
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, SAGEConv, ResGatedGraphConv, global_mean_pool, BatchNorm
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, f1_score, accuracy_score
labels = np.load("/content/drive/MyDrive/ValenceLabels_thres_5.npy")
labels = np.array(labels, dtype='int64')
labels.shape
class EEGraph(nn.Module):
def __init__(self, embedding_dim, first_conv, n_layers, conv_layer):
super(EEGraph, self).__init__()
self.n_layers = n_layers
self.convs = []
self.bns = []
d_in = embedding_dim
d_out = first_conv
for i in range(n_layers):
self.convs.append(conv_layer(d_in, d_out))
self.bns.append(BatchNorm(d_out, eps=1e-5, momentum=0.1, affine=True, track_running_stats=True))
if i < n_layers - 1:
d_in, d_out = d_out, 2*d_out
self.convs = torch.nn.ModuleList(self.convs)
self.bns = torch.nn.ModuleList(self.bns)
self.project = nn.Linear(d_out, 3) # d_in beacu
self.project.apply(lambda x: nn.init.xavier_normal_(x.weight, gain=1) if type(x) == nn.Linear else None)
def forward(self, x, edge_index):
for i, (conv, bn) in enumerate(zip(self.convs, self.bns)):
x = conv(x, edge_index).permute(0, 2, 1)
x = bn(x)
x = F.dropout(F.leaky_relu(x, negative_slope=0.01), p=0.5, training=self.training).permute(0, 2, 1)
out = x.mean(dim=1).squeeze(dim=-1)
out = self.project(out)
return F.softmax(out, dim=-1)
device = torch.device("cuda")
connectivity = [[channel_order.index(e[0]), channel_order.index(e[1])] for e in edges]
connectivity = torch.tensor(connectivity).t().contiguous().to(device)
best_f1_score = -1
best_trial_name = None
n_epochs = 500
lr = 1e-3
weight_decay = 1e-5
batch_size = 63
criterion = nn.CrossEntropyLoss()
for node_dim in [16]:
node_features = np.load(f"/content/deap_graph_valence{node_dim}_1.npy")
A, Xte, yA, yte = train_test_split(node_features, labels, test_size=0.2, shuffle=True, stratify=labels, random_state=0)
Xtr, Xtr_valid, ytr, ytr_valid = train_test_split(A, yA, test_size=0.2, shuffle=True, stratify=yA, random_state=0)
Xtr = torch.tensor(Xtr).float().to(device)
Xtr_valid = torch.tensor(Xtr_valid).float().to(device)
Xte = torch.tensor(Xte).float().to(device)
ytr = torch.tensor(ytr).to(device)
#ytr_valid = torch.tensor(ytr_valid).to(device)
#yte = torch.tensor(yte).to(device)
for conv_fn in [GCNConv, SAGEConv, ResGatedGraphConv]:
for n_layers in range(1, 4):
for conv_dim in [32, 64, 128,256]:
trial_name = f"node_dim_{node_dim}-conv_fn_{conv_fn.__name__}-conv_layers_{n_layers}-conv_dim_{conv_dim}"
print(f"#: {trial_name}")
model = EEGraph(embedding_dim=Xtr.shape[-1],
first_conv=conv_dim,
n_layers=n_layers,
conv_layer=conv_fn).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
for epoch in range(n_epochs):
model.train()
indices = torch.randperm(len(Xtr))
for j, batch in enumerate(indices.view(-1, 63)):
optimizer.zero_grad()
batch_input = Xtr[batch]
outputs = model(batch_input, connectivity)
loss = criterion(outputs, ytr[batch])
loss.backward()
optimizer.step()
with torch.no_grad():
model.eval()
outputs = model(Xtr_valid, connectivity)
output_classes = torch.argmax(outputs, dim=-1).cpu().numpy()
f1 = f1_score(ytr_valid, output_classes, average="macro")
if f1 > best_f1_score:
best_trial_name = trial_name
best_f1_score = f1
print("-"*100)
print(f"Best model so far: {best_trial_name}")
print(f"Best F1 Score: %{100*best_f1_score:.2f}")
test_outputs = model(Xte, connectivity)
test_output_classes = torch.argmax(test_outputs, dim=-1).cpu().numpy()
print(classification_report(yte, test_output_classes, target_names=["Negative", "Positive"]))
print("-"*100)
print()

Optimization failing after very few iterations for nonlinear constraints calculated in a blackbox wrapped in an explicit component

I have a blackbox solver which is wrapped as explicit component and the objective function and constraints are calculated in the blackbox solver and output. These are taken to a constraint components that has an equality constraint defined such that at any iteration, these constraints are satisifed. I am using finite difference to approximate the partial derivatives. However, I get this SLSQP error "Positive directional derivative for linesearch". From S.O., I understand that this error translates - optimizer could not find a direction to move to and also couldn't verify if the results are minimum. I found that for some iterations derivative is 'None' and it was 'None' at least a few times before it threw this error. Is it because the constraints are calculated in the black box solver? or is it because 'fd' for approximation is not working for non linear constraints? or both? A problem summary is attached for reference.
from PowerHarvest import *
from HydroDynamics import *
from SatelliteComms import *
from Propulsion import *
from Constraints import *
from SystemCost import *
class MDA(Group):
"""Multidisciplinary Analysis Group"""
def __init__(self, derivative_method='fd', **kwargs):
super(MDA, self).__init__(**kwargs)
self.derivative_method = 'fd'
def setup(self):
cycle = self.add_subsystem('cycle',Group(), promotes = ["*"])
cycle.nonlinear_solver = om.NewtonSolver(solve_subsystems = True)
cycle.nonlinear_solver.options['atol'] = 1e-6
cycle.add_subsystem('Hydro', Hydro(),promotes = ["*"]) #This is a blackbox explicit component!
cycle.add_subsystem('Propulsion_system', Propulsion(),promotes = ["*"])
cycle.add_subsystem('PowerHarvest_system',PowerHarvest(),promotes = ["*"])
cycle.add_subsystem("SatelitteComs_system", SatelitteComs(),promotes = ["*"])
cycle.nonlinear_solver.options['atol'] = 1.0e-5
cycle.nonlinear_solver.options['maxiter'] = 500
cycle.nonlinear_solver.options['iprint'] = 2
#Add constraint on the each subsytem if possible
#cycle.add_constraint('',om.ex)
self.add_subsystem('PowerConstraints_system', PowerConstraints(), promotes=["*"])
self.add_subsystem('BodyConstraints_system', BodyConstraint(),promotes = ["*"])
self.add_subsystem('SystemCost_system',SystemCost(), promotes = ['*'])
self.add_constraint('A_PV', upper = 100, units = 'm**2')
#these constraints are output of the blackbox solver!
self.add_constraint('AreaCon', upper = 0)
self.add_constraint('massCon',equals = 0)
self.add_constraint('P_Load', upper = 0) # Solar generates just enough for everything no storing!
self.add_constraint('DraughtCon', lower = 0.5 )
self.add_constraint('GMCon', lower = 0.01) #should be positive
#self.add_constraint('theta', upper = 0.14, lower = 0.1)
self.add_constraint('Amplitude_Con',upper = -0.1) #amplitude differenc
Added. Run script
import openmdao.api as om
from geom_utils import *
from openmdao.api import Problem, Group, ExplicitComponent,ImplicitComponent, IndepVarComp, ExecComp,\
NonlinearBlockGS, ScipyOptimizeDriver,NewtonSolver,DirectSolver,ScipyKrylov
import os
import numpy as np
from types import FunctionType
from geom_utils import *
from capytaine.meshes.meshes import Mesh
from pprint import pprint
from PowerHarvest import *
from HydroDynamics import *
from SatelliteComms import *
from Propulsion import *
from Constraints import *
from SystemCost import *
from PEARLMDA import *
if __name__ == '__main__':
prob = Problem()
model = prob.model = MDA()
prob.driver = ScipyOptimizeDriver(optimizer = 'SLSQP')
# prob.model.nonlinear_solver = om.NonlinearBlockGS()
#prob.driver.options['optimizer'] = 'COBYLA'
prob.driver.options['tol'] = 1e-5
prob.model.add_design_var('Df', lower= 6.0, upper=20.0, units = "m")
prob.model.add_design_var('tf', lower=1.0, upper=4.0, units = "m")
#prob.model.add_design_var('submergence', upper = -0.9)
prob.model.add_design_var('Vs', lower=1, upper=2, units = "m/s") #make sure the lower, upper are according to their units.
prob.model.add_design_var('ld', lower = 3, upper = 7, units = 'm' )
prob.model.add_objective('cost_per_byte' )
newton = om.NewtonSolver(solve_subsystems=True)
newton.linesearch = om.BoundsEnforceLS()
prob.model.nonlinear_solver = newton
prob.model.linear_solver = om.DirectSolver()
# sqlite file to record the intermediate calculations and derivatives
r = om.SqliteRecorder("pearl_computations.sql")
prob.add_recorder(r)
prob.driver.add_recorder(r)
prob.driver.recording_options["record_derivatives"] = True
# Attach recorder to a subsystem
model.nonlinear_solver.add_recorder(r)
model.add_recorder(r)
prob.driver.recording_options["includes"] = ["*"]
# Attach recorder to a solver
model.nonlinear_solver.add_recorder(r)
prob.setup()
prob.set_solver_print(level=2)
# For gradients across the model this will do the finite difference method
prob.model.approx_totals(method="fd", step=0.1, form="forward", step_calc="abs")
prob.run_model()
prob.run_driver()
prob.record("final_state")
print('minimum objective found at')
print(prob['cost_per_byte'][0])
print(prob['A_PV'])
print(f"tf: {prob['tf'][0]}")
results = dict()
results['tf'] = prob['tf'][0]
results['Df'] = prob['Df'][0]
results['ld'] = prob['ld'][0]
results['mass'] = prob['Payloadmass'][0]
results['DraughCon'] = prob['DraughtCon'][0]
results['AmplitudeCon'] = prob['AmplitudeCon'][0]
print(results)
Scaling report

Self-coded QuestionAnswering models are not trained effectively

I tried using Transformers' own Albert ForQuestionAnswering, which was able to train effectively. But I define similar code myself and can't train it effectively.
model = AutoModelForQuestionAnswering.from_pretrained(config.bert_model)
my model:The rest of the code is the same. That is, the code for training and testing is the same, but the loaded model is different.
class BertQA(nn.Module):
def __init__(self, config):
super(BertQA, self).__init__()
self.albert = AutoModel.from_pretrained(config.bert_model, add_pooling_layer=False)
# for name, param in self.model.named_parameters():
# print(name, param)
self.qa_outputs = nn.Linear(312, 2, bias=True)
self._init_weights([self.qa_outputs])
def _init_weights(self, blocks, **kwargs):
"""
参数初始化,将 Linear / Embedding / LayerNorm 与 Bert 进行一样的初始化
"""
for block in blocks:
for module in block.modules():
if isinstance(module, nn.Linear):
module.weight.data.normal_(mean=0.0, std=0.02)
if module.bias is not None:
nn.init.zeros_(module.bias)
def forward(self,
input_ids,
attention_mask,
token_type_ids,
start_positions=None,
end_positions=None):
outputs = self.albert(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
sequence_output = outputs[0]
logits = self.qa_outputs(sequence_output)
start_logits, end_logits = logits.split(1, dim=-1)
start_logits = start_logits.squeeze(-1) # (32, 512)
end_logits = end_logits.squeeze(-1) # (32, 512)
total_loss = None
if start_positions is not None and end_positions is not None:
if len(start_positions.size()) > 1:
start_positions = start_positions.squeeze(-1)
if len(end_positions.size()) > 1:
end_positions = end_positions.squeeze(-1)
# end_positions: (32)
# start_position: (32)
loss_fct = CrossEntropyLoss()
start_loss = loss_fct(start_logits, start_positions)
end_loss = loss_fct(end_logits, end_positions)
total_loss = (start_loss + end_loss) / 2
return total_loss
else:
start_logits = torch.argmax(start_logits, 1)
end_logits = torch.argmax(end_logits, 1)
return start_logits, end_logits
model = BertQA(config)

Resources