Optimization failing after very few iterations for nonlinear constraints calculated in a blackbox wrapped in an explicit component - openmdao

I have a blackbox solver which is wrapped as explicit component and the objective function and constraints are calculated in the blackbox solver and output. These are taken to a constraint components that has an equality constraint defined such that at any iteration, these constraints are satisifed. I am using finite difference to approximate the partial derivatives. However, I get this SLSQP error "Positive directional derivative for linesearch". From S.O., I understand that this error translates - optimizer could not find a direction to move to and also couldn't verify if the results are minimum. I found that for some iterations derivative is 'None' and it was 'None' at least a few times before it threw this error. Is it because the constraints are calculated in the black box solver? or is it because 'fd' for approximation is not working for non linear constraints? or both? A problem summary is attached for reference.
from PowerHarvest import *
from HydroDynamics import *
from SatelliteComms import *
from Propulsion import *
from Constraints import *
from SystemCost import *
class MDA(Group):
"""Multidisciplinary Analysis Group"""
def __init__(self, derivative_method='fd', **kwargs):
super(MDA, self).__init__(**kwargs)
self.derivative_method = 'fd'
def setup(self):
cycle = self.add_subsystem('cycle',Group(), promotes = ["*"])
cycle.nonlinear_solver = om.NewtonSolver(solve_subsystems = True)
cycle.nonlinear_solver.options['atol'] = 1e-6
cycle.add_subsystem('Hydro', Hydro(),promotes = ["*"]) #This is a blackbox explicit component!
cycle.add_subsystem('Propulsion_system', Propulsion(),promotes = ["*"])
cycle.add_subsystem('PowerHarvest_system',PowerHarvest(),promotes = ["*"])
cycle.add_subsystem("SatelitteComs_system", SatelitteComs(),promotes = ["*"])
cycle.nonlinear_solver.options['atol'] = 1.0e-5
cycle.nonlinear_solver.options['maxiter'] = 500
cycle.nonlinear_solver.options['iprint'] = 2
#Add constraint on the each subsytem if possible
#cycle.add_constraint('',om.ex)
self.add_subsystem('PowerConstraints_system', PowerConstraints(), promotes=["*"])
self.add_subsystem('BodyConstraints_system', BodyConstraint(),promotes = ["*"])
self.add_subsystem('SystemCost_system',SystemCost(), promotes = ['*'])
self.add_constraint('A_PV', upper = 100, units = 'm**2')
#these constraints are output of the blackbox solver!
self.add_constraint('AreaCon', upper = 0)
self.add_constraint('massCon',equals = 0)
self.add_constraint('P_Load', upper = 0) # Solar generates just enough for everything no storing!
self.add_constraint('DraughtCon', lower = 0.5 )
self.add_constraint('GMCon', lower = 0.01) #should be positive
#self.add_constraint('theta', upper = 0.14, lower = 0.1)
self.add_constraint('Amplitude_Con',upper = -0.1) #amplitude differenc
Added. Run script
import openmdao.api as om
from geom_utils import *
from openmdao.api import Problem, Group, ExplicitComponent,ImplicitComponent, IndepVarComp, ExecComp,\
NonlinearBlockGS, ScipyOptimizeDriver,NewtonSolver,DirectSolver,ScipyKrylov
import os
import numpy as np
from types import FunctionType
from geom_utils import *
from capytaine.meshes.meshes import Mesh
from pprint import pprint
from PowerHarvest import *
from HydroDynamics import *
from SatelliteComms import *
from Propulsion import *
from Constraints import *
from SystemCost import *
from PEARLMDA import *
if __name__ == '__main__':
prob = Problem()
model = prob.model = MDA()
prob.driver = ScipyOptimizeDriver(optimizer = 'SLSQP')
# prob.model.nonlinear_solver = om.NonlinearBlockGS()
#prob.driver.options['optimizer'] = 'COBYLA'
prob.driver.options['tol'] = 1e-5
prob.model.add_design_var('Df', lower= 6.0, upper=20.0, units = "m")
prob.model.add_design_var('tf', lower=1.0, upper=4.0, units = "m")
#prob.model.add_design_var('submergence', upper = -0.9)
prob.model.add_design_var('Vs', lower=1, upper=2, units = "m/s") #make sure the lower, upper are according to their units.
prob.model.add_design_var('ld', lower = 3, upper = 7, units = 'm' )
prob.model.add_objective('cost_per_byte' )
newton = om.NewtonSolver(solve_subsystems=True)
newton.linesearch = om.BoundsEnforceLS()
prob.model.nonlinear_solver = newton
prob.model.linear_solver = om.DirectSolver()
# sqlite file to record the intermediate calculations and derivatives
r = om.SqliteRecorder("pearl_computations.sql")
prob.add_recorder(r)
prob.driver.add_recorder(r)
prob.driver.recording_options["record_derivatives"] = True
# Attach recorder to a subsystem
model.nonlinear_solver.add_recorder(r)
model.add_recorder(r)
prob.driver.recording_options["includes"] = ["*"]
# Attach recorder to a solver
model.nonlinear_solver.add_recorder(r)
prob.setup()
prob.set_solver_print(level=2)
# For gradients across the model this will do the finite difference method
prob.model.approx_totals(method="fd", step=0.1, form="forward", step_calc="abs")
prob.run_model()
prob.run_driver()
prob.record("final_state")
print('minimum objective found at')
print(prob['cost_per_byte'][0])
print(prob['A_PV'])
print(f"tf: {prob['tf'][0]}")
results = dict()
results['tf'] = prob['tf'][0]
results['Df'] = prob['Df'][0]
results['ld'] = prob['ld'][0]
results['mass'] = prob['Payloadmass'][0]
results['DraughCon'] = prob['DraughtCon'][0]
results['AmplitudeCon'] = prob['AmplitudeCon'][0]
print(results)
Scaling report

Related

grads is showing none after loss.backward() in fgsm attack

import torch
from torch import nn
from transformers import BertTokenizer, BertForSequenceClassification
# Load pre-trained model and tokenizer
model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
model.cuda()
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# Set up device (CPU or GPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Define PGD/FGSM attack functions
def fgsm_attack(model, loss_fn, input_ids, attention_mask, labels, epsilon=0.1):
input_ids = input_ids.float()
attention_mask = attention_mask.float()
input_ids.requires_grad = True
attention_mask.requires_grad = True
logits = model(input_ids=input_ids.long(), attention_mask=attention_mask.long())[0]
loss = loss_fn(logits, labels)
loss.backward()
# Create perturbation tensor based on sign of gradients
perturbation = epsilon * input_ids.grad.sign()
# Add perturbation to input tensor and clamp values
perturbed_input = input_ids + perturbation
perturbed_input = torch.clamp(perturbed_input, 0, 1)
# Detach input from computation graph and return perturbed input
return perturbed_input.detach()
from tqdm import tqdm
for batch in tqdm(validation_dataloader):
input_ids = batch[0].to(device)
token_type_ids = batch[1].to(device)
attention_mask = batch[2].to(device)
labels = batch[3].to(device)
peterbed_inputs = fgsm_attack(model, nn.CrossEntropyLoss(), input_ids, attention_mask, labels, epsilon=0.1)
#outputs = model(input_ids, attention_mask=attention_mask, token_type_ids = token_type_ids)
I imported the BERT model and tried to perform fgsm attack on it. But it throws the error while getting sign of input_ids.grad.sign() showing that the value is None
Can some please help me why the input_ids.grad showing None
I expected input_ids.grads not to be None

Why does my model predict the same label?

I am training a graph convolution neural network to classify EEG signals into emotion classes. The input of my data is an array of size [12803216]-->[number of subjects * numbers of channels (nodes) * features of each node]. The output should be class 0(Negative) or class 1(Positive).The data is slightly imbalanced (45% class 0 and 55% class 1). The problem is that my model always predict label 0 as output for all inputs in the training stage regardless of the convolution function.
What is wrong with my code and how can I fix it? Any comments are welcome.
connectivity at the below code is predefined based at the connections of the 32 electrodes(nodes)
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, SAGEConv, ResGatedGraphConv, global_mean_pool, BatchNorm
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, f1_score, accuracy_score
labels = np.load("/content/drive/MyDrive/ValenceLabels_thres_5.npy")
labels = np.array(labels, dtype='int64')
labels.shape
class EEGraph(nn.Module):
def __init__(self, embedding_dim, first_conv, n_layers, conv_layer):
super(EEGraph, self).__init__()
self.n_layers = n_layers
self.convs = []
self.bns = []
d_in = embedding_dim
d_out = first_conv
for i in range(n_layers):
self.convs.append(conv_layer(d_in, d_out))
self.bns.append(BatchNorm(d_out, eps=1e-5, momentum=0.1, affine=True, track_running_stats=True))
if i < n_layers - 1:
d_in, d_out = d_out, 2*d_out
self.convs = torch.nn.ModuleList(self.convs)
self.bns = torch.nn.ModuleList(self.bns)
self.project = nn.Linear(d_out, 3) # d_in beacu
self.project.apply(lambda x: nn.init.xavier_normal_(x.weight, gain=1) if type(x) == nn.Linear else None)
def forward(self, x, edge_index):
for i, (conv, bn) in enumerate(zip(self.convs, self.bns)):
x = conv(x, edge_index).permute(0, 2, 1)
x = bn(x)
x = F.dropout(F.leaky_relu(x, negative_slope=0.01), p=0.5, training=self.training).permute(0, 2, 1)
out = x.mean(dim=1).squeeze(dim=-1)
out = self.project(out)
return F.softmax(out, dim=-1)
device = torch.device("cuda")
connectivity = [[channel_order.index(e[0]), channel_order.index(e[1])] for e in edges]
connectivity = torch.tensor(connectivity).t().contiguous().to(device)
best_f1_score = -1
best_trial_name = None
n_epochs = 500
lr = 1e-3
weight_decay = 1e-5
batch_size = 63
criterion = nn.CrossEntropyLoss()
for node_dim in [16]:
node_features = np.load(f"/content/deap_graph_valence{node_dim}_1.npy")
A, Xte, yA, yte = train_test_split(node_features, labels, test_size=0.2, shuffle=True, stratify=labels, random_state=0)
Xtr, Xtr_valid, ytr, ytr_valid = train_test_split(A, yA, test_size=0.2, shuffle=True, stratify=yA, random_state=0)
Xtr = torch.tensor(Xtr).float().to(device)
Xtr_valid = torch.tensor(Xtr_valid).float().to(device)
Xte = torch.tensor(Xte).float().to(device)
ytr = torch.tensor(ytr).to(device)
#ytr_valid = torch.tensor(ytr_valid).to(device)
#yte = torch.tensor(yte).to(device)
for conv_fn in [GCNConv, SAGEConv, ResGatedGraphConv]:
for n_layers in range(1, 4):
for conv_dim in [32, 64, 128,256]:
trial_name = f"node_dim_{node_dim}-conv_fn_{conv_fn.__name__}-conv_layers_{n_layers}-conv_dim_{conv_dim}"
print(f"#: {trial_name}")
model = EEGraph(embedding_dim=Xtr.shape[-1],
first_conv=conv_dim,
n_layers=n_layers,
conv_layer=conv_fn).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
for epoch in range(n_epochs):
model.train()
indices = torch.randperm(len(Xtr))
for j, batch in enumerate(indices.view(-1, 63)):
optimizer.zero_grad()
batch_input = Xtr[batch]
outputs = model(batch_input, connectivity)
loss = criterion(outputs, ytr[batch])
loss.backward()
optimizer.step()
with torch.no_grad():
model.eval()
outputs = model(Xtr_valid, connectivity)
output_classes = torch.argmax(outputs, dim=-1).cpu().numpy()
f1 = f1_score(ytr_valid, output_classes, average="macro")
if f1 > best_f1_score:
best_trial_name = trial_name
best_f1_score = f1
print("-"*100)
print(f"Best model so far: {best_trial_name}")
print(f"Best F1 Score: %{100*best_f1_score:.2f}")
test_outputs = model(Xte, connectivity)
test_output_classes = torch.argmax(test_outputs, dim=-1).cpu().numpy()
print(classification_report(yte, test_output_classes, target_names=["Negative", "Positive"]))
print("-"*100)
print()

RuntimeError: quantile() q tensor must be same dtype as the input tensor in pytorch-forecasting

PyTorch-Forecasting version: 0.10.2
PyTorch version:1.12.1
Python version:3.10.4
Operating System: windows
Expected behavior
No Error
Actual behavior
The Error is
File c:\Users\josepeeterson.er\Miniconda3\envs\pytorch\lib\site-packages\pytorch_forecasting\metrics\base_metrics.py:979, in DistributionLoss.to_quantiles(self, y_pred, quantiles, n_samples)
977 except NotImplementedError: # resort to derive quantiles empirically
978 samples = torch.sort(self.sample(y_pred, n_samples), -1).values
--> 979 quantiles = torch.quantile(samples, torch.tensor(quantiles, device=samples.device), dim=2).permute(1, 2, 0)
980 return quantiles
RuntimeError: quantile() q tensor must be same dtype as the input tensor
How do I set them to be of same datatype? This is happening internally. I do not have control over this. I am not using any GPUs.
The link to the .csv file with input data is https://github.com/JosePeeterson/Demand_forecasting
The data is just sampled from a negative binomila distribution wiht parameters (9,0.5) every 4 hours. the time inbetween is all zero.
I just want to see if DeepAR can learn this pattern.
Code to reproduce the problem
from pytorch_forecasting.data.examples import generate_ar_data
import matplotlib.pyplot as plt
import pandas as pd
from pytorch_forecasting.data import TimeSeriesDataSet
from pytorch_forecasting.data import NaNLabelEncoder
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
import pytorch_lightning as pl
from pytorch_forecasting import NegativeBinomialDistributionLoss, DeepAR
import torch
from pytorch_forecasting.data.encoders import TorchNormalizer
data = [pd.read_csv('1_f_nbinom_train.csv')]
data["date"] = pd.Timestamp("2021-08-24") + pd.to_timedelta(data.time_idx, "H")
data['_hour_of_day'] = str(data["date"].dt.hour)
data['_day_of_week'] = str(data["date"].dt.dayofweek)
data['_day_of_month'] = str(data["date"].dt.day)
data['_day_of_year'] = str(data["date"].dt.dayofyear)
data['_week_of_year'] = str(data["date"].dt.weekofyear)
data['_month_of_year'] = str(data["date"].dt.month)
data['_year'] = str(data["date"].dt.year)
max_encoder_length = 60
max_prediction_length = 20
training_cutoff = data["time_idx"].max() - max_prediction_length
training = TimeSeriesDataSet(
data.iloc[0:-620],
time_idx="time_idx",
target="value",
categorical_encoders={"series": NaNLabelEncoder(add_nan=True).fit(data.series), "_hour_of_day": NaNLabelEncoder(add_nan=True).fit(data._hour_of_day), \
"_day_of_week": NaNLabelEncoder(add_nan=True).fit(data._day_of_week), "_day_of_month" : NaNLabelEncoder(add_nan=True).fit(data._day_of_month), "_day_of_year" : NaNLabelEncoder(add_nan=True).fit(data._day_of_year), \
"_week_of_year": NaNLabelEncoder(add_nan=True).fit(data._week_of_year), "_year": NaNLabelEncoder(add_nan=True).fit(data._year)},
group_ids=["series"],
min_encoder_length=max_encoder_length,
max_encoder_length=max_encoder_length,
min_prediction_length=max_prediction_length,
max_prediction_length=max_prediction_length,
time_varying_unknown_reals=["value"],
time_varying_known_categoricals=["_hour_of_day","_day_of_week","_day_of_month","_day_of_year","_week_of_year","_year" ],
time_varying_known_reals=["time_idx"],
add_relative_time_idx=False,
randomize_length=None,
scalers=[],
target_normalizer=TorchNormalizer(method="identity",center=False,transformation=None )
)
validation = TimeSeriesDataSet.from_dataset(
training,
data.iloc[-620:-420],
# predict=True,
stop_randomization=True,
)
batch_size = 64
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=8)
val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size, num_workers=8)
# save datasets
training.save("training.pkl")
validation.save("validation.pkl")
early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=5, verbose=False, mode="min")
lr_logger = LearningRateMonitor()
trainer = pl.Trainer(
max_epochs=10,
gpus=0,
gradient_clip_val=0.1,
limit_train_batches=30,
limit_val_batches=3,
# fast_dev_run=True,
# logger=logger,
# profiler=True,
callbacks=[lr_logger, early_stop_callback],
)
deepar = DeepAR.from_dataset(
training,
learning_rate=0.1,
hidden_size=32,
dropout=0.1,
loss=NegativeBinomialDistributionLoss(),
log_interval=10,
log_val_interval=3,
# reduce_on_plateau_patience=3,
)
print(f"Number of parameters in network: {deepar.size()/1e3:.1f}k")
torch.set_num_threads(10)
trainer.fit(
deepar,
train_dataloaders=train_dataloader,
val_dataloaders=val_dataloader,
)
Need to cast samples to torch.tensor as shown below. Then save this base_metrics.py and rerun above code.
except NotImplementedError: # resort to derive quantiles empirically
samples = torch.sort(self.sample(y_pred, n_samples), -1).values
quantiles = torch.quantile(torch.tensor(samples), torch.tensor(quantiles, device=samples.device), dim=2).permute(1, 2, 0)
return quantiles

Error message when running the codes in Jupyter notebook

I am trying to test out the accuracy of the images without using image augmentation. When I run both of the codes, I got an error shown below:
TypeError: 'NoneType' object is not callable
I found that the error occurs in the second code. I would like to know the reason on the cause of this error message, and how to resolve it. Attached below are my codes, which have to be run simultaneously. I am using Jupyter notebook for that. Thanks!
Code 1:
import torch
from torch import nn
from torch.autograd import Variable
import torch.nn.functional as F
import math
class CrossEntropyLabelSmooth(nn.Module):
"""Cross entropy loss with label smoothing regularizer.
Reference:
Szegedy et al. Rethinking the Inception Architecture for Computer Vision. CVPR 2016.
Equation: y = (1 - epsilon) * y + epsilon / K.
Args:
num_classes (int): number of classes.
epsilon (float): weight.
"""
def __init__(self, num_classes, epsilon=0.1, device='cpu'):
super(CrossEntropyLabelSmooth, self).__init__()
self.num_classes = num_classes
self.epsilon = epsilon
self.device = device
self.logsoftmax = nn.LogSoftmax(dim=1)
def forward(self, inputs, targets):
"""
Args:
inputs: prediction matrix (before softmax) with shape (batch_size, num_classes)
targets: ground truth labels with shape (num_classes)
"""
log_probs = self.logsoftmax(inputs)
# targets = torch.zeros(log_probs.size()).scatter_(1, targets.unsqueeze(1).data, 1)# for mldg da
targets = torch.zeros(log_probs.size()).scatter_(1, targets.unsqueeze(1).data.cpu(), 1)#for zzd
targets = targets.to(self.device)
targets = (1 - self.epsilon) * targets + self.epsilon / self.num_classes
loss = (-Variable(targets) * log_probs).mean(0).sum()
return loss
class TripletLoss(nn.Module):
"""Triplet loss with hard positive/negative mining.
Reference:
Hermans et al. In Defense of the Triplet Loss for Person Re-Identification. arXiv:1703.07737.
Code imported from https://github.com/Cysu/open-reid/blob/master/reid/loss/triplet.py.
Args:
margin (float): margin for triplet.
"""
def __init__(self, margin=0.3):
super(TripletLoss, self).__init__()
self.margin = margin
self.ranking_loss = nn.MarginRankingLoss(margin=margin)
def forward(self, inputs, targets):
"""
Args:
inputs: feature matrix with shape (batch_size, feat_dim)
targets: ground truth labels with shape (num_classes)
"""
n = inputs.size(0)
# Compute pairwise distance, replace by the official when merged
dist = torch.pow(inputs, 2).sum(dim=1, keepdim=True).expand(n, n)
dist = dist + dist.t()
dist.addmm_(1, -2, inputs, inputs.t())
dist = dist.clamp(min=1e-12).sqrt() # for numerical stability
# For each anchor, find the hardest positive and negative
mask = targets.expand(n, n).eq(targets.expand(n, n).t())
dist_ap, dist_an = [], []
for i in range(n):
dist_ap.append(dist[i][mask[i]].max().unsqueeze(0))
dist_an.append(dist[i][mask[i] == 0].min().unsqueeze(0))
dist_ap = torch.cat(dist_ap)
dist_an = torch.cat(dist_an)
# Compute ranking hinge loss
y = torch.ones_like(dist_an)
loss = self.ranking_loss(dist_an, dist_ap, y)
return loss
class CenterLoss(nn.Module):
"""Center loss.
Reference:
Wen et al. A Discriminative Feature Learning Approach for Deep Face Recognition. ECCV 2016.
Args:
num_classes (int): number of classes.
feat_dim (int): feature dimension.
"""
def __init__(self, num_classes=10, feat_dim=2048, device='cpu'):
super(CenterLoss, self).__init__()
self.num_classes = num_classes
self.feat_dim = feat_dim
self.device = device
self.centers = nn.Parameter(torch.randn(self.num_classes, self.feat_dim)).to(self.device)
def forward(self, x, labels):
"""
Args:
x: feature matrix with shape (batch_size, feat_dim).
labels: ground truth labels with shape (num_classes).
"""
batch_size = x.size(0)
distmat = torch.pow(x, 2).sum(dim=1, keepdim=True).expand(batch_size, self.num_classes) + \
torch.pow(self.centers, 2).sum(dim=1, keepdim=True).expand(self.num_classes, batch_size).t()
distmat.addmm_(1, -2, x, self.centers.t())
classes = torch.arange(self.num_classes).long()
classes = classes.to(self.device)
labels = labels.unsqueeze(1).expand(batch_size, self.num_classes)
mask = labels.data.eq(classes.expand(batch_size, self.num_classes))
dist = []
for i in range(batch_size):
value = distmat[i][mask[i]]
value = value.clamp(min=1e-12, max=1e+12) # for numerical stability
dist.append(value)
dist = torch.cat(dist)
loss = dist.mean()
return loss
Code 2:
# Code without data augmentation
import torch
import torch.nn as nn
from torchvision.datasets import ImageFolder
from torchvision import transforms
import torchvision.models as models
from torch.utils.data import Dataset, DataLoader
import os
import numpy as np
from tqdm import tqdm
from PIL import Image
class FoodDataset(Dataset):
def __init__(self, file, transform=None, mode='train'):
self.transforms = transform
self.mode = mode
with open(file, 'r') as f:
self.image_list = f.readlines()
def __len__(self):
return len(self.image_list)
def __getitem__(self, index):
label = None
if self.mode == 'train':
image, label = self.image_list[index].split('\n')[0].split('\t')
label = int(label)
else:
image = self.image_list[index].split('\n')[0]
image = Image.open(image).convert('RGB')
image = self.transforms(image)
if self.mode == 'train':
return image, label
else:
return image
#transforms_train = transforms.Compose([
# transforms.Resize((224, 224)),
# transforms.RandomHorizontalFlip(p=0.5),
# transforms.RandomVerticalFlip(p=0.5),
# transforms.Pad(10, 10),
# transforms.RandomRotation(45),
# transforms.RandomCrop((224, 224)),
# transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5),
# transforms.ToTensor(),
# transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
# ])
#transforms_test = transforms.Compose([
# transforms.Resize((224, 224)),
# transforms.ToTensor(),
# transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
# ])
def evaluate(prediction, ground_truth):
num_correct = (np.array(prediction) == np.array(ground_truth)).sum()
return num_correct / len(prediction)
train_ds = FoodDataset('data/train.txt')
val_ds = FoodDataset('data/val.txt')
test_ds = FoodDataset('data/test.txt')
train_dl = DataLoader(train_ds, batch_size=32, shuffle=True)
val_dl = DataLoader(val_ds, batch_size=32, shuffle=True)
test_dl = DataLoader(test_ds, batch_size=32, shuffle=True)
num_classes = 5
train_model = models.resnet50(pretrained=True)
train_model.fc = nn.Linear(2048, num_classes)
output_dir = 'checkpoint'
if output_dir and not os.path.exists(output_dir):
os.makedirs(output_dir)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
ce_loss = CrossEntropyLabelSmooth(num_classes = num_classes, device = device)
optimizer = torch.optim.Adam(train_model.parameters(), lr=0.001)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)
for param in train_model.parameters():
param.requires_grad = False
for param in train_model.fc.parameters():
param.requires_grad = True
for i in range(5):
train_model.train()
train_model.to(device)
for img, label in tqdm(train_dl):
img = img.to(device)
label = label.to(device)
optimizer.zero_grad()
output= train_model(img)
loss = ce_loss(output, label)
loss.backward()
optimizer.step()
for param in train_model.parameters():
param.requires_grad = True
epoch = 100
highest_acc = {'epoch': 0, 'accuracy': 0}
for ep in range(epoch):
train_model.train()
train_model.to(device)
count = 0
running_loss = 0.0
validation_loss = 0.0
output_list = []
ground_truth_list = []
for img, label in tqdm(train_dl):
img = img.to(device)
label = label.to(device)
optimizer.zero_grad()
output= train_model(img)
loss = ce_loss(output, label)
count += 1
prediction = torch.argmax(output, dim=1)
output_list.extend(prediction.detach().cpu())
ground_truth_list.extend(label.cpu())
running_loss += loss.item()
loss.backward()
optimizer.step()
scheduler.step()
if ep % 10 == 0:
torch.save(train_model.state_dict(), output_dir + '/resnet50_' + str(ep) + '.pth')
accuracy = evaluate(output_list, ground_truth_list)
print(f'Epoch[{ep}] training accuracy: {accuracy} '
f'training loss: {running_loss / count:.3e} Base Lr: {optimizer.param_groups[0]["lr"]:.5e}')
if ep % 10 == 0:
train_model.eval()
count = 0
output_list = []
ground_truth_list = []
for img, label in tqdm(val_dl):
with torch.no_grad():
img = img.to(device)
lbl = label.to(device)
output= train_model(img)
val_loss = ce_loss(output, lbl)
validation_loss += val_loss.item()
count += 1
prediction = torch.argmax(output, dim=1)
output_list.extend(prediction.detach().cpu())
ground_truth_list.extend(label)
accuracy = evaluate(output_list, ground_truth_list)
if accuracy > highest_acc['accuracy']:
highest_acc['accuracy'] = accuracy
highest_acc['epoch'] = ep
print(f'Accuracy: {accuracy} Epoch:{ep}')
torch.save(train_model.state_dict(), output_dir + '/resnet50_' + 'final' + '.pth')
print('highest_acc: {} epoch: {}'.format(highest_acc['accuracy'], highest_acc['epoch']))

MetaModelUnstructured Computational Time

I am using sample 2D functions for optimization with MetaModelUnStructuredComp.
Below is a code snippet. The computational time spent for training increases considerably as I increase the number of sample points. I am not sure if this much increase is expected or am I doing something wrong.
The problem is 2D and predicting 1 output below is some performance time;
45 sec for 900 points*
14 sec for 625 points
3.7 sec for 400 points
*points represent the dimension of each training input
Will decreasing this be a focus of openMDAO development team in the future? (keep reading for the edited version)
import numpy as np
from openmdao.api import Problem, IndepVarComp
from openmdao.api import ScipyOptimizeDriver
from openmdao.api import MetaModelUnStructuredComp, FloatKrigingSurrogate,MetaModelUnStructuredComp
from openmdao.api import CaseReader, SqliteRecorder
import time
t0 = time.time()
class trig(MetaModelUnStructuredComp):
def setup(self):
ii=3
nx, ny = (10*ii, 10*ii)
print(nx*ny)
xx = np.linspace(-3,3, nx)
yy = np.linspace(-2,2, ny)
x, y = np.meshgrid(xx, yy)
# z = np.sin(x)**10 + np.cos(10 + y) * np.cos(x)
# z=4+4.5*x-4*y+x**2+2*y**2-2*x*y+x**4-2*x**2*y
term1 = (4-2.1*x**2+(x**4)/3) * x**2;
term2 = x*y;
term3 = (-4+4*y**2) * y**2;
z = term1 + term2 + term3;
self.add_input('x', training_data=x.flatten())
self.add_input('y', training_data=y.flatten())
self.add_output('meta_out', surrogate=FloatKrigingSurrogate(),
training_data=z.flatten())
prob = Problem()
inputs_comp = IndepVarComp()
inputs_comp.add_output('x', 1.5)
inputs_comp.add_output('y', 1.5)
prob.model.add_subsystem('inputs_comp', inputs_comp)
#triginst=
prob.model.add_subsystem('trig', trig())
prob.model.connect('inputs_comp.x', 'trig.x')
prob.model.connect('inputs_comp.y', 'trig.y')
prob.driver = ScipyOptimizeDriver()
prob.driver.options['optimizer'] = 'SLSQP'
prob.driver.options['tol'] = 1e-8
prob.driver.options['disp'] = True
prob.model.add_design_var('inputs_comp.x', lower=-3, upper=3)
prob.model.add_design_var('inputs_comp.y', lower=-2, upper=2)
prob.model.add_objective('trig.meta_out')
prob.setup(check=True)
prob.run_model()
print(prob['inputs_comp.x'])
print(prob['inputs_comp.y'])
print(prob['trig.meta_out'])
t1 = time.time()
total = t1-t0
print(total)
Following the answers below i am adding a code snippet of an explicit component that uses SMT toolbox for surrogate. I guess this is one way to use the toolbox's capabilities.
import numpy as np
from smt.surrogate_models import RBF
from openmdao.api import ExplicitComponent
from openmdao.api import Problem, ScipyOptimizeDriver
from openmdao.api import Group, IndepVarComp
import smt
# Sample problem with SMT Toolbox and OpenMDAO Explicit Comp
#Optimization of SIX-HUMP CAMEL FUNCTION with 2 global optima
class MetaCompSMT(ExplicitComponent):
def initialize(self):
self.options.declare('sm', types=smt.surrogate_models.rbf.RBF)
def setup(self):
self.add_input('x')
self.add_input('y')
self.add_output('z')
# self.declare_partials(of='z', wrt=['x','y'], method='fd')
self.declare_partials(of='*', wrt='*')
def compute(self, inputs, outputs):
# sm = self.options['sm'] # seems like this is not needed
sta=np.column_stack([inputs[i] for i in inputs])
outputs['z'] =sm.predict_values(sta).flatten()
def compute_partials(self, inputs, partials):
sta=np.column_stack([inputs[i] for i in inputs])
print(sta)
for i,invar in enumerate(inputs):
partials['z', invar] =sm.predict_derivatives(sta,i)
# SMT SURROGATE IS TRAINED IN ADVANCE AND PASSED TO THE COMPONENT AS GLOBAL INPUT
# Training Data
ii=3 # "incerases the domain size"
nx, ny = (10*ii, 5*ii)
x, y = np.meshgrid(np.linspace(-3,3, nx), np.linspace(-2,2, ny))
term1 = (4-2.1*x**2+(x**4)/3) * x**2;
term2 = x*y;
term3 = (-4+4*y**2) * y**2;
z = term1 + term2 + term3;
# Surrogate training
xt=np.column_stack([x.flatten(),y.flatten()])
yt=z.flatten()
#sm = KPLSK(theta0=[1e-2])
sm=RBF(d0=-1,poly_degree=-1,reg=1e-13,print_global=False)
sm.set_training_values(xt, yt)
sm.train()
prob = Problem() # Start the OpenMDAO optimization problem
prob.model = model = Group() # Assemble a group within the problem. In this case single group.
"Independent component ~ single Design variable "
inputs_comp = IndepVarComp() # OpenMDAO approach for the design variable as independent component output
inputs_comp.add_output('x', 2.5) # Vary initial value for finding the second global optimum
inputs_comp.add_output('y', 1.5) # Vary initial value for finding the second global optimum
model.add_subsystem('inputs_comp', inputs_comp)
"Component 1"
comp = MetaCompSMT(sm=sm)
model.add_subsystem('MetaCompSMT', comp)
"Connect design variable to the 2 components. Easier to follow than promote"
model.connect('inputs_comp.x', 'MetaCompSMT.x')
model.connect('inputs_comp.y', 'MetaCompSMT.y')
"Lower/Upper bound design variables"
model.add_design_var('inputs_comp.x', lower=-3, upper=3)
model.add_design_var('inputs_comp.y', lower=-2, upper=2)
model.add_objective('MetaCompSMT.z')
prob.driver = ScipyOptimizeDriver()
prob.driver.options['optimizer'] = 'SLSQP'
prob.driver.options['disp'] = True
prob.driver.options['tol'] = 1e-9
prob.setup(check=True, mode='fwd')
prob.run_driver()
print(prob['inputs_comp.x'],prob['inputs_comp.y'],prob['MetaCompSMT.z'])
If you are willing to compile some code yourself, you could write very light weight wrapper for the Surrogate Modeling Toolbox (SMT). You could write that wrapper to work with the standard MetaModelUnstructuredComp or just write your own component wrapper.
Either way, that library has some significantly faster unstructured surrogate models in it. The default OpenMDAO implementations are just basic implementations. We may improve them over time, but for larger data sets or design spaces SMT offers much better algorithms.
We haven't written a general SMT wrapper in OpenMDAO as of Version 2.4, but its not hard to write your own.
I'm going to look into the performance of the MetaModelUnStructuredComp using your test case a bit more closely. Though I do notice that this test case does involve fitting a structured data set. If you were to use MetaModelStructuredComp(http://openmdao.org/twodocs/versions/2.2.0/features/building_blocks/components/metamodelstructured.html), the performance is considerably better:
class trig(MetaModelStructuredComp):
def setup(self):
ii=3
nx, ny = (10*ii, 10*ii)
xx = np.linspace(-3,3, nx)
yy = np.linspace(-2,2, ny)
x, y = np.meshgrid(xx, yy, indexing='ij')
term1 = (4-2.1*x**2+(x**4)/3) * x**2;
term2 = x*y;
term3 = (-4+4*y**2) * y**2;
z = term1 + term2 + term3;
self.add_input('x', 0.0, xx)
self.add_input('y', 0.0, yy)
self.add_output('meta_out', 0.0, z)
The 900 points case goes from 14 seconds on my machine using MetaModelUnStructuredComp to 0.081 when using MetaModelStructuredComp.

Resources