Is it possible to optimize a single list entry in OpenMDAO 1.X? In this example, I want to add/optimize the first entry of z. In my real problem it is impossible for me to add the entire vector as a design variable (I'm using NREL's DAKOTA driver). I thought I could make dummy variables connected to z, but it looks like 1.X does not support connecting variables to list entries.
ppb.py:
from __future__ import print_function
import numpy as np
from dakota_driver.driver import pydakdriver
from openmdao.api import ScipyOptimizer
from openmdao.api import IndepVarComp, Component, Problem, Group

class Paraboloid(Component):
    """ Evaluates the equation f(x,y) = (x-3)^2 + xy + (y+4)^2 - 3 """

    def __init__(self):
        super(Paraboloid, self).__init__()
        self.add_param('x', val=6.0)
        self.add_param('y', val=-7.0)
        self.add_param('z', val=np.array([2., 2., 2.]))
        self.add_output('f_xy', val=0.0)

    def solve_nonlinear(self, params, unknowns, resids):
        """f(x,y) = (x-3)^2 + xy + (y+4)^2 - 3"""
        x = params['x']
        y = params['y']
        z = params['z']
        unknowns['f_xy'] = z[0]*(x-3.0)**2 + x*y + (y+4.0)**2 - 3.0 + abs(z[0]-4)

    def linearize(self, params, unknowns, resids):
        """ Jacobian for our paraboloid."""
        x = params['x']
        y = params['y']
        z = params['z']
        J = {}
        J['f_xy', 'x'] = 2.0*x - 6.0 + y
        J['f_xy', 'y'] = 2.0*y + 8.0 + x
        J['f_xy', 'z'] = (x-3.0)**2 + 1
        return J

top = Problem()
root = top.root = Group()
root.add('p1', IndepVarComp('x', 13.0))
root.add('p2', IndepVarComp('y', -14.0))
root.add('p3', IndepVarComp('z', np.array([0.0, 0., 0.])))
root.add('p', Paraboloid())
root.connect('p1.x', 'p.x')
root.connect('p2.y', 'p.y')
root.connect('p3.z', 'p.z')

top.driver = ScipyOptimizer()
top.driver.options['optimizer'] = 'Powell'
top.driver.add_desvar('p3.z[0]', lower=-30., upper=30.)  # <--- Is it possible to do this?
top.driver.add_objective('p.f_xy')

top.setup()
top['p1.x'] = 3.0
top['p2.y'] = -4.0
#top['p3.z'] = 2.0
top['p3.z'] = np.array([0., 0., 0.])
top.run()
Run Results:
$ python ppb.py
Traceback (most recent call last):
File "ppb.py", line 75, in <module>
top.setup()
File "/scratch/jquick/1.0/lib/python2.7/site-packages/openmdao/core/problem.py", line 586, in setup
raise NameError("Can't find param of interest '%s'." % v)
NameError: Can't find param of interest 'p3.z[0]'.
Specify the index to be optimized using the indices keyword:
top.driver.add_desvar('p3.z', lower=-30., upper=30., indices=[0])
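For reference, here is how the driver block from the script above would look with that change; indices takes a list of integer positions, so only z[0] is exposed to the optimizer while z[1] and z[2] keep their initial values (a minimal sketch, not the full script):

top.driver = ScipyOptimizer()
top.driver.options['optimizer'] = 'Powell'
# Name the full vector and select the entry with `indices`;
# a list such as indices=[0, 2] would expose several entries at once.
top.driver.add_desvar('p3.z', lower=-30., upper=30., indices=[0])
top.driver.add_objective('p.f_xy')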
Related
I am trying to test the accuracy of the model on my images without using image augmentation. When I run the two scripts below together, I get the error shown here:
TypeError: 'NoneType' object is not callable
I found that the error occurs in the second script. I would like to know what causes this error message and how to resolve it. Attached below are my scripts, which have to be run together. I am using a Jupyter notebook for this. Thanks!
Code 1:
import torch
from torch import nn
from torch.autograd import Variable
import torch.nn.functional as F
import math

class CrossEntropyLabelSmooth(nn.Module):
    """Cross entropy loss with label smoothing regularizer.
    Reference:
    Szegedy et al. Rethinking the Inception Architecture for Computer Vision. CVPR 2016.
    Equation: y = (1 - epsilon) * y + epsilon / K.
    Args:
        num_classes (int): number of classes.
        epsilon (float): weight.
    """
    def __init__(self, num_classes, epsilon=0.1, device='cpu'):
        super(CrossEntropyLabelSmooth, self).__init__()
        self.num_classes = num_classes
        self.epsilon = epsilon
        self.device = device
        self.logsoftmax = nn.LogSoftmax(dim=1)

    def forward(self, inputs, targets):
        """
        Args:
            inputs: prediction matrix (before softmax) with shape (batch_size, num_classes)
            targets: ground truth labels with shape (num_classes)
        """
        log_probs = self.logsoftmax(inputs)
        # targets = torch.zeros(log_probs.size()).scatter_(1, targets.unsqueeze(1).data, 1)  # for mldg da
        targets = torch.zeros(log_probs.size()).scatter_(1, targets.unsqueeze(1).data.cpu(), 1)  # for zzd
        targets = targets.to(self.device)
        targets = (1 - self.epsilon) * targets + self.epsilon / self.num_classes
        loss = (-Variable(targets) * log_probs).mean(0).sum()
        return loss

class TripletLoss(nn.Module):
    """Triplet loss with hard positive/negative mining.
    Reference:
    Hermans et al. In Defense of the Triplet Loss for Person Re-Identification. arXiv:1703.07737.
    Code imported from https://github.com/Cysu/open-reid/blob/master/reid/loss/triplet.py.
    Args:
        margin (float): margin for triplet.
    """
    def __init__(self, margin=0.3):
        super(TripletLoss, self).__init__()
        self.margin = margin
        self.ranking_loss = nn.MarginRankingLoss(margin=margin)

    def forward(self, inputs, targets):
        """
        Args:
            inputs: feature matrix with shape (batch_size, feat_dim)
            targets: ground truth labels with shape (num_classes)
        """
        n = inputs.size(0)
        # Compute pairwise distance, replace by the official when merged
        dist = torch.pow(inputs, 2).sum(dim=1, keepdim=True).expand(n, n)
        dist = dist + dist.t()
        dist.addmm_(1, -2, inputs, inputs.t())
        dist = dist.clamp(min=1e-12).sqrt()  # for numerical stability
        # For each anchor, find the hardest positive and negative
        mask = targets.expand(n, n).eq(targets.expand(n, n).t())
        dist_ap, dist_an = [], []
        for i in range(n):
            dist_ap.append(dist[i][mask[i]].max().unsqueeze(0))
            dist_an.append(dist[i][mask[i] == 0].min().unsqueeze(0))
        dist_ap = torch.cat(dist_ap)
        dist_an = torch.cat(dist_an)
        # Compute ranking hinge loss
        y = torch.ones_like(dist_an)
        loss = self.ranking_loss(dist_an, dist_ap, y)
        return loss

class CenterLoss(nn.Module):
    """Center loss.
    Reference:
    Wen et al. A Discriminative Feature Learning Approach for Deep Face Recognition. ECCV 2016.
    Args:
        num_classes (int): number of classes.
        feat_dim (int): feature dimension.
    """
    def __init__(self, num_classes=10, feat_dim=2048, device='cpu'):
        super(CenterLoss, self).__init__()
        self.num_classes = num_classes
        self.feat_dim = feat_dim
        self.device = device
        self.centers = nn.Parameter(torch.randn(self.num_classes, self.feat_dim)).to(self.device)

    def forward(self, x, labels):
        """
        Args:
            x: feature matrix with shape (batch_size, feat_dim).
            labels: ground truth labels with shape (num_classes).
        """
        batch_size = x.size(0)
        distmat = torch.pow(x, 2).sum(dim=1, keepdim=True).expand(batch_size, self.num_classes) + \
                  torch.pow(self.centers, 2).sum(dim=1, keepdim=True).expand(self.num_classes, batch_size).t()
        distmat.addmm_(1, -2, x, self.centers.t())
        classes = torch.arange(self.num_classes).long()
        classes = classes.to(self.device)
        labels = labels.unsqueeze(1).expand(batch_size, self.num_classes)
        mask = labels.data.eq(classes.expand(batch_size, self.num_classes))
        dist = []
        for i in range(batch_size):
            value = distmat[i][mask[i]]
            value = value.clamp(min=1e-12, max=1e+12)  # for numerical stability
            dist.append(value)
        dist = torch.cat(dist)
        loss = dist.mean()
        return loss
Code 2:
# Code without data augmentation
import torch
import torch.nn as nn
from torchvision.datasets import ImageFolder
from torchvision import transforms
import torchvision.models as models
from torch.utils.data import Dataset, DataLoader
import os
import numpy as np
from tqdm import tqdm
from PIL import Image

class FoodDataset(Dataset):
    def __init__(self, file, transform=None, mode='train'):
        self.transforms = transform
        self.mode = mode
        with open(file, 'r') as f:
            self.image_list = f.readlines()

    def __len__(self):
        return len(self.image_list)

    def __getitem__(self, index):
        label = None
        if self.mode == 'train':
            image, label = self.image_list[index].split('\n')[0].split('\t')
            label = int(label)
        else:
            image = self.image_list[index].split('\n')[0]
        image = Image.open(image).convert('RGB')
        image = self.transforms(image)
        if self.mode == 'train':
            return image, label
        else:
            return image

#transforms_train = transforms.Compose([
#    transforms.Resize((224, 224)),
#    transforms.RandomHorizontalFlip(p=0.5),
#    transforms.RandomVerticalFlip(p=0.5),
#    transforms.Pad(10, 10),
#    transforms.RandomRotation(45),
#    transforms.RandomCrop((224, 224)),
#    transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5),
#    transforms.ToTensor(),
#    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
#    ])
#transforms_test = transforms.Compose([
#    transforms.Resize((224, 224)),
#    transforms.ToTensor(),
#    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
#    ])

def evaluate(prediction, ground_truth):
    num_correct = (np.array(prediction) == np.array(ground_truth)).sum()
    return num_correct / len(prediction)

train_ds = FoodDataset('data/train.txt')
val_ds = FoodDataset('data/val.txt')
test_ds = FoodDataset('data/test.txt')
train_dl = DataLoader(train_ds, batch_size=32, shuffle=True)
val_dl = DataLoader(val_ds, batch_size=32, shuffle=True)
test_dl = DataLoader(test_ds, batch_size=32, shuffle=True)

num_classes = 5
train_model = models.resnet50(pretrained=True)
train_model.fc = nn.Linear(2048, num_classes)

output_dir = 'checkpoint'
if output_dir and not os.path.exists(output_dir):
    os.makedirs(output_dir)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
ce_loss = CrossEntropyLabelSmooth(num_classes=num_classes, device=device)
optimizer = torch.optim.Adam(train_model.parameters(), lr=0.001)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)

for param in train_model.parameters():
    param.requires_grad = False
for param in train_model.fc.parameters():
    param.requires_grad = True

for i in range(5):
    train_model.train()
    train_model.to(device)
    for img, label in tqdm(train_dl):
        img = img.to(device)
        label = label.to(device)
        optimizer.zero_grad()
        output = train_model(img)
        loss = ce_loss(output, label)
        loss.backward()
        optimizer.step()

for param in train_model.parameters():
    param.requires_grad = True

epoch = 100
highest_acc = {'epoch': 0, 'accuracy': 0}
for ep in range(epoch):
    train_model.train()
    train_model.to(device)
    count = 0
    running_loss = 0.0
    validation_loss = 0.0
    output_list = []
    ground_truth_list = []
    for img, label in tqdm(train_dl):
        img = img.to(device)
        label = label.to(device)
        optimizer.zero_grad()
        output = train_model(img)
        loss = ce_loss(output, label)
        count += 1
        prediction = torch.argmax(output, dim=1)
        output_list.extend(prediction.detach().cpu())
        ground_truth_list.extend(label.cpu())
        running_loss += loss.item()
        loss.backward()
        optimizer.step()
    scheduler.step()
    if ep % 10 == 0:
        torch.save(train_model.state_dict(), output_dir + '/resnet50_' + str(ep) + '.pth')
    accuracy = evaluate(output_list, ground_truth_list)
    print(f'Epoch[{ep}] training accuracy: {accuracy} '
          f'training loss: {running_loss / count:.3e} Base Lr: {optimizer.param_groups[0]["lr"]:.5e}')
    if ep % 10 == 0:
        train_model.eval()
        count = 0
        output_list = []
        ground_truth_list = []
        for img, label in tqdm(val_dl):
            with torch.no_grad():
                img = img.to(device)
                lbl = label.to(device)
                output = train_model(img)
                val_loss = ce_loss(output, lbl)
                validation_loss += val_loss.item()
                count += 1
                prediction = torch.argmax(output, dim=1)
                output_list.extend(prediction.detach().cpu())
                ground_truth_list.extend(label)
        accuracy = evaluate(output_list, ground_truth_list)
        if accuracy > highest_acc['accuracy']:
            highest_acc['accuracy'] = accuracy
            highest_acc['epoch'] = ep
        print(f'Accuracy: {accuracy} Epoch:{ep}')

torch.save(train_model.state_dict(), output_dir + '/resnet50_' + 'final' + '.pth')
print('highest_acc: {} epoch: {}'.format(highest_acc['accuracy'], highest_acc['epoch']))
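For what it's worth, a likely cause judging from the code as posted (this is an editor's hedged reading, not a confirmed diagnosis): the three FoodDataset objects are constructed without a transform, so self.transforms is None inside __getitem__, and self.transforms(image) is then a call on a None object, which raises exactly TypeError: 'NoneType' object is not callable. A minimal sketch of a fix, reusing the commented-out transforms_test pipeline from above:

# Sketch: keep a basic, augmentation-free pipeline so self.transforms is callable.
transforms_test = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
train_ds = FoodDataset('data/train.txt', transform=transforms_test)
val_ds = FoodDataset('data/val.txt', transform=transforms_test)
test_ds = FoodDataset('data/test.txt', transform=transforms_test)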
I am just playing around with the Paraboloid tutorial in OpenMDAO. I tried something simple, basically changing the input values to the Paraboloid component; see the following code. When I run it, though, it prints the same result as if nothing had happened. So what is going on? If I have a group, how would I modify the inputs?
from __future__ import print_function
from openmdao.api import IndepVarComp, Component, Problem, Group

class Paraboloid(Component):
    """ Evaluates the equation f(x,y) = (x-3)^2 + xy + (y+4)^2 - 3 """

    def __init__(self):
        super(Paraboloid, self).__init__()
        self.add_param('x', val=0.0)
        self.add_param('y', val=0.0)
        self.add_output('f_xy', val=0.0)

    def solve_nonlinear(self, params, unknowns, resids):
        """f(x,y) = (x-3)^2 + xy + (y+4)^2 - 3"""
        x = params['x']
        y = params['y']
        unknowns['f_xy'] = (x-3.0)**2 + x*y + (y+4.0)**2 - 3.0

    def linearize(self, params, unknowns, resids):
        """ Jacobian for our paraboloid."""
        x = params['x']
        y = params['y']
        J = {}
        J['f_xy', 'x'] = 2.0*x - 6.0 + y
        J['f_xy', 'y'] = 2.0*y + 8.0 + x
        return J

if __name__ == "__main__":
    top = Problem()
    root = top.root = Group()
    root.add('p1', IndepVarComp('x', 3.0))
    root.add('p2', IndepVarComp('y', -4.0))
    root.add('p', Paraboloid())
    root.connect('p1.x', 'p.x')
    root.connect('p2.y', 'p.y')
    root.p1.x = 3.0
    root.p2.y = -4.0
    top.setup()
    top.run()
    print(root.p.unknowns['f_xy'])
    root.p1.x = 5.0
    root.p2.y = 5.0
    top.setup()
    top.run()
    print(root.p.unknowns['f_xy'])
You have to call setup() before you can set any values.
After that you set them via dictionary-style access on the problem instance. For your sample code it should look like:
top.setup()
top['p1.x'] = 3.
top['p2.y'] = 4.
top.run()
print(top['p.f_xy'])
top['p1.x'] = 10.
top['p2.y'] = 10.
top.run()
print(top['p.f_xy'])
When I run this model with the adjusted script I get:
##############################################
Setup: Checking for potential issues...
No recorders have been specified, so no data will be saved.
Setup: Check complete.
##############################################
73.0
342.0
I have a param that is a 2D array. It works fine for computing the correct output, but when I try to do anything with the gradients, such as optimization or check_total_derivatives, I get a sizing error. I was wondering what the best way is to handle params that are 2D arrays. Here is a sample code:
import numpy as np
from openmdao.api import Group, Problem, Component, IndepVarComp, ExecComp

class C1(Component):
    def __init__(self, n):
        super(C1, self).__init__()
        self.add_param('grid', val=np.zeros((n, n)))
        self.add_output('x', shape=1)
        self.n = n

    def solve_nonlinear(self, params, unknowns, resids):
        x = 0
        for i in range(self.n):
            for j in range(self.n):
                x += params['grid'][i][j]
        unknowns['x'] = x

    def linearize(self, params, unknowns, resids):
        J = {}
        J['x', 'grid'] = np.ones((self.n, self.n))
        return J

class Group1(Group):
    def __init__(self, n):
        super(Group1, self).__init__()
        self.add('grid', IndepVarComp('grid', np.zeros((n, n))), promotes=['*'])
        self.add('c1', C1(n), promotes=['*'])
        self.add('obj_cmp', ExecComp('obj = -x', x=1.0), promotes=['*'])

n = 3
p = Problem()
p.root = Group1(n)
p.setup(check=False)
p['grid'] = np.ones((n, n))
p.run()
p.check_total_derivatives()
print p['x']
I get the error:
ValueError: In component 'c1', the derivative of 'x' wrt 'grid' should have shape '(1, 3)' but has shape '(3, 3)' instead.
I feel like the derivative in this case should be of size (3, 3) because that is the size of the input param. How do you handle 2D params?
You have a small mistake in the Jacobian; it should look like this:
def linearize(self, params, unknowns, resids):
    J = {}
    J['x', 'grid'] = np.ones((1, self.n*self.n))
    return J
The output x has length 1, while the param grid is n by n, so the flattened grid has length n*n; the resulting J should therefore be 1 by 9 for n = 3. With that change, I get the right answer.
I did notice a mistake in the error message. It should say that the expected shape is (1, 9) instead of (1, 3). I will put in a fix for that.
When you have a 2D variable and need to construct the gradient, flatten it (in row-major order) and formulate the gradient based on the flattened version.
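To make that rule concrete, here is a hedged sketch (a hypothetical component, not from the question) where the output actually varies over the grid, so the flattened Jacobian is nontrivial:

import numpy as np
from openmdao.api import Component

class GridSquareSum(Component):
    """Hypothetical example: x = sum of grid[i][j]**2 over an n-by-n param."""
    def __init__(self, n):
        super(GridSquareSum, self).__init__()
        self.n = n
        self.add_param('grid', val=np.zeros((n, n)))
        self.add_output('x', shape=1)

    def solve_nonlinear(self, params, unknowns, resids):
        unknowns['x'] = np.sum(params['grid']**2)

    def linearize(self, params, unknowns, resids):
        J = {}
        # d(sum g_ij^2)/d(g_ij) = 2*g_ij; flatten row-major (NumPy's default
        # C order) so J has shape (1, n*n) to match the flattened param.
        J['x', 'grid'] = 2.0 * params['grid'].reshape(1, self.n * self.n)
        return J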
I have a situation where the gradient of one component must, by necessity, be calculated in another component. What I have attempted to do is have the gradient be an output of the first component and an input of the second. I have set it to pass_by_obj so that it doesn't affect other calculations. Any recommendations on whether or not this is the best way to do it would be appreciated. In any case, I am getting an error when using check_partial_derivatives(). It seems to be an error for any output that is specified as pass_by_obj. Here is a simple case:
import numpy as np
from openmdao.api import Group, Problem, Component, ScipyGMRES, ExecComp, IndepVarComp

class Comp1(Component):
    def __init__(self):
        super(Comp1, self).__init__()
        self.add_param('x', shape=1)
        self.add_output('y', shape=1)
        self.add_output('dz_dy', shape=1, pass_by_obj=True)

    def solve_nonlinear(self, params, unknowns, resids):
        x = params['x']
        unknowns['y'] = 4.0*x + 1.0
        unknowns['dz_dy'] = 2.0*x

    def linearize(self, params, unknowns, resids):
        J = {}
        J['y', 'x'] = 4.0
        return J

class Comp2(Component):
    def __init__(self):
        super(Comp2, self).__init__()
        self.add_param('y', shape=1)
        self.add_param('dz_dy', shape=1, pass_by_obj=True)
        self.add_output('z', shape=1)

    def solve_nonlinear(self, params, unknowns, resids):
        y = params['y']
        unknowns['z'] = y*2.0

    def linearize(self, params, unknowns, resids):
        J = {}
        J['z', 'y'] = params['dz_dy']
        return J

class TestGroup(Group):
    def __init__(self):
        super(TestGroup, self).__init__()
        self.add('x', IndepVarComp('x', 0.0), promotes=['*'])
        self.add('c1', Comp1(), promotes=['*'])
        self.add('c2', Comp2(), promotes=['*'])

p = Problem()
p.root = TestGroup()
p.setup(check=False)
p['x'] = 2.0
p.run()

print p['z']
print 'gradients'
test_grad = open('partial_gradients_test.txt', 'w')
partial = p.check_partial_derivatives(out_stream=test_grad)
I get the following error message:
partial = p.check_partial_derivatives(out_stream=test_grad)
File "/usr/local/lib/python2.7/site-packages/openmdao/core/problem.py", line 1699, in check_partial_derivatives
dresids._dat[u_name].val[idx] = 1.0
TypeError: '_ByObjWrapper' object does not support item assignment
I asked before about the params being checked for pass_by_obj in check_partial_derivatives() and it might be simply a matter of checking the unknowns for pass_by_obj as well.
The error you're getting is another bug related to the check_partial_derivatives function. It should be easy enough to fix, but in the meantime you can just remove the pass_by_obj setting. Since you're computing a value in one component and passing it to another, there isn't any need for pass_by_obj at all (and it will be more efficient if you don't use it).
You said that you did it so that it "doesn't affect other calculations", but I don't quite know what you mean by that. It won't affect anything unless you use it in the solve_nonlinear method.
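Concretely, a minimal sketch of the suggested change; only these two declarations differ from the code in the question:

# In Comp1, declare the derivative as a regular float output:
self.add_output('dz_dy', shape=1)   # was: pass_by_obj=True

# In Comp2, receive it as a regular float param:
self.add_param('dz_dy', shape=1)    # was: pass_by_obj=True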
I have a component that has an input that is an int, so I am setting pass_by_obj=True. However, when I check derivatives with check_partial_derivatives(), it throws this error:
data = prob.check_partial_derivatives(out_stream=sys.stdout)
File "/usr/local/lib/python2.7/site-packages/openmdao/core/problem.py", line 1711, in check_partial_derivatives
jac_rev[(u_name, p_name)][idx, :] = dinputs._dat[p_name].val
TypeError: float() argument must be a string or a number
It appears to be trying to take the derivative even though it cannot. Here is a simple example:
import sys
from openmdao.api import IndepVarComp, Problem, Group, Component

class Comp(Component):
    def __init__(self):
        super(Comp, self).__init__()
        self.add_param('x', val=0.0)
        self.add_param('y', val=3, pass_by_obj=True)
        self.add_output('z', val=0.0)

    def solve_nonlinear(self, params, unknowns, resids):
        unknowns['z'] = params['y']*params['x']

    def linearize(self, params, unknowns, resids):
        J = {}
        J['z', 'x'] = params['y']
        return J

prob = Problem()
prob.root = Group()
prob.root.add('comp', Comp(), promotes=['*'])
prob.root.add('p1', IndepVarComp('x', 0.0), promotes=['x'])
prob.root.add('p2', IndepVarComp('y', 3, pass_by_obj=True), promotes=['y'])
prob.setup(check=False)
prob['x'] = 2.0
prob['y'] = 3
prob.run()

print prob['z']
data = prob.check_partial_derivatives(out_stream=sys.stdout)
Is it possible to use the check_partial_derivatives() method with components that have inputs specified as pass_by_obj? I don't care about the derivatives for the pass_by_obj inputs, but I do care about the other inputs.
Thanks for the report and test. This was a bug where we weren't excluding the design variables that were declared pass_by_obj. I've got a pull request up on the OpenMDAO repo with a fix. It'll probably be merged to master within a day.
EDIT -- The fix is merged. https://github.com/OpenMDAO/OpenMDAO/commit/b123b284e46aac7e15fa9bce3751f9ad9bb63b95