Explicitly set dependency TaskFlowAPI - airflow

Suppose I have a dag as follows:
# NOTE(review): `task` is not defined in this excerpt; it is presumably
# `airflow.decorators.task`, and `my_dag` is presumably wrapped with `@dag(...)`
# in the full file -- confirm.
def my_dag():
    @task
    def task_a():
        # Some code here
        return

    @task
    def task_b():
        # Some code here
        return

    # Both tasks are instantiated here, but no upstream/downstream relation
    # is declared between them.
    task_a()
    task_b()


my_dag_instance = my_dag()
I would like to set task_b downstream from task_a. That is, I want task_b to execute only if task_a has executed successfully. How can I do so? Note that task_a does not return anything.
I tried task_a >> task_b right before the last line of my code (my_dag_instance = my_dag()) yet got an error along the lines of " TaskDecorator >> TaskDecorator is invalid operation". Any advice?

Indeed, you can do that, inside the dag context, and with task instance instead of task method reference:
# NOTE(review): `task` is not defined in this excerpt; it is presumably
# `airflow.decorators.task`, and `my_dag` is presumably wrapped with `@dag(...)`
# in the full file -- confirm.
def my_dag():
    @task
    def task_a():
        # Some code here
        return

    @task
    def task_b():
        # Some code here
        return

    # The dependency is declared inside the DAG function, on the objects
    # returned by *calling* the tasks, not on the decorated functions themselves.
    task_a() >> task_b()
    # or
    # A = task_a()
    # B = task_b()
    # A >> B


my_dag_instance = my_dag()

Related

Providing the same resource to every op in a job

So I need myresource in every op. How do I do this without having to specify it in every op, but in the job definition instead? I think this is possible, but I cannot find a specification in the documentation for exactly what the job config looks like. Thanks!
# NOTE(review): `op`, `job` and `myresource` are not defined in this excerpt;
# they are presumably dagster's decorators and a resource definition -- confirm.
@op()
def op1(context):
    # This throws an error
    res = context.resources.myresource
    return


@op()
def op2(context):
    res = context.resources.myresource
    return


@op()
def op3(context):
    res = context.resources.myresource
    return


@op()
def op4(context):
    res = context.resources.myresource
    return


# The resource is supplied once, at job level, under the key every op reads.
@job(
    resource_defs={"myresource": myresource.configured({"arg": "whatever"})}
)
def myjob():
    op1()
    op2()
    op3()
    op4()

How to ignore the whole output of an instruction using python doctest?

Ellipsis does not seem to work to ignore the whole output line.
I'd like to ignore everything that is output by foo:
def foo():
    """
    >>> foo() # doctest: +ELLIPSIS
    ...
    """
    # The docstring above is the doctest under discussion and is kept verbatim.
    # doctest reads a line starting with `...` right after a `>>>` line as a
    # continuation prompt, not as expected output, so this example expects
    # nothing and fails once the line below prints.
    print("IGNORE ME")


if __name__ == '__main__':
    # Run the module's doctests when executed as a script.
    import doctest
    doctest.testmod()
Running with python3 gives:
Failed example:
foo() # doctest: +ELLIPSIS
Expected nothing
Got:
IGNORE ME
**********************************************************************
1 items had failures:
1 of 1 in __main__.foo
***Test Failed*** 1 failures.
Note that ignoring only part of the output works. Adding a character before ... (here "-"):
def foo():
    """
    >>> foo() # doctest: +ELLIPSIS
    -...
    """
    # The docstring above is the doctest under discussion and is kept verbatim.
    # Because the expected line starts with "-", it is parsed as expected
    # output, and the trailing `...` then matches the rest under +ELLIPSIS.
    print("-IGNORE ME")


if __name__ == '__main__':
    # Run the module's doctests when executed as a script.
    import doctest
    doctest.testmod()

AttributeError: MomentSGD optimizer has no attribute prepare

Recently, I ran code released by other authors. They used chainer v1.3, but I have v4 installed. When I run the code, it fails with AttributeError: MomentSGD optimizer has no attribute prepare. Here is the relevant part of the code:
class BaseModel(chainer.Chain):
    """Base chain that tracks loss/accuracy and wraps train/validate/IO helpers.

    Subclasses must implement ``__call__``; the helpers here read
    ``self.loss`` and ``self.accuracy`` after the forward pass.
    """

    # Results of the most recent forward pass; None until one has been set.
    loss = None
    accuracy = None
    # True while the model is considered to live on the GPU (see to_gpu/to_cpu).
    gpu_mode = False
    # Set to True by train() and to False by validate()/test().
    _train = False

    def __call__(self, *arg_list, **arg_dict):
        """Forward pass; must be provided by subclasses."""
        raise NotImplementedError()

    def clear(self):
        """Reset the stored loss and accuracy to None."""
        self.loss = None
        self.accuracy = None

    def train(self, data, optimizer):
        """Run one ``optimizer.update(self, data)`` step in training mode.

        Returns ``float(self.loss.data)``, or a ``(loss, accuracy)`` tuple of
        floats when ``self.accuracy`` is set.
        """
        self._train = True
        optimizer.update(self, data)
        if self.accuracy is None:
            return float(self.loss.data)
        else:
            return float(self.loss.data), float(self.accuracy.data)

    def validate(self, data):
        """Call the model on ``data`` with training mode off.

        Returns ``float(self.loss.data)``, or a ``(loss, accuracy)`` tuple of
        floats when ``self.accuracy`` is set.
        """
        self._train = False
        self(data)
        if self.accuracy is None:
            return float(self.loss.data)
        else:
            return float(self.loss.data), float(self.accuracy.data)

    def test(self, data):
        """Switch training mode off; the actual test logic is left to subclasses."""
        self._train = False
        raise NotImplementedError()

    def save(self, fname):
        """Serialize this model to ``fname`` via ``serializers.save_hdf5``."""
        serializers.save_hdf5(fname, self)

    def load(self, fname):
        """Load this model's state from ``fname`` via ``serializers.load_hdf5``."""
        serializers.load_hdf5(fname, self)

    def cache(self):
        """Return a copy of the model taken after ``to_cpu()``.

        ``to_gpu()`` is called on this model afterwards, regardless of where it
        was before the call.
        """
        self.to_cpu()
        cached_model = self.copy()
        self.to_gpu()
        return cached_model

    # this part is the error part
    def setup(self, optimizer):
        """Move the model to the GPU and attach it to ``optimizer``."""
        self.to_gpu()
        optimizer.target = self
        optimizer.prepare()

    def to_cpu(self):
        """Move to the CPU unless ``gpu_mode`` says the model is already there."""
        if not self.gpu_mode:
            return
        super(BaseModel, self).to_cpu()
        self.gpu_mode = False

    def to_gpu(self):
        """Move to the GPU unless ``gpu_mode`` says the model is already there."""
        if self.gpu_mode:
            return
        super(BaseModel, self).to_gpu()
        self.gpu_mode = True
Newer version of chainer uses setup method to initialize optimizer.
Can you try modifying your code as follows?
def setup(self, optimizer):
self.to_gpu()
optimizer.setup(self)

How should you use argparse to choose which action to perform and pass arguments to it?

I want to use the argparse library to parse some arguments but I'm struggling to work out what in the myriad of ways you can specify arguments is the simplest way to choose between a few actions. Different actions require different numbers of arguments.
Given the following calls I'd expect the following outputs:
> python MyClass.py action1 foo
Action 1: 12345 - foo
> python MyClass.py action2 20 30
Action 2: 12345 - 20 30
The following seems to work:
import argparse


class MyClass:
    """Holds a value and exposes the actions selectable from the command line."""

    def __init__(self, someVar):
        self.someVar = someVar

    def Action1(self, intToPrint):
        """Print the stored value followed by ``intToPrint``."""
        print("Action 1: %d - %s" % (self.someVar, intToPrint))

    def Action2(self, firstNum, firstString):
        """Print the stored value followed by ``firstNum`` and ``firstString``."""
        print("Action 2: %d - %d %s" % (self.someVar, firstNum, firstString))


def CallAction1(mc, args):
    """Adapt the parsed namespace to ``MyClass.Action1``."""
    mc.Action1(args.intToPrint)


def CallAction2(mc, args):
    """Adapt the parsed namespace to ``MyClass.Action2``."""
    mc.Action2(args.firstNum, args.firstString)


def Main(argv=None):
    """Parse the command line and dispatch to the selected action.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.

    Raises:
        SystemExit: Raised by argparse on invalid arguments or when no
            sub-command is given.
    """
    parser = argparse.ArgumentParser(prog='PythonArgumentParsing.py')
    # required=True makes a missing sub-command a usage error instead of an
    # AttributeError on `args.func` further down.
    subparsers = parser.add_subparsers(
        help='commands', dest='command', required=True
    )

    action1Group = subparsers.add_parser('action1', help='action 1 help')
    action1Group.add_argument('intToPrint', type=str)
    action1Group.set_defaults(func=CallAction1)

    action2Group = subparsers.add_parser('action2', help='action 2 help')
    action2Group.add_argument('firstNum', type=int)
    action2Group.add_argument('firstString', type=str)
    action2Group.set_defaults(func=CallAction2)

    args = parser.parse_args(argv)
    someVar = 12345
    mc = MyClass(someVar)
    # Each sub-parser stored its adapter as `func`.
    args.func(mc, args)


if __name__ == "__main__":
    Main()
...but it seems a little clunky to have to create a CallAction to pass arguments from the parser.
Is there any way to clean this up?
I gather that you are just bothered by needing to write the Call_Action... functions which convert the args namespace into positional parameters for the method calls.
Using keyword parameters might eliminate this need. The following hasn't been tested yet:
def Action1(self, intToPrint=None, **kwargs):
    """Print the stored value followed by ``intToPrint``.

    Extra keyword arguments are accepted and ignored so the whole
    ``vars(args)`` dictionary can be passed in.
    """
    print("Action 1: %d - %s" % (self.someVar, intToPrint))
def Action2(self, firstNum=None, firstString=None, **kwargs):
    """Print the stored value followed by ``firstNum`` and ``firstString``.

    Extra keyword arguments are accepted and ignored so the whole
    ``vars(args)`` dictionary can be passed in.
    """
    print("Action 2: %d - %d %s" % (self.someVar, firstNum, firstString))
...
action1Group.set_defaults(func=MyClass.Action1)
...
args.func(mc, **vars(args))
If I've done this right I can pass the whole vars(args) dictionary to the method. It will use the parameters that it needs, and ignore the rest.
argparse makes extensive use of the **kwargs method of passing parameters.

How can I use functools.partial on multiple methods on an object, and freeze parameters out of order?

I find functools.partial to be extremely useful, but I would like to be able to freeze arguments out of order (the argument you want to freeze is not always the first one) and I'd like to be able to apply it to several methods on a class at once, to make a proxy object that has the same methods as the underlying object except with some of its methods parameters being frozen (think of it as generalizing partial to apply to classes). And I'd prefer to do this without editing the original object, just like partial doesn't change its original function.
I've managed to scrape together a version of functools.partial called 'bind' that lets me specify parameters out of order by passing them by keyword argument. That part works:
>>> def foo(x, y):
... print x, y
...
>>> bar = bind(foo, y=3)
>>> bar(2)
2 3
But my proxy class does not work, and I'm not sure why:
>>> class Foo(object):
... def bar(self, x, y):
... print x, y
...
>>> a = Foo()
>>> b = PureProxy(a, bar=bind(Foo.bar, y=3))
>>> b.bar(2)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: bar() takes exactly 3 arguments (2 given)
I'm probably doing this all sorts of wrong because I'm just going by what I've pieced together from random documentation, blogs, and running dir() on all the pieces. Suggestions both on how to make this work and better ways to implement it would be appreciated ;) One detail I'm unsure about is how this should all interact with descriptors. Code follows.
from types import MethodType
class PureProxy(object):
    """Proxy that forwards attribute access to ``underlying``.

    Substitutions carrying an ``underlying`` attribute (as set by ``bind``)
    are installed on the proxy as bound methods.
    """

    def __init__(self, underlying, **substitutions):
        self.underlying = underlying
        for name in substitutions:
            subst_attr = substitutions[name]
            if hasattr(subst_attr, "underlying"):
                # NOTE(review): the three-argument MethodType call is the
                # Python 2 form.
                setattr(self, name, MethodType(subst_attr, self, PureProxy))

    def __getattribute__(self, name):
        # __getattribute__ runs for *every* lookup, so names installed by
        # __init__ are never consulted; everything resolves on `underlying`.
        return getattr(object.__getattribute__(self, "underlying"), name)
def bind(f, *args, **kwargs):
    """Freeze some arguments of ``f`` to fixed values.

    Unlike functools.partial, arguments can be frozen by name, which also
    lets you freeze them out of order. Positional ``args`` are treated just
    like partial's, while ``kwargs`` properly take into account that a regular
    argument may be specified by name.

    Args:
        f: Function or method to wrap. Methods are unwrapped to their
            underlying function, so ``self`` must be supplied at call time.
        *args: Values for the leading parameters of ``f``.
        **kwargs: Values for parameters of ``f`` given by name.

    Returns:
        A function that fills the remaining parameters, in declaration order,
        from its positional arguments, applies its keyword arguments on top,
        and returns the result of ``f``. The wrapped function is exposed as
        its ``underlying`` attribute.
    """
    # Imported locally: the surrounding snippet does not import inspect.
    import inspect

    # getargspec no longer exists in current Python; prefer getfullargspec.
    get_spec = getattr(inspect, "getfullargspec", None) or inspect.getargspec
    arg_names = get_spec(f).args
    # Unwrap methods (`im_func` on Python 2, `__func__` on Python 3) so every
    # argument, including `self`, can be passed by keyword below.
    f = getattr(f, "im_func", getattr(f, "__func__", f))

    frozen = dict(kwargs)
    frozen.update(zip(arg_names, args))
    num_plugged = min(len(args), len(arg_names))

    def new_func(*inner_args, **inner_kwargs):
        # Work on a per-call copy: writing into `frozen` would make the
        # arguments of the first call stick to every later call.
        call_args = dict(frozen)
        remaining = iter(inner_args)
        for name in arg_names[num_plugged:]:
            if name in call_args:
                continue
            try:
                call_args[name] = next(remaining)
            except StopIteration:
                # We can't raise an error here because some remaining
                # arguments may have been passed in by keyword.
                break
        call_args.update(inner_kwargs)
        return f(**call_args)

    new_func.underlying = f
    return new_func
Update: In case anyone can benefit, here's the final implementation I went with:
from types import MethodType
class PureProxy(object):
    """Proxy for ``underlying`` with some method arguments frozen.

    Intended usage:
    >>> class Foo(object):
    ...     def bar(self, x, y):
    ...         print x, y
    ...
    >>> a = Foo()
    >>> b = PureProxy(a, bar=FreezeArgs(y=3))
    >>> b.bar(1)
    1 3
    """

    def __init__(self, underlying, **substitutions):
        self.underlying = underlying
        for name in substitutions:
            subst_attr = substitutions[name]
            if isinstance(subst_attr, FreezeArgs):
                # Look the method up on the wrapped object, freeze the
                # requested arguments, and install the result on the proxy.
                underlying_func = getattr(underlying, name)
                new_method_func = bind(underlying_func, *subst_attr.args, **subst_attr.kwargs)
                # NOTE(review): the three-argument MethodType call is the
                # Python 2 form.
                setattr(self, name, MethodType(new_method_func, self, PureProxy))

    def __getattr__(self, name):
        # Only called when normal lookup fails, so names installed by
        # __init__ win and everything else is delegated.
        return getattr(self.underlying, name)
class FreezeArgs(object):
    """Record positional and keyword arguments to be frozen later."""

    def __init__(self, *args, **kwargs):
        self.args = args
        self.kwargs = kwargs
def bind(f, *args, **kwargs):
    """Freeze some arguments of ``f`` to fixed values.

    Unlike functools.partial, arguments can be frozen by name, which also
    lets you freeze them out of order. Positional ``args`` are treated just
    like partial's, while ``kwargs`` properly take into account that a regular
    argument may be specified by name.

    Args:
        f: Function or method to wrap. Methods are unwrapped to their
            underlying function, so ``self`` must be supplied at call time.
        *args: Values for the leading parameters of ``f``.
        **kwargs: Values for parameters of ``f`` given by name.

    Returns:
        A function that fills the remaining parameters, in declaration order,
        from its positional arguments, applies its keyword arguments on top,
        and returns the result of ``f``.
    """
    # Imported locally: the surrounding snippet does not import inspect.
    import inspect

    # getargspec no longer exists in current Python; prefer getfullargspec.
    get_spec = getattr(inspect, "getfullargspec", None) or inspect.getargspec
    arg_names = get_spec(f).args
    # Unwrap methods (`im_func` on Python 2, `__func__` on Python 3) so every
    # argument, including `self`, can be passed by keyword below.
    f = getattr(f, "im_func", getattr(f, "__func__", f))

    frozen = dict(kwargs)
    frozen.update(zip(arg_names, args))
    num_plugged = min(len(args), len(arg_names))

    def new_func(*inner_args, **inner_kwargs):
        # Work on a per-call copy: writing into `frozen` would make the
        # arguments of the first call stick to every later call.
        call_args = dict(frozen)
        remaining = iter(inner_args)
        for name in arg_names[num_plugged:]:
            if name in call_args:
                continue
            try:
                call_args[name] = next(remaining)
            except StopIteration:
                # We can't raise an error here because some remaining
                # arguments may have been passed in by keyword.
                break
        call_args.update(inner_kwargs)
        return f(**call_args)

    return new_func
You're "binding too deep": change def __getattribute__(self, name): to def __getattr__(self, name): in class PureProxy. __getattribute__ intercepts every attribute access and so bypasses everything that you've set with setattr(self, name, ...), leaving those setattr calls without any effect, which is obviously not what you want; __getattr__ is called only for access to attributes not otherwise defined, so those setattr calls become "operative" and useful.
In the body of that override, you can and should also change object.__getattribute__(self, "underlying") to self.underlying (since you're not overriding __getattribute__ any more). There are other changes I'd suggest (enumerate in lieu of the low-level logic you're using for counters, etc) but they wouldn't change the semantics.
With the change I suggest, your sample code works (you'll have to keep testing with more subtle cases of course). BTW, the way I debugged this was simply to stick in print statements in the appropriate places (a Jurassic-era approach but still my favorite;-).

Resources