I need to get the last two successful execution dates of an Airflow job to use in my current run.
Example:
Execution date    Job status
2020-05-03        success
2020-05-04        fail
2020-05-05        success
Question:
When I run my job on May 6th, I should get the values for May 3rd and May 5th into variables. Is this possible?
You can leverage SQLAlchemy magic to retrieve the execution_dates of the last 'n' successful runs:
from typing import List, Optional
from pendulum import Pendulum
from airflow.utils.state import State
from airflow.utils.db import provide_session
from airflow.settings import Session
from airflow.models.taskinstance import TaskInstance

def last_execution_date(
    dag_id: str, task_id: str, n: int, session: Optional[Session] = None
) -> List[Pendulum]:
    """
    Queries Airflow's meta-db and returns the execution dates of the most
    recent 'n' successful runs of the given task.
    Args:
        dag_id: dag name
        task_id: task name
        n: number of runs
        session: SQLAlchemy session connected to Airflow's meta-db
    Returns:
        list of execution dates of the most recent 'n' successful runs
    """
    query_val = (
        session.query(TaskInstance)
        .filter(
            TaskInstance.dag_id == dag_id,
            TaskInstance.task_id == task_id,
            TaskInstance.state == State.SUCCESS,
        )
        .order_by(TaskInstance.execution_date.desc())
        .limit(n)
    )
    execution_dates: List[Pendulum] = list(map(lambda ti: ti.execution_date, query_val))
    return execution_dates
# The above function can be used as a utility and leveraged with provide_session as below:
last_success_date_fn = provide_session(last_execution_date)  # the provide_session decorator can be used as is.
This snippet is tested end to end and can be used in prod.
I referred to the tree() method of views.py to come up with this script.
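A hedged usage sketch (not part of the original answer; the dag/task ids and callable name are placeholders): the wrapped function can be called from any task to get, for instance, the two most recent successful execution dates asked about in the question.
def read_prev_success_dates(**context):
    # Placeholder ids for illustration; on the May 6th run this would return
    # the May 5th and May 3rd execution dates (most recent first).
    last_two_dates = last_success_date_fn(dag_id="my_dag", task_id="my_task", n=2)
    latest, second_latest = last_two_dates[0], last_two_dates[1]
    return latest, second_latest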
Alternatively, you can fire this SQL query at Airflow's meta-db to retrieve the last n execution dates with successful runs:
SELECT execution_date
FROM task_instance
WHERE dag_id = 'my_dag_id'
AND task_id = 'my_task_id'
AND state = 'success'
ORDER BY execution_date DESC
LIMIT n
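A minimal, untested sketch (not from the original answer) of running that query from Python through Airflow's own SQLAlchemy engine; the ids are placeholders and LIMIT is fixed to the two runs the question asks about.
from airflow import settings
from sqlalchemy import text

stmt = text("""
    SELECT execution_date
    FROM task_instance
    WHERE dag_id = :dag_id AND task_id = :task_id AND state = 'success'
    ORDER BY execution_date DESC
    LIMIT 2
""")
# settings.engine is the engine Airflow itself uses to reach the meta-db
with settings.engine.connect() as conn:
    rows = conn.execute(stmt, {"dag_id": "my_dag_id", "task_id": "my_task_id"})
    execution_dates = [row[0] for row in rows]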
In the latest version of Airflow:
def last_execution_date(dag_id: str, task_id: str, n: int):
    session = Session()
    query_val = (
        session.query(TaskInstance)
        .filter(
            TaskInstance.dag_id == dag_id,
            TaskInstance.task_id == task_id,
            TaskInstance.state == State.SUCCESS,
        )
        .order_by(TaskInstance.execution_date.desc())
        .limit(n)
    )
    execution_dates = list(map(lambda ti: ti.execution_date, query_val))
    return execution_dates
I'm working with Airflow 2.1.4 and looking to find the status of the prior task run (Task Run, not Task Instance and not Dag Run).
I.e., DAG MorningWorkflow runs at 9:00am, and task ConditionalTask is in that DAG. There is some precondition logic that will throw an AirflowSkipException in a number of situations (including the timeframe of day and other context-specific information to reduce the likelihood of collisions with independent processes).
If ConditionalTask fails, we can fix the issue, clear the failed run, and re-run it without running the entire DAG. However, the skip logic reruns and will often now skip it, even though the original conditions were non-skipping.
So, I want to update the precondition logic to never skip if this taskinstance ran previously and failed. I can determine whether the taskinstance ran previously using TaskInstance.try_number or TaskInstance.prev_attempted_tries, but this doesn't tell me whether it actually tried to run originally or if it was skipped (i.e., if we clear the entire DagRun to rerun the whole workflow, we would want it to still skip).
An alternative would be to determine whether the first attempted run was skipped or not.
@Kevin Crouse
To answer your question, we can take advantage of from airflow.models import DagRun.
To provide you with a complete answer, I have created two functions to assist you in resolving similar quandaries in the future.
How to return the overall state/success of a specific dag_id passed as a function arg?
def get_last_dag_run_status(dag_id):
    """ Returns the status of the last dag run for the given dag_id

    1. Utilise the find method of the DagRun class
    2. Step 1 returns a list, so we sort it by the last execution date
    3. I have returned 2 examples for you to see a) the state, b) the last execution date;
       you can explore this further by just returning last_dag_run[0]

    Args:
        dag_id (str): The dag_id to check
    Returns:
        List - The status of the last dag run for the given dag_id
        List - The last execution date of the dag run for the given dag_id
    """
    last_dag_run = DagRun.find(dag_id=dag_id)
    last_dag_run.sort(key=lambda x: x.execution_date, reverse=True)
    return [last_dag_run[0].state, last_dag_run[0].execution_date]
How to return the status of a specific task_id, within a specific dag_id?
def get_task_status(dag_id, task_id):
    """ Returns the status of the last dag run for the given dag_id

    1. The code is very similar to the above function; I use it as the foundation for many similar problems/solutions
    2. The key difference is that in the return statement, we can directly access .get_task_instance, passing our desired task_id, and its state

    Args:
        dag_id (str): The dag_id to check
        task_id (str): The task_id to check
    Returns:
        List - The status of the last dag run for the given dag_id
    """
    last_dag_run = DagRun.find(dag_id=dag_id)
    last_dag_run.sort(key=lambda x: x.execution_date, reverse=True)
    return last_dag_run[0].get_task_instance(task_id).state
I hope this helps you in your journey to resolve your issues.
For posterity, here is a complete dummy Dag to demonstrate the 2 functions working.
from airflow import DAG
from airflow.operators.dummy import DummyOperator
from airflow.operators.python import PythonOperator
from airflow.models import DagRun
from datetime import datetime

def get_last_dag_run_status(dag_id):
    """ Returns the status of the last dag run for the given dag_id

    Args:
        dag_id (str): The dag_id to check
    Returns:
        List - The status of the last dag run for the given dag_id
        List - The last execution date of the dag run for the given dag_id
    """
    last_dag_run = DagRun.find(dag_id=dag_id)
    last_dag_run.sort(key=lambda x: x.execution_date, reverse=True)
    return [last_dag_run[0].state, last_dag_run[0].execution_date]

def get_task_status(dag_id, task_id):
    """ Returns the status of the last dag run for the given dag_id

    Args:
        dag_id (str): The dag_id to check
        task_id (str): The task_id to check
    Returns:
        List - The status of the last dag run for the given dag_id
    """
    last_dag_run = DagRun.find(dag_id=dag_id)
    last_dag_run.sort(key=lambda x: x.execution_date, reverse=True)
    return last_dag_run[0].get_task_instance(task_id).state

with DAG(
    'stack_overflow_ans_1',
    tags=['SO'],
    start_date=datetime(2022, 1, 1),
    schedule_interval=None,
    catchup=False,
    is_paused_upon_creation=False
) as dag:

    t1 = DummyOperator(
        task_id='start'
    )

    t2 = PythonOperator(
        task_id='get_last_dag_run_status',
        python_callable=get_last_dag_run_status,
        op_args=['YOUR_DAG_NAME'],
        do_xcom_push=False
    )

    t3 = PythonOperator(
        task_id='get_task_status',
        python_callable=get_task_status,
        op_args=['YOUR_DAG_NAME', 'YOUR_DAG_TASK_WITHIN_THE_DAG'],
        do_xcom_push=False
    )

    t4 = DummyOperator(
        task_id='end'
    )

    t1 >> t2 >> t3 >> t4
I tried the code below but I am still getting an issue:
from airflow.models import DagModel

def get_latest_execution_date(**kwargs):
    session = airflow.settings.Session()
    f = open("/home/Insurance/InsuranceDagsTimestamp.txt", "w+")
    try:
        Insurance_last_dag_run = session.query(DagModel)
        for Insdgrun in Insurance_last_dag_run:
            if Insdgrun is None:
                f.write(Insdgrun.dag_id+",9999-12-31"+"\n")
            else:
                f.write(Insdgrun.dag_id+","+ Insdgrun.execution_date+"\n")
    except:
        session.rollback()
    finally:
        session.close()

t1 = PythonOperator(
    task_id='records',
    provide_context=True,
    python_callable=get_latest_execution_date,
    dag=dag)
Is there any way to fix this and get the latest DAG run time information?
There are multiple ways to get the most recent execution of a DagRun. One way is to make use of the Airflow DagRun model.
from airflow.models import DagRun

def get_most_recent_dag_run(dag_id):
    dag_runs = DagRun.find(dag_id=dag_id)
    dag_runs.sort(key=lambda x: x.execution_date, reverse=True)
    return dag_runs[0] if dag_runs else None

dag_run = get_most_recent_dag_run('fake-dag-id-001')
if dag_run:
    print(f'The most recent DagRun was executed at: {dag_run.execution_date}')
You can find more info on the DagRun model and its properties in the Airflow Docs located here.
The PythonOperator op_args parameter is templatized.
The callable only writes the latest execution date to a file so you can implement the function the following way:
def store_last_execution_date(execution_date):
    '''Appends latest execution date to a file

    :param execution_date: The last execution date of the DagRun.
    '''
    with open("/home/Insurance/InsuranceDagsTimestamp.txt", "w+") as f:
        f.write(execution_date)

t1 = PythonOperator(
    task_id="records",
    provide_context=True,
    python_callable=store_last_execution_date,
    op_args=[
        "{{dag.get_latest_execution_date()}}",
    ],
    dag=dag
)
I have a stored XCom value that I wanted to pass to another python function which is not called using PythonOperator.
def sql_file_template():
    # <some code which uses the xcom variable>
    ...

def call_stored_proc(**kwargs):
    #project = kwargs['row_id']
    print("INSIDE CALL STORE PROC ------------")
    query = """CALL `{0}.dataset_name.store_proc`(
        '{1}'     # source table
        , ['{2}'] # row_ids
        , '{3}'   # pivot_col_name
        , '{4}'   # pivot_col_value
        , 100     # max_columns
        , 'MAX'   # aggregation
    );"""
    query = query.format(kwargs['project'], kwargs['source_tbl'], kwargs['row_id'],
                         kwargs['pivot_col'], kwargs['pivot_val'])
    job = client.query(query, location="US")
    for result in job.result():
        task_instance = kwargs['task_instance']
        task_instance.xcom_push(key='query_string', value=result)
        print(result)
    return result

bq_cmd = PythonOperator(
    task_id='task1',
    provide_context=True,
    python_callable=call_stored_proc,
    op_kwargs={'project': project,
               'source_tbl': source_tbl,
               'row_id': row_id,
               'pivot_col': pivot_col,
               'pivot_val': pivot_val
               },
    dag=dag
)

dummy_operator >> bq_cmd

sql_file_template()
The output of the stored proc is a string which is captured using XCom.
Now I would like to pass this value to some python function sql_file_template without using PythonOperator.
As per the Airflow documentation, XCom can be accessed only between tasks.
Can anyone help on this?
If you have access to the Airflow installation you'd like to query (configuration, database access, and code), you can use Airflow's airflow.models.XCom.get_one class method:
from datetime import datetime
from airflow.models import XCom

execution_date = datetime(2020, 8, 28)

xcom_value = XCom.get_one(execution_date=execution_date,
                          task_id="the_task_id",
                          dag_id="the_dag_id")
So you want to access XCOM outside Airflow (probably a different project / module, without creating any Airflow DAGs / tasks)?
Airflow uses SQLAlchemy for mapping all its models (including XCom) to corresponding SQLAlchemy backend (meta-db) tables.
Therefore this can be done in two ways:
1. Leverage Airflow's SQLAlchemy model
(without having to create a task or DAG). Here's an untested code snippet for reference:
from typing import List, Optional

from airflow.models import XCom
from airflow.settings import Session
from airflow.utils.db import provide_session
from pendulum import Pendulum

@provide_session
def read_xcom_values(dag_id: str,
                     task_id: str,
                     execution_date: Pendulum,
                     session: Optional[Session] = None) -> List[str]:
    """
    Function that reads and returns 'values' of XCOMs with given filters

    :param dag_id:
    :param task_id:
    :param execution_date: datetime object
    :param session: Airflow's SQLAlchemy Session (this param must not be passed,
                    it will be automatically supplied by the '@provide_session' decorator)
    :return:
    """
    # read XCOMs
    xcoms: List[XCom] = session.query(XCom).filter(
        XCom.dag_id == dag_id, XCom.task_id == task_id,
        XCom.execution_date == execution_date).all()
    # retrieve 'value' fields from XCOMs
    xcom_values: List[str] = list(map(lambda xcom: xcom.value, xcoms))
    return xcom_values
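A hedged usage sketch (the ids and date are placeholders, not from the original answer): the decorator supplies the session, so only the filters need to be passed.
# Placeholder dag/task ids and date for illustration; 'session' is injected by @provide_session.
values = read_xcom_values(
    dag_id="the_dag_id",
    task_id="the_task_id",
    execution_date=Pendulum(2020, 8, 28),
)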
Do note that since it imports airflow packages, it still requires a working Airflow installation on the Python classpath (as well as a connection to the backend-db), but here we are not creating any tasks or DAGs (this snippet can be run in a standalone Python file).
For this snippet, I have referred to views.py which is my favorite place to peek into Airflow's SQLAlchemy magic
2. Directly query Airflow's SQLAlchemy backend meta-db
Connect to the meta-db and run this query:
SELECT value FROM xcom WHERE dag_id='' AND task_id='' AND ..
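A minimal, untested sketch of this second option (the connection URI and ids are placeholders), assuming a Postgres meta-db reachable from outside Airflow:
from sqlalchemy import create_engine, text

# Placeholder URI and ids; adjust to your own meta-db
engine = create_engine("postgresql+psycopg2://user:pass@host:5432/airflow")
with engine.connect() as conn:
    rows = conn.execute(
        text("SELECT value FROM xcom WHERE dag_id = :dag_id AND task_id = :task_id"),
        {"dag_id": "the_dag_id", "task_id": "the_task_id"},
    )
    xcom_values = [row[0] for row in rows]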
I am working on some simple Apache Airflow DAG. My goal is to:
1. calculate the data parameter based on the DAG run date - I try to achieve that with the Python operator.
2. pass the parameter calculated above as a BigQuery query parameter.
Any ideas are welcome.
My code is below - I have marked the two points I am struggling with using 'TODO' labels.
...
def set_date_param(dag_run_time):
    # a business logic applied here
    ....
    return "2020-05-28"  # example result

# --------------------------------------------------------
# DAG definition below
# --------------------------------------------------------

# Python operator
set_data_param = PythonOperator(
    task_id='set_data_param',
    python_callable=set_date_param,
    provide_context=True,
    op_kwargs={
        "dag_run_date": #TODO - how to pass the DAG running date as a function input parameter
    },
    dag=dag
)

# bq operator
load_data_to_bq_table = BigQueryOperator(
    task_id='load_data_to_bq_table',
    sql="""SELECT ccustomer_id, sales
           FROM `my_project.dataset1.table1`
           WHERE date_key = {date_key_param}
        """.format(
        date_key_param =
    ),  #TODO - how to get the python operator results from the previous step
    use_legacy_sql=False,
    destination_dataset_table="my_project.dataset2.table2",
    trigger_rule='all_success',
    dag=dag
)

set_data_param >> load_data_to_bq_table
For PythonOperator to pass the execution date to the python_callable, you only need to set provide_context=True (as has already been done in your example). This way, Airflow automatically passes a collection of keyword arguments to the python callable, such that the names and values of these arguments are equivalent to the template variables described here. That is, if you define the python callable as set_data_param(ds, **kwargs): ..., the ds parameter will automatically get the execution date as a string value in the format YYYY-MM-DD.
XCOM allows task instances to exchange messages. To use the date returned by set_date_param() inside the sql query string of BigQueryOperator, you can combine XCOM with Jinja templating:
sql="""SELECT ccustomer_id, sales
FROM `my_project.dataset1.table1`
WHERE date_key = {{ task_instance.xcom_pull(task_ids='set_data_param') }}
"""
The following complete example puts all pieces together. In the example, the get_date task creates a date string based on the execution date. After that, the use_date task uses XCOM and Jinja templating to retrieve the date string and writes it to a log.
import logging
from airflow import DAG
from airflow.operators.python_operator import PythonOperator
from airflow.utils.dates import days_ago

default_args = {'start_date': days_ago(1)}

def calculate_date(ds, execution_date, **kwargs):
    return f'{ds} ({execution_date.strftime("%m/%d/%Y")})'

def log_date(date_string):
    logging.info(date_string)

with DAG(
    'a_dag',
    schedule_interval='*/5 * * * *',
    default_args=default_args,
    catchup=False,
) as dag:
    get_date = PythonOperator(
        task_id='get_date',
        python_callable=calculate_date,
        provide_context=True,
    )
    use_date = PythonOperator(
        task_id='use_date',
        python_callable=log_date,
        op_args=['Date: {{ task_instance.xcom_pull(task_ids="get_date") }}'],
    )
    get_date >> use_date
Is there any way to make a user-defined macro in Airflow which is itself computed from other macros?
from airflow import DAG
from airflow.operators.bash_operator import BashOperator

dag = DAG(
    'simple',
    schedule_interval='0 21 * * *',
    user_defined_macros={
        'next_execution_date': '{{ dag.following_schedule(execution_date) }}',
    },
)

task = BashOperator(
    task_id='bash_op',
    bash_command='echo "{{ next_execution_date }}"',
    dag=dag,
)
The use case here is to back-port the new Airflow v1.8 next_execution_date macro to work in Airflow v1.7. Unfortunately, this template is rendered without macro expansion:
$ airflow render simple bash_op 2017-08-09 21:00:00
# ----------------------------------------------------------
# property: bash_command
# ----------------------------------------------------------
echo "{{ dag.following_schedule(execution_date) }}"
Here are some solutions:
1. Override BashOperator to add some values to the context
class NextExecutionDateAwareBashOperator(BashOperator):
    def render_template(self, attr, content, context):
        dag = context['dag']
        execution_date = context['execution_date']
        context['next_execution_date'] = dag.following_schedule(execution_date)
        return super().render_template(attr, content, context)
        # or in python 2:
        # return super(NextExecutionDateAwareBashOperator, self).render_template(attr, content, context)
The good part with this approach: you can capture some repeated code in your custom operator.
The bad part: you have to write a custom operator to add values to the context, before templated fields are rendered.
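For illustration (a sketch that is not part of the original answer), the custom operator can then be used with the bare macro name from the question:
# Hypothetical usage: the subclass injects 'next_execution_date' into the template
# context before rendering, so the plain macro now resolves.
task = NextExecutionDateAwareBashOperator(
    task_id='bash_op',
    bash_command='echo "{{ next_execution_date }}"',
    dag=dag,
)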
2. Do your computation in a user defined macro
Macros are not necessarily values. They can be functions.
In your DAG:
def compute_next_execution_date(dag, execution_date):
    return dag.following_schedule(execution_date)

dag = DAG(
    'simple',
    schedule_interval='0 21 * * *',
    user_defined_macros={
        'next_execution_date': compute_next_execution_date,
    },
)

task = BashOperator(
    task_id='bash_op',
    bash_command='echo "{{ next_execution_date(dag, execution_date) }}"',
    dag=dag,
)
The good part: you can define reusable functions to process values available at runtime (XCom values, job instance properties, task instance properties, etc...), and make your function result available to render a template.
The bad part (but not that annoying): you have to import such a function as a user defined macro in every dag where needed.
3. Call your statement directly in your template
This solution is the simplest (as mentioned in Ardan's answer), and probably the right one in your case.
BashOperator(
    task_id='bash_op',
    bash_command='echo "{{ dag.following_schedule(execution_date) }}"',
    dag=dag,
)
Ideal for simple calls like this one. And there are some other objects directly available as macros (like task, task_instance, etc.); even some standard modules are available (like macros.time, ...), for example as in the sketch below.
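An illustrative sketch (not from the original answer) of referencing built-in context objects and the macros module directly in a templated field:
# 'task.task_id' and 'macros.ds_add' are standard entries in the template context.
BashOperator(
    task_id='bash_builtin_macros',
    bash_command='echo "task={{ task.task_id }} ds_plus_7={{ macros.ds_add(ds, 7) }}"',
    dag=dag,
)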
I would vote for making an Airflow Plugin to inject your pre-defined macros.
Using this method, you can use your pre-defined macro in any Operator without declaring anything.
Below are some custom macros that we're using.
Example using: {{ macros.dagtz_next_execution_date(ti) }}
from airflow.plugins_manager import AirflowPlugin
from datetime import datetime, timedelta
from airflow.utils.db import provide_session
from airflow.models import DagRun
import pendulum

@provide_session
def _get_dag_run(ti, session=None):
    """Get DagRun obj of the TaskInstance ti

    Args:
        ti (TYPE): the TaskInstance object
        session (None, optional): Not in use

    Returns:
        DagRun obj: the DagRun obj of the TaskInstance ti
    """
    task = ti.task
    dag_run = None
    if hasattr(task, 'dag'):
        dag_run = (
            session.query(DagRun)
            .filter_by(
                dag_id=task.dag.dag_id,
                execution_date=ti.execution_date)
            .first()
        )
        session.expunge_all()
        session.commit()
    return dag_run

def ds_add_no_dash(ds, days):
    """
    Add or subtract days from a YYYYMMDD

    :param ds: anchor date in ``YYYYMMDD`` format to add to
    :type ds: str
    :param days: number of days to add to the ds, you can use negative values
    :type days: int

    >>> ds_add('20150101', 5)
    '20150106'
    >>> ds_add('20150106', -5)
    '20150101'
    """
    ds = datetime.strptime(ds, '%Y%m%d')
    if days:
        ds = ds + timedelta(days)
    return ds.isoformat()[:10].replace('-', '')

def dagtz_execution_date(ti):
    """get the TaskInstance execution date (in DAG timezone) in pendulum obj

    Args:
        ti (TaskInstance): the TaskInstance object

    Returns:
        pendulum obj: execution_date in pendulum object (in DAG tz)
    """
    execution_date_pdl = pendulum.instance(ti.execution_date)
    dagtz_execution_date_pdl = execution_date_pdl.in_timezone(ti.task.dag.timezone)
    return dagtz_execution_date_pdl

def dagtz_next_execution_date(ti):
    """get the TaskInstance next execution date (in DAG timezone) in pendulum obj

    Args:
        ti (TaskInstance): the TaskInstance object

    Returns:
        pendulum obj: next execution_date in pendulum object (in DAG tz)
    """
    # For manually triggered dagruns that aren't run on a schedule, next/previous
    # schedule dates don't make sense, and should be set to execution date for
    # consistency with how execution_date is set for manually triggered tasks, i.e.
    # triggered_date == execution_date.
    dag_run = _get_dag_run(ti)
    if dag_run and dag_run.external_trigger:
        next_execution_date = ti.execution_date
    else:
        next_execution_date = ti.task.dag.following_schedule(ti.execution_date)
    next_execution_date_pdl = pendulum.instance(next_execution_date)
    dagtz_next_execution_date_pdl = next_execution_date_pdl.in_timezone(ti.task.dag.timezone)
    return dagtz_next_execution_date_pdl

def dagtz_next_ds(ti):
    """get the TaskInstance next execution date (in DAG timezone) in YYYY-MM-DD string
    """
    dagtz_next_execution_date_pdl = dagtz_next_execution_date(ti)
    return dagtz_next_execution_date_pdl.strftime('%Y-%m-%d')

def dagtz_next_ds_nodash(ti):
    """get the TaskInstance next execution date (in DAG timezone) in YYYYMMDD string
    """
    dagtz_next_ds_str = dagtz_next_ds(ti)
    return dagtz_next_ds_str.replace('-', '')

def dagtz_prev_execution_date(ti):
    """get the TaskInstance previous execution date (in DAG timezone) in pendulum obj

    Args:
        ti (TaskInstance): the TaskInstance object

    Returns:
        pendulum obj: previous execution_date in pendulum object (in DAG tz)
    """
    # For manually triggered dagruns that aren't run on a schedule, next/previous
    # schedule dates don't make sense, and should be set to execution date for
    # consistency with how execution_date is set for manually triggered tasks, i.e.
    # triggered_date == execution_date.
    dag_run = _get_dag_run(ti)
    if dag_run and dag_run.external_trigger:
        prev_execution_date = ti.execution_date
    else:
        prev_execution_date = ti.task.dag.previous_schedule(ti.execution_date)
    prev_execution_date_pdl = pendulum.instance(prev_execution_date)
    dagtz_prev_execution_date_pdl = prev_execution_date_pdl.in_timezone(ti.task.dag.timezone)
    return dagtz_prev_execution_date_pdl

def dagtz_prev_ds(ti):
    """get the TaskInstance prev execution date (in DAG timezone) in YYYY-MM-DD string
    """
    dagtz_prev_execution_date_pdl = dagtz_prev_execution_date(ti)
    return dagtz_prev_execution_date_pdl.strftime('%Y-%m-%d')

def dagtz_prev_ds_nodash(ti):
    """get the TaskInstance prev execution date (in DAG timezone) in YYYYMMDD string
    """
    dagtz_prev_ds_str = dagtz_prev_ds(ti)
    return dagtz_prev_ds_str.replace('-', '')

# Defining the plugin class
class AirflowTestPlugin(AirflowPlugin):
    name = "custom_macros"
    macros = [dagtz_execution_date, ds_add_no_dash,
              dagtz_next_execution_date, dagtz_next_ds, dagtz_next_ds_nodash,
              dagtz_prev_execution_date, dagtz_prev_ds, dagtz_prev_ds_nodash]
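A hedged usage sketch (the task id and command are illustrative), following the "Example using" line above, once Airflow has picked up the plugin:
# Illustrative only: the plugin-provided macro is referenced in a templated field.
print_next_date = BashOperator(
    task_id='print_next_date',
    bash_command='echo "{{ macros.dagtz_next_execution_date(ti) }}"',
    dag=dag,
)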
user_defined_macros are not processed as templates by default. If you want to keep a template in a user_defined_macro (or if you use a template in a params variable), you can always re-run the templating function manually:
class DoubleTemplatedBashOperator(BashOperator):
    def pre_execute(self, context):
        context['ti'].render_templates()
And this will work for templates that don't also reference other parameters or UDMs. This way, you can have "two-deep" templates.
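For illustration, an untested sketch (reusing the macro definition from the earlier question) of how the pre_execute re-render lets a templated UDM resolve on the second pass:
# 'next_execution_date' holds a template string; DoubleTemplatedBashOperator
# re-renders templates in pre_execute, so the inner template resolves at runtime.
dag = DAG(
    'double_template_example',
    schedule_interval='0 21 * * *',
    user_defined_macros={
        'next_execution_date': '{{ dag.following_schedule(execution_date) }}',
    },
)

task = DoubleTemplatedBashOperator(
    task_id='bash_op',
    bash_command='echo "{{ next_execution_date }}"',
    dag=dag,
)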
Or put your UDM directly in the BashOperator's command instead (the easiest solution):
BashOperator(
    task_id='bash_op',
    bash_command='echo "{{ dag.following_schedule(execution_date) }}"',
    dag=dag,
)
None of these worked for me, so here's what I did: I used user_defined_macros, but I pass all the template variables to my macro and then use Jinja to render the result.
MACRO_CONFIG = 'config({"data_interval_start": data_interval_start, "data_interval_end": data_interval_end, "ds": ds, "ds_nodash": ds_nodash, "ts": ts, "ts_nodash_with_tz": ts_nodash_with_tz, "ts_nodash": ts_nodash, "prev_data_interval_start_success": prev_data_interval_start_success, "prev_data_interval_end_success": prev_data_interval_end_success, "dag": dag, "task": task, "macros": macros, "task_instance": task_instance, "ti": ti, "params": params, "conn": conn, "task_instance_key_str": task_instance_key_str, "conf": conf, "run_id": run_id, "dag_run": dag_run, "test_mode": test_mode})'
def config_macro(context):
    return FunctionThatReturnsTemplates(context)

with DAG(
    'my-dag-id',
    schedule_interval=None,
    start_date=days_ago(1),
    user_defined_macros={'config': config_macro}
) as dag:
    ...

def config_macro_template(attr_name):
    return '{{' + MACRO_CONFIG + '.' + attr_name + '}}'

class FunctionThatReturnsTemplates(object):
    def __getattribute__(self, name):
        attr = object.__getattribute__(self, name)
        logging.info('attr')
        logging.info(attr)
        logging.info("type(attr)")
        logging.info(type(attr))
        if callable(attr):
            logging.info('method attr')

            def render_result(*args, **kwargs):
                logging.info('before calling %s' % attr.__name__)
                result = attr(*args, **kwargs)
                logging.info('done calling %s' % attr.__name__)
                return Template(result).render(**self.context) if isinstance(result, str) or isinstance(result, unicode) else result

            return render_result
        logging.info('attr is not method')
        if isinstance(attr, str) or isinstance(attr, unicode):
            logging.info('attr is string or unicode')
            result = Template(attr).render(**self.context)
            logging.info(result)
            logging.info("result")
            return result
        return attr

    def __init__(self, context):
        logging.info('from sampling pipeline context')
        logging.info(context)
        self.context = context

...

my_task = SomeOperator(
    templated_field=config_macro_template('function(args)'),
    task_id='my-task-id'
)