Airflow XCom not getting resolved, returns the task_instance template string

I am facing an odd issue with xcom_pull where it always returns the literal template string
"{{ task_instance.xcom_pull(dag_id = 'cf_test',task_ids='get_config_val',key='http_con_id') }}"
My requirement is simple: I push an XCom from a PythonOperator, and with xcom_pull I try to retrieve that value and pass it as the http_conn_id for a SimpleHttpOperator, but the parameter keeps the raw string instead of the resolved XCom value.
The PythonOperator pushes the XCom successfully.
Code:
from datetime import datetime
import simplejson as json
from airflow.models import DAG
from airflow.operators.dummy_operator import DummyOperator
from airflow.operators.python_operator import PythonOperator
from airflow.providers.http.operators.http import SimpleHttpOperator
from google.auth.transport.requests import Request
default_airflow_args = {
"owner": "divyaansh",
"depends_on_past": False,
"start_date": datetime(2022, 5, 18),
"retries": 0,
"schedule_interval": "#hourly",
}
project_configs = {
"project_id": "test",
"conn_id": "google_cloud_storage_default",
"bucket_name": "test-transfer",
"folder_name": "processed-test-rdf",
}
def get_config_vals(**kwargs) -> dict:
"""
Get config vals from airflow variable and store them as xcoms
"""
task_instance = kwargs["task_instance"]
task_instance.xcom_push(key="http_con_id", value="gcp_cloud_function")
def generate_api_token(cf_name: str):
"""
generate token for api request
"""
import google.oauth2.id_token
request = Request()
target_audience = f"https://us-central1-test-a2h.cloudfunctions.net/{cf_name}"
return google.oauth2.id_token.fetch_id_token(
request=request, audience=target_audience
)
with DAG(
dag_id="cf_test",
default_args=default_airflow_args,
catchup=False,
render_template_as_native_obj=True,
) as dag:
start = DummyOperator(task_id="start")
config_vals = PythonOperator(
task_id="get_config_val", python_callable=get_config_vals, provide_context=True
)
ip_data = json.dumps(
{
"bucket_name": project_configs["bucket_name"],
"file_name": "dummy",
"target_location": "/valid",
}
)
conn_id = "{{ task_instance.xcom_pull(dag_id = 'cf_test',task_ids='get_config_val',key='http_con_id') }}"
api_token = generate_api_token("new-cp")
cf_task = SimpleHttpOperator(
task_id="file_decrypt_and_validate_cf",
http_conn_id=conn_id,
method="POST",
endpoint="new-cp",
data=json.dumps(
json.dumps(
{
"bucket_name": "test-transfer",
"file_name": [
"processed-test-rdf/dummy_20220501.txt",
"processed-test-rdf/dummy_20220502.txt",
],
"target_location": "/valid",
}
)
),
headers={
"Authorization": f"bearer {api_token}",
"Content-Type": "application/json",
},
do_xcom_push=True,
log_response=True,
)
print("task new-cp", cf_task)
check_flow = DummyOperator(task_id="check_flow")
end = DummyOperator(task_id="end")
start >> config_vals >> cf_task >> check_flow >> end
Error Message:
raise AirflowNotFoundException(f"The conn_id `{conn_id}` isn't defined") airflow.exceptions.AirflowNotFoundException: The conn_id `"{{ task_instance.xcom_pull(dag_id = 'cf_test',task_ids='get_config_val',key='http_con_id') }}"` isn't defined
I have tried several different ways but nothing seems to be working.
Can someone point me in the right direction here?
Airflow-version : 2.2.3
Composer-version : 2.0.11

In SimpleHttpOperator the http_conn_id parameter is not a templated field, so you cannot use the Jinja engine with it; the parameter is never rendered. When you pass "{{ task_instance.xcom_pull(dag_id = 'cf_test',task_ids='get_config_val',key='http_con_id') }}" to the operator, you expect it to be replaced at runtime with the value stored in XCom by the previous task, but Airflow treats it as a plain string. That is exactly what the exception tells you: Airflow tries to look up a connection whose name is your very long string, cannot find one, and reports that the connection is not defined.
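You can quickly verify which parameters accept Jinja by inspecting the operator's template_fields class attribute; a minimal check (the exact tuple depends on the provider version you are running):
from airflow.providers.http.operators.http import SimpleHttpOperator

# http_conn_id does not appear in this tuple, so Jinja never renders it and
# the literal string is used for the connection lookup.
print(SimpleHttpOperator.template_fields)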
To solve it you can create a custom operator:
class MySimpleHttpOperator(SimpleHttpOperator):
    template_fields = SimpleHttpOperator.template_fields + ("http_conn_id",)
Then you should replace SimpleHttpOperator with MySimpleHttpOperator in your DAG.
This change makes the string you set in http_conn_id pass through the Jinja engine, so in your case it will be replaced with the XCom value as you expect.
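In your DAG that amounts to swapping the operator class for cf_task; a sketch keeping only the arguments relevant to the fix (payload and auth header stay as in your code):
cf_task = MySimpleHttpOperator(
    task_id="file_decrypt_and_validate_cf",
    # rendered by Jinja at runtime, so the connection id becomes "gcp_cloud_function"
    http_conn_id="{{ task_instance.xcom_pull(dag_id='cf_test', task_ids='get_config_val', key='http_con_id') }}",
    method="POST",
    endpoint="new-cp",
    data=ip_data,  # or your original double-encoded payload
    headers={"Authorization": f"bearer {api_token}", "Content-Type": "application/json"},
    log_response=True,
)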

Related

Airflow - getting the execution_date in task when calling an Operator

I have this Operator; it's pretty much the same as S3CopyObjectOperator, except it looks for all objects in a folder and copies them to a destination folder.
import os
from airflow.providers.amazon.aws.hooks.s3 import S3Hook
from airflow.utils.decorators import apply_defaults
from common.s3.partition import Partition, PartitionType
from airflow.models import BaseOperator
import logging
class S3CopyObjectsOperator(BaseOperator):
@apply_defaults
def __init__(self,
aws_conn_id: str,
partition: Partition,
s3_bucket: str,
dest_prefix: str,
*args,
**kwargs):
super(S3CopyObjectsOperator, self).__init__(*args, **kwargs)
self.aws_conn_id = aws_conn_id
self.partition = partition
self.s3_bucket = s3_bucket
self.dest_prefix = dest_prefix
def execute(self, context):
self.partition.partition_value = context.get("execution_date")
logging.info(f'self.dest_prefix: {self.dest_prefix}')
exec_date = context.get("execution_date")
logging.info(f'self.partition.partition_value: {self.partition.partition_value}')
s3 = S3Hook(self.aws_conn_id)
s3_conn = s3.get_conn()
logging.info(f'source bucket -- self.partition.bucket: {self.partition.bucket}')
logging.info(f'source key -- self.partition.key_prefix: {self.partition.key_prefix}')
source_keys = s3.list_keys(bucket_name=self.partition.bucket, prefix=self.partition.key_prefix, delimiter="/")
logging.info(f'keys: {source_keys}')
for file in source_keys:
prefix, filename = os.path.split(file)
dest_key = f'{self.dest_prefix}/{filename}'
logging.info(f'Copying file {filename} to {self.dest_prefix}')
key = self.partition.key_prefix + filename
logging.info(f'key: {key}')
s3_conn.copy_object(Bucket=self.s3_bucket,
Key=f'{dest_key}',
CopySource={
'Bucket': self.partition.bucket,
'Key': key
}, ContentEncoding='csv')
However when I use this operator in my task I need my dest_prefix to include the execution date.
Things I've tried:
I've tried adding ds = '{{ ds_nodash }}' in the DAG file, but when I print self.dest_prefix in the operator, the value it returns is the string value and not the execution date.
I've also tried creating a function, but when I print self.dest_prefix in the operator, the value it returns is: self.dest_prefix: <function exec_value at 0x7fd008fcb940>. See below for my task;
the execution date should come after snapshot_date=.
for data_group in data_group_names:
copy_felix_to_s3 = S3CopyObjectsOperator(
task_id=f'copy_felix_{data_group}_data_to_s3',
aws_conn_id='aws_default',
s3_bucket='bucket_name',
partition=felixS3Partition(
bucket='source_bucket',
location_base=f'our_bucket/{data_group}',
partition_type=None
),
dest_prefix=f"felix/{data_group}/snapshot_date= ds",
dag=dag
)
copy_felix_to_s3
You are missing the declaration of the parameter as a templated field.
class S3CopyObjectsOperator(BaseOperator):
    ...
    template_fields = ("dest_prefix",)
    ...
Macros (such as ds_nodash) are available only for templated fields, so if you don't specify template_fields the value you pass is handled as a plain string and is never rendered.
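With that in place, the task from the question can use the macro directly in dest_prefix; a sketch reusing the names from your DAG:
for data_group in data_group_names:
    copy_felix_to_s3 = S3CopyObjectsOperator(
        task_id=f'copy_felix_{data_group}_data_to_s3',
        aws_conn_id='aws_default',
        s3_bucket='bucket_name',
        partition=felixS3Partition(
            bucket='source_bucket',
            location_base=f'our_bucket/{data_group}',
            partition_type=None
        ),
        # dest_prefix is now templated, so ds_nodash is rendered at runtime,
        # e.g. felix/<data_group>/snapshot_date=20220518
        dest_prefix=f"felix/{data_group}/snapshot_date=" + "{{ ds_nodash }}",
        dag=dag
    )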

DatabricksRunOperator Execution date

opr_run_now = DatabricksRunNowOperator(
task_id = 'run_now',
databricks_conn_id = 'databricks_default',
job_id = 754377,
notebook_params = meta_data,
dag = dag
)
Is there a way to pass the execution date using the Databricks run operator?
What do you want to pass the execution_date to? What are you trying to achieve in the end? The following doc was helpful for me:
https://www.astronomer.io/guides/airflow-databricks
And here is an example where I am passing execution_date to be used in a python file run in Databricks. I'm capturing the execution_date using sys.argv.
from airflow import DAG
from airflow.providers.databricks.operators.databricks import (
    DatabricksRunNowOperator,
)
from datetime import datetime, timedelta

spark_python_task = {
    "python_file": "dbfs:/FileStore/sandbox/databricks_test_python_task.py"
}

# Define params for Run Now Operator
python_params = [
    "{{ execution_date }}",
    "{{ execution_date.subtract(hours=1) }}",
]

default_args = {
    "owner": "airflow",
    "depends_on_past": False,
    "email_on_failure": False,
    "email_on_retry": False,
    "retries": 1,
    "retry_delay": timedelta(minutes=2),
}

with DAG(
    dag_id="databricks_dag",
    start_date=datetime(2022, 3, 11),
    schedule_interval="@hourly",
    catchup=False,
    default_args=default_args,
    max_active_runs=1,
) as dag:
    opr_run_now = DatabricksRunNowOperator(
        task_id="run_now",
        databricks_conn_id="Databricks",
        job_id=2060,
        python_params=python_params,
    )
    opr_run_now
There are two ways to set up the Databricks run operators. One is with named arguments (as you did), which doesn't support templating. The second way is to pass the JSON payload that you typically use to call api/2.0/jobs/run-now; this also gives you the ability to pass execution_date, because the json parameter is templated.
notebook_task_params = {
    'new_cluster': new_cluster,
    'notebook_task': {
        'notebook_path': '/test-{{ ds }}',
    },
}
DatabricksSubmitRunOperator(task_id='notebook_task', json=notebook_task_params)
For more information see the operator docs.
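Applied to your DatabricksRunNowOperator, the JSON form could look roughly like the sketch below (the notebook_params key names are placeholders for whatever your notebook actually reads):
run_now_json = {
    "job_id": 754377,
    "notebook_params": {
        # rendered at runtime because the json parameter is templated
        "run_date": "{{ ds }}",
    },
}
opr_run_now = DatabricksRunNowOperator(
    task_id="run_now",
    databricks_conn_id="databricks_default",
    json=run_now_json,
    dag=dag,
)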

How to pass previous task state as parameter to another task within the Airflow Taskflow API?

I want to get the status of a SparkSubmitOperator, transform it to some value that my API understands and pass it within the payload of a SimpleHttpOperator so that I can update the job status inside my DB. I want to do this by using Taskflow API.
I wrote the code below but I get this error when I try to load it:
Broken DAG: [/opt/airflow/dags/export/inapp_clicks/export.py] Traceback (most recent call last):
File "/home/airflow/.local/lib/python3.6/site-packages/airflow/models/baseoperator.py", line 1378, in set_downstream
self._set_relatives(task_or_task_list, upstream=False, edge_modifier=edge_modifier)
File "/home/airflow/.local/lib/python3.6/site-packages/airflow/models/baseoperator.py", line 1316, in _set_relatives
task_object.update_relative(self, not upstream)
AttributeError: 'function' object has no attribute 'update_relative'
Code:
from datetime import datetime
from airflow.decorators import dag, task
from airflow.models import Variable
from airflow.providers.apache.spark.operators.spark_submit import SparkSubmitOperator
@dag(schedule_interval=None, start_date=datetime.now(), tags=["export", "inapp"])
def export_inapp_clicks():
DEFAULT_NUM_EXECUTORS = 2
DEFAULT_EXECUTOR_CORES = 3
DEFAULT_EXECUTOR_MEMORY = "2g"
DEFAULT_DRIVER_MEMORY = "1g"
@task()
def update_job_status(dag, ti, execution_date):
jst = dag.get_task("export_inapp_clicks_job_submission")
jsti = TaskInstance(jst, execution_date)
xcom_value = ti.xcom_pull(task_ids="export_inapp_clicks_job_submission")
print("Task:", jst)
print("Task Instance:", jsti)
print("Task State:", jsti.current_state())
print("XCOM Value:", xcom_value)
# TODO: call API via SimpleHttpOperator
job_submission = SparkSubmitOperator(
task_id="export_inapp_clicks_job_submission",
conn_id="yarn",
name="{{ dag_run.conf['name'] }}",
conf=Variable.get("export_inapp_clicks_conf", deserialize_json=True),
jars=Variable.get("export_inapp_clicks_jars"),
application=Variable.get("pyspark_executor_path"),
application_args=[
"--module",
"export_inapp_clicks.export",
"--org-id",
"{{ dag_run.conf['orgId'] }}",
"--app-id",
"{{ dag_run.conf['appId'] }}",
"--inapp-id",
"{{ dag_run.conf['inappId'] }}",
"--start-date",
"{{ dag_run.conf['startDate'] }}",
"--end-date",
"{{ dag_run.conf['endDate'] }}",
"--data-path",
Variable.get("event_data_path"),
"--es-nodes",
Variable.get("es_nodes"),
"--destination",
Variable.get("export_inapp_clicks_output"),
"--explain",
"--debug",
"--encode-columns",
"--log-level",
"WARN"
],
py_files=Variable.get("export_inapp_clicks_py_files"),
num_executors=Variable.get("export_inapp_clicks_num_executors", DEFAULT_NUM_EXECUTORS),
executor_cores=Variable.get("export_inapp_clicks_executor_cores", DEFAULT_EXECUTOR_CORES),
executor_memory=Variable.get("export_inapp_clicks_executor_memory", DEFAULT_EXECUTOR_MEMORY),
driver_memory=Variable.get("export_inapp_clicks_driver_memory", DEFAULT_DRIVER_MEMORY),
status_poll_interval=10
)
job_submission >> update_job_status
export_dag = export_inapp_clicks()
Consider the following example; the first task corresponds to your SparkSubmitOperator task.
_get_upstream_task takes care of getting the state of the first task from the second one by querying the metadata database.
DAG definition, first two tasks:
import json
from airflow.decorators import dag, task
from airflow.utils.dates import days_ago
from airflow.utils.db import provide_session
from airflow.models.taskinstance import TaskInstance
from airflow.providers.http.operators.http import SimpleHttpOperator
@dag(
    default_args={"owner": "airflow"},
    schedule_interval=None,
    start_date=days_ago(0),
    catchup=False,
    tags=["custom_example", "TaskFlow"],
)
def taskflow_previous_task():
    @provide_session
    def _get_upstream_task(upstream_task_id, dag, execution_date, session=None, **_):
        upstream_ti = (
            session.query(TaskInstance)
            .filter(
                TaskInstance.dag_id == dag.dag_id,
                TaskInstance.execution_date == execution_date,
                TaskInstance.task_id == upstream_task_id,
            )
            .first()
        )
        return upstream_ti

    @task
    def job_submission_task(**context):
        print(f"Task Id: {context['ti'].task_id}")
        return {"job_data": "something"}

    @task(trigger_rule='all_done')
    def update_job_status(job_data, **context):
        print(f"Data from previous Task: {job_data['job_data']}")
        upstream_ti = _get_upstream_task("job_submission_task", **context)
        print(f"Upstream_ti state: {upstream_ti.state}")
        return upstream_ti.state

    job_results = job_submission_task()
    job_status = update_job_status(job_results)
job_submission_task returns a dict that is passed to update_job_status via XCom using XcomArg, which is a main feature of the TaskFlow API. By doing so you avoid explicitly performing xcom_pull() and xcom_push() operations.
Once you get the TaskInstance object from the _get_upstream_task method, you can return its state and retrieve it again from the last task, which will perform the HTTP request:
Final task, end of DAG definition:
    task_post_op = SimpleHttpOperator(
        task_id="post_op",
        endpoint="post",
        data=json.dumps({"job_status": f"{job_status}"}),
        headers={"Content-Type": "application/json"},
        log_response=True,
    )

    job_status >> task_post_op

example_dag = taskflow_previous_task()
Since the param data of SimpleHttpOperator is templated, you can retrieve the Xcom value from the second task using Jinja:
data=json.dumps({"job_status": f"{job_status}"}),
Execution logs:
Task_1:
AIRFLOW_CTX_DAG_RUN_ID=manual__2021-08-20T23:15:15.226853+00:00
[2021-08-20 23:15:17,148] {logging_mixin.py:104} INFO - Task Id: job_submission_task
[2021-08-20 23:15:17,148] {python.py:151} INFO - Done. Returned value was: {'job_data': 'something'}
[2021-08-20 23:15:17,202] {taskinstance.py:1211} INFO - Marking task as SUCCESS.
Task_2:
AIRFLOW_CTX_DAG_ID=taskflow_previous_task
AIRFLOW_CTX_TASK_ID=update_job_status
AIRFLOW_CTX_EXECUTION_DATE=2021-08-20T23:15:15.226853+00:00
AIRFLOW_CTX_DAG_RUN_ID=manual__2021-08-20T23:15:15.226853+00:00
[2021-08-20 23:15:18,768] {logging_mixin.py:104} INFO - Data from previous Task: something
[2021-08-20 23:15:18,792] {logging_mixin.py:104} INFO - Upstream_ti state: success
[2021-08-20 23:15:18,793] {python.py:151} INFO - Done. Returned value was: success
[2021-08-20 23:15:18,874] {taskinstance.py:1211} INFO - Marking task as SUCCESS.
Task_3:
AIRFLOW_CTX_DAG_ID=taskflow_previous_task
AIRFLOW_CTX_TASK_ID=post_op
AIRFLOW_CTX_EXECUTION_DATE=2021-08-20T23:15:15.226853+00:00
AIRFLOW_CTX_DAG_RUN_ID=manual__2021-08-20T23:15:15.226853+00:00
[2021-08-20 23:15:21,201] {http.py:111} INFO - Calling HTTP method
[2021-08-20 23:15:21,228] {base.py:78} INFO - Using connection to: id: http_default. Host: https://www.httpbin.org, Port: None, Schema: , Login: , Password: None, extra: {}
[2021-08-20 23:15:21,245] {http.py:140} INFO - Sending 'POST' to url: https://www.httpbin.org/post
[2021-08-20 23:15:21,973] {http.py:115} INFO - {
"args": {},
"data": "{\"job_status\": \"success\"}",
"files": {},
"form": {},
"headers": {
"Accept": "*/*",
"Accept-Encoding": "gzip, deflate",
"Content-Length": "25",
"Content-Type": "application/json",
"Host": "www.httpbin.org",
"User-Agent": "python-requests/2.25.1",
"X-Amzn-Trace-Id": "Root=1-61203789-0136b7557ba4e0116bb5e16d"
},
"json": {
"job_status": "success"
},
"origin": "200.73.153.254",
"url": "https://www.httpbin.org/post"
}
[2021-08-20 23:15:22,027] {taskinstance.py:1211} INFO - Marking task as SUCCESS.
Let me know if that works for you; I tried to use as many TaskFlow features as I could.
Source: Docs1 Docs2
Edit:
Added trigger_rule='all_done' to update_job_status task.

Jinja Template Variable Email ID not rendering when using ON_FAILURE_CALLBACK

I need help rendering the Jinja-templated email ID in the on_failure_callback.
I understand that rendering templates works fine in a SQL file or with an operator that has template_fields. How do I get the code below to render the Jinja template variable?
It works fine with Variable.get('email_edw_alert'), but I don't want to use the Variable method, in order to avoid hitting the DB.
Below is the DAG file:
import datetime
import os
from functools import partial
from datetime import timedelta
from airflow.models import DAG,Variable
from airflow.contrib.operators.snowflake_operator import SnowflakeOperator
from alerts.email_operator import dag_failure_email
def get_db_dag(
*,
dag_id,
start_date,
schedule_interval,
max_taskrun,
max_dagrun,
proc_nm,
load_sql
):
default_args = {
'owner': 'airflow',
'start_date': start_date,
'provide_context': True,
'execution_timeout': timedelta(minutes=max_taskrun),
'retries': 0,
'retry_delay': timedelta(minutes=3),
'retry_exponential_backoff': True,
'email_on_retry': False,
}
dag = DAG(
dag_id=dag_id,
schedule_interval=schedule_interval,
dagrun_timeout=timedelta(hours=max_dagrun),
template_searchpath=tmpl_search_path,
default_args=default_args,
max_active_runs=1,
catchup='{{var.value.dag_catchup}}',
on_failure_callback=partial(dag_failure_email, config={'email_address': '{{var.value.email_edw_alert}}'}),
)
load_table = SnowflakeOperator(
task_id='load_table',
sql=load_sql,
snowflake_conn_id=CONN_ID,
autocommit=True,
dag=dag,
)
load_table
return dag
# ======== DAG DEFINITIONS #
edw_table_A = get_db_dag(
dag_id='edw_table_A',
start_date=datetime.datetime(2020, 5, 21),
schedule_interval='0 5 * * *',
max_taskrun=3, # Minutes
max_dagrun=1, # Hours
load_sql='recon/extract.sql',
)
Below is the Python code for alerts.email_operator:
import logging
from airflow.utils.email import send_email
from airflow.models import Variable
logger = logging.getLogger(__name__)
TIME_FORMAT = "%Y-%m-%d %H:%M:%S"
def dag_failure_email(context, config=None):
config = {} if config is None else config
task_id = context.get('task_instance').task_id
dag_id = context.get("dag").dag_id
execution_time = context.get("execution_date").strftime(TIME_FORMAT)
reason = context.get("exception")
alerting_email_address = config.get('email_address')
dag_failure_html_body = f"""<html>
<header><title>The following DAG has failed!</title></header>
<body>
<b>DAG Name</b>: {dag_id}<br/>
<b>Task Id</b>: {task_id}<br/>
<b>Execution Time (UTC)</b>: {execution_time}<br/>
<b>Reason for Failure</b>: {reason}<br/>
</body>
</html>
"""
try:
if reason != 'dagrun_timeout':
send_email(
to=alerting_email_address,
subject=f"Airflow alert: <DagInstance: {dag_id} - {execution_time} [failed]",
html_content=dag_failure_html_body,
)
except Exception as e:
logger.error(
f'Error in sending email to address {alerting_email_address}: {e}',
exc_info=True,
)
I have also tried another way, but the one below is not working either:
try:
if reason != 'dagrun_timeout':
send_email = EmailOperator(
to=alerting_email_address,
task_id='email_task',
subject=f"Airflow alert: <DagInstance: {dag_id} - {execution_time} [failed]",
params={'content1': 'random'},
html_content=dag_failure_html_body,
)
send_email.dag = context['dag']
#send_email.to = send_email.get_template_env().from_string(send_email.to).render(**context)
send_email.to = send_email.render_template(alerting_email_address, send_email.to, context)
send_email.execute(context)
except Exception as e:
logger.error(
f'Error in sending email to address {alerting_email_address}: {e}',
exc_info=True,
)
I don't think templates will work this way: something has to explicitly render the template. Usually Jinja templates in Airflow are used to pass templated fields through to operators and are rendered using the render_template function (https://airflow.apache.org/docs/stable/_modules/airflow/models/baseoperator.html#BaseOperator.render_template).
Since your callback function isn't an operator, it won't have this method by default.
I think the best thing to do here would be to either explicitly call Variable.get during runtime of the callback function itself, rather than in the DAG definition, or implement some version of that render_template_fields function in your callback. Both of these solutions would result only in hitting the DB during runtime of this task, rather than whenever the DAG is created.
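A rough sketch of the first option, moving the Variable lookup into the callback body so the DB is only queried when a failure actually happens (the HTML body is shortened here; keep yours as it is):
from airflow.models import Variable
from airflow.utils.email import send_email

TIME_FORMAT = "%Y-%m-%d %H:%M:%S"

def dag_failure_email(context, config=None):
    config = {} if config is None else config
    # Resolved at callback runtime, not at DAG parse time
    alerting_email_address = config.get('email_address') or Variable.get('email_edw_alert')
    dag_id = context.get('dag').dag_id
    execution_time = context.get('execution_date').strftime(TIME_FORMAT)
    send_email(
        to=alerting_email_address,
        subject=f"Airflow alert: <DagInstance: {dag_id} - {execution_time} [failed]",
        html_content=f"<b>DAG Name</b>: {dag_id}<br/>",  # keep your full HTML body here
    )
The DAG definition would then pass on_failure_callback=dag_failure_email directly (or keep the partial, just without the templated email string).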
Edit: I just saw your attempt to do the rendering explicitly via the operator. Are the fields that you want templated listed in the EmailOperator's template_fields?

Airflow BashOperator Parameter From XCom Value

I am having a problem assigning an XCom value to the BashOperator.
All the parameters are properly retrieved except tmp_dir, which is an XCom value generated during init_dag. I was able to retrieve the value in my custom operator, but I am not able to do it in the BashOperator. I have added the outputs of the three different ways I have tried.
I think one way could be to store that value in a variable, but I was also not able to figure out how to do that.
Any help will be highly appreciated.
Here is my DAG code:
import airflow
from airflow.models import DAG
from airflow.utils.dates import days_ago
from airflow.models import Variable
from utility import util
import os
from airflow.operators.bash_operator import BashOperator
from operators.mmm_operator import MMMOperator #it is a custom operator
from operators.iftp_operator import IFTPOperator #it is another custom operator
AF_DATAMONTH = util.get_date_by_format(deltaMth=2,deltaDay=0,ft='%b_%Y').lower() #it gives a date in required format
AF_FILENM_1 = 'SOME_FILE_' + AF_DATAMONTH + '.zip' #required filename for ftp
default_args = {
'owner': 'airflow',
'depends_on_past': False,
'start_date': days_ago(0),
}
dag = DAG(dag_id='my_dag', default_args=default_args, schedule_interval=None)
init_dag = MMMOperator(
task_id='init_dag',
provide_context=True,
mmm_oracle_conn_id=Variable.get('SOME_VARIABLE'),
mmm_view="{0}.{1}".format(Variable.get('ANOTHER_VARIABLE'), AF_DAG_MMM_VIEW_NM),
mmm_view_filter=None,
mmm_kv_type=True,
mmm_af_env_view="{0}.{1}".format(Variable.get('ANOTHER_VARIABLE_1'),Variable.get('ANOTHER_VARIABLE_2')),
dag=dag
) #local_tmp_folder is generated here and pushed via xcom
download_ftp_files = IFTPOperator(task_id='download_ftp_files',
ftp_conn_id=util.getFromConfig("nt_conn_id"), #value properly retrieved by xcom_pull
operation='GET',
source_path=util.getFromConfig("nt_remote_folder"), #value properly retrieved by xcom_pull
dest_path=util.getFromConfig("local_tmp_folder"), #value properly retrieved by xcom_pull
filenames=AF_FILENM,
dag=dag
)
bash_cmd_template = "cd /vagrant/ && python3 hello_print.py {{params.client}} {{params.task}} {{params.environment}} {{params.tmp_dir}} {{params.af_file_nm}}"
#try 1 output value for params.tmp_dir: {{ ti.xcom_pull(task_ids="init_dag")["local_tmp_folder"] }} - instead of the actual tmp folder location
#try 2 and try 3 output: Broken DAG: [/home/vagrant/airflow/dags/my_dag.py] name 'ti' is not defined - message in UI
execute_main_py_script = BashOperator(
task_id='execute_main_py_script',
bash_command=bash_cmd_template,
params={'client' : 'some_client',
'task' : 'load_some_task',
'environment' : 'environment_name',
#'tmp_dir' : util.getFromConfig("local_tmp_folder"), #try 1
#'tmp_dir' : {{ ti.xcom_pull(task_ids="init_dag")["local_tmp_folder"] }} #try 2
#'tmp_dir' : ti.xcom_pull(task_ids="init_dag")["local_tmp_folder"] #try 3
'af_file_nm' : AF_FILENM_1
},
provide_context=True,
dag=dag
)
init_dag >> download_ftp_files >> execute_main_py_script
The params argument of the BashOperator is not Jinja-templated, hence any value you pass in params is used "as-is" and never rendered.
You should pull the value of tmp_dir directly in bash_cmd_template, as follows:
bash_cmd_template = """
cd /vagrant/ && python3 hello_print.py {{params.client}} {{params.task}} {{params.environment}} {{ ti.xcom_pull(task_ids="init_dag")["local_tmp_folder"] }} {{params.af_file_nm}}
"""
execute_main_py_script = BashOperator(
    task_id='execute_main_py_script',
    bash_command=bash_cmd_template,
    params={'client': 'some_client',
            'task': 'load_some_task',
            'environment': 'environment_name',
            'af_file_nm': AF_FILENM_1
            },
    provide_context=True,
    dag=dag
)
