Requirement: Create a custom date function to be used in operators, the DAG, etc.
Below is the DAG file
DAG
from airflow import DAG
from airflow.operators.python_operator import PythonOperator
from datetime import datetime, timedelta
from alerts.custom_date import strt_of_wk_strt_mon_dt, NEXT_DS_NODASH

default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime(2020, 7, 8),
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
}

dag = DAG(dag_id='test_date',
          schedule_interval='@once',
          default_args=default_args)
def test(**kwargs):
    first_date = kwargs.get('execution_date', None)
    strt_wk_dt = kwargs.get('strt_wk_dt')
    next_ds_nodash = kwargs.get('next_ds_nodash')
    s3_key = kwargs.get('s3_key')
    print(f'EXECUTION DATE:{first_date}')
    print(f'STRT_WK_DT:{strt_wk_dt}')
    print(f'NEXT_DS_NODASH:{next_ds_nodash}')
    print(f'S3_KEY:{s3_key}')
with dag:
    execution_date = '{{ execution_date }}'
    next_ds_nodash = NEXT_DS_NODASH
    strt_wk_dt = strt_of_wk_strt_mon_dt()
    t1 = PythonOperator(
        task_id='show_template',
        python_callable=test,
        op_kwargs={'execution_date': execution_date,
                   'next_ds_nodash': next_ds_nodash,
                   'strt_wk_dt': strt_wk_dt,
                   's3_key': f'snowflakes/FEEDS/{strt_wk_dt}/abc_{strt_wk_dt}.csv'},
        provide_context=True)
With datetime package
First I tried the datetime library, and it worked fine.
Below is the cstm_date.py:
from datetime import datetime, timedelta
import logging

logger = logging.getLogger(__name__)

NEXT_DS_NODASH = '{{ (execution_date + macros.timedelta(days=1)).strftime("%m%d%Y") }}'

def strt_of_wk_strt_mon_dt():
    return (datetime.today().date() - timedelta(days=datetime.today().weekday())).strftime('%Y_%m_%d')
The output prints as expected.
Next, I tried the pendulum library, but the output does not print the date value.
With pendulum package
Below is the cstm_date.py:
import pendulum
import logging

logger = logging.getLogger(__name__)

NEXT_DS_NODASH = '{{ (execution_date + macros.timedelta(days=1)).strftime("%m%d%Y") }}'

def strt_of_wk_strt_mon_dt():
    today = pendulum.now()
    return today.start_of('week').format('YYYY_MM_DD')
The output does not print the STRT_WK_DT value.
What am I missing?
You are using the right strftime conversion directives in the datetime version, but not in your new function that uses pendulum. With the pendulum version Airflow 1.10 pins (pendulum 1.x), format() defaults to strftime-style directives, so 'YYYY_MM_DD' contains nothing to substitute and comes back as literal text rather than a formatted date.
Yours
def strt_of_wk_strt_mon_dt():
    today = pendulum.now()
    return today.start_of('week').format('YYYY_MM_DD')
Intended
def strt_of_wk_strt_mon_dt():
    today = pendulum.now()
    return today.start_of('week').format('%Y_%m_%d')
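Note: on pendulum 2.x, format() switches to the 'YYYY_MM_DD'-style tokens, so your original call would work there. A version-agnostic sketch is to call strftime() instead, which pendulum datetimes support in both major versions since they subclass datetime:

import pendulum

def strt_of_wk_strt_mon_dt():
    # strftime() behaves the same on pendulum 1.x and 2.x,
    # sidestepping the format() directive-style ambiguity entirely
    return pendulum.now().start_of('week').strftime('%Y_%m_%d')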
I want to customize my DAG to call a Databricks notebook when it succeeds or fails. I have created two different functions to call a Databricks notebook for the success/failure cases. The success/failure callback function is being called, but the Databricks notebook is not executing. Here is the sample code.
def task_success_callback(context):
    """ task_success callback """
    context['task_instance'].task_id
    print("success case")
    dq_notebook_success_task_params = {
        'existing_cluster_id': Variable.get("DATABRICKS_CLUSTER_ID"),
        'notebook_task': {
            'notebook_path': '/AAA/Airflow/Operators/audit_file_operator',
            'base_parameters': {
                "root": "dbfs:/mnt/aaa",
                "audit_file_path": "/success_file_path/",
                "table_name": "sample_data_table",
                "audit_flag": "success"
            }
        }
    }

    DatabricksSubmitRunOperator(
        task_id="weather_table_task_id",
        databricks_conn_id='databricks_conn',
        json=dq_notebook_success_task_params,
        do_xcom_push=True,
        secrets=[secret.Secret(
            deploy_type='env',
            deploy_target=None,
            secret='adf-service-principal'
        ), secret.Secret(
            deploy_type='env',
            deploy_target=None,
            secret='postgres-credentials',
        )],
    )
def task_failure_callback(context):
    """ task_failure callback """
    context['task_instance'].task_id
    print("failure case")
    dq_notebook_failure_task_params = {
        'existing_cluster_id': Variable.get("DATABRICKS_CLUSTER_ID"),
        'notebook_task': {
            'notebook_path': '/AAA/Airflow/Operators/audit_file_operator',
            'base_parameters': {
                "root": "dbfs:/mnt/aaa",
                "audit_file_path": "/failure_file_path/",
                "table_name": "sample_data_table",
                "audit_flag": "failure"
            }
        }
    }

    DatabricksSubmitRunOperator(
        task_id="weather_table_task_id",
        databricks_conn_id='databricks_conn',
        json=dq_notebook_failure_task_params,
        do_xcom_push=True,
        secrets=[secret.Secret(
            deploy_type='env',
            deploy_target=None,
            secret='adf-service-principal'
        ), secret.Secret(
            deploy_type='env',
            deploy_target=None,
            secret='postgres-credentials',
        )],
    )
DEFAULT_ARGS = {
    "owner": "admin",
    "depends_on_past": False,
    "start_date": datetime(2020, 9, 23),
    "on_success_callback": task_success_callback,
    "on_failure_callback": task_failure_callback,
    "email": ["airflow@airflow.com"],
    "email_on_failure": False,
    "email_on_retry": False,
    "retries": 1,
    "retry_delay": timedelta(seconds=10),
}
==================
Remaining DAG code
==================
In Airflow, every operator has an execute() method that defines the operator's logic. When you create your workflow, Airflow initializes the operator, renders the templates, and calls the execute method for you. However, when you instantiate an operator inside a Python function, you need to handle all of this on your own.
So when you write:
def task_success_callback(context):
    DatabricksSubmitRunOperator(..)
All you did here is call the DatabricksSubmitRunOperator constructor. You didn't invoke the operator logic.
What you need to do is:
def task_success_callback(context):
    op = DatabricksSubmitRunOperator(..)
    op.execute(context=context)
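Applied to your success callback, a minimal sketch could look like the following (base_parameters trimmed for brevity; the task_id is just an illustrative name, and the import uses the Airflow 1.10 contrib path — on Airflow 2 the operator lives in the Databricks provider package):

from airflow.models import Variable
from airflow.contrib.operators.databricks_operator import DatabricksSubmitRunOperator

def task_success_callback(context):
    """Run the audit notebook after a task succeeds."""
    dq_notebook_success_task_params = {
        'existing_cluster_id': Variable.get("DATABRICKS_CLUSTER_ID"),
        'notebook_task': {
            'notebook_path': '/AAA/Airflow/Operators/audit_file_operator',
            'base_parameters': {"audit_flag": "success"},
        },
    }
    # Instantiating the operator only declares it;
    # execute() is what actually submits the Databricks run
    op = DatabricksSubmitRunOperator(
        task_id="audit_notebook_success",  # illustrative task_id
        databricks_conn_id="databricks_conn",
        json=dq_notebook_success_task_params,
    )
    op.execute(context=context)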
TableList = collections.namedtuple(
    "table_list",
    "table_name audit_file_name",
)

LIST_OF_TABLES = [
    TableList(
        table_name="table1",
        audit_file_name="/testdata/Audit_files/",
    ),
    TableList(
        table_name="table2",
        audit_file_name="/testdata/Audit_files/",
    ),
    TableList(
        table_name="table3",
        audit_file_name="/testdata/Audit_files/",
    ),
    TableList(
        table_name="table4",
        audit_file_name="/testdata/Audit_files/",
    )
]
for table in LIST_OF_TABLES:
    DEFAULT_ARGS = {
        "owner": "admin",
        "depends_on_past": False,
        "start_date": datetime(2020, 9, 23),
        "on_success_callback": partial(task_success_callback, table.table_name, table.audit_file_name),
        "on_failure_callback": partial(task_failure_callback, table.table_name, table.audit_file_name),
        "email": ["airflow@airflow.com"],
        "email_on_failure": False,
        "email_on_retry": False,
        "retries": 1,
        "retry_delay": timedelta(seconds=10),
    }

WORKFLOW = DAG(
    'test_dag',
    default_args=DEFAULT_ARGS,
    schedule_interval="30 3 * * 1",
    catchup=False,
)
I'm trying to get BranchPythonOperator working but I have the following error:
'BigQueryInsertJobOperator' object is not iterable
Here is my Branch Operator:
branching = BranchPythonOperator(
    task_id='branching',
    python_callable=return_branch,
    provide_context=True)
Here is my Python Callable:
def return_branch(ds, **kwargs):
    execution_year = kwargs['execution_date'].strftime("%Y")
    type = dataset_metadata[f'{execution_year}']['var']
    if type == 'foo':
        return x
    return y
x and y are BigQueryInsertJobOperator instances:
x = BigQueryInsertJobOperator(
    task_id='x',
    configuration={
        "query": {
            "query": "{% include 'q.sql' %}",
            "use_legacy_sql": False
        }
    },
    dag=dag)
I'd like to refer to this answer. Your method return_branch shouldn't return the operator; it must return the task_id of your operator. You'll get something like this:
def return_branch(ds, **kwargs):
    next_task_id = "a"  # <some kind of logic>
    return next_task_id

branching = BranchPythonOperator(
    task_id="pick_query",
    python_callable=return_branch,
    provide_context=True,
)

option_1 = DummyOperator(task_id="a")
option_2 = DummyOperator(task_id="b")

branching >> [option_1, option_2]
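Applied to your DAG, and assuming y was declared with task_id='y' (your snippet only shows x), the callable returns the ids and the branch task sits upstream of both operators:

def return_branch(ds, **kwargs):
    execution_year = kwargs['execution_date'].strftime("%Y")
    # return the task_id string, not the operator object
    if dataset_metadata[execution_year]['var'] == 'foo':
        return 'x'
    return 'y'

branching >> [x, y]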
Hello everyone, I need help: the print(...) line in my code outputs None, None, None instead of the npm, nama, and jurusan values that were entered. Can anybody help me solve this? Thanks.
import sqlite3
import tkinter
from tkinter import *
from tkinter import ttk
def Daftar():
    window = Tk()
    window.title("Welcome to TutorialsPoint")
    window.geometry('400x400')
    window.configure(background = "grey");
    Lnpm = Label(window, text="Please Input Your npm: ").grid(row=0, column=0)
    Lnama = Label(window, text="Please Input Your nama: ").grid(row=1, column=0)
    Ljurusan = Label(window, text="Please Input Your jurusan: ").grid(row=2, column=0)
    npm = Entry(window).grid(row = 0, column = 1)
    nama = Entry(window).grid(row = 1, column = 1)
    jurusan = Entry(window).grid(row = 2, column = 1)
    def Clicked():
        print("First Name: %s\nLast Name: %s\nLast Name: %s" % (npm, nama, jurusan))
        connect = sqlite3.connect('Presensi.db')
        cur = connect.cursor()
        connect.execute("INSERT OR IGNORE INTO user(npm,nama,jurusan) values(?,?,?)", (str(npm), str(nama), str(jurusan)))
        connect.execute("INSERT OR IGNORE INTO presensi(nama) values(?)", (str(nama),))
        connect.commit()
        cur.close()
    btn = ttk.Button(window, text="Register", command= Clicked()).grid(row=3, column=0)
    window.mainloop()
You've got two big issues here:
1. The grid() function of the Entry object returns None, and that's why npm, nama and jurusan are None. What you have to do is store the Entry object itself, not the value returned from grid().
2. You're not calling get() on the Entry objects to read their input values.
What you can do is create a class in which you store the Entry objects. The callback function of the Button object can then be a method of the class.
I've reorganised your code to do this:
from tkinter import Tk, Label, Button, Entry
import sqlite3

class Daftar:
    def __init__(self, master):
        self.window = master
        self.window.title("Welcome to TutorialsPoint")
        self.window.geometry('400x400')
        self.window.configure(background="grey")
        self.Lnpm = Label(self.window, text="Please Input Your npm: ").grid(row=0, column=0)
        self.Lnama = Label(self.window, text="Please Input Your nama: ").grid(row=1, column=0)
        self.Ljurusan = Label(self.window, text="Please Input Your jurusan: ").grid(row=2, column=0)
        # Entry objects for later use
        self.npm = Entry(self.window)
        self.npm.grid(row=0, column=1)
        self.nama = Entry(self.window)
        self.nama.grid(row=1, column=1)
        self.jurusan = Entry(self.window)
        self.jurusan.grid(row=2, column=1)
        self.btn = Button(self.window, text="Register", command=self.Clicked).grid(row=3, column=0)

    def Clicked(self):
        # Get the entry values
        npm = self.npm.get()
        nama = self.nama.get()
        jurusan = self.jurusan.get()
        print("First Name: %s\nLast Name: %s\nLast Name: %s" % (npm, nama, jurusan))
        connect = sqlite3.connect('Presensi.db')
        cur = connect.cursor()
        connect.execute("INSERT OR IGNORE INTO user(npm,nama,jurusan) values(?,?,?)", (npm, nama, jurusan))
        connect.execute("INSERT OR IGNORE INTO presensi(nama) values(?)", (nama,))
        connect.commit()
        cur.close()

root = Tk()
my_gui = Daftar(root)
root.mainloop()
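One more subtle fix in the rewrite: the button callback is passed as command=self.Clicked, a reference to the method, whereas your original command=Clicked() called the function immediately while the window was still being built, before anything had been typed.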