Amazon Airflow 1.10.12: No module named 'operators' - airflow

I am creating a plugin and DAG structure for Amazon Airflow 1.10.12. I set it up according to the documentation:
dags/
    aws_from_redshift_to_s3.py
plugins/
    __init__.py
    from_redshift_to_s3_plugin.py
    operators/
        __init__.py
        aws_from_redshift_to_s3_operator.py
aws_from_redshift_to_s3_operator.py:
from airflow.hooks.postgres_hook import PostgresHook
from airflow.models import BaseOperator
from airflow.utils.decorators import apply_defaults
from airflow.contrib.hooks.aws_hook import AwsHook
class FromRedshiftToS3TransferOperator(BaseOperator):
    pass
from_redshift_to_s3_plugin.py:
from airflow.plugins_manager import AirflowPlugin
from operators.aws_from_redshift_to_s3_operator import FromRedshiftToS3TransferOperator
class FromRedShiftToS3Plugin(AirflowPlugin):
    name = 'from_redshift_to_s3_plugin'
    operators = [FromRedshiftToS3TransferOperator]
In the DAG itself I import it like this:
from operators.from_redshift_to_s3_plugin import FromRedshiftToS3TransferOperator
When I try to enable the DAG in Amazon Airflow 1.10.12, I get the error: No module named 'operators'.

https://airflow.apache.org/docs/apache-airflow/1.10.12/howto/custom-operator.html
As mentioned in this documentation, you no longer need to import through the operators package. Instead, try importing it like this:
from aws_from_redshift_to_s3_operator import FromRedshiftToS3TransferOperator
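For example, assuming aws_from_redshift_to_s3_operator.py is importable from the DAG file (e.g. it sits next to the DAG or in a directory that is on the PYTHONPATH), a minimal DAG sketch could look like this; the dag_id, dates and task arguments are placeholders, not taken from the question:

from datetime import datetime

from airflow import DAG
# Import the operator class straight from its module, no plugin registration needed.
from aws_from_redshift_to_s3_operator import FromRedshiftToS3TransferOperator

with DAG(
    dag_id="aws_from_redshift_to_s3",
    start_date=datetime(2021, 1, 1),
    schedule_interval=None,
) as dag:
    unload_task = FromRedshiftToS3TransferOperator(task_id="unload_redshift_to_s3")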

Related

Multiple jobs getting triggered instead of only one job in MWAA

We are facing an issue in the Amazon Managed Workflows for Apache Airflow (MWAA) service. We created 10 different DAGs with almost the same operators (an AWS Glue job) but with different parameters passed to the same job.
The issue is that operators in multiple DAGs are being triggered automatically even though they are not scheduled. We tried changing the alias name for each operator in the 10 DAGs, but random operators still get triggered randomly across the 10 DAGs.
Please advise how to troubleshoot this issue.
We set the dependency to the next job upon completion, but it is still not working as expected.
DAG code for reference:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Created on Wed Sep 21 11:31:03 2022
#author:
"""
# psycopg2-binary
import os
import boto3
import logging
import json
import pandas as pd
from datetime import datetime, timedelta

from airflow import DAG
from airflow.models import TaskInstance, XCom, Variable
from airflow.models.baseoperator import chain
from airflow.operators.bash import BashOperator
from airflow.operators.dummy import DummyOperator
from airflow.operators.python_operator import PythonOperator
from airflow.providers.amazon.aws.operators.glue import AwsGlueJobOperator
from airflow.providers.postgres.hooks.postgres import PostgresHook
from airflow.utils.dates import days_ago
from airflow.utils.log.logging_mixin import LoggingMixin
from airflow.utils.task_group import TaskGroup

DEFAULT_ARGS = {
    "owner": "<<owner_name>>",
    "depends_on_past": False,
    "retries": 0,
    "email_on_failure": False,
    "email_on_retry": False,
}


def work_with_postgress_lims(ti, **kwargs):
    try:
        hook = PostgresHook(postgres_conn_id="<<db_name>>")
        print(kwargs)
        print("Hello from method")
        print(hook)
        id_list = f"SELECT id::varchar,dataobjectname FROM <<table_name>> where systeminfoid ='lims' and process_flag ='Y' and airflow_flag ='Y' order by id;"
        print(hook.schema)
        id_values = hook.get_records(id_list)
        print(id_values)
        stg_list = f"SELECT id::varchar,dataobjectname FROM <<table_name>> where systeminfoid ='lims' and process_flag ='Y' and staging_flag ='Y' and airflow_flag ='Y' order by id;"
        print(hook.schema)
        stg_values = hook.get_records(stg_list)
        print(stg_list)
        Variable.set(key='lims_table_and_dataset_list',
                     value=id_values, serialize_json=True)
        options = Variable.get('lims_table_and_dataset_list',
                               default_var=['default_table'],
                               deserialize_json=True)
        Variable.set(key='lims_stg_table_and_dataset_list',
                     value=stg_values, serialize_json=True)
        options2 = Variable.get('lims_stg_table_and_dataset_list',
                                default_var=['default_table'],
                                deserialize_json=True)
        print(options)
        print(options2)
        return id_values, stg_values
    except Exception as e:
        print(e)


glue_client = boto3.client('glue', region_name='<<region_name>>')
logger = logging.getLogger('airflow.task')

with DAG(
        dag_id='kdh_source_to_curated_lims',
        description="source to curated",
        default_args=DEFAULT_ARGS,
        dagrun_timeout=timedelta(hours=48),
        start_date=datetime(2022, 9, 21, 6, 15, 00),
        concurrency=5,
        max_active_runs=2,
        schedule_interval=None) as dag:

    work_with_postgress_lims = PythonOperator(
        task_id='python_callable_operator_lims',
        python_callable=work_with_postgress_lims,
        do_xcom_push=False,
        provide_context=True
    )

    options_dataset_id = Variable.get('lims_table_and_dataset_list',
                                      default_var=['default_table'],
                                      deserialize_json=True)
    options_stg_dataset_id = Variable.get('lims_stg_table_and_dataset_list',
                                          default_var=['default_table'],
                                          deserialize_json=True)

    kdh_jr_invoke_lims = AwsGlueJobOperator(task_id='kdh_jr_invoke_lims', job_name='kdh_jr_invoke',
                                            script_args={'--source_system': 'lims'})

    with TaskGroup('dynamic_raw_tasks_group_lims', prefix_group_id=False) as dynamic_raw_tasks_group_lims:
        if options_dataset_id:
            for option_dataset_id in options_dataset_id:
                t = AwsGlueJobOperator(task_id=option_dataset_id[1] + '_raw', job_name='kdh-rd_jr',
                                       script_args={'--dataset_id': option_dataset_id[0], '--airflow_flag': 'Y'})
                last = DummyOperator(task_id=option_dataset_id[1] + '_raw_end')
                t >> last

    with TaskGroup('dynamic_stg_tasks_group_lims', prefix_group_id=False) as dynamic_stg_tasks_group_lims:
        if options_stg_dataset_id:
            for option_stg_dataset_id in options_stg_dataset_id:
                t = AwsGlueJobOperator(task_id=option_stg_dataset_id[1] + '_stg', job_name='kdh_dq_staging',
                                       script_args={'--source_system': 'lims', '--table': option_stg_dataset_id[1]})
                last = DummyOperator(task_id=option_stg_dataset_id[1] + '_stg_end')
                t >> last

    kdh_jr_curated_lims = AwsGlueJobOperator(task_id='kdh_jr_curated_lims', job_name='kdh_stg_curated',
                                             script_args={'--source_system': 'lims'})

    kdh_jr_invoke_lims >> work_with_postgress_lims >> dynamic_raw_tasks_group_lims >> dynamic_stg_tasks_group_lims >> kdh_jr_curated_lims
    # work_with_postgress_lims >> dynamic_raw_tasks_group_lims >> dynamic_stg_tasks_group_lims

The conn_id environment variable isn't defined

I have a DAG that tries to connect to an HTTP endpoint, but somehow it doesn't work. I defined the connection using an environment variable, yet it is not seen by the HttpSensor and I get the error below, even though the variable was created in the system. What's wrong here? The DAG and the full error are below.
The conn_id `AIRFLOW_VAR_FOO` isn't defined
DAG:
import os
import json
import pprint
import datetime
import requests
from airflow.models import DAG
from airflow.operators.python import PythonOperator
from airflow.providers.sftp.operators.sftp import SFTPOperator
from airflow.providers.sftp.sensors.sftp import SFTPSensor
from airflow.utils.dates import days_ago
from airflow.models import Variable
from airflow.sensors.http_sensor import HttpSensor
from airflow.hooks.base_hook import BaseHook


def init_vars():
    os.environ['AIRFLOW_VAR_FOO'] = "https://mywebxxx.net/"
    print(os.environ['AIRFLOW_VAR_FOO'])


with DAG(
        dag_id='request_test',
        schedule_interval=None,
        start_date=days_ago(2)) as dag:

    init_vars = PythonOperator(task_id="init_vars",
                               python_callable=init_vars)

    task_is_api_active = HttpSensor(
        task_id='is_api_active',
        http_conn_id='AIRFLOW_VAR_FOO',
        endpoint='post'
    )

    get_data = PythonOperator(task_id="get_data",
                              python_callable=get_data)

    init_vars >> task_is_api_active
Full Error:
File "/home/airflow/.local/lib/python3.7/site-packages/airflow/models/connection.py", line 379, in get_connection_from_secrets
raise AirflowNotFoundException(f"The conn_id `{conn_id}` isn't defined")
airflow.exceptions.AirflowNotFoundException: The conn_id `AIRFLOW_VAR_FOO` isn't defined
[2022-11-04 10:32:41,720] {taskinstance.py:1551} INFO - Marking task as FAILED. dag_id=request_test, task_id=is_api_active, execution_date=20221104T103235, start_date=20221104T103240, end_date=20221104T103241
[2022-11-04 10:32:42,628] {local_task_job.py:149} INFO - Task exited with return code 1
EDIT:
import os
import json
import pprint
import datetime
import requests
from airflow.models import DAG
from airflow.operators.python import PythonOperator
from airflow.providers.sftp.operators.sftp import SFTPOperator
from airflow.providers.sftp.sensors.sftp import SFTPSensor
from airflow.utils.dates import days_ago
from airflow.models import Variable
from airflow.sensors.http_sensor import HttpSensor
from airflow.hooks.base_hook import BaseHook


def init_vars():
    os.environ['AIRFLOW_VAR_FOO'] = "https://mywebxxx.net/"
    print(os.environ['AIRFLOW_VAR_FOO'])


with DAG(
        dag_id='request_test',
        schedule_interval=None,
        start_date=days_ago(2)) as dag:

    init_vars = PythonOperator(task_id="init_vars",
                               python_callable=init_vars)

    init_vars()

    task_is_api_active = HttpSensor(
        task_id='is_api_active',
        http_conn_id='AIRFLOW_VAR_FOO',
        endpoint='post'
    )

    get_data = PythonOperator(task_id="get_data",
                              python_callable=get_data)

    task_is_api_active
You need to define the environment variable as AIRFLOW_CONN_VAR_FOO and then use http_conn_id="var_foo": Airflow resolves a connection id by looking for an AIRFLOW_CONN_<CONN_ID> environment variable, while the AIRFLOW_VAR_ prefix is for Variables, not Connections. Also note that setting os.environ inside a PythonOperator task only affects that task's own process, so the sensor never sees it; set the variable in the environment Airflow runs in, or set it at DAG parse time as in the code below.
For more details see this link.
def init_vars():
    os.environ['AIRFLOW_CONN_VAR_FOO'] = "https://mywebxxx.net/"
    print(os.environ['AIRFLOW_CONN_VAR_FOO'])


with DAG(
        dag_id='request_test',
        schedule_interval=None,
        start_date=days_ago(2)) as dag:

    init_vars()

    task_is_api_active = HttpSensor(
        task_id='is_api_active',
        http_conn_id='var_foo',
        endpoint='post'
    )

    get_data = PythonOperator(task_id="get_data",
                              python_callable=get_data)

    task_is_api_active
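A variant of the same idea, as a minimal sketch: here the connection is exported in the environment of the scheduler and workers instead of being set from Python, and the URI value is just the example host from the question.

# Assumes the environment running Airflow exports something like:
#   AIRFLOW_CONN_VAR_FOO=https://mywebxxx.net/
from airflow.models import DAG
from airflow.sensors.http_sensor import HttpSensor
from airflow.utils.dates import days_ago

with DAG(
        dag_id='request_test_env_conn',
        schedule_interval=None,
        start_date=days_ago(2)) as dag:

    task_is_api_active = HttpSensor(
        task_id='is_api_active',
        http_conn_id='var_foo',  # resolved from AIRFLOW_CONN_VAR_FOO
        endpoint='post'
    )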

How to execute parallel test cases in the unittest framework in Python?

I've tried the following method in the test suite file:
import unittest
import xmlrunner
from ui_tests.test_prelogin import PreLoginTests
login_tests = unittest.TestLoader().loadTestsFromTestCase(PreLoginTests)
test_suite = unittest.TestSuite([login_tests])
unittest.main(testRunner=xmlrunner.XMLTestRunner(output='test-reports')).run(test_suite)
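The standard library unittest runner has no built-in parallel execution, so this usually means either switching to pytest with the pytest-xdist plugin or running independent suites yourself in a thread or process pool. A rough sketch of the latter follows; ExampleTests is a placeholder standing in for PreLoginTests, and this only helps if the suites share no state (e.g. no single Selenium driver):

import unittest
from concurrent.futures import ThreadPoolExecutor

# Placeholder test case for illustration; substitute your real test classes.
class ExampleTests(unittest.TestCase):
    def test_something(self):
        self.assertTrue(True)

def run_suite(suite):
    # Each suite gets its own runner and result object.
    return unittest.TextTestRunner(verbosity=1).run(suite)

if __name__ == "__main__":
    loader = unittest.TestLoader()
    suites = [loader.loadTestsFromTestCase(ExampleTests),
              loader.loadTestsFromTestCase(ExampleTests)]
    with ThreadPoolExecutor(max_workers=2) as pool:
        results = list(pool.map(run_suite, suites))
    print("all passed:", all(r.wasSuccessful() for r in results))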

Project does not compile after migrating from Vaadin 7 to Vaadin 8

The current version of Vaadin is 7.3.6.
Here is some of my code:
import com.vaadin.data.Property;
import com.vaadin.data.Property.ValueChangeEvent;
import com.vaadin.ui.NativeSelect;
import com.vaadin.ui.TextField;
import com.vaadin.ui.UI;
import com.vaadin.ui.VerticalLayout;

private NativeSelect currencySelector;

private void initCurrencySelector(String providerId) {
    currencySelector = new NativeSelect();
    List<String> selectCurrencyList;
    currencySelector.removeAllItems();
}
This code compiles successfully.
But after I upgrade to Vaadin 8.12.0, the code no longer compiles.
The errors are in these lines:
import com.vaadin.data.Property;
import com.vaadin.data.Property.ValueChangeEvent;
import com.vaadin.event.FieldEvents.TextChangeEvent;
import com.vaadin.event.FieldEvents.TextChangeListener;
and in this line:
currencySelector.removeAllItems();
The new imports should be:
import com.vaadin.data.HasValue.ValueChangeEvent;
import com.vaadin.event.FieldEvents;
TextChangeEvent and TextChangeListener were probably replaced by HasValue.ValueChangeEvent and HasValue.ValueChangeListener.
currencySelector.removeAllItems(); should be:
currencySelector.setDataProvider(new ListDataProvider(new ArrayList()));
A list of incompatible changes can be found here: https://vaadin.com/download/prerelease/8.0/8.0.0/8.0.0.beta1/release-notes.html#incompatible

Airflow: Publish a dynamically created DAG

I want to be able to publish and trigger a DAG object from my code, which is not under the scheduler's control (i.e. not in the $AIRFLOW_HOME/dags folder).
My last resort would be to programmatically create a .py file containing the DAG definition that I want to publish and save it to the $AIRFLOW_HOME/dags folder.
I'm sure it should be easier than that.
Below is what I've tried.
import airflow
from airflow import DAG
from datetime import timedelta
from airflow.models import DagPickle
from airflow.operators.dummy_operator import DummyOperator
from airflow.utils.db import provide_session


@provide_session
def submit_dag(session=None):
    args = {
        'owner': 'airflow',
        'start_date': airflow.utils.dates.days_ago(2)
    }
    dag = DAG(
        dag_id='sample', default_args=args,
        schedule_interval=None, start_date=airflow.utils.dates.days_ago(2),
        dagrun_timeout=timedelta(minutes=60))
    task = DummyOperator(task_id='one', dag=dag)
    dag_pickle = DagPickle(task)
    session.add(dag_pickle)
    session.commit()


submit_dag()
The above code does create entries in the dag_pickle table, but how do I publish and later trigger this DAG?
I could do pickle.dump(dag, open(DAGS_FOLDER/pickled_dags, 'wb')) and have a file in the DAGS_FOLDER that I would pickle.load(DAGS_FOLDER/pickled_dags).
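The "last resort" described above, generating a .py file into the dags folder so the scheduler picks it up and the DAG can then be triggered, might look roughly like the sketch below; the template, paths and dag_id are illustrative assumptions, not the asker's code.

import os
import textwrap

# Hypothetical helper: writes a minimal DAG definition into $AIRFLOW_HOME/dags.
# Once the scheduler parses the new file, the DAG can be triggered from the UI,
# the REST API, or the CLI.
DAGS_FOLDER = os.path.join(os.environ.get("AIRFLOW_HOME", os.path.expanduser("~/airflow")), "dags")

DAG_TEMPLATE = textwrap.dedent("""\
    import airflow
    from airflow import DAG
    from airflow.operators.dummy_operator import DummyOperator

    dag = DAG(
        dag_id="{dag_id}",
        schedule_interval=None,
        start_date=airflow.utils.dates.days_ago(2),
    )
    DummyOperator(task_id="one", dag=dag)
    """)

def publish_dag(dag_id):
    # Render the template and drop it where the scheduler will find it.
    path = os.path.join(DAGS_FOLDER, dag_id + ".py")
    with open(path, "w") as f:
        f.write(DAG_TEMPLATE.format(dag_id=dag_id))
    return path

# publish_dag("sample")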
