I keep getting an empty task list from APScheduler under uwsgi + nginx

I am writing a website in Flask and deploying it with uwsgi + nginx. I needed a timer to execute tasks periodically, so I used uwsgidecorators. The task should check the status of the scheduler's jobs. To get the list of jobs I call get_jobs(), but the list I get back is always empty.
webapp/__init__.py:
# -*- coding: utf-8 -*-
from gevent import monkey
monkey.patch_all()
import grpc.experimental.gevent
grpc.experimental.gevent.init_gevent()
from flask import Flask, session, request
from config import DevelopConfig, MqttConfig, MailConfig, ProductionConfig
from flask_sqlalchemy import SQLAlchemy
from flask_migrate import Migrate
from flask_mail import Mail
from flask_script import Manager
from flask_socketio import SocketIO
from flask_mqtt import Mqtt
from flask_login import LoginManager
from flask_babel import Babel
from flask_babel_js import BabelJS
from flask_babel import lazy_gettext as _l
from apscheduler.schedulers.gevent import GeventScheduler
app = Flask(__name__)
app.config.from_object(ProductionConfig)
app.config.from_object(MqttConfig)
app.config.from_object(MailConfig)
db = SQLAlchemy(app)
migrate = Migrate(app, db, render_as_batch=True)
mail = Mail(app)
mqtt = Mqtt(app)
manager = Manager(app, db)
login_manager = LoginManager(app)
login_manager.login_view = 'auth'
login_manager.login_message = _l("Необходимо авторизоваться для доступа к закрытой странице")
login_manager.login_message_category = "error"
scheduler = GeventScheduler()
scheduler.start()
scheduler.add_job(publish_async, args=["Hello"], id="job", trigger='interval', seconds=2)
socketio = SocketIO(app, async_mode='gevent_uwsgi') # Production Version
babel = Babel(app)
babeljs = BabelJS(app=app, view_path='/translations/')
import webapp.views
@babel.localeselector
def get_locale():
    # if the user has set up the language manually it will be stored in the session,
    # so we use the locale from the user settings
    try:
        language = session['language']
    except KeyError:
        language = None
    if language is not None:
        print(language)
        return language
    return request.accept_languages.best_match(app.config['LANGUAGES'].keys())
from webapp import models
def publish_async(message):
    print(message)
webapp/tasks.py:
from uwsgidecorators import timer

@timer(10, target='spooler')
def check_run_tasks(args):
    _list_schedulers = _scheduler_method.get_jobs()
    print(_list_schedulers)
wsgi.ini:
[uwsgi]
env = PYTHONIOENCODING=UTF-8
module = wsgi:app
master = true
# processes = 5
enable-threads = true
gevent = 1024
gevent-monkey-patch = true
buffer-size=32768
# lazy-apps = true
socket = /home/sammy/projectnew/projectnew.sock
socket-timeout = 30
chmod-socket = 664
thunder-lock = true
spooler = /home/sammy/projectnew/webapp/mytasks
import = webapp/tasks.py
vacuum = true
die-on-term = true
wsgi.py:
# -*- coding: utf-8 -*-
from webapp import app, socketio
if __name__ == '__main__':
    socketio.run(app, use_reloader=False)
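A likely reason for the empty list: under uWSGI the @timer(10, target='spooler') function runs in the spooler process, while the GeventScheduler created in webapp/__init__.py keeps its jobs in the default in-memory job store of the worker that imported it, so a get_jobs() call from another process sees nothing. Below is a minimal sketch, not the code from the question, of sharing jobs through a persistent APScheduler job store that any process can read; the SQLite URL is an assumption.

# Minimal sketch (assumed sqlite URL): both processes point at the same
# SQLAlchemy job store instead of the default in-memory store.
from apscheduler.schedulers.gevent import GeventScheduler
from apscheduler.jobstores.sqlalchemy import SQLAlchemyJobStore

jobstores = {'default': SQLAlchemyJobStore(url='sqlite:///jobs.sqlite')}

# In the web worker: add jobs as before.
scheduler = GeventScheduler(jobstores=jobstores)
scheduler.start()

# In the timer/spooler process: attach to the same store without running jobs.
reader = GeventScheduler(jobstores=jobstores)
reader.start(paused=True)
print(reader.get_jobs())

Alternatively, keep everything in one process: start the scheduler in a uwsgidecorators.postfork hook inside the worker and query it only from code running in that same worker.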

Related

Multiple jobs getting triggered instead of only one job in MWAA

We are facing an issue with the Amazon Managed Workflows for Apache Airflow (MWAA) service. We created 10 different DAGs with almost the same operators (an AWS Glue job) but different parameters passed to the same job.
Operators in multiple DAGs are being triggered automatically even though they are not scheduled. We tried changing the alias name of each operator in the 10 DAGs, but random operators still get triggered across the 10 DAGs.
Please advise on how to troubleshoot this issue.
We set each job's dependency on the completion of the previous one, but it still does not work as expected.
DAG code for reference:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Created on Wed Sep 21 11:31:03 2022
#author:
"""
#psycopg2-binary
from airflow.operators.bash import BashOperator
from airflow.operators.dummy import DummyOperator
import os
import boto3
import logging
import json
import pandas as pd
from datetime import timedelta
from airflow import DAG
from airflow.models.baseoperator import chain
from airflow.operators.bash import BashOperator
from airflow.operators.dummy import DummyOperator
from airflow.operators.python_operator import PythonOperator
from airflow.providers.amazon.aws.operators.glue import AwsGlueJobOperator
from airflow.utils.dates import days_ago
from datetime import datetime, timedelta
from airflow.providers.postgres.hooks.postgres import PostgresHook
from airflow.utils.log.logging_mixin import LoggingMixin
from airflow.models import TaskInstance
from airflow.models import XCom
from airflow.models import Variable
from airflow.utils.task_group import TaskGroup

DEFAULT_ARGS = {
    "owner": "<<owner_name>>",
    "depends_on_past": False,
    "retries": 0,
    "email_on_failure": False,
    "email_on_retry": False,
}

def work_with_postgress_lims(ti, **kwargs):
    try:
        hook = PostgresHook(postgres_conn_id="<<db_name>>")
        print(kwargs)
        print("Hello from method")
        print(hook)
        id_list = f"SELECT id::varchar,dataobjectname FROM <<table_name>> where systeminfoid ='lims' and process_flag ='Y' and airflow_flag ='Y' order by id;"
        print(hook.schema)
        id_values = hook.get_records(id_list)
        print(id_values)
        stg_list = f"SELECT id::varchar,dataobjectname FROM <<table_name>> where systeminfoid ='lims' and process_flag ='Y' and staging_flag ='Y' and airflow_flag ='Y' order by id;"
        print(hook.schema)
        stg_values = hook.get_records(stg_list)
        print(stg_list)
        Variable.set(key='lims_table_and_dataset_list',
                     value=id_values, serialize_json=True)
        options = Variable.get('lims_table_and_dataset_list',
                               default_var=['default_table'],
                               deserialize_json=True)
        Variable.set(key='lims_stg_table_and_dataset_list',
                     value=stg_values, serialize_json=True)
        options2 = Variable.get('lims_stg_table_and_dataset_list',
                                default_var=['default_table'],
                                deserialize_json=True)
        print(options)
        print(options2)
        return id_values, stg_values
    except Exception as e:
        print(e)

glue_client = boto3.client('glue', region_name='<<region_name>>')
logger = logging.getLogger('airflow.task')

with DAG(
        dag_id='kdh_source_to_curated_lims',
        description="source to curated",
        default_args=DEFAULT_ARGS,
        dagrun_timeout=timedelta(hours=48),
        start_date=datetime(2022, 9, 21, 6, 15, 00),
        concurrency=5,
        max_active_runs=2,
        schedule_interval=None) as dag:

    work_with_postgress_lims = PythonOperator(
        task_id='python_callable_operator_lims',
        python_callable=work_with_postgress_lims,
        do_xcom_push=False,
        provide_context=True
    )
    options_dataset_id = Variable.get('lims_table_and_dataset_list',
                                      default_var=['default_table'],
                                      deserialize_json=True)
    options_stg_dataset_id = Variable.get('lims_stg_table_and_dataset_list',
                                          default_var=['default_table'],
                                          deserialize_json=True)
    kdh_jr_invoke_lims = AwsGlueJobOperator(task_id='kdh_jr_invoke_lims', job_name='kdh_jr_invoke', script_args={'--source_system': 'lims'})

    with TaskGroup('dynamic_raw_tasks_group_lims', prefix_group_id=False,) as dynamic_raw_tasks_group_lims:
        if options_dataset_id:
            for option_dataset_id in options_dataset_id:
                t = AwsGlueJobOperator(task_id=option_dataset_id[1]+'_raw', job_name='kdh-rd_jr', script_args={'--dataset_id': option_dataset_id[0], '--airflow_flag': 'Y'})
                last = DummyOperator(task_id=option_dataset_id[1]+'_raw_end')
                t >> last

    with TaskGroup('dynamic_stg_tasks_group_lims', prefix_group_id=False,) as dynamic_stg_tasks_group_lims:
        if options_stg_dataset_id:
            for option_stg_dataset_id in options_stg_dataset_id:
                t = AwsGlueJobOperator(task_id=option_stg_dataset_id[1]+'_stg', job_name='kdh_dq_staging', script_args={'--source_system': 'lims', '--table': option_stg_dataset_id[1]})
                last = DummyOperator(task_id=option_stg_dataset_id[1]+'_stg_end')
                t >> last

    kdh_jr_curated_lims = AwsGlueJobOperator(task_id='kdh_jr_curated_lims', job_name='kdh_stg_curated', script_args={'--source_system': 'lims'})

    kdh_jr_invoke_lims >> work_with_postgress_lims >> dynamic_raw_tasks_group_lims >> dynamic_stg_tasks_group_lims >> kdh_jr_curated_lims
    #work_with_postgress_lims>>dynamic_raw_tasks_group_lims>>dynamic_stg_tasks_group_lims
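One thing worth checking besides the schedule itself: the Variable.get calls and the TaskGroup loops above run at DAG parse time on every scheduler cycle, while work_with_postgress_lims rewrites those Variables at run time, so the set of generated Glue tasks can change between parses across all 10 DAGs. A minimal sketch (the dag_id and variable key mirror the code above, the rest is illustrative) that keeps the Variable read inside a task and makes the no-backfill behaviour explicit:

# Sketch only; not the original DAG and without the Glue operators.
from datetime import datetime

from airflow import DAG
from airflow.models import Variable
from airflow.operators.python import PythonOperator


def read_table_list(**_):
    # Runs only when the task executes, not every time the DAG file is parsed.
    return Variable.get('lims_table_and_dataset_list',
                        default_var=[], deserialize_json=True)


with DAG(
        dag_id='kdh_source_to_curated_lims_sketch',
        start_date=datetime(2022, 9, 21),
        schedule_interval=None,   # runs only when triggered manually or externally
        catchup=False,            # never backfill runs between start_date and now
) as dag:
    read_options = PythonOperator(
        task_id='read_table_list',
        python_callable=read_table_list,
    )

If Glue jobs still start unexpectedly, the run type recorded on the triggering DAG run (manual, scheduled, or backfill) in the Airflow UI or task logs usually narrows down where the trigger came from.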

How to import an external library in a Qt Python app?

I have an issue when importing an external Python library in a Qt app.
The program crashes when I try to import canlib.
However, no import error is raised; the only message from the application is:
program finished with code -1
When I comment out the canlib import, the program runs fine.
import os
from pathlib import Path
import sys
import random
from Connection import Connection
from PySide6.QtGui import QGuiApplication
from PySide6.QtQml import QQmlApplicationEngine
from PySide6.QtCore import QTimer
from canlib import kvadblib  # here is the error source

if __name__ == "__main__":
    app = QGuiApplication(sys.argv)
    engine = QQmlApplicationEngine()

    # Connection QT <---> Python
    connection = Connection()
    engine.rootContext().setContextProperty("connection", connection)
    # End

    # Hardware Init
    db = kvadblib.Dbc(filename='battery_monitoring_app.dbc')
    ch = communication.open_virtual_channel(1)
    # End

    engine.load(os.fspath(Path(__file__).resolve().parent / "qml/main.qml"))
    if not engine.rootObjects():
        sys.exit(-1)

    ### DO WHILE TRUE STUFF
    def doStuff():
        connection.testSignalVariable.emit(random.uniform(0.00, 2.00))
    ### END

    timer = QTimer()
    timer.timeout.connect(doStuff)
    timer.start(100)

    sys.exit(app.exec_())
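A crash during import with no Python traceback usually means the failure happens while loading the native libraries that the canlib package wraps, and the IDE only reports the exit code. A minimal diagnostic sketch (the try/except and messages are additions, not part of the program above) that isolates the import and prints whatever error does surface:

# Run the suspect import on its own, before any Qt objects are created,
# and print the full traceback instead of exiting silently.
import sys
import traceback

try:
    from canlib import kvadblib
except Exception:
    traceback.print_exc()
    sys.exit('canlib failed to import - see traceback above')

print('canlib imported fine:', kvadblib.__name__)

If this prints a shared-library or DLL load error, check that the Kvaser CANlib runtime is installed and matches the interpreter's architecture; if the process still dies with no traceback at all, the crash is inside the native library itself and cannot be caught from Python.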

Should a single Scrapy crawler process without rate limits be as fast as multiple crawler processes?

I have set CONCURRENT_REQUESTS, CONCURRENT_REQUESTS_PER_DOMAIN
and CONCURRENT_REQUESTS_PER_IP to 1,000,000, but it just cannot be as fast as running multiple crawler processes, each processing part of a list of URLs. Is that to be expected? In fact, if I run 8 crawler processes, the speed is about 8x faster.
I am not sure what I am configuring wrong. I would expect a single crawler process without any rate limit to run at the maximum possible speed, so it should be as fast as running 8 crawler processes.
import csv
import scrapy
import random
from urllib.parse import urlencode
from pprint import pprint
import requests
import re
import json

class XXXSpider(scrapy.Spider):
    name = 'xxx'

    def start_requests(self):
        base_url = 'xxx'
        base_query = 'yyy'
        for s in self.words:
            token = random.choice(self.tokens)
            headers['token'] = token
            user_agent = random.choice(user_agents)
            headers['User-Agent'] = user_agent
            params['q'] = base_query.format("${:s}".format(s))
            encoded_params = urlencode(params)
            xxx_url = "{:s}?{:s}".format(base_url, encoded_params)
            yield scrapy.Request(url=xxx_url, headers=headers, callback=self.parse)

    def parse(self, response):
        data = json.loads(response.body)
Multiple crawler processes:
from scrapy.crawler import CrawlerProcess
import re
import requests
from multiprocessing import Pool
import csv

if __name__ == "__main__":
    num_processes = 32
    pool = Pool(num_processes)
    tokens = pool.map(request_token, range(num_processes))
    concurrency = 8
    process = CrawlerProcess()
    split_size = len(words) // concurrency
    for i in range(0, len(words), split_size):
        split = words[i: i+split_size]
        process.crawl(XXXSpider, tokens=tokens, words=split)
    process.start()
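This is largely expected: a single Scrapy process parses responses on one CPU core, so once the callbacks (json.loads and friends) become the bottleneck, higher concurrency settings no longer help, and splitting the URL list over several processes scales roughly with the number of cores. Before settling on that explanation it is worth ruling out the other per-process caps; a minimal sketch (values are illustrative, not recommendations) of the settings that commonly limit a single process:

import scrapy

class XXXSpider(scrapy.Spider):
    name = 'xxx'

    # Sketch of per-spider settings that often cap single-process throughput.
    custom_settings = {
        'CONCURRENT_REQUESTS': 1000,
        'CONCURRENT_REQUESTS_PER_DOMAIN': 1000,
        'CONCURRENT_REQUESTS_PER_IP': 0,      # 0 disables the per-IP limit
        'DOWNLOAD_DELAY': 0,
        'AUTOTHROTTLE_ENABLED': False,        # AutoThrottle adds delays dynamically when enabled
        'REACTOR_THREADPOOL_MAXSIZE': 20,     # DNS resolution uses this thread pool
        'LOG_LEVEL': 'INFO',                  # DEBUG logging is itself a cost at high request rates
    }

If CPU is indeed the limit, running several processes (as in the second snippet) or moving heavy parsing out of the spider callbacks is the usual answer.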

Pact: Error when trying to set up mock provider

I'm trying to write my first pact-python test using pytest. Could someone please tell me what's wrong with my code?
import unittest
import requests
import json
import pytest
import atexit
from pact import Consumer, Provider

pact = Consumer('Consumer').has_pact_with(Provider('Provider'), host_name='mockservice', port=8080)
pact.start_service()
atexit.register(pact.stop_service)

class InterviewDetails(unittest.TestCase):

    def test_candidate_report_api(self):
        candidate_report_payload = {}
        resp = requests.post("http://localhost:1234/users/", data=json.dumps(candidate_report_payload))
        response = json.loads(resp.text)
        return response

    @pytest.mark.health1
    def test_candidate_report(self):
        expected = {}
        (pact.given('Comment')
            .upon_receiving('comment')
            .with_request(method='POST', path="http://localhost:1234/users/", headers={})
            .will_respond_with(200, body=expected))
        with pact:
            pact.setup()
            result = self.test_candidate_report_api()
            self.assertEqual(result, expected)
            pact.verify()
The error from stacktrace:
AttributeError: module 'pact' has no attribute 'Like'
Can you please confirm you're using pact-python from https://github.com/pact-foundation/pact-python/ (and not pactman, a project that is not maintained by the Pact Foundation)?
It might be related to the way you have set up your test.
Here is an example project you can use for reference: https://github.com/pactflow/example-consumer-python/
Relevant test code:
"""pact test for product service client"""
import json
import logging
import os
import requests
from requests.auth import HTTPBasicAuth
import pytest
from pact import Consumer, Like, Provider, Term, Format
from src.consumer import ProductConsumer
log = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)
print(Format().__dict__)
PACT_MOCK_HOST = 'localhost'
PACT_MOCK_PORT = 1234
PACT_DIR = os.path.dirname(os.path.realpath(__file__))
#pytest.fixture
def consumer():
return ProductConsumer(
'http://{host}:{port}'
.format(host=PACT_MOCK_HOST, port=PACT_MOCK_PORT)
)
#pytest.fixture(scope='session')
def pact(request):
pact = Consumer('pactflow-example-consumer-python').has_pact_with(
Provider('pactflow-example-provider-python'), host_name=PACT_MOCK_HOST, port=PACT_MOCK_PORT,
pact_dir="./pacts", log_dir="./logs")
try:
print('start service')
pact.start_service()
yield pact
finally:
print('stop service')
pact.stop_service()
def test_get_product(pact, consumer):
expected = {
'id': "27",
'name': 'Margharita',
'type': 'Pizza'
}
(pact
.given('a product with ID 10 exists')
.upon_receiving('a request to get a product')
.with_request('GET', '/product/10')
.will_respond_with(200, body=Like(expected)))
with pact:
user = consumer.get_product('10')
assert user.name == 'Margharita'
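Two things stand out in the original test when compared with the example above: with_request() should receive a path such as '/users/', not a full URL, and the request made inside the with pact: block has to target the mock service's host and port. A minimal sketch of the reworked test under those assumptions (provider state, description and payloads are placeholders):

import atexit
import json

import requests
from pact import Consumer, Provider

pact = Consumer('Consumer').has_pact_with(Provider('Provider'),
                                          host_name='localhost', port=1234)
pact.start_service()
atexit.register(pact.stop_service)


def test_candidate_report():
    expected = {}
    (pact.given('a candidate report can be created')
        .upon_receiving('a request to create a candidate report')
        .with_request(method='POST', path='/users/')
        .will_respond_with(200, body=expected))

    # 'with pact:' calls setup() before the block and verify() after it.
    with pact:
        # The code under test must call the mock service's host and port.
        resp = requests.post('http://localhost:1234/users/',
                             data=json.dumps({}))

    assert resp.json() == expected

If the AttributeError about Like persists, printing pact.__file__ in the test environment shows which package is actually being imported (pact-python, pactman, or a local pact.py shadowing it).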

Nginx server with uwsgi, Flask and SleekXMPP

I'm trying to handle some messages using an nginx server with uwsgi, Flask and SleekXMPP.
Here is the code.
import ssl, json, logging, threading, time
from flask import Flask
from sleekxmpp import ClientXMPP
from sleekxmpp.exceptions import IqError, IqTimeout

smsg = """{
"version":1,
"type":"request",
"messageId":"xxyyzz",
"payload":
{
"deviceType":"ctlr",
"command":"getDeviceInfo"
}
}"""

class XMPP(ClientXMPP):
    rosterList = []

    def __init__(self, jid, password):
        ClientXMPP.__init__(self, jid, password)
        self.add_event_handler('session_start', self.session_start, threaded=True)
        self.add_event_handler('message', self.message, threaded=True)
        self.ssl_version = ssl.PROTOCOL_SSLv23

    def session_start(self, event):
        self.send_presence(pshow='online')
        try:
            self.rosterList.append(self.get_roster())
        except IqError as err:
            print 'Error: %s' % err.iq['error']['condition']
        except IqTimeout:
            print 'Error: Request time out'

    def message(self, msg):
        data = msg['body'][12:]
        dictData = json.loads(data)
        print data
        if 'payload' in dictData.keys():
            for lists in dictData['payload']['indexes']:
                print lists
        elif 'message' in dictData.keys():
            print 'Request accepted'

app = Flask(__name__)
#logging.basicConfig(level = logging.DEBUG)
xmpp = XMPP('jid', 'password')

class XmppThread(threading.Thread):
    def __init__(self):
        threading.Thread.__init__(self)

    def run(self):
        if xmpp.connect(('server', '5222')):
            xmpp.process(block=True)

xt = XmppThread()
xt.start()

@app.route('/')
def send():
    xmpp.send_message(mto='receiver', mbody=smsg, mtype='chat')
    return '<h1>Send</h1>'
I run the code with uwsgi using these options.
[uwsgi]
uid = uwsgi
gid = uwsgi
pidfile = /run/uwsgi/uwsgi.pid
emperor = /etc/uwsgi.d
stats = /run/uwsgi/stats.sock
chmod-socket = 660
emperor-tyrant = true
cap = setgid,setuid
[uwsgi]
plugin = python
http-socket = :8080
wsgi-file = /var/www/uwsgi/flask_uwsgi.py
callable = app
module = app
enable-threads = True
logto = /var/www/uwsgi/flask_uwsgi.log
When I run uwsgi from the command line, e.g. '/usr/sbin/uwsgi --ini uwsgi.ini', it works well: I can send and receive messages. But when I run it as a CentOS 7 service, receiving works while sending does not.
Do I need some more options, or am I missing something?
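One difference between the two launch modes is when the module-level xt.start() runs relative to uWSGI forking its workers: a thread started in the master before the fork does not exist in the forked workers, while the worker handling the HTTP request is the one that needs the live XMPP session. A minimal sketch (assuming the XMPP/XmppThread code above and enable-threads = true, as in the config) that starts the connection after each worker forks:

# Sketch: inside flask_uwsgi.py, replace the module-level xt.start() with a
# post-fork hook so every uWSGI worker owns its own running XMPP connection.
from uwsgidecorators import postfork


@postfork
def start_xmpp():
    # XmppThread is the class defined in the code above.
    xt = XmppThread()
    xt.start()

When running as a service it is also worth checking /var/www/uwsgi/flask_uwsgi.log, since the IqError/IqTimeout prints from session_start would land there rather than on a terminal.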
