How do I resolve an async Tornado fetching future error?

I am trying to use AsyncHTTPClient to GET/POST from a local service that is already running on port 6000,
but I keep getting the error: RuntimeError: Task got bad yield: <tornado.concurrent.Future object at 0x03C9B490>
P.S. I'm using Tornado 4.4.2. This error is fixed in the latest version, but how do I make it work in 4.4.2? Please help!
import asyncio
import time

from tornado.httpclient import AsyncHTTPClient


async def client(url):
    http_client = AsyncHTTPClient()
    response = await http_client.fetch(url)
    return response.body


async def main():
    url = "http://localhost:6000/listings"
    result = await client(url)
    print(result)


if __name__ == "__main__":
    result = asyncio.run(main())
    print(result)
    print(int(time.time() * 1e6))

You can't use asyncio with Tornado prior to version 5.0 without extra setup: asyncio's task machinery does not accept the Future that Tornado 4.x returns, which is exactly the "bad yield" in your traceback.
Use Tornado's own IOLoop to run your program:
from tornado import ioloop

if __name__ == "__main__":
    result = ioloop.IOLoop.current().run_sync(main)
UPDATE: The above solution will work fine, but if you want, you can use asyncio with Tornado 4.x; see tornado.platform.asyncio.AsyncIOMainLoop.
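A minimal sketch of that bridge on Tornado 4.x: install AsyncIOMainLoop so Tornado and asyncio share one event loop, then keep driving the coroutine with Tornado's own runner (an assumption based on the module's documented use; awaiting a Tornado 4.x Future from a plain asyncio task would still raise the bad-yield error):

# Sketch: share asyncio's event loop with Tornado 4.x
from tornado import ioloop
from tornado.platform.asyncio import AsyncIOMainLoop

if __name__ == "__main__":
    AsyncIOMainLoop().install()  # Tornado's IOLoop now wraps asyncio's loop
    result = ioloop.IOLoop.current().run_sync(main)
    print(result)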

Related

Mocking SQLAlchemy test within Strawberry/FastAPI

I'm working on creating unit tests for a FastAPI, Strawberry, and SQLAlchemy setup. The current API is working and returning data correctly, but I cannot figure out how to mock the underlying database for unit tests. Would love any help/guidance to figure out this issue.
Below is the test code I'm currently working with, which I'm hoping will be enough to solve this issue, but I'm happy to post more if it helps. Running this currently produces ExecutionResult(data=None, errors=[GraphQLError("'NoneType' object is not subscriptable", locations=[SourceLocation(line=3, column=13)], path=['biomarkers'])], extensions={}), which seems to indicate that it is almost working but not quite reaching the mocked data within UnifiedAlchemyMagicMock.
import uuid
import unittest
from unittest import mock

import strawberry
from strawberry.extensions import Extension
from alchemy_mock.mocking import UnifiedAlchemyMagicMock

from app.api.api_v1 import api
from app.models import biomarker as biomarker_models


class MockSession:
    '''Create mock session for the db'''
    session = UnifiedAlchemyMagicMock(data=[
        (
            [mock.call.query(biomarker_models.Biomarker)],
            [biomarker_models.Biomarker(
                name="hello",
                id=uuid.UUID('1a8d8791-946c-4fc4-8f5d-1b0c4f5ee2f5'),
                quest_biomarker_code="quest"),
             biomarker_models.Biomarker(
                name="test",
                id=uuid.uuid4(),
                quest_biomarker_code="palazo")]
        )
    ])


class MockRequest(Extension):
    '''Mock request state for context'''
    def on_request_start(self):
        self.execution_context.context["db"] = MockSession()

    def on_request_end(self):
        self.execution_context.context["db"].close()


class BioMarkerTestCase(unittest.TestCase):
    '''Test biomarker'''
    def setUp(self) -> None:
        self.strawberry_schema = strawberry.Schema(
            query=api.Query,
            mutation=api.Mutation,
            extensions=[MockRequest],
            types=api.QUERY_TYPE_LIST)

    def test_query_get_all(self) -> None:
        '''test biomarker query'''
        query = """
            query {
                biomarkers {
                    id
                    name
                    whyItMatters
                    questBiomarkerCode
                    modeOfAcquisition
                    questRefRangeLow
                    questRefRangeHigh
                    optimalRangeLow
                    optimalRangeHigh
                    withinRangeRecommendations
                    belowRangeRecommendations
                    aboveRangeRecommendations
                    crossReferenceBiomarkers
                    notes
                    resourcesCited
                    measurementUnits
                    isCritical
                    resultDataType
                    critical {
                        id
                        biomarkerId
                        isPriority1
                        priority1Range
                        isPriority2
                        priority2Range
                    }
                }
            }
            """
        query_result = self.strawberry_schema.execute_sync(query)
        self.assertIsNotNone(query_result.data)
When using
query_result = self.strawberry_schema.execute_sync(query)
the context_value defaults to None, which I think is the cause of your errors.
Try with:
query_result = self.strawberry_schema.execute_sync(query, context_value={})
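If the resolvers expect to find the session in the context (as the MockRequest extension implies), a variant along these lines may carry the mock all the way through; this is a sketch, assuming the resolvers read context["db"]:

# Sketch: supply the mock session directly via context_value,
# assuming the resolvers look it up under context["db"]
query_result = self.strawberry_schema.execute_sync(
    query,
    context_value={"db": MockSession()},
)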

Should a single Scrapy crawler process, with no concurrency limits, be as fast as multiple crawler processes?

I have set CONCURRENT_REQUESTS, CONCURRENT_REQUESTS_PER_DOMAIN,
and CONCURRENT_REQUESTS_PER_IP to 1,000,000, but a single crawler just cannot be as fast as running multiple crawler processes, each handling part of a list of URLs. Is that to be expected? In fact, if I run 8 crawler processes, the speed is about 8x faster.
I am not sure what I am configuring wrong. I would expect a single crawler process without any rate limit to run at maximum speed, so it should be as fast as running 8 crawler processes. The settings in question are sketched below, followed by the spider code.
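A sketch of those settings (in settings.py or the spider's custom_settings; the DOWNLOAD_DELAY line is an assumption, not stated in the question):

# Concurrency limits as described above
CONCURRENT_REQUESTS = 1_000_000
CONCURRENT_REQUESTS_PER_DOMAIN = 1_000_000
CONCURRENT_REQUESTS_PER_IP = 1_000_000
DOWNLOAD_DELAY = 0  # assumption: no artificial delay between requests

The spider: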
import json
import random
from urllib.parse import urlencode

import scrapy


class XXXSpider(scrapy.Spider):
    name = 'xxx'

    def start_requests(self):
        base_url = 'xxx'
        base_query = 'yyy'
        # headers, params, user_agents, self.tokens and self.words are
        # defined elsewhere in the original script
        for s in self.words:
            token = random.choice(self.tokens)
            headers['token'] = token
            user_agent = random.choice(user_agents)
            headers['User-Agent'] = user_agent
            params['q'] = base_query.format("${:s}".format(s))
            encoded_params = urlencode(params)
            xxx_url = "{:s}?{:s}".format(base_url, encoded_params)
            yield scrapy.Request(url=xxx_url, headers=headers, callback=self.parse)

    def parse(self, response):
        data = json.loads(response.body)
The multiple-crawler-process version:
from multiprocessing import Pool

from scrapy.crawler import CrawlerProcess

if __name__ == "__main__":
    # request_token and words are defined elsewhere in the original script
    num_processes = 32
    pool = Pool(num_processes)
    tokens = pool.map(request_token, range(num_processes))

    concurrency = 8
    process = CrawlerProcess()
    split_size = len(words) // concurrency
    for i in range(0, len(words), split_size):
        split = words[i: i + split_size]
        process.crawl(XXXSpider, tokens=tokens, words=split)
    process.start()
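Note that CrawlerProcess runs every crawl() in the same OS process on one Twisted reactor, so the snippet above is concurrent but not truly multi-process. For genuine multi-process parallelism, each chunk needs its own process; a sketch under that assumption, reusing tokens, words, and XXXSpider from above:

# Sketch: one CrawlerProcess per OS process (hypothetical wiring)
from multiprocessing import Process

from scrapy.crawler import CrawlerProcess

def run_split(tokens, split):
    process = CrawlerProcess()
    process.crawl(XXXSpider, tokens=tokens, words=split)
    process.start()

if __name__ == "__main__":
    concurrency = 8
    split_size = len(words) // concurrency
    workers = []
    for i in range(0, len(words), split_size):
        worker = Process(target=run_split, args=(tokens, words[i:i + split_size]))
        worker.start()
        workers.append(worker)
    for worker in workers:
        worker.join()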

Pact: Error when trying to set up mock provider

I'm trying to write my first pact-python test using pytest. Could someone please tell me what's wrong with my code?
import atexit
import json
import unittest

import pytest
import requests
from pact import Consumer, Provider

pact = Consumer('Consumer').has_pact_with(Provider('Provider'), host_name='mockservice', port=8080)
pact.start_service()
atexit.register(pact.stop_service)


class InterviewDetails(unittest.TestCase):

    def test_candidate_report_api(self):
        candidate_report_payload = {}
        resp = requests.post("http://localhost:1234/users/", data=json.dumps(candidate_report_payload))
        response = json.loads(resp.text)
        return response

    @pytest.mark.health1
    def test_candidate_report(self):
        expected = {}
        (pact.given('Comment')
             .upon_receiving('comment')
             .with_request(method='POST', path="http://localhost:1234/users/", headers={})
             .will_respond_with(200, body=expected))
        with pact:
            pact.setup()
            result = self.test_candidate_report_api()
            self.assertEqual(result, expected)
            pact.verify()
The error from the stack trace:
AttributeError: module 'pact' has no attribute 'Like'
Can you please confirm you're using pact-python from https://github.com/pact-foundation/pact-python/ (and not pactman, a project that is not maintained by the Pact Foundation)?
It might be related to the way you have set up your test.
Here is an example project you can use for reference: https://github.com/pactflow/example-consumer-python/
Relevant test code:
"""pact test for product service client"""
import json
import logging
import os
import requests
from requests.auth import HTTPBasicAuth
import pytest
from pact import Consumer, Like, Provider, Term, Format
from src.consumer import ProductConsumer
log = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)
print(Format().__dict__)
PACT_MOCK_HOST = 'localhost'
PACT_MOCK_PORT = 1234
PACT_DIR = os.path.dirname(os.path.realpath(__file__))
#pytest.fixture
def consumer():
return ProductConsumer(
'http://{host}:{port}'
.format(host=PACT_MOCK_HOST, port=PACT_MOCK_PORT)
)
#pytest.fixture(scope='session')
def pact(request):
pact = Consumer('pactflow-example-consumer-python').has_pact_with(
Provider('pactflow-example-provider-python'), host_name=PACT_MOCK_HOST, port=PACT_MOCK_PORT,
pact_dir="./pacts", log_dir="./logs")
try:
print('start service')
pact.start_service()
yield pact
finally:
print('stop service')
pact.stop_service()
def test_get_product(pact, consumer):
expected = {
'id': "27",
'name': 'Margharita',
'type': 'Pizza'
}
(pact
.given('a product with ID 10 exists')
.upon_receiving('a request to get a product')
.with_request('GET', '/product/10')
.will_respond_with(200, body=Like(expected)))
with pact:
user = consumer.get_product('10')
assert user.name == 'Margharita'
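For context, ProductConsumer is imported from the example repo and not shown above; a hypothetical minimal version consistent with the test might look like this (the real implementation lives in src/consumer.py of that repo):

# Hypothetical sketch of the consumer used in the test above
from types import SimpleNamespace

import requests


class ProductConsumer:
    def __init__(self, base_uri):
        self.base_uri = base_uri

    def get_product(self, product_id):
        response = requests.get("{}/product/{}".format(self.base_uri, product_id))
        response.raise_for_status()
        # Expose fields as attributes so the test can do user.name
        return SimpleNamespace(**response.json())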

Automate PDF downloads using Python

I want to automatically download new PDF files as they are published in a collection of online libraries. I tried the following Python code, but it doesn't work under Python 3, where urllib2 no longer exists. Does anyone know how I can replicate it?
import urllib2

def main():
    download_file("http://mensenhandel.nl/files/pdftest2.pdf")

def download_file(download_url):
    response = urllib2.urlopen(download_url)
    file = open("document.pdf", 'wb')
    file.write(response.read())
    file.close()
    print("Completed")

if __name__ == "__main__":
    main()
Your solution is close; I just made a few minor adjustments, switching to the requests library. The main thing is that you need to write out the response's content attribute. Below is the code:
import requests

def main():
    download_file("http://mensenhandel.nl/files/pdftest2.pdf")

def download_file(download_url):
    response = requests.get(download_url, stream=True)
    # the with block closes the file automatically
    with open("document.pdf", 'wb') as file:
        file.write(response.content)
    print("Completed")

if __name__ == "__main__":
    main()
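Note that response.content buffers the entire file in memory even with stream=True; for large PDFs, a chunked variant like this sketch avoids that:

# Sketch: stream the PDF to disk in chunks instead of buffering it all
import requests

def download_file_streaming(download_url, filename="document.pdf"):
    with requests.get(download_url, stream=True) as response:
        response.raise_for_status()
        with open(filename, "wb") as file:
            for chunk in response.iter_content(chunk_size=8192):
                file.write(chunk)
    print("Completed")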

Issue while adding a job in APScheduler (Python 3.6)

I am trying to schedule my Python script (merchant.py) to run once a week using APScheduler.
For this purpose, I have installed the package, but when I run the following example code with the cron trigger option:
from datetime import datetime
import os
import time

from apscheduler.schedulers.background import BackgroundScheduler


def tick():
    print('Tick! The time is: %s' % datetime.now())


if __name__ == '__main__':
    scheduler = BackgroundScheduler()
    scheduler.add_job(tick, 'cron', day_of_week='mon', hour=11)
    scheduler.start()
    print('Press Ctrl+{0} to exit'.format('Break' if os.name == 'nt' else 'C'))

    try:
        while True:
            time.sleep(2)
    except (KeyboardInterrupt, SystemExit):
        scheduler.shutdown()
I am getting the following errors:
KeyError: 'cron'
LookupError: No trigger by the name "cron" was found
Could you please tell me what I am doing wrong here?
I found one solution, so I'm sharing it for others. Passing a CronTrigger instance directly sidesteps the string-based trigger lookup ('cron') that was failing:
from apscheduler.schedulers.blocking import BlockingScheduler
from apscheduler.triggers.combining import OrTrigger
from apscheduler.triggers.cron import CronTrigger

sched = BlockingScheduler()
trigger = OrTrigger([
    CronTrigger(day_of_week='thu', hour='07', minute='28')
])
sched.add_job(YOUR_JOB, trigger)

try:
    # BlockingScheduler.start() blocks here until interrupted,
    # so no sleep loop is needed
    sched.start()
except (KeyboardInterrupt, SystemExit):
    sched.shutdown()
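Applied to the original goal of running merchant.py once a week, a sketch might look like this (the subprocess wiring is an assumption; only merchant.py and the weekly schedule come from the question):

# Sketch: run merchant.py weekly (Mondays at 11:00); hypothetical wiring
import subprocess

from apscheduler.schedulers.blocking import BlockingScheduler
from apscheduler.triggers.cron import CronTrigger


def run_merchant():
    subprocess.run(["python", "merchant.py"], check=True)


sched = BlockingScheduler()
sched.add_job(run_merchant, CronTrigger(day_of_week='mon', hour=11))

try:
    sched.start()
except (KeyboardInterrupt, SystemExit):
    sched.shutdown()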
