import asyncio
import aiohttp
from time import perf_counter
import csv

path = "*******************"
domains = []
total_count = 0

with open(path, 'r') as file:
    csvreader = csv.reader(file)
    for row in csvreader:
        try:
            website = row[4].split("//")[-1].split("www.")[-1].split('/')[0]
            if website == "":
                continue
            domains.append(website)
        except:
            continue

sample = domains[0:50]

async def fetch(s, body):
    async with s.post('https://****************', json=body) as r:
        if r.status != 200:
            pass
        enrich_response = await r.json()
        # print(enrich_response)
        employees = enrich_response['employees']
        for employee in employees:
            if employee['job_title'] == "Owner":
                print(employee)
                print("************************************************")
                global total_count
                total_count += 1
                print("Total Count:", total_count)
                continue
            elif employee['job_title'] == "CEO":
                print(employee)
                print("***************************************************")
                total_count += 1
                print("Total Count:", total_count)
                continue
            else:
                continue

async def fetch_all(s, bodies):
    tasks = []
    for body in bodies:
        task = asyncio.create_task(fetch(s, body))
        tasks.append(task)
    res = await asyncio.gather(*tasks)
    return res

async def main():
    # apikeys = list(apikeysone.keys.values())
    bodies = []
    for domain in sample:
        body = {
            "api_key": "********************************",
            "domain": "{}".format(domain)
        }
        bodies.append(body)
    async with aiohttp.ClientSession() as session:
        data = await fetch_all(session, bodies)
        print(data[0])

if __name__ == '__main__':
    start = perf_counter()
    try:
        asyncio.run(main())
    except Exception as e:
        print(e)
        pass
    stop = perf_counter()
    print("Time taken:", stop - start)
Hi!
I'm trying to connect to a scraping service provider using asyncio instead of plain synchronous API calls, but I get a timeout error. How could I use exception handling to wait a few seconds before retrying the request once more, or just skip that task if it fails?
Thank you in advance, fellow coders!
I tried adding continue/pass in a few places, without success.
Try the asyncio.wait_for() function. It takes an awaitable and a timeout value. If the task isn't completed before the timeout, it raises asyncio.exceptions.TimeoutError, which you can handle however you want in an except clause.
A typical example (from the Python docs) is as follows:
import asyncio

async def eternity():
    # Sleep for one hour
    await asyncio.sleep(3600)
    print('yay!')

async def main():
    # Wait for at most 1 second
    try:
        await asyncio.wait_for(eternity(), timeout=1.0)
    except asyncio.TimeoutError:
        print('timeout!')

asyncio.run(main())

# Expected output:
#
#     timeout!
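Applied to the fetch() coroutine above, a retry wrapper could look roughly like this (a minimal sketch; the timeout, retry count, and delay values are arbitrary placeholders, not values from your provider):

async def fetch_with_retry(s, body, retries=2, delay=3, timeout=10):
    # Try the request a few times, sleeping `delay` seconds between attempts;
    # give up and return None (i.e. skip the task) if every attempt times out.
    for attempt in range(retries + 1):
        try:
            return await asyncio.wait_for(fetch(s, body), timeout=timeout)
        except asyncio.TimeoutError:
            if attempt == retries:
                return None
            await asyncio.sleep(delay)

In fetch_all() you would then create tasks with fetch_with_retry(s, body) instead of fetch(s, body).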
I have an async function that computes a list of URLs asynchronously, and I want to use parametrize to generate an async test for each of those URLs, asserting the status code.
What I'm trying to do is something like this:
@pytest.fixture
async def compute_urls():
    urls = await compute_urls_helper()
    return urls

@pytest.mark.asyncio
@pytest.mark.parametrize('url', await compute_urls())
async def test_url(compute_urls, url):
    resp = await get_url(url)
    assert resp.status_code == 200
I know that using 'await' inside the parametrize decorator is not possible, so I would love to hear suggestions for this kind of operation.
You can use asyncio.run to create an event loop just to compute the params:
import asyncio
from unittest.mock import AsyncMock

import pytest

async def compute_urls_helper():
    return ["stackoverflow.com", "jooj.com"]

async def get_url(url: str) -> AsyncMock:
    return AsyncMock(status_code=200)

@pytest.mark.asyncio
@pytest.mark.parametrize("url", asyncio.run(compute_urls_helper()))
async def test_url(url):
    resp = await get_url(url)
    assert resp.status_code == 200
However, I wouldn't recommend using this method frequently because, as stated in the docs:
This function (asyncio.run) always creates a new event loop and closes it at the end. It should be used as a main entry point for asyncio programs, and should ideally only be called once.
Therefore, you can create a session-scoped fixture that yields each url object, so it can be reused across tests, like so:
import asyncio
from unittest.mock import AsyncMock

import pytest

async def compute_urls_helper():
    return ["google.com", "jooj.com"]

# Can be moved to conftest.py in the root of the project
@pytest.fixture(scope="session", params=asyncio.run(compute_urls_helper()))
def url(request):
    return request.param

async def get_url(url: str) -> AsyncMock:
    return AsyncMock(status_code=200)

@pytest.mark.asyncio
async def test_url(url):
    resp = await get_url(url)
    assert resp.status_code == 200
I am using asyncio to send a bunch of requests to a web service concurrently. I cannot use aiohttp.ClientSession because the target service provides a client package with a lot of proprietary functions. Even if I could hack the authentication process to establish a session, I would still have to reimplement those client functions. So I implemented my own async context manager, as in the following code snippet. It seems fine, except that the requests are apparently executed sequentially: it takes 22 seconds, whereas a truly concurrent run costs less than 3 seconds.
import aiohttp, asyncio, requests, time, json

start_time = time.time()

class MyRequests:
    def __init__(self, requests):
        self.requests = requests

    async def __aenter__(self) -> "MyRequests":
        return self

    async def __aexit__(self, exc_type, exc, tb) -> None:
        pass

    def get(self, url) -> "MyResponse":
        return MyResponse(self.requests.get(url))

class MyResponse:
    def __init__(self, resp) -> None:
        self.response = resp
        self.url = self.response.url

    async def __aenter__(self) -> "MyResponse":
        return self

    async def __aexit__(self, exc_type, exc, tb) -> None:
        pass

async def get_pokemon(session, url):
    async with session.get(url) as resp:
        return resp.url

async def main():
    async with MyRequests(requests) as session:
        tasks = []
        for number in range(1, 50):
            url = f'https://pokeapi.co/api/v2/pokemon/{number}'
            tasks.append(asyncio.create_task(get_pokemon(session, url)))

        original_pokemon = await asyncio.gather(*tasks)
        for pokemon in original_pokemon:
            print(pokemon)

asyncio.run(main())
print("--- %s seconds ---" % (time.time() - start_time))
I am trying to set up pytest with httpx.AsyncClient and SQLAlchemy's AsyncSession with FastAPI. Everything practically mimics the tests in the FastAPI Fullstack repo, except for the async parts.
There are no issues with the CRUD unit tests. The issue arises when running API tests using AsyncClient from the httpx lib.
The issue is that any request made by the client only has access to the users (in my case) created before the client fixture was initialized (set up).
My pytest conftest.py setup is like this:
from typing import Dict, Generator, Callable
import asyncio
from fastapi import FastAPI
import pytest
# from sqlalchemy.orm import Session
from sqlalchemy.ext.asyncio import AsyncSession
from httpx import AsyncClient

import os
import warnings
import sqlalchemy as sa
from alembic.config import Config
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.ext.asyncio import create_async_engine
from sqlalchemy.orm import sessionmaker

async def get_test_session() -> Generator:
    test_engine = create_async_engine(
        settings.SQLALCHEMY_DATABASE_URI + '_test',
        echo=False,
    )
    # expire_on_commit=False will prevent attributes from being expired
    # after commit.
    async_sess = sessionmaker(
        test_engine, expire_on_commit=False, class_=AsyncSession
    )
    async with async_sess() as sess, sess.begin():
        yield sess

@pytest.fixture(scope="session")
async def async_session() -> Generator:
    test_engine = create_async_engine(
        settings.SQLALCHEMY_DATABASE_URI + '_test',
        echo=False,
        pool_size=20, max_overflow=0
    )
    # expire_on_commit=False will prevent attributes from being expired
    # after commit.
    async_sess = sessionmaker(
        test_engine, expire_on_commit=False, class_=AsyncSession
    )
    yield async_sess

@pytest.fixture(scope="session")
async def insert_initial_data(async_session: Callable):
    async with async_session() as session, session.begin():
        # insert first superuser - basic CRUD ops to insert data in test db
        await insert_first_superuser(session)
        # inserts test.superuser@example.com
        await insert_first_test_user(session)
        # inserts test.user@example.com

@pytest.fixture(scope='session')
def app(insert_initial_data) -> FastAPI:
    return FastAPI()

@pytest.fixture(scope='session')
async def client(app: FastAPI) -> Generator:
    from app.api.deps import get_session
    app.dependency_overrides[get_session] = get_test_session

    async with AsyncClient(
        app=app, base_url="http://test",
    ) as ac:
        yield ac
    # reset dependencies
    app.dependency_overrides = {}
So in this case, only the superuser test.superuser@example.com and the normal user test.user@example.com are available while running the API tests. For example, the code below is able to fetch the access token just fine:
async def authentication_token_from_email(
    client: AsyncClient, session: AsyncSession,
) -> Dict[str, str]:
    """
    Return a valid token for the user with given email.
    """
    email = 'test.user@example.com'
    password = 'test.user.password'
    user = await crud.user.get_by_email(session, email=email)
    assert user is not None
    data = {"username": email, "password": password}
    response = await client.post(f"{settings.API_V1_STR}/auth/access-token",
                                 data=data)
    auth_token = response.cookies.get('access_token')
    assert auth_token is not None
    return auth_token
But the modified code below doesn't - here I try to insert a new user and then log in to get the access token.
async def authentication_token_from_email(
    client: AsyncClient, session: AsyncSession,
) -> Dict[str, str]:
    """
    Return a valid token for the user with given email.
    If the user doesn't exist it is created first.
    """
    email = random_email()
    password = random_lower_string()
    user = await crud.user.get_by_email(session, email=email)
    if not user:
        user_in_create = UserCreate(email=email,
                                    password=password)
        user = await crud.user.create(session, obj_in=user_in_create)
    else:
        user_in_update = UserUpdate(password=password)
        user = await crud.user.update(session, db_obj=user, obj_in=user_in_update)
    assert user is not None

    # works fine up to this point, user inserted successfully
    # now try to send http request to fetch token, and user is not found in the db
    data = {"username": email, "password": password}
    response = await client.post(f"{settings.API_V1_STR}/auth/access-token",
                                 data=data)
    auth_token = response.cookies.get('access_token')
    # returns None.
    return auth_token
What is going on here? I'd appreciate any help!
It turns out that all I needed to do, for a reason I do not understand, was to define the FastAPI dependency override function inside the client fixture:
before
async def get_test_session() -> Generator:
    test_engine = create_async_engine(
        settings.SQLALCHEMY_DATABASE_URI + '_test',
        echo=False,
    )
    # expire_on_commit=False will prevent attributes from being expired
    # after commit.
    async_sess = sessionmaker(
        test_engine, expire_on_commit=False, class_=AsyncSession
    )
    async with async_sess() as sess, sess.begin():
        yield sess

@pytest.fixture(scope='session')
async def client(app: FastAPI) -> Generator:
    from app.api.deps import get_session
    app.dependency_overrides[get_session] = get_test_session

    async with AsyncClient(
        app=app, base_url="http://test",
    ) as ac:
        yield ac
    # reset dependencies
    app.dependency_overrides = {}
after
#pytest.fixture(scope="function")
async def session(async_session) -> Generator:
async with async_session() as sess, sess.begin():
yield sess
#pytest.fixture
async def client(app: FastAPI, session:AsyncSession) -> Generator:
from app.api.deps import get_session
# this needs to be defined inside this fixture
# this is generate that yields session retrieved from `session` fixture
def get_sess():
yield session
app.dependency_overrides[get_session] = get_sess
async with AsyncClient(
app=app, base_url="http://test",
) as ac:
yield ac
app.dependency_overrides = {}
I'd appreciate any explanation of this behavior. Thanks!
I'm using the latest version (1.0.2) of aiohttp with Python 3.5, and I have the following server code:
import asyncio
from aiohttp.web import Application, Response, StreamResponse, run_app

async def long(request):
    resp = StreamResponse()
    name = request.match_info.get('name', 'Anonymous')
    resp.content_type = 'text/plain'
    for _ in range(1000000):
        answer = ('Hello world\n').encode('utf8')
        await resp.prepare(request)
        resp.write(answer)
    await resp.write_eof()
    return resp

async def init(loop):
    app = Application(loop=loop)
    app.router.add_get('/long', long)
    return app

loop = asyncio.get_event_loop()
app = loop.run_until_complete(init(loop))
run_app(app)
If I then run two curl requests (curl http://localhost:8080/long) in different terminals, only the first one receives data.
My thought was that with asyncio, single-threaded code could start serving another response while one is waiting on I/O.
Most of the code I found online about concurrency with asyncio only covers the client side, not the server side.
Am I missing something, or is my understanding of how asyncio works flawed?
Just add await resp.drain() after resp.write() to give aiohttp a chance to switch between tasks:
import asyncio
from aiohttp.web import Application, Response, StreamResponse, run_app

async def long(request):
    resp = StreamResponse()
    name = request.match_info.get('name', 'Anonymous')
    resp.content_type = 'text/plain'
    await resp.prepare(request)  # prepare should be called once
    for _ in range(1000000):
        answer = ('Hello world\n').encode('utf8')
        resp.write(answer)
        await resp.drain()  # switch point
    await resp.write_eof()
    return resp

async def init(loop):
    app = Application(loop=loop)
    app.router.add_get('/long', long)
    return app

loop = asyncio.get_event_loop()
app = loop.run_until_complete(init(loop))
run_app(app)
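Note that this drain() pattern belongs to old aiohttp releases like the 1.0.2 used in the question. In aiohttp 3.x, StreamResponse.write() is itself a coroutine that handles flow control, so the handler would look roughly like this instead (a sketch for newer versions, not the question's API):

from aiohttp.web import StreamResponse

async def long(request):
    resp = StreamResponse()
    resp.content_type = 'text/plain'
    await resp.prepare(request)  # still called exactly once
    for _ in range(1000000):
        # awaiting write() sends the chunk and yields to the event loop,
        # so other requests can be served while this one streams
        await resp.write(b'Hello world\n')
    await resp.write_eof()
    return resp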
I'm querying Graphite's index.json to get all the metrics. Is there an option to pass a root metric and get only a sub-tree? Something like:
http://<my.graphite>/metrics/index.json?query="my.metric.subtree"
That is not supported.
What you can do, however, is call /metrics/find recursively (call it again for each branch encountered).
Something like this:
#!/usr/bin/python

from __future__ import print_function

import requests
import json
import argparse
try:
    from Queue import Queue
except:
    from queue import Queue
from threading import Thread, Lock
import sys
import unicodedata

outLock = Lock()


def output(msg):
    with outLock:
        print(msg)
        sys.stdout.flush()


class Walker(Thread):
    def __init__(self, queue, url, user=None, password=None, seriesFrom=None, depth=None):
        Thread.__init__(self)
        self.queue = queue
        self.url = url
        self.user = user
        self.password = password
        self.seriesFrom = seriesFrom
        self.depth = depth

    def run(self):
        while True:
            branch = self.queue.get()
            try:
                branch[0].encode('ascii')
            except Exception as e:
                with outLock:
                    sys.stderr.write('found branch with invalid characters: ')
                    sys.stderr.write(unicodedata.normalize('NFKD', branch[0]).encode('utf-8', 'xmlcharrefreplace'))
                    sys.stderr.write('\n')
            else:
                if self.depth is not None and branch[1] == self.depth:
                    output(branch[0])
                else:
                    self.walk(branch[0], branch[1])
            self.queue.task_done()

    def walk(self, prefix, depth):
        payload = {
            "query": (prefix + ".*") if prefix else '*',
            "format": "treejson"
        }
        if self.seriesFrom:
            payload['from'] = self.seriesFrom
        auth = None
        if self.user is not None:
            auth = (self.user, self.password)
        r = requests.get(
            self.url + '/metrics/find',
            params=payload,
            auth=auth,
        )
        if r.status_code != 200:
            sys.stderr.write(r.text + '\n')
            raise Exception(
                'Error walking finding series: branch={branch} reason={reason}'
                .format(branch=unicodedata.normalize('NFKD', prefix).encode('ascii', 'replace'), reason=r.reason)
            )
        metrics = r.json()
        for metric in metrics:
            try:
                if metric['leaf']:
                    output(metric['id'])
                else:
                    self.queue.put((metric['id'], depth + 1))
            except Exception as e:
                output(metric)
                raise e


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--url", help="Graphite URL", required=True)
    parser.add_argument("--prefix", help="Metrics prefix", required=False, default='')
    parser.add_argument("--user", help="Basic Auth username", required=False)
    parser.add_argument("--password", help="Basic Auth password", required=False)
    parser.add_argument("--concurrency", help="concurrency", default=8, required=False, type=int)
    parser.add_argument("--from", dest='seriesFrom', help="only get series that have been active since this time", required=False)
    parser.add_argument("--depth", type=int, help="maximum depth to traverse. If set, the branches at the depth will be printed", required=False)
    args = parser.parse_args()
    url = args.url
    prefix = args.prefix
    user = args.user
    password = args.password
    concurrency = args.concurrency
    seriesFrom = args.seriesFrom
    depth = args.depth

    queue = Queue()
    for x in range(concurrency):
        worker = Walker(queue, url, user, password, seriesFrom, depth)
        worker.daemon = True
        worker.start()

    queue.put((prefix, 0))
    queue.join()
Note: this code comes from: https://github.com/grafana/cloud-graphite-scripts/blob/master/query/walk_metrics.py
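To get just the sub-tree from the question, point the script at a prefix; a hypothetical invocation (substitute your own Graphite URL and metric path) would be:

python walk_metrics.py --url http://my.graphite --prefix my.metric.subtree

This prints every leaf metric under that prefix, using 8 worker threads by default (adjustable with --concurrency).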