import asyncio
from queue import Empty

import billiard as multiprocessing
from celery import Celery
from fastapi import APIRouter, WebSocket, WebSocketDisconnect

from app.sherlock import main

router = APIRouter()

@router.websocket_route("/{username}")
async def handles(websocket: WebSocket):
    username = websocket.path_params["username"]
    await websocket.accept()
    # Run the sherlock search in a separate process and stream its
    # findings back through a multiprocessing queue.
    q = multiprocessing.Queue()
    p = multiprocessing.Process(target=main, args=(q, username))
    p.start()
    while True:
        # Yield to the event loop so other connections are not starved.
        await asyncio.sleep(0)
        try:
            q_result = q.get(block=False)
        except Empty:
            q_result = None
        if q_result:
            try:
                await websocket.send_json(q_result)
            except WebSocketDisconnect:
                p.terminate()
                break
        if not p.is_alive():
            try:
                # The worker is done: flush the last result (if any) and close.
                if q_result:
                    await websocket.send_json(q_result)
                await websocket.close()
            except WebSocketDisconnect:
                p.terminate()
            finally:
                break
So this is what I have so far: a user inputs a username, the backend picks it up, makes about 400 requests to different websites, and returns the data. I used multiprocessing for that.
My issue is that when I host the site and have multiple users on it, the website slows down drastically. My thought is that I may need to use Celery for this, but I don't really know where to start. Does anyone have any ideas? Let me know!
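For anyone wondering where to start with Celery, a minimal sketch could look like the following; the Redis broker/backend URLs and the collect_results helper are assumptions rather than real app code, since main(q, username) currently writes to a multiprocessing queue and would need a variant that returns its findings:

from celery import Celery

celery_app = Celery(
    "sherlock",
    broker="redis://localhost:6379/0",   # assumption: Redis as broker
    backend="redis://localhost:6379/0",  # assumption: Redis as result backend
)

@celery_app.task
def check_username(username: str):
    # Hypothetical helper: a version of the sherlock search that
    # returns a list of results instead of pushing to a queue.
    from app.sherlock import collect_results
    return collect_results(username)

The endpoint could then call check_username.delay(username) and poll the returned AsyncResult (for example with .ready() and .get()) inside the websocket loop instead of managing its own process.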
I have written a web scraper which needs to scrape a few hundred pages asynchronously in Playwright-Python after login.
I came across aiometer by Florimond Manca (https://github.com/florimondmanca/aiometer) to limit requests in the main async function - this works well.
The problem I'm having at the moment is closing the pages after they've been scraped. The async function just increases the number of loaded pages - as it should - but memory consumption rises significantly when a few hundred are loaded.
In the function I'm opening a browser context and passing it to each async scraping request per page, the rationale being that it decreases memory overhead and preserves the state from my login function (implemented in my main script - not shown).
How can I close the pages after they have been scraped (in the scrape function)?
import asyncio
import functools

from playwright.async_api import async_playwright
from bs4 import BeautifulSoup
import pandas as pd
import aiometer

urls = [
    "https://scrapethissite.com/pages/ajax-javascript/#2015",
    "https://scrapethissite.com/pages/ajax-javascript/#2014",
    "https://scrapethissite.com/pages/ajax-javascript/#2013",
    "https://scrapethissite.com/pages/ajax-javascript/#2012",
    "https://scrapethissite.com/pages/ajax-javascript/#2011",
    "https://scrapethissite.com/pages/ajax-javascript/#2010",
]

async def scrape(context, url):
    page = await context.new_page()
    await page.goto(url)
    await page.wait_for_load_state(state="networkidle")
    await page.wait_for_timeout(1000)
    # Getting results off the page
    html = await page.content()
    soup = BeautifulSoup(html, "lxml")
    tables = soup.find_all('table')
    dfs = pd.read_html(str(tables))
    df = dfs[0]
    print("Dataframe in page " + url + " scraped")
    page.close  # <-- the culprit (see the self-answer below)
    return df

async def main(urls):
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=False)
        context = await browser.new_context()
        master_results = pd.DataFrame()
        async with aiometer.amap(
            functools.partial(scrape, context),
            urls,
            max_at_once=5,  # Limit maximum number of concurrently running tasks.
            max_per_second=3,  # Limit request rate to not overload the server.
        ) as results:
            async for data in results:
                print(data)
                master_results = pd.concat([master_results, data], ignore_index=True)
        print(master_results)

asyncio.run(main(urls))
I tried adding the await keyword before page.close or context.close, which throws an error: "TypeError: object method can't be used in 'await' expression".
After reading a few pages, and even the Playwright bug tracker on GitHub (https://github.com/microsoft/playwright/issues/10476), I found the problem:
I forgot to add the parentheses in my page.close call.
await page.close()
So simple, yet it took me hours to get to. Probably part of learning to code.
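For completeness, a fixed-up scrape could close the page in a finally block so it is released even if a step raises; this is just a sketch of the fix, not the original code:

async def scrape(context, url):
    page = await context.new_page()
    try:
        await page.goto(url)
        await page.wait_for_load_state(state="networkidle")
        html = await page.content()
        dfs = pd.read_html(html)  # pandas can parse tables out of the raw HTML
        return dfs[0]
    finally:
        await page.close()  # parentheses and await, as described above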
How to handle path parameters with a hyphen (-) in FastAPI, as Python does not allow hyphens in identifiers?
You can use an alias in your definitions. It's all documented here.
As stated in the comments below, in case the link stops working, here is the code:
from typing import Union

from fastapi import FastAPI, Query

app = FastAPI()

@app.get("/items/")
async def read_items(q: Union[str, None] = Query(default=None, alias="item-query")):
    results = {"items": [{"item_id": "Foo"}, {"item_id": "Bar"}]}
    if q:
        results.update({"q": q})
    return results
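With the alias in place, a request like GET /items/?item-query=somequery returns the item list plus {"q": "somequery"}: the hyphenated name only ever appears in the URL, while inside the function the value is available under the valid identifier q.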
I am building a browser game where every user has 4 types of resources and each user produces more resources based on the level of their farms.
What I am trying to do is, whenever a given user is logged in, recalculate their current resources whenever they refresh the page or perform any action.
Middleware seems like the right tool for my need, but I am a bit confused about the implementation with my current architecture (multiple routers). What would be the cleanest way to call a function that recalculates resources before handling any other API call?
This is what I have tried so far (example middleware):
app.py (without middleware):
from fastapi import FastAPI, Depends, Request
from src.api.v1.village import village_router
from src.api.v1.auth import auth_router
from src.api.v1.admin import admin_router
from src.core.auth import get_current_user
from src.core.config import *

def create_app() -> FastAPI:
    root_app = FastAPI()
    root_app.include_router(
        auth_router,
        prefix="/api/v1",
        tags=["auth"],
    )
    root_app.include_router(
        admin_router,
        prefix="/api/v1",
        tags=["admin"],
        dependencies=[Depends(get_current_user)],
    )
    root_app.include_router(
        village_router,
        prefix="/api/v1",
        tags=["village"],
    )
    return root_app
I then added a hello-world middleware and added get_current_user as a dependency, because a user must be logged in to perform the calculations.
app.py (with middleware):
from fastapi import FastAPI, Depends, Request
from src.api.v1.village import village_router
from src.api.v1.auth import auth_router
from src.api.v1.admin import admin_router
from src.core.auth import get_current_user
from src.core.config import *
import time

def create_app() -> FastAPI:
    root_app = FastAPI()
    root_app.include_router(
        auth_router,
        prefix="/api/v1",
        tags=["auth"],
    )
    root_app.include_router(
        admin_router,
        prefix="/api/v1",
        tags=["admin"],
        dependencies=[Depends(get_current_user)],
    )
    root_app.include_router(
        village_router,
        prefix="/api/v1",
        tags=["village"],
    )

    @root_app.middleware("http")
    async def add_process_time_header(
        request: Request, call_next, current_user=Depends(get_current_user)
    ):
        start_time = time.time()
        response = await call_next(request)
        process_time = time.time() - start_time
        response.headers["X-Process-Time"] = str(process_time)
        print("middleware call")
        return response

    return root_app
It seems the dependency is ignored, because the middleware is called even when I am not logged in, which is not the case for my protected routes (I get a 401 error on those routes if I am not logged in).
async def get_current_user(
    session=Depends(get_db), token: str = Depends(oauth2_scheme)
) -> UserAuth:
    try:
        payload = jwt.decode(token, SECRET_KEY, algorithms=[AUTH_TOKEN_ALGO])
        email: str = payload.get("email")
        user_id: str = payload.get("user_id")
        if email is None:
            raise ValueError("A very specific bad thing happened.")
        token_data = UserJWTToken(user_id=user_id, email=email)
    except jwt.PyJWTError:
        raise ValueError("A very specific bad thing happened.")
    user = get_user_by_email(session, token_data.email)
    if user is None:
        raise ValueError("A very specific bad thing happened.")
    return user
You can make use of Global Dependencies. Here is one example that may help you in this situation:
from fastapi import Depends, FastAPI, Request

def get_db_session():
    print("Calling 'get_db_session(...)'")
    return "Some Value"

def get_current_user(session=Depends(get_db_session)):
    print("Calling 'get_current_user(...)'")
    return session

def recalculate_resources(request: Request, current_user=Depends(get_current_user)):
    print("calling 'recalculate_resources(...)'")
    request.state.foo = current_user

app = FastAPI(dependencies=[Depends(recalculate_resources)])

@app.get("/")
async def root(request: Request):
    return {"foo_from_dependency": request.state.foo}
So it can actually show the info I want in the terminal. But when I prompt it to send that as a Discord message, it appears to attempt to send a blank message. It's probably something stupid, but thank you for looking. The language is Python.
import os
import discord
import requests
import json
import pprint

client = discord.Client()

def get_time():
    response = requests.get("http://api.timezonedb.com/v2.1/get-time-zone?key=W9BJQ3QMGG69&format=json&by=position&lat=37.9838&lng=23.7275")
    return pprint.pprint(response.json())

@client.event
async def on_ready():
    print('We have logged in as {0.user}'.format(client))

@client.event
async def on_message(message):
    if message.author == client.user:
        return
    if message.content.startswith('$petertest'):
        clock = get_time()
        await message.channel.send(clock)

client.run(os.environ['TOKEN'])
You are using the pprint module to print the data to the console itself; that is the issue there. pprint.pprint() writes to stdout and returns None, so clock ends up as None and the bot tries to send an empty message.
Changing your code to simply return the data will fix the error:
return response.json()
If you want to send the formatted JSON data to Discord, you can use json.dumps:

if message.content.startswith('$petertest'):
    clock = get_time()
    clock = json.dumps(clock, indent=4)
    await message.channel.send(clock)
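Putting both fixes together, a minimal corrected version of the bot might look like this (same trigger word and API call as above, an untested sketch):

import os
import json

import discord
import requests

client = discord.Client()

def get_time():
    response = requests.get(
        "http://api.timezonedb.com/v2.1/get-time-zone"
        "?key=W9BJQ3QMGG69&format=json&by=position&lat=37.9838&lng=23.7275"
    )
    return response.json()  # return the parsed data instead of printing it

@client.event
async def on_message(message):
    if message.author == client.user:
        return
    if message.content.startswith('$petertest'):
        clock = get_time()
        await message.channel.send(json.dumps(clock, indent=4))

client.run(os.environ['TOKEN'])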
I want to develop a web-socket watcher in Python in such a way that when I send something, it waits until the response is received (sort of like blocking socket programming). I know it is weird; basically, I want to make a command-line Python 3.6 tool that can communicate with the server WHILE KEEPING THE SAME CONNECTION LIVE for all the commands coming from the user.
I can see that the snippet below is pretty typical with Python 3.6.
import asyncio
import websockets
import json
import traceback

async def call_api(msg):
    async with websockets.connect('wss://echo.websocket.org') as websocket:
        await websocket.send(msg)
        while websocket.open:
            response = await websocket.recv()
            return response

print(asyncio.get_event_loop().run_until_complete(call_api("test 1")))
print(asyncio.get_event_loop().run_until_complete(call_api("test 2")))
but this creates a new ws connection for every command, which defeats the purpose. One might say "you've got to use the async handler", but I don't know how to synchronize the ws response with the user input from the command prompt.
I am thinking that if I could make the async coroutine (call_api) work like a generator, where it has a yield statement instead of return, then I could probably do something like the below:
async def call_api(msg):
    async with websockets.connect('wss://echo.websocket.org') as websocket:
        await websocket.send(msg)
        while websocket.open:
            response = await websocket.recv()
            msg = yield response

generator = call_api("cmd1")
cmd = input(">>>")
while cmd != 'exit':
    result = next(generator.send(cmd))
    print(result)
    cmd = input(">>>")
Please let me know your valuable comments.
Thank you
This can be achieved using an asynchronous generator (PEP 525).
Here is a working example:
import random
import asyncio

async def accumulate(x=0):
    while True:
        x += yield x
        await asyncio.sleep(1)

async def main():
    # Initialize
    agen = accumulate()
    await agen.asend(None)
    # Accumulate random values
    while True:
        value = random.randrange(5)
        print(await agen.asend(value))

asyncio.run(main())
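Adapting that pattern to the websocket case from the question, one connection can be kept open across commands by yielding each response back to the caller. A sketch against the same echo endpoint; the fixed command list stands in for real user input:

import asyncio
import websockets

async def call_api():
    # The connection is opened once and reused for every command sent in.
    async with websockets.connect('wss://echo.websocket.org') as websocket:
        response = None
        while True:
            msg = yield response       # receive the next command from asend()
            await websocket.send(msg)
            response = await websocket.recv()

async def main():
    agen = call_api()
    await agen.asend(None)             # prime the generator; opens the connection
    for cmd in ["test 1", "test 2"]:
        print(await agen.asend(cmd))
    await agen.aclose()                # close the generator and the connection

asyncio.run(main())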