Trouble in connecting scrapy with Firebase - firebase

I'm new to scrapy and want to connect it to firestore so that I may save my results there.
Here is my settings.py (just mentioning related code)
ITEM_PIPELINES = {
'flights_i_spider.pipelines.FirebasePipeline': 300,
}
Is there anything else related to firebase that needs to be included to the settings.py file? because ITEM_PIPELINES was just added to settings.py.
here is my pipelines.py:
import firebase_admin
from firebase_admin import credentials, firestore
from google.cloud import firestore
class FirebasePipeline:
def __init__(self):
cred = credentials.Certificate('./service_key.json')
firebase_admin.initialize_app(cred,
{
'databaseURL': 'https://abcdefghijklmno.firebaseio.com'
}
)
def process_item(self, item, spider):
db = firestore.Client()
doc_ref = db.collection('output_data')
doc_ref.add(item.to_dict())
# doc_ref.insert(dict(item))
return item
Please don't mention this https://github.com/skhaz/scrapy-firebase because this did not work for me.

Related

pytest with httpx.AsyncClient cannot find newly created database records

I am trying to setup pytest with httpx.AsyncClient and sqlalchemy AsyncSession with FastAPI. Everything practically mimics the tests in FastAPI Fullstack repo, except for async stuff.
No issues with CRUD unit tests. The issue arises when running API tests using AsyncClient from httpx lib.
The issue is, any request made by client only has access to the users (in my case) created before initializing (setting up) the client fixture.
My pytest conftest.py setup is like this:
from typing import Dict, Generator, Callable
import asyncio
from fastapi import FastAPI
import pytest
# from sqlalchemy.orm import Session
from sqlalchemy.ext.asyncio import AsyncSession
from httpx import AsyncClient
import os
import warnings
import sqlalchemy as sa
from alembic.config import Config
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.ext.asyncio import create_async_engine
from sqlalchemy.orm import sessionmaker
async def get_test_session() -> Generator:
test_engine = create_async_engine(
settings.SQLALCHEMY_DATABASE_URI + '_test',
echo=False,
)
# expire_on_commit=False will prevent attributes from being expired
# after commit.
async_sess = sessionmaker(
test_engine, expire_on_commit=False, class_=AsyncSession
)
async with async_sess() as sess, sess.begin():
yield sess
#pytest.fixture(scope="session")
async def async_session() -> Generator:
test_engine = create_async_engine(
settings.SQLALCHEMY_DATABASE_URI + '_test',
echo=False,
pool_size=20, max_overflow=0
)
# expire_on_commit=False will prevent attributes from being expired
# after commit.
async_sess = sessionmaker(
test_engine, expire_on_commit=False, class_=AsyncSession
)
yield async_sess
#pytest.fixture(scope="session")
async def insert_initial_data(async_session:Callable):
async with async_session() as session, session.begin():
# insert first superuser - basic CRUD ops to insert data in test db
await insert_first_superuser(session)
# insert test.superuser#example.com
await insert_first_test_user(session)
# inserts test.user#example.com
#pytest.fixture(scope='session')
def app(insert_initial_data) -> FastAPI:
return FastAPI()
#pytest.fixture(scope='session')
async def client(app: FastAPI) -> Generator:
from app.api.deps import get_session
app.dependency_overrides[get_session] = get_test_session
async with AsyncClient(
app=app, base_url="http://test",
) as ac:
yield ac
# reset dependencies
app.dependency_overrides = {}
So in this case, only the superuser test.superuser#example.com and normal user test.user#example.com are available during running API tests. e.g., code below is able to fetch the access token just fine:
async def authentication_token_from_email(
client: AsyncClient, session: AsyncSession,
) -> Dict[str, str]:
"""
Return a valid token for the user with given email.
"""
email = 'test.user#example.com'
password = 'test.user.password'
user = await crud.user.get_by_email(session, email=email)
assert user is not None
data = {"username": email, "password": password}
response = await client.post(f"{settings.API_V1_STR}/auth/access-token",
data=data)
auth_token = response.cookies.get('access_token')
assert auth_token is not None
return auth_token
but, the modified code below doesn't - here I try to insert new user, and then log in to get access token.
async def authentication_token_from_email(
client: AsyncClient, session: AsyncSession,
) -> Dict[str, str]:
"""
Return a valid token for the user with given email.
If the user doesn't exist it is created first.
"""
email = random_email()
password = random_lower_string()
user = await crud.user.get_by_email(session, email=email)
if not user:
user_in_create = UserCreate(email=email,
password=password)
user = await crud.user.create(session, obj_in=user_in_create)
else:
user_in_update = UserUpdate(password=password)
user = await crud.user.update(session, db_obj=user, obj_in=user_in_update)
assert user is not None
# works fine up to this point, user inserted successfully
# now try to send http request to fetch token, and user is not found in the db
data = {"username": email, "password": password}
response = await client.post(f"{settings.API_V1_STR}/auth/access-token",
data=data)
auth_token = response.cookies.get('access_token')
# returns None.
return auth_token
What is going on here ? Appreciate any help!
Turns out all I needed to do is, for reason I do not understand, is to define the FastAPI dependency override function inside the client fixture:
before
async def get_test_session() -> Generator:
test_engine = create_async_engine(
settings.SQLALCHEMY_DATABASE_URI + '_test',
echo=False,
)
# expire_on_commit=False will prevent attributes from being expired
# after commit.
async_sess = sessionmaker(
test_engine, expire_on_commit=False, class_=AsyncSession
)
async with async_sess() as sess, sess.begin():
yield sess
#pytest.fixture(scope='session')
async def client(app: FastAPI) -> Generator:
from app.api.deps import get_session
app.dependency_overrides[get_session] = get_test_session
async with AsyncClient(
app=app, base_url="http://test",
) as ac:
yield ac
# reset dependencies
app.dependency_overrides = {}
after
#pytest.fixture(scope="function")
async def session(async_session) -> Generator:
async with async_session() as sess, sess.begin():
yield sess
#pytest.fixture
async def client(app: FastAPI, session:AsyncSession) -> Generator:
from app.api.deps import get_session
# this needs to be defined inside this fixture
# this is generate that yields session retrieved from `session` fixture
def get_sess():
yield session
app.dependency_overrides[get_session] = get_sess
async with AsyncClient(
app=app, base_url="http://test",
) as ac:
yield ac
app.dependency_overrides = {}
I'd appreciate any explanation of this behavior. Thanks!

TypeError: register_tortoise() got an unexpected keyword argument 'add_exeption_handlers'

Code was copies from https://testdriven.io/courses/tdd-fastapi/postgres-setup/ but it show an exeption while running with uvicorn
import os
from fastapi import FastAPI, Depends
from tortoise.contrib.fastapi import register_tortoise
from app.config import get_settings, Settings
app = FastAPI()
register_tortoise(
app,
db_url=os.environ.get("DATABASE_URL"),
modules={"models": ["app.models.tortoise"]},
generate_schemas=True,
add_exeption_handlers=True,
)
#app.get("/ping")
async def pong(settings: Settings = Depends(get_settings)):
return {
"ping": "pong!",
"environment": settings.environment,
"testing": settings.testing
}

How to get graphiql ide while dependancy Injection in graphql fastapi?

Here is my code so far, I want to inject AuthJWT as dependancy:
from starlette.graphql import GraphQLApp
from starlette.requests import Request as Rq
from fastapi_jwt_auth import AuthJWT
graphql_app = GraphQLApp(schema=graphene.Schema(query=Query, mutation=Mutation))
#router.post("/gql")
async def graph(request: Rq, Authorize: AuthJWT = Depends() ):
request.state.authorize = Authorize
return await graphql_app.handle_graphql(request=request)
app.include_router(router)
Its working fine with post request on insomnia ide but I am unable to see the graphiql ide in the browser in "localhost:8000/gql" url
It gives error: "GET /gql HTTP/1.1" 405 Method Not Allowed
How can I get the graphiql ide?
Is it possible to get the ide by Custom Request and APIRoute class? https://fastapi.tiangolo.com/advanced/custom-request-and-route/
Answer by #IndominusByte
import graphene
from fastapi import FastAPI, Request, Depends
from fastapi_jwt_auth import AuthJWT
from starlette.graphql import GraphQLApp
from starlette.datastructures import URL
from pydantic import BaseModel
class Settings(BaseModel):
authjwt_secret_key: str = "secret"
#AuthJWT.load_config
def get_config():
return Settings()
class Query(graphene.ObjectType):
hello = graphene.String(name=graphene.String(default_value="stranger"))
def resolve_hello(self, info, name):
authorize = info.context['request'].state.authorize
access_token = authorize.create_access_token(subject=name)
return "Hello " + name + "access_token" + access_token
app = FastAPI()
graphql_app = GraphQLApp(schema=graphene.Schema(query=Query))
# graphiql ide path here
#app.get('/')
async def graphiql(request: Request):
request._url = URL('/gql')
return await graphql_app.handle_graphiql(request=request)
# use the path for frontend request
#app.post('/gql')
async def graphql(request: Request, authorize: AuthJWT = Depends()):
request.state.authorize = authorize
return await graphql_app.handle_graphql(request=request)
https://github.com/IndominusByte/fastapi-jwt-auth/issues/28

TypeError: 'coroutine' object is not subscriptable in python Quart Framework

from quart import Quart, request, render_template, jsonify
import json
import os, sys
import pandas as pd
import requests
import asyncio
from pylon.model.db_models import RawFiles
from pylon.orm import db
app = Quart(__name__)
#app.route('/upload', methods=['POST'])
async def handle_form():
f = await request.files['filename']
f.save(f.filename)
data = pd.read_csv(f.filename)
data.to_json("json_data.json")
data = pd.read_json("json_data.json")
os.remove("json_data.json")
os.remove(f.filename)
print(type(data))
print(data)
return ""
#app.route("/")
async def index():
return await render_template('upload.html')
if __name__ == "__main__":
app.run(host="bheem11.arch.des.co", port=5043, debug = True)
I am getting one error described in title. I am working in quartz framework in python. Hoping for proper solution. Actually i am getting coroutine error when #app.route("/upload", methods = "post") execute.
This line await request.files['filename'] should be (await request.files)['filename']. Without the parenthesis everything to the right of await is evaluated first, which results in the attempt to subscribe (['filename'] operation) the files attribute. This doesn't work as the files attribute returns a coroutine - which is not subscriptable. There is more on this in the Quart documentation.

Tornado performance issue with MySQLand Redis

I have a tornado server running with MySQL for DB and Redis for cache. I am using web socket to send/receive data. My code is like this:
Server
import logging
import os.path
import uuid
import sys
import json
import tornadis
import tormysql
import tornado.escape
import tornado.ioloop
import tornado.options
import tornado.web
import tornado.websocket
from tornado import gen
from tornado.concurrent import Future
from tornado.options import define, options
#gen.coroutine
def getFromDB(query):
with (yield dbPool.Connection()) as conn:
with conn.cursor() as cursor:
yield cursor.execute(query)
datas = cursor.fetchall()
return datas
return None
#gen.coroutine
def getFromCache(cmd):
pipeline = tornadis.Pipeline()
pipeline.stack_call(cmd)
with (yield cachePool.connected_client()) as singleClient:
redisResult = yield singleClient.call(pipeline)
if isinstance(redisResult, tornadis.TornadisException):
print("Redis exception: %s"%(redisResult))
else:
return redisResult
async def getData(dbQuery, cacheQuery):
waitDict = {}
if dbQuery:
waitDict['db'] = getFromDB(dbQuery)
if cacheQuery:
waitDict['cache'] = getFromCache(cacheQuery)
resultList = []
if len(waitDict) > 0:
await gen.multi(waitDict)
if 'db' in waitDict:
dbRes = waitDict['db'].result()
if dbRes:
for eachResult in dbRes:
changeRes = someFunct(eachResult)
resultList.append(changeRes)
if 'cache' in waitDict:
cacheRes = waitDict['cache'].result()
if cacheRes:
for eachResult in cacheRes:
changeRes = someFunct(eachResult)
resultList.append(changeRes)
return resultList
class SocketHandler(tornado.websocket.WebSocketHandler):
SUPPORTED_METHODS = ("GET")
def open(self):
print("Socket open:%s"%(self))
def on_close(self):
print("Socket closed:%s"%(self))
async def on_message(self, inp):
if requestForData:
ret = await getData(dbQuery, cacheQuery)
self.write_message(ret)
class Application(tornado.web.Application):
def __init__(self):
handlers = [
(r"/sock", SocketHandler),
]
define("port", default=8000, help="run on the given port", type=int)
tornado.options.parse_command_line()
app = Application()
app.listen(options.port)
print("PORT:%s"%(options.port))
tornado.ioloop.IOLoop.current().start()
I am using tornadis for Redis and tormysql for MySQL.
I am running this setup on amazon linux instance m5.large with 2vCPUs memeory:8Gib.
Client
I am trying to simulate the traffic using web socket. The code is like this:
import sys
import json
import asyncio
import websockets
def getData():
for i in range(100):
async with websockets.connect(SOCKET_URL, extra_headers=extraHeaders) as websocket:
for i an range(100):
await websocket.send("get data")
reply = await websocket.recv()
print(reply)
asyncio.get_event_loop().run_until_complete(getData())
I am running multiple instance of the client.
The server is running good but its able to handle only 25 connections. After 25 connections the delay for the reply from the server increases. I want server to reply to be very fast. How do I decrease the delay for the response? So is there any problem in the code?

Resources