The view articles.views.article_detail didn't return an HttpResponse object. It returned None instead

My django project has this articles app in it which has the following urls.py
from django.urls import path
from . import views

urlpatterns = [
    path('', views.articles_list),
    path("<slug:slug>/", views.article_detail),
]
and the following is the views.py
from django.shortcuts import render
from django.http import HttpResponse
from .models import Article

def articles_list(request):
    articles = Article.objects.all().order_by('date')
    return render(request, 'articles\list.html', {'Articles': articles})

def article_detail(request, slug):
    HttpResponse(slug)
Now when I request /articles/hello-world/ my desired output is a web page showing hello-world, but that is not the case: it raises the ValueError above.
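The error message points at the fix: article_detail builds an HttpResponse but never returns it, so the view hands Django None. A minimal sketch of the corrected views.py (also switching the template path to forward slashes, which Django's template loaders expect on every platform):
from django.shortcuts import render
from django.http import HttpResponse
from .models import Article

def articles_list(request):
    articles = Article.objects.all().order_by('date')
    # forward slash: template loaders use POSIX-style paths
    return render(request, 'articles/list.html', {'Articles': articles})

def article_detail(request, slug):
    # the missing return was the cause of the ValueError
    return HttpResponse(slug)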

Related

How to handle path parameters with a hyphen (-) in a FastAPI application?

How to handle path parameters with a hyphen (-) in FastAPI, given that Python does not allow hyphens in identifiers?
You can use an alias in your definitions. It's all documented here.
As stated in the comments below, here is the code in case the link stops working:
from typing import Union
from fastapi import FastAPI, Query

app = FastAPI()

@app.get("/items/")
async def read_items(q: Union[str, None] = Query(default=None, alias="item-query")):
    results = {"items": [{"item_id": "Foo"}, {"item_id": "Bar"}]}
    if q:
        results.update({"q": q})
    return results
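A quick way to see the alias in action, as a hypothetical check with FastAPI's test client (assuming the app above lives in main.py):
from fastapi.testclient import TestClient
from main import app

client = TestClient(app)
# the hyphenated query name maps onto the q parameter via the alias
resp = client.get("/items/?item-query=hello")
print(resp.json())  # {'items': [...], 'q': 'hello'}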

FastAPI - dependency inside Middleware?

I am building a browser game where every user has 4 types of resources, and each user produces more resources based on the level of their farms.
What I am trying to do is, whenever a given user is logged in, recalculate their current resources whenever they refresh the page or perform any action.
Middleware seems to be the right tool for this, but I am a bit confused about the implementation with my current architecture (multiple routers). What would be the cleanest way to call a function that recalculates resources before handling any other API call?
This is what I have tried so far (example middleware):
app.py (without middleware):
from fastapi import FastAPI, Depends, Request
from src.api.v1.village import village_router
from src.api.v1.auth import auth_router
from src.api.v1.admin import admin_router
from src.core.auth import get_current_user
from src.core.config import *

def create_app() -> FastAPI:
    root_app = FastAPI()
    root_app.include_router(
        auth_router,
        prefix="/api/v1",
        tags=["auth"],
    )
    root_app.include_router(
        admin_router,
        prefix="/api/v1",
        tags=["admin"],
        dependencies=[Depends(get_current_user)],
    )
    root_app.include_router(
        village_router,
        prefix="/api/v1",
        tags=["village"],
    )
    return root_app
I then added a hello-world middleware and added get_current_user as a dependency, because a user must be logged in to perform the calculations.
app.py (with middleware):
from fastapi import FastAPI, Depends, Request
from src.api.v1.village import village_router
from src.api.v1.auth import auth_router
from src.api.v1.admin import admin_router
from src.core.auth import get_current_user
from src.core.config import *
import time

def create_app() -> FastAPI:
    root_app = FastAPI()
    root_app.include_router(
        auth_router,
        prefix="/api/v1",
        tags=["auth"],
    )
    root_app.include_router(
        admin_router,
        prefix="/api/v1",
        tags=["admin"],
        dependencies=[Depends(get_current_user)],
    )
    root_app.include_router(
        village_router,
        prefix="/api/v1",
        tags=["village"],
    )

    @root_app.middleware("http")
    async def add_process_time_header(
        request: Request, call_next, current_user=Depends(get_current_user)
    ):
        start_time = time.time()
        response = await call_next(request)
        process_time = time.time() - start_time
        response.headers["X-Process-Time"] = str(process_time)
        print("middleware call")
        return response

    return root_app
It seems the dependency is ignored, because the middleware is called even when I am not logged in. That is not the case for my protected routes (I get a 401 error on those routes if I am not logged in).
async def get_current_user(
    session=Depends(get_db), token: str = Depends(oauth2_scheme)
) -> UserAuth:
    try:
        payload = jwt.decode(token, SECRET_KEY, algorithms=[AUTH_TOKEN_ALGO])
        email: str = payload.get("email")
        user_id: str = payload.get("user_id")
        if email is None:
            raise ValueError("A very specific bad thing happened.")
        token_data = UserJWTToken(user_id=user_id, email=email)
    except jwt.PyJWTError:
        raise ValueError("A very specific bad thing happened.")
    user = get_user_by_email(session, token_data.email)
    if user is None:
        raise ValueError("A very specific bad thing happened.")
    return user
You can make use of global dependencies. FastAPI resolves Depends() only for path operations (and router/app-level dependencies), not for middleware parameters, which is why your middleware runs even for anonymous users. Here is one example that may help in this situation:
from fastapi import Depends, FastAPI, Request

def get_db_session():
    print("Calling 'get_db_session(...)'")
    return "Some Value"

def get_current_user(session=Depends(get_db_session)):
    print("Calling 'get_current_user(...)'")
    return session

def recalculate_resources(request: Request, current_user=Depends(get_current_user)):
    print("calling 'recalculate_resources(...)'")
    request.state.foo = current_user

app = FastAPI(dependencies=[Depends(recalculate_resources)])

@app.get("/")
async def root(request: Request):
    return {"foo_from_dependency": request.state.foo}

BeautifulSoup.text returns blank string in VSCode, but works fine in Google Colab

I am trying to scrape this website https://understat.com/league/EPL.
Once I have parsed the page:
import json
from bs4 import BeautifulSoup
from urllib.request import urlopen

scrape_urlEPL = "https://understat.com/league/EPL"
page_connect = urlopen(scrape_urlEPL)
page_html = BeautifulSoup(page_connect, "html.parser")
Then I search for "script" tags in the HTML.
page_html.findAll(name="script")
This gives me a list of all occurrences of "script". Say I want to extract the text from the third element. Just printing the HTML for this element shows a valid output.
page_html.findAll(name="script")[3]
The output:
<script>
var playersData = JSON.parse('\x5B\x7B\x22id\x22\x3A\x221389\x22,\x22player_name\x22\x3A\x22Jorginho\x22,\x22games\x22\x3A\x2228\x22,\x22time\x22\x3A\x222022\x22,\x22goals\x22\x3A\x227\x22,\x22xG\x22\x3A\x226.972690678201616\x22,\x22assists\x22\x3A\x221\x22,\x22xA\x22\x3A\x221.954869382083416\x22,\x22shots\x22\x3A\x2214\x22,\x22key_passes\x22\x3A\x2224\x22,\x22yellow_cards\x22\x3A\x222\x22,\x22red_cards\x22\x3A\x220\x22,\x22position\x22\x3A\x22M\x20S\x22,\x2....
Now if I want to extract the text from this,
page_html.findAll(name="script")[3].text
This gives an empty string ''.
However the same code works fine in Google Colab and returns:
'\n\tvar playersData\t= JSON.parse('\\x5B\\x7B\\x22id\\x22\\x3A\\x22647\\x22,\\x22player_name\\x22\\x3A\\x22Harry\\x20Kane\\x22,\\x22games\\x22\\x3A\\x2235\\x22,\\x22time\\x22\\x3A\\x223097\\x22,\\x22goals\\x22\\x3A\\x2223\\x22,\\x22xG\\x22\\x3A\\x2222.174858909100294\\x22,\\x22assists\\x22\\x3A\\x2214\\x22,\\x22xA\\x22\\x3A\\x227.577093588188291\\x22,\\x22shots\\x22\\x3A\\x22138\\x22,\\x22key_passes\\x22\\x3A\\x2249...'
which is as expected. I don't understand why this error comes up in VSCode.
Note that the script tag holds a string, which is not considered text.
JSON.parse is a JavaScript function which parses a string.
You have to use .string instead of .text:
import httpx
import trio
from bs4 import BeautifulSoup

async def main():
    async with httpx.AsyncClient(timeout=None) as client:
        r = await client.get('https://understat.com/league/EPL')
        soup = BeautifulSoup(r.text, 'lxml')
        goal = soup.select('script')[3].string
        print(goal)

if __name__ == "__main__":
    trio.run(main)
Ref : Bs4 difference between string and text
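As for why Colab and VSCode differ: this is most likely a Beautiful Soup version difference, since newer bs4 releases stopped treating the contents of script and style tags as text, so .text / get_text() skips them while .string still returns the raw contents. A minimal sketch illustrating the difference (exact behavior depends on your installed bs4 version):
from bs4 import BeautifulSoup

soup = BeautifulSoup("<script>var x = 1;</script>", "html.parser")
tag = soup.script
print(repr(tag.string))  # 'var x = 1;' -- the tag's single child string
print(repr(tag.text))    # may be '' on newer bs4, which excludes script/style from text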

How to set an OpenTelemetry span attribute for a FastAPI route?

Adding a tag to a trace's span can be very useful to later analyse the tracing data and slice & dice it by the wanted tag.
After reading the OpenTelemetry docs, I couldn't figure out a way to add a custom tag to a span.
Here is my sample FastAPI application, already instrumented with OpenTelemetry:
"""main.py"""
from typing import Dict
import fastapi
from opentelemetry import trace
from opentelemetry.sdk.trace.export import (
ConsoleSpanExporter,
SimpleExportSpanProcessor,
)
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
from opentelemetry.sdk.trace import TracerProvider
trace.set_tracer_provider(TracerProvider())
trace.get_tracer_provider().add_span_processor(SimpleExportSpanProcessor(ConsoleSpanExporter()))
app = fastapi.FastAPI()
#app.get("/user/{id}")
async def get_user(id: int) -> Dict[str, str]:
"""Test endpoint."""
return {"message": "hello user!"}
FastAPIInstrumentor.instrument_app(app)
You can run it with uvicorn main:app --reload
How can I add the user id to the span?
After reading the source code of the ASGI instrumentation's OpenTelemetryMiddleware (here), I realised you can simply get the current span and set the tag (or attribute, in OpenTelemetry terminology); the span will then be exported with all its attributes.
#app.get("/user/{id}")
async def get_user(id: int) -> Dict[str, str]:
"""Test endpoint."""
# Instrument the user id as a span tag
current_span = trace.get_current_span()
if current_span:
current_span.set_attribute("user_id", id)
return {"message": "hello user!"}

Scrapy does not extract data

I am trying to get some technical information about automobiles from this page.
Here is my current code:
import scrapy
import re
from arabamcom.items import ArabamcomItem
from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor

class BasicSpider(CrawlSpider):
    name = "arabamcom"
    allowed_domains = ["arabam.com"]
    start_urls = ['https://www.arabam.com/ikinci-el/otomobil']
    rules = (Rule(LinkExtractor(allow=(r'/ilan')), callback="parse_item", follow=True),)

    def parse_item(self, response):
        item = ArabamcomItem()
        item['fiyat'] = response.css('span.color-red.font-huge.bold::text').extract()
        item['marka'] = response.css('p.color-black.bold.word-break.mb4::text').extract()
        item['yil'] = response.xpath('//*[@id="js-hook-appendable-technicalPropertiesWrapper"]/div[2]/dl[1]/dd/span/text()').extract()
And this is my items.py file
import scrapy

class ArabamcomItem(scrapy.Item):
    fiyat = scrapy.Field()
    marka = scrapy.Field()
    yil = scrapy.Field()
When I run the code I get data for the 'marka' and 'fiyat' items, but the spider does not get anything for the 'yil' attribute, nor for other parts like 'Yakit Tipi', 'Vites Tipi', etc. How can I solve this problem?
What's wrong:
//*[@id="js-hook-appendable-technicalPropertiesWrapper"]/......
This id starts with js and is likely a dynamic element appended by JavaScript.
Scrapy does not have the ability to render JavaScript by default.
There are 2 solutions you can try:
Scrapy-Splash
This is a JavaScript rendering engine for Scrapy.
Install Splash as a Docker container.
Modify your settings.py file to integrate Splash (append the following middlewares to your project):
SPLASH_URL = 'http://127.0.0.1:8050'
SPIDER_MIDDLEWARES = {
    'scrapy_splash.SplashDeduplicateArgsMiddleware': 100,
}
DOWNLOADER_MIDDLEWARES = {
    'scrapy_splash.SplashCookiesMiddleware': 723,
    'scrapy_splash.SplashMiddleware': 725,
    'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware': 810,
}
Replace your Request calls with SplashRequest (a fuller sketch follows):
from scrapy_splash import SplashRequest as SP

SP(url=url, callback=parse, endpoint='render.html', args={'wait': 5})
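For context, a minimal sketch of how this could be wired into the spider above (hypothetical names; assumes Splash is listening on 127.0.0.1:8050 as configured above):
import scrapy
from scrapy_splash import SplashRequest

class BasicSplashSpider(scrapy.Spider):
    name = "arabamcom_splash"  # hypothetical spider name
    start_urls = ['https://www.arabam.com/ikinci-el/otomobil']

    def start_requests(self):
        for url in self.start_urls:
            # render the page in Splash so the JS-appended elements exist
            yield SplashRequest(url, callback=self.parse_item,
                                endpoint='render.html', args={'wait': 5})

    def parse_item(self, response):
        yield {
            'yil': response.xpath(
                '//*[@id="js-hook-appendable-technicalPropertiesWrapper"]'
                '/div[2]/dl[1]/dd/span/text()').extract(),
        }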
Selenium WebDriver
This is a browser automation and testing framework.
Install Selenium from PyPI and install the corresponding driver (e.g. Firefox -> geckodriver) into a folder on your PATH.
Append the following middleware class to your project's middlewares.py file:
from scrapy import signals
from scrapy.http import HtmlResponse
from scrapy.utils.python import to_bytes
from selenium import webdriver

class SeleniumMiddleware(object):
    @classmethod
    def from_crawler(cls, crawler):
        middleware = cls()
        crawler.signals.connect(middleware.spider_opened, signals.spider_opened)
        crawler.signals.connect(middleware.spider_closed, signals.spider_closed)
        return middleware

    def process_request(self, request, spider):
        request.meta['driver'] = self.driver
        self.driver.get(request.url)
        self.driver.implicitly_wait(2)
        body = to_bytes(self.driver.page_source)
        return HtmlResponse(self.driver.current_url, body=body, encoding='utf-8', request=request)

    def spider_opened(self, spider):
        """Change your browser mode here"""
        self.driver = webdriver.Firefox()

    def spider_closed(self, spider):
        self.driver.close()
Modify your settings.py file to integrate the Selenium middleware (append the following and replace yourproject with your project's name):
DOWNLOADER_MIDDLEWARES = {
    'yourproject.middlewares.SeleniumMiddleware': 200
}
Comparison
Scrapy-Splash
An official module from the company behind Scrapy
You can deploy a Splash instance to the cloud, so that you can browse the URL in the cloud and transfer the rendered HTML back to your spider
It's slow
The Splash container will stop if there is a memory leak (be sure to deploy the Splash instance on a high-memory cloud instance)
Selenium WebDriver
You have to have Firefox or Chrome with its corresponding automated-test driver on your machine, unless you use PhantomJS
You can't modify request headers directly with Selenium WebDriver
You could render the webpage using a headless browser, but this data can easily be extracted without it; try this:
import re
import ast
...
def parse_item(self, response):
    regex = re.compile(r'dataLayer.push\((\{.*\})\);', re.DOTALL)
    html_info = response.xpath('//script[contains(., "dataLayer.push")]').re_first(regex)
    data = ast.literal_eval(html_info)
    yield {'fiyat': data['CD_Fiyat'],
           'marka': data['CD_marka'],
           'yil': data['CD_yil']}
    # outputs an item like {'fiyat': '103500', 'marka': 'Renault', 'yil': '2017'}
