Every time I try to run this, the price returns as None — why? I'm new to this.
import scrapy
class AliSpiderSpider(scrapy.Spider):
    """Spider that scrapes the first flash-deal price from the AliExpress home page."""

    name = 'ali_spider'
    allowed_domains = ['www.aliexpress.com']
    start_urls = ['https://www.aliexpress.com']

    def parse(self, response):
        """Yield the current price of the first flash-deal item.

        The original selector used ``#id`` which is not valid XPath —
        attributes are addressed with ``@id``.  With the broken selector
        nothing matched, so ``.get()`` returned ``None``.
        """
        # NOTE(review): this absolute positional XPath is fragile and breaks on
        # any layout change; it also returns None if the deals section is
        # rendered by JavaScript, since Scrapy does not execute JS.
        yield {
            'current_price': response.xpath(
                '//*[@id="redesign-flash-deals"]/div[2]/ul/li[1]/a/div/div[2]/span[1]/text()'
            ).get()
        }
Related
Here is the code of my main.py in FastAPI:
from typing import List, Union
import datetime
import databases
import sqlalchemy
from fastapi import FastAPI
from pydantic import BaseModel
# Connection string.  The separator between credentials and host is '@' — the
# original '#' turns everything after it into a URL fragment, leaving no host.
DATABASE_URL = "postgresql://username:password@localhost/collector"

database = databases.Database(DATABASE_URL)
metadata = sqlalchemy.MetaData()

# One row per reporting computer.
computers = sqlalchemy.Table(
    "computers",
    metadata,
    sqlalchemy.Column("id", sqlalchemy.Integer, primary_key=True, index=True),
    sqlalchemy.Column("computername", sqlalchemy.String),
    sqlalchemy.Column("computerip", sqlalchemy.String),
    sqlalchemy.Column("computerexternalip", sqlalchemy.String),
    sqlalchemy.Column("time", sqlalchemy.DateTime),
)

engine = sqlalchemy.create_engine(DATABASE_URL)

# NOTE(review): create_all() only creates tables that do not exist yet — it
# never ALTERs an existing "computers" table to add missing columns.  If the
# table was created earlier without "id", the schema mismatch is exactly what
# produces UndefinedColumnError at query time; drop/migrate the old table.
metadata.create_all(engine)
class ComputerBase(BaseModel):
    """Fields shared by the request and response schemas."""

    computername: str
    computerip: str
    computerexternalip: str
    # The original ``time: str = datetime.datetime`` set the *class*
    # ``datetime.datetime`` itself as the default value — a type object, not a
    # timestamp.  Declare the real type; the server stamps the value on insert,
    # so it is optional on input.
    time: Union[datetime.datetime, None] = None
class ComputerIn(ComputerBase):
    """Request schema for POST /computers/ — same fields as ComputerBase."""
    pass
class Computer(ComputerBase):
    """Response schema for a computer row, including its primary key."""
    id: int

    class Config:
        # Let pydantic read attributes from ORM/row objects, not just dicts.
        orm_mode = True
app = FastAPI()


# '@' (decorator), not '#' (comment): without the decorator this coroutine is
# never registered and the database connection is never opened.
@app.on_event("startup")
async def startup():
    """Open the database connection when the application starts."""
    await database.connect()
#app.on_event("shutdown")
async def shutdown():
await database.disconnect()
#app.get("/computers/", response_model=List[Computer])
async def read_computers():
query = computers.select()
print(query)
return await database.fetch_all(query)
#app.post("/computers/", response_model=Computer)
async def create_computer(computer: ComputerIn):
current_time = datetime.datetime.utcnow
query = computers.insert().values(computername=computer.computername, computerip=computer.computerip, computerexternalip=computer.computerexternalip, time=current_time)
last_record_id = await database.execute(query)
return {**computer.dict(), "id": last_record_id}
When I go to https://localhost:8000/computers, I get this error:
asyncpg.exceptions.UndefinedColumnError: column computers.id does not
exist
Which I don't understand, since I declare a table named "computers" with an id column at the beginning of my code.
Any idea ?
Thank you
I'm new to scrapy and want to connect it to firestore so that I may save my results there.
Here is my settings.py (just mentioning related code)
# Register the Firebase pipeline; 300 is its priority order (lower runs first).
ITEM_PIPELINES = {
    'flights_i_spider.pipelines.FirebasePipeline': 300,
}
Is there anything else related to Firebase that needs to be included in the settings.py file? So far, ITEM_PIPELINES is the only thing I have added to settings.py.
here is my pipelines.py:
import firebase_admin
from firebase_admin import credentials, firestore
from google.cloud import firestore
class FirebasePipeline:
    """Scrapy item pipeline that writes each scraped item to Firestore."""

    def __init__(self):
        cred = credentials.Certificate('./service_key.json')
        # NOTE(review): 'databaseURL' configures the Realtime Database, not
        # Firestore; it is harmless here but unnecessary for Firestore writes.
        firebase_admin.initialize_app(cred,
            {
                'databaseURL': 'https://abcdefghijklmno.firebaseio.com'
            }
        )
        # Create the Firestore client once, not once per item.
        self.db = firestore.Client()

    def process_item(self, item, spider):
        # Scrapy Item objects have no .to_dict() method — that call raised
        # AttributeError, so nothing was ever written.  dict(item) works for
        # both Item instances and plain dicts.
        self.db.collection('output_data').add(dict(item))
        return item
Please don't mention this https://github.com/skhaz/scrapy-firebase because this did not work for me.
I'm trying to make a class-based view in aiohttp, following the docs. All goes well, but I can't find a way to make detailed views.
from aiohttp import web
class MyView(web.View):
    """Collection endpoint: dispatches GET and POST on the same path."""

    async def get(self):
        # Delegate straight to the shared response builder.
        return await get_response(self.request)

    async def post(self):
        return await post_response(self.request)


app.router.add_view('/view', MyView)
This code will produce two endpoints:
POST /view
GET /view
But how to add the GET /view/:pk: using the class-based views? I know that I can make it manually, adding to the router without the class-based views but I'm looking for a way to use it here.
UPD:
The goal is to generate the URLs like
"""
POST /view # creation
GET /view # a full list
GET /view/:id: # detailed view
"""
from aiohttp import web
class MyView(web.View):
    """Class-based view for /view.

    NOTE(review): web.View dispatches only on HTTP method names (get, post,
    ...), so ``detailed_get`` below is never routed by ``add_view`` — it needs
    its own route (e.g. a second ``add_view('/view/{id}', ...)``).
    """

    async def get(self):
        resp = await get_response(self.request)
        return resp

    async def post(self):
        resp = await post_response(self.request)
        return resp

    async def detailed_get(self):
        # NOTE(review): this calls post_response — looks like a copy-paste
        # from post(); a detailed GET presumably wants get_response. Confirm.
        resp = await post_response(self.request)
        return resp


app.router.add_view('/view', MyView)
Or at least get the URLs like:
POST /view # creation
GET /view/:id: # detailed view
For creating the additional behavior on class-based view add route decorator to View.
from aiohttp import web
routes = web.RouteTableDef()


# '@', not '#': these are decorators.  Stacking both registers the same View
# under '/ert/{name}' (detail) and '/ert/' (collection), giving GET and POST
# on each URL.
@routes.view('/ert/{name}')
@routes.view('/ert/')
class MyView(web.View):
    async def get(self):
        return web.Response(text='Get method')

    async def post(self):
        return web.Response(text='Post method')
I'm making a unit test for a REST controller, and this is the return:
return ResponseEntity.status(HttpStatus.OK).body(result);
I'm getting this error:
Required request body is missing
This is my current test:
def "Signup"() {
given:
UserDto userDto = new UserDto(id: 1, password: "password123", username: "username123")
def personDto = new PersonDto(id: 1, user : userDto)
when: "signup url is hit"
def response = mockMvc.perform(post('/person/signup'))
then:
personService.signup(userDto) >> personDto
response.andExpect(status().isOk())
}
Any idea how to mock .body, or how to add a body to the request? Thanks :)
Add another expectation like:
response.andExpect(content().string(containsString('blah')))
Reference:
MockMvcResultMatchers.content()
ContentResultMatchers.string(org.hamcrest.Matcher<? super String> matcher)
import static groovyx.net.http.ContentType.JSON
import groovyx.net.http.RESTClient
import groovy.util.slurpersupport.GPathResult
import static groovyx.net.http.ContentType.URLENC
def accountId = "yourAccountId" // the number after http://basecamp.com when logged into the website, e.g. http://basecamp.com/1234567
def userName = "basecampUserName"
def password = "basecampPassword"

def basecamp = new RESTClient("https://basecamp.com/${accountId}/api/v1/".toString())
basecamp.auth.basic userName, password

def response = basecamp.get(
    path: "projects.json",
    // Basecamp's API requires a User-Agent containing a contact email — the
    // separator is '@'; the original '#' was a formatting artifact.
    headers: ["User-Agent": "My basecamp application (myemail@domain.com)"]
)

println response.data.toString(2) // or return this value and process it further
// POST with a JSON body.
def 'test post method'() {
    given:
    restClient.headers.Accept = 'application/json'

    when:
    def resp = restClient.post(
        // The original path value was a garbled placeholder with an unbalanced
        // parenthesis; substitute your real endpoint path here.
        path: '/api/list',
        query: [param1: 'param1value', param2: 'param2value'],
        body: 'your json',
        contentType: 'application/json'
    )

    then:
    resp.status == 200
}
}
I'm querying Graphite's index.json to get all the metrics. Is there an option to pass a root metric and get only a sub-tree? Something like:
http://<my.graphite>/metrics/index.json?query="my.metric.subtree"
That is not supported.
What you can do however is call /metrics/find recursively (call it again for each branch encountered)
Something like this:
#!/usr/bin/python
from __future__ import print_function
import requests
import json
import argparse
try:
from Queue import Queue
except:
from queue import Queue
from threading import Thread, Lock
import sys
import unicodedata
# Serializes writes to stdout so worker threads never interleave their lines.
outLock = Lock()


def output(msg):
    """Write *msg* on its own line, thread-safely, flushing immediately."""
    with outLock:
        sys.stdout.write(str(msg) + '\n')
        sys.stdout.flush()
class Walker(Thread):
    """Worker thread that walks the Graphite metric tree breadth-first.

    Each worker pulls a (prefix, depth) tuple off the shared queue, queries
    /metrics/find for that prefix, prints leaf metric ids via output(), and
    re-queues branch nodes one level deeper for further expansion.
    """

    def __init__(self, queue, url, user=None, password=None, seriesFrom=None, depth=None):
        # queue:      shared Queue of (prefix, depth) tuples to expand
        # url:        Graphite base URL
        # user/password: optional HTTP basic-auth credentials
        # seriesFrom: optional 'from' filter — only series active since then
        # depth:      optional depth limit; branches at that depth are printed
        #             instead of expanded
        Thread.__init__(self)
        self.queue = queue
        self.url = url
        self.user = user
        self.password = password
        self.seriesFrom = seriesFrom
        self.depth = depth

    def run(self):
        # Loop forever: the thread is started as a daemon, so it dies with the
        # process once queue.join() in the main block returns.
        while True:
            branch = self.queue.get()
            try:
                # Probe for non-ASCII metric names, which the find query below
                # cannot handle; report them instead of walking them.
                branch[0].encode('ascii')
            except Exception as e:
                with outLock:
                    sys.stderr.write('found branch with invalid characters: ')
                    sys.stderr.write(unicodedata.normalize('NFKD', branch[0]).encode('utf-8', 'xmlcharrefreplace'))
                    sys.stderr.write('\n')
            else:
                if self.depth is not None and branch[1] == self.depth:
                    # Depth limit reached: print the branch rather than expand it.
                    output(branch[0])
                else:
                    self.walk(branch[0], branch[1])
            self.queue.task_done()

    def walk(self, prefix, depth):
        """Query /metrics/find for *prefix*; print leaves, queue branches.

        Raises Exception when Graphite answers with a non-200 status.
        """
        payload = {
            # Empty prefix means the tree root: query '*' instead of '.*'.
            "query": (prefix + ".*") if prefix else '*',
            "format": "treejson"
        }
        if self.seriesFrom:
            payload['from'] = self.seriesFrom
        auth = None
        if self.user is not None:
            auth = (self.user, self.password)
        r = requests.get(
            self.url + '/metrics/find',
            params=payload,
            auth=auth,
        )
        if r.status_code != 200:
            sys.stderr.write(r.text + '\n')
            raise Exception(
                'Error walking finding series: branch={branch} reason={reason}'
                .format(branch=unicodedata.normalize('NFKD', prefix).encode('ascii', 'replace'), reason=r.reason)
            )
        metrics = r.json()
        for metric in metrics:
            try:
                if metric['leaf']:
                    # Leaf: a concrete series — print its full id.
                    output(metric['id'])
                else:
                    # Branch: queue it for expansion one level deeper.
                    self.queue.put((metric['id'], depth + 1))
            except Exception as e:
                # Dump the unexpected node shape before propagating.
                output(metric)
                raise e
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--url", help="Graphite URL", required=True)
    parser.add_argument("--prefix", help="Metrics prefix", required=False, default='')
    parser.add_argument("--user", help="Basic Auth username", required=False)
    parser.add_argument("--password", help="Basic Auth password", required=False)
    parser.add_argument("--concurrency", help="concurrency", default=8, required=False, type=int)
    parser.add_argument("--from", dest='seriesFrom', help="only get series that have been active since this time", required=False)
    parser.add_argument("--depth", type=int, help="maximum depth to traverse. If set, the branches at the depth will be printed", required=False)
    args = parser.parse_args()

    queue = Queue()

    # Spin up the worker pool; daemon threads exit with the main thread.
    for _ in range(args.concurrency):
        walker = Walker(queue, args.url, args.user, args.password,
                        args.seriesFrom, args.depth)
        walker.daemon = True
        walker.start()

    # Seed the walk at the requested prefix (depth 0) and block until the
    # workers have drained the queue completely.
    queue.put((args.prefix, 0))
    queue.join()
Note: this code comes from: https://github.com/grafana/cloud-graphite-scripts/blob/master/query/walk_metrics.py