Every time I try to run this, the price returns as None — why? I'm new to this.
import scrapy
class AliSpiderSpider(scrapy.Spider):
    """Spider that scrapes the first flash-deal price from the AliExpress home page."""

    name = 'ali_spider'
    allowed_domains = ['www.aliexpress.com']
    start_urls = ['https://www.aliexpress.com']

    def parse(self, response):
        """Yield the current price of the first flash-deal item.

        The original selector used ``#id`` which is not valid XPath —
        attributes are addressed with ``@id``.  With the broken selector
        nothing matched, so ``.get()`` returned ``None``.
        """
        # NOTE(review): this absolute positional XPath is fragile and breaks on
        # any layout change; it also returns None if the deals section is
        # rendered by JavaScript, since Scrapy does not execute JS.
        yield {
            'current_price': response.xpath(
                '//*[@id="redesign-flash-deals"]/div[2]/ul/li[1]/a/div/div[2]/span[1]/text()'
            ).get()
        }
Related
Here is the code of my main.py in FastAPI:
from typing import List, Union
import datetime
import databases
import sqlalchemy
from fastapi import FastAPI
from pydantic import BaseModel
# Connection string.  The separator between credentials and host is '@' — the
# original '#' turns everything after it into a URL fragment, leaving no host.
DATABASE_URL = "postgresql://username:password@localhost/collector"

database = databases.Database(DATABASE_URL)
metadata = sqlalchemy.MetaData()

# One row per reporting computer.
computers = sqlalchemy.Table(
    "computers",
    metadata,
    sqlalchemy.Column("id", sqlalchemy.Integer, primary_key=True, index=True),
    sqlalchemy.Column("computername", sqlalchemy.String),
    sqlalchemy.Column("computerip", sqlalchemy.String),
    sqlalchemy.Column("computerexternalip", sqlalchemy.String),
    sqlalchemy.Column("time", sqlalchemy.DateTime),
)

engine = sqlalchemy.create_engine(DATABASE_URL)

# NOTE(review): create_all() only creates tables that do not exist yet — it
# never ALTERs an existing "computers" table to add missing columns.  If the
# table was created earlier without "id", the schema mismatch is exactly what
# produces UndefinedColumnError at query time; drop/migrate the old table.
metadata.create_all(engine)
class ComputerBase(BaseModel):
    """Fields shared by the request and response schemas."""

    computername: str
    computerip: str
    computerexternalip: str
    # The original ``time: str = datetime.datetime`` set the *class*
    # ``datetime.datetime`` itself as the default value — a type object, not a
    # timestamp.  Declare the real type; the server stamps the value on insert,
    # so it is optional on input.
    time: Union[datetime.datetime, None] = None
class ComputerIn(ComputerBase):
    """Request schema for POST /computers/ — same fields as ComputerBase."""
    pass
class Computer(ComputerBase):
    """Response schema for a computer row, including its primary key."""
    id: int

    class Config:
        # Let pydantic read attributes from ORM/row objects, not just dicts.
        orm_mode = True
app = FastAPI()


# '@' (decorator), not '#' (comment): without the decorator this coroutine is
# never registered and the database connection is never opened.
@app.on_event("startup")
async def startup():
    """Open the database connection when the application starts."""
    await database.connect()
#app.on_event("shutdown")
async def shutdown():
await database.disconnect()
#app.get("/computers/", response_model=List[Computer])
async def read_computers():
query = computers.select()
print(query)
return await database.fetch_all(query)
#app.post("/computers/", response_model=Computer)
async def create_computer(computer: ComputerIn):
current_time = datetime.datetime.utcnow
query = computers.insert().values(computername=computer.computername, computerip=computer.computerip, computerexternalip=computer.computerexternalip, time=current_time)
last_record_id = await database.execute(query)
return {**computer.dict(), "id": last_record_id}
When I go to https://localhost:8000/computers, I get this error:
asyncpg.exceptions.UndefinedColumnError: column computers.id does not
exist
Which I don't understand, since I declare a table named "computers" with an id column at the beginning of my code.
Any idea ?
Thank you
I'm new to scrapy and want to connect it to firestore so that I may save my results there.
Here is my settings.py (just mentioning related code)
# Register the Firebase pipeline; 300 is its priority order (lower runs first).
ITEM_PIPELINES = {
    'flights_i_spider.pipelines.FirebasePipeline': 300,
}
Is there anything else related to Firebase that needs to be included in the settings.py file? So far, ITEM_PIPELINES is the only thing I have added to settings.py.
here is my pipelines.py:
import firebase_admin
from firebase_admin import credentials, firestore
from google.cloud import firestore
class FirebasePipeline:
    """Scrapy item pipeline that writes each scraped item to Firestore."""

    def __init__(self):
        cred = credentials.Certificate('./service_key.json')
        # NOTE(review): 'databaseURL' configures the Realtime Database, not
        # Firestore; it is harmless here but unnecessary for Firestore writes.
        firebase_admin.initialize_app(cred,
            {
                'databaseURL': 'https://abcdefghijklmno.firebaseio.com'
            }
        )
        # Create the Firestore client once, not once per item.
        self.db = firestore.Client()

    def process_item(self, item, spider):
        # Scrapy Item objects have no .to_dict() method — that call raised
        # AttributeError, so nothing was ever written.  dict(item) works for
        # both Item instances and plain dicts.
        self.db.collection('output_data').add(dict(item))
        return item
Please don't mention this https://github.com/skhaz/scrapy-firebase because this did not work for me.
I'm trying to make a class-based view in aiohttp, following the docs. All goes well, but I can't find a way to make detailed views.
from aiohttp import web
class MyView(web.View):
    """Collection endpoint: dispatches GET and POST on the same path."""

    async def get(self):
        # Delegate straight to the shared response builder.
        return await get_response(self.request)

    async def post(self):
        return await post_response(self.request)


app.router.add_view('/view', MyView)
This code will produce two endpoints:
POST /view
GET /view
But how to add the GET /view/:pk: using the class-based views? I know that I can make it manually, adding to the router without the class-based views but I'm looking for a way to use it here.
UPD:
The goal is to generate the URLs like
"""
POST /view # creation
GET /view # a full list
GET /view/:id: # detailed view
"""
from aiohttp import web
class MyView(web.View):
    """Class-based view for /view.

    NOTE(review): web.View dispatches only on HTTP method names (get, post,
    ...), so ``detailed_get`` below is never routed by ``add_view`` — it needs
    its own route (e.g. a second ``add_view('/view/{id}', ...)``).
    """

    async def get(self):
        resp = await get_response(self.request)
        return resp

    async def post(self):
        resp = await post_response(self.request)
        return resp

    async def detailed_get(self):
        # NOTE(review): this calls post_response — looks like a copy-paste
        # from post(); a detailed GET presumably wants get_response. Confirm.
        resp = await post_response(self.request)
        return resp


app.router.add_view('/view', MyView)
Or at least get the URLs like:
POST /view # creation
GET /view/:id: # detailed view
For creating the additional behavior on class-based view add route decorator to View.
from aiohttp import web
routes = web.RouteTableDef()


# '@', not '#': these are decorators.  Stacking both registers the same View
# under '/ert/{name}' (detail) and '/ert/' (collection), giving GET and POST
# on each URL.
@routes.view('/ert/{name}')
@routes.view('/ert/')
class MyView(web.View):
    async def get(self):
        return web.Response(text='Get method')

    async def post(self):
        return web.Response(text='Post method')
I'm making a unit test for a REST controller, and this is the return:
return ResponseEntity.status(HttpStatus.OK).body(result);
I'm getting this error:
Required request body is missing
This is my current test:
def "Signup"() {
given:
UserDto userDto = new UserDto(id: 1, password: "password123", username: "username123")
def personDto = new PersonDto(id: 1, user : userDto)
when: "signup url is hit"
def response = mockMvc.perform(post('/person/signup'))
then:
personService.signup(userDto) >> personDto
response.andExpect(status().isOk())
}
Any idea how to mock .body, or how to add a body to the request? Thanks :)
Add another expectation like:
response.andExpect(content().string(containsString('blah')))
Reference:
MockMvcResultMatchers.content()
ContentResultMatchers.string(org.hamcrest.Matcher<? super String> matcher)
import static groovyx.net.http.ContentType.JSON
import groovyx.net.http.RESTClient
import groovy.util.slurpersupport.GPathResult
import static groovyx.net.http.ContentType.URLENC
def accountId = "yourAccountId" // the number after http://basecamp.com when logged into the website, e.g. http://basecamp.com/1234567
def userName = "basecampUserName"
def password = "basecampPassword"

def basecamp = new RESTClient("https://basecamp.com/${accountId}/api/v1/".toString())
basecamp.auth.basic userName, password

def response = basecamp.get(
    path: "projects.json",
    // Basecamp's API requires a User-Agent containing a contact email — the
    // separator is '@'; the original '#' was a formatting artifact.
    headers: ["User-Agent": "My basecamp application (myemail@domain.com)"]
)

println response.data.toString(2) // or return this value and process it further
// POST with a JSON body.
def 'test post method'() {
    given:
    restClient.headers.Accept = 'application/json'

    when:
    def resp = restClient.post(
        // The original path value was a garbled placeholder with an unbalanced
        // parenthesis; substitute your real endpoint path here.
        path: '/api/list',
        query: [param1: 'param1value', param2: 'param2value'],
        body: 'your json',
        contentType: 'application/json'
    )

    then:
    resp.status == 200
}
}
I'm querying Graphite's index.json to get all the metrics. Is there an option to pass a root metric and get only a sub-tree? Something like:
http://<my.graphite>/metrics/index.json?query="my.metric.subtree"
That is not supported.
What you can do however is call /metrics/find recursively (call it again for each branch encountered)
Something like this:
#!/usr/bin/python
from __future__ import print_function
import requests
import json
import argparse
try:
from Queue import Queue
except:
from queue import Queue
from threading import Thread, Lock
import sys
import unicodedata
# Serializes writes to stdout so worker threads never interleave their lines.
outLock = Lock()


def output(msg):
    """Write *msg* on its own line, thread-safely, flushing immediately."""
    with outLock:
        sys.stdout.write(str(msg) + '\n')
        sys.stdout.flush()
class Walker(Thread):
    """Worker thread that walks the Graphite metric tree breadth-first.

    Each worker pulls a (prefix, depth) tuple off the shared queue, queries
    /metrics/find for that prefix, prints leaf metric ids via output(), and
    re-queues branch nodes one level deeper for further expansion.
    """

    def __init__(self, queue, url, user=None, password=None, seriesFrom=None, depth=None):
        # queue:      shared Queue of (prefix, depth) tuples to expand
        # url:        Graphite base URL
        # user/password: optional HTTP basic-auth credentials
        # seriesFrom: optional 'from' filter — only series active since then
        # depth:      optional depth limit; branches at that depth are printed
        #             instead of expanded
        Thread.__init__(self)
        self.queue = queue
        self.url = url
        self.user = user
        self.password = password
        self.seriesFrom = seriesFrom
        self.depth = depth

    def run(self):
        # Loop forever: the thread is started as a daemon, so it dies with the
        # process once queue.join() in the main block returns.
        while True:
            branch = self.queue.get()
            try:
                # Probe for non-ASCII metric names, which the find query below
                # cannot handle; report them instead of walking them.
                branch[0].encode('ascii')
            except Exception as e:
                with outLock:
                    sys.stderr.write('found branch with invalid characters: ')
                    sys.stderr.write(unicodedata.normalize('NFKD', branch[0]).encode('utf-8', 'xmlcharrefreplace'))
                    sys.stderr.write('\n')
            else:
                if self.depth is not None and branch[1] == self.depth:
                    # Depth limit reached: print the branch rather than expand it.
                    output(branch[0])
                else:
                    self.walk(branch[0], branch[1])
            self.queue.task_done()

    def walk(self, prefix, depth):
        """Query /metrics/find for *prefix*; print leaves, queue branches.

        Raises Exception when Graphite answers with a non-200 status.
        """
        payload = {
            # Empty prefix means the tree root: query '*' instead of '.*'.
            "query": (prefix + ".*") if prefix else '*',
            "format": "treejson"
        }
        if self.seriesFrom:
            payload['from'] = self.seriesFrom
        auth = None
        if self.user is not None:
            auth = (self.user, self.password)
        r = requests.get(
            self.url + '/metrics/find',
            params=payload,
            auth=auth,
        )
        if r.status_code != 200:
            sys.stderr.write(r.text + '\n')
            raise Exception(
                'Error walking finding series: branch={branch} reason={reason}'
                .format(branch=unicodedata.normalize('NFKD', prefix).encode('ascii', 'replace'), reason=r.reason)
            )
        metrics = r.json()
        for metric in metrics:
            try:
                if metric['leaf']:
                    # Leaf: a concrete series — print its full id.
                    output(metric['id'])
                else:
                    # Branch: queue it for expansion one level deeper.
                    self.queue.put((metric['id'], depth + 1))
            except Exception as e:
                # Dump the unexpected node shape before propagating.
                output(metric)
                raise e
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--url", help="Graphite URL", required=True)
    parser.add_argument("--prefix", help="Metrics prefix", required=False, default='')
    parser.add_argument("--user", help="Basic Auth username", required=False)
    parser.add_argument("--password", help="Basic Auth password", required=False)
    parser.add_argument("--concurrency", help="concurrency", default=8, required=False, type=int)
    parser.add_argument("--from", dest='seriesFrom', help="only get series that have been active since this time", required=False)
    parser.add_argument("--depth", type=int, help="maximum depth to traverse. If set, the branches at the depth will be printed", required=False)
    args = parser.parse_args()

    queue = Queue()

    # Spin up the worker pool; daemon threads exit with the main thread.
    for _ in range(args.concurrency):
        walker = Walker(queue, args.url, args.user, args.password,
                        args.seriesFrom, args.depth)
        walker.daemon = True
        walker.start()

    # Seed the walk at the requested prefix (depth 0) and block until the
    # workers have drained the queue completely.
    queue.put((args.prefix, 0))
    queue.join()
Note: this code comes from: https://github.com/grafana/cloud-graphite-scripts/blob/master/query/walk_metrics.py