python unittest error and using patch from unittest.mock - python-unittest

I have two python files the first one is named employee.py and this is its code
import requests
class Employee:
"""A sample Employee class"""
raise_amt = 1.05
def __init__(self, first, last, pay):
self.first = first
self.last = last
self.pay = pay
#property
def email(self):
return '{}.{}#email.com'.format(self.first, self.last)
#property
def fullname(self):
return '{} {}'.format(self.first, self.last)
def apply_raise(self):
self.pay = int(self.pay * self.raise_amt)
def monthly_schedule(self, month):
response = requests.get(f'http://company.com/{self.last}/{month}')
if response.ok:
return response.text
else:
return 'Bad Response!'
The other file is named test_employee.py
and this is its code
import unittest
from unittest.mock import patch
from employee import Employee
class TestEmployee(unittest.TestCase):
#classmethod
def setUpClass(cls):
print('setupClass')
#classmethod
def tearDownClass(cls):
print('teardownClass')
def setUp(self):
print('setUp')
self.emp_1 = Employee('Corey', 'Schafer', 50000)
self.emp_2 = Employee('Sue', 'Smith', 60000)
def tearDown(self):
print('tearDown\n')
def test_email(self):
print('test_email')
self.assertEqual(self.emp_1.email, 'Corey.Schafer#email.com')
self.assertEqual(self.emp_2.email, 'Sue.Smith#email.com')
self.emp_1.first = 'John'
self.emp_2.first = 'Jane'
self.assertEqual(self.emp_1.email, 'John.Schafer#email.com')
self.assertEqual(self.emp_2.email, 'Jane.Smith#email.com')
def test_fullname(self):
print('test_fullname')
self.assertEqual(self.emp_1.fullname, 'Corey Schafer')
self.assertEqual(self.emp_2.fullname, 'Sue Smith')
self.emp_1.first = 'John'
self.emp_2.first = 'Jane'
self.assertEqual(self.emp_1.fullname, 'John Schafer')
self.assertEqual(self.emp_2.fullname, 'Jane Smith')
def test_apply_raise(self):
print('test_apply_raise')
self.emp_1.apply_raise()
self.emp_2.apply_raise()
self.assertEqual(self.emp_1.pay, 52500)
self.assertEqual(self.emp_2.pay, 63000)
def test_monthly_schedule(self):
with patch('employee.requests.get') as mocked_get:
mocked_get.return_value.ok = True
mocked_get.return_value.text = 'Success'
schedule = self.emp_1.monthly_schedule('May')
mocked_get.assert_called_with('http://company.com/Schafer/May')
self.assertEqual(schedule,'Success')
if __name__ == '__main__':
unittest.main()
when I run test_employee.py, I get this error
ModuleNotFoundError: No module named 'employee.requests'; 'employee' is not a package
The code runs well if you delete the function test_monthly_schedule from test_employee.py
and delete monthly_schedule from employee.py
also I do not know if that will make a difference, but I use python 3.8. Also, I'm using Mac

Related

How can I use Google Cloud Functions to run a web scraper?

Thanks in advance for your help.
I'm currently running a webscraper - this is the first time I've ever done something like this - It pulls addresses from the URL and then will match the address to the users input. This will be going into a chat bot, I wondering how I can make this run on Google Functions. Whats the process to do this, is there a tutorial anywhere?
This is my code so far. There is a small items file too
from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor
from ..items import DataItem
from fuzzywuzzy import fuzz
from urllib.parse import urljoin
import scrapy
class AddressesSpider(scrapy.Spider):
name = 'Addresses'
allowed_domains = ['find-energy-certificate.service.gov.uk']
postcode = "bh10+4ah"
start_urls = ['https://find-energy-certificate.service.gov.uk/find-a-certificate/search-by-postcode?postcode=' + postcode]
## def start_requests(self):
## self.first = input("Please enter the address you would like to match: ")
## yield scrapy.Request(url=self.start_urls[0], callback=self.parse)
def parse(self, response):
first = input("Please enter the address you would like to match: ")
highest_ratios = []
highest_item = None
for row in response.xpath('//table[#class="govuk-table"]//tr'):
address = row.xpath("normalize-space(.//a[#class='govuk-link']/text())").extract()[0].lower()
address = address.rsplit(',', 2)[0]
link = row.xpath('.//a[#class="govuk-link"]/#href').extract()
details = row.xpath("normalize-space(.//td/following-sibling::td)").extract()
ratio = fuzz.token_set_ratio(address, first)
item = DataItem()
item['link'] = link
item['details'] = details
item['address'] = address
item['ratioresult'] = ratio
if len(highest_ratios) < 3:
highest_ratios.append(item)
elif ratio > min(highest_ratios, key=lambda x: x['ratioresult'])['ratioresult']:
highest_ratios.remove(min(highest_ratios, key=lambda x: x['ratioresult']))
highest_ratios.append(item)
highest_ratios_100 = [item for item in highest_ratios if item['ratioresult'] == 100]
if highest_ratios_100:
for item in highest_ratios_100:
yield item
else:
yield max(highest_ratios, key=lambda x: x['ratioresult'])
if len(highest_ratios_100) > 1:
for i, item in enumerate(highest_ratios_100):
print(f"{i+1}: {item['address']}")
selected = int(input("Please select the correct address by entering the number corresponding to the address: ")) - 1
selected_item = highest_ratios_100[selected]
else:
selected_item = highest_ratios_100[0] if highest_ratios_100 else max(highest_ratios, key=lambda x: x['ratioresult'])
new_url = selected_item['link'][0]
new_url = str(new_url)
if new_url:
base_url = 'https://find-energy-certificate.service.gov.uk'
print(f'Base URL: {base_url}')
print(f'New URL: {new_url}')
new_url = urljoin(base_url, new_url)
print(f'Combined URL: {new_url}')
yield scrapy.Request(new_url, callback=self.parse_new_page)
def parse_new_page(self, response):
Postcode = response.xpath('normalize-space((//p[#class="epc-address govuk-body"]/text())[last()])').extract()
Town = response.xpath('normalize-space((//p[#class="epc-address govuk-body"]/text())[last()-1])').extract()
First = response.xpath(".//p[#class='epc-address govuk-body']").extract()
Type = response.xpath('normalize-space(//dd[1]/text())').extract_first()
Walls = response.xpath("//th[contains(text(), 'Wall')]/following-sibling::td[1]/text()").extract()
Roof = response.xpath("//th[contains(text(), 'Roof')]/following-sibling::td[1]/text()").extract()
Heating = response.xpath("//th[text()='Main heating']/following-sibling::td[1]/text()").extract_first()
CurrentScore = response.xpath('//body[1]/div[2]/main[1]/div[1]/div[3]/div[3]/svg[1]/svg[1]/text[1]/text()').re_first("[0-9+]{1,2}")
Maxscore = response.xpath('//body[1]/div[2]/main[1]/div[1]/div[3]/div[3]/svg[1]/svg[2]/text[1]/text()').re_first("[0-9+]{2}")
Expiry = response.xpath('normalize-space(//b)').extract_first()
FloorArea = response.xpath('//dt[contains(text(), "floor area")]/following-sibling::dd/text()').re_first("[0-9+]{2,3}")
Steps = response.xpath("//h3[contains(text(),'Step')]/text()").extract()
yield {
'Postcode': Postcode,
'Town': Town,
'First': First,
'Type': Type,
'Walls': Walls,
'Roof': Roof,
'Heating': Heating,
'CurrentScore': CurrentScore,
'Maxscore': Maxscore,
'Expiry': Expiry,
'FloorArea': FloorArea,
'Steps': Steps
}
I've tried googling and having a look around and can't get how to deploy this as a project to run on google functions or can I just copy the code into the console somewhere?
You can try running your spider from a script. However, a better solution would be to wrap scrapy in its own child process.
For example:
from multiprocessing import Process, Queue
from ... import MySpider
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings
def my_cloud_function(event, context):
def script(queue):
try:
settings = get_project_settings()
settings.setdict({
'LOG_LEVEL': 'ERROR',
'LOG_ENABLED': True,
})
process = CrawlerProcess(settings)
process.crawl(MySpider)
process.start()
queue.put(None)
except Exception as e:
queue.put(e)
queue = Queue()
# wrap the spider in a child process
main_process = Process(target=script, args=(queue,))
main_process.start() # start the process
main_process.join() # block until the spider finishes
result = queue.get() # check the process did not return an error
if result is not None:
raise result
return 'ok'
You can refer to this tutorial for more info.

An error occurred while creating a bitcoin price notification Telegram chatbot (PytzUsageWarning)

import requests
import telegram
import json
from telegram.ext import Updater, CommandHandler
import time
import sys
import pandas as pd
from apscheduler.schedulers.background import BlockingScheduler
from apscheduler.jobstores.base import JobLookupError
dt = requests.get('https://min-api.cryptocompare.com/data/price?fsym=BTC&tsyms=USD,EUR')
print(dt.text)
price_now = dt.json()
bot_token = "5668522544:AAFqNFcgd5wDBtQbJBhRayfPx9VpVPVjcyQ"
Cointimeline = telegram.Bot(token=bot_token)
updates = Cointimeline.getUpdates()
for i in updates:
print(i.message)
class Chatbot:
def __init__(self, token):
self.core = telegram.Bot(token)
self.updater = Updater(token)
self.id = 5734902861
def sendmsg(self, text):
self.core.sendmsg(chat_id=self.id, text=text)
def stop(self):
self.updater.stop()
class Alert(Chatbot):
def __init__(self):
self.token = '5668522544:AAFqNFcgd5wDBtQbJBhRayfPx9VpVPVjcyQ'
Chatbot.__init__(self, self.token)
self.updater.stop()
def controller(self, cmd, func):
self.updater.dispatcher.controller(CommandHandler(smd, func))
def start(self):
self.sendMessage('')
aps = BlockingScheduler()
def push():
dt = requests.get("https://min-api.cryptocompare.com/data/price?fsym=BTC&tsyms=USD,EUR")
ALERTBOT = Alert()
ALERTBOT.sendmsg(dt.text)a
price_now = pd.DataFrame({"USD": {list(dt.json().values())[0]}, "EUR": [list(dt.json().values())[1]]})
data = pd.read_csv("ALERTBOT.csv")
data = data.append(price_now, sort=True)
data = data.loc[:, 'USD':'EUR']
data.to_csv("ALERTBOT.csv")
aps.add_job(push, 'interval', seconds=60)
aps.start()
The error continues to occur.
PytzUsageWarning: The zone attribute is specific to pytz's interface; please migrate to a new time zone provider. For more details on how to do so, see https://pytz-deprecation-shim.readthedocs.io/en/latest/migration.html
if obj.zone == 'local':
The error is caused by the pytz library being out of date. You can solve the problem by updating the library:
pip install --upgrade pytz

Fetch datastore entity by id inside of a Dataflow transform

I have 2 datastore models:
class KindA(ndb.Model):
field_a1 = ndb.StringProperty()
field_a2 = ndb.StringProperty()
class KindB(ndb.Model):
field_b1 = ndb.StringProperty()
field_b2 = ndb.StringProperty()
key_to_kind_a = ndb.KeyProperty(KindA)
I want to query KindB and output it to a csv file, but if an entity of KindB points to an entity in KindA I want those fields to be present in the csv as well.
If I was able to use ndb inside of a transform I would setup my pipeline like this
def format(element): # element is an `entity_pb2` object of KindB
try:
obj_a_key_id = element.properties.get('key_to_kind_a', None).key_value.path[0]
except:
obj_a_key_id = None
# <<<<<<<<<<<<<<<<<<<<<<<<<<<<<< HOW DO I DO THIS
obj_a = ndb.Key(KindA, obj_a_key_id).get() if obj_a_key_id else None
return ",".join([
element.properties.get('field_b1', None).string_value,
element.properties.get('field_b2', None).string_value,
obj_a.properties.get('field_a1', None).string_value if obj_a else '',
obj_a.properties.get('field_a2', None).string_value if obj_a else '',
]
def build_pipeline(project, start_date, end_date, export_path):
query = query_pb2.Query()
query.kind.add().name = 'KindB'
filter_1 = datastore_helper.set_property_filter(query_pb2.Filter(), 'field_b1', PropertyFilter.GREATER_THAN, start_date)
filter_2 = datastore_helper.set_property_filter(query_pb2.Filter(), 'field_b1', PropertyFilter.LESS_THAN, end_date)
datastore_helper.set_composite_filter(query.filter, CompositeFilter.AND, filter_1, filter_2)
p = beam.Pipeline(options=pipeline_options)
_ = (p
| 'read from datastore' >> ReadFromDatastore(project, query, None)
| 'format' >> beam.Map(format)
| 'write' >> apache_beam.io.WriteToText(
file_path_prefix=export_path,
file_name_suffix='.csv',
header='field_b1,field_b2,field_a1,field_a2',
num_shards=1)
)
return p
I suppose I could use ReadFromDatastore to query all entities of KindA and then use CoGroupByKey to merge them, but KindA has millions of records and that would be very inefficient.
Per the reccommendations in this answer: https://stackoverflow.com/a/49130224/4458510
I created the following utils, which were inspired by the source code of
DatastoreWriteFn in apache_beam.io.gcp.datastore.v1.datastoreio
write_mutations and fetch_entities in apache_beam.io.gcp.datastore.v1.helper
import logging
import time
from socket import error as _socket_error
from apache_beam.metrics import Metrics
from apache_beam.transforms import DoFn, window
from apache_beam.utils import retry
from apache_beam.io.gcp.datastore.v1.adaptive_throttler import AdaptiveThrottler
from apache_beam.io.gcp.datastore.v1.helper import make_partition, retry_on_rpc_error, get_datastore
from apache_beam.io.gcp.datastore.v1.util import MovingSum
from apache_beam.utils.windowed_value import WindowedValue
from google.cloud.proto.datastore.v1 import datastore_pb2, query_pb2
from googledatastore.connection import Datastore, RPCError
_WRITE_BATCH_INITIAL_SIZE = 200
_WRITE_BATCH_MAX_SIZE = 500
_WRITE_BATCH_MIN_SIZE = 10
_WRITE_BATCH_TARGET_LATENCY_MS = 5000
def _fetch_keys(project_id, keys, datastore, throttler, rpc_stats_callback=None, throttle_delay=1):
req = datastore_pb2.LookupRequest()
req.project_id = project_id
for key in keys:
req.keys.add().CopyFrom(key)
#retry.with_exponential_backoff(num_retries=5, retry_filter=retry_on_rpc_error)
def run(request):
# Client-side throttling.
while throttler.throttle_request(time.time() * 1000):
logging.info("Delaying request for %ds due to previous failures", throttle_delay)
time.sleep(throttle_delay)
if rpc_stats_callback:
rpc_stats_callback(throttled_secs=throttle_delay)
try:
start_time = time.time()
response = datastore.lookup(request)
end_time = time.time()
if rpc_stats_callback:
rpc_stats_callback(successes=1)
throttler.successful_request(start_time * 1000)
commit_time_ms = int((end_time - start_time) * 1000)
return response, commit_time_ms
except (RPCError, _socket_error):
if rpc_stats_callback:
rpc_stats_callback(errors=1)
raise
return run(req)
# Copied from _DynamicBatchSizer in apache_beam.io.gcp.datastore.v1.datastoreio
class _DynamicBatchSizer(object):
"""Determines request sizes for future Datastore RPCS."""
def __init__(self):
self._commit_time_per_entity_ms = MovingSum(window_ms=120000, bucket_ms=10000)
def get_batch_size(self, now):
"""Returns the recommended size for datastore RPCs at this time."""
if not self._commit_time_per_entity_ms.has_data(now):
return _WRITE_BATCH_INITIAL_SIZE
recent_mean_latency_ms = (self._commit_time_per_entity_ms.sum(now) / self._commit_time_per_entity_ms.count(now))
return max(_WRITE_BATCH_MIN_SIZE,
min(_WRITE_BATCH_MAX_SIZE,
_WRITE_BATCH_TARGET_LATENCY_MS / max(recent_mean_latency_ms, 1)))
def report_latency(self, now, latency_ms, num_mutations):
"""Reports the latency of an RPC to Datastore.
Args:
now: double, completion time of the RPC as seconds since the epoch.
latency_ms: double, the observed latency in milliseconds for this RPC.
num_mutations: int, number of mutations contained in the RPC.
"""
self._commit_time_per_entity_ms.add(now, latency_ms / num_mutations)
class LookupKeysFn(DoFn):
"""A `DoFn` that looks up keys in the Datastore."""
def __init__(self, project_id, fixed_batch_size=None):
self._project_id = project_id
self._datastore = None
self._fixed_batch_size = fixed_batch_size
self._rpc_successes = Metrics.counter(self.__class__, "datastoreRpcSuccesses")
self._rpc_errors = Metrics.counter(self.__class__, "datastoreRpcErrors")
self._throttled_secs = Metrics.counter(self.__class__, "cumulativeThrottlingSeconds")
self._throttler = AdaptiveThrottler(window_ms=120000, bucket_ms=1000, overload_ratio=1.25)
self._elements = []
self._batch_sizer = None
self._target_batch_size = None
def _update_rpc_stats(self, successes=0, errors=0, throttled_secs=0):
"""Callback function, called by _fetch_keys()"""
self._rpc_successes.inc(successes)
self._rpc_errors.inc(errors)
self._throttled_secs.inc(throttled_secs)
def start_bundle(self):
"""(re)initialize: connection with datastore, _DynamicBatchSizer obj"""
self._elements = []
self._datastore = get_datastore(self._project_id)
if self._fixed_batch_size:
self._target_batch_size = self._fixed_batch_size
else:
self._batch_sizer = _DynamicBatchSizer()
self._target_batch_size = self._batch_sizer.get_batch_size(time.time()*1000)
def process(self, element):
"""Collect elements and process them as a batch"""
self._elements.append(element)
if len(self._elements) >= self._target_batch_size:
return self._flush_batch()
def finish_bundle(self):
"""Flush any remaining elements"""
if self._elements:
objs = self._flush_batch()
for obj in objs:
yield WindowedValue(obj, window.MAX_TIMESTAMP, [window.GlobalWindow()])
def _flush_batch(self):
"""Fetch all of the collected keys from datastore"""
response, latency_ms = _fetch_keys(
project_id=self._project_id,
keys=self._elements,
datastore=self._datastore,
throttler=self._throttler,
rpc_stats_callback=self._update_rpc_stats)
logging.info("Successfully read %d keys in %dms.", len(self._elements), latency_ms)
if not self._fixed_batch_size:
now = time.time()*1000
self._batch_sizer.report_latency(now, latency_ms, len(self._elements))
self._target_batch_size = self._batch_sizer.get_batch_size(now)
self._elements = []
return [entity_result.entity for entity_result in response.found]
class LookupEntityFieldFn(LookupKeysFn):
"""
Looks-up a field on an EntityPb2 object
Expects a EntityPb2 object as input
Outputs a tuple, where the first element is the input object and the second element is the object found during the
lookup
"""
def __init__(self, project_id, field_name, fixed_batch_size=None):
super(LookupEntityFieldFn, self).__init__(project_id=project_id, fixed_batch_size=fixed_batch_size)
self._field_name = field_name
#staticmethod
def _pb2_key_value_to_tuple(kv):
"""Converts a key_value object into a tuple, so that it can be a dictionary key"""
path = []
for p in kv.path:
path.append(p.name)
path.append(p.id)
return tuple(path)
def _flush_batch(self):
_elements = self._elements
keys_to_fetch = []
for element in self._elements:
kv = element.properties.get(self._field_name, None)
if kv and kv.key_value and kv.key_value.path:
keys_to_fetch.append(kv.key_value)
self._elements = keys_to_fetch
read_keys = super(LookupEntityFieldFn, self)._flush_batch()
_by_key = {self._pb2_key_value_to_tuple(entity.key): entity for entity in read_keys}
output_pairs = []
for input_obj in _elements:
kv = input_obj.properties.get(self._field_name, None)
output_obj = None
if kv and kv.key_value and kv.key_value.path:
output_obj = _by_key.get(self._pb2_key_value_to_tuple(kv.key_value), None)
output_pairs.append((input_obj, output_obj))
return output_pairs
The Key to this is the line response = datastore.lookup(request), where:
datastore = get_datastore(project_id) (from apache_beam.io.gcp.datastore.v1.helper.get_datastore)
request is a LookupRequest from google.cloud.proto.datastore.v1.datastore_pb2
response is LookupResponse from google.cloud.proto.datastore.v1.datastore_pb2
The rest of the above code does things like:
using a single connection to the datastore for a dofn bundle
batches keys together before performing a lookup request
throttles interactions with the datastore if requests start to fail
(honestly I don't know how critical these bits are, I just came across them when browsing the apache_beam source code)
The resulting util function LookupEntityFieldFn(project_id, field_name) is a DoFn that takes in an entity_pb2 object as input, extracts and fetches/gets the key_property that resides on the field field_name, and outputs the result as a tuple (the fetch-result is paired with the input object)
My Pipeline code then became
def format(element): # element is a tuple `entity_pb2` objects
kind_b_element, kind_a_element = element
return ",".join([
kind_b_element.properties.get('field_b1', None).string_value,
kind_b_element.properties.get('field_b2', None).string_value,
kind_a_element.properties.get('field_a1', None).string_value if kind_a_element else '',
kind_a_element.properties.get('field_a2', None).string_value if kind_a_element else '',
]
def build_pipeline(project, start_date, end_date, export_path):
query = query_pb2.Query()
query.kind.add().name = 'KindB'
filter_1 = datastore_helper.set_property_filter(query_pb2.Filter(), 'field_b1', PropertyFilter.GREATER_THAN, start_date)
filter_2 = datastore_helper.set_property_filter(query_pb2.Filter(), 'field_b1', PropertyFilter.LESS_THAN, end_date)
datastore_helper.set_composite_filter(query.filter, CompositeFilter.AND, filter_1, filter_2)
p = beam.Pipeline(options=pipeline_options)
_ = (p
| 'read from datastore' >> ReadFromDatastore(project, query, None)
| 'extract field' >> apache_beam.ParDo(LookupEntityFieldFn(project_id=project, field_name='key_to_kind_a'))
| 'format' >> beam.Map(format)
| 'write' >> apache_beam.io.WriteToText(
file_path_prefix=export_path,
file_name_suffix='.csv',
header='field_b1,field_b2,field_a1,field_a2',
num_shards=1)
)
return p

Scrapy IdentationError: expected an idented block

Trust you are doing well. Please I need your help, I´m obtaining this error but I don´t know why:
File "C:\Users\Luis\Amazon\mercado\spiders\spider.py", line 14
yield scrapy.Request("https://www.amazon.es/s/ref=sr_pg_2?rh=n%3A1951051031%2Cn%3A2424922031%2Ck%3Afebi&page=1&keywords=febi&ie=UTF8&qid=1535314254",self.parse_item)
^IndentationError: expected an indented block
# -*- coding: utf-8 -*-
import scrapy
import urllib
from mercado.items import MercadoItem
class MercadoSpider(CrawlSpider):
name = 'mercado'
item_count = 0
allowed_domain = ['https://www.amazon.es']
start_urls = ['https://www.amazon.es/s/ref=sr_pg_2rh=n%3A1951051031%2Cn%3A2424922031%2Ck%3Afebi&page=1&keywords=febi&ie=UTF8&qid=1 535314254']
def start_requests(self):
yield scrapy.Request("https://www.amazon.es/s/ref=sr_pg_2?rh=n%3A1951051031%2Cn%3A2424922031%2Ck%3Afebi&page=1&keywords=febi&ie=UTF8&qid=1535314254",self.parse_item)
for i in range(2,400):
yield scrapy.Request("https://www.amazon.es/s/ref=sr_pg_2?rh=n%3A1951051031%2Cn%3A2424922031%2Ck%3Afebi&page="+str(i)+"&keywords=febi&ie=UTF8&qid=1535314254",self.parse_item)
def parse_item(self, response):
ml_item = MercadoItem()
#info de producto
ml_item['articulo'] = response.xpath('normalize-space(//*[#id="productTitle"])').extract()
ml_item['precio'] = response.xpath('normalize-space(//*[#id="priceblock_ourprice"])').extract()
self.item_count += 1
yield ml_item
Do you know why?
I' ve added the code here to do it easily.
You have an indentation error:
# -*- coding: utf-8 -*-
import scrapy
import urllib
from mercado.items import MercadoItem
class MercadoSpider(CrawlSpider):
name = 'mercado'
item_count = 0
allowed_domain = ['https://www.amazon.es']
start_urls = ['https://www.amazon.es/s/ref=sr_pg_2rh=n%3A1951051031%2Cn%3A2424922031%2Ck%3Afebi&page=1&keywords=febi&ie=UTF8&qid=1 535314254']
def start_requests(self):
yield scrapy.Request("https://www.amazon.es/s/ref=sr_pg_2?rh=n%3A1951051031%2Cn%3A2424922031%2Ck%3Afebi&page=1&keywords=febi&ie=UTF8&qid=1535314254",self.parse_item)
for i in range(2,400):
yield scrapy.Request("https://www.amazon.es/s/ref=sr_pg_2?rh=n%3A1951051031%2Cn%3A2424922031%2Ck%3Afebi&page="+str(i)+"&keywords=febi&ie=UTF8&qid=1535314254",self.parse_item)
def parse_item(self, response):
ml_item = MercadoItem()
#info de producto
ml_item['articulo'] = response.xpath('normalize-space(//*[#id="productTitle"])').extract()
ml_item['precio'] = response.xpath('normalize-space(//*[#id="priceblock_ourprice"])').extract()
self.item_count += 1
yield ml_item
UPDATE But right now you have code (not optimal) to get pagination and parse details page. You need to add code to parse each pagination page and get detail link for each item:
def start_requests(self):
yield scrapy.Request("https://www.amazon.es/s/ref=sr_pg_2?rh=n%3A1951051031%2Cn%3A2424922031%2Ck%3Afebi&page=1&keywords=febi&ie=UTF8&qid=1535314254",self.parse_search)
for i in range(2,400):
yield scrapy.Request("https://www.amazon.es/s/ref=sr_pg_2?rh=n%3A1951051031%2Cn%3A2424922031%2Ck%3Afebi&page="+str(i)+"&keywords=febi&ie=UTF8&qid=1535314254",self.parse_search)
def parse_search(self, response):
for item_link in response.xpath('//ul[#id="s-results-list-atf"]//a[contains(#class, "s-access-detail-page")]/#href').extract():
yield scrapy.Request(item_link, self.parse_item)
def parse_item(self, response):
ml_item = MercadoItem()
#info de producto
ml_item['articulo'] = response.xpath('normalize-space(//*[#id="productTitle"])').extract()
ml_item['precio'] = response.xpath('normalize-space(//*[#id="priceblock_ourprice"])').extract()
self.item_count += 1
yield ml_item

wxpython wx.combobox save wx.StaticText view help me

import wx
import sqlite3
class Frame(wx.Frame):
def __init__(self):
wx.Frame.__init__(self, None)
self.panel = wx.Panel(self)
self.text = wx.StaticText(self.panel)
self.conn = sqlite3.connect("test.db")
self.cursor = self.conn.cursor()
self.autoRefersh()
def autoRefersh(self):
self.LoadList()
wx.CallLater(1000, self.autoRefersh)
def LoadList(self):
self.cursor.execute("SELECT *FROM CLINIC1")
for date1 in self.cursor: pass
self.staticText2_1 = wx.StaticText(self.panel, label=date1[1], style=wx.ALIGN_CENTER, pos=(100,100))
if __name__ == '__main__':
app = wx.App()
frame = Frame()
frame.Show()
app.MainLoop()
combobox data sqlite3 save in why panel show Why it looks different bug??
I do not know why this is happening.
You missed one crucial step, getting the data itself.
You are using the cursor object not the data returned by the cursor.
def LoadList(self):
self.cursor.execute("SELECT *FROM CLINIC1")
data = self.cursor.fetchall()
for date1 in data: pass
self.staticText2_1 = wx.StaticText(self.panel, label=date1[1], style=wx.ALIGN_CENTER, pos=(100,100))
AS you are "passing" in your for loop perhaps what you actually want is only a single record, in which case
data = self.cursor.fetchone()
and drop the for loop
Even better, read the tutorial
https://www.blog.pythonlibrary.org/2012/07/18/python-a-simple-step-by-step-sqlite-tutorial/
In the heading of your question you mention combobox, so I assume that you want to replace the statictext with a combobox. The following should get you started, I'll leave the wx.EVT_COMBOBOX event binding for you to add, as you will need it to do something when you select an item.
import wx
import sqlite3
class Frame(wx.Frame):
def __init__(self):
wx.Frame.__init__(self, None)
self.selected_data=[]
self.panel = wx.Panel(self)
self.combo = wx.ComboBox(self.panel,-1,choices=self.selected_data, size=(130,30))
self.conn = sqlite3.connect("test.db")
self.cursor = self.conn.cursor()
self.combo.SetValue("Choose an Item")
self.autoRefresh()
def autoRefresh(self):
self.LoadList()
def LoadList(self):
self.combo.Clear()
self.cursor.execute("SELECT * FROM CLINIC1")
data = self.cursor.fetchall()
for date1 in data:
self.selected_data.append(date1[1])
for i in self.selected_data:
self.combo.Append(i)
if __name__ == '__main__':
app = wx.App()
frame = Frame()
frame.Show()
app.MainLoop()
Edit:
It should look like this.

Resources