Marshmallow 3.10.0 ValidationError: Missing data for required field? - integration-testing

I am new to Marshmallow (3.10.0) and I am trying to understand the following errors that I am getting:
Traceback (most recent call last):
File "/opt/venv/lib/python3.7/site-packages/marshmallow/schema.py", line 779, in _run_validator
validator_func(output, partial=partial, many=many)
File "/usr/src/wazo-confd/wazo_confd/helpers/destination.py", line 351, in _validate_skill_rule_variables
['skill_rule_id'],
marshmallow.exceptions.ValidationError: Missing data for required field. When `skill_rule_variables` is defined
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/src/wazo-confd/wazo_confd/helpers/common.py", line 28, in wrapper
return func(*args, **kwargs)
File "/opt/venv/lib/python3.7/site-packages/xivo/auth_verifier.py", line 155, in wrapper
return func(*args, **kwargs)
File "/opt/venv/lib/python3.7/site-packages/xivo/auth_verifier.py", line 125, in wrapper
return func(*args, **kwargs)
File "/usr/src/wazo-confd/wazo_confd/plugins/call_filter_fallback/resource.py", line 36, in put
fallbacks = self.schema().load(request.get_json(), partial=True)
File "/opt/venv/lib/python3.7/site-packages/marshmallow/schema.py", line 728, in load
data, many=many, partial=partial, unknown=unknown, postprocess=True
File "/opt/venv/lib/python3.7/site-packages/marshmallow/schema.py", line 866, in _do_load
unknown=unknown,
File "/opt/venv/lib/python3.7/site-packages/marshmallow/schema.py", line 674, in _deserialize
index=index,
File "/opt/venv/lib/python3.7/site-packages/marshmallow/schema.py", line 496, in _call_and_store
value = getter_func(data)
File "/opt/venv/lib/python3.7/site-packages/marshmallow/schema.py", line 667, in <lambda>
val, field_name, data, **d_kwargs
File "/opt/venv/lib/python3.7/site-packages/marshmallow/fields.py", line 356, in deserialize
output = self._deserialize(value, attr, data, **kwargs)
File "/usr/src/wazo-confd/wazo_confd/helpers/destination.py", line 544, in _deserialize
return fields.Nested(schema, **self.kwargs)._deserialize(value, attr, data)
File "/opt/venv/lib/python3.7/site-packages/marshmallow/fields.py", line 611, in _deserialize
return self._load(value, data, partial=partial)
File "/opt/venv/lib/python3.7/site-packages/marshmallow/fields.py", line 594, in _load
valid_data = self.schema.load(value, unknown=self.unknown, partial=partial)
File "/opt/venv/lib/python3.7/site-packages/marshmallow/schema.py", line 728, in load
data, many=many, partial=partial, unknown=unknown, postprocess=True
File "/opt/venv/lib/python3.7/site-packages/marshmallow/schema.py", line 891, in _do_load
field_errors=field_errors,
File "/opt/venv/lib/python3.7/site-packages/marshmallow/schema.py", line 1194, in _invoke_schema_validators
partial=partial,
File "/opt/venv/lib/python3.7/site-packages/marshmallow/schema.py", line 781, in _run_validator
error_store.store_error(err.messages, err.field_name, index=index)
File "/opt/venv/lib/python3.7/site-packages/marshmallow/error_store.py", line 22, in store_error
messages = {field_name: messages}
TypeError: unhashable type: 'list'
The first one seems to occur in this file (destination.py):
# Copyright 2016-2022 The Wazo Authors (see the AUTHORS file)
# SPDX-License-Identifier: GPL-3.0-or-later
import json
from marshmallow import (
EXCLUDE,
Schema,
fields,
pre_dump,
post_load,
post_dump,
validates_schema,
validates,
)
from marshmallow.exceptions import ValidationError
from marshmallow.validate import Length, OneOf, Regexp, Predicate, Range
from xivo_dao.helpers import errors
from xivo_dao.helpers.exception import NotFoundError
from xivo_dao.resources.application import dao as application_dao
from xivo_dao.resources.conference import dao as conference_dao
from xivo_dao.resources.group import dao as group_dao
from xivo_dao.resources.ivr import dao as ivr_dao
from xivo_dao.resources.moh import dao as moh_dao
from xivo_dao.resources.outcall import dao as outcall_dao
from xivo_dao.resources.queue import dao as queue_dao
from xivo_dao.resources.skill_rule import dao as skill_rule_dao
from xivo_dao.resources.switchboard import dao as switchboard_dao
from xivo_dao.resources.user import dao as user_dao
from xivo_dao.resources.voicemail import dao as voicemail_dao
from wazo_confd.helpers.mallow import StrictBoolean
from wazo_confd.helpers.validator import GetResource, Validator
import logging
logger = logging.getLogger(__name__)
COMMAND_REGEX = r'^(?!(try)?system\()[a-zA-Z]{3,}\((.*)\)$'
CONTEXT_REGEX = r'^[a-zA-Z0-9_-]{1,39}$'
EXTEN_REGEX = r'^[0-9*#]{1,255}$'
SKILL_RULE_VARIABLE_REGEX = r'^[^[;\|]+$'
class BaseDestinationSchema(Schema):
class Meta:
unknown = EXCLUDE
type = fields.String(
validate=OneOf(
[
'application',
'conference',
'custom',
'extension',
'group',
'hangup',
'ivr',
'none',
'outcall',
'queue',
'sound',
'switchboard',
'user',
'voicemail',
]
),
required=True,
)
#post_dump
def convert_type_to_user(self, data, **kwargs):
if data['type'] == 'endcall':
data['type'] = 'hangup'
return data
#post_load
def convert_type_to_database(self, data, **kwargs):
if data['type'] == 'hangup':
data['type'] = 'endcall'
return data
class ApplicationDestinationSchema(BaseDestinationSchema):
application = fields.String(
validate=OneOf(
['callback_disa', 'custom', 'directory', 'disa', 'fax_to_mail', 'voicemail']
),
attribute='subtype',
required=True,
)
#post_dump
def convert_application_to_user(self, data, **kwargs):
if data['application'] == 'callbackdisa':
data['application'] = 'callback_disa'
elif data['application'] == 'faxtomail':
data['application'] = 'fax_to_mail'
elif data['application'] == 'voicemailmain':
data['application'] = 'voicemail'
return data
#post_load
def convert_application_to_database(self, data, **kwargs):
if data['subtype'] == 'callback_disa':
data['subtype'] = 'callbackdisa'
elif data['subtype'] == 'fax_to_mail':
data['subtype'] = 'faxtomail'
elif data['subtype'] == 'voicemail':
data['subtype'] = 'voicemailmain'
return data
class CallBackDISADestinationSchema(ApplicationDestinationSchema):
pin = fields.String(
validate=(Predicate('isdigit'), Length(max=40)),
allow_none=True,
attribute='actionarg1',
)
context = fields.String(
validate=Regexp(CONTEXT_REGEX), attribute='actionarg2', required=True
)
class CustomApplicationDestinationSchema(ApplicationDestinationSchema):
application_uuid = fields.UUID(attribute='actionarg1', required=True)
_application = fields.Nested(
'ApplicationSchema', only=['name'], attribute='application', dump_only=True
)
#post_dump
def make_application_fields_flat(self, data, **kwargs):
if data.get('_application'):
data['application_name'] = data['_application']['name']
data.pop('_application', None)
return data
class DISADestinationSchema(ApplicationDestinationSchema):
pin = fields.String(
validate=(Predicate('isdigit'), Length(max=40)),
allow_none=True,
attribute='actionarg1',
)
context = fields.String(
validate=Regexp(CONTEXT_REGEX), attribute='actionarg2', required=True
)
class DirectoryDestinationSchema(ApplicationDestinationSchema):
context = fields.String(
validate=Regexp(CONTEXT_REGEX), attribute='actionarg1', required=True
)
class FaxToMailDestinationSchema(ApplicationDestinationSchema):
email = fields.Email(validate=Length(max=80), attribute='actionarg1', required=True)
class VoicemailMainDestinationSchema(ApplicationDestinationSchema):
context = fields.String(
validate=Regexp(CONTEXT_REGEX), attribute='actionarg1', required=True
)
class ConferenceDestinationSchema(BaseDestinationSchema):
conference_id = fields.Integer(attribute='actionarg1', required=True)
conference = fields.Nested('ConferenceSchema', only=['name'], dump_only=True)
#post_dump
def make_conference_fields_flat(self, data, **kwargs):
if data.get('conference'):
data['conference_name'] = data['conference']['name']
data.pop('conference', None)
return data
class CustomDestinationSchema(BaseDestinationSchema):
command = fields.String(
validate=(Regexp(COMMAND_REGEX), Length(max=255)),
attribute='actionarg1',
required=True,
)
class ExtensionDestinationSchema(BaseDestinationSchema):
exten = fields.String(
validate=Regexp(EXTEN_REGEX), attribute='actionarg1', required=True
)
context = fields.String(
validate=Regexp(CONTEXT_REGEX), attribute='actionarg2', required=True
)
class GroupDestinationSchema(BaseDestinationSchema):
group_id = fields.Integer(attribute='actionarg1', required=True)
ring_time = fields.Float(
validate=Range(min=0), attribute='actionarg2', allow_none=True
)
group = fields.Nested('GroupSchema', only=['label', 'name'], dump_only=True)
#post_dump
def make_group_fields_flat(self, data, **kwargs):
if data.get('group'):
# TODO(pc-m): Label was added in 21.04 group_name should be remove when we remove
# the compatibility logic in group schema
data['group_name'] = data['group']['name']
data['group_label'] = data['group']['label']
data.pop('group', None)
return data
class HangupDestinationSchema(BaseDestinationSchema):
cause = fields.String(
validate=OneOf(['busy', 'congestion', 'normal']),
attribute='subtype',
missing='normal',
required=False,
)
#post_dump
def convert_cause_to_user(self, data, **kwargs):
if data['cause'] == 'hangup':
data['cause'] = 'normal'
return data
#post_load
def convert_cause_to_database(self, data, **kwargs):
if data['subtype'] == 'normal':
data['subtype'] = 'hangup'
return data
class BusyDestinationSchema(HangupDestinationSchema):
timeout = fields.Float(
attribute='actionarg1', validate=Range(min=0), allow_none=True
)
class CongestionDestinationSchema(HangupDestinationSchema):
timeout = fields.Float(
attribute='actionarg1', validate=Range(min=0), allow_none=True
)
class IVRDestinationSchema(BaseDestinationSchema):
ivr_id = fields.Integer(attribute='actionarg1', required=True)
ivr = fields.Nested('IvrSchema', only=['name'], dump_only=True)
#post_dump
def make_ivr_fields_flat(self, data, **kwargs):
if data.get('ivr'):
data['ivr_name'] = data['ivr']['name']
data.pop('ivr', None)
return data
class NormalDestinationSchema(HangupDestinationSchema):
pass
class NoneDestinationSchema(BaseDestinationSchema):
pass
class OutcallDestinationSchema(BaseDestinationSchema):
outcall_id = fields.Integer(attribute='actionarg1', required=True)
exten = fields.String(
validate=(Predicate('isdigit'), Length(max=255)),
attribute='actionarg2',
required=True,
)
class QueueDestinationSchema(BaseDestinationSchema):
queue_id = fields.Integer(attribute='actionarg1', required=True)
ring_time = fields.Float(validate=Range(min=0), allow_none=True)
skill_rule_id = fields.Integer(allow_none=True)
skill_rule_variables = fields.Dict(allow_none=True)
queue = fields.Nested('QueueSchema', only=['label'], dump_only=True)
#pre_dump
def separate_action(self, data, **kwargs):
options = data.actionarg2.split(';') if data.actionarg2 else []
data.ring_time = None
data.skill_rule_id = None
data.skill_rule_variables = None
_skill_rule_variables = None
if len(options) == 1:
data.ring_time = options[0] or None
elif len(options) == 2: # id is always bound with variables
data.skill_rule_id = options[0]
_skill_rule_variables = options[1] or None
elif len(options) == 3:
data.ring_time = options[0]
data.skill_rule_id = options[1]
_skill_rule_variables = options[2] or None
if _skill_rule_variables:
_skill_rule_variables = _skill_rule_variables.replace(
'|', ','
) # dialplan interpret comma ...
data.skill_rule_variables = json.loads(_skill_rule_variables)
return data
#post_load
def merge_action(self, data, **kwargs):
ring_time = data.pop('ring_time', None)
skill_rule_id = data.pop('skill_rule_id', None)
skill_rule_variables = data.pop('skill_rule_variables', None)
skill_rule_variables_str = (
json.dumps(skill_rule_variables).replace(',', '|')
if skill_rule_variables
else ''
)
data[
'actionarg2'
] = '{ring_time}{sep1}{skill_rule_id}{sep2}{skill_rule_variables}'.format(
ring_time=ring_time or '',
sep1=';' if ring_time and skill_rule_id else '',
skill_rule_id=skill_rule_id or '',
sep2=';' if skill_rule_id else '',
skill_rule_variables=skill_rule_variables_str,
)
return data
#post_dump
def make_queue_fields_flat(self, data, **kwargs):
if data.get('queue'):
data['queue_label'] = data['queue']['label']
data.pop('queue', None)
return data
#validates_schema
def _validate_skill_rule_variables(self, data, **kwargs):
logger.critical('------------------------------------------')
logger.critical(data)
logger.critical(kwargs)
logger.critical('------------------------------------------')
if not data.get('skill_rule_variables'):
return
if not data.get('skill_rule_id'):
raise ValidationError(
'Missing data for required field. When `skill_rule_variables` is defined',
['skill_rule_id'],
)
#validates('skill_rule_variables')
def _validate_skill_rule_variables_value(self, variables):
# with marshmallow 3.0 we can set this validator on the field declaration
if not variables:
return
validator = Regexp(SKILL_RULE_VARIABLE_REGEX)
for key, value in variables.items():
validator(key)
validator(value)
class SoundDestinationSchema(BaseDestinationSchema):
filename = fields.String(
validate=Length(max=255), attribute='actionarg1', required=True
)
skip = StrictBoolean()
no_answer = StrictBoolean()
#pre_dump
def separate_action(self, data, **kwargs):
options = data.actionarg2 if data.actionarg2 else ''
data.skip = True if 'skip' in options else False
data.no_answer = True if 'noanswer' in options else False
return data
#post_load
def merge_action(self, data, **kwargs):
data['actionarg2'] = '{skip}{noanswer}'.format(
skip='skip' if data.pop('skip', False) else '',
noanswer='noanswer' if data.pop('no_answer', False) else '',
)
return data
class SwitchboardDestinationSchema(BaseDestinationSchema):
switchboard_uuid = fields.UUID(attribute='actionarg1', required=True)
ring_time = fields.Float(
validate=Range(min=0), attribute='actionarg2', allow_none=True
)
switchboard = fields.Nested('SwitchboardSchema', only=['name'], dump_only=True)
#post_dump
def make_switchboard_fields_flat(self, data, **kwargs):
if data.get('switchboard'):
data['switchboard_name'] = data['switchboard']['name']
data.pop('switchboard', None)
return data
class UserDestinationSchema(BaseDestinationSchema):
user_id = fields.Integer(attribute='actionarg1', required=True)
ring_time = fields.Float(validate=Range(min=0), allow_none=True)
moh_uuid = fields.UUID(allow_none=True)
user = fields.Nested('UserSchema', only=['firstname', 'lastname'], dump_only=True)
#post_dump
def make_user_fields_flat(self, data, **kwargs):
if data.get('user'):
data['user_firstname'] = data['user']['firstname']
data['user_lastname'] = data['user']['lastname']
data.pop('user', None)
return data
#pre_dump
def separate_action(self, data, **kwargs):
options = data.actionarg2.split(';') if data.actionarg2 else []
data.ring_time = None
data.moh_uuid = None
if len(options) > 0:
data.ring_time = options[0] or None
if len(options) > 1: # id is always bound with variables
data.moh_uuid = options[1]
return data
#post_load
def merge_action(self, data, **kwargs):
ring_time = data.pop('ring_time', None)
moh_uuid = data.pop('moh_uuid', None)
actionarg2 = ''
if ring_time is not None:
actionarg2 += str(ring_time)
if moh_uuid is not None:
actionarg2 += ';{}'.format(moh_uuid)
data['actionarg2'] = actionarg2
return data
class VoicemailDestinationSchema(BaseDestinationSchema):
voicemail_id = fields.Integer(attribute='actionarg1', required=True)
skip_instructions = StrictBoolean()
greeting = fields.String(validate=OneOf(['busy', 'unavailable']), allow_none=True)
voicemail = fields.Nested('VoicemailSchema', only=['name'], dump_only=True)
#pre_dump
def separate_action(self, data, **kwargs):
options = data.actionarg2 if data.actionarg2 else ''
data.skip_instructions = True if 's' in options else False
data.greeting = (
'busy' if 'b' in options else 'unavailable' if 'u' in options else None
)
return data
#post_load
def merge_action(self, data, **kwargs):
greeting = data.pop('greeting', None)
data['actionarg2'] = '{}{}'.format(
'b' if greeting == 'busy' else 'u' if greeting == 'unavailable' else '',
's' if data.pop('skip_instructions', False) else '',
)
return data
#post_dump
def make_voicemail_fields_flat(self, data, **kwargs):
if data.get('voicemail'):
data['voicemail_name'] = data['voicemail']['name']
data.pop('voicemail', None)
return data
class DestinationField(fields.Nested):
application_schemas = {
'callback_disa': CallBackDISADestinationSchema,
'callbackdisa': CallBackDISADestinationSchema,
'custom': CustomApplicationDestinationSchema,
'directory': DirectoryDestinationSchema,
'disa': DISADestinationSchema,
'fax_to_mail': FaxToMailDestinationSchema,
'faxtomail': FaxToMailDestinationSchema,
'voicemail': VoicemailMainDestinationSchema,
'voicemailmain': VoicemailMainDestinationSchema,
}
hangup_schemas = {
'busy': BusyDestinationSchema,
'congestion': CongestionDestinationSchema,
'normal': NormalDestinationSchema,
'hangup': NormalDestinationSchema,
}
destination_schemas = {
'application': ApplicationDestinationSchema,
'conference': ConferenceDestinationSchema,
'custom': CustomDestinationSchema,
'extension': ExtensionDestinationSchema,
'group': GroupDestinationSchema,
'hangup': HangupDestinationSchema,
'endcall': HangupDestinationSchema,
'ivr': IVRDestinationSchema,
'none': NoneDestinationSchema,
'outcall': OutcallDestinationSchema,
'queue': QueueDestinationSchema,
'sound': SoundDestinationSchema,
'switchboard': SwitchboardDestinationSchema,
'user': UserDestinationSchema,
'voicemail': VoicemailDestinationSchema,
}
def __init__(self, **kwargs):
# FIXME(sileht): I'm not sure validation works here...
# This of dynamic nesterd stuffs should not done like this.
self.kwargs = kwargs
self.kwargs["unknown"] = EXCLUDE
super().__init__(BaseDestinationSchema, **self.kwargs)
def _deserialize(self, value, attr, data, **kwargs):
self.schema.context = self.context
base = super()._deserialize(value, attr, data, **kwargs)
schema = self.destination_schemas[base['type']]
if base['type'] == 'application':
base = fields.Nested(schema, **self.kwargs)._deserialize(value, attr, data)
schema = self.application_schemas[base['subtype']]
if base['type'] == 'endcall':
base = fields.Nested(schema, **self.kwargs)._deserialize(value, attr, data)
schema = self.hangup_schemas[base['subtype']]
return fields.Nested(schema, **self.kwargs)._deserialize(value, attr, data)
def _serialize(self, nested_obj, attr, obj):
base = super()._serialize(nested_obj, attr, obj)
if not base:
return base
schema = self.destination_schemas[base['type']]
if base['type'] == 'application':
base = fields.Nested(schema, **self.kwargs)._serialize(
nested_obj, attr, obj
)
schema = self.application_schemas[base['application']]
if base['type'] == 'hangup':
base = fields.Nested(schema, **self.kwargs)._serialize(
nested_obj, attr, obj
)
schema = self.hangup_schemas[base['cause']]
return fields.Nested(schema, **self.kwargs)._serialize(nested_obj, attr, obj)
class OptionalGetSkillRuleFromActionArg2Resource(Validator):
def __init__(self, dao_get):
self.dao_get = dao_get
def validate(self, model):
destination = QueueDestinationSchema().dump(model)
skill_rule_id = destination.get('skill_rule_id', None)
if not skill_rule_id:
return
try:
self.dao_get(skill_rule_id)
except NotFoundError:
metadata = {'skill_rule_id': skill_rule_id}
raise errors.param_not_found('skill_rule_id', 'SkillRule', **metadata)
class GetMohFromActionArg2Resource(Validator):
def __init__(self, dao_get):
self._dao_get = dao_get
def validate(self, model):
destination = UserDestinationSchema().dump(model)
moh_uuid = destination.get('moh_uuid', None)
if not moh_uuid:
return
try:
self._dao_get(moh_uuid)
except NotFoundError:
metadata = {'moh_uuid': moh_uuid}
raise errors.param_not_found('moh_uuid', 'MOH', **metadata)
class DestinationValidator:
_VALIDATORS = {
'application:callbackdisa': [],
'application:custom': [
GetResource('actionarg1', application_dao.get, 'Application')
],
'application:directory': [],
'application:disa': [],
'application:faxtomail': [],
'application:voicemailmain': [],
'conference': [GetResource('actionarg1', conference_dao.get, 'Conference')],
'custom': [],
'extension': [],
'group': [GetResource('actionarg1', group_dao.get, 'Group')],
'endcall:busy': [],
'endcall:congestion': [],
'endcall:hangup': [],
'ivr': [GetResource('actionarg1', ivr_dao.get, 'IVR')],
'none': [],
'outcall': [GetResource('actionarg1', outcall_dao.get, 'Outcall')],
'queue': [
GetResource('actionarg1', queue_dao.get, 'Queue'),
OptionalGetSkillRuleFromActionArg2Resource(skill_rule_dao.get),
],
'sound': [],
'switchboard': [GetResource('actionarg1', switchboard_dao.get, 'Switchboard')],
'user': [
GetResource('actionarg1', user_dao.get, 'User'),
GetMohFromActionArg2Resource(moh_dao.get),
],
'voicemail': [GetResource('actionarg1', voicemail_dao.get, 'Voicemail')],
}
def validate(self, destination):
for validator in self._VALIDATORS[destination.action]:
validator.validate(destination)
As for the second one, it seems to be caused within this file (resource.py):
# Copyright 2018-2020 The Wazo Authors (see the AUTHORS file)
# SPDX-License-Identifier: GPL-3.0-or-later
from flask import request
from wazo_confd.auth import required_acl
from wazo_confd.helpers.restful import ConfdResource
from .schema import CallFilterFallbackSchema
class CallFilterFallbackList(ConfdResource):
schema = CallFilterFallbackSchema
has_tenant_uuid = True
def __init__(self, service, call_filter_dao):
super().__init__()
self.service = service
self.call_filter_dao = call_filter_dao
#required_acl('confd.callfilters.{call_filter_id}.fallbacks.read')
def get(self, call_filter_id):
tenant_uuids = self._build_tenant_list({'recurse': True})
call_filter = self.call_filter_dao.get(
call_filter_id, tenant_uuids=tenant_uuids
)
return self.schema().dump(call_filter.fallbacks)
#required_acl('confd.callfilters.{call_filter_id}.fallbacks.update')
def put(self, call_filter_id):
tenant_uuids = self._build_tenant_list({'recurse': True})
call_filter = self.call_filter_dao.get(
call_filter_id, tenant_uuids=tenant_uuids
)
fallbacks = self.schema().load(request.get_json())
self.service.edit(call_filter, fallbacks)
return '', 204
I am not sure what causes these errors; tried to find online solutions but I could not fix the issue.

Related

Scrapy: Limiting the number of next page that are scraped. Unfortunately, the DEPTH_LIMIT custom setting doesn't work

I have build a simple amazon scraper to download listings of the products. However, I am not sure how I can limit the number of next pages that are crawled. Ideally, I don't want the spider to crawl more than 10 pages for each main page that it starts with. Some of the URLs in fact only have 2 pages.
Here is my code:
import scrapy
from scrapy.crawler import CrawlerProcess
from scraper_api import ScraperAPIClient
#Error Management Modules
from scrapy.spidermiddlewares.httperror import HttpError
from twisted.internet.error import DNSLookupError
from twisted.internet.error import TimeoutError, TCPTimedOutError
from .datatransformation import ErrorFileManagement
# Importing all defined attributes and items to be scraped!
from ..items import AmazonListingItems
from ..attributes import *
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy.sql.expression import column
class AmazonListings(scrapy.Spider):
name = "amazonlistings"
def start_requests(self):
error = ErrorManager()
client = ScraperAPIClient('50903e1bf8db5418a25334f3e3ed7c74')
db = create_engine('postgresql://postgres:Maisha123#localhost:5432')
urls = db.execute('select category_url from scrapycategory')
df = pd.DataFrame(urls.fetchall())
urls = df.values.tolist()
for url in urls:
yield scrapy.Request(client.scrapyGet(url=url[0]), callback=self.parse, errback=error.error_handler, dont_filter=True)
custom_settings = {
'DEPTH_LIMIT' : 3,
'DOWNLOAD_DELAYED': 5
}
def parse(self, response):
items = AmazonListingItems()
ap = AttributeParser()
error = ErrorManager()
client = ScraperAPIClient('50903e1bf8db5418a25334f3e3ed7c74')
itemlist = ap.itemlist(response)
if itemlist:
for item in itemlist:
items['mainurl'] = response.url
items['producturl'] = ap.producturl(item)
items['productname'] = ap.productname(item)
items['price'] = ap.price(item)
items['ratings'] = ap.ratings(item)
items['reviews'] = ap.reviews(item)
items['heroimg'] = ap.heroimg(item)
items['badge'] = ap.badge(item)
yield items
next_page = ap.next_page(response)
if next_page:
dom = 'www.amazon.com'
if dom in next_page:
request = scrapy.Request(client.scrapyGet(next_page), callback=self.parse,errback=error.error_handler)
yield request
else:
next_page_url = 'https://www.amazon.com' + next_page
request = scrapy.Request(client.scrapyGet(next_page_url), callback=self.parse,errback=error.error_handler)
yield request
else:
error.error_handler(response, itemlist=False)
#All Attribute Parser
class AttributeParser:
def itemlist(self, response):
itemlist = []
itemlist.append(response.css('.zg-item'))
itemlist.append(response.css('.s-asin .sg-col-inner'))
if itemlist:
for item in itemlist:
if item:
return item
def producturl(self, response):
for urls in AmazonListing_producturl:
value = response.css(urls).extract()
if value:
return value
def productname(self, response):
for productname in AmazonListing_productname:
value = response.css(productname).extract()
if value:
return value
def price(self, response):
for price in AmazonListing_price:
value = response.css(price).extract()
if value:
return value
def ratings(self, response):
for ratings in AmazonListing_ratings:
value = response.css(ratings).extract()
if value:
return value
def reviews(self, response):
for reviews in AmazonListing_reviews:
value = response.css(reviews).extract()
if value:
return value
def heroimg(self, response):
for heroimg in AmazonListing_heroimg:
value = response.css(heroimg).extract()
if value:
return value
def badge(self, response):
for badge in AmazonListing_badge:
value = response.css(badge).extract()
if value:
return value
def next_page(self,response):
for nxtpg in AmazonListing_nextpage:
value = response.css(nxtpg).get()
if value:
return value
else:
return None
class ErrorManager:
def error_handler(self, failure, itemlist=True):
er = ErrorFileManagement()
if itemlist == False:
response = failure
failure_record = {
'request_url': response.url,
'request_url': response.request.url,
'status': response.status,
'ip_address': response.ip_address,
'headers': response.headers,
'response.body': response.body,
}
er.addError(failure_record)
elif failure.check(HttpError):
response = failure.value.response
failure_record = {
'request_url': response.request.url,
'response_url': response.url,
'status': response.status,
'ip_address': response.ip_address,
'headers': response.headers,
'response_body': response.body,
}
er.addError(failure_record)
elif failure.check(DNSLookupError):
response = failure.request
failure_record = {
'request_url': response.request.url,
'response_url': response.url,
'status': response.status,
'ip_address': response.ip_address,
'headers': response.headers,
'response_body': response.body,
}
er.addError(failure)
elif failure.check(TimeoutError, TCPTimedOutError):
response = failure.request
failure_record = {
'request_url': response.request.url,
'response_url': response.url,
'status': response.status,
'ip_address': response.ip_address,
'headers': response.headers,
'response_body': response.body,
}
er.addError(failure_record)
elif failure.status == 200:
response = failure
failure_record = {
'request_url': response.request.url,
'response_url': response.url,
'status': response.status,
'ip_address': response.ip_address,
'headers': response.headers,
'response_body': response.body,
}
er.addError(failure_record)
else:
response = failure
failure_record = {
'request_url': response.request.url,
'response_url': response.url,
'status': response.status,
'ip_address': response.ip_address,
'headers': response.headers,
'response_body': response.body,
}
er.addError(failure_record)
process = CrawlerProcess(settings={
'FEEDS': {
'/mnt/d/dev/dsiqscraper/amzlistings.csv': {'format':'csv'},
},
})
process.crawl(AmazonListings)
process.start()
custom_settings supposed to be a class attribute.
Like this:
class AmazonListings(scrapy.Spider):
name = "amazonlistings"
custom_settings = {
'DEPTH_LIMIT' : 3,
'DOWNLOAD_DELAYED': 5
}
def start_requests(self):
error = ErrorManager()
client = ScraperAPIClient('50903e1bf8db5418a25334f3e3ed7c74')
db = create_engine('postgresql://postgres:Maisha123#localhost:5432')
urls = db.execute('select category_url from scrapycategory')
df = pd.DataFrame(urls.fetchall())
urls = df.values.tolist()
for url in urls:
yield scrapy.Request(client.scrapyGet(url=url[0]), callback=self.parse, errback=error.error_handler, dont_filter=True)
def parse...........

Airflow branch errors with object is not iterable

I'm trying to get BranchPythonOperator working but I have the following error:
'BigQueryInsertJobOperator' object is not iterable
Here is my Branch Operator:
branching = BranchPythonOperator(
task_id='branching',
python_callable=return_branch,
provide_context=True)
Here is my Python Callable:
def return_branch(ds, **kwargs):
execution_year = kwargs['execution_date'].strftime("%Y")
type = dataset_metadata[f'{execution_year}']['var']
if type == 'foo':
return x
return y
x and y are BigQueryInsertJobOperator:
x = BigQueryInsertJobOperator(
task_id='x',
configuration={
"query": {
"query": "{% include 'q.sql' %}",
"use_legacy_sql": False
}
},
dag=dag)
I'd like to refer to this answer. Your method, return_branch, shouldn't return the operator. It must return the task_id of your operator. You'll get something like this:
def return_branch(ds, **kwargs):
next_task_id = "a" # <some kind of logic>
return next_task_id
branching = BranchPythonOperator(
task_id="pick_query",
python_callable=return_branch,
provide_context=True,
)
option_1 = DummyOperator(task_id="a")
option_2 = DummyOperator(task_id="b")
branching >> [option_1, option_2]

Why do I get an error "__init__() missing 1 required positional argument " when my code runs proper?

The error is only apparent in a competitive code website - "leetcode" when I submit my solution, but it isn't prevalent when I run the code in the online text-editor or on my local machine.
class Solution:
def __init__(self, x):
"""
:type x: str
:rtype: bool
"""
self.x = x
def convert_list(self):
# my_list = list(self.x)
self.my_list = list(self.x)
return self.my_list
def store_list(self):
my_list = list(self.x)
list_length = len(my_list)
new_new_list = []
for i in range(0,list_length):
new_list = my_list.pop()
new_new_list.append(new_list)
return new_new_list
def isPalindrome(self):
if self.convert_list() == self.store_list():
return 'true'.lower()
else:
return 'false'.lower()
X = Solution(input())
# print(X.store_list())
# print(X.convert_list())
print(X.isPalindrome())
Did some indentation and is working fine
class Solution:
def __init__(self, x):
"""
:type x: str
:rtype: bool
"""
self.x = x
def convert_list(self):
# my_list = list(self.x)
self.my_list = list(self.x)
return self.my_list
def store_list(self):
my_list = list(self.x)
list_length = len(my_list)
new_new_list = []
for i in range(0,list_length):
new_list = my_list.pop()
new_new_list.append(new_list)
return new_new_list
def isPalindrome(self):
if self.convert_list() == self.store_list():
return 'true'.lower()
else:
return 'false'.lower()
X = Solution(input())
# print(X.store_list())
# print(X.convert_list())
print(X.isPalindrome())

Wxpython grid AppendRows

The following code is used to generate a table that an row can be added by a button, but only the data of the last row is eliminated after running.
import wx, wx.grid
class GridData(wx.grid.PyGridTableBase):
_cols = "a b c".split()
_data = [
"1 2 3".split(),
"4 5 6".split(),
"7 8 9".split()
]
_highlighted = set()
def GetColLabelValue(self, col):
return self._cols[col]
def GetNumberRows(self):
return len(self._data)
def GetNumberCols(self):
return len(self._cols)
def GetValue(self, row, col):
return self._data[row][col]
def SetValue(self, row, col, val):
self._data[row][col] = val
def AppendRows(self, *args):
msg = wx.grid.GridTableMessage(self,
wx.grid.GRIDTABLE_NOTIFY_ROWS_APPENDED,
)
self.GetView().ProcessTableMessage(msg)
return True
# self.GetView().EndBatch()
# msg = wx.grid.GridTableMessage(self, wx.grid.GRIDTABLE_REQUEST_VIEW_GET_VALUES)
# self.GetView().ProcessTableMessage(msg)
def GetAttr(self, row, col, kind):
attr = wx.grid.GridCellAttr()
attr.SetBackgroundColour(wx.GREEN if row in self._highlighted else wx.WHITE)
return attr
def set_value(self, row, col, val):
self._highlighted.add(row)
self.SetValue(row, col, val)
class Test(wx.Frame):#main frame
def __init__(self):
wx.Frame.__init__(self, None)
self.data = GridData()
self.grid = wx.grid.Grid(self)
self.grid.SetTable(self.data)
btn = wx.Button(self, label="set a2 to x")
btn.Bind(wx.EVT_BUTTON, self.OnTest)
self.Sizer = wx.BoxSizer(wx.VERTICAL)
self.Sizer.Add(self.grid, 1, wx.EXPAND)
self.Sizer.Add(btn, 0, wx.EXPAND)
def OnTest(self, event):
self.grid.AppendRows(numRows=3)
#self.data.set_value(1, 0, "x")
self.grid.Refresh()
app = wx.PySimpleApp()
app.TopWindow = Test()
app.TopWindow.Show()
app.MainLoop()
There is no error report,and the expectation can't be reached.
The following code is used to generate a table that can be added by a button, but only the data of the last row can be eliminated after running.

Load two columns at start in QColumnView

Is there any way to load more than one column at start in QColumnView?
I tried simulating the click on the desired index in the tree view. Though the click event is received it doesn't load the second column. Tried calling the createColumn as well with the index. But both approaches didn't work.
from PyQt4 import QtCore, QtGui
import os
try:
_fromUtf8 = QtCore.QString.fromUtf8
except AttributeError:
def _fromUtf8(s):
return s
try:
_encoding = QtGui.QApplication.UnicodeUTF8
def _translate(context, text, disambig):
return QtGui.QApplication.translate(context, text, disambig, _encoding)
except AttributeError:
def _translate(context, text, disambig):
return QtGui.QApplication.translate(context, text, disambig)
class MyModel(QtGui.QFileSystemModel):
def __init__(self):
super().__init__()
self.checkedIndexes = {}
self.parentChecked=False
def flags(self,index):
flags=super().flags(index)|QtCore.Qt.ItemIsUserCheckable
return flags
def checkState(self, index):
if index in self.checkedIndexes:
return self.checkedIndexes[index]
else:
return QtCore.Qt.Checked
def data(self, index, role=QtCore.Qt.DisplayRole):
if role == QtCore.Qt.CheckStateRole:
if index.column() == 0:
return self.checkState(index)
else:
return super().data(index, role)
def setData(self, index, value, role):
if (role == QtCore.Qt.CheckStateRole and index.column() == 0):
self.checkedIndexes[index] = value
self.dataChanged.emit(index,index)
return True
return super().setData(index, value, role)
def hasChildren(self,index):
hasChildren=super().hasChildren(index)
path=super().filePath(index)
dirIter=QtCore.QDirIterator(path,QtCore.QDir.AllDirs|QtCore.QDir.NoDotAndDotDot|QtCore.QDir.NoSymLinks)
if dirIter.hasNext():
return True
else:
return False
return hasChildren
class columnView(QtGui.QDialog):
def __init__(self,parent=None):
super().__init__(parent)
self.ui = Ui_Dialog()
self.ui.setupUi(self)
self.model=MyModel()
self.model.setFilter(QtCore.QDir.AllDirs|QtCore.QDir.NoDotAndDotDot|QtCore.QDir.NoSymLinks)
path=os.path.expanduser("~")
self.model.setRootPath(path)
self.ui.columnView.setModel(self.model)
#print("path=",path)
self.ui.columnView.setRootIndex(self.model.index(path))
self.ui.columnView.updatePreviewWidget.connect(self.closePreview)
self.show()
openIndex=self.model.index(os.path.join(path,"Documents"))
self.ui.columnView.createColumn(openIndex)
#QtCore.QMetaObject.invokeMethod(self.ui.columnView, "clicked", QtCore.Qt.QueuedConnection, QtCore.Q_ARG(QtCore.QModelIndex, openIndex))
self.ui.columnView.clicked.connect(self.rowClicked)
self.ui.closePushButton.clicked.connect(self.close)
def rowClicked(self,index):
print("row clicked=",self.model.filePath(index))
def closePreview(self,index):
self.ui.columnView.setPreviewWidget(None)
class Ui_Dialog(object):
def setupUi(self, Dialog):
Dialog.setObjectName(_fromUtf8("Dialog"))
Dialog.resize(596, 389)
self.verticalLayout = QtGui.QVBoxLayout(Dialog)
self.verticalLayout.setObjectName(_fromUtf8("verticalLayout"))
self.columnView = QtGui.QColumnView(Dialog)
self.columnView.setObjectName(_fromUtf8("columnView"))
self.verticalLayout.addWidget(self.columnView)
self.horizontalLayout = QtGui.QHBoxLayout()
self.horizontalLayout.setObjectName(_fromUtf8("horizontalLayout"))
spacerItem = QtGui.QSpacerItem(40, 20, QtGui.QSizePolicy.Expanding, QtGui.QSizePolicy.Minimum)
self.horizontalLayout.addItem(spacerItem)
self.closePushButton = QtGui.QPushButton(Dialog)
self.closePushButton.setObjectName(_fromUtf8("closePushButton"))
self.horizontalLayout.addWidget(self.closePushButton)
self.verticalLayout.addLayout(self.horizontalLayout)
self.retranslateUi(Dialog)
QtCore.QMetaObject.connectSlotsByName(Dialog)
def retranslateUi(self, Dialog):
Dialog.setWindowTitle(_translate("Dialog", "Dialog", None))
self.closePushButton.setText(_translate("Dialog", "Close", None))
if __name__ == "__main__":
import sys
app = QtGui.QApplication(sys.argv)
view = columnView()
sys.exit(app.exec_())
Though both TreeView and ColumnView is designed to display hierarchical data, I feel that when compared with TreeView, the ColumnView implementation was given less significance and highly frustrating. In TreeView you can do the above easily with QTreeView.expand(index).
The only way to do is to select the row with the index using the selection model
self.ui.columnView.selectionModel().setCurrentIndex(index,QtGui.QItemSelectionModel.Current|QtGui.QItemSelectionModel.Select)
This will highlight the row and will load the corresponding next column.
Ref: https://forum.qt.io/topic/76588/loading-two-columns-at-start-in-qcolumnview

Resources