asyncio 'function' object has no attribute 'send' - python-3.6

I'm trying to send a message to the client every 30 seconds until the client disconnects, in django channels. Below is the piece of code written to achieve this using asyncio, but I'm getting the error "AttributeError: 'function' object has no attribute 'send'". I haven't used asyncio before, so I tried many possibilities and all of them result in some kind of error (because of my inexperience).
Could someone please help me figure out how this can be solved?
Below is the code:
class HomeConsumer(WebsocketConsumer):
    def connect(self):
        self.room_name = "home"
        self.room_group_name = self.room_name
        async_to_sync(self.channel_layer.group_add)(
            self.room_group_name,
            self.channel_name
        )
        self.accept()
        self.connected = True

        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        task = loop.create_task(self.send_response)
        loop.run_until_complete(task)

    async def send_response(self):
        while self.connected:
            sent_by = Message.objects.filter(notification_read=False).exclude(
                last_sent_by=self.scope["user"]).values("last_sent_by__username")
            self.send(text_data=json.dumps({
                'notification_by': list(sent_by)
            }))
            asyncio.sleep(30)

    def disconnect(self, close_code):
        async_to_sync(self.channel_layer.group_discard)(
            self.room_group_name,
            self.channel_name
        )
        self.connected = False
I believe something might be wrong in the portion of the code below:
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
task = loop.create_task(self.send_response)
loop.run_until_complete(task)
Using loop = asyncio.get_event_loop() instead of new_event_loop() results in:
RuntimeError: There is no current event loop in thread 'ThreadPoolExecutor-0_0'.

I'm posting this solution as an answer because I searched a lot for how to send data to the client without the client requesting it in django-channels, but couldn't find any complete explanation or answer. I hope this helps someone who is in the situation I was in.
Thanks to user4815162342 for the help he provided in solving the issue I had.
class HomeConsumer(AsyncWebsocketConsumer):
    async def connect(self):
        self.room_name = "home"
        self.room_group_name = self.room_name
        await self.channel_layer.group_add(
            self.room_group_name,
            self.channel_name
        )
        await self.accept()
        self.connected = True

        try:
            loop = asyncio.get_event_loop()
        except RuntimeError:
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
        loop.create_task(self.send_response())

    async def send_response(self):
        while self.connected:
            sent_by = Message.objects.filter(notification_read=False).exclude(
                last_sent_by=self.scope["user"]).values("last_sent_by__username")
            await self.send(text_data=json.dumps({
                'notification_by': list(sent_by)
            }))
            await asyncio.sleep(30)

    async def disconnect(self, close_code):
        await self.channel_layer.group_discard(
            self.room_group_name,
            self.channel_name
        )
        self.connected = False
If there is any issue or obsolete usage please correct me
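One refinement worth considering: the Message.objects query inside send_response is a synchronous ORM call running on the event loop, so a slow query would block every consumer sharing that loop. Channels provides database_sync_to_async for this case; below is a minimal sketch of that variation (get_unread_senders is a helper name I've made up, the rest of the class stays as above):

from channels.db import database_sync_to_async

class HomeConsumer(AsyncWebsocketConsumer):
    # connect() and disconnect() stay the same as above

    @database_sync_to_async
    def get_unread_senders(self):
        # The blocking ORM call now runs in a worker thread, not on the event loop
        return list(Message.objects.filter(notification_read=False).exclude(
            last_sent_by=self.scope["user"]).values("last_sent_by__username"))

    async def send_response(self):
        while self.connected:
            sent_by = await self.get_unread_senders()
            await self.send(text_data=json.dumps({
                'notification_by': sent_by
            }))
            await asyncio.sleep(30)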

Related

Why are my async requests slower than sync ones?

I need to make 100 GET requests to build 100 BeautifulSoup objects from different pages.
To practice my async skills I've written two functions, each of which makes 100 GET requests and creates 100 BeautifulSoup objects from the same page. I also need to sleep between requests because I'm working with imdb.com and they don't like too many requests:
Async version:
# Gets a BeautifulSoup from a url asynchronously
async def get_page_soup(url):
    response_text = await get_response_text(url)
    return BeautifulSoup(response_text, features="html.parser")

async def get_n_soups_async(url, num_soups=100):
    soup = await get_page_soup(url)
    for i in range(num_soups - 1):
        soup = await get_page_soup(url)
        await asyncio.sleep(0.5)
    return soup
Sync version:
def get_n_soups_sync(url, num_soups=100):
    soup = BeautifulSoup(requests.get(url).text, features="html.parser")
    for i in range(num_soups - 1):
        soup = BeautifulSoup(requests.get(url).text, features="html.parser")
        time.sleep(0.5)
    return soup
Main loop
async def main():
    print("Async main() has started... ")

    t1 = time.perf_counter()
    soup = await get_n_soups_async('https://www.imdb.com/name/nm0425005', 100)
    t2 = time.perf_counter()
    print(t2 - t1, type(soup))

    t1 = time.perf_counter()
    soup = get_n_soups_sync('https://www.imdb.com/name/nm0425005', 100)
    t2 = time.perf_counter()
    print(t2 - t1, type(soup))

    print("Async main() is over.")

loop = asyncio.get_event_loop()
loop.run_until_complete(main())
What I can't understand is why my async function takes around 270 seconds to run, while my sync one needs only around 230 seconds.
What am I doing wrong with async, and how can I fix it to speed up getting 100 soups?
In my opinion this is caused by the loop: in the async version you still await each response before sending the next request, so the requests run one after another just like in the sync version, only with some extra event-loop overhead on top.
You could instead wrap each request in its own task (the asyncio equivalent of a promise) and await them all together, so you wait for the whole batch at once instead of waiting for each response in turn.
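A minimal sketch of that idea, assuming aiohttp is used for the HTTP calls (the original get_response_text helper isn't shown, so session.get stands in for it). Note that with all requests in flight at once and no 0.5 s pause, imdb.com may throttle you, so a delay or semaphore may still be needed:

import asyncio
import aiohttp
from bs4 import BeautifulSoup

async def fetch_soup(session, url):
    # One GET request -> one soup
    async with session.get(url) as response:
        text = await response.text()
    return BeautifulSoup(text, features="html.parser")

async def get_n_soups_concurrent(url, num_soups=100):
    async with aiohttp.ClientSession() as session:
        tasks = [fetch_soup(session, url) for _ in range(num_soups)]
        # All requests run concurrently; gather waits for the whole batch
        return await asyncio.gather(*tasks)

soups = asyncio.get_event_loop().run_until_complete(
    get_n_soups_concurrent('https://www.imdb.com/name/nm0425005'))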

How to stop the Crawler

I am trying to write a crawler that goes to a website and searches for a list of keywords, with a max depth of 2. The scraper is supposed to stop once any of the keywords appears on any page, but the problem I am facing right now is that the crawler does not stop when it first sees one of the keywords.
I have tried an early return, a break, CloseSpider, and even Python exit commands, without success.
My crawler class:
class WebsiteSpider(CrawlSpider):
    name = "webcrawler"
    allowed_domains = ["www.roomtoread.org"]
    start_urls = ["https://" + "www.roomtoread.org"]
    rules = [Rule(LinkExtractor(), follow=True, callback="check_buzzwords")]

    crawl_count = 0
    words_found = 0

    def check_buzzwords(self, response):
        self.__class__.crawl_count += 1
        crawl_count = self.__class__.crawl_count

        wordlist = [
            "sfdc",
            "pardot",
            "Web-to-Lead",
            "salesforce"
        ]

        url = response.url
        contenttype = response.headers.get("content-type", "").decode('utf-8').lower()
        data = response.body.decode('utf-8')

        for word in wordlist:
            substrings = find_all_substrings(data, word)
            for pos in substrings:
                ok = False
                if not ok:
                    if self.__class__.words_found == 0:
                        self.__class__.words_found += 1
                        print(word + "," + url + ";")
                        # STOP!  <- this is where I want the crawler to stop
        return Item()

    def _requests_to_follow(self, response):
        if getattr(response, "encoding", None) != None:
            return CrawlSpider._requests_to_follow(self, response)
        else:
            return []
I want it to stop execution when if not ok: is True.
When I want to stop a spider, I usually raise the exception scrapy.exceptions.CloseSpider(reason='cancelled') from the Scrapy docs.
The example there shows how you can use it:
if 'Bandwidth exceeded' in response.body:
    raise CloseSpider('bandwidth_exceeded')
In your case something like
if not ok:
    raise CloseSpider('keyword_found')
Or is that what you meant with
CloseSpider Commands
and already tried it?
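For completeness, here is a minimal sketch of how that raise could sit inside check_buzzwords (simplified: a plain substring test stands in for find_all_substrings, and the rest of the spider stays as in the question). One thing to keep in mind is that CloseSpider asks the engine to shut down gracefully, so requests already in flight will still finish before the spider actually stops:

from scrapy import Item
from scrapy.exceptions import CloseSpider

class WebsiteSpider(CrawlSpider):
    # name, allowed_domains, start_urls and rules as in the question

    def check_buzzwords(self, response):
        data = response.body.decode('utf-8')
        for word in ["sfdc", "pardot", "Web-to-Lead", "salesforce"]:
            if word in data:
                print(word + "," + response.url + ";")
                # Tells the engine to stop crawling; in-flight requests still complete
                raise CloseSpider('keyword_found')
        return Item()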

How can I start a brand new request in Scrapy crawler?

I am scraping a website that gives every request session a sid. After getting the sid, I perform a further search query with it and scrape the results.
I want to change the sid every time I've finished scraping all the results of a single query. I've tried clearing the cookies, but it doesn't work.
However, if I restart my crawler, it gets a different sid each time. I just don't know how to get a new sid without restarting the crawler.
I am wondering if there is something else that lets the server know two requests are from the same connection.
Thanks!
Here is my current code:
class MySpider(scrapy.Spider):
    name = 'my_spider'
    allowed_domains = ['xxx.com']
    start_urls = ['http://xxx/']
    sid_pattern = r'SID=(\w+)&'

    SID = None
    query_list = ['aaa', 'bbb', 'ccc']
    i = 0

    def parse(self, response):
        if self.i >= len(self.query_list):
            return

        pattern = re.compile(self.sid_pattern)
        result = re.search(pattern, response.url)
        if result is not None:
            self.SID = result.group(1)
        else:
            exit(-1)

        search_url = 'http://xxxx/AdvancedSearch.do'
        query = self.query_list[self.i]
        self.i += 1

        query_form = {
            'aaa': 'bbb'
        }
        yield FormRequest(search_url, method='POST', formdata=query_form, dont_filter=True,
                          callback=self.parse_result_entry)
        yield Request(self.start_urls[0], cookies={}, callback=self.parse, dont_filter=True)

    def parse_result_entry(self, response):
        # do something
        pass
Setting COOKIES_ENABLED = False can achieve this, but is there another way, other than a global setting?
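Two options avoid the project-wide settings file, sketched here on the assumption that the sid is tracked through cookies: disable cookies for this spider only via custom_settings, or keep cookies enabled and give every query its own jar with the cookiejar meta key, so each query looks like a fresh session to the server (pick one or the other):

import scrapy

class MySpider(scrapy.Spider):
    name = 'my_spider'

    # Option 1: per-spider setting, does not touch the global settings.py
    custom_settings = {'COOKIES_ENABLED': False}

    # Option 2: keep cookies on and isolate each query in its own cookie jar
    def start_requests(self):
        for idx, query in enumerate(['aaa', 'bbb', 'ccc']):
            yield scrapy.Request('http://xxx/',
                                 meta={'cookiejar': idx},  # separate session per query
                                 dont_filter=True,
                                 callback=self.parse)

    def parse(self, response):
        # Carry the same jar in follow-up requests to stay inside that session
        yield scrapy.Request('http://xxxx/AdvancedSearch.do', dont_filter=True,
                             meta={'cookiejar': response.meta['cookiejar']})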

Flask-WTF: Queries of FormFields in FieldList are none after validate_on_submit

I'm trying to generate dynamic forms using Flask-WTF to create a new product based on some templates. A product will have a list of required key-value pairs based on its type, as well as a list of parts required to build it. The current relevant code looks as follows:
forms.py:
class PartSelectionForm(Form):
    selected_part = QuerySelectField('Part', get_label='serial', allow_blank=True)
    part_type = StringField('Type')
    slot = IntegerField('Slot')
    required = BooleanField('Required')

    def __init__(self, csrf_enabled=False, *args, **kwargs):
        super(PartSelectionForm, self).__init__(csrf_enabled=False, *args, **kwargs)

class NewProductForm(Form):
    serial = StringField('Serial', default='', validators=[DataRequired()])
    notes = TextAreaField('Notes', default='')
    parts = FieldList(FormField(PartSelectionForm))
views.py:
@app.route('/products/new/<prodmodel>', methods=['GET', 'POST'])
@login_required
def new_product(prodmodel):
    try:
        model = db.session.query(ProdModel).filter(ProdModel.id==prodmodel).one()
    except NoResultFound, e:
        flash('No products of model type -' + prodmodel + '- found.', 'error')
        return redirect(url_for('index'))

    keys = db.session.query(ProdTypeTemplate.prod_info_key).filter(ProdTypeTemplate.prod_type_id==model.prod_type_id)\
        .order_by(ProdTypeTemplate.prod_info_key).all()
    parts_needed = db.session.query(ProdModelTemplate).filter(ProdModelTemplate.prod_model_id==prodmodel)\
        .order_by(ProdModelTemplate.part_type_id, ProdModelTemplate.slot).all()

    class F(forms.NewProductForm):
        pass

    for key in keys:
        if key.prod_info_key in ['shipped_os', 'factory_os']:
            setattr(F, key.prod_info_key, forms.QuerySelectField(key.prod_info_key, get_label='version'))
        else:
            setattr(F, key.prod_info_key, forms.StringField(key.prod_info_key, validators=[forms.DataRequired()]))

    form = F(request.form)

    if request.method == 'GET':
        for part in parts_needed:
            entry = form.parts.append_entry(forms.PartSelectionForm())
            entry.part_type.data = part.part_type_id
            entry.slot.data = part.slot
            entry.required.data = part.required
            entry.selected_part.query = db.session.query(Part).join(PartModel).filter(PartModel.part_type_id==part.part_type_id, Part.status=='inventory')

    if form.__contains__('shipped_os'):
        form.shipped_os.query = db.session.query(OSVersion).order_by(OSVersion.version)
    if form.__contains__('factory_os'):
        form.factory_os.query = db.session.query(OSVersion).order_by(OSVersion.version)

    if form.validate_on_submit():
        ...
Everything works as expected on a GET request, but on the validate_on_submit I get errors. The problem is that all of the queries and query_factories for the selected_part QuerySelectFields in the list of PartSelectionForms are None, causing either direct errors in the WTForms validation code or errors when Jinja2 attempts to re-render the QuerySelectFields. I'm not sure why this happens on the POST when everything appears to be correct for the GET.
I realized that although I set the required queries on a GET, I wasn't doing it for any PartSelectionForm selected_part entries on the POST. Since I already intended part_type, slot, and required to be hidden form fields, I added the following immediately before the validate_on_submit, and everything works correctly:
for entry in form.parts:
    entry.selected_part.query = db.session.query(Part).join(PartModel).\
        filter(PartModel.part_type_id==entry.part_type.data, Part.status=='inventory')

How to override delete-event in pygtk?

I am coding a simple text editor, so I am trying to check for unsaved changes before closing the application. I know it has to be something to do with 'delete-event', and by googling around I have found a way, but it gives an error.
This is my code:
__gsignals__ = {
    "delete-event": "override"
}

def do_delete(self, widget, event):
    print 'event overriden'
    tabsNumber = self.handler.tabbar.get_n_pages()
    #self.handler.tabbar.set_current_page(0)
    for i in range(tabsNumber, 0):
        doc = self.handler.tabbar.docs[i]
        lines = self.handler.tabbar.lineNumbers[i]
        self.handler.tabbar.close_tab(doc, lines)
    # if self.handler.tabbar.get_n_pages() == 0:
    #     self.destroy_app()

def destroy_app(self):
    gtk.main_quit()
And this is the error I get:
TypeError: Gtk.Widget.delete_event() argument 1 must be gtk.Widget, not gtk.gdk.Event
What is the right way to do it?
I found the answer,
self.connect('delete-event', self.on_delete_event)
and
__gsignals__ = {
    "delete-event": "override"
}

def on_delete_event(self, widget, event):
    tabsNumber = self.handler.tabbar.get_n_pages()
    #self.handler.tabbar.set_current_page(0)
    for i in range(tabsNumber, 0):
        doc = self.handler.tabbar.docs[i]
        lines = self.handler.tabbar.lineNumbers[i]
        self.handler.tabbar.close_tab(doc, lines)
    self.hide()
    self.destroy_app()
    return True
The key is the return True: it prevents the default handler from running, and the error no longer appears.
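To tie this back to the original goal of checking unsaved changes, the same handler can pop up a confirmation dialog and decide whether to block the close. Below is a minimal PyGTK sketch, where has_unsaved_changes is a stand-in for whatever check fits your tab handling:

import gtk

class EditorWindow(gtk.Window):
    def __init__(self):
        gtk.Window.__init__(self)
        self.connect('delete-event', self.on_delete_event)

    def has_unsaved_changes(self):
        # Stand-in: inspect your tabs/buffers here
        return True

    def on_delete_event(self, widget, event):
        if self.has_unsaved_changes():
            dialog = gtk.MessageDialog(self, gtk.DIALOG_MODAL,
                                       gtk.MESSAGE_QUESTION, gtk.BUTTONS_YES_NO,
                                       "Quit without saving?")
            response = dialog.run()
            dialog.destroy()
            if response != gtk.RESPONSE_YES:
                return True   # True stops the default handler: the window stays open
        return False          # False lets GTK close the window as usual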
