An error occurred while creating a bitcoin price notification Telegram chatbot (PytzUsageWarning) - telegram

import requests
import telegram
import json
from telegram.ext import Updater, CommandHandler
import time
import sys
import pandas as pd
from apscheduler.schedulers.blocking import BlockingScheduler  # BlockingScheduler lives in .blocking, not .background
from apscheduler.jobstores.base import JobLookupError

dt = requests.get('https://min-api.cryptocompare.com/data/price?fsym=BTC&tsyms=USD,EUR')
print(dt.text)
price_now = dt.json()

bot_token = "5668522544:AAFqNFcgd5wDBtQbJBhRayfPx9VpVPVjcyQ"
Cointimeline = telegram.Bot(token=bot_token)
updates = Cointimeline.getUpdates()
for i in updates:
    print(i.message)

class Chatbot:
    def __init__(self, token):
        self.core = telegram.Bot(token)
        self.updater = Updater(token)
        self.id = 5734902861

    def sendmsg(self, text):
        self.core.send_message(chat_id=self.id, text=text)

    def stop(self):
        self.updater.stop()

class Alert(Chatbot):
    def __init__(self):
        self.token = '5668522544:AAFqNFcgd5wDBtQbJBhRayfPx9VpVPVjcyQ'
        Chatbot.__init__(self, self.token)
        self.updater.stop()

    def controller(self, cmd, func):
        self.updater.dispatcher.add_handler(CommandHandler(cmd, func))

    def start(self):
        self.sendmsg('')

aps = BlockingScheduler()

def push():
    dt = requests.get("https://min-api.cryptocompare.com/data/price?fsym=BTC&tsyms=USD,EUR")
    ALERTBOT = Alert()
    ALERTBOT.sendmsg(dt.text)
    price_now = pd.DataFrame({"USD": [list(dt.json().values())[0]], "EUR": [list(dt.json().values())[1]]})
    data = pd.read_csv("ALERTBOT.csv")
    data = data.append(price_now, sort=True)
    data = data.loc[:, 'USD':'EUR']
    data.to_csv("ALERTBOT.csv")

aps.add_job(push, 'interval', seconds=60)
aps.start()
The error continues to occur.
PytzUsageWarning: The zone attribute is specific to pytz's interface; please migrate to a new time zone provider. For more details on how to do so, see https://pytz-deprecation-shim.readthedocs.io/en/latest/migration.html
if obj.zone == 'local':

This message is a deprecation warning rather than a fatal error: it comes from APScheduler's timezone handling, which still goes through pytz. Updating the library is the first thing to try:
pip install --upgrade pytz
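If the warning keeps showing up after the upgrade, a minimal workaround sketch (an assumption on my part, not something covered above) is to either silence the shim warning or hand the scheduler an explicit pytz timezone so it never has to probe the local zone:
import warnings
import pytz
from pytz_deprecation_shim import PytzUsageWarning
from apscheduler.schedulers.blocking import BlockingScheduler

# Option 1: suppress the deprecation warning emitted by the pytz shim
warnings.filterwarnings("ignore", category=PytzUsageWarning)

# Option 2: give APScheduler a concrete timezone instead of letting it detect the local one
# ("UTC" here is just an example value)
aps = BlockingScheduler(timezone=pytz.timezone("UTC"))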

How can I use Google Cloud Functions to run a web scraper?

Thanks in advance for your help.
I'm currently running a web scraper (this is the first time I've ever done something like this). It pulls addresses from the URL and then matches an address against the user's input. This will be going into a chatbot, and I'm wondering how I can make it run on Google Cloud Functions. What's the process for doing this? Is there a tutorial anywhere?
This is my code so far. There is a small items file too.
from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor
from ..items import DataItem
from fuzzywuzzy import fuzz
from urllib.parse import urljoin
import scrapy

class AddressesSpider(scrapy.Spider):
    name = 'Addresses'
    allowed_domains = ['find-energy-certificate.service.gov.uk']
    postcode = "bh10+4ah"
    start_urls = ['https://find-energy-certificate.service.gov.uk/find-a-certificate/search-by-postcode?postcode=' + postcode]

##    def start_requests(self):
##        self.first = input("Please enter the address you would like to match: ")
##        yield scrapy.Request(url=self.start_urls[0], callback=self.parse)

    def parse(self, response):
        first = input("Please enter the address you would like to match: ")
        highest_ratios = []
        highest_item = None
        for row in response.xpath('//table[@class="govuk-table"]//tr'):
            address = row.xpath("normalize-space(.//a[@class='govuk-link']/text())").extract()[0].lower()
            address = address.rsplit(',', 2)[0]
            link = row.xpath('.//a[@class="govuk-link"]/@href').extract()
            details = row.xpath("normalize-space(.//td/following-sibling::td)").extract()
            ratio = fuzz.token_set_ratio(address, first)
            item = DataItem()
            item['link'] = link
            item['details'] = details
            item['address'] = address
            item['ratioresult'] = ratio
            if len(highest_ratios) < 3:
                highest_ratios.append(item)
            elif ratio > min(highest_ratios, key=lambda x: x['ratioresult'])['ratioresult']:
                highest_ratios.remove(min(highest_ratios, key=lambda x: x['ratioresult']))
                highest_ratios.append(item)
        highest_ratios_100 = [item for item in highest_ratios if item['ratioresult'] == 100]
        if highest_ratios_100:
            for item in highest_ratios_100:
                yield item
        else:
            yield max(highest_ratios, key=lambda x: x['ratioresult'])
        if len(highest_ratios_100) > 1:
            for i, item in enumerate(highest_ratios_100):
                print(f"{i+1}: {item['address']}")
            selected = int(input("Please select the correct address by entering the number corresponding to the address: ")) - 1
            selected_item = highest_ratios_100[selected]
        else:
            selected_item = highest_ratios_100[0] if highest_ratios_100 else max(highest_ratios, key=lambda x: x['ratioresult'])
        new_url = selected_item['link'][0]
        new_url = str(new_url)
        if new_url:
            base_url = 'https://find-energy-certificate.service.gov.uk'
            print(f'Base URL: {base_url}')
            print(f'New URL: {new_url}')
            new_url = urljoin(base_url, new_url)
            print(f'Combined URL: {new_url}')
            yield scrapy.Request(new_url, callback=self.parse_new_page)

    def parse_new_page(self, response):
        Postcode = response.xpath('normalize-space((//p[@class="epc-address govuk-body"]/text())[last()])').extract()
        Town = response.xpath('normalize-space((//p[@class="epc-address govuk-body"]/text())[last()-1])').extract()
        First = response.xpath(".//p[@class='epc-address govuk-body']").extract()
        Type = response.xpath('normalize-space(//dd[1]/text())').extract_first()
        Walls = response.xpath("//th[contains(text(), 'Wall')]/following-sibling::td[1]/text()").extract()
        Roof = response.xpath("//th[contains(text(), 'Roof')]/following-sibling::td[1]/text()").extract()
        Heating = response.xpath("//th[text()='Main heating']/following-sibling::td[1]/text()").extract_first()
        CurrentScore = response.xpath('//body[1]/div[2]/main[1]/div[1]/div[3]/div[3]/svg[1]/svg[1]/text[1]/text()').re_first("[0-9+]{1,2}")
        Maxscore = response.xpath('//body[1]/div[2]/main[1]/div[1]/div[3]/div[3]/svg[1]/svg[2]/text[1]/text()').re_first("[0-9+]{2}")
        Expiry = response.xpath('normalize-space(//b)').extract_first()
        FloorArea = response.xpath('//dt[contains(text(), "floor area")]/following-sibling::dd/text()').re_first("[0-9+]{2,3}")
        Steps = response.xpath("//h3[contains(text(),'Step')]/text()").extract()
        yield {
            'Postcode': Postcode,
            'Town': Town,
            'First': First,
            'Type': Type,
            'Walls': Walls,
            'Roof': Roof,
            'Heating': Heating,
            'CurrentScore': CurrentScore,
            'Maxscore': Maxscore,
            'Expiry': Expiry,
            'FloorArea': FloorArea,
            'Steps': Steps
        }
I've tried googling and having a look around, but I can't work out how to deploy this as a project to run on Google Cloud Functions. Or can I just copy the code into the console somewhere?
You can try running your spider from a script. However, a better solution would be to wrap scrapy in its own child process.
For example:
from multiprocessing import Process, Queue
from ... import MySpider
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

def my_cloud_function(event, context):

    def script(queue):
        try:
            settings = get_project_settings()
            settings.setdict({
                'LOG_LEVEL': 'ERROR',
                'LOG_ENABLED': True,
            })
            process = CrawlerProcess(settings)
            process.crawl(MySpider)
            process.start()
            queue.put(None)
        except Exception as e:
            queue.put(e)

    queue = Queue()

    # wrap the spider in a child process
    main_process = Process(target=script, args=(queue,))

    main_process.start()  # start the process
    main_process.join()   # block until the spider finishes

    result = queue.get()  # check the process did not return an error
    if result is not None:
        raise result

    return 'ok'
You can refer to this tutorial for more info.
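If you go the Cloud Functions route, the usual packaging applies: put the function above in main.py, list scrapy (and anything else the spider imports) in requirements.txt, and deploy with something along these lines (the function name and runtime here are just examples):
gcloud functions deploy scrape-addresses --runtime=python310 --trigger-http --entry-point=my_cloud_function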

Sensor return value cannot be stored/retrieved using PokeReturnValue

The code below creates the DAG (the graph is also attached), which contains two PythonSensors and a PythonOperator.
The first sensor creates a random integer list as data and a random boolean with a 50% chance of success. It logs the generated values and returns a PokeReturnValue.
The second sensor and the Python operator both try to get the data from XCom and log it.
Graph of DAG
# region IMPORTS
import random
import logging
from datetime import datetime, timedelta
from airflow import DAG
from heliocampus.configuration.constants import Constants
from airflow.operators.empty import EmptyOperator
from airflow.operators.python import PythonOperator
from airflow.sensors.python import PythonSensor
from airflow.sensors.base import PokeReturnValue
from airflow.utils.trigger_rule import TriggerRule
from box import Box
# endregion

# region configuration
constants = Constants()
dagconfig = Box({ "Code": "Test" })
# endregion

def main() -> DAG:
    # region default_args
    args = dict()
    args['start_date'] = datetime(2021, 1, 1)
    # endregion

    with DAG(dag_id=dagconfig.Code, schedule_interval="@once", default_args=args, tags=['test', 'V0.1.4']) as dag:
        start = EmptyOperator(task_id="start")

        # region Sensors
        check_all_expired_tables = PythonSensor(
            task_id="CHECK_ALL_EXPIRED_TABLES",
            poke_interval=timedelta(seconds=20).total_seconds(),
            timeout=timedelta(minutes=1).total_seconds(),
            mode="reschedule",
            python_callable=check_expired_tables,
            trigger_rule=TriggerRule.ALL_SUCCESS
        )

        check_all_expired_tables_notification = PythonOperator(
            task_id="CHECK_ALL_EXPIRED_TABLES_NOTIFICATION",
            python_callable=sensor_result_nofitication,
            op_kwargs={"notification_source": "CHECK_ALL_EXPIRED_TABLES"},
            trigger_rule=TriggerRule.ALL_FAILED
        )

        verify_ods_operator = PythonSensor(
            task_id="VERIFY_ODS",
            poke_interval=timedelta(seconds=30).total_seconds(),
            timeout=timedelta(hours=2).total_seconds(),
            mode="reschedule",
            python_callable=verify_ods,
            op_kwargs={"notification_source": "CHECK_ALL_EXPIRED_TABLES"},
            trigger_rule=TriggerRule.ALL_SUCCESS
        )
        # endregion

        end = EmptyOperator(task_id="end")

        start >> check_all_expired_tables >> verify_ods_operator >> end
        check_all_expired_tables >> check_all_expired_tables_notification

    return dag

# region Notifications
def sensor_result_nofitication(ti, notification_source):
    actual_xcom_value = ti.xcom_pull(task_ids=[notification_source])
    logging.info(f"sensor_result_nofitication : Sensor without key from {notification_source} is {actual_xcom_value}")
    actual_xcom_value = ti.xcom_pull(key='return_value', task_ids=[notification_source])
    logging.info(f"sensor_result_nofitication : Sensor return_value from {notification_source} is {actual_xcom_value}")
# endregion

def check_expired_tables():
    randomlist = random.sample(range(10, 30), 5)
    randomResult = random.randint(0, 100) > 50
    logging.info(f"check_expired_tables : returning PokeReturnValue(is_done={randomResult}, xcom_value={randomlist})")
    return PokeReturnValue(is_done=randomResult, xcom_value=randomlist)

def verify_ods(ti, notification_source):
    actual_xcom_value = ti.xcom_pull(task_ids=[notification_source])
    logging.info(f"verify_ods : Sensor without key from {notification_source} is {actual_xcom_value}")
    actual_xcom_value = ti.xcom_pull(key='return_value', task_ids=[notification_source])
    logging.info(f"verify_ods : Sensor return_value from {notification_source} is {actual_xcom_value}")
    rnd = random.randint(0, 100)
    logging.info("Random Number : {num}".format(num=rnd))
    return (rnd > 20)

main()
Regardless of whether the first sensor is successful or not, the data from XCom cannot be logged in the second sensor or the Python operator.
I don't know if the problem is on the pushing side or the pulling side.
I cannot see any rows inserted in the Airflow database (xcom table).
The problem lives in the PythonSensor, which coerces the return value of the Python callable to a boolean without checking its type first:
return_value = self.python_callable(*self.op_args, **self.op_kwargs)
return PokeReturnValue(bool(return_value))
To get the expected behavior, something like this needs to be added to the PythonSensor:
return return_value if isinstance(return_value, PokeReturnValue) else PokeReturnValue(bool(return_value))
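If you cannot patch Airflow itself, a minimal workaround sketch (my assumption, not part of the original answer) is to subclass the sensor and keep the PokeReturnValue intact; it skips the kwargs/context handling the stock sensor performs, which is fine here because check_expired_tables takes no arguments:
from airflow.sensors.python import PythonSensor
from airflow.sensors.base import PokeReturnValue

class PassthroughPythonSensor(PythonSensor):
    """PythonSensor variant that pushes the callable's xcom_value instead of discarding it."""
    def poke(self, context):
        return_value = self.python_callable(*self.op_args, **self.op_kwargs)
        # keep a PokeReturnValue as-is so its xcom_value reaches XCom; otherwise coerce to bool
        return return_value if isinstance(return_value, PokeReturnValue) else PokeReturnValue(bool(return_value))
check_all_expired_tables would then be declared with PassthroughPythonSensor in place of PythonSensor.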

Download multiple 10-K documents

I need to download multiple 10-K documents. This code works fine if I download the 10-Ks for between 5 and 10 companies, but it fails once I increase the number of companies in the cik_lookup dictionary. Here's the code.
import nltk
import numpy as np
import pandas as pd
import pickle
import pprint
import project_helper
from tqdm import tqdm
Here's the py file that includes project_helper functions.
import matplotlib.pyplot as plt
import requests
from ratelimit import limits, sleep_and_retry

class SecAPI(object):
    SEC_CALL_LIMIT = {'calls': 10, 'seconds': 1}

    @staticmethod
    @sleep_and_retry
    # Dividing the call limit by half to avoid coming close to the limit
    @limits(calls=SEC_CALL_LIMIT['calls'] / 2, period=SEC_CALL_LIMIT['seconds'])
    def _call_sec(url):
        return requests.get(url)

    def get(self, url):
        return self._call_sec(url).text

def print_ten_k_data(ten_k_data, fields, field_length_limit=50):
    indentation = ' '
    print('[')
    for ten_k in ten_k_data:
        print_statement = '{}{{'.format(indentation)
        for field in fields:
            value = str(ten_k[field])
            # Show return lines in output
            if isinstance(value, str):
                value_str = '\'{}\''.format(value.replace('\n', '\\n'))
            else:
                value_str = str(value)
            # Cut off the string if it gets too long
            if len(value_str) > field_length_limit:
                value_str = value_str[:field_length_limit] + '...'
            print_statement += '\n{}{}: {}'.format(indentation * 2, field, value_str)
        print_statement += '},'
        print(print_statement)
    print(']')
The first step is to download the NLP corpora.
nltk.download('stopwords')
nltk.download('wordnet')
Then get the 10-Ks.
#cik_lookup = {
#    'GOOGL': '0001288776',
#    'AAPL': '0000320193',
#    'FACEBOOK': '0001326801',
#    'AMZN': '0001018724',
#    'MSFT': '0000789019'}
cik_lookup = {
    'AEP': '0000004904',
    'AXP': '0000004962',
    'BA': '0000012927',
    'BK': '0001390777',
    'CAT': '0000018230',
    'DE': '0000315189',
    'DIS': '0001001039',
    'DTE': '0000936340',
    'ED': '0001047862',
    'EMR': '0000032604',
    'ETN': '0001551182',
    'GE': '0000040545',
    'IBM': '0000051143',
    'IP': '0000051434',
    'JNJ': '0000200406',
    'KO': '0000021344',
    'LLY': '0000059478',
    'MCD': '0000063908',
    'MO': '0000764180',
    'MRK': '0000310158',
    'MRO': '0000101778',
    'PCG': '0001004980',
    'PEP': '0000077476',
    'PFE': '0000078003',
    'PG': '0000080424',
    'PNR': '0000077360',
    'SYY': '0000096021',
    'TXN': '0000097476',
    'UTX': '0000101829',
    'WFC': '0000072971',
    'WMT': '0000104169',
    'WY': '0000106535',
    'XOM': '0000034088'}
Get list of 10-ks
sec_api = project_helper.SecAPI()

from bs4 import BeautifulSoup

def get_sec_data(cik, doc_type, start=0, count=60):
    newest_pricing_data = pd.to_datetime('2021-01-01')
    rss_url = 'https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany' \
              '&CIK={}&type={}&start={}&count={}&owner=exclude&output=atom' \
              .format(cik, doc_type, start, count)
    sec_data = sec_api.get(rss_url)
    feed = BeautifulSoup(sec_data.encode('utf-8'), 'xml').feed
    entries = [
        (
            entry.content.find('filing-href').getText(),
            entry.content.find('filing-type').getText(),
            entry.content.find('filing-date').getText())
        for entry in feed.find_all('entry', recursive=False)
        if pd.to_datetime(entry.content.find('filing-date').getText()) <= newest_pricing_data]
    return entries

example_ticker = 'AEP'
sec_data = {}

for ticker, cik in cik_lookup.items():
    sec_data[ticker] = get_sec_data(cik, '10-K')
The code works fine if I download the 10-Ks for between 5 and 10 companies, but if I increase the number of companies in cik_lookup I get the following error. The first error I got is shown below.
UnicodeEncodeError Traceback (most recent call last)
<ipython-input-8-28a784054794> in <module>()
20
21 for ticker, cik in cik_lookup.items():
---> 22 sec_data[ticker] = get_sec_data(cik, '10-K')
<ipython-input-8-28a784054794> in get_sec_data(cik, doc_type, start, count)
5 rss_url = 'https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany' '&CIK={}&type={}&start={}&count={}&owner=exclude&output=atom' .format(cik, doc_type, start, count)
6 sec_data = sec_api.get(rss_url)
----> 7 feed = BeautifulSoup(sec_data.encode('ascii'), 'xml').feed
8 entries = [
9 (
UnicodeEncodeError: 'ascii' codec can't encode characters in position 2599-2601: ordinal not in range(128)
However, after some googling about BeautifulSoup encodings, I changed it to utf-8 and then got the following error.
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-9-9c77ed07af2d> in <module>()
20
21 for ticker, cik in cik_lookup.items():
---> 22 sec_data[ticker] = get_sec_data(cik, '10-K')
<ipython-input-9-9c77ed07af2d> in get_sec_data(cik, doc_type, start, count)
11 entry.content.find('filing-type').getText(),
12 entry.content.find('filing-date').getText())
---> 13 for entry in feed.find_all('entry', recursive=False)
14 if pd.to_datetime(entry.content.find('filing-date').getText()) <= newest_pricing_data]
15
AttributeError: 'NoneType' object has no attribute 'find_all'
The project can be accessed at the following GitHub repo: github repo here.
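One thing worth ruling out, though it is not confirmed by the post above: the AttributeError means feed came back as None, i.e. EDGAR answered with something other than the Atom feed (it throttles heavy traffic and rejects automated clients that do not declare a User-Agent). A hedged sketch of a guard inside get_sec_data, bypassing project_helper for clarity (the User-Agent string is a placeholder you would fill in yourself):
headers = {'User-Agent': 'Your Name your.email@example.com'}  # placeholder contact details
sec_data = requests.get(rss_url, headers=headers).text
feed = BeautifulSoup(sec_data.encode('utf-8'), 'xml').feed
if feed is None:
    # EDGAR sent an error page instead of the feed; log a snippet and skip this CIK
    print('No feed for CIK {}: {}'.format(cik, sec_data[:200]))
    return []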

python unittest error and using patch from unittest.mock

I have two Python files. The first one is named employee.py, and this is its code:
import requests

class Employee:
    """A sample Employee class"""

    raise_amt = 1.05

    def __init__(self, first, last, pay):
        self.first = first
        self.last = last
        self.pay = pay

    @property
    def email(self):
        return '{}.{}@email.com'.format(self.first, self.last)

    @property
    def fullname(self):
        return '{} {}'.format(self.first, self.last)

    def apply_raise(self):
        self.pay = int(self.pay * self.raise_amt)

    def monthly_schedule(self, month):
        response = requests.get(f'http://company.com/{self.last}/{month}')
        if response.ok:
            return response.text
        else:
            return 'Bad Response!'
The other file is named test_employee.py, and this is its code:
import unittest
from unittest.mock import patch
from employee import Employee

class TestEmployee(unittest.TestCase):

    @classmethod
    def setUpClass(cls):
        print('setupClass')

    @classmethod
    def tearDownClass(cls):
        print('teardownClass')

    def setUp(self):
        print('setUp')
        self.emp_1 = Employee('Corey', 'Schafer', 50000)
        self.emp_2 = Employee('Sue', 'Smith', 60000)

    def tearDown(self):
        print('tearDown\n')

    def test_email(self):
        print('test_email')
        self.assertEqual(self.emp_1.email, 'Corey.Schafer@email.com')
        self.assertEqual(self.emp_2.email, 'Sue.Smith@email.com')
        self.emp_1.first = 'John'
        self.emp_2.first = 'Jane'
        self.assertEqual(self.emp_1.email, 'John.Schafer@email.com')
        self.assertEqual(self.emp_2.email, 'Jane.Smith@email.com')

    def test_fullname(self):
        print('test_fullname')
        self.assertEqual(self.emp_1.fullname, 'Corey Schafer')
        self.assertEqual(self.emp_2.fullname, 'Sue Smith')
        self.emp_1.first = 'John'
        self.emp_2.first = 'Jane'
        self.assertEqual(self.emp_1.fullname, 'John Schafer')
        self.assertEqual(self.emp_2.fullname, 'Jane Smith')

    def test_apply_raise(self):
        print('test_apply_raise')
        self.emp_1.apply_raise()
        self.emp_2.apply_raise()
        self.assertEqual(self.emp_1.pay, 52500)
        self.assertEqual(self.emp_2.pay, 63000)

    def test_monthly_schedule(self):
        with patch('employee.requests.get') as mocked_get:
            mocked_get.return_value.ok = True
            mocked_get.return_value.text = 'Success'
            schedule = self.emp_1.monthly_schedule('May')
            mocked_get.assert_called_with('http://company.com/Schafer/May')
            self.assertEqual(schedule, 'Success')

if __name__ == '__main__':
    unittest.main()
When I run test_employee.py, I get this error:
ModuleNotFoundError: No module named 'employee.requests'; 'employee' is not a package
The code runs well if I delete the test_monthly_schedule function from test_employee.py and delete monthly_schedule from employee.py.
I don't know if it makes a difference, but I'm using Python 3.8 on a Mac.

wxPython: wx.ComboBox data saved to sqlite3 and shown in a wx.StaticText

import wx
import sqlite3

class Frame(wx.Frame):
    def __init__(self):
        wx.Frame.__init__(self, None)
        self.panel = wx.Panel(self)
        self.text = wx.StaticText(self.panel)
        self.conn = sqlite3.connect("test.db")
        self.cursor = self.conn.cursor()
        self.autoRefersh()

    def autoRefersh(self):
        self.LoadList()
        wx.CallLater(1000, self.autoRefersh)

    def LoadList(self):
        self.cursor.execute("SELECT *FROM CLINIC1")
        for date1 in self.cursor: pass
        self.staticText2_1 = wx.StaticText(self.panel, label=date1[1], style=wx.ALIGN_CENTER, pos=(100,100))

if __name__ == '__main__':
    app = wx.App()
    frame = Frame()
    frame.Show()
    app.MainLoop()
I save the combobox data to sqlite3 and show it on the panel, but why does it look different? Is this a bug?
I do not know why this is happening.
You missed one crucial step, getting the data itself.
You are using the cursor object not the data returned by the cursor.
def LoadList(self):
    self.cursor.execute("SELECT *FROM CLINIC1")
    data = self.cursor.fetchall()
    for date1 in data: pass
    self.staticText2_1 = wx.StaticText(self.panel, label=date1[1], style=wx.ALIGN_CENTER, pos=(100,100))
As you are just "passing" in your for loop, perhaps what you actually want is only a single record, in which case use
data = self.cursor.fetchone()
and drop the for loop.
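For example, a minimal variant of LoadList (assuming the second column is the value you want to display, and reusing the StaticText created in __init__ instead of stacking new ones on the panel):
def LoadList(self):
    self.cursor.execute("SELECT * FROM CLINIC1")
    row = self.cursor.fetchone()    # a single record instead of looping
    if row is not None:
        self.text.SetLabel(row[1])  # update the existing StaticText in place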
Even better, read the tutorial
https://www.blog.pythonlibrary.org/2012/07/18/python-a-simple-step-by-step-sqlite-tutorial/
In the heading of your question you mention a combobox, so I assume that you want to replace the StaticText with a ComboBox. The following should get you started; I'll leave the wx.EVT_COMBOBOX event binding for you to add, as you will need it to do something when you select an item.
import wx
import sqlite3

class Frame(wx.Frame):
    def __init__(self):
        wx.Frame.__init__(self, None)
        self.selected_data = []
        self.panel = wx.Panel(self)
        self.combo = wx.ComboBox(self.panel, -1, choices=self.selected_data, size=(130,30))
        self.conn = sqlite3.connect("test.db")
        self.cursor = self.conn.cursor()
        self.combo.SetValue("Choose an Item")
        self.autoRefresh()

    def autoRefresh(self):
        self.LoadList()

    def LoadList(self):
        self.combo.Clear()
        self.cursor.execute("SELECT * FROM CLINIC1")
        data = self.cursor.fetchall()
        for date1 in data:
            self.selected_data.append(date1[1])
        for i in self.selected_data:
            self.combo.Append(i)

if __name__ == '__main__':
    app = wx.App()
    frame = Frame()
    frame.Show()
    app.MainLoop()
