Can't get fresh length of list with Telegram Bot API (global variables)

I have a Flask Telegram bot built with pyTelegramBotAPI and deployed on Heroku. I need the start message to show the current length of a list that is refreshed every 5 minutes in gettinglist.py, but it always shows the stale value. I can't find my mistake, please help.
bot.py
import os
import config
import gettinglist
from gettinglist import getting_list
import telebot
from flask import Flask, request
from threading import Thread

app = Flask(__name__)

def app_run():
    app.run(host="0.0.0.0", port=os.environ.get('PORT', 80))

msg_start = """
Length of list now: %d
""" % config.LIST_LENGHT

application_thread = Thread(target=app_run)
getting_list_thread = Thread(target=getting_list)

bot = telebot.TeleBot("<MY_BOT_TOKEN>")

@bot.message_handler(commands=['start'])
def start(m):
    cid = m.chat.id
    bot.send_message(cid, msg_start, parse_mode='html')

@app.route("/bot", methods=['POST'])
def getMessage():
    bot.process_new_updates([telebot.types.Update.de_json(request.stream.read().decode("utf-8"))])
    return "ok", 200

@app.route("/")
def webhook():
    bot.remove_webhook()
    bot.set_webhook(url="<HEROKU_APP_URL>")
    return "ok", 200

if __name__ == '__main__':
    application_thread.start()
    getting_list_thread.start()
gettinglist.py
import config
from time import sleep

LIST_LENGHT = 0
LIST = []

def getting_list():
    while True:
        global LIST
        global LIST_LENGHT
        LIST = [num for num in range(0, 100)]
        config.LIST_LENGHT = len(LIST)
        return LIST
        sleep(300)
config.py
LIST_LENGHT = 0
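Note that msg_start is interpolated once, at import time, so it captures config.LIST_LENGHT before getting_list has ever updated it; the return inside the while loop also exits getting_list on the first pass, so sleep(300) is never reached. A minimal sketch of one way to keep the value fresh, building the message inside the handler and letting the loop run forever (the helper name build_start_message is mine, the rest mirrors the code above):

# bot.py (sketch)
import config
import telebot

bot = telebot.TeleBot("<MY_BOT_TOKEN>")

def build_start_message():
    # read the length when /start arrives, not when the module is imported
    return "Length of list now: %d" % config.LIST_LENGHT

@bot.message_handler(commands=['start'])
def start(m):
    bot.send_message(m.chat.id, build_start_message(), parse_mode='html')

# gettinglist.py (sketch): no early return, so the loop keeps refreshing
import config
from time import sleep

LIST = []

def getting_list():
    global LIST
    while True:
        LIST = [num for num in range(0, 100)]
        config.LIST_LENGHT = len(LIST)
        sleep(300)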

Related

How to log user activity in a streamlit app?

I have a streamlit app that is public (i.e. no user log-in). I would like to have log files of the form:
|2023-02-10 16:30:16 : user at ip=___ clicked button key=___
|2023-02-10 16:30:19 : user at ip=___ clicked button key=___
|2023-02-10 16:31:10 : user at ip=___ clicked button key=___
|2023-02-10 16:31:27 : user at ip=___ clicked button key=___
|...
Is there any way to achieve this? I want to do some analytics on how the app is being used.
You can access the remote ip address via get_script_run_ctx and .remote_ip:
from streamlit import runtime
from streamlit.runtime.scriptrunner import get_script_run_ctx

def get_remote_ip() -> str:
    """Get remote ip."""
    try:
        ctx = get_script_run_ctx()
        if ctx is None:
            return None
        session_info = runtime.get_instance().get_client(ctx.session_id)
        if session_info is None:
            return None
    except Exception as e:
        return None
    return session_info.request.remote_ip
import streamlit as st
st.title("Title")
st.markdown(f"The remote ip is {get_remote_ip()}")
For the logging part, I suggest you use a ContextFilter:
import logging

class ContextFilter(logging.Filter):
    def filter(self, record):
        record.user_ip = get_remote_ip()
        return super().filter(record)
This custom filter modifies the LogRecord and adds the custom attribute user_ip to it, which you can then use inside the Formatter.
All together, it gives:
import logging

import streamlit as st
from streamlit import runtime
from streamlit.runtime.scriptrunner import get_script_run_ctx


def get_remote_ip() -> str:
    """Get remote ip."""
    try:
        ctx = get_script_run_ctx()
        if ctx is None:
            return None
        session_info = runtime.get_instance().get_client(ctx.session_id)
        if session_info is None:
            return None
    except Exception as e:
        return None
    return session_info.request.remote_ip


class ContextFilter(logging.Filter):
    def filter(self, record):
        record.user_ip = get_remote_ip()
        return super().filter(record)


def init_logging():
    # Make sure to instantiate the logger only once,
    # otherwise it will create a StreamHandler at every run
    # and duplicate the messages.

    # create a custom logger
    logger = logging.getLogger("foobar")
    if logger.handlers:  # logger is already set up, don't set it up again
        return
    logger.propagate = False
    logger.setLevel(logging.INFO)
    # in the formatter, use the variable "user_ip"
    formatter = logging.Formatter("%(name)s %(asctime)s %(levelname)s [user_ip=%(user_ip)s] - %(message)s")
    handler = logging.StreamHandler()
    handler.setLevel(logging.INFO)
    handler.addFilter(ContextFilter())
    handler.setFormatter(formatter)
    logger.addHandler(handler)


def main():
    logger.info("Inside main")
    st.title("Title")
    text = st.sidebar.text_input("Text:")
    logger.info(f"This is the text: {text}")


if __name__ == "__main__":
    init_logging()
    logger = logging.getLogger("foobar")
    main()
foobar 2023-02-13 15:43:57,252 INFO [user_ip=::1] - Inside main
foobar 2023-02-13 15:43:57,253 INFO [user_ip=::1] - This is the text: Hello, world!
Note: Here the user_ip is "::1" because everything was done locally.
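Since the question asks for log files rather than console output, you could swap the StreamHandler in init_logging for a FileHandler; a small variation on the code above (the file name app_activity.log is just an example):

handler = logging.FileHandler("app_activity.log")
handler.setLevel(logging.INFO)
handler.addFilter(ContextFilter())
handler.setFormatter(formatter)
logger.addHandler(handler)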

How do I resolve an async Tornado fetching future error

I am trying to use AsyncHTTPClient to GET/POST from a local service that is already running at port 6000,
but I keep getting this error: RuntimeError: Task got bad yield: <tornado.concurrent.Future object at 0x03C9B490>
P.S. I'm using Tornado 4.4.2; this error is fixed in the latest version, but how do I do it in 4.4.2? Please help!
import tornado.ioloop
from tornado.httpclient import AsyncHTTPClient
import asyncio
import tornado
import urllib
from datetime import datetime
import time

async def client(url):
    http_client = AsyncHTTPClient()
    response = await http_client.fetch(url)
    return response.body

async def main():
    http_client = AsyncHTTPClient()
    url = "http://localhost:6000/listings"
    result = await client(url)
    print(result)

if __name__ == "__main__":
    result = asyncio.run(main())
    print(result)
    print(int(time.time() * 1e6))
You can't use asyncio with Tornado prior to version 5.0.
Use Tornado's own ioloop to run your program:
from tornado import ioloop

if __name__ == "__main__":
    result = ioloop.IOLoop.current().run_sync(main)
UPDATE: The above solution will work fine, but, if you want, you can use asyncio with Tornado 4.x. See: tornado.platform.asyncio.AsyncIOMainLoop.
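On 4.x that would look roughly like the following (a sketch, not a drop-in from the docs: AsyncIOMainLoop puts Tornado on the asyncio event loop, and to_asyncio_future converts the Tornado Future returned by fetch into one asyncio can await):

import asyncio

from tornado.httpclient import AsyncHTTPClient
from tornado.platform.asyncio import AsyncIOMainLoop, to_asyncio_future

async def main():
    http_client = AsyncHTTPClient()
    # fetch() returns a tornado.concurrent.Future on 4.x; convert it for asyncio
    response = await to_asyncio_future(http_client.fetch("http://localhost:6000/listings"))
    print(response.body)

if __name__ == "__main__":
    AsyncIOMainLoop().install()  # run Tornado on top of the asyncio event loop
    asyncio.get_event_loop().run_until_complete(main())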

Should a single scrapy crawler process without rate limits be as fast as multiple crawler processes?

I have set CONCURRENT_REQUESTS, CONCURRENT_REQUESTS_PER_DOMAIN and CONCURRENT_REQUESTS_PER_IP to 1,000,000, but a single crawler process still cannot keep up with running multiple crawler processes, each handling part of a list of URLs. Is that to be expected? In fact, if I run 8 crawler processes, the speed is about 8x faster.
I am not sure what I am configuring wrong. I would expect a single crawler process without any rate limit to run at the maximum speed possible, so it should be as fast as running 8 crawler processes.
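(The question does not show where these settings are applied; presumably something like the following in settings.py or the spider's custom_settings, reconstructed here only so the values being discussed are visible.)

# settings.py -- assumed, not shown in the question
CONCURRENT_REQUESTS = 1000000
CONCURRENT_REQUESTS_PER_DOMAIN = 1000000
CONCURRENT_REQUESTS_PER_IP = 1000000

The spider: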
import csv
import scrapy
import random
from urllib.parse import urlencode
from pprint import pprint
import requests
import re
import json

class XXXSpider(scrapy.Spider):
    name = 'xxx'

    def start_requests(self):
        base_url = 'xxx'
        base_query = 'yyy'
        for s in self.words:
            token = random.choice(self.tokens)
            headers['token'] = token
            user_agent = random.choice(user_agents)
            headers['User-Agent'] = user_agent
            params['q'] = base_query.format("${:s}".format(s))
            encoded_params = urlencode(params)
            xxx_url = "{:s}?{:s}".format(base_url, encoded_params)
            yield scrapy.Request(url=xxx_url, headers=headers, callback=self.parse)

    def parse(self, response):
        data = json.loads(response.body)
multiple crawler processes
from scrapy.crawler import CrawlerProcess
import re
import requests
from multiprocessing import Pool
import csv

if __name__ == "__main__":
    num_processes = 32
    pool = Pool(num_processes)
    tokens = pool.map(request_token, range(num_processes))

    concurrency = 8
    process = CrawlerProcess()
    split_size = len(words) // concurrency
    for i in range(0, len(words), split_size):
        split = words[i: i+split_size]
        process.crawl(XXXSpider, tokens=tokens, words=split)
    process.start()

Pact: Error when trying to set up mock provider

I'm trying to write my first pact-python test using pytest. Could someone please tell me what's wrong with my code?
import unittest
import requests
import json
import pytest
import atexit
from pact import Consumer, Provider

pact = Consumer('Consumer').has_pact_with(Provider('Provider'), host_name='mockservice', port=8080)
pact.start_service()
atexit.register(pact.stop_service)

class InterviewDetails(unittest.TestCase):

    def test_candidate_report_api(self):
        candidate_report_payload = {}
        resp = requests.post("http://localhost:1234/users/", data=json.dumps(candidate_report_payload))
        response = json.loads(resp.text)
        return response

    @pytest.mark.health1
    def test_candidate_report(self):
        expected = {}
        (pact.given('Comment')
         .upon_receiving('comment')
         .with_request(method='POST', path="http://localhost:1234/users/", headers={})
         .will_respond_with(200, body=expected))
        with pact:
            pact.setup()
            result = self.test_candidate_report_api()
            self.assertEqual(result, expected)
            pact.verify()
The error from the stack trace:
AttributeError: module 'pact' has no attribute 'Like'
Can you please confirm you're using pact-python from https://github.com/pact-foundation/pact-python/ (and not pactman, a project that is not maintained by the Pact Foundation)?
It might be related to the way you have set up your test?
Here is an example project you can use for reference: https://github.com/pactflow/example-consumer-python/
Relevant test code:
"""pact test for product service client"""
import json
import logging
import os
import requests
from requests.auth import HTTPBasicAuth
import pytest
from pact import Consumer, Like, Provider, Term, Format
from src.consumer import ProductConsumer
log = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)
print(Format().__dict__)
PACT_MOCK_HOST = 'localhost'
PACT_MOCK_PORT = 1234
PACT_DIR = os.path.dirname(os.path.realpath(__file__))
#pytest.fixture
def consumer():
return ProductConsumer(
'http://{host}:{port}'
.format(host=PACT_MOCK_HOST, port=PACT_MOCK_PORT)
)
#pytest.fixture(scope='session')
def pact(request):
pact = Consumer('pactflow-example-consumer-python').has_pact_with(
Provider('pactflow-example-provider-python'), host_name=PACT_MOCK_HOST, port=PACT_MOCK_PORT,
pact_dir="./pacts", log_dir="./logs")
try:
print('start service')
pact.start_service()
yield pact
finally:
print('stop service')
pact.stop_service()
def test_get_product(pact, consumer):
expected = {
'id': "27",
'name': 'Margharita',
'type': 'Pizza'
}
(pact
.given('a product with ID 10 exists')
.upon_receiving('a request to get a product')
.with_request('GET', '/product/10')
.will_respond_with(200, body=Like(expected)))
with pact:
user = consumer.get_product('10')
assert user.name == 'Margharita'
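For context, the ProductConsumer imported above is essentially a thin requests wrapper pointed at the mock service's base URI. A rough sketch of what it might look like (the real class lives in the example repo and may differ):

import requests

class Product:
    def __init__(self, product_id, name, product_type):
        self.id = product_id
        self.name = name
        self.type = product_type

class ProductConsumer:
    def __init__(self, base_uri):
        self.base_uri = base_uri

    def get_product(self, product_id):
        # call the (mock) provider and map the JSON body onto a small object
        data = requests.get("{}/product/{}".format(self.base_uri, product_id)).json()
        return Product(product_id=data['id'], name=data['name'], product_type=data['type'])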

Python QtWebKit 2nd link not downloaded

I am trying to download the content of both of the links below. The first link works properly and the content is downloaded as an HTML file, but the content of the 2nd link (a text file) is not downloaded. Please help me out.
Here is my code:
from PySide.QtCore import *
from PySide.QtGui import *
from PySide.QtWebKit import *
import sys
import codecs

class Downloader(QObject):
    # To be emitted when all items are downloaded
    done = Signal()

    def __init__(self, urlList, parent=None):
        super(Downloader, self).__init__(parent)
        self.urlList = urlList
        self.counter = 0
        # As you probably don't need to display the page
        # you can use QWebPage instead of QWebView
        self.page = QWebPage(self)
        self.page.loadFinished.connect(self.save)
        self.startNext()

    def currentUrl(self):
        return self.urlList[self.counter][0]

    def currentFilename(self):
        return self.urlList[self.counter][1]

    def startNext(self):
        print "Downloading %s..." % self.currentUrl()
        self.page.mainFrame().load(self.currentUrl())

    def save(self, ok):
        if ok:
            data = self.page.mainFrame().toHtml()
            with codecs.open(self.currentFilename(), encoding="utf-8", mode="w") as f:
                f.write(data)
            print "Saving %s to %s." % (self.currentUrl(), self.currentFilename())
        else:
            print "Error while downloading %s\nSkipping." % self.currentUrl()
        self.counter += 1
        if self.counter < len(self.urlList):
            self.startNext()
        else:
            self.done.emit()

urlList = [("http://www.nsf.gov/awardsearch/simpleSearchResult?queryText=8", "nsf.html"),
           ("http://www.nsf.gov/awardsearch/ExportResultServlet?exportType=txt", "a.txt")]

app = QApplication(sys.argv)
downloader = Downloader(urlList)
# Quit when done
downloader.done.connect(app.quit)

# To view the pages
web = QWebView()
# To prevent user action that would interrupt the current page loading
web.setDisabled(True)
web.setPage(downloader.page)
web.show()

sys.exit(app.exec_())
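If the second URL is served as a file download rather than a renderable page, QWebPage will not hand it to loadFinished as HTML, so reading it back with toHtml() is not a reliable way to capture it. One alternative is to fetch that URL directly over HTTP with QNetworkAccessManager instead of loading it into the page; a minimal sketch of that idea (assumed approach, not from the original post):

from PySide.QtCore import QCoreApplication, QUrl
from PySide.QtNetwork import QNetworkAccessManager, QNetworkRequest
import sys

app = QCoreApplication(sys.argv)
manager = QNetworkAccessManager()

def save_reply(reply):
    # write the raw response bytes to a.txt, then quit
    with open("a.txt", "wb") as f:
        f.write(reply.readAll().data())
    reply.deleteLater()
    app.quit()

manager.finished.connect(save_reply)
manager.get(QNetworkRequest(QUrl("http://www.nsf.gov/awardsearch/ExportResultServlet?exportType=txt")))
sys.exit(app.exec_())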
