Run repeating job queue - telegram

from telegram import Update
from telegram.ext import CallbackContext

# `alarm` and `remove_job_if_exists` are defined elsewhere in the bot (not shown here).

def set_timer(update: Update, context: CallbackContext) -> None:
    """Add a job to the queue."""
    chat_id = update.message.chat_id
    try:
        # args[0] should contain the time for the timer in seconds
        due = int(context.args[0])
        if due < 0:
            update.message.reply_text('Sorry we can not go back to future!')
            return

        job_removed = remove_job_if_exists(str(chat_id), context)
        context.job_queue.run_once(alarm, due, context=chat_id, name=str(chat_id))

        text = 'Timer successfully set!'
        if job_removed:
            text += ' Old one was removed.'
        update.message.reply_text(text)

    except (IndexError, ValueError):
        update.message.reply_text('Usage: /set <seconds>')
How do I change this so the job runs repeatedly, using job_queue.run_repeating instead of run_once?
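A minimal sketch, assuming python-telegram-bot v13.x (the same CallbackContext API as the snippet above): replace the run_once call with run_repeating, which takes the callback, an interval in seconds, and an optional first delay.

    # Fire `alarm` every `due` seconds instead of once.
    # `first=due` delays the first run by one interval; omit it to run immediately.
    context.job_queue.run_repeating(
        alarm, interval=due, first=due, context=chat_id, name=str(chat_id)
    )

Since the job keeps the same name, the remove_job_if_exists call above still replaces any previously scheduled timer for the chat.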

Related

Airflow Dynamic Task mapping - DagBag import timeout

I have a DAG that fetches a list of items from a source, in batches of 10 at a time, and then does a dynamic task mapping on each batch. Here is the code
def tutorial_taskflow_api():
    @task(multiple_outputs=True)
    def get_items(limit, cur):
        # actual logic is to fetch items and cursor from an external API call
        if cur is None:
            cursor = limit + 1
            items = range(0, limit)
        else:
            cursor = cur + limit + 1
            items = range(cur, cur + limit)
        return {'cursor': cursor, 'items': items}

    @task
    def process_item(item):
        print(f"Processing item {item}")

    @task
    def get_cursor_from_response(response):
        return response['cursor']

    @task
    def get_items_from_response(response):
        return response['items']

    cursor = None
    limit = 10
    while True:
        response = get_items(limit, cursor)
        items = get_items_from_response(response)
        cursor = get_cursor_from_response(response)
        if cursor:
            process_item.expand(item=items)
        if cursor is None:
            break

tutorial_taskflow_api()
As you can see, I fetch a list of items from a source in batches of 10 and then do dynamic task mapping on each batch.
However, when this DAG is imported, I get the DAG import timeout error:
Broken DAG: [/opt/airflow/dags/Test.py] Traceback (most recent call last):
File "/home/airflow/.local/lib/python3.7/site-packages/airflow/decorators/base.py", line 144, in _find_id_suffixes
for task_id in dag.task_ids:
File "/home/airflow/.local/lib/python3.7/site-packages/airflow/utils/timeout.py", line 69, in handle_timeout
raise AirflowTaskTimeout(self.error_message)
airflow.exceptions.AirflowTaskTimeout: DagBag import timeout for /opt/airflow/dags/Test.py after 30.0s.
Please take a look at these docs to improve your DAG import time:
* https://airflow.apache.org/docs/apache-airflow/2.5.1/best-practices.html#top-level-python-code
* https://airflow.apache.org/docs/apache-airflow/2.5.1/best-practices.html#reducing-dag-complexity, PID: 23822
How to solve this?
I went through the documentation and found that the while-loop logic shouldn't really be at the top level, but in some other task. But if I put it in some other task, how can I perform dynamic task mapping from inside that other task?
This code:

while True:
    response = get_items(limit, cursor)
    items = get_items_from_response(response)
    cursor = get_cursor_from_response(response)
    if cursor:
        process_item.expand(item=items)
    if cursor is None:
        break
is running in the DagFileProcessor before any DAG run is created. It executes every min_file_process_interval, and again each time Airflow needs to parse the file to run one of this DAG's tasks. Airflow has some timeouts, like dagbag_import_timeout, which is the maximum duration a DagFileProcessor may spend processing a DAG file before a timeout exception is raised; if you have a big batch, or the API has some latency, you can easily exceed this duration.
Also, you are treating cursor = get_cursor_from_response(response) as a normal Python variable, but it is not: it is an XComArg reference, and its value is not available before a DAG run is created and the task has executed.
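You can see this at parse time (a sketch; the exact XComArg class name printed varies across Airflow 2.x versions):

    # inside the @dag-decorated function, i.e. at parse time:
    response = get_items(10, None)
    cursor = get_cursor_from_response(response)
    print(type(cursor))  # an XComArg reference, not an int --
                         # so `if cursor:` cannot branch on the real value here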
Solution and best practices:
Dynamic Task Mapping is designed to solve exactly this problem, and it's flexible, so you can use it in different ways:
import pendulum
from airflow.decorators import dag, task

@dag(dag_id="tutorial_taskflow_api", start_date=pendulum.datetime(2023, 1, 1), schedule=None)
def tutorial_taskflow_api():
    @task
    def get_items(limit):
        data = []
        start_ind = 0
        while True:
            end_ind = min(start_ind + limit, 95)  # 95 records in the API
            items = range(start_ind, end_ind) if start_ind <= 90 else None  # a fake end of data
            if items is None:
                break
            data.extend(items)
            start_ind = end_ind
        return data

    @task
    def process_item(item):
        print(f"Processing item {item}")

    process_item.expand(item=get_items(limit=10))

tutorial_taskflow_api()
But if you want to process the data in batches, the best option is mapped task groups. Unfortunately, nested mapped tasks are not supported yet, so you need to process the items of each batch in a loop:
import pendulum
from airflow.decorators import dag, task, task_group

@dag(dag_id="tutorial_taskflow_api", start_date=pendulum.datetime(2023, 1, 1), schedule=None)
def tutorial_taskflow_api():
    @task
    def get_pages(limit):
        start_ind = 0
        pages = []
        while True:
            end_ind = min(start_ind + limit, 95)  # 95 records in the API
            page = dict(start=start_ind, end=end_ind) if start_ind <= 90 else None  # a fake end of data
            if page is None:
                break
            pages.append(page)
            start_ind = end_ind
        return pages

    @task_group()
    def process_batch(start, end):
        @task
        def get_items(start, end):
            return list(range(start, end))

        @task
        def process_items(items):
            for item in items:
                print(f"Processing item {item}")

        process_items(get_items(start=start, end=end))

    process_batch.expand_kwargs(get_pages(10))

tutorial_taskflow_api()
Update:
There is the config max_map_length, which is the maximum number of mapped tasks/task groups you can have. If your API sometimes returns spikes of data, you can increase this limit (not recommended) or calculate the batch size dynamically:
import pendulum
from airflow.decorators import dag, task, task_group

@dag(dag_id="tutorial_taskflow_api", start_date=pendulum.datetime(2023, 1, 1), schedule=None)
def tutorial_taskflow_api():
    @task
    def get_limit():
        import math
        max_map_length = 1024
        elements_count = 9999  # get from the API
        preferred_batch_size = 10
        return max(preferred_batch_size, math.ceil(elements_count / max_map_length))

    @task
    def get_pages(limit):
        start_ind = 0
        pages = []
        while True:
            end_ind = min(start_ind + limit, 95)  # 95 records in the API
            page = dict(start=start_ind, end=end_ind) if start_ind <= 90 else None  # a fake end of data
            if page is None:
                break
            pages.append(page)
            start_ind = end_ind
        return pages

    @task_group()
    def process_batch(start, end):
        @task
        def get_items(start, end):
            return list(range(start, end))

        @task
        def process_items(items):
            for item in items:
                print(f"Processing item {item}")

        process_items(get_items(start=start, end=end))

    process_batch.expand_kwargs(get_pages(get_limit()))

tutorial_taskflow_api()

How to stop the Crawler

I am trying to write a crawler that goes to a website and searches for a list of keywords, with a max depth of 2. The scraper is supposed to stop once any of the keywords appears on any page; the problem I am facing right now is that the crawler does not stop when it first sees any of the keywords.
I have tried early returns, break statements, CloseSpider commands, and even Python exit commands, but the crawler keeps going.
My crawler class:

from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor
from scrapy import Item

class WebsiteSpider(CrawlSpider):
    name = "webcrawler"
    allowed_domains = ["www.roomtoread.org"]
    start_urls = ["https://" + "www.roomtoread.org"]
    rules = [Rule(LinkExtractor(), follow=True, callback="check_buzzwords")]

    crawl_count = 0
    words_found = 0

    def check_buzzwords(self, response):
        self.__class__.crawl_count += 1
        crawl_count = self.__class__.crawl_count
        wordlist = [
            "sfdc",
            "pardot",
            "Web-to-Lead",
            "salesforce",
        ]
        url = response.url
        contenttype = response.headers.get("content-type", b"").decode('utf-8').lower()
        data = response.body.decode('utf-8')
        for word in wordlist:
            substrings = find_all_substrings(data, word)  # helper defined elsewhere
            for pos in substrings:
                ok = False
                if not ok:
                    if self.__class__.words_found == 0:
                        self.__class__.words_found += 1
                        print(word + "," + url + ";")
                        # STOP! <- the whole crawl should end here
        return Item()

    def _requests_to_follow(self, response):
        if getattr(response, "encoding", None) is not None:
            return CrawlSpider._requests_to_follow(self, response)
        else:
            return []
I want it to stop execution when `if not ok:` is True.
When I want to stop a spider, I usually raise the exception scrapy.exceptions.CloseSpider(reason='cancelled') from the Scrapy docs.
The example there shows how you can use it:
if 'Bandwidth exceeded' in response.body:
    raise CloseSpider('bandwidth_exceeded')

In your case, something like:

if not ok:
    raise CloseSpider('keyword_found')
Or is that what you meant with "CloseSpider commands" and have already tried it?
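Applied to the spider above, the callback could look like this, a sketch where find_all_substrings and wordlist are the ones from the original code. Note that CloseSpider shuts the spider down gracefully, so requests already in flight may still complete:

    from scrapy.exceptions import CloseSpider

    def check_buzzwords(self, response):
        self.__class__.crawl_count += 1
        data = response.body.decode('utf-8')
        for word in wordlist:  # wordlist as defined in the original callback
            if find_all_substrings(data, word):
                print(word + "," + response.url + ";")
                raise CloseSpider('keyword_found')  # stops the whole crawl
        return Item()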

Elixir Phoenix - uploading multiple images with Task module

This is my first Elixir/Phoenix project and I am loving it so far. Sadly, I haven't had much time to really explore tasks/concurrency or OTP.
I have a form that can allow up to 7 image uploads in one go. I initially set up the code to upload these synchronously, 1 by 1. My next iteration was to attempt to run these in parallel, which can be seen in the upload_many function below.
The total time taken to upload 7 x 250 KB images has not decreased (~15 seconds) with my rewrite, which clearly suggests I am doing something wrong.
I would really appreciate any assistance or advice.
defmodule MyApp.S3 do
  def upload_many(params, keys) do
    ops = [max_concurrency: System.schedulers_online() * 3, timeout: 20000]

    keys
    # [{"passport_image_url", file}, {"drivers_license_url", file2}, ...]
    |> Task.async_stream(&upload/1, ops)
    |> Enum.to_list()
  end

  def upload({url_key, image_params}) do
    unique_filename = get_unique_filename(image_params.filename)

    case File.read(image_params.path) do
      {:error, _} ->
        {:error, url_key, "file could not be read"}

      {:ok, image_binary} ->
        # returns image url string or error
        res = put_object(unique_filename, image_binary)
        IO.inspect url_key
        Tuple.insert_at(res, 1, url_key)
        # {:ok, url_key, url}
    end
  end

  def get_unique_filename(filename) do
    file_uuid = UUID.uuid4(:hex)
    image_filename = filename
    "#{file_uuid}-#{image_filename}"
  end

  def put_object(unique, image_binary) do
    bucket = System.get_env("BUCKET_NAME")

    res =
      ExAws.S3.put_object(bucket, unique, image_binary)
      |> ExAws.request!

    case res do
      %{status_code: 200} ->
        {:ok, image_url(unique, bucket)}

      _ ->
        {:error, "error uploading to S3"}
    end
  end

  def image_url(unique, bucket) do
    "https://#{bucket}.s3.amazonaws.com/#{bucket}/#{unique}"
  end
end
I have written a test that uploads 7 images, and have run it with and without mocking the ExAws request with Process.sleep(4000); %{status_code: 200}. With the 4-second mock it manages to perform the 7 tasks simultaneously in just over 4 seconds, but if I remove the mock and upload the files for real, it takes around 15 seconds again. Here is the test:
test "updates startpack with many file uploads", %{conn: conn, user: user} do
startpack = Repo.insert! %Startpack{user_id: user.id}
image_upload = %Plug.Upload{path: "test/fixtures/foxy.png", filename: "foxy.png"}
# possible solution for multiple fields
images = %{
"passport_image" => image_upload,
"vehicle_insurance_image" => image_upload,
"box_rental_image" => image_upload,
"equipment_rental_image" => image_upload,
"p45_image" => image_upload,
"schedule_d_letter_image" => image_upload,
"loan_out_company_cert_image" => image_upload
}
valid = Map.merge(#valid_attrs, images)
with_mock ExAws, [request!: fn(_) ->
Process.sleep(4000)
%{status_code: 200}
end] do
conn = put conn, startpack_path(conn, :update, startpack), startpack: valid
assert redirected_to(conn) == startpack_path(conn, :show, startpack)
startpack = Repo.get_by(Startpack, user_id: user.id)
assert startpack.passport_url
end
end

Open QDialog from QThread and getting return data?

I have a scenario where I am outsourcing my execution to a QThread, and during execution I need to launch a QDialog to take some user input from within the QThread.
The scenario works fine when no QDialog calls are involved, but when the QDialog code runs, I get
QPixmap: It is not safe to use pixmaps outside the GUI thread
QObject::startTimer: timers cannot be started from another thread
QApplication: Object event filter cannot be in a different thread.
and execution stops abruptly.
My QDialog code:

class ResultDialog(QDialog):
    def __init__(self, parent=None):
        super(ResultDialog, self).__init__(parent)
        self.resultIndex = -1
        grid_layout = QGridLayout(self)
        failreplace_layout = QHBoxLayout(self)
        faildontreplace_layout = QHBoxLayout(self)
        self.info_lbl = QLabel(self)
        self.info_lbl.setText("Image comparison failed for screen resolution")
        self.failreplace_radio = QRadioButton(self)
        self.failreplace_radio.setText("Fail and Replace Reference Image")
        self.faildontreplace_radio = QRadioButton(self)
        self.faildontreplace_radio.setText("Fail and Do not replace Reference Image")
        self.tester_comment = QPlainTextEdit(self)
        self.tester_comment.clear()
        self.tester_comment.setPlainText('Tester comment is desired')
        self.tester_comment.setDisabled(True)
        self.buttonsend = QPushButton(self)
        self.buttonsend.setText("Ok")
        self.buttonsend.setCheckable(True)
        grid_layout.addWidget(self.info_lbl, 0, 0)
        grid_layout.addWidget(self.failreplace_radio, 1, 0)
        grid_layout.addWidget(self.faildontreplace_radio, 2, 0)
        grid_layout.addWidget(self.tester_comment, 3, 0)
        self.loop = QtCore.QEventLoop()
        # OK and Cancel buttons
        grid_layout.addWidget(self.buttonsend, 4, 0)
        self.buttonsend.clicked.connect(self.submitclose)
        self.failreplace_radio.clicked.connect(self.onfailreplace_radio)
        self.faildontreplace_radio.clicked.connect(self.onfaildontreplace_radio)

    def submitclose(self):
        self.loop.quit()
        self.accept()

    def onfailreplace_radio(self):
        print "onfailreplace_radio "
        self.tester_comment.setDisabled(False)
        self.buttonsend.setDisabled(False)
        self.tester_comment.clear()
        self.resultIndex = 0

    def onfaildontreplace_radio(self):
        print "onfaildontreplace_radio "
        self.tester_comment.setDisabled(False)
        self.buttonsend.setDisabled(False)
        self.tester_comment.clear()
        self.resultIndex = 1

    # static method to create the dialog and return the selection
    @staticmethod
    def returnSelection(parent):
        dialog = ResultDialog(parent)
        dialog.show()
        dialog.loop.exec_()
        print "dialog.buttonsend.isChecked() ", dialog.buttonsend.isChecked()
        if dialog.buttonsend.isChecked():
            if not str(dialog.tester_comment.toPlainText()):
                QMessageBox.critical(dialog, 'Tester Comment', 'Tester comment is desired')
                return (dialog.resultIndex, 'NA')
            else:
                return (dialog.resultIndex, str(dialog.tester_comment.toPlainText()))

if __name__ == "__main__":
    app = QApplication([])
    ok = ResultDialog.returnSelection(None)
    print("{}".format(ok))
    app.exec_()
And I am calling it from QThread.run() like:

index, testercomment = ResultDialog.returnSelection(None)
if index == 0 or index == 1:
    self.resumeExec()
else:
    self.sync.lock()
    self.pauseCond.wait(self.sync)
    self.sync.unlock()
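The errors occur because QWidget-based classes such as QDialog may only be created and shown on the GUI thread. A common pattern is for the worker to emit a signal that the main thread handles by opening the dialog, while the worker blocks on a QWaitCondition until the result arrives. A minimal sketch, assuming PyQt4; the names Worker, need_input, and dialog_result are illustrative, not from the original code:

    from PyQt4 import QtCore

    class Worker(QtCore.QThread):
        # emitted when the worker needs the tester's decision
        need_input = QtCore.pyqtSignal()

        def __init__(self, parent=None):
            super(Worker, self).__init__(parent)
            self.mutex = QtCore.QMutex()
            self.cond = QtCore.QWaitCondition()
            self.dialog_result = None

        def run(self):
            # ... image comparison failed, ask the GUI thread for input ...
            self.mutex.lock()
            self.need_input.emit()       # delivered to the GUI thread as a queued connection
            self.cond.wait(self.mutex)   # block until the GUI thread stores the result
            self.mutex.unlock()
            index, testercomment = self.dialog_result
            # ... continue depending on index / testercomment ...

    # On the GUI thread (e.g. in the widget that starts the worker):
    def on_need_input():
        result = ResultDialog.returnSelection(None)  # safe: runs on the GUI thread
        worker.mutex.lock()
        worker.dialog_result = result
        worker.cond.wakeAll()
        worker.mutex.unlock()

    # worker = Worker()
    # worker.need_input.connect(on_need_input)
    # worker.start()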

Get HTTP response body as string (BubbleWrap for RubyMotion)

Using RubyMotion (for the first time!), I want to use Twitter's search API to retrieve some recent tweets for some users, so I have put together the class below.
The value of @tweets is always an empty array. I suspect that BW::HTTP.get(url) spawns its own thread, which is causing the issue.
Really, I just want twitter_search_results to return response.body.to_str, but I am not sure how to do this.
How do I use RubyMotion (or BubbleWrap) to put an array of Tweet objects into my UIViewController?
class TweetsController
  def initialize
    @twitter_accounts = %w(dhh google)
    @tweets = []
  end

  def tweets
    twitter_search_results
    puts @tweets.count
    @tweets
  end

  def create_tweets(response)
    BW::JSON.parse(response)["results"].each do |result|
      @tweets << Tweet.new(result)
    end
  end

  def twitter_search_results
    query = @twitter_accounts.map { |account| "from:#{account}" }.join(" OR ")
    url = "http://search.twitter.com/search.json?q=#{query}"

    BW::HTTP.get(url) do |response|
      create_tweets(response.body.to_str)
    end
  end
end

class TwitterViewController < UIViewController
  def viewDidLoad
    super
    self.view.backgroundColor = UIColor.blueColor

    @table = UITableView.alloc.initWithFrame(self.view.bounds)
    self.view.addSubview @table
    @table.dataSource = self

    @tweets_controller = TweetsController.new
  end

  def initWithNibName(name, bundle: bundle)
    super
    self.tabBarItem = UITabBarItem.alloc.initWithTitle(
      "Twitter",
      image: UIImage.imageNamed('twitter.png'),
      tag: 1)
    self
  end

  def tableView(tableView, numberOfRowsInSection: section)
    @tweets_controller.tweets.length
  end

  def tableView(tableView, cellForRowAtIndexPath: indexPath)
    @reuse_id = "Tweet"
    cell = UITableViewCell.alloc.initWithStyle(UITableViewCellStyleDefault, reuseIdentifier: @reuse_id)
    cell.textLabel.text = @tweets_controller.tweets[indexPath.row].text
    return cell
  end
end

class Tweet
  attr_reader :created_at, :from_user, :text

  def initialize(tweet_result)
    @created_at = tweet_result["created_at"]
    @from_user = tweet_result["from_user"]
    @text = tweet_result["text"]
  end
end
Full controller code below. I've also put the project on GitHub
class TweetsController
  def initialize
    @twitter_accounts = %w(dhh google)
    @tweets = []
    create_tweets
  end

  def tweets
    @tweets
  end

  def create_tweets
    json_data = twitter_search_results.dataUsingEncoding(NSUTF8StringEncoding)
    e = Pointer.new(:object)
    dict = NSJSONSerialization.JSONObjectWithData(json_data, options: 0, error: e)

    dict["results"].each do |result|
      p result.class
      p result
      @tweets << Tweet.new(result)
    end
  end

  def twitter_search_results
    query = @twitter_accounts.map { |account| "from:#{account}" }.join(" OR ")
    url_string = "http://search.twitter.com/search.json?q=#{query}"
    url_string_escaped = url_string.stringByAddingPercentEscapesUsingEncoding(NSUTF8StringEncoding)
    url = NSURL.URLWithString(url_string_escaped)
    request = NSURLRequest.requestWithURL(url)
    response = nil
    error = nil
    data = NSURLConnection.sendSynchronousRequest(request, returningResponse: response, error: error)
    raise "BOOM!" unless (data.length > 0 && error.nil?)
    json = NSString.alloc.initWithData(data, encoding: NSUTF8StringEncoding)
  end
end
The issue here is asynchronicity. You're almost there, I think, but the create_tweets method has not been called by the time puts @tweets runs. In this case, I would recommend using a notification, because I think they are good ;-)
TweetsReady = 'TweetsReady' # constants are nice
NSNotificationCenter.defaultCenter.postNotificationName(TweetsReady, object: @tweets)
In your controller, register for this notification in `viewWillAppear` and unregister in `viewWillDisappear`:
NSNotificationCenter.defaultCenter.addObserver(self, selector: 'tweets_ready:', name: TweetsReady, object:nil) # object:nil means 'register for all events, not just ones associated with 'object'
# ...
NSNotificationCenter.defaultCenter.removeObserver(self, name:TweetsReady, object:nil)
and your tweets_ready method should implement your UI changes:
def tweets_ready(notification)
  @table.reloadData
end
