This is my first Elixir/Phoenix project and I am loving it so far. Sadly, I haven't had much time yet to really explore tasks, concurrency, or OTP.
I have a form that allows up to 7 image uploads in one go. I initially set up the code to upload these synchronously, one by one. My next iteration was to run the uploads in parallel, which you can see in the upload_many function below.
The total time taken to upload 7 × 250 KB images has not dropped (still ~15 seconds) with my rewrite, which clearly suggests I am doing something wrong.
I would really appreciate any assistance or advice.
defmodule MyApp.S3 do
  def upload_many(params, keys) do
    ops = [max_concurrency: System.schedulers_online() * 3, timeout: 20_000]

    keys
    # [{"passport_image_url", file}, {"drivers_license_url", file2}, ...]
    |> Task.async_stream(&upload/1, ops)
    |> Enum.to_list()
  end

  def upload({url_key, image_params}) do
    unique_filename = get_unique_filename(image_params.filename)

    case File.read(image_params.path) do
      {:error, _} ->
        {:error, url_key, "file could not be read"}

      {:ok, image_binary} ->
        # returns image url string or error
        res = put_object(unique_filename, image_binary)
        IO.inspect(url_key)
        # {:ok, url_key, url}
        Tuple.insert_at(res, 1, url_key)
    end
  end

  def get_unique_filename(filename) do
    file_uuid = UUID.uuid4(:hex)
    "#{file_uuid}-#{filename}"
  end

  def put_object(unique, image_binary) do
    bucket = System.get_env("BUCKET_NAME")

    res =
      ExAws.S3.put_object(bucket, unique, image_binary)
      |> ExAws.request!()

    case res do
      %{status_code: 200} ->
        {:ok, image_url(unique, bucket)}

      _ ->
        {:error, "error uploading to S3"}
    end
  end

  def image_url(unique, bucket) do
    "https://#{bucket}.s3.amazonaws.com/#{bucket}/#{unique}"
  end
end
I have written a test that uploads 7 images, and I have run it with and without mocking the ExAws request with Process.sleep(4000); %{status_code: 200}. With the 4-second mock it performs the 7 tasks simultaneously in just over 4 seconds, but if I remove the mock and upload the files for real, it takes around 15 seconds again. Here is the test:
test "updates startpack with many file uploads", %{conn: conn, user: user} do
startpack = Repo.insert! %Startpack{user_id: user.id}
image_upload = %Plug.Upload{path: "test/fixtures/foxy.png", filename: "foxy.png"}
# possible solution for multiple fields
images = %{
"passport_image" => image_upload,
"vehicle_insurance_image" => image_upload,
"box_rental_image" => image_upload,
"equipment_rental_image" => image_upload,
"p45_image" => image_upload,
"schedule_d_letter_image" => image_upload,
"loan_out_company_cert_image" => image_upload
}
valid = Map.merge(#valid_attrs, images)
with_mock ExAws, [request!: fn(_) ->
Process.sleep(4000)
%{status_code: 200}
end] do
conn = put conn, startpack_path(conn, :update, startpack), startpack: valid
assert redirected_to(conn) == startpack_path(conn, :show, startpack)
startpack = Repo.get_by(Startpack, user_id: user.id)
assert startpack.passport_url
end
end
Related
def set_timer(update: Update, context: CallbackContext) -> None:
    """Add a job to the queue."""
    chat_id = update.message.chat_id
    try:
        # args[0] should contain the time for the timer in seconds
        due = int(context.args[0])
        if due < 0:
            update.message.reply_text('Sorry we can not go back to future!')
            return

        job_removed = remove_job_if_exists(str(chat_id), context)
        context.job_queue.run_once(alarm, due, context=chat_id, name=str(chat_id))

        text = 'Timer successfully set!'
        if job_removed:
            text += ' Old one was removed.'
        update.message.reply_text(text)

    except (IndexError, ValueError):
        update.message.reply_text('Usage: /set <seconds>')
How do I write this so that the job is scheduled with job_queue.run_repeating instead of run_once?
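In case it helps, here is a minimal sketch of how such a handler could schedule a repeating job, assuming the python-telegram-bot v13-style API used in the snippet above (CallbackContext, context.args, and the existing alarm and remove_job_if_exists helpers); the handler name and command are made up for illustration:

def set_repeating_timer(update: Update, context: CallbackContext) -> None:
    """Schedule alarm to run every N seconds (hypothetical adaptation)."""
    chat_id = update.message.chat_id
    try:
        # args[0] should contain the repeat interval in seconds
        interval = int(context.args[0])
        if interval <= 0:
            update.message.reply_text('Please pass a positive number of seconds.')
            return

        job_removed = remove_job_if_exists(str(chat_id), context)
        # run_repeating fires the callback every `interval` seconds;
        # `first` sets the delay before the first run
        context.job_queue.run_repeating(
            alarm, interval=interval, first=interval,
            context=chat_id, name=str(chat_id)
        )

        text = 'Repeating timer set!'
        if job_removed:
            text += ' Old one was removed.'
        update.message.reply_text(text)

    except (IndexError, ValueError):
        update.message.reply_text('Usage: /repeat <seconds>')

You would register it like the original handler, for example with dispatcher.add_handler(CommandHandler('repeat', set_repeating_timer)).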
So the code error is this (the error output was posted as a screenshot). The Lua code is used for FiveM scripting, with vRP as the main framework. The error points at a function that is defined in vRP, while the caller is a base function from the server artifacts. Even so, this is the code of the artifact that triggers the error:
local GetGameTimer = GetGameTimer
local _sbs = Citizen.SubmitBoundaryStart
local coresume, costatus = coroutine.resume, coroutine.status
local debug = debug
local coroutine_close = coroutine.close or (function(c) end) -- 5.3 compatibility

local hadThread = false
local curTime = 0

-- setup msgpack compat
msgpack.set_string('string_compat')
msgpack.set_integer('unsigned')
msgpack.set_array('without_hole')
msgpack.setoption('empty_table_as_array', true)

-- setup json compat
json.version = json._VERSION -- Version compatibility
json.setoption("empty_table_as_array", true)
json.setoption('with_hole', true)

-- temp
local _in = Citizen.InvokeNative

local function FormatStackTrace()
    return _in(`FORMAT_STACK_TRACE` & 0xFFFFFFFF, nil, 0, Citizen.ResultAsString())
end

local function ProfilerEnterScope(scopeName)
    return _in(`PROFILER_ENTER_SCOPE` & 0xFFFFFFFF, scopeName)
end

local function ProfilerExitScope()
    return _in(`PROFILER_EXIT_SCOPE` & 0xFFFFFFFF)
end

local newThreads = {}
local threads = setmetatable({}, {
    -- This circumvents undefined behaviour in "next" (and therefore "pairs")
    __newindex = newThreads,
    -- This is needed for CreateThreadNow to work correctly
    __index = newThreads
})

local boundaryIdx = 1
local runningThread

local function dummyUseBoundary(idx)
    return nil
end

local function getBoundaryFunc(bfn, bid)
    return function(fn, ...)
        local boundary = bid or (boundaryIdx + 1)
        boundaryIdx = boundaryIdx + 1

        bfn(boundary, coroutine.running())

        local wrap = function(...)
            dummyUseBoundary(boundary)

            local v = table.pack(fn(...))
            return table.unpack(v)
        end

        local v = table.pack(wrap(...))

        bfn(boundary, nil)

        return table.unpack(v)
    end
end
The screenshot of your code shows two calls to getBoundaryFunc
runWithBoundaryStart = getBoundaryFunc(Citizen.SubmitBoundaryStart)
runWithBoundaryEnd = getBoundaryFunc(Citizen.SubmitBoundaryEnd)
In order for fn to become nil, either of these functions must have been called without providing the first parameter. So:
find out whether there are more calls to getBoundaryFunc,
find out whether their return values are called with nil instead of the expected function value as the first parameter,
and fix that.
I am trying to write a crawler that goes to a website and searches for a list of keywords, with a max depth of 2. The scraper is supposed to stop once any of the keywords appears on any page, but the problem I am facing right now is that the crawler does not stop when it first sees one of the keywords.
It keeps going even after I tried an early return, a break, CloseSpider, and even Python exit calls.
Here is my crawler class:
from scrapy import Item
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule

class WebsiteSpider(CrawlSpider):
    name = "webcrawler"
    allowed_domains = ["www.roomtoread.org"]
    start_urls = ["https://" + "www.roomtoread.org"]
    rules = [Rule(LinkExtractor(), follow=True, callback="check_buzzwords")]

    crawl_count = 0
    words_found = 0

    def check_buzzwords(self, response):
        self.__class__.crawl_count += 1
        crawl_count = self.__class__.crawl_count

        wordlist = [
            "sfdc",
            "pardot",
            "Web-to-Lead",
            "salesforce"
        ]

        url = response.url
        contenttype = response.headers.get("content-type", "").decode('utf-8').lower()
        data = response.body.decode('utf-8')

        for word in wordlist:
            # find_all_substrings is a helper defined elsewhere
            substrings = find_all_substrings(data, word)
            for pos in substrings:
                ok = False
                if not ok:
                    if self.__class__.words_found == 0:
                        self.__class__.words_found += 1
                        print(word + "," + url + ";")
                        # STOP! <- this is where I want the whole crawl to stop

        return Item()

    def _requests_to_follow(self, response):
        if getattr(response, "encoding", None) is not None:
            return CrawlSpider._requests_to_follow(self, response)
        else:
            return []
I want it to stop execution when `if not ok:` is True.
When I want to stop a spider, I usually raise the exception scrapy.exceptions.CloseSpider(reason='cancelled') from the Scrapy docs.
The example there shows how you can use it:
if 'Bandwidth exceeded' in response.body:
    raise CloseSpider('bandwidth_exceeded')
In your case something like
if not ok:
    raise CloseSpider('keyword_found')
Or is that what you meant by "CloseSpider Commands", and you have already tried it?
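For completeness, here is a minimal sketch of how that could look inside the callback from the question, assuming the same find_all_substrings helper, Item import, and class attributes as in your snippet:

from scrapy.exceptions import CloseSpider

class WebsiteSpider(CrawlSpider):
    # ... name, allowed_domains, start_urls, rules as in the question ...

    def check_buzzwords(self, response):
        self.__class__.crawl_count += 1

        wordlist = ["sfdc", "pardot", "Web-to-Lead", "salesforce"]
        data = response.body.decode('utf-8')

        for word in wordlist:
            # find_all_substrings is the helper already used in the question
            if list(find_all_substrings(data, word)):
                print(word + "," + response.url + ";")
                # CloseSpider stops the crawl as soon as a keyword is found
                raise CloseSpider('keyword_found')

        return Item()

Note that CloseSpider shuts the crawl down gracefully, so requests that are already in flight may still be processed before the spider actually closes.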
I am using Task.async_stream just fine for uploading many files to S3.
Now I am attempting to use it in a download_many function that takes a keyword list of ids and urls. When I run download_many in an iex session, it returns the following output:
iex(1)> Karma.S3.download_many(1)
#Function<1.112846234/2 in Task.build_stream/3>
#Function<1.112846234/2 in Task.build_stream/3>
Here is the function:
def download_many(_urls) do
  urls = [
    "5": "https://engine-image-uploads.s3.amazonaws.com/engine-image-uploads/d4a9f8adb58b4e0b83c47e8f3b21d421-fillable.pdf",
    "3": "https://engine-image-uploads.s3.amazonaws.com/engine-image-uploads/ccd6d66cb4304b369a025efe3b26e68b-fillable.pdf"
  ]

  ops = [max_concurrency: System.schedulers_online() * 3, timeout: 20_000]

  tasks =
    Task.async_stream(urls, &download_with_id/1, ops)
    |> Enum.to_list()

  IO.inspect(tasks)
end

def download_with_id({id, url}) do
  file_destination = System.cwd() <> "/tmp/altered_document_" <> Atom.to_string(id) <> ".pdf"

  download(url, file_destination)
  |> Tuple.insert_at(2, id)
end
And a link to the file: https://github.com/karmaradio/karma/blob/async-download_many/lib/S3.ex#L58
I don't understand the output: #Function<1.112846234/2 in Task.build_stream/3>
Has anyone experienced something similar?
Thanks in advance!
Using RubyMotion (for the first time!), I want to use Twitter's search API to retrieve some recent tweets for some users, so I have put together the class below.
The value of tweets is always an empty array. I suspect that BW::HTTP.get(url) spawns its own thread, which is causing the issue.
Really, I just want twitter_search_results to return response.body.to_str, but I am not sure how to do this.
How do I use RubyMotion (or BubbleWrap) to put an array of Tweet objects into my UIViewController?
class TweetsController
  def initialize
    @twitter_accounts = %w(dhh google)
    @tweets = []
  end

  def tweets
    twitter_search_results
    puts @tweets.count
    @tweets
  end

  def create_tweets(response)
    BW::JSON.parse(response)["results"].each do |result|
      @tweets << Tweet.new(result)
    end
  end

  def twitter_search_results
    query = @twitter_accounts.map { |account| "from:#{account}" }.join(" OR ")
    url = "http://search.twitter.com/search.json?q=#{query}"

    BW::HTTP.get(url) do |response|
      create_tweets(response.body.to_str)
    end
  end
end

class TwitterViewController < UIViewController
  def viewDidLoad
    super
    self.view.backgroundColor = UIColor.blueColor

    @table = UITableView.alloc.initWithFrame(self.view.bounds)
    self.view.addSubview @table
    @table.dataSource = self

    @tweets_controller = TweetsController.new
  end

  def initWithNibName(name, bundle: bundle)
    super
    self.tabBarItem = UITabBarItem.alloc.initWithTitle(
      "Twitter",
      image: UIImage.imageNamed('twitter.png'),
      tag: 1)
    self
  end

  def tableView(tableView, numberOfRowsInSection: section)
    @tweets_controller.tweets.length
  end

  def tableView(tableView, cellForRowAtIndexPath: indexPath)
    @reuse_id = "Tweet"
    cell = UITableViewCell.alloc.initWithStyle(UITableViewCellStyleDefault, reuseIdentifier: @reuse_id)
    cell.textLabel.text = @tweets_controller.tweets[indexPath.row].text
    return cell
  end
end

class Tweet
  attr_reader :created_at, :from_user, :text

  def initialize(tweet_result)
    @created_at = tweet_result["created_at"]
    @from_user = tweet_result["from_user"]
    @text = tweet_result["text"]
  end
end
Full controller code below. I've also put the project on GitHub
class TweetsController
  def initialize
    @twitter_accounts = %w(dhh google)
    @tweets = []
    create_tweets
  end

  def tweets
    @tweets
  end

  def create_tweets
    json_data = twitter_search_results.dataUsingEncoding(NSUTF8StringEncoding)
    e = Pointer.new(:object)
    dict = NSJSONSerialization.JSONObjectWithData(json_data, options: 0, error: e)

    dict["results"].each do |result|
      p result.class
      p result
      @tweets << Tweet.new(result)
    end
  end

  def twitter_search_results
    query = @twitter_accounts.map { |account| "from:#{account}" }.join(" OR ")
    url_string = "http://search.twitter.com/search.json?q=#{query}"
    url_string_escaped = url_string.stringByAddingPercentEscapesUsingEncoding(NSUTF8StringEncoding)
    url = NSURL.URLWithString(url_string_escaped)
    request = NSURLRequest.requestWithURL(url)

    response = nil
    error = nil
    data = NSURLConnection.sendSynchronousRequest(request, returningResponse: response, error: error)

    raise "BOOM!" unless (data.length > 0 && error.nil?)

    json = NSString.alloc.initWithData(data, encoding: NSUTF8StringEncoding)
  end
end
The issue here is asynchronicity. You're almost there, I think, but the create_tweets method has not been called by the time puts @tweets runs. In this case, I would recommend using a notification, because I think they are good ;-)
TweetsReady = 'TweetsReady' # constants are nice
NSNotificationCenter.defaultCenter.postNotificationName(TweetsReady, object: @tweets)
In your controller, register for this notification in `viewWillAppear` and unregister in `viewWillDisappear`
NSNotificationCenter.defaultCenter.addObserver(self, selector: 'tweets_ready:', name: TweetsReady, object: nil) # object: nil means "register for all events, not just ones associated with 'object'"
# ...
NSNotificationCenter.defaultCenter.removeObserver(self, name:TweetsReady, object:nil)
and your tweets_ready method should implement your UI changes:
def tweets_ready(notification)
  @table.reloadData
end