How do I run this program asynchronously?

As a newbie to Python asyncio, I have written a sample cook-and-waiter problem.
import asyncio

async def waiter():
    t1 = asyncio.create_task(cook('indian', 10))
    t2 = asyncio.create_task(cook('chinese', 5))
    t3 = asyncio.create_task(cook('american', 15))
    await t1
    await t2
    await t3

async def cook(name, time):
    print('Preparing {}'.format(name))
    await asyncio.sleep(time)
    print('Prepared {}'.format(name))

asyncio.run(waiter())
ubuntu@ip-172-31-14-144:~$ python3 one.py
Preparing indian
Preparing chinese
Preparing american
Prepared chinese
Prepared indian
Prepared american
ubuntu@ip-172-31-14-144:~$
I understand from the above one.py that the waiter takes all the orders and then hands them to the cook to process. To build up my understanding further, I thought of making a menu-driven program so that the user can choose.
import asyncio
import aioconsole

menu = {
    'item1': 10,
    'item2': 5,
    'item3': 25,
    'item4': 5
}

queue = asyncio.Queue()
tasks = []

async def cook():
    print('In queue')
    user_option = await queue.get()
    user_option -= 1
    print(user_option)
    print('Preparing {}'.format(list(menu.keys())[user_option-1]))
    await asyncio.sleep(menu[list(menu.keys())[user_option-1]])
    print('Prepared {}'.format(list(menu.keys())[user_option-1]))

async def get_input():
    inp = await aioconsole.ainput('Please enter your desired option\n')
    return int(inp)

async def waiter():
    user_option = 0
    while True:
        count = 1
        print('*'*100)
        print('Hello User..\n')
        print('What would you like to have ??\n')
        for item in menu:
            print('{}. {}'.format(count, item))
            count = count + 1
        try:
            user_option = await asyncio.wait_for(get_input(), timeout=2.0)
        except asyncio.TimeoutError:
            print('TIMEOUT')
        if user_option:
            await queue.put(user_option)
            tasks.append(asyncio.create_task(coro=cook()))
            for i in tasks:
                await i
        else:
            print('In else')
            pass

asyncio.run(waiter())
****************************************************************************************************
Hello User..
What would you like to have ??
1. item1
2. item2
3. item3
4. item4
Please enter your desired option
TIMEOUT
In else
****************************************************************************************************
Hello User..
What would you like to have ??
1. item1
2. item2
3. item3
4. item4
Please enter your desired option
1 -> an option is entered here
In queue
0
Preparing item4 # Item is being prepared, but the intention is this should be happening
Prepared item4 # concurrently, so that other users can place their order
****************************************************************************************************
Hello User..
What would you like to have ??
1. item1
2. item2
3. item3
4. item4
Please enter your desired option
Expectation:
In the second program, when an option is entered the cook should process it and print the progress concurrently, so that a user can place another order even while the cook is preparing something.
Problem:
As soon as an option is entered, the waiter function waits for the cook to complete before displaying the menu again.
Python 3.8.10 is used.
Thanks

import asyncio
import aioconsole

menu = {
    'item1': 10,
    'item2': 5,
    'item3': 25,
    'item4': 5
}

tasks = []

# async def cook(queue):
#     print('In queue')
#     user_option = await queue.get()
#     user_option -= 1
#     print(user_option)
#     print('Preparing {}'.format(list(menu.keys())[user_option-1]))
#     await asyncio.sleep(menu[list(menu.keys())[user_option-1]])
#     print('Prepared {}'.format(list(menu.keys())[user_option-1]))

async def cook(queue):
    while True:
        print('In queue')
        user_option = await queue.get()
        user_option -= 1
        print(user_option)
        print('Preparing {}'.format(list(menu.keys())[user_option-1]))
        await asyncio.sleep(menu[list(menu.keys())[user_option-1]])
        print('Prepared {}'.format(list(menu.keys())[user_option-1]))

async def get_input():
    inp = await aioconsole.ainput('Please enter your desired option\n')
    return int(inp)

async def waiter(queue):
    user_option = 0
    while True:
        count = 1
        print('*'*100)
        print('Hello User..\n')
        print('What would you like to have ??\n')
        for item in menu:
            print('{}. {}'.format(count, item))
            count = count + 1
        try:
            user_option = await asyncio.wait_for(get_input(), timeout=1.0)
            print('You entered {}'.format(user_option))
        except asyncio.TimeoutError:
            pass
        if user_option > 0:
            print('Inserting option into queue {}'.format(user_option))
            await queue.put(user_option)
            user_option = -1
        await asyncio.sleep(3)

async def main():
    queue = asyncio.Queue()
    task1 = asyncio.create_task(waiter(queue))
    task2 = asyncio.create_task(cook(queue))
    await asyncio.gather(task1, task2)

asyncio.run(main())
The waiter can now take orders concurrently, and the cook prints when each item is prepared. The original version blocked because waiter awaited the cook tasks inside its own loop; here cook runs as a single long-lived task that consumes orders from the queue.
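As a side note, not part of the original answer: if the program later needs to know when every queued order has actually been cooked (for example before shutting down), asyncio.Queue provides task_done() and join(). A minimal sketch of how they would slot into the code above:

async def cook(queue):
    while True:
        user_option = await queue.get()
        # ... prepare the item exactly as above ...
        queue.task_done()        # mark this order as fully processed

# elsewhere, e.g. at shutdown:
# await queue.join()            # resumes once every put() has a matching task_done()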

Related

How can I use Google Cloud Functions to run a web scraper?

Thanks in advance for your help.
I'm currently running a web scraper - this is the first time I've ever done something like this. It pulls addresses from the URL and then matches an address against the user's input. This will be going into a chat bot, and I'm wondering how I can make this run on Google Cloud Functions. What's the process to do this, and is there a tutorial anywhere?
This is my code so far. There is a small items file too (a sketch of what it presumably contains follows the spider code below).
from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor
from ..items import DataItem
from fuzzywuzzy import fuzz
from urllib.parse import urljoin
import scrapy

class AddressesSpider(scrapy.Spider):
    name = 'Addresses'
    allowed_domains = ['find-energy-certificate.service.gov.uk']
    postcode = "bh10+4ah"
    start_urls = ['https://find-energy-certificate.service.gov.uk/find-a-certificate/search-by-postcode?postcode=' + postcode]

    ## def start_requests(self):
    ##     self.first = input("Please enter the address you would like to match: ")
    ##     yield scrapy.Request(url=self.start_urls[0], callback=self.parse)

    def parse(self, response):
        first = input("Please enter the address you would like to match: ")
        highest_ratios = []
        highest_item = None
        for row in response.xpath('//table[@class="govuk-table"]//tr'):
            address = row.xpath("normalize-space(.//a[@class='govuk-link']/text())").extract()[0].lower()
            address = address.rsplit(',', 2)[0]
            link = row.xpath('.//a[@class="govuk-link"]/@href').extract()
            details = row.xpath("normalize-space(.//td/following-sibling::td)").extract()
            ratio = fuzz.token_set_ratio(address, first)
            item = DataItem()
            item['link'] = link
            item['details'] = details
            item['address'] = address
            item['ratioresult'] = ratio
            if len(highest_ratios) < 3:
                highest_ratios.append(item)
            elif ratio > min(highest_ratios, key=lambda x: x['ratioresult'])['ratioresult']:
                highest_ratios.remove(min(highest_ratios, key=lambda x: x['ratioresult']))
                highest_ratios.append(item)

        highest_ratios_100 = [item for item in highest_ratios if item['ratioresult'] == 100]
        if highest_ratios_100:
            for item in highest_ratios_100:
                yield item
        else:
            yield max(highest_ratios, key=lambda x: x['ratioresult'])

        if len(highest_ratios_100) > 1:
            for i, item in enumerate(highest_ratios_100):
                print(f"{i+1}: {item['address']}")
            selected = int(input("Please select the correct address by entering the number corresponding to the address: ")) - 1
            selected_item = highest_ratios_100[selected]
        else:
            selected_item = highest_ratios_100[0] if highest_ratios_100 else max(highest_ratios, key=lambda x: x['ratioresult'])

        new_url = selected_item['link'][0]
        new_url = str(new_url)
        if new_url:
            base_url = 'https://find-energy-certificate.service.gov.uk'
            print(f'Base URL: {base_url}')
            print(f'New URL: {new_url}')
            new_url = urljoin(base_url, new_url)
            print(f'Combined URL: {new_url}')
            yield scrapy.Request(new_url, callback=self.parse_new_page)

    def parse_new_page(self, response):
        Postcode = response.xpath('normalize-space((//p[@class="epc-address govuk-body"]/text())[last()])').extract()
        Town = response.xpath('normalize-space((//p[@class="epc-address govuk-body"]/text())[last()-1])').extract()
        First = response.xpath(".//p[@class='epc-address govuk-body']").extract()
        Type = response.xpath('normalize-space(//dd[1]/text())').extract_first()
        Walls = response.xpath("//th[contains(text(), 'Wall')]/following-sibling::td[1]/text()").extract()
        Roof = response.xpath("//th[contains(text(), 'Roof')]/following-sibling::td[1]/text()").extract()
        Heating = response.xpath("//th[text()='Main heating']/following-sibling::td[1]/text()").extract_first()
        CurrentScore = response.xpath('//body[1]/div[2]/main[1]/div[1]/div[3]/div[3]/svg[1]/svg[1]/text[1]/text()').re_first("[0-9+]{1,2}")
        Maxscore = response.xpath('//body[1]/div[2]/main[1]/div[1]/div[3]/div[3]/svg[1]/svg[2]/text[1]/text()').re_first("[0-9+]{2}")
        Expiry = response.xpath('normalize-space(//b)').extract_first()
        FloorArea = response.xpath('//dt[contains(text(), "floor area")]/following-sibling::dd/text()').re_first("[0-9+]{2,3}")
        Steps = response.xpath("//h3[contains(text(),'Step')]/text()").extract()
        yield {
            'Postcode': Postcode,
            'Town': Town,
            'First': First,
            'Type': Type,
            'Walls': Walls,
            'Roof': Roof,
            'Heating': Heating,
            'CurrentScore': CurrentScore,
            'Maxscore': Maxscore,
            'Expiry': Expiry,
            'FloorArea': FloorArea,
            'Steps': Steps
        }
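The "small items file" mentioned above is not shown in the post. Judging by the fields the spider assigns, it presumably looks something like this (a hypothetical reconstruction, not from the original question):

# items.py - hypothetical reconstruction based on the fields used in the spider
import scrapy

class DataItem(scrapy.Item):
    link = scrapy.Field()
    details = scrapy.Field()
    address = scrapy.Field()
    ratioresult = scrapy.Field()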
I've tried googling and having a look around, but I can't work out how to deploy this as a project to run on Google Cloud Functions - or can I just copy the code into the console somewhere?
You can try running your spider from a script. However, a better solution is to wrap Scrapy in its own child process: Twisted's reactor cannot be restarted, so running each crawl in a fresh process avoids problems when a warm function instance is invoked more than once.
For example:
from multiprocessing import Process, Queue

from ... import MySpider
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

def my_cloud_function(event, context):

    def script(queue):
        try:
            settings = get_project_settings()
            settings.setdict({
                'LOG_LEVEL': 'ERROR',
                'LOG_ENABLED': True,
            })

            process = CrawlerProcess(settings)
            process.crawl(MySpider)
            process.start()
            queue.put(None)
        except Exception as e:
            queue.put(e)

    queue = Queue()

    # wrap the spider in a child process
    main_process = Process(target=script, args=(queue,))
    main_process.start()  # start the process
    main_process.join()   # block until the spider finishes

    result = queue.get()  # check the process did not return an error
    if result is not None:
        raise result

    return 'ok'
You can refer to this tutorial for more info.
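If the chat bot needs the scraped items back rather than just a success flag, one option - a sketch, not part of the answer above - is to collect them inside the child process via Scrapy's item_scraped signal and push the list through the same multiprocessing queue:

from scrapy import signals
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

def script(queue):   # drop-in replacement for the inner script() above
    try:
        items = []

        def collect(item, response, spider):         # item_scraped signal handler
            items.append(dict(item))

        process = CrawlerProcess(get_project_settings())
        crawler = process.create_crawler(MySpider)   # MySpider imported as in the answer
        crawler.signals.connect(collect, signal=signals.item_scraped)
        process.crawl(crawler)
        process.start()
        queue.put(items)                             # instead of queue.put(None)
    except Exception as e:
        queue.put(e)

The caller would then treat the result as either a list of items or an exception (for example with isinstance(result, Exception)) instead of checking for None.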

Why can't I handle 300 get responses with async?

As part of my homework project, I'm working with imdb.com pages.
For one task I need to make 320 GET requests and turn the responses into BeautifulSoup objects later on.
I'm trying to do that the async way, and so far I have this:
import asyncio
import time

import aiohttp

def get_tasks(session, url_links):
    tasks = []
    num = 1  # debugging purposes
    for url in url_links:
        tasks.append(session.get(url, headers={'Accept-Language': 'en', 'X_FORWARDED_FOR': '2.21.184.0'}, ssl=False))
        time.sleep(1)  # avoid 503 status_code
        print(f"Number of responses get_tasks: {num}")  # debugging purposes
        num += 1  # debugging purposes
    return tasks

# Getting response.texts
results = []

async def get_response_texts(url_links):
    async with aiohttp.ClientSession() as session:
        tasks = get_tasks(session, url_links)
        responses = await asyncio.gather(*tasks)
        t1 = time.perf_counter()
        num = 1
        for response in responses:
            results.append(await response.text())
            print(f"{num} responses processed")  # debugging purposes
            num += 1
        t2 = time.perf_counter()
        print(f'Asynchronous execution: Finished in {t2 - t1} seconds\n')

if __name__ == '__main__':
    links = [a list of urls to films as strings]
    asyncio.run(get_response_texts(links))
    print(len(results))
Here comes the problem: When I process 100 requests, things seem all right, but when I make 300, I get asyncio.exceptions.TimeoutError.
Why is that, and how can I avoid it so that all 320 requests are made asynchronously?
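No answer is recorded for this one, but a likely cause is aiohttp's default ClientTimeout (300 seconds total per request), which also counts time a request spends waiting for one of the connector's pooled connections; firing 300+ slow requests at once can push the later ones over that limit. A sketch of one mitigation, capping concurrency with a semaphore and setting an explicit timeout (names and values here are illustrative, not from the thread):

import asyncio
import aiohttp

async def fetch(session, sem, url):
    async with sem:   # at most max_concurrency requests in flight at a time
        async with session.get(url, headers={'Accept-Language': 'en'}, ssl=False) as resp:
            return await resp.text()

async def get_response_texts(url_links, max_concurrency=20):
    sem = asyncio.Semaphore(max_concurrency)
    timeout = aiohttp.ClientTimeout(total=None, sock_read=60)  # drop the 300 s overall cap, keep a per-read limit
    async with aiohttp.ClientSession(timeout=timeout) as session:
        return await asyncio.gather(*(fetch(session, sem, u) for u in url_links))

Note also that the time.sleep(1) calls in get_tasks do not actually space the requests out (the request coroutines only start running inside asyncio.gather), so with a concurrency cap in place they can simply be dropped.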

My discord bot keeps getting disconnected for some reason

I haven't changed my bot in weeks, and for some reason I have been getting an error message like this every day for the past 5 or so days: https://imgur.com/VCLx2kv
I don't think the error is caused by my code, apart from the whole loop thing, which I don't know how to fix and which hasn't caused me any problems before; in case you're curious about that part, the code that causes the issue is below.
I have already tried regenerating my token.
@client.event
async def dead_check():
    i = 1
    d = datetime.now()
    date = str(d.strftime("%Y-%m-%d"))
    server = client.get_server(id='105388450575859712')
    while i == 1:
        async for message in client.logs_from(discord.Object(id='561667365927124992'), limit=9999999):
            if date in message.content:
                usid = message.content.split('=')
                usid1 = usid[1].split(' ')
                count = message.content.split('#')
                cd = message.content.split('?')
                ev = cd[1]
                if ev == '00':
                    number = 0
                elif ev == '01':
                    number = 1
                elif ev == '10':
                    number = 2
                elif ev == '11':
                    number = 3
                name = count[0]
                await client.send_message(discord.Object(id='339182193911922689'), '#here\n' + name + ' has reached the deadline for the **FRICKLING** program.\nThe user has attended ' + str(number) + ' events.')
        async for message in client.logs_from(discord.Object(id='567328773922619392'), limit=9999):
            if date in message.content and message.reactions:
                usid = message.content.split(' ')
                user = await client.get_user_info(usid[0])
                await client.send_message(discord.Object(id='567771853796540465'), user.mention + ' needs to be paid, if you have already paid him - react with :HYPERS:')
                await client.delete_message(message)
        await asyncio.sleep(60*60*24)

@client.event
async def on_ready():
    await client.change_presence(game=Game(name='with nuclear waste'))
    print('Ready, bitch')

asyncio.get_event_loop().run_until_complete(dead_check())
Have you tried reducing the limit of those logs_from calls? 9999999 is a pretty big number, and it may have slowed things down enough that the heartbeat isn't being sent at the proper times. You should also sanitize that image of the error message, it contains your bot token.
Credit to Patrick Haugh - I wanted to close this thread and he didn't post it as an answer.
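A sketch of the suggested change, reusing the call from the question (the limit of 500 is arbitrary; the point is simply to avoid walking millions of messages in a single pass):

# bound the history scan instead of using limit=9999999
async for message in client.logs_from(discord.Object(id='561667365927124992'), limit=500):
    if date in message.content:
        ...  # same processing as in dead_check() above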

Fetch datastore entity by id inside of a Dataflow transform

I have 2 datastore models:
class KindA(ndb.Model):
    field_a1 = ndb.StringProperty()
    field_a2 = ndb.StringProperty()

class KindB(ndb.Model):
    field_b1 = ndb.StringProperty()
    field_b2 = ndb.StringProperty()
    key_to_kind_a = ndb.KeyProperty(KindA)
I want to query KindB and output it to a csv file, but if an entity of KindB points to an entity in KindA I want those fields to be present in the csv as well.
If I were able to use ndb inside of a transform, I would set up my pipeline like this:
def format(element):  # element is an `entity_pb2` object of KindB
    try:
        obj_a_key_id = element.properties.get('key_to_kind_a', None).key_value.path[0]
    except:
        obj_a_key_id = None

    # <<<<<<<<<<<<<<<<<<<<<<<<<<<<<< HOW DO I DO THIS
    obj_a = ndb.Key(KindA, obj_a_key_id).get() if obj_a_key_id else None

    return ",".join([
        element.properties.get('field_b1', None).string_value,
        element.properties.get('field_b2', None).string_value,
        obj_a.properties.get('field_a1', None).string_value if obj_a else '',
        obj_a.properties.get('field_a2', None).string_value if obj_a else '',
    ])

def build_pipeline(project, start_date, end_date, export_path):
    query = query_pb2.Query()
    query.kind.add().name = 'KindB'
    filter_1 = datastore_helper.set_property_filter(query_pb2.Filter(), 'field_b1', PropertyFilter.GREATER_THAN, start_date)
    filter_2 = datastore_helper.set_property_filter(query_pb2.Filter(), 'field_b1', PropertyFilter.LESS_THAN, end_date)
    datastore_helper.set_composite_filter(query.filter, CompositeFilter.AND, filter_1, filter_2)

    p = beam.Pipeline(options=pipeline_options)
    _ = (p
         | 'read from datastore' >> ReadFromDatastore(project, query, None)
         | 'format' >> beam.Map(format)
         | 'write' >> apache_beam.io.WriteToText(
                file_path_prefix=export_path,
                file_name_suffix='.csv',
                header='field_b1,field_b2,field_a1,field_a2',
                num_shards=1)
    )
    return p
I suppose I could use ReadFromDatastore to query all entities of KindA and then use CoGroupByKey to merge them, but KindA has millions of records and that would be very inefficient.
Per the recommendations in this answer: https://stackoverflow.com/a/49130224/4458510
I created the following utils, which were inspired by the source code of
DatastoreWriteFn in apache_beam.io.gcp.datastore.v1.datastoreio and of
write_mutations and fetch_entities in apache_beam.io.gcp.datastore.v1.helper.
import logging
import time
from socket import error as _socket_error

from apache_beam.metrics import Metrics
from apache_beam.transforms import DoFn, window
from apache_beam.utils import retry
from apache_beam.io.gcp.datastore.v1.adaptive_throttler import AdaptiveThrottler
from apache_beam.io.gcp.datastore.v1.helper import make_partition, retry_on_rpc_error, get_datastore
from apache_beam.io.gcp.datastore.v1.util import MovingSum
from apache_beam.utils.windowed_value import WindowedValue
from google.cloud.proto.datastore.v1 import datastore_pb2, query_pb2
from googledatastore.connection import Datastore, RPCError

_WRITE_BATCH_INITIAL_SIZE = 200
_WRITE_BATCH_MAX_SIZE = 500
_WRITE_BATCH_MIN_SIZE = 10
_WRITE_BATCH_TARGET_LATENCY_MS = 5000

def _fetch_keys(project_id, keys, datastore, throttler, rpc_stats_callback=None, throttle_delay=1):
    req = datastore_pb2.LookupRequest()
    req.project_id = project_id
    for key in keys:
        req.keys.add().CopyFrom(key)

    @retry.with_exponential_backoff(num_retries=5, retry_filter=retry_on_rpc_error)
    def run(request):
        # Client-side throttling.
        while throttler.throttle_request(time.time() * 1000):
            logging.info("Delaying request for %ds due to previous failures", throttle_delay)
            time.sleep(throttle_delay)
            if rpc_stats_callback:
                rpc_stats_callback(throttled_secs=throttle_delay)
        try:
            start_time = time.time()
            response = datastore.lookup(request)
            end_time = time.time()
            if rpc_stats_callback:
                rpc_stats_callback(successes=1)
            throttler.successful_request(start_time * 1000)
            commit_time_ms = int((end_time - start_time) * 1000)
            return response, commit_time_ms
        except (RPCError, _socket_error):
            if rpc_stats_callback:
                rpc_stats_callback(errors=1)
            raise

    return run(req)
# Copied from _DynamicBatchSizer in apache_beam.io.gcp.datastore.v1.datastoreio
class _DynamicBatchSizer(object):
    """Determines request sizes for future Datastore RPCs."""

    def __init__(self):
        self._commit_time_per_entity_ms = MovingSum(window_ms=120000, bucket_ms=10000)

    def get_batch_size(self, now):
        """Returns the recommended size for datastore RPCs at this time."""
        if not self._commit_time_per_entity_ms.has_data(now):
            return _WRITE_BATCH_INITIAL_SIZE
        recent_mean_latency_ms = (self._commit_time_per_entity_ms.sum(now) / self._commit_time_per_entity_ms.count(now))
        return max(_WRITE_BATCH_MIN_SIZE,
                   min(_WRITE_BATCH_MAX_SIZE,
                       _WRITE_BATCH_TARGET_LATENCY_MS / max(recent_mean_latency_ms, 1)))

    def report_latency(self, now, latency_ms, num_mutations):
        """Reports the latency of an RPC to Datastore.

        Args:
          now: double, completion time of the RPC as seconds since the epoch.
          latency_ms: double, the observed latency in milliseconds for this RPC.
          num_mutations: int, number of mutations contained in the RPC.
        """
        self._commit_time_per_entity_ms.add(now, latency_ms / num_mutations)
class LookupKeysFn(DoFn):
    """A `DoFn` that looks up keys in the Datastore."""

    def __init__(self, project_id, fixed_batch_size=None):
        self._project_id = project_id
        self._datastore = None
        self._fixed_batch_size = fixed_batch_size
        self._rpc_successes = Metrics.counter(self.__class__, "datastoreRpcSuccesses")
        self._rpc_errors = Metrics.counter(self.__class__, "datastoreRpcErrors")
        self._throttled_secs = Metrics.counter(self.__class__, "cumulativeThrottlingSeconds")
        self._throttler = AdaptiveThrottler(window_ms=120000, bucket_ms=1000, overload_ratio=1.25)
        self._elements = []
        self._batch_sizer = None
        self._target_batch_size = None

    def _update_rpc_stats(self, successes=0, errors=0, throttled_secs=0):
        """Callback function, called by _fetch_keys()"""
        self._rpc_successes.inc(successes)
        self._rpc_errors.inc(errors)
        self._throttled_secs.inc(throttled_secs)

    def start_bundle(self):
        """(Re)initialize: connection with datastore, _DynamicBatchSizer obj"""
        self._elements = []
        self._datastore = get_datastore(self._project_id)
        if self._fixed_batch_size:
            self._target_batch_size = self._fixed_batch_size
        else:
            self._batch_sizer = _DynamicBatchSizer()
            self._target_batch_size = self._batch_sizer.get_batch_size(time.time()*1000)

    def process(self, element):
        """Collect elements and process them as a batch"""
        self._elements.append(element)
        if len(self._elements) >= self._target_batch_size:
            return self._flush_batch()

    def finish_bundle(self):
        """Flush any remaining elements"""
        if self._elements:
            objs = self._flush_batch()
            for obj in objs:
                yield WindowedValue(obj, window.MAX_TIMESTAMP, [window.GlobalWindow()])

    def _flush_batch(self):
        """Fetch all of the collected keys from datastore"""
        response, latency_ms = _fetch_keys(
            project_id=self._project_id,
            keys=self._elements,
            datastore=self._datastore,
            throttler=self._throttler,
            rpc_stats_callback=self._update_rpc_stats)
        logging.info("Successfully read %d keys in %dms.", len(self._elements), latency_ms)
        if not self._fixed_batch_size:
            now = time.time()*1000
            self._batch_sizer.report_latency(now, latency_ms, len(self._elements))
            self._target_batch_size = self._batch_sizer.get_batch_size(now)
        self._elements = []
        return [entity_result.entity for entity_result in response.found]
class LookupEntityFieldFn(LookupKeysFn):
    """
    Looks up a field on an EntityPb2 object.

    Expects an EntityPb2 object as input.
    Outputs a tuple, where the first element is the input object and the second
    element is the object found during the lookup.
    """

    def __init__(self, project_id, field_name, fixed_batch_size=None):
        super(LookupEntityFieldFn, self).__init__(project_id=project_id, fixed_batch_size=fixed_batch_size)
        self._field_name = field_name

    @staticmethod
    def _pb2_key_value_to_tuple(kv):
        """Converts a key_value object into a tuple, so that it can be a dictionary key"""
        path = []
        for p in kv.path:
            path.append(p.name)
            path.append(p.id)
        return tuple(path)

    def _flush_batch(self):
        _elements = self._elements
        keys_to_fetch = []
        for element in self._elements:
            kv = element.properties.get(self._field_name, None)
            if kv and kv.key_value and kv.key_value.path:
                keys_to_fetch.append(kv.key_value)
        self._elements = keys_to_fetch
        read_keys = super(LookupEntityFieldFn, self)._flush_batch()

        _by_key = {self._pb2_key_value_to_tuple(entity.key): entity for entity in read_keys}
        output_pairs = []
        for input_obj in _elements:
            kv = input_obj.properties.get(self._field_name, None)
            output_obj = None
            if kv and kv.key_value and kv.key_value.path:
                output_obj = _by_key.get(self._pb2_key_value_to_tuple(kv.key_value), None)
            output_pairs.append((input_obj, output_obj))
        return output_pairs
The key to this is the line response = datastore.lookup(request), where:
datastore = get_datastore(project_id) (from apache_beam.io.gcp.datastore.v1.helper.get_datastore)
request is a LookupRequest from google.cloud.proto.datastore.v1.datastore_pb2
response is LookupResponse from google.cloud.proto.datastore.v1.datastore_pb2
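Stripped of the batching and throttling machinery, the core lookup amounts to something like this (a condensed sketch using the same helpers as the code above):

from apache_beam.io.gcp.datastore.v1.helper import get_datastore
from google.cloud.proto.datastore.v1 import datastore_pb2

def lookup_keys(project_id, key_pbs):
    datastore = get_datastore(project_id)          # Datastore connection
    req = datastore_pb2.LookupRequest()            # request carrying the keys to fetch
    req.project_id = project_id
    for key in key_pbs:
        req.keys.add().CopyFrom(key)
    response = datastore.lookup(req)               # LookupResponse
    return [r.entity for r in response.found]      # entities that were actually found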
The rest of the above code does things like:
using a single connection to the datastore for a DoFn bundle
batching keys together before performing a lookup request
throttling interactions with the datastore if requests start to fail
(honestly I don't know how critical these bits are, I just came across them when browsing the apache_beam source code)
The resulting utility, LookupEntityFieldFn(project_id, field_name), is a DoFn that takes an entity_pb2 object as input, extracts and fetches the key property stored in the field field_name, and outputs the result as a tuple (the fetched result paired with the input object).
My Pipeline code then became
def format(element):  # element is a tuple of `entity_pb2` objects
    kind_b_element, kind_a_element = element
    return ",".join([
        kind_b_element.properties.get('field_b1', None).string_value,
        kind_b_element.properties.get('field_b2', None).string_value,
        kind_a_element.properties.get('field_a1', None).string_value if kind_a_element else '',
        kind_a_element.properties.get('field_a2', None).string_value if kind_a_element else '',
    ])

def build_pipeline(project, start_date, end_date, export_path):
    query = query_pb2.Query()
    query.kind.add().name = 'KindB'
    filter_1 = datastore_helper.set_property_filter(query_pb2.Filter(), 'field_b1', PropertyFilter.GREATER_THAN, start_date)
    filter_2 = datastore_helper.set_property_filter(query_pb2.Filter(), 'field_b1', PropertyFilter.LESS_THAN, end_date)
    datastore_helper.set_composite_filter(query.filter, CompositeFilter.AND, filter_1, filter_2)

    p = beam.Pipeline(options=pipeline_options)
    _ = (p
         | 'read from datastore' >> ReadFromDatastore(project, query, None)
         | 'extract field' >> apache_beam.ParDo(LookupEntityFieldFn(project_id=project, field_name='key_to_kind_a'))
         | 'format' >> beam.Map(format)
         | 'write' >> apache_beam.io.WriteToText(
                file_path_prefix=export_path,
                file_name_suffix='.csv',
                header='field_b1,field_b2,field_a1,field_a2',
                num_shards=1)
    )
    return p

How to get the current bidding price for a contract

Can someone help me get started with doing some basic things with IbPy? Using IbPy, I just want to be able to query the current bid price for an instrument, such as the price of a single share in Google, or the current EUR/USD exchange rate.
I found the example at the bottom of the page here:
Fundamental Data Using IbPy
useful, but the output is somewhat confusing. How do I print to screen just the current bid/ask price of a single contract?
(Just some bio info - yes I am new to IBPY and python - but I do have over 20 years experience with C)
Many kind thanks in advance!
Using the example you referred to, with slight changes:
import signal

from ib.opt import ibConnection, message
from ib.ext.Contract import Contract

def price_handler(msg):
    if msg.field == 1:
        print("bid price = %s" % msg.price)
    elif msg.field == 2:
        print("ask price = %s" % msg.price)

def main():
    tws = ibConnection(port=7497)
    tws.register(price_handler, message.tickPrice)
    tws.connect()

    tick_id = 1
    c = Contract()
    c.m_symbol = 'AAPL'
    c.m_secType = 'STK'
    c.m_exchange = "SMART"
    c.m_currency = "USD"
    tws.reqMktData(tick_id, c, '', False)

    signal.pause()

if __name__ == '__main__':
    main()
Output:
bid price = 149.55
ask price = 149.56
bid price = 149.59
ask price = 149.61
...
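As a possible follow-up, assuming IbPy mirrors the standard TWS client calls (this is not part of the original answer): once enough ticks have been printed, the subscription can be cancelled and the connection closed.

# tidy up, reusing tws and tick_id from main() above
tws.cancelMktData(tick_id)   # stop streaming ticks for this request id
tws.disconnect()             # close the connection to TWS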
