Where is the urllib3 connection pool bound? - python-requests

If a new requests session is instantiated in different methods like so ...
myutil.py
import requests
def method1():
    s1 = requests.Session()

def method2():
    s2 = requests.Session()
Where is the requests.packages.urllib3.poolmanager.PoolManager bound? Is it bound globally so that the connection pool is shared between s1 and s2, or is it bound to each method's stack so that s1 and s2 have different connection pools?
If it is bound to each method's stack, what options do I have to share the connection pool? For example, should I create a transport adapter and then share that between the methods:
myutil.py
import requests
from requests.adapters import HTTPAdapter
httpAdapter = HTTPAdapter(pool_connections=10, pool_maxsize=100)
def method1():
    s1 = requests.Session()
    s1.mount('https://', httpAdapter)

def method2():
    s2 = requests.Session()
    s2.mount('https://', httpAdapter)

I have done some testing and it seems that the pool does NOT get bound globally.
See here for more info.
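That matches how requests works: each Session constructs its own HTTPAdapter, and each adapter owns its own urllib3 PoolManager, so s1 and s2 do not share a pool. Sharing the Session (or a single mounted adapter) at module level is the usual way around that. A minimal sketch, where the _shared_session name and the example URLs are just illustrative:

# myutil.py
import requests
from requests.adapters import HTTPAdapter

# One module-level session -> one HTTPAdapter -> one shared urllib3 PoolManager
_shared_session = requests.Session()
_shared_session.mount('https://', HTTPAdapter(pool_connections=10, pool_maxsize=100))

def method1():
    # Both methods reuse connections from the same pool
    return _shared_session.get('https://example.com/endpoint1')

def method2():
    return _shared_session.get('https://example.com/endpoint2')

Sharing one HTTPAdapter across several Sessions (as in the second snippet above) also works, since the pool lives on the adapter, not on the Session.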

Related

FastAPI Custom Websocket Object

I want to be able to create a custom WebSocket object rather than using Starlette's so that I can add some more things in the constructor and add some more methods. In FastAPI, you're able to subclass the APIRoute and pass in your own Request object. How would I do the same for the WebSocket router?
As you say, there doesn't seem to be an easy way to set the websocket route class (short of a lot of subclassing and rewriting). I think the simplest approach is to define your own wrapper class around the websocket, taking whatever extra data you want, and then define the methods you need on it. You can then inject that as a dependency, either with a separate function or by using the class itself as a dependency (see the documentation for details), which is what I'm doing below.
I've put together a minimal example, where the URL parameter name is passed to the wrapper class:
# main.py
from fastapi import Depends, FastAPI, WebSocket

app = FastAPI()

class WsWrapper:
    def __init__(self, websocket: WebSocket, name: str) -> None:
        self.name = name
        self.websocket = websocket

    # You can define all your custom logic here, I'm just adding a print
    async def receive_json(self, mode: str = "text"):
        print(f"Hello from {self.name}", flush=True)
        return await self.websocket.receive_json(mode)

@app.websocket("/{name}")
async def websocket(ws: WsWrapper = Depends()):
    await ws.websocket.accept()
    while True:
        data = await ws.receive_json()
        print(data, flush=True)
You can test it by running uvicorn main:app and connecting to ws://localhost:8000/test, and it should print "Hello from test" when receiving JSON.
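If you'd rather test from a script than from a browser websocket client, something along these lines should work (a sketch assuming the third-party websockets package is installed; the payload is arbitrary):

# client.py - quick manual check of the endpoint above
import asyncio
import json

import websockets  # pip install websockets

async def main():
    async with websockets.connect("ws://localhost:8000/test") as ws:
        # The server side should print "Hello from test" followed by this payload
        await ws.send(json.dumps({"msg": "hi"}))
        await asyncio.sleep(0.5)  # give the server a moment to process before closing

asyncio.run(main())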
Ended up just monkeypatching the modules. Track this PR for when monkeypatching isn't necessary: https://github.com/tiangolo/fastapi/pull/4968
from typing import Callable

from fastapi import routing as fastapi_routing
from starlette._utils import is_async_callable
from starlette.concurrency import run_in_threadpool
from starlette.requests import Request as StarletteRequest
from starlette.websockets import WebSocket as StarletteWebSocket
from starlette.types import ASGIApp, Receive, Scope, Send

class Request(StarletteRequest):
    pass

class WebSocket(StarletteWebSocket):
    pass

def request_response(func: Callable) -> ASGIApp:
    """
    Takes a function or coroutine `func(request) -> response`,
    and returns an ASGI application.
    """
    is_coroutine = is_async_callable(func)

    async def app(scope: Scope, receive: Receive, send: Send) -> None:
        # Build our Request subclass instead of Starlette's
        request = Request(scope, receive=receive, send=send)
        if is_coroutine:
            response = await func(request)
        else:
            response = await run_in_threadpool(func, request)
        await response(scope, receive, send)

    return app

fastapi_routing.request_response = request_response

def websocket_session(func: Callable) -> ASGIApp:
    """
    Takes a coroutine `func(session)`, and returns an ASGI application.
    """
    # assert asyncio.iscoroutinefunction(func), "WebSocket endpoints must be async"

    async def app(scope: Scope, receive: Receive, send: Send) -> None:
        # Build our WebSocket subclass instead of Starlette's
        session = WebSocket(scope, receive=receive, send=send)
        await func(session)

    return app

fastapi_routing.websocket_session = websocket_session
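For completeness, here is roughly how I'd expect the patch to be used. A sketch only: the module name patched is my own invention, I'm assuming the patch module is imported before any routes are registered (websocket_session is called at route-definition time), and that FastAPI accepts the subclass annotation for the websocket parameter.

# app.py (illustrative)
from fastapi import FastAPI

import patched  # hypothetical name for the module containing the monkeypatches above
from patched import WebSocket  # our WebSocket subclass

app = FastAPI()

@app.websocket("/ws")
async def ws_endpoint(websocket: WebSocket):
    await websocket.accept()
    # With websocket_session patched, the injected object is our subclass,
    # so any extra attributes/methods defined on it are available here.
    print(isinstance(websocket, WebSocket), flush=True)
    data = await websocket.receive_json()
    await websocket.send_json({"echo": data})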

Track Python simple HTTP server logging information in the Azure Application Insights Application Map

We have different microservices (function apps, VM servers, etc.) logging to Application Insights. A simple Python HTTP server is hosted on a Linux VM; I want this server to receive a traceparent HTTP header (W3C tracing) and log the information to Application Insights. This Python server should appear as a separate node in the Application Map.
I am able to extract the span context from the traceparent HTTP header and use it to log the information, but I am not able to view it as a separate node in the Application Map.
There are middlewares for Flask and Django that trace requests, but there is no ready-made solution for a plain Python HTTP server.
The goal is to have this Python server on the VM represented as a separate node in the Application Map.
My Python script is attached for reference (this code was written using the code from the Flask middleware).
import six
import logging
import sys

from opencensus.ext.azure.log_exporter import AzureLogHandler
from google.rpc import code_pb2
from opencensus.ext.azure.trace_exporter import AzureExporter
from opencensus.common import configuration
from opencensus.trace import (
    attributes_helper,
    execution_context,
    print_exporter,
    samplers,
)
from opencensus.trace import span as span_module
from opencensus.trace import stack_trace, status
from opencensus.trace import tracer as tracer_module
from opencensus.trace import utils
from opencensus.trace.propagation import trace_context_http_header_format
from opencensus.trace import config_integration

HTTP_HOST = attributes_helper.COMMON_ATTRIBUTES['HTTP_HOST']
HTTP_METHOD = attributes_helper.COMMON_ATTRIBUTES['HTTP_METHOD']
HTTP_PATH = attributes_helper.COMMON_ATTRIBUTES['HTTP_PATH']
HTTP_ROUTE = attributes_helper.COMMON_ATTRIBUTES['HTTP_ROUTE']
HTTP_URL = attributes_helper.COMMON_ATTRIBUTES['HTTP_URL']
HTTP_STATUS_CODE = attributes_helper.COMMON_ATTRIBUTES['HTTP_STATUS_CODE']
EXCLUDELIST_PATHS = 'EXCLUDELIST_PATHS'
EXCLUDELIST_HOSTNAMES = 'EXCLUDELIST_HOSTNAMES'

config_integration.trace_integrations(['logging'])

trace_parent_header = "00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01"
APP_INSIGHTS_KEY = "KEY HERE"

logging.basicConfig(
    format='%(asctime)s traceId=%(traceId)s spanId=%(spanId)s %(message)s')
log = logging.getLogger(__name__)

def callback_function(envelope):
    envelope.tags['ai.cloud.role'] = 'Pixm Agent'

handler = AzureLogHandler(
    connection_string='InstrumentationKey=' + APP_INSIGHTS_KEY)
handler.setFormatter(logging.Formatter('%(traceId)s %(spanId)s %(message)s'))
handler.add_telemetry_processor(callback_function)
log.addHandler(handler)

propagator = trace_context_http_header_format.TraceContextPropagator()
sampler = samplers.ProbabilitySampler(rate=1.0)
exporter = AzureExporter(
    connection_string='InstrumentationKey=' + APP_INSIGHTS_KEY)
exporter.add_telemetry_processor(callback_function)

try:
    span_context = propagator.from_headers(
        {"traceparent": trace_parent_header})
    log.info("he...")
    tracer = tracer_module.Tracer(
        span_context=span_context,
        sampler=sampler,
        exporter=exporter,
        propagator=propagator)
    span = tracer.start_span()
    span.span_kind = span_module.SpanKind.SERVER
    # Set the span name as the name of the current module name
    span.name = '[{}]{}'.format(
        'get',
        'testurl')
    tracer.add_attribute_to_current_span(
        HTTP_HOST, 'testurlhost'
    )
    tracer.add_attribute_to_current_span(
        HTTP_METHOD, 'get'
    )
    tracer.add_attribute_to_current_span(
        HTTP_PATH, 'testurlpath'
    )
    tracer.add_attribute_to_current_span(
        HTTP_URL, str('testurl')
    )
    # execution_context.set_opencensus_attr(
    #     'excludelist_hostnames',
    #     self.excludelist_hostnames
    # )
    with tracer.span(name="main-ashish"):
        for i in range(0, 10):
            log.warning("identity logs..." + str(i))
except Exception:  # pragma: NO COVER
    log.error('Failed to trace request', exc_info=True)
The Application Map finds components by following HTTP dependency calls made between servers with the Application Insights SDK installed.
OpenCensus Python telemetry processors
You can modify cloud_RoleName by changing the ai.cloud.role attribute in the tags field.
def callback_function(envelope):
    envelope.tags['ai.cloud.role'] = 'new_role_name'

# AzureLogHandler
handler.add_telemetry_processor(callback_function)

# AzureExporter
exporter.add_telemetry_processor(callback_function)
Correlation headers using W3C TraceContext to log the information to Application Insights
Application Insights is transitioning to W3C Trace-Context, which defines:
traceparent: Carries the globally unique operation ID and unique identifier of the call.
tracestate: Carries system-specific tracing context.
The latest version of the Application Insights SDK supports the Trace-Context protocol.
The correlation HTTP protocol, also called Request-Id, is being deprecated. This protocol defines two headers:
Request-Id: Carries the globally unique ID of the call.
Correlation-Context: Carries the name-value pairs collection of the distributed trace properties.
import logging

from opencensus.trace import config_integration
from opencensus.trace.samplers import AlwaysOnSampler
from opencensus.trace.tracer import Tracer

config_integration.trace_integrations(['logging'])
logging.basicConfig(format='%(asctime)s traceId=%(traceId)s spanId=%(spanId)s %(message)s')

tracer = Tracer(sampler=AlwaysOnSampler())
logger = logging.getLogger(__name__)

logger.warning('Before the span')
with tracer.span(name='hello'):
    logger.warning('In the span')
logger.warning('After the span')
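Putting the two pieces together for the simple HTTP server: extract the span context from the incoming traceparent with the TraceContextPropagator (as in the question's script) and attach a telemetry processor that sets a distinct ai.cloud.role, which is what makes the VM appear as its own node. A rough sketch; the role name, instrumentation key, and header value are placeholders:

from opencensus.ext.azure.trace_exporter import AzureExporter
from opencensus.trace import samplers
from opencensus.trace import span as span_module
from opencensus.trace import tracer as tracer_module
from opencensus.trace.propagation import trace_context_http_header_format

def set_role(envelope):
    # A distinct cloud role name is what produces a separate node in the Application Map
    envelope.tags['ai.cloud.role'] = 'simple-http-server'

exporter = AzureExporter(connection_string='InstrumentationKey=<your key>')
exporter.add_telemetry_processor(set_role)

propagator = trace_context_http_header_format.TraceContextPropagator()
# e.g. self.headers.get('traceparent') inside a BaseHTTPRequestHandler.do_GET
incoming = "00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01"

tracer = tracer_module.Tracer(
    span_context=propagator.from_headers({"traceparent": incoming}),  # joins the caller's trace
    sampler=samplers.ProbabilitySampler(rate=1.0),
    exporter=exporter,
    propagator=propagator,
)

with tracer.span(name='[get] /testurlpath') as span:
    span.span_kind = span_module.SpanKind.SERVER  # report as an incoming request
    # ... handle the request here ...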
You can refer to Application Map: Triage Distributed Applications, Telemetry correlation in Application Insights, and Track incoming requests with OpenCensus Python

How to disable "check_hostname" using Requests library and Python 3.8.5?

Using the latest Requests library and Python 3.8.5, I can't seem to disable certificate checking on my API call. I understand the reasons not to disable it, but I'd like this to work.
When I attempt to use verify=True, the servers I connect to throw this error:
(Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1123)')))
When I attempt to use verify=False, I get:
Error making PS request to [<redacted server name>] at URL https://<redacted server name/rest/v2/api_endpoint: Cannot set verify_mode to CERT_NONE when check_hostname is enabled.
I don't know how to also disable "check_hostname" as I haven't seen a way to do that with the requests library (which I plan to keep and use).
My code:
self.ps_server = server
self.ps_base_url = 'https://{}/rest/v2/'.format(self.ps_server)
url = self.ps_base_url + endpoint
response = None
try:
    if req_type == 'POST':
        response = requests.post(url, json=post_data, auth=(self.ps_username, self.ps_password), verify=self.verify, timeout=60)
        return json.loads(response.text)
    elif req_type == 'GET':
        response = requests.get(url, auth=(self.ps_username, self.ps_password), verify=self.verify, timeout=60)
        if response.status_code == 200:
            return json.loads(response.text)
        else:
            logging.error("Error making PS request to [{}] at URL {} [{}]".format(server, url, response.status_code))
            return {'status': 'error', 'trace': '{} - {}'.format(response.text, response.status_code)}
    elif req_type == 'DELETE':
        response = requests.delete(url, auth=(self.ps_username, self.ps_password), verify=self.verify, timeout=60)
        return response.text
    elif req_type == 'PUT':
        response = requests.put(url, json=post_data, auth=(self.ps_username, self.ps_password), verify=self.verify, timeout=60)
        return response.text
except Exception as e:
    logging.error("Error making PS request to [{}] at URL {}: {}".format(server, url, e))
    return {'status': 'error', 'trace': '{}'.format(e)}
Can someone shed some light on how I can disable check_hostname as well, so that I can test this without SSL checking?
If you have pip-system-certs installed, it monkey-patches requests as well, and that turns out to be what's causing this. Here's a link to the code: https://gitlab.com/alelec/pip-system-certs/-/blob/master/pip_system_certs/wrapt_requests.py
After digging through the requests and urllib3 source for a while, this is the culprit in pip-system-certs:
ssl_context = ssl.create_default_context()
ssl_context.load_default_certs()
kwargs['ssl_context'] = ssl_context
That dict is used to grab an ssl_context later from a urllib3 connection pool but it has .check_hostname set to True on it.
As far as replacing the utility of the pip-system-certs package, I think forking it and making it only monkey-patch pip would be the right way forward. That or just adding --trusted-host args to any pip install commands.
EDIT:
Here's how it's normally initialized through requests (versions I'm using):
https://github.com/psf/requests/blob/v2.21.0/requests/adapters.py#L163
def init_poolmanager(self, connections, maxsize, block=DEFAULT_POOLBLOCK, **pool_kwargs):
    """Initializes a urllib3 PoolManager.

    This method should not be called from user code, and is only
    exposed for use when subclassing the
    :class:`HTTPAdapter <requests.adapters.HTTPAdapter>`.

    :param connections: The number of urllib3 connection pools to cache.
    :param maxsize: The maximum number of connections to save in the pool.
    :param block: Block when no free connections are available.
    :param pool_kwargs: Extra keyword arguments used to initialize the Pool Manager.
    """
    # save these values for pickling
    self._pool_connections = connections
    self._pool_maxsize = maxsize
    self._pool_block = block

    # NOTE: pool_kwargs doesn't have ssl_context in it
    self.poolmanager = PoolManager(num_pools=connections, maxsize=maxsize,
                                   block=block, strict=True, **pool_kwargs)
And here's how it's monkey-patched:
def init_poolmanager(self, *args, **kwargs):
    import ssl
    ssl_context = ssl.create_default_context()
    ssl_context.load_default_certs()
    kwargs['ssl_context'] = ssl_context
    return super(SslContextHttpAdapter, self).init_poolmanager(*args, **kwargs)
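If the goal is just to get verify=False working again for testing while pip-system-certs is installed, one option is to mount an adapter that supplies its own SSLContext with hostname checking already switched off, so urllib3 can then set CERT_NONE on it without complaining. A sketch only; NoVerifyAdapter is my own name for it, and this should only ever be pointed at test systems:

import ssl

import requests
from requests.adapters import HTTPAdapter

class NoVerifyAdapter(HTTPAdapter):
    """Adapter whose pool uses an SSLContext with hostname checking disabled."""

    def init_poolmanager(self, *args, **kwargs):
        ctx = ssl.create_default_context()
        ctx.check_hostname = False          # must be switched off before verify_mode
        ctx.verify_mode = ssl.CERT_NONE     # skip certificate verification entirely
        kwargs['ssl_context'] = ctx
        return super().init_poolmanager(*args, **kwargs)

session = requests.Session()
session.mount('https://', NoVerifyAdapter())
# Call with verify=False so requests maps cert_reqs to CERT_NONE on this context:
# response = session.get('https://your-server/rest/v2/api_endpoint', verify=False)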

Pass the URL consumed from RabbitMQ into the parse method in Scrapy

I am using Scrapy to consume messages (URLs) from RabbitMQ, but when I use yield to call the parse method with my URL as a parameter, the program never enters the callback method. Below is the code of my spider:
# -*- coding: utf-8 -*-
import scrapy
import pika
from scrapy import cmdline
import json

class MydeletespiderSpider(scrapy.Spider):
    name = 'Mydeletespider'
    allowed_domains = []
    start_urls = []

    def callback(self, ch, method, properties, body):
        print(" [x] Received %r" % body)
        body = json.loads(body)
        url = body.get('url')
        yield scrapy.Request(url=url, callback=self.parse)

    def start_requests(self):
        cre = pika.PlainCredentials('test', 'test')
        connection = pika.BlockingConnection(
            pika.ConnectionParameters(host='10.0.12.103', port=5672, credentials=cre, socket_timeout=60))
        channel = connection.channel()
        channel.basic_consume(self.callback,
                              queue='Deletespider_Batch_Test',
                              no_ack=True)
        print(' [*] Waiting for messages. To exit press CTRL+C')
        channel.start_consuming()

    def parse(self, response):
        print(response.url)

cmdline.execute('scrapy crawl Mydeletespider'.split())
My goal is to pass the URL's response to the parse method.
To consume urls from rabbitmq you can take a look at scrapy-rabbitmq package:
Scrapy-rabbitmq is a tool that lets you feed and queue URLs from RabbitMQ via Scrapy spiders, using the Scrapy framework.
To enable it, set these values in your settings.py:
# Enables scheduling storing requests queue in rabbitmq.
SCHEDULER = "scrapy_rabbitmq.scheduler.Scheduler"
# Don't cleanup rabbitmq queues, allows to pause/resume crawls.
SCHEDULER_PERSIST = True
# Schedule requests using a priority queue. (default)
SCHEDULER_QUEUE_CLASS = 'scrapy_rabbitmq.queue.SpiderQueue'
# RabbitMQ Queue to use to store requests
RABBITMQ_QUEUE_NAME = 'scrapy_queue'
# Provide host and port to RabbitMQ daemon
RABBITMQ_CONNECTION_PARAMETERS = {'host': 'localhost', 'port': 6666}
# Bonus:
# Store scraped item in rabbitmq for post-processing.
# ITEM_PIPELINES = {
# 'scrapy_rabbitmq.pipelines.RabbitMQPipeline': 1
# }
And in your spider:
from scrapy import Spider
from scrapy_rabbitmq.spiders import RabbitMQMixin
class RabbitSpider(RabbitMQMixin, Spider):
name = 'rabbitspider'
def parse(self, response):
# mixin will take urls from rabbit queue by itself
pass
Refer to this: http://30daydo.com/article/512
start_requests(self) should return a generator of requests; otherwise Scrapy won't work.
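Building on that: one way to keep the blocking consumer out of Scrapy's way is to poll RabbitMQ inside start_requests itself and yield a Request per message, so the method really is a generator of requests. A rough sketch, assuming pika >= 1.0 (older pika used no_ack instead of auto_ack) and the same queue and credentials as in the question:

import json

import pika
import scrapy

class MydeletespiderSpider(scrapy.Spider):
    name = 'Mydeletespider'

    def start_requests(self):
        credentials = pika.PlainCredentials('test', 'test')
        connection = pika.BlockingConnection(
            pika.ConnectionParameters(host='10.0.12.103', port=5672,
                                      credentials=credentials, socket_timeout=60))
        channel = connection.channel()
        try:
            while True:
                # Pull one message at a time instead of blocking in start_consuming()
                method, properties, body = channel.basic_get(
                    queue='Deletespider_Batch_Test', auto_ack=True)
                if body is None:
                    break  # queue is empty for now
                url = json.loads(body).get('url')
                if url:
                    yield scrapy.Request(url=url, callback=self.parse)
        finally:
            connection.close()

    def parse(self, response):
        self.logger.info("parsed %s", response.url)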

Qt WebEngine set socks5 proxy

I want to set a SOCKS5 proxy for my Qt WebEngine app. I use PyQt 5.8 and Qt 5.8.
I set up a SOCKS5 server with danted v1.4.1. I tested the SOCKS5 server and it worked fine, but when I use it in my app, danted logs this error:
error after reading 3 bytes in 0 seconds: client offered no acceptable authentication method
This is my code:
def set_proxy():
    from PyQt5.QtNetwork import QNetworkProxy
    from six.moves.urllib import parse as urlparse

    string_proxy = "socks5://username:password@ip:port"
    urlinfo = urlparse.urlparse(string_proxy)
    proxy = QNetworkProxy()
    if urlinfo.scheme == 'socks5':
        proxy.setType(QNetworkProxy.Socks5Proxy)
    else:
        proxy.setType(QNetworkProxy.NoProxy)
    if urlinfo.hostname is not None:
        proxy.setHostName(urlinfo.hostname)
    if urlinfo.port is not None:
        proxy.setPort(urlinfo.port)
    if urlinfo.username is not None:
        proxy.setUser(urlinfo.username)
    else:
        proxy.setUser('')
    if urlinfo.password is not None:
        proxy.setPassword(urlinfo.password)
    else:
        proxy.setPassword('')
    QNetworkProxy.setApplicationProxy(proxy)
Can anyone help me?
update on 2017/03/29
Added a proxyAuthenticationRequired signal handler:
def set_proxy(string_proxy):
    proxy = QNetworkProxy()
    urlinfo = urlparse.urlparse(string_proxy)
    if urlinfo.scheme == 'socks5':
        proxy.setType(QNetworkProxy.Socks5Proxy)
    elif urlinfo.scheme == 'http':
        proxy.setType(QNetworkProxy.HttpProxy)
    else:
        proxy.setType(QNetworkProxy.NoProxy)
    proxy.setHostName(urlinfo.hostname)
    proxy.setPort(urlinfo.port)
    proxy.setUser(urlinfo.username)
    proxy.setPassword(urlinfo.password)
    QNetworkProxy.setApplicationProxy(proxy)

def handleProxyAuthReq(url, auth, proxyhost):
    auth.setUser(username)
    auth.setPassword(password)

webView = QtWebEngineWidgets.QWebEngineView()
# proxy_string = "http://username:password@ip:port"
proxy_string = "socks5://username:password@ip:port"
set_proxy(proxy_string)
webView.page().proxyAuthenticationRequired.connect(handleProxyAuthReq)
I tested it with my HTTP proxy and it worked. But when I use the SOCKS5 proxy, the proxyAuthenticationRequired signal is not emitted.
QtWebEngine does not handle the username/password information from QNetworkProxy:
All other proxy settings such as QNetworkProxy::rawHeader(), QNetworkProxy::user(), or QNetworkProxy::password() are ignored.
You'll need to connect to proxyAuthenticationRequired and handle authentication there.
update on 2017/03/30
Looks like Chromium does not support authentication with SOCKS proxies.
