request params encryption - http API source airbyte connector - http

I am developing an HTTP API source using the airbyte CDK.
When I am using the request params method I am returning the following response:
return {'startIndex': 0, 'resultsPerPage': self.max_results_per_page,
'pubStartDate': "2022-07-30T13:57:21:000 UTC%2B03:00", 'pubEndDate': "2022-07-31T13:57:21:000 UTC%2B03:00"}
But for some reason, those fields are being encrypted and the params that are being passed to the API are:
pubStartDate: "2022-07-30T13%3A57%3A21%3A000+UTC%252B03%3A00"
pubEndDate: "2022-07-31T13%3A57%3A21%3A000+UTC%252B03%3A00"
Is there anything I am doing wrong?
How can I send those date time strings correctly - without them being encrypted?
The API I am using: https://nvd.nist.gov/developers/vulnerabilities
Thanks in advance, any help will be much appreciated!!
The full connector code:
class NvdVulnerabilitiesStream(HttpStream, ABC):
url_base = "https://services.nvd.nist.gov/"
max_results_per_page = 2000
def __init__(self, days_to_fetch: int):
auth = NoAuth()
super().__init__(authenticator=auth)
self.days_to_fetch = days_to_fetch
def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
if response.json()['resultsPerPage'] < self.max_results_per_page:
return None
return {"startIndex": response.json()['startIndex'] + self.max_results_per_page, "resultsPerPage": self.max_results_per_page}
def request_params(
self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, any] = None, next_page_token: Mapping[str, Any] = None
) -> MutableMapping[str, Any]:
if not next_page_token and self.days_to_fetch != -1:
end_time = datetime.now()
start_time = end_time - timedelta(days=self.days_to_fetch)
if not next_page_token:
return {'startIndex': 0, 'resultsPerPage': self.max_results_per_page,
'pubStartDate': start_time.strftime(DATETIME_SCHEME), 'pubEndDate': end_time.strftime(DATETIME_SCHEME)}
if not next_page_token:
return {"startIndex": 0, "resultsPerPage": self.max_results_per_page}
return {"startIndex": next_page_token['startIndex'], "resultsPerPage": next_page_token['resultsPerPage']}
def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
for vulnerability in response.json()["result"]["CVE_Items"]:
yield vulnerability
print(response.json()["startIndex"])
time.sleep(6)
class Vulnerabilities(NvdVulnerabilitiesStream):
primary_key = None
def __init__(self, days_to_fetch: int):
super().__init__(days_to_fetch)
def path(
self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None
) -> str:
return "rest/json/cves/1.0/"```

Related

gRPC: From Node Js, How to send Array of float by using repeated bytes of protobuff to python

I would like to send a list of float list via nodejs and receive it in python using protobuff's repeated bytes type.
The graph helps to understand the problem:
I tried with this configuration and what I get on the python side is not really what I expect:
tensors=[b'-TWW', b'-TWW', b'-TWW', b'-TWW']
Here is my test in node.
Client :
const PROTO_PATH = __dirname + '/route_guide.proto';
const async = require('async');
const grpc = require('#grpc/grpc-js');
const protoLoader = require('#grpc/proto-loader');
const packageDefinition = protoLoader.loadSync(
PROTO_PATH,
{
keepCase: true,
longs: String,
enums: String,
defaults: true,
oneofs: true
});
const routeguide = grpc.loadPackageDefinition(packageDefinition).routeguide;
const client = new routeguide.RouteGuide('localhost:50051',
grpc.credentials.createInsecure());
function runJoin(callback) {
const call = client.join();
call.on('data', function(receivedMessage) {
console.log('Got message "' + JSON.stringify(receivedMessage));
});
call.on('end', callback);
messageToSend = {
msg: 'parameters_res',
parameters_res: {
parameters: {
tensors: [
new Buffer.from(new Float64Array([45.1]).buffer),
new Buffer.from(new Float64Array([45.1, 84.5, 87.9, 87.1]).buffer),
new Buffer.from(new Float64Array([45.1, 84.5, 87.9, 87.1]).buffer),
new Buffer.from(new Float64Array([45.1, 84.5, 87.9, 87.1]).buffer)
],
tensor_type: 'numpy.ndarray'
}
}
}
console.log(messageToSend);
console.log(messageToSend.parameters_res.parameters.tensors)
call.write(messageToSend);
call.end();
}
function main() {
async.series([
runJoin
]);
}
if (require.main === module) {
main();
}
exports.runJoin = runJoin;
route_guide.proto:
syntax = "proto3";
option java_multiple_files = true;
option java_package = "io.grpc.examples.routeguide";
option java_outer_classname = "RouteGuideProto";
option objc_class_prefix = "RTG";
package routeguide;
service RouteGuide {
rpc Join(stream ClientMessage) returns (stream ClientMessage) {}
}
message RouteNote {
repeated bytes model = 1;
}
message ClientMessage {
message Disconnect { Reason reason = 1; }
message ParametersRes { Parameters parameters = 1; }
oneof msg {
Disconnect disconnect = 1;
ParametersRes parameters_res = 2;
}
}
message Parameters {
repeated bytes tensors = 1;
string tensor_type = 2;
}
enum Reason {
UNKNOWN = 0;
RECONNECT = 1;
POWER_DISCONNECTED = 2;
WIFI_UNAVAILABLE = 3;
ACK = 4;
}
Server:
const PROTO_PATH = __dirname + '/route_guide.proto';
const grpc = require('#grpc/grpc-js');
const protoLoader = require('#grpc/proto-loader');
const packageDefinition = protoLoader.loadSync(
PROTO_PATH,
{keepCase: true,
longs: String,
enums: String,
defaults: true,
oneofs: true
});
const routeguide = grpc.loadPackageDefinition(packageDefinition).routeguide;
function join(call) {
call.on('data', function(receivedMessage) {
console.log("SERVER RECEIVE:");
console.log(receivedMessage);
console.log(receivedMessage.parameters_res.parameters.tensors)
for (const element of receivedMessage.parameters_res.parameters.tensors) {
console.log(element)
}
call.write(receivedMessage);
});
call.on('end', function() {
call.end();
});
}
function getServer() {
var server = new grpc.Server();
server.addService(routeguide.RouteGuide.service, {
join: join
});
return server;
}
if (require.main === module) {
var routeServer = getServer();
routeServer.bindAsync('0.0.0.0:50051', grpc.ServerCredentials.createInsecure(), () => {
routeServer.start()
});
}
exports.getServer = getServer;
MyStartegy.py:
from logging import WARNING
from typing import Callable, Dict, List, Optional, Tuple, cast
import numpy as np
import flwr as fl
from flwr.common import (
EvaluateIns,
EvaluateRes,
FitIns,
FitRes,
Parameters,
Scalar,
Weights,
)
from flwr.common.logger import log
from flwr.server.client_manager import ClientManager
from flwr.server.client_proxy import ClientProxy
from flwr.server.strategy.aggregate import aggregate, weighted_loss_avg
from flwr.server.strategy import Strategy
from tensorflow import Tensor
DEPRECATION_WARNING = """
DEPRECATION WARNING: deprecated `eval_fn` return format
loss, accuracy
move to
loss, {"accuracy": accuracy}
instead. Note that compatibility with the deprecated return format will be
removed in a future release.
"""
DEPRECATION_WARNING_INITIAL_PARAMETERS = """
DEPRECATION WARNING: deprecated initial parameter type
flwr.common.Weights (i.e., List[np.ndarray])
will be removed in a future update, move to
flwr.common.Parameters
instead. Use
parameters = flwr.common.weights_to_parameters(weights)
to easily transform `Weights` to `Parameters`.
"""
class MyStrategy(Strategy):
"""Configurable FedAvg strategy implementation."""
# pylint: disable=too-many-arguments,too-many-instance-attributes
def __init__(
self,
fraction_fit: float = 0.1,
fraction_eval: float = 0.1,
min_fit_clients: int = 2,
min_eval_clients: int = 2,
min_available_clients: int = 2,
eval_fn: Optional[
Callable[[Weights], Optional[Tuple[float, Dict[str, Scalar]]]]
] = None,
on_fit_config_fn: Optional[Callable[[int], Dict[str, Scalar]]] = None,
on_evaluate_config_fn: Optional[Callable[[int], Dict[str, Scalar]]] = None,
accept_failures: bool = True,
initial_parameters: Optional[Parameters] = None,
) -> None:
"""Federated Averaging strategy.
Implementation based on https://arxiv.org/abs/1602.05629
Args:
fraction_fit (float, optional): Fraction of clients used during
training. Defaults to 0.1.
fraction_eval (float, optional): Fraction of clients used during
validation. Defaults to 0.1.
min_fit_clients (int, optional): Minimum number of clients used
during training. Defaults to 2.
min_eval_clients (int, optional): Minimum number of clients used
during validation. Defaults to 2.
min_available_clients (int, optional): Minimum number of total
clients in the system. Defaults to 2.
eval_fn (Callable[[Weights], Optional[Tuple[float, float]]], optional):
Function used for validation. Defaults to None.
on_fit_config_fn (Callable[[int], Dict[str, Scalar]], optional):
Function used to configure training. Defaults to None.
on_evaluate_config_fn (Callable[[int], Dict[str, Scalar]], optional):
Function used to configure validation. Defaults to None.
accept_failures (bool, optional): Whether or not accept rounds
containing failures. Defaults to True.
initial_parameters (Parameters, optional): Initial global model parameters.
"""
super().__init__()
self.min_fit_clients = min_fit_clients
self.min_eval_clients = min_eval_clients
self.fraction_fit = fraction_fit
self.fraction_eval = fraction_eval
self.min_available_clients = min_available_clients
self.eval_fn = eval_fn
self.on_fit_config_fn = on_fit_config_fn
self.on_evaluate_config_fn = on_evaluate_config_fn
self.accept_failures = accept_failures
self.initial_parameters = initial_parameters
def __repr__(self) -> str:
rep = f"FedAvg(accept_failures={self.accept_failures})"
return rep
def num_fit_clients(self, num_available_clients: int) -> Tuple[int, int]:
"""Return the sample size and the required number of available
clients."""
num_clients = int(num_available_clients * self.fraction_fit)
return max(num_clients, self.min_fit_clients), self.min_available_clients
def num_evaluation_clients(self, num_available_clients: int) -> Tuple[int, int]:
"""Use a fraction of available clients for evaluation."""
num_clients = int(num_available_clients * self.fraction_eval)
return max(num_clients, self.min_eval_clients), self.min_available_clients
def initialize_parameters(
self, client_manager: ClientManager
) -> Optional[Parameters]:
"""Initialize global model parameters."""
initial_parameters = self.initial_parameters
self.initial_parameters = None # Don't keep initial parameters in memory
if isinstance(initial_parameters, list):
log(WARNING, DEPRECATION_WARNING_INITIAL_PARAMETERS)
initial_parameters = self.weights_to_parameters(weights=initial_parameters)
return initial_parameters
def evaluate(
self, parameters: Parameters
) -> Optional[Tuple[float, Dict[str, Scalar]]]:
"""Evaluate model parameters using an evaluation function."""
if self.eval_fn is None:
# No evaluation function provided
return None
weights = self.parameters_to_weights(parameters)
eval_res = self.eval_fn(weights)
if eval_res is None:
return None
loss, other = eval_res
if isinstance(other, float):
print(DEPRECATION_WARNING)
metrics = {"accuracy": other}
else:
metrics = other
return loss, metrics
def configure_fit(
self, rnd: int, parameters: Parameters, client_manager: ClientManager
) -> List[Tuple[ClientProxy, FitIns]]:
"""Configure the next round of training."""
config = {}
if self.on_fit_config_fn is not None:
# Custom fit config function provided
config = self.on_fit_config_fn(rnd)
fit_ins = FitIns(parameters, config)
# Sample clients
sample_size, min_num_clients = self.num_fit_clients(
client_manager.num_available()
)
clients = client_manager.sample(
num_clients=sample_size, min_num_clients=min_num_clients
)
# Return client/config pairs
return [(client, fit_ins) for client in clients]
def configure_evaluate(
self, rnd: int, parameters: Parameters, client_manager: ClientManager
) -> List[Tuple[ClientProxy, EvaluateIns]]:
"""Configure the next round of evaluation."""
# Do not configure federated evaluation if fraction_eval is 0
if self.fraction_eval == 0.0:
return []
# Parameters and config
config = {}
if self.on_evaluate_config_fn is not None:
# Custom evaluation config function provided
config = self.on_evaluate_config_fn(rnd)
evaluate_ins = EvaluateIns(parameters, config)
# Sample clients
if rnd >= 0:
sample_size, min_num_clients = self.num_evaluation_clients(
client_manager.num_available()
)
clients = client_manager.sample(
num_clients=sample_size, min_num_clients=min_num_clients
)
else:
clients = list(client_manager.all().values())
# Return client/config pairs
return [(client, evaluate_ins) for client in clients]
def aggregate_fit(
self,
rnd: int,
results: List[Tuple[ClientProxy, FitRes]],
failures: List[BaseException],
) -> Tuple[Optional[Parameters], Dict[str, Scalar]]:
"""Aggregate fit results using weighted average."""
if not results:
return None, {}
# Do not aggregate if there are failures and failures are not accepted
if not self.accept_failures and failures:
return None, {}
# Convert results
print("\n\n aggregate_fit")
print(results)
weights_results = [
(self.parameters_to_weights(fit_res.parameters), fit_res.num_examples)
for client, fit_res in results
]
print("weights_results")
print(weights_results)
return self.weights_to_parameters(aggregate(weights_results)), {}
def aggregate_evaluate(
self,
rnd: int,
results: List[Tuple[ClientProxy, EvaluateRes]],
failures: List[BaseException],
) -> Tuple[Optional[float], Dict[str, Scalar]]:
"""Aggregate evaluation losses using weighted average."""
if not results:
return None, {}
# Do not aggregate if there are failures and failures are not accepted
if not self.accept_failures and failures:
return None, {}
loss_aggregated = weighted_loss_avg(
[
(evaluate_res.num_examples, evaluate_res.loss)
for _, evaluate_res in results
]
)
return loss_aggregated, {}
def weights_to_parameters(self, weights: Weights) -> Parameters:
"""Convert NumPy weights to parameters object."""
print('weights_to_parameters')
print(weights)
tensors = [self.ndarray_to_bytes(ndarray) for ndarray in weights]
return Parameters(tensors=tensors, tensor_type="numpy.nda")
def parameters_to_weights(self, parameters: Parameters) -> Weights:
"""Convert parameters object to NumPy weights."""
print('parameters_to_weights')
print(parameters)
return [self.bytes_to_ndarray(tensor) for tensor in parameters.tensors]
# pylint: disable=R0201
def ndarray_to_bytes(self, ndarray: np.ndarray) -> bytes:
"""Serialize NumPy array to bytes."""
print('ndarray_to_bytes')
print(ndarray)
return None
# pylint: disable=R0201
def bytes_to_ndarray(self, tensor: bytes) -> np.ndarray:
"""Deserialize NumPy array from bytes."""
print('bytes_to_ndarray')
print(tensor)
return None
# Start Flower server for three rounds of federated learning
fl.server.start_server(
server_address='localhost:5006',
config={"num_rounds": 2},
strategy=MyStrategy()
)
Is Float64Array the right type?
What should I use on the python side to deserialize the data?
I specify that I cannot modify the proto.
Thank you in advance for your explanations.

SQLAlchemy 1.4.0b1 AsyncSession issue

I'm using SQLAlchemy 1.4.0b1's AsyncSession to update a Postgres db with asyncpg 0.21.0. The code below aims to update objects and add new objects in response to various incoming Redis stream messages
The save_revised coroutine (update) is working fine, and so is the session.add part of the td_move coroutine. However the update part of td_move, at the bottom of the function (starting from if this_train_id and msg.get('from') in finals[crossing]) , only works intermittently : I'm getting some db updates but only ~1/3 or so of the log messages indicating that an update is wanted.
Can anyone suggest what the problem(s) could be please ?
async def main():
logger.info(f"db_updater starting {datetime.now().strftime('%H:%M:%S')}")
engine = create_async_engine(os.getenv('ASYNC_DB_URL'), future=True)
async with AsyncSession(engine) as session:
crossings, headcodes, lean_params, finals, active_trains, train_ids, berthtimes, hc_types = await get_db_data(logger) # noqa: E501
pool = await aioredis.create_redis_pool(('redis', 6379), db=0, password=os.getenv('REDIS_PW'), encoding='utf-8')
last_id = '$'
while True:
all_msgs = await pool.xread(['del_hc_s', 'xing_revised', 'all_td', 'add_hc_s'], latest_ids=[last_id, last_id, last_id, last_id]) # noqa: E501
for stream_name, msg_id, msg in all_msgs:
message = dict(msg)
crossing = message.get('crossing')
if stream_name == 'all_td':
await td_move(message, train_ids, active_trains, finals, lean_params, session)
elif stream_name == 'xing_revised':
await save_revised(message, lean_params[crossing], session)
async def save_revised(msg, params, session):
train_id = msg.get('train_id')
# today_class is a SQLA model class from declarative_base()
today_class = params['today_class']
rev_time = datetime.fromtimestamp(
int(msg.get('revised')))
stmt = update(today_class).where(today_class.train_id == train_id).\
values(xing_revised=rev_time).\
execution_options(synchronize_session="fetch")
await session.execute(stmt)
if msg.get('revised_ten') != 'X':
stmt2 = update(today_class).where(today_class.train_id == train_id).\
values(xing_revised_ten=rev_time).\
execution_options(synchronize_session="fetch")
await session.execute(stmt2)
await session.commit()
async def td_move(msg, train_ids, active_trains, finals, params, session):
crossing = msg.get('crossing')
descr = msg.get('descr')
if crossing:
this_train_id = [s for s in train_ids[crossing] if descr in s]
if this_train_id:
this_train_id = this_train_id[0]
else:
return
if this_train_id and active_trains[crossing].get(this_train_id) and (
is_within_minutes(30, active_trains[crossing].get(this_train_id))):
# Td_Ca_Cc is a SQLA model class from declarative_base()
td = Td_Ca_Cc(
msg_type=msg.get('msg_type'),
descr=msg.get('descr'),
traintype=active_trains[crossing].get(
this_train_id).get('train_type'),
from_berth=msg.get('from'),
to_berth=msg.get('to'),
tdtime=dt_from_timestamp(msg.get('time')),
seconds=0,
area_id=msg.get('area_id'),
updated=datetime.now(),
crossing=crossing
)
session.add(td)
if this_train_id and msg.get('from') in finals[crossing]:
today_class = params[crossing]['today_class']
stmt = update(today_class).where(today_class.train_id == this_train_id).\
values(xing_actual=datetime.now(), cancel_time='XXX').\
execution_options(synchronize_session="fetch")
await session.execute(stmt)
logger.info(f"{crossing} {msg.get('descr')} passed {datetime.now().strftime('%H:%M:%S')}")
await session.commit()
if __name__ == '__main__':
asyncio.run(main())
For future reference the problem was making a (sync) logging call. I removed this (and will add an async logger call), the modified code is now working fine

Is it possible to randomly sample YouTube comments with YouTube API V3?

I have been trying to download all the YouTube comments on popular videos using python requests, but it has been throwing up the following error after about a quarter of the total comments:
{'error': {'code': 400, 'message': "The API server failed to successfully process the request. While this can be a transient error, it usually indicates that the request's input is invalid. Check the structure of the commentThread resource in the request body to ensure that it is valid.", 'errors': [{'message': "The API server failed to successfully process the request. While this can be a transient error, it usually indicates that the request's input is invalid. Check the structure of the commentThread resource in the request body to ensure that it is valid.", 'domain': 'youtube.commentThread', 'reason': 'processingFailure', 'location': 'body', 'locationType': 'other'}]}}
I found this thread detailing the same issue, and it seems that it is not possible to download all the comments on popular videos.
This is my code:
import argparse
import urllib
import requests
import json
import time
start_time = time.time()
class YouTubeApi():
YOUTUBE_COMMENTS_URL = 'https://www.googleapis.com/youtube/v3/commentThreads'
comment_counter = 0
with open("API_keys.txt", "r") as f:
key_list = f.readlines()
key_list = [key.strip('/n') for key in key_list]
def format_comments(self, results, likes_required):
comments_list = []
try:
for item in results["items"]:
comment = item["snippet"]["topLevelComment"]
likes = comment["snippet"]["likeCount"]
if likes < likes_required:
continue
author = comment["snippet"]["authorDisplayName"]
text = comment["snippet"]["textDisplay"]
str = "Comment by {}:\n \"{}\"\n\n".format(author, text)
str = str.encode('ascii', 'replace').decode()
comments_list.append(str)
self.comment_counter += 1
print("Comments downloaded:", self.comment_counter, end="\r")
except(KeyError):
print(results)
return comments_list
def get_video_comments(self, video_id, likes_required):
with open("API_keys.txt", "r") as f:
key_list = f.readlines()
key_list = [key.strip('/n') for key in key_list]
if self.comment_counter <= 900000:
key = self.key_list[0]
elif self.comment_counter <= 1800000:
key = self.key_list[1]
elif self.comment_counter <= 2700000:
key = self.key_list[2]
elif self.comment_counter <= 3600000:
key = self.key_list[3]
elif self.comment_counter <= 4500000:
key = self.key_list[4]
params = {
'part': 'snippet,replies',
'maxResults': 100,
'videoId': video_id,
'textFormat': 'plainText',
'key': key
}
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'
}
try:
#data = self.openURL(self.YOUTUBE_COMMENTS_URL, params)
comments_data = requests.get(self.YOUTUBE_COMMENTS_URL, params=params, headers=headers)
except ChunkedEncodingError:
tries = 5
print("Chunked Error. Retrying...")
for n in range(tries):
try:
x = 0
x += 1
print("Trying", x, "times")
response = session.post("https://www.youtube.com/comment_service_ajax", params=params, data=data, headers=headers)
comments_data = json.loads(response.text)
except ChunkedEncodingError as c:
print(c)
results = comments_data.json()
nextPageToken = results.get("nextPageToken")
commments_list = []
commments_list += self.format_comments(results, likes_required)
while nextPageToken:
params.update({'pageToken': nextPageToken})
try:
comments_data = requests.get(self.YOUTUBE_COMMENTS_URL, params=params, headers=headers)
except ChunkedEncodingError as c:
tries = 5
print("Chunked Error. Retrying...")
for n in range(tries):
try:
x = 0
x += 1
print("Trying", x, "times")
response = session.post("https://www.youtube.com/comment_service_ajax", params=params, data=data, headers=headers)
comments_data = json.loads(response.text)
except ChunkedEncodingError as c:
print(c)
results = comments_data.json()
nextPageToken = results.get("nextPageToken")
commments_list += self.format_comments(results, likes_required)
return commments_list
def get_video_id_list(self, filename):
try:
with open(filename, 'r') as file:
URL_list = file.readlines()
except FileNotFoundError:
exit("File \"" + filename + "\" not found")
list = []
for url in URL_list:
if url == "\n": # ignore empty lines
continue
if url[-1] == '\n': # delete '\n' at the end of line
url = url[:-1]
if url.find('='): # get id
id = url[url.find('=') + 1:]
list.append(id)
else:
print("Wrong URL")
return list
def main():
yt = YouTubeApi()
parser = argparse.ArgumentParser(add_help=False, description=("Download youtube comments from many videos into txt file"))
required = parser.add_argument_group("required arguments")
optional = parser.add_argument_group("optional arguments")
here: https://console.developers.google.com/apis/credentials")
optional.add_argument("--likes", '-l', help="The amount of likes a comment needs to be saved", type=int)
optional.add_argument("--input", '-i', help="URL list file name")
optional.add_argument("--output", '-o', help="Output file name")
optional.add_argument("--help", '-h', help="Help", action='help')
args = parser.parse_args()
# --------------------------------------------------------------------- #
likes = 0
if args.likes:
likes = args.likes
input_file = "URL_list.txt"
if args.input:
input_file = args.input
output_file = "Comments.txt"
if args.output:
output_file = args.output
list = yt.get_video_id_list(input_file)
if not list:
exit("No URLs in input file")
try:
vid_counter = 0
with open(output_file, "a") as f:
for video_id in list:
vid_counter += 1
print("Downloading comments for video ", vid_counter, ", id: ", video_id, sep='')
comments = yt.get_video_comments(video_id, likes)
if comments:
for comment in comments:
f.write(comment)
print('\nDone!')
except KeyboardInterrupt:
exit("User Aborted the Operation")
# --------------------------------------------------------------------- #
if __name__ == '__main__':
main()
The next best method would be to randomly sample them. Does anyone know if this is possible with the API V3?
Even if the API returns a processingFailure error, you could still catch that (or any other API error for that matter) for to terminate gracefully your pagination loop. This way your script will provide the top-level comments that it fetched from of the API prior to the occurrence of the first API error.
The error response provided by the YouTube Data API is (usually) of the following form:
{
"error": {
"errors": [
{
"domain": <string>,
"reason": <string>,
"message": <string>,
"locationType": <string>,
"location": <string>
}
],
"code": <integer>,
"message": <string>
}
}
Hence, you could have defined the following function:
def is_error_response(response):
error = response.get('error')
if error is None:
return False
print("API Error: "
f"code={error['code']} "
f"domain={error['errors'][0]['domain']} "
f"reason={error['errors'][0]['reason']} "
f"message={error['errors'][0]['message']!r}")
return True
that you'll invoke after each statement of form results = comments_data.json(). In case of the first occurrence of that statement, you'll have:
results = comments_data.json()
if is_error_response(results):
return []
nextPageToken = results.get("nextPageToken")
For the second instance of that statement:
results = comments_data.json()
if is_error_response(results):
return comments_list
nextPageToken = results.get("nextPageToken")
Notice that the function is_error_response above prints out an error message on stdout in case its argument in an API error response; this is for the purpose of having the user of your script informed about the API call failure.

aiomultiprocess make a pool, but it doesn't work

import asyncio
import aiohttp
import time, json
from aiomultiprocess import Pool
start = time.time()
url = 'http://member-service.test.cn/member/login'
headers = {"Content-Type": "application/json;charset=UTF-8"}
async def get(phone):
session = aiohttp.ClientSession()
response = await session.post(url=url, data=json.dumps(
{"identityType": 1, "password": "111111", "platformSource": "Mall", "remember": True,
"terminalType": "PC", 'identity': phone}), headers=headers)
result = await response.text()
session.close()
print(result)
return result
async def request():
phones = ['186%08d' % x for x in range(0, 10)]
async with Pool() as pool:
result = await pool.map(get, phones) # can't work
return result
coroutine = request()
task = asyncio.ensure_future(coroutine)
loop = asyncio.get_event_loop()
loop.run_until_complete(task)
end = time.time()
print('Cost time:', end - start)
I try to use aiomultiprocess to make a pool.
async with Pool() as pool:
result = await pool.map(get, phones)
but actually can't call get function

Groovy http request, parse headers values

I'm sending HTTP request from application 1
http://localhost:8888/inputs/example-input?ProductId=49823&Orders_orderId=27759
to application 2.
And in application 2 I'm reciving plain string:
inputs/example-input?ProductId=49823&Orders_orderId=27759
I need to get values in they own variables as shown:
def productId = 49823
def orderId = 27759
Is there some groovy way to parse input string inputs/example-input?ProductId=49823&Orders_orderId=27759?
You need to parse the input manually, e.g.:
def input = "inputs/example-input?ProductId=49823&Orders_orderId=27759"
def parsed = input
.split("\\?")[1]
.split("&")
.inject([:]) { m, e ->
def arr = e.split("=")
m[arr[0]] = arr[1]
m
}
def productId = parsed.ProductId
def orderId = parsed.Orders_orderId
assert productId == '49823'
assert orderId == '27759'

Resources