How to POST a raw file using Tornado HTTPRequest

I want to send some data in a POST request using Tornado (AsyncHTTPClient):
rec_body = {'source': self.request.body, 'top': str(self.config["top"]), 'model': self.config["model"]}
where self.request.body is a raw binary file (an image).
I tried doing this:
http_client = AsyncHTTPClient()
rec_body = {'source': self.request.body, 'top': str(self.config["top"]), 'model': self.config["model"]}
request = HTTPRequest(url=os.path.join(self.config["dest_addr"], self.config["sub_sect"]), method='POST', body=rec_body)
result = http_client.fetch(request, callback=self.handle_request)
but got these errors:
File "/usr/local/lib/python2.7/dist-packages/tornado/httpclient.py", line 424, in __init__
self.body = body
File "/usr/local/lib/python2.7/dist-packages/tornado/httpclient.py", line 468, in body
self._body = utf8(value)
File "/usr/local/lib/python2.7/dist-packages/tornado/escape.py", line 203, in utf8
"Expected bytes, unicode, or None; got %r" % type(value)
TypeError: Expected bytes, unicode, or None; got <type 'dict'>
ERROR:tornado.access:500 POST /upload (192.168.72.84) 13.14ms
What am I doing wrong?

I tried curl (naively) and the requests module; both work fine, but not asynchronously. The root cause of the error above is that HTTPRequest's body must be bytes or a string, not a dict, so the fields have to be serialized into the body yourself. For Tornado's AsyncHTTPClient there is a good recipe from flickr.
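If every field were plain text, urlencoding the dict yourself would already satisfy that requirement. A minimal sketch in Python 2 (matching the traceback above); the URL and field values here are hypothetical:
import urllib

from tornado.httpclient import AsyncHTTPClient, HTTPRequest

http_client = AsyncHTTPClient()
# urlencode() flattens the dict into a plain string such as "top=5&model=m1",
# which satisfies HTTPRequest's bytes-or-string requirement for body.
body = urllib.urlencode({'top': '5', 'model': 'm1'})
request = HTTPRequest(url='http://example.com/upload',  # hypothetical URL
                      method='POST',
                      headers={'Content-Type': 'application/x-www-form-urlencoded'},
                      body=body)
http_client.fetch(request, callback=lambda response: None)
A raw binary image, however, belongs in a multipart/form-data body, which is what the recipe below builds.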
The recipe deals with multipart POST requests. The code is adapted from the recipe found at http://code.activestate.com/recipes/146306/ (no author name was given there).
Author: Alexis Mignon (c)
Email: alexis.mignon#gmail.com
Date: 06/08/2011
Here is the code:
import mimetypes

from tornado.gen import coroutine, Return
from tornado.httpclient import HTTPRequest

from tornado_flickrapi.httpclient import fetch


@coroutine
def posturl(url, fields, files):
    try:
        response = yield post_multipart(url, fields, files)
    except Exception as e:
        raise e
    raise Return(response)


@coroutine
def post_multipart(url, fields, files):
    """
    Post fields and files to an http host as multipart/form-data.
    fields is a sequence of (name, value) elements for regular form fields.
    files is a sequence of (name, filename, value) elements for data to be
    uploaded as files.
    Return the server's response page.
    """
    content_type, body = encode_multipart_formdata(fields, files)
    headers = {"Content-Type": content_type, 'content-length': str(len(body))}
    request = HTTPRequest(url, "POST", headers=headers, body=body, validate_cert=False)
    try:
        response = yield fetch(request)
    except Exception as e:
        raise e
    raise Return(response)


def encode_multipart_formdata(fields, files):
    """
    fields is a sequence of (name, value) elements for regular form fields.
    files is a sequence of (name, filename, value) elements for data to be
    uploaded as files.
    Return (content_type, body) ready for httplib.HTTP instance
    """
    BOUNDARY = '----------ThIs_Is_tHe_bouNdaRY_$'
    CRLF = '\r\n'
    L = []
    for (key, value) in fields:
        L.append('--' + BOUNDARY)
        L.append('Content-Disposition: form-data; name="%s"' % key)
        L.append('')
        L.append(value)
    for (key, filename, value) in files:
        filename = filename.encode("utf8")
        L.append('--' + BOUNDARY)
        L.append(
            'Content-Disposition: form-data; name="%s"; filename="%s"' % (
                key, filename
            )
        )
        L.append('Content-Type: %s' % get_content_type(filename))
        L.append('')
        L.append(value)
    L.append('--' + BOUNDARY + '--')
    L.append('')
    body = CRLF.join(L)
    content_type = 'multipart/form-data; boundary=%s' % BOUNDARY
    return content_type, body


def get_content_type(filename):
    return mimetypes.guess_type(filename)[0] or 'application/octet-stream'
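For completeness, a usage sketch for the handler from the question (the filename and handler shape are assumptions, not part of the recipe):
@coroutine
def post(self):
    # Plain fields travel as (name, value) pairs; the raw image travels as a
    # (name, filename, value) triple so it gets its own multipart section.
    fields = [('top', str(self.config["top"])), ('model', self.config["model"])]
    files = [('source', 'upload.jpg', self.request.body)]  # 'upload.jpg' is hypothetical
    url = os.path.join(self.config["dest_addr"], self.config["sub_sect"])
    response = yield posturl(url, fields, files)
    self.write(response.body)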

Related

Why is the YouTube API v3 inconsistent with the number of comments it lets you download before an error 400?

I am downloading YouTube comments with a python script that uses API keys and the YouTube Data API V3, but sooner or later I run into the following error:
{'error': {'code': 400, 'message': "The API server failed to successfully process the request. While this can be a transient error, it usually indicates that the request's input is invalid. Check the structure of the commentThread resource in the request body to ensure that it is valid.", 'errors': [{'message': "The API server failed to successfully process the request. While this can be a transient error, it usually indicates that the request's input is invalid. Check the structure of the commentThread resource in the request body to ensure that it is valid.", 'domain': 'youtube.commentThread', 'reason': 'processingFailure', 'location': 'body', 'locationType': 'other'}]}}
I am using the following code:
import argparse
import requests
import json
import time

start_time = time.time()


class YouTubeApi():

    YOUTUBE_COMMENTS_URL = 'https://www.googleapis.com/youtube/v3/commentThreads'
    comment_counter = 0

    def is_error_response(self, response):
        error = response.get('error')
        if error is None:
            return False
        print("API Error: "
              f"code={error['code']} "
              f"domain={error['errors'][0]['domain']} "
              f"reason={error['errors'][0]['reason']} "
              f"message={error['errors'][0]['message']!r}")
        print(self.comment_counter)
        return True

    def format_comments(self, results, likes_required):
        comments_list = []
        try:
            for item in results["items"]:
                comment = item["snippet"]["topLevelComment"]
                likes = comment["snippet"]["likeCount"]
                if likes < likes_required:
                    continue
                author = comment["snippet"]["authorDisplayName"]
                text = comment["snippet"]["textDisplay"]
                comment_str = "Comment by {}:\n \"{}\"\n\n".format(author, text)
                comment_str = comment_str.encode('ascii', 'replace').decode()
                comments_list.append(comment_str)
                self.comment_counter += 1
                print("Comments downloaded:", self.comment_counter, end="\r")
        except KeyError:
            print(results)
        return comments_list

    def get_video_comments(self, video_id, likes_required):
        with open("API_keys.txt", "r") as f:
            key_list = f.readlines()
        comments_list = []
        key_list = [key.strip('\n') for key in key_list]

        params = {
            'part': 'snippet,replies',
            'maxResults': 100,
            'videoId': video_id,
            'textFormat': 'plainText',
            'key': key_list[0]
        }
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'
        }

        comments_data = requests.get(self.YOUTUBE_COMMENTS_URL, params=params, headers=headers)
        results = comments_data.json()
        if self.is_error_response(results):
            return []
        nextPageToken = results.get("nextPageToken")
        comments_list = []
        comments_list += self.format_comments(results, likes_required)

        while nextPageToken:
            params.update({'pageToken': nextPageToken})
            if self.comment_counter <= 900000:
                params.update({'key': key_list[0]})
            elif self.comment_counter <= 1800000:
                params.update({'key': key_list[1]})
            elif self.comment_counter <= 2700000:
                params.update({'key': key_list[2]})
            elif self.comment_counter <= 3600000:
                params.update({'key': key_list[3]})
            elif self.comment_counter <= 4500000:
                params.update({'key': key_list[4]})
            else:
                params.update({'key': key_list[5]})
            if self.comment_counter % 900001 == 0:
                print(params["key"])
            comments_data = requests.get(self.YOUTUBE_COMMENTS_URL, params=params, headers=headers)
            results = comments_data.json()
            if self.is_error_response(results):
                return comments_list
            nextPageToken = results.get("nextPageToken")
            comments_list += self.format_comments(results, likes_required)

        return comments_list

    def get_video_id_list(self, filename):
        try:
            with open(filename, 'r') as file:
                URL_list = file.readlines()
        except FileNotFoundError:
            exit("File \"" + filename + "\" not found")

        id_list = []
        for url in URL_list:
            if url == "\n":  # ignore empty lines
                continue
            if url[-1] == '\n':  # delete '\n' at the end of line
                url = url[:-1]
            if url.find('=') != -1:  # get id
                video_id = url[url.find('=') + 1:]
                id_list.append(video_id)
            else:
                print("Wrong URL")
        return id_list


def main():
    yt = YouTubeApi()

    parser = argparse.ArgumentParser(add_help=False, description=("Download youtube comments from many videos into txt file"))
    required = parser.add_argument_group("required arguments")
    optional = parser.add_argument_group("optional arguments")
    optional.add_argument("--likes", '-l', help="The amount of likes a comment needs to be saved", type=int)
    optional.add_argument("--input", '-i', help="URL list file name")
    optional.add_argument("--output", '-o', help="Output file name")
    optional.add_argument("--help", '-h', help="Help", action='help')
    args = parser.parse_args()
    # --------------------------------------------------------------------- #
    likes = 0
    if args.likes:
        likes = args.likes

    input_file = "URL_list.txt"
    if args.input:
        input_file = args.input

    output_file = "Comments.txt"
    if args.output:
        output_file = args.output

    video_id_list = yt.get_video_id_list(input_file)
    if not video_id_list:
        exit("No URLs in input file")

    try:
        vid_counter = 0
        with open(output_file, "a") as f:
            for video_id in video_id_list:
                vid_counter += 1
                print("Downloading comments for video ", vid_counter, ", id: ", video_id, sep='')
                comments = yt.get_video_comments(video_id, likes)
                if comments:
                    for comment in comments:
                        f.write(comment)
        print('\nDone!')
    except KeyboardInterrupt:
        exit("User Aborted the Operation")
    # --------------------------------------------------------------------- #


if __name__ == '__main__':
    main()
In another thread, it was discovered that Google does not currently permit downloading all the comments on a popular video; however, you would expect it to cut off at the same point every time. Instead, I have found that it can range anywhere between 1.5 million and 200k comments downloaded before it returns a code 400. Is this due to a bug in my code, or is the YouTube API rejecting my requests because it is clear they come from a script? Would adding a time.sleep call help with this?
(I bring forward this answer, which I prepared for the question above at the time of its initial posting, because my assertions below seem to be confirmed once again by recent SO posts of this very kind.)
Your observations are correct. But, unfortunately, nobody but Google itself is able to provide a sound and complete answer to your question. Non-Googlers (such as myself!), and even Googlers themselves (since they all sign NDAs), can only guess about the things implied.
Here is my educated guess, based on the investigations I made recently when responding to a very much related question (which you quoted above, yourself!):
As you already know, the API uses pagination to return to callers result sets whose cardinality exceeds the internal limit of 50 or, as the case may be, 100 items per invocation of an API endpoint that provides result sets.
If you log the nextPageToken property that you obtain from CommentThreads.list via your results object, you'll see that those page tokens get bigger and bigger. Each such page token has to be passed on to the next CommentThreads.list call as the pageToken parameter.
The problem is that, internally (this is not specified publicly, nor documented), the API has a limit on the sheer length of the HTTP requests it accepts from its callers. (This happens for various reasons, e.g. security.) Therefore, when a given page token is sufficiently long, the HTTP request that the API user issues will exceed that internal limit, producing an internal error. That error surfaces to the API caller as the processingFailure error that you've encountered.
Many questions remain to be answered (e.g. why do the page tokens have unbounded length?), but, again, those questions belong very much to the internal realm of the back-end system behind the API we're using. And they cannot be answered publicly, since they are very much Google's internal business.
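If you want to test this hypothesis on your own runs, here is a minimal sketch (reusing the params and headers dicts from the question) that logs the token length on every page:
import requests

YOUTUBE_COMMENTS_URL = 'https://www.googleapis.com/youtube/v3/commentThreads'

def log_token_growth(params, headers):
    # Walk the pages and print how long each nextPageToken is. If the
    # theory above holds, the processingFailure should arrive once the
    # token (and hence the request URL) grows past some internal limit.
    page = 0
    while True:
        results = requests.get(YOUTUBE_COMMENTS_URL, params=params, headers=headers).json()
        if 'error' in results:
            print("Failed on page %d; last token length: %d"
                  % (page, len(params.get('pageToken', ''))))
            break
        token = results.get('nextPageToken')
        if token is None:
            break
        page += 1
        print("Page %d: token length %d" % (page, len(token)))
        params['pageToken'] = token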

Encoding problem with GET requests in Haskell

I'm trying to get some JSON data from a Jira server using Haskell. I'm counting this as "me having problems with Haskell" rather than with encodings or Jira, because the problem only shows up when I do this in Haskell.
It occurs when the URL (or query) has plus signs. After building my request for theproject+order+by+created, Haskell prints it as:
Request {
host = "myjiraserver.com"
port = 443
secure = True
requestHeaders = [("Content-Type","application/json"),("Authorization","<REDACTED>")]
path = "/jira/rest/api/2/search"
queryString = "?jql=project%3Dtheproject%2Border%2Bby%2Bcreated"
method = "GET"
proxy = Nothing
rawBody = False
redirectCount = 10
responseTimeout = ResponseTimeoutDefault
requestVersion = HTTP/1.1
}
But the request fails with this response:
- 'Error in the JQL Query: The character ''+'' is a reserved JQL character. You must
enclose it in a string or use the escape ''\u002b'' instead. (line 1, character
21)'
So it seems like Jira didn't like Haskell's %2B. Do you have any suggestions on what I can do to fix this, or any resources that might be helpful? The same request sans the +order+by+created part is successful.
The code (patched together from these examples):
{-# LANGUAGE OverloadedStrings #-}
import Data.Aeson
import qualified Data.ByteString.Char8 as S8
import qualified Data.Yaml as Yaml
import Network.HTTP.Simple
import System.Environment (getArgs)

-- auth' is echo -e "username:passwd" | base64
foo urlBase proj' auth' = do
    let proj = S8.pack (proj' ++ "+order+by+created")
        auth = S8.pack auth'
    request'' <- parseRequest urlBase
    let request'
            = setRequestMethod "GET"
            $ setRequestPath "/jira/rest/api/2/search"
            $ setRequestHeader "Content-Type" ["application/json"]
            $ request''
        request
            = setRequestQueryString [("jql", Just (S8.append "project=" proj))]
            $ setRequestHeader "Authorization" [S8.append "Basic " auth]
            $ request'
    return request

main :: IO ()
main = do
    args <- getArgs
    case args of
        (urlBase:proj:auth:_) -> do
            request <- foo urlBase proj auth
            putStrLn $ show request
            response <- httpJSON request
            S8.putStrLn $ Yaml.encode (getResponseBody response :: Value) -- apparently this is required
            putStrLn ""
        _ -> putStrLn "usage..."
(If you know a simpler way to do the above then I'd take such suggestions as well; I'm just trying to do something analogous to this Python:
import requests
import sys

if len(sys.argv) >= 4:
    urlBase = sys.argv[1]
    proj = sys.argv[2]
    auth = sys.argv[3]
    urlBase += "/jira/rest/api/2/search?jql=project="
    proj += "+order+by+created"
    h = {}
    h["content-type"] = "application/json"
    h["authorization"] = "Basic " + auth
    r = requests.get(urlBase + proj, headers=h)
    print(r.json())
)
project+order+by+created is the URL-encoded form of the actual query project order by created (with spaces instead of +). The function setRequestQueryString expects a raw query value (with spaces, not URL-encoded) and URL-encodes it itself, so your already-encoded + signs get encoded a second time, into %2B.
The Python script you give for comparison essentially does the URL-encoding by hand.
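The distinction is easy to see in a couple of lines of Python (an illustration of the encoding rules only, not part of the fix):
from urllib.parse import quote_plus

# A space in the raw value is encoded as '+' ...
print(quote_plus("project=theproject order by created"))
# -> project%3Dtheproject+order+by+created

# ... while a literal '+' in the raw value is encoded as '%2B', which
# Jira then rejects as a reserved JQL character.
print(quote_plus("project=theproject+order+by+created"))
# -> project%3Dtheproject%2Border%2Bby%2Bcreated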
So the fix is to put the raw query in proj:
foo urlBase proj' auth' = do
    let proj = S8.pack (proj' ++ " order by created") -- spaces instead of +
    ...

Python3 - coinbase API authentication

I am trying to access my Coinbase account using their API.
I am using the following code, which is from the website https://developers.coinbase.com/docs/wallet/api-key-authentication:
import json, hmac, hashlib, time, requests
from requests.auth import AuthBase

# Before implementation, set environmental variables with the names API_KEY and API_SECRET
API_KEY = 'API_KEY'
API_SECRET = 'API_SECRET'

# Create custom authentication for Coinbase API
class CoinbaseWalletAuth(AuthBase):
    def __init__(self, api_key, secret_key):
        self.api_key = api_key
        self.secret_key = secret_key

    def __call__(self, request):
        timestamp = str(int(time.time()))
        message = timestamp + request.method + request.path_url + (request.body or '')
        signature = hmac.new(self.secret_key, message, hashlib.sha256).hexdigest()

        request.headers.update({
            'CB-ACCESS-SIGN': signature,
            'CB-ACCESS-TIMESTAMP': timestamp,
            'CB-ACCESS-KEY': self.api_key,
        })
        return request

api_url = 'https://api.coinbase.com/v2/'
auth = CoinbaseWalletAuth(API_KEY, API_SECRET)

# Get current user
r = requests.get(api_url + 'user', auth=auth)
print(r.json())
# {u'data': {u'username': None, u'resource': u'user', u'name': u'User'...
However I am getting the following error:
Traceback (most recent call last):
File "test1.py", line 44, in <module>
r = requests.get(api_url + 'user', auth=auth)
File "C:\Users\lclar\virtualenv-env\lib\site-packages\requests\api.py", line 72, in get
return request('get', url, params=params, **kwargs)
File "C:\Users\lclar\virtualenv-env\lib\site-packages\requests\api.py", line 58, in request
return session.request(method=method, url=url, **kwargs)
File "C:\Users\lclar\virtualenv-env\lib\site-packages\requests\sessions.py", line 494, in request
prep = self.prepare_request(req)
File "C:\Users\lclar\virtualenv-env\lib\site-packages\requests\sessions.py", line 437, in prepare_request
hooks=merge_hooks(request.hooks, self.hooks),
File "C:\Users\lclar\virtualenv-env\lib\site-packages\requests\models.py", line 309, in prepare
self.prepare_auth(auth, url)
File "C:\Users\lclar\virtualenv-env\lib\site-packages\requests\models.py", line 540, in prepare_auth
r = auth(self)
File "test1.py", line 29, in __call__
signature = hmac.new(self.secret_key, message, hashlib.sha256).encode("utf-8").digest()
File "C:\Users\lclar\AppData\Local\Programs\Python\Python36-32\lib\hmac.py", line 144, in new
return HMAC(key, msg, digestmod)
File "C:\Users\lclar\AppData\Local\Programs\Python\Python36-32\lib\hmac.py", line 42, in __init__
raise TypeError("key: expected bytes or bytearray, but got %r" % type(key).__name__)
TypeError: key: expected bytes or bytearray, but got 'str'
Can someone please help me?
Thanks in advance.
I assume self.secret_key = secret_key stores a string. For Python >= 3.4, with hmac.new(key, msg=None, digestmod=''), the key must be of type bytes or bytearray per the docs: https://docs.python.org/3/library/hmac.html
Likewise, to avoid the TypeError: Unicode-objects must be encoded before hashing error, do the same for the message variable, as shown below:
signature = hmac.new(self.secret_key.encode(), message.encode(), hashlib.sha256).hexdigest()
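Put together, a corrected __call__ would look like this (a sketch of the fix only; the rest of the class is unchanged from the question):
def __call__(self, request):
    timestamp = str(int(time.time()))
    message = timestamp + request.method + request.path_url + (request.body or '')
    # hmac.new() needs bytes for both the key and the message on Python 3,
    # hence the two .encode() calls.
    signature = hmac.new(self.secret_key.encode(),
                         message.encode(),
                         hashlib.sha256).hexdigest()
    request.headers.update({
        'CB-ACCESS-SIGN': signature,
        'CB-ACCESS-TIMESTAMP': timestamp,
        'CB-ACCESS-KEY': self.api_key,
    })
    return request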

OpenStack SDK - How to create image with Kernel id and Ramdisk parameters?

I've been trying to create an OpenStack image, informing the Kernel Id and Ramdisk Id, using the OpenStack Unified SDK (https://github.com/openstack/python-openstacksdk), but without success. I know this is possible, because the OpenStack CLI has these parameters, as shown on this page (http://docs.openstack.org/cli-reference/glance.html#glance-image-create), where the CLI has the "--kernel-id" and "--ramdisk-id" parameters. I've used these parameters in the terminal and confirmed they work, but I need to use them in Python.
I'm trying to use the upload_image method, as described here: http://developer.openstack.org/sdks/python/openstacksdk/users/proxies/image.html#image-api-v2, but I can't get the attrs parameter right. The documentation only says it is supposed to be a dictionary. Here is the code I'm using:
...
atrib = {
    'properties': {
        'kernel_id': 'd84e1f2b-8d8c-4a4a-8858-77a8d5a93cb1',
        'ramdisk_id': 'cfef18e0-006e-477a-a098-593d43435a1e'
    }
}
with open(file) as fimage:
    image = image_service.upload_image(
        name=name,
        data=fimage,
        disk_format='qcow2',
        container_format='bare',
        **atrib)
...
And here is the error I'm getting:
File "builder.py", line 121, in main
**atrib
File "/usr/lib/python2.7/site-packages/openstack/image/v2/_proxy.py", line 51, in upload_image
**attrs)
File "/usr/lib/python2.7/site-packages/openstack/proxy2.py", line 193, in _create
return res.create(self.session)
File "/usr/lib/python2.7/site-packages/openstack/resource2.py", line 570, in create
json=request.body, headers=request.headers)
File "/usr/lib/python2.7/site-packages/keystoneauth1/session.py", line 675, in post
return self.request(url, 'POST', **kwargs)
File "/usr/lib/python2.7/site-packages/openstack/session.py", line 52, in map_exceptions_wrapper
http_status=e.http_status, cause=e)
openstack.exceptions.HttpException: HttpException: Bad Request, 400 Bad Request
Provided object does not match schema 'image': {u'kernel_id': u'd84e1f2b-8d8c-4a4a-8858-77a8d5a93cb1', u'ramdisk_id': u'cfef18e0-006e-477a-a098-593d43435a1e'} is not of type 'string' Failed validating 'type' in schema['additionalProperties']: {'type': 'string'} On instance[u'properties']: {u'kernel_id': u'd84e1f2b-8d8c-4a4a-8858-77a8d5a93cb1', u'ramdisk_id': u'cfef18e0-006e-477a-a098-593d43435a1e'}
I already tried to use the update_image method, but without success: passing the kernel id and ramdisk id as strings creates the instance, but it does not boot.
Does anyone know how to solve this?
Which version of the Glance API do you use?
I have read the code in openstackclient/image/v1/images.py and openstackclient/v1/shell.py:
## in shell.py
def do_image_create(gc, args):
    ...
    fields = dict(filter(lambda x: x[1] is not None, vars(args).items()))
    raw_properties = fields.pop('property')
    fields['properties'] = {}
    for datum in raw_properties:
        key, value = datum.split('=', 1)
        fields['properties'][key] = value
    ...
    image = gc.images.create(**fields)

## in images.py
def create(self, **kwargs):
    ...
    for field in kwargs:
        if field in CREATE_PARAMS:
            fields[field] = kwargs[field]
        elif field == 'return_req_id':
            continue
        else:
            msg = 'create() got an unexpected keyword argument \'%s\''
            raise TypeError(msg % field)
    hdrs = self._image_meta_to_headers(fields)
    ...
    resp, body = self.client.post('/v1/images',
                                  headers=hdrs,
                                  data=image_data)
    ...
and openstackclient/v2/shell.py and openstackclient/image/v2/images.py (and I have debugged this too):
## in shell.py
def do_image_create(gc, args):
    ...
    raw_properties = fields.pop('property', [])
    for datum in raw_properties:
        key, value = datum.split('=', 1)
        fields[key] = value
    ...
    image = gc.images.create(**fields)

## in images.py
def create(self, **kwargs):
    """Create an image."""
    url = '/v2/images'
    image = self.model()
    for (key, value) in kwargs.items():
        try:
            setattr(image, key, value)
        except warlock.InvalidOperation as e:
            raise TypeError(utils.exception_to_str(e))
    resp, body = self.http_client.post(url, data=image)
    ...
It seems that you can create an image your way with version 1.0 of the API, but with version 2.0 you should pass kernel_id and ramdisk_id flattened at the top level, as below:
atrib = {
    'kernel_id': 'd84e1f2b-8d8c-4a4a-8858-77a8d5a93cb1',
    'ramdisk_id': 'cfef18e0-006e-477a-a098-593d43435a1e'
}
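If the SDK did pass them through, the call from the question would become (a sketch only; see the caveat just below):
# Assumes upload_image accepted kernel_id/ramdisk_id as top-level
# attributes rather than nested under 'properties'.
with open(file) as fimage:
    image = image_service.upload_image(
        name=name,
        data=fimage,
        disk_format='qcow2',
        container_format='bare',
        **atrib)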
But the OpenStack SDK seems unable to pass those two arguments through to the URL (because there is no Body definition for them in openstack/image/v2/image.py), so you would have to modify the OpenStack SDK to support this.
BTW, the code of OpenStack differs a little from version to version, but many things are the same.

HTTP POST contents in Haskell

I'm trying to post some data to a server in Haskell and the server side is coming up empty.
I'm using the Network.HTTP library for the request.
module Main (main) where

import Network.URI (URI (..), parseURI, uriScheme, uriPath, uriQuery, uriFragment)
import Network.HTTP
import Network.TCP as TCP

main = do
    conn <- TCP.openStream "localhost" 80
    rawResponse <- sendHTTP conn updateTest
    body <- getResponseBody rawResponse
    if body == rqBody updateTest
        then print "test passed"
        else print (body ++ " != " ++ (rqBody updateTest))

updateURI = case parseURI "http://localhost/test.php" of
    Just u -> u

updateTest = Request { rqURI = updateURI :: URI
                     , rqMethod = POST :: RequestMethod
                     , rqHeaders = [ Header HdrContentType "text/plain; charset=utf-8"
                                   ] :: [Header]
                     , rqBody = "Test string"
                     }
This test is returning the empty string as the response body from the server, when I think it should be echoing the "Test string" post.
I would ideally like to replicate the functionality of:
curl http://localhost/test.php -d 'Test string' -H 'Content-type:text/plain; charset=utf-8'
and am validating results with serverside test.php:
<?php
print (@file_get_contents('php://input'));
Am I doing this wrong or should I just be trying another library?
You need to specify a Content-Length HTTP header, whose value must be the length of the raw posted data (here the 8-byte body "raw data"):
updateTest = Request { rqURI = updateURI
                     , rqMethod = POST
                     , rqHeaders = [ mkHeader HdrContentType "application/x-www-form-urlencoded"
                                   , mkHeader HdrContentLength "8"
                                   ]
                     , rqBody = "raw data"
                     }
And with http-conduit:
{-# LANGUAGE OverloadedStrings #-}
import Network.HTTP.Conduit
import qualified Data.ByteString.Lazy as L

main = do
    initReq <- parseUrl "http://localhost/test.php"
    let req = (flip urlEncodedBody) initReq $
                  [ ("", "Test string")
                  -- ,
                  ]
    response <- withManager $ httpLbs req
    L.putStr $ responseBody response
The "Test string", in the above example, is urlEncoded before being posted.
You can also set the method, content-type, and request body manually. The API is the same as in http-enumerator; a good example is:
https://stackoverflow.com/a/5614946
