I want to get info for different user_ids from an API using Python requests. I can use a loop and change the id every time, but it is slow. Is there a simpler way to do this?
import requests
from pprint import pprint

url = "....../api"
paras = {
    'username': 'guest',
    'password': '123456',
    'method': 'location_info',
    'barcode': ['1150764', '1150765'],
}
r = requests.get(url, params=paras, verify=False)
pprint(r.json())
The result only returns the info for the latter barcode, '1150765'. Is there a way to query 100 barcodes at the same time?
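If the API only honors a single barcode per request (which is what the result above suggests), one option is to send the requests concurrently instead of one after another. Below is a minimal sketch using a thread pool and a shared Session, reusing the credentials and placeholder URL from the question; max_workers=10 is an arbitrary choice, and whether the server tolerates parallel requests is an assumption you should verify.

import requests
from pprint import pprint
from concurrent.futures import ThreadPoolExecutor

url = "....../api"
barcodes = ['1150764', '1150765']  # extend this to your 100 barcodes

session = requests.Session()  # reuse one connection for all requests

def fetch(barcode):
    # Same query as above, but with a single barcode per request
    params = {
        'username': 'guest',
        'password': '123456',
        'method': 'location_info',
        'barcode': barcode,
    }
    r = session.get(url, params=params, verify=False)
    return barcode, r.json()

# Up to 10 requests in flight at once; tune this to what the server tolerates
with ThreadPoolExecutor(max_workers=10) as pool:
    results = dict(pool.map(fetch, barcodes))

pprint(results)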
I have written a web scraper which needs to scrape a few hundred pages asynchronously in Playwright-Python after logging in.
I've come across aiometer by Florimond Manca (https://github.com/florimondmanca/aiometer) to limit requests in the main async function - this works well.
The problem I'm having at the moment is closing the pages after they've been scraped. The async function keeps opening pages - as it should - but memory consumption increases significantly once a few hundred are loaded.
In the function I open a browser context and pass it to each async scraping request per page, the rationale being that it reduces memory overhead and preserves the state from my login function (implemented in my main script - not shown).
How can I close the pages after they've been scraped (in the scrape function)?
import asyncio
import functools

from playwright.async_api import async_playwright
from bs4 import BeautifulSoup
import pandas as pd
import aiometer

urls = [
    "https://scrapethissite.com/pages/ajax-javascript/#2015",
    "https://scrapethissite.com/pages/ajax-javascript/#2014",
    "https://scrapethissite.com/pages/ajax-javascript/#2013",
    "https://scrapethissite.com/pages/ajax-javascript/#2012",
    "https://scrapethissite.com/pages/ajax-javascript/#2011",
    "https://scrapethissite.com/pages/ajax-javascript/#2010"
]

async def scrape(context, url):
    page = await context.new_page()
    await page.goto(url)
    await page.wait_for_load_state(state="networkidle")
    await page.wait_for_timeout(1000)
    # Getting results off the page
    html = await page.content()
    soup = BeautifulSoup(html, "lxml")
    tables = soup.find_all('table')
    dfs = pd.read_html(str(tables))
    df = dfs[0]
    print("Dataframe in page " + url + " scraped")
    page.close
    return df

async def main(urls):
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=False)
        context = await browser.new_context()
        master_results = pd.DataFrame()
        async with aiometer.amap(
            functools.partial(scrape, context),
            urls,
            max_at_once=5,     # Limit maximum number of concurrently running tasks.
            max_per_second=3,  # Limit request rate to not overload the server.
        ) as results:
            async for data in results:
                print(data)
                master_results = pd.concat([master_results, data], ignore_index=True)
        print(master_results)

asyncio.run(main(urls))
I've tried putting the await keyword before page.close() or context.close(), but that throws an error: "TypeError: object method can't be used in 'await' expression".
After reading a few pages, including the Playwright bug tracker on GitHub (https://github.com/microsoft/playwright/issues/10476), I found the problem:
I had forgotten to add parentheses to my page.close call.
page.close()
So simple - and yet it took me hours to get there. Probably part of learning to code.
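For reference, this is how the end of my scrape function looks with the fix applied; the try/finally is an extra precaution I'd suggest so the page is closed even if parsing throws, not part of the original one-character fix:

async def scrape(context, url):
    page = await context.new_page()
    try:
        await page.goto(url)
        await page.wait_for_load_state(state="networkidle")
        await page.wait_for_timeout(1000)
        # Getting results off the page
        html = await page.content()
        soup = BeautifulSoup(html, "lxml")
        tables = soup.find_all('table')
        dfs = pd.read_html(str(tables))
        df = dfs[0]
        print("Dataframe in page " + url + " scraped")
        return df
    finally:
        # The missing parentheses (plus await) were the whole problem
        await page.close()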
I'm trying to use the Python Telethon library to find all channels whose username (e.g. #xxxxxxx) contains a user-entered keyword. I'm using the code below for this purpose, but for some reason it's not giving all the results that I want. For example, if I type "sonic", it gives me these results:
https://t.me/sonicboyofficial
https://t.me/SONICc29
I was hoping that the results would have this channel:
https://t.me/sonicsnipereth
but it's not part of the output. Does anyone know what I can do to improve this script?
from telethon.errors import SessionPasswordNeededError
from telethon.sync import TelegramClient
from telethon import functions
import configparser
import os, json

def parse_api():
    """Read the hash from config file"""
    config_parser = configparser.ConfigParser()
    read_config = config_parser.read("api.ini")
    if not read_config:
        """Write api id and hash in the api.ini file if not found"""
        print("If you do not have api id and api hash, please get them from https://my.telegram.org/")
        write_config = configparser.ConfigParser()
        config_parser.add_section("API_CONFIG")
        api_id = input("Enter api id:")
        api_hash = input("Enter api hash:")
        # set the configs
        config_parser.set("API_CONFIG", "api_id", api_id.strip())
        config_parser.set("API_CONFIG", "api_hash", api_hash.strip())
        with open('api.ini', 'w') as api:
            config_parser.write(api)
    else:
        config_parser.read("api.ini")
    api_id = config_parser.get("API_CONFIG", "api_id")
    api_hash = config_parser.get("API_CONFIG", "api_hash")
    return api_id, api_hash

api_id, api_hash = parse_api()
client = TelegramClient("search", api_id, api_hash)

async def main():
    await client.start()
    if not await client.is_user_authorized():
        await client.send_code_request(phone)
        try:
            await client.sign_in(phone, input('Enter the code: '))
        except SessionPasswordNeededError:
            await client.sign_in(password=input('Password: '))
    print("""
    ====================================================
      Welcome to Searching telegram groups/channels
                  with search query.
    =======================================================
    """)
    while True:
        search = input("Please enter search query:")
        result = await client(functions.contacts.SearchRequest(
            q=search,
            limit=2000
        ))
        d = result.to_dict()
        usernames = []
        for item in d['chats']:
            usernames.append(item['username'])
        if not usernames:
            print()
            print()
            print("No groups found. try again?")
        for username in usernames:
            print("https://t.me/" + username)
            """Saving to GroupList.txt file for auto joining"""
            with open("GroupList.txt", "+a") as gl:
                gl.write("https://t.me/" + username)
                gl.write("\n")
        print()
        print("File Saved to " + os.getcwd())
        choice = input("You wanna searching more..., press(y):")
        print()
        if choice != "y":
            break

with client:
    client.loop.run_until_complete(main())
I'm attempting to scrape the data from a table on the following website: https://droughtmonitor.unl.edu/DmData/DataTables.aspx
import requests
from bs4 import BeautifulSoup
url = 'https://droughtmonitor.unl.edu/DmData/DataTables.aspx'
r = requests.get(url)
soup = BeautifulSoup(r.content, 'html.parser')
drought_table = soup.find('table', {'id':'datatabl'}).find('tbody').find_all('tr')
For some reason I am getting no output. I've tried to use pandas for the same job:
import pandas as pd
url = 'https://droughtmonitor.unl.edu/DmData/DataTables.aspx'
table = pd.read_html(url)
df = table[0]
But that also ended up giving an empty DataFrame.
What could be causing this?
Checking the browser's network tool, it's obvious the site uses Fetch/XHR to load the table in a separate request.
Image: network monitor
You can use this code to get table data:
import requests
import json

headers = {
    'Content-Type': 'application/json; charset=utf-8',
}

params = (
    ('area', "'conus'"),
    ('statstype', "'1'"),
)

response = requests.get(
    'https://droughtmonitor.unl.edu/DmData/DataTables.aspx/ReturnTabularDMAreaPercent_national',
    headers=headers, params=params,
)
table = json.loads(response.content)

# Code generated by https://curlconverter.com/
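If you want the result as a DataFrame, like in your pandas attempt, here is a small sketch; I'm assuming the payload is either a plain list of records or an ASP.NET-style {'d': [...]} wrapper, so check the actual structure of table first:

import pandas as pd

# Unwrap the ASP.NET 'd' envelope if present, otherwise use the payload as-is
data = table.get('d', table) if isinstance(table, dict) else table
df = pd.DataFrame(data)
print(df.head())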
My code can show the info I want in the terminal. But when I prompt it to send that as a Discord message, it appears to be attempting to send a blank message. It's probably something stupid, but thank you for looking. The language is Python.
import os

import discord
import requests
import json
import pprint

client = discord.Client()

def get_time():
    response = requests.get("http://api.timezonedb.com/v2.1/get-time-zone?key=W9BJQ3QMGG69&format=json&by=position&lat=37.9838&lng=23.7275")
    return pprint.pprint(response.json())

@client.event
async def on_ready():
    print('We have logged in as {0.user}'.format(client))

@client.event
async def on_message(message):
    if message.author == client.user:
        return
    if message.content.startswith('$petertest'):
        clock = get_time()
        await message.channel.send(clock)

client.run(os.environ['TOKEN'])
You are using the pprint module to print the data to the console itself; pprint.pprint returns None, which is why the message ends up blank.
Changing your code to simply return the data will fix the error:
return response.json()
If you want to send the formatted json data to discord, you can use json.dumps:
if message.content.startswith('test'):
    clock = get_time()
    clock = json.dumps(clock, indent=4)
    await message.channel.send(clock)
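If you also want Discord to render the JSON monospaced, you can wrap the dumped string in a code block before sending; this is just a formatting suggestion on top of the fix above:

if message.content.startswith('test'):
    clock = get_time()
    clock = json.dumps(clock, indent=4)
    # Triple backticks make Discord display the payload as a code block
    await message.channel.send(f"```json\n{clock}\n```")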
I have an application running with PHP and cURL.
My idea is to move the application to Python/Django/Requests.
I have not been able to get it working; I hope you can give me a hand, please.
The application works as follows:
It collects a number and a message, and sends an SMS through an API.
PHP code:
http://pastebin.com/PqpBgstD
import requests
import cookielib

posdata = "p_num_text=00513015924048&smstemplate=&message=message_sending&txtcount=8+char+%3A+1+Sms&hiddcount=152"

jar = cookielib.CookieJar()

user = 'xxx'
pass = 'xxx'

values = {'app': 'page', 'inc': 'login', 'op': 'auth_login',
          'username': user, 'password': pass}  # data login
r = requests.post(url, data=values, cookies=jar)  # Login

values = {'app': 'menu', 'inc': 'send_sms',
          'op': 'sendsmstopv_yes'}  # values to enter to send the sms
r = requests.post(url, data=values, params=posdata, cookies=jar)  # enter the area sms
print r.content
How can I translate the cURL code into Requests?
Is the above code fine?
Your code will not work as written. I've attached corrected code below; note that you don't need cookielib, since Requests generates its own CookieJar object.
import requests

url = "http://dominio.com/subdominio/index.php"

username = 'xxx'
password = 'xxx'

payload = {
    'app': 'page',
    'inc': 'login',
    'op': 'auth_login',
    'username': username,
    'password': password}

r = requests.post(url, data=payload)  # Login

cSMS = "Sms"
payload = {
    'p_num_text': '00513015924048',
    'smstemplate': '',
    'message': 'message_sending',
    'txtcount': '8',
    'char': cSMS,  # your php code seems to be off for this one, double check it
    'hiddcount': '153'}

url = "http://dominio.com/subdominio/index.php?app=menu&inc=send_sms&op=sendsmstopv_yes"
r = requests.post(url, data=payload, cookies=r.cookies)  # enter the area sms
print(r.text)
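As a variant, here is a sketch of the same flow using requests.Session, so the login cookies are carried over automatically instead of being passed by hand; it reuses the URLs and field names from the answer above and is untested against your gateway:

import requests

login_url = "http://dominio.com/subdominio/index.php"
sms_url = "http://dominio.com/subdominio/index.php?app=menu&inc=send_sms&op=sendsmstopv_yes"

with requests.Session() as s:
    # Log in once; the session keeps the cookies for every later request
    s.post(login_url, data={
        'app': 'page', 'inc': 'login', 'op': 'auth_login',
        'username': 'xxx', 'password': 'xxx',
    })
    # Send the SMS with the same session
    r = s.post(sms_url, data={
        'p_num_text': '00513015924048',
        'smstemplate': '',
        'message': 'message_sending',
        'txtcount': '8',
        'char': 'Sms',
        'hiddcount': '153',
    })
    print(r.text)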