I am having trouble with Beautiful Soup. I am trying to scrape Kayak, but when I print the length of the find_all it is returning 0. I am using selenium in conjunction with Beautiful Soup as well.
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
chrome_options = Options()
chrome_options.add_argument("--headless")
from bs4 import BeautifulSoup
from webdriver_manager.chrome import ChromeDriverManager
## Kayak URL
origin = "PIT"
destination = "ARN"
startdate = "2022-12-18"
url = "https://www.kayak.com/flights/" + origin + "-" + destination + "/" +\
startdate + "?sort=price_a "
## Setting Up Webdriver
driver = webdriver.Chrome(ChromeDriverManager().install(), options=chrome_options)
driver.implicitly_wait(40)
driver.get(url)
soup = BeautifulSoup(driver.page_source, "lxml")
print(len(soup.find_all("span", attrs={'class': 'depart-time base-time'})))
deptimes = soup.find_all("span", attrs={'class': 'depart-time base-time'})
arrtimes = soup.find_all('span', attrs={'class': 'arrival-time base-time'})
meridies = soup.find_all('span', attrs={'class': 'time-meridiem meridiem'})
This is what I am trying to take from the kayak website.
<span class="depart-time base-time">12:45 </span>
With playwright python you can do something like this, it will be very similar with selenium.
You need to select each box of flight and after iterate each element extracting data that you want.
Here you have an example:
from playwright.sync_api import sync_playwright
with sync_playwright() as p:
browser = p.chromium.launch(headless=False)
## Kayak URL
origin = "PIT"
destination = "ARN"
startdate = "2022-12-18"
url = "https://www.kayak.com/flights/" + origin + "-" + destination + "/" + \
startdate + "?sort=price_a "
page = browser.new_page()
page.goto(url)
#select all boxes of each flight
allFlight = page.query_selector_all(".resultInner")
#extract data from each flight
for index, flight in enumerate(allFlight, start=1):
print("Flight nÂș: "+str(index))
print("From " + flight.query_selector(".depart-time").inner_text()
+ flight.query_selector(".depart-time+span").inner_text()
+ " To " + flight.query_selector(".arrival-time").inner_text()
+ flight.query_selector(".arrival-time+span").inner_text())
print("Flight companies: " + flight.query_selector(".section.times .bottom,.multi-airline-names").inner_text().strip())
browser.close()
OUTPUT:
Here you have playwright documentation.
I hope i was able to help you.
Related
I am trying to create ASCII files of acceleration data from various New Zealand earthquakes, code attached below. I was previously able to generate ASCII files of the acceleration data, however, now running the same code I get the message:
UserWarning: The StationXML file has version 1, ObsPy can read versions (1.0, 1.1). Proceed with caution.
And the acceleration files are no longer being written. Please let me know if there is a way to fix this issue.
import os
from obspy import UTCDateTime
from obspy.clients.fdsn import Client as FDSN_Client
from obspy import read_inventory
from obspy.geodetics import kilometers2degrees
client = FDSN_Client("GEONET")
evid="2016p858000"
minrad=kilometers2degrees(0)
maxrad=kilometers2degrees(30)
cat = client.get_events(eventid=evid)
print(cat)
event = cat[0]
origin = event.origins[0]
otime = origin.time
print(otime)
otime = cat[0].origins[0].time
print(otime)
inventory = client.get_stations(latitude=cat[0].origins[0].latitude,
longitude=cat[0].origins[0].longitude,
minradius=minrad,
maxradius=maxrad,
channel="H??",
level="channel",
starttime = otime-60,
endtime = otime+4*60).remove(channel='HNZ')
print(inventory)
from obspy import Stream
st = Stream()
for network in inventory:
for station in network:
try:
st += client.get_waveforms(network.code, station.code, "*", "H??",
otime-60, otime + 4*60, attach_response=True).remove(channel='HNZ')
st_rem1=st.copy()
pre_filt = (0.025, 0.03, 70.0, 80.0)
acc = st_rem1.copy()
acc.remove_response(output='ACC', pre_filt=pre_filt)
print(acc[0])
acc.plot()
acc[0].write('MS_' + evid + '_' + network.code + '_' + station.code + '_' + station[0].code + '.ascii', format='SLIST')
acc[1].write('MS_' + evid + '_' + network.code + '_' + station.code + '_' + station[1].code + '.ascii', format='SLIST')
except:
pass
Containername argument is the one which I am not understanding how to be accessed from configuration file
Test_steps.file
import datetime
from behave import *
import os, json, random, datetime
from Utilities.KafkaConsumer.SupplyChain import Consumer_TransactionReceipts as CTR
from Utilities.KafkaConsumer.SupplyChain import Consumer_EODSnapshot as CES
from Utilities import CosmosDB as Cdb
#given('Delivery receipts are consumed from Kafka topic')
def delivery_receipts_consumption(context):
obj = CTR.TransactionReceipt()
context.message = obj.consume_kafka()
print(context.message)
#then('Calculate the Cost of goods receipted for a SKU and store')
def purchase_volume_validation(context):
location = '6228'
skunumber = '8091776'
today = datetime.date.today()
yesterday = str(today - datetime.timedelta(days=1))
print("Yesterday's date:", yesterday)
cosmosquerydr = "select dr.quantityOfUnits,dr.ownerOnDespatch,dr.packSize from dr where dr.SKUNumber = " + skunumber + " and dr.globalLocationNumber = " + location + " and dr.createdDate like '" + yesterday + "%' "
directreceipts = Cdb.CosmosDB.query_cosmos_db(cosmosquerydr, containername)
directreceipt = [json.loads(d) for d in directreceipts]
So I am kinda new with Pyrogram and I want to create my own Genshin Bot. After using the command redeem code, I want the message to be taken and stored as variable. so can anyone help me with that
after taking the code as input from user I would be able to use genshin.py api wrapper to redeem code. Just need help with getting message and storing it as variable.
import genshin
import os
from dotenv import load_dotenv
from pyrogram import Client, filters
load_dotenv()
global chatid
chatid = 842544591
global uid
uid = os.getenv("uid")
ltuid = os.getenv("ltuid")
ltoken = os.getenv("ltoken")
cookie_token = os.getenv("cookie_token")
api_id = os.getenv("api_id")
api_hash = os.getenv("api_hash")
bot_token = os.getenv("bot_token")
cookies = {"ltuid": ltuid,
"ltoken": ltoken,
"cookie_token": cookie_token,
"uid": uid}
client = genshin.Client(cookies)
bot = Client(
"Genshin Bot",
api_id=api_id,
api_hash=api_hash,
bot_token=bot_token
)
#bot.on_message(filters.command('start'))
def start_command(bot, message):
message.reply_text(
"Welcome to Genshin Auto Tasks Bot.\nFor Getting Started Use /help command.")
#bot.on_message(filters.command('help'))
def help_command(bot, message):
message.reply_text("This is Bot's Help Section")
#bot.on_message(filters.command('notes'))
async def get_notes(bot, message):
data = await client.get_full_genshin_user(uid)
notes = await client.get_notes(uid)
active_days = (data.stats.days_active)
total_characters = (data.stats.characters)
abyss_total_stars = (data.abyss.previous.total_stars)
resin_count = notes.current_resin
resin_recovery_time = notes.remaining_resin_recovery_time
await message.reply_text("Pranay Asia" + "\n" +
"uid : " + str(uid) + "\n" +
"-----------------------------------------------------------------" + "\n" +
"Resin Count: " + str(resin_count) + "/" + str(notes.max_resin) + "\n" +
"Countdown to next resin recovery: " + str(resin_recovery_time) + "\n" +
"Total No. of Active Days: " + str(active_days) + "\n" +
"Total No. of Characters: " + str(total_characters) + "\n" +
"Total Stars in Abyss: " + str(abyss_total_stars)
)
#bot.on_message(filters.command('redeemcode'))
def redeem_code(bot, message):
message.reply_text("Send the Code to Redeem")
bot.run()
try message.text
I use it as a userbot but nothing changes so much. This piece of code saves the sent message to a variable and filters out the command itself. For a bot it will be easier: answer = message.text
#app.on_message(filters.command("ns", prefixes=".") & filters.text)
async def EXAMPLE(_,msg):
orig_text = msg.text.split(".ns ", maxsplit=1)[1]
text = orig_text
With the script below i am trying to get the error text from a span error text box for first name when a name is not entered and the user clicks on the submit button, from a registration screen with the HTML below.(https://www.walmart.com/account/signup. However I end up just getting none instead of the text. When i try to get the text from chrome console i receive the text with the xpath and css selector as "$x("//span[#class='span-error']")[2]" and "$$(".span-error")[2]". I would like some guidance as to what I am missing, i think have tried everything i can think of.
Test case:
Get to the link Enter nothing on the first name,Get a validation
error Assert the validation error Enter an invalid name as ":::" Get
a validation error (they seem to have a couple)and so on
from time import sleep
from conftest import os
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
# LoginScreenSelectors
sel_first_name = "#first-name-su"
sel_last_name = "#last-name-su"
sel_email = "#email-su"
sel_password = "#password-su"
sel_error_message = ".span-error:nth-of-type(2)"
sel_submit = f"""[data-automation-id="signup-submit-btn"]"""
sel_required_error = f"""[data-error="required"]"""
class Login():
'''This will signup to the applcaiton '''
def __init__(self, driver):
self.driver = driver
wait = WebDriverWait(self.driver, 20)
self.first_name = wait.until(
EC.presence_of_element_located((By.CSS_SELECTOR, sel_first_name))
)
self.last_name = wait.until(
EC.visibility_of_element_located((By.CSS_SELECTOR, sel_last_name))
)
self.email = wait.until(
EC.visibility_of_element_located((By.CSS_SELECTOR, sel_email))
)
self.password = wait.until(
EC.visibility_of_element_located((By.CSS_SELECTOR, sel_password))
)
self.submit = wait.until(
EC.visibility_of_element_located((By.CSS_SELECTOR, sel_submit))
)
def login_to_website(self):
wait = WebDriverWait(self.driver, 10)
print(self.driver.title)
self.first_name.click()
self.first_name.send_keys(" ")
self.first_name.send_keys(Keys.TAB)
self.first_name_error_message = wait.until(
EC.presence_of_all_elements_located((By.CSS_SELECTOR, sel_error_message))
)
type(self.first_name_error_message)
error_messages = []
for messages in self.first_name_error_message:
print(error_messages.append(messages.text))
#if i could get this then i could simply get the first index and then keep filtering
it but even that is proving to be difficult.
Try out this to get all elements with error messages
error_messages = self.driver.find_elements(By.XPATH, "//*[#id="sign-up-form"]/div/span[2]']")
This is how I ended up solving it.
Get the xpath as f"""[data-error="required"]""" and then with a for loop cycle through all the error messages for input fields, I hope this will help anyone that is trying to take a stab at validating happy and negative testing on the same testcase.
from time import sleep
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
# LoginScreenSelectors
sel_first_name = "#first-name-su"
sel_last_name = "#last-name-su"
sel_email = "#email-su"
sel_password = "#password-su"
sel_submit = f"""[data-automation-id="signup-submit-btn"]"""
sel_required_error = f"""[data-error="required"]"""
class Register():
'''This wil register the user to the application '''
def __init__(self, driver):
self.driver = driver
wait = WebDriverWait(self.driver, 20)
self.first_name = wait.until(
EC.presence_of_element_located((By.CSS_SELECTOR, sel_first_name))
)
self.last_name = wait.until(
EC.visibility_of_element_located((By.CSS_SELECTOR, sel_last_name))
)
self.email = wait.until(
EC.visibility_of_element_located((By.CSS_SELECTOR, sel_email))
)
self.password = wait.until(
EC.visibility_of_element_located((By.CSS_SELECTOR, sel_password))
)
self.submit = wait.until(
EC.visibility_of_element_located((By.CSS_SELECTOR, sel_submit))
)
def login_to_website(self):
wait = WebDriverWait(self.driver, 10)
'''This will check for validaitons'''
self.last_name.send_keys("Test Automation Last Name")
self.email.send_keys("a#b.com")
self.password.send_keys("password")
self.submit.click()
self.first_name_error_message = wait.until(
EC.presence_of_all_elements_located((By.CSS_SELECTOR, sel_required_error))
)
for first_name_required_message in self.first_name_error_message:
first_name_message = first_name_required_message.text.replace("\n", ":")
# print(first_name_message)
# print(first_name_message[first_name_message.find(":") + 1:])
assert first_name_message[first_name_message.find(":") + 1:] == "First name is required."
sleep(1)
self.last_name.clear()
self.first_name.click()
self.first_name.send_keys("Test Automation First Name ")
self.submit.click()
self.last_name_error_message = wait.until(
EC.presence_of_all_elements_located((By.CSS_SELECTOR, sel_required_error)))
for last_name_required_message in self.last_name_error_message:
last_name_message = last_name_required_message.text.replace("\n", ":")
assert last_name_message[last_name_message.find(":") + 1:] == "Last name is required."
self.email.clear()
self.last_name.click()
self.last_name.send_keys("Test Automation Last Name ")
self.submit.click()
self.email_error_message = wait.until(
EC.presence_of_all_elements_located((By.CSS_SELECTOR, sel_required_error)))
for email_required_message in self.email_error_message:
email_message = email_required_message.text.replace("\n", ":")
assert email_message[email_message.find(":") + 1:] == "Email address is required."
self.password.clear()
self.email.click()
self.email.send_keys("your#email.com")
self.submit.click()
self.password_error_message = wait.until(
EC.presence_of_all_elements_located((By.CSS_SELECTOR, sel_required_error)))
'''only way i could assert it '''
for password_required_message in self.password_error_message:
password_message = password_required_message.text.replace("\n", " ")
assert password_message[password_message.find("d") + 1:]\
.replace(" ", "") == "passwordisrequired."
self.password.click()
self.password.send_keys("something123$")
self.submit.click()
I have a script that pulls data from a .csv file and writes it to a newly-generated PDF using ReportLab. It works fine but the data itself is skewed all over the page when I open the document as it has been written as a string. Is there any way I can write this data from the .csv file to the PDF in the same format as the .csv file (Table Form) so it is organised and readable?
# Script to generate a PDF report after data has been parsed into simInfo.csv file
import csv
import datetime
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
now = datetime.datetime.now()
def import_data(pdf, data_file):
sim_card_info = csv.reader(open(data_file, "r"))
for row in sim_card_info:
_id = row[0]
icc_id = row[1]
sim_id = row[2]
display_name = row[3]
carrier_name = row[4]
number = row[5]
data_roaming = row[6]
mcc = row[7]
mnc = row[8]
pdf_filename = _id + icc_id + sim_id + display_name + carrier_name + number + data_roaming + mcc + mnc + '.pdf'
generate_report(pdf, _id, icc_id, sim_id, display_name, carrier_name, number, data_roaming, mcc, mnc, pdf_filename)
def generate_report(pdf, _id, icc_id, sim_id, display_name, carrier_name, number, data_roaming, mcc, mnc, pdf_filename):
sim_data = "" + _id + icc_id + sim_id + display_name + carrier_name + number + data_roaming + mcc + mnc
pdf.drawString(50, 100, sim_data)
pdf.showPage()
def front_page():
pdf = canvas.Canvas("H:\College Fourth Year\Development Project\Final Year Project 2018\Forensic Reports\Sim Card Report.pdf", pagesize=letter)
pdf.setLineWidth(.3)
pdf.setFont('Helvetica', 12)
pdf.drawString(30, 750, 'LYIT MOBILE FORENSICS DIVISION')
pdf.drawString(500, 750, "Date: " + now.strftime("%d-%m-%y")) # Prints date of the report(on the fly)
pdf.line(500, 747, 595, 747)
pdf.drawString(500, 725, 'Case Number:')
pdf.drawString(580, 725, "10")
pdf.line(500, 723, 595, 723)
line1 = 'This forensic report on sim card data has been compiled by the forensic'
line2 = 'examiner in conclusion to the investigation into the RTA'
line3 = 'case which occurred on the 23/01/2018.'
textObject = pdf.beginText(30, 700)
lines = [line1, line2, line3]
for line in lines:
textObject.textLine(line)
pdf.drawText(textObject)
return pdf
def main():
data_file = 'H:\College Fourth Year\Development Project\Final Year Project 2018\ExtractedEvidence\simCardInfo.csv'
pdf = front_page()
import_data(pdf, data_file)
pdf.save()
print(" Sim Card Forensic Report Generated!")
if __name__ == '__main__':
main()