Web scraping stock data from Reuters - web-scraping

I am a programming beginner trying to extract key metric data (e.g. Beta) for a stock from Reuters. However, the result always comes back blank.
My code looks like this:
from bs4 import BeautifulSoup as bs
import requests
import re
url = 'https://www.reuters.com/markets/companies/TSLA.OQ/key-metrics/price-and-volume'
page = requests.get(url)
bs1 = bs(page.text, 'html.parser')
beta=bs1.find_all('th', class_ ='text__text__1FZLe text__dark-grey__3Ml43 text__regular__2N1Xr text__body__yKS5U body__base__22dCE body__body__VgU9Q',text=re.compile('Beta'))
print(beta)
I know this is not correct, but I cannot figure out what to do. Please help. Ultimately, I want to extract the Beta value for a stock from Reuters. Thank you for your help!
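For context, a quick way to confirm why the requests-based approach comes back empty is to check whether the metric is present in the raw HTML at all; the key-metrics table on this page is rendered by JavaScript, so it usually is not. A minimal check, reusing the page object from the snippet above:
# Typically prints False, because the table is injected client-side after page load
print('Beta' in page.text)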

You can scrape the site (without inspecting the JavaScript/JSON) using Selenium. This example still parses the rendered page with bs4, as in my previous answer, but you could use Selenium's own locator functions instead.
from selenium import webdriver
from bs4 import BeautifulSoup as bs
# Initiate webdriver
driver = webdriver.Firefox()
# Fetch the web page
driver.get('https://www.reuters.com/markets/companies/TSLA.OQ/key-metrics/price-and-volume')
# Convert the driver page source to a soup object
soup = bs(driver.page_source, 'html.parser')
# Find the table you want to scrape
table = soup.find('table', attrs={'aria-label':'KeyMetrics'})
# Locate the Keys and Value for each of the rows
keys = [i.text for i in table.select('tbody tr th') if i]
values = [i.text for i in table.select('tbody tr td') if i]
# Convert the two lists into a dictionary for a neater output
data = dict(zip(keys,values))
driver.quit()
print(data)
This will return:
{'Price Closing Or Last Bid': '699.20', 'Pricing Date': 'Jul 05', '52 Week High': '1,243.25', '52 Week High Date': 'Nov 04', '52 Week Low': '620.50', '52 Week Low Date': 'Jul 08', '10 Day Average Trading Volume': '31.36', '3 Month Average Trading Volume': '602.72', 'Market Capitalization': '724,644.30', 'Beta': '2.13', '1 Day Price Change': '2.55', '5 Day Price Return (Daily)': '-4.84', '13 Week Price Return (Daily)': '-35.93', '26 Week Price Return (Daily)': '-39.18', '52 Week Price Return (Daily)': '2.99', 'Month To Date Price Return (Daily)': '3.83', 'Year To Date Price Return (Daily)': '-33.84', 'Price Relative To S&P500 (4 Week)': '5.95', 'Price Relative To S&P500 (13 Week)': '-24.33', 'Price Relative To S&P500 (26 Week)': '-23.90', 'Price Relative To S&P500 (52 Week)': '16.99', 'Price Relative To S&P500 (YTD)': '-17.69'}
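If only the Beta figure is needed, it can be read straight out of the resulting dictionary; this is just a small addition, with the key name taken from the sample output above:
# Look up a single metric from the dict built above
beta = data.get('Beta')
print(beta)  # '2.13' in the sample output above
If the table has not finished rendering when page_source is read, waiting for it (for example with Selenium's WebDriverWait) may be necessary.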

Here's one way of collecting the data you need:
from bs4 import BeautifulSoup as bs
import requests
import re
url = 'https://www.reuters.com/markets/companies/TSLA.OQ/key-metrics/price-and-volume'
page = requests.get(url)
soup = bs(page.text, 'html.parser')
# Locate the Table you wish to scrape
table = soup.select_one('table.table__table__2px_A')
# Locate the Keys and Value for each of the rows
keys = [i.text for i in table.select('tr th') if i]
values = [i.text for i in table.select('tr td') if i]
# Convert the two lists into a dictionary for a neater output
data = dict(zip(keys,values))
This will return:
{'% Change': '671.00',
'Brent Crude Oil': '-1.40%Negative',
'CBOT Soybeans': '1,626.00',
'Copper': '111.91',
'Future': '1,805.20',
'Gold': '-0.57%Negative',
'Last': '+0.35%Positive'}
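If the hashed class name keeps matching the wrong table, one possible refinement is to scan every table for a row that mentions "Beta" instead of relying on the class name. Note, however, that the key-metrics table may not exist in the static HTML at all, since the page is largely rendered client-side (which is why the Selenium answer above works); this is only a sketch:
# Sketch: scan all tables in the static HTML for a row labelled "Beta"
for table in soup.find_all('table'):
    label = table.find(string=re.compile('Beta'))
    if label:
        row = label.find_parent('tr')
        value = row.find('td') if row else None
        if value:
            print(value.get_text(strip=True))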

Related

How to create a custom entry/exit signal in VectorBT?

Thanks for reading!
I'm trying to backtest a strategy that I wrote in Pine Script, and I'm struggling to create my entry conditions.
This is the code:
import numpy as np
import pandas as pd
import vectorbt as vbt
from datetime import datetime
from binance.client import Client
symbols=['SOLUSDT', 'BNBUSDT']
price = vbt.BinanceData.download(
    symbols,
    start='5 days ago UTC',
    end='Now UTC',
    interval='30m',
    missing_index='drop'
).get(['High', 'Low', 'Open', 'Close'])
high = price[0]
low = price[1]
open = price[2]
close = price[3]
stoch = vbt.STOCH.run(
    high=high,
    low=low,
    close=close,
    k_window=14
)
And I want to add:
entries = abs(stoch.percent_k['SOLUSDT'] -
              stoch.percent_k['SOLUSDT']) > 50  # (my intention with abs is to get the absolute value)
exits = abs(stoch.percent_k['SOLUSDT'] -
             stoch.percent_k['SOLUSDT']) < 5
portfolio = vbt.Portfolio.from_signals(price[3], entries, exits, init_cash=10000)
My intention is to trigger a short order in one symbol and a long order in the other simultaneously with those signals.
Any recommendation about where to find educational resources for this particular package (besides the official site) is welcome. I have read the examples in the docs, but it still feels a bit too complex for my level.
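A hedged sketch of one possible reading of the intent, assuming the second operand was meant to be 'BNBUSDT' (subtracting percent_k['SOLUSDT'] from itself is always zero, so entries would never fire); this is not an authoritative answer:
# Sketch: absolute spread between the two symbols' %K lines
spread = (stoch.percent_k['SOLUSDT'] - stoch.percent_k['BNBUSDT']).abs()
entries = spread > 50
exits = spread < 5
# Long-only example on SOL's close; shorting the other leg would need a second
# portfolio or the short-signal options that from_signals accepts.
portfolio = vbt.Portfolio.from_signals(close['SOLUSDT'], entries, exits, init_cash=10000)
print(portfolio.stats())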

My result only gets one row of data. Can you help me get more data and save it to a file?

import requests
from bs4 import BeautifulSoup
import pandas as pd
URL = 'http://h1.nobbd.de/index.php?start='
for page in range(1,10):
    req = requests.get(URL + str(page) + '=')
    soup = BeautifulSoup(req.text, 'html.parser')
    h1 = soup.find_all('div',attrs={'class','report-wrapper'})
    for hack in h1:
        h2 = hack.find_all("div",attrs={"class","report"})
        for i in h2:
            layanan = i.find_all('b')[0].text.strip()
            report = i.find_all('a')[2].text.strip()
            bug_hunter = i.find_all('a')[1].text.strip()
            mirror = i.find("a", {"class": "title"})['href']
            date = i.find_all("div", {"class": "date"})
            for d in date:
                waktu = d.text
            data = {"Company": [layanan], "Title:": [report], "Submit:": [bug_hunter], "Link:": [mirror], "Date:": [waktu]}
            df = pd.DataFrame(data)
df.head()
index  Company  Title:                                                          Submit:      Link:                                   Date:
0      Reddit   Application level DOS at Login Page ( Accepts Long Password )   e100_speaks  https://hackerone.com/reports/1168804  03 Feb 2022
What happens?
Based on your question's code, you overwrite your DataFrame on every iteration; that's why you only get one result.
How to fix?
Create an empty list before your loops
Append all the extracted dicts to this list
Create your dataframe based on that list of dicts
Example
import requests
from bs4 import BeautifulSoup
import pandas as pd
data = []
url = 'http://h1.nobbd.de/index.php?start='
for page in range(1,3):
    req = requests.get(url + str(page))
    soup = BeautifulSoup(req.text, 'html.parser')
    h1 = soup.find_all('div',attrs={'class','report-wrapper'})
    for hack in h1:
        h2 = hack.find_all("div",attrs={"class","report"})
        for i in h2:
            layanan = i.find_all('b')[0].text.strip()
            report = i.find_all('a')[2].text.strip()
            bug_hunter = i.find_all('a')[1].text.strip()
            mirror = i.find("a", {"class": "title"})['href']
            date = i.find_all("div", {"class": "date"})
            for d in date:
                waktu = d.text
            # append plain values (not one-element lists) so each dict becomes one row
            data.append({'Company': layanan, 'Title': report, 'Submit': bug_hunter, 'link': mirror, 'Date': waktu})
df = pd.DataFrame(data)
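Since the question also asks about saving the result to a file, a small addition (the filename here is just an assumption):
# Write the collected rows to CSV; the filename is an example
df.to_csv('reports.csv', index=False)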

Warp10 and streamlit integration?

Two simple questions:
Does Warp10 integrate with streamlit to feed visualisations?
If so, could you specify how this can be accomplished?
Thanking you in advance.
Best wishes,
There's no direct integration of Warp 10 in streamlit.
Although streamlit can handle any kind of data, it's mainly focused on pandas DataFrame. DataFrames are tables whereas Warp 10 Geo Time Series are time series. So even if Warp 10 was integrated in streamlit, it would require some code to properly format the data for streamlit to give its full potential.
That being said, here is a small example on how to display data stored in Warp 10 with streamlit:
import json
from datetime import datetime, timedelta
import requests
import streamlit as st
from bokeh.palettes import Category10_10 as palette
from bokeh.plotting import figure
# Should be put in a configuration file.
fetch_endpoint = 'http://localhost:8080/api/v0/fetch'
token = 'READ' # Change that to your actual token
def load_data_as_json(selector, start, end):
    headers = {'X-Warp10-Token': token}
    params = {'selector': selector, 'start': start, 'end': end, 'format': 'json'}
    r = requests.get(fetch_endpoint, params=params, headers=headers)
    return r.text
st.title('Warp 10 Test')
# Input parameters
selector = st.text_input('Selector', value="~streamlit.*{}")
start_date = st.date_input('Start date', value=datetime.now() - timedelta(days=10))
start_time = st.time_input('Start time')
end_date = st.date_input('End date')
end_time = st.time_input('End time')
# Convert datetime.dates and datetime.times to microseconds (default time unit in Warp 10)
start = int(datetime.combine(start_date, start_time).timestamp()) * 1000000
end = int(datetime.combine(end_date, end_time).timestamp()) * 1000000
# Make the query to Warp 10 and get back a json.
json_data = load_data_as_json(selector, start, end)
gtss = json.loads(json_data)
# Iterate through the json and populate a Bokeh graph.
p = figure(title='GTSs', x_axis_label='time', y_axis_label='value')
for gts_index, gts in enumerate(gtss):
    tss = []
    vals = []
    for point in gts['v']:
        tss.append(point[0])
        vals.append(point[-1])
    p.line(x=tss, y=vals, legend_label=gts['c'] + json.dumps(gts['l']), color=palette[gts_index % len(palette)])
st.bokeh_chart(p, use_container_width=True)
# Also display the json.
st.json(json_data)
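Assuming the snippet is saved as a standalone script, it can then be launched with streamlit run <your_script>.py (filename up to you); Streamlit serves the page and re-runs the script whenever one of the input widgets changes.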

Instaloader data scraping using specific hashtag and timeframe

I need help using instaloader to scrape Instagram posts that include #slowfashion from a specific timeframe.
I want to scrape the visual and textual data from the posts (specifically, the image/s posted, their descriptions, and comments).
from datetime import datetime
from itertools import dropwhile, takewhile
import instaloader
# Use parameters to save different metadata
L = instaloader.Instaloader(download_pictures=True,download_videos=False,download_comments=False,save_metadata=True)
# Login
username = input("Enter your username: ")
L.interactive_login(username=username)
# User Query
search = input("Enter Hashtag: ")
limit = int(input("How many posts to download: "))
# Hashtag object
hashtags = instaloader.Hashtag.from_name(L.context, search).get_posts()
# Download Period
SINCE = datetime(2021, 5, 1)
UNTIL = datetime(2021, 3, 1)
no_of_downloads = 0
for post in takewhile(lambda p: p.date > UNTIL, dropwhile(lambda p: p.date > SINCE, hashtags)):
    if no_of_downloads == limit:
        break
    print(post.date)
    L.download_post(post, "#"+search)
    no_of_downloads += 1
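One observation, not part of the original question: the snippet sets download_comments=False even though comments are wanted, and the caption (description) is available on each post object. A minimal sketch of those two adjustments:
# Sketch: also download comments, and read the caption for each matched post
L = instaloader.Instaloader(download_pictures=True, download_videos=False,
                            download_comments=True, save_metadata=True)
# and, inside the download loop above:
#     print(post.caption)  # the post's description text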

Get data-reactid with BeautifulSoup

I want code that gives me the stock's change since yesterday. Right now that would be roughly +5 (+0.59%). What my code returns instead is: +110$
import requests
from bs4 import BeautifulSoup

def tesla_deltaTracker():
    url = "https://finance.yahoo.com/quote/TSLA/"
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'lxml')
    price = soup.find('span', {"data-reactid":'33'}).text
    return (price)
There is more than one tag with that data-reactid attribute; you want the second one:
price = soup.find_all('span', {"data-reactid":'33'})[1].text
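For context, here is the same fix applied inside the original function. This is only a sketch: data-reactid values are generated by React and can change whenever Yahoo redeploys the page, so neither the attribute value nor the index is guaranteed to stay stable.
import requests
from bs4 import BeautifulSoup

def tesla_deltaTracker():
    url = "https://finance.yahoo.com/quote/TSLA/"
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'lxml')
    # Take the second span carrying data-reactid="33" (per the answer above, the daily change)
    return soup.find_all('span', {"data-reactid": '33'})[1].text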
