How do I make it so that these blank spaces are filled with the next image, instead of opening another page? - docx

How do I make it so that these blank spaces are filled with the next image instead of opening another page? Is there some sort of if statement for docx that goes like this (this is what I mean, visually):
if (space)
-> insert image there
My code:
import docx
from PIL import Image
import os, os.path
from docx.enum.section import WD_ORIENT
from docx.enum.section import WD_SECTION

mydoc = docx.Document()
imgs = []
path1 = "J:/BBIS-G8/Biology/"
valid_images = [".jpg", ".gif", ".png", ".tga"]
a = 0  # image counter (kept outside the loop so it isn't reset every iteration)
for f in os.listdir(path1):
    ext = os.path.splitext(f)[1]
    if ext.lower() not in valid_images:
        continue
    a += 1
    file1 = os.path.join(path1, f)
    # print(file1)
    imgs.append(Image.open(os.path.join(path1, f)))
    current_section = mydoc.sections[0]
    new_width, new_height = current_section.page_height, current_section.page_width
    new_section = mydoc.add_section(WD_SECTION.NEW_PAGE)
    new_section.orientation = WD_ORIENT.LANDSCAPE
    new_section.page_width = new_width
    new_section.page_height = new_height
    mydoc.add_picture(file1, width=docx.shared.Inches(4), height=docx.shared.Inches(5))
mydoc.save("J:/biology.docx")
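python-docx has no "is there space left on this page" test; Word itself decides where pages break when content flows. What forces a new page for every image above is the add_section(WD_SECTION.NEW_PAGE) call inside the loop. A minimal sketch of one way around it, assuming the goal is simply to let pictures fill each landscape page before a new one starts (the Inches(4) width is an assumption; size the images so the desired number fits per page):

import os
import docx
from docx.enum.section import WD_ORIENT
from docx.shared import Inches

mydoc = docx.Document()

# Make the (single) section landscape once, instead of adding a new
# page-breaking section per image.
section = mydoc.sections[0]
section.orientation = WD_ORIENT.LANDSCAPE
section.page_width, section.page_height = section.page_height, section.page_width

path1 = "J:/BBIS-G8/Biology/"
valid_images = [".jpg", ".gif", ".png", ".tga"]
for f in sorted(os.listdir(path1)):
    if os.path.splitext(f)[1].lower() not in valid_images:
        continue
    # No add_section() here: each picture is just appended, so Word
    # fills the current page and only then flows to the next one.
    mydoc.add_picture(os.path.join(path1, f), width=Inches(4))

mydoc.save("J:/biology.docx")

If two images should sit side by side on one line, they can also be added as runs of the same paragraph, e.g. paragraph.add_run().add_picture(...), since inline pictures wrap like text.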

Related

Everything works correctly here. How can we check whether "title" is in the dictionary? I did not add or append anything, but it adds itself.

import csv

titles = {}
with open("sample4.csv", "r") as file:
    reader = csv.DictReader(file)
    for row in reader:
        title = row["GameNumber"].strip().upper()
        if title in titles:
            titles[title] += 1
        else:
            titles[title] = 1

for title in titles:
    print(title, titles[title])
Do you wish to replace your if/else block with something shorter?
You can use:
titles[title] = titles.get(title, 0) + 1
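For what it's worth, collections.Counter from the standard library does the same counting in one pass; a sketch equivalent to the loop above:

import csv
from collections import Counter

with open("sample4.csv", "r") as file:
    # Counter consumes the generator and tallies each normalized title
    titles = Counter(row["GameNumber"].strip().upper()
                     for row in csv.DictReader(file))

for title, count in titles.items():
    print(title, count)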

Wrong page parsed by BeautifulSoup?

I want to enter two values on this website https://hausratversicherung.friday.de/ and retrieve the value after submitting it. I wrote the following code:
import requests, re
from robobrowser import RoboBrowser

br = RoboBrowser(parser='html.parser')
br.open("https://hausratversicherung.friday.de/")
form = br.get_form()
form['area'] = 100
form['postalCode'] = 44326
br.submit_form(form)
src = str(br.parsed)
start = '<div class="Typography-sc-3c3fuf-0 jEIicc" data-testid="totalPrice">'
end = ' €</div>'
result = re.search('%s(.*)%s' % (start, end), src).group(1)
print(result)
But the browser br is not opening the page, and these values are not being submitted.
The postal code 44326 isn't accepted by the server. For other postal codes you can query their API directly:
import json
import requests
area = 100
postalcode = 44309
url = 'https://fdy2-policycenter-production.k8s.blue.friday-prod.de/rest/friday/hc/price?area={area}&postalCode={postalcode}'
data = requests.get(url.format(area=area, postalcode=postalcode)).json()
# uncomment this to print all data:
# print(json.dumps(data, indent=4))
# print some info to screen:
print(data['basicCoverages']['coverages'][0]['insuredSum']['amount'])
print(data['basicCoverages']['coverages'][0]['price']['amount'])
Prints:
65000.0
7.81
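Since some postal codes (like 44326 above) are rejected, a small guard before parsing keeps the script from crashing on a bad response. This is a sketch under the assumption that the API signals rejection via a non-200 status; the exact error format isn't shown here:

import requests

url = ('https://fdy2-policycenter-production.k8s.blue.friday-prod.de'
       '/rest/friday/hc/price?area={area}&postalCode={postalcode}')
response = requests.get(url.format(area=100, postalcode=44326))
if response.ok:
    data = response.json()
    print(data['basicCoverages']['coverages'][0]['price']['amount'])
else:
    # rejected input or server error; inspect the body to see why
    print('Request failed:', response.status_code, response.text[:200])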

How to import data from an HTML table on a website to Excel?

I would like to do some statistical analysis with Python on the live casino game Crazy Time from Evolution Gaming. There is a website that has the data for this: https://tracksino.com/crazytime. I want the data from the lowest table, 'Spin History', to be imported into Excel. However, I do not know how this can be done. Could anyone give me an idea where to start?
Thanks in advance!
Try the below code:
import json
import requests
from urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
import csv
import datetime

def scrap_history():
    csv_headers = []
    file_path = ''  # set this to the directory where the file should be saved
    file_name = 'spin_history.csv'  # filename
    page_number = 1
    while True:
        # Dynamic URL fetching data in chunks of 100
        url = 'https://api.tracksino.com/crazytime_history?filter=&sort_by=&sort_desc=false&page_num=' + str(page_number) + '&per_page=100&period=24hours'
        print('-' * 100)
        print('URL created : ', url)
        response = requests.get(url, verify=False)
        result = json.loads(response.text)  # parse the response as JSON
        history_data = result['data']
        print(history_data)
        if history_data != []:
            with open(file_path + file_name, 'a+') as history:
                # Headers for file
                csv_headers = ['Occured At', 'Slot Result', 'Spin Result', 'Total Winners', 'Total Payout']
                csvwriter = csv.DictWriter(history, delimiter=',', lineterminator='\n', fieldnames=csv_headers)
                if page_number == 1:
                    print('Writing CSV header now...')
                    csvwriter.writeheader()
                # write extracted data into the csv file row by row
                for item in history_data:
                    value = datetime.datetime.fromtimestamp(item['when'])
                    occured_at = f'{value:%d-%B-%Y # %H:%M:%S}'
                    csvwriter.writerow({'Occured At': occured_at,
                                        'Slot Result': item['slot_result'],
                                        'Spin Result': item['result'],
                                        'Total Winners': item['total_winners'],
                                        'Total Payout': item['total_payout'],
                                        })
            print('-' * 100)
            page_number += 1
            print(page_number)
            print('-' * 100)
        else:
            break

scrap_history()  # run the scraper
Explanation:
I implemented the above script using plain Python requests. The API URL https://api.tracksino.com/crazytime_history?filter=&sort_by=&sort_desc=false&page_num=1&per_page=50&period=24hours was extracted from the website itself (it is visible in the browser's network requests; the original answer included a screenshot). The script builds a dynamic URL whose page number changes on every iteration: first page_num=1, then page_num=2, and so on, until all the data has been extracted.
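As a side note, the same pagination loop can be written with requests' params argument instead of string concatenation, which keeps the query readable. A sketch under the same assumptions about the API:

import requests

def fetch_page(page_number):
    # Build the query string via params instead of manual concatenation.
    params = {
        'filter': '', 'sort_by': '', 'sort_desc': 'false',
        'page_num': page_number, 'per_page': 100, 'period': '24hours',
    }
    response = requests.get('https://api.tracksino.com/crazytime_history',
                            params=params, verify=False)
    return response.json()['data']

page_number = 1
while True:
    data = fetch_page(page_number)
    if not data:
        break  # an empty page means we've reached the end
    print('page', page_number, 'rows', len(data))
    page_number += 1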

Replacing figure and table in layout when using global ColumnDataSource

I am using bokeh 0.12.9. I have a table and a figure which I replace in the global layout on callback. I usually build the ColumnDataSource right before I build the new figure/table. Now I wanted to try and see if I can have a global ColumnDataSource so that I can adjust the data via a CDSView (no need to replace table/figure then).
Unfortunately even keeping a separate CDS and view for table and plot fails. When clicking the radio button a couple of times I receive the following javascript error:
Uncaught TypeError: Cannot read property 'data' of undefined
from datetime import date
from random import randint
import numpy as np
import pandas as pd
from bokeh.plotting import figure, output_file, show
from bokeh.models import (ColumnDataSource, CDSView, IndexFilter,
                          CustomJS, Slider, Line)
from bokeh.models import widgets
from bokeh.models.widgets import DataTable, DateFormatter, TableColumn
import bokeh.layouts as layouts
from bokeh.io import curdoc
from bokeh import palettes
from bokeh.layouts import layout

def gen_plot(source=None, view=None):
    p = figure(title='test',
               x_axis_type="datetime",
               plot_width=600, plot_height=400)
    colors = palettes.Category10[10]
    cols = [str(col) for col in source.column_names]
    for ix, col in enumerate(cols):
        if col == 'index':
            continue
        r = p.line(x='index', y=col, source=source, view=view,
                   legend='_' + col,
                   color=colors[ix])
    p.legend.location = "bottom_left"
    return p

def gen_table(source=None, view=None):
    columns = [TableColumn(field=ele, title=ele) for ele
               in source.column_names]
    tab = widgets.DataTable(source=source, view=view, columns=columns,
                            selectable=False,
                            reorderable=False,
                            width=600, height=400)
    return tab

def update(attr, old, new):
    p = gen_plot(source=cdss[0], view=vs[0])
    t = gen_table(source=cdss[1], view=vs[1])
    print(l.children)
    l.children[1] = p
    l.children[2].children[0] = t

# set up data
cols = ['col1', 'col2', 'col3', 'col4']
df1 = pd.DataFrame(pd.util.testing.getTimeSeriesData())
df1.columns = cols
df2 = pd.DataFrame(pd.util.testing.getTimeSeriesData())
df2.columns = cols
dfs = [df1, df2]
cds1 = ColumnDataSource(df1)
cds2 = ColumnDataSource(df2)
cdss = [cds1, cds2]
filters = [IndexFilter([0, 1, 2, 4])]
filters = []
v1 = CDSView(source=cds1, filters=filters)
v2 = CDSView(source=cds2, filters=filters)
vs = [v1, v2]
# initialize items to replace
p = gen_plot(source=cdss[0], view=vs[0])
t = gen_table(source=cdss[1], view=vs[1])
# initialize controls
radio_wghting = widgets.RadioButtonGroup(labels=["Equal", "Exponential"],
                                         active=0)
radio_wghting.on_change('active', update)
# set up layout
sizing_mode = 'fixed'
l = layout([radio_wghting, p, t], sizing_mode=sizing_mode)
curdoc().add_root(l)
curdoc().title = 'blub'
# call callback initially
update('value', 0, 0)
Any hints are much appreciated!
"Now I wanted to try and see if I can have a global ColumnDataSource so that I can adjust the data via a CDSView (no need to replace table/figure then)."
The code you are showing is the one in which you are trying to replace the figure and table. When you replace the child of a layout object in that way, you are not actually removing the previous figures from curdoc, and other elements in the document still hold the old figures and tables in their references.
You could try something like this to update the sources directly:
for rend in p.renderers:
    try:
        rend.data_source
    except AttributeError:
        pass
    else:
        rend.data_source.data.update(new_data_dictionary)
and
t.source.data.update(new_data_dictionary)
EDIT to answer the comment
from bokeh.io import curdoc
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, Button
from bokeh.layouts import gridplot, widgetbox
from random import random, choice
import numpy as np

my_data = {1: {'x': [], 'y': [], 'colo': [], 'size': []}}
kelly_colors = ['#F3C300', '#875692', '#F38400', '#A1CAF1', '#BE0032',
                '#C2B280', '#848482', '#008856', '#E68FAC', '#0067A5',
                '#F99379', '#604E97', '#F6A600', '#B3446C', '#DCD300',
                '#882D17', '#8DB600', '#654522', '#E25822', '#2B3D26']
x = np.arange(0, 50, 0.1)

def rand_dict():
    # random scatter points drawn from the fixed x grid
    rand_x = [choice(x) for i in range(7)]
    return {'x': rand_x,
            'y': np.array([random() * 100 for i in rand_x]),
            'colo': np.array([choice(kelly_colors) for i in rand_x]),
            'size': np.array([(5 + int(random() * 50)) for i in rand_x])}

def add_stuff():
    global my_data
    my_data[max(my_data.keys()) + 1] = rand_dict()
    make_doc()

def change_stuff():
    global my_data
    myfig = curdoc().select_one({"name": "myfig"})
    for i, rend in enumerate(myfig.renderers):
        try:
            rend.data_source
        except AttributeError:
            pass
        else:
            my_data[i + 1] = rand_dict()
            rend.data_source.data.update(my_data[i + 1])

def clear_stuff():
    global my_data
    my_data = {1: {'x': [], 'y': [], 'colo': [], 'size': []}}
    make_doc()

def make_doc():
    curdoc().clear()
    myfig = figure(plot_width=1000, plot_height=800, outline_line_alpha=0, name='myfig')
    myfig.x_range.start = -5
    myfig.x_range.end = 55
    myfig.y_range.start = -10
    myfig.y_range.end = 110
    myfig.renderers = []
    add_button = Button(label='add stuff', width=100)
    change_button = Button(label='change stuff', width=100)
    clear_button = Button(label='clear stuff', width=100)
    add_button.on_click(add_stuff)
    change_button.on_click(change_stuff)
    clear_button.on_click(clear_stuff)
    grid = gridplot([[myfig, widgetbox(add_button, change_button, clear_button)]],
                    toolbar_location=None)
    curdoc().add_root(grid)
    update_doc()

def update_doc():
    myfig = curdoc().select_one({"name": "myfig"})
    for key in my_data:
        myfig.scatter(x='x', y='y', color='colo', size='size',
                      source=ColumnDataSource(data=my_data[key]))
    curdoc().title = 'mytitle'

make_doc()
What I like about doing this is that you can just save the my_data dictionary with numpy, load it later, and keep changing your plots from there.
def load_data():
    global my_data
    my_data = np.load(path_to_saved_data).item()
    make_doc()
You can probably do something similar using pandas dataframes, I am just more comfortable with plain dictionaries.
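For the pandas route the answer alludes to, one option (a sketch; the helper names and path argument are illustrative, not part of the original code) is to pickle one DataFrame per plot key and convert back to plain dicts on load:

import pandas as pd

def save_data_pandas(path):
    # one DataFrame per plot key, pickled together as a dict
    pd.to_pickle({key: pd.DataFrame(val) for key, val in my_data.items()}, path)

def load_data_pandas(path):
    global my_data
    frames = pd.read_pickle(path)
    # back to the plain-dict layout the plotting code expects
    my_data = {key: df.to_dict('list') for key, df in frames.items()}
    make_doc()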

Unable to get data into correct format after web scraping in Python

I have written code that scrapes the website https://www.newegg.com/Product/ProductList.aspx?Submit=ENE&N=-1&IsNodeId=1&Description=GTX&bop=And&Page={}&PageSize=36&order=BESTMATCH (formatted with the page number), but when I run this code the data is not formatted: the product name ends up in every cell, and so do the price and image.
from urllib.request import urlopen
from bs4 import BeautifulSoup

f = open("Scrapedetails.csv", "w")
Headers = "Item_Name, Price, Image\n"
f.write(Headers)
for page in range(1, 15):
    page_url = ("https://www.newegg.com/Product/ProductList.aspx?"
                "Submit=ENE&N=-1&IsNodeId=1&Description=GTX&bop=And&Page="
                "{}&PageSize=36&order=BESTMATCH".format(page))
    html = urlopen(page_url)
    bs0bj = BeautifulSoup(html, "html.parser")
    page_details = bs0bj.find_all("div", {"class": "item-container"})
    for i in page_details:
        Item_Name = i.find("a", {"class": "item-title"})
        Price = i.find("li", {"class": "price-current"})
        Image = i.find("img")
        Name_item = Item_Name.get_text()
        Prin = Price.get_text()
        imgf = Image["src"]  # to get the src attribute
        f.write("{}".format(Name_item).strip() + ",{}".format(Prin).strip() +
                ",{}".format(imgf) + "\n")
f.close()
Can someone help me amend the code so that I get the name in the name column, the price in the price column and the image in the image column?
Also, what are the newer ways to save data to CSV? Could someone help me with code for that too?
Alright, I got it solved.
from urllib.request import urlopen
from bs4 import BeautifulSoup

f = open("Scrapedetails.csv", "w")
Headers = "Item_Name, Price, Image\n"
f.write(Headers)
for page in range(1, 15):
    page_url = ("https://www.newegg.com/Product/ProductList.aspx?"
                "Submit=ENE&N=-1&IsNodeId=1&Description=GTX&bop=And&Page="
                "{}&PageSize=36&order=BESTMATCH".format(page))
    html = urlopen(page_url)
    bs0bj = BeautifulSoup(html, "html.parser")
    page_details = bs0bj.find_all("div", {"class": "item-container"})
    for i in page_details:
        Item_Name = i.find("a", {"class": "item-title"})
        Price = i.find("li", {"class": "price-current"}).find('strong')
        Image = i.find("img")
        Name_item = Item_Name.get_text().strip()
        prin = Price.get_text()
        imgf = Image["src"]  # to get the src attribute
        print(Name_item)
        print(prin)
        print('https:{}'.format(imgf))
        f.write("{}".format(Name_item).replace(",", "|") + ",{}".format(prin) +
                ",https:{}".format(imgf) + "\n")
f.close()
This is the code for anyone who wants to start with web scraping in the simplest way.
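On the second question (newer ways to save CSV data): the standard library's csv module quotes fields that contain commas, so the replace(",", "|") workaround above isn't needed. A minimal sketch with the same columns (the sample row is illustrative):

import csv

rows = [
    ("Some GPU, 8GB", "299.99", "https://example.com/img.png"),  # hypothetical scraped row
]

with open("Scrapedetails.csv", "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["Item_Name", "Price", "Image"])
    for name, price, img in rows:
        # csv.writer quotes fields containing commas automatically
        writer.writerow([name, price, img])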
