unknown url type: 'URL' - this error i am getting. can any one help me for this - web-scraping

import csv
import re
from urllib.request import urlopen
with open('Input_textscrapping_CSV.csv', "rt") as f:
reader = csv.reader(f)
for line in reader:
#id = line[0]
url = line[1] # assuming your url is your first column
print(url)
#print(id,url)
#the following is added by me
page = urlopen(url)
html = page.read().decode("utf-8")
pattern = "<title.*?>.*?</title.*?>"
match_results = re.search(pattern, html, re.IGNORECASE)
title = match_results.group()
title = re.sub("<.*?>", "", title) # Remove HTML tags
print(title)
I tried the above code but it is giving error is unknown url type: 'URL'.

Related

The Output is None, None, None Python using sql lite

Hello Everyone i need help why my output result is none in the print('bla bla') line so from my output is None, None, None that actually insert from npm , nama , and jurusan but the output is none ,can anybody help me solve it thanks
import sqlite3
import tkinter
from tkinter import *
from tkinter import ttk
def Daftar():
window = Tk()
window.title("Welcome to TutorialsPoint")
window.geometry('400x400')
window.configure(background = "grey");
Lnpm = Label(window, text="Please Input Your npm: ").grid(row=0, column=0)
Lnama = Label(window,text="Please Input Your nama: ").grid(row=1, column=0)
Ljurusan = Label(window,text="Please Input Your jurusan: ").grid(row=2, column=0)
npm = Entry(window).grid(row = 0,column = 1)
nama = Entry(window).grid(row = 1,column = 1)
jurusan = Entry(window).grid(row = 2,column = 1)
def Clicked():
print("First Name: %s\nLast Name: %s\nLast Name: %s" % (npm, nama, jurusan))
connect = sqlite3.connect('Presensi.db')
cur = connect.cursor()
connect.execute("INSERT OR IGNORE INTO user(npm,nama,jurusan) values(?,?,?)", (str(npm),str(nama),str(jurusan)))
connect.execute("INSERT OR IGNORE INTO presensi(nama) values(?)", (str(nama),))
connect.commit()
cur.close()
btn = ttk.Button(window ,text="Register",command= Clicked()).grid(row=3,column=0)
window.mainloop()
You've got two big issues here:
the grid() function of the Entry object returns None and that's what npm, nama and jurusan are None. What you have to do is store the Entry object, not the value returned from grid().
you're not calling get() on the Entry objects to get their input values
What you can do is create a class in which you store the Entry objects. The callback function of the Button object can then be a method of the class.
I've reorganised your code to do this:
from tkinter import Tk, Label, Button, Entry
import sqlite3
class Daftar:
def __init__(self, master):
self.window = master
self.window.title("Welcome to TutorialsPoint")
self.window.geometry('400x400')
self.window.configure(background = "grey");
self.Lnpm = Label(self.window, text="Please Input Your npm: ").grid(row=0, column=0)
self.Lnama = Label(self.window,text="Please Input Your nama: ").grid(row=1, column=0)
self.Ljurusan = Label(self.window,text="Please Input Your jurusan: ").grid(row=2, column=0)
#Entry objects for later use
self.npm = Entry(self.window)
self.npm.grid(row = 0,column = 1)
self.nama = Entry(self.window)
self.nama.grid(row = 1,column = 1)
self.jurusan = Entry(self.window)
self.jurusan.grid(row = 2,column = 1)
self.btn = Button(self.window ,text="Register",command = self.Clicked).grid(row=3,column=0)
def Clicked(self):
#Get the entry values
npm = self.npm.get()
nama = self.nama.get()
jurusan = self.jurusan.get()
print("First Name: %s\nLast Name: %s\nLast Name: %s" % (npm, nama, jurusan))
connect = sqlite3.connect('Presensi.db')
cur = connect.cursor()
connect.execute("INSERT OR IGNORE INTO user(npm,nama,jurusan) values(?,?,?)", (npm,nama,jurusan))
connect.execute("INSERT OR IGNORE INTO presensi(nama) values(?)", (nama,))
connect.commit()
cur.close()
root = Tk()
my_gui = Daftar(root)
root.mainloop()
window.mainloop()

"Parser must be a string or character stream, not datetime" error in lambda aws function - can't figure out how to fix it

I am getting a very annoying error when trying to save/test this Lambda Boto3 function. There are other threads here on this issue, but i have spent about 2 hours trying to debug this and can't figure out what i'm doing wrong (it's probably something obvious). Any help would be appreciated!
{
"errorMessage": "Parser must be a string or character stream, not datetime",
"errorType": "TypeError",
"stackTrace": [
" File \"/var/task/lambda_function.py\", line 35, in lambda_handler\n a = dateutil.parser.parse(instance.launch_time)\n",
" File \"/var/runtime/dateutil/parser/_parser.py\", line 1358, in parse\n return DEFAULTPARSER.parse(timestr, **kwargs)\n",
" File \"/var/runtime/dateutil/parser/_parser.py\", line 646, in parse\n res, skipped_tokens = self._parse(timestr, **kwargs)\n",
" File \"/var/runtime/dateutil/parser/_parser.py\", line 722, in _parse\n l = _timelex.split(timestr) # Splits the timestr into tokens\n",
" File \"/var/runtime/dateutil/parser/_parser.py\", line 207, in split\n return list(cls(s))\n",
" File \"/var/runtime/dateutil/parser/_parser.py\", line 76, in __init__\n '{itype}'.format(itype=instream.__class__.__name__))\n"
]
}
import json
import boto3
import time
import datetime
import dateutil
from dateutil.parser import parse
def lambda_handler(event, context):
detailDict = event["detail"]
ec2 = boto3.resource('ec2')
instanceId = str(detailDict["instance-id"])
instance = ec2.Instance(instanceId)
instanceState = instance.state
a = dateutil.parser.parse(instance.launch_time)
b = current_time = datetime.datetime.now(launch_time.tzinfo)
# returns a timedelta object
c = a-b
print('Difference: ', c)
minutes = c.seconds / 60
print('Difference in minutes: ', minutes)
Message=str(instanceId)+" is "+str(instanceState["Name"])
return {
'statusCode': 200,
'body': Message
}
The launch-time property is already a datetime property. You do not need to parse it.
Reference:
https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/ec2.html

openpyxl - How to preserve xlsx custom properties

How do I preserve custom properties from xlsx template which I am modifying with openpyxl? When I save() workbook using openpyxl these custom properties vanish!
Custom properties can be found here:-
On Mac -> Go to File Menu in Excel -> Properties ... -> Custom tab ->
Properties section
I am posting a pure python solution to reading and writing Workbook.CustomDocumentProperties just because I am currently also feeling the pain of not having this in openpyxl, and I needed a quick workaround for a personal automation project.
In fact, I will try to implement this feature (and hopefully later Worksheet.CustomProperties) into openpyxl myself if I can get my head around how to do all the plumbing the library needs: https://foss.heptapod.net/openpyxl/openpyxl/-/issues/1003
Update: I pushed my contribution and it should be accepted and merged shortly :) https://foss.heptapod.net/openpyxl/openpyxl/-/merge_requests/384
So for now, here is a workaround, converting the .xlsx to .zip, then reading and writing the .xml files in the zip directly, and then renaming to .xlsx at the end.
To read Workbook.CustomDocumentProperties you can do this - only very slightly modified from this great answer: https://stackoverflow.com/a/46919795/9792594
from lxml import etree as ET
import zipfile
def get_custom_doc_properties(filename):
path_file = os.path.abspath(filename)
base, ext = os.path.splitext(path_file)
zip_filename = base + ".zip"
os.rename(path_file, zip_filename)
main_ns = "{http://schemas.openxmlformats.org/spreadsheetml/2006/main}"
docPr_ns = "{http://schemas.openxmlformats.org/officeDocument/2006/custom-properties}"
docPr_type = "{http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes}" #i4, r8, filetime, bool, lpwstr
r_ns = "{http://schemas.openxmlformats.org/officeDocument/2006/relationships}"
cusPr_type = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/customProperty"
with zipfile.ZipFile(zip_filename) as zip:
props = zip.open('docProps/custom.xml')
text = props.read()
xml = ET.fromstring(text)
workbook_props = {}
for child in XML:
if (child.tag == f"{docPr_ns}property"):
for cusPr in child:
workbook_props[child.attrib['name']] = cusPr.text
return workbook_props
#call like this:
get_custom_doc_properties(f'./example.xlsx')
And to add one prop to a document which already has custom doc props (and therefore already has a 'docProps/custom.xml' file), is pretty easy and we just append one more custom property to the xml.
(However, if the document had no current custom doc props, then we need to generate the 'docProps/custom.xml' file from scratch, as well as add a content override and a relationship - see code comments):
import os
from lxml import etree as ET
import zipfile
import shutil
import datetime
from tempfile import NamedTemporaryFile
def set_workbook_custom_document_properties(filename, cus_doc_prop_name, cus_doc_prop_val):
if not isinstance(cus_doc_prop_name, str):
print("you must supply a string as the 'cus_doc_prop_name'")
return
if isinstance(cus_doc_prop_val, str):
docPr_type_suffix = "lpwstr"
cus_doc_prop_str = cus_doc_prop_val
elif isinstance(cus_doc_prop_val, int):
docPr_type_suffix = "i4"
cus_doc_prop_str = str(cus_doc_prop_val)
elif isinstance(cus_doc_prop_val, float):
docPr_type_suffix = "r8"
cus_doc_prop_str = str(cus_doc_prop_val)
elif isinstance(cus_doc_prop_val, bool):
docPr_type_suffix = "bool"
cus_doc_prop_str = str(cus_doc_prop_val)
elif isinstance(cus_doc_prop_val, datetime.datetime):
docPr_type_suffix = "filetime"
cus_doc_prop_str = cus_doc_prop_val.strftime("%Y-%m-%dT%H:%M:%SZ")
else:
print("you must supply a string, int, float, bool, or date, as the 'cus_doc_prop_val'")
return
path_file = os.path.abspath(filename)
base, ext = os.path.splitext(path_file)
zip_filename = base + ".zip"
os.rename(path_file, zip_filename)
main = "http://schemas.openxmlformats.org/spreadsheetml/2006/main"
main_ns = "{%s}" % main
docPr = "http://schemas.openxmlformats.org/officeDocument/2006/custom-properties"
docPr_ns = "{%s}" % docPr
docPr_type = "http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes"
docPr_type_ns = "{%s}" % docPr_type #i4, r8, filetime, bool, lpwstr
docPr_rel_type = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/custom-properties"
docPr_content_type = "application/vnd.openxmlformats-officedocument.custom-properties+xml"
r_ns = "{http://schemas.openxmlformats.org/officeDocument/2006/relationships}"
cusPr_type = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/customProperty"
xml_declaration = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
base_xml = '{dec}<Properties xmlns="{docPr}" xmlns:vt="{docPr_type}"></Properties>'.format(dec=xml_declaration, docPr=docPr, docPr_type=docPr_type).encode('utf-8')
with NamedTemporaryFile() as tmp_file:
tmpname = os.path.basename(tmp_file.name)
with zipfile.ZipFile(zip_filename, 'r') as zip_in:
with zipfile.ZipFile(tmpname, 'w') as zip_out:
zip_out.comment = zip_in.comment # preserve the comment
custom_present = 'docProps/custom.xml' in zip_in.namelist()
for item in zip_in.infolist():
if item.filename == 'docProps/custom.xml':
custom_xml = ET.fromstring(zip_in.read(item.filename))
elif custom_present == False and item.filename == '_rels/.rels':
rels_xml = ET.fromstring(zip_in.read(item.filename))
elif custom_present == False and item.filename == '[Content_Types].xml':
content_types_xml = ET.fromstring(zip_in.read(item.filename))
else:
zip_out.writestr(item, zip_in.read(item.filename))
if custom_present:
# if custom.xml is already present we just need to append:
max_pid = 1
for node in custom_xml:
max_pid = max(int(node.attrib['pid']), max_pid)
else:
# if custom.xml is not present, we need to create it
# and also to add an override to [Content_Types].xml
# and also to add a relationship to _rels/.rels
custom_xml = ET.parse(BytesIO(base_xml)).getroot()
max_pid = 1
child_override = ET.SubElement(content_types_xml, "Override")
child_override.attrib['ContentType'] = docPr_content_type
child_override.attrib['PartName'] = '/docProps/custom.xml'
zip_out.writestr('[Content_Types].xml', ET.tostring(content_types_xml))
max_rid = 0
for node in rels_xml:
max_rid = max(int(node.attrib['Id'].replace("rId", "")), max_rid)
child_rel = ET.SubElement(rels_xml, "Relationship")
child_rel.attrib['Type'] = docPr_rel_type
child_rel.attrib['Target'] = 'docProps/custom.xml'
child_rel.attrib['Id'] = "rID" + str(max_rid + 1)
zip_out.writestr('_rels/.rels', ET.tostring(rels_xml))
child = ET.SubElement(custom_xml, "property")
child.attrib['name'] = cus_doc_prop_name
child.attrib['pid'] = str(max_pid + 1)
child.attrib['fmtid'] = "{D5CDD505-2E9C-101B-9397-08002B2CF9AE}"
val = ET.SubElement(child, f"{docPr_type_ns}{docPr_type_suffix}")
val.text = cus_doc_prop_str
print(ET.tostring(custom_xml, pretty_print=True))
zip_out.writestr('docProps/custom.xml', ET.tostring(custom_xml))
zip_out.close()
zip_in.close()
shutil.copyfile(tmpname, zip_filename)
os.rename(zip_filename, path_file)
#call it like this:
set_workbook_custom_document_properties(f'./example.xlsx', "testDocProp7", 2.5)

Stop the error when trying to manipulate the items of a Listbox, but no item is selected?

I am a beginner in tkinter. I am making a list of names. You can delete, select and edit it, but if I don't select anything in the list and click these buttons, it says:
Exception in Tkinter callback Traceback (most recent call last): File
"C:\Python34\lib\tkinter\__init__.py", line 1533, in __call__ return
self.func(*args) File "C:\Users\user\Desktop\HOW_TOUGH - NEW\Change_user.py",
line 60, in Edit (idx, ) = d ValueError: need more than 0 values to unpack'''
I am planning to disable the buttons if the user doesn't click anything but I am not expert enough. Here's my code (it's a child window)
from tkinter import *
from tkinter import ttk
from tkinter import messagebox
class Nick:
def __init__(self, master ):
self.master = master
self.window = Toplevel(master)
self.window.title('Change User')
self.window.geometry('300x300')
self.window.minsize(300, 300)
self.window.maxsize(300, 300)
self.nickname = StringVar()
self.lb = Listbox(self.window, selectmode = 'SINGLE')
f= open('users.txt','r')
rec = f.readlines()
f.close()
for i in rec:
p = i.find('|')
nickname = i[:p]
self.lb.insert(END, nickname)
self.lb.pack()
self.Ed = ttk.Button(self.window, text = 'Edit', command = self.Edit).pack()
self.Del = ttk.Button(self.window, text = 'Delete', command = self.Delete).pack()
self.Bac = ttk.Button(self.window, text = 'Back', command = self.Back).pack()
self.Okay = ttk.Button(self.window, text = 'Ok', command = self.Ok).pack()
def Back(self):
self.window.destroy()
def Delete(self):
d = self.lb.curselection()
(idx, ) = d
self.lb.delete(idx)
f = open('users.txt','r')
r = f.readlines()
f.close()
rec = r[idx]
r.remove(rec)
f = open('users.txt','w')
new = ''.join(r)
r = f.write(new)
f.close()
messagebox.showinfo(title='Success', message = 'Delete successful')
def Edit(self):
d = self.lb.curselection()
(idx, ) = d
import Edit as Edet
Edet.Edit(self.master, idx)
def Ok(self):
d = self.lb.curselection()
(idx, ) = d
get = self.lb.get(idx)
self.window.destroy()
print (get)
print (d)
The method curselection() returns an empty tuple when nothing is selected. You can skip those methods just by adding a
if not d:
return
If you want to gray out your buttons, you can do this:
button["state"] = DISABLED
Note that this won't work currently with your code as you did this:
self.button = ttk.Button(...).pack()
The problem lies in the call of pack() which returns None, effectively binding self.button to None. Just assign the button object to the variable first and then pack it. Furthermore, it's not recommended to import * from Tkinter because you're dropping ~190 names in your namespace. Just use
import tkinter as tk

URL request, python to R translation please

I'm trying to request some data via the mt gox API (mtgox.com) and theres some example code in python that I'd like to basically copy into R.
import hmac, base64, hashlib, urllib2
base = 'https://data.mtgox.com/api/2/'
def makereq(key, secret, path, data):
hash_data = path + chr(0) + data
secret = base64.b64decode(secret)
sha512 = hashlib.sha512
hmac = str(hmac.new(secret, hash_data, sha512))
header = {
'User-Agent': 'My-First-Trade-Bot',
'Rest-Key': key,
'Rest-Sign': base64.b64encode(hmac),
'Accept-encoding': 'GZIP',
}
return urllib2.Request(base + path, data, header)
I have some R code already
install.packages("base64")
install.packages("caTools")
install.packages("digest")
install.packages("RCurl")
library(RCurl)
library(caTools)
library(base64)
base<- "https://data.mtgox.com/api/2"
path<- "BTCUSD/money/ticker"
APIkey<-"******" #this is private but its a long hex number
secretAPIkey<-"*****" #this too, but this is in base64
makeReq<-function(key, secret, path, post_data)
{
browser()
message <- paste(path, NULL, post_data)
secret<-base64decode(secret,"character")
theHmac <-hmac(secret,message,"sha512")
header <-
{
c(
User.Agent = "My Bot",
Rest.Key = key,
Rest.Sign = base64encode(theHmac),
Acccept.encoding = "GZIP"
)
}
return (getURL(paste(base,path), post_data, header) )
}
I don't know how to get the "header" thing to work though, and I might be using getURL() incorrectly.
If you want to see the whole problem, the instructions are here https://bitbucket.org/nitrous/mtgox-api/overview, scroll down to the first block of code.
but I'm probably just making some elementary mistake with R header syntax...
try to use postForm (from RCurl) instead of getURL:
postForm(paste(base,path),
.opts = list(postfields = post_data,
useragent = 'R',
httpheader = c('Rest-Key' = key,
'Rest-Sign' = base64encode(theHmac)),
timeout = 4,
ssl.verifypeer = FALSE)
)

Resources