i really need help to finish this task since it's related to my research and I’m new to python and scrapy .
*the task is to select all input field (type=text or password or file ) and store it's (id) in back-end DB , besides the page link where this input belongs *
my code to select input fields
def parse_item(self, response):
self.log('%s' % response.url)
hxs = HtmlXPathSelector(response)
item=IsaItem()
item['response_fld']=response.url
item['text_input']=hxs.select("//input[(#id or #name) and (#type = 'text' )]/#id ").extract()
item['pass_input']=hxs.select("//input[(#id or #name) and (#type = 'password')]/#id").extract()
item['file_input']=hxs.select("//input[(#id or #name) and (#type = 'file')]/#id").extract()
return item
Database pipeline code :
class SQLiteStorePipeline(object):
def __init__(self):
self.conn = sqlite3.connect('./project.db')
self.cur = self.conn.cursor()
def process_item(self, item, spider):
self.cur.execute("insert into inputs ( input_name) values(?)" , (item['text_input'][0] ), )
self.cur.execute("insert into inputs ( input_name) values(?)" , (item['pass_input'][0] ,))
self.cur.execute("insert into inputs ( input_name) values(?)" ,(item['file_input'][0] , ))
self.cur.execute("insert into links (link) values(?)", (item['response_fld'][0], ))
self.conn.commit()
return item
but i still get error like this
self.cur.execute("insert into inputs ( input_name) values(?)" , (item['text_input'][0] ), )
exceptions.IndexError: list index out of range
or database store only first letter !!
Database links table
╔════════════════╗
║ links ║
╠════════════════╣
║ id │input ║
╟──────┼─────────╢
║ 1 │ t ║
╟──────┼─────────╢
║ 2 │ t ║
╚══════╧═════════╝
Note it should "tbPassword" or "tbUsername"
ouput fron json file
{"pass_input": ["tbPassword"], "file_input": [], "response_fld": "http://testaspnet.vulnweb.com/Signup.aspx", "text_input": ["tbUsername"]}
{"pass_input": [], "file_input": [], "response_fld": "http://testaspnet.vulnweb.com/default.aspx", "text_input": []}
{"pass_input": ["tbPassword"], "file_input": [], "response_fld": "http://testaspnet.vulnweb.com/login.aspx", "text_input": ["tbUsername"]}
{"pass_input": [], "file_input": [], "response_fld": "http://testaspnet.vulnweb.com/Comments.aspx?id=0", "text_input": []}
You are getting IndexError because you try to get the first item in the list, which sometimes is empty.
I would do it like this.
The spider:
def parse_item(self, response):
self.log('%s' % response.url)
hxs = HtmlXPathSelector(response)
item = IsaItem()
item['response_fld'] = response.url
res = hxs.select("//input[(#id or #name) and (#type = 'text' )]/#id ").extract()
item['text_input'] = res[0] if res else None # None is default value in case no field found
res = hxs.select("//input[(#id or #name) and (#type = 'password')]/#id").extract()
item['pass_input'] = res[0] if res else None # None is default value in case no field found
res = hxs.select("//input[(#id or #name) and (#type = 'file')]/#id").extract()
item['file_input'] = res[0] if res else None # None is default value in case no field found
return item
The pipeline:
class SQLiteStorePipeline(object):
def __init__(self):
self.conn = sqlite3.connect('./project.db')
self.cur = self.conn.cursor()
def process_item(self, item, spider):
self.cur.execute("insert into inputs ( input_name) values(?)", (item['text_input'],))
self.cur.execute("insert into inputs ( input_name) values(?)", (item['pass_input'],))
self.cur.execute("insert into inputs ( input_name) values(?)", (item['file_input'],))
self.cur.execute("insert into links (link) values(?)", (item['response_fld'],))
self.conn.commit()
return item
I don't really know anything about this technology, but here's my guess:
Try
insert into inputs ( input_name) values(?)" , (item['text_input'] )
instead of
insert into inputs ( input_name) values(?)" , (item['text_input'][0] ).
As for the 'list index out of range' error, it seems like your item is empty, something you should check for.
Related
I am trying to implement a QAbstractProxyModel that maps an SqlTableModel to a tree-like data structure. The table has a column called parent_id, whose value is added to the createIndex call as a third argument. The question is similar to this user's post, only that I am working in Python not in C++.
The TreeView loads correctly:
But when I try to expand an item, the application crashes. Debugging tells me that there seems to be an infinite loop of index, rowCount and mapToSource being called.
I am at the end of my wits. Do you have any ideas? See the MWE below.
from __future__ import annotations
from PySide6.QtWidgets import QGridLayout
from PySide6.QtWidgets import QTreeView
from PySide6.QtWidgets import QApplication
from PySide6.QtWidgets import QMainWindow
from PySide6.QtWidgets import QWidget
from PySide6.QtCore import QModelIndex
from PySide6.QtCore import Qt
from PySide6.QtCore import Slot
from PySide6.QtCore import QAbstractProxyModel
from PySide6.QtSql import QSqlDatabase
from PySide6.QtSql import QSqlQuery
from PySide6.QtSql import QSqlTableModel
class CustomTreeModel(QAbstractProxyModel):
def __init__(self, database: str, parent: QWidget = None):
QAbstractProxyModel.__init__(self, parent)
sourceModel = QSqlTableModel(parent, database)
sourceModel.setTable('test')
sourceModel.select()
self.setSourceModel(sourceModel)
def flags(self, proxyIndex: QModelIndex) -> Qt.ItemFlags:
return Qt.ItemIsEnabled | Qt.ItemIsEditable
def data(self, proxyIndex: QModelIndex, role: int):
print("data")
if proxyIndex.isValid:
sourceIndex = self.mapToSource(proxyIndex)
return sourceIndex.data(role)
return None
def index(
self,
row: int,
column: int,
parentIndex: QModelIndex
) -> QModelIndex:
print("index")
if row < 0 and column < 0:
return QModelIndex()
parentId = parentIndex.internalPointer()
return self.createIndex(row, column, parentId)
def mapFromSource(self, sourceIndex: QModelIndex) -> QModelIndex:
print("mapFromSource")
if self.isRootItem(sourceIndex):
return QModelIndex()
if sourceIndex.column() == 0:
sourceId = sourceIndex.data()
else:
sourceId = sourceIndex.siblingAtColumn(0).data()
parentId = self.getParentId(sourceId)
childIds = self.getChildIds(parentId)
row = childIds.index(sourceId)
column = sourceIndex.column()
proxyIndex = self.createIndex(row, column, parentId)
return proxyIndex
def mapToSource(self, proxyIndex: QModelIndex) -> QModelIndex:
print("mapToSource")
if self.isRootItem(proxyIndex):
return QModelIndex()
parentId = proxyIndex.internalPointer()
childIds = self.getChildIds(parentId)
rowId = childIds[proxyIndex.row()]
rowIds = self.getAllIds()
sourceRow = rowIds.index(rowId)
sourceColumn = proxyIndex.column()
sourceIndex = self.sourceModel().index(sourceRow, sourceColumn)
return sourceIndex
def rowCount(self, parentIndex: QModelIndex) -> int:
print("rowCount")
if parentIndex.column() > 0:
return 0
parentId = parentIndex.internalPointer()
childIds = self.getChildIds(parentId)
return len(childIds)
def columnCount(self, parentIndex: QModelIndex) -> int:
print("columnCount")
if parentIndex.column() > 0:
return 0
numColumns = self.sourceModel().columnCount(parentIndex)
return numColumns
def parent(self, childIndex: QModelIndex) -> QModelIndex:
print("parent")
if childIndex.column() > 0:
return QModelIndex()
sourceIndex = self.mapToSource(childIndex)
childId = sourceIndex.siblingAtColumn(0).data()
parentId = self.getParentId(childId)
if not parentId:
return QModelIndex()
parentParentId = self.getParentId(parentId)
parentIds = self.getChildIds(parentParentId)
parentRow = parentIds.index(parentId)
parentIndex = self.createIndex(parentRow, 0, parentId)
return parentIndex
def getParentId(self, childId: str) -> str | None:
table = self.sourceModel().tableName()
query = QSqlQuery()
query.prepare(f"""
SELECT parent_id
FROM {table}
WHERE id=?
""")
query.addBindValue(childId)
query.exec_()
if query.first():
parentId = query.value(0)
return parentId if parentId else None
return None
def hasChildren(self, parentIndex: QModelIndex) -> bool:
if parentIndex.column() > 0:
return False
parentId = parentIndex.internalPointer()
childIds = self.getChildIds(parentId)
return len(childIds) > 0
def getAllIds(self) -> list[str]:
table = self.sourceModel().tableName()
query = QSqlQuery()
query.prepare(f"""
SELECT id
FROM {table}
""")
query.exec_()
ids = []
while query.next():
ids.append(query.value(0))
return ids
def getChildIds(self, parentId: str | None) -> list[str]:
table = self.sourceModel().tableName()
query = QSqlQuery()
if not parentId or parentId == '':
query.prepare(f"""
SELECT id
FROM {table}
WHERE parent_id IS NULL OR parent_id=''
""")
else:
query.prepare(f"""
SELECT id
FROM {table}
WHERE parent_id=?""")
query.addBindValue(parentId)
query.exec_()
childIds = []
while query.next():
childIds.append(query.value(0))
return childIds
def isRootItem(self, index: QModelIndex):
return index.row() == -1 and index.column() == -1
class CustomTreeWidget(QWidget):
def __init__(self, parent: QWidget = None):
QWidget.__init__(self, parent)
self.model: CustomTreeModel
self.view = QTreeView(self)
layout = QGridLayout(self)
layout.addWidget(self.view)
self.setLayout(layout)
#Slot()
def setDatabase(self):
database = QSqlDatabase.database()
model = CustomTreeModel(database, self)
self.view.setModel(model)
self.model = model
def initTestDatabase():
query = QSqlQuery()
query.prepare("""
CREATE TABLE test (
"id" TEXT,
"text" TEXT,
"parent_id" TEXT,
PRIMARY KEY("id")
);
""")
query.exec_()
query = QSqlQuery()
query.prepare("""
INSERT INTO test (
id, text, parent_id)
VALUES
(?, ?, ?),
(?, ?, ?),
(?, ?, ?),
(?, ?, ?);
""")
query.addBindValue("ID101")
query.addBindValue("Text")
query.addBindValue(None)
query.addBindValue("ID102")
query.addBindValue("Text")
query.addBindValue("ID101")
query.addBindValue("ID103")
query.addBindValue("Text")
query.addBindValue("ID101")
query.addBindValue("ID104")
query.addBindValue("Text")
query.addBindValue(None)
query.exec_()
if __name__ == "__main__":
projectDb = QSqlDatabase.addDatabase("QSQLITE")
projectDb.setDatabaseName(":memory:")
projectDb.open()
initTestDatabase()
app = QApplication()
mainWindow = QMainWindow()
widget = CustomTreeWidget(mainWindow)
widget.setDatabase()
mainWindow.setCentralWidget(widget)
mainWindow.showMaximized()
app.exec_()
I have the following coding:-
Progam 1
class Person :
'''A base to define Person properties.'''
def __inti__( self , name ):
self.name = name
def speak( self , msg = '(Calling The Base Class)' ):
print( self.name , msg )
Program 2
from Person import *
'''A derived class to define Man properties.'''
class Man( Person ):
def speak( self , msg ):
print( self.name , ':\n\tHello!' , msg )
Program 3
from Person import *
'''A derived class to define Hombre properties.'''
class Hombre( Person ):
def speak( self , msg ):
print( self.name , ':\n\tHola!' , msg )
Program 4
from Man import *
from Hombre import *
guy__1 = Man('Richard')
guy__2 = Hombre('Ricardo')
guy__1.speak( 'It\'s a beautiful evening.\n' )
guy__2.speak( 'Es una tarde hermosa.\n' )
Person.speak( guy__1 )
Person.speak( guy__2 )
I get the following error;-
guy__1 = Man('Richard')
TypeError: object() takes no parameters
change
def __inti__( self , name ):
to
def __init__( self , name ):
How to delete columns with all null values in SQLite? I've got nearly 200 columns and don't want to list them all.
For SQLite you will want to try something along to lines of:
DELETE FROM myTable WHERE myColumn IS NULL OR trim(myColumn) = '';
You have to use another language to automate it.
## pip install sqlite_utils
import argparse
import sqlite_utils
def tracer(sql, params) -> None:
print("SQL: {} - params: {}".format(sql, params))
def connect(args) -> sqlite_utils.Database:
db = sqlite_utils.Database(args.database, tracer=tracer if args.verbose >= 2 else None)
db.execute("PRAGMA main.cache_size = 8000")
return db
def parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser()
parser.add_argument("database")
parser.add_argument("table")
parser.add_argument("--verbose", "-v", action="count", default=0)
args = parser.parse_args()
return args
def remove_empty_cols() -> None:
args = parse_args()
db = connect(args)
total_rows = db[args.table].count
for col in [col.name for col in db[args.table].columns if col.type == 'TEXT']:
details = db[args.table].analyze_column(col, total_rows=total_rows)
if details.num_null == total_rows and details.num_distinct == 0:
with db.conn:
db.execute(f'alter table "{args.table}" drop column "{col}"')
if __name__ == "__main__":
remove_empty_cols()
Run like this:
python remove_empty_cols.py video.db reddit_posts
Using a subquery like this did not seem to work:
SELECT 'alter table reddit_posts drop column ' || name || ';' ddl
FROM pragma_table_info('reddit_posts') t
WHERE "notnull"=0
AND (
SELECT count(t.name) FROM reddit_posts
) = 0
but if you did not want to use python you could run this then manually fill in columns that have the result of 0;
SELECT 'select count(' || name || ') from reddit_posts;' dml
FROM pragma_table_info('reddit_posts') t
WHERE "notnull"=0 AND "type"='TEXT';
SELECT 'alter table reddit_posts drop column ' || name || ';' ddl
FROM pragma_table_info('reddit_posts') t
WHERE name IN (
...
);
Based on previous questions here I managed to create the dataset, print all recipes listed and now I am trying to pick one of the recipes from that list and show its Title, Instructions and Ingredients. The instructions are mapped to the Recipes via the pkID column and the ingredients are mapped to the Recipes through a recipeID column. When I open the database on Sqlite Database Browser I can access this information inside the Tables dropdown list, so I suppose the proper name for them are tables within the database.
I am not being able to "filter" by pkID and by recipeID, so that after picking one recipe, only the appropriate content is shown.
This is the code in Python of what I am trying to do in Genie:
def PrintSingleRecipe(self,which):
sql = 'SELECT * FROM Recipes WHERE pkID = %s' % str(which)
print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'
for x in cursor.execute(sql):
recipeid =x[0]
print "Title: " + x[1]
print "Serves: " + x[2]
print "Source: " + x[3]
print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'
sql = 'SELECT * FROM Ingredients WHERE RecipeID = %s' % recipeid
print 'Ingredient List:'
for x in cursor.execute(sql):
print x[1]
print ''
print 'Instructions:'
sql = 'SELECT * FROM Instructions WHERE RecipeID = %s' % recipeid
for x in cursor.execute(sql):
print x[1]
print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'
resp = raw_input('Press A Key -> ')
I have not been able to improve much of my code, it seems that using the approach I used before of iterating in a step statement cannot be used here. This is how far I got in Genie:
def PrintSingleRecipe(db:Database)
stmt:Statement = PreparedStatements.select_all( db )
res:int = UserInterface.raw_input("Select a recipe -> ").to_int()
cols:int = stmt.column_count ()
var row = new dict of string, string
item:int = 1
print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
while res == ROW
for i:int = 0 to (cols - 1)
row[ stmt.column_name( i ) ] = stmt.column_text( i )
stdout.printf( "%-5s", item.to_string( "%03i" ))
stdout.printf( "%-30s", row[ "Title" ])
stdout.printf( "%-20s", row[ "Serves" ])
stdout.printf( "%-30s\n", row[ "Source" ])
print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
print "Ingredient list"
print " "
stdout.printf("%-5s", item.to_string( "%03i" ))
I have found a solution to the problem, maybe it can be optimized. For now it is enough.
Answers from another question helped immensely. The solution I used was to use the exec function and point the callback to the PrintSingleRecipe().
Some adjustments had to be done for it to work as a callback, but I got what I needed.
Here is the code where the function gets called:
while true
response:string = UserInterface.get_input_from_menu()
if response == "1" // Show All Recipes
PrintAllRecipes(db)
else if response is "2" // Search for a recipe
pass
else if response is "3" //Show a Recipe
res:string = UserInterface.raw_input("Select a recipe -> ")
sql:string = "SELECT * FROM Recipes WHERE pkID = " + res
db.exec(sql, PrintSingleRecipe, null)
else if response is "4"//Delete a recipe
pass
else if response is "5" //Add a recipe
pass
else if response is "6" //Print a recipe
pass
else if response is "0" //Exit
print "Goodbye"
break
else
print "Unrecognized command. Try again."
Here is how the PrintSingleRecipe looks like:
def PrintSingleRecipe(n_columns:int, values:array of string, column_names:array of string):int
print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
for i:int = 0 to n_columns
stdout.printf ("%s = %s\n", column_names[i], values[i])
print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
print "Ingredient list"
print " "
return 0
I'm trying to set up a custom sort behaviour for my model/view structure in PySide.
All items are dictionaries and I need to filter by their keys (I'm not after a table view of those dictionaries but rather need to present them as one entity each).
I inherited from QSortFilterProxyModel and re-implemented the lessThan method. It works fine the first time the sort widget is changed (which triggers the proxy's sort() method), but after that the lessThan method is no longer called. I have no idea why and an hoping to get some help here. I'm more than happy to consider any suggestions how to tackle this in a different way if that is where the solution lies.
This is my proxy:
class ProxyModel ( QSortFilterProxyModel ):
def __init__( self, parent=None ):
super( ProxyModel, self).__init__( parent )
self.setFilterCaseSensitivity( Qt.CaseInsensitive )
self.setSortCaseSensitivity( Qt.CaseInsensitive )
self.setDynamicSortFilter( True )
def sortBy( self, attr ):
print 'sorting by', attr
self.__sortBy = attr
self.sort( 0, Qt.AscendingOrder ) # THIS DOES NOT GET CALLED WHEN THE COMBO BOX CHANGES A SECOND TIME
def lessThan( self, left, right ):
'''Custom sorting behaviour'''
leftTool = ( self.sourceModel().itemFromIndex( left ) )
rightTool = ( self.sourceModel().itemFromIndex( right ) )
leftData = leftTool.data()[ self.__sortBy ]
rightData = rightTool.data()[ self.__sortBy ]
return leftData < rightData
And here is the complete test code:
import sys
from PySide.QtGui import *
from PySide.QtCore import *
class MainWidget( QWidget ) :
def __init__( self, parent=None ):
super( MainWidget, self ).__init__()
self.listView = MyListView()
model = MyModel()
# MODELS AND VIEWS
self.proxyModel = ProxyModel()
self.proxyModel.setSourceModel( model )
self.listView.setModel(self.proxyModel)
# LAYOUTS
verticalLayout = QVBoxLayout()
filterLayout = QHBoxLayout()
# SORTING WIDGET
sortLayout = QHBoxLayout()
sortLabel = QLabel( 'sort:' )
self.sortWidget = QComboBox()
self.sortWidget.addItems( ['title', 'author', 'downloads'] )
self.sortWidget.currentIndexChanged.connect( self.sortTools )
sortLayout.addWidget( sortLabel )
sortLayout.addWidget( self.sortWidget )
verticalLayout.addLayout( filterLayout )
verticalLayout.addLayout( sortLayout )
verticalLayout.insertWidget(0, self.listView)
self.setLayout( verticalLayout )
def sortTools( self ):
text = self.sortWidget.currentText()
self.proxyModel.sortBy( text )
class ProxyModel ( QSortFilterProxyModel ):
def __init__( self, parent=None ):
super( ProxyModel, self).__init__( parent )
self.setFilterCaseSensitivity( Qt.CaseInsensitive )
self.setSortCaseSensitivity( Qt.CaseInsensitive )
self.setDynamicSortFilter( True )
def sortBy( self, attr ):
print 'sorting by', attr
self.__sortBy = attr
self.sort( 0, Qt.AscendingOrder ) # THIS DOES NOT GET CALLED WHEN THE COMBO BOX CHANGES A SECOND TIME
def lessThan( self, left, right ):
'''Custom sorting behaviour'''
leftTool = ( self.sourceModel().itemFromIndex( left ) )
rightTool = ( self.sourceModel().itemFromIndex( right ) )
leftData = leftTool.data()[ self.__sortBy ]
rightData = rightTool.data()[ self.__sortBy ]
return leftData < rightData
class MyListView( QListView ):
def __init__( self, parent=None ):
super( MyListView, self).__init__( parent )
self.setEditTriggers( QListView.NoEditTriggers )
self.setViewMode( QListView.IconMode )
self.setMovement( QListView.Static )
self.setResizeMode( QListView.Adjust )
self.setDragEnabled( True )
class MyModel( QStandardItemModel ):
def __init__( self, parent=None ):
super( MyModel, self).__init__( parent )
self.init_data()
def init_data(self):
row = 0
toolData = [ {'title':'ToolA', 'author':'John Doe', 'downloads':123, 'category':'color'},
{'title':'ToolB', 'author':'me', 'downloads':13, 'category':'color'},
{'title':'ToolC', 'author':'you', 'downloads':321, 'category':'transform'},
{'title':'ToolD', 'author':'unknown', 'downloads':2, 'category':'transform'}]
for tool in toolData:
item = QStandardItem( '%(title)s by %(author)s (%(category)s) - %(downloads)s downloads' % tool )
item.setData( tool )
self.setItem( row, 0, item )
row += 1
if __name__ == '__main__':
app = QApplication( sys.argv )
mainWidget = MainWidget()
mainWidget.resize( 400, 400 )
mainWidget.show()
sys.exit( app.exec_() )
Basically Qt "thinks" your Proxy is already sorted. To be precise it checks in C++ whether:
(d->dynamic_sortfilter &&
d->proxy_sort_column == column &&
d->sort_order == order)
So, to solve your problem, you can either set dynamic_sortfilter to False (it will have side-effects) or, maybe better, invalidate your sorting:
def sortBy( self, attr ):
print 'sorting by', attr
self.__sortBy = attr
self.invalidate() #invalidate helps
self.sort( 0, Qt.AscendingOrder )