Python sqlite3: how to quickly and cleanly interrupt long running query with, e.g., KeyboardInterrupt - sqlite

Using the sqlite3 module in Python a long running query is not quickly interrupted/canceled when a SIGINT (e.g. Control-C) is received. There is an interrupt() method provided by sqlite3, but there are no examples of how to use it.
Is there a simple way to interrupt/cancel a long running query running via Python/sqlite3?
To illustrate, first generate a test database & table:
import sqlite3
from random import randint

# Build a throwaway database with one table of random integers so the
# long-running-query demo has something to chew on.
conn = sqlite3.connect("randtable.db", 10.0)
cursor = conn.cursor()
cursor.execute("CREATE TABLE randint (id integer, rand integer)")
for i in range(1000000):
    if i % 1000 == 0:
        print("{0}...".format(i))  # progress marker every 1000 rows
    rand = randint(0, 1000000)
    # Parameterized query instead of str.format interpolation: lets sqlite3
    # reuse the prepared statement and avoids SQL-injection habits.
    cursor.execute("INSERT INTO randint VALUES (?, ?)", (i, rand))
conn.commit()  # single commit at the end, as in the original
conn.close()
Then execute a long running Python/sqlite3 script in a terminal and try to interrupt it with Control-C:
from __future__ import print_function
import sqlite3


def main():
    """Run a deliberately slow eight-way self-join and print the result rows."""
    # Pathological by design: forces SQLite to grind for a long time so the
    # effect of Ctrl-C / interrupt() can be observed.
    statement ='''
SELECT DISTINCT a.id,a.rand
FROM randint a
JOIN randint b ON a.id=b.rand
JOIN randint c ON a.id=c.rand
JOIN randint d ON a.id=d.rand
JOIN randint e ON a.id=e.rand
JOIN randint f ON a.id=f.rand
JOIN randint g ON a.id=g.rand
JOIN randint h ON a.id=h.rand
ORDER BY a.id limit 10'''
    db = sqlite3.connect('randtable.sqlite', 10.0)
    cur = db.cursor()
    print("Executing query")
    cur.execute(statement)
    print("ROWS:")
    for record in cur.fetchall():
        print(" ", record)
    db.close()


if __name__ == "__main__":
    main()
Running the above script in a terminal and then pressing Control-C (or sending SIGINT some other way) will eventually cancel the query and script but it can take quite a bit of time, many minutes. The exact same query running in the sqlite3 command line tool is near-instantly canceled when Control-C is pressed.
Thanks in advance!

Your answer covers it, but (after letting it slip my mind yesterday - sorry!) I remembered I'd promised to write an answer, so here's another version that demonstrates you can do this without globals. I've also used a threading.Event here instead of a signal to demonstrate there's a few different ways of signalling a thread that it's time to do something (but for your purposes, stick with signal because that's perfect for reacting to a Ctrl+C):
import sqlite3
import time
import threading


def kill_it(connection, event):
    """Wait for the go signal, give the query a one-second head start, then cancel it."""
    event.wait()
    time.sleep(1)
    connection.interrupt()


def big_query(conn, kill_event):
    """Build two 10k-row tables, then run a cartesian join big enough to interrupt."""
    print('Making big tables')
    conn.execute(
        "CREATE TABLE foo (i integer primary key, s text);")
    conn.execute(
        "CREATE TABLE bar (j integer primary key, s text);")
    # Assemble the VALUES lists up front, then issue the two bulk inserts.
    foo_rows = ", ".join("(%d, 'foo')" % i for i in range(10000))
    bar_rows = ", ".join("(%d, 'bar')" % i for i in range(10000))
    conn.execute("INSERT INTO foo VALUES %s" % foo_rows)
    conn.execute("INSERT INTO bar VALUES %s" % bar_rows)
    kill_event.set()  # tell the killer thread the long query is about to start
    print('Running query')
    cur = conn.cursor()
    cur.execute(
        "SELECT * FROM foo, bar")
    print(len(cur.fetchall()))


def main():
    conn = sqlite3.connect('foo.db')
    kill_event = threading.Event()
    killer = threading.Thread(target=kill_it, args=(conn, kill_event))
    killer.start()
    big_query(conn, kill_event)
    killer.join()


if __name__ == '__main__':
    main()

Answering my own question since I think I've worked it out. Below is what I've come up with, any comments on this code would be greatly appreciated.
#!/usr/bin/env python
from __future__ import print_function
import sqlite3
import threading
import signal
import os
import time

# Shared state between the worker thread and the SIGINT handler.
conn = None
shutdown = False


def main():
    """Run the pathological query in a worker thread; sqlite3.Connection.interrupt()
    (called from the signal handler) aborts it near-instantly."""
    global conn
    # Long running query (pathological by design)
    statement ='''
SELECT DISTINCT a.id,a.rand
FROM randint a
JOIN randint b ON a.id=b.rand
JOIN randint c ON a.id=c.rand
JOIN randint d ON a.id=d.rand
JOIN randint e ON a.id=e.rand
JOIN randint f ON a.id=f.rand
JOIN randint g ON a.id=g.rand
JOIN randint h ON a.id=h.rand
ORDER BY a.id limit 10'''
    conn = sqlite3.connect('randtable.sqlite', 10.0)
    cursor = conn.cursor()
    print ("Executing query")
    try:
        cursor.execute(statement)
    except sqlite3.Error as err:  # was `except Exception`; only DB errors expected here
        # Connection.interrupt() makes the running statement fail with the
        # message "interrupted" -- that is the expected path after Ctrl-C,
        # so only report other database errors.
        if str(err) != "interrupted":
            print ("Database error: {0}".format(str(err)))
        return None
    rows = cursor.fetchall()
    print ("ROWS:")
    for row in rows:
        print (" ", row)
    conn.close()
    conn = None
    return


def interrupt(signum, frame):
    """SIGINT handler: cancel the in-flight query on the worker's connection."""
    global conn
    global shutdown  # kept from the original; not currently used
    print ("Interrupt requested")
    if conn:
        conn.interrupt()


if __name__ == "__main__":
    # The query runs in a worker thread so the main thread stays free to
    # receive SIGINT and call Connection.interrupt().
    signal.signal(signal.SIGINT, interrupt)
    mainthread = threading.Thread(target=main)
    mainthread.start()
    # BUG FIX: Thread.isAlive() was removed in Python 3.9; is_alive() is the
    # correct spelling on all supported versions.
    while mainthread.is_alive():
        time.sleep(0.2)

Related

SQLite: Why is the INSERT SQL statement not updating the table in the database?

I have a Python PySimpleGUI form that connects to a SQLite database.
The function to create and update a table called fitness_class is:
def createFitnessClassTable(conn):
    '''Create the fitness_class table if missing and seed it with three rows.

    Returns True on success; on a database error shows a popup and returns
    False. Rows only persist once the caller commits (conn.commit() -- note
    the parentheses; the question's call site wrote `conn.commit` and so
    never actually committed).
    '''
    SQL = """CREATE TABLE IF NOT EXISTS fitness_class (
fitness_class_id integer PRIMARY KEY,
fitness_class_name text NOT NULL,
date_and_time text NOT NULL
);"""
    sql_create = """INSERT OR IGNORE INTO fitness_class(fitness_class_id,fitness_class_name,date_and_time)
VALUES(?,?,?)"""
    data = [
        (1, 'Cardio', 'Thursday 35pm'),
        (2, 'Pilates', 'Friday 911am'),
        (3, 'Spin', 'Monday 2 4pm')
    ]
    try:
        c = conn.cursor()
        c.execute(SQL)
        c.close()
        # The original named this cursor `connection`, which was misleading.
        cur = conn.cursor()
        cur.executemany(sql_create, data)
        cur.close()
    # BUG FIX: bare `Error` is a NameError unless imported; qualify it.
    except sqlite3.Error as e:
        sg.Popup(e)
        return False
    return True
When the function is called, this is creating the table and I am not getting any error messages. However, this is not saving the data (from the insert statement) either.
These are the rows related to calling the function
#!/usr/bin/python
import os
import PySimpleGUI as sg
from tkinter import *
import re
import sys
import PySimpleGUI as sg
import sqlite3

sys.path.append(os.path.dirname(__file__))
conn = dbconnect()
createFitnessClassTable(conn=conn)
# BUG FIX: `conn.commit` (no parentheses) only references the bound method
# and never calls it, so the INSERTed rows were silently discarded when the
# connection closed. Invoke it:
conn.commit()
conn.close()
I am confused because I have a similar function to create another table which is working correctly (i.e. creating the table if it doesn't exist and populating it with the data):
def createMembershipTable(conn):
    '''Create the membership table if missing and seed it with three rows.

    Returns True on success; on a database error prints it and returns False.
    The caller is responsible for committing the transaction.
    '''
    SQL = """
CREATE TABLE IF NOT EXISTS membership (
membership_type_id integer PRIMARY KEY,
membership_type text NOT NULL,
weekly_amount real NOT NULL
);"""
    sql_create = """INSERT OR IGNORE INTO membership(membership_type_id,membership_type,weekly_amount)
VALUES(?,?,?)"""
    data = [(1, 'Basic', 10.00),
            (2, 'Regular', 15.00),
            (3, 'Premium', 20.00)
            ]
    try:
        c = conn.cursor()
        c.execute(SQL)
        c.close()
        cur = conn.cursor()
        cur.executemany(sql_create, data)
        cur.close()
    # BUG FIX: bare `Error` is a NameError unless imported; qualify it.
    except sqlite3.Error as e:
        print(e)
        return False
    return True
The lines to call that function:
# Open a connection, build/seed the membership table, then persist and close.
conn = dbconnect()
createMembershipTable(conn)
conn.commit()  # note the parentheses -- this actually invokes commit
conn.close()
What am I missing? Why would the function createMembershipTable work as expected though the function createFitnessClassTable not work when they are both almost identical?
Just after posting (and 3 hours later), I realized the issue:
I was missing the parentheses on conn.commit in the code that called createFitnessClassTable — writing conn.commit instead of conn.commit() only references the method without invoking it, so the inserts were never committed.

How can I open a db.sqlite3 file and have a look at its content?

I don't know how to open a db.sqlite3 file in a reader-friendly way.
I hope the data in it would be shown in tables
Upload your file here and get the tabulated result:
http://inloop.github.io/sqlite-viewer/
OR run a Python script like below
def create_connection(db_file):
    """Create a database connection to the SQLite database specified by db_file.

    :param db_file: database file
    :return: Connection object or None
    """
    # Reconstructed from a paste that fused several statements onto one line.
    conn = None
    try:
        conn = sqlite3.connect(db_file)
    # BUG FIX: bare `Error` is a NameError unless imported; qualify it.
    except sqlite3.Error as e:
        print(e)
    return conn


def select_all_tasks(conn):
    """Query all rows in the tasks table and print each one.

    :param conn: the Connection object
    """
    cur = conn.cursor()
    cur.execute("SELECT * FROM tasks")
    rows = cur.fetchall()
    for row in rows:
        print(row)

Error when connecting to a database using sqlite3 with Python

When running following code I get error which I posted at the bottom of the post. I followed tutorial on creating databases from here.
These functions worked when creating previous databases though.
I am using jupyter notebook v 3.5.
def create_connection(db_file):
    """Open db_file and hand back the connection, or None when opening fails."""
    try:
        return sqlite3.connect(db_file)
    except sqlite3.Error as exc:
        print("Connection error: [%s]" % exc)
        return None
def create_table(conn, create_table_sql):
    """Execute a CREATE TABLE statement on conn, reporting any sqlite error."""
    try:
        cur = conn.cursor()
        cur.execute(create_table_sql)
    except sqlite3.Error as exc:
        print("Connection error while creating table: [%s]" % exc)
def sqlTables(db_file):
    """Ensure the table_data table exists in db_file."""
    # BUG FIX: the original SQL had a trailing comma after the last column
    # ("confidence_score INT NOT NULL,"), which SQLite rejects with
    # 'near ")": syntax error' -- the table was never created, which is why
    # the later INSERT failed with "no such table: table_data".
    sql_create_synset_table = ''' CREATE TABLE IF NOT EXISTS table_data (
    id TEXT NOT NULL,
    status TEXT NOT NULL,
    confidence_score INT NOT NULL
    ); '''
    conn = create_connection(db_file)
    if conn is not None:
        create_table(conn, sql_create_synset_table)
    else:
        print("Error! cannot create db conn.")
def upload_data(db_file):
    """Create table_data (if needed) and bulk-load the module-level lists into it."""
    sqlTables(db_file)
    conn = create_connection(db_file)
    cursor = conn.cursor()
    # NOTE(review): id_list / status_list / conf_list must already exist at
    # module level -- they are not defined in this snippet.
    with conn:
        for idx in range(len(id_list)):
            row = (id_list[idx], status_list[idx], conf_list[idx])
            cursor.execute("INSERT INTO table_data(id, status, confidence_score) VALUES(?,?,?)"\
                , row)
        conn.commit()
upload_data("path/to/db/table.db")
Connection error while creating table: [near ")": syntax error]
---> 12 cursor.execute("INSERT INTO table_data(id, status, confidence_score) VALUES(?,?,?)" ,(sset_id, stus, conf))
OperationalError: no such table: table_data

How to use sqlalchemy to select data from a database?

I have two sqlalchemy scripts, one that creates a database and a few tables and another that selects data from them.
create_database.py
from sqlalchemy import create_engine, Table, Column, Integer, String, MetaData, ForeignKey, select

# echo=True makes the engine log every SQL statement it emits.
engine = create_engine('sqlite:///test.db', echo=True)
metadata = MetaData()

# Parent table: each user row may be referenced by many addresses.
users = Table('users', metadata,
              Column('id', Integer, primary_key=True),
              Column('name', String),
              Column('fullname', String),
              )

# Child table: user_id points back at users.id (type inferred from the FK).
addresses = Table('addresses', metadata,
                  Column('id', Integer, primary_key=True),
                  Column('user_id', None, ForeignKey('users.id')),
                  Column('email_addresses', String, nullable=False)
                  )

# Emit CREATE TABLE statements for any tables not already present.
metadata.create_all(engine)
select.py
from sqlalchemy import create_engine, select
# BUG FIX: `users` is defined in create_database.py, not here; without this
# import the script dies with a NameError on select([users]).
from create_database import users

engine = create_engine('sqlite:///test.db', echo=True)
conn = engine.connect()
s = select([users])
result = conn.execute(s)
I am able to run the create_database.py script but when I run the select.py script I get the following error
$ python select.py
Traceback (most recent call last):
File "select.py", line 5, in <module>
s = select([users])
I am able to run the select statement from within the create_database.py by appending the following to create_database.py
# This SELECT works when appended to create_database.py because `users`
# is defined earlier in that same module.
conn = engine.connect()
s = select([users])
result = conn.execute(s)
How can I run the select statements from a separate script than create_database.py
The script select.py does not see users and addresses defined in create_database.py. Import them in select.py before using them.
In select.py:
# Pull the Table objects in from create_database.py so they are in scope here.
from create_database import users, addresses
## Do something with users and addresses

Big SELECT optimization

I am using SQLite in Python for a big file management system. I have a big flat file (100 millions lines) that I want to sort using the values of 3 columns (which are integers), so that I could iterate and do some computation.
I used SQLite with a big SELECT ... ORDER BY (with an index on one column). Since this big SELECT is too memory demanding I need to call it several times (with OFFSET and LIMIT).
I could use Linux sort, but I want it to be platform independent. It is working fine (as long as the right PRAGMA are correctly set), but slow. How to optimize this?
Commands are like:
PRAGMA journal_mode = OFF
PRAGMA synchronous = 0
PRAGMA locking_mode = EXCLUSIVE
PRAGMA count_change = OFF
PRAGMA temp_store = 2
CREATE TABLE tmpTranscripts_arm_3R_transcripts (id INTEGER PRIMARY KEY, name varchar(255), chromosome varchar(255), start int(11), end int(11), direction tinyint(4), tags varchar(1023), bin int(11), exons varchar(10000))
CREATE INDEX 'iTranscript_arm_3R_14943' ON 'tmpTranscripts_arm_3R_transcripts' (start, end, direction)
INSERT INTO tmpTranscripts_arm_3R_transcripts (name, chromosome, start, end, direction, tags, bin, exons) VALUES ('SRR060644.1', 'arm_3R', 11450314, 11450337, -1, 'feature=transcript;bestRegion=(self);nbGaps=0;nbMismatches=0;ID=SRR060644.1;identity=100.0', 300011450, '')
(this, more than 10 million times)
SELECT * FROM tmpTranscripts_arm_3R_transcripts ORDER BY start, end, direction LIMIT 0, 10000
(this, as much as needed)
I have written a sample script that creates your database and iterates through all its elements, and it looks like it runs much faster than you reported in the comments. Are you sure that database access is the bottleneck? Maybe your script does additional work that accounts for the time.
I have checked 2 databases SQLite and MongoDB with 5 millions of items.
For SQLite, inserting all rows took ~1200 seconds and selecting them took around 300 seconds.
MongoDB was faster and insert took ~400 seconds while select less than 100 seconds.
Please check your code with my samples and check if your select is similar. I used cursor instead of LIMIT/OFFSET.
If this still doesn't help, then I think MongoDB is worth a shot. It has one disadvantage — it requires a 64-bit OS to support large databases (like yours). If you have never used it before, here is the shortest installation guide for Windows:
Download and unpack MongoDB for windows 64-bit from http://www.mongodb.org/downloads
Run "mongod.exe --dbpath ."
Download module for python 2.x from http://pypi.python.org/pypi/pymongo/ or for python 3.x from http://pypi.python.org/pypi/pymongo3/
Run my script
And here are my python 3.x test scripts for SQLite
import sqlite3
from time import time

conn = sqlite3.connect('test.dbase')
c = conn.cursor()

# Trade durability for speed: no journal, no fsync, exclusive lock,
# in-memory temp store -- appropriate only for a throwaway benchmark.
c.execute("""PRAGMA journal_mode = OFF""")
c.execute("""PRAGMA synchronous = 0""")
c.execute("""PRAGMA locking_mode = EXCLUSIVE""")
c.execute("""PRAGMA count_change = OFF""")
c.execute("""PRAGMA temp_store = 2""")

c.execute("""CREATE TABLE tmpTranscripts_arm_3R_transcripts (id INTEGER PRIMARY KEY, name varchar(255), chromosome varchar(255), start int(11), end int(11), direction tinyint(4), tags varchar(1023), bin int(11), exons varchar(10000))""")
c.execute("""CREATE INDEX 'iTranscript_arm_3R_14943' ON 'tmpTranscripts_arm_3R_transcripts' (start, end, direction)""")

t1 = time()
# Parameterized INSERT (was %-string interpolation): SQLite reuses the
# prepared statement and no quoting pitfalls are possible.
insert_sql = ("INSERT INTO tmpTranscripts_arm_3R_transcripts "
              "(name, chromosome, start, end, direction, tags, bin, exons) "
              "VALUES ('SRR060644.1', 'arm_3R', ?, ?, ?, "
              "'feature=transcript;bestRegion=(self);nbGaps=0;nbMismatches=0;ID=SRR060644.1;identity=100.0', "
              "300011450, '')")
for i in range(0, 5000000):
    c.execute(insert_sql, ((i+123)%352, (i+523)%422, (i+866)%536))
    if(not i%10000):
        print("Insert:", i)
t2 = time()
print("Insert time", t2-t1)
conn.commit()

t1 = time()
# Iterate the cursor directly instead of LIMIT/OFFSET paging.
c.execute("""SELECT * FROM tmpTranscripts_arm_3R_transcripts ORDER BY start, end, direction""")
i = 0
for row in c:
    a = row[0]
    if(not i%10000):
        print("Get:", i, row)
    i += 1
t2 = time()
print("Sort time", t2-t1)
c.close()
and for MongoDB
from pymongo import Connection
from pymongo import ASCENDING, DESCENDING
from time import time

# Connect exactly as the original did (the second call wins).
connection = Connection()
connection = Connection('localhost', 27017)
db = connection['test-database']
collection = db['test-collection']
posts = db.posts

# Compound index mirroring the SQLite ORDER BY columns.
posts.create_index([("start", ASCENDING), ("end", ASCENDING), ("direction", ASCENDING)])

t1 = time()
for i in range(0, 5000000):
    post = {"name": 'SRR060644.1',
            "chromosome": 'arm_3R',
            "start": (i+123)%352,
            "end": (i+523)%422,
            "direction": (i+866)%536,
            "tags": 'feature=transcript;bestRegion=(self);nbGaps=0;nbMismatches=0;ID=SRR060644.1;identity=100.0',
            "bin": 300011450,
            "exons": ''}
    posts.insert(post)
    if(not i%10000):
        print("Insert:", i)
t2 = time()
print("Insert time", t2-t1)

t1 = time()
i = 0
# Sorted scan over the same three keys, counting rows as we go.
for post in posts.find().sort([("start", ASCENDING), ("end", ASCENDING), ("direction", ASCENDING)]):
    if(not i%10000):
        print("Get:", i, post)
    i += 1
t2 = time()
print("Sort time", t2-t1)

Resources