I'm looping through a folder of SQL files. For each file I want to push its contents as an XCom value, with a specific value for each query.
The code below mostly works, but not once I add the else statement: the 'not set' value overwrites everything.
directory = r'airflow_home/dags/sql'

for filename in os.listdir(directory):
    with open(os.path.join(directory, filename), 'r') as file:
        sqlFile = file.read()
    if filename == 'api_params.sql':
        query = sqlFile.format(partitioned_key, execution_date_second, partitioned_key, next_execution_date_second)
    if filename == 'create_fact_table.sql':
        query = sqlFile.format(fact_table_dest)
    if filename == 'create_geo_table.sql':
        query = sqlFile.format(fact_table_dest)
    if filename == f'{geo_type}' + '.sql':
        query = sqlFile.format(execution_date)
        filename = 'geo_query'
    if filename == 'schema_' + f'{schema}' + '.sql':
        query = sqlFile.format(fact_table_dest, raw_table_dest, execution_date, next_execution_date)
        filename = 'production_query'
    if filename == 'insert_key.sql':
        query = sqlFile.format(raw_table_dest, execution_date, next_execution_date)
    else:
        query = 'not set'
    task_instance.xcom_push(key=filename, value=query)
Can someone explain what's happening here?
You are using multiple if statements, which are evaluated one after the other. The else belongs only to the last if statement, so it overwrites the query value set by any earlier branch. What you are actually looking for is elif - see the Python docs.
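A minimal sketch of the difference (the filenames and values here are made up just for illustration):

filename = 'api_params.sql'

# Separate if statements: the else pairs only with the LAST if,
# so it runs whenever that last condition is false and clobbers
# whatever an earlier branch already assigned.
if filename == 'api_params.sql':
    query = 'set by first if'
if filename == 'insert_key.sql':
    query = 'set by second if'
else:
    query = 'not set'
print(query)   # -> 'not set'

# elif makes it one chain: the else only runs when no branch matched.
if filename == 'api_params.sql':
    query = 'set by first branch'
elif filename == 'insert_key.sql':
    query = 'set by second branch'
else:
    query = 'not set'
print(query)   # -> 'set by first branch'

Applied to your loop, the chain becomes: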
directory = r'airflow_home/dags/sql'

for filename in os.listdir(directory):
    with open(os.path.join(directory, filename), 'r') as file:
        sqlFile = file.read()
    if filename == 'api_params.sql':
        query = sqlFile.format(partitioned_key, execution_date_second, partitioned_key, next_execution_date_second)
    elif filename == 'create_fact_table.sql':
        query = sqlFile.format(fact_table_dest)
    elif filename == 'create_geo_table.sql':
        query = sqlFile.format(fact_table_dest)
    elif filename == f'{geo_type}' + '.sql':
        query = sqlFile.format(execution_date)
        filename = 'geo_query'
    elif filename == 'schema_' + f'{schema}' + '.sql':
        query = sqlFile.format(fact_table_dest, raw_table_dest, execution_date, next_execution_date)
        filename = 'production_query'
    elif filename == 'insert_key.sql':
        query = sqlFile.format(raw_table_dest, execution_date, next_execution_date)
    else:
        query = 'not set'
    task_instance.xcom_push(key=filename, value=query)
How do I preserve custom properties from an xlsx template which I am modifying with openpyxl? When I save() the workbook using openpyxl, these custom properties vanish!
The custom properties can be found here:
On Mac: go to the File menu in Excel -> Properties... -> Custom tab -> Properties section
I am posting a pure Python solution for reading and writing Workbook.CustomDocumentProperties, because I am currently also feeling the pain of not having this in openpyxl, and I needed a quick workaround for a personal automation project.
In fact, I will try to implement this feature (and hopefully later Worksheet.CustomProperties) into openpyxl myself if I can get my head around how to do all the plumbing the library needs: https://foss.heptapod.net/openpyxl/openpyxl/-/issues/1003
Update: I pushed my contribution and it should be accepted and merged shortly :) https://foss.heptapod.net/openpyxl/openpyxl/-/merge_requests/384
So for now, here is a workaround: convert the .xlsx to .zip, read and write the .xml files inside the zip directly, then rename back to .xlsx at the end.
To read Workbook.CustomDocumentProperties you can do this - only very slightly modified from this great answer: https://stackoverflow.com/a/46919795/9792594
import os
import zipfile

from lxml import etree as ET


def get_custom_doc_properties(filename):
    path_file = os.path.abspath(filename)
    base, ext = os.path.splitext(path_file)
    zip_filename = base + ".zip"
    os.rename(path_file, zip_filename)

    main_ns = "{http://schemas.openxmlformats.org/spreadsheetml/2006/main}"
    docPr_ns = "{http://schemas.openxmlformats.org/officeDocument/2006/custom-properties}"
    docPr_type = "{http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes}"  # i4, r8, filetime, bool, lpwstr
    r_ns = "{http://schemas.openxmlformats.org/officeDocument/2006/relationships}"
    cusPr_type = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/customProperty"

    with zipfile.ZipFile(zip_filename) as zip:
        props = zip.open('docProps/custom.xml')
        text = props.read()
        xml = ET.fromstring(text)

    # rename back so the workbook keeps its .xlsx extension
    os.rename(zip_filename, path_file)

    workbook_props = {}
    for child in xml:
        if child.tag == f"{docPr_ns}property":
            for cusPr in child:
                workbook_props[child.attrib['name']] = cusPr.text
    return workbook_props


# call like this:
get_custom_doc_properties('./example.xlsx')
Adding one property to a document which already has custom doc props (and therefore already has a 'docProps/custom.xml' file) is pretty easy: we just append one more custom property to the XML.
(However, if the document has no custom doc props yet, we need to generate the 'docProps/custom.xml' file from scratch, as well as add a content-type override and a relationship - see the code comments):
import datetime
import os
import shutil
import zipfile
from io import BytesIO
from tempfile import NamedTemporaryFile

from lxml import etree as ET


def set_workbook_custom_document_properties(filename, cus_doc_prop_name, cus_doc_prop_val):
    if not isinstance(cus_doc_prop_name, str):
        print("you must supply a string as the 'cus_doc_prop_name'")
        return

    # note: bool must be checked before int, because bool is a subclass of int
    if isinstance(cus_doc_prop_val, bool):
        docPr_type_suffix = "bool"
        cus_doc_prop_str = str(cus_doc_prop_val)
    elif isinstance(cus_doc_prop_val, str):
        docPr_type_suffix = "lpwstr"
        cus_doc_prop_str = cus_doc_prop_val
    elif isinstance(cus_doc_prop_val, int):
        docPr_type_suffix = "i4"
        cus_doc_prop_str = str(cus_doc_prop_val)
    elif isinstance(cus_doc_prop_val, float):
        docPr_type_suffix = "r8"
        cus_doc_prop_str = str(cus_doc_prop_val)
    elif isinstance(cus_doc_prop_val, datetime.datetime):
        docPr_type_suffix = "filetime"
        cus_doc_prop_str = cus_doc_prop_val.strftime("%Y-%m-%dT%H:%M:%SZ")
    else:
        print("you must supply a string, int, float, bool, or date, as the 'cus_doc_prop_val'")
        return

    path_file = os.path.abspath(filename)
    base, ext = os.path.splitext(path_file)
    zip_filename = base + ".zip"
    os.rename(path_file, zip_filename)

    main = "http://schemas.openxmlformats.org/spreadsheetml/2006/main"
    main_ns = "{%s}" % main
    docPr = "http://schemas.openxmlformats.org/officeDocument/2006/custom-properties"
    docPr_ns = "{%s}" % docPr
    docPr_type = "http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes"
    docPr_type_ns = "{%s}" % docPr_type  # i4, r8, filetime, bool, lpwstr
    docPr_rel_type = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/custom-properties"
    docPr_content_type = "application/vnd.openxmlformats-officedocument.custom-properties+xml"
    r_ns = "{http://schemas.openxmlformats.org/officeDocument/2006/relationships}"
    cusPr_type = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/customProperty"

    xml_declaration = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
    base_xml = '{dec}<Properties xmlns="{docPr}" xmlns:vt="{docPr_type}"></Properties>'.format(
        dec=xml_declaration, docPr=docPr, docPr_type=docPr_type).encode('utf-8')

    with NamedTemporaryFile() as tmp_file:
        tmpname = os.path.basename(tmp_file.name)
        with zipfile.ZipFile(zip_filename, 'r') as zip_in:
            with zipfile.ZipFile(tmpname, 'w') as zip_out:
                zip_out.comment = zip_in.comment  # preserve the comment
                custom_present = 'docProps/custom.xml' in zip_in.namelist()

                # copy everything across except the parts we may need to rewrite
                for item in zip_in.infolist():
                    if item.filename == 'docProps/custom.xml':
                        custom_xml = ET.fromstring(zip_in.read(item.filename))
                    elif custom_present == False and item.filename == '_rels/.rels':
                        rels_xml = ET.fromstring(zip_in.read(item.filename))
                    elif custom_present == False and item.filename == '[Content_Types].xml':
                        content_types_xml = ET.fromstring(zip_in.read(item.filename))
                    else:
                        zip_out.writestr(item, zip_in.read(item.filename))

                if custom_present:
                    # if custom.xml is already present we just need to append:
                    max_pid = 1
                    for node in custom_xml:
                        max_pid = max(int(node.attrib['pid']), max_pid)
                else:
                    # if custom.xml is not present, we need to create it
                    # and also to add an override to [Content_Types].xml
                    # and also to add a relationship to _rels/.rels
                    custom_xml = ET.parse(BytesIO(base_xml)).getroot()
                    max_pid = 1

                    child_override = ET.SubElement(content_types_xml, "Override")
                    child_override.attrib['ContentType'] = docPr_content_type
                    child_override.attrib['PartName'] = '/docProps/custom.xml'
                    zip_out.writestr('[Content_Types].xml', ET.tostring(content_types_xml))

                    max_rid = 0
                    for node in rels_xml:
                        max_rid = max(int(node.attrib['Id'].replace("rId", "")), max_rid)
                    child_rel = ET.SubElement(rels_xml, "Relationship")
                    child_rel.attrib['Type'] = docPr_rel_type
                    child_rel.attrib['Target'] = 'docProps/custom.xml'
                    child_rel.attrib['Id'] = "rId" + str(max_rid + 1)
                    zip_out.writestr('_rels/.rels', ET.tostring(rels_xml))

                # append the new property and write custom.xml back out
                child = ET.SubElement(custom_xml, "property")
                child.attrib['name'] = cus_doc_prop_name
                child.attrib['pid'] = str(max_pid + 1)
                child.attrib['fmtid'] = "{D5CDD505-2E9C-101B-9397-08002B2CF9AE}"
                val = ET.SubElement(child, f"{docPr_type_ns}{docPr_type_suffix}")
                val.text = cus_doc_prop_str
                print(ET.tostring(custom_xml, pretty_print=True))
                zip_out.writestr('docProps/custom.xml', ET.tostring(custom_xml))

        shutil.copyfile(tmpname, zip_filename)
    os.rename(zip_filename, path_file)
# call it like this:
set_workbook_custom_document_properties('./example.xlsx', "testDocProp7", 2.5)
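A quick way to sanity-check the write is to read the properties back with the helper from the first snippet. This is just a hypothetical round trip, assuming both functions above are defined in the same session and ./example.xlsx exists:

# write a property, then read everything back
set_workbook_custom_document_properties('./example.xlsx', "testDocProp7", 2.5)
print(get_custom_doc_properties('./example.xlsx'))
# expected to include something like {'testDocProp7': '2.5'}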
How to delete columns with all null values in SQLite? I've got nearly 200 columns and don't want to list them all.
For SQLite you will want to try something along the lines of:
DELETE FROM myTable WHERE myColumn IS NULL OR trim(myColumn) = '';
You have to use another language to automate it.
## pip install sqlite_utils
import argparse

import sqlite_utils


def tracer(sql, params) -> None:
    print("SQL: {} - params: {}".format(sql, params))


def connect(args) -> sqlite_utils.Database:
    db = sqlite_utils.Database(args.database, tracer=tracer if args.verbose >= 2 else None)
    db.execute("PRAGMA main.cache_size = 8000")
    return db


def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser()
    parser.add_argument("database")
    parser.add_argument("table")
    parser.add_argument("--verbose", "-v", action="count", default=0)
    args = parser.parse_args()
    return args


def remove_empty_cols() -> None:
    args = parse_args()
    db = connect(args)
    total_rows = db[args.table].count
    for col in [col.name for col in db[args.table].columns if col.type == 'TEXT']:
        details = db[args.table].analyze_column(col, total_rows=total_rows)
        if details.num_null == total_rows and details.num_distinct == 0:
            with db.conn:
                db.execute(f'alter table "{args.table}" drop column "{col}"')


if __name__ == "__main__":
    remove_empty_cols()
Run like this:
python remove_empty_cols.py video.db reddit_posts
Using a subquery like this did not seem to work:
SELECT 'alter table reddit_posts drop column ' || name || ';' ddl
FROM pragma_table_info('reddit_posts') t
WHERE "notnull"=0
AND (
SELECT count(t.name) FROM reddit_posts
) = 0
but if you don't want to use Python, you could run this and then manually fill in the columns whose count is 0:
SELECT 'select count(' || name || ') from reddit_posts;' dml
FROM pragma_table_info('reddit_posts') t
WHERE "notnull"=0 AND "type"='TEXT';
SELECT 'alter table reddit_posts drop column ' || name || ';' ddl
FROM pragma_table_info('reddit_posts') t
WHERE name IN (
...
);
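If you do want to automate it with only the standard library, here is a minimal sketch using the built-in sqlite3 module. It assumes SQLite 3.35 or newer (required for ALTER TABLE ... DROP COLUMN); the database and table names are just the ones from the example above.

import sqlite3

def drop_all_null_columns(db_path, table):
    con = sqlite3.connect(db_path)
    # PRAGMA table_info returns (cid, name, type, notnull, dflt_value, pk)
    cols = [row[1] for row in con.execute(f'PRAGMA table_info("{table}")')]
    for col in cols:
        # count(col) ignores NULLs, so a count of 0 means the column is entirely NULL
        (non_null,) = con.execute(f'SELECT count("{col}") FROM "{table}"').fetchone()
        if non_null == 0:
            con.execute(f'ALTER TABLE "{table}" DROP COLUMN "{col}"')
    con.commit()
    con.close()

drop_all_null_columns("video.db", "reddit_posts")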
Based on previous questions here, I managed to create the dataset and print all recipes listed, and now I am trying to pick one of the recipes from that list and show its Title, Instructions and Ingredients. The instructions are mapped to the recipes via the pkID column, and the ingredients are mapped to the recipes through a recipeID column. When I open the database in SQLite Database Browser I can access this information in the Tables drop-down list, so I suppose the proper name for them is tables within the database.
I am not able to filter by pkID and by recipeID, so that after picking one recipe, only the appropriate content is shown.
This is the code in Python of what I am trying to do in Genie:
def PrintSingleRecipe(self, which):
    sql = 'SELECT * FROM Recipes WHERE pkID = %s' % str(which)
    print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'
    for x in cursor.execute(sql):
        recipeid = x[0]
        print "Title: " + x[1]
        print "Serves: " + x[2]
        print "Source: " + x[3]
    print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'
    sql = 'SELECT * FROM Ingredients WHERE RecipeID = %s' % recipeid
    print 'Ingredient List:'
    for x in cursor.execute(sql):
        print x[1]
    print ''
    print 'Instructions:'
    sql = 'SELECT * FROM Instructions WHERE RecipeID = %s' % recipeid
    for x in cursor.execute(sql):
        print x[1]
    print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'
    resp = raw_input('Press A Key -> ')
I have not been able to improve my code much; it seems that the approach I used before of iterating in a step statement cannot be used here. This is how far I got in Genie:
def PrintSingleRecipe(db:Database)
    stmt:Statement = PreparedStatements.select_all( db )
    res:int = UserInterface.raw_input("Select a recipe -> ").to_int()
    cols:int = stmt.column_count()
    var row = new dict of string, string
    item:int = 1
    print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
    while res == ROW
        for i:int = 0 to (cols - 1)
            row[ stmt.column_name( i ) ] = stmt.column_text( i )
        stdout.printf( "%-5s", item.to_string( "%03i" ))
        stdout.printf( "%-30s", row[ "Title" ])
        stdout.printf( "%-20s", row[ "Serves" ])
        stdout.printf( "%-30s\n", row[ "Source" ])
    print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
    print "Ingredient list"
    print " "
    stdout.printf("%-5s", item.to_string( "%03i" ))
I have found a solution to the problem; maybe it can be optimized, but for now it is enough.
Answers from another question helped immensely. The solution was to use the exec function and point its callback at PrintSingleRecipe().
Some adjustments had to be made for it to work as a callback, but I got what I needed.
Here is the code where the function gets called:
while true
    response:string = UserInterface.get_input_from_menu()
    if response == "1"          // Show All Recipes
        PrintAllRecipes(db)
    else if response is "2"     // Search for a recipe
        pass
    else if response is "3"     // Show a Recipe
        res:string = UserInterface.raw_input("Select a recipe -> ")
        sql:string = "SELECT * FROM Recipes WHERE pkID = " + res
        db.exec(sql, PrintSingleRecipe, null)
    else if response is "4"     // Delete a recipe
        pass
    else if response is "5"     // Add a recipe
        pass
    else if response is "6"     // Print a recipe
        pass
    else if response is "0"     // Exit
        print "Goodbye"
        break
    else
        print "Unrecognized command. Try again."
Here is what PrintSingleRecipe looks like:
def PrintSingleRecipe(n_columns:int, values:array of string, column_names:array of string):int
    print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
    // Genie's "to" is inclusive, so stop at n_columns - 1
    for i:int = 0 to (n_columns - 1)
        stdout.printf ("%s = %s\n", column_names[i], values[i])
    print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
    print "Ingredient list"
    print " "
    return 0
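For comparison, here is a rough Python 3 sketch of the same per-row callback idea using the standard sqlite3 module. The table and column names follow the question; the database path and the pkID value are assumptions.

import sqlite3

def print_single_recipe(db_path, pk_id):
    con = sqlite3.connect(db_path)
    con.row_factory = sqlite3.Row   # rows can be read like dicts
    print("~" * 56)
    for row in con.execute("SELECT * FROM Recipes WHERE pkID = ?", (pk_id,)):
        for name in row.keys():
            print("%s = %s" % (name, row[name]))
    print("~" * 56)
    con.close()

print_single_recipe("recipes.db", 1)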
When I put form = SQLFORM.grid(db.mytable) in my controller, the request changes to my/web/site/view?_signature=520af19b1095db04dda2f1b6cbea3a03c3551e13, which causes the if statement in my controller to fail. Can somebody please explain why this happens?
If I set user_signature=False, the grid is shown when the view loads (though it looks awful, and I still need to find out how to change how the table is displayed), but on search, edit, etc. clicks the same thing happens again: the URL changes and I get an error.
Any suggestions?
Thank you.
EDIT
This is my edit function
@auth.requires_login()
def edit():
    # Load workers
    workers = db(db.worker.w_organisation == 10).select(
        db.worker.w_id_w, db.worker.w_organisation, db.worker.w_first_name, db.worker.w_last_name,
        db.worker.w_nick_name, db.worker.w_email, db.worker.w_status, db.worker.w_note).as_list()

    # Define the query object
    query = ((db.worker.w_organisation == 10) & (db.worker.w_status == db.status.s_id_s))

    # Define the fields to show on the grid. Note: you need to specify the id field in the fields section in 1.99.2
    fields = (db.worker.w_first_name, db.worker.w_last_name, db.worker.w_nick_name,
              db.worker.w_email, db.status.s_code, db.worker.w_note)

    # Define headers as tuples/dictionaries
    headers = {'worker.w_first_name': 'Ime',
               'worker.w_last_name': 'Priimek',
               'worker.w_nick_name': 'Vzdevek',
               'worker.w_email': 'E-posta',
               'status.s_code': 'Status',
               'worker.w_note': 'Komentar'}

    # Specify a default sort order on the last name column in the grid
    default_sort_order = [db.worker.w_last_name]

    # Creating the grid object
    form = SQLFORM.grid(query=query, fields=fields, headers=headers, searchable=True,
                        orderby=default_sort_order, create=True, deletable=True, editable=True,
                        maxtextlength=64, paginate=25, user_signature=False)
    form = SQLFORM.grid(db.worker, user_signature=False)

    workersDb = db((db.worker.w_organisation == 10) & (db.worker.w_status == db.status.s_id_s)).select(
        db.worker.w_id_w, db.worker.w_organisation, db.worker.w_first_name,
        db.worker.w_last_name, db.worker.w_nick_name, db.worker.w_email,
        db.status.s_code, db.worker.w_note).as_list()

    workersList = []
    for rec in workersDb:
        status = rec['status']['s_code']
        workers = rec['worker']
        if not rec["worker"]["w_first_name"]:
            polno_ime = rec["worker"]["w_last_name"]
        elif not rec["worker"]["w_last_name"]:
            polno_ime = rec["worker"]["w_first_name"]
        else:
            polno_ime = rec["worker"]["w_first_name"] + " " + rec["worker"]["w_last_name"]
        rec["worker"]['w_full_name'] = polno_ime
        rec["worker"]["w_status"] = status
        data = rec["worker"]
        #print rec
        #print data
        workersList.append(rec["worker"])

    # If the arg is an int, the user wants to edit the script with that id
    if request.args[0].isdigit():
        script = db(getDbScript(request.args[0])).select(
            db.script.sc_lls, db.script.sc_name, db.script.id, db.script.sc_menu_data).first()
        # If we don't get any results, the request is not valid and we show an error
        if not script:
            return error(0)
        # Parsing script to be inserted into the view
        formData = str(script["sc_menu_data"])
        #form = SQLFORM.grid(db.auth_user)
        #print formData
        #print script
        return dict(newScript=False, script=script, formData=formData, workers=workersList, form=form)
    # If the argument is 'new' we prepare the page for a new script
    elif request.args[0] == 'new':
        scripts = db(auth.user.organization == db.script.sc_organization).select(db.script.sc_name, db.script.id)
        return dict(newScript=True, scripts=scripts, workers=workersList, form=form)
    # Else error
    else:
        return error(0)
Also, not to mention that the SQLFORM.grid looks awful; here is a link to a picture: https://plus.google.com/103827646559093653557/posts/Bci4PCG4BQQ
I am trying to read a text file using Dynamics AX. However, the following code replaces any spaces in the lines with commas:
// Open file for read access
myFile = new TextIo(fileName, 'R');
myFile.inFieldDelimiter('\n');
fileRecord = myFile.read();
while (fileRecord)
{
    line = con2str(fileRecord);
    info(line);
    …
I have tried various combinations of the above code, including specifying a blank '' field delimiter, but with the same behaviour.
The following code works, but seems like there should be a better way to do this:
// Open file for read access
myFile = new TextIo(fileName, 'R');
myFile.inRecordDelimiter('\n');
myFile.inFieldDelimiter('_stringnotinfile_');
fileRecord = myFile.read();
while (fileRecord)
{
    line = con2str(fileRecord);
    info(line);
The file is in fixed-field format. For example:
DATAFIELD1 DATAFIELD2 DATAFIELD3
DATAFIELD1 DATAFIELD3
DATAFIELD1 DATAFIELD2 DATAFIELD3
So what I end up with, unless I use the workaround above, is something like:
line=DATAFIELD1,DATAFIELD2,DATAFIELD3
The underlying problem here is that I have mixed input formats: some of the files just have line feeds {LF}, and others have {CR}{LF}. Using my workaround above seems to work for both. Is there a way to deal with both, or to strip \r from the file?
Con2Str:
Con2Str will retrieve a list of values from a container and by default uses comma (,) to separate the values.
client server public static str Con2Str(container c, [str sep])
If no value for the sep parameter is specified, the comma character will be inserted between elements in the returned string.
Possible options:
If you would like the space to be the default separator, you can pass space as the second parameter to the method Con2Str.
Another option is to loop through the container fileRecord and fetch the individual elements.
Code snippet 1:
The code snippet below loads the file contents into a TextBuffer and replaces the carriage returns (\r) with newline (\n) characters. The condition if (strlen(line) > 1) helps to skip the empty strings caused by consecutive newline characters.
TextBuffer  textBuffer;
str         textString;
str         clearText;
int         newLinePos;
str         line;
str         field1;
str         field2;
str         field3;
counter     row;
;

textBuffer = new TextBuffer();
textBuffer.fromFile(#"C:\temp\Input.txt");
textString = textBuffer.getText();
clearText = strreplace(textString, '\r', '\n');

row = 0;
while (strlen(clearText) > 0)
{
    row++;
    newLinePos = strfind(clearText, '\n', 1, strlen(clearText));
    line = (newLinePos == 0 ? clearText : substr(clearText, 1, newLinePos));

    if (strlen(line) > 1)
    {
        field1 = substr(line, 1, 14);
        field2 = substr(line, 15, 12);
        field3 = substr(line, 27, 10);

        info('Row ' + int2str(row) + ', Column 1: ' + field1);
        info('Row ' + int2str(row) + ', Column 2: ' + field2);
        info('Row ' + int2str(row) + ', Column 3: ' + field3);
    }

    clearText = (newLinePos == 0 ? '' : substr(clearText, newLinePos + 1, strlen(clearText) - newLinePos));
}
Code snippet 2:
You could use the #File macro instead of hard-coding the values \r\n and 'R' that denote the read mode.
TextIo      inputFile;
container   fileRecord;
str         line;
str         field1;
str         field2;
str         field3;
counter     row;
;

inputFile = new TextIo(#"c:\temp\Input.txt", 'R');
inputFile.inFieldDelimiter("\r\n");

row = 0;
while (inputFile.status() == IO_Status::Ok)
{
    row++;
    fileRecord = inputFile.read();
    line = con2str(fileRecord);

    if (line != '')
    {
        field1 = substr(line, 1, 14);
        field2 = substr(line, 15, 12);
        field3 = substr(line, 27, 10);

        info('Row ' + int2str(row) + ', Column 1: ' + field1);
        info('Row ' + int2str(row) + ', Column 2: ' + field2);
        info('Row ' + int2str(row) + ', Column 3: ' + field3);
    }
}
I have never tried using the default record delimiter as the field delimiter without setting another record delimiter explicitly. Normally rows (records) are delimited by \n and fields are delimited by a comma, tab, semicolon, or some other symbol. You might also be hitting some weird behaviour around TextIo assuming the correct UTF format. You didn't supply an example of some rows from your data file, so guessing is hard.
Read more about TextIo here: http://msdn.microsoft.com/en-us/library/aa603840.aspx
EDIT:
With the additional example of file content, it seems to me the file is a fixed width file, where each column has its own fixed width. I would rather recommend using subStr if that is the case. Read about substr here: http://msdn.microsoft.com/en-us/library/aa677836.aspx
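If it helps to see the fixed-width idea outside X++, here is a rough Python sketch of the same slicing. The column widths 14/12/10 mirror the substr calls in the snippets above, and the file path is an assumption; str.splitlines() handles both LF and CRLF endings, which also addresses the mixed-line-ending concern.

with open(r"c:\temp\Input.txt") as f:
    for row, line in enumerate(f.read().splitlines(), start=1):
        if not line.strip():
            continue                    # skip blanks left over from stray \r\n pairs
        field1 = line[0:14].rstrip()    # like substr(line, 1, 14)
        field2 = line[14:26].rstrip()   # like substr(line, 15, 12)
        field3 = line[26:36].rstrip()   # like substr(line, 27, 10)
        print("Row %d: %s | %s | %s" % (row, field1, field2, field3))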
Use strAlpha to strip out blank values after you convert with Con2Str.