Moto doesn't mock the db properly - amazon-dynamodb

What am I doing wrong here?
This is my first time using moto and I'm really confused
conftest.py:
#pytest.fixture(scope='module')
def dynamodb(aws_credentials):
with mock_dynamodb2():
yield boto3.resource('dynamodb', region_name='us-east-1')
#pytest.fixture(scope='module')
def dynamodb_table(dynamodb):
"""Create a DynamoDB surveys table fixture."""
table = dynamodb.create_table(
TableName='MAIN_TABLE',
KeySchema=[
[..]
table.meta.client.get_waiter('table_exists').wait(TableName='MAIN_TABLE')
yield
testfile:
mport json
import boto3
from moto import mock_dynamodb2
#mock_dynamodb2
def test_lambda_handler(get_assets_event, dynamodb, dynamodb_table):
from [...] import lambda_handler
dynamodb = boto3.client('dynamodb')
response = lambda_handler(get_assets_event, "")
data = json.loads(response["body"])
assert response["statusCode"] == 200
assert "message" in response["body"]
assert data["message"] == "hello world"
# assert "location" in data.dict_keys()
But the issue is my lambda is using a helper, which has a dynamodb helper under the hood and that dbhelper starts like this:
dynamodb = boto3.resource('dynamodb')
table = dynamodb.Table(os.environ.get('MAIN_TABLE'))
def read_item(key: Dict):
try:
return table.get_item(Key=key)
except ClientError as e:
logging.exception(e)
raise exceptions.DatabaseReadException(f"Error reading from db: {key}") from e
Is that even possible to mock like this?
I feel like when I import the lambda handler it tries to overwrite my mocked db, but can't because obviously there's no os environ variable with the table name.

Related

Adjust save location of Custom XCom Backend on a per task basis

I have posted a discussion question about this here as well https://github.com/apache/airflow/discussions/19868
Is it possible to specify arguments to a custom xcom backend? If I could force a task to return data (pyarrow table/dataset, pandas dataframe) which would save a file in the correct container with a "predictable file location" path, then that would be amazing. A lot of my custom operator code deals with creating the blob_path, saving the blob, and pushing a list of the blob_paths to xcom.
Since I work with many clients, I would prefer to have the data for Client A inside of the client-a container which uses a different SAS
When I save a file I consider that a "stage" of the data so I would prefer to keep it, so ideally I could provide a blob_path which matches the folder structure I generally use
class WasbXComBackend(BaseXCom):
def __init__(
self,
container: str = "airflow-xcom-backend",
path: str = guid(),
partition_columns: Optional[list[str]] = None,
existing_data_behavior: Optional[str] = None,
) -> None:
super().__init__()
self.container = container
self.path = path
self.partition_columns = partition_columns
self.existing_data_behavior = existing_data_behavior
#staticmethod
def serialize_value(self, value: Any):
if isinstance(value, pd.DataFrame):
hook = AzureBlobHook(wasb_conn_id="azure_blob")
with io.StringIO() as buf:
value.to_csv(path_or_buf=buf, index=False)
hook.load_string(
container_name=self.container,
blob_name=f"{self.path}.csv",
string_data=buf.getvalue(),
)
value = f"{self.container}/{self.path}.csv"
elif isinstance(value, pa.Table):
hook = AzureBlobHook(wasb_conn_id="azure_blob")
write_options = ds.ParquetFileFormat().make_write_options(
version="2.6", use_dictionary=True, compression="snappy"
)
written_files = []
ds.write_dataset(
data=value,
schema=value.schema,
base_dir=f"{self.container}/{self.path}",
format="parquet",
partitioning=self.partition_columns,
partitioning_flavor="hive",
existing_data_behavior=self.existing_data_behavior,
basename_template=f"{self.task_id}-{self.ts_nodash}-{{i}}.parquet",
filesystem=hook.create_filesystem(),
file_options=write_options,
file_visitor=lambda x: written_files.append(x.path),
use_threads=True,
max_partitions=2_000,
)
value = written_files
return BaseXCom.serialize_value(value)
#staticmethod
def deserialize_value(self, result) -> Any:
result = BaseXCom.deserialize_value(result)
if isinstance(result, str) and result.endswith(".csv"):
hook = AzureBlobHook(wasb_conn_id="azure_blob")
with io.BytesIO() as input_io:
hook.get_stream(
container_name=self.container,
blob_name=str(self.path),
input_stream=input_io,
)
input_io.seek(0)
return pd.read_csv(input_io)
elif isinstance(result, list) and ".parquet" in result:
hook = AzureBlobHook(wasb_conn_id="azure_blob")
return ds.dataset(
source=result, partitioning="hive", filesystem=hook.create_filesystem()
)
return result
It's not clear exactly what information you want to be able to retrieve to use as part of your "predictable file location". But there is a PR to pass basic things like dag_id, task_id etc on to serialize_value so that you can use them when naming your stored objects.
Until that is merged, you'll have to override BaseXCom.set.
You need to override BaseXCom.set
a working ,in production example
class MyXComBackend(BaseXCom):
#classmethod
#provide_session
def set(cls, key, value, execution_date, task_id, dag_id, session=None):
session.expunge_all()
# logic to use this custom_xcom_backend only with the necessary dag and task
if cls.is_task_to_custom_xcom(dag_id, task_id):
value = cls.custom_backend_saving_fn(value, dag_id, execution_date, task_id)
else:
value = BaseXCom.serialize_value(value)
# remove any duplicate XComs
session.query(cls).filter(
cls.key == key, cls.execution_date == execution_date, cls.task_id == task_id, cls.dag_id == dag_id
).delete()
session.commit()
# insert new XCom
from airflow.models.xcom import XCom # noqa
session.add(XCom(key=key, value=value, execution_date=execution_date, task_id=task_id, dag_id=dag_id))
session.commit()
#staticmethod
def is_task_to_custom_xcom(dag_id: str, task_id: str) -> bool:
return True # custom your logic here if necessary

" __conform__() is not a valid Streamlit command."

While I was running my code in pycharm, it is showing:
__conform__() is not a valid Streamlit command."
I am trying to store input and result in my streamlit app in sqlite3 database and and display it in tabular format in same streamlit app.
Main code page:home_task.py:
import streamlit as st
from homework_db import create_table, add_data
def main():
st.title("Streamlit Exercise")
menu = ['Insert', 'Read']
choice = st.sidebar.selectbox("Menu", menu)
create_table()
if choice == 'Insert':
st.subheader('Lets check Sentiment')
line = st.text_area("Enter the sentence")
result = st.text("Positive")
if st.button("Add Task"):
add_data(line, result)
st.success("Successfully added data in database")
elif choice == 'Read':
st.subheader('Datatable')
if __name__ == "__main__":
main()
The other file: homework_db.py:
import sqlite3
conn = sqlite3.connect("homework2_db", check_same_thread=False)
c = conn.cursor()
## database-table-field-datatype##
def create_table():
c.execute('CREATE TABLE IF NOT EXISTS database(sentence TEXT, sentiment TEXT)')
def add_data(line, result):
c.execute('INSERT INTO database(sentence,sentiment) VALUES(?,?)', (line, result))
conn.commit()
Everything seems fine, I have followed many youtube videos for sql query, it seems right but I am not sure why streamlit is not accepting the code.
I also checked python docs, (https://docs.python.org/3.6/library/sqlite3.html#letting-your-object-adapt-itself) but could not figure out how this is related to my problem.
It's because result is not a string but a Streamlit object:
>>> import streamlit
>>> result = st.text("Positive")
>>> type(result)
<class 'streamlit.delta_generator.DeltaGenerator'>
What you need is to do is remove the st.text():
if choice == 'Insert':
st.subheader('Lets check Sentiment')
line = st.text_area("Enter the sentence")
result = "Positive" # no st.text()
if st.button("Add Task"):
add_data(line, result)
st.success("Successfully added data in database")

I am using jupyter to edit, but after import pymongo , why can't list the data I stored in MongoDB?

I am just the beginner of python, so sorry about this basic problem
enter image description here
import pymongo
client = pymongo.MongoClient('localhost', 27017)
ceshi = client['ceshi']
item_info = ['item_info3']
for i in item_info:
if i.find('area'):
print(i)
I am using Jupiter to edit, but after import pymongo , I can't list the data I stored in MongoDB. What could be the reason?
from bs4 import BeautifulSoup
import requests
import time
import pymongo
client = pymongo.MongoClient('localhost',27017)
ceshi = client['ceshi']
url_list = ceshi['url_list3']
item_info = ceshi['item_info3']
def get_links_from(channel,pages,who_sells=0):
#http://bj.58.com/diannao/0/pn2/
list_view = '{}{}/pn{}/'.format(channel,str(who_sells),str(pages))
wb_date = requests.get(list_view)
time.sleep(1)
soup = BeautifulSoup(wb_date.text,'lxml')
links = soup.select('tr.zzinfo > td.t > a.t')
if soup.find('td','t'):
for link in links:
item_link = link.get('href').split('?')[0]
else:
pass
url_list.insert_one({'url':item_link})
print(url_list)
def get_item_info(url):
wb_data = requests.get(url)
soup = BeautifulSoup(wb_data.text,'lxml')
no_longer_exist = '404' in
soup.find('script',type="text/javascript").get('src').split('/')
if no_longer_exist:
pass
else:
title = soup.title.text
price = soup.select('span.price.c_f50')[0].text
date = soup.select('li.time')[0].text
area = list(soup.select('spam.c_25d a')[0].stripped_strings) if
soup.find_all('c_25d') else None
item_info.insert_one({'title':title,'price':price,'date':date,'area':area})
print(item_info)
#print({'title':title,'price':price,'date':date,'area':area})
get_item_info('http://bj.58.com/pingbandiannao/25347275157966x.shtml')
#get_links_from('http://bj.58.com/yunfuyongpin/',2)
It looks like you are looping through an array of strings and trying to call find on a string:
item_info = ['item_info3']
for i in item_info:
if i.find('area')
If item_info3 is the name of your mongo collection in the ceshi database then you should do something like:
item_info = ceshi['item_info3']
Even then, I don't believe your find query is correct. It should be something like this:
for i in item_info.find():
print(i)
More information on Mongo/python:
https://docs.mongodb.org/getting-started/python/query/

Python - BaseHTTPServer , issue with POST and GET

I am making a very simple application with 2 webpages at the moment under URLs: localhost:8080/restaurants/ and localhost:8080/restaurants/new.
I have a sqlite database which i manipulate with SQLAlchemy in my python code.
On my first page localhost:8080/restaurants/, this just contains the lists of restaurants available in my database.
My second page localhost:8080/restaurants/new, is where i have a form in order to a new restaurant such that it displays on localhost:8080/restaurants.
However Whenever i enter a new restaurant name on form at localhost:8080/restaurants/new, it fails to redirect me back to localhost:8080/restaurants/ in order to show me the new restaurant, instead it just remains on the same url link localhost:8080/restaurants/new with the message "No data received" .
Below is my code:
import cgi
from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer
#import libraries and modules
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from database_setup import Base, Restaurant, MenuItem
#create and connect to database
engine = create_engine('sqlite:///restaurantmenu.db')
Base.metadata.bind=engine
DBSession = sessionmaker(bind=engine)
session = DBSession()
class webServerHandler(BaseHTTPRequestHandler):
""" class defined in the main method"""
def do_GET(self):
try:
#look for url then ends with '/hello'
if self.path.endswith("/restaurants"):
self.send_response(200)
#indicate reply in form of html to the client
self.send_header('Content-type', 'text/html')
#indicates end of https headers in the response
self.end_headers()
#obtain all restaurant names from databse
restaurants = session.query(Restaurant).all()
output = ""
output += "<html><body><a href='/restaurants/new'>Add A New Restaurant</a>"
output += "</br></br>"
for restaurant in restaurants:
output += restaurant.name
output += """<div>
<a href='#'>Edit</a>
<a href='#'>Delete</a>
</div>"""
output += "</br></br>"
output += "</body></html>"
self.wfile.write(output)
print output
return
if self.path.endswith("/restaurants/new"):
self.send_response(200)
self.send_header('Content-type', 'text/html')
self.end_headers()
output = ""
output += "<html><body>"
output += "<h1>Add New Restaurant</h1>"
output += "<form method='POST' enctype='multipart/form-data action='/restaurants/new'>"
output += "<input name='newRestaurant' type='text' placeholder='New Restaurant Name'>"
output += "<input name='Create' type='submit' label='Create'>"
output += "</form></body></html>"
self.wfile.write(output)
return
except IOError:
self.send_error(404, "File %s not found" % self.path)
def do_POST(self):
try:
if self.path.endswith("/restaurants/new"):
ctype, pdict = cgi.parse_header(self.headers.getheader('content-type'))
#check of content-type is form
if ctype == 'mulitpart/form-data':
#collect all fields from form, fields is a dictionary
fields = cgi.parse_multipart(self.rfile, pdict)
#extract the name of the restaurant from the form
messagecontent = fields.get('newRestaurant')
#create the new object
newRestaurantName = Restaurant(name = messagecontent[0])
session.add(newRestaurantName)
session.commit()
self.send_response(301)
self.send_header('Content-type', 'text/html')
self.send_header('Location','/restaurants')
self.end_headers()
except:
pass
def main():
"""An instance of HTTPServer is created in the main method
HTTPServer is built off of a TCP server indicating the
transmission protocol
"""
try:
port = 8080
#server address is tuple & contains host and port number
#host is an empty string in this case
server = HTTPServer(('', port), webServerHandler)
print "Web server running on port %s" % port
#keep server continually listening until interrupt occurs
server.serve_forever()
except KeyboardInterrupt:
print "^C entered, stopping web server...."
#shut down server
server.socket.close()
#run main method
if __name__ == '__main__':
main()
for reference here is my database_setup file where i create the database:
import sys
#importing classes from sqlalchemy module
from sqlalchemy import Column, ForeignKey, Integer, String
#delcaritive_base , used in the configuration
# and class code, used when writing mapper
from sqlalchemy.ext.declarative import declarative_base
#relationship in order to create foreign key relationship
#used when writing the mapper
from sqlalchemy.orm import relationship
#create_engine to used in the configuration code at the
#end of the file
from sqlalchemy import create_engine
#this object will help set up when writing the class code
Base = declarative_base()
class Restaurant(Base):
"""
class Restaurant corresponds to restaurant table
in the database to be created.
table representation for restaurant which
is in the database
"""
__tablename__ = 'restaurant'
#column definitions for the restaurant table
id = Column(Integer, primary_key=True)
name = Column(String(250), nullable=False)
class MenuItem(Base):
"""
class MenuItem corresponds to restaurant table
table representation for menu_item which
is in the database
"""
__tablename__ = 'menu_item'
#column definitions for the restaurant table
name = Column(String(80), nullable=False)
id = Column(Integer, primary_key=True)
course = Column(String(250))
description = Column(String(250))
price = Column(String(8))
restaurant_id = Column(Integer, ForeignKey('restaurant.id'))
restaurant = relationship(Restaurant)
#create an instance of create_engine class
#and point to the database to be used
engine = create_engine(
'sqlite:///restaurantmenu.db')
#that will soon be added into the database. makes
#the engine
Base.metadata.create_all(engine)
I can't figure out why i cannot add new restuarants
I know this was a long time ago, but I figured out your problem.
First, the enctype='multipart/form-data' in your do_GET function under the if self.path.endswith("/restaurants/new"): portion is missing a final single quote. Second, you misspelt 'multipart' in if ctype == 'multipart/form-data':. Hope that can help you or others.
As shteeven said, the problem was with the encryption type in the form.
As the quote was missed, the 'Content-type' changed to 'application/x-www-form-urlencoded' so in that case you should parse it different as it's a string.
In order to manage both enctype you can modify your do_POST as the following
def do_POST(self):
try:
if self.path.endswith("/restaurants/new"):
ctype, pdict = cgi.parse_header(self.headers.getheader('content-type'))
print ctype
#check of content-type is form
if (ctype == 'multipart/form-data') or (ctype == 'application/x-www-form-urlencoded'):
#collect all fields from form, fields is a dictionary
if ctype == 'multipart/form-data':
fields = cgi.parse_multipart(self.rfile, pdict)
else:
content_length = self.headers.getheaders('Content-length')
length = int(content_length[0])
body = self.rfile.read(length)
fields = urlparse.parse_qs(body)
#extract the name of the restaurant from the form
messagecontent = fields.get('newRestaurant')
#create the new object
newRestaurantName = Restaurant(name = messagecontent[0])
session.add(newRestaurantName)
session.commit()
self.send_response(301)
self.send_header('Location','/restaurants')
self.end_headers()
return
Hope this extra information is useful for you!

how to add data from ui (made using pyqt) to sqlite db?

i have made a simple pyqt gui with QLineEdit and QPushButton. i like to add showurl(in con.py given below) to sqlite db. the code :
ui.py
from PyQt4 import QtCore, QtGui
try:
_fromUtf8 = QtCore.QString.fromUtf8
except AttributeError:
def _fromUtf8(s):
return s
try:
_encoding = QtGui.QApplication.UnicodeUTF8
def _translate(context, text, disambig):
return QtGui.QApplication.translate(context, text, disambig, _encoding)
except AttributeError:
def _translate(context, text, disambig):
return QtGui.QApplication.translate(context, text, disambig)
class Ui_Form(object):
def setupUi(self, Form):
Form.setObjectName(_fromUtf8("Form"))
Form.resize(400, 300)
self.verticalLayout_2 = QtGui.QVBoxLayout(Form)
self.verticalLayout_2.setObjectName(_fromUtf8("verticalLayout_2"))
self.verticalLayout = QtGui.QVBoxLayout()
self.verticalLayout.setObjectName(_fromUtf8("verticalLayout"))
self.inserturl = QtGui.QLineEdit(Form)
self.inserturl.setObjectName(_fromUtf8("inserturl"))
self.verticalLayout.addWidget(self.inserturl)
spacerItem = QtGui.QSpacerItem(20, 40, QtGui.QSizePolicy.Minimum, QtGui.QSizePolicy.Expanding)
self.verticalLayout.addItem(spacerItem)
self.insertdb_btn = QtGui.QPushButton(Form)
self.insertdb_btn.setObjectName(_fromUtf8("insertdb_btn"))
self.verticalLayout.addWidget(self.insertdb_btn)
self.verticalLayout_2.addLayout(self.verticalLayout)
self.retranslateUi(Form)
QtCore.QMetaObject.connectSlotsByName(Form)
def retranslateUi(self, Form):
Form.setWindowTitle(_translate("Form", "Form", None))
self.insertdb_btn.setText(_translate("Form", "ok", None))
con.py
import sys
from PyQt4 import QtCore, QtGui, QtSql
from insertdb2_ui import Ui_Form
def createConnection():
db = QtSql.QSqlDatabase.addDatabase('QSQLITE')
db.setDatabaseName('webscrap.db')
if db.open():
return True
else:
print db.lastError().text()
return False
class Test(QtGui.QWidget, Ui_Form):
def __init__(self):
super(Test, self).__init__()
self.setupUi(self)
self.insertdb_btn.clicked.connect(self.onClick)
def onClick(self):
showurl = self.inserturl.text()
print showurl
if __name__ == "__main__":
app = QtGui.QApplication(sys.argv)
if not createConnection():
sys.exit(1)
window = Test()
window.show()
sys.exit(app.exec_())
i made the input data ie. showurl print on terminal. i want to add the showurl to sqlitedb webscrap.db which has fields id and url.
I am also confused about using model view(QSqlTableModel, QDataWidgetMapper).
You can use the QtSql.QSqlQuery() class to create query objects that interact with your database. To execute a query (in this case, an INSERT query), perform the following steps:
Instantiate a QSqlQuery() object.
Call the prepare() method on that object, passing to the prepare() method the SQL text that you want to execute.
Use the addBindValue() method to pass in variables to your query -- in this case, you want to pass in showurl.
Finally, call exec_() on the query object to execute
the query. If there is an error, you can retrieve it by calling on
the query object lastError().text().
So, in code, you would need to change your onClick() function to:
def onClick(self):
showurl = self.inserturl.text()
query_object = QtSql.QSqlQuery()
query_object.prepare("INSERT INTO table (url) VALUES (?)")
query_object.addBindValue(showurl)
if not query_object.exec_():
print query_object.lastError().text()
Please note that I did not specify a table name to insert into because you provided field names (id and url) but not a table name; therefore the query string uses a fake table name (table). You should replace this with a real table name. If you do not have a table in your database, you need to create one. Please reference https://www.sqlite.org/lang.html for documentation on SQLite commands (e.g. for creating tables). Please also refer to the PyQt4 documentation on QtSql.QSqlQuery(): http://pyqt.sourceforge.net/Docs/PyQt4/qsqlquery.html
For QSqlTableModel and QDataWidgetMapper, it is difficult to help without having a specific question to answer about them, but I highly recommend that you check out this excellent guide to both of those classes: http://doc.qt.digia.com/qq/qq21-datawidgetmapper.html. This guide does not use PyQt4 (instead it uses Qt alone, so the code is in C++), but the code is sparse and the concepts should resonate even if you are not familiar with C++.

Resources