Mocking SQLAlchemy within a Strawberry/FastAPI test

I'm working on creating unit tests for a FastAPI, Strawberry, and SQLAlchemy setup. The current API is working and returning data correctly, but I cannot figure out how to mock the underlying database for unit tests. Would love any help/guidance to figure out this issue.
Below is the test code I'm currently working with, which I hope will be enough to diagnose the issue, but I'm happy to post more if it helps. Running this currently produces ExecutionResult(data=None, errors=[GraphQLError("'NoneType' object is not subscriptable", locations=[SourceLocation(line=3, column=13)], path=['biomarkers'])], extensions={}), which seems to indicate that it is almost working but not quite reaching the mocked data inside UnifiedAlchemyMagicMock.
import uuid
import unittest
from unittest import mock

import strawberry
from strawberry.extensions import Extension
from alchemy_mock.mocking import UnifiedAlchemyMagicMock

from app.api.api_v1 import api
from app.models import biomarker as biomarker_models


class MockSession:
    '''Create Mock Session for Db'''
    session = UnifiedAlchemyMagicMock(data=[
        (
            [mock.call.query(biomarker_models.Biomarker)],
            [biomarker_models.Biomarker(
                name="hello",
                id=uuid.UUID('1a8d8791-946c-4fc4-8f5d-1b0c4f5ee2f5'),
                quest_biomarker_code="quest"),
             biomarker_models.Biomarker(
                name="test",
                id=uuid.uuid4(),
                quest_biomarker_code="palazo")]
        )
    ])
class MockRequest(Extension):
    '''Mock Request state for context'''
    def on_request_start(self):
        self.execution_context.context["db"] = MockSession()

    def on_request_end(self):
        self.execution_context.context["db"].close()
class BioMarkerTestCase(unittest.TestCase):
    '''Test Biomarker'''
    def setUp(self) -> None:
        self.strawberry_schema = strawberry.Schema(
            query=api.Query,
            mutation=api.Mutation,
            extensions=[MockRequest],
            types=api.QUERY_TYPE_LIST)

    def test_query_get_all(self) -> None:
        '''test biomarker query'''
        query = """
            query {
                biomarkers {
                    id
                    name
                    whyItMatters
                    questBiomarkerCode
                    modeOfAcquisition
                    questRefRangeLow
                    questRefRangeHigh
                    optimalRangeLow
                    optimalRangeHigh
                    withinRangeRecommendations
                    belowRangeRecommendations
                    aboveRangeRecommendations
                    crossReferenceBiomarkers
                    notes
                    resourcesCited
                    measurementUnits
                    isCritical
                    resultDataType
                    critical {
                        id
                        biomarkerId
                        isPriority1
                        priority1Range
                        isPriority2
                        priority2Range
                    }
                }
            }
        """
        query_result = self.strawberry_schema.execute_sync(query)
        self.assertIsNotNone(query_result.data)

When using
query_result = self.strawberry_schema.execute_sync(query)
the context_value defaults to None, which I think is the cause of your error.
Try with:
query_result = self.strawberry_schema.execute_sync(query, context_value={})
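For example, here is a minimal sketch of the test method with that change, where query is the same GraphQL document as in the question; injecting the mocked session directly as context_value["db"] is an assumption about how the resolvers look it up, not code from the original:
def test_query_get_all(self) -> None:
    '''test biomarker query with an explicit context dict'''
    # Passing a dict (instead of the default None) gives the MockRequest
    # extension something to write into; the mocked session can also be
    # injected directly if the resolvers call context["db"].query(...).
    query_result = self.strawberry_schema.execute_sync(
        query, context_value={"db": MockSession.session})
    self.assertIsNone(query_result.errors)
    self.assertIsNotNone(query_result.data)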

Related

How to write unit tests for @task-decorated Airflow tasks?

I am trying to write unit tests for some of the tasks built with the Airflow TaskFlow API. I tried multiple approaches, for example creating a DagRun or only running the task function, but nothing is helping.
Here is a task where I download a file from S3; there is more going on in it, but I removed that for this example.
@task()
def updates_process(files):
    context = get_current_context()
    try:
        updates_file_path = utils.download_file_from_s3_bucket(files.get("updates_file"))
    except FileNotFoundError as e:
        log.error(e)
        return
    # Do something else
Now I am trying to write a test case where I can check this except clause. The following is one example I started with:
class TestAccountLinkUpdatesProcess(TestCase):
    @mock.patch("dags.delta_load.updates.log")
    @mock.patch("dags.delta_load.updates.get_current_context")
    @mock.patch("dags.delta_load.updates.utils.download_file_from_s3_bucket")
    def test_file_not_found_error(self, download_file_from_s3_bucket, get_current_context, log):
        download_file_from_s3_bucket.side_effect = FileNotFoundError
        task = account_link_updates_process({"updates_file": "path/to/file.csv"})
        get_current_context.assert_called_once()
        log.error.assert_called_once()
I also tried creating a DagRun as shown in the example in the docs and fetching the task from the DagRun, but that also didn't help.
I was struggling to do this myself, but I found that the decorated tasks have a .function attribute: https://github.dev/apache/airflow/blob/be7cb1e837b875f44fcf7903329755245dd02dc3/airflow/decorators/base.py#L522
You can then use .function to call the actual function. Using your example:
class TestAccountLinkUpdatesProcess(TestCase):
    @mock.patch("dags.delta_load.updates.log")
    @mock.patch("dags.delta_load.updates.get_current_context")
    @mock.patch("dags.delta_load.updates.utils.download_file_from_s3_bucket")
    def test_file_not_found_error(self, download_file_from_s3_bucket, get_current_context, log):
        download_file_from_s3_bucket.side_effect = FileNotFoundError
        task = dags.delta_load.updates.updates_process
        # Call the function for testing
        task.function({"updates_file": "path/to/file.csv"})
        get_current_context.assert_called_once()
        log.error.assert_called_once()
This saves you from having to set up any of the DAG infrastructure and lets you just run the Python function as intended!
This is what I could figure out. I'm not sure it's the right approach, but it works.
class TestAccountLinkUpdatesProcess(TestCase):
    TASK_ID = "updates_process"

    @classmethod
    def setUpClass(cls) -> None:
        cls.dag = dag_delta_load()

    @mock.patch("dags.delta_load.updates.log")
    @mock.patch("dags.delta_load.updates.get_current_context")
    @mock.patch("dags.delta_load.updates.utils.download_file_from_s3_bucket")
    def test_file_not_found_error(self, download_file_from_s3_bucket, get_current_context, log):
        download_file_from_s3_bucket.side_effect = FileNotFoundError
        task = self.dag.get_task(task_id=self.TASK_ID)
        task.op_args = [{"updates_file": "file.csv"}]
        task.execute(context={})
        log.error.assert_called_once()
UPDATE: Based on the answer from @AetherUnbound, I did some investigation and found that we can use task.__wrapped__() to call the actual Python function.
class TestAccountLinkUpdatesProcess(TestCase):
    @mock.patch("dags.delta_load.updates.log")
    @mock.patch("dags.delta_load.updates.get_current_context")
    @mock.patch("dags.delta_load.updates.utils.download_file_from_s3_bucket")
    def test_file_not_found_error(self, download_file_from_s3_bucket, get_current_context, log):
        download_file_from_s3_bucket.side_effect = FileNotFoundError
        updates_process.__wrapped__({"updates_file": "file.csv"})
        log.error.assert_called_once()

Pact: Error when trying to setup mock provider

I'm trying to write my first pact-python test using pytest. Could someone please tell me what's wrong with my code?
import unittest
import requests
import json
import pytest
import atexit
from pact import Consumer, Provider

pact = Consumer('Consumer').has_pact_with(Provider('Provider'), host_name='mockservice', port=8080)
pact.start_service()
atexit.register(pact.stop_service)


class InterviewDetails(unittest.TestCase):
    def test_candidate_report_api(self):
        candidate_report_payload = {}
        resp = requests.post("http://localhost:1234/users/", data=json.dumps(candidate_report_payload))
        response = json.loads(resp.text)
        return response

    @pytest.mark.health1
    def test_candidate_report(self):
        expected = {}
        (pact.given('Comment')
         .upon_receiving('comment')
         .with_request(method='POST', path="http://localhost:1234/users/", headers={})
         .will_respond_with(200, body=expected))
        with pact:
            pact.setup()
            result = self.test_candidate_report_api()
            self.assertEqual(result, expected)
            pact.verify()
The error from the stack trace:
AttributeError: module 'pact' has no attribute 'Like'
Can you please confirm you're using pact-python from https://github.com/pact-foundation/pact-python/ (and not pactman, a project that is not maintained by the Pact Foundation)?
It might be related to the way you have set up your test.
Here is an example project you can use for reference: https://github.com/pactflow/example-consumer-python/
Relevant test code:
"""pact test for product service client"""
import json
import logging
import os

import requests
from requests.auth import HTTPBasicAuth
import pytest

from pact import Consumer, Like, Provider, Term, Format
from src.consumer import ProductConsumer

log = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

print(Format().__dict__)

PACT_MOCK_HOST = 'localhost'
PACT_MOCK_PORT = 1234
PACT_DIR = os.path.dirname(os.path.realpath(__file__))


@pytest.fixture
def consumer():
    return ProductConsumer(
        'http://{host}:{port}'
        .format(host=PACT_MOCK_HOST, port=PACT_MOCK_PORT)
    )


@pytest.fixture(scope='session')
def pact(request):
    pact = Consumer('pactflow-example-consumer-python').has_pact_with(
        Provider('pactflow-example-provider-python'), host_name=PACT_MOCK_HOST, port=PACT_MOCK_PORT,
        pact_dir="./pacts", log_dir="./logs")
    try:
        print('start service')
        pact.start_service()
        yield pact
    finally:
        print('stop service')
        pact.stop_service()


def test_get_product(pact, consumer):
    expected = {
        'id': "27",
        'name': 'Margharita',
        'type': 'Pizza'
    }

    (pact
     .given('a product with ID 10 exists')
     .upon_receiving('a request to get a product')
     .with_request('GET', '/product/10')
     .will_respond_with(200, body=Like(expected)))

    with pact:
        user = consumer.get_product('10')
        assert user.name == 'Margharita'

How to change XCom in Airflow to accommodate large data?

I am using the following code in my Airflow operator:
import json
import pandas as pd

from airflow.exceptions import AirflowException
from airflow.hooks.http_hook import HttpHook
from airflow.models import BaseOperator
from airflow.utils.decorators import apply_defaults
from airflow.contrib.hooks.gcs_hook import GoogleCloudStorageHook


class HttpToGoogleCloudStorageOperator(BaseOperator):
    template_fields = ['endpoint', 'data', 'headers', ]
    template_ext = ()
    ui_color = '#f4a460'

    @apply_defaults
    def __init__(self,
                 endpoint,
                 project_id,
                 table_id,
                 data=None,
                 headers=None,
                 auth=None,
                 http_conn_id='http_default',
                 *args, **kwargs):
        super(HttpToGoogleCloudStorageOperator, self).__init__(*args, **kwargs)
        self.table_id = table_id
        self.http_conn_id = http_conn_id
        self.method = "GET"
        self.endpoint = endpoint
        self.headers = headers or {}
        self.auth = auth
        self.data = data or {}

    def execute(self, context):
        http = HttpHook(self.method, http_conn_id=self.http_conn_id)
        self.log.info("Calling HTTP method " + self.endpoint)
        response = http.run(self.endpoint, self.data, self.headers, auth=self.auth)
        self.log.info("Got response")
Unfortunately the data returned is too large (about 5k) to fit in the standard XCom, and I get this error:
{taskinstance.py:1059} ERROR - (_mysql_exceptions.DataError) (1406, "Data too long for column 'value' at row 1")
Is there a way I can tell http_hook to use a different xcom, or (even better) not use xcom at all? I have looked around and I do not see a solution.
Thanks for any tips or pointers.
Edit: Here is how I call the operator. Note that nowhere do I specify xcom.
query_load_task = HttpToGoogleCloudStorageOperator(
    task_id="query_load_task",
    endpoint=endpoint,
    project_id="my_gcp_poroject_id",
    table_id="dataset.table",
    data=None,
    auth=(username, password))
It's preferable to store the data in a system designed for that purpose (e.g. the file system, AWS S3, Azure, etc.) and instead return a unique identifier referencing the location of the data; for the file system this would likely be the full path (e.g. /tmp/acme_response_20200709.csv). That way you leverage the best of both the storage system and your database.
If you add your code I'd be happy to take a crack at writing up some pseudo-code as an example.
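Since the operator code is shown above, here is a rough sketch of that idea applied to its execute() method, assuming the contrib GoogleCloudStorageHook from the question's imports and a hypothetical staging bucket (untested pseudo-code, not a drop-in implementation):
import tempfile

def execute(self, context):
    http = HttpHook(self.method, http_conn_id=self.http_conn_id)
    self.log.info("Calling HTTP method " + self.endpoint)
    response = http.run(self.endpoint, self.data, self.headers, auth=self.auth)
    self.log.info("Got response")

    # Write the (large) payload to a temporary file instead of returning it.
    with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as tmp:
        tmp.write(response.text)
        local_path = tmp.name

    # Upload the file to GCS and return only a short URI, so XCom stores
    # a reference to the data rather than the data itself.
    object_name = "responses/{}.json".format(self.task_id)
    gcs_hook = GoogleCloudStorageHook()          # assumes the default GCP connection
    gcs_hook.upload(bucket="my-staging-bucket",  # hypothetical bucket name
                    object=object_name,
                    filename=local_path)
    return "gs://my-staging-bucket/{}".format(object_name)
A downstream task can then pull the URI from XCom and read the object from GCS itself.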

Running all Python unittest tests from different modules as a suite with a parameterized class constructor

I am working on a design pattern to structure my Python unittest code as a Page Object Model (POM). So far I have written my page classes in the modules HomePageObject.py and FilterPageObject.py, my base class (for common stuff) TestBase in BaseTest.py, my test case modules TestCase1.py and TestCase2.py, and one runner module runner.py.
In the runner I am using loader.getTestCaseNames to get all the tests from a test case class of a module. In both test case modules the test class has the same name, 'Test', and the test method has the same name, 'testName'.
Since the names conflict when imported in the runner, only one test gets executed. I want Python to scan all the modules I specify for tests and run them even when the class names are the same.
I got to know that nose might be helpful here, but I'm not sure how I can implement it. Any advice?
BaseTest.py
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver import ChromeOptions
import unittest


class TestBase(unittest.TestCase):
    driver = None

    def __init__(self, testName, browser):
        self.browser = browser
        super(TestBase, self).__init__(testName)

    def setUp(self):
        if self.browser == "firefox":
            TestBase.driver = webdriver.Firefox()
        elif self.browser == "chrome":
            options = ChromeOptions()
            options.add_argument("--start-maximized")
            TestBase.driver = webdriver.Chrome(chrome_options=options)
        self.url = "https://www.airbnb.co.in/"
        self.driver = TestBase.getdriver()
        TestBase.driver.implicitly_wait(10)

    def tearDown(self):
        self.driver.quit()

    @staticmethod
    def getdriver():
        return TestBase.driver

    @staticmethod
    def waitForElementVisibility(locator, expression, message):
        try:
            WebDriverWait(TestBase.driver, 20).\
                until(EC.presence_of_element_located((locator, expression)),
                      message)
            return True
        except:
            return False
TestCase1.py and TestCase2.py (same)
from airbnb.HomePageObject import HomePage
from airbnb.BaseTest import TestBase


class Test(TestBase):
    def __init__(self, testName, browser):
        super(Test, self).__init__(testName, browser)

    def testName(self):
        try:
            self.driver.get(self.url)
            h_page = HomePage()
            f_page = h_page.seachPlace("Sicily,Italy")
            f_page.selectExperience()
        finally:
            self.driver.quit()
runner.py
import unittest
from airbnb.TestCase1 import Test
from airbnb.TestCase2 import Test

loader = unittest.TestLoader()
test_names = loader.getTestCaseNames(Test)

suite = unittest.TestSuite()
for test in test_names:
    suite.addTest(Test(test, "chrome"))

runner = unittest.TextTestRunner()
result = runner.run(suite)
Also, even though that one test case passes, an error message appears:
Ran 1 test in 9.734s
OK
Traceback (most recent call last):
File "F:\eclipse-jee-neon-3-win32\eclipse\plugins\org.python.pydev.core_6.3.3.201805051638\pysrc\runfiles.py", line 275, in <module>
main()
File "F:\eclipse-jee-neon-3-win32\eclipse\plugins\org.python.pydev.core_6.3.3.201805051638\pysrc\runfiles.py", line 97, in main
return pydev_runfiles.main(configuration) # Note: still doesn't return a proper value.
File "F:\eclipse-jee-neon-3-win32\eclipse\plugins\org.python.pydev.core_6.3.3.201805051638\pysrc\_pydev_runfiles\pydev_runfiles.py", line 874, in main
PydevTestRunner(configuration).run_tests()
File "F:\eclipse-jee-neon-3-win32\eclipse\plugins\org.python.pydev.core_6.3.3.201805051638\pysrc\_pydev_runfiles\pydev_runfiles.py", line 773, in run_tests
all_tests = self.find_tests_from_modules(file_and_modules_and_module_name)
File "F:\eclipse-jee-neon-3-win32\eclipse\plugins\org.python.pydev.core_6.3.3.201805051638\pysrc\_pydev_runfiles\pydev_runfiles.py", line 629, in find_tests_from_modules
suite = loader.loadTestsFromModule(m)
File "C:\Python27\lib\unittest\loader.py", line 65, in loadTestsFromModule
tests.append(self.loadTestsFromTestCase(obj))
File "C:\Python27\lib\unittest\loader.py", line 56, in loadTestsFromTestCase
loaded_suite = self.suiteClass(map(testCaseClass, testCaseNames))
TypeError: __init__() takes exactly 3 arguments (2 given)
I did this by searching for all the test class modules with a pattern, then using __import__(modulename) and calling each module's Test class with the desired parameters.
Here is my runner.py
import unittest
import glob

loader = unittest.TestLoader()
suite = unittest.TestSuite()

test_file_strings = glob.glob('Test*.py')
module_strings = [str[0:len(str) - 3] for str in test_file_strings]

for module in module_strings:
    mod = __import__(module)
    test_names = loader.getTestCaseNames(mod.Test)
    for test in test_names:
        suite.addTest(mod.Test(test, "chrome"))

runner = unittest.TextTestRunner()
result = runner.run(suite)
This worked, but I'm still looking for a more organized solution; one possible alternative is sketched after the output below.
(Not sure why the second run shows Ran 0 tests in 0.000s.)
Finding files... done.
Importing test modules ... ..done.
----------------------------------------------------------------------
Ran 2 tests in 37.491s
OK
----------------------------------------------------------------------
Ran 0 tests in 0.000s
OK
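As one possible tidier alternative (a sketch only, not tested against the code above): if the browser choice is moved from the constructor onto a class attribute, the standard loader and unittest discovery can instantiate the test classes on their own, and no custom runner loop is needed.
# BaseTest.py (sketch) -- browser comes from an environment variable,
# so the default TestLoader can instantiate the class without arguments.
import os
import unittest

class TestBase(unittest.TestCase):
    browser = os.environ.get("TEST_BROWSER", "chrome")

    def setUp(self):
        # build the driver from self.browser exactly as in the original setUp
        ...

# runner.py (sketch) -- plain discovery picks up TestCase1.py, TestCase2.py, ...
import unittest

if __name__ == "__main__":
    suite = unittest.TestLoader().discover(start_dir=".", pattern="TestCase*.py")
    unittest.TextTestRunner(verbosity=2).run(suite)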

Gremlin Python project By clause

I have a graph running on DataStax Enterprise Graph (version 5.1), backed by Cassandra storage.
I'm trying to run a query to get both the ID and the properties.
In Gremlin Console I can do this:
gremlin> g.V(1).project("v", "properties").by().by(valueMap())
==>[v:v[1],properties:[name:[marko],age:[29]]]
How can I translate the valueMap call while still using the Python GraphTraversal API? I know I can run a direct query via session execution, like this:
session.execute_graph("g.V().has(\"Node_Name\",\"A\").project(\"v\", \"properties\").by().by(valueMap())",{"name":graph_name})
Below is my setup code.
from dse.cluster import Cluster, EXEC_PROFILE_GRAPH_DEFAULT
from dse_graph import DseGraph
from dse.cluster import GraphExecutionProfile, EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT
from dse.graph import GraphOptions
from gremlin_python.process.traversal import T
from gremlin_python.process.traversal import Order
from gremlin_python.process.traversal import Cardinality
from gremlin_python.process.traversal import Column
from gremlin_python.process.traversal import Direction
from gremlin_python.process.traversal import Operator
from gremlin_python.process.traversal import P
from gremlin_python.process.traversal import Pop
from gremlin_python.process.traversal import Scope
from gremlin_python.process.traversal import Barrier
graph_name = "TEST"
graph_ip = ["127.0.0.1"]
graph_port = 9042
schema = """
schema.edgeLabel("Group").create();
schema.propertyKey("Version").Text().create();
schema.edgeLabel("Group").properties("Version").add()
schema.vertexLabel("Example").create();
schema.edgeLabel("Group").connection("Example", "Example").add()
schema.propertyKey("Node_Name").Text().create();
schema.vertexLabel("Example").properties("Node_Name").add()
schema.vertexLabel("Example").index("exampleByName").secondary().by("Node_Name").add();
"""
profile = GraphExecutionProfile(
    graph_options=GraphOptions(graph_name=graph_name))

client = Cluster(
    contact_points=graph_ip, port=graph_port,
    execution_profiles={EXEC_PROFILE_GRAPH_DEFAULT: profile}
)

graph_name = graph_name
session = client.connect()
graph = DseGraph.traversal_source(session)

# force the schema to be clean
session.execute_graph(
    "system.graph(name).ifExists().drop();",
    {'name': graph_name},
    execution_profile=EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT
)
session.execute_graph(
    "system.graph(name).ifNotExists().create();",
    {'name': graph_name},
    execution_profile=EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT
)
session.execute_graph(schema)
session.shutdown()
session = client.connect()
graph = DseGraph.traversal_source(session)
Update:
I guess I have not made the problem clear. It is in Python, not in the Gremlin console. Running code like graph.V().has("Node_Name","A").project("v","properties").by().by(valueMap()).toList()
gives the following result. How do I execute the Gremlin query while staying at the GLV level, without dropping down to a text-serialized query sent to Gremlin Server?
Traceback (most recent call last):
File "graph_test.py", line 79, in <module>
graph.V().has("Node_Name","A").project("v", "properties").by().by(valueMap()).toList()
NameError: name 'valueMap' is not defined
I may not fully understand your question, but it seems like you already have most of the answer. This last line of code:
graph = DseGraph.traversal_source(session)
should probably be written as:
g = DseGraph.traversal_source(session)
The return value of traversal_source(session) is a TraversalSource, not a Graph instance, and by convention TinkerPop tends to refer to such a variable as g. Once you have a TraversalSource, you can just write your Gremlin:
g = DseGraph.traversal_source(session)
g.V().has("Node_Name","A").project("v", "properties").by().by(valueMap()).toList()
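One more note on the NameError itself: on the Python side, anonymous steps such as valueMap() are exposed on the __ class in gremlin_python, so a sketch of the full call (under that assumption about the imports) would be:
from gremlin_python.process.graph_traversal import __  # anonymous traversal steps

g = DseGraph.traversal_source(session)
result = (g.V().has("Node_Name", "A")
           .project("v", "properties")
           .by()
           .by(__.valueMap())
           .toList())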
