Substitute for having clause in CosmosDB - azure-cosmosdb

I have the following result from a query, where count field is derived from an aggregate function
[
{
"count": 1,
"facilityName": "Hyundai Service Center"
},
{
"count": 2,
"facilityName": "Honda Service Center"
},
{
"count": 1,
"facilityName": "Kat Service Center"
}
]
I want to display only those facilityName where count >= 2.
How can we achieve this?

I tried to implement your requirement with a stored procedure; please refer to my SP code:
// Cosmos DB stored procedure: returns the facility rows that appear in
// EVERY document whose id is in idArray (a substitute for a HAVING clause).
// @param {string[]} idArray - document ids to intersect facilities across.
function sample(idArray) {
    var collection = getContext().getCollection();
    var expectedCount = idArray.length;

    // Count, per facilityName, how many of the requested documents contain it.
    // NOTE: Cosmos DB query parameter names must be prefixed with '@';
    // any other prefix is rejected by the server.
    var sqlQuery = {
        "query": 'SELECT count(c.id) as cnt, f.facilityName from c join f in c.facilities ' +
            'where array_contains( @idArray,c.id,true) ' +
            'AND c.entityType = "ServiceInformationFacility" group by f.facilityName',
        "parameters": [
            { "name": "@idArray", "value": idArray }
        ]
    };

    var isAccepted = collection.queryDocuments(
        collection.getSelfLink(),
        sqlQuery,
        function (err, feed, options) {
            if (err) throw err;
            var response = getContext().getResponse();
            if (!feed || !feed.length) {
                response.setBody('no docs found');
            }
            else {
                // A facility common to ALL requested documents appears exactly
                // idArray.length times in the grouped result.
                var commonFacilities = [];
                for (var i = 0; i < feed.length; i++) {
                    if (feed[i].cnt === expectedCount) {
                        commonFacilities.push(feed[i]);
                    }
                }
                response.setBody(commonFacilities);
            }
        });
    if (!isAccepted) throw new Error('The query was not accepted by the server.');
}
Input param:
["6ECF4568-CB0E-4E11-A5CD-1206638F9C39","2ECF4568-CB0E-4E11-A5CD-1206638F9C39"]
Get output:
UPDATES:
So, if your collection is partitioned, a stored procedure may not be suitable for you, because a partition key is necessary for the execution of an SP. Please refer to my detailed explanations in this thread: Delete Documents from Cosmos using Query without Partition Key Specification
Actually, there is no complex logic in my SP code above. It just loops over the result of the SQL query and tries to find which object.count equals idArray.length, which means that object.facilityName exists across all of the documents.
So, you don't have to use an SP; you can use any tiny piece of code to handle the logic I describe above.

Related

How to create AppSync DynamoDB resolver for object with relationships

I have a single DynamoDB table that has Games and Players. I currently have the following Lambda resolver that works for my AppSync getGame query. The question is, is it possible to write a DynamoDB resolver using the velocity templates that does the same so I can avoid the lambda invocation.
const AWS = require('aws-sdk');
const docClient = new AWS.DynamoDB.DocumentClient();
exports.handler = async (event) => {
let result = null;
let params;
switch(event.field) {
case "getGame":
const id = event.arguments.id;
if (!id) {
throw new Error('Missing game id');
};
params = {
TableName: 'games',
KeyConditionExpression: 'pk = :pk AND sk = :sk',
ExpressionAttributeValues: {
':pk': 'game',
':sk': `meta_${id}`
}
};
const game = (await docClient.query(params).promise()).Items[0];
// get players
const gameKey = `game_${game.sk.split('_')[1]}_${game.sk.split('_')[2]}`;
params = {
TableName: 'games',
KeyConditionExpression: 'pk = :pk AND begins_with(sk, :sk)',
ExpressionAttributeValues: {
':pk': gameKey,
':sk': 'player_'
}
};
game.players = (await docClient.query(params).promise()).Items;
result = game;
break;
}
return result;
};
And the result looks like
{
"gsipk": "NEW_OPEN",
"sk": "meta_1578241126110_35660fcc-3cde-4d30-9ebd-09abba1aedf7",
"gsisk": "level_1_players_4",
"pk": "game",
"players": [
{
"gsipk": "player_3a7bb19c-0ccd-42df-a606-acd8b1f5e288",
"gsisk": "game_1578241126110_35660fcc-3cde-4d30-9ebd-09abba1aedf7",
"points": 0,
"num": 4,
"place": null,
"sk": "player_3a7bb19c-0ccd-42df-a606-acd8b1f5e288",
"pieces": [],
"wilds": 0,
"pk": "game_1578241126110_35660fcc-3cde-4d30-9ebd-09abba1aedf7",
"color": "gold",
"pows": 0
},
{
"gsipk": "player_96b772b1-4127-43da-b550-029d5c632675",
"gsisk": "game_1578241126110_35660fcc-3cde-4d30-9ebd-09abba1aedf7",
"points": 0,
"num": 2,
"place": null,
"sk": "player_96b772b1-4127-43da-b550-029d5c632675",
"pieces": [],
"wilds": 0,
"pk": "game_1578241126110_35660fcc-3cde-4d30-9ebd-09abba1aedf7",
"color": "blue",
"pows": 0
},
{
"gsipk": "player_9d30c675-930f-401b-ac5f-8db32bb2acb8",
"gsisk": "game_1578241126110_35660fcc-3cde-4d30-9ebd-09abba1aedf7",
"points": 0,
"num": 3,
"place": null,
"sk": "player_9d30c675-930f-401b-ac5f-8db32bb2acb8",
"pieces": [],
"wilds": 0,
"pk": "game_1578241126110_35660fcc-3cde-4d30-9ebd-09abba1aedf7",
"color": "green",
"pows": 0
},
{
"gsipk": "player_ab179ad1-a160-44f8-b438-0e93385b6c47",
"gsisk": "game_1578241126110_35660fcc-3cde-4d30-9ebd-09abba1aedf7",
"points": 0,
"num": 1,
"place": null,
"sk": "player_ab179ad1-a160-44f8-b438-0e93385b6c47",
"pieces": [],
"wilds": 0,
"pk": "game_1578241126110_35660fcc-3cde-4d30-9ebd-09abba1aedf7",
"color": "red",
"pows": 0
}
]
}
Okay, thanks to @cyberwombat's comment:
Unless you remap your data to be able to fetch all items in one request
I was able to figure this out. First, I had to refactor my table a bit. I changed the primary key (hash) to be game_<uuid> and then referenced the game details with a sort key (range) like meta_<timestamp> and players with a sort key like player_<uuid>.
Once I did this, I was able to use this resolver to Query and return the game details and all players with this request mapping template:
{
"version": "2017-02-28",
"operation": "Query",
"query" : {
"expression" : "pk = :pk",
"expressionValues" : {
":pk": { "S": "$ctx.arguments.pk" }
}
}
}
The above query returns 5 items (the 4 players and the game metadata). I then used a response mapping template like this:
#set($game = {})
#set($players = [])
#foreach($item in $ctx.result.items)
#if($util.matches('meta_\d*', $item.sk))
## main game object
#set($game = $item)
#else
## player
$util.qr($players.add($item))
#end
#end
$util.qr($game.put("players", $players))
$util.toJson($game)
Now I have a SINGLE query to DynamoDB and no lambda resolvers... beautiful.
Unless you remap your data to be able to fetch all items in one request you will need Pipeline resolvers.. In summary a pipeline is a number of resolvers in line wrapped in a before/after template.
In your case these before/after templates are not really used so the basic setup would be:
Before template (nothing is needed so an empty json is fine)
{}
After template (to pass the result from previous calls on)
$util.toJson($ctx.result)
Then you will have 2 DynamoDB resolvers. These are the same as other DynamoDB resolvers you may have previously done except that in the seecond one, in order to access the first DynamoDB resolvers result you will use $ctx.prev.result. So let's say you passed the game ID in the response of the first call as:
{
"game_id": "$ctx.result.get('theGameId')",
...
}
Then this can be accessed in second request template as $ctx.prev.result.game_id. You can also use the stash instead - $ctx.stash.put() and $ctx.prev.get(). The stash is useful if you need to do something in the BEFORE request (the very first one which we have blank for now) and pass that throughout the resolvers.

CosmosDb Sql query that matches common values in array between documents

I am working with Cosmos DB and I want to write a SQL query that will match common value in array of documents based on id.
To elaborate, imagine you have the following three documents:
{
"id": "2ECF4568-CB0E-4E11-A5CD-1206638F9C39",
"entityType": "ServiceInformationFacility",
"facilities": [
{
"id": "6F706BA3-27AD-45B8-9831-A531E37C4C17",
"facilityName": "Kat Service Center",
"phoneNumber": "9879561234"
},
{
"id": "7F706BA3-27AD-45B8-9831-A531E37C4C17",
"facilityName": "Honda Service Center",
"phoneNumber": "9879561234"
}]
},
{
"id": "3ECF4568-CB0E-4E11-A5CD-1206638F9C39",
"entityType": "ServiceInformationFacility",
"facilities": [
{
"id": "8F706BA3-27AD-45B8-9831-A531E37C4C17",
"facilityName": "Hyundai Service Center",
"phoneNumber": "9879561234"
},
{
"id": "7F706BA3-27AD-45B8-9831-A531E37C4C17",
"facilityName": "Honda Service Center",
"phoneNumber": "9879561234"
}]
},
{
"id": "6ECF4568-CB0E-4E11-A5CD-1206638F9C39",
"entityType": "ServiceInformationFacility",
"facilities": [
{
"id": "8F706BA3-27AD-45B8-9831-A531E37C4C17",
"facilityName": "Hyundai Service Center",
"phoneNumber": "9879561234"
},
{
"id": "7F706BA3-27AD-45B8-9831-A531E37C4C17",
"facilityName": "Honda Service Center",
"phoneNumber": "9879561234"
} ]
}
I want to write a query that return all the common facility based on id.That means when passing the list of Ids, the facility exists in the given Ids should be display(not either or).
so in the above collection it should only return "facilityName": "Honda Service Center" by passing parameter id("2ECF4568-CB0E-4E11-A5CD-1206638F9C39","3ECF4568-CB0E-4E11-A5CD-1206638F9C39","6ECF4568-CB0E-4E11-A5CD-1206638F9C39").
So far I have tried:
SELECT q.facilityName FROM c
join q in c.facilities
where c.id in('6ECF4568-CB0E-4E11-A5CD-1206638F9C39','2ECF4568-CB0E-4E11-A5CD-1206638F9C39')AND c.entityType = 'ServiceInformationFacility'
It gives me all the facility name but I need only facility which are common in the above documents that is "facilityName": "Honda Service Center".
Thanks in advance
It gives me all the facility name but I need only facility which are
common in the above documents that is "facilityName": "Honda Service
Center".
I may get your point now. However, I'm afraid that's impossible in Cosmos SQL. I tried to count the number of appearances of facilityName across the documents and got the solution below, which is the closest to your need.
sql:
SELECT count(c.id) as cnt, f.facilityName from c
join f in c.facilities
where array_contains(['6ECF4568-CB0E-4E11-A5CD-1206638F9C39','2ECF4568-CB0E-4E11-A5CD-1206638F9C39'],c.id,true)
AND c.entityType = 'ServiceInformationFacility'
group by f.facilityName
output:
Then I tried to extend it with some subqueries, but had no luck. So I'd suggest using a stored procedure to finish the remaining work. The main purpose is to loop over the above result and check whether cnt equals the length of the ids array.
Update Answer for Stored procedure code:
input param for @idArray: ["6ECF4568-CB0E-4E11-A5CD-1206638F9C39","2ECF4568-CB0E-4E11-A5CD-1206638F9C39"]
Sp code:
// Cosmos DB stored procedure: returns the facility rows whose per-group
// count equals idArray.length, i.e. facilities present in every requested
// document (emulating a HAVING clause).
// @param {string[]} idArray - document ids to intersect facilities across.
function sample(idArray) {
    var collection = getContext().getCollection();
    var expectedCount = idArray.length;

    // Count, per facilityName, how many of the requested documents contain it.
    // NOTE: Cosmos DB query parameter names must be prefixed with '@';
    // any other prefix is rejected by the server.
    var sqlQuery = {
        "query": 'SELECT count(c.id) as cnt, f.facilityName from c join f in c.facilities ' +
            'where array_contains( @idArray,c.id,true) ' +
            'AND c.entityType = "ServiceInformationFacility" group by f.facilityName',
        "parameters": [
            { "name": "@idArray", "value": idArray }
        ]
    };

    var isAccepted = collection.queryDocuments(
        collection.getSelfLink(),
        sqlQuery,
        function (err, feed, options) {
            if (err) throw err;
            var response = getContext().getResponse();
            if (!feed || !feed.length) {
                response.setBody('no docs found');
            }
            else {
                // Keep only groups whose count matches the number of ids:
                // those facilities exist in all requested documents.
                var commonFacilities = [];
                for (var i = 0; i < feed.length; i++) {
                    if (feed[i].cnt === expectedCount) {
                        commonFacilities.push(feed[i]);
                    }
                }
                response.setBody(commonFacilities);
            }
        });
    if (!isAccepted) throw new Error('The query was not accepted by the server.');
}
Output:

Getting grouped (distinct?!?) array of values from Cosmos SQL

Given a collection of Cosmos documents similar to the following, I'd like to generate a grouped (distinct?!?) list of "categories" using Cosmos SQL. Any help in this regard would be greatly appreciated.
[
{
"id": "f0136e76-8e66-6a5a-3790-b577001d6420",
"itemId": "analyze-and-visualize-your-data-with-azure-cosmos-db-notebooks",
"title": "Built-in Jupyter notebooks in Azure Cosmos DB are now available",
"categories": [
"Developer",
"Database",
"Data Science"
]
},
{
"id": "f0136e76-8e66-6a5a-3790-b577001d6420",
"itemId": "analyze-and-visualize-your-data-with-azure-cosmos-db-notebooks",
"title": "Built-in Jupyter notebooks in Azure Cosmos DB are now available",
"categories": [
"Developer",
"Database",
"Data Science"
]
},
{
"id": "d98c1dd4-008f-04b2-e980-0998ecf8427e",
"itemId": "improving-azure-virtual-machines-resiliency-with-project-tardigrade",
"title": "Improving Azure Virtual Machines resiliency with Project Tardigrade",
"categories": [
"Virtual Machines",
"Supportability",
"Monitoring"
]
}
]
GROUP BY is not supported by Azure Cosmos DB so far. You can alternatively use a stored procedure to implement your requirement.
Base on the sample documents you have given above, here is a sample stored Procedure
// Cosmos DB stored procedure: emulates GROUP BY / DISTINCT by collecting
// the unique values found in each document's `categories` array.
function groupBy() {
    var collection = getContext().getCollection();
    var collectionLink = collection.getSelfLink();
    var isValid = collection.queryDocuments(
        collectionLink,
        'SELECT * FROM stackoverflow s',
        { EnableCrossPartitionQuery: true },
        function (err, feed, options) {
            if (err) throw err;
            var response = getContext().getResponse();
            if (!feed || !feed.length) {
                response.setBody('no docs found');
            }
            else {
                // Use a plain object as a set to de-duplicate category names.
                var seen = {};
                for (var i = 0; i < feed.length; i++) {
                    // Guard: documents without a categories array are skipped.
                    var categories = feed[i].categories || [];
                    for (var j = 0; j < categories.length; j++) {
                        seen[categories[j]] = categories[j];
                    }
                }
                // Build the distinct list once, after all documents are
                // scanned (the original rebuilt it on every outer-loop
                // iteration, which was accidentally quadratic).
                var distinctArray = [];
                for (var distinctObj in seen) {
                    distinctArray.push(seen[distinctObj]);
                }
                response.setBody(distinctArray);
            }
        });
    if (!isValid) throw new Error('Kindly check your query, which not accepted by the server.');
}

Parameterize Query while using OFFSET clause

I've used following stored procedure and try to parameterize the query by passing the number parameter. However it gives me error while executing the stored procedure. Any insights is really helpful
// Cosmos DB stored procedure: pages "UserRole" documents, skipping `number`
// rows via a parameterized OFFSET clause.
// NOTE: Cosmos DB query parameter names must start with '@'; any other
// prefix (e.g. '#') is rejected by the server, which is the error the
// original code hit.
// @param {number} number - how many documents to skip (OFFSET value).
function uspGetUsersByPage(number) {
    //Set Environment
    let context = getContext();
    let coll = context.getCollection();
    let link = coll.getSelfLink();
    let response = context.getResponse();
    let query = {
        query: 'SELECT * FROM a WHERE a.DocumentType = "UserRole" and a.AuditFields.IsLatest = true and a.AuditFields.IsDeleted = false OFFSET @number LIMIT 20'
        , parameters: [{ name: '@number', value: number }]
    };
    //Execute the query against the collection
    let runquery = coll.queryDocuments(link, query, {}, callbackfn);
    //Call function to throw an error(if any) or display the output
    function callbackfn(err, queryoutput) {
        if (err) {
            throw err;
        }
        if (!queryoutput || !queryoutput.length) {
            response.setBody(null);
        }
        else {
            response.setBody(queryoutput);
        }
    };
    //Display standard output if query doesnt get any results
    if (!runquery) { throw Error('Unable to retrieve requested information'); }
};
Please see my simple test following your description.
Data:
Stored procedure:
// Cosmos DB stored procedure demonstrating a parameterized OFFSET clause.
// NOTE: Cosmos DB query parameter names must start with '@'; other prefixes
// are rejected by the server.
// @param {number} prefix - number of documents to skip (OFFSET value).
function sample(prefix) {
    var collection = getContext().getCollection();
    var query = {
        query: "SELECT c.id,c.number FROM c offset @num limit 1",
        parameters: [{ name: "@num", value: prefix }]
    };
    console.log(query);
    var isAccepted = collection.queryDocuments(
        collection.getSelfLink(),
        query,
        function (err, feed, options) {
            if (err) throw err;
            // Check the feed and if empty, set the body to 'no docs found',
            // else return the result page.
            if (!feed || !feed.length) {
                var response = getContext().getResponse();
                response.setBody('no docs found');
            }
            else {
                var response = getContext().getResponse();
                response.setBody(feed);
            }
        });
    if (!isAccepted) throw new Error('The query was not accepted by the server.');
}
Input:
Output:

how to add an empty or data map in dynamodb?

I have something like this:
{
Records:{}
}
and I just want to add data inside records like this:
{
Id: 123,
Records:{
10001: { event : "item1" }
}
}
My 1st attempt:
var params = {
TableName : "Records",
Key : { Id : 123 },
UpdateExpression: 'set Record.#k1.event = :v1',
ExpressionAttributeNames: { '#k1' : 10001},
ExpressionAttributeValues: { ':v1' : 'item1' }
};
Because Record.10001 doesn't exist, it gives me an error:
The document path provided in the update expression is invalid for update
"ADD" only support NUMBER and SET type so its not suitable in my case.
My 2nd attempt (trying to create a empty map 1st):
var params = {
TableName : "Records",
Key : { Id : 123 },
UpdateExpression: 'set Record.#k1 = {}',
ExpressionAttributeNames: { '#k1' : 10001},
};
It fails as my syntax is probably incorrect and I cannot find how to create a empty map to an attribute.
So my question is how do I add structured object as a map or empty map to an existing item like the example above?
I can easily modifying the data from the amazon DynamoDB data management page so at least I know there exist a way to do it. I just need to know how.
Thanks
I somehow successfully done it this way:
var params = {
TableName: "Records",
Key: { Id: 123 },
UpdateExpression: 'set Records.#k1 = :v1',
ExpressionAttributeNames: {'#k1': '10001',},
ExpressionAttributeValues: {':v1': { event: 'Yay~' } }
};
optionally you could use
UpdateExpression: 'set Records.#k1 = if_not_exists( Records.#k1, :v1)',
to avoid overwriting existing records.
I found a workaround for the error we get.
# Try the nested update first; DynamoDB rejects it with
# "The document path provided in the update expression is invalid for update"
# when the Records.10001 map does not exist yet.
# NOTE(review): a bare `except:` catches every error (throttling, typos,
# KeyboardInterrupt), not just the missing-document-path ClientError --
# consider catching botocore's ClientError and inspecting the error code.
try:
    table.update_item(
        Key={'Id': id},
        UpdateExpression='SET Records.#key1.#key2 = :value1',
        ExpressionAttributeNames={'#key1': '10001', '#key2': 'event'},
        ExpressionAttributeValues={':value1': 'singing'}
    )
except:
    # Fall back to creating the whole nested map in one shot.
    # WARNING: this overwrites any existing Records attribute.
    table.update_item(
        Key={'Id': id},
        UpdateExpression='SET Records = :value1',
        ExpressionAttributeValues={':value1': {'10001': {'event': 'dance'}}}
    )
When we try to update for the first time we get "The document path provided in the update expression is invalid for update" error. An exception will be thrown since there will be no nested JSON.
Thereafter, any updates say for example Record.10002 will not throw exception. Update function in the try block will be executed.
Please comment on this if this is the correct way of handling this kind of scenario.
one of the possible resolution to get rid of such problem: you have to have an empty object created and exist in DB in advance.
for instance,
during generation of your object (original creation in DB):
field: { }
to update this field use:
TableName: ....,
Key: {
.....
},
UpdateExpression: 'set field.#mapKey = :v',
ExpressionAttributeNames: {
'#mapKey': myName
},
ExpressionAttributeValues: {
':v': myValue
}
This can be done in c# as follows.
If you have the following json:
"invoice":{
"M":{
":number":{
"S":"XPN1052"
},
":date":{
"S":"02-17-2022"
}
}
}
You can use the following code to insert an invoice record:
ExpressionAttributeValues = new Dictionary<string, AttributeValue> {
{":invoice", new AttributeValue { M = new Dictionary<string, AttributeValue>{
{":date", new AttributeValue { S = invoice.TxnDate.ToString("MM-dd-yyyy") }},
{":number", new AttributeValue{ S = invoice.DocNumber } } }
} }},
UpdateExpression = "SET invoice = :invoice",
var params = {
TableName : "Records",
Key : { Id : 123 },
UpdateExpression: 'set #Records = :obj',
ExpressionAttributeNames: { '#Records' : 'Records'},
ExpressionAttributeValues: { ':obj' : {'10001': { 'event' : 'item1' }}}
};
In order to insert/update Dynamodb items dynamically based on a given dictionary with various keys (eventually columns in Dynamodb) we can construct query string as shown below:
# Given a dictionary whose contents need to be dumped into DynamoDB.
# (Illustrative pseudo-data -- the trailing '...' are not valid Python.)
dict_to_insert = {'column1':'value1',
'column2.subdocument1':{'subkey1':'subvalue1',
'subkey2':'subvalue2'}...}...
# Placeholder variables.
# Using upsert semantics for insert/update of records in DynamoDB.
# NOTE(review): despite its name, values_dict accumulates the '#k_i' ->
# attribute-NAME entries, and names_dict accumulates the ':v_i' -> VALUE
# entries (see the assignments at the bottom of the loop). Make sure they
# are passed to ExpressionAttributeNames / ExpressionAttributeValues
# accordingly when calling update_item.
query = "SET"
values_dict = {}
names_dict = {}
i = 0
# Iterate over the given dictionary and construct the update expression.
# (Indentation was lost in this snippet; the loop body below belongs
# inside the for statement.)
for key, value in dict_to_insert.items():
# The item's own key column must not appear in the update expression,
# so it is skipped (the counter still advances to keep placeholders stable).
if key == 'ITEM_KEY_NAME':
i += 1
continue
# None values cause an error on update_item, so they are skipped too.
if value is not None:
# if_not_exists prevents overwriting existing values in those columns.
query += " " + "#k_" + str(i) + " = if_not_exists(#k_" + str(i) + \
", :v_" + str(i)+")"
if key != list(dict_to_insert.keys())[-1]: # if not the last attribute, add a comma
query += ","
# Record the placeholder -> name and placeholder -> value mappings.
values_dict["#k_" + str(i)] = key
names_dict[":v_" + str(i)] = value
i += 1
Once we construct the query string as shown above, then we can confidently run our update_item method on Dynamodb:
# `table` here is a boto3 DynamoDB Table resource instance.
# BUG FIX 1: the construction loop above stores '#k_i' -> attribute name in
# values_dict and ':v_i' -> value in names_dict, so the two dicts were being
# passed to the wrong keyword arguments. DynamoDB requires
# ExpressionAttributeNames keys to start with '#' and
# ExpressionAttributeValues keys to start with ':'.
# BUG FIX 2: a Table resource is already bound to its table, and its
# update_item does not accept a TableName parameter (it raises a
# ParamValidationError), so that argument is removed.
table.update_item(
    Key={'ITEM_KEY_NAME': 'ANY_VALUE_AS_KEY'},
    UpdateExpression=query.strip(),
    ExpressionAttributeNames=values_dict,
    ExpressionAttributeValues=names_dict)

Resources