Getting grouped (distinct?!?) array of values from Cosmos SQL - azure-cosmosdb

Given a collection of Cosmos documents similar to the following, I'd like to generate a grouped (distinct?!?) list of "categories" using Cosmos SQL. Any help in this regard would be greatly appreciated.
[
    {
        "id": "f0136e76-8e66-6a5a-3790-b577001d6420",
        "itemId": "analyze-and-visualize-your-data-with-azure-cosmos-db-notebooks",
        "title": "Built-in Jupyter notebooks in Azure Cosmos DB are now available",
        "categories": [
            "Developer",
            "Database",
            "Data Science"
        ]
    },
    {
        "id": "f0136e76-8e66-6a5a-3790-b577001d6420",
        "itemId": "analyze-and-visualize-your-data-with-azure-cosmos-db-notebooks",
        "title": "Built-in Jupyter notebooks in Azure Cosmos DB are now available",
        "categories": [
            "Developer",
            "Database",
            "Data Science"
        ]
    },
    {
        "id": "d98c1dd4-008f-04b2-e980-0998ecf8427e",
        "itemId": "improving-azure-virtual-machines-resiliency-with-project-tardigrade",
        "title": "Improving Azure Virtual Machines resiliency with Project Tardigrade",
        "categories": [
            "Virtual Machines",
            "Supportability",
            "Monitoring"
        ]
    }
]

GROUP BY is not supported by Azure Cosmos DB so far. As an alternative, you can use a stored procedure to implement your requirement.
Based on the sample documents you have given above, here is a sample stored procedure:
function groupBy() {
    var collection = getContext().getCollection();
    var collectionLink = collection.getSelfLink();
    var isValid = collection.queryDocuments(
        collectionLink,
        'SELECT * FROM stackoverflow s',
        { EnableCrossPartitionQuery: true },
        function (err, feed, options) {
            if (err) throw err;
            var response = getContext().getResponse();
            if (!feed || !feed.length) {
                response.setBody('no docs found');
            }
            else {
                // Collect each category as an object key so duplicates collapse into one entry.
                var items = {};
                for (var i = 0; i < feed.length; i++) {
                    var categories = feed[i].categories;
                    for (var j = 0; j < categories.length; j++) {
                        items[categories[j]] = categories[j];
                    }
                }
                // Turn the keys back into a distinct array and return it.
                var distinctArray = [];
                for (var distinctObj in items) {
                    distinctArray.push(items[distinctObj]);
                }
                response.setBody(distinctArray);
            }
        });
    if (!isValid) throw new Error('The query was not accepted by the server.');
}
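For reference, here is a minimal sketch of how the stored procedure above could be executed from Node.js. The details below are assumptions rather than part of the original answer: it assumes the @azure/cosmos v3 SDK, that the procedure has been registered with the id "groupBy", and placeholder endpoint, key, database, container, and partition key values.
const { CosmosClient } = require("@azure/cosmos");

async function runGroupBy() {
    // Placeholders: replace with your account endpoint, key, database id, and container id.
    const client = new CosmosClient({ endpoint: "https://<account>.documents.azure.com", key: "<key>" });
    const container = client.database("<database-id>").container("<container-id>");
    // Stored procedures run against a single partition; pass the partition key value
    // (or undefined for a non-partitioned container).
    const { resource: distinctCategories } = await container
        .scripts
        .storedProcedure("groupBy")
        .execute("<partition-key-value>");
    console.log(distinctCategories);
}

runGroupBy().catch(console.error);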

Related

DynamoDb Nested Map Update

After working a bit with DynamoDB I’ve run into an issue that, from what I’ve read so far, is not really ideal for DynamoDB. So before I make the switch to RDS, I’d like to see if there’s any way I can achieve what I need with DynamoDB. I’ve also thought about breaking this out into multiple tables for DynamoDB.
Below is my data schema. There is a list nested inside the item, and I need to be able to append strings to that list.
{
    "server-id": "123345678",
    "server-name": "my-server",
    "topics": [
        {
            "name": "my-topic",
            "subscribers": [] // This is what I need to append to
        }
    ]
}
Yes, this is possible.
var AWS = require('aws-sdk');
AWS.config.update({ region: 'us-east-1' });
var ddb = new AWS.DynamoDB({ apiVersion: '2012-08-10' });

var params = {
    ExpressionAttributeNames: {
        "#T": "topics",
        "#S": "subscribers"
    },
    ExpressionAttributeValues: {
        // The values to append to the nested subscribers list.
        ":vals": {
            L: [
                { N: "123" },
                { N: "456" }
            ]
        }
    },
    Key: {
        'server-id': { S: '123345678' }
    },
    ReturnValues: "ALL_NEW",
    TableName: 'dummy-table',
    // list_append concatenates the existing nested list with :vals.
    UpdateExpression: "SET #T[0].#S = list_append(#T[0].#S, :vals)"
};

ddb.updateItem(params, function (err, data) {
    if (err) {
        console.log("Error", err);
    } else {
        console.log("Success", data);
    }
});
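If you prefer the higher-level DocumentClient, the same update can be sketched roughly like this (not part of the original answer; the table name, key, and appended strings are placeholders, and DocumentClient takes plain JavaScript values instead of typed attributes):
var AWS = require('aws-sdk');
AWS.config.update({ region: 'us-east-1' });
var docClient = new AWS.DynamoDB.DocumentClient();

var params = {
    TableName: 'dummy-table',          // placeholder table name
    Key: { 'server-id': '123345678' }, // placeholder key
    UpdateExpression: 'SET #T[0].#S = list_append(#T[0].#S, :vals)',
    ExpressionAttributeNames: { '#T': 'topics', '#S': 'subscribers' },
    ExpressionAttributeValues: { ':vals': ['new-subscriber'] }, // plain strings, no type wrappers
    ReturnValues: 'ALL_NEW'
};

docClient.update(params, function (err, data) {
    if (err) {
        console.log('Error', err);
    } else {
        console.log('Success', data);
    }
});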

Substitute for having clause in CosmosDB

I have the following result from a query, where the count field is derived from an aggregate function:
[
    {
        "count": 1,
        "facilityName": "Hyundai Service Center"
    },
    {
        "count": 2,
        "facilityName": "Honda Service Center"
    },
    {
        "count": 1,
        "facilityName": "Kat Service Center"
    }
]
I want to display only those facilityName values where count >= 2.
How can we achieve this?
I tried to implement your requirement with a stored procedure; please refer to my SP code:
function sample(idArray) {
    var collection = getContext().getCollection();
    var length = idArray.length;
    // Parameterized query: count in how many of the given documents each facilityName appears.
    var sqlQuery = {
        "query": 'SELECT count(c.id) as cnt, f.facilityName from c join f in c.facilities ' +
            'where array_contains(@idArray, c.id, true) ' +
            'AND c.entityType = "ServiceInformationFacility" group by f.facilityName',
        "parameters": [
            { "name": "@idArray", "value": idArray }
        ]
    };
    // Query the documents and filter the aggregated rows in the callback.
    var isAccepted = collection.queryDocuments(
        collection.getSelfLink(),
        sqlQuery,
        function (err, feed, options) {
            if (err) throw err;
            var response = getContext().getResponse();
            if (!feed || !feed.length) {
                response.setBody('no docs found');
            }
            else {
                // Keep only the facilities whose count equals the number of ids passed in.
                var returnArray = [];
                for (var i = 0; i < feed.length; i++) {
                    if (feed[i].cnt == length)
                        returnArray.push(feed[i]);
                }
                response.setBody(returnArray);
            }
        });
    if (!isAccepted) throw new Error('The query was not accepted by the server.');
}
Input param:
["6ECF4568-CB0E-4E11-A5CD-1206638F9C39","2ECF4568-CB0E-4E11-A5CD-1206638F9C39"]
Output (screenshot omitted)
UPDATES:
So, if your collection is partitioned, a stored procedure may not be suitable for you, because a partition key value is required to execute an SP. Please refer to my detailed explanation in this thread: Delete Documents from Cosmos using Query without Partition Key Specification
Actually, there is no complex logic in my SP code above. It just loops over the result of the SQL query and looks for the objects whose count equals idArray.length, which means that object's facilityName exists across all of the documents.
So you don't have to use an SP; you can use any tiny piece of code to handle the logic described above, as in the sketch below.
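For example, a tiny client-side sketch of that check (a hypothetical helper, not from the original answer; queryResult stands for the rows returned by the GROUP BY query and idArray for the ids you passed in):
// Keep only the facilities whose count matches the number of ids,
// i.e. the facilities that appear in every requested document.
function commonFacilities(queryResult, idArray) {
    return queryResult.filter(function (row) {
        return row.cnt === idArray.length;
    });
}

// With the sample data above and two ids, only
// { "cnt": 2, "facilityName": "Honda Service Center" } would remain.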

CosmosDb Sql query that matches common values in array between documents

I am working with Cosmos DB and I want to write a SQL query that matches the common values in an array across documents, based on their ids.
To elaborate, imagine you have the following three documents:
{
    "id": "2ECF4568-CB0E-4E11-A5CD-1206638F9C39",
    "entityType": "ServiceInformationFacility",
    "facilities": [
        {
            "id": "6F706BA3-27AD-45B8-9831-A531E37C4C17",
            "facilityName": "Kat Service Center",
            "phoneNumber": "9879561234"
        },
        {
            "id": "7F706BA3-27AD-45B8-9831-A531E37C4C17",
            "facilityName": "Honda Service Center",
            "phoneNumber": "9879561234"
        }
    ]
},
{
    "id": "3ECF4568-CB0E-4E11-A5CD-1206638F9C39",
    "entityType": "ServiceInformationFacility",
    "facilities": [
        {
            "id": "8F706BA3-27AD-45B8-9831-A531E37C4C17",
            "facilityName": "Hyundai Service Center",
            "phoneNumber": "9879561234"
        },
        {
            "id": "7F706BA3-27AD-45B8-9831-A531E37C4C17",
            "facilityName": "Honda Service Center",
            "phoneNumber": "9879561234"
        }
    ]
},
{
    "id": "6ECF4568-CB0E-4E11-A5CD-1206638F9C39",
    "entityType": "ServiceInformationFacility",
    "facilities": [
        {
            "id": "8F706BA3-27AD-45B8-9831-A531E37C4C17",
            "facilityName": "Hyundai Service Center",
            "phoneNumber": "9879561234"
        },
        {
            "id": "7F706BA3-27AD-45B8-9831-A531E37C4C17",
            "facilityName": "Honda Service Center",
            "phoneNumber": "9879561234"
        }
    ]
}
I want to write a query that returns all the common facilities based on id. That means that, when a list of ids is passed in, only the facilities that exist in every one of the given ids' documents should be displayed (not either/or).
So in the above collection it should only return "facilityName": "Honda Service Center" when passing the parameter ids ("2ECF4568-CB0E-4E11-A5CD-1206638F9C39", "3ECF4568-CB0E-4E11-A5CD-1206638F9C39", "6ECF4568-CB0E-4E11-A5CD-1206638F9C39").
So far I have tried:
SELECT q.facilityName FROM c
join q in c.facilities
where c.id in ('6ECF4568-CB0E-4E11-A5CD-1206638F9C39', '2ECF4568-CB0E-4E11-A5CD-1206638F9C39') AND c.entityType = 'ServiceInformationFacility'
It gives me all the facility names, but I need only the facilities that are common across the above documents, that is, "facilityName": "Honda Service Center".
Thanks in advance
It gives me all the facility names, but I need only the facilities that are common across the above documents, that is, "facilityName": "Honda Service Center".
I may get your point now. However, I'm afraid that's impossible in plain Cosmos SQL. I tried to count the number of appearances of each facilityName across the documents and got the solution below, which is the closest I could get to your need.
sql:
SELECT count(c.id) as cnt, f.facilityName from c
join f in c.facilities
where array_contains(['6ECF4568-CB0E-4E11-A5CD-1206638F9C39','2ECF4568-CB0E-4E11-A5CD-1206638F9C39'],c.id,true)
AND c.entityType = 'ServiceInformationFacility'
group by f.facilityName
Output (screenshot omitted)
Then I tried to extend it with a subquery, but with no luck. So I'd suggest using a stored procedure to finish the rest of the job. The main idea is to loop over the result above and check whether cnt equals the length of the ids array.
Updated answer with stored procedure code:
Input param for @idArray: ["6ECF4568-CB0E-4E11-A5CD-1206638F9C39","2ECF4568-CB0E-4E11-A5CD-1206638F9C39"]
SP code:
function sample(idArray) {
    var collection = getContext().getCollection();
    var length = idArray.length;
    // Parameterized query: count in how many of the given documents each facilityName appears.
    var sqlQuery = {
        "query": 'SELECT count(c.id) as cnt, f.facilityName from c join f in c.facilities ' +
            'where array_contains(@idArray, c.id, true) ' +
            'AND c.entityType = "ServiceInformationFacility" group by f.facilityName',
        "parameters": [
            { "name": "@idArray", "value": idArray }
        ]
    };
    // Query the documents and filter the aggregated rows in the callback.
    var isAccepted = collection.queryDocuments(
        collection.getSelfLink(),
        sqlQuery,
        function (err, feed, options) {
            if (err) throw err;
            var response = getContext().getResponse();
            if (!feed || !feed.length) {
                response.setBody('no docs found');
            }
            else {
                // Keep only the facilities whose count equals the number of ids passed in.
                var returnArray = [];
                for (var i = 0; i < feed.length; i++) {
                    if (feed[i].cnt == length)
                        returnArray.push(feed[i]);
                }
                response.setBody(returnArray);
            }
        });
    if (!isAccepted) throw new Error('The query was not accepted by the server.');
}
Output (screenshot omitted)

Task timed out after 3.00 seconds - Lambda application with nodeJS

I am trying to put a hard-coded data item into DynamoDB, using the AWS SDK to perform the update. All of the debug console.log statements in the code below get printed, but eventually it prints "Task timed out after 3.00 seconds", with no update made to DynamoDB.
function updatedb(intent, session, callback) {
    let country;
    const repromptText = null;
    const sessionAttributes = {};
    let shouldEndSession = false;
    console.log("In the function");
    const AWS = require("aws-sdk");
    const docClient = new AWS.DynamoDB.DocumentClient({ region: 'eu-west-1' });
    var params = {
        TableName: "Location",
        Item: {
            "LocationID": { "S": "11" },
            "Country": { "S": "10" },
            "Description": { "S": "10" },
            "Name": { "S": "10" }
        }
    };
    console.log("Param loaded & executing the DocClient Put");
    docClient.put(params, function (err, data) {
        if (err) {
            speechOutput = 'Update failed';
            console.error("Unable to create table. Error JSON:", JSON.stringify(err, null, 2));
            callback(sessionAttributes,
                buildSpeechletResponse(intent.name, speechOutput, repromptText, shouldEndSession));
        } else {
            console.log("Created table. Table description JSON:", JSON.stringify(data, null, 2));
            speechOutput = 'Update successful';
            callback(sessionAttributes,
                buildSpeechletResponse(intent.name, speechOutput, repromptText, shouldEndSession));
        }
    });
}
The following items are already checked:
1) There is a table named "Location" in DynamoDB.
2) Both DynamoDB and this Lambda function are in eu-west-1 (Ireland).
3) The role assigned to this Lambda function can do all operations on this table. See the policy details below:
{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Sid": "Stmt1510603004000",
            "Effect": "Allow",
            "Action": [
                "dynamodb:*"
            ],
            "Resource": [
                "arn:aws:dynamodb:eu-west-1:752546663632:table/Location"
            ]
        }
    ]
}
How does my Lambda function locate the table "Location" just from the region? The code does not appear to specify an endpoint, etc.; it was developed based on a tutorial.
Is that what I am missing?
Please can you help?
I had a similar issue; try putting the require statements at the beginning of your function.
const AWS = require("aws-sdk");
const docClient = new AWS.DynamoDB.DocumentClient({ region: 'eu-west-1' });
I believe that AWS locates the table based on your identity, in combination with the region and the table name.
I was able to successfully post to a table using this code:
const AWS = require('aws-sdk');
const dynamoDB = new AWS.DynamoDB({ region: 'us-west-2' });

var params = {
    TableName: "my-table",
    Item: {
        "LocationID": { S: "11" },
        "Country": { S: "10" },
        "Description": { S: "10" },
        "Name": { S: "10" }
    }
};

dynamoDB.putItem(params, (err, data) => {
    if (err) {
        console.error(err.stack);
    } else {
        console.log(data);
    }
});
If you can in fact post to the table from the CLI, then there is still at least one remaining issue: it appears that you are using the DocumentClient class incorrectly. It looks like you're mixing up the syntax for DynamoDB.putItem with the syntax for DynamoDB.DocumentClient.put.
My code uses the DynamoDB class directly; based on what you're doing, I see no reason why you couldn't do the same. Otherwise, you should change your Item object:
var params = {
    TableName: "my-table",
    Item: {
        "LocationID": "11",
        "Country": "10",
        "Description": "10",
        "Name": "10"
    }
};
My guess is that your code is currently erroring out because you are trying to insert Maps where you want to insert Strings. If you have CloudWatch configured, you could check the logs.
Finally, I don't see you using callback in your code. If your intention is to respond to a client calling the Lambda, you should do that. Depending on your Node.js version, the Lambda can simply time out without returning a useful response.
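For completeness, here is a minimal sketch of the DocumentClient route with the corrected Item shape (an illustration only, reusing the placeholder table name and values from above):
const AWS = require('aws-sdk');
const docClient = new AWS.DynamoDB.DocumentClient({ region: 'eu-west-1' });

const params = {
    TableName: 'my-table', // placeholder table name from the example above
    Item: {
        LocationID: '11',
        Country: '10',
        Description: '10',
        Name: '10'
    }
};

// DocumentClient.put expects plain values; it handles the DynamoDB type mapping itself.
docClient.put(params, (err, data) => {
    if (err) {
        console.error('Put failed:', JSON.stringify(err, null, 2));
    } else {
        console.log('Put succeeded:', JSON.stringify(data, null, 2));
    }
});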

How to exceed the limit of scan data for 1mb in dynamodb

I am using DynamoDB with Node.js. I have 3000 records, and I am writing 60+ segments in the code, with each segment scanning 1 MB of data, so the results come back as 60+ separate 1 MB-limited result sets. Please provide a solution for scanning all 3000 records in a single step, that is, in one segment. Please provide the solution quickly because I am stuck in the middle of my project. Please help me. Below is my code.
var AWS = require("aws-sdk");
var async = require("async");

AWS.config.update({
    region: "---",
    endpoint: "-----------",
    accessKeyId: "-----------------",
    secretAccessKey: "----------"
});

var db = new AWS.DynamoDB.DocumentClient();
var table = "rets_property_all";
var pstart = new Date().getTime();

async.parallel({
    0: function (callback) {
        db.scan({
            TableName: table,
            ProjectionExpression: "#cityname,ListingKey ",
            FilterExpression: "#cityname = :v_id",
            ExpressionAttributeNames: {
                "#cityname": "CityName",
            },
            ExpressionAttributeValues: { ":v_id": 'BALTIMORE' },
            TotalSegments: 63,
            Segment: 0 // by the worker who has been called
        }, function (err, res) {
            callback(null, res.Items);
        });
    },
    1: function (callback) {
        db.scan({
            TableName: table,
            ProjectionExpression: "#cityname,ListingKey ",
            FilterExpression: "#cityname = :v_id",
            ExpressionAttributeNames: {
                "#cityname": "CityName",
            },
            ExpressionAttributeValues: { ":v_id": 'BALTIMORE' },
            TotalSegments: 63,
            Segment: 1 // by the worker who has been called
        }, function (err, res) {
            callback(null, res.Items);
        });
    },
    2: function (callback) {
        db.scan({
            TableName: table,
            ProjectionExpression: "#cityname,ListingKey ",
            FilterExpression: "#cityname = :v_id",
            ExpressionAttributeNames: {
                "#cityname": "CityName",
            },
            ExpressionAttributeValues: { ":v_id": 'BALTIMORE' },
            TotalSegments: 63,
            Segment: 2 // by the worker who has been called
        }, function (err, res) {
            callback(null, res.Items);
        });
    },
    --------
    ---------
    ------
    62: function (callback) {
        db.scan({
            TableName: table,
            ProjectionExpression: "#cityname,ListingKey ",
            FilterExpression: "#cityname = :v_id",
            ExpressionAttributeNames: {
                "#cityname": "CityName",
            },
            ExpressionAttributeValues: { ":v_id": 'BALTIMORE' },
            TotalSegments: 63,
            Segment: 62 // by the worker who has been called
        }, function (err, res) {
            callback(null, res.Items);
        });
    },
}, function (err, results) {
    if (err) { throw err; }
    var pend = new Date().getTime();
    console.log(results);
});
Actually, there is no way to override the 1 MB limit of Scan. This is a DynamoDB design restriction and can't be overridden by any API; it is a limitation of the service's architecture that you need to work with.
You can use LastEvaluatedKey on the subsequent scans to start from where the previous scan ended.
The result set from a Scan is limited to 1 MB per call. You can use the LastEvaluatedKey from the scan response to retrieve more results.
It is unclear why you want to get all 3000 records in one scan. Even if you have a specific use case for it, it simply can't be achieved with a single DynamoDB Scan call.
Even in a relational database, you get a cursor and iterate over it to read all the records sequentially. Similarly, in DynamoDB you have to call Scan repeatedly until LastEvaluatedKey is no longer returned.
Sample code that scans recursively until LastEvaluatedKey is no longer present:
var docClient = new AWS.DynamoDB.DocumentClient();

var params = {
    TableName: table,
    ProjectionExpression: "#cityname,ListingKey ",
    FilterExpression: "#cityname = :v_id",
    ExpressionAttributeNames: {
        "#cityname": "CityName",
    },
    ExpressionAttributeValues: { ":v_id": 'BALTIMORE' }
};

docClient.scan(params, onScan);

function onScan(err, data) {
    if (err) {
        console.error("Unable to scan the table. Error JSON:", JSON.stringify(err, null, 2));
    } else {
        // Print all the items returned by this page of the scan.
        console.log("Scan succeeded.");
        data.Items.forEach(function (item) {
            console.log("Item :", JSON.stringify(item));
        });
        // Continue scanning if there are more pages to read.
        if (typeof data.LastEvaluatedKey != "undefined") {
            console.log("Scanning for more...");
            params.ExclusiveStartKey = data.LastEvaluatedKey;
            docClient.scan(params, onScan);
        }
    }
}
