Arangodb AQL recursive graph traversal - recursion

I have a graph with three collections which items can be connected by edges.
ItemA is a parent of itemB which in turn is a parent of itemC.
Elements only can be connected by edges in direction
"_from : child, _to : parent"
Currently I can get only "linear" result with this AQL query:
LET contains = (FOR v IN 1..? INBOUND 'collectionA/itemA' GRAPH 'myGraph' RETURN v)
RETURN {
"root": {
"id": "ItemA",
"contains": contains
}
}
And result looks like this:
"root": {
"id": "itemA",
"contains": [
{
"id": "itemB"
},
{
"id": "itemC"
}
]
}
But I need to get a "hierarchical" result of graph traversal like that:
"root": {
"id": "itemA",
"contains": [
{
"id": "itemB",
"contains": [
{
"id": "itemC"
}
}
]
}
So, can I get this "hierarchical" result running an aql query?
One more thing: traversal should run until leaf nodes will be encountered. So depth of the traversal is unknown in advance.

I have found solution. We decided to use UDF (user defined functions).
Here is a few steps to construct the proper hierarchical structure:
Register the function in arango db.
Run your aql query, that constructs a flat structure (vertex and corresponding path for this vertex). And pass result as input parameter of your UDF function.
Here my function just append each element to its parent
In my case:
1) Register the function in arango db.
db.createFunction(
// NOTE: the registered name (including the "APPENT" typo) is kept as-is,
// because the AQL query below invokes the function by this exact name.
'GO::LOCATED_IN::APPENT_CHILD_STRUCTURE',
String(function (root, flatStructure) {
  // Builds a hierarchical tree from a flat traversal result.
  // root:          {id: ...} — the tree root; returned with children attached.
  // flatStructure: array of vertices, each carrying a `path` array of ids
  //                from the root down to the vertex itself.
  // Each element is appended to its parent's `contains` array; the parent id
  // is the second-to-last entry of the element's path.
  if (root && root.id) {
    var elsById = {};
    elsById[root.id] = root;
    flatStructure.forEach(function (element) {
      elsById[element.id] = element;
      var parentElId = element.path[element.path.length - 2];
      var parentEl = elsById[parentElId];
      // Guard: the parent must have appeared earlier in flatStructure
      // (traversal order guarantees this); skip orphans instead of crashing.
      if (parentEl) {
        if (!parentEl.contains)
          parentEl.contains = [];
        parentEl.contains.push(element);
      }
      // The path was only needed to locate the parent; drop it from output.
      delete element.path;
    });
  }
  return root;
})
);
2) Run AQL with udf:
LET flatStructure = (FOR v,e,p IN 1..? INBOUND 'collectionA/itemA' GRAPH 'myGraph'
LET childPath = (FOR pv IN p.vertices RETURN pv.id_source)
RETURN MERGE(v, childPath))
LET root = {"id": "ItemA"}
RETURN GO::LOCATED_IN::APPENT_CHILD_STRUCTURE(root, flatStructure)
Note: Please don't forget the naming convention when implement your functions.

I also needed to know the answer to this question so here is a solution that works.
I'm sure the code will need to be customised for you and could do with some improvements, please comment accordingly if appropriate for this sample answer.
The solution is to use a Foxx Microservice that supports recursion and builds the tree. The issue I have is around looping paths, but I implemented a maximum depth limit that stops this, hard coded to 10 in the example below.
To create a Foxx Microservice:
Create a new folder (e.g. recursive-tree)
Create the directory scripts
Place the files manifest.json and index.js into the root directory
Place the file setup.js in the scripts directory
Then create a new zip file with these three files in it (e.g. Foxx.zip)
Navigate to the ArangoDB Admin console
Click on Services | Add Service
Enter an appropriate Mount Point, e.g. /my/tree
Click on Zip tab
Drag in the Foxx.zip file you created, it should create without issues
If you get an error, ensure the collections myItems and myConnections don't exist, and the graph called myGraph does not exist, as it will try to create them with sample data.
Then navigate to the ArangoDB admin console, Services | /my/tree
Click on API
Expand /tree/{rootId}
Provide the rootId parameter of ItemA and click 'Try It Out'
You should see the result, from the provided root id.
If the rootId doesn't exist, it returns nothing
If the rootId has no children, it returns an empty array for 'contains'
If the rootId has looping 'contains' values, it returns nesting up to depth limit, I wish there was a cleaner way to stop this.
Here are the three files:
setup.js (to be located in the scripts folder):
'use strict';
// Foxx setup script: creates the sample vertex/edge collections and the
// named graph used by the /tree service. Runs when the service is installed.
// Bug fix: the original required '#arangodb' (a transcription of '@arangodb')
// and nested the edge/graph creation inside the item-collection check, so
// neither was created when 'myItems' already existed.
const db = require('@arangodb').db;
const graph_module = require('@arangodb/general-graph');
const itemCollectionName = 'myItems';
const edgeCollectionName = 'myConnections';
const graphName = 'myGraph';

// Create the vertex collection with sample items (idempotent).
if (!db._collection(itemCollectionName)) {
  const itemCollection = db._createDocumentCollection(itemCollectionName);
  itemCollection.save({_key: "ItemA" });
  itemCollection.save({_key: "ItemB" });
  itemCollection.save({_key: "ItemC" });
  itemCollection.save({_key: "ItemD" });
  itemCollection.save({_key: "ItemE" });
}

// Create the edge collection with the sample parent->child links (idempotent).
if (!db._collection(edgeCollectionName)) {
  const edgeCollection = db._createEdgeCollection(edgeCollectionName);
  edgeCollection.save({_from: itemCollectionName + '/ItemA', _to: itemCollectionName + '/ItemB'});
  edgeCollection.save({_from: itemCollectionName + '/ItemB', _to: itemCollectionName + '/ItemC'});
  edgeCollection.save({_from: itemCollectionName + '/ItemB', _to: itemCollectionName + '/ItemD'});
  edgeCollection.save({_from: itemCollectionName + '/ItemD', _to: itemCollectionName + '/ItemE'});
}

// Register the named graph over the edge collection if it does not exist yet.
if (!graph_module._exists(graphName)) {
  const graphDefinition = [
    {
      "collection": edgeCollectionName,
      "from": [itemCollectionName],
      "to": [itemCollectionName]
    }
  ];
  graph_module._create(graphName, graphDefinition);
}
manifest.json (to be located in the root folder):
{
"engines": {
"arangodb": "^3.0.0"
},
"main": "index.js",
"scripts": {
"setup": "scripts/setup.js"
}
}
index.js (to be located in the root folder):
'use strict';
// Foxx service: GET /tree/:rootId returns the item with the given key and,
// nested through "contains" arrays, everything reachable via OUTBOUND edges
// in 'myGraph'. Fixed '#arangodb' -> '@arangodb' (transcription error) and
// replaced the private cursor property `_documents` with the public toArray().
const createRouter = require('@arangodb/foxx/router');
const router = createRouter();
const joi = require('joi');
const db = require('@arangodb').db;
const aql = require('@arangodb').aql;

// Maximum recursion depth — guards against cycles in the graph.
const MAX_DEPTH = 10;

/**
 * Fetches the document `itemId` plus its direct children, then recurses into
 * each child until leaves are reached or MAX_DEPTH levels have been visited.
 * Returns undefined when itemId matches no document.
 */
const recursionQuery = function(itemId, tree, depth) {
  const result = db._query(aql`
    FOR d IN myItems
    FILTER d._id == ${itemId}
    LET contains = (
      FOR c IN 1..1 OUTBOUND ${itemId} GRAPH 'myGraph' RETURN { "_id": c._id }
    )
    RETURN MERGE({"_id": d._id}, {"contains": contains})
  `);
  // Public cursor API; the _id filter yields at most one document.
  tree = result.toArray()[0];
  if (depth < MAX_DEPTH && tree && tree.contains && tree.contains.length > 0) {
    for (var i = 0; i < tree.contains.length; i++) {
      tree.contains[i] = recursionQuery(tree.contains[i]._id, tree.contains[i], depth + 1);
    }
  }
  return tree;
};

router.get('/tree/:rootId', function(req, res) {
  let myResult = recursionQuery('myItems/' + req.pathParams.rootId, {}, 0);
  res.send(myResult);
})
.response(joi.object().required(), 'Tree of child nodes.')
.summary('Tree of child nodes')
.description('Tree of child nodes underneath the provided node.');

module.context.use(router);
Now you can invoke the Foxx Microservice API end point, providing the rootId it will return the full tree. It's very quick.
The example output of this for ItemA is:
{
"_id": "myItems/ItemA",
"contains": [
{
"_id": "myItems/ItemB",
"contains": [
{
"_id": "myItems/ItemC",
"contains": []
},
{
"_id": "myItems/ItemD",
"contains": [
{
"_id": "myItems/ItemE",
"contains": []
}
]
}
]
}
]
}
You can see that Item B contains two children, ItemC and ItemD, and then ItemD also contains ItemE.
I can't wait until ArangoDB AQL improves the handling of variable depth paths in the FOR v, e, p IN 1..100 OUTBOUND 'abc/def' GRAPH 'someGraph' style queries. Custom visitors were not recommended for use in 3.x but haven't really been replaced with something as powerful for handling wild card queries on the depth of a vertex in a path, or handling prune or exclude style commands on path traversal.
Would love to have comments/feedback if this can be simplified.

Related

Problems structuring and querying a table properly - DynamoDB

I am new to dynamoDB. I am having difficulty developing a table structure. I have data that can best be thought of as a folder structure. There are folders which are nested in parent folders. Most of the time, I will be querying for all folders with a given parent folder, however, there are times when I will be querying individual folders.
If I use the parent_id (parent folder) as the partition key and the id of the individual folder as the sort key, I believe that this creates a table where all related files are stored together and I can query them efficiently. However, I have questions.
First, the query "works" in that it returns the data, but is it written so that it queries the data correctly and is not merely scrolling through the whole table?
// Lists all folders whose partition key equals :parent_id.
// This is a Query (not a Scan): DynamoDB only reads the single partition,
// so it does NOT scroll through the whole table.
router.get("/api/children_folders/:parent_id", (req, res, next) => {
  let parent_id = req.params.parent_id;
  let params = {
    TableName: tableName,
    KeyConditionExpression: "parent_id = :pid",
    ExpressionAttributeValues: {
      ":pid": parent_id,
    },
    ScanIndexForward: false, // newest sort-key values first
  };
  docClient.query(params, (err, data) => {
    if (err) {
      console.log(err);
      // err.statusCode can be undefined (e.g. network errors); the original
      // passed it straight to res.status(), which throws. Fall back to 500.
      const status = err.statusCode || 500;
      return res.status(status).send({
        message: err.message,
        status: status,
      });
    } else {
      return res.status(200).send(data);
    }
  });
});
Second, if I want to query for individual tags, do I need to pass in a combination of the parent folder ID and the actual ID, or is this OK?
// Fetches a single folder by id.
// Bug fix: the original stored the path param in `tag_id` but referenced the
// undeclared `folder_id` in ExpressionAttributeValues, raising a
// ReferenceError on every request.
router.get("/api/folder/:folder_id", (req, res, next) => {
  let folder_id = req.params.folder_id;
  let params = {
    TableName: tableName,
    KeyConditionExpression: "folder_id = :fid",
    ExpressionAttributeValues: {
      ":fid": folder_id,
    },
    Limit: 1, // only one folder is expected per id
  };
  docClient.query(params, (err, data) => {
    if (err) {
      console.log(err);
      // Default to 500 when the error carries no HTTP status code.
      const status = err.statusCode || 500;
      return res.status(status).send({
        message: err.message,
        status: status,
      });
    } else {
      if (!_.isEmpty(data.Items)) {
        return res.status(200).send(data.Items[0]);
      } else {
        return res.status(404).send();
      }
    }
  });
});
I just feel like I am missing some thing here and I want to make sure that I am grabbing the data correctly.
The PK should be something that divides the load equally (ideally). I don't have the full picture of your problem, but assuming you can choose a good parent folder as a partition key, you can insert every file/dir with a sort key representing its full path.
For example:
PK SK
/home /username/pictures/cat.jpg
This way if you want to get a specific item you can use the get item request
// Fetch one item by its full primary key (partition key PK + sort key SK).
var params = {
// Low-level DynamoDB attribute format: "S" marks a string value.
Key: {
"PK": { "S": "/home" },
"SK": { "S": "/username/pictures/cat.jpg" }
},
TableName: tableName
};
// GetItem needs the complete key and returns at most a single item.
var result = await dynamodb.getItem(params).promise()
Now if you want to list all the files in "/home/username/pictures" you can use begins with query
// Query all items under "/home/username/pictures" using a begins_with
// condition on the sort key.
// Bug fix: the original declared placeholders "#user_id"/"#user_relation"
// while the KeyConditionExpression used "#PK"/"#SK" — DynamoDB rejects the
// request when expression names are undefined. The names must map each
// placeholder to the actual attribute names (PK and SK).
const params = {
  TableName: 'tableName', // fixed 'tablenName' typo
  KeyConditionExpression: '#PK = :root_path and begins_with(#SK, :sub_path)',
  ExpressionAttributeNames: {
    "#PK": "PK",
    "#SK": "SK"
  },
  ExpressionAttributeValues: {
    ":root_path": "/home",
    ":sub_path": "/username/pictures"
  }
};

How to add elements to a svelte writable store vector

I have this store:
export const flights = writable<APIResponse>([])
And I want to add elements at the end of that array. I tried this:
flights.set({ ...flights, input })
But that doesn't add, it overwrites the existing elements, leaving only the one in input. How can I do that?
I am in a .ts. I'm taking over someone else's job who left the company and I'm new to all of this, I still don't have a clear idea of this mix of languages/frameworks.
When I print flights appears empty.
console.warn('flights store: ' + JSON.stringify(flights))
{}
Some advances. It seems it was not empty. I wasn't printing it the correct way. I can see the elements added if I add them like this:
unconfirmed_flights.update((data) => {
data.push(input)
return data
})
and print the content like this:
unconfirmed_flights .update((data) => {
console.warn(JSON.stringify(data))
return data
})
That prints something like: [{json_object}, {json_object}].
The thing is that in fact I have two stores:
export const flights = writable<APIResponse>([])
export const unconfirmed_flights = writable<APIResponse>([])
The code receives several items that are added to unconfirmed_flights correctly. Then a dialog opens and if the user presses accept I need to copy the items in unconfirmed_flights to flights. I do that like this. First I create an index (id) with the empty array:
flights.update((data) => {
data[id] = []
return data
})
Then I add all the elements in unconfirmed_flights:
unconfirmed_flights.update((uplan) => {
flights.update((data) => {
data[id].push(uplan)
return data
})
return uplan
})
But the result, instead of
{"id": [{json_object}, {json_object}]}
is
{"id": [[{json_object}, {json_object}]]}
With that nested array. However, if I don't do the step of data[id] = [], I get a "Uncaught (in promise) TypeError: data.push is not a function", that I read is because the index does not exist. How can I avoid that nested array?
const flights = writable([])
If you want to add a value to a store from a .js file use .update() where the current value is available
flights.update(value => [...value, newValue])
Inside a .svelte file the store variable can be prefixed with $ to access the value and the new value could be added like this
$flights = [...$flights, newValue]
After three days of unsuccessful attempts to add the content exactly as I needed, I found the help of a JS expert. Here is the solution.
Having
flights = {"1": { "name": "name1" } }
and
unconfirmed_flights = [ {"2": { "name": "name2" } }, {"3": { "name": "name3" } } ]
The goal was to add the unconfirmed_flights to flights:
flights = {"1": { "name": "name1" },
"2": { "name": "name2" },
"3": { "name": "name3" } }
And that was done like this:
// Merge every pending entry from unconfirmed_flights into the flights store.
// Later entries win on key collisions, exactly like repeated object spreads.
flights.update((current) => {
  const pending = get(unconfirmed_flights)
  return pending.reduce((merged, item) => ({ ...merged, ...item }), current)
})
being
// Loose API payload shape: arbitrary string-keyed object.
export type APIResponse = { [key: string]: any }
// Flights the user has accepted (merged in from unconfirmed_flights).
export const flights = writable<APIResponse>([])
// Staging list of incoming flights awaiting user confirmation.
export const unconfirmed_flights = writable<APIResponse[]>([])

How to order by child value in Firebase

I have data like this:
scores: {
uid1: {
score: 15,
displayName: "Uciska"
},
uid2: {
score: 3,
displayName: "Bob"
},
uid3: {
etc...
}
}
I want to rank them by score and keep only the top 100.
I did that by following the doc. But it does not work. It always returns the same order even if the score changes.
// Top-100 scores, ordered ascending by the 'score' child value.
const query = firebase.database().ref('scores')
.orderByChild('score')
.limitToLast(100)
// child_added fires once per matching child, in query order, and again for
// newly added children — it does not re-deliver existing children when their
// scores change, which is why the printed order appears static.
query.on('child_added', snapshot => {
const score = snapshot.val().score
console.log(score)
})
I added that too in the rules to optimize but I'm not sure it's correct:
"scores": {
".indexOn": ["score"]
}
What is the right way to go?
Your code is correct and should show the desired result.
You may encounter difficulties to see the result due to the 'child_added' event since "the listener is passed a snapshot containing the new child's data", as detailed here in the doc.
You could use the once() method, as follows, which will show the result a bit more clearly since it will display the entire set of scores.
// Same top-100 query, read a single time instead of via a live listener.
const query = firebase.database().ref('scores')
.orderByChild('score')
.limitToLast(100)
// 'value' delivers one snapshot of the entire result set; forEach walks the
// children in query order (ascending score), making the ordering visible.
query.once('value', function (snapshot) {
snapshot.forEach(function (childSnapshot) {
var childKey = childSnapshot.key;
var childData = childSnapshot.val();
console.log(childData);
// ...
});
});
Also, your rule can be written as ".indexOn": "score" since there is only one parameter.

File System to JSON tree asynchronously in Javascript Chrome App

I have been going at this for hours, but my head is going round in circles and confusing itself with async with callbacks...
I am looking to take a DirectoryEntry in a chrome app and recursively create a JSON file tree, like below:
[{
name: ent.name,
path: ent.fullPath,
isDir: ent.isDirectory
},
{
name: ent.name,
path: ent.fullPath,
isDir: ent.isDirectory,
children: [
{
name: ent.name,
path: ent.fullPath,
isDir: ent.isDirectory
},
{
name: ent.name,
path: ent.fullPath,
isDir: ent.isDirectory
}
]
}]
In searching I have found many node.js implementations, and have it working in node, but in trying to port it over to FileSystem API and chrome.filesystem in particular, it all goes to pot.
I have also found https://stackoverflow.com/a/21909322 this post which list_dir function is very nearly what I want but it creates a flat structure of all files rather than the structure.
I have tried adapting to create the children but cannot get my head around the callbacks.
// Recursively lists a DirectoryEntry into [{name, isDir[, children]}, ...],
// delivering the result via cb(listing).
// NOTE(review): this is the asker's non-working draft. walkSync has no return
// statement, so the `obj.children = walkSync(...)` assignment below always
// stores undefined; additionally the loop keeps iterating synchronously after
// scheduling the recursion, while the continuation restarts at i + 1, so
// entries after a directory can be processed twice.
function walkSync(dirent, cb, listing) {
console.log('Entering walkSync()')
if (listing === undefined) listing = [];
var reader = dirent.createReader();
var read_some = reader.readEntries.bind(reader, function(ents) {
// readEntries returns batches; an empty batch means the directory is done.
if (ents.length === 0)
return cb && cb(listing);
process_some(ents, 0);
function process_some(ents, i) {
for(; i < ents.length; i++) {
var obj = { name: ents[i].name, isDir: ents[i].isDirectory};
if (ents[i].isDirectory) {
// Recurse into the subdirectory; the bound callback is meant to resume
// this loop at i + 1 once the subtree has been read.
obj.children = walkSync(ents[i], process_some.bind(null, ents, i + 1), listing);
}
listing.push(obj);
}
// Ask the reader for the next batch of entries.
read_some();
}
}, function() {
console.error('error reading directory');
});
read_some();
}
Any pointers to help me get my head round recursion and callbacks to create this would be a great help and reduce the amount of further lost hours..
Thanks!

How to query two types of records in CouchDB

I’m having issues getting two dependent types of data from a PouchDB database.
I have a list of cars that I get like so:
// Query 1: fetch all 'list' docs (the cars).
// NOTE(review): a temporary (inline) view like this triggers a full database
// scan on every query() call — see the persistent-index approach below.
localDB.query(function(doc) {
if (doc.type === ‘list’) {
emit(doc);
}
}, {include_docs : true}).then(function(response) {
console.log(“cars”, response);
// Save Cars List to app
for(var i = 0; i < response.rows.length; i++) {
addToCarsList(response.rows[i].id, response.rows[i].carNumber);
}
console.log(“Cars List: " + carsListToString());
return response;
}).then(function(listRecord) {
// NOTE(review): forEach does not wait for the inner query promises, so the
// outer chain settles before any defect data arrives — this is exactly the
// "forEach gets processed quickly" problem described in the question.
listRecord.rows.forEach(function(element, index){
console.log(index + ' -> ', element);
localDB.query(function(doc) {
console.log("filtering with carNb = " + element.carNb);
if (doc.type === 'defect' && doc.listId == getCurrentListId() && doc.carNb == element.carNb ) {
emit(doc);
}
}, {include_docs : false}).then(function(result){
console.log("defects", result);
}).catch(function(err){
console.log("an error has occurred", err);
});
});
}).catch(function(err) {
console.log('error', err);
});
Here's what happens. After getting the list of cars, then for each cars I would like to query the defects and store then in some arrays. Then when all that querying is done, I want to build the UI with the data saved.
But what's happening is that the forEach gets processed quickly and does not wait for the inner async'd localDb.query.
How can I query some documents based on an attribute from a parent query? I looked into promises in the PouchDB doc but I can't understand how to do it.
(please forget about curly quotes and possible lint errors, this code was anonymized by hand and ultra simplified)
The method you are looking for is Promise.all() (execute all promises and return when done).
However, your query is already pretty inefficient. It would be better to create a persistent index, otherwise it has to do a full database scan for every query() (!). You can read up on the PouchDB query guide for details.
I would recommend installing the pouchdb-upsert plugin and then doing:
// helper method
// Builds a CouchDB/PouchDB design document for a single named view.
// The map function is serialized to a string, as required by the view engine.
function createDesignDoc(name, mapFunction) {
  const views = {};
  views[name] = { map: mapFunction.toString() };
  return {
    _id: '_design/' + name,
    views,
  };
}
// Register the persistent index (no-op when it already exists), then run the
// list query and the per-car defect queries against it.
localDB.putIfNotExists(createDesignDoc('my_index', function (doc) {
emit([doc.type, doc.listId, doc.carNb]);
})).then(function () {
// find all docs with type 'list'
return localDB.query('my_index', {
startkey: ['list'],
endkey: ['list', {}],
include_docs: true
});
}).then(function (response) {
console.log("cars", response);
// Save Cars List to app
for(var i = 0; i < response.rows.length; i++) {
addToCarsList(response.rows[i].id, response.rows[i].carNumber);
}
console.log("Cars List: " + carsListToString());
return response;
}).then(function (listRecord) {
// Promise.all makes the chain wait for every per-car defect query —
// the fix for the unawaited forEach in the question.
return PouchDB.utils.Promise.all(listRecord.rows.map(function (row) {
// find all docs with the given type, listId, carNb
return localDB.query('my_index', {
key: ['defect', getCurrentListId(), row.doc.carNb],
include_docs: true
});
}));
}).then(function (finalResults) {
console.log(finalResults);
}).catch(function(err){
console.log("an error has occurred", err);
});
I'm using a few tricks here:
emit [doc.type, doc.listId, doc.carNb], which allows us to query by type or by type+listId+carNb.
when querying for just the type, we can do {startkey: ['list'], endkey: ['list', {}]}, which matches just those with the type "list" because {} is the "higher" than strings in CouchDB object collation order.
PouchDB.utils.Promise is a "hidden" API, but it's pretty safe to use if you ask me. It's unlikely we'll change it.
Edit Another option is to use the new pouchdb-find plugin, which offers a simplified query API designed to replace the existing map/reduce query() API.
Another approach would be to pull both the list docs and the defect docs down at the same time then merge them together using a reduce like method that will convert them into an array of objects:
{
_id: 1,
type: 'list',
...
defects: [{
type: 'defect'
listId: 1
...
}]
}
By pulling the list and the defects down in one call you save a several calls to the pouchdb query engine, but you do have to iterate through every result to build your collection of lists objects with and embedded array of defects.
// This is untested code so it may not work, but you should get the idea
var _ = require('underscore');
// Orders rows so each list doc comes first, immediately followed by its
// defect docs (composite key: [list id, car number, 1-or-2 discriminator]).
// Bug fix: the original emitted `emit([doc._id, doc.carNumber, 1);` — the
// closing ']' was missing, a syntax error that broke the whole view.
var view = function (doc) {
  if (doc.type === 'list') {
    emit([doc._id, doc.carNumber, 1]);
  } else if (doc.type === 'defect') {
    emit([doc.listId, doc.carNb, 2])
  }
}
// Pull lists and defects in one call, then fold the interleaved rows into an
// array of list docs, each with an embedded `defects` array.
localDB.query(view, { include_docs: true })
.then(function(response) {
// Relies on the composite-key sort: a list row (discriminator 1) always
// precedes its defect rows (discriminator 2).
return _(response.rows)
.reduce(function(m, r) {
if (r.key[2] === 1) {
// initialize
r.doc.defects = [];
m.push(r.doc)
return m;
}
if (r.key[2] === 2) {
// Attach the defect to the most recently added list when ids match.
// NOTE(review): rows with any other discriminator would make this
// callback return undefined, corrupting the accumulator.
var list = _(m).last()
if (list._id === r.key[0] && list.carNumber === r.key[1]) {
list.defects.push(r.doc);
}
return m;
}
}, []);
})
.then(function(lists) {
// bind to UI
});
With couch, we found reducing calls to the couch engine to be more performant, but I don't know if this approach is better for PouchDB, but this should work as a solution, especially if you are wanting to embed several collections into one list document.

Resources