The current Neo4j documentation states that creating a relationship locks both nodes.
Now consider the case where one node is a master node and every other node being created must be related to it. For example, every new "Animal" node needs to be related to a master "Zoo" node.
So, when many new "Animal" nodes are created with a relationship to the existing "Zoo" node, won't all the Cypher requests simply queue up waiting on the lock, since every request needs to lock the same "Zoo" node?
I'm observing slowdowns when a lot of data is created on the graph in this manner.
Is there a way to tell Neo4j not to lock the nodes for a Cypher CREATE request? Can some sort of parallel write be enabled?
Update:
Test Results:
Create Zoo nodes: time taken (ms): 2222
Create Animal nodes for separate Zoos: time taken (ms): 2206
Create Animal nodes for same Zoo: time taken (ms): 9015
There is a difference of about 7 seconds for just 200 simultaneous queries.
Test script:
NodeJS driver (seraph)
Neo4j version 2.2.4, community edition
Ubuntu Linux
var seraph = require('seraph');
var moment = require('moment');
var async = require('async');

var neo4j = seraph({
    "url": "http://localhost:7474",
    "user": "neo4j",
    "pass": "neo4j"
});

var num = 200;

async.series([
    function (cb) {
        // clean
        neo4j.query('MATCH (n:Zoo)-[r:HAS]->(a:Animal) DELETE n,r,a', cb);
    },
    function (cb) {
        // clean
        neo4j.query('MATCH (n:Zoo) DELETE n', cb);
    },
    function (cb) {
        // we create Zoo nodes, each with a num property
        var start = moment();
        var count = 0;
        var abort = false;
        for (var i = 0; i < num; i++) {
            neo4j.query('CREATE (n:Zoo {obj})', { obj: { num: i } }, function (err, nodes) {
                if (err) {
                    cb(err);
                    abort = true;
                } else {
                    count++;
                    if (count >= num && !abort) {
                        console.log('Create Zoo nodes: time taken (ms): ' + moment().diff(start));
                        cb();
                    }
                }
            });
        }
    },
    function (cb) {
        // we create (Zoo)-[HAS]->(Animal) nodes, each Animal node related with a SEPARATE Zoo node
        var start = moment();
        var count = 0;
        var abort = false;
        for (var i = 0; i < num; i++) {
            neo4j.query('MATCH (n:Zoo) WHERE n.num = {num} CREATE (n)-[r:HAS]->(a:Animal) RETURN a LIMIT 1', { num: i }, function (err, nodes) {
                if (err) {
                    cb(err);
                    abort = true;
                } else {
                    count++;
                    if (count >= num && !abort) {
                        console.log('Create Animal nodes for separate Zoos: time taken (ms): ' + moment().diff(start));
                        cb();
                    }
                }
            });
        }
    },
    function (cb) {
        // we create (Zoo)-[HAS]->(Animal) nodes, each Animal node related with the SAME Zoo node
        var start = moment();
        var count = 0;
        var abort = false;
        for (var i = 0; i < num; i++) {
            neo4j.query('MATCH (n:Zoo) WHERE n.num = 0 CREATE (n)-[r:HAS]->(a:Animal) RETURN a LIMIT 1', function (err, nodes) {
                if (err) {
                    cb(err);
                    abort = true;
                } else {
                    count++;
                    if (count >= num && !abort) {
                        console.log('Create Animal nodes for same Zoo: time taken (ms): ' + moment().diff(start));
                        cb();
                    }
                }
            });
        }
    }
], function (err) {
    if (err) {
        console.error(err);
    }
    process.exit(0);
});
As mentioned in the question "Is it possible to override Neo4j lock behavior for relationships?":
It's not possible to override the locking behaviour. Neo4j used to
support multiple isolation levels, so it might be that the word
"default" is from that time and that the page needs an update.
But you can still get better performance by using Cypher's FOREACH clause when you need to create N relationships to one node, as sketched below. Also, upgrading to Neo4j 2.3 should be good for you, as upgrades usually are (don't forget to set allow_store_upgrade=true).
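For illustration, here is a rough, untested sketch of that idea using the same seraph connection and Zoo/Animal model as the test script above (the {count} parameter and the num property on Animal are my own additions):

// Sketch only: create all the Animal nodes for one Zoo in a single Cypher
// statement, so the write lock on the Zoo node is taken once per statement
// instead of once per request.
neo4j.query(
    'MATCH (n:Zoo) WHERE n.num = 0 ' +
    'FOREACH (i IN range(1, {count}) | CREATE (n)-[:HAS]->(:Animal { num: i }))',
    { count: num },
    function (err) {
        if (err) { return console.error(err); }
        console.log('Created ' + num + ' Animal nodes with a single statement');
    }
);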
Related
I'm using Node/Puppeteer in the code below, passing in a large list of URLs for traversal and scraping. It has been difficult to do this asynchronously, though I feel I am getting closer and closer to the answer. I am currently stuck on an issue related to the following error.
UnhandledPromiseRejectionWarning: Unhandled promise rejection (rejection id: 17): Error: Protocol error (Target.createTarget): Target closed.
This error occurs once on every iteration of the while loop, though I'm not sure what I might be doing incorrectly.
Could someone help me do the following:
1) Diagnose the source of the error.
2) Potentially find a more effective way to traverse a large list of URLs asynchronously.
async function subProc(list, batchSize) {
    let subList = null;
    let i = 0;
    while (list.length > 0) {
        let browser = await puppeteer.launch();
        subList = list.splice(0, batchSize);
        console.log("Master List Size :: " + list.length);
        console.log("SubList Size :: " + subList.length);
        for (let j = 0; j < subList.length; j++) {
            promiseArray.push(new Promise((resolve, reject) => {
                resolve(pageScrape(subList[j], browser));
            }));
        }
        Promise.all(promiseArray)
            .then(response => {
                procArray.concat(response);
            });
        promiseArray = new Array();
        try {
            await browser.close();
        } catch (ex) {
            console.log(ex);
        }
    }
}
async function pageScrape(url, browser) {
    let page = await browser.newPage();
    await page.goto(url, {
        timeout: 0
    });
    await page.waitFor(1000);
    return await page.evaluate(() => {
        let appTitle = document.querySelector('').innerText;
        let companyName = document.querySelector('').innerText;
        let dateListed = document.evaluate("", document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.innerText;
        let category = document.evaluate("']//a//strong", document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.innerText;
        /* */
        return {
            appTitle,
            companyName,
            dateListed,
            category
        }
    }).then(response => {
        let urlData = {
            id: subList[j],
            appName: response.appTitle,
            companyName: response.companyName,
            dateListed: response.dateListed,
            category: response.category
        }
        return urlData;
    });
}
I figured out the solution to the problem I was having.
Every computer is limited in its processing ability, so instead of iterating over 1,000 URLs simultaneously you have to break the work into smaller pieces.
By using Promise.all, scraping 10 URLs at a time and storing the results in an array, I was able to throttle the processing required to work through all 1,000 URLs.
processBatch(subData, 10, procArray).then((processed) => {
    // flatten the per-batch result arrays into a single results list
    for (let i = 0; i < procArray.length; i++) {
        for (let j = 0; j < procArray[i].length; j++) {
            results.push(procArray[i][j]);
        }
    }
});

function processBatch(masterList, batchSize, procArray) {
    // scrape one batch of URLs in parallel
    return Promise.all(masterList.splice(0, batchSize).map(async url => {
        return singleScrape(url)
    })).then((results) => {
        if (masterList.length < batchSize) {
            console.log('done');
            procArray.push(results);
            return procArray;
        } else {
            console.log('MasterList Size :: ' + masterList.length);
            procArray.push(results);
            // recurse until the master list is exhausted
            return processBatch(masterList, batchSize, procArray);
        }
    })
}
I would like to count how many Entreprise records are in each category, but I'm stuck on the asynchronous concept.
Here's what I already have:
Category.getall(function(err, cat){
    if(err) return res.negotiate(err);
    catIds = []
    for( var iCat in cat){
        catIds.push(cat[iCat].id)
        // and here I would like to do something like
        Entreprise.count({category_id: cat[iCat].id}, function(err, nbr){
            categoriesOUT.push({categorie: cat, entreprise_number: nbr })
            // I know that I can not do it this way, but it's just to show the logic I would like to have.
            if(cat.length==iCat){
                return res.json({categories: categoriesOUT})
            }
        })
    }
})
There are a couple of ways to handle this. One would be to bring in a promise library like Q; a rough sketch of that approach is below. Another would be a single database call that counts enterprise objects grouped by category_id; however, that goes beyond Waterline's normal queries, so you would have to use .query or .native.
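For reference, the Q-based route might look something like this (untested; it assumes the same Category and Entreprise models as the question and that getall returns an array):

var Q = require('q');

Category.getall(function (err, cat) {
    if (err) { return res.negotiate(err); }
    // run one count per category in parallel, then respond once all of them resolve
    Q.all(cat.map(function (c) {
        return Q.ninvoke(Entreprise, 'count', { category_id: c.id })
            .then(function (nbr) {
                return { categorie: c, entreprise_number: nbr };
            });
    })).then(function (categoriesOUT) {
        res.json({ categories: categoriesOUT });
    }).catch(function (err) {
        return res.negotiate(err);
    });
});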
The easiest quick fix is to just keep a counter of how many results you have handled. You may get tired of this approach after using it a couple of times, but it would look something like this:
Category.getall(function(err, cat){
    if(err) { return res.negotiate(err); }
    var catIds = [], categoriesOUT = [], processedCategories = 0;
    for( var iCat in cat){
        catIds.push(cat[iCat].id)
        Entreprise.count({category_id: cat[iCat].id}, function(err, nbr) {
            if (err) {
                categoriesOUT.push({categorie: cat, entreprise_number: 0});
            } else {
                categoriesOUT.push({categorie: cat, entreprise_number: nbr });
            }
            processedCategories += 1;
            if (processedCategories >= cat.length) {
                return res.json({categories: categoriesOUT});
            }
        });
    }
});
Here's how I finally got it, using only a MySQL request as suggested by @arbuthnott.
(The category field is called domaine here.)
Domaine.getall(function(err, domaines){
    if(err){return res.negotiate(err)}
    var domNames = {}, domContain = {}, domOut = [];
    Entreprise.query('SELECT domaine_id, COUNT(*) FROM entreprise GROUP BY domaine_id', function(err, entreprises){
        if(err){return res.negotiate(err)}
        entreprises = JSON.parse(JSON.stringify(entreprises));
        for(var ent of entreprises){
            domContain[ent['domaine_id']] = ent['COUNT(*)'];
        }
        for(var iDom in domaines){
            var countAdded = false;
            for(var dc in domContain){
                if(dc==domaines[iDom].id) {
                    domaines[iDom].entreprises_count = domContain[dc];
                    countAdded = true;
                }
            }
            if(!countAdded) domaines[iDom].entreprises_count = 0;
        }
        res.json({domaines:domaines})
    })
})
Is there a way to do a nightly backup of an App Maker database? Just in case a user accidentally deletes any data?
Even just having the data exported to a spreadsheet would be acceptable.
You can create a clock-based installable trigger that executes once a day, in the morning before office hours.
This code lives in a server-side script and will look something like this:
function createInstallableTrigger() {
    // Runs at 5am in the timezone of the script
    ScriptApp.newTrigger("backUp")
        .timeBased()
        .atHour(5)
        .everyDays(1) // Frequency is required if you are using atHour() or nearMinute()
        .create();
}

function backUp() {
    try {
        var spreadSheet = SpreadsheetApp.openById("").getActiveSheet(),
            dataToBackUp = [],
            globalKeys = {
                model: ["first_name", "last_name", "email"],
                label: ["First Name", "Last Name", "Email"]
            },
            records = app.models.requests.newQuery().run();
        if (records.length >= 1) {
            for (var i = 0; i < records.length; i++) {
                var newLine = [];
                for (var x = 0; x < globalKeys.model.length; x++) {
                    newLine.push(records[i][globalKeys.model[x]]);
                }
                dataToBackUp.push(newLine);
                // at the end, push it all to the spreadsheet
                if (i === records.length - 1) {
                    // check if there is any entry at all
                    if (dataToBackUp.length >= 1) {
                        // append column titles first
                        spreadSheet.appendRow(globalKeys.label);
                        // then write the data rows below the titles
                        spreadSheet.getRange(2, 1, dataToBackUp.length, globalKeys.model.length).setValues(dataToBackUp);
                    }
                }
            }
        }
    } catch (e) {
        console.log(e);
    }
}
So, Meteor reruns helper code every time it is called in the template, right? My issue is that I have a heavy helper that returns a large object. I'm iterating over a list of these objects and then over some nested objects, which results in a really big lag.
So, are there any design patterns that prevent recalling the whole helper every time? Or do I just need to break up my object?
Template.deliveries.helpers({
    current_delivery: function() {
        var delivery_id = Template.instance().data.current_delivery_id;
        var delivery = Deliveries.findOne({'_id': delivery_id});
        var project = Projects.findOne({'_id': Session.get('current_project_id')});
        var secondary_profile_names = [];
        if (Session.get('delivery_include_secondaries')) {
            for (var n in project.delivery_profiles) {
                if (project.delivery_profiles[n].name === delivery.delivery_profile) {
                    if (project.delivery_profiles[n].secondary_deliverables) {
                        secondary_profile_names = project.delivery_profiles[n].secondary_deliverables;
                    }
                    break;
                }
            }
        }
        $("#delivery-profile").val(delivery.delivery_profile);
        var elements = $.map(delivery.elements, function(id, idx) {
            var i_el = InternalElements.findOne({'_id': id});
            i_el.source_element = SourceElements.findOne({'_id': i_el.source_element});
            if (secondary_profile_names) {
                i_el.secondary_elements = InternalElements.find({
                    'source_element': i_el.source_element._id,
                    'name': {'$in': secondary_profile_names},
                    "$or": [{'is_primary': false}, {'is_primary': {'$exists': false}}]
                }, {
                    'sort': {'version': -1},
                    'limit': 1
                }).fetch();
            } else {
                i_el.secondary_elements = [];
            }
            return i_el;
        });
        delivery.elements = elements.sort(function(a, b) { return (a.shot_name > b.shot_name) - (a.shot_name < b.shot_name); });
        return delivery;
    },
});
A pattern I've used successfully is to cache the results of expensive computations in a local collection.
MyLocalCache = new Mongo.Collection();
I like to make the objects in this collection 1:1 with the originals, so I reuse the _id from the original, along with any keys and values that don't require transformation, and then extend the object with the transformed values.
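A minimal sketch of that pattern applied to the deliveries helper above (untested; expensiveTransform() is a hypothetical stand-in for the heavy lookups in the original helper, and cache invalidation is left out):

DeliveryCache = new Mongo.Collection(null); // client-only local collection

Template.deliveries.helpers({
    current_delivery: function () {
        var deliveryId = Template.instance().data.current_delivery_id;
        var cached = DeliveryCache.findOne({ _id: deliveryId });
        if (cached) {
            return cached; // cheap lookup when the helper reruns
        }
        var delivery = Deliveries.findOne({ _id: deliveryId });
        var transformed = expensiveTransform(delivery); // heavy work done only once
        DeliveryCache.insert(_.extend({ _id: deliveryId }, transformed));
        return DeliveryCache.findOne({ _id: deliveryId });
    }
});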
I'm having issues getting two dependent types of data from a PouchDB database.
I have a list of cars that I get like so:
localDB.query(function(doc) {
    if (doc.type === 'list') {
        emit(doc);
    }
}, {include_docs : true}).then(function(response) {
    console.log("cars", response);
    // Save Cars List to app
    for(var i = 0; i < response.rows.length; i++) {
        addToCarsList(response.rows[i].id, response.rows[i].carNumber);
    }
    console.log("Cars List: " + carsListToString());
    return response;
}).then(function(listRecord) {
    listRecord.rows.forEach(function(element, index){
        console.log(index + ' -> ', element);
        localDB.query(function(doc) {
            console.log("filtering with carNb = " + element.carNb);
            if (doc.type === 'defect' && doc.listId == getCurrentListId() && doc.carNb == element.carNb ) {
                emit(doc);
            }
        }, {include_docs : false}).then(function(result){
            console.log("defects", result);
        }).catch(function(err){
            console.log("an error has occurred", err);
        });
    });
}).catch(function(err) {
    console.log('error', err);
});
Here's what I want to happen: after getting the list of cars, I would like to query the defects for each car and store them in arrays. Then, when all that querying is done, I want to build the UI with the saved data.
But what actually happens is that the forEach runs through immediately and does not wait for the inner asynchronous localDB.query.
How can I query some documents based on an attribute from a parent query? I looked into promises in the PouchDB documentation but I can't understand how to do it.
(Please forgive possible lint errors; this code was anonymized by hand and greatly simplified.)
The method you are looking for is Promise.all() (it runs all the promises and resolves when they are all done).
However, your queries as written are already pretty inefficient. It would be better to create a persistent index; otherwise PouchDB has to do a full database scan for every query() call (!). You can read up on the PouchDB query guide for details.
I would recommend installing the pouchdb-upsert plugin and then doing:
// helper method
function createDesignDoc(name, mapFunction) {
    var ddoc = {
        _id: '_design/' + name,
        views: {}
    };
    ddoc.views[name] = { map: mapFunction.toString() };
    return ddoc;
}

localDB.putIfNotExists(createDesignDoc('my_index', function (doc) {
    emit([doc.type, doc.listId, doc.carNb]);
})).then(function () {
    // find all docs with type 'list'
    return localDB.query('my_index', {
        startkey: ['list'],
        endkey: ['list', {}],
        include_docs: true
    });
}).then(function (response) {
    console.log("cars", response);
    // Save Cars List to app
    for (var i = 0; i < response.rows.length; i++) {
        addToCarsList(response.rows[i].id, response.rows[i].carNumber);
    }
    console.log("Cars List: " + carsListToString());
    return response;
}).then(function (listRecord) {
    return PouchDB.utils.Promise.all(listRecord.rows.map(function (row) {
        // find all docs with the given type, listId, carNb
        return localDB.query('my_index', {
            key: ['defect', getCurrentListId(), row.doc.carNb],
            include_docs: true
        });
    }));
}).then(function (finalResults) {
    console.log(finalResults);
}).catch(function (err) {
    console.log("an error has occurred", err);
});
I'm using a few tricks here:
I emit [doc.type, doc.listId, doc.carNb], which allows us to query by type alone or by type+listId+carNb.
When querying for just the type, we can do {startkey: ['list'], endkey: ['list', {}]}, which matches only the docs with type "list", because {} sorts "higher" than strings in CouchDB collation order.
PouchDB.utils.Promise is a "hidden" API, but it's pretty safe to use if you ask me. It's unlikely we'll change it.
Edit: another option is to use the new pouchdb-find plugin, which offers a simplified query API designed to replace the existing map/reduce query() API; a rough sketch follows.
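For example, something along these lines (untested; it assumes the pouchdb-find plugin is loaded on localDB and reuses the type/listId/carNb fields and getCurrentListId() helper from the code above):

localDB.createIndex({
    index: { fields: ['type', 'listId', 'carNb'] }
}).then(function () {
    // all "list" docs
    return localDB.find({ selector: { type: 'list' } });
}).then(function (lists) {
    // one defects query per list doc, run in parallel
    return Promise.all(lists.docs.map(function (listDoc) {
        return localDB.find({
            selector: { type: 'defect', listId: getCurrentListId(), carNb: listDoc.carNb }
        });
    }));
}).then(function (defectsPerList) {
    console.log(defectsPerList);
}).catch(function (err) {
    console.log("an error has occurred", err);
});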
Another approach would be to pull both the list docs and the defect docs down at the same time, then merge them together with a reduce-like method that converts them into an array of objects:
{
    _id: 1,
    type: 'list',
    ...
    defects: [{
        type: 'defect',
        listId: 1,
        ...
    }]
}
By pulling the lists and the defects down in one call you save several calls to the PouchDB query engine, but you do have to iterate through every result to build your collection of list objects, each with an embedded array of defects.
// This is untested code so it may not work, but you should get the idea
var _ = require('underscore');

// order document results by list then defect
var view = function (doc) {
    if (doc.type === 'list') {
        emit([doc._id, doc.carNumber, 1]);
    } else if (doc.type === 'defect') {
        emit([doc.listId, doc.carNb, 2]);
    }
}

localDB.query(view, { include_docs: true })
    .then(function(response) {
        return _(response.rows)
            .reduce(function(m, r) {
                if (r.key[2] === 1) {
                    // initialize a list doc with an empty defects array
                    r.doc.defects = [];
                    m.push(r.doc)
                    return m;
                }
                if (r.key[2] === 2) {
                    // attach the defect to the list doc that precedes it in key order
                    var list = _(m).last()
                    if (list._id === r.key[0] && list.carNumber === r.key[1]) {
                        list.defects.push(r.doc);
                    }
                    return m;
                }
            }, []);
    })
    .then(function(lists) {
        // bind to UI
    });
With CouchDB we found that reducing the number of calls to the query engine was more performant. I don't know whether the same holds for PouchDB, but this should work as a solution, especially if you want to embed several collections into one list document.