I want to refactor my code in Cloud Functions to improve readability and maintenance. The code below works, but after waiting for all promises to complete with Promise.all(), the function times out.
The things I don't understand are:
It works and completes without a timeout when toiletJsonObject["fields"]["adresse"] = formatAddress(toiletJsonObject["fields"]["adresse"]) is commented out
Since it works without the line above, the timeout should be caused by the formatAddress() function. However, this function is not async and just returns a string synchronously. Maybe that's what I'm misunderstanding.
So my questions are:
How do I correct my code to avoid the timeout?
What's the best way to factor code out into helper functions that are only used inside the file and therefore don't need to be exported?
The entire code:
import * as functions from "firebase-functions";
import * as admin from "firebase-admin";
import fetch from "node-fetch";
admin.initializeApp();
const db = admin.firestore();
export const tempoCF = functions.firestore.document("/tempo/{docId}").onCreate(async () => {
console.log("onCreate")
const settings = { method: "Get" }
const metaUrl = "https://opendata.paris.fr/api/datasets/1.0/sanisettesparis/"
const toiletUpdateDateRef = db.collection('toilets').doc("updateDate")
try {
// Get meta data to check last update date
const metaResponse = await fetch(metaUrl, settings)
const metaJson = await metaResponse.json()
const metaUpdateDate = metaJson["metas"]["modified"]
const lastUpdatedDateDoc = await toiletUpdateDateRef.get()
if (!lastUpdatedDateDoc.exists) {
console.log("No existing date document, create one and add last update date : " + metaUpdateDate)
await fetchDataFromURL()
return toiletUpdateDateRef.set({ "lastUpdateDate": metaUpdateDate })
} else {
const lastUpdateDate = lastUpdatedDateDoc.data()["lastUpdateDate"]
// If date from metadata is newer than saved date: get data and update
if (new Date(lastUpdateDate) < new Date(metaUpdateDate)) {
console.log("New data available, update database")
await fetchDataFromURL()
return toiletUpdateDateRef.set({ "lastUpdateDate": metaUpdateDate })
}
else {
console.log("No new data available, do nothing")
return null
}
}
}
catch (error) {
console.log(error);
return null;
}
}
);
async function fetchDataFromURL() {
const dataUrl = "https://opendata.paris.fr/api/records/1.0/search/?dataset=sanisettesparis&q=&rows=-1"
const settings = { method: "Get" }
try {
const response = await fetch(dataUrl, settings)
const json = await response.json()
const promises = []
console.log("fetch data and add toilets to collection")
json["records"].forEach(toiletJsonObject => {
delete toiletJsonObject["fields"]["geo_shape"]
toiletJsonObject["fields"]["adresse"] = formatAddress(toiletJsonObject["fields"]["adresse"])
console.log("after updating adresse field: " + toiletJsonObject["fields"].toString())
const p = db.collection("toilets").doc(toiletJsonObject["recordid"]).set(toiletJsonObject["fields"])
promises.push(p)
})
console.log("finished creating promises. Wait for all to complete")
return Promise.all(promises);
}
catch (error) {
console.log(error);
return null;
}
}
const linkWords = ["de", "des", "du", "le"]
const linkLetters = ["l", "d"]
const firstWordsAddress = ["face", "opposé", "au"]
const alwaysLowerCaseWords = ["ville", "rue"]
function formatAddress(address) {
let processedAddress = ""
if (address != null) {
//if (address.length <= 1) processedAddress = address.toUpperCase();
// Split string into list of words
var wordsList = address.split(' ')
.filter((word) => {
// If there is a word in front of the street number, don't use it
if (firstWordsAddress.includes(word.toLowerCase())) return false
// Else use it
return true
})
var capitalizedList = wordsList.map((word) => {
const lowerCaseWord = word.toLowerCase() //TOSTRING ?
// If current word is a link word, don't capitalize
if (linkWords.includes(lowerCaseWord))
return lowerCaseWord
// If current word is a link letter, add ' char
else if (linkLetters.includes(lowerCaseWord))
return lowerCaseWord + '\''
// If current word should always be in lower case, don't capitalize
else if (alwaysLowerCaseWords.includes(lowerCaseWord))
return word.toLowerCase() //TOSTRING
// Else, capitalize the word
return word[0].toUpperCase() + word.substr(1).toLowerCase()
});
// Always capitalize first word of the address
capitalizedList[0] = capitalizedList[0][0].toUpperCase() + capitalizedList[0].substr(1).toLowerCase()
processedAddress = capitalizedList.join(' ')
processedAddress = processedAddress.replace(/' /g, "'") // regex with /g: a string pattern would only replace the first occurrence
processedAddress = processedAddress.trim()
}
return processedAddress
}
Regarding the formatAddress() helper function you defined, there doesn't appear to be an issue with it in its current form. It can happily run through the entire list of 644 addresses ~210 times per second.
Any timeouts are instead likely to be caused by performing so many database writes in quick succession. When running fetchDataFromURL(), you "spam" the Firestore server with a separate request for each toilet object you upload.
The best-practice approach would be to compile a Batched Write and then commit the result once you've finished processing the data.
As stated in that documentation:
A batched write can contain up to 500 operations. Each operation in the batch counts separately towards your Cloud Firestore usage. Within a write operation, field transforms like serverTimestamp, arrayUnion, and increment each count as an additional operation.
Note: The current list of field transforms includes serverTimestamp, arrayUnion, arrayRemove, and increment. Reference: FieldValue
Creating/deleting/writing a document to Firestore is considered "one operation". Because a field transform requires reading the document, then writing data to that document, it is counted as "two operations".
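To make the counting rule concrete, here is a minimal sketch using a plain WriteBatch (the document IDs and field values are invented for illustration):
const batch = db.batch();
// Plain data: counts as ONE operation.
batch.set(db.collection("toilets").doc("id-1"), {
  adresse: "1 Rue Example",
});
// Contains a field transform: counts as TWO operations.
batch.set(db.collection("toilets").doc("id-2"), {
  adresse: "2 Rue Example",
  updatedAt: admin.firestore.FieldValue.serverTimestamp(),
});
batch.commit();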
Because a single batched write is limited to 500 operations, you should split your data up into smaller batched writes so that each batch is less than this 500 operations limit. The easiest way to achieve this would be to use this MultiBatch class (included below) that I've updated from one of my old answers.
If the data you are writing to a Cloud Firestore document is just basic data, use one of multibatch.create(), multibatch.delete(), multibatch.set(), or multibatch.update(). Each time one of these is called, the internal operations counter is increased by 1.
If the data you are writing to Cloud Firestore contains any FieldValue transforms, use one of multibatch.transformCreate(), multibatch.transformSet(), or multibatch.transformUpdate(). Each time one of these is called, the internal operations counter is increased by 2.
Once adding another operation would push the internal counter past 500, it automatically starts a new batched write and adds it to its internal list.
When you've queued up all your data ready to send off to Firestore, call multibatch.commit().
console.log("Fetching data from third-party server...")
const response = await fetch(dataUrl, settings)
const json = await response.json()
console.log("Data obtained. Parsing as Firestore documents...")
const batch = new MultiBatch(db)
json["records"].forEach(toiletJsonObject => {
delete toiletJsonObject["fields"]["geo_shape"]
toiletJsonObject["fields"]["adresse"] = formatAddress(toiletJsonObject["fields"]["adresse"])
console.log("after updating adresse field: " + toiletJsonObject["fields"].toString())
batch.set(db.collection("toilets").doc(toiletJsonObject["recordid"]), toiletJsonObject["fields"])
})
console.log("Finished parsing. Committing data to Firestore...")
const results = await batch.commit() // see notes about MultiBatch#commit()
console.log("Finished data upload!")
return results;
import { firestore } from "firebase-admin";
/**
* Helper class to compile an expanding `firestore.WriteBatch`.
*
* Using an internal operations counter, this class will automatically start a
* new `firestore.WriteBatch` instance when it detects it has hit the operations
* limit of 500. Once prepared, you can commit the batches together.
*
* Note: `FieldValue` transform operations such as `serverTimestamp`,
* `arrayUnion`, `arrayRemove`, `increment` are counted as two operations. If
* your written data makes use of one of these, you should use the appropriate
* `transformCreate`, `transformSet` or `transformUpdate` method so that the
* internal counter is correctly increased by 2 (the normal versions only
* increase the counter by 1).
*
* If not sure, just use `delete`, `transformCreate`, `transformSet`, or
* `transformUpdate` functions for every operation as this will make sure you
* don't exceed the limit.
*
 * @author Samuel Jones [MIT License] (@samthecodingman)
 * @see https://stackoverflow.com/a/66692467/3068190
 * @see https://firebase.google.com/docs/firestore/manage-data/transactions
 * @see https://firebase.google.com/docs/reference/js/firebase.firestore.FieldValue
*/
export class MultiBatch {
constructor(dbRef) {
this.dbRef = dbRef;
this.committed = false;
this.currentBatch = this.dbRef.batch();
this.currentBatchOpCount = 0;
this.batches = [this.currentBatch];
}
_getCurrentBatch(count) {
if (this.committed) throw new Error("MultiBatch already committed.");
if (this.currentBatchOpCount + count > 500) {
// operation limit exceeded, start a new batch
this.currentBatch = this.dbRef.batch();
this.currentBatchOpCount = 0;
this.batches.push(this.currentBatch);
}
this.currentBatchOpCount += count;
return this.currentBatch;
}
/** Creates the document, fails if it exists. */
create(ref, data) {
this._getCurrentBatch(1).create(ref, data);
return this;
}
/**
* Creates the document, fails if it exists.
*
* Used for commands that contain serverTimestamp, arrayUnion, etc
*/
transformCreate(ref, data) {
this._getCurrentBatch(2).create(ref, data);
return this;
}
/** Writes the document, creating/overwriting/etc as applicable. */
set(ref, data, options = undefined) {
this._getCurrentBatch(1).set(ref, data, options);
return this;
}
/**
* Writes the document, creating/overwriting/etc as applicable.
*
* Used for commands that contain serverTimestamp, arrayUnion, etc
*/
transformSet(ref, data, options = undefined) {
this._getCurrentBatch(2).set(ref, data, options);
return this;
}
/** Merges data into the document, failing if the document doesn't exist. */
update(ref, data, ...fieldsOrPrecondition) {
this._getCurrentBatch(1).update(ref, data, ...fieldsOrPrecondition);
return this;
}
/**
* Merges data into the document, failing if the document doesn't exist.
*
* Used for commands that contain serverTimestamp, arrayUnion, etc
*/
transformUpdate(ref, data, ...fieldsOrPrecondition) {
this._getCurrentBatch(2).update(ref, data, ...fieldsOrPrecondition);
return this;
}
/** Deletes the document. */
delete(ref) {
this._getCurrentBatch(1).delete(ref);
return this;
}
/**
*
* Commits all of the batches to Firestore.
*
* Note: Unlike normal batch operations, this may cause one or more atomic
* writes. One batch may succeed where others fail. By default, if any batch
* fails, it will fail the whole promise. This can be suppressed by passing in
* a truthy value as the first argument and checking the results returned by
* this method.
*
 * @param {boolean} [suppressErrors=false] Whether to suppress errors on a
 * per-batch basis.
 * @return {Promise<firestore.WriteResult[][]>} a promise resolving to an
 * array containing the `WriteResult` arrays (or error-batch pairs if
 * `suppressErrors=true`) for each batch.
*/
commit(suppressErrors = false) {
this.committed = true;
const mapCallback = suppressErrors
? (batch) => batch.commit().catch((error) => ({ error, batch }))
: (batch) => batch.commit();
return Promise.all(this.batches.map(mapCallback));
}
}
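For reference, a short usage sketch of commit() with error suppression (collection name and data are placeholders):
const multibatch = new MultiBatch(db)
multibatch.set(db.collection("toilets").doc("id-1"), { adresse: "..." })
// With suppressErrors=true a failed batch resolves to an { error, batch }
// pair instead of rejecting the whole Promise.all.
multibatch.commit(true).then((results) => {
  results.forEach((result) => {
    if (result && result.error) console.error("A batch failed:", result.error)
  })
})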
Related
Recently I have been working a lot with Cosmos DB and ran into an issue when looking at deleting documents.
I need to delete around 40 million documents in my Cosmos container. I've looked around quite a bit and found a few options, which I have tried. The two fastest I've tried are using a stored procedure within Cosmos to delete records and using a bulk executor.
Both of these options have given subpar results compared to what I am looking for. I believe this should be achievable within a couple of hours, but at the moment I am getting performance of around 1 hour per million records.
The two methods I used can also be seen here:
Stack Overflow Post on Document Deletion
My documents have about 35 keys, where half the values are strings and the other half are floats/integers, if that matters, and there are around 100k records per partition.
Here are the two examples that I am using to attempt the deletion:
This first one is using C# and the documentation that helped me with this is here:
GitHub Documentation azure-cosmosdb-bulkexecutor-dotnet-getting-started
using System;
using System.Collections.Generic;
using System.Configuration;
using System.Diagnostics;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Azure.Documents;
using Microsoft.Azure.Documents.Client;
using Microsoft.Azure.CosmosDB.BulkExecutor;
using Microsoft.Azure.CosmosDB.BulkExecutor.BulkImport;
using Microsoft.Azure.CosmosDB.BulkExecutor.BulkDelete;
namespace BulkDeleteSample
{
class Program
{
private static readonly string EndpointUrl = "xxxx";
private static readonly string AuthorizationKey = "xxxx";
private static readonly string DatabaseName = "xxxx";
private static readonly string CollectionName = "xxxx";
static ConnectionPolicy connectionPolicy = new ConnectionPolicy
{
ConnectionMode = ConnectionMode.Direct,
ConnectionProtocol = Protocol.Tcp
};
static async Task Main(string[] args)
{
DocumentClient client = new DocumentClient(new Uri(EndpointUrl), AuthorizationKey, connectionPolicy);
DocumentCollection dataCollection = GetCollectionIfExists(client, DatabaseName, CollectionName);
// Set retry options high during initialization (default values).
client.ConnectionPolicy.RetryOptions.MaxRetryWaitTimeInSeconds = 30;
client.ConnectionPolicy.RetryOptions.MaxRetryAttemptsOnThrottledRequests = 9;
BulkExecutor bulkExecutor = new BulkExecutor(client, dataCollection);
await bulkExecutor.InitializeAsync();
// Set retries to 0 to pass complete control to bulk executor.
client.ConnectionPolicy.RetryOptions.MaxRetryWaitTimeInSeconds = 0;
client.ConnectionPolicy.RetryOptions.MaxRetryAttemptsOnThrottledRequests = 0;
List<Tuple<string, string>> pkIdTuplesToDelete = new List<Tuple<string, string>>();
for (int i = 0; i < 99999; i++)
{
pkIdTuplesToDelete.Add(new Tuple<string, string>("1", i.ToString()));
}
BulkDeleteResponse bulkDeleteResponse = await bulkExecutor.BulkDeleteAsync(pkIdTuplesToDelete);
}
static DocumentCollection GetCollectionIfExists(DocumentClient client, string databaseName, string collectionName)
{
return client.CreateDocumentCollectionQuery(UriFactory.CreateDatabaseUri(databaseName))
.Where(c => c.Id == collectionName).AsEnumerable().FirstOrDefault();
}
}
}
The second one is using a stored procedure I found which deletes data from a given partition using a query, and which I am running via a Python notebook.
Here is the stored procedure:
/**
* A Cosmos DB stored procedure that bulk deletes documents for a given query.
 * Note: You may need to execute this sproc multiple times (depending on whether the sproc is able to delete every document within the execution timeout limit).
 *
 * @function
 * @param {string} query - A query that provides the documents to be deleted (e.g. "SELECT c._self FROM c WHERE c.founded_year = 2008"). Note: For best performance, reduce the # of properties returned per document in the query to only what's required (e.g. prefer SELECT c._self over SELECT * )
 * @returns {Object.<number, boolean>} Returns an object with the two properties:
* deleted - contains a count of documents deleted
* continuation - a boolean whether you should execute the sproc again (true if there are more documents to delete; false otherwise).
*/
function bulkDeleteSproc(query) {
var collection = getContext().getCollection();
var collectionLink = collection.getSelfLink();
var response = getContext().getResponse();
var responseBody = {
deleted: 0,
continuation: true
};
// Validate input.
if (!query) throw new Error("The query is undefined or null.");
tryQueryAndDelete();
// Recursively runs the query w/ support for continuation tokens.
// Calls tryDelete(documents) as soon as the query returns documents.
function tryQueryAndDelete(continuation) {
var requestOptions = {continuation: continuation};
var isAccepted = collection.queryDocuments(collectionLink, query, requestOptions, function (err, retrievedDocs, responseOptions) {
if (err) throw err;
if (retrievedDocs.length > 0) {
// Begin deleting documents as soon as documents are returned from the query results.
// tryDelete() resumes querying after deleting; no need to page through continuation tokens.
// - this is to prioritize writes over reads given timeout constraints.
tryDelete(retrievedDocs);
} else if (responseOptions.continuation) {
// Else if the query came back empty, but with a continuation token; repeat the query w/ the token.
tryQueryAndDelete(responseOptions.continuation);
} else {
// Else if there are no more documents and no continuation token - we are finished deleting documents.
responseBody.continuation = false;
response.setBody(responseBody);
}
});
// If we hit execution bounds - return continuation: true.
if (!isAccepted) {
response.setBody(responseBody);
}
}
// Recursively deletes documents passed in as an array argument.
// Attempts to query for more on empty array.
function tryDelete(documents) {
if (documents.length > 0) {
// Delete the first document in the array.
var isAccepted = collection.deleteDocument(documents[0]._self, {}, function (err, responseOptions) {
if (err) throw err;
responseBody.deleted++;
documents.shift();
// Delete the next document in the array.
tryDelete(documents);
});
// If we hit execution bounds - return continuation: true.
if (!isAccepted) {
response.setBody(responseBody);
}
} else {
// If the document array is empty, query for more documents.
tryQueryAndDelete();
}
}
}
I'm not sure if I am doing anything wrong or if the performance just isn't there with Cosmos, but I'm finding it quite difficult to achieve what I'm looking for. Any advice is greatly appreciated.
I want to perform a compound query in Firestore to get all documents with field bloodgroup equal to A+ and field createdBy not equal to email (the email of the logged-in user). When I perform the query I get a NullPointerException. How do I perform the query correctly?
2021-07-24 19:50:24.746 17550-17550/com.example.bloodbankcompany E/AndroidRuntime: FATAL EXCEPTION: main
Process: com.example.bloodbankcompany, PID: 17550
java.lang.NullPointerException at com.example.bloodbankcompany.UserlistActivity$EventChangeListener3$1.onEvent(UserlistActivity.kt:217)
I am storing the document snapshots inside the userArrayList array. Without the whereNotEqualTo clause the query works and my documents get listed in the RecyclerView.
private fun EventChangeListener2(){
val sharedPreferences1 = getSharedPreferences("email", Context.MODE_PRIVATE)
val email: String? = sharedPreferences1.getString("email","null")?.trim()
Toast.makeText(this, "ssrae$email", Toast.LENGTH_SHORT ).show()
mFireStore.collection("applicationForm").whereNotEqualTo("createdBy",email).whereEqualTo("bloodgroup","A+").addSnapshotListener(object : EventListener<QuerySnapshot>{
override fun onEvent(value: QuerySnapshot?, error: FirebaseFirestoreException?) {
if (error!= null){
Log.e("firestore error", error.message.toString())
}
for(dc: DocumentChange in value?.documentChanges!!){
if (dc.type== DocumentChange.Type.ADDED){
userArrayList.add(dc.document.toObject(User1::class.java))
var number=userArrayList
var number1 =userArrayList
}
// Toast.makeText(applicationContext,userArrayList.toString(), Toast.LENGTH_SHORT).show()
}
myAdapter.notifyDataSetChanged()
}
})
}
Well, I have edited your code a little; if it still doesn't work, add a comment.
An explanation of the changes is commented below.
private fun EventChangeListener2() {
val sharedPreferences1 = getSharedPreferences("email", Context.MODE_PRIVATE)
val email: String? = sharedPreferences1.getString("email", "null")?.trim()
Log.d("firestore email", email.toString())
Toast.makeText(this, "ssrae$email", Toast.LENGTH_SHORT).show()
// try and catch will avoid your app to crash.
try {
//ref
var ref = mFireStore.collection("applicationForm")
.whereEqualTo("bloodgroup", "A+")
/**
 * I believe, since your email is of a nullable type, there may be
 * a chance that email is null when it is given to whereNotEqualTo.
 * I am just making an assumption here since I don't know what you
 * receive from sharedPreferences and whether it is null or not
*
* So, what I have done here is,
* firstly, I have split the firestore call into 2 parts and
* Secondly, I have a null-check for email, if it is
* not-null ref will also include this query
*
*/
//null Check for email
if (email != null) ref = ref.whereNotEqualTo("createdBy", email)
// Snapshot Listener
ref.addSnapshotListener(object : EventListener<QuerySnapshot> {
override fun onEvent(value: QuerySnapshot?, error: FirebaseFirestoreException?) {
if (error != null) {
Log.e("firestore error", error.message.toString())
}
for (dc: DocumentChange in value?.documentChanges!!) {
if (dc.type == DocumentChange.Type.ADDED) {
userArrayList.add(dc.document.toObject(User1::class.java))
var number = userArrayList
var number1 = userArrayList
}
// Toast.makeText(applicationContext,userArrayList.toString(), Toast.LENGTH_SHORT).show()
}
myAdapter.notifyDataSetChanged()
}
})
} catch (e: Exception) {
Log.e("firestore error", "Error", e)
}
}
EDIT:
According to the Firebase docs:
https://firebase.google.com/docs/firestore/query-data/indexing#exemptions
If you attempt a compound query with a range clause that doesn't map to an existing index, you receive an error. The error message includes a direct link to create the missing index in the Firebase console.
I'm implementing pagination for my Flutter app with Firestore and I am running into a design issue.
I'm using services class to abstract database operation from the business logic of my app through data model class like so:
UI <- business logic (riverpod) <- data model class <- stateless firestore service
This works great as it follows the separation of concerns principles.
However, in the Firestore library, the only way to implement pagination is to save the last DocumentSnapshot and reference it in the next query using startAfterDocument(). This means that, as my database services are stateless, I would need to save this DocumentSnapshot in my business logic code, which should in principle be completely abstracted from Firestore.
My first instinct would be to reconstruct a DocumentSnapshot from my data model class inside the service and use that for the pagination, but I would not be able to reconstruct it completely, so I wonder if that would be enough.
Has anyone run into this issue? How did you solve it?
Cheers!
I stumbled upon the exact same issue, even though I was using Bloc instead of Riverpod.
I wrote a whole article on that, in order to also support live updates to the list and allow infinite scrolling: ARTICLE ON MEDIUM
My approach was to order the query by name and id (for example), and to use startAfter instead of startAfterDocument.
For example:
import 'package:cloud_firestore/cloud_firestore.dart';
import 'package:infite_firestore_list/domain/list_item_entity.dart';
import 'package:infite_firestore_list/domain/item_repository.dart';
class FirebaseItemRepository implements ItemRepository {
final _itemsCollection = FirebaseFirestore.instance.collection('items');
@override
Future<Stream<List<ListItem>>> getItems({
String startAfterName = '',
String startAfterId = '',
int paginationSize = 10,
}) async {
return _itemsCollection
.orderBy("name")
.orderBy(FieldPath.documentId)
.startAfter([startAfterName, startAfterId])
.limit(paginationSize)
.snapshots()
.map((querySnapshot) => querySnapshot.docs.map((doc) {
return ListItemDataModel.fromFirestoreDocument(doc).toDomain();
}).toList());
}
}
In this way, in your logic you only have to use the id and name, or whatever fields you wish to use, for example a date.
If you use a combination of multiple orderBy clauses, the first time you run the query Firebase may ask you to build the index, with a link that will appear in the logs.
The drawback of this approach is that it only works if you are sure the fields you are using in the orderBy are unique. If, for example, you sort by date and two documents have the same date, using startAfter with that date (the first item) may skip the second item with the same date...
In my example the startAfterId doesn't seem useful, but in the use case I had, it solved some edge cases I stumbled upon.
Alternative
An alternative I thought of, but that I personally didn't like (hence I did not mention it in my article), could be to store an array of the snapshots of the last document of each page in the repository itself.
Then use the id from the logic domain to request a new page and make the correspondence id <--> snapshot in the repository itself.
This approach could be interesting if you are expecting a finite number of pages, and hence a bounded array in your repository singleton; otherwise it smells like a memory leak. That's why I personally do not like this approach, as I prefer to stay as general as possible.
The very definition of paging (you are at one page; you go to the next page) is Stateful, so attempting to do it "stateless" has no meaning.
I don't work in flutter, but in JS/React I built the following class that returns an OBJECT that has the PageForward/PageBack methods, and properties to hold the required data/state:
export class PaginateFetch {
/**
 * constructs an object to paginate through large Firestore Tables
 * @param {string} table a properly formatted string representing the requested collection
 * - always an ODD number of elements
 * @param {array} filterArray an (optional) 3xn array of filter (i.e. "where") conditions
 * The array is assumed to be sorted in the correct order -
 * i.e. filterArray[0] is added first; filterArray[length-1] last
 * returns data as an array of objects (not dissimilar to Redux State objects)
 * with both the documentID and documentReference added as fields.
 * @param {array} sortArray a 2xn array of sort (i.e. "orderBy") conditions
 * @param {?string} refPath (optional) allows "table" parameter to reference a sub-collection
 * of an existing document reference (I use a LOT of structured collections)
 * @param {number} limit page size
 * @category Paginator
 */
constructor(
table,
filterArray = null,
sortArray = null,
refPath = null,
limit = PAGINATE_DEFAULT
) {
const db = dbReference(refPath);
/**
* current limit of query results
 * @type {number}
*/
this.limit = limit;
/**
* underlying query for fetch
 * @private
 * @type {Query}
*/
this.Query = sortQuery(
filterQuery(db.collection(table), filterArray),
sortArray
);
/**
* current status of pagination
 * @type {PagingStatus}
* -1 pending; 0 uninitialized; 1 updated;
*/
this.status = PAGINATE_INIT;
}
/**
* executes the query again to fetch the next set of records
 * @async
 * @method
 * @returns {Promise<RecordArray>} resolves to an array of records - the next page
*/
PageForward() {
const runQuery = this.snapshot
? this.Query.startAfter(last(this.snapshot.docs))
: this.Query;
this.status = PAGINATE_PENDING;
return runQuery
.limit(this.limit)
.get()
.then((QuerySnapshot) => {
this.status = PAGINATE_UPDATED;
//*IF* documents (i.e. haven't gone beyond start)
if (!QuerySnapshot.empty) {
//then update document set, and execute callback
//return Promise.resolve(QuerySnapshot);
this.snapshot = QuerySnapshot;
}
return Promise.resolve(RecordsFromSnapshot(this.snapshot));
});
}
/**
* executes the query again to fetch the previous set of records
 * @async
 * @method
 * @returns {Promise<RecordArray>} resolves to an array of records - the previous page
*/
PageBack() {
const runQuery = this.snapshot
? this.Query.endBefore(this.snapshot.docs[0])
: this.Query;
this.status = PAGINATE_PENDING;
return runQuery
.limitToLast(this.limit)
.get()
.then((QuerySnapshot) => {
this.status = PAGINATE_UPDATED;
//*IF* documents (i.e. haven't gone back before start)
if (!QuerySnapshot.empty) {
//then update document set, and execute callback
this.snapshot = QuerySnapshot;
}
return Promise.resolve(RecordsFromSnapshot(this.snapshot));
});
}
}
/**
 * @private
 * @typedef {Object} filterObject
 * @property {!String} fieldRef
 * @property {!String} opStr
 * @property {any} value
*/
/**
* ----------------------------------------------------------------------
 * @private
 * @function filterQuery
 * builds and returns a query built from an array of filter (i.e. "where")
 * conditions
 * @param {Query} query collectionReference or Query to build the filter upon
 * @param {?filterObject} [filterArray] an (optional) 3xn array of filter (i.e. "where") conditions
 * @returns {Query} Firestore Query object
*/
const filterQuery = (query, filterArray = null) => {
return filterArray
? filterArray.reduce((accQuery, filter) => {
return accQuery.where(filter.fieldRef, filter.opStr, filter.value);
}, query)
: query;
};
/**
 * @private
 * @typedef {Object} sortObject
 * @property {!String} fieldRef
 * @property {!String} dirStr
*/
/**
* ----------------------------------------------------------------------
 * @private
 * @function sortQuery
 * builds and returns a query built from an array of sort (i.e. "orderBy")
 * conditions
 * @param {Query} query collectionReference or Query to build the sort upon
 * @param {?sortObject} [sortArray] an (optional) 2xn array of sort (i.e. "orderBy") conditions
 * @returns Firestore Query object
*/
const sortQuery = (query, sortArray = null) => {
return sortArray
? sortArray.reduce((accQuery, sortEntry) => {
return accQuery.orderBy(sortEntry.fieldRef, sortEntry.dirStr || "asc");
//note "||" - if dirStr is not present(i.e. falsy) default to "asc"
}, query)
: query;
};
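For clarity, here is a usage sketch of the class above (the collection, filter, and sort values are invented; it assumes the module's PAGINATE_* constants and its dbReference/RecordsFromSnapshot helpers are in scope):
// Page through a "users" collection 25 records at a time.
const pager = new PaginateFetch(
  "users",
  [{ fieldRef: "status", opStr: "==", value: "active" }], // filterArray
  [{ fieldRef: "lastName", dirStr: "asc" }],              // sortArray
  null,                                                   // refPath
  25                                                      // page size
);
pager.PageForward()
  .then((records) => {
    // render the first page, then fetch the next one
    return pager.PageForward();
  })
  .then((moreRecords) => pager.PageBack()); // and step back again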
If you are using (or can use) orderBy in your queries, you can use startAfter with the last value from your previous query. For example, if you order by date, you can use the last date for your next pagination query.
startAfter method reference
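The same idea in the JavaScript SDK, as a minimal sketch (db is a Firestore instance; collection and field names are placeholders):
// First page, ordered by the field we paginate on.
db.collection("items").orderBy("date").limit(10).get()
  .then((snapshot) => {
    const lastDoc = snapshot.docs[snapshot.docs.length - 1];
    // Next page: start after the last value (or pass the snapshot itself).
    return db.collection("items")
      .orderBy("date")
      .startAfter(lastDoc.get("date"))
      .limit(10)
      .get();
  });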
I have the following DynamoDB query, which returns the first record with the hash apple and a timestamp less than or equal to some_timestamp.
Map<String, Condition> keyConditions = newHashMap();
keyConditions.put("HASH", new Condition().
withComparisonOperator(EQ).
withAttributeValueList(new AttributeValue().withS("apple")))
);
keyConditions.put("TIMESTAMP", new Condition().
withComparisonOperator(LE).
withAttributeValueList(new AttributeValue().withN(some_timestamp)))
);
QueryResult queryResult = dynamoDBClient.query(
new QueryRequest().
withTableName("TABLE").
withKeyConditions(keyConditions).
withLimit(1).
withScanIndexForward(SCAN_INDEX_FORWARD)
);
I need to execute many queries of this kind, so my question is: is it possible to batch-execute these queries? Something like the following API.
Map<String, Condition> keyConditions = newHashMap();
keyConditions.put("HASH", new Condition().
withComparisonOperator(EQ).
withAttributeValueList(new AttributeValue().withS("apple")))
);
keyConditions.put("TIMESTAMP", new Condition().
withComparisonOperator(LE).
withAttributeValueList(new AttributeValue().withN(some_timestamp)))
);
QueryRequest one = new QueryRequest().
withTableName("TABLE").
withKeyConditions(keyConditions).
withLimit(1).
withScanIndexForward(SCAN_INDEX_FORWARD);
keyConditions = newHashMap();
keyConditions.put("HASH", new Condition().
withComparisonOperator(EQ).
withAttributeValueList(new AttributeValue().withS("pear")))
);
keyConditions.put("TIMESTAMP", new Condition().
withComparisonOperator(LE).
withAttributeValueList(new AttributeValue().withN(some_other_timestamp)))
);
QueryRequest two = new QueryRequest().
withTableName("TABLE").
withKeyConditions(keyConditions).
withLimit(1).
withScanIndexForward(SCAN_INDEX_FORWARD);
ArrayList<QueryRequest> queryRequests = new ArrayList<QueryRequest>() {{
add(one);
add(two);
}};
List<QueryResult> queryResults = dynamoDBClient.query(queryRequests);
From a very similar question in the AWS forums here:
DynamoDB's Query API only supports a single "use" of the index in the query operation, and as a result, the "hash" of the index you're querying has to be specified as an EQ condition. DynamoDB does not currently have any kind of "batch query" API, so unfortunately what you're looking for is not possible today in a single API call. If these were GetItem requests (not suitable for your use case though), you could issue a BatchGetItem request.
In the meantime, since it looks like you're using Java, my recommendation would be to use threads to issue multiple query requests in parallel. Here's some sample code that accomplishes this, but you'll want to consider how you want your application to handle pagination / partial results, and errors:
/**
* Simulate a "Batch Query" operation in DynamoDB by querying an index for
* multiple hash keys
*
* Resulting list may be incomplete if any queries time out. Returns a list of
* QueryResult so that LastEvaluatedKeys can be followed. A better implementation
* would answer the case where some queries fail, deal with pagination (and
* Limit), have configurable timeouts. One improvement on this end would be
* to make a simple immutable bean that contains a query result or exception,
* as well as the associated request. Maybe it could even be called back with
* a previous list for pagination.
*
 * @param hashKeyValues (you'll also need table name / index name)
 * @return a list of query results for the queries that succeeded
 * @throws InterruptedException
*/
public List<QueryResult> queryAll(String... hashKeyValues)
throws InterruptedException {
// initialize accordingly
int timeout = 2 * 1000;
ExecutorService executorService = Executors.newFixedThreadPool(10);
final List<QueryResult> results =
new ArrayList<QueryResult>(hashKeyValues.length);
final CountDownLatch latch =
new CountDownLatch(hashKeyValues.length);
// Loop through the hash key values to "OR" in the final list of results
for (final String hashKey : hashKeyValues) {
executorService.submit(new Runnable() {
@Override
public void run() {
try {
// fill in parameters
QueryResult result = dynamodb.query(new QueryRequest()
.withTableName("MultiQueryExample")
.addKeyConditionsEntry("City", new Condition()
.withComparisonOperator("EQ")
.withAttributeValueList(new AttributeValue(hashKey))));
// one of many flavors of dealing with concurrency
synchronized (results) {
results.add(result);
}
} catch (Throwable t) {
// Log and handle errors
t.printStackTrace();
} finally {
latch.countDown();
}
}
});
}
// Wait for all queries to finish or time out
latch.await(timeout, TimeUnit.MILLISECONDS);
// return a copy to prevent concurrent modification of
// the list in the face of timeouts
synchronized (results) {
return new ArrayList<QueryResult>(results);
}
}
I'm rusty with delegates and closures in JavaScript, and think I came across a situation where I'd like to try to use one or both.
I have a web app that behaves a lot like a forms app, with fields hitting a server to change data on every onBlur or onChange (depending on the form element). I use ASP.NET 3.5's Web Services and jQuery to do most of the work.
What you need to know for the example:
isBlocking() is a simple mechanism to force some functions to be synchronous (like a mutex)
isDirty(el) checks to make sure the value of the element actually changed before wasting a call to the server
Agent() returns a singleton instance of the WebService proxy class
getApplicationState() passes a base-64 encoded string to the web service. This string represents the state of the application -- the value of the element and the state are passed to a service that does some calculations. The onSuccess function of the web service call returns the new state, which the client processes and updates the entire screen.
waitForCallback() sets a flag that isBlocking() checks for the mutex
Here's an example of one of about 50 very similar functions:
function Field1_Changed(el) {
if (isBlocking()) return false;
if (isDirty(el)) {
Agent().Field1_Changed($j(el).val(), getApplicationState());
waitForCallback();
}
}
The big problem is that the Agent().Field_X_Changed methods can accept a different number of parameters, but it's usually just the value and the state. So, writing these functions gets repetitive. I have done this so far to try out using delegates:
function Field_Changed(el, updateFunction, checkForDirty) {
if (isBlocking()) return false;
var isDirty = true; // assume true
if (checkForDirty === true) {
isDirty = IsDirty(el);
}
if (isDirty) {
updateFunction(el);
waitForCallback();
}
}
function Field1_Changed(el) {
Field_Changed(el, function(el) {
Agent().Field1_Changed($j(el).val(), getTransactionState());
}, true);
}
This is ok, but sometimes I could have many parameters:
...
Agent().Field2_Changed($j(el).val(), index, count, getApplicationState());
....
What I'd ultimately like to do is make one-liner calls, something like this (notice there are no getTransactionState() calls -- I would like that automated somehow):
// Typical case: 1 value parameter
function Field1_Changed(el) {
Field_Changed(el, delegate(Agent().Field1_Changed, $j(el).val()), true);
}
// Rare case: multiple value parameters
function Field2_Changed(el, index, count) {
Field_Changed(el, delegate(Agent().Field1_Changed, $j(el).val(), index, count), true);
}
function Field_Changed(el, theDelegate, checkIsDirty) {
???
}
function delegate(method) {
/* create the change delegate */
???
}
Ok, my first question is: is this all worth it? Is this harder to read but easier to maintain, or the other way around? This is a pretty big undertaking, so I may end up putting a bounty on this one, but I'd appreciate any help you could offer. Thanks!
UPDATE
So, I've accepted an answer based on the fact that it pointed me in the right direction. I thought I'd come back and post my solution so that others who may just be starting out with delegates have something to model from. I'm also posting it to see if anybody wants to try to optimize it or make suggestions. Here's the common Field_Changed() method I came up with, with checkForDirty and omitState being optional parameters:
function Field_Changed(el, args, delegate, checkForDirty, omitState) {
if (isBlocking()) return false;
if (!$j.isArray(args) || args.length == 0) {
alert('The "args" parameter in Field_Changed() must be an array.');
return false;
}
if (typeof checkForDirty === 'undefined') checkForDirty = true; // default only when omitted ('|| true' would always be true)
var isDirty = true; // assume true for updates that don't require this check
if (checkForDirty === true) {
isDirty = fieldIsDirty(el);
}
if (isDirty) {
omitState = omitState || false; // assume false if not passed
if (!omitState) {
var state = getTransactionState();
args.push(state);
}
delegate.apply(this, args);
waitForCallback();
}
}
It handles everything I need it to (checking for dirty, applying the application state when I need it to, and forcing synchronous web service calls). I use it like this:
function TransactionAmount_Changed(el) {
Field_Changed(el, [cleanDigits($j(el).val())], Agent().TransactionAmount_Changed, true);
}
cleanDigits strips out junk characters the user may have tried to type in. So, thanks to everyone, and happy coding!
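For reference, cleanDigits() could be as simple as the following sketch (my guess at an implementation, not the original):
function cleanDigits(value) {
    // Hypothetical: keep digits and the decimal point, drop everything else.
    return String(value).replace(/[^0-9.]/g, '');
}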
OK, a few things:
Delegates are extremely simple in JavaScript, since functions are first-class objects.
Function.apply lets you call a function with an array of arguments.
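For instance:
function add(a, b, c) { return a + b + c; }
add.apply(null, [1, 2, 3]); // 6 -- same as add(1, 2, 3)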
So you can write it this way
function Field_Changed(delegate, args)
{
if (isBlocking()) return false;
if (isDirty(args[0])) { //args[0] is el
delegate.apply(this, args);
waitForCallback();
}
}
And call it as:
Field_Changed(Agent().Field2_Changed, [el, getApplicationState(), whatever...]);
I have been using the following utility function that I wrote a long time ago:
/**
 * @classDescription This class contains different utility functions
*/
function Utils()
{}
/**
* This method returns a delegate function closure that will call
* targetMethod on targetObject with specified arguments and with
* arguments specified by the caller of this delegate
*
 * @param {Object} targetObj - the object to call the method on
 * @param {Object} targetMethod - the method to call on the object
 * @param {Object} [arg1] - optional argument 1
 * @param {Object} [arg2] - optional argument 2
 * @param {Object} [arg3] - optional argument 3
*/
Utils.createDelegate = function( targetObj, targetMethod, arg1, arg2, arg3 )
{
// Create an array containing the arguments
var initArgs = new Array();
// Skip the first two arguments as they are the target object and method
for( var i = 2; i < arguments.length; ++i )
{
initArgs.push( arguments[i] );
}
// Return the closure
return function()
{
// Add the initial arguments of the delegate
var args = initArgs.slice(0);
// Add the actual arguments specified by the call to this list
for( var i = 0; i < arguments.length; ++i )
{
args.push( arguments[i] );
}
return targetMethod.apply( targetObj, args );
};
}
So, in your example, I would replace
function Field1_Changed(el) {
Field_Changed(el, delegate(Agent().Field1_Changed, $j(el).val()), true);
}
With something along the lines
function Field1_Changed(el) {
Field_Changed(el, Utils.createDelegate(Agent(), Agent().Field1_Changed, $j(el).val()), true);
}
Then, inside of Agent().FieldX_Changed I would manually call getApplicationState() (and encapsulate that logic into a generic method to process field changes that all of the Agent().FieldX_Changed methods would internally call).
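A sketch of what that generic method could look like (processFieldChange is an invented name; it assumes the Agent() and getApplicationState() helpers from the question):
// Hypothetical helper: appends the application state in one place, so the
// individual FieldX_Changed wrappers no longer have to pass it around.
function processFieldChange(serviceMethod, valueArgs) {
    var args = valueArgs.slice(0);     // copy the field-specific values
    args.push(getApplicationState());  // state appended centrally
    return serviceMethod.apply(Agent(), args);
}
// e.g. processFieldChange(Agent().Field2_Changed, [$j(el).val(), index, count]);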
Closures and delegates in JavaScript:
http://www.terrainformatica.com/2006/08/delegates-in-javascript/
http://www.terrainformatica.com/2006/08/delegates-in-javascript-now-with-parameters/