Add an attribute to every item in a DynamoDB table with millions of items - amazon-dynamodb

I am pretty new to AWS, but I need to add an attribute (and set the value to something) to every single item in a DynamoDB table.
I'm trying to write a generic script that will be able to do this to any table passed into it.
After a little bit of digging, it looks like I should be able to do this just by doing a scan of the table, getting the items, and then updating every single item.
My questions are:
(1) Does this seem like a reasonable approach?
(2) Is there an easier / better way to do this automatically? (a table-wide addition of an attribute to every item in the table?)

Yes, it sounds reasonable: not ideal, but reasonable.
Sure, there is always a better way, but this is an easy way. Here is a snippet I ran as a local JS file...
// Load the AWS SDK and create a DocumentClient for the us-east-1 region.
// Both the scan and the update calls in this script go through this client.
const AWS = require('aws-sdk');
const dynamoDb = new AWS.DynamoDB.DocumentClient({
region:'us-east-1'
});
/**
 * Adds `newAttribute: false` to every item of the table named by
 * `YOUR_TABLE_NAME`.
 *
 * The table is first read in full through `scanDB`; then one `update` request
 * is issued per item, keyed on that item's `indexKey` attribute. Requests are
 * sent in bounded groups instead of all at once, so a table with millions of
 * items does not start millions of simultaneous requests.
 *
 * @param {*} event - Not used by this function.
 * @param {*} context - Not used by this function.
 * @returns {Promise<Array<Object>|Error|undefined>} The update responses in
 *   scan order; the caught error object if the scan fails; `undefined` if any
 *   update fails (the error is logged and remaining groups are not sent).
 */
async function main(event, context) {
  // Upper bound on update requests in flight at the same time.
  const MAX_CONCURRENT_UPDATES = 25;

  let tableContents;
  try {
    // get items from dynamo
    tableContents = await scanDB({ TableName: `${YOUR_TABLE_NAME}` });
  } catch (err) {
    console.log(err);
    return err;
  }

  const responses = [];
  try {
    for (let start = 0; start < tableContents.length; start += MAX_CONCURRENT_UPDATES) {
      const group = tableContents.slice(start, start + MAX_CONCURRENT_UPDATES);
      // Wait for the whole group before starting the next one.
      const groupResponses = await Promise.all(
        group.map((item) =>
          dynamoDb
            .update({
              ExpressionAttributeValues: {
                ":newAttribute": false,
              },
              Key: {
                "indexKey": item.indexKey,
              },
              TableName: `${YOUR_TABLE_NAME}`,
              UpdateExpression: "SET newAttribute = :newAttribute",
            })
            .promise()
        )
      );
      for (const groupResponse of groupResponses) {
        responses.push(groupResponse);
      }
    }
  } catch (err) {
    console.log(err);
    return undefined;
  }
  return responses;
}
/**
 * Reads every item of a table by repeating `scan` until the service stops
 * returning a `LastEvaluatedKey`.
 *
 * @param {Object} params - Parameters for `dynamoDb.scan`. The object is
 *   copied for each page, so the caller's object is left untouched.
 * @returns {Promise<Array<Object>>} All items from all pages, in page order.
 */
async function scanDB(params) {
  const dynamoContents = [];
  let exclusiveStartKey = params.ExclusiveStartKey;
  do {
    // Only send ExclusiveStartKey when there is a page to resume from.
    const pageParams =
      exclusiveStartKey === undefined
        ? { ...params }
        : { ...params, ExclusiveStartKey: exclusiveStartKey };
    const page = await dynamoDb.scan(pageParams).promise();
    for (const item of page.Items) {
      dynamoContents.push(item);
    }
    exclusiveStartKey = page.LastEvaluatedKey;
  } while (exclusiveStartKey !== undefined);
  return dynamoContents;
}
// Start the run when this file is executed; the returned promise is not awaited here.
main();
Good luck!

Related

Issue with dynamo db scan not returning results

I'm new to DynamoDB and have spent a bit of time searching Stack Overflow for my issue, but without any luck. I currently have a table with a pretty default setup, and some data is being populated into it (confirmed by using the item explorer tab in the console). I was referencing the AWS DynamoDB SDK and also this post, stackOverflow 44589967, to basically return the first item from the table. It does not matter which item, just any item. So here is a very basic Lambda, and I did confirm permissions:
const AWS = require('aws-sdk');
const ddb = new AWS.DynamoDB.DocumentClient({region: 'us-east-1'});
// Lambda entry point: scans DynamoDB_Project with a Limit of 1 and logs the awaited value.
exports.handler = async function(event, context) {
// NOTE(review): the value awaited here is whatever ddb.scan(...) returns directly;
// .promise() is not called on it.
let result = await ddb.scan({
"TableName": "DynamoDB_Project",
"Limit": 1,
})
console.log(result);
}
I am getting null and am wondering if it's actually caused by how I am storing the data
screen shot of table explorer
which is similar to my scan
// Write call the asker uses to store one reading built from the incoming event.
// NOTE(review): DateTempHumid sits directly beside TableName rather than under an
// `Item` key, and .promise() is not called on the request. Confirm against the
// DocumentClient.put documentation.
await ddb.put({
"TableName": "DynamoDB_Project",
"DateTempHumid": {
"Date": event.date,
"Humidty": event.humidty,
"Temperature": event.temperature
},
})
Did you forget to add the .promise() function to the string of calls?
const AWS = require('aws-sdk');
const ddb = new AWS.DynamoDB.DocumentClient({region: 'us-east-1'});
exports.handler = async function(event, context) {
let result = await ddb.scan({
"TableName": "DynamoDB_Project",
"Limit": 1,
}).promise(); // HERE
console.log(result);
}
Hope that helps

Is this the best way to update all documents in a Firestore collection?

There are similar questions and answers out there, but none that seem concerned about concurrency or using transactions when iterating over the entire collection.
To ensure that I'm not reading one version of a document, updating it, then clobbering some change that happened since the read, I'd like to use transactions.
Firestore transactions are limited to 500 updates.
My approach is to use pagination, getting 500 document references at a time.
Then use those refs with getAll within a transaction to guarantee no concurrent modification can happen.
Is it OK/wise to pass 500 refs to getAll like this?
Is this approach efficient, from a performance and cost perspective?
Pseudo-code (TypeScript-like)
// Page through the collection 500 documents at a time and rewrite each page
// inside its own transaction.
const pageSize = 500;
let lastSnapshot = null;
let count = 0;
// Declared outside the loop body so the `while` condition can read it.
let refs = [];
do {
  // Get up to 500 (pageSize) document references, resuming after the last
  // document of the previous page.
  let query = firestore.collection('myCollection').limit(pageSize);
  if (lastSnapshot) {
    query = query.startAfter(lastSnapshot);
  }
  const pageSnapshot = await query.get();
  refs = pageSnapshot.docs.map(d => d.ref);
  if (refs.length === 0) {
    // Nothing left to update.
    break;
  }
  // Remember where this page ended so the next query continues from there.
  lastSnapshot = pageSnapshot.docs[pageSnapshot.docs.length - 1];
  // Start a transaction
  await firestore.runTransaction(async transaction => {
    // Get all the documents again, this time in a transaction
    const snapshots = await transaction.getAll(...refs);
    for (const snapshot of snapshots) {
      // Perform some update on each document
      transaction.update(snapshot.ref, {someField: 0, anotherField: "foo"});
    }
  });
  count += refs.length;
} while (refs.length === pageSize);
Because you are making use of Transaction#getAll(), I'm going to assume you are using the Node client for Firestore. This means you have access to Transaction#get() and can execute the query directly inside the transaction. This change alone cuts your document reads in half because you are no longer checking them twice.
// Page through the collection, running each page's query and its updates
// inside the same transaction.
const pageSize = 500;
const baseQuery = db.collection('myCollection').limit(pageSize); // highly recommend using orderBy here on something like a 'creationTime' property
// Last document of the previous page; null until the first page has been read.
let lastSnapshot = null;
let totalCount = 0;
let pageCount = 0;
do {
  const pageQuery = lastSnapshot ? baseQuery.startAfter(lastSnapshot) : baseQuery;
  pageCount = await db.runTransaction(async (transaction) => {
    const querySnapshot = await transaction.get(pageQuery);
    querySnapshot.forEach(docSnap => {
      transaction.update(docSnap.ref, { someField: 0, anotherField: "foo" });
      // Ends up holding the final document of this page.
      lastSnapshot = docSnap;
    });
    return querySnapshot.size; // return the page count inside the runTransaction's Promise
  });
  totalCount += pageCount;
} while (pageCount === pageSize); // a short page means the collection is exhausted

How to get list of documents and not listen for changes in flutter cloud firestore?

My application is again fetching list of items from firestore whenever I make a sort locally.
Due to which I am losing my sorted list and getting the original list back again.
Essentially, I am looking for a .once() alternative as I used in firebase realtime db with JS.
// Load the items, then store them in widget state and mark processing as done.
fetchItemsFromDb().then((itemsFromDb) {
setState(() {
items = itemsFromDb;
isProcessed = true;
});
});
/// Reads the `items` collection once with `getDocuments()` and converts every
/// document into an [Item].
fetchItemsFromDb() async {
  final snapshot = await Firestore.instance.collection('items').getDocuments();
  final List<Item> loaded = snapshot.documents.map((DocumentSnapshot doc) {
    final fields = doc.data;
    return Item(
      fields['itemName'],
      // quantity and price are parsed from their stored values into ints
      int.parse(fields['quantity']),
      int.parse(fields['price']),
      fields['veg'],
      fields['uid'],
      fields['timestamp'],
      LatLng(fields['location'].latitude, fields['location'].longitude),
    );
  }).toList();
  return loaded;
}
fetchItemsFromDb() should be working the way you expect it to. It could be that the code calling the function:
// Call site from the question: every time this runs, `items` is replaced with the fetched list.
fetchItemsFromDb().then((itemsFromDb) {
setState(() {
items = itemsFromDb;
isProcessed = true;
});
});
is being run again when you do not expect it. Does that code live in a build method? If so it will run anytime the widget it is in rebuilds, which depending on how you are doing your local sort may be happening. If you only need it to run once maybe add it to the initState() function for the widget.

Can Firestore update multiple documents matching a condition, using one query?

In other words, I'm trying to figure out what is the Firestore equivalent to this in SQL:
UPDATE table SET field = 'foo' WHERE <condition>
Yes, I am asking how to update multiple documents, at once, but unlike the linked questions, I'm specifically asking how to do this in one shot, without reading anything into memory, because there's no need to do so when all you want is to set a flag on all documents matching a condition.
// Hypothetical call the asker expected to exist: apply one update to every
// document matching the condition, without reading the documents first.
db.collection('table')
.where(...condition...)
.update({
field: 'foo',
});
is what I expected to work, but CollectionReference doesn't have an .update method.
The Transactions and Batched Writes documentation mentions transactions and batched writes. Transactions are out because "A transaction consists of any number of get() operations followed by any number of write operations". Batched writes are also not a solution because they work document-by-document.
With MongoDB, this would be
// Set `field` on every document matching the filter in one server-side call.
// updateMany is used because plain update() modifies only the first matching
// document unless { multi: true } is passed.
db.table.updateMany(
  { /* where clause */ },
  { $set: { field: 'foo' } }
)
So, can Firestore update multiple documents with one query, the way SQL database or MongoDB work, i.e. without requiring a round-trip to the client for each document? If not, how can this be done efficiently?
Updating a document in Cloud Firestore requires knowing its ID. Cloud Firestore does not support the equivalent of SQL's update queries.
You will always have to do this in two steps:
Run a query with your conditions to determine the document IDs
Update the documents with individual updates, or with one or more batched writes.
Note that you only need the document ID from step 1. So you could run a query that only returns the IDs. This is not possible in the client-side SDKs, but can be done through the REST API and Admin SDKs as shown here: How to get a list of document IDs in a collection Cloud Firestore?
Frank's answer is actually a great one and does solve the issue.
But for those in a hurry maybe this snippet might help you:
/**
 * Sets `newDocumentBody` on every document of a collection whose `message`
 * equals 'goodbye world'.
 *
 * The returned promise settles only after every batch has been committed, so
 * callers can await completion and catch failures. Writes are split into
 * batches of at most 500, the batched-write limit this snippet assumes.
 *
 * @param {string} collectionName - Name of the collection to update.
 * @returns {Promise<void>} Resolves once all batches are committed; rejects
 *   with the first query or commit error.
 */
const updateAllFromCollection = async (collectionName) => {
  const firebase = require('firebase-admin')
  // Largest number of writes placed in a single batch.
  const maxWritesPerBatch = 500
  const firestore = firebase.firestore()
  const collection = firestore.collection(collectionName)
  const newDocumentBody = {
    message: 'hello world'
  }

  const response = await collection.where('message', '==', 'goodbye world').get()
  for (let start = 0; start < response.docs.length; start += maxWritesPerBatch) {
    const batch = firestore.batch()
    response.docs.slice(start, start + maxWritesPerBatch).forEach((doc) => {
      const docRef = firestore.collection(collectionName).doc(doc.id)
      batch.update(docRef, newDocumentBody)
    })
    // Commit one batch at a time so a failure stops the run.
    await batch.commit()
  }
  console.log(`updated all documents inside ${collectionName}`)
}
Just change what's inside the where function that queries the data and the newDocumentBody which is what's getting changed on every document.
Also don't forget to call the function with the collection's name.
The simplest approach is this
// Mark every OrderItems document whose `order` equals 2 with status 1.
const ORDER_ITEMS = firebase.firestore().collection('OrderItems')
ORDER_ITEMS.where('order', '==', 2)
  .get()
  .then(snapshots => {
    const updates = []
    snapshots.forEach(orderItem => {
      updates.push(ORDER_ITEMS.doc(orderItem.id).update({ status: 1 }))
    })
    // Return the update promises so a failed write reaches the catch below
    // instead of being dropped.
    return Promise.all(updates)
  })
  .catch(err => {
    console.error(err)
  })
For Dart / Flutter users (edited from Renato Trombini Neto's answer)
// CollectionReference collection = FirebaseFirestore.instance.collection('something');
// This collection can be a subcollection.
/// Sets `newDocumentBody` on every document in [collection] whose `uid`
/// matches the currently signed-in user, using one batched write.
///
/// The returned future completes only after the batch has been committed, so
/// callers can await it and a failed commit surfaces as an error.
_updateAllFromCollection(CollectionReference collection) async {
  var newDocumentBody = {"username": ''};
  User firebaseUser = FirebaseAuth.instance.currentUser;
  var response = await collection.where('uid', isEqualTo: firebaseUser.uid).get();
  var batch = FirebaseFirestore.instance.batch();
  for (var doc in response.docs) {
    DocumentReference docRef = collection.doc(doc.id);
    batch.update(docRef, newDocumentBody);
  }
  // Wait for the commit instead of leaving the future unobserved.
  await batch.commit();
  print('updated all documents inside Collection');
}
If anyone's looking for a Java solution:
/**
 * Marks every "student" document with graduated == false as graduated,
 * working in batches of at most 500 writes.
 *
 * Each pass re-runs the query, so documents already updated no longer match.
 * The loop stops when the query returns no documents or fewer than a full
 * batch.
 *
 * @return true when every batch was committed; false if any step failed (the
 *     failure is reported on standard error)
 */
public boolean bulkUpdate() {
  // see https://firebase.google.com/docs/firestore/quotas#writes_and_transactions
  final int writeBatchLimit = 500;
  int totalUpdates = 0;
  try {
    while (true) {
      // the query goes here
      List<QueryDocumentSnapshot> documentsInBatch =
          this.firestoreDB.collection("student")
              .whereEqualTo("graduated", false)
              .limit(writeBatchLimit)
              .get()
              .get()
              .getDocuments();
      if (documentsInBatch.isEmpty()) {
        break;
      }
      WriteBatch writeBatch = this.firestoreDB.batch();
      // what I want to change goes here
      documentsInBatch.forEach(
          document -> writeBatch.update(document.getReference(), "graduated", true));
      writeBatch.commit().get();
      totalUpdates += documentsInBatch.size();
      if (documentsInBatch.size() < writeBatchLimit) {
        // A short page means nothing is left to update.
        break;
      }
    }
    System.out.println("Number of updates: " + totalUpdates);
    return true;
  } catch (InterruptedException e) {
    // Preserve the interrupt for callers further up the stack.
    Thread.currentThread().interrupt();
    System.err.println("Bulk update interrupted after " + totalUpdates + " updates: " + e);
    return false;
  } catch (Exception e) {
    System.err.println("Bulk update failed after " + totalUpdates + " updates: " + e);
    return false;
  }
}
Combining the answers from Renato and David, plus async/await syntax for the batch part. It also encloses them in a try/catch in case any promise fails:
/**
 * Sets `newDocumentBody` on every document of a collection whose `message`
 * equals 'goodbye world'.
 *
 * Writes are committed in batches of at most 500, so a result set larger
 * than the batched-write limit this snippet assumes is still updated in
 * full. Any error is logged with console.error and not rethrown.
 *
 * @param {string} collectionName - Name of the collection to update.
 * @returns {Promise<void>} Resolves when the run has finished or failed.
 */
const updateAllFromCollection = async (collectionName) => {
  const firebase = require('firebase-admin');
  // Largest number of writes placed in a single batch.
  const MAX_WRITES_PER_BATCH = 500;
  const firestore = firebase.firestore();
  const collection = firestore.collection(collectionName);
  const newDocumentBody = { message: 'hello world' };
  try {
    const response = await collection.where('message', '==', 'goodbye world').get();
    for (let start = 0; start < response.docs.length; start += MAX_WRITES_PER_BATCH) {
      const batch = firestore.batch();
      response.docs.slice(start, start + MAX_WRITES_PER_BATCH).forEach((doc) => {
        batch.update(doc.ref, newDocumentBody);
      });
      await batch.commit();
    }
    console.log(`updated all documents inside ${collectionName}`);
  } catch (err) {
    console.error(err);
  }
}
I like some of the answers but I feel this is cleaner:
import * as admin from "firebase-admin";
const db = admin.firestore();
const updates = { status: "pending" }
{
  // Fetch every document whose status is "open", queue one update per
  // document on a single batch, then commit the batch.
  const openDocs = await db
    .collection("COLLECTION_NAME")
    .where("status", "==", "open")
    .get();
  const batch = db.batch();
  for (const doc of openDocs.docs) {
    batch.update(doc.ref, updates);
  }
  await batch.commit();
}
It uses batched updates and the "ref" from the doc.
If you have already gathered uids for updating collections, simply do these steps.
// Update the documents whose ids are already known, one after another.
if (uids.length) {
  for (const uid of uids) {
    const docRef = db.collection("collectionName").doc(uid);
    await docRef.update({"fieldName": false});
  }
}

React native function return nothing

I'm trying to do something simple here - connect to a sqlite database and return the number of records. In the code below I can output the len variable with console.log, but nothing gets returned. Am I missing something obvious? Thanks.
// Database handle for favorites.db, used by the component below.
const db = SQLite.openDatabase({ name: 'favorites.db' });
// Screen component that renders whatever fetch() returns inside a <Text>.
export default class PlayerScreen extends React.Component {
// Runs "SELECT * FROM items" inside a transaction.
// NOTE: `return len` returns from the executeSql success callback, not from
// fetch(); fetch() itself has no return statement.
fetch(){
console.log('fetching data from database');
var query = "SELECT * FROM items";
var params = [];
db.transaction((tx) => {
tx.executeSql(query,params, (tx, results) => {
var len = results.rows.length;
return len;
}, function(){
// Error callback for executeSql: only logs.
console.log('Profile: Something went wrong');
});
});
}
render() {
return <Text>{this.fetch()}</Text>
}
}
A lot of things are wrong here. Fetch is an asynchronous concept and React rendering is synchronous. You cannot use fetch or anything asynchronously inside the render method.
So, what should you do instead?
First, do the fetch in the componentDidMount of the component.
Also set an initial state in componentWillMount
// Seed the component state with a length of 0.
// NOTE(review): componentWillMount is a legacy lifecycle in newer React versions,
// where initial state is usually set in the constructor. Confirm against the React version in use.
componentWillMount() {
this.state = { length: 0 };
}
componentDidMount() {
// I use call here to make sure this.fetch gets the this context
this.fetch.call(this);
}
Secondly, attach the results to the component's state in your fetch method
fetch() {
var query = "SELECT * FROM items";
var params = [];
db.transaction((tx) => {
tx.executeSql(query,params, (tx, results) => {
var len = results.rows.length;
// this.setState will trigger a re-render of the component
this.setState({ length: len });
}, function(){
console.log('Profile: Something went wrong');
});
});
}
Now, in the render method, you can use this.state.length to render the value.
render() {
return <Text>{this.state.length}</Text>
}
Hope this helps you out
Just putting this here for any other poor souls who this may help since this thread was the top result for me:
SQL isn't case sensitive, but React is. All my variables were lowercase, but my SQL tables used uppercase. So "SELECT key" actually returned "Key".
Therefore, rewriting the SQL query as "SELECT key as key" fixed it for me, since the first "key" isn't case sensitive, but the second one is.
Alternatively, just change the database headers to whatever case you'll be using in your program.

Resources