waitForResponse does not work in the cluster.tasks callback - web-scraping

I need to open 20 pages parallelly and click on a button then wait for a response after that get the data from a tag. and my code is:
async function getPageData(links) {
return new Promise(async (resolve, reject) => {
try {
const cluster = await Cluster.launch({
concurrency: Cluster.CONCURRENCY_PAGE,
maxConcurrency: 200,
monitor: true,
});
let allData = [];
await cluster.task(async function getData({ page, data: url }) {
await page.goto(url, {
waitUntil: 'networkidle2',
});
const buttonQuery = 'button[role=tab]:first-child';
const buttonElement = await page.waitForSelector(buttonQuery);
await buttonElement.click(buttonElement);
await page.waitForResponse('https://XXX'); // the problem is here
const data = await page.evaluate(getList);
const [oscillators, summary, movingAverage] = data;
allData.push({ oscillators, summary, movingAverage });
});
links.map(async function addQueue(link) {
cluster.queue(link);
});
await cluster.idle();
await cluster.close();
resolve(allData);
} catch (e) {
reject(e);
}
});
but it just work for the first time and ignore the rest of the tasks. but when I remove page.waitForResponse() the all tasks will be run as expected.
How can I make all tasks wait for their response then extract the data?

Related

Can i dispatch many actions in getServersideprops?

In my social media app in Home page i want to dispatch 3 actions from my api:
posts , users , userDetails
But this may show an error(500) on vercel because the request takes a lot of time to get all these data.
vercel Error
this error will not appear again after refreshing the page !!!
i think that's because the request takes a lot of time to get all the data.
-> getServersideProps Code
export const getServerSideProps = wrapper.getServerSideProps(
store => async (context) =>
{
const {req} = context
const session = await getSession({ req });
await store.dispatch(fetchPostsAction());
await store.dispatch(fetchUsersAction(4));
await store.dispatch(LoggedInUserAction({email:session.user.email}));
})
-> fetchPostsAction Code
"post/list",
async (_, { rejectWithValue, getState, dispatch }) => {
try
{
let link = `${URL}/api/posts`;
const { data } = await axios.get(link,{
headers: { "Accept-Encoding": "gzip,deflate,compress" }
});
console.log("#2 got the data",data)
return data;
} catch (error) {
if (!error?.response) throw error;
return rejectWithValue(error?.response?.data);
}
}
);
-> extraReducer Code
builder.addCase(createpostAction.pending, (state, action) => {
state.createPostLoading = true;
});
builder.addCase(createpostAction.fulfilled, (state, action) => {
state.postLists = [...state.postLists, action.payload.post].sort((a, b) => b.createdAt > a.createdAt ? 1 : -1)
state.createPostLoading = false;
state.isCreated = true;
state.appErr = null;
state.serverErr = null;
});
builder.addCase(createpostAction.rejected, (state, action) => {
state.createPostLoading = false;
state.appErr =
action?.payload?.message || action?.payload?.error?.message;
state.serverErr = action?.error?.message;
});
-> get posts from api Code
handler.get(async (req, res) =>
{
await db.connect();
try {
const posts = await Post.find().populate({
path: 'user',
model: 'User',
}).populate({
path:'comments',
options: {sort: {'createdAt' : -1} }
}).sort('-createdAt')
res.status(200).json({
success:true,
posts
});
} catch (err) {
res.status(500).json(err.message)
}
await db.disconnect();
})
so what is the best way to fetch all these data in next js ?
I hope there is a way to solve this problem

Returning a value from an asynchronous callback (Node.JS)

So, I have my route which console.logs 'undefined':
router.get("/validate-pin", async (req, res) => {
// restrict when done
try {
const { userId, pin } = req.query;
const isActivePin = await pinsDB.compareActivePin(userId, pin);
console.log(isActivePin)
return res.status(200).json(isActivePin);
} catch (error) {
console.log(error);
res.status(500).json({ error: "db error: ", error });
}
});
I have my compareActivePin method, which logs out the 'res' parameter, but for some reason doesn't return it:
async function compareActivePin(userId, received_pin) {
const active_pin = await db("account_pins").where({ userId, isActive: true });
const pinIsValidated = bcrypt.compareSync(
received_pin,
active_pin[0].account_pin
);
if (pinIsValidated) {
let skLocation = await db("sks").where({ userId }).select("url");
await readKey(skLocation[0].url, (res) => {
// console.log(res);
return res;
});
} else return false;
}
And I have my readKey method, which actually grabs the data I want my compareActivePin to return. This works like a charm.
const readKey = async (key, callback) => {
const aws = require("aws-sdk");
aws.config.update({
secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY,
accessKeyId: process.env.AWS_ACCESS_KEY_ID,
region: "us-east-2",
});
const s3 = new aws.S3();
const getParams = { Bucket: process.env.SK_BUCKET, Key: `${key}.txt` };
await s3.getObject(getParams, (err, data) => {
if (err) return err;
return callback(data.Body.toString());
});
};
So, just to recap. When I hit my endpoint, I pass in a userId and pin (strings). This calls the compareActivePin method which validates the pin and then, if the pin is valid, it then calls readKey, which grabs the file from S3 and returns the text within the file.
Like I said, I'm able to log it out to the console from within the readKey callback, but when I try to log it out as the returned value from the route, it comes back undefined.
Hoping someone could point me in the right direction.
Thanks...
I ended up answering my own question. I don't think it's possible to get a return value from the callback, so I ended up paring down the call from the database and sending the response from the readKey function using the router response object, like so:
//CompareActivePin Function
async function compareActivePin(userId, received_pin) {
const active_pin = await db("account_pins").where({ userId, isActive: true });
const pinIsValidated = bcrypt.compareSync(
received_pin,
active_pin[0].account_pin
);
return pinIsValidated;
}
//Router Call
router.get("/validate-pin", async (req, res) => {
// restrict when done
try {
const { userId, pin } = req.query;
const isActivePin = await pinsDB.compareActivePin(userId, pin);
if (isActivePin) {
let skLocation = await skDB.findUrl(userId);
readKeyFunc(skLocation[0].url, (result) => {
return res.status(200).json({ confirmed: isActivePin, key: result });
});
} else return res.status(401).json({ confirmed: isActivePin, key: null });
} catch (error) {
res.status(500).json({ error: "db error: ", error });
}
});
This also goes a long way toward keeping my database methods pure and separating my concerns.
Thanks, StackOverflow!

Retry multiple fetch

How do I retry this fetch x times if it fails?
The code is based on this article: https://dmitripavlutin.com/javascript-fetch-async-await/
async function fetchData() {
const [firstResponse, secondResponse] = await Promise.all([
fetch(firstUrl),
fetch(secondUrl),
]);
const first = await firstResponse.json();
const second = await secondResponse.json();
return [first, second];
}
fetchData()
.then(([first, second]) => {
console.log("success");
})
.catch((error) => {
console.log("error");
});
Since the requests are independent of each other, I'd have a utility function that will retry X times and then use that in the Promise.all. I'd also have a utility function for fetching JSON that handles the fetch API footgun where it doesn't check HTTP success (see my blog post here). So something along these lines:
// Fetch JSON
function fetchJSON(...args) {
const response = await fetch(...args);
if (!response.ok) {
throw new Error(`HTTP error ${response.status}`);
}
return response.json();
}
// Fetch JSON with up to `retries` retries
async fetchJSONWithRetry(retries, ...args) {
while (retries > 0) {
try {
const result = await fetchJSON(...args);
return result;
} catch (e) {
if (--retries === 0) {
throw e;
}
}
}
}
// Your `fetchData`
async function fetchData(retries = 5) {
const [first, second] = await Promise.all([
fetchJSONWithRetry(retries, firstUrl),
fetchJSONWithRetry(retries, secondUrl),
]);
return [first, second];
}

How to capture dynamic content from element inside an iFrame?

When I try to capture text from an element inside an iFrame where the element content changes every second, I get "undefined". What might I be doing wrong?
Code:
const { firefox } = require('playwright');
const fs = require('fs');
var url = 'http://jsfiddle.net/6vnam1jr/1/show';
var section_path = 'xpath=/html/body/div/div/div/section';
var iframe_path = 'xpath=/html/body/div/iframe';
var text_path = 'xpath=//html/body/div[2]/div[2]';
async function getElementText(browser,page){
await page.goto(url);
await page.click(section_path);
const frame = await page.$(iframe_path);
const contentFrame = await frame.contentFrame();
await sleep(1000);
let handle = await contentFrame.$eval(text_path);
console.log(handle)
// try again
await sleep(1000);
handle = await contentFrame.$eval(text_path);
console.log(handle)
closeBrowser(browser);
}
async function closeBrowser(browser){
await browser.close();
}
function sleep(ms) {
return new Promise((resolve) => {
setTimeout(resolve, ms);
});
}
(async () => {
const browser = await firefox.launch({ headless: false });
const page = await browser.newPage();
getElementText(browser,page);
})();```
Thanks for the repro. frame.$eval is an API to run a JS function in the browser which takes the element as an argument.
I believe what you are looking for is an ElementHandle to this element. You can use frame.waitForSelector or frame.$ for this purpose. I have verified that they are not undefined.
// ...
let handle = await contentFrame.waitForSelector(text_path);

AWS Lambda: Async Calls outside handler (initialization section, invoke lambda)

I would like to call an asynchronous function outside the lambda handler with by the following code:
var client;
(async () => {
var result = await initSecrets("MyWebApi");
var secret = JSON.parse(result.Payload);
client= new MyWebApiClient(secret.API_KEY, secret.API_SECRET);
});
async function initSecrets(secretName) {
var input = {
"secretName" : secretName
};
var result = await lambda.invoke({
FunctionName: 'getSecrets',
InvocationType: "RequestResponse",
Payload: JSON.stringify(input)
}).promise();
return result;
}
exports.handler = async function (event, context) {
var myReq = await client('Request');
console.log(myReq);
};
The 'client' does not get initialized. The same code works perfectly if executed within the handler.
initSecrets contains a lambda invocation of getSecrets() which calls the AWS SecretsManager
Has anyone an idea how asynchronous functions can be properly called for initialization purpose outside the handler?
Thank you very much for your support.
I ran into a similar issue trying to get next-js to work with aws-serverless-express.
I fixed it by doing the below (using typescript so just ignore the :any type bits)
const appModule = require('./App');
let server: any = undefined;
appModule.then((expressApp: any) => {
server = createServer(expressApp, null, binaryMimeTypes);
});
function waitForServer(event: any, context: any){
setImmediate(() => {
if(!server){
waitForServer(event, context);
}else{
proxy(server, event, context);
}
});
}
exports.handler = (event: any, context: any) => {
if(server){
proxy(server, event, context);
}else{
waitForServer(event, context);
}
}
So for your code maybe something like
var client = undefined;
initSecrets("MyWebApi").then(result => {
var secret = JSON.parse(result.Payload);
client= new MyWebApiClient(secret.API_KEY, secret.API_SECRET)
})
function waitForClient(){
setImmediate(() => {
if(!client ){
waitForClient();
}else{
client('Request')
}
});
}
exports.handler = async function (event, context) {
if(client){
client('Request')
}else{
waitForClient(event, context);
}
};
client is being called before it has initialised; the client var is being "exported" (and called) before the async function would have completed. When you are calling await client() the client would still be undefined.
edit, try something like this
var client = async which => {
var result = await initSecrets("MyWebApi");
var secret = JSON.parse(result.Payload);
let api = new MyWebApiClient(secret.API_KEY, secret.API_SECRET);
return api(which) // assuming api class is returning a promise
}
async function initSecrets(secretName) {
var input = {
"secretName" : secretName
};
var result = await lambda.invoke({
FunctionName: 'getSecrets',
InvocationType: "RequestResponse",
Payload: JSON.stringify(input)
}).promise();
return result;
}
exports.handler = async function (event, context) {
var myReq = await client('Request');
console.log(myReq);
};
This can be also be solved with async/await give Node v8+
You can load your configuration in a module like so...
const fetch = require('node-fetch');
module.exports = async () => {
const config = await fetch('https://cdn.jsdelivr.net/gh/GEOLYTIX/public/z2.json');
return await config.json();
}
Then declare a _config outside the handler by require / executing the config module. Your handler must be an async function. _config will be a promise at first which you must await to resolve into the configuration object.
const _config = require('./config')();
module.exports = async (req, res) => {
const config = await _config;
res.send(config);
}
Ideally you want your initialization code to run during the initialization phase and not the invocation phase of the lambda to minimize cold start times. Synchronous code at module level runs at initialization time and AWS recently added top level await support in node14 and newer lambdas: https://aws.amazon.com/blogs/compute/using-node-js-es-modules-and-top-level-await-in-aws-lambda/ . Using this you can make the init phase wait for your async initialization code by using top level await like so:
const sleep = ms => new Promise(resolve => setTimeout(resolve, ms))
console.log("start init");
await sleep(1000);
console.log("end init");
export const handler = async (event) => {
return {
statusCode: 200,
body: JSON.stringify('Hello from Lambda!'),
};
};
This works great if you are using ES modules. If for some reason you are stuck using commonjs (e.g. because your tooling like jest or ts-node doesn't yet fully support ES modules) then you can make your commonjs module look like an es module by making it export a Promise that waits on your initialization rather than exporting an object. Like so:
const sleep = ms => new Promise(resolve => setTimeout(resolve, ms))
const main = async () => {
console.log("start init");
await sleep(1000);
console.log("end init");
const handler = async (event) => {
return {
statusCode: 200,
body: JSON.stringify('Hello from Lambda!'),
};
};
return { handler };
};
# note we aren't exporting main here, but rather the result
# of calling main() which is a promise resolving to {handler}:
module.exports = main();

Resources