Puppeteer - How to use page.click() inside page.evaluate() - web-scraping

I am scraping a table, and each row has a button that opens a modal with more information. I need to scrape the information from the modal for each row, but I don't know how to open the modal. I have tried page.click(selector) inside page.evaluate(), but it didn't work.
I have tested the next code.
const users = await page.evaluate(() => {
const usersTable = Array.from(document.querySelectorAll('table#grilla > tbody > tr'))
const userInfo = usersTable.map(async userTable => {
await page.click('td > a#modificar-2')
await page.click('div > button[type=submit].close')
const username = userTable.children[2].innerText
const firstName = userTable.children[4].innerText
const lastName = userTable.children[5].innerText
const email = userTable.children[6].innerText
const estado = userTable.children[7].innerText
const fullName = firstName + lastName
return { username, fullName, email, estado }
})
return userInfo
})
I don't know how to call page.click() (or an equivalent) inside page.evaluate().

If you use page.evaluate() you're switching the context from Node (Puppeteer) to the browser, so you have to use native DOM functions there, such as
click: document.querySelector(selector).click().
If you have errors like Error: Evaluation failed: TypeError: Cannot read property 'click' of null probably the element you wanted to click isn't on the page (or it's hidden or something).

From inside that loop you would do:
userTable.querySelector('td > a#modificar-2').click()
(no await)

You can return css selectors of elements you want to click from page.evaluate and then perform page.click on them. For getting css selector for the element you can use the code from an answer to this question:
// page.evaluate() returns a Promise; await it so that page.click() receives
// the selector string itself rather than a pending Promise.
const elemToClick = await page.evaluate(() => {
  let elem;
  //...
  return cssPath(elem);

  /**
   * Builds a CSS selector path for `el` by walking up its ancestors.
   * Stops early at the first ancestor (or the element itself) that has an id.
   *
   * @param {Element} el - element to describe.
   * @returns {string|undefined} selector such as "div#main > ul > li:nth-of-type(2)",
   *   or undefined when `el` is not an Element.
   */
  function cssPath(el) {
    if (!(el instanceof Element)) return;
    const path = [];
    // `el &&` guards against a detached subtree, whose topmost parentNode is null.
    while (el && el.nodeType === Node.ELEMENT_NODE) {
      let selector = el.nodeName.toLowerCase();
      if (el.id) {
        selector += '#' + el.id;
        path.unshift(selector);
        break;
      } else {
        // Count preceding siblings with the same tag to derive :nth-of-type.
        let sib = el;
        let nth = 1;
        while ((sib = sib.previousElementSibling)) {
          if (sib.nodeName.toLowerCase() === selector) nth++;
        }
        if (nth !== 1) selector += ':nth-of-type(' + nth + ')';
      }
      path.unshift(selector);
      el = el.parentNode;
    }
    return path.join(' > ');
  }
});
// page.click() is asynchronous as well; await it so later steps run after the click.
await page.click(elemToClick);

Related

Scrolling only side menu div and others should be fixed when menu is open

I have a side menu and when it's open, the body can be partially seen. My side menu might be long so you could scroll on it. But when the menu is at the bottom you then scroll on the body, and I don't want this behaviour.
Similar to Scrolling only content div, others should be fixed but I'm using React. Other content should be scrollable when my side menu is closed. Think of the content as side menu in the example in the link. So far I'm using the same technique provided by that answer but it's ugly (kinda jQuery):
preventOverflow = (menuOpen) => { // this is called when side menu is toggled
const body = document.getElementsByTagName('body')[0]; // this should be fixed when side menu is open
if (menuOpen) {
body.className += ' overflow-hidden';
} else {
body.className = body.className.replace(' overflow-hidden', '');
}
}
// css
.overflow-hidden {
overflow-y: hidden;
}
What should I do with Reactjs?
You should make a meta component in react to change things on the body as well as changing things like document title and things like that. I made one a while ago to do that for me. I'll add it here.
Usage
render() {
return (
<div>
<DocumentMeta bodyClasses={[isMenuOpen ? 'no-scroll' : '']} />
... rest of your normal code
</div>
)
}
DocumentMeta.jsx
import React from 'react';
import _ from 'lodash';
import withSideEffect from 'react-side-effect';
var HEADER_ATTRIBUTE = "data-react-header";
var TAG_NAMES = {
META: "meta",
LINK: "link",
};
var TAG_PROPERTIES = {
NAME: "name",
CHARSET: "charset",
HTTPEQUIV: "http-equiv",
REL: "rel",
HREF: "href",
PROPERTY: "property",
CONTENT: "content"
};
/**
 * Looks up `property` on the props objects in `propsList`, searching from the
 * end of the list towards the start, and resolves it with `_.result`.
 *
 * @param {Object[]} propsList - list of props objects to search.
 * @param {string} property - name of the prop to look up.
 * @returns {*} the resolved value, or undefined when no entry has a truthy value for it.
 */
var getInnermostProperty = (propsList, property) => {
  // Array#reverse works in place. Reverse a copy so the caller's list keeps its
  // order; otherwise every second lookup would search in the opposite direction.
  return _.result(_.find(propsList.slice().reverse(), property), property);
};
/**
 * Computes the document title from the props list.
 *
 * When both a title and a titleTemplate are found, every "%s" in the template
 * is replaced by the title; otherwise the bare title (or "") is returned.
 *
 * @param {Object[]} propsList - list of props objects to search.
 * @returns {string} the title to apply.
 */
var getTitleFromPropsList = (propsList) => {
  var innermostTitle = getInnermostProperty(propsList, "title");
  var innermostTemplate = getInnermostProperty(propsList, "titleTemplate");
  if (innermostTemplate && innermostTitle) {
    // Use a replacer function so the title is inserted verbatim. A plain string
    // replacement would interpret sequences such as "$&" or "$$" in the title.
    return innermostTemplate.replace(/%s/g, () => innermostTitle);
  }
  return innermostTitle || "";
};
// Resolves the "bodyId" prop from the props list via getInnermostProperty.
var getBodyIdFromPropsList = (propsList) => getInnermostProperty(propsList, "bodyId");
/**
 * Collects the `bodyClasses` arrays of all props objects into one flat list,
 * in list order. Entries whose `bodyClasses` is missing or not an array are ignored.
 *
 * @param {Object[]} propsList - list of props objects.
 * @returns {Array} concatenation of every `bodyClasses` array found.
 */
var getBodyClassesFromPropsList = (propsList) => {
  var collected = [];
  propsList.forEach(({ bodyClasses }) => {
    if (Array.isArray(bodyClasses)) {
      collected = collected.concat(bodyClasses);
    }
  });
  return collected;
};
/**
 * Builds the list of tag descriptors (e.g. for "meta" or "link") from the
 * props list.
 *
 * @param {string} tagName - prop name holding an array of tag descriptor objects.
 * @param {string[]} uniqueTagIds - accepted but not referenced anywhere in this body.
 * @param {Object[]} propsList - list of props objects; entries without `tagName` are skipped.
 * @returns {Object[]} accumulated tag descriptors.
 */
var getTagsFromPropsList = (tagName, uniqueTagIds, propsList) => {
// Calculate list of tags, giving priority innermost component (end of the propslist)
// Lower-cased attribute values already accepted from earlier instances, keyed by attribute name.
var approvedSeenTags = {};
// All attribute names listed in TAG_PROPERTIES.
var validTags = _.keys(TAG_PROPERTIES).map(key => TAG_PROPERTIES[key]);
var tagList = propsList
.filter(props => props[tagName] !== undefined)
.map(props => props[tagName])
// reverse() here runs on the new array produced by map(), not on propsList itself.
.reverse()
.reduce((approvedTags, instanceTags) => {
// Values first seen while processing this instance's tags.
var instanceSeenTags = {};
instanceTags.filter(tag => {
// Every path through this loop body returns, so only the first enumerated
// attribute of each tag is examined. A tag with no enumerable attributes
// falls through, returns undefined and is dropped by filter().
for(var attributeKey in tag) {
// Assumes attribute values are strings (toLowerCase is called on them) - TODO confirm.
var value = tag[attributeKey].toLowerCase();
// `var` re-declaration: this is the same function-scoped variable as the loop variable.
var attributeKey = attributeKey.toLowerCase();
if (validTags.indexOf(attributeKey) == -1) {
return false;
}
if (!approvedSeenTags[attributeKey]) {
approvedSeenTags[attributeKey] = [];
}
if (!instanceSeenTags[attributeKey]) {
instanceSeenTags[attributeKey] = [];
}
// NOTE(review): approvedSeenTags[attributeKey] is an array and `_.has` tests for a
// key/path, not for membership of a value. This looks intended as an
// "already seen this value" check; confirm against the lodash docs.
if (!_.has(approvedSeenTags[attributeKey], value)) {
instanceSeenTags[attributeKey].push(value);
return true;
}
return false;
}
})
.reverse()
.forEach(tag => approvedTags.push(tag));
// Update seen tags with tags from this instance
_.keys(instanceSeenTags).forEach((attr) => {
approvedSeenTags[attr] = _.union(approvedSeenTags[attr], instanceSeenTags[attr])
});
// Resets a local that is re-created at the start of the next reduce step anyway.
instanceSeenTags = {};
return approvedTags;
}, []);
return tagList;
};
// Writes `title` to document.title; a falsy title re-assigns the current value.
var updateTitle = (title) => {
  var nextTitle = title ? title : document.title;
  document.title = nextTitle;
};
/**
 * Applies `id` to <body>.
 *
 * @param {string|null|undefined} id - id to set; null/undefined clears the attribute.
 */
var updateBodyId = (id) => {
  // setAttribute stringifies its value, so a missing id would otherwise end up
  // as the literal id="undefined" on <body>.
  if (id == null) {
    document.body.removeAttribute("id");
    return;
  }
  document.body.setAttribute("id", id);
};
/**
 * Replaces the classes on <body>: clears className, then adds each usable
 * entry of `classes` through classList. Falsy and empty entries are skipped.
 */
var updateBodyClasses = (classes) => {
  var body = document.body;
  body.className = "";
  classes.forEach((cl) => {
    var usable = cl && cl != "";
    if (usable) {
      body.classList.add(cl);
    }
  });
};
/**
 * Re-creates the managed <type> elements in <head>.
 *
 * Removes every existing `type` element carrying HEADER_ATTRIBUTE, then
 * inserts one new element per entry in `tags` at the front of <head>,
 * copying the entry's own properties as attributes and marking the element
 * with HEADER_ATTRIBUTE="true".
 *
 * @param {string} type - tag name, e.g. "meta" or "link".
 * @param {Object[]} [tags] - attribute maps; nothing is inserted when empty or missing.
 */
var updateTags = (type, tags) => {
  var headElement = document.head || document.querySelector("head");
  var selector = `${type}[${HEADER_ATTRIBUTE}]`;

  // Drop the elements inserted by a previous update.
  Array.from(headElement.querySelectorAll(selector)).forEach((stale) => {
    stale.parentNode.removeChild(stale);
  });

  if (!tags || !tags.length) {
    return;
  }

  tags.forEach((tag) => {
    var newElement = document.createElement(type);
    for (var attribute in tag) {
      if (!tag.hasOwnProperty(attribute)) {
        continue;
      }
      newElement.setAttribute(attribute, tag[attribute]);
    }
    newElement.setAttribute(HEADER_ATTRIBUTE, "true");
    headElement.insertBefore(newElement, headElement.firstChild);
  });
};
/**
 * Serializes tag descriptors to self-closing HTML tags, one per line.
 * Attribute values are encoded with HTMLEntities.encode (named references).
 *
 * NOTE(review): HTMLEntities is not among the imports shown at the top of
 * this file - confirm where it is supposed to come from.
 *
 * @param {string} type - tag name, e.g. "meta" or "link".
 * @param {Object[]} tags - attribute maps.
 * @returns {string} the tags joined with "\n".
 */
var generateTagsAsString = (type, tags) => {
  var renderAttribute = (tag, attribute) => {
    const encodedValue = HTMLEntities.encode(tag[attribute], {
      useNamedReferences: true
    });
    return `${attribute}="${encodedValue}"`;
  };

  return tags
    .map((tag) => {
      var attributeHtml = Object.keys(tag)
        .map((attribute) => renderAttribute(tag, attribute))
        .join(" ");
      return `<${type} ${attributeHtml} ${HEADER_ATTRIBUTE}="true" />`;
    })
    .join("\n");
};
/**
 * Reduces the props list to a single state object with title, metaTags,
 * linkTags, bodyId and bodyClasses. The helpers are invoked in exactly that order.
 */
var reducePropsToState = (propsList) => {
  var metaTagIds = [
    TAG_PROPERTIES.NAME,
    TAG_PROPERTIES.CHARSET,
    TAG_PROPERTIES.HTTPEQUIV,
    TAG_PROPERTIES.CONTENT
  ];
  var linkTagIds = [TAG_PROPERTIES.REL, TAG_PROPERTIES.HREF];

  var title = getTitleFromPropsList(propsList);
  var metaTags = getTagsFromPropsList(TAG_NAMES.META, metaTagIds, propsList);
  var linkTags = getTagsFromPropsList(TAG_NAMES.LINK, linkTagIds, propsList);
  var bodyId = getBodyIdFromPropsList(propsList);
  var bodyClasses = getBodyClassesFromPropsList(propsList);

  return { title, metaTags, linkTags, bodyId, bodyClasses };
};
/**
 * Pushes a reduced state onto the live document: title first, then link
 * tags, meta tags, body id and body classes.
 */
var handleClientStateChange = (state) => {
  var { title, metaTags, linkTags, bodyId, bodyClasses } = state;

  updateTitle(title);

  updateTags(TAG_NAMES.LINK, linkTags);
  updateTags(TAG_NAMES.META, metaTags);

  updateBodyId(bodyId);
  updateBodyClasses(bodyClasses);
};
/**
 * Converts a reduced state into strings: the entity-encoded title plus the
 * serialized meta and link tags.
 */
var mapStateOnServer = (state) => {
  var { title, metaTags, linkTags } = state;
  return {
    title: HTMLEntities.encode(title),
    meta: generateTagsAsString(TAG_NAMES.META, metaTags),
    link: generateTagsAsString(TAG_NAMES.LINK, linkTags)
  };
};
var DocumentMeta = React.createClass({
propTypes: {
title: React.PropTypes.string,
titleTemplate: React.PropTypes.string,
meta: React.PropTypes.arrayOf(React.PropTypes.object),
link: React.PropTypes.arrayOf(React.PropTypes.object),
children: React.PropTypes.oneOfType([
React.PropTypes.object,
React.PropTypes.array
]),
bodyClasses: React.PropTypes.array,
},
render() {
if (Object.is(React.Children.count(this.props.children), 1)) {
return React.Children.only(this.props.children);
} else if (React.Children.count(this.props.children) > 1) {
return (
<span>
{this.props.children}
</span>
);
}
return null;
},
});
DocumentMeta = withSideEffect(reducePropsToState, handleClientStateChange, mapStateOnServer)(DocumentMeta);
module.exports = DocumentMeta;
This component could probably be changed a little for your case (withSideEffect is used for both client and server side rendering... if you arent using server side rendering then its probably not completely necessary) but the component will work on client side rendering if you would like to use it there as well.
ReactJS doesn't have direct access to the <body> element, and that's the element that needs to have its overflow-y style changed. So while what you're doing isn't perhaps the prettiest code, it's not entirely wrong either.
The only real suggestion I'd give is (shudder) using inline styles on the body instead of a classname so as to avoid having to introduce the CSS declaration. As long as your menu is the only thing responsible for updating the overflow-y attribute, there's no reason you can't use an inline style on it. Mashing that down with the ?: operator results in fairly simple code:
body.style.overflowY = menuOpen ? "hidden" : "";
And then you can just delete the .overflow-hidden class in its entirety.
If for some reason multiple things are managing the overflow state of the body, you might want to stick with classnames and assign a unique one for each thing managing it, something like this:
// classList.toggle(token, force) adds "menu-open" when `force` is true and
// removes it when false. It touches only that one token, never adds a
// duplicate, and leaves classes owned by other code alone.
// Boolean() matters: an undefined `force` would flip the class instead.
body.classList.toggle('menu-open', Boolean(menuOpen));
CSS:
body.menu-open {
overflow-y: hidden;
}

How to make TagsInput to work with both auto complete & free text

UPDATE
This issue is already discussed in github here
I am using tagsinput with typeahead in bootstrap 3. The problem which I am experiencing is with the value in case if user selects the existing tag. Display text shows it right but .val() returns its actual object. Below is the code
$('#tags').tagsinput({
//itemValue: 'value',
typeahead: {
source: function (query) {
//tags = [];
//map = {};
return $.getJSON('VirtualRoomService.asmx/GetTags?pid=' + $("#<%=hdnPID.ClientID%>").val() + '&tok=' + query)
//, function (data) {
// $.each(data, function (i, tag) {
// map[tag.TagValue] = tag;
// tags.push(tag.TagValue);
// });
// return process(tags);
//});
}
}
//freeElementSelector: "#freeTexts"
});
The problem with above code is that it results as below while fetching tags from web method
This happens when the user selects an existing tag; new tags cause no issues. I tried setting itemValue and itemText on tagsinput, but that did not work, so I decided on a workaround. Since I am able to get the JSON value as ["IRDAI", Object], if I can somehow parse these objects and get the actual tag value, then I get the result I am looking for.
Below is what it appears in tags input as [object Object] for text selected by user from auto populated drop down
[![enter imt
If I i specify TagId & TagValue to itemValue & itemText as below code
$('#tags').tagsinput({
itemValue: 'TagId',
itemText: 'TagValue',
typeahead: {
source: function (query) {
//tags = [];
//map = {};
return $.getJSON('VirtualRoomService.asmx/GetTags?pid=' + $("#<%=hdnPID.ClientID%>").val() + '&tok=' + query)
//, function (data) {
// $.each(data, function (i, tag) {
// //map[tag.TagValue] = tag;
// tags.push(tag.TagValue);
// });
//});
// return process(tags);
}
}
//freeElementSelector: "#freeTexts"
});
Then the result is displaying as below when below code is executed
var arr = junit.Tags.split(',');
for (var i = 0; i < arr.length; i++) {
$('#tags').tagsinput('add', arr[i]);
}
Given your example JSON response from your data source:
[
{"TagId":"1", "TagValue":"eSign"},
{"TagId":"2", "TagValue":"eInsurance Account"}
]
You'll need to tell tagsinput how to map the attributes from your response objects using itemValue and itemText in your tagsinput config object. It looks like you may have started down that path, but didn't reach the conclusion, which should look something like:
$('#tags').tagsinput({
  // Tell tagsinput which properties of the response objects hold the
  // submitted value and the displayed text.
  itemValue: 'TagId',
  itemText: 'TagValue',
  typeahead: {
    source: function (query) {
      // Pass the parameters as a data object so jQuery URL-encodes them; a
      // query containing "&", "#" or spaces would otherwise corrupt the URL.
      return $.getJSON('VirtualRoomService.asmx/GetTags', {
        pid: $("#<%=hdnPID.ClientID%>").val(),
        tok: query
      });
    }
  }
});
Be sure to checkout the tagsinput examples.
This may not be the clean solution but I got around this issue through below parsing method. Hope this helps someone.
// Flatten the tagsinput items into "tag1,tag2," (each entry followed by a
// comma, as before).
var items = $('#tags').tagsinput("items");
var tags = '';
for (var i = 0; i < items.length; i++) {
  var item = items[i];
  // Tags picked from the typeahead are objects ({TagId, TagValue}); free-text
  // tags are plain strings. Check the type directly: sniffing for "{" in the
  // JSON form misfires on free text that contains a brace.
  var isTagObject = item !== null && typeof item === 'object';
  tags += isTagObject ? item.TagValue : item;
  tags += ',';
}

MeteorJS: CALLBACKS

PROBLEM: I want to parse the elements in a page from another website, merge the resulting elements into an object and insert it into a Mongo collection. Before insertion I want to check whether my Mongo collection already has an identical object. If it does, the running functions should exit; otherwise I want the script to start parsing the next target.
Example:
I have a function that connects to a webpage and returns its body content
It is parsed
When <a></a> elements are met, another callback is called in which all parsed elements are merged in one object and inserted in a collection
My code :
var Cheerio = Meteor.npmRequire('cheerio');
var lastUrl;
var exit = false;
Meteor.methods({
parsing:function(){
this.unblock();
request("https://example.com/", Meteor.bindEnvironment(function(error, response, body) {
if (!error && response.statusCode == 200) {
$ = Cheerio.load(body);
var k = 1;
$("div.content").each(function() {
var name = $...//parsing
var age = $....//parsing
var url = $...//parsing <a></a> elements
var r = request("https://example.com/"+url, Meteor.bindEnvironment(function(error, response, body) {
lastUrl = response.request.uri.href;// get the last routing link
var metadata = {
name: name,
age: age
url: lastUrl
};
var postExist;
postExist = Posts.findOne(metadata); // return undefined if doesnt exist, AND every time postExist = undefined ??
if (!postExist){
Posts.insert(metadata);// if post doesnt exist (every time go here ??)
}
else {
exit = true; // if exist
}
}));
if (exit === true) return false;
});
}
}));
}
});
Problem 1 : The problem is my function works every time, but it doesn't stop even if the object exists in my collection
Problem 2 : postExist is always undefined
EDIT : The execution must stop and wait until the second request's response.
var url = $...//parsing <a></a> elements
//STOP HERE AND WAIT !!
var r = request("https://example.com/"+url, Meteor.bindEnvironment(function(error, response, body) {
Looks like you want the second request to be synchronous and not asynchronous.
To achieve this, use a future
var Cheerio = Meteor.npmRequire('cheerio');
var Future = Meteor.npmRequire('fibers/future');
var lastUrl;
var exit = false;
Meteor.methods({
parsing:function(){
this.unblock();
request("https://example.com/", Meteor.bindEnvironment(function(error, response, body) {
if (!error && response.statusCode == 200) {
$ = Cheerio.load(body);
var k = 1;
$("div.content").each(function() {
var name = $...//parsing
var age = $....//parsing
var url = $...//parsing <a></a> elements
var fut = new Future();
var r = request("https://example.com/"+url, Meteor.bindEnvironment(function(error, response, body) {
lastUrl = response.request.uri.href;// get the last routing link
var metadata = {
name: name,
age: age
url: lastUrl
};
var postExist;
postExist = Posts.findOne(metadata); // return undefined if doesnt exist
if (!postExist) {
Posts.insert(metadata);// if post doesnt exist (every time go here ??)
fut.return(true);
} else {
fut.return(false);
}
}));
var status = fut.wait();
return status;
});
}
}));
}
});
You can use futures whenever you can't utilize callback functions (e.g. you want the user to wait on the result of a callback before presenting info).
Hopefully that helps,
Elliott
This is the opposite :
postExist = Posts.findOne(metadata); // return undefined if doesnt exist > you're right
if (!postExist){ //=if NOT undefined = if it EXISTS !
Posts.insert(metadata);
}else {
exit = true; // if undefined > if it DOES NOT EXIST !
}
You need to inverse the condition or the code inside

Filter results from Google Autocomplete

Is there a way to get the results from Google Autocomplete API before it's displayed below the input? I want to show results from any country except U.S.A.
I found this question: Google Maps API V3 - Anyway to retrieve Autocomplete results instead of dropdown rendering it? but it's not useful, because the method getQueryPredictions only returns 5 elements.
This is an example with UK and US Results: http://jsfiddle.net/LVdBK/
Is it possible?
I used the jquery autocomplete widget and called the google methods manually.
For our case, we only wanted to show addresses in Michigan, US.
Since Google doesn't allow filtering out responses to that degree you have to do it manually.
Override the source function of the jquery autocomplete
Call the google autocompleteService.getQueryPredictions method
Filter out the results you want and return them as the "response" callback of the jquery autocomplete.
Optionally, if you need more detail about the selected item from Google, override the select function of the jquery autocomplete and make a call to Google's PlacesService.getDetails method.
The below assumes you have the Google api reference with the "places" library.
<script src="https://maps.googleapis.com/maps/api/js?key=[yourKeyHere]&libraries=places&v=weekly" defer></script>
var _autoCompleteService; // defined globally in script
var _placesService; // defined globally in script
//...
// setup autocomplete wrapper for google places
// starting point in our city
var defaultBounds = new google.maps.LatLngBounds(
new google.maps.LatLng('42.9655426','-85.6769166'),
new google.maps.LatLng('42.9655426','-85.6769166'));
if (_autoCompleteService == null) {
_autoCompleteService = new google.maps.places.AutocompleteService();
}
// Wire a jQuery UI autocomplete to Google's AutocompleteService and keep only
// the predictions that contain a two-letter "MI" term.
$("#CustomerAddress_Street").autocomplete({
  minLength: 2,
  source: function (request, response) {
    if (request.term == '') {
      // response() must be called for every request, even with nothing to show.
      response([]);
      return;
    }
    var googleRequest = {
      input: request.term,
      bounds: defaultBounds,
      types: ["geocode"],
      componentRestrictions: { 'country': ['us'] },
      fields: ['geometry', 'formatted_address']
    };
    _autoCompleteService.getQueryPredictions(googleRequest, function (predictions) {
      // predictions is null when Google has no results or reports an error.
      var michiganOnly = (predictions || []).filter(function (prediction) {
        // find the State term. Could probably assume it's terms[4], but not sure if it is guaranteed.
        // some() keeps each prediction at most once, even if several terms match.
        return (prediction.terms || []).some(function (term) {
          return term.value.length == 2 && term.value.toUpperCase() == 'MI';
        });
      });
      response(michiganOnly);
    });
  },
  select: function (event, ui) {
    if (ui == null) {
      return;
    }
    var request = {
      placeId: ui.item.place_id
    };
    if (_placesService == null) {
      $("body").append("<div id='GoogleAttribution'></div>"); // PlacesService() requires an element to put its attribution image in. For now, just put one on the body
      _placesService = new google.maps.places.PlacesService(document.getElementById('GoogleAttribution'));
    }
    _placesService.getDetails(request, function (result, status) {
      if (result == null) {
        return;
      }
      const place = result;
      if (!place.geometry) {
        // User entered the name of a Place that was not suggested and
        // pressed the Enter key, or the Place Details request failed.
        //window.alert("No details available for input: '" + place.name + "'");
        return;
      }
      var latitude = place.geometry.location.lat();
      var longitude = place.geometry.location.lng();
      // do something with Lat/Lng
    });
  }
}).autocomplete("instance")._renderItem = function (ul, item) {
  // item is the prediction object returned from our call to getQueryPredictions
  // Insert the description as text so that markup characters in it are not parsed as HTML.
  return $("<li>")
    .append($("<div>").text(item.description))
    .appendTo(ul);
};
// Custom menu renderer: renders every item, then appends the
// "powered by Google" image as the last row of the list.
$("#CustomerAddress_Street").autocomplete("instance")._renderMenu = function (ul, items) {
  // Google's terms require attribution, so when building the menu, append an item pointing to their image
  // The arrow callback keeps `this` bound to the object _renderMenu was called on.
  $.each(items, (index, item) => {
    this._renderItemData(ul, item);
  });
  $(ul).append("<li class='ui-menu-item'><div style='display:flex;justify-content:flex-end;'><img src='https://maps.gstatic.com/mapfiles/api-3/images/powered-by-google-on-white3.png' /></div></li>")
}

input text return event in Meteor

I want to capture the event of a user pressing enter on an input of type="text" when they are filling out a form. This is done all over the web, yet the answer eludes me.
This is what I have so far:
In the html file, I have a text input like so:
<input type="text" size=50 class="newlink">
In the Javascript file, I am trying to capture the the user pressing enter to effectively submit the form. I am then grabbing the text from the input and going to stash it in the database:
Template.newLink.events = {
'submit input.newLink': function () {
var url = template.find(".newLink").value;
// add to database
}
};
The submit event is emitted from forms, not single input elements.
The built in event map for meteor is documented here: http://docs.meteor.com/#eventmaps.
You'll have to listen for a keyboard event (keydown, keypress, keyup). Within the event handler, check, if it's the return/enter key (Keycode 13), and proceed on success.
// Event map: react to the Enter key (key code 13) pressed inside the input.
Template.newLink.events = {
  'keypress input.newLink': function (evt, template) {
    if (evt.which !== 13) {
      return;
    }
    var url = template.find(".newLink").value;
    // add to database
  }
};
You could look into how this is achieved in the todos example (client/todos.js).
It uses a generic event handler for input fields (as seen below). You can browse the rest of the code for usage.
////////// Helpers for in-place editing //////////
// Returns an event map that handles the "escape" and "return" keys and
// "blur" events on a text input (given by selector) and interprets them
// as "ok" or "cancel".
/**
 * Builds an event map with a single handler bound to keyup, keydown and
 * focusout on `selector`.
 *
 * - keydown with key 27 (escape) calls `callbacks.cancel(evt)`.
 * - keyup with key 13 (return) or focusout calls `callbacks.ok(value, evt)`
 *   when the target's value is non-empty, otherwise `callbacks.cancel(evt)`.
 *
 * Missing callbacks default to no-ops; both run with the handler's `this`.
 */
var okCancelEvents = function (selector, callbacks) {
  var noop = function () {};
  var onOk = callbacks.ok || noop;
  var onCancel = callbacks.cancel || noop;

  var eventKey = ['keyup', 'keydown', 'focusout']
    .map(function (eventName) { return eventName + ' ' + selector; })
    .join(', ');

  var handler = function (evt) {
    var isEscape = evt.type === "keydown" && evt.which === 27;
    if (isEscape) {
      // escape = cancel
      onCancel.call(this, evt);
      return;
    }

    var isCommit = (evt.type === "keyup" && evt.which === 13) || evt.type === "focusout";
    if (!isCommit) {
      return;
    }

    // blur/return/enter = ok/submit if non-empty
    var value = String(evt.target.value || "");
    if (value) {
      onOk.call(this, value, evt);
    } else {
      onCancel.call(this, evt);
    }
  };

  var events = {};
  events[eventKey] = handler;
  return events;
};
I used this js function once to suppress the user using the return key in the text field to submit the form data. Perhaps you could modify it to suit the capture?
/**
 * Stop return key functioning in text field.
 *
 * @param {Event} [evt] - key event; falls back to the global `event` object
 *   when the browser does not pass one.
 * @returns {false|undefined} false to cancel Enter (key code 13) pressed in
 *   an input of type "text", otherwise undefined.
 */
function stopRKey(evt) {
  // `typeof` keeps the fallback from throwing where no global `event` exists.
  var e = evt || (typeof event !== 'undefined' ? event : null);
  if (!e) {
    return;
  }
  var node = e.target || e.srcElement || null;
  // Guard `node`: an event without a target must not raise a TypeError.
  if (e.keyCode == 13 && node && node.type == "text") {
    return false;
  }
}
document.onkeypress = stopRKey;
You can also use event.currentTarget.value
Template.newLink.events = {
  'keypress input.newLink': function (evt) {
    if (evt.which === 13) {
      // Read from the handler's own argument. The bare name `event` is not
      // declared here and only resolves where the browser exposes the
      // legacy window.event global.
      var url = evt.currentTarget.value;
      // add to database
    }
  }
};

Resources