Access RESTful API token using R - r

My project involves fetching a token from a RESTful API using R. The API uses OAuth 2.0. The project is confidential, so I cannot provide further details.
Can anyone please help me access the token using R code? For instance:
URL: facebook.com
username: ABC
Password: qwerty

Here is one approach that can be considered for Facebook :
# Log in to Facebook by driving a real browser through RSelenium.
library(RSelenium)
url <- "https://www.facebook.com/"
# Start a dockerised standalone-Firefox Selenium server; host port 4445 is
# mapped to the container's WebDriver port 4444.
shell('docker run -d -p 4445:4444 selenium/standalone-firefox')
remDr <- remoteDriver(remoteServerAddr = "localhost", port = 4445L, browserName = "firefox")
remDr$open()
remDr$navigate(url)
# Fill in the login form; #email and #pass are the field ids on the
# Facebook home page.
web_Obj_Username <- remDr$findElement("css selector", '#email')
web_Obj_Username$sendKeysToElement(list("ABC"))
web_Obj_Password <- remDr$findElement("css selector", '#pass')
web_Obj_Password$sendKeysToElement(list("qwerty"))
# The submit button's id is dynamically generated, so pull the page source
# and extract the id with a regex instead of hard-coding it.
html_Content <- remDr$getPageSource()[[1]]
id_Submit <- unlist(stringr::str_extract_all(html_Content, 'royal_login_button" type="submit" id=".........'))
# NOTE(review): the '.........' pattern assumes the id is exactly 9
# characters long — fragile if Facebook changes its markup; confirm before
# reusing this approach.
id_Submit <- stringr::str_extract_all(id_Submit, 'id=".........')
# Strip the 'id="' wrapper, leaving only the raw id value.
id_Submit <- stringr::str_remove_all(id_Submit, 'id|=|\"')
# Locate the login button by the extracted id and click it.
web_Obj_Submit <- remDr$findElement("id", id_Submit)
web_Obj_Submit$click()

Related

Web scraping with R: findElement doesn't recognise drop down menu

I'm trying to scrape a government database with multiple dropdown menus. Using RSelenium, I've managed to click on the button taking me to the interactive database ("Sistema de Catastros de superficie frutícola regional"), and I'm now trying to click on the drop-down menus (ex: region, year), but keep getting errors that there's NoSuchElement. I've tried multiple xpaths based on inspect element and the Selector Gadget chrome extension to no avail. It looks like each of the dropdown menus is a combobox.
If helpful, my end goal is to go through each of the regions, years, and crops; scraping the table generated by each one.
library(RSelenium)
library(tidyverse)

# Start a Chrome-backed Selenium session on port 9515.
rdriver <- rsDriver(browser = "chrome", port = 9515L, chromever = "106.0.5249.61")
obj <- rdriver$client

# Open the ODEPA fruit-census statistics page.
obj$navigate("https://www.odepa.gob.cl/estadisticas-del-sector/catastros-fruticolas")

# Click the link that leads to the interactive database.
# Fix: an XPath id test must be written '@id' — '[#id=...]' is invalid XPath
# syntax and makes findElement fail with NoSuchElement.
link <- obj$findElement(
  using = "xpath",
  value = '//*[@id="content"]/div/div/div/div/div[1]/div[2]/div/div[2]/div[1]/div/div/div[3]/div/p[2]/a'
)$clickElement()
When you click on the button, a new tab appears.
You have to switch tabs with the help of
remDr$switchToWindow(remDr$getWindowHandles()[[2]])
Here is an example.
library(rvest)
library(RSelenium)

# Start a dockerised standalone-Firefox Selenium server (host port 4445 ->
# container port 4444) and connect to it.
shell('docker run -d -p 4445:4444 selenium/standalone-firefox')
remDr <- remoteDriver(remoteServerAddr = "localhost", port = 4445L, browserName = "firefox")
remDr$open()
Sys.sleep(15)

url <- "https://www.odepa.gob.cl/estadisticas-del-sector/catastros-fruticolas"
remDr$navigate(url)
Sys.sleep(15)

# Click the button that opens the interactive database in a new tab.
# Fix: use '@id' in the XPath predicate — '[#id=...]' is invalid XPath and
# causes NoSuchElement.
web_Obj_Button <- remDr$findElement("xpath", '//*[@id="content"]/div/div/div/div/div[1]/div[2]/div/div[2]/div[1]/div/div/div[3]/div/p[2]/a')
web_Obj_Button$clickElement()

# The database opens in a second tab, so switch to it before interacting.
remDr$switchToWindow(remDr$getWindowHandles()[[2]])

# Open the date drop-down (an Angular Material mat-select) and take a
# screenshot to confirm the page state.
web_Obj_Date <- remDr$findElement("css selector", "#mat-select-value-3 > span > span")
web_Obj_Date$clickElement()
remDr$screenshot(TRUE)

RSelenium message:no such element: Unable to locate element

I intend to download and clean databases using RSelenium. I am able to open the link however I am having trouble downloading and opening the database. I believe the xpath is right but when I try to open I receive the following error
Selenium message:no such element: Unable to locate element: {"method":"xpath","selector":"//*[@id="ESTBAN_AGENCIA"]"}
My code is the following:
# Resolve the download directory as a Windows-style path.
# Fix: the previous paste0()/str_replace_all() construction relied on
# stringr and magrittr without loading them, and its "\\\\\\" replacement
# string is dubious; base normalizePath() does the same job safely.
dir <- getwd()
file_path <- normalizePath(file.path(dir, "DataBase"), winslash = "\\", mustWork = FALSE)

# Tell Chrome to download files into file_path without prompting.
eCaps <- list(
  chromeOptions =
    list(prefs = list('download.default_directory' = file_path))
)

# Kill any stale Java (Selenium server) process before starting a new one.
system("taskkill /im java.exe /f", intern = FALSE, ignore.stdout = FALSE)

#Creating server
rD <- rsDriver(browser = "chrome",
               chromever = "101.0.4951.15",
               port = 4812L,
               extraCapabilities = eCaps)

#Creating the driver to use R
remDr <- remoteDriver(
  remoteServerAddr = "localhost",
  browserName = "chrome",
  port = 4812L)

#Open server
remDr$open()

#Navegating in the webpage of ESTABAN
remDr$navigate("https://www.bcb.gov.br/acessoinformacao/legado?url=https:%2F%2Fwww4.bcb.gov.br%2Ffis%2Fcosif%2Festban.asp")

##Download
# Fix 1: the target element lives inside the #framelegado iframe, so switch
# into that frame first — otherwise findElement fails with NoSuchElement.
webElem <- remDr$findElement("css", "iframe#framelegado")
remDr$switchToFrame(webElem)
# Fix 2: use '@id' in the XPath predicate — '[#id=...]' is invalid XPath.
remDr$findElement(using = "xpath", '//*[@id="ESTBAN_AGENCIA"]/option[1]')
The element you are trying to access is inside an iframe and you need switch that iframe first in order to access the element.
remDr$navigate("https://www.bcb.gov.br/acessoinformacao/legado?url=https:%2F%2Fwww4.bcb.gov.br%2Ffis%2Fcosif%2Festban.asp")

#Switch to Iframe
# The form lives inside the legacy iframe; switch into it before searching.
webElem <- remDr$findElement("css", "iframe#framelegado")
remDr$switchToFrame(webElem)

##Download
# Fix: '@id' (not '#id') is the valid XPath attribute test.
remDr$findElement(using = "xpath", '//*[@id="ESTBAN_AGENCIA"]/option[1]')

RSelenium: configure firefox remotedriver to use Tor network

I am trying to use RSelenium with firefox using a local proxy (Tor) on a linux machine.
I had no problem installing Tor following this tutorial, and the command line `wget -qO - https://api.ipify.org; echo` does get me a new IP.
Now I am willing to use firefox with RSelenium going through the Tor localhost on port 9050:
State Recv-Q Send-Q Local Address:Port Peer Address:Port
LISTEN 0 128 127.0.0.1:9050 *:*
LISTEN 0 128 127.0.0.1:9051 *:*
I use a standalone selenium java (selenium-server-standalone-2.53.0.jar), which work fine with regular RSelenium: here is an example getting the ip displayed on ipchicken
# Connect to the locally running standalone Selenium server and read the
# public IP address that ipchicken.com displays in its first <b> element.
library(RSelenium)
driver <- remoteDriver(
  remoteServerAddr = "localhost",
  port = 4444L,
  browserName = "firefox"
)
driver$open()
driver$navigate("https://ipchicken.com/")
# The IP is rendered in bold; grab all <b> nodes and print the first one.
bold_nodes <- driver$findElements(using = "css", value = "b")
print(bold_nodes[[1]]$getElementText())
And I do get my IP. Now I want to see it happen with Tor. I thus try to add the proxy option when connecting the remotedriver with firefox:
# Attempt to route Firefox through the local Tor SOCKS proxy by passing a
# command-line switch via moz:firefoxOptions args.
# NOTE(review): '--proxy-server' is a Chromium flag, not a Firefox one, so
# Firefox appears to ignore it silently — consistent with the unchanged IP
# reported below; confirm against Firefox's command-line documentation.
eCaps <- list("moz:firefoxOptions" = list(
args = c('--proxy-server=localhost:9050'
)))
remDr <- remoteDriver(
remoteServerAddr = "localhost",
port = 4444L,
browserName = "firefox",
extraCapabilities = eCaps
)
I tried '--proxy-server=localhost:9050', '--proxy-server=http://localhost:9050','--proxy-server=socks5://localhost:9050', '--proxy-server=127.0.0.1:9050', and it did not output any error and gave me my initial IP. So it is not working. The standalone says it does execute with the options: for example
22:59:10.288 INFO - Executing: [new session: Capabilities [{nativeEvents=true, browserName=firefox, javascriptEnabled=true, moz:firefoxOptions={args=--proxy-server= 127.0.0.1:9050}, version=, platform=ANY}]])
22:59:10.297 INFO - Creating a new session for Capabilities [{nativeEvents=true, browserName=firefox, javascriptEnabled=true, moz:firefoxOptions={args=--proxy-server= 127.0.0.1:9050}, version=, platform=ANY}]
22:59:30.323 INFO - Done: [new session: Capabilities [{nativeEvents=true, browserName=firefox, javascriptEnabled=true, moz:firefoxOptions={args=--proxy-server= 127.0.0.1:9050}, version=, platform=ANY}]]
What am I doing wrong?
Edit
After user1207289's answer, and after realizing that you could directly create a firefox profile in RSelenium, I tried:
# Build an ad-hoc Firefox profile whose preferences point network traffic at
# the local Tor SOCKS5 listener (type 1 is intended as "manual proxy").
eCaps <- makeFirefoxProfile(list(network.proxy.type = 1,
network.proxy.socks = "127.0.0.1",
network.proxy.socks_port = 9050,
network.proxy.socks_version = 5))
# Hand the generated profile to the remote driver as extra capabilities.
remDr <- remoteDriver(
remoteServerAddr = "localhost",
port = 4444L,
browserName = "firefox",
extraCapabilities = eCaps
)
I used integer for network.proxy.socks_port, network.proxy.socks_port and network.proxy.type because of this question, but tried with character also, without any success. I tried with and without network.proxy.socks_version = 5, and it did not work (I am getting my normal ip). I tried network.proxy.socks_port = 9150, but it did not work.
I also tried
# Another attempt: passing the proxy *preferences* as command-line args.
# NOTE(review): network.proxy.* are about:config preferences, not CLI
# arguments, so supplying them via 'args' likely has no effect — which
# matches the "did not work" outcome reported below.
eCaps <- list("moz:firefoxOptions" = list(
args = c('network.proxy.socks=127.0.0.1:9050' ,'network.proxy.type=1' )
)
)
but that did not work either.
I could connect to TOR using webdriver and firefox with the below . Just make sure TOR is installed and running. I used it on mac (catalina). You can check port settings according to your OS , in case they are different.
It is in c# but you can pretty much do it for any binding
// Configure Firefox to use the local Tor SOCKS proxy via profile preferences.
FirefoxOptions firefoxOptions = new FirefoxOptions();
firefoxOptions.SetPreference("network.proxy.type", 1); // manual proxy configuration
firefoxOptions.SetPreference("network.proxy.socks", "127.0.0.1");
// NOTE(review): 9150 is used here, while the standalone tor daemon shown
// earlier listens on 9050 — verify which port applies to your setup.
firefoxOptions.SetPreference("network.proxy.socks_port", 9150);
FirefoxDriverService service = FirefoxDriverService.CreateDefaultService();
IWebDriver driver = new FirefoxDriver(service, firefoxOptions);
When this opens a firefox browser instance , Just visit https://check.torproject.org/ on the same instance to check if it is connected to TOR. And that will confirm you are connected and will show your new ip also
After A lot of searching, I found a way: RSelenium has the getFirefoxProfile function which allows to get a firefox profile.
So I first configured the profile directly from firefox following the same tuto and copied it to my R folder. Using
# Load a Firefox profile that was configured inside Firefox itself (with the
# Tor proxy already set) and pass it to the remote driver as capabilities.
fprof <- getFirefoxProfile("myprofile.default")
remDr <- remoteDriver(
remoteServerAddr = "localhost",
port = 4444L,
browserName = "firefox",
extraCapabilities = fprof
)
Did work.

Phantomjs returns 404 in R when attempting webscraping

I am trying to collect some data from the OTC Markets (within the confines of their robots.txt) and I cannot connect to the webpage.
The first step I tried was just to scrape the HTML right off the page, but the page requires javascript to load.
So I downloaded phantomjs and connected that way. However, this leads to a 404 error page
I then changed the user-agent to something resembling a user to see if it would let me connect and still, no luck! What is going on here
Here is a reproducible version of my code, any help would be appreciated. Phantomjs can be downloaded here: http://phantomjs.org/
library(rvest)
library(xml2)
library(V8)

# example website, I have no correlation to this stock
url <- 'https://www.otcmarkets.com/stock/YTROF/profile'

# Create a JavaScript file that PhantomJS can process: load the page with a
# desktop-browser user agent and print the rendered HTML to stdout.
writeLines(sprintf("var page = require('webpage').create();
page.settings.userAgent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.120 Safari/537.36';
page.open('%s', function () {
console.log(page.content); //page source
phantom.exit();
});", url), con="scrape.js")

# Run PhantomJS, capturing its stdout (replace 'phantomjs.exe_PATH' with the
# actual path to the PhantomJS binary).
html <- system("phantomjs.exe_PATH scrape.js", intern = TRUE)

# Fix: system(..., intern = TRUE) returns a character vector with one
# element per output line, but read_html() expects a single document —
# collapse the lines back into one string before parsing.
page_html <- read_html(paste(html, collapse = "\n"))
I have been able to get the html content with the following code which is not based on PhantomJS but on Selenium :
# Selenium alternative to PhantomJS: render the page in a real Firefox
# (running in Docker) and grab the final HTML.
library(RSelenium)
shell('docker run -d -p 4445:4444 selenium/standalone-firefox')
remDr <- remoteDriver(remoteServerAddr = "localhost", port = 4445L, browserName = "firefox")
remDr$open()
remDr$navigate('https://www.otcmarkets.com/stock/YTROF/profile')
# Scroll the page down in steps, presumably to trigger lazily loaded content.
remDr$executeScript("scroll(0, 5000)")
remDr$executeScript("scroll(0, 10000)")
remDr$executeScript("scroll(0, 15000)")
# Give the JavaScript-heavy page time to finish rendering.
Sys.sleep(4)
# Visual check of the rendered page before extracting its source.
remDr$screenshot(display = TRUE, useViewer = TRUE)
html_Content <- remDr$getPageSource()[[1]]
It is important to give time to the page to load before we extract the html content.
Here is another approach based on RDCOMClient :
# Windows-only alternative: automate Internet Explorer through COM with
# RDCOMClient and read the rendered page's text.
library(RDCOMClient)
url <- 'https://www.otcmarkets.com/stock/YTROF/profile'
IEApp <- COMCreate("InternetExplorer.Application")
IEApp[['Visible']] <- TRUE
IEApp$Navigate(url)
# Wait for the page to load before touching the DOM.
Sys.sleep(5)
doc <- IEApp$Document()
Sys.sleep(5)
# Extract the visible text of the fully rendered document.
html_Content <- doc$documentElement()$innerText()

Open new tab in RSelenium

I have the following code, with which I try to open each URL loaded from the for loop in a new tab. What I have made until now is this:
library("RSelenium")
startServer()
checkForServer()
remDr <- remoteDriver()
remDr$open()
remDr$navigate("http://www.google.com/")
Sys.sleep(5)

myurllist <- c("https://cran.r-project.org/", "http://edition.cnn.com/", "https://cran.r-project.org/web/packages/")

# Open each URL from the list in a new browser tab.
# Fix: the loop read from 'url_list', which is never defined — the vector is
# named 'myurllist'. Iterate over it directly instead of by index.
for (url in myurllist) {
  # NOTE(review): "urlLink" is a CSS type selector that is unlikely to match
  # anything on google.com, and a bare "t" keypress does not open a tab
  # (CTRL+T does) — confirm both before relying on this loop.
  webElem <- remDr$findElement("css", "urlLink")
  webElem$sendKeysToElement(list(key = "t"))
  remDr$navigate(url)
  Sys.sleep(5)
}
From selenium I found this answer
A new tab is opened by pressing CTRL+T, not T:
# Open each URL in a new browser tab: a new tab is created with CTRL+T.
library("RSelenium")
startServer()
checkForServer()
remDr <- remoteDriver()
remDr$open()
remDr$navigate("http://www.google.com/")
url_list <- c("http://edition.cnn.com/", "https://cran.r-project.org/web/packages/")
for (url in url_list) {
# Grab the page's root element so the key press has a target.
webElem <- remDr$findElement("css", "html")
# Send CTRL+T (modifier key plus letter) to open a new tab, then load the URL.
webElem$sendKeysToElement(list(key="control", "t"))
remDr$navigate(url)
}

Resources