I was running a web-scraping script in RStudio and got the following error:
Selenium message:javascript error: this.each is not a function
(Session info: chrome=81.0.4044.129)
Build info: version: '4.0.0-alpha-2', revision: 'f148142cf8', time: '2019-07-01T21:30:10'
System info: host: 'xxxxxx', ip: 'xxx.xxx.x.xxx', os.name: 'Windows 10', os.arch: 'amd64', os.version: '10.0', java.version: '1.8.0_231'
Driver info: driver.version: unknown
Error: Summary: JavaScriptError
Detail: An error occurred while executing user supplied JavaScript.
class: org.openqa.selenium.JavascriptException
Further Details: run errorDetails method
I don't really understand what the problem is or how I might solve it.
Does anyone know how to fix this? I am still quite new to this, so concrete steps would be very helpful.
Thank you in advance!
Edit: This is the script I'm using. The error seems to occur just before "#end of the main loop".
library(data.table) # Required for rbindlist
library(dplyr) # Required to use the pipes %>% and some table manipulation commands
library(magrittr) # Required to use the pipes %>%
library(rvest) # Required for read_html
library(RSelenium) # Required for webscraping with javascript
library(lubridate) # Required to collect dates
library(stringr) # Required for string handling (str_detect, str_trim)
library(purrr) # Required for the map functions
options(stringsAsFactors = F) #needed to prevent errors when merging data frames
#Paste the GoodReads Url
url <- "https://www.goodreads.com/book/show/1885.Pride_and_Prejudice?ac=1&from_search=true&qid=VkA2NbcGBa&rank=1"
languageOnly = F #If FALSE, "all languages" is chosen
#Set your browser settings
rD <- rsDriver(port = 4585L, browser = "chrome", chromever = "81.0.4044.69")
remDr <- rD[["client"]]
remDr$setTimeout(type = "implicit", 2000)
remDr$navigate(url)
bookTitle = unlist(remDr$getTitle())
finalData = data.frame()
# Main loop going through the website pages
morePages = T
pageNumber = 1
while(morePages){
#Select reviews in correct language.
#It should also work if you only fill in the numeral language code, and leave the first one empty.
selectLanguage = if(languageOnly){
remDr$findElement("xpath", "//select[@id='language_code']/option[@value='']")
} else {
remDr$findElement("xpath", "//select[@id='language_code']/option[5]")
}
selectLanguage$clickElement()
Sys.sleep(3)
#Expand all reviews
expandMore <- remDr$findElements("link text", "...more")
sapply(expandMore, function(x) x$clickElement())
#Extracting the reviews from the page
reviews <- remDr$findElements("css selector", "#bookReviews .stacked")
reviews.html <- lapply(reviews, function(x){x$getElementAttribute("outerHTML")[[1]]})
reviews.list <- lapply(reviews.html, function(x){read_html(x) %>% html_text()} )
reviews.text <- unlist(reviews.list)
#Some reviews have only rating and no text, so we process them separately
onlyRating = unlist(map(1:length(reviews.text), function(i) str_detect(reviews.text[i], "^\\\n\\\n")))
#Full reviews
if(sum(!onlyRating) > 0){
filterData = reviews.text[!onlyRating]
fullReviews = purrr::map_df(seq(1, length(filterData), by=2), function(i){
review = unlist(strsplit(filterData[i], "\n"))
data.frame(
date = mdy(review[2]), #date
username = str_trim(review[5]), #user
rating = str_trim(review[9]), #overall
comment = str_trim(review[12]) #comment
)
})
#Add review text to full reviews
fullReviews$review = unlist(purrr::map(seq(2, length(filterData), by=2), function(i){
str_trim(str_remove(filterData[i], "\\s*\\n\\s*\\(less\\)"))
}))
} else {
fullReviews = data.frame()
}
#partial reviews (only rating)
if(sum(onlyRating) > 0){
filterData = reviews.text[onlyRating]
partialReviews = purrr::map_df(1:length(filterData), function(i){
review = unlist(strsplit(filterData[i], "\n"))
data.frame(
date = mdy(review[9]), #date
username = str_trim(review[4]), #user
rating = str_trim(review[8]), #overall
comment = "",
review = ""
)
})
} else {
partialReviews = data.frame()
}
finalData = rbind(finalData, fullReviews, partialReviews)
#Go to next page if possible
nextPage = remDr$findElements("xpath", "//a[@class='next_page']")
if(length(nextPage) > 0){
message(paste("PAGE", pageNumber, "Processed - Going to next"))
nextPage[[1]]$clickElement()
pageNumber = pageNumber + 1
Sys.sleep(2)
} else {
message(paste("PAGE", pageNumber, "Processed - Last page"))
morePages = FALSE
}
}
#end of the main loop
#Replace missing ratings by 'not rated'
finalData$rating = ifelse(finalData$rating == "", "not rated", finalData$rating)
#Stop server
rD[["server"]]$stop()
#set directory to where you wish the file to go
#copy your working directory and exchange all backward slashes with forward slashes
getwd()
setwd("C:/Users/ledgreve/Desktop/GoodReads_TextMining-master/Scripts/New Scripts/Test1")
#Write results
write.csv(finalData, paste0(bookTitle, ".csv"), row.names = F)
message("FINISHED!")
Just my own update: this issue was resolved after I reinstalled Java and installed rJava (https://cimentadaj.github.io/blog/2018-05-25-installing-rjava-on-windows-10/installing-rjava-on-windows-10/)
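For anyone hitting the same issue, a minimal sketch for checking the Java setup that R actually sees (the JAVA_HOME path below is a placeholder, not my real path):
# Point R at the Java install, load rJava, and start the JVM;
# an error from .jinit() points at the Java installation itself.
Sys.setenv(JAVA_HOME = "C:/Program Files/Java/jre1.8.0_231") # placeholder path
install.packages("rJava")
library(rJava)
.jinit()
.jcall("java/lang/System", "S", "getProperty", "java.version")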
I've requested an access key and set it. Sys.getenv("EBIRDST_KEY") returns the correct key, but ebirdst_download(species = "Sharp-tailed Grouse") returns an error:
Error in ebirdst_download(species = "Sharp-tailed Grouse") : Cannot access Status and Trends data URL. Ensure that you have a working internet connection and a valid API key for the Status and Trends data.
My internet connection is working. Looking at the source code, I believe the error is raised because the function reads a URL, essentially read_json(stringr::str_glue("{api_url}list-obj/{species}?key={key}")), which for some reason comes back null.
https://rdrr.io/github/CornellLabofOrnithology/ebirdst/src/R/ebirdst-loading.R
But I can get it to work if I use the following lines of the source code. I'm not sure why the ebirdst function is failing.
species<-"Sharp-tailed Grouse"
path = rappdirs::user_data_dir("ebirdst")
species <- get_species(species)
which_run <- which(ebirdst::ebirdst_runs$species_code == species)
run <- ebirdst::ebirdst_runs$run_name[which_run]
key<-Sys.getenv("EBIRDST_KEY")
api_url <- "https://st-download.ebird.org/v1/"
list_obj_url <- stringr::str_glue("{api_url}list-obj/{species}?key={key}")
files <- jsonlite::read_json(list_obj_url, simplifyVector = TRUE)
files <- data.frame(file = files)
files <- files[!stringr::str_detect(files$file, "\\.db$"), , drop = FALSE]
files$src_path <- stringr::str_glue("{api_url}fetch?objKey={files$file}",
"&key={key}")
files$dest_path <- file.path(path, files$file)
files$exists <- file.exists(files$dest_path)
dirs <- unique(dirname(files$dest_path))
for (d in dirs) {
dir.create(d, showWarnings = FALSE, recursive = TRUE)
}
old_timeout <- getOption("timeout")
options(timeout = max(3000, old_timeout))
for (i in seq_len(nrow(files))) {
dl_response <- utils::download.file(files$src_path[i],
files$dest_path[i],
mode = "wb")
if (dl_response != 0) {
stop("Error downloading file: ", files$file[i])
}
}
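A quick sanity check before the full download loop (a sketch, reusing the api_url, species, and key defined above): request the list-obj endpoint directly and inspect the HTTP status, since a 401/403 points at the key while a 200 points at ebirdst itself.
# Hit the endpoint directly and look at the raw response
resp <- httr::GET(stringr::str_glue("{api_url}list-obj/{species}?key={key}"))
httr::status_code(resp)
httr::content(resp, as = "text")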
I am new to R. I want to import 5-minute stock data into an Excel sheet, but I get an unexpected error in my code. The error shows at rows 16, 17 and 75, 76, which
are actually header cookies like:
_ga and _gid
It shows: Error: unexpected input in "_"
I don't know why it is showing that.
Below is the source code:
library(httr)
library(curl)
library(mime)
library(openssl)
library(R6)
library(jsonlite)
###### Input to the code ######
###### Right click on the zerodha chart page and go to inspect element, go to network, find the chart link and extract below values
symbol_list<-read.csv("D://Trading/Stocks IDs.csv")
path_to_store_data<-("D:/Trading/R Experiement/")
_ga="GA1.2.416497127.1623159701"
_gid="GA1.2.4254895.1626415653"
enctoken="ib1rlX0e8dvp3qsnGvoL4aH3zsBgJA4kxtDPkUd+AMe7p6M6X26mn5w7EapJ+dMbZDtxZif59a2IhLLOwy5xEJAEnZ8iFYohilpfckj1bp8Mf9kX9RoHSA=="
kf_session="g665FGdUPvLaikeQQTomiejLqJYcUPg6"
public_token="bOrpvYIZJ4rhR8ytnAPccc9VFsTZjvSP"
user_id="ZE6166"
from="2021-07-01"
to="2021-07-16"
dir.create(paste0(path_to_store_data,from,"to",to))
path_to_store_data=paste0(path_to_store_data,from,"to",to,"/")
st <- as.Date(from)
en <- as.Date(to)
###### Specify Interval
###### minute,2minute,3minute,4minute,10minute,15minute,60minute,day
interval="5minute"
data_pull<-function(symbol_list){
i=1
for (i in i:nrow(symbol_list)){
ID=symbol_list[i,2]
symbol_name=symbol_list[i,1]
print(ID)
print(symbol_name)
theDate <-st
datalist = list()
big_data<-data.frame()
print(theDate)
print(en)
while (theDate<=en)
{
NextDate<- as.Date(theDate+30)
if (NextDate > as.Date(Sys.Date())){
NextDate<-en
}
dt_range=paste0(theDate,"&to=",NextDate)
print(dt_range)
url<- paste0("https://kite.zerodha.com/oms/instruments/historical/",ID,"/",interval)
httr::GET(
url = url,
add_headers(authorization=enctoken),
query = list(
user_id = user_id,
oi = "1",
from = theDate,
to = NextDate,
_ga= _ga,
_gid= _gid,
kf_session= kf_session,
public_token=public_token,
user_id= user_id,
enctoken= enctoken
)
) -> res
dat <- httr::content(res)
jsonRespText<-content(res,as="text")
#print(jsonRespText)
document<-fromJSON(txt=jsonRespText)
x<-document[["data"]]
y<-x[["candles"]]
if (length(y) <5){
theDate<-as.Date(theDate)+30
next
print("hi")
print(theDate)
}
dt<-as.data.frame(document)
dt<-dt[-1]
colnames(dt)[1]<-"TIME"
colnames(dt)[2]<-"Open"
colnames(dt)[3]<-"High"
colnames(dt)[4]<-"Low"
colnames(dt)[5]<-"CLOSE"
colnames(dt)[6]<-"VOLUME"
colnames(dt)[7]<-"SYMBOL"
dt$SYMBOL<-symbol_name
dt$TIME<-gsub("\\+0530","",dt$TIME)
dt$TIME<-gsub("T"," ",dt$TIME)
dt$Date <- as.Date(dt$TIME) #already got this one from the answers above
dt$TIME1 <- format(as.POSIXct(dt$TIME) ,format = "%H:%M:%S")
datalist[[i]] <- dt
#print("4")
theDate<-as.Date(theDate)+30
big_data = rbind(big_data,dt)
print(theDate)
}
file= paste0(path_to_store_data,symbol_name,".csv",sep="")
print(file)
write.csv(big_data,file,row.names = F)
print(theDate)
}
}
undebug(data_pull)
data_pull(symbol_list)
Here is an error image.
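A note on the error itself: R identifiers may not begin with an underscore, which is why the lines assigning _ga and _gid trigger "Error: unexpected input". A minimal sketch of one way around it, using the cookie values from the script (backticks make a non-syntactic name legal):
# R names cannot start with "_"; backticks allow such non-syntactic names
`_ga` <- "GA1.2.416497127.1623159701"
`_gid` <- "GA1.2.4254895.1626415653"
# The same trick works for the names inside the query list
query <- list(`_ga` = `_ga`, `_gid` = `_gid`)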
I'm using the chromote R package and I'm testing it with a Shiny application. I'm trying to click on the icon that should duplicate a few select elements, but all I get is a tooltip when I take a screenshot, and if I open the browser it freezes the R process.
Here is my code:
#' Run shiny in background - based on shinytest source code
#' @export
shiny.bg <- function(path, loadTimeout = 10000, shinyOptions = list()) {
tempfile_format <- tempfile("%s-", fileext = ".log")
p <- callr::r_bg(function(path, shinyOptions) {
do.call(shiny::runApp, c(path, shinyOptions))
},
args = list(
path = normalizePath(path),
shinyOptions = shinyOptions
),
stdout = sprintf(tempfile_format, "shiny-stdout"),
stderr = sprintf(tempfile_format, "shiny-stderr"),
supervise = TRUE
)
if (! p$is_alive()) {
rlang::abort(paste0(
"Failed to start shiny. Error: ",
strwrap(readLines(p$get_error_file()))
))
}
## Try to read out the port. Try 5 times/sec, until timeout.
max_i <- loadTimeout / 1000 * 5
for (i in seq_len(max_i)) {
err_lines <- readLines(p$get_error_file())
if (!p$is_alive()) {
rlang::abort(paste0(
"Error starting application:\n", paste(err_lines, collapse = "\n")
))
}
if (any(grepl("Listening on http", err_lines))) break
Sys.sleep(0.2)
}
if (i == max_i) {
rlang::abort(paste0(
"Cannot find shiny port number. Error:\n", paste(err_lines, collapse = "\n")
))
}
line <- err_lines[grepl("Listening on http", err_lines)]
m <- rematch::re_match(text = line, "https?://(?<host>[^:]+):(?<port>[0-9]+)")
url <- sub(".*(https?://.*)", "\\1", line)
list(
process = p,
url = url
)
}
#' Run shiny application and Chromote instance
chromote.shiny <- function() {
chr <- chromote::ChromoteSession$new()
app <- shiny.bg('.')
chr$Page$navigate(app$url)
chr$Page$loadEventFired()
chr$screenshot()
list(
chr = chr,
app = app
)
}
#' kill browser and R shiny process
cleanUp <- function(obj) {
obj$chr$Browser$close()
obj$app$process$kill()
}
#' click on the element
chromote.click <- function(chromote, selector) {
doc = chromote$DOM$getDocument()
node = chromote$DOM$querySelector(doc$root$nodeId, selector)
box <- chromote$DOM$getBoxModel(node$nodeId)
left <- box$model$content[[1]]
top <- box$model$content[[2]]
x <- left + (box$model$width / 2)
y <- top + (box$model$height / 2)
chromote$Input$dispatchMouseEvent(type = "mousePressed", x = x, y = y, button="left")
chromote$Input$dispatchMouseEvent(type = "mouseReleased", x = x, y = y, button="left")
}
tmp <- chromote.shiny()
chromote.click(tmp$chr, ".clone-pair")
tmp$chr$screenshot()
I have no idea how I can debug this, and there is not much information on how to make a click; I found dispatchMouseEvent in an issue on the GitHub repo for chromote.
Link to the repo: https://github.com/rstudio/chromote
The reason I want to use chromote is that I want to create unit/integration tests for my application. shinytest is badly outdated: it uses phantomJS, which was abandoned years ago (so you need to write very old JavaScript, because otherwise phantomJS throws an error and the test fails), and RSelenium is also no longer maintained.
Had the same issue.
I found this library that uses chromote but offers a number of functions (GetElement, Click) in the style of RSelenium.
install.packages("remotes")
remotes::install_github("rundel/hayalbaz")
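As an alternative to synthesizing mouse events, the click can also be driven from JavaScript through the Runtime domain; a sketch, reusing the .clone-pair selector from the question:
# Click via JavaScript instead of Input$dispatchMouseEvent; the expression
# can be tested in the browser's DevTools console first.
chromote.click.js <- function(chromote, selector) {
  chromote$Runtime$evaluate(
    sprintf('document.querySelector("%s").click()', selector)
  )
}
chromote.click.js(tmp$chr, ".clone-pair")
tmp$chr$screenshot()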
I need to do a simple login on a webpage's login page. How do I check whether the login was successful or not?
library(httr)
library(jsonlite)
library(tictoc)
library(data.table)
library(properties)
library(futile.logger)
library(crayon)
library(XML)
library(methods)
library(compare)
args = commandArgs(trailingOnly=TRUE)
server.name <- "lgloz050.lss.emc.com"
port.no <- "58443"
default.path <- "/APG/lookup/"
set_config(config(ssl_verifypeer = 0L))
config.s3 <- fread("Configuration_modify.csv")
config.s3$bc <- config.s3$testReport
config.s3$testReport <- gsub(">>","/", config.s3$testReport)
config.s3$testReport <- gsub(" ","%20", config.s3$testReport)
config.s3$link <- paste("https://",server.name,":",port.no,default.path,config.s3$testReport,"/report.csv", sep = "")
properties = read.csv2("Configuration.properties",sep = "=", blank.lines.skip = TRUE,header = FALSE,stringsAsFactors = FALSE )
colnames(properties) <- c("key", "value")
config.s3$link <- gsub("$","PH_", config.s3$link)
#config.s3$link
for(i in 1:nrow(properties)){
if(startsWith(properties[i,1],"$")){
print(properties[i,1])
for (j in 1: nrow(config.s3)) {
config.s3[j]$link = gsub(paste("PH_",substring(trimws(properties[i,1]),2),sep = "")
,trimws(properties[i,2]),config.s3[j]$link,ignore.case = TRUE)
}
}
}
result <- config.s3[, list(bc,TestCaseID,link),]
auth <- function(link,user.name="*****", password="******"){
res <- GET(link,add_headers("accept"="text/json"))
res <- POST('https://lgloz050.lss.emc.com:58443/APG/j_security_check'
,set_cookies=res$cookies
,body = "j_username=*****&j_password=******"
,add_headers("Content-Type" ="application/x-www-form-linkencoded" ))
return(res)
}
fetch <- function(link,save.location,cookies){
fetch.success = TRUE
res <- GET(link
,add_headers("Authorization"="Basic **************")
,set_cookies=cookies)
tryCatch({repot_data <- fread(content(res,"text"),header = TRUE);
fwrite(data.frame(repot_data),save.location,row.names = FALSE);
flog.info(green("'\u2713' - Fetch Completed successfully ..."))
flog.info(paste("link : ",link))},
error = function(e){fetch.success= FALSE; flog.error(paste("\u2715 - Not able to fetch data,file not created "))})
return(fetch.success)
}
config.s3$save.location = sub("TruthData","testData",config.s3$truthReport,ignore.case = T)
response = auth(config.s3[1]$link)
# Function Call - fetch all the report data
result[,fetch:=FALSE]
result[,fetch.time:=0]
pb <- winProgressBar(title="Fetching Reports... ", label="0%", min=0, max=100, initial=0,width = 500)
for (i in 1:nrow(config.s3)) {
tic()
getWinProgressBar(pb)
setWinProgressBar(pb, i*(100/nrow(config.s3)), label =paste(round(i*(100/nrow(config.s3)))," % \n",config.s3[i]$testReport))
flog.info(paste("report",i,"started",config.s3[i]$link))
fetch.success = fetch(config.s3[i]$link,config.s3[i]$save.location,response$cookies)
t <- toc()
t$toc
result[i]$fetch <- fetch.success
result[i]$fetch.time <- t$toc / 10000
}
close(pb)
result[,-c("link"),with=FALSE]
This is the code to fetch the CSV files, but the downloaded file contains the HTML of the login page instead of the report data. Please tell me where my mistake is and what I have to correct to get the right data.
Any suggested procedure is welcome. Thanks in advance.
I got the solution, which is as follows:
library(httr)
library(rvest)
url <- "https://lgloz050.lss.emc.com:58443/APG/"
dn_url <- "https://lgloz050.lss.emc.com:58443/APG/lookup/Report%20Library/Amazon%20S3/Inventory/Accounts/report.csv"
session <- html_session(url)
form <- html_form(session)[[1]]
fl_fm <- set_values(form,
j_username = "*****",
j_password = "********")
main_page <- submit_form(session, fl_fm)
download <- jump_to(main_page, dn_url)
writeBin(download$response$content, basename(dn_url))
On execution, this code successfully logs in and downloads the report, and the downloaded report has the required content. I did this for one file; next I am trying to download multiple files in one execution.
Thanks to you all for your support. Let me know if any other solution is possible or if any modification is required in the above code.
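For the multi-file case, a sketch that reuses the authenticated session (dn_urls is a hypothetical vector of report URLs built the same way as dn_url above):
# Loop over several report URLs, staying on the logged-in session
dn_urls <- c(dn_url) # hypothetical: add the other report URLs here
for (u in dn_urls) {
  dl <- jump_to(main_page, u)
  writeBin(dl$response$content, basename(u))
}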
I tried to do parallel programming in R by modifying my script. My script contains two parallel sections. The first one works, but the second one errors, even though the structure of both sections is the same. Below is my code:
library(rvest)
library(RMySQL)
library(curl)
library(gdata)
library(doMC)
library(foreach)
library(doParallel)
library(raster)
trim <- function (x) gsub("^\\s+|\\s+$", "", x)
setwd('/home/chandra/R/IlmuOne/MisterAladin')
no_cores <- detectCores()
cl<-makeCluster(no_cores)
registerDoParallel(cl)
MasterData = read.xls("Master Hotels - FINAL.xlsx", sheet = 1, header = TRUE)
MasterData$url_agoda = as.character(MasterData$url_agoda)
today = as.Date(format(Sys.time(), "%Y-%m-%d"))+2
ntasks <- nrow(MasterData)
#This section performed well
foreach(i=1:ntasks) %dopar% {
url = MasterData$url_agoda[i]
if (trim(url)!='-' & trim(url)!='')
{
from = gregexpr(pattern ='=',url)[[1]][1]
piece1 = substr(url,1,from)
from = gregexpr(pattern ='&los=',url)[[1]][1]
piece2 = substr(url,from,nchar(url))
MasterData$url_agoda[i] = paste0(piece1,today,piece2)
}
}
con <- dbConnect(RMySQL::MySQL(), username = "root", password = "master",host = "localhost", dbname = "mister_aladin")
#Try with the first 10 rows
#The section below fails and always returns: Error in { : task 1 failed - "could not find function "%>%""
foreach(a=1:10, .packages='foreach') %dopar% {
hotel_id = MasterData$id[a]
vendor = 'Agoda'
url = MasterData$url_agoda[a]
if (url!='-')
{
tryCatch({
hotel <- curl(url) %>%
read_html() %>%
html_nodes(xpath='//*[@id="room-grouping"]') %>%
html_table(fill = TRUE)
hotel <- hotel[[1]]
hotel$hotel_id= hotel_id
hotel$vendor= vendor
colnames(hotel)[1] = 'TheSpace'
colnames(hotel)[4] = 'PricePerNight'
room = '-'
hotel$NormalPrice = 0
hotel$FinalPrice = 0
for(i in 1:nrow(hotel))
{
if (i==1 | (!grepl('See photos',hotel$TheSpace[i]) & hotel$TheSpace[i]!='') )
{
room = hotel$TheSpace[i]
}
hotel$TheSpace[i] = room
#Normal Price
if (gregexpr(pattern ='IDR',hotel$PricePerNight[i])[[1]][1][1]==1)
{
split = strsplit(hotel$PricePerNight[i],'\n')[[1]]
NormalPrice = trim(split[2])
hotel$NormalPrice[i] = NormalPrice
NormalPrice = as.integer(gsub(",","",NormalPrice))
hotel$NormalPrice[i] = NormalPrice
}
#Final Price
if (gregexpr(pattern ='IDR',hotel$PricePerNight[i])[[1]][1][1]==1)
{
split = strsplit(hotel$PricePerNight[i],'\n')[[1]]
FinalPrice = trim(split[6])
hotel$FinalPrice[i] = FinalPrice
FinalPrice = as.integer(gsub(",","",FinalPrice))
hotel$FinalPrice[i] = FinalPrice
}
hotel$NormalPrice[is.na(hotel$NormalPrice)] <- 0
hotel$FinalPrice[is.na(hotel$FinalPrice)] <- 0
}
hotel = hotel[which(hotel$FinalPrice!=0),c("TheSpace","NormalPrice","FinalPrice")]
colnames(hotel) = c('room','normal_price','final_price')
hotel$log = format(Sys.time(), "%Y-%m-%d %H:%M:%S")
hotel$hotel_id = hotel_id
hotel$vendor = vendor
Push = hotel[,c('hotel_id','room','normal_price','final_price','vendor','log')]
#print(paste0('Agoda: push one record, hotel id ',hotel_id,'!'))
#cat(paste(paste0('Agoda: push one record, hotel id ',hotel_id,'!'),'\n'))
dbWriteTable(conn=con,name='prices_',value=as.data.frame(Push), append = TRUE, row.names = F)
},
error = function(e) {
Sys.sleep(2)
e
})
}
}
dbDisconnect(con)
stopImplicitCluster()
Every time I run the script it gives me the error: Error in { : task 1 failed - "could not find function "%>%"".
I have already checked every post on this forum and tried to apply the suggestions, but none of them works.
Please advise a solution.
You have to use .packages = c("magrittr", ...) and include all the packages that are necessary to run the code within the foreach loop; .packages = "foreach" alone does not help.
You can imagine that all the packages you list in .packages are forwarded to and loaded on each parallel worker.
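A minimal, self-contained sketch of that fix (assuming the loop body only needs magrittr and rvest, as in the question):
library(doParallel)
library(foreach)
cl <- makeCluster(2)
registerDoParallel(cl)
# .packages loads the listed packages on every worker before the body runs
res <- foreach(i = 1:4, .packages = c("magrittr", "rvest")) %dopar% {
  i %>% sqrt()
}
stopCluster(cl)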
The %>% operator requires the package magrittr. In this case, however, it does not suffice to load it at the beginning of your script; it needs to be loaded on each of the nodes. You could add this line after the creation of your cluster to accomplish this:
cl<-makeCluster(no_cores)
registerDoParallel(cl)
clusterCall(cl, function() library(magrittr))
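To verify that the workers can now see the pipe, a quick check (clusterEvalQ comes from the parallel package):
# Should return TRUE once per node if magrittr is loaded on the workers
parallel::clusterEvalQ(cl, exists("%>%"))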