Submit POST form when rvest doesn't recognize submit button - r

I would like to submit the following form (the form appears after you click on link "Kliknite na ..."):
http://www1.biznet.hr/HgkWeb/do/extlogon
I have to enter one parameter, named "OIB" and submit the form by clicking "Trazi".
Here is my code:
library(httr)
library(rvest)
sess <- html_session("http://www1.biznet.hr/HgkWeb/do/extlogon")
search_page <- sess %>%
follow_link(1)
form <- html_form(search_page)[[6]]
fill_form <- set_values(form, 'clanica.cla_oib' = '94989605030')
firma_i <- submit_form(search_page, fill_form, submit = 'submit')
Last line produces an error:
Error: Unknown submission name 'submit'. Possible values:
clanica.asTextDatumGasenjaTo, clanica.asTextUdr_id
I don't understand why rvest recognizes these two parameters as submit buttons when they don't have a submit name or type. And why doesn't rvest recognize the submit button "Trazi" as a submit parameter? In short, how do I change the filled form so that it can be submitted?

The problem is that some of the inputs are missing the type attribute, and rvest does not check for this appropriately.
To illustrate the problem:
library(httr)
library(rvest)
#> Loading required package: xml2
sess <- html_session("http://www1.biznet.hr/HgkWeb/do/extlogon")
search_page <- sess %>%
follow_link(1)
#> Navigating to /HgkWeb/do/extlogon;jsessionid=88295900F3F932C85A25BB18F326BE28
form <- html_form(search_page)[[6]]
fill_form <- set_values(form, 'clanica.cla_oib' = '94989605030')
Some of the fields do not have the type attribute:
sapply(fill_form$fields, function(x) '['(x, 'type'))
#> $clanica.limitSearchToActiveCompany.type
#> [1] "radio"
#>
#> $clanica.limitSearchToActiveCompany.type
#> [1] "radio"
#>
#> $joinBy.useInnerJoin.type
#> [1] "checkbox"
#>
#> $nazivTvrtke.type
#> [1] "text"
#>
#> $nazivZapocinjeSaPredanomVrijednoscu.type
#> [1] "checkbox"
#>
#> $clanica.cla_jmbp.type
#> [1] "text"
#>
#> $clanica.cla_mbs.type
#> [1] "text"
#>
#> $clanica.cla_oib.type
#> [1] "text"
#>
#> $asTextKomoraId.NA
#> NULL
#>
#> $clanica.asTextOpc_id.NA
#> NULL
#>
#> $clanica.cla_opcina.type
#> [1] "hidden"
#>
#> $clanica.asTextNas_id.NA
#> NULL
#>
#> $clanica.cla_naselje.type
#> [1] "hidden"
#>
#> $clanica.pos_id.NA
#> NULL
#>
#> $clanica.postaNaziv.type
#> [1] "hidden"
#>
#> $clanica.cla_ulica.type
#> [1] "text"
#>
#> $clanica.asTextDatumUpisaFrom.type
#> [1] "text"
#>
#> $clanica.asTextDatumUpisaTo.type
#> [1] "text"
#>
#> $clanica.asTextDatumGasenjaFrom.type
#> [1] "text"
#>
#> $clanica.asTextDatumGasenjaTo.type
#> [1] "text"
#>
#> $clanica.asTextUdr_id.NA
#> NULL
#>
#> $clanica.asTextVel_id.NA
#> NULL
#>
#> $nkd2007.type
#> [1] "text"
#>
#> $nkd2007PretrazivanjePoGlavnojDjelatnosti.type
#> [1] "radio"
#>
#> $nkd2007PretrazivanjePoGlavnojDjelatnosti.type
#> [1] "radio"
#>
#> $submit.type
#> [1] "submit"
#>
#> $org.apache.struts.taglib.html.CANCEL.type
#> [1] "submit"
#>
#> $orderBy.order1.NA
#> NULL
#>
#> $orderBy.order2.NA
#> NULL
#>
#> $limit.type
#> [1] "text"
#>
#> $searchForRowCount.type
#> [1] "checkbox"
#>
#> $joinBy.gfiGodina.NA
#> NULL
#>
#> $joinBy.gfiBrojZaposlenihFrom.type
#> [1] "text"
#>
#> $joinBy.gfiBrojZaposlenihTo.type
#> [1] "text"
#>
#> $joinBy.gfiUkupniPrihodFrom.type
#> [1] "text"
#>
#> $joinBy.gfiUkupniPrihodTo.type
#> [1] "text"
This messes up the internal function submit_request and specifically the Filter() in it.
It's referenced here, and a fix is proposed in this PR, but it hasn't been merged since Jul 2016, so don't hold your breath.
The fix in the PR basically checks whether a type attribute is present:
# form.R, row 280
# Returns TRUE when the form field `x` (a list of attributes) has a `type`
# entry naming a button-like input ("submit", "image" or "button",
# case-insensitive); fields without a `type` entry yield FALSE.
is_submit <- function(x) {
  if (!("type" %in% names(x))) {
    return(FALSE)
  }
  button_types <- c("submit", "image", "button")
  tolower(x$type) %in% button_types
}
As a quick fix you can modify the form you already have, replacing each NULL type with an arbitrary non-submit type (here 'text'):
# Give every field that lacks a `type` entry the placeholder type 'text';
# fields that already carry a type are returned unchanged.
fill_form$fields <- lapply(fill_form$fields, function(field) {
  if (is.null(field$type)) {
    field$type <- 'text'
  }
  field
})
firma_i <- submit_form(search_page, fill_form, submit = 'submit')
firma_i
#> <session> http://www1.biznet.hr/HgkWeb/do/fullSearchPost
#> Status: 200
#> Type: text/html;charset=UTF-8
#> Size: 4366
Created on 2018-08-27 by the reprex package (v0.2.0).

Related

R use the name of function as a parameter in formals

I am working with several classification algorithms from different libraries (for example):
library(ranger) #RandomForest
library(gbm) #Gradient Boosting
I need to use formals function to get all the arguments from all of them.
The following attempts all work perfectly:
formals(gbm)
formals(gbm::gbm)
formals("gbm")
functionName="gbm"
formals(functionName)
What I need is to parametrize the name of the package as well as the name of the function, but it fails. Something like this:
> packageName="gbm"
> functionName="gbm"
> formals(packageName::functionName)
Error in loadNamespace(x) : there is no package called ‘packageName’
Is there any way to do it?
Thanks
The good thing is that :: accepts strings:
`::`("gbm", "gbm")
The above code is working.
However, when we use an object name in which the string is stored, :: takes this as literal expression and looks for a package called packageName.
packageName <- functionName <- "gbm"
`::`(packageName, functionName)
#> Error in loadNamespace(x): there is no package called 'packageName'
With base R we can use eval(bquote()) and evaluate the strings early with .().
By evaluating the strings early with .() we make it clear that we are really looking for the string value (that is the value of packageName) and not the object name itself.
formals(eval(bquote(`::`(.(packageName), .(functionName)))))
#> $formula
#> formula(data)
#>
#> $distribution
#> [1] "bernoulli"
#>
#> $data
#> list()
#>
#> $weights
#>
#>
#> $var.monotone
#> NULL
#>
#> $n.trees
#> [1] 100
#>
#> $interaction.depth
#> [1] 1
#>
#> $n.minobsinnode
#> [1] 10
#>
#> $shrinkage
#> [1] 0.1
#>
#> $bag.fraction
#> [1] 0.5
#>
#> $train.fraction
#> [1] 1
#>
#> $cv.folds
#> [1] 0
#>
#> $keep.data
#> [1] TRUE
#>
#> $verbose
#> [1] FALSE
#>
#> $class.stratify.cv
#> NULL
#>
#> $n.cores
#> NULL
With {rlang} we can use inject() and !! sym():
library(rlang)
formals(inject(`::`(!! sym(packageName), !! sym(functionName))))
Of course in base R we always have the option of eval(parse()), written here with str2lang():
packageName="gbm"
functionName="gbm"
formals(eval(str2lang(paste0(packageName, "::", functionName))))
Created on 2023-02-19 with reprex v2.0.2

Prevent R spawned process from exiting on error

I'm trying to build a tool that interacts with an R subprocess, but the process exits whenever R encounters an error.
Is there a way to prevent that?
Here is a simple example: as you can see, the process exits as soon as it encounters an error:
library(subprocess)
# Spawning an R process
r <- spawn_process(
Sys.which("R"), c("--vanilla", "--quiet")
)
Sys.sleep(1)
# Checking and reading the state
process_state(r)
#> [1] "running"
process_read(r)
#> $stdout
#> [1] "> "
#>
#> $stderr
#> character(0)
# Writing a normal call
process_write(r, "print(2)\n")
#> [1] 9
Sys.sleep(1)
process_state(r)
#> [1] "running"
process_read(r)
#> $stdout
#> [1] "print(2)" "[1] 2" "> "
#>
#> $stderr
#> character(0)
# Writing a call that will fail
process_write(r, "a\n")
#> [1] 2
Sys.sleep(1)
# The process has exited
process_state(r)
#> [1] "exited"
Created on 2019-11-17 by the reprex package (v0.3.0)
For example, if we compare with a NodeJS process, it doesn't exit after an error:
library(subprocess)
n <- spawn_process(
Sys.which("node"), "-i"
)
process_write(n, "a\n")
#> [1] 2
Sys.sleep(1)
process_read(n)
#> $stdout
#> [1] "> Thrown:" "ReferenceError: a is not defined"
#> [3] "> "
#>
#> $stderr
#> character(0)
process_state(n)
#> [1] "running"
process_write(n, "console.error('a')\n")
#> [1] 19
Sys.sleep(1)
process_read(n)
#> $stdout
#> [1] "undefined" "> "
#>
#> $stderr
#> [1] "a"
process_state(n)
#> [1] "running"
Created on 2019-11-17 by the reprex package (v0.3.0)
or with bash:
library(subprocess)
n <-spawn_process(
Sys.which("bash"),
)
process_write(n, "a\n")
#> [1] 2
Sys.sleep(1)
process_read(n)
#> $stdout
#> character(0)
#>
#> $stderr
#> [1] "/bin/bash: line 1: a: command not found"
process_state(n)
#> [1] "running"
Created on 2019-11-17 by the reprex package (v0.3.0)
It seems as though R is being run in batch mode, in which case R will exit if an error is thrown at the top level. If you add the --interactive option then R will continue to run after errors.
r <- spawn_process(
Sys.which("R"), c("--vanilla", "--quiet", "--interactive")
)

rforcecom.getObjectDescription got error towards come salesforce data table

I used this function to get all the field names of the Campaign table, but it returned this error
The salesforcer package can return details about object metadata in Salesforce. Here is an example:
library(tidyverse)
library(salesforcer)
sf_auth(username, password, security_token)
# retrieve a list of metadata about the Contact object
contact_object <- sf_describe_object('Contact')
#> $activateable
#> [1] "false"
#>
#> $childRelationships
#> [1] "..."
#>
#> $fields
#> [1] "..."
# retrieve a tbl_df of all fields on the Contact object
contact_fields <- sf_describe_object_fields('Contact')
#> # A tibble: 64 x 39
#> aggregatable aiPredictionField autoNumber ...
#> <chr> <chr> <chr>
#> 1 true false false
#> 2 false false false
#> 3 true false false
#> #...

performance of rho2hat and ppm in spatstat

I want to compare the performance of nonparametric intensity function estimator "rho2hat" with "ppm".
My question is: how can I perform a test to see which estimation works better? I couldn't use quadrat.test?
As mentioned in the comment by @adrian-baddeley, your suggested strategy
will just measure the difference between two results. The way you can
calculate the expected number of points in different regions from an
estimated intensity is as follows:
library(spatstat)
#> Loading required package: nlme
#> Loading required package: rpart
#>
#> spatstat 1.51-0.035 (nickname: 'Spatfefe')
#> For an introduction to spatstat, type 'beginner'
elev <- bei.extra$elev
grad <- bei.extra$grad
non <- rho2hat(bei, elev, grad)
pred <- predict(non)
grid <- quadrats(pred, nx = 4, ny = 2)
by(pred, grid, integral)
#> 1:
#> [1] 522.0247
#>
#> 2:
#> [1] 503.6255
#>
#> 3:
#> [1] 344.8552
#>
#> 4:
#> [1] 522.834
#>
#> 5:
#> [1] 454.5958
#>
#> 6:
#> [1] 470.2484
#>
#> 7:
#> [1] 556.1989
#>
#> 8:
#> [1] 415.8889

How to enumerate all S4 methods implemented by a package?

I'm looking for a way to query all S4 methods implemented by a particular package (given through its namespace environment). I think I could enumerate all objects that start with .__T__, but I'd rather prefer using a documented and/or less hackish way.
> ls(asNamespace("RSQLite"), all.names = TRUE, pattern = "^[.]__T__")
[1] ".__T__dbBegin:DBI" ".__T__dbBeginTransaction:RSQLite"
[3] ".__T__dbBind:DBI" ".__T__dbClearResult:DBI"
[5] ".__T__dbColumnInfo:DBI" ".__T__dbCommit:DBI"
[7] ".__T__dbConnect:DBI" ".__T__dbDataType:DBI"
[9] ".__T__dbDisconnect:DBI" ".__T__dbExistsTable:DBI"
[11] ".__T__dbFetch:DBI" ".__T__dbGetException:DBI"
[13] ".__T__dbGetInfo:DBI" ".__T__dbGetPreparedQuery:RSQLite"
[15] ".__T__dbGetQuery:DBI" ".__T__dbGetRowCount:DBI"
[17] ".__T__dbGetRowsAffected:DBI" ".__T__dbGetStatement:DBI"
[19] ".__T__dbHasCompleted:DBI" ".__T__dbIsValid:DBI"
[21] ".__T__dbListFields:DBI" ".__T__dbListResults:DBI"
[23] ".__T__dbListTables:DBI" ".__T__dbReadTable:DBI"
[25] ".__T__dbRemoveTable:DBI" ".__T__dbRollback:DBI"
[27] ".__T__dbSendPreparedQuery:RSQLite" ".__T__dbSendQuery:DBI"
[29] ".__T__dbUnloadDriver:DBI" ".__T__dbWriteTable:DBI"
[31] ".__T__fetch:DBI" ".__T__isSQLKeyword:DBI"
[33] ".__T__make.db.names:DBI" ".__T__show:methods"
[35] ".__T__sqlData:DBI" ".__T__SQLKeywords:DBI"
I think showMethods is the only thing available in methods, but it does not actually return the functions as an object, just prints them to the screen.
The following will return a list of the methods defined in an environment. Adapted from covr::replacements_S4(), which is used to modify all methods in a package to track coverage.
#' List the S4 methods stored in an environment
#'
#' For every generic reported by `methods::getGenerics(env)`, fetches the
#' corresponding methods table from `env` and collects its entries.
#'
#' @param env An environment, typically a package namespace such as
#'   `asNamespace("DBI")`.
#' @return An unnamed list with one element per generic whose methods table
#'   is non-empty; each element is a named list of the table's entries,
#'   keyed by the names used inside the table.
S4_methods <- function(env) {
  generics <- methods::getGenerics(env)
  # Slots are read with `@`. Writing `#` here would start a comment and
  # silently drop the remaining arguments of the call.
  res <- Map(
    generics@.Data, generics@package,
    USE.NAMES = FALSE,
    f = function(name, package) {
      # Name of the per-generic methods table, e.g. ".__T__show:methods".
      what <- methods::methodsPackageMetaName(
        "T", paste(name, package, sep = ":")
      )
      table <- get(what, envir = env)
      mget(ls(table, all.names = TRUE), envir = table)
    }
  )
  # Keep only generics that actually have methods in their table.
  res[lengths(res) > 0]
}
m <- S4_methods(asNamespace("DBI"))
length(m)
#> [1] 21
m[1:3]
#> [[1]]
#> [[1]]$DBIObject
#> function(dbObj, obj, ...) {
#> dbiDataType(obj)
#> }
#> <environment: namespace:DBI>
#> attr(,"target")
#> An object of class "signature"
#> dbObj
#> "DBIObject"
#> attr(,"defined")
#> An object of class "signature"
#> dbObj
#> "DBIObject"
#> attr(,"generic")
#> [1] "dbDataType"
#> attr(,"generic")attr(,"package")
#> [1] "DBI"
#> attr(,"class")
#> [1] "MethodDefinition"
#> attr(,"class")attr(,"package")
#> [1] "methods"
#>
#>
#> [[2]]
#> [[2]]$character
#> function(drvName, ...) {
#> findDriver(drvName)(...)
#> }
#> <environment: namespace:DBI>
#> attr(,"target")
#> An object of class "signature"
#> drvName
#> "character"
#> attr(,"defined")
#> An object of class "signature"
#> drvName
#> "character"
#> attr(,"generic")
#> [1] "dbDriver"
#> attr(,"generic")attr(,"package")
#> [1] "DBI"
#> attr(,"class")
#> [1] "MethodDefinition"
#> attr(,"class")attr(,"package")
#> [1] "methods"
#>
#>
#> [[3]]
#> [[3]]$`DBIConnection#character`
#> function(conn, statement, ...) {
#> rs <- dbSendStatement(conn, statement, ...)
#> on.exit(dbClearResult(rs))
#> dbGetRowsAffected(rs)
#> }
#> <environment: namespace:DBI>
#> attr(,"target")
#> An object of class "signature"
#> conn statement
#> "DBIConnection" "character"
#> attr(,"defined")
#> An object of class "signature"
#> conn statement
#> "DBIConnection" "character"
#> attr(,"generic")
#> [1] "dbExecute"
#> attr(,"generic")attr(,"package")
#> [1] "DBI"
#> attr(,"class")
#> [1] "MethodDefinition"
#> attr(,"class")attr(,"package")
#> [1] "methods"
I think you want the showMethods function, as in:
showMethods(where=asNamespace("RSQLite"))
The output is:
Function: dbBegin (package DBI)
conn="SQLiteConnection"
Function: dbBeginTransaction (package RSQLite)
conn="ANY"
Function: dbClearResult (package DBI)
res="SQLiteConnection"
res="SQLiteResult"
Function: dbColumnInfo (package DBI)
res="SQLiteResult"
and this goes on for many more rows. ?showMethods lists some additional arguments for tailoring the results.

Resources