Implement call retries with httr::RETRY() function in API call (R)

I use the UN Comtrade data API with R.
library(rjson)

get.Comtrade <- function(url = "http://comtrade.un.org/api/get?",
                         maxrec = 50000,
                         type = "C",
                         freq = "A",
                         px = "HS",
                         ps = "now",
                         r,
                         p,
                         rg = "all",
                         cc = "TOTAL",
                         fmt = "json") {
  string <- paste(url,
                  "max=", maxrec, "&",  # maximum no. of records returned
                  "type=", type, "&",   # type of trade (C = commodities)
                  "freq=", freq, "&",   # frequency
                  "px=", px, "&",       # classification
                  "ps=", ps, "&",       # time period
                  "r=", r, "&",         # reporting area
                  "p=", p, "&",         # partner country
                  "rg=", rg, "&",       # trade flow
                  "cc=", cc, "&",       # classification code
                  "fmt=", fmt,          # format
                  sep = "")
  if (fmt == "csv") {
    raw.data <- read.csv(string, header = TRUE)
    return(list(validation = NULL, data = raw.data))
  } else if (fmt == "json") {
    raw.data <- fromJSON(file = string)
    data <- raw.data$dataset
    validation <- unlist(raw.data$validation, recursive = TRUE)
    ndata <- NULL
    if (length(data) > 0) {
      var.names <- names(data[[1]])
      data <- as.data.frame(t(sapply(data, rbind)))
      for (i in 1:ncol(data)) {
        data[sapply(data[, i], is.null), i] <- NA
        ndata <- cbind(ndata, unlist(data[, i]))
      }
      ndata <- as.data.frame(ndata)
      colnames(ndata) <- var.names
    }
    return(list(validation = validation, data = ndata))
  }
}
However, it sometimes fails to connect to the server, and I need to run the code several times before it starts working. The solution given here, using the RETRY() function, which retries a request until it succeeds, seems attractive.
However, I have some difficulties implementing this function in the code given above. Has anybody used it before and knows how to recode it?

An API call using httr::RETRY could look like the following:
library(httr)
library(jsonlite)

res <- RETRY(
  verb = "GET",
  url = "http://comtrade.un.org/",
  path = "api/get",
  encode = "json",
  times = 3,
  query = list(
    max = 50000,
    type = "C",
    freq = "A",
    px = "HS",
    ps = "now",
    r = 842,
    p = "124,484",
    rg = "all",
    cc = "TOTAL",
    fmt = "json"
  )
)

# alternative: returns the dataset as a `list`:
# parsed_content <- content(res, as = "parsed")

# returns the dataset as a `data.frame`:
json_content <- content(res, as = "text")
parsed_content <- parse_json(json_content, simplifyVector = TRUE)
parsed_content$validation
parsed_content$dataset
I'd suggest rewriting the get.Comtrade function using httr:
get.Comtrade <- function(verb = "GET",
                         url = "http://comtrade.un.org/",
                         path = "api/get",
                         encode = "json",
                         times = 3,
                         max = 50000,
                         type = "C",
                         freq = "A",
                         px = "HS",
                         ps = "now",
                         r,
                         p,
                         rg = "all",
                         cc = "TOTAL",
                         fmt = "json") {
  res <- httr::RETRY(
    verb = verb,
    url = url,
    path = path,
    encode = encode,
    times = times,
    query = list(
      max = max,
      type = type,
      freq = freq,
      px = px,
      ps = ps,
      r = r,
      p = p,
      rg = rg,
      cc = cc,
      fmt = fmt
    )
  )
  jsonlite::parse_json(httr::content(res, as = "text"), simplifyVector = TRUE)
}

s1 <- get.Comtrade(r = "842", p = "124,484", times = 5)
print(s1)
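RETRY() waits between attempts with exponential backoff and, by default, retries on any error. If some statuses can never succeed, or you want to bound the waiting, the backoff can be tuned; pause_base, pause_cap, and terminate_on are documented arguments of httr::RETRY, while the specific values below are only illustrative:

library(httr)

res <- RETRY(
  verb = "GET",
  url = "http://comtrade.un.org/",
  path = "api/get",
  query = list(r = 842, p = "124,484", fmt = "json"),
  times = 5,                  # up to 5 attempts
  pause_base = 2,             # backoff of roughly 2, 4, 8, ... seconds
  pause_cap = 30,             # never wait longer than 30 seconds
  terminate_on = c(400, 404)  # give up immediately on these statuses
)
stop_for_status(res)          # raise an error if the final attempt still failed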
Please see this and this for more information on library(httr).

Related

Strange R package behaviour: "could not find function"

I have some functionality which works fine outside of a package, but when I put it into a package, run devtools::load_all(), and try to run one of the functions (DTL_similarity_search_results_fast), another function (DTL_similarity_search), which should be loaded by the package, is not found when it is called inside DTL_similarity_search_results_fast.
The code is:
messagef <- function(...) message(sprintf(...))
printf <- function(...) print(sprintf(...))

pattern_to_vec <- function(pattern, as_int = F, keep_list = FALSE) {
  ret <- strsplit(pattern, ",")
  if (length(pattern) == 1 && !keep_list)
    ret <- ret[[1]]
  if (as_int) {
    ret <- lapply(ret, as.integer)
  }
  ret
}

DTL_similarity_search <- function(search_pattern = "1,2,1,2,1,2,1,2",
                                  transformation = "interval",
                                  database_names = "dtl,wjazzd,omnibook",
                                  metadata_filters = '{"dtl": {}, "wjazzd": {}, "esac": {}, "omnibook": {}}',
                                  filter_category = "0",
                                  minimum_similarity = 1.0,
                                  max_edit_distance = NA,
                                  max_length_difference = 0) {
  url <- suppressWarnings(httr::modify_url("https://staging-dtl-pattern-api.hfm-weimar.de/", path = "/patterns/similar"))
  if (is.na(max_edit_distance)) {
    max_edit_distance <- purrr::map_int(pattern_to_vec(search_pattern, keep_list = T), length) %>% min()
  }
  messagef("[DTL API] Starting search for %s", search_pattern)
  resp <- suppressWarnings(httr::POST(url, body = list(n_gram = search_pattern,
                                                       transformation = transformation,
                                                       database_names = database_names,
                                                       metadata_filters = metadata_filters,
                                                       filter_category = filter_category,
                                                       minimum_similarity = minimum_similarity,
                                                       max_edit_distance = max_edit_distance,
                                                       max_length_difference = max_length_difference,
                                                       filter_category = 0),
                                      encode = "form"))
  #browser()
  #print(httr::content(resp, "text"))
  if (httr::http_error(resp)) {
    messagef(
      "[DTL API] Similarity Search request failed [%s]\n%s\n<%s>",
      httr::status_code(resp),
      "", #parsed$message,
      ""  #parsed$documentation_url
    )
    return(NULL)
  }
  parsed <- jsonlite::fromJSON(httr::content(resp, "text"), simplifyVector = FALSE)
  messagef("[DTL API] Retrieved search ID %s for pattern %s", parsed$search_id, search_pattern)
  parsed$search_id
}

DTL_get_results <- function(search_id) {
  url <- suppressWarnings(httr::modify_url("http://staging-dtl-pattern-api.hfm-weimar.de/", path = "/patterns/get"))
  #messagef("[DTL API] Retrieving results for search_id %s", search_id)
  resp <- suppressWarnings(httr::GET(url, query = list(search_id = search_id)))
  if (httr::http_error(resp)) {
    messagef(
      "[DTL API] Similarity Search request failed [%s]\n%s\n<%s>",
      httr::status_code(resp),
      "", #parsed$message,
      ""  #parsed$documentation_url
    )
    return(NULL)
  }
  print(httr::content(resp, "text"))
  #browser()
  parsed <- jsonlite::fromJSON(httr::content(resp, "text"), simplifyVector = FALSE)
  messagef("[DTL API] Retrieved %s lines for search_id %s", length(parsed), search_id)
  purrr::map_dfr(parsed, function(x) {
    if (is.null(x$within_single_phrase)) {
      x$within_single_phrase <- FALSE
    }
    #browser()
    tibble::as_tibble(x) %>% dplyr::mutate(melid = as.character(melid))
  })
}

DTL_similarity_search_results <- function(search_patterns = "1,2,1,2,1,2,1,2",
                                          transformation = "interval",
                                          database_names = "dtl,wjazzd,omnibook",
                                          metadata_filters = '{"dtl": {}, "wjazzd": {}, "esac": {}, "omnibook": {}}',
                                          filter_category = "0",
                                          minimum_similarity = 1.0,
                                          max_edit_distance = NA,
                                          max_length_difference = 0) {
  results <- tibble::tibble()
  if (is.na(max_edit_distance)) {
    max_edit_distance <- purrr:::map_int(pattern_to_vec(search_patterns, keep_list = T), length) %>% min()
  }
  for (pattern in search_patterns) {
    print('DTL_similarity_search')
    print(DTL_similarity_search)
    search_id <- DTL_similarity_search(pattern,
                                       transformation,
                                       database_names,
                                       metadata_filters,
                                       filter_category,
                                       minimum_similarity,
                                       max_edit_distance = max_edit_distance,
                                       max_length_difference = max_length_difference)
    if (is.null(search_id)) {
      next
    }
    ret <- DTL_get_results(search_id)
    if (!is.null(ret) && nrow(ret) > 0) {
      ret$search_pattern <- pattern
    }
    results <- dplyr::bind_rows(results, ret)
  }
  #browser()
  if (nrow(results))
    results %>% dplyr::distinct(melid, start, length, .keep_all = T)
}

DTL_similarity_search_results_fast <- function(search_patterns = "1,2,1,2,1,2,1,2",
                                               transformation = "interval",
                                               database_names = "dtl,wjazzd,omnibook",
                                               metadata_filters = '{"dtl": {}, "wjazzd": {}, "esac": {}, "omnibook": {}}',
                                               filter_category = "0",
                                               minimum_similarity = 1.0,
                                               max_edit_distance = NA,
                                               max_length_difference = 0) {
  if (is.na(max_edit_distance)) {
    max_edit_distance <- purrr::map_int(pattern_to_vec(search_patterns, keep_list = T), length) %>% min()
  }
  future::plan(future::multisession)
  results <- furrr:::future_map_dfr(search_patterns, function(pattern) {
    print('DTL_similarity_search2')
    search_id <- DTL_similarity_search(pattern,
                                       transformation,
                                       database_names,
                                       metadata_filters,
                                       filter_category,
                                       minimum_similarity,
                                       max_edit_distance = max_edit_distance,
                                       max_length_difference = max_length_difference)
    if (is.null(search_id)) {
      return(tibble::tibble())
    }
    ret <- DTL_get_results(search_id)
    if (!is.null(ret) && nrow(ret) > 0) ret$search_pattern <- pattern
    ret
  })
  #browser()
  results %>% dplyr::distinct(melid, start, length, .keep_all = TRUE)
}
Then, after load_all(), when I try to run:
res <- DTL_similarity_search_results_fast()
I get:
Error in DTL_similarity_search(pattern, transformation, database_names, :
  could not find function "DTL_similarity_search"
but running a similar, different function using the same procedure works:
res <- DTL_similarity_search_results()
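A possible workaround sketch (untested against this package setup; furrr_options() and its globals argument are part of the furrr package): during load_all() the package is not installed, so the multisession workers started by future cannot attach it, and functions that are only referenced inside the worker closure may not be picked up automatically. Exporting them explicitly to the workers sometimes helps:

results <- furrr::future_map_dfr(
  search_patterns,
  function(pattern) {
    search_id <- DTL_similarity_search(pattern, transformation, database_names,
                                       metadata_filters, filter_category,
                                       minimum_similarity,
                                       max_edit_distance = max_edit_distance,
                                       max_length_difference = max_length_difference)
    if (is.null(search_id)) return(tibble::tibble())
    DTL_get_results(search_id)
  },
  # explicitly export the helper functions to the workers
  .options = furrr::furrr_options(globals = c("DTL_similarity_search",
                                              "DTL_get_results",
                                              "pattern_to_vec",
                                              "messagef"))
)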

How do I get attributes of list elements recursively in R?

I have a nested list structure with some elements (not all) having attributes that I want to keep (I've converted some xml output to a list). I'm trying to flatten it into a data.frame. The structure is something like this:
myList <- structure(
  list(
    address = structure(
      list(Address = list(Line = list("xxxxxxx"),
                          Line = list("xxxxxxx"),
                          Line = list("xxxxxxx"),
                          PostCode = list("XXX XXX"))),
      type = "Residential", verified = "Unverified"),
    amount = structure(
      list(paymentAmount = list(
             maxAmount = list(
               amountPart = structure(list(Amount = list("0.00")), component = "Standard"),
               amountPart = structure(list(Amount = list("0.00")), component = "Thing1"),
               amountPart = structure(list(Amount = list("0.00")), component = "Thing2"),
               amountPart = structure(list(Amount = list("0.00")), component = "Thing3"),
               amountPart = structure(list(Amount = list("0.00")), component = "Thing4"),
               amountPart = structure(list(Amount = list("100.00")), component = "Thing5"),
               amountPart = structure(list(Amount = list("0.00")), component = "Thing6")),
             otherAmount = list(Amount = list("0.00")),
             discount = list("0.00"),
             transition = list("0.00"),
             discounts = list(),
             regularPayment = list("200.00")),
           paymentInfo = list(income = structure(list(net = list("0")),
                                                 refNumber = "xxxxxxx"))),
      paymentDate = "2021-03-22", startDate = "2021-02-16", endDate = "2021-03-15")),
  type = "Normal")
I've tried rapply(myList, attributes) but that just seems to return NULL.
I've also tried using a loop in a recursive function:
get_attributes <- function(myList, attribute_list = NULL) {
  if (is.null(attribute_list)) attribute_list <- list()
  for (i in seq_along(myList)) {
    if (is.list(myList[[i]])) {
      attribute_list <- c(attribute_list, sapply(myList[[i]], attributes))
      attribute_list <- get_attributes(myList[[i]], attribute_list)
    } else {
      attribute_list <- c(attribute_list, attributes(myList[[i]]))
    }
  }
  attribute_list
}
Once I've got the list of attributes, I then want to put them in a one-row data.frame, something like data.frame(address.type = "Residential", address.verified = "Unverified", component.1 = "Standard", component.2 = "Thing1", ...).
The function with a loop is a bit messy and not very 'R', and it also seems to spit out lots of repeated elements that I don't want. Does anyone have any idea how to implement this more elegantly?
UPDATE
I've refined the loop implementation to this, which seems to work, but I just couldn't figure out how to use either purrr or one of the *apply functions in place of the loop:
get_attributes <- function(myList, attribute_list = NULL, prefix = NULL) {
  if (is.null(attribute_list)) {
    attribute_list <- list()
  }
  if (is.null(prefix)) {
    prefix <- ""
  }
  for (i in seq_along(myList)) {
    name <- names(myList)[i]
    attrs <- attributes(myList[[i]])
    if (!is.null(attrs)) {
      names(attrs) <- paste0(prefix, name, ".", names(attrs))
      attrs <- attrs[!grepl("\\.names$", names(attrs))]
      attribute_list <- c(attribute_list, attrs)
    }
    if (is.list(myList[[i]])) {
      attribute_list <- get_attributes(myList[[i]],
                                       attribute_list,
                                       paste0(prefix, name, "."))
    }
  }
  attribute_list
}

do.call(data.frame, get_attributes(myList))
You can gather all the available attributes and keep just the ones you are interested in.
library(purrr)

map_df(myList, ~map_chr(attributes(.x), toString))
#  names                      type        verified   paymentDate startDate  endDate
#  <chr>                      <chr>       <chr>      <chr>       <chr>      <chr>
#1 Address                    Residential Unverified NA          NA         NA
#2 paymentAmount, paymentInfo NA          NA         2021-03-22  2021-02-16 2021-03-15
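Note that this only inspects attributes at the top level of myList. If you need the deeper attributes as well, with dotted-prefix names as in the loop version, a recursive purrr sketch could look like the following; get_attributes2 is a hypothetical name, and it assumes the name prefixes are distinctive enough for a one-row data.frame:

library(purrr)

# Recursively collect attributes, prefixing each with the path to its element
# (a sketch equivalent to the loop version; drops the "names" attribute).
get_attributes2 <- function(x, prefix = "") {
  own <- attributes(x)
  own <- own[names(own) != "names"]
  if (length(own) > 0) names(own) <- paste0(prefix, names(own))
  children <- if (is.list(x)) {
    do.call(c, unname(imap(x, ~ get_attributes2(.x, paste0(prefix, .y, ".")))))
  }
  c(own, children)
}

do.call(data.frame, get_attributes2(myList))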

How to use "tryCatch" to skip errors in a nested loop in R?

I am trying to load some data with a nested loop using pageviews. You already helped me get this result:
library("pageviews")
lang = c("it.wikipedia")
bm = c("ECB","Christine Lagarde")
x <- list(
list(),
list(),
list(),
list(),
list()
) # store results
for (i in seq_along(lang)) {
for (j in seq_along(bm)) {
x[[i]][[j]] = article_pageviews(project = lang[i], article = bm[j], platform = "all", user_type = "user", start = "2015100100", end = today(), reformat = TRUE, granularity = "daily")
}
}
The last step I need to do, however, involves reading some articles for which the project doesn't exist. Find an example below:
lang = c("it.wikipedia")
bm = c("Philip Lane")
x = article_pageviews(project = lang, article = bm, platform = "all", user_type = "user", start = "2015100100", end = today(), reformat = TRUE, granularity = "daily")
# Error in FUN(X[[i]], ...) :
# The date(s) you used are valid, but we either do not have data for those date(s), or the project you asked for is not loaded yet. Please check https://wikimedia.org/api/rest_v1/?doc for more information.
I would like to add this to the loop. I tried a few solutions, but I cannot manage to make the loop skip an iteration when there is an error. Below is one mistaken attempt:
lang = c("it.wikipedia")
bm = c("ECB", "Christine Lagarde", "Philip Lane")
for (i in seq_along(lang)) {
for (j in seq_along(bm)) {
skip_to_next <- FALSE
tryCatch(x[[i]][[j]] = article_pageviews(project = lang[i], article = bm[j], platform = "all", user_type = "user", start = "2015100100", end = today(), reformat = TRUE, granularity = "daily"), error = function(e) {skip_to_next <<- TRUE})
if(skip_to_next) { next }
}
}
Can anyone help me run the loop and skip whenever it meets an error?
Thanks a lot!
You can use tryCatch as:
library(pageviews)
library(purrr)
library(lubridate)  # for today()

lang = c("it.wikipedia")
bm = c("ECB", "Christine Lagarde", "Philip Lane")

map_df(lang, function(x)
  map_df(bm, function(y)
    tryCatch(article_pageviews(project = x, article = y, platform = "all",
                               user_type = "user", start = "2015100100",
                               end = today(), reformat = TRUE,
                               granularity = "daily"),
             error = function(e) {}))) -> result
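For completeness, the posted for-loop attempt fails because `x[[i]][[j]] = ...` is not valid syntax inside a function call (argument names must be plain symbols); assigning the result of tryCatch instead works. A sketch along the same lines:

for (i in seq_along(lang)) {
  for (j in seq_along(bm)) {
    x[[i]][[j]] <- tryCatch(
      article_pageviews(project = lang[i], article = bm[j], platform = "all",
                        user_type = "user", start = "2015100100", end = today(),
                        reformat = TRUE, granularity = "daily"),
      error = function(e) NULL  # store NULL and move on when the API errors
    )
  }
}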

Pagination loop is stuck at x > 99

I am scraping some data from an API, and my code works just fine as long as I extract pages 0 to 98. Whenever my loop reaches 99, I get an error: Error: Internal Server Error (HTTP 500).
I tried to find an answer, but I am only proficient in R and C# and cannot understand Python or other languages.
keywords = c('ABC OR DEF')

parameters <- list(
  'q' = keywords,
  num_days = 1,
  language = 'en',
  num_results = 100,
  page = 0,
  'api_key' = '123456'
)

response <- httr::GET(get_url, query = parameters)
# latest_page_number <- get_last_page(parsed)
httr::stop_for_status(response)

content <- httr::content(response, type = 'text', encoding = 'utf-8')
parsed <- jsonlite::fromJSON(content, simplifyVector = FALSE, simplifyDataFrame = TRUE)

num_pages = round(parsed[["total_results"]] / 100)
print(num_pages)
result = parsed$results

for (x in 1:(num_pages)) {
  print(x)
  parameters <- list(
    'q' = keywords,
    page = x,
    num_days = 7,
    language = 'en',
    num_results = 100,
    'api_key' = '123456'
  )
  response <- httr::GET(get_url, query = parameters)
  httr::stop_for_status(response)
  content <- httr::content(response, type = 'text', encoding = 'utf-8')
  # content <- httr::content(response)
  parsed <- jsonlite::fromJSON(content, simplifyVector = FALSE, simplifyDataFrame = TRUE)
  Sys.sleep(0.2)
  result = rbind(result, parsed$results[, colnames(result)])
}
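If the 500 on page 99 is transient or page-specific, one defensive option (a sketch using httr::RETRY, not a diagnosis of the server error itself) is to retry the request and then skip pages that still fail, instead of letting stop_for_status() abort the loop:

# inside the loop, replacing GET + stop_for_status:
response <- httr::RETRY("GET", get_url, query = parameters, times = 3)
if (httr::http_error(response)) {
  # give up on this page after the retries and continue with the next one
  message("Skipping page ", x, ": HTTP ", httr::status_code(response))
  next
}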

Knitr - Error in UseMethod("round_any"): no applicable method for 'round_any' applied

I'm trying to output some of my code results in knitr. Now the strange thing is that the code generates the error in the title, but running round_any() separately and outputting it in knitr is fine.
knitr code
```{r, echo = FALSE, message=FALSE, warning=FALSE}
source("BooliQuery.R")
BooliQuery()
```
My code
library(digest)
library(stringi)
library(jsonlite)
library(plyr)

BooliQuery <- function(area = "stockholm", type = "lägenhet", sincesold = "",
                       FUN = "", limit = 250, offset = 0, mode = 1) {
  # Raw data fetch + adjust.
  lOriginal <- GETAPI(area, type, sincesold, FUN, limit, offset)
  lOriginal$AreaSize <- round_any(lOriginal$livingArea, 10, floor)
  lOriginal$PriceDiff <- lOriginal$soldPrice - lOriginal$listPrice
  # Create frame overview
  Overview.Return <- Frame.Overview(lOriginal)
  # Mode - return selector
  ifelse(mode == 1, return(Overview.Return), return(lOriginal))
}

Frame.Overview <- function(lOriginal) {
  # Aggregate mean
  listPrice <- aggregate(lOriginal, list(lOriginal$AreaSize), FUN = mean, na.rm = TRUE)
  colnames(listPrice)[1] <- "SegGroup"
  listPrice <- listPrice[, c("SegGroup", "listPrice", "soldPrice", "PriceDiff",
                             "rent", "livingArea", "constructionYear")]
  # Perform rounding
  listPrice[, c(2:5)] <- round(listPrice[, c(2:5)], digits = 0)
  listPrice[, 6] <- round(listPrice[, 6], digits = 1)
  listPrice[, 7] <- signif(listPrice[, 7], digits = 4)
  return(listPrice)
}

GETAPI <- function(area = "stockholm", type = "lägenhet", sincesold = "",
                   FUN = "", limit = 250, offset = 0) {
  # ID info
  key <- "PRIVATE KEY"
  caller.ID <- "USERNAME"
  unix.timestamp <- as.integer(as.POSIXct(Sys.time()))
  random.string <- stri_rand_strings(n = 1, length = 16)
  # SHA1 hash: callerID + time + key + unique, 40-char hexadecimal
  hash.string <- paste0(caller.ID, unix.timestamp, key, random.string)
  hash.sha1 <- digest(hash.string, "sha1", serialize = FALSE)
  # Create URL
  api.string <- "https://api.booli.se/sold?q="
  url.string <- paste0(api.string, area, "&objectType=", type,
                       "&minSoldDate=", sincesold, FUN,
                       "&limit=", limit, "&offset=", offset,
                       "&callerId=", caller.ID, "&time=", unix.timestamp,
                       "&unique=", random.string, "&hash=", hash.sha1)
  # Parse JSON
  parsed.JSON <- fromJSON(txt = url.string)
  return(parsed.JSON$sold)
}
Running the code separately in the console is fine. So what could be wrong?
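A UseMethod() error of the form "no applicable method for 'round_any' applied to an object of class ..." means S3 dispatch happened on an unexpected class. So a first step (a hedged debugging sketch, not a confirmed diagnosis) is to check what actually reaches round_any inside the knitr session, and to qualify the call so that package loading or masking is ruled out:

# Inside BooliQuery(), before the rounding step:
str(lOriginal$livingArea)  # plyr::round_any has methods for numeric, POSIXct, Date

# Qualify the call so it cannot be shadowed by another attached package:
lOriginal$AreaSize <- plyr::round_any(as.numeric(lOriginal$livingArea), 10, floor)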
