Pagination loop is stuck at x > 99 - r

I am scraping some data from an API, and my code works just fine as long as I extract pages 0 to 98. Whenever my loop reaches 99, I get an error Error: Internal Server Error (HTTP 500)..
Tried to find an answer but I am only proficient in R and C# and cannot understand Python or other.
keywords = c('ABC OR DEF')
parameters <- list(
'q' = keywords,
num_days = 1,
language = 'en',
num_results = 100,
page = 0,
'api_key' = '123456'
)
response <- httr::GET(get_url, query = parameters)
# latest_page_number <- get_last_page(parsed)
httr::stop_for_status(response)
content <- httr::content(response, type = 'text', encoding = 'utf-8')
parsed <- jsonlite::fromJSON(content, simplifyVector = FALSE, simplifyDataFrame = TRUE)
num_pages = round(parsed[["total_results"]]/100)
print(num_pages)
result = parsed$results
for(x in 1:(num_pages))
{
print(x)
parameters <- list(
'q' = keywords,
page = x,
num_days = 7,
language = 'en',
num_results = 100,
'api_key' = '123456'
)
response <- httr::GET(get_url, query = parameters)
httr::stop_for_status(response)
content <- httr::content(response, type = 'text', encoding = 'utf-8')
# content <- httr::content(response)
parsed <- jsonlite::fromJSON(content, simplifyVector = FALSE, simplifyDataFrame = TRUE)
Sys.sleep(0.2)
result = rbind(result,parsed$results[,colnames(result)])
}

Related

Implement call retries with httr::RETRY() function in API call (R)

I use the UN Comtrade data API with R.
library(rjson)
get.Comtrade <- function(url="http://comtrade.un.org/api/get?"
,maxrec=50000
,type="C"
,freq="A"
,px="HS"
,ps="now"
,r
,p
,rg="all"
,cc="TOTAL"
,fmt="json"
)
{
string<- paste(url
,"max=",maxrec,"&" #maximum no. of records returned
,"type=",type,"&" #type of trade (c=commodities)
,"freq=",freq,"&" #frequency
,"px=",px,"&" #classification
,"ps=",ps,"&" #time period
,"r=",r,"&" #reporting area
,"p=",p,"&" #partner country
,"rg=",rg,"&" #trade flow
,"cc=",cc,"&" #classification code
,"fmt=",fmt #Format
,sep = ""
)
if(fmt == "csv") {
raw.data<- read.csv(string,header=TRUE)
return(list(validation=NULL, data=raw.data))
} else {
if(fmt == "json" ) {
raw.data<- fromJSON(file=string)
data<- raw.data$dataset
validation<- unlist(raw.data$validation, recursive=TRUE)
ndata<- NULL
if(length(data)> 0) {
var.names<- names(data[[1]])
data<- as.data.frame(t( sapply(data,rbind)))
ndata<- NULL
for(i in 1:ncol(data)){
data[sapply(data[,i],is.null),i]<- NA
ndata<- cbind(ndata, unlist(data[,i]))
}
ndata<- as.data.frame(ndata)
colnames(ndata)<- var.names
}
return(list(validation=validation,data =ndata))
}
}
}
However, sometimes it fails to connect server and I need to run the code several times to start working. Solution given here, to use Retry() function, which retries a request until it succeeds, seems attractive.
However, I have some difficulties implementing this function in the code given above. has anybody used it before and knows how to recode it?
An API call using httr::RETRY could look like the following:
library(httr)
library(jsonlite)
res <- RETRY(
verb = "GET",
url = "http://comtrade.un.org/",
path = "api/get",
encode = "json",
times = 3,
query = list(
max = 50000,
type = "C",
freq = "A",
px = "HS",
ps = "now",
r = 842,
p = "124,484",
rg = "all",
cc = "TOTAL",
fmt = "json"
)
)
# alternativ: returns dataset as a `list`:
# parsed_content <- content(res, as = "parsed")
# returns dataset as a `data.frame`:
json_content <- content(res, as = "text")
parsed_content <- parse_json(json_content, simplifyVector = TRUE)
parsed_content$validation
parsed_content$dataset
I'd suggest rewriting the get.Comtrade function using httr:
get.Comtrade <- function(verb = "GET",
url = "http://comtrade.un.org/",
path = "api/get",
encode = "json",
times = 3,
max = 50000,
type = "C",
freq = "A",
px = "HS",
ps = "now",
r,
p,
rg = "all",
cc = "TOTAL",
fmt = "json") {
res <- httr::RETRY(
verb = verb,
url = url,
path = path,
encode = encode,
times = times,
query = list(
max = max,
type = type,
freq = freq,
px = px,
ps = ps,
r = r,
p = p,
rg = rg,
cc = cc,
fmt = fmt
)
)
jsonlite::parse_json(content(res, as = "text"), simplifyVector = TRUE)
}
s1 <- get.Comtrade(r = "842", p = "124,484", times = 5)
print(s1)
Please see this and this for more information on library(httr).

Only Table in rpivotTable

I'm using the rpivotTable package in Shiny application and I'd like to have only the choice of 'Table' for the users (no charts)
The RenderName argument is only used to choose the default display...
output$pivot <- renderRpivotTable(
rpivotTable(iris,
rendererName = "Table" )
)
Many thanks in advance !
There are multiple issues here.
you can specify renderers via the anonymos renderers argument in rpivotTable(). I have the JS code form here.
however, there is a bug when only selecting one option. In this case, rpivotTable() wraps the argument in a list again (see the Map() call in the original function code) and the forwarding to JS fails.
Therefore, I accounted for this issue and extended the function a bit. Play around with aggregators/renderers to see how it behaves differently to the original rpivotTable() function.
# define own function
my_rpivotTable <- function (data, rows = NULL, cols = NULL, aggregatorName = NULL,
vals = NULL, rendererName = NULL, sorter = NULL, exclusions = NULL,
inclusions = NULL, locale = "en", subtotals = FALSE, ...,
width = 800, height = 600, elementId = NULL)
{
if (length(intersect(class(data), c("data.frame", "data.table",
"table", "structable", "ftable"))) == 0) {
stop("data should be a data.frame, data.table, or table",
call. = F)
}
if (length(intersect(c("table", "structable", "ftable"),
class(data))) > 0)
data <- as.data.frame(data)
params <- list(rows = rows, cols = cols, aggregatorName = aggregatorName,
vals = vals, rendererName = rendererName, sorter = sorter,
...)
params <- Map(function(p) {
# added to the class check -------------------------------------------------
if (length(p) == 1 && class(p[[1]]) != "JS_EVAL") {
p = list(p)
}
return(p)
}, params)
par <- list(exclusions = exclusions, inclusions = inclusions)
params <- c(params, par)
params <- Filter(Negate(is.null), params)
x <- list(data = data, params = params, locale = locale,
subtotals = subtotals)
htmlwidgets::createWidget(name = "rpivotTable", x, width = width,
height = height, elementId = elementId, package = "rpivotTable")
}
# create the pivot table
my_rpivotTable(
expand.grid(LETTERS, 1:3),
aggregatorName = "Count",
aggregators = list(Sum = htmlwidgets::JS('$.pivotUtilities.aggregators["Sum"]'),
Count = htmlwidgets::JS('$.pivotUtilities.aggregators["Count"]')),
rendererName = "fancyTable",
renderers = list(fancyTable = htmlwidgets::JS('$.pivotUtilities.renderers["Table"]'))
)

Error in `V<-`(`*tmp*`, value = `*vtmp*`) : invalid indexing

I used the bibliometrix function in R, and want to plot some useful graphs.
library(bibliometrix)
??bibliometrix
D<-readFiles("E:\\RE\\savedrecs.txt")
M <- convert2df(D,dbsource = "isi", format= "plaintext")
results <- biblioAnalysis(M ,sep = ";" )
S<- summary(object=results,k=10, pause=FALSE)
plot(x=results,k=10,pause=FALSE)
options(width=100)
S <- summary(object = results, k = 10, pause = FALSE)
NetMatrix <- biblioNetwork(M1, analysis = "co-occurrences", network = "author_keywords", sep = ";")
S <- normalizeSimilarity(NetMatrix, type = "association")
net <- networkPlot(S, n = 200, Title = "co-occurrence network",type="fruchterman", labelsize = 0.7, halo = FALSE, cluster = "walktrap",remove.isolates=FALSE, remove.multiple=FALSE, noloops=TRUE, weighted=TRUE)
res <- thematicMap(net, NetMatrix, S)
plot(res$map)
But in the net <- networkPlot(S, n = 200, Title = "co-occurrence network",type="fruchterman", labelsize = 0.7, halo = FALSE, cluster = "walktrap",remove.isolates=FALSE, remove.multiple=FALSE, noloops=TRUE, weighted=TRUE), it shows error
Error in V<-(*tmp*, value = *vtmp*) : invalid indexing
. Also I cannot do the CR, it always shows unlistCR. I cannot use the NetMatrix function neither.
Some help me plsssssssss
The problem is in the data itself not in the code you presented. When I downloaded the data from bibliometrix.com and changed M1 to M (typo?) in biblioNetwork function call everything worked perfectly. Please see the code below:
library(bibliometrix)
# Plot bibliometric analysis results
D <- readFiles("http://www.bibliometrix.org/datasets/savedrecs.txt")
M <- convert2df(D, dbsource = "isi", format= "plaintext")
results <- biblioAnalysis(M, sep = ";")
S <- summary(results)
plot(x = results, k = 10, pause = FALSE)
# Plot Bibliographic Network
options(width = 100)
S <- summary(object = results, k = 10, pause = FALSE)
NetMatrix <- biblioNetwork(M, analysis = "co-occurrences", network = "author_keywords", sep = ";")
S <- normalizeSimilarity(NetMatrix, type = "association")
net <- networkPlot(S, n = 200, Title = "co-occurrence network", type = "fruchterman",
labelsize = 0.7, halo = FALSE, cluster = "walktrap",
remove.isolates = FALSE, remove.multiple = FALSE, noloops = TRUE, weighted = TRUE)
# Plot Thematic Map
res <- thematicMap(net, NetMatrix, S)
str(M)
plot(res$map)

Knitr - Error in usemethod("round_any"): no applicable method for round_any applied

I'm trying to output some of my code results in knitr. Now the strange thing is, the code generates the error in the title. But running round_any() seperately and outputting it in knitr is fine.
knitr code
```{r, echo = FALSE, message=FALSE, warning=FALSE}
source("BooliQuery.R")
BooliQuery()
```
My code
library(digest)
library(stringi)
library(jsonlite)
library(plyr)
BooliQuery <- function(area = "stockholm", type="lägenhet", sincesold = "", FUN = "", limit = 250, offset = 0, mode = 1) {
#raw data fetch + adjust.
lOriginal <- GETAPI(area, type, sincesold, FUN, limit, offset)
lOriginal$AreaSize <- round_any(lOriginal$livingArea, 10, floor)
lOriginal$PriceDiff <- lOriginal$soldPrice - lOriginal$listPrice
#Create frame overview
Overview.Return <- Frame.Overview(lOriginal)
#Mode - return selector
ifelse( mode == 1, return (Overview.Return), return (lOriginal) )
}
Frame.Overview <- function(lOriginal) {
#Aggregate mean
listPrice <- aggregate(lOriginal, list(lOriginal$AreaSize), FUN = mean, na.rm = TRUE)
colnames(listPrice)[1] <- "SegGroup"
listPrice <- listPrice[, c("SegGroup", "listPrice", "soldPrice", "PriceDiff", "rent", "livingArea", "constructionYear") ]
#Perform Rounding
listPrice[, c(2:5)] <- round(listPrice[,c(2:5)], digits = 0)
listPrice[, 6] <- round(listPrice[, 6], digits = 1)
listPrice[, 7] <- signif(listPrice[,7], digits = 4)
return(listPrice)
}
GETAPI <- function(area = "stockholm", type="lägenhet", sincesold = "", FUN = "", limit = 250, offset = 0) {
#ID Info
key <- "PRIVATE KEY"
caller.ID <- "USERNAME"
#//
unix.timestamp <- as.integer( as.POSIXct(Sys.time()) )
random.string <- stri_rand_strings( n = 1, length = 16)
#Sha1-Hash: CallerID + time + key + unique, 40-char hexadecimal
hash.string <- paste0(caller.ID, unix.timestamp, key, random.string)
hash.sha1 <- digest(hash.string,"sha1",serialize=FALSE)
#Create URL
api.string <- "https://api.booli.se/sold?q="
url.string <- paste0(api.string, area, "&objectType=" , type , "&minSoldDate=", sincesold, FUN, "&limit=", limit, "&offset=", offset,"&callerId=", caller.ID, "&time=" ,
unix.timestamp, "&unique=", random.string, "&hash=", hash.sha1)
#Parse JSON
parsed.JSON <- fromJSON(txt = url.string)
return(parsed.JSON$sold)
}
Running the code seperately in console is fine. So what could be wrong?

Variable Names in Environments

I wrote 2 functions to archive objects from R (with their names - that's really important for me) and, if an object is of class lm, to archive data used to conduct this model.
My code is below :
archive_object_and_data <- function(object, archiveData = TRUE, rememberName = TRUE){
object <- deparse(substitute(object))
md5hash <- digest(object)
dir <- paste0(getwd(), "/")
dir.create(file.path(dir, md5hash), showWarnings = FALSE)
if (rememberName){
save( file = paste0(dir, md5hash, "/obj.rda"), ascii = TRUE, list = objectName,
envir = parent.frame(2))
}else{
save( file = paste0(dir, md5hash, "/obj2.rda"), ascii = TRUE, list =objectName,
envir = parent.frame(5))}
if ( archiveData )
archiveDataFromObj( object, md5hash, changeBool = FALSE )
}
#
archiveDataFromObj <- function (object, md5hash, changeBool = TRUE)
UseMethod("archiveDataFromObj")
archiveDataFromObj.default <- function(object, md5hash, changeBool = TRUE) {
}
archiveDataFromObj.lm <- function(object, md5hash, changeBool = TRUE) {
extractedDF <- object$model
md5hashDF <- archive_object_and_data(extractedDF, rememberName = changeBool)
}
And the object I want to archive looks like this :
data( iris )
test_data <- iris[,-5]
model2 <- lm(Sepal.Length~. , data=test_data)
archive_object_and_data(model2)
And error appears like this :
Error in save(file = paste0(dir, md5hash, "/obj2.rda"), ascii = TRUE, :
object ‘extractedDF’ not found
6 stop(sprintf(ngettext(n, "object %s not found", "objects %s not found"),
paste(sQuote(list[!ok]), collapse = ", ")), domain = NA)
5 save(file = paste0(dir, md5hash, "/obj2.rda"), ascii = TRUE,
list = objectName, envir = parent.frame(5))
4 archive_object_and_data(extractedDF, rememberName = changeBool) at archiveDataFromObj.R
3 archiveDataFromObj.lm(object, md5hash, changeBool = FALSE) at archiveDataFromObj.R#1
2 archiveDataFromObj(object, md5hash, changeBool = FALSE) at archive_object_and_data.R
1 archive_object_and_data(model2)
Can any1 help me with this problem?
It looks like there's a problem with that line :(
}else{
save( file = paste0(dir, md5hash, "/obj2.rda"), ascii = TRUE, list =objectName,
envir = parent.frame(5))}

Resources