curl_fetch_disk shutting down with no error message - R

The code below keeps shutting down after about a minute with no error message. Does anybody know why? I think it has to do with the function curl_fetch_disk...
library(dplyr)
library(curl)
main.dir <- "C:/Users/blue/Desktop/Arc"
liste.urls <- read.csv('C:/Users/blue/Desktop/Arc/test2.csv', header = FALSE)
subsub.dir <- 'hydro_l'
liste.urls$hydro_l <- paste0(liste.urls$V1, "/hydro_l/")
liste.urls$rep <- sapply(liste.urls, substring, 94, 100)
for (i in 1:length(liste.urls)) {
  url.download <- as.character(liste.urls$hydro_l[i+6])
  handle = new_handle(dirlistonly = TRUE)
  con = curl(url.download, "r", handle)
  tbl = read.table(con, stringsAsFactors = TRUE, fill = TRUE)
  close(con)
  head(tbl)
  liste.fichiers.urls <- paste0(url.download, tbl[, 1])
  fichiers = basename(liste.fichiers.urls)
  sub.dir <- liste.urls$rep[i+6]
  dir.create(file.path(main.dir, sub.dir))
  dir.create(file.path(main.dir, sub.dir, subsub.dir))
  setwd(file.path(main.dir, sub.dir, subsub.dir))
  for (j in 1:length(liste.fichiers.urls)) {
    curl_fetch_disk(liste.fichiers.urls[j], fichiers[j])
  }
}
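A diagnostic sketch, not a confirmed fix: curl_fetch_disk() does not stop on HTTP errors by itself, and a silent exit after a minute often points to a timeout or a dropped connection. Wrapping each download in tryCatch() with an explicit timeout on the handle should at least surface an error message. The timeout values and the fetch_one() helper below are assumptions for illustration:
library(curl)

# Hypothetical wrapper: report failures instead of exiting silently.
fetch_one <- function(url, dest) {
  h <- new_handle(timeout = 120, connecttimeout = 30)  # assumed limits
  tryCatch({
    res <- curl_fetch_disk(url, dest, handle = h)
    message(sprintf("%s -> HTTP %s", url, res$status_code))
  }, error = function(e) {
    message(sprintf("FAILED %s: %s", url, conditionMessage(e)))
  })
}

for (j in seq_along(liste.fichiers.urls)) {
  fetch_one(liste.fichiers.urls[j], fichiers[j])
}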

Related

Implement call retries with httr::RETRY() function in API call (R)

I use the UN Comtrade data API with R.
library(rjson)
get.Comtrade <- function(url = "http://comtrade.un.org/api/get?",
                         maxrec = 50000,
                         type = "C",
                         freq = "A",
                         px = "HS",
                         ps = "now",
                         r,
                         p,
                         rg = "all",
                         cc = "TOTAL",
                         fmt = "json")
{
  string <- paste(url
                  , "max=", maxrec, "&"   # maximum no. of records returned
                  , "type=", type, "&"    # type of trade (c = commodities)
                  , "freq=", freq, "&"    # frequency
                  , "px=", px, "&"        # classification
                  , "ps=", ps, "&"        # time period
                  , "r=", r, "&"          # reporting area
                  , "p=", p, "&"          # partner country
                  , "rg=", rg, "&"        # trade flow
                  , "cc=", cc, "&"        # classification code
                  , "fmt=", fmt           # format
                  , sep = "")
  if (fmt == "csv") {
    raw.data <- read.csv(string, header = TRUE)
    return(list(validation = NULL, data = raw.data))
  } else {
    if (fmt == "json") {
      raw.data <- fromJSON(file = string)
      data <- raw.data$dataset
      validation <- unlist(raw.data$validation, recursive = TRUE)
      ndata <- NULL
      if (length(data) > 0) {
        var.names <- names(data[[1]])
        data <- as.data.frame(t(sapply(data, rbind)))
        ndata <- NULL
        for (i in 1:ncol(data)) {
          data[sapply(data[, i], is.null), i] <- NA
          ndata <- cbind(ndata, unlist(data[, i]))
        }
        ndata <- as.data.frame(ndata)
        colnames(ndata) <- var.names
      }
      return(list(validation = validation, data = ndata))
    }
  }
}
However, sometimes it fails to connect to the server and I need to run the code several times before it starts working. The solution given here, to use the RETRY() function, which retries a request until it succeeds, seems attractive.
However, I have some difficulties implementing this function in the code given above. Has anybody used it before and knows how to adapt the code?
An API call using httr::RETRY could look like the following:
library(httr)
library(jsonlite)
res <- RETRY(
  verb = "GET",
  url = "http://comtrade.un.org/",
  path = "api/get",
  encode = "json",
  times = 3,
  query = list(
    max = 50000,
    type = "C",
    freq = "A",
    px = "HS",
    ps = "now",
    r = 842,
    p = "124,484",
    rg = "all",
    cc = "TOTAL",
    fmt = "json"
  )
)
# alternative: returns dataset as a `list`:
# parsed_content <- content(res, as = "parsed")
# returns dataset as a `data.frame`:
json_content <- content(res, as = "text")
parsed_content <- parse_json(json_content, simplifyVector = TRUE)
parsed_content$validation
parsed_content$dataset
I'd suggest rewriting the get.Comtrade function using httr:
get.Comtrade <- function(verb = "GET",
                         url = "http://comtrade.un.org/",
                         path = "api/get",
                         encode = "json",
                         times = 3,
                         max = 50000,
                         type = "C",
                         freq = "A",
                         px = "HS",
                         ps = "now",
                         r,
                         p,
                         rg = "all",
                         cc = "TOTAL",
                         fmt = "json") {
  res <- httr::RETRY(
    verb = verb,
    url = url,
    path = path,
    encode = encode,
    times = times,
    query = list(
      max = max,
      type = type,
      freq = freq,
      px = px,
      ps = ps,
      r = r,
      p = p,
      rg = rg,
      cc = cc,
      fmt = fmt
    )
  )
  jsonlite::parse_json(content(res, as = "text"), simplifyVector = TRUE)
}
s1 <- get.Comtrade(r = "842", p = "124,484", times = 5)
print(s1)
Please see this and this for more information on library(httr).
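As a small extension of the answer (a sketch, not part of the original): RETRY() also exposes backoff parameters such as pause_base and pause_cap, and if every attempt returns an HTTP error it still hands back the last response rather than throwing, so checking the status before parsing is a reasonable safeguard. The parameter values below are assumptions:
library(httr)
library(jsonlite)

res <- RETRY(
  verb = "GET",
  url = "http://comtrade.un.org/",
  path = "api/get",
  times = 5,        # assumed: up to 5 attempts
  pause_base = 2,   # assumed: exponential backoff starting around 2 seconds
  pause_cap = 30,   # assumed: never wait longer than 30 seconds between attempts
  query = list(r = 842, p = "124,484", fmt = "json")
)

stop_for_status(res)  # fail loudly if the last attempt still returned an error
parsed <- parse_json(content(res, as = "text"), simplifyVector = TRUE)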

Strange R package behaviour: "could not find function"

I have some functionality which works fine outside of a package. But when I put it into a package, run devtools::load_all(), and try to call one of the functions (DTL_similarity_search_results_fast), another function (DTL_similarity_search), which should be loaded by the package, is not found when it is called inside DTL_similarity_search_results_fast.
The code is:
messagef <- function(...) message(sprintf(...))
printf <- function(...) print(sprintf(...))
pattern_to_vec <- function(pattern, as_int = F, keep_list = FALSE) {
  ret <- strsplit(pattern, ",")
  if (length(pattern) == 1 && !keep_list)
    ret <- ret[[1]]
  if (as_int) {
    ret <- lapply(ret, as.integer)
  }
  ret
}
DTL_similarity_search <- function(search_pattern = "1,2,1,2,1,2,1,2",
transformation = "interval",
database_names = "dtl,wjazzd,omnibook",
metadata_filters = '{"dtl": {}, "wjazzd": {}, "esac": {}, "omnibook": {}}',
filter_category = "0",
minimum_similarity = 1.0,
max_edit_distance = NA,
max_length_difference = 0) {
url <- suppressWarnings(httr::modify_url("https://staging-dtl-pattern-api.hfm-weimar.de/", path = "/patterns/similar"))
if(is.na(max_edit_distance)){
max_edit_distance <- purrr::map_int(pattern_to_vec(search_pattern, keep_list = T), length) %>% min()
}
messagef("[DTL API] Starting search for %s", search_pattern)
resp <- suppressWarnings(httr::POST(url, body = list( n_gram = search_pattern,
transformation = transformation,
database_names = database_names,
metadata_filters = metadata_filters,
filter_category = filter_category,
minimum_similarity = minimum_similarity,
max_edit_distance = max_edit_distance,
max_length_difference = max_length_difference, filter_category = 0),
encode = "form"))
#browser()
#print(httr::content(resp, "text"))
if (httr::http_error(resp)) {
messagef(
"[DTL API] Similarity Search request failed [%s]\n%s\n<%s>",
httr::status_code(resp),
"",#parsed$message,
""#parsed$documentation_url
)
return(NULL)
}
parsed <- jsonlite::fromJSON(httr::content(resp, "text"), simplifyVector = FALSE)
messagef("[DTL API] Retrieved search ID %s of for pattern %s", parsed$search_id, search_pattern)
parsed$search_id
}
DTL_get_results <- function(search_id) {
url <- suppressWarnings(httr::modify_url("http://staging-dtl-pattern-api.hfm-weimar.de/", path = "/patterns/get"))
#messagef("[DTL API] Retrieving results for search_id %s", search_id)
resp <- suppressWarnings(httr::GET(url, query = list(search_id = search_id)))
if (httr::http_error(resp)) {
messagef(
"[DTL API] Similarity Search request failed [%s]\n%s\n<%s>",
httr::status_code(resp),
"",#parsed$message,
""#parsed$documentation_url
)
return(NULL)
}
print(httr::content(resp, "text"))
#browser()
parsed <- jsonlite::fromJSON(httr::content(resp, "text"), simplifyVector = FALSE)
messagef("[DTL API] Retrieved %s lines for search_id %s", length(parsed), search_id)
purrr::map_dfr(parsed, function(x){
if(is.null(x$within_single_phrase)){
x$within_single_phrase <- FALSE
}
#browser()
tibble::as_tibble(x) %>% dplyr::mutate(melid = as.character(melid))
})
}
DTL_similarity_search_results <- function(search_patterns = "1,2,1,2,1,2,1,2",
transformation = "interval",
database_names = "dtl,wjazzd,omnibook",
metadata_filters = '{"dtl": {}, "wjazzd": {}, "esac": {}, "omnibook": {}}',
filter_category = "0",
minimum_similarity = 1.0,
max_edit_distance = NA,
max_length_difference = 0) {
results <- tibble::tibble()
if(is.na(max_edit_distance)){
max_edit_distance <- purrr::map_int(pattern_to_vec(search_patterns, keep_list = T), length) %>% min()
}
for(pattern in search_patterns){
print('DTL_similarity_search')
print(DTL_similarity_search)
search_id <- DTL_similarity_search(pattern,
transformation,
database_names,
metadata_filters,
filter_category,
minimum_similarity,
max_edit_distance = max_edit_distance,
max_length_difference = max_length_difference)
if(is.null(search_id)){
next
}
ret <- DTL_get_results(search_id)
if(!is.null(ret) && nrow(ret) > 0){
ret$search_pattern <- pattern
}
results <- dplyr::bind_rows(results, ret)
}
#browser()
if(nrow(results))
results %>% dplyr::distinct(melid, start, length, .keep_all = T)
}
DTL_similarity_search_results_fast <- function(search_patterns = "1,2,1,2,1,2,1,2",
transformation = "interval",
database_names = "dtl,wjazzd,omnibook",
metadata_filters = '{"dtl": {}, "wjazzd": {}, "esac": {}, "omnibook": {}}',
filter_category = "0",
minimum_similarity = 1.0,
max_edit_distance = NA,
max_length_difference = 0){
if(is.na(max_edit_distance)){
max_edit_distance <- purrr::map_int(pattern_to_vec(search_patterns, keep_list = T), length) %>% min()
}
future::plan(future::multisession)
results <- furrr::future_map_dfr(search_patterns, function(pattern){
print('DTL_similarity_search2')
search_id <- DTL_similarity_search(pattern,
transformation,
database_names,
metadata_filters,
filter_category,
minimum_similarity,
max_edit_distance = max_edit_distance,
max_length_difference = max_length_difference)
if(is.null(search_id)){
return(tibble::tibble())
}
ret <- DTL_get_results(search_id)
if(!is.null(ret) && nrow(ret) > 0 )ret$search_pattern <- pattern
ret
})
#browser()
results %>% dplyr::distinct(melid, start, length, .keep_all = TRUE)
}
Then after load_all() when I try to run:
res <- DTL_similarity_search_results_fast()
I get:
Error in DTL_similarity_search(pattern, transformation, database_names, :
  could not find function "DTL_similarity_search"
but running a similar, different function using the same procedure works:
res <- DTL_similarity_search_results()
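No answer is recorded here, but one diagnostic sketch under an assumption: with future::plan(multisession) the workers are fresh R processes, and functions that are only available through devtools::load_all() in the main session may not be exported to them automatically. Passing the helpers explicitly as globals through furrr_options() is one way to test that hypothesis; the search_patterns value below is made up for illustration.
library(furrr)
future::plan(future::multisession)

search_patterns <- c("1,2,1,2,1,2,1,2")  # hypothetical input

# Sketch: hand the package-internal helpers to the workers explicitly,
# since load_all() only makes them visible in the main session.
results <- furrr::future_map_dfr(
  search_patterns,
  function(pattern) {
    search_id <- DTL_similarity_search(pattern)
    if (is.null(search_id)) return(tibble::tibble())
    DTL_get_results(search_id)
  },
  .options = furrr::furrr_options(
    globals = list(
      DTL_similarity_search = DTL_similarity_search,
      DTL_get_results       = DTL_get_results,
      pattern_to_vec        = pattern_to_vec,
      messagef              = messagef
    )
  )
)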

Error in `V<-`(`*tmp*`, value = `*vtmp*`) : invalid indexing

I used the bibliometrix package in R and want to plot some useful graphs.
library(bibliometrix)
??bibliometrix
D<-readFiles("E:\\RE\\savedrecs.txt")
M <- convert2df(D,dbsource = "isi", format= "plaintext")
results <- biblioAnalysis(M ,sep = ";" )
S<- summary(object=results,k=10, pause=FALSE)
plot(x=results,k=10,pause=FALSE)
options(width=100)
S <- summary(object = results, k = 10, pause = FALSE)
NetMatrix <- biblioNetwork(M1, analysis = "co-occurrences", network = "author_keywords", sep = ";")
S <- normalizeSimilarity(NetMatrix, type = "association")
net <- networkPlot(S, n = 200, Title = "co-occurrence network",type="fruchterman", labelsize = 0.7, halo = FALSE, cluster = "walktrap",remove.isolates=FALSE, remove.multiple=FALSE, noloops=TRUE, weighted=TRUE)
res <- thematicMap(net, NetMatrix, S)
plot(res$map)
But the networkPlot() call shows the error
Error in `V<-`(`*tmp*`, value = `*vtmp*`) : invalid indexing
Also, I cannot do the CR; it always shows unlistCR, and I cannot use the NetMatrix function either.
Can someone help me, please?
The problem is in the data itself, not in the code you presented. When I downloaded the data from bibliometrix.com and changed M1 to M (a typo?) in the biblioNetwork call, everything worked perfectly. Please see the code below:
library(bibliometrix)
# Plot bibliometric analysis results
D <- readFiles("http://www.bibliometrix.org/datasets/savedrecs.txt")
M <- convert2df(D, dbsource = "isi", format= "plaintext")
results <- biblioAnalysis(M, sep = ";")
S <- summary(results)
plot(x = results, k = 10, pause = FALSE)
# Plot Bibliographic Network
options(width = 100)
S <- summary(object = results, k = 10, pause = FALSE)
NetMatrix <- biblioNetwork(M, analysis = "co-occurrences", network = "author_keywords", sep = ";")
S <- normalizeSimilarity(NetMatrix, type = "association")
net <- networkPlot(S, n = 200, Title = "co-occurrence network", type = "fruchterman",
labelsize = 0.7, halo = FALSE, cluster = "walktrap",
remove.isolates = FALSE, remove.multiple = FALSE, noloops = TRUE, weighted = TRUE)
# Plot Thematic Map
res <- thematicMap(net, NetMatrix, S)
str(M)
plot(res$map)

twitteR how to search for two hashtags

Is it possible to look for two different hashtags in one searchTwitter command?
Example
my_h <- as.POSIXlt(Sys.time())
my_h <- strptime(my_h, format = "%Y-%m-%d %H:%M:%S", tz = "CET")
hrs <- function(u) {
  x <- u * 3600
  return(x)
}
my_h <- my_h - hrs(24)
my_h <- data.frame(day = strptime(my_h, "%Y-%m-%d", tz = ""))
I want to look for the hashtags #dn and #park.
I can do it separately, as below:
tweets<-twListToDF(searchTwitter("#dn", n=5000, since = as.character(my_h$day)))
write.table(tweets, "all_dn_tweets.csv", row.names = F, append = T, sep = ";", col.names = F)
tweets<-twListToDF(searchTwitter("#park", n=5000, since = as.character(my_h$day)))
write.table(tweets, "all_park_tweets.csv", row.names = F, append = T, sep = ";", col.names = F)
The question is whether these two can be squeezed into one.
Try something like this
hashtags <- c("#metallica", "#slayer")
needle <- paste(hashtags, collapse = " OR ")
tweets <- searchTwitter(needle, n = 10)
df <- twListToDF(tweets)
for (hashtag in hashtags) {
  write.csv(df[grep(hashtag, tolower(df$text), fixed = TRUE), ], paste0(hashtag, ".csv"))
}
Using tolower on tweets may need some error handling; you'll find plenty of information on that on the web.
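One sketch of what that error handling could look like (an assumption, not part of the original answer): tweet text often contains bytes that make tolower() fail with an "invalid multibyte string" error, so converting to UTF-8 first and substituting unconvertible bytes usually avoids it. The safe_tolower() helper is hypothetical:
# Hypothetical helper: lower-case tweet text without choking on odd encodings.
safe_tolower <- function(x) {
  tolower(iconv(x, from = "", to = "UTF-8", sub = "byte"))
}

for (hashtag in hashtags) {
  matches <- grep(hashtag, safe_tolower(df$text), fixed = TRUE)
  write.csv(df[matches, ], paste0(hashtag, ".csv"))
}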
I found the answer posted here more elegant. For the present case it basically is
hashtags <- '#metallica + #slayer'
tweets <- searchTwitter(hashtags, n = 10, lang = 'en', retryOnRateLimit = 100)
tweetsDF <- twListToDF(tweets)

Knitr - Error in UseMethod("round_any"): no applicable method for round_any applied

I'm trying to output some of my code results in knitr. The strange thing is that the code generates the error in the title, but running round_any() separately and outputting it in knitr is fine.
knitr code
```{r, echo = FALSE, message=FALSE, warning=FALSE}
source("BooliQuery.R")
BooliQuery()
```
My code
library(digest)
library(stringi)
library(jsonlite)
library(plyr)
BooliQuery <- function(area = "stockholm", type = "lägenhet", sincesold = "", FUN = "", limit = 250, offset = 0, mode = 1) {
  # raw data fetch + adjust
  lOriginal <- GETAPI(area, type, sincesold, FUN, limit, offset)
  lOriginal$AreaSize <- round_any(lOriginal$livingArea, 10, floor)
  lOriginal$PriceDiff <- lOriginal$soldPrice - lOriginal$listPrice
  # Create frame overview
  Overview.Return <- Frame.Overview(lOriginal)
  # Mode - return selector
  ifelse(mode == 1, return(Overview.Return), return(lOriginal))
}
Frame.Overview <- function(lOriginal) {
  # Aggregate mean
  listPrice <- aggregate(lOriginal, list(lOriginal$AreaSize), FUN = mean, na.rm = TRUE)
  colnames(listPrice)[1] <- "SegGroup"
  listPrice <- listPrice[, c("SegGroup", "listPrice", "soldPrice", "PriceDiff", "rent", "livingArea", "constructionYear")]
  # Perform rounding
  listPrice[, c(2:5)] <- round(listPrice[, c(2:5)], digits = 0)
  listPrice[, 6] <- round(listPrice[, 6], digits = 1)
  listPrice[, 7] <- signif(listPrice[, 7], digits = 4)
  return(listPrice)
}
GETAPI <- function(area = "stockholm", type = "lägenhet", sincesold = "", FUN = "", limit = 250, offset = 0) {
  # ID info
  key <- "PRIVATE KEY"
  caller.ID <- "USERNAME"
  #//
  unix.timestamp <- as.integer(as.POSIXct(Sys.time()))
  random.string <- stri_rand_strings(n = 1, length = 16)
  # Sha1 hash: callerID + time + key + unique, 40-char hexadecimal
  hash.string <- paste0(caller.ID, unix.timestamp, key, random.string)
  hash.sha1 <- digest(hash.string, "sha1", serialize = FALSE)
  # Create URL
  api.string <- "https://api.booli.se/sold?q="
  url.string <- paste0(api.string, area, "&objectType=", type, "&minSoldDate=", sincesold, FUN, "&limit=", limit, "&offset=", offset, "&callerId=", caller.ID, "&time=",
                       unix.timestamp, "&unique=", random.string, "&hash=", hash.sha1)
  # Parse JSON
  parsed.JSON <- fromJSON(txt = url.string)
  return(parsed.JSON$sold)
}
Running the code separately in the console is fine. So what could be wrong?
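No answer is recorded here, but a short diagnostic sketch under an assumption: a UseMethod error like this usually means round_any() was found but was handed an object it has no method for (for example NULL or a character column), which can happen if the API call returns different data inside the knitr session than in the console. Checking the input and calling the function with an explicit namespace should narrow it down:
# Sketch: check whether the input, rather than the function, is the problem.
source("BooliQuery.R")
lOriginal <- GETAPI()                       # the same call BooliQuery() makes internally
str(lOriginal$livingArea)                   # NULL or non-numeric here would explain the error
plyr::round_any(lOriginal$livingArea, 10, floor)  # explicit namespace rules out masking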
