How to implement async calls in R httpuv startServer?

The R httpuv startServer function should support async processing in the call portion of the app parameter, but I'm not able to get it to work. Does anyone know how to do this? The example below won't work, but it shows the idea of what I'm trying to do: run each request (or the requests for a specific page) asynchronously, so one page can load while another request is still processing.
startServer(
  host,
  port,
  app = list(
    call = function(req) {
      # Copy the needed fields out of `req` up front; the request
      # object itself is not available inside the future
      req <- list(
        "REQUEST_METHOD" = req$REQUEST_METHOD,
        "SCRIPT_NAME" = req$SCRIPT_NAME,
        "PATH_INFO" = req$PATH_INFO,
        "QUERY_STRING" = req$QUERY_STRING,
        "SERVER_NAME" = req$SERVER_NAME,
        "SERVER_PORT" = req$SERVER_PORT,
        "HEADERS" = req$HEADERS,
        "rook.input" = req[["rook.input"]]$read_lines()
      )
      future_promise({
        if (req$PATH_INFO %in% valid_dynamic_paths) {
          x <- eval(dynamic[[req$PATH_INFO]][req$REQUEST_METHOD])
          list(
            status = x[["status"]],
            headers = x[["headers"]],
            body = x[["body"]]
          )
        } else {
          list(
            status = 404,
            headers = list(
              'Content-Type' = 'text/html'
            ),
            body = "404. Page not found."
          )
        }
      })
    },
    staticPaths = static
  )
)

I was able to get something similar to work. The code below shows the gist of it:
# fork a process for each new request
future::plan(future::multicore)

httpuv::runServer("0.0.0.0", 8080, list(
  call = function(req) {
    # `as.promise` is necessary, because `httpuv` is using `is.promise`
    # under the hood to act differently. Unfortunately `is.promise` returns
    # `FALSE` for a `future`.
    promises::as.promise(
      future::future({
        Sys.sleep(5)
        # Respond with HTTP 200 OK
        list(
          status = 200,
          body = "Slept for 5 seconds",
          headers = list(
            # Content-Type is important, otherwise you will run
            # into a "not compatible with STRSXP" error.
            "content-type" = "text/plain"
          )
        )
      })
    )
  }
))
Calling the server with two requests at (nearly) the same time will show that you wait only 5 seconds for both requests, not 5 seconds for one and 10 for the other.
time curl -s localhost:8080 > /dev/null &
time curl -s localhost:8080 > /dev/null
# After 5 seconds you should see output similar to the following:
# real 0m5.089s
# user 0m0.011s
# sys 0m0.010s
# real 0m5.112s
# user 0m0.020s
# sys 0m0.024s
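Since this answer was written, the promises package gained future_promise() (promises >= 1.2.0), which returns a real promise, so the as.promise() wrapper should no longer be necessary. A sketch of the same server using it (multisession is chosen here so the sketch also runs on Windows):

library(promises)
future::plan(future::multisession)

httpuv::runServer("0.0.0.0", 8080, list(
  call = function(req) {
    # future_promise() returns a promise directly, so httpuv's
    # is.promise() check succeeds without an explicit conversion
    future_promise({
      Sys.sleep(5)
      list(
        status = 200L,
        headers = list("Content-Type" = "text/plain"),
        body = "Slept for 5 seconds"
      )
    })
  }
))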

Related

R - mocking API requests with `gh` package

I am trying to mock the output of a gh API request:
httptest2::with_mock_dir("gh", {
  test_that("api works", {
    gh::gh("GET /repos/r-lib/gh")
  })
})
I am trying to set up testing for custom functions that routinely make API calls to GitHub and I am using gh to make these requests. I am following this tutorial as guidance: https://books.ropensci.org/http-testing/
However, no directory is created when this function is run. Is there any way to capture the output of gh::gh and store it as a mock API return, so that I can run my tests without needing GitHub authentication or even an internet connection?
httptest2 is specifically designed to test httr2 requests:
This package helps with writing tests for packages that use httr2
Unfortunately, gh uses httr:
Imports:
    cli (>= 3.0.1),
    gitcreds,
    httr (>= 1.2),
    ini,
    jsonlite
This means that you can't directly use httptest2 with gh.
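If you would rather keep using gh, note that httr has its own mocking counterpart, httptest. A sketch under that assumption (record real responses once, then replay them offline; httptest intercepts the httr calls that gh makes under the hood):

library(httptest)

# Run once with network access and GitHub auth to record fixtures
capture_requests({
  gh::gh("GET /repos/r-lib/gh")
})

# Replay the recorded responses offline in tests
with_mock_api({
  test_that("api works", {
    resp <- gh::gh("GET /repos/r-lib/gh")
    expect_equal(resp$full_name, "r-lib/gh")
  })
})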
However, using the gh source code, you can extract the parameters of the GET request that gh sends to httr:
gh_get <- function(endpoint, ..., per_page = NULL, .token = NULL, .destfile = NULL,
                   .overwrite = FALSE, .api_url = NULL, .method = "GET",
                   .limit = NULL, .accept = "application/vnd.github.v3+json",
                   .send_headers = NULL, .progress = TRUE, .params = list()) {
  params <- c(list(...), .params)
  params <- gh:::drop_named_nulls(params)
  if (is.null(per_page)) {
    if (!is.null(.limit)) {
      per_page <- max(min(.limit, 100), 1)
    }
  }
  if (!is.null(per_page)) {
    params <- c(params, list(per_page = per_page))
  }
  req <- gh:::gh_build_request(
    endpoint = endpoint, params = params,
    token = .token, destfile = .destfile,
    overwrite = .overwrite, accept = .accept,
    send_headers = .send_headers,
    api_url = .api_url, method = .method
  )
  req
}
req <- gh_get("GET /repos/r-lib/gh")
req
#$method
#[1] "GET"
#$url
#[1] "https://api.github.com/repos/r-lib/gh"
#$headers
# User-Agent Accept
# "https://github.com/r-lib/gh" "application/vnd.github.v3+json"
#$query
#NULL
#$body
#NULL
#$dest
#<request>
#Output: write_memory
With the example you provided, this allows you to use httr2 to send the same request:
library(httr2)

resp_httr2 <- request(base_url = req$url) %>%
  req_perform() %>%
  resp_body_json()
If you are mainly interested in the JSON content, the results are the same; only the attributes differ:
resp_gh <- gh::gh("GET /repos/r-lib/gh")
all.equal(resp_gh,resp_httr2,check.attributes=FALSE)
#[1] TRUE
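To avoid rebuilding the httr2 call by hand each time, the translation could be wrapped in a small helper. This is a hypothetical sketch (gh_as_httr2 is not part of gh); it assumes req is the object returned by gh:::gh_build_request() above:

library(httr2)

# Hypothetical helper: build the equivalent httr2 request from the
# object gh prepared, carrying over the URL, method and headers
gh_as_httr2 <- function(req) {
  request(req$url) %>%
    req_method(req$method) %>%
    req_headers(!!!as.list(req$headers))
}

resp <- gh_as_httr2(req) %>%
  req_perform() %>%
  resp_body_json()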
If you want to use httptest2, switching to httr2 would work:
with_mock_dir("gh", {
test_that("api works", {
resp <- request(base_url=req$url) %>%
req_perform() %>%
resp_body_json()
expect_equal(resp$full_name,"r-lib/gh")})
})
#Test passed 🎉
#[1] TRUE
Offline testing now works, because the gh/api.github.com directory was created by httptest2.
Maybe you can take inspiration from tests/testthat/test-mock-repos.R:
res <- gh(
  TMPL("/repos/{owner}/{repo}"),
  owner = "gh-testing",
  repo = test_repo,
  .token = tt()
)
expect_equal(res$name, test_repo)
expect_equal(res$description, "Test repo for gh")
expect_equal(res$homepage, "https://github.com/r-lib/gh")
expect_false(res$private)
expect_false(res$has_issues)
expect_false(res$has_wiki)
A GET method would not create any directory.

Add logs in an R Plumber API

I'm creating some APIs with R and Plumber. I configure the entrypoint.R like this:
library(plumber)
library(logger)

# Create log directory
log_dir <- "/api/logs"
if (!fs::dir_exists(log_dir)) fs::dir_create(log_dir)
log_appender(appender_tee(tempfile("plumber_", log_dir, ".log")))

convert_empty <- function(string) {
  if (string == "") {
    "-"
  } else {
    string
  }
}

# Initiate
# pr <- plumber::plumb(here::here('plumber.R')) # local test
pr <- plumber::plumb(rev(commandArgs())[1]) # via Docker
args <- list(host = '0.0.0.0', port = 8000)
if (packageVersion('plumber') >= '1.0.0') {
  pr$setDocs(TRUE)
} else {
  args$swagger <- TRUE
}

# Create log hook
pr$registerHooks(
  list(
    preroute = function() {
      # Start timer for log info
      tictoc::tic()
    },
    postroute = function(req, res) {
      end <- tictoc::toc(quiet = TRUE)
      # Log details about the request and the response
      # log_info('{convert_empty(req$REMOTE_ADDR)} "{convert_empty(req$HTTP_USER_AGENT)}" {convert_empty(req$HTTP_HOST)} {convert_empty(req$REQUEST_METHOD)} {convert_empty(req$PATH_INFO)} {convert_empty(res$status)} {round(end$toc - end$tic, digits = getOption("digits", 5))}')
      log_info('{convert_empty(req$REMOTE_ADDR)}')
    }
  )
)

# Run entrypoint
do.call(pr$run, args)
I can't get the full logs, because this line
log_info('{convert_empty(req$REMOTE_ADDR)} "{convert_empty(req$HTTP_USER_AGENT)}" {convert_empty(req$HTTP_HOST)} {convert_empty(req$REQUEST_METHOD)} {convert_empty(req$PATH_INFO)} {convert_empty(res$status)} {round(end$toc - end$tic, digits = getOption("digits", 5))}')
throws an error and the application stops working.
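One likely culprit (an educated guess, not verified against your setup): fields such as req$HTTP_USER_AGENT can be NULL rather than "", and NULL == "" inside convert_empty() fails with "argument is of length zero". Guarding against NULL would be a minimal fix:

convert_empty <- function(string) {
  # A missing header arrives as NULL, and `NULL == ""` throws,
  # so handle that case explicitly
  if (is.null(string) || string == "") {
    "-"
  } else {
    string
  }
}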
Also, I want to add my own custom logs in the API to track the flow for debugging purposes. For example, in this endpoint:
#* Ping
#* @get /ping
function() {
  rtn <- jsonlite::unbox(data.frame(Status = TRUE))
  return(rtn)
}
I want to add my own logging so I can check which functions are being called and what is happening inside them.
I found a package called log4r that allows me to add logs from my functions and save them to a file. Some examples are shown here.
Install the package
install.packages("log4r")
Use the package
library(log4r)

logger <- logger()
info(logger, "Located nearest gas station.")
#> INFO [2019-09-04 16:31:04] Located nearest gas station.
warn(logger, "Ez-Gas sensor network is not available.")
#> WARN [2019-09-04 16:31:04] Ez-Gas sensor network is not available.
debug(logger, "Debug messages are suppressed by default.")
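Combining log4r with a Plumber endpoint could then look like the following sketch (the log file path and threshold are assumptions; adjust them to your setup):

library(log4r)

# Write every message to the console and to a file
api_logger <- logger(
  threshold = "DEBUG",
  appenders = list(console_appender(), file_appender("/api/logs/api.log"))
)

#* Ping
#* @get /ping
function() {
  info(api_logger, "GET /ping called")
  rtn <- jsonlite::unbox(data.frame(Status = TRUE))
  debug(api_logger, "Returning Status = TRUE")
  return(rtn)
}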

How to improve formatting of slack messages using slackr?

I'm using slackr to send alert messages to a Slack channel. It works great except the message format is not great and I want to improve it.
devtools::install_github("hrbrmstr/slackr")
library(slackr)

slackr_setup(channel = "#alerts", username = "Mark Davis",
             incoming_webhook_url = "https://hooks.slack.com/services/T31P8UDAB/BCH4HKQSC/*********",
             api_token = "*********", echo = F)

alert <- "On Monday, 2018-09-03 # 2pm Pacific..."
slackr(alert)
Here is an example of how a message from slackr looks in Slack:
Here is an example of how I'd like it to look:
slackr doesn't seem to have many options in the way of formatting. I was thinking of building an image and inserting that, but I'm having trouble building an image out of a text file using R.
Perhaps there is another api I could call that could take my text and format it for slack?
I'm open to any suggestions.
Addendum:
Slackr has an option to upload files, so my latest attempt is to create an image from the text message and upload that object.
I am able to create a png file from the text message using the magick library. I created an image with a colored background, and I simply add the message text to the image:
library(magick)

alert_picture <- image_read('alert_480x150_dark_red.png')
alert_picture <- image_annotate(alert_picture, DreamCloud_Alert, size = 20, gravity = "southwest",
                                color = "white", location = "+10+10")
image_write(alert_picture, path = "alert_picture.png", format = "png")
The image looks pretty good (although there doesn't seem to be an easy way to bold or underline specific words in the message), but the obstacle now is that I can't get the upload command to work.
slackr_upload(filename = "alert_picture.png")
I don't get any error messages but nothing is uploaded to slack.
I got around this issue by using the httr package to POST the image to Slack.
Thanks to Adil B. for providing the solution:
Post Image to Slack Using HTTR package in R
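The gist of that workaround is a direct POST to Slack's files.upload endpoint via httr; a sketch (the token environment variable and the channel are placeholders):

library(httr)

POST(
  url = "https://slack.com/api/files.upload",
  add_headers(Authorization = paste("Bearer", Sys.getenv("SLACK_API_TOKEN"))),
  body = list(
    channels = "#alerts",
    file = upload_file("alert_picture.png")
  )
)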
I am not sure whether this is what you meant, but I was able to allow formatting as in a regular Slack message by altering the slackr_bot() function: just remove the two sets of three back-ticks near the end of the code, where the message text is inserted. Name the copy slackr_bot1() or similar, and you can then post formatted messages. This is the function after removing the back-ticks:
slackr_bot1 <- function(...,
                        channel = Sys.getenv("SLACK_CHANNEL"),
                        username = Sys.getenv("SLACK_USERNAME"),
                        icon_emoji = Sys.getenv("SLACK_ICON_EMOJI"),
                        incoming_webhook_url = Sys.getenv("SLACK_INCOMING_URL_PREFIX")) {
  if (incoming_webhook_url == "") {
    stop("No incoming webhook URL specified. Did you forget to call slackr_setup()?", call. = FALSE)
  }
  if (icon_emoji != "") { icon_emoji <- sprintf(', "icon_emoji": "%s"', icon_emoji) }
  resp_ret <- ""
  if (!missing(...)) {
    # mimics capture.output
    # get the arglist
    args <- substitute(list(...))[-1L]
    # setup in-memory sink
    rval <- NULL
    fil <- textConnection("rval", "w", local = TRUE)
    sink(fil)
    on.exit({
      sink()
      close(fil)
    })
    # where we'll need to eval expressions
    pf <- parent.frame()
    # how we'll eval expressions
    evalVis <- function(expr) withVisible(eval(expr, pf))
    # for each expression
    for (i in seq_along(args)) {
      expr <- args[[i]]
      # do something, note all the newlines...Slack ``` needs them
      tmp <- switch(mode(expr),
        # if it's actually an expression, iterate over it
        expression = {
          cat(sprintf("> %s\n", deparse(expr)))
          lapply(expr, evalVis)
        },
        # if it's a call or a name, eval, printing run output as if in console
        call = ,
        name = {
          cat(sprintf("> %s\n", deparse(expr)))
          list(evalVis(expr))
        },
        # if pretty much anything else (i.e. a bare value) just output it
        integer = ,
        double = ,
        complex = ,
        raw = ,
        logical = ,
        numeric = cat(sprintf("%s\n\n", as.character(expr))),
        character = cat(sprintf("%s\n\n", expr)),
        stop("mode of argument not handled at present by slackr")
      )
      for (item in tmp) if (item$visible) { print(item$value, quote = FALSE); cat("\n") }
    }
    on.exit()
    sink()
    close(fil)
    # combine all of them (rval is a character vector)
    output <- paste0(rval, collapse = "\n")
    loc <- Sys.getlocale('LC_CTYPE')
    Sys.setlocale('LC_CTYPE', 'C')
    on.exit(Sys.setlocale("LC_CTYPE", loc))
    resp <- POST(url = incoming_webhook_url, encode = "form",
                 add_headers(`Content-Type` = "application/x-www-form-urlencoded",
                             Accept = "*/*"),
                 body = URLencode(sprintf("payload={\"channel\": \"%s\", \"username\": \"%s\", \"text\": \"%s\"%s}",
                                          channel, username, output, icon_emoji)))
    warn_for_status(resp)
  }
  return(invisible())
}
slackr_bot1("*test* on time")

Retain information about requested URL when using curl::curl_fetch_multi

I'm using the following code to perform multiple simultaneous requests.
urls <- c("https://httpbin.org/status/301", "https://httpbin.org/status/302", "https://httpbin.org/status/200")
result <- list()

p <- curl::new_pool(total_con = 10, host_con = 5, multiplex = TRUE)

cb <- function(res) {
  result <<- append(result, list(res))
  cat("requested URL: ", url, "last URL: ", res$url, "\n\n")
}

for (url in urls) {
  curl::curl_fetch_multi(url, done = cb,
                         handle = curl::new_handle(failonerror = FALSE, nobody = FALSE,
                                                   followlocation = TRUE, ssl_verifypeer = 0),
                         pool = p)
}
curl::multi_run(pool = p)
As you can see, I would like to print to the console both the requested URL and the URL that finally answered with 200 OK.
The following is printed to the console:
requested URL: https://httpbin.org/status/200 last URL: https://httpbin.org/status/200
requested URL: https://httpbin.org/status/200 last URL: https://httpbin.org/get
requested URL: https://httpbin.org/status/200 last URL: https://httpbin.org/get
The requested URL in the console output is always https://httpbin.org/status/200, because it's the last URL that was used in the for loop. So that is the wrong way to do it.
How can I retain information about the initially requested URL when using curl_fetch_multi, so that I can use it after multi_run has returned? Ideally, the requested URL would be added to the res list, so it could be queried with something like cat("requested URL: ", res$requested_url, "last URL: ", res$url, "\n\n").
I had a similar issue where I wanted to do asynchronous POST requests using curl_fetch_multi and check which requests succeeded and which failed. However, due to the structure of the POST statement (all fields are in the request body) there is no identifying information whatsoever in the response object. My solution was to generate custom callback functions which carried an identifier.
urls <- c("https://httpbin.org/status/301", "https://httpbin.org/status/302", "https://httpbin.org/status/200")
result <- list()

# create an identifier for each url
url.ids <- paste0("request_", seq_along(urls))

# custom callback function generator: generate a unique function for each url
cb <- function(id) {
  function(res) {
    result[[id]] <<- res
  }
}

# create the list of callback functions
cbfuns <- lapply(url.ids, cb)

p <- curl::new_pool(total_con = 10, host_con = 5, multiplex = TRUE)

for (i in seq_along(urls)) {
  curl::curl_fetch_multi(urls[i], done = cbfuns[[i]],
                         handle = curl::new_handle(failonerror = FALSE, nobody = FALSE,
                                                   followlocation = TRUE, ssl_verifypeer = 0),
                         pool = p)
}
curl::multi_run(pool = p)
In this example, the custom callback functions are simply used to name the elements of result:
names(result)
## [1] "request_3" "request_1" "request_2"
which can then be used to tie each response back to the original request.
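The same closure trick can capture the requested URL itself instead of a synthetic id, so each response carries its origin directly. A sketch (the force() call matters: it guards against lazy evaluation when the generator is called inside a loop):

result <- list()
p <- curl::new_pool(total_con = 10, host_con = 5, multiplex = TRUE)

# generator: each callback remembers the URL it was created for
cb_for <- function(requested_url) {
  force(requested_url) # evaluate now, not after the loop has moved on
  function(res) {
    res$requested_url <- requested_url
    result[[requested_url]] <<- res
    cat("requested URL: ", requested_url, "last URL: ", res$url, "\n\n")
  }
}

for (u in urls) {
  curl::curl_fetch_multi(u, done = cb_for(u),
                         handle = curl::new_handle(followlocation = TRUE),
                         pool = p)
}
curl::multi_run(pool = p)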

make concurrent RCurl GET requests for set of URLs

I wrote a function to use RCurl to obtain the effective URL for a list of shortened URL redirects (bit.ly, t.co, etc.) and handle errors when the effective URL locates a document (PDFs tend to throw "Error in curlPerform... embedded nul in string.")
I would like to make this function more efficient if possible (while keeping it in R). As written, the run-time is prohibitively long for un-shortening a thousand or more URLs.
?getURI tells us that by default, getURI/getURL goes asynchronous when the length of the url vector is >1. But my performance seems totally linear, presumably because sapply turns the thing into one big for loop and the concurrency is lost.
Is there any way I can speed up these requests? Extra credit for fixing the "embedded nul" issue.
require(RCurl)
options(RCurlOptions = list(verbose = FALSE, followlocation = TRUE,
                            timeout = 500, autoreferer = TRUE, nosignal = TRUE,
                            useragent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2)"))

# find successful location (or error msg) after any redirects
getEffectiveUrl <- function(url) {
  c <- getCurlHandle()
  h <- basicHeaderGatherer()
  curlSetOpt(.opts = list(header = TRUE, verbose = FALSE), curl = c, .encoding = "CE_LATIN1")
  possibleError <- tryCatch(getURI(url, curl = c, followlocation = TRUE,
                                   headerfunction = h$update, async = TRUE),
                            error = function(e) e)
  if (inherits(possibleError, "error")) {
    effectiveUrl <- "ERROR_IN_PAGE" # fails on linked documents (PDFs etc.)
  } else {
    headers <- h$value()
    names(headers) <- tolower(names(headers)) # sometimes cases change on header names?
    statusPrefix <- substr(headers[["status"]], 1, 1) # 1st digit of http status
    if (statusPrefix == "2") { # status = success
      effectiveUrl <- getCurlInfo(c)[["effective.url"]]
    } else {
      effectiveUrl <- paste(headers[["status"]], headers[["statusmessage"]])
    }
  }
  effectiveUrl
}

testUrls <- c("http://t.co/eivRJJaV4j", "http://t.co/eFfVESXE2j", "http://t.co/dLI6Q0EMb0",
              "http://www.google.com", "http://1.uni.vi/01mvL", "http://t.co/05Mz00DHLD",
              "http://t.co/30aM6L4FhH", "http://www.amazon.com", "http://bit.ly/1fwWZLK",
              "http://t.co/cHglxQkz6Z") # 10th URL redirects to content w/ embedded nul

system.time(
  effectiveUrls <- sapply(X = testUrls, FUN = getEffectiveUrl, USE.NAMES = FALSE)
) # takes 7-10 secs on my laptop

# does Vectorize help?
vGetEffectiveUrl <- Vectorize(getEffectiveUrl, vectorize.args = "url")
system.time(
  effectiveUrls2 <- vGetEffectiveUrl(testUrls)
) # nope, makes it worse
I had a bad experience with RCurl and async requests: R would completely freeze (no error message, and CPU and RAM did not spike) with only 20 concurrent requests.
I recommend switching to the curl package and using the curl_fetch_multi() function. In my case it could easily handle 50,000 JSON requests in one pool (with some division into sub-pools under the hood):
https://cran.r-project.org/web/packages/curl/vignettes/intro.html#async_requests
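A minimal sketch of that approach applied to the URL un-shortening problem above (nobody = TRUE is an assumption: it asks for headers only, so document bodies such as PDFs are never downloaded, which should also sidestep the embedded-nul error):

library(curl)

pool <- new_pool(host_con = 10)
effective <- list()

resolve_async <- function(requested) {
  force(requested) # avoid lazy-evaluation surprises in the lapply below
  curl_fetch_multi(
    requested,
    done = function(res) effective[[requested]] <<- res$url,
    fail = function(msg) effective[[requested]] <<- paste("ERROR:", msg),
    handle = new_handle(followlocation = TRUE, nobody = TRUE),
    pool = pool
  )
}

invisible(lapply(testUrls, resolve_async))
multi_run(pool = pool)
effective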
