Keep all variables after every iteration - r

Using the following snippet it is possible to iterate over two API calls that return JSON.
At the end of the loop there is a command which transforms the JSON into a data frame.
How is it possible to keep all of these variables in every iteration and fill in NA if a value for a specific variable does not exist in a specific iteration?
library(jsonlite)
library(httpuv)
library(httr)
# Register the OAuth application and build a request config carrying the token.
myapp <- oauth_app(
  appname = "insert_your_credentials",
  key = "insert_your_credentials",
  secret = "insert_your_credentials"
)
github_token <- oauth2.0_token(oauth_endpoints("github"), myapp)
gtoken <- config(token = github_token)

# One search URL per result page. Every parameter after the first is joined
# with `&`; a second `?` would make `page=...` part of the `q` value.
df <- data.frame(
  link = c(
    "https://api.github.com/search/commits?q=%22image%22+AND+%22covid%22&page=1&per_page=100",
    "https://api.github.com/search/commits?q=%22image%22+AND+%22covid%22&page=2&per_page=100"
  )
)

for (i in seq_len(nrow(df))) {
  # Send the request with the OAuth config created above.
  req <- GET(df$link[i], gtoken)
  # Extract content from a request
  json1 <- content(req)
  # Convert to a data.frame
  char <- rawToChar(req$content)
  # NOTE: dfcollection is reassigned on every iteration, so after the loop it
  # only holds the result of the last URL.
  dfcollection <- jsonlite::fromJSON(char)
}

It may be wrapped with tryCatch or with possibly from purrr. The code below uses possibly.
library(purrr)
library(jsonlite)
#' Request a URL and parse its JSON body.
#'
#' @param x URL to request.
#' @return The object returned by `jsonlite::fromJSON()` for the response body.
convertToJSON <- function(x) {
  req <- httr::GET(x)
  # Decode the body once, with an explicit encoding. The former
  # `json1 = content(req)` result was never used, so that call is dropped.
  char <- httr::content(req, as = "text", encoding = "UTF-8")
  # Convert to a data.frame
  jsonlite::fromJSON(char)
}

# possibly() makes a failing request/parse yield NA instead of aborting map().
pconvertToJSON <- possibly(convertToJSON, otherwise = NA)
out <- map(df$link, pconvertToJSON)

Related

How can I build this JSON file in R?

Working in R, I am having difficulty building a JSON file that I would use in an API call.
The required format for the JSON file can be seen here:
https://developer.trimblemaps.com/restful-apis/routing/route-reports/post-route-reports/
The input to the exercise is a dataframe like so:
# Example input: four rows of start and end coordinates.
Shipper_Latitude <- as.numeric(1:4)
Shipper_Longitude <- as.numeric(1:4)
r_combine.NewShipperLat <- as.numeric(1:4)
r_combine.NewShipperLon <- as.numeric(1:4)
r4 <- data.frame(
  Shipper_Latitude,
  Shipper_Longitude,
  r_combine.NewShipperLat,
  r_combine.NewShipperLon
)
My attempt at building the required JSON file is as follows:
# Builds `post_body`, a nested list meant to be serialised as the request body,
# from the coordinate columns of `r4`.
# assemble lat and long for starting location:
# tempfuna wraps one Lat/Lon pair in a list with a single element `Coords`.
tempfuna <- function(Lat,Lon) {list(Coords = list(Lat = Lat,Lon = Lon))}
df_jsona <- mapply(FUN = tempfuna,Lat = r4$Shipper_Latitude, Lon = r4$Shipper_Longitude)
# Each element of the mapply() result is wrapped once more in list(Coords = ...).
df_jsona <- lapply(df_jsona, function(x) {list(Coords = x)})
# assemble lat and long for ending location:
# tempfunb has the same body as tempfuna; it is applied to the end coordinates.
tempfunb <- function(Lat,Lon) {list(Coords = list(Lat = Lat,Lon = Lon))}
df_jsonb <- mapply(FUN = tempfunb,Lat = r4$r_combine.NewShipperLat, Lon = r4$r_combine.NewShipperLon)
df_jsonb <- lapply(df_jsonb, function(x) {list(Coords = x)})
# assemble list of ReportRoutes:
# tempfunc pairs one start entry (A) and one end entry (B) as the two Stops.
tempfunc <- function(A,B) {list(ReportRoutes = list(Stops = list(A,B)))}
# NOTE(review): mapply() is called with its default simplification/naming
# behaviour; presumably that is where element names such as
# "Coords.ReportRoutes" come from - confirm before relying on the structure.
df_jsonc <- mapply(FUN = tempfunc,A = df_jsona, B = df_jsonb)
# create final list:
post_body <- list(ReportRoutes = df_jsonc)
I get an error when I use the resulting file in an API call.
I think the problem is that the list items in the ReportRoutes list are incorrectly named. For example, the first item is named “Coords.ReportRoutes” instead of [[1]].
How can I rework the above to produce the required JSON file?

How to write multiple .csv files based on responses from API that are date dependent

I am trying to create csv files that correspond to the response received from an API on a given date.
Instead of having to edit my code each time I want a new date, it seems logical to create a loop. I created a vector called "date", and tried to run the following code.
library(httr)
library(jsonlite)
# History values to request, one POST per value.
date <- c("201801", "201802", "201803", "201804", "201805")
for (i in seq_len(5)) {
  url <- "https://website.com/api/data"
  body <- list(
    fields = list("symbol", "letter", "number"),
    history = date[i]
  )
  response <- POST(url, body = body, encode = "json")
  # data is a portion of the response
  data <- content(response)$data
  # Writes the i-th element of `data` to a file named from the loop index.
  write.csv(data[[i]], paste(i, ".csv"))
}
Note that if I eliminate the for loop and just use an element from the date vector, I get the output desired for one date
# Same request for a single, fixed element of `date`.
url <- "https://website.com/api/data"
body <- list(
  fields = list("symbol", "letter", "number"),
  history = date[2]
)
response <- POST(url, body = body, encode = "json")
# data is a portion of the response
data <- content(response)$data
write.csv(data, '2.csv')
Using the for loop creates an empty response. Any ideas as to where I'm going wrong?
As mentioned, with iterations such as for or lapply running potentially open-ended processes, consider having the loop wait for process to complete using Sys.sleep(##). Also, use seq_along to avoid hard-coded count of the dates vector:
dates <- c("201801", "201802", "201803", "201804", "201805")
for (i in seq_along(dates)) {
  url <- "https://website.com/api/data"
  body <- list(
    fields = list("symbol", "letter", "number"),
    history = dates[i]
  )
  response <- POST(url, body = body, encode = "json")
  # data is a portion of the response
  data <- content(response)$data
  # Pause before writing; ADJUST SECONDS AS NEEDED
  Sys.sleep(5)
  # One file per date, named after the date itself.
  write.csv(data, sprintf("%s.csv", dates[i]))
}
Alternatively, use lapply and even return the data after writing each csv into a list for use later:
# Same request per date as the loop version, but each response's data is also
# collected into `data_list` for later use.
data_list <- lapply(seq_along(dates), function(i) {
  url <- "https://website.com/api/data"
  body <- list(
    fields = list("symbol", "letter", "number"),
    history = dates[i]
  )
  response <- POST(url, body = body, encode = "json")
  # data is a portion of the response
  data <- content(response)$data
  # Pause before writing; ADJUST SECONDS AS NEEDED
  Sys.sleep(5)
  write.csv(data, sprintf("%s.csv", dates[i]))
  data
})
You can make this a bit more readable like this:
library(httr)
library(jsonlite)
library(tidyverse)
date <- c("201801", "201802", "201803", "201804", "201805")
url <- "https://website.com/api/data"

# Define function
#' Request the data for one history value and write it to "<i>.csv".
#'
#' Reads the endpoint from the global `url`.
#'
#' @param date Value sent as the `history` field of the request body.
#' @param i Index of `date` in the input vector, as supplied by `imap()`;
#'   used as the stem of the output file name.
#' @return The value of `write.csv()`; called for its side effect.
write_files <- function(date, i) {
  body <- list(
    fields = list("symbol", "letter", "number"),
    history = date
  )
  response <- POST(url, body = body, encode = "json")
  data <- content(response)$data
  # imap() passes an integer index for an unnamed vector, but write.csv()
  # needs a file name (character) or a connection, so build the name here.
  write.csv(data, file = paste0(i, ".csv"))
}

# Apply function to each element of date
imap(date, write_files)
(your example was not reproducible so this is untested)

R Function with for Loops creates several dataframes, how to have each one have a different name

I have a for loop that loops through a list of urls,
# Twelve workbook URLs that differ only in the two-digit prefix 04..15.
url_list <- sprintf("http://www.irs.gov/pub/irs-soi/%02din21id.xls", 4:15)
downloads an Excel file from each one, assigns it to a dataframe, and performs a set of data cleaning operations on it.
library(gdata)
for (url in url_list) {
  test <- read.xls(url)
  cols <- c(1, 4:5, 97:98)
  # Drop the first eight rows, keep the selected columns, take the next 22
  # rows and remove the fourth of them.
  test <- test[-(1:8), cols][1:22, ][-4, ]
  # Copy the five kept columns under readable names ...
  test$Income <- test$Table.2.1...Returns.with.Itemized.Deductions..Sources.of.Income..Adjustments..Itemized.Deductions.by.Type..Exemptions..and.Tax..Items..by.Size.of.Adjusted.Gross.Income..Tax.Year.2015..Filing.Year.2016.
  test$Total_returns <- test$X.2
  test$return_dollars <- test$X.3
  test$charitable_deductions <- test$X.95
  test$charitable_deduction_dollars <- test$X.96
  # ... then remove the first five columns.
  test[1:5] <- NULL
}
My problem is that the loop simply writes over the same dataframe for each iteration through the loop. How can I have it assign each iteration through the loop to a data frame with a different name?
Use assign. This question is a duplicate of this post: Change variable name in for loop using R
For your particular case, you can do something like the following:
# Read and clean every workbook, storing each result as test1, test2, ...
for (i in seq_along(url_list)) {
  url <- url_list[i]
  test <- read.xls(url)
  cols <- c(1, 4:5, 97:98)
  test <- test[-(1:8), cols]
  test <- test[1:22, ]
  test <- test[-4, ]
  # Rename the five kept columns by position. The first column's header
  # embeds the tax year ("...Tax.Year.2015..Filing.Year.2016."), so looking it
  # up by that name only works for one of the files.
  # NOTE(review): assumes every year's workbook uses the same column layout.
  names(test) <- c(
    "Income",
    "Total_returns",
    "return_dollars",
    "charitable_deductions",
    "charitable_deduction_dollars"
  )
  assign(paste0("test", i), test)
}
You could write to a list:
# Collect one cleaned data frame per URL in a preallocated list.
result_list <- vector("list", length(url_list))
for (i_url in seq_along(url_list)) {
  url <- url_list[i_url]
  # ... read `url` and clean it into `test`, as in the loop shown earlier ...
  result_list[[i_url]] <- test
}
You can also name the list
# Generates "df1", "df2", ... for however many elements the list has.
names(result_list) <- paste0("df", seq_along(result_list))
Here's another approach with lapply instead of for loops which will write all resulting data.frames as separate list items which can then be re-named (if needed).
url_list <- sprintf("http://www.irs.gov/pub/irs-soi/%02din21id.xls", 4:15)

#' Download one workbook and return the cleaned data frame.
#'
#' @param z URL of an .xls file.
#' @return The cleaned table read from the workbook.
readURLFunc <- function(z) {
  # readxl reads from a local path, so fetch the workbook into a temporary
  # file first and remove it again when the function exits.
  tmp <- tempfile(fileext = ".xls")
  on.exit(unlink(tmp), add = TRUE)
  utils::download.file(z, tmp, mode = "wb", quiet = TRUE)
  test <- readxl::read_xls(tmp)
  # ... same cleaning steps as in the for-loop versions ...
  test[1:5] <- NULL
  test
}
data_list <- lapply(url_list, readURLFunc)

How do I fix the “No encoding Supplied” error?

I am facing difficulties after running the code and trying to export the dataset to a spreadsheet or txt.file.
I am newbie to R, so maybe this question is trivial.
After running the following code:
#' Query the EIA series endpoint and return the parsed response.
#'
#' @param api_key API key sent as the `api_key` query parameter.
#' @param series_id Character vector of series ids; joined with ";" into one
#'   query parameter.
#' @param start,end,num Optional query parameters; left out of the request
#'   when `NULL`.
#' @param tidy_data When `"tidy_long"`, the per-series data frames are
#'   row-bound into one data frame with `series_time_frame` and `series_name`
#'   columns added.
#' @param only_data When `TRUE`, only the `data` element is returned.
#' @return The parsed response with the series data moved to `$data`, or just
#'   that data when `only_data` is `TRUE`.
eia_series <- function(api_key, series_id, start = NULL, end = NULL, num = NULL, tidy_data = "no", only_data = FALSE) {
  # TODO (input checks listed by the author, not implemented yet):
  # max 100 series
  # test if num is not null and either start or end is not null. Not allowed
  # api_key test for character.
  # series_id test for character.
  # if start/end not null, then check if format matches series id date format

  # parse url
  series_url <- httr::parse_url("http://api.eia.gov/series/")
  series_url$query$series_id <- paste(series_id, collapse = ";")
  series_url$query$api_key <- api_key
  series_url$query$start <- start
  series_url$query$end <- end
  series_url$query$num <- num

  # get data
  series_data <- httr::GET(url = series_url)
  # No `encoding` is passed here; httr then reports
  # "No encoding supplied: defaulting to UTF-8." as an informational message.
  series_data <- httr::content(series_data, as = "text")
  series_data <- jsonlite::fromJSON(series_data)

  # Move data out of the data.frame with nested lists and NULLs
  series_data$data <- series_data$series$data
  series_data$series$data <- NULL

  # parse date and numerical data
  series_data$data <- lapply(
    X = series_data$data,
    FUN = function(x) {
      data.frame(
        date = x[, 1],
        value = as.numeric(x[, 2]),
        stringsAsFactors = FALSE
      )
    }
  )

  # add names to the list with data: one entry per series, named by its id
  # (previously the data list itself was assigned as the names)
  names(series_data$data) <- series_data$series$series_id

  # parse dates
  series_data$data <- eia_date_parse(
    series_list = series_data$data,
    format_character = series_data$series$f
  )

  # tidy up data
  if (tidy_data == "tidy_long") {
    series_data$data <- lapply(
      seq_along(series_data$data),
      function(x) {
        cbind(
          series_data$data[[x]],
          series_time_frame = series_data$series$f[x],
          series_name = series_data$series$series_id[x],
          stringsAsFactors = FALSE
        )
      }
    )
    series_data$data <- do.call(rbind, series_data$data)
  }

  # only data
  if (only_data) {
    series_data <- series_data$data
  }

  return(series_data)
}
After running the function
eia_series(api_key = "XXX",series_id = c("PET.MCRFPOK1.M", "PET.MCRFPOK2.M"))
I tried to "transfer" the data in order to export it but got the following error:
No encoding supplied: defaulting to UTF-8.
I don't understand why. Could you help me out?
That doesn't look like an error, rather a statement. Probably coming from httr::content(series_data, as = "text"). Look in https://cran.r-project.org/web/packages/httr/vignettes/quickstart.html in The body section. It shouldn't be a problem, as long as your data returns what you expect. Otherwise you can try different encoding or there is a bug elsewhere.
Try:
series_data <- httr::content(series_data, as = "text", encoding = "UTF-8")

Using httr package, setting a header where the header name is a variable

I want to set a header in a request using the R httr package when I have the name of the header in a variable.
I would like to do something like this:
tokenName <- 'X-Auth-Token'

# Requests the user-credentials endpoint under the global `baseUrl`.
# The header is passed as the literal argument name `tokenName`.
get_credentials_test <- function(token) {
  endpoint <- paste0(baseUrl, "/api/usercredentials")
  GET(endpoint, add_headers(tokenName = token))
}
however, the above code seems to set a header with the name tokenName.
It does work if I do the following:
# Requests the user-credentials endpoint under the global `baseUrl`, sending
# `token` in a header whose name is written out literally.
get_credentials_test <- function(token) {
  endpoint <- paste0(baseUrl, "/api/usercredentials")
  GET(endpoint, add_headers('X-Auth-Token' = token))
}
but I want to have some flexibility if the name of the header changes and the requirement to add the header is sprinkled liberally around the code. I am not sure if it is possible to add a header contained with a variable but that is what I would like to do.
You could create the headers as a named vector, and then pass it as the .headers argument:
# Named vector: the header name comes from `tokenName`, the value from `token`.
h <- setNames(c(token), tokenName)
r <- GET(url, add_headers(.headers = h))
While this works because add_headers takes a .headers argument (see here), a more general alternative for calling a function with arbitrary argument names is do.call:
# Named argument list: do.call() turns the element name into the argument name.
h <- setNames(list(token), tokenName)
r <- GET(url, do.call(add_headers, h))
It's easy with structure():
#' GET `base.url` + `path` with a single header whose name is given at run time.
#'
#' @param base.url,path Concatenated without a separator to form the URL.
#' @param header.name Name of the header to send.
#' @param token Value of that header.
#' @return The httr response.
get_creds <- function(base.url, path, header.name, token) {
  endpoint <- paste0(base.url, path)
  named_header <- stats::setNames(token, header.name)
  httr::GET(endpoint, httr::add_headers(named_header))
}

Resources