I'm trying to create an API connection that already works in Postman, where I authenticate using Type = API Key. These are my credentials.
My problem is that I don't know how to add those credentials to my R script so it can authenticate. This is my current code:
library(httr)
library(tidyverse)
library(plyr)

# Settings
.proxy <- list(
  url  = "gbiss-l-ss31.int.dir.witowa.com",
  user = "svc-g-gad",
  pwd  = "5vcGBgaGaSrf",
  port = 8090,
  Header = list(
    Key   = 'X-EDS-USER',
    Value = 'B6E6685F-DB0C-438A-983F'
  )
)

format_url_data <- function(x) {
  raw <- httr::GET(
    url = x,
    httr::use_proxy(
      url      = .proxy$url,
      port     = .proxy$port,
      username = .proxy$user,
      password = .proxy$pwd
    )
  )
  raw <- intToUtf8(raw$content)
  jsonlite::fromJSON(raw)
}

############################ FOR FF (Don't change anything) ############################
# Define URL
basehttr <- 'https://iat.eds.gateway-api.willistowerswatson.com/Clients/Search?query='
endhttr  <- 'APPLE'
endhttr_backup <- endhttr
endhttr <- URLencode(endhttr)
url <- glue::glue('{basehttr}{endhttr}')

# Get the information and convert it to a data frame
dt <- tryCatch(
  {
    dt <- purrr::map(url, ~format_url_data(.))
  },
  error = function(error_condition) {
    basehttr <- "https://qa.eds.gateway-api.willistowerswatson.com/gateway-api/Clients/Search?query="
    endhttr  <- 'APPLE'
    endhttr_backup <- endhttr
    endhttr <- URLencode(endhttr)
    url <- glue::glue('{basehttr}{endhttr}')
    dt <- purrr::map(url, ~format_url_data(.))
  }
)

A <- dt %>% as.list.data.frame()
B <- ldply(A, data.frame)
Data <- B %>%
  drop_na(name) %>% as.data.frame()
Data$Name <- gsub("[^[:alnum:][:blank:]?&/\\-]", "", Data$Name)
When I run my code I get an authentication error. Could you help me make the same connection from R with those credentials, the way it works in Postman?
Thanks
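For reference, a minimal sketch of one way to attach that key from R (an assumption: the gateway expects it as an X-EDS-USER request header, which is what the Postman Key/Value pair suggests) is to pass httr::add_headers() alongside the existing proxy settings inside format_url_data():
format_url_data <- function(x) {
  raw <- httr::GET(
    url = x,
    httr::use_proxy(
      url      = .proxy$url,
      port     = .proxy$port,
      username = .proxy$user,
      password = .proxy$pwd
    ),
    # Assumption: the API key travels as the X-EDS-USER header, mirroring
    # Postman's "API Key" auth with Key = X-EDS-USER and Value = the key itself.
    httr::add_headers(`X-EDS-USER` = .proxy$Header$Value)
  )
  jsonlite::fromJSON(intToUtf8(raw$content))
}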
Code that previously worked now throws an error because the server requires HTTPS calls and no longer accepts HTTP.
Can this code be modified to work, or is a new process required for encrypted HTTPS?
Thank you!
# Connecting to EIA API
# install.packages(c("httr", "jsonlite"))
library(httr)
library(jsonlite)
key <- "e77e9bd3c8bc84927fad13088f4bff28"
padd_key <- list('PET.MCRRIP12.M', 'PET.MCRRIP22.M',
                 'PET.MCRRIP32.M', 'PET.MCRRIP42.M',
                 'PET.MCRRIP52.M')
startdate <- "2010-01-01"   # YYYY-MM-DD
enddate   <- "2022-02-13"   # YYYY-MM-DD
j <- 0
for (i in padd_key) {
  url <- paste('http://api.eia.gov/series/?api_key=', key, '&series_id=', i, sep = "")
  res <- GET(url)
  json_data <- fromJSON(rawToChar(res$content))
  data <- data.frame(json_data$series$data)
  data$Year  <- substr(data$X1, 1, 4)
  data$Month <- substr(data$X1, 5, 6)
  data$Day   <- 1
  data$Date  <- as.Date(paste(data$Year, data$Month, data$Day, sep = '-'))
  colnames(data)[2] <- json_data$series$name
  data <- data[-c(1, 3, 4, 5)]
  if (j == 0) {
    data_final <- data
  } else {
    data_final <- merge(data_final, data, by = "Date")
  }
  j <- j + 1
}
data_final <- subset(data_final, Date >= startdate & Date <= enddate)
Yes, it can - you just use https in the URL.
The shortest demonstration I could think of is:
httr::GET("https://httpbin.org/get")
In your code you just need to change the line where you define the url variable:
url <- paste('https://api.eia.gov/series/?api_key=',key,'&series_id=',i,sep="")
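As a quick sanity check (a sketch, not part of the original answer), you can confirm the HTTPS call succeeds before parsing the body:
res <- GET(url)        # url built with the https:// scheme as shown above
stop_for_status(res)   # stops with an informative error on a 4xx/5xx response
json_data <- fromJSON(rawToChar(res$content))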
I am very new to web scraping, and I am having some difficulty scraping this website's content. I would basically like to collect the pesticide name and active ingredient, but the URL does not change, and I could not find a way to click the grids. Any help?
library(RSelenium)
library(rvest)
library(tidyverse)
rD <- rsDriver(browser="firefox", port=4547L, verbose=F)
remDr <- rD[["client"]]
remDr$navigate("http://www.cdms.net/Label-Database")
This site calls an API to get the list of manufacturers: http://www.cdms.net/labelssds/Home/ManList?Keys=
On the products page, it also uses another API with the manufacturer ID, for example: http://www.cdms.net/labelssds/Home/ProductList?manId=537
You just need to loop through the Lst array and append the results to a data frame.
For instance, the following code gets all the products for the first 5 manufacturers:
library(httr)
manufacturers <- content(GET("http://www.cdms.net/labelssds/Home/ManList?Keys="), as = "parsed", type = "application/json")
maxManufacturer <- 5
index <- 1
manufacturerCount <- 0
data = list()
for (m in manufacturers$Lst) {
  print(m$label)
  productUrl <- modify_url("http://www.cdms.net/labelssds/Home/ProductList",
                           query = list("manId" = m$value))
  products <- content(GET(productUrl), as = "parsed", type = "application/json")
  for (p in products$Lst) {
    data[[index]] <- p
    index <- index + 1
  }
  manufacturerCount <- manufacturerCount + 1
  if (manufacturerCount == maxManufacturer) {
    break
  }
  Sys.sleep(0.500)   # add delay between requests
}
df <- do.call(rbind, data)
options(width = 1200)
print(df)
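Since do.call(rbind, data) produces a matrix whose cells are lists, a small follow-up sketch (assuming each product entry returned by the ProductList endpoint is a flat list of scalar fields) is to bind the entries into a regular data frame instead:
library(dplyr)
# Assumption: every element of `data` is a named list of scalar values.
df <- bind_rows(data)
print(df)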
I am fairly new to R. As a learning exercise, I decided to scrape the tracks that my favorite radio station is playing and then add these songs to my Spotify playlist. This way I can listen to the music of my favorite radio station without any advertising.
What is going well?
I can scrape the songs and add a test song to my Spotify playlist.
Where does it go wrong?
Via the Spotify API I retrieve information about each song based on the artist and title. I only need the spotify:track:xxxxx part of every response I get back. When I try to extract that spotify:track: part (the track URI) from every response, I get the error: subscript out of bounds:
### Radio2 playlist scraper ###

# Loading packages #
# install.packages(c("rvest", "dplyr", "knitr", "stringr", "jsonlite", "remotes"))
# remotes::install_github("charlie86/spotifyr")
library(rvest)
library(dplyr)
library(spotifyr)
library(knitr)
library(stringr)
library(jsonlite)
library(purrr)
library(data.table)
library(httr)
library(magrittr)
library(ggplot2)
#Get playlist url #
url <- "https://www.nporadio2.nl/playlist"
#Read HTML code from pagen#
webpage <- read_html(url)
#Get Artist and Title#
artist <- html_nodes(webpage, '.fn-artist')
title <- html_nodes(webpage, '.fn-song')
#Artist and Title to text#
artist_text <- html_text(artist)
title_text <- html_text(title)
#Artist and Title to dataframe#
artiest <- as.data.frame(artist_text)
titel_text <- as.data.frame(title_text)
#Make one dataframe#
radioplaylist <- cbind(artiest$artist_text, titel_text$title_text)
radioplaylist <- as.data.frame(radioplaylist)
radioplaylist
#Rename columns#
colnames(radioplaylist)[1] <- "Artiest"
colnames(radioplaylist)[2] <- "Titel"
radioplaylist
#Remove duplicate songs#
radioplaylistuniek <- radioplaylist %>% distinct(Artiest, Titel, .keep_all = TRUE)
#Write to csv#
date <- Sys.Date()
date
write.csv(radioplaylistuniek, paste0("C://Users//Kantoor//Radio2playlists//playlist - ", date, ".csv"))
#Set spotify API#
Sys.setenv(SPOTIFY_CLIENT_ID = 'xxxxxxxxxxxxx')
Sys.setenv(SPOTIFY_CLIENT_SECRET = 'xxxxxxxxxxxx')
access_token <- get_spotify_access_token()
# Client and secret#
clientID <- "xxxxxxxxxxxxxxx"
secret <- "xxxxxxxxxxxxxx"
# Get access token and write this to authorization header #
response <- POST(
  'https://accounts.spotify.com/api/token',
  accept_json(),
  authenticate(clientID, secret),
  body = list(grant_type = 'client_credentials'),
  encode = 'form',
  verbose()
)
token <- content(response)$access_token
authorization.header <- paste0("Bearer ", token)
# Generate URLS #
radioplaylistuniektest <- radioplaylistuniek[1:100,]
urls <- list(c("https://api.spotify.com/v1/search?q=track:")) %>% paste0(radioplaylistuniektest$Titel) %>% paste0(c("%20artist:")) %>% paste0(radioplaylistuniektest$Artiest) %>% paste(c("&type=track&limit=1"), sep = "")
# Get track information#
lijstwijk <- lapply(urls, GET, simplifyMatrix=TRUE, flatten=TRUE, config = add_headers(authorization = authorization.header))
# Get trackuri from each response#
lijstwijkuri <- lapply(lijstwijk, function(item) content(item, as="parsed")$tracks$items[[1]]$uri)
Error in content(item, as = "parsed")$tracks$items[[1]] :
subscript out of bounds
When I extract the track URI from the responses for only a few songs, let's say the first 5, everything goes well:
# Generate URLS #
radioplaylistuniektest <- radioplaylistuniek[1:5,]
urls <- list(c("https://api.spotify.com/v1/search?q=track:")) %>% paste0(radioplaylistuniektest$Titel) %>% paste0(c("%20artist:")) %>% paste0(radioplaylistuniektest$Artiest) %>% paste(c("&type=track&limit=1"), sep = "")
# Get track information#
lijstwijk <- lapply(urls, GET, simplifyMatrix=TRUE, flatten=TRUE, config = add_headers(authorization = authorization.header))
# Get trackuri from each response#
lijstwijkuri <- lapply(lijstwijk, function(item) content(item, as="parsed")$tracks$items[[1]]$uri)
lijstwijkuri
[[1]]
[1] "spotify:track:5Xhqe9xu6bKRSqLj1mS1SB"
[[2]]
[1] "spotify:track:21YxK0klhpfLW8budkJaMF"
[[3]]
[1] "spotify:track:468OIV1LzYrm3rluVKl8AU"
[[4]]
[1] "spotify:track:3yDhZq8f17SmumVmEyCaRN"
[[5]]
[1] "spotify:track:0IseLavjQ32B5wxYxWeuw5"
What is going wrong, and how can I fix the subscript out of bounds error so I can extract the spotify:track:xxxx part from each response?
Got the solution. For anyone who is curious, this is how I fixed it:
# Unlist results #
responses <- unlist(lapply(lijstwijk, paste, collapse=" "))
# Results to dataframe #
responsesdf <- as.data.frame(responses)
# Get spotify:track string#
uriperurl <- data.frame(uri = str_extract(responsesdf$responses, "(spotify:track:)\\w+"))
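An alternative sketch (based on the assumption that the subscript error comes from searches returning zero matches): guard against an empty items list, so unmatched songs yield NA instead of an error:
lijstwijkuri <- lapply(lijstwijk, function(item) {
  items <- content(item, as = "parsed")$tracks$items
  # Assumption: an empty search result is what triggers "subscript out of bounds".
  if (length(items) > 0) items[[1]]$uri else NA_character_
})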
I am trying to generate a dataset of comments from a YouTube video and am having trouble looping over the pageToken using the Google API. Below is a snippet of code. Why doesn't the 'while' loop work?
base_url <- "https://www.googleapis.com/youtube/v3/commentThreads/"
data = "list"
api_opts <- list(
part = "snippet",
maxResults = 100,
textFormat = "plainText",
videoId = "N708P-A45D0", # This is an example of a video id
key = "google developer key goes here",
fields = "items,nextPageToken",
orderBy = "published")
init_results <- httr::content(httr::GET(base_url, query = api_opts))
data <- init_results$items
api_opts$pageToken <- init_results$nextPageToken
api_opts$pageToken <- gsub("\\=", "", init_results$nextPageToken)
print(api_opts$pageToken)
while (api_opts$pageToken != "") {
print(api_opts$pageToken)
next_results <- httr::content(httr::GET(base_url, query = api_opts))
data <- c(data, next_results$items)
api_opts$pageToken <- gsub("\\=", "", next_results$nextPageToken)
}
organize_data <- function() {
  sub_data <- lapply(data, function(x) {
    data.frame(
      Comment     = x$snippet$topLevelComment$snippet$textDisplay,
      User        = x$snippet$topLevelComment$snippet$authorDisplayName,
      ReplyCount  = x$snippet$totalReplyCount,
      LikeCount   = x$snippet$topLevelComment$snippet$likeCount,
      PublishTime = x$snippet$topLevelComment$snippet$publishedAt,
      CommentId   = x$snippet$topLevelComment$id,
      stringsAsFactors = FALSE)
  })
}
sample <- organize_data()
L <- length(sample)
sample <- data.frame(matrix(unlist(sample), nrow=L, byrow=T))
colnames(sample) <- c("Comment", "User", "ReplyCount", "LikeCount", "PublishTime", "CommentId")
head(sample)
Thanks for looking. In case anyone else has this problem in the future, below is what I did to fix it. I still can't get the replies to the replies.
####
# NEW TRY
# Note: according to YouTube "YouTube currently supports replies only for top-level comments. However, replies to replies may be supported in the future."
####
rm(list = ls())
data <- list()

# Initialize
init_results <- httr::content(httr::GET("https://www.googleapis.com/youtube/v3/commentThreads?part=snippet%2C+replies&maxResults=100&textFormat=plainText&videoId=N708P-A45D0&fields=items%2CnextPageToken&key=[my google developer key]"))
data <- init_results$items
print(init_results$nextPageToken)

# Begin loop
while (init_results$nextPageToken != "") {
  # Make the page token URL encoded
  api_opts_pageToken <- gsub("=", "%3D", init_results$nextPageToken)
  # Write the call with the updated page token
  get_call <- gsub("api_pageToken", api_opts_pageToken, "https://www.googleapis.com/youtube/v3/commentThreads?part=snippet%2C+replies&maxResults=100&pageToken=api_pageToken&textFormat=plainText&videoId=N708P-A45D0&fields=items%2CnextPageToken&key=[my google developer key]")
  # Pull out the data from this page token call
  next_results <- httr::content(httr::GET(get_call))
  # Update the data file
  data <- c(data, next_results$items)
  # Update the page token
  print(next_results$nextPageToken)
  init_results$nextPageToken <- next_results$nextPageToken
}
organize_data <- function() {
  sub_data <- lapply(data, function(x) {
    data.frame(
      Comment     = x$snippet$topLevelComment$snippet$textDisplay,
      User        = x$snippet$topLevelComment$snippet$authorDisplayName,
      ReplyCount  = x$snippet$totalReplyCount,
      LikeCount   = x$snippet$topLevelComment$snippet$likeCount,
      PublishTime = x$snippet$topLevelComment$snippet$publishedAt,
      CommentId   = x$snippet$topLevelComment$id,
      stringsAsFactors = FALSE)
  })
}
sample <- organize_data()
L <- length(sample)
sample <- data.frame(matrix(unlist(sample), nrow=L, byrow=T))
colnames(sample) <- c("Comment", "User", "ReplyCount", "LikeCount", "PublishTime", "CommentId")
head(sample)
dim(sample)
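A simpler pagination sketch, reusing base_url and api_opts from the first snippet and assuming the API simply omits nextPageToken on the last page (httr URL-encodes query parameters, so the manual gsub() on the token should not be needed):
data <- init_results$items
api_opts$pageToken <- init_results$nextPageToken
while (!is.null(api_opts$pageToken)) {
  next_results <- httr::content(httr::GET(base_url, query = api_opts))
  data <- c(data, next_results$items)
  api_opts$pageToken <- next_results$nextPageToken   # NULL once the last page is reached
}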
I'm using the following code in R to download data from Google Trends, which I took mostly from here: http://christophriedl.net/2013/08/22/google-trends-with-r/
############################################
## Query GoogleTrends from R
##
## by Christoph Riedl, Northeastern University
## Additional help and bug-fixing re cookies by
## Philippe Massicotte Université du Québec à Trois-Rivières (UQTR)
############################################
# Load required libraries
library(RCurl)     # For getURL() and curl handler / cookie / Google login
library(stringr)   # For str_trim() to trim whitespace from strings
# Google account settings
username <- "USERNAME"
password <- "PASSWORD"
# URLs
loginURL <- "https://accounts.google.com/accounts/ServiceLogin"
authenticateURL <- "https://accounts.google.com/accounts/ServiceLoginAuth"
trendsURL <- "http://www.google.com/trends/TrendsRepport?"
############################################
## This gets the GALX cookie which we need to pass back with the login form
############################################
getGALX <- function(curl) {
  txt <- basicTextGatherer()
  curlPerform(url = loginURL, curl = curl, writefunction = txt$update, header = TRUE, ssl.verifypeer = FALSE)
  tmp <- txt$value()
  val <- grep("Cookie: GALX", strsplit(tmp, "\n")[[1]], val = TRUE)
  return(strsplit(val, "[:=;]")[[1]][3])
}
############################################
## Function to perform Google login and get cookies ready
############################################
gLogin <- function(username, password) {
  ch <- getCurlHandle()
  ans <- curlSetOpt(curl = ch,
                    ssl.verifypeer = FALSE,
                    useragent = getOption('HTTPUserAgent', "R"),
                    timeout = 60,
                    followlocation = TRUE,
                    cookiejar = "./cookies",
                    cookiefile = "")
  galx <- getGALX(ch)
  authenticatePage <- postForm(authenticateURL, .params = list(Email = username, Passwd = password, GALX = galx, PersistentCookie = "yes", continue = "http://www.google.com/trends"), curl = ch)
  authenticatePage2 <- getURL("http://www.google.com", curl = ch)
  if (getCurlInfo(ch)$response.code == 200) {
    print("Google login successful!")
  } else {
    print("Google login failed!")
  }
  return(ch)
}
# Returns string without leading or trailing whitespace
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
get_interest_over_time <- function(res, clean.col.names = TRUE) {
  # Remove all text before the "Interest over time" data block begins
  data <- gsub(".*Interest over time", "", res)
  # Remove all text after the "Interest over time" data block ends
  data <- gsub("\n\n.*", "", data)
  # Convert the "Interest over time" data block into a data.frame
  data.df <- read.table(text = data, sep = ",", header = TRUE)
  # Reduce each date range to only the end-of-week date
  data.df$Week <- gsub(".*\\s-\\s", "", data.df$Week)
  data.df$Week <- as.Date(data.df$Week)
  # Clean column names
  if (clean.col.names == TRUE) colnames(data.df) <- gsub("\\.\\..*", "", colnames(data.df))
  # Return the "Interest over time" data.frame
  return(data.df)
}
############################################
## Read data for a query
############################################
ch <- gLogin( username, password )
authenticatePage2 <- getURL("http://www.google.com", curl=ch)
res <- getForm(trendsURL, q="sugar", geo="US", content=1, export=1, graph="all_csv", curl=ch)
# Check if quota limit reached
if (grepl("You have reached your quota limit", res)) {
  stop("Quota limit reached; you should wait a while and try again later")
}
df <- get_interest_over_time(res)
head(df)
write.csv(df,"sugar.csv")
When I search just for the US, or any single country, everything works fine, but I need more disaggregated data, at the metropolitan-area level. However, I cannot get those queries to work with this script. Whenever I try, by typing, for example, "US-IL" in the geo field, I get an error:
Error in read.table(text = data, sep = ",", header = TRUE) :
more columns than column names
The same happens if I try to get a trend for a metropolitan area (using something like "US-IL-602" for Chicago, for example). Does anyone know how I could modify this script to make it work?
Thank you very much,
Brian.
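A debugging sketch (an assumption, not a confirmed fix): the "more columns than column names" error suggests the metro-level export contains extra sections or columns that the simple read.table() call cannot handle, so a reasonable first step is to write the raw response to disk and inspect its structure:
# Same getForm() call as above, but with a metro-area geo code; the raw CSV is
# written to a file so the layout of the metro-level export can be examined.
res <- getForm(trendsURL, q = "sugar", geo = "US-IL-602", content = 1,
               export = 1, graph = "all_csv", curl = ch)
writeLines(res, "trends_raw_metro.csv")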