R 'differing number of rows' error using OpenWeather API - r

For an assignment I'm trying to fetch weather forecast data from the OpenWeather API. My code returns this error:
Error in data.frame(city = city, weather = weather, visibility = visibility, :
arguments imply differing number of rows: 40, 1, 0, 12
I clicked to re-run with RStudio's debug and got:
Not sure what's causing this error. Can anyone help me find the problem in my code? Very grateful for any help.
# To create empty vectors to hold data temporarily
city <- c()
weather <- c()
visibility <- c()
temp <- c()
temp_min <- c()
temp_max <- c()
pressure <- c()
humidity <- c()
wind_speed <- c()
wind_deg <- c()
forecast_datetime <- c()
season <- c()
# To get 5-day forecast data for select cities
get_weather_forecast_by_cities <- function(city_names){
df <- data.frame()
for (city_name in city_names){
url_get='https://api.openweathermap.org/data/2.5/forecast'
weather_api_key <- "[*my API key*]"
forecast_query <- list(q = city_name, appid = weather_api_key, units="metric")
response <- GET(url_get, query=forecast_query)
json_list <-content(response, as="parsed")
results <- json_list$list
for(result in results) {
# Get weather data and append them to vectors
city <- c(city, city_name)
}
# Combine into a DF
weather <- c(weather, result$weather[[1]]$main)
visibility <- c(visibility, result$visibility)
temp <- c(temp, result$main$temp)
temp_min <- c(temp_min, result$main$temp_min)
temp_max <- c(temp_max, result$main$temp_max)
pressure <- c(pressure, result$main$pressure)
humidity <- c(humidity, result$main$humidity)
wind_speed <- c(wind_speed, result$wind$speed)
wind_deg <- c(wind_deg, result$wind$deg)
forecast_datetime <- c(forecast_datetime, result$dt_text)
months <- as.numeric(format(as.Date(forecast_datetime), '%m'))
index <- setNames(rep(c('winter', 'spring', 'summer', 'fall'), each=3), c(12,1:11))
season <- unname(index[as.character(months)])
weather_df <- data.frame(city=city, weather=weather, visibility=visibility, temp=temp, temp_min=temp_min, temp_max=temp_max, pressure=pressure, humidity=humidity, wind_deg=wind_deg, forecast_datetime=forecast_datetime, months=months, index=index, season=season)
}
return(df)
}
cities <- c("Seoul", "New York City", "Paris", "London", "Taiyuan")
cities_weather_df <- get_weather_forecast_by_cities(cities)

Related

How to get a day weather forecast for a list of cities using R?

I tried to get the date frame using this function but I cant get it right, as I am trying to Write a function to return a data frame containing 5-day weather forecasts for a list of cities but I do not know what is missing. Here I using openweathermap.org for the weather data.
Thanks.
# Get forecast data for a given city list
get_weather_forecaset_by_cities <- function(city_names){
df <- data.frame()
for (city_name in city_names){
# Forecast API URL
forecast_url <- 'https://api.openweathermap.org/data/2.5/forecast'
# Create query parameters
forecast_query <- list(q = city_name, appid = "b0847c4a1554d3c63d46d0e9249500f0", units="metric")
# Make HTTP GET call for the given city
responce<- GET(forecast_url, query= forecast_query)
json_result <- content(responce, as="parsed")
# Note that the 5-day forecast JSON result is a list of lists. You can print the reponse to check the results
result <- json_result
# Loop the json result
for(result in results) {
city <- c(city, city_name)
}
# Add the R Lists into a data frame
# $weather is also a list with one element, its $main element indicates the weather status such as clear or rain
city <- c(result$city$name)
weather <- c( result$weather[[1]]$main)
# Get Visibility
visibility <- c( result$visibility)
# Get current temperature
temp <- c(result$main$temp)
# Get min temperature
temp_min <- c( result$main$temp_min)
# Get max temperature
temp_max <- c( result$main$temp_max)
# Get pressure
pressure <- c( result$main$pressure)
# Get humidity
humidity <- c(result$main$humidity)
# Get wind speed
wind_speed <- c( result$wind$speed)
# Get wind direction
wind_deg <- c( result$wind$deg)
weather_data_frame <- data.frame(city,
weather,
visibility,
temp,
temp_min,
temp_max,
pressure,
humidity,
wind_speed,
wind_deg)
}
# Return a data frame
return(df)
}
cities <- c("Seoul", "Washington, D.C.", "Paris", "Suzhou")
cities_weather_df <- get_weather_forecaset_by_cities(cities)
library(httr)
library(dplyr)
library(stringr)
library(jsonlite)
> city_names <- c("Seoul", "Washington, D.C.", "Paris", "Suzhou")
> api_id<-'b0847c4a1554d3c63d46d0e9249500f0'
> main_df<-data.frame()
> for (city_name in city_names){
> url<-str_glue("http://api.openweathermap.org/data/2.5/forecast?id=524901&appid={api_id}&q={city_name}")
> main_df<- bind_rows(main_df,get_weather_forecaset_by_cities(url))
> }
>
>
> get_weather_forecaset_by_cities <- function(url){
> web_content <- httr::GET(url)
> web_content <- content(web_content,"text")
> json_data <- fromJSON(web_content, flatten = TRUE)
> df <- as.data.frame(json_data)
> return(df)
> }

Convert multiple list to nested list of given structure using function [R]

I have a set of lists which I would like to convert into the nested list of a certain structure. My initial data look like list_1_1 ... list_2_2. I would like them to be like final_desired_output.
I can do this step by step by extracting desired variable and appending to the output list one by one. However, this dummy example contains only 2 data subsets (first_lists and list second_lists), while the real life data are far >1 GB. Thus, I would like to do it with a function, which I unfortunatly do not know how to do, as nested lists are not well covered in tutorials. Any assistance?
# some dummy data
one_1 <- c(1:10)
one_2 <- c(2:15)
one_3 <- c(3:20)
starting_one_1 <- 1
starting_one_2 <- 2
starting_one_3 <- 3
ending_one_1 <- c(11)
ending_one_2 <- c(16)
ending_one_3 <- c(21)
two_1 <- c(1:100)
two_2 <- c(1:15)
starting_two_1 <- 5
starting_two_2 <- 10
ending_two_1 <- c(101)
ending_two_2 <- c(16)
# lists mimicking output I currently have
list_1_1 <- list(one_1, one_2, one_3)
list_1_2 <- list(starting_one_1, starting_one_2, starting_one_3)
list_1_3 <- list(ending_one_1, ending_one_2, ending_one_3)
list_2_1 <- list(two_1, two_2)
list_2_2 <- list(starting_two_1, starting_two_2)
list_2_3 <- list(ending_two_1, ending_two_2)
# producing desired otput
list_1_1_desired <- list()
list_1_1_desired[["sequence"]] <- one_1
list_1_1_desired[["starting"]] <- starting_one_1
list_1_1_desired[["ending"]] <- ending_one_1
list_1_2_desired <- list()
list_1_2_desired[["sequence"]] <- one_2
list_1_2_desired[["starting"]] <- starting_one_2
list_1_2_desired[["ending"]] <- ending_one_2
list_1_3_desired <- list()
list_1_3_desired[["sequence"]] <- one_3
list_1_3_desired[["starting"]] <- starting_one_3
list_1_3_desired[["ending"]] <- ending_one_3
list_2_1_desired <- list()
list_2_1_desired[["sequence"]] <- two_1
list_2_1_desired[["starting"]] <- starting_two_1
list_2_1_desired[["ending"]] <- ending_two_1
list_2_2_desired <- list()
list_2_2_desired[["sequence"]] <- two_2
list_2_2_desired[["starting"]] <- starting_two_2
list_2_2_desired[["ending"]] <- ending_two_2
first_lists <- list(list_1_1_desired, list_1_2_desired, list_1_3_desired)
names(first_lists) <- c("one_1", "one_2", "one_3")
second_lists <- list(list_2_1_desired, list_2_2_desired)
names(second_lists) <- c("two_1", "two_2")
# this is what I would like to obtain
final_desired_output <- list()
final_desired_output[["one"]] <- first_lists
final_desired_output[["two"]] <- second_lists
You could use purrr::transpose:
out <- mget(ls(pattern = '^list.*\\d$')) %>%
split(sub("_\\d+$", '', names(.))) %>%
map(~transpose(set_names(.,c('sequence', 'starting', 'ending'))))
all.equal(out, final_desired_output, check.attributes = FALSE)
[1] TRUE

R obs Number Limited

I've been stuck with the data here for days, as I want to get data from API Binance, which is surely over ten thousand obs, but the R only limited the obs at 1500L.
I have been advised to use loop, but it doesn't help any.
Any help would be totally my gratitude!
library(httr)
library(jsonlite)
library(lubridate)
# api description:
#
get
("https://github.com/binance-exchange/binance-official-api-docs/blob/master/rest-api.md"
)
#klinecandlestick-data
options(stringsAsFactors = FALSE)
url <- "https://api.binance.com"
path <- "/api/v3/exchangeInfo"
raw.result <- GET(url = url, path = path)
not.cool.data <- rawToChar(raw.result$content)
list1 <- fromJSON(not.cool.data)
list <- list1$symbols$symbol
klines2 <- rbindlist(lapply(
c('LTCTUSD', 'LTCBNB'),
binance_klines,
interval = '30m',
start_time = '2017-01-01',
end_time = '2021-01-08'
))
names(klines2)
sapply(klines2, function(x) length(unique(x)))
klines2
df.1 <- list.files(pattern = "2017-2021")
df.1_r <- vector(mode = integer,
length = length(klines2))
tickling <- unique(klines2$symbol)
tickling
low <- c()
high <- c()
for (symbol in tickling) {
look.at <- klines2$symbol == symbol
low <- append(low,min(symbol$low[look.at]))
high <- append(high, max(symbol$high[look.at]))
}
tickling

Creating a more efficient code for unique combinations of tickers in bloomberg.

I am trying to look at all the unique combinations of the constituents within the FTSE100. Furthermore I then want to look at the historical ratio between each unique combination.
The method below works but seems a little clunky especially the very last for loop. I would appreciate any suggestions on how to make this code more efficient.
Thank you in advance....
#######################################################################################
#install all packages and apply library
#######################################################################################
#install.packages("openxlsx")
library("openxlsx")
#install.packages("devtools")
library("devtools")
#install.packages("Rblpapi")
library("Rblpapi")
#install.packages("zoo")
library("zoo")
#install.packages("TTR")
library("TTR")
#install.packages("lubridate")
library("lubridate")
#install.packages("quantmod")
library("quantmod")
#install.packages("MASS")
library("MASS")
#install.packages("dplyr")
library("dplyr")
#install.packages("ggplot2")
library("ggplot2")
con <- blpConnect()
option.fields <- c("periodicitySelection")
option.values <- c("MONTHLY")
bbg.options <- structure(option.values, names = option.fields)
##########################################################################################################################
# # FOR REMOVING STOCKS BY LENGTH#########################################################################################
##########################################################################################################################
bb <- Sys.Date()-1
cc <- 4800
aa <- bb-cc
##########################################################################################################################
# # BENCHMARK FOR CONSTITUTENTS###########################################################################################
##########################################################################################################################
benchmark1 <- "UKX INDEX"
##########################################################################################################################
# # BENCHMARK FOR CALCULATIONS############################################################################################
##########################################################################################################################
benchmark2 <- "UKX INDEX"
##########################################################################################################################
# # MKT CAP RESTRICTION###################################################################################################
##########################################################################################################################
mktcap <- 1
require(Rblpapi)
con <- blpConnect()
#########################################################################################################################
# # PULL IN ALL TICKERS FROM THE BROADMARKET INDEX.XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#########################################################################################################################
p <- bds(benchmark1, "indx_mweight_hist", overrides = c(end_date_override="20170508"))
p$tickers <- paste(p$`Index Member`, " EQUITY")
p <- cbind(p[3], p[2])
tickers <- p[1]
#########################################################################################################################
# # CHECK THEIR LENGTH AND WHETHER IT IS VALID. CREATE FILTERED TICKERS.XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#########################################################################################################################
filteredtickers <- rep(0, nrow(tickers))
con <- blpConnect()
for (i in 1:nrow(tickers)){
q <- bdh(tickers[i,],
fields = c("CUR_MKT_CAP"),
start.date = aa, end.date = bb)
q$date <- as.Date(q$date, format = "%d/%m/%Y")
q$CUR_MKT_CAP <- as.numeric(q$CUR_MKT_CAP)
#class(q) == "data.frame"
qlength <- length(q$CUR_MKT_CAP)
if(qlength > 2500){
filteredtickers[i] <- tickers[i,]}
}
#########################################################################################################################
# # CHECK AND FILTER BY MARKET CAP RESTRICTION.
#########################################################################################################################
t <- as.data.frame(filteredtickers, stringsAsFactors = FALSE)
t <- subset(t, filteredtickers!="0")
colnames(t) <- "tickers"
filteredtickersII <- rep(0, nrow(t))
for (i in 1:nrow(t)){
qq <- bdh(t[i,],
fields = c("CUR_MKT_CAP"),
start.date = aa, end.date = bb)
qq$date <- as.Date(qq$date, format = "%d/%m/%Y")
qq$CUR_MKT_CAP <- as.numeric(qq$CUR_MKT_CAP)
qqlength <- (last(qq$CUR_MKT_CAP))/1000
if(qlength > mktcap){
filteredtickersII[i] <- t[i,]}
}
s <- as.data.frame(filteredtickersII, stringsAsFactors = FALSE)
s <- subset(s, filteredtickersII!="0")
colnames(s) <- "tickers"
stocklength <- rep(0, nrow(s))
for (i in 1:nrow(s)){
qr <- bdh(s[i,],
fields = c("CUR_MKT_CAP"),
start.date = aa, end.date = bb)
qr$date <- as.Date(qr$date, format = "%d/%m/%Y")
qr$CUR_MKT_CAP <- as.numeric(qr$CUR_MKT_CAP)
if(length(qr$CUR_MKT_CAP) == qlength){
stocklength[i] <- s[i,]
}
}
sfiltered <- as.data.frame(stocklength, stringsAsFactors = FALSE)
sfiltered <- subset(sfiltered, stocklength!="0")
colnames(sfiltered) <- "tickers"
#########################################################################################################################
# # Create two vectors of tickers s1 and s2
#########################################################################################################################
s1 <- sfiltered
s2 <- sfiltered
s1 <- as.vector(s1$tickers)
s2 <- as.vector(s2$tickers)
#########################################################################################################################
# # Create all the combinations between the two vectors
#########################################################################################################################
t <- expand.grid(s1=s1, s2=s2)
t <- t[order(t$s1),]
t <- data.frame(t)
#########################################################################################################################
# # if element from s1 and s2 are equal then make NA
#########################################################################################################################
for(i in 1:nrow(t)){
if(t[i,1] == t[i,2]){
t[i,2] = NA
}
}
#########################################################################################################################
# # remove all rows with NA's XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#########################################################################################################################
t1 <- t[complete.cases(t),]
#########################################################################################################################
# # find all unique combinations by removing duplicate combinations XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#########################################################################################################################
t1.sort = t(apply(t1, 1, sort))
t1 <- t1[!duplicated(t1.sort),]
t11 <- as.vector(t1$s1)
t12 <- as.vector(t1$s2)
uvzscore <- rep(0, nrow(t1))
#########################################################################################################################
# # calculate scores for all the unique combinations XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#########################################################################################################################
for (i in 1:nrow(t1)){
u <- bdh(t11[i],
fields = c("PX_LAST"),
start.date = aa, end.date = bb, options = bbg.options,
overrides = NULL)
u$date <- as.Date(u$date, format = "%d/%m/%Y")
u$PX_LAST <- as.numeric(u$PX_LAST)
ulength <- length(u$PX_LAST)
v <- bdh(t12[i],
fields = c("PX_LAST"),
start.date = aa, end.date = bb, options = bbg.options,
overrides = NULL)
v$date <- as.Date(v$date, format = "%d/%m/%Y")
v$PX_LAST <- as.numeric(v$PX_LAST)
vlength <- length(v$PX_LAST)
uv <- u$PX_LAST/v$PX_LAST
uvaverage <- mean(uv)
uvstdev <- sd(uv)
uvzscore[i] <- (50 + (10*((last(uv) - uvaverage)/uvstdev)))
}

Looking up tickers for different time periods in a loop with quantmod

I'm able to loop through and calculate the overnight/over-weekend returns for a list of tickers when the time period is the same for every ticker, but am having trouble when the time period I want to look up is different for each ticker.
For example, with:
symbols <- c("AAPL", "GOOG"," MSFT")
dates <- as.Date(c("2015-01-04", "2015-01-05", "2015-01-06"))
example.df <- data.frame(tickers, dates)
example.df
tickers dates
1 AAPL 2015-01-04
2 GOOG 2015-01-05
3 MSFT 2015-01-06
I'd want the overnight return for AAPL between 2015-01-04 and 2015-01-05, for GOOG between 2015-01-05 and 2015-01-06, etc. If it was a Friday, I'd want the next Monday.
I can can get what I'm looking for by looking up each individual ticker like this:
library(quantmod)
library(dplyr)
# date range accounts for weekends
getSymbols("AAPL", from = "2016-01-04", to = "2016-01-08")
data <- as.data.frame(AAPL)
colnames(data) <- c("open","high","low","close","volume","adj.")
# overnight return calculation
data$overnight.return <- data$open / lag(data$close, default = 0) - 1
data$overnight.return <- paste(round(data$overnight.return * 100, 3), "%",sep= "")
# the overnight/over-weekend returns for the specified date
data.df.final <- slice(data, 2)
Of course that's terribly slow.
Here's as far as I was able to get trying to make a loop out of it:
# needs to be a loop itself and inside the other 'for' loop somehow I think
symbol.list <- example.df[,1]
start <- data[,2]
end <- data[,2] + days(3)
results <- NULL
for (i in symbol.list) {
data <- getSymbols(Symbols = i,
src = "yahoo",
from = start, to = end,
auto.assign = FALSE)
if (inherits(data, 'try-error')) next
colnames(data) <- c("open","high","low","close","volume","adj.")
data <- as.data.frame(data)
data <- cbind(date = rownames(data), data)
data$overnightRtn <- as.vector(data$open / lag(data$close, default = 0) - 1)
data$overnightRtn <- paste(round(data$overnightRtn * 100, 3), "%")
data <- slice(data, 2)
results <- bind_rows(results, data)
}
How can I add the date looping aspect to the above ticker loop?
maybe this is what you are looking for. See that I'm using an index, not the actual list, so I can refer to every element of your data frame (it is not optimized, but it is doing the job you described in the function):
symbols <- c("AAPL", "GOOG"," MSFT") ## " MSFT" has an extra space
dates <- as.Date(c("2015-01-04", "2015-01-05", "2015-01-06"))
example.df <- data.frame(tickers=symbols, dates) ## there was an error here in your example.
symbol.list <- trimws(example.df[,1])
start <- as.Date(example.df[,2])
end <- as.Date(example.df[,2]) + days(3)
results <- NULL
for (i in 1:NROW(symbol.list)) {
try(dataX <- getSymbols(Symbols = symbol.list[i],
src = "yahoo",
from = start[i], to = end[i],
auto.assign = FALSE),silent=T)
if (!exists("dataX")) {cat("Error in ",i,"\n");next}
colnames(dataX) <- c("open","high","low","close","volume","adj.")
dataX <- as.data.frame(dataX)
dataX <- cbind(date = rownames(dataX), dataX)
dataX$overnightRtn <- as.vector(dataX$open / lag(dataX$close, default = 0) - 1)
dataX$overnightRtn <- paste(round(dataX$overnightRtn * 100, 3), "%")
data2 <- slice(dataX, 2);rm(dataX)
results <- if (is.null(results)) data2 else rbind(results, data2)
}

Resources