How to simplify quantmod commands to load stock data in R

If I want to load stock data, this is how I do it (for Google as an example):
## most recent close price
getSymbols("GOOG")
last(GOOG)[,4]
## total equity
getFinancials("GOOG")
viewFinancials(GOOG.f, type='BS', period='A',subset = NULL)['Total Equity',1]
## Net Income
viewFinancials(GOOG.f, type='IS', period='Q',subset = NULL)['Net Income',1]
...the list goes on.
But it would be much more practical to type GOOG only once and then refer to it by a generic name in the rest of the code. How can this be done in quantmod?

The option auto.assign=FALSE should solve the problem.
Below is a modified version of your code. Extending it to a larger number of tickers, e.g. by handling them in a loop, should be straightforward.
library(quantmod)
CollectionOfTickers <- c("GOOG")
IndexOfCurrentTicker <- 1
# the part that follows could be extracted as a function
CurrentTicker <- getSymbols(CollectionOfTickers[IndexOfCurrentTicker], auto.assign=FALSE)
Cl(last(CurrentTicker)) ## most recent close price
## total equity
CurrentTickerFinancials <- getFinancials(CollectionOfTickers[IndexOfCurrentTicker], auto.assign=FALSE)
viewFinancials(CurrentTickerFinancials, type='BS', period='A',subset = NULL)['Total Equity',1]
## Net Income
viewFinancials(CurrentTickerFinancials, type='IS', period='Q',subset = NULL)['Net Income',1]
Note that "GOOG" is no longer hard-coded. It is defined only once, in the vector CollectionOfTickers and the entry of this vector is retrieved by using the variable IndexOfCurrentTicker which could represent a looping variable in a larger collection of tickers.
Edit
A variant of this code to perform a loop over several tickers could be programmed like this:
library(quantmod)
CollectionOfTickers <- c("GOOG","AAPL","TSLA","MSFT")
for (TickerName in CollectionOfTickers) {
  CurrentTicker <- getSymbols(TickerName, auto.assign=FALSE)
  cat("========\nData for ticker ", TickerName, "\n")
  ## most recent close price:
  print(Cl(last(CurrentTicker)))
  CurrentTickerFinancials <- getFinancials(TickerName, auto.assign=FALSE)
  ## total equity:
  print(viewFinancials(CurrentTickerFinancials, type='BS', period='A', subset=NULL)['Total Equity',1])
  ## Net Income:
  print(viewFinancials(CurrentTickerFinancials, type='IS', period='Q', subset=NULL)['Net Income',1])
  cat("========\n")
}
The code quality could be improved by some further refactoring, e.g. by extracting the per-ticker work into a function as sketched below, but in any case this should work.
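For instance, a minimal sketch of such a refactoring (the function name summarizeTicker is illustrative, not part of quantmod):
# Hypothetical helper: fetch prices and financials for one ticker and
# return the three figures of interest in a named list.
summarizeTicker <- function(TickerName) {
  CurrentTicker <- getSymbols(TickerName, auto.assign = FALSE)
  CurrentTickerFinancials <- getFinancials(TickerName, auto.assign = FALSE)
  list(
    LastClose   = Cl(last(CurrentTicker)),
    TotalEquity = viewFinancials(CurrentTickerFinancials, type = 'BS', period = 'A')['Total Equity', 1],
    NetIncome   = viewFinancials(CurrentTickerFinancials, type = 'IS', period = 'Q')['Net Income', 1]
  )
}
AllResults <- lapply(CollectionOfTickers, summarizeTicker)
names(AllResults) <- CollectionOfTickers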
Hope this helps.

I think this is what you want. If you need something else, post back.
require(XML)
require(plyr)
getKeyStats_xpath <- function(symbol) {
  yahoo.URL <- "http://finance.yahoo.com/q/ks?s="
  html_text <- htmlParse(paste(yahoo.URL, symbol, sep = ""), encoding = "UTF-8")
  # search for <td> nodes anywhere that have class 'yfnc_tablehead1'
  nodes <- getNodeSet(html_text, "/*//td[@class='yfnc_tablehead1']")
  if(length(nodes) > 0) {
    measures <- sapply(nodes, xmlValue)
    # clean up the column names
    measures <- gsub(" *[0-9]*:", "", gsub(" \\(.*?\\)[0-9]*:", "", measures))
    # de-duplicate repeated measure names by appending an index
    dups <- which(duplicated(measures))
    for(i in seq_along(dups))
      measures[dups[i]] <- paste(measures[dups[i]], i, sep = " ")
    # use the sibling node to get the value
    values <- sapply(nodes, function(x) xmlValue(getSibling(x)))
    df <- data.frame(t(values))
    colnames(df) <- measures
    return(df)
  } else {
    cat("Could not find", symbol, "\n")
    return(data.frame(NA))
  }
}
tickers <- c("AXP","BA","CAT","CSCO","CVX","DD","DIS","GE","GS","HD","IBM","INTC","JNJ","JPM","KO","MCD","MMM","MRK","MSFT","NKE","PFE","PG","T","TRV","UNH","UTX","V","VZ","WMT","XOM")
stats <- ldply(tickers, getKeyStats_xpath)
stats <- stats[!rowSums(is.na(stats)) == length(stats),]
rownames(stats) <- tickers
write.csv(t(stats), "FinancialStats_updated.csv",row.names=TRUE)
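For a quick sanity check on a single ticker before running the whole list (the columns returned depend on what Yahoo serves at the time):
one <- getKeyStats_xpath("IBM")
str(one)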

Related

Downloading Yahoo stock prices in R - date setting

I am downloading data on stocks from Yahoo Finance with the tseries package. The issue is that I am not getting the most recent date - the last price is always from two days ago.
Below is my code, can you please advise what I should correct to get all the available prices?
Thank you!
`dir <- "D:/Yahoo stock prices" #location
setwd(dir)
# Packages needed
require(tseries)
require(zoo)
YH <- read.csv2(file="SBI.csv",header=T, sep=";", dec=".")
date <- "2012-09-20"
penny_stocks <- c("SMDS.L", "MNDI.L", "SKG.L")
prices <- NULL
for(i in 1:length(YH[,1])){
prices <- try(get.hist.quote(as.character(YH[i,1]),
start=date,
quote='Open'
)
,silent=TRUE
)
if(!is.character(prices)){
if(as.character(YH[i,1]) %in% penny_stocks) prices <- prices / 100
prices <- as.data.frame(prices)
prices <- cbind(rownames(prices),prices)
colnames(prices) <- c("date",as.character(YH[i,1]))
if(length(prices) > 1){
if(i == 1){
allprices <- prices
names <- c("date",as.character(YH[i,1]))
} else {
names <- append(colnames(allprices),as.character(YH[i,1]))
allprices <- merge(allprices,prices,by ="date", all.x = TRUE)
colnames(allprices) <- names
}
}
}
}
write.csv2(allprices,"Prices 200511.csv")
warnings()
`
At the time of writing, the data available on the Yahoo site runs up to 2020-05-12. You need to specify the end date, because by default tseries defines the end as Sys.Date() - 1. So tseries::get.hist.quote("SMDS.L", end = Sys.Date(), quote = "Open") will return the data up to 2020-05-12. You would expect the default to be good enough, but there are a lot of issues with the Yahoo data, and getting the correct last records fails if the data is not located in the US. There is probably a process in place that loads the data a day after closing.
Note that the default settings of tseries::get.hist.quote differ slightly from the defaults of the underlying call to quantmod::getSymbols. tseries uses the default end date Sys.Date() - 1, quantmod uses Sys.Date(). The start dates also differ: tseries uses "1991-01-02", quantmod uses "2007-01-01".
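A minimal sketch that makes both sets of defaults visible side by side (SMDS.L taken from the question; the exact last row depends on the day you run it):
library(tseries)
library(quantmod)
# tseries: the default end is Sys.Date() - 1, so pass end explicitly
x_ts <- get.hist.quote("SMDS.L", end = Sys.Date(), quote = "Open")
tail(x_ts, 3)
# quantmod: the defaults are from = "2007-01-01" and to = Sys.Date()
x_qm <- getSymbols("SMDS.L", auto.assign = FALSE)
tail(Op(x_qm), 3)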

R create an object in a function and rename the object by using a variable but retaining the object for use in the current session

I have a function that I use to get financial data from the Wall Street Journal website. Basically, I want to make a copy of the data held in symData and give it the same name as symbol. That way the objects are in the workspace and can be reused for looking at other information. I don't want to keep them permanently, so creating temp files on the filesystem is not my favoured method.
The problem I have is that I can't figure out how to do it.
library(httr)
library(XML)
library(data.table)
getwsj.quotes <- function(symbol)
{
  myUrl <- sprintf("https://quotes.wsj.com/AU/XASX/%s/FINANCIALS", symbol)
  symbol.data <- GET(myUrl)
  x <- content(symbol.data, as = 'text')
  wsj.tables <- sub('cr_dataTable cr_sub_capital', '\\1', x)
  symData <- readHTMLTable(wsj.tables)
  mytemp <- summary(symData)
  print(mytemp)
  d2e <- gsub('^.* ', '', names(symData[[8]]))
  my.out <- sprintf("%s has Debt to Equity Ratio of %s", symbol, d2e)
  print(my.out)
}
TickerList <- c("AMC", "ANZ")
for (Ticker in TickerList)
{
  Ticker.Data <- lapply(Ticker, FUN = getwsj.quotes)
}
The Ticker.Data output is:
> Ticker.Data
[[1]]
[1] "ANZ has Debt to Equity Ratio of 357.41"
The output from mytemp <- summary(symData) has the following:
     Length Class      Mode
NULL 12     data.frame list
NULL  2     data.frame list
...
I tried various ways of doing it when I call the function, and all I ever get is the last symbol's data. I have searched for hours trying to get an answer but so far, no luck. I need to walk away for a few hours.
Any information would be most helpful.
Regards
Stephen
Edited: I changed my answer based on the suggestion by @MrFlick. It solved another problem.
library(httr)
library(XML)
library(data.table)
getwsj.quotes <- function(Symbol)
{
  MyUrl <- sprintf("https://quotes.wsj.com/AU/XASX/%s/FINANCIALS", Symbol)
  Symbol.Data <- GET(MyUrl)
  x <- content(Symbol.Data, as = 'text')
  wsj.tables <- sub('cr_dataTable cr_sub_capital', '\\1', x)
  SymData <- readHTMLTable(wsj.tables)
  return(SymData)
}
TickerList <- c("AMC", "ANZ", "BHP", "BXB", "CBA", "COL", "CSL", "IAG", "MQG", "NAB", "RIO", "S32", "SCG", "SUN", "TCL", "TLS", "WBC", "WES", "WOW", "WPL")
SymbolDataList <- lapply(TickerList, FUN = getwsj.quotes)
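To get the one-object-per-symbol naming the question originally asked about, the list entries can be named after the tickers and, if desired, expanded into the workspace with base R's list2env (a small sketch on top of the code above):
names(SymbolDataList) <- TickerList
SymbolDataList[["AMC"]] # access one symbol's tables by name
# or create AMC, ANZ, ... as separate objects in the global environment:
list2env(SymbolDataList, envir = .GlobalEnv)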
Thanks again.

Rvest webscraping limited results (R)

I am new to web scraping and have tried several methods to scrape with rvest across multiple pages. Somehow it is still not working, and I only get 15 results instead of the 207 products listed in this category. What am I doing wrong?
library(rvest)
library(data.table)
all_df <- 0
for(i in 1:5){
  url_fonq <- paste0("https://www.fonq.nl/producten/categorie-lichtbronnen/?p=", i)
  webpage_fonq <- read_html(url_fonq)
  head(webpage_fonq)
  product_title_data_html <- html_nodes(webpage_fonq, '.product-title')
  product_title_data <- html_text(product_title_data_html)
  product_title_data <- gsub("\n", "", product_title_data)
  product_title_data <- gsub(" ", "", product_title_data)
  head(product_title_data)
  length(product_title_data)
  product_price_data_html <- html_nodes(webpage_fonq, '.product-price')
  product_price_data <- html_text(product_price_data_html)
  product_price_data <- gsub("\n", "", product_price_data)
  product_price_data <- gsub(" ", "", product_price_data)
  head(product_price_data)
  length(product_price_data)
  fonq.df <- data.frame(Product_title = product_title_data, Price = product_price_data)
  all_df <- list(fonq.df)
}
final2 <- rbindlist(all_df, fill = TRUE)
View(final2)
The problem is that you keep only the data scraped from the last page of the website, so only the last 15 products end up stored.
So instead of overwriting the all_df variable in every iteration
all_df <- list(fonq.df)
append the fonq.df data frame to the end of all_df:
all_df <- bind_rows(all_df, fonq.df)
Here is my complete solution:
library(rvest)
library(dplyr)
all_df <- list()
for(i in 1:5){
  url_fonq <- paste0("https://www.fonq.nl/producten/categorie-lichtbronnen/?p=", i)
  webpage_fonq <- read_html(url_fonq)
  head(webpage_fonq)
  product_title_data_html <- html_nodes(webpage_fonq, '.product-title')
  product_title_data <- html_text(product_title_data_html)
  product_title_data <- gsub("\n", "", product_title_data)
  product_title_data <- gsub(" ", "", product_title_data)
  head(product_title_data)
  length(product_title_data)
  product_price_data_html <- html_nodes(webpage_fonq, '.product-price')
  product_price_data <- html_text(product_price_data_html)
  product_price_data <- gsub("\n", "", product_price_data)
  product_price_data <- gsub(" ", "", product_price_data)
  head(product_price_data)
  length(product_price_data)
  fonq.df <- data.frame(Product_title = product_title_data, Price = product_price_data)
  all_df <- bind_rows(all_df, fonq.df)
}
View(all_df)
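Note that the loop above still only visits pages 1 to 5; with 15 products per page that covers 75 of the 207 products. To fetch them all, derive the page count and use it as the upper bound of the loop (a sketch, assuming the page size stays at 15):
n_products <- 207 # as listed on the category page
n_pages <- ceiling(n_products / 15) # 14 pages
# then loop with for(i in 1:n_pages) instead of for(i in 1:5)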

R rbind - numbers of columns of arguments do not match

How can I ignore a data set if some column names don't exist in it?
I have a list of weather data from a stream, but certain key weather conditions seem to be missing, which gives me the error below with rbind:
Error in rbind(deparse.level, ...) :
numbers of columns of arguments do not match
My code:
weatherDf <- data.frame()
for(i in weatherData) {
  # Get the airport code.
  airport <- i$airport
  # Get the date.
  date <- as.POSIXct(as.numeric(as.character(i$timestamp))/1000, origin="1970-01-01", tz="UTC-1")
  # Get the data in dailysummary only.
  dailySummary <- i$dailysummary
  weatherDf <- rbind(weatherDf, ldply(
    list(dailySummary),
    function(x) c(airport, format(as.Date(date), "%Y-%m-%d"), x[["meanwindspdi"]], x[["meanwdird"]], x[["meantempm"]], x[["humidity"]])
  ))
}
So how can I make sure these key conditions below exist in the data:
meanwindspdi
meanwdird
meantempm
humidity
If any of them does not exist, then ignore that whole record. Is it possible?
EDIT:
The content of weatherData is in jsfiddle (I can't post it here as it is too long, and I don't know the best place to share the data publicly for R...)
EDIT 2:
I get an error when I try to export the data to a txt file:
> write.table(weatherData,"/home/teelou/Desktop/data/data.txt",sep="\t",row.names=FALSE)
Error in data.frame(date = list(pretty = "January 1, 1970", year = "1970", :
arguments imply differing number of rows: 1, 0
What does it mean? It seems that there are some errors in the data...
EDIT 3:
I have exported my entire data in .RData to my google drive:
https://drive.google.com/file/d/0B_w5RSQMxtRSbjdQYWJMX3pfWXM/view?usp=sharing
If you use RStudio, then you can just import the data.
EDIT 4:
target_names <- c("meanwindspdi", "meanwdird", "meantempm", "humidity")
# If it has data then loop it.
if (!is.null(weatherData)) {
  # Initialize a data frame.
  weatherDf <- data.frame()
  for(i in weatherData) {
    if (!all(target_names %in% names(i)))
      next
    # Get the airport code.
    airport <- i$airport
    # Get the date.
    date <- as.POSIXct(as.numeric(as.character(i$timestamp))/1000, origin="1970-01-01", tz="UTC-1")
    # Get the data in dailysummary only.
    dailySummary <- i$dailysummary
    weatherDf <- rbind(weatherDf, ldply(
      list(dailySummary),
      function(x) c(airport, format(as.Date(date), "%Y-%m-%d"), x[["meanwindspdi"]], x[["meanwdird"]], x[["meantempm"]], x[["humidity"]])
    ))
  }
  # Rename column names.
  colnames(weatherDf) <- c("airport", "key_date", "ws", "wd", "tempi", "humidity")
  # Convert certain columns of weatherDf to numeric.
  columns <- c("ws", "wd", "tempi", "humidity")
  weatherDf[, columns] <- lapply(columns, function(x) as.numeric(weatherDf[[x]]))
}
Inspect the weatherDf:
> View(weatherDf)
Error in .subset2(x, i, exact = exact) : subscript out of bounds
You can use next to skip the current iteration of the loop and go to the next iteration:
target_names <- c("meanwindspdi", "meanwdird", "meantempm", "humidity")
for(i in weatherData) {
  if (!all(target_names %in% names(i)))
    next
  # continue with loop...
}
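A toy illustration with made-up records (the second one lacks humidity, so it is skipped and only the first is printed):
target_names <- c("meanwindspdi", "meanwdird", "meantempm", "humidity")
weatherData <- list(
  list(meanwindspdi = 10, meanwdird = 180, meantempm = 21, humidity = 60),
  list(meanwindspdi = 8, meanwdird = 90, meantempm = 19)
)
for(i in weatherData) {
  if (!all(target_names %in% names(i)))
    next
  print(unlist(i))
}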

Downloading Yahoo stock prices in R

This is a newbie question in R. I am downloading Yahoo Finance monthly stock price data using R, where the ticker names are read from a text file. I am using a loop to read the ticker names, download the data, and put them in a list. My problem is that some ticker names may not be correct, so my code stops when it encounters such a case. I want the following:
1. Skip the ticker name if it is not correct.
2. Each element in the list is a dataframe. I want the ticker names to be appended to the variable names in the element dataframes.
3. I need an efficient way to create a dataframe that has the closing prices as variables.
Here is the sample code for the simplified version of my problem.
library(tseries)
tckk <- c("MSFT", "C", "VIA/B", "MMM") # ticker names defined
numtk <- length(tckk)
ustart <- "2000-12-30"
uend <- "2007-12-30" # start and end date
all_dat <- list() # empty list to fill in the data
for(i in 1:numtk)
{
  all_dat[[i]] <- xxx <- get.hist.quote(instrument = tckk[i], start = ustart, end = uend,
                                        quote = c("Open", "High", "Low", "Close"),
                                        provider = "yahoo", compression = "m")
}
The code stops at the third entry, but I want to skip this ticker and move on to "MMM". I have heard about the tryCatch() function but do not know how to use it.
As per question 2, I want the variable names for the first element of the list to be "MSFTopen", "MSFThigh", "MSFTlow", and "MSFTclose". Is there a better way to do it apart from using a combination of a loop and the paste() function?
Finally, for question 3, I need a dataframe with three columns corresponding to the closing prices. Again, I am trying to avoid a loop here.
Thank you.
Your best bet is to use quantmod and store the results as a time series (in this case, it will be xts):
library(quantmod)
library(plyr)
symbols <- c("MSFT", "C", "VIA/B", "MMM")
# 1
l_ply(symbols, function(sym) try(getSymbols(sym)))
symbols <- symbols[symbols %in% ls()]
# 2
sym.list <- llply(symbols, get)
# 3
data <- xts()
for(i in seq_along(symbols)) {
  symbol <- symbols[i]
  data <- merge(data, get(symbol)[, paste(symbol, "Close", sep = ".")])
}
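Since the question mentions tryCatch(): an equivalent sketch using tryCatch() instead of try(), where the error handler returns NULL and failed tickers are simply dropped (get_quote_safe is an illustrative name, not a library function):
library(tseries)
get_quote_safe <- function(sym) {
  tryCatch(get.hist.quote(instrument = sym, start = "2000-12-30", end = "2007-12-30",
                          quote = c("Open", "High", "Low", "Close"),
                          provider = "yahoo", compression = "m"),
           error = function(e) NULL)
}
all_dat <- Filter(Negate(is.null), lapply(c("MSFT", "C", "VIA/B", "MMM"), get_quote_safe))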
This is also a little late... If you want to grab data with just R's base functions, without dealing with any add-on packages, just use the function read.csv(URL), where the URL is a string pointing to the right place at Yahoo. The data will be pulled in as a dataframe, and you will need to convert the 'Date' column from a string to a Date type for any plots to look nice. A simple code snippet is below.
URL <- "http://ichart.finance.yahoo.com/table.csv?s=SPY"
dat <- read.csv(URL)
dat$Date <- as.Date(dat$Date, "%Y-%m-%d")
Using R's base functions may give you more control over the data manipulation.
I'm a little late to the party, but I think this will be very helpful to other latecomers.
The stockSymbols function in TTR fetches instrument symbols from nasdaq.com, and adjusts the symbols to be compatible with Yahoo! Finance. It currently returns ~6,500 symbols for AMEX, NYSE, and NASDAQ. You could also take a look at the code in stockSymbols that adjusts tickers to be compatible with Yahoo! Finance to possibly adjust some of the tickers in your file.
NOTE: stockSymbols in the version of TTR on CRAN is broken due to a change on nasdaq.com, but it is fixed in the R-forge version of TTR.
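A quick look at what stockSymbols returns (using the fixed R-forge version mentioned in the note; the exact set of columns may vary with the TTR version):
library(TTR)
syms <- stockSymbols()
nrow(syms) # roughly 6,500 symbols
head(syms[, c("Symbol", "Name", "Exchange")])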
I do it like this, because I need to have the historic price list and a daily update file in order to run other packages:
library(fImport)
fecha1 <- "03/01/2009"
fecha2 <- "02/02/2010"
Sys.time()
y <- format(Sys.time(), "%y")
m <- format(Sys.time(), "%m")
d <- format(Sys.time(), "%d")
fecha3 <- paste(c(m, "/", d, "/", "20", y), collapse = "")
write.table(yahooSeries("GCI", from = fecha1, to = fecha2), file = "GCI.txt", sep = "\t", quote = FALSE, eol = "\r\n", row.names = TRUE)
write.table(yahooSeries("GCI", from = fecha2, to = fecha3), file = "GCIupdate.txt", sep = "\t", quote = FALSE, eol = "\r\n", row.names = TRUE)
GCI <- read.table("GCI.txt")
GCI1 <- read.table("GCIupdate.txt")
GCI <- rbind(GCI1, GCI)
GCI <- unique(GCI)
write.table(GCI, file = "GCI.txt", sep = "\t", quote = FALSE, eol = "\r\n", row.names = TRUE)
If your ultimate goal is to get the data.frame of three columns of closing prices, then the new package tidyquant may be better suited for this.
library(tidyquant)
symbols <- c("MSFT", "C", "VIA/B", "MMM")
# Download data in tidy format.
# Will remove VIA/B and warn you.
data <- tq_get(symbols)
# Ticker symbols as column names for closing prices
data %>%
  select(.symbol, date, close) %>%
  spread(key = .symbol, value = close)
This will scale to any number of stocks, so the file of 1000 tickers should work just fine!
Slightly modified from the above solutions... (thanks Shane and Stotastic)
symbols <- c("MSFT", "C", "MMM")
# 1. retrieve data
for(i in seq_along(symbols)) {
URL <- paste0("http://ichart.finance.yahoo.com/table.csv?s=", symbols[i])
dat <- read.csv(URL)
dat$Date <- as.Date(dat$Date, "%Y-%m-%d")
assign(paste0(symbols[i]," _data"), dat)
dat <- NULL
}
Unfortunately, the URL "ichart.finance.yahoo.com" is dead and no longer works. As far as I know, Yahoo closed it, and it seems it will not be reopened.
Several days ago I found a nice alternative (https://eodhistoricaldata.com/) with an API very similar to Yahoo Finance.
Basically, for the R script described above, you just need to change this part:
URL <- paste0("ichart.finance.yahoo.com/table.csv?s=", symbols[i])
to this:
URL <- paste0("eodhistoricaldata.com/api/table.csv?s=", symbols[i])
Then add an API key and it will work the same way as before. It has saved a lot of time for my R scripts.
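For example (the api_token query parameter name is my reading of the provider's documentation at the time, so treat it as an assumption; substitute your own key):
URL <- paste0("https://eodhistoricaldata.com/api/table.csv?s=", symbols[i],
              "&api_token=YOUR_API_KEY")
dat <- read.csv(URL)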
Maybe give the BatchGetSymbols library a try. What I like about it over quantmod is that you can specify a time period for your data.
library(BatchGetSymbols)
# set dates
first.date <- Sys.Date() - 60
last.date <- Sys.Date()
freq.data <- 'daily'
# set tickers
tickers <- c('FB', 'MMM', 'PETR4.SA', 'abcdef')
l.out <- BatchGetSymbols(tickers = tickers,
                         first.date = first.date,
                         last.date = last.date,
                         freq.data = freq.data,
                         cache.folder = file.path(tempdir(), 'BGS_Cache')) # cache in tempdir()
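The result is a list: in the structure documented by BatchGetSymbols, l.out$df.control reports the download status per ticker (the deliberately fake 'abcdef' shows up as failed) and l.out$df.tickers holds the actual price rows:
str(l.out$df.control)
head(l.out$df.tickers)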
