I am trying to export data I pulled from Yahoo with the code below. The export works, but only the value column comes through; the date part does not. How can I export the date and value together to Excel?
#Code:
library(tseries)
library(prophet)
library(tidyverse)
library(writexl)
library(readxl)
#determine date
start = "2013-01-01"
end = "2021-05-25"
#get the data from sources
TL <- get.hist.quote(instrument = "TRYUSD=X",
start = start,
end = end,
quote = "Close",
compression = "d")
#change the format of the dataset
y <- data.frame(TL)
#convert dataframe to excel document
write_xlsx(y, "C:/Users/hay/OneDrive/Desktop/Turkish Lira Forecast/TL.xlsx")
#load the dataset from the excel file
bitcoin <- read_excel("C:/Users/hay/OneDrive/Desktop/bitcoin.xlsx")
View(bitcoin)
#call the prophet function to fit the model
model <- prophet(TL)
future <- make_future_dataframe(model, periods = 365)
tail(future)
#forecast
forecast <- predict(model, future)
tail(forecast[c('ds', 'yhat', 'yhat_lower', 'yhat_upper')])
#plot the model estimates
dyplot.prophet(model, forecast)
prophet_plot_components(model, forecast)
We can use
library(dplyr)
library(tibble)
y %>%
  rownames_to_column('Date') %>%
  writexl::write_xlsx("data.xlsx")
The date part can be extracted from the rownames -
y$Date <- rownames(y)
writexl::write_xlsx(y,"data.xlsx")
I was working on an assignment with the following code:
library(tidyverse)
library(quantmod)
library(lubridate)
macro <- c("GDPC1", "CPIAUCSL","DTB3", "DGS10", "DAAA", "DBAA", "UNRATE", "INDPRO", "DCOILWTICO")
rm(macro_factors)
for (i in 1:length(macro)) {
  getSymbols(macro[i], src = "FRED")
  data <- as.data.frame(get(macro[i]))
  data$date <- as.POSIXlt.character(rownames(data))
  rownames(data) <- NULL
  colnames(data)[1] <- "macro_value"
  data$quarter <- as.yearqtr(data$date)
  data$macro_ticker <- rep(macro[i], dim(data)[1])
  data <- data %>%
    mutate(date = ymd(date)) %>%
    group_by(quarter) %>%
    top_n(1, date) %>%
    filter(date >= "1980-01-01", date <= "2019-12-31") %>%
  if(i == 1){macro_factors <- data} else {macro_factors <- rbind(macro_factors, data)}
}
but this came out
Error in as.POSIXlt.character(rownames(data)) :
character string is not in a standard unambiguous format
I tried following an online tutorial that uses as.POSIXct() by converting the data from character to numeric first, but it did not work in my case. I checked the class of the data: it is shown like "year-month-day" and is of class character, so the function as.POSIXlt() should work, right?
There are several problems:
The POSIXlt class should not be used in data frames. Also do not use POSIXct for dates, since you can get into needless time zone problems.
To convert an xts object, such as the object produced by getSymbols, to a data frame, use fortify.zoo.
Depending on what you want to do, you might not need to convert from xts to a data frame in the first place. Suggest reading about xts and zoo in the documentation of those packages.
This gives a list of data frames L and then a long data frame DF containing them all.
library(dplyr, exclude = c("filter", "lag"))
library(quantmod) # also brings in xts and zoo
macro <- c("GDPC1", "CPIAUCSL")
getData <- function(symb) symb %>%
  getSymbols(src = "FRED", auto.assign = FALSE) %>%
  aggregate(as.yearqtr, tail, 1) %>%
  window(start = "1980q1", end = "2019q4") %>%
  fortify.zoo
L <- Map(getData, macro)
DF <- bind_rows(L, .id = "id")
I have been using the tbats and nnetar functions from the forecast package to produce an hourly electric load forecast with a forecasting horizon of a week and a month, and both models perform satisfactorily. My data set comprises hourly values from January 2017 up to early May 2022 (46,848 values). However, when I try to make an hourly load forecast up to the end of the year (07/05/2022-31/12/2022, 5,736 hourly values), the results are either flat or lose seasonality. Does anyone have any idea why the long-term forecast gives such poor results? Any idea on either model will be highly appreciated. I apologise for the very large data set.
I have uploaded the data set on git hub:
df <- read.csv(file = "https://raw.githubusercontent.com/Argiro1983/Load/LOAD/LOAD_2017_2022.csv", sep=";")
#fix datetime
df$TIME<- with(df, sprintf("%02d:00", TIME-1))
df$DATE<-as.Date(df$DATE, "%d/%m/%Y")
df$TIME <- paste(df$TIME, ':00', sep = '')
View(df)
library(ggpubr)
library(chron)
df$TIME <- chron(times=df$TIME)
DATETIME<-as.POSIXct(paste(df$DATE, df$TIME), origin = "1970-01-01 00:00:00", tz="UTC", usetz=TRUE)
my_df <- data.frame(timestamp = as.POSIXct(DATETIME, format = "%d.%m.%Y %H:%M", origin = "1970-01-01 00:00:00", tz = "UTC"), input = df[,3])
my_df <- setNames(my_df, c("DATETIME","LOAD"))
Particularly the TBATS model results lose seasonality and seem strange. The code I used is the following:
library(ggplot2)
library(forecast)
library(tseries)
library(dplyr)
Load = ts(my_df[, c('LOAD')])
my_df$Clean_Load = tsclean(Load)
Clean_Load = ts(my_df[, c('Clean_Load')])
load_ts = ts(Clean_Load)
msts <- msts(load_ts, seasonal.periods=c(24,168,8760), start=c(2017,01))
plot(msts, main="Load", xlab="Year", ylab="MWh")
s <- tbats(msts)
sp<- predict(s,h=5736)
The results are also flat when I run the nnetar function, with or without temperature as an external regressor. I have tried different lambdas, but none seems to work:
#create dataframe for temperature historical values
Temperature_history <- read.csv(file = "https://raw.githubusercontent.com/Argiro1983/Load/LOAD/Temperature_history.csv", sep=";")
DATETIME<-as.POSIXct(Temperature_history$Datetime, format = "%d/%m/%Y %H:%M", tz="UCT", usetz=TRUE)
Temperature_df <- data.frame(timestamp = as.POSIXct(DATETIME, format = "%d/%m/%Y %H:%M", tz = "UCT"), input = Temperature_history$Temperature)
Temperature_df<- setNames(Temperature_df, c("DATETIME","TEMPERATURE"))
#create dataframe for temperature forecasted values
Temperature_forecast <- read.csv(file = "https://raw.githubusercontent.com/Argiro1983/Load/LOAD/Temperature_forecast.csv", sep=";")
DATETIME2<-as.POSIXct(Temperature_forecast$datehour, format = "%d/%m/%Y %H:%M", tz="UCT", usetz=TRUE)
Temp_forecast <- data.frame(timestamp = as.POSIXct(DATETIME2, format = "%d/%m/%Y %H:%M", tz = "UCT"), input = Temperature_forecast$TEMP_FORECAST)
View(Temp_forecast)
Temp_forecast <- setNames(Temp_forecast, c("DATETIME","TEMPERATURE"))
View(Temp_forecast)
#define and run NN model
library(forecast)
myts = ts(my_df$LOAD, frequency = 24)
fit2 = nnetar(myts,xreg = Temperature_df$TEMPERATURE, lambda = 0.5, P=1, MaxNWts=1177)
nnetforecast <- forecast(fit2, xreg = Temp_forecast$TEMPERATURE, h = 5736, PI = F, npaths=100, bootstrap = TRUE)
autoplot(nnetforecast, h = 5736)
First, your code won't work because the GitHub link does not point to the csv file. Replace the first line as follows:
df <- read.csv(file = "https://raw.githubusercontent.com/Argiro1983/Load/LOAD/LOAD_2017_2022.csv", sep=";")
Then running your code, I get reasonable results for the tbats model for the first few weeks:
sp <- forecast(s,h=14*24)
autoplot(sp, include=14*24)
Using a time series model to forecast much further ahead makes little sense here.
In any case, there are well-developed models for electricity demand that will do better than either TBATS or NNETAR. For a simple starting point, try Tao Hong's vanilla model, described in Section 2.2 of https://doi.org/10.1016/j.ijforecast.2015.09.006. It's just a linear regression, but it will do better than any of the models you are trying.
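As a concrete starting point, here is a minimal sketch of that vanilla benchmark, assuming my_df and Temperature_df from above line up hour by hour (the join key and variable names are illustrative, not a definitive implementation):
library(dplyr)
# Merge load and temperature on the hourly timestamp
vanilla_df <- my_df %>%
  inner_join(Temperature_df, by = "DATETIME") %>%
  mutate(trend = row_number(),
         month = factor(format(DATETIME, "%m")),
         hour  = factor(format(DATETIME, "%H")),
         day   = factor(weekdays(DATETIME)))
# Hong's vanilla model: trend, calendar effects, and a cubic polynomial in
# temperature interacted with month and hour (assumes no missing
# temperatures, since poly() does not accept NAs)
fit <- lm(LOAD ~ trend + month + day * hour +
            poly(TEMPERATURE, 3) * month +
            poly(TEMPERATURE, 3) * hour,
          data = vanilla_df)
Feeding predict(fit, newdata = ...) a data frame built the same way from the forecast temperatures then gives the long-horizon forecast.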
With the help of the code below, I am able to get stock information on a daily basis. But is there a way to get it on a minute basis? Please help.
symbols <- c("GODREJIND.NS", "ASHOKLEY.NS")
sd <- as.Date("2019-10-25")
ed <- as.Date("2019-10-31")
tq_get(symbols,
       from = sd,
       to = ed)
If the dates in your data are in "YYYY-MM-dd HH:MM:SS" format, you can extract minute-wise data using lubridate's minute(), as below.
library(lubridate)
df$MINUTES_sd <- minute(df$sd)
df$MINUTES_ed <- minute(df$ed)
Similarly, you can extract the year, month, day, and hour for further analysis of your data set.
For example:
df <- data.frame(symbol = c("GODREJIND.NS", "ASHOKLEY.NS"), sd = "2019-10-25", ed = "2019-10-31")
df$YEAR_sd <- year(df$sd)
df$MONTH_sd <- month(df$sd)
df$DAY_sd <- day(df$sd)
df$YEAR_ed <- year(df$ed)
df$MONTH_ed <- month(df$ed)
df$DAY_ed <- day(df$ed)
I am trying to complete a problem that pulls from two data sets that need to be combined into one. To get to this point, I need to rbind both data sets by the year-month information. Unfortunately, the first data set needs to be tallied by year-month, and I can't seem to figure out how to change the date so I have month-year info rather than month-day-year info.
This is data on avalanches, and I need to write code totaling the number of avalanches each month for the snow season, defined as Dec-Mar. How do I do that?
I keep trying to convert the format of the date to month-year but after I change it with
as.Date(avalancheslc$Date, format="%y-%m")
all the values for Date turn to NAs. Help!
# write the webscraper
library(XML)
library(RCurl)
avalanche<-data.frame()
avalanche.url<-"https://utahavalanchecenter.org/observations?page="
all.pages<-0:202
for(page in all.pages){
this.url<-paste(avalanche.url, page, sep="")
this.webpage<-htmlParse(getURL(this.url))
thispage.avalanche<-readHTMLTable(this.webpage, which=1, header=T)
avalanche<-rbind(avalanche,thispage.avalanche)
}
# subset the data to the Salt Lake Region
avalancheslc<-subset(avalanche, Region=="Salt Lake")
str(avalancheslc)
avalancheslc$monthyear<-format(as.Date(avalancheslc$Date),"%Y-%m")
# How can I tally the number of avalanches?
The final output of my dataset should be something like:
date      avalanches
2000-1    18
2000-2    4
2000-3    10
2000-12   12
2001-1    52
This should work (I tried it on only 1 page, not all 203). Note the use of the option stringsAsFactors = F in the readHTMLTable function, and the need to add names because 1 column does not automatically get one.
library(XML)
library(RCurl)
library(dplyr)
library(lubridate) # for year() and month() below
avalanche <- data.frame()
avalanche.url <- "https://utahavalanchecenter.org/observations?page="
all.pages <- 0:202
for(page in all.pages){
this.url <- paste(avalanche.url, page, sep="")
this.webpage <- htmlParse(getURL(this.url))
thispage.avalanche <- readHTMLTable(this.webpage, which = 1, header = T,
stringsAsFactors = F)
names(thispage.avalanche) <- c('Date','Region','Location','Observer')
avalanche <- rbind(avalanche,thispage.avalanche)
}
avalancheslc <- subset(avalanche, Region == "Salt Lake")
str(avalancheslc)
avalancheslc <- mutate(avalancheslc, Date = as.Date(Date, format = "%m/%d/%Y"),
monthyear = paste(year(Date), month(Date), sep = "-"))
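To tally the avalanches per month, one option (a sketch using dplyr's count(), restricted to the Dec-Mar snow season the question defines) is:
avalancheslc %>%
  filter(month(Date) %in% c(12, 1, 2, 3)) %>% # snow season: Dec-Mar
  count(monthyear, name = "avalanches")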
This is a newbie question in R. I am downloading yahoo finance monthly stock price data using R where the ticker names are read from a text file. I am using a loop to read the ticker names to download the data and putting them in a list. My problem is some ticker names may not be correct thus my code stops when it encounters this case. I want the following.
1. Skip the ticker name if it is not correct.
2. Each element in the list is a dataframe. I want the ticker names to be appended to variable names in element dataframes.
3. I need an efficient way to create a dataframe that has the closing prices as variables.
Here is the sample code for the simplified version of my problem.
library(tseries)
tckk <- c("MSFT", "C", "VIA/B", "MMM") # ticker names defined
numtk <- length(tckk)
ustart <- "2000-12-30"
uend <- "2007-12-30" # start and end date
all_dat <- list(); # empty list to fill in the data
for (i in 1:numtk) {
  all_dat[[i]] <- xxx <- get.hist.quote(instrument = tckk[i], start = ustart,
                                        end = uend,
                                        quote = c("Open", "High", "Low", "Close"),
                                        provider = "yahoo", compression = "m")
}
The code stops at the third entry, but I want to skip this ticker and move on to "MMM". I have heard about the tryCatch() function but do not know how to use it.
As per question 2, I want the variable names for the first element of the list to be "MSFTopen", "MSFThigh", "MSFTlow", and "MSFTclose". Is there a better way to do it apart from using a combination of a loop and the paste() function?
Finally, for question 3, I need a dataframe with three columns corresponding to closing prices. Again, I am trying to avoid a loop here.
Thank you.
Your best bet is to use quantmod and store the results as a time series (in this case, it will be xts):
library(quantmod)
library(plyr)
symbols <- c("MSFT","C","VIA/B","MMM")
#1
l_ply(symbols, function(sym) try(getSymbols(sym)))
symbols <- symbols[symbols %in% ls()]
#2
sym.list <- llply(symbols, get)
#3
data <- xts()
for(i in seq_along(symbols)) {
symbol <- symbols[i]
data <- merge(data, get(symbol)[,paste(symbol, "Close", sep=".")])
}
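Since the question mentions tryCatch(): the try() call in step #1 is the compact form, but an explicit tryCatch() version would look something like this sketch, which skips failing tickers with a warning:
for (sym in symbols) {
  tryCatch(getSymbols(sym),
           error = function(e) warning("skipping ", sym, ": ", conditionMessage(e)))
}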
This is also a little late... If you want to grab data with just R's base functions without dealing with any add-on packages, just use the function read.csv(URL), where the URL is a string pointing to the right place at Yahoo. The data will be pulled in as a dataframe, and you will need to convert 'Date' from a string to a Date type for any plots to look nice. A simple code snippet is below.
URL <- "http://ichart.finance.yahoo.com/table.csv?s=SPY"
dat <- read.csv(URL)
dat$Date <- as.Date(dat$Date, "%Y-%m-%d")
Using R's base functions may give you more control over the data manipulation.
I'm a little late to the party, but I think this will be very helpful to other latecomers.
The stockSymbols function in TTR fetches instrument symbols from nasdaq.com, and adjusts the symbols to be compatible with Yahoo! Finance. It currently returns ~6,500 symbols for AMEX, NYSE, and NASDAQ. You could also take a look at the code in stockSymbols that adjusts tickers to be compatible with Yahoo! Finance to possibly adjust some of the tickers in your file.
NOTE: stockSymbols in the version of TTR on CRAN is broken due to a change on nasdaq.com, but it is fixed in the R-forge version of TTR.
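For example, a quick sketch (assuming the fixed R-forge version; the column names are from the version I used):
library(TTR)
sym <- stockSymbols() # fetches AMEX, NYSE, and NASDAQ listings
head(sym[, c("Symbol", "Name", "Exchange")])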
I do it like this, because I need to have the historic price list and a daily update file in order to run other packages:
library(fImport)
fecha1<-"03/01/2009"
fecha2<-"02/02/2010"
Sys.time()
y <- format(Sys.time(), "%y")
m <- format(Sys.time(), "%m")
d <- format(Sys.time(), "%d")
fecha3 <- paste(c(m,"/",d,"/","20",y), collapse="")
write.table(yahooSeries("GCI", from=fecha1, to=fecha2), file = "GCI.txt", sep="\t", quote = FALSE, eol="\r\n", row.names = TRUE)
write.table(yahooSeries("GCI", from=fecha2, to=fecha3), file = "GCIupdate.txt", sep="\t", quote = FALSE, eol="\r\n", row.names = TRUE)
GCI <- read.table("GCI.txt")
GCI1 <- read.table("GCIupdate.txt")
GCI <- rbind(GCI1, GCI)
GCI <- unique(GCI)
write.table(GCI, file = "GCI.txt", sep="\t", quote = FALSE, eol="\r\n", row.names = TRUE)
If your ultimate goal is to get the data.frame of three columns of closing prices, then the new package tidyquant may be better suited for this.
library(tidyquant)
symbols <- c("MSFT", "C", "VIA/B", "MMM")
# Download data in tidy format.
# Will remove VIA/B and warn you.
data <- tq_get(symbols)
# Ticker symbols as column names for closing prices
data %>%
  select(.symbol, date, close) %>%
  spread(key = .symbol, value = close)
This will scale to any number of stocks, so the file of 1000 tickers should work just fine!
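Reading the tickers from a text file, as in the original question, then slots right in (the file name here is just an illustration):
tickers <- readLines("tickers.txt") # hypothetical file, one ticker per line
data <- tq_get(tickers)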
Slightly modified from the above solutions... (thanks Shane and Stotastic)
symbols <- c("MSFT", "C", "MMM")
# 1. retrieve data
for(i in seq_along(symbols)) {
URL <- paste0("http://ichart.finance.yahoo.com/table.csv?s=", symbols[i])
dat <- read.csv(URL)
dat$Date <- as.Date(dat$Date, "%Y-%m-%d")
assign(paste0(symbols[i], "_data"), dat)
dat <- NULL
}
Unfortunately, the URL "ichart.finance.yahoo.com" is dead and not working now. As far as I know, Yahoo closed it and it seems it will not be reopened.
Several days ago I found nice alternative (https://eodhistoricaldata.com/) with an API very similar to Yahoo Finance.
Basically, for R-script described above you just need to change this part:
URL <- paste0("ichart.finance.yahoo.com/table.csv?s=", symbols[i])
to this:
URL <- paste0("eodhistoricaldata.com/api/table.csv?s=", symbols[i])
Then add an API key and it will work in the same way as before. It saved me a lot of time on my R scripts.
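For instance, passing the key as a query parameter (the api_token parameter name is how I remember their API; double-check it against their docs):
api_key <- "YOUR_API_KEY" # placeholder
URL <- paste0("https://eodhistoricaldata.com/api/table.csv?s=", symbols[i],
              "&api_token=", api_key)
dat <- read.csv(URL)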
Maybe give the BatchGetSymbols library a try. What I like about it over quantmod is that you can specify a time period for your data.
library(BatchGetSymbols)
# set dates
first.date <- Sys.Date() - 60
last.date <- Sys.Date()
freq.data <- 'daily'
# set tickers
tickers <- c('FB','MMM','PETR4.SA','abcdef')
l.out <- BatchGetSymbols(tickers = tickers,
                         first.date = first.date,
                         last.date = last.date,
                         freq.data = freq.data,
                         cache.folder = file.path(tempdir(), 'BGS_Cache')) # cache in tempdir()
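The function returns a list; as far as I recall from its documentation, the download summary and the prices live in these two elements:
head(l.out$df.control) # one row per ticker: download status
head(l.out$df.tickers) # the price data in long format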