Hi I have a dataframe object called NormalizedPnL. Here it is:
print(head(NormalizedPnL))
print(class(NormalizedPnL))
print(class(NormalizedPnL[,1]))
print(class(NormalizedPnL[,2]))
print(class(NormalizedPnL[,3]))
businessdate Strat A Strat B Strat C
1 2014-01-01 0.000000000 0.00000000 0.000000000
2 2014-01-02 0.016764200 0.04218263 0.011912007
3 2014-01-03 0.001179697 -0.02683310 -0.000897083
4 2014-01-06 -0.033131903 0.01902207 0.021104512
5 2014-01-07 -0.033215587 -0.06347915 -0.018900792
6 2014-01-08 0.045181350 -0.00732205 -0.016600410
[1] "data.frame"
[1] "Date" # you can see here that this column is a date
[1] "numeric"
[1] "numeric"
Now I make it a xts object:
NormalizedPnL_xts<- xts(NormalizedPnL[,-1], order.by= as.Date(NormalizedPnL[,1]))
print(head(NormalizedPnL_xts))
print(class(head(NormalizedPnL_xts)))
print(class(NormalizedPnL_xts[,1]))
print(class(NormalizedPnL_xts[,2]))
print(class(NormalizedPnL_xts[,3]))
Strat A Strat B Strat C
2014-01-01 0.000000000 0.00000000 0.000000000
2014-01-02 0.016764200 0.04218263 0.011912007
2014-01-03 0.001179697 -0.02683310 -0.000897083
2014-01-06 -0.033131903 0.01902207 0.021104512
2014-01-07 -0.033215587 -0.06347915 -0.018900792
2014-01-08 0.045181350 -0.00732205 -0.016600410
[1] "xts" "zoo"
[1] "xts" "zoo"
[1] "xts" "zoo"
[1] "xts" "zoo"
you can see that all the columns are zoo
Now I try to use performance Analytics:
charts.PerformanceSummary(NormalizedPnL_xts,geometric= FALSE,cex.axis=1.5)
And I get the error:
The data cannot be converted into a time series. If you are trying to pass
in names from a data object with one column, you should use the
form 'data[rows, columns, drop = FALSE]'. Rownames should have standard date formats, such as
'1985-03-15'.
Error in `[<-.data.frame`(`*tmp*`, i, 2, value = c(90600, 60400, 302000 :
replacement has 3 rows, data has 1
Error in `[<-.data.frame`(`*tmp*`, i, 8, value = c(-742200, -494800, -2474000 :
replacement has 3 rows, data has 1
Can anyone tell me what the issue is? The error mentions having 1 column but I have 3.
Works for me:
# Reproducible example: rebuild the first six rows of NormalizedPnL, convert
# them to an xts object and draw the performance summary chart.
library(PerformanceAnalytics)
# businessdate is stored as character strings here; the three strategy
# columns are numeric.
NormalizedPnL <-
structure(list(businessdate = c("2014-01-01", "2014-01-02", "2014-01-03",
"2014-01-06", "2014-01-07", "2014-01-08"), StratA = c(0, 0.0167642,
0.001179697, -0.033131903, -0.033215587, 0.04518135), StratB = c(0,
0.04218263, -0.0268331, 0.01902207, -0.06347915, -0.00732205),
StratC = c(0, 0.011912007, -0.000897083, 0.021104512, -0.018900792,
-0.01660041)), .Names = c("businessdate", "StratA", "StratB",
"StratC"), class = "data.frame", row.names = c("1", "2", "3",
"4", "5", "6"))
# Use every column except the first as the data, and the first column
# (converted to Date) as the time index.
NormalizedPnL_xts <- xts(NormalizedPnL[,-1], order.by=as.Date(NormalizedPnL[,1]))
# NOTE(review): geometric=FALSE presumably selects arithmetic rather than
# geometric chaining of the returns -- confirm in ?charts.PerformanceSummary.
charts.PerformanceSummary(NormalizedPnL_xts,geometric=FALSE,cex.axis=1.5)
Related
I have an issue when I use as.Date(as.yearqtr(test[,1],format ="%qQ%Y"),frac =1): it returns an error, and the quarter-year values are not converted to dates. The error is:
error in as.yearqtr(as.numeric(x)) (list) object cannot be coerced to type 'double'
This is my dataframe in R.
TIME VALUE
1Q2019 1
2Q2019 2
3Q2019 3
4Q2019 4
The ideal output is
TIME VALUE
2019-03-31 1
2019-06-30 2
2019-09-30 3
2019-12-31 4
We can convert to Date with zoo and get the last date of the quarter with frac. We use some RegEx to rearrange in zoo's suitable format:
# Rearrange labels such as "1Q2019" into "2019 Q1" so that as.yearqtr() can
# read them, then take the last day of each quarter (frac = 1) as a Date.
df[["TIME"]] <- as.Date(
  as.yearqtr(gsub("(\\d)(Q)(\\d{1,})", "\\3 Q\\1", df[["TIME"]])),
  frac = 1
)
df
TIME VALUE
1 2019-03-31 1
2 2019-06-30 2
3 2019-09-30 3
4 2019-12-31 4
Data:
df <-structure(list(TIME = structure(1:4, .Label = c("1Q2019", "2Q2019",
"3Q2019", "4Q2019"), class = "factor"), VALUE = 1:4), class = "data.frame", row.names = c(NA,
-4L))
Here is a function that will return a vector of dates, given an input vector in the form of 1Q2019...
dateStrings <- c("1Q2019","2Q2019","3Q2019","4Q2019","1Q2020")
#' Last calendar day of a quarter
#'
#' Converts quarter labels of the form "1Q2019" (quarter digit, the letter
#' Q, four-digit year) into the date of the last day of that quarter.
#'
#' @param x A character vector (or factor) of quarter labels such as
#'   "3Q2019". The first character is read as the quarter number and
#'   characters 3 to 6 as the year.
#' @return A `Date` vector the same length as `x`. Labels whose quarter is
#'   not 1-4, or that cannot be parsed, yield `NA`. Empty input returns an
#'   empty `Date` vector.
lastDayOfQuarter <- function(x) {
  x <- as.character(x)

  # Month-day of the final day of quarters 1 to 4; none of these depend on
  # leap years, so a fixed lookup is sufficient.
  quarter_end <- c("03-31", "06-30", "09-30", "12-31")

  qtr <- as.integer(substr(x, 1L, 1L))
  # Out-of-range quarters must become NA rather than being dropped by
  # zero/negative indexing.
  qtr[!is.na(qtr) & !(qtr %in% 1:4)] <- NA_integer_
  year <- substr(x, 3L, 6L)

  # Vectorised over the whole input: no loop, no growing result vector and
  # no dependency on lubridate.
  as.Date(paste(year, quarter_end[qtr], sep = "-"), format = "%Y-%m-%d")
}
lastDayOfQuarter(dateStrings)
and the output:
>lastDayOfQuarter(dateStrings)
[1] "2019-03-31" "2019-06-30" "2019-09-30" "2019-12-31" "2020-03-31"
>
I just started with R and finished some tutorials. However, I am trying to get into time series analysis and am having big trouble with it. I made a data frame that looks like this:
Date Time T1
1 2014-05-22 15:15:00 21.6
2 2014-05-22 15:20:00 21.2
3 2014-05-22 15:25:00 21.3
4 2014-05-22 15:30:00 21.5
5 2014-05-22 15:35:00 21.1
6 2014-05-22 15:40:00 21.5
Since I didn't want to work with half days, I removed the first and last day from the data frame. Since R didn't recognize the date or the time as such, but as "factor", I used the lubridate library to convert them properly. Now it looks like this:
Date Time T1
1 2014-05-23 0S 14.2
2 2014-05-23 5M 0S 14.1
3 2014-05-23 10M 0S 14.6
4 2014-05-23 15M 0S 14.3
5 2014-05-23 20M 0S 14.4
6 2014-05-23 25M 0S 14.5
Now the trouble really starts. Using the ts function changes the date to 16944 and the time to 0. How do I set up a data frame with the correct start date and frequency? A new set of data comes in every 5 minutes, so the frequency should be 288 (observations per day). I also tried to set the start date as a vector. Since the 22nd of May was the 142nd day of the year, I tried this:
ts_df <- ts(df, start=c(2014, 142/365), frequency=288)
No error there, but when I call start(ts_df) and end(ts_df) I get:
[1] 2013.998
[1] 2058.994
Can anyone give me a hint on how to work with this kind of data?
"ts" class is typically not a good fit for that type of data. Assuming DF is the data frame shown reproducibly in the Note at the end of this answer we convert it to a "zoo" class object and then perform some manipulations. The related xts package could also be used.
library(zoo)

# Build a zoo series from DF, using columns 1 and 2 (Date and Time) together
# as the time index. `index.column` is spelled out in full instead of relying
# on partial matching of `index`.
# NOTE(review): tz = "" presumably yields a POSIXct index in the session time
# zone -- confirm in ?read.zoo.
z <- read.zoo(DF, index.column = 1:2, tz = "")

window(z, start = "2014-05-22 15:25:00") # observations from 15:25:00 onwards
head(z, 3) # first 3
head(z, -3) # all but last 3
tail(z, 3) # last 3
tail(z, -3) # all but first 3
z[2:4] # 2nd, 3rd and 4th element of z
coredata(z) # numeric vector of data values
time(z) # vector of datetimes
fortify.zoo(z) # data frame whose 2 cols are (1) datetimes and (2) data values
aggregate(z, as.Date, mean) # convert to daily averaging values
ym <- aggregate(z, as.yearmon, mean) # convert to monthly averaging values
frequency(ym) <- 12 # only needed since ym only has length 1
as.ts(ym) # year/month series can be reasonably converted to ts

plot(z) # base-graphics plot of the series

library(ggplot2)
autoplot(z) # ggplot2 plot of the same series
read.zoo could also have been used to read the data in from a file.
Note: DF used above in reproducible form:
DF <- structure(list(Date = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = "2014-05-22",
class = "factor"),
Time = structure(1:6, .Label = c("15:15:00", "15:20:00",
"15:25:00", "15:30:00", "15:35:00", "15:40:00"), class = "factor"),
T1 = c(21.6, 21.2, 21.3, 21.5, 21.1, 21.5)), .Names = c("Date",
"Time", "T1"), class = "data.frame", row.names = c("1", "2",
"3", "4", "5", "6"))
Currently I have multiple dataframes in a list with the following format:
datetime precip code
1 2015-04-15 00:00:00 NA M
2 2015-04-15 01:00:00 NA M
3 2015-04-15 02:00:00 NA M
4 2015-04-15 03:00:00 NA M
5 2015-04-15 04:00:00 NA M
6 2015-04-15 05:00:00 NA M
Each dataframe has a different start and end date, but I would like each dataframe to run from 2015-04-01 0:00:00 to 2015-11-30 23:59:59. I would like to generate rows for the missing dates in datetime in each dataframe and fill the precip column with NA so that I have a continuous time series with nrow=5856 in each dataframe.
Ignore the code column. If values exist for precip, do not alter them, just fill the additional datetime rows with NAs
My attempt so far yields an error:
library(dplyr)
dates <- seq.POSIXt(as.POSIXlt("2015-04-01 0:00:00"), as.POSIXlt("2015-11-30 23:59:59"), by="hour",tz="GMT")
ts <- format.POSIXct(dates,"%Y/%m/%d %H:%M")
df <- data.frame(datetime=ts)
dat=mylist
final_list <- lapply(dat, function(x) full_join(df,dat$precip))
Error in UseMethod("tbl_vars") :
no applicable method for 'tbl_vars' applied to an object of class "c('double', 'numeric')"
link to sample file in case it is needed
Thanks for your suggestions.
As vitor pointed out above, you can only join two data.frames, not a data.frame and a vector. dplyr also plays nice with POSIXct, but not POSIXlt (Hadley has a preference), so if you store your data as actual time, it will be easier to join usefully.
Further, within lapply, you need to use the variable of the function you create (x here), or you'll just be repeating the same thing. Don't subset the data.frames, either, if you want to join them; you need a column in each with the same name and data type.
All together, you need something like:
library(dplyr)

# Store the timestamps as POSIXct so the join key has the same type in both
# tables being joined.
df$datetime <- as.POSIXct(df$datetime, tz = "GMT")
# as_tibble() replaces the deprecated tbl_df().
df <- as_tibble(df) # not necessary, but prints nicely

list_df <- list(df, df) # fake list of data.frames

# make a data.frame of sequence to join on
# tibble() replaces the deprecated data_frame(). The time zone comes from the
# POSIXct endpoints, so no separate tz argument is passed to seq().
seq_df <- tibble(
  datetime = seq(
    as.POSIXct("2015-04-01 0:00:00", tz = "GMT"),
    as.POSIXct("2015-11-30 23:59:59", tz = "GMT"),
    by = "hour"
  )
)

# Join each list element (the lapply argument `x`) against the full hourly
# sequence; timestamps missing from `x` get NA in the other columns.
lapply(list_df, function(x) {
  full_join(x, seq_df)
})
# Joining by: "datetime"
# Joining by: "datetime"
# [[1]]
# Source: local data frame [5,857 x 3]
#
# datetime precip code
# (POSI) (lgl) (fctr)
# 1 2015-04-15 00:00:00 NA M
# 2 2015-04-15 01:00:00 NA M
# 3 2015-04-15 02:00:00 NA M
# 4 2015-04-15 03:00:00 NA M
# 5 2015-04-15 04:00:00 NA M
# 6 2015-04-15 05:00:00 NA M
# 7 2015-04-01 04:00:00 NA NA
# 8 2015-04-01 05:00:00 NA NA
# 9 2015-04-01 06:00:00 NA NA
# 10 2015-04-01 07:00:00 NA NA
# .. ... ... ...
#
# [[2]]
# Source: local data frame [5,857 x 3]
#
# datetime precip code
# (POSI) (lgl) (fctr)
# 1 2015-04-15 00:00:00 NA M
# 2 2015-04-15 01:00:00 NA M
# 3 2015-04-15 02:00:00 NA M
# 4 2015-04-15 03:00:00 NA M
# 5 2015-04-15 04:00:00 NA M
# 6 2015-04-15 05:00:00 NA M
# 7 2015-04-01 04:00:00 NA NA
# 8 2015-04-01 05:00:00 NA NA
# 9 2015-04-01 06:00:00 NA NA
# 10 2015-04-01 07:00:00 NA NA
# .. ... ... ...
Data:
df <- structure(list(datetime = structure(c(1429056000, 1429059600, 1429063200, 1429066800,
1429070400, 1429074000), class = c("POSIXct", "POSIXt"), tzone = "GMT"), precip = c(NA,
NA, NA, NA, NA, NA), code = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = "M",
class = "factor")), .Names = c("datetime", "precip", "code"), row.names = c("1",
"2", "3", "4", "5", "6"), class = c("tbl_df", "tbl", "data.frame"))
I want to plot a graph of n (y-axis) versus date (x-axis) in R, but because of the format of the dates in my data, the dates are not sorted in the correct ascending order. How can I solve this? I appreciate the help.
hybrid <- readWorksheetFromFile(excel.file, sheet="ResultSet", header=TRUE)
wb <- loadWorkbook(excel.file)
setMissingValue(wb,value=c("NA"))
hybrid1 <- readWorksheet(wb, sheet="ResultSet", header=TRUE)
I used dplyr functions. Suppose each Pub.Number has a unique code and I replaced it with one. Then I count how many there are for each date.
hybrid <- mutate(hybrid1, n=sum(Publication.Number=1))
p1 <- select(hybrid1, Publication.Date, n)
pt <- count(p1, Publication.Date, wt=n)
The output look like this:
pt
Source: local data frame [627 x 2]
Publication.Date n
(chr) (dbl)
1 01.01.2013 1
2 01.01.2014 8
3 01.01.2015 10
4 01.02.2012 3
5 01.03.2012 16
6 01.04.2015 2
7 01.05.2012 1
8 01.05.2013 7
9 01.05.2014 23
10 01.06.2011 1
.. ... ...
Then, I plotted it but R recognized Pub.Date as character
qplot(x=Publication.Date, y=n, data=pt, geom="point")
x <- hybrid1[,2]
class(x)
[1] "character"
The graph I've plotted is a mess because of the wrong order of the date
I tried using the as.Date function but it seems that it's not complete (I'm using R version 3.2.2)
> pt[,1] <- as.Date(pt[,1], format='%d.%m.%Y’)
+
First convert 'Publication.Date' to Date format, then order:
using your data:
# Read the table that was copied to the clipboard (pbpaste is the macOS
# clipboard command), keeping strings as character rather than factor.
data <- read.table(
  pipe("pbpaste"),
  sep = "", header = TRUE, stringsAsFactors = FALSE
)
# Drop the first column.
# NOTE(review): presumably the row numbers from the pasted printout -- confirm.
data <- data[, -1]
# Both names must end in a plain ASCII quote; a typographic quote leaves the
# string unterminated.
names(data) <- c("Pub.Date", "n")
Pub.Date n
1 01.01.2014 8
2 01.01.2015 10
3 01.02.2012 3
4 01.03.2012 16
5 01.04.2015 2
6 01.05.2012 1
7 01.05.2013 7
8 01.05.2014 23
9 01.06.2011 1
convert ‘Pub.Date’ to date format:
# Parse the day.month.year strings into Date objects so that they sort
# chronologically. The format string ends in a plain ASCII quote.
data[, 1] <- as.Date(data[, 1], format = "%d.%m.%Y")
and order:
data[order(data$"Pub.Date",data$n), ]
Pub.Date n
9 2011-06-01 1
3 2012-02-01 3
4 2012-03-01 16
6 2012-05-01 1
7 2013-05-01 7
1 2014-01-01 8
8 2014-05-01 23
2 2015-01-01 10
5 2015-04-01 2
In the usual course of data input with R, values like "01.01.2013" will become factor variables. Since they are not in one of the two "standard" Date formats, YYYY/MM/DD or YYYY-MM-DD, they cannot be input directly as "Date"s with "colClasses" unless you build an "as.DT" method. You will need to make sure they are character vectors, either by using stringsAsFactors=FALSE in a read function or by coercing to character with as.character after they are input. That header you have displayed makes me think this data has been manipulated somehow, perhaps with functions in the dplyr package?
res <- structure(list(Publication.Date = structure(1:10, .Label = c("01.01.2013",
"01.01.2014", "01.01.2015", "01.02.2012", "01.03.2012", "01.04.2015",
"01.05.2012", "01.05.2013", "01.05.2014", "01.06.2011"), class = "factor"),
n = c(1L, 8L, 10L, 3L, 16L, 2L, 1L, 7L, 23L, 1L)), .Names = c("Publication.Date",
"n"), class = "data.frame", row.names = c("1", "2", "3", "4",
"5", "6", "7", "8", "9", "10"))
> res
Publication.Date n
1 01.01.2013 1
2 01.01.2014 8
3 01.01.2015 10
4 01.02.2012 3
5 01.03.2012 16
6 01.04.2015 2
7 01.05.2012 1
8 01.05.2013 7
9 01.05.2014 23
10 01.06.2011 1
> # The question parses these strings as day.month.year ('%d.%m.%Y'); with
> # "%m.%d.%Y" any day above 12 would become NA.
> res$Publication.Date <- as.Date( as.character(res$Publication.Date), format="%d.%m.%Y")
Then you can plot:
png(); qplot(x=Publication.Date, y=n, data=res, geom="point"); dev.off()
I couldn't find a solution to this on the net. The two xts objects match on number of rows and columns. Still, I get the following error for the merge operation - "number of items to replace is not a multiple of replacement length".
The following is the R code along with the printed output at interim steps. I am a bit new to R, so if you notice any steps in the program that could be done better, please advise me on that as well. Thanks.
> # LOAD THE SPY DATA AND CREATE A DATA FRAME WITH RETURN COLUMN
> library(quantmod)
> library(PerformanceAnalytics)
> getSymbols("SPY", src='yahoo', index.class=c("POSIXt","POSIXct"), from='2002-01-01')
> SPY<-to.monthly(SPY)
> SPY.ret<-Return.calculate(SPY$SPY.Close)
> print(head(SPY.ret))
SPY.Close
Jan 2002 NA
Feb 2002 -0.018098831
Mar 2002 0.029868840
Apr 2002 -0.059915390
May 2002 -0.005951292
Jun 2002 -0.080167070
> index(SPY.ret) = as.Date(index(SPY)) # Convert to Date format as xts index is a Date.
> colnames(SPY.ret) <- "SPY"
> print(head(SPY.ret))
SPY
2002-01-01 NA
2002-02-01 -0.018098831
2002-03-01 0.029868840
2002-04-01 -0.059915390
2002-05-01 -0.005951292
2002-06-01 -0.080167070
> #LOAD THE TRADE FILE & CREATE A DATA FRAME WITH PROFIT COLUMN
> trades = as.xts(read.zoo(file="Anvi/CSV/ARS_EW_R2_SPDR.csv", index.column="Exit.time", format="%m/%d/%Y", header=TRUE, sep=","))
Warning message:
In zoo(rval3, ix) :
some methods for “zoo” objects do not work if the index entries in ‘order.by’ are not unique
> df = trades$Profit
> print(head(df))
Profit
2003-09-30 " 0.079734219"
2004-01-31 " 0.116722585"
2004-03-31 " 0.060347888"
2004-04-30 " 0.100379816"
2004-07-31 " 0.084048027"
2004-07-31 " 0.018710103"
> df$Profits = as.numeric(trades$Profit)
> df = df$Profit #Inefficent way to convert Profit column to numeric?
> print(head(df))
Profit
2003-09-30 0.07973422
2004-01-31 0.11672259
2004-03-31 0.06034789
2004-04-30 0.10037982
2004-07-31 0.08404803
2004-07-31 0.01871010
> df = aggregate(df, by=index(df))
> colnames(df) = "Profit"
> print(head(df))
Profit
2003-09-30 0.07973422
2004-01-31 0.11672259
2004-03-31 0.06034789
2004-04-30 0.10037982
2004-07-31 0.10275813
2004-11-30 0.02533904
>
> #MERGE THE SPY RET AND TRADE RESULTS DATA FRAMES
> temp = head(df)
> temp1 = head(SPY.ret)
> print(temp)
Profit
2003-09-30 0.07973422
2004-01-31 0.11672259
2004-03-31 0.06034789
2004-04-30 0.10037982
2004-07-31 0.10275813
2004-11-30 0.02533904
> print(temp1)
SPY
2002-01-01 NA (Note: I tried replacing NA with 0 but still same error).
2002-02-01 -0.018098831
2002-03-01 0.029868840
2002-04-01 -0.059915390
2002-05-01 -0.005951292
2002-06-01 -0.080167070
> mdf = merge(x=temp, y=temp1, all=TRUE)
Error in z[match0(index(a), indexes), ] <- a[match0(indexes, index(a)), :
number of items to replace is not a multiple of replacement length
>
What I am trying to do above is merge the objects such that resulting object's index is a UNION and has two columns "SPY", "PROFIT". The empty cells in each of the columns in the merged object are filled with 0.
aggregate returns a zoo object, not an xts object. That means the zoo method of merge is being dispatched instead of the xts method. Your code works fine if both objects are xts objects.
temp <-
structure(c(0.07973422, 0.11672259, 0.06034789, 0.10037982, 0.10275813,
0.02533904), .Dim = c(6L, 1L), index = structure(c(12325, 12448,
12508, 12538, 12630, 12752), class = "Date"), class = "zoo",
.Dimnames = list(NULL, "Profit"))
temp1 <-
structure(c(NA, -0.018098831, 0.02986884, -0.05991539, -0.005951292,
-0.08016707), .Dim = c(6L, 1L), index = structure(c(1009864800,
1012543200, 1014962400, 1017640800, 1020229200, 1022907600), tzone = "",
tclass = "Date"), .indexCLASS = "Date", tclass = "Date", .indexTZ = "",
tzone = "", .Dimnames = list(NULL, "SPY"), class = c("xts", "zoo"))
merge(temp, temp1) # error
merge(as.xts(temp), temp1, fill=0) # works, filled with zeros
# Profit SPY
# 2002-01-01 0.00000000 NA
# 2002-02-01 0.00000000 -0.018098831
# 2002-03-01 0.00000000 0.029868840
# 2002-04-01 0.00000000 -0.059915390
# 2002-05-01 0.00000000 -0.005951292
# 2002-06-01 0.00000000 -0.080167070
# 2003-09-30 0.07973422 0.000000000
# 2004-01-31 0.11672259 0.000000000
# 2004-03-31 0.06034789 0.000000000
# 2004-04-30 0.10037982 0.000000000
# 2004-07-31 0.10275813 0.000000000
# 2004-11-30 0.02533904 0.000000000