Back testing for Stock analysis with R - r

I am a newbie of "R" and I want to write a script for back testing my strategy of buy and sell according to the EMA. I write the following code according to some reference from Web. However, the script got an error message in line 72 but I cannot figure out the problem. Anybody can help to solve my problem? Thanks in advance.
library(quantmod)
stock1<-getSymbols("^DJI",src="yahoo",from="2010-01-01",auto.assign=F)
stock1<-na.locf(stock1)
stock1$EMA9<-EMA(Cl(stock1),n=9)
stock1$EMA19<-EMA(Cl(stock1),n=19)
stock1$EMACheck<-ifelse(stock1$EMA9>stock1$EMA19,1,0)
stock1$EMA_CrossOverUp<-ifelse(diff(stock1$EMACheck)==1,1,0)
stock1$EMA_CrossOverDown<-ifelse(diff(stock1$EMACheck)==-1,-1,0)
stock1<-stock1[index(stock1)>="2010-01-01",]
stock1_df<-data.frame(index(stock1),coredata(stock1))
colnames(stock1_df)<-c("Date","Open","High","Low","Close","Volume","Adj","EMA9","EMA19","EMACheck","EMACheck_up","EMACheck_down")
head(stock1_df)
#To calculate the number of crossoverup transactions during the duration from 2010-01-01
sum(stock1_df$EMACheck_up==1 & index(stock1)>="2010-01-01",na.rm=T)
stock1_df$Date[stock1_df$EMACheck_up==1 & index(stock1)>="2010-01-01"]
sum(stock1_df$EMACheck_down==-1 & index(stock1)>="2010-01-01",na.rm=T)
stock1_df$Date[stock1_df$EMACheck_down==-1 & index(stock1)>="2010-01-01"]
#To generate the transcation according to the strategy
transaction_dates<-function(stock2,Buy,Sell)
{
Date_buy<-c()
Date_sell<-c()
hold<-F
stock2[["Hold"]]<-hold
for(i in 1:nrow(stock2)) {
if(hold == T) {
stock2[["Hold"]][i]<-T
if(stock2[[Sell]][i] == -1) {
#stock2[["Hold"]][i]<-T
hold<-F
}
} else {
if(stock2[[Buy]][i] == 1) {
hold<-T
stock2[["Hold"]][i]<-T
}
}
}
stock2[["Enter"]]<-c(0,ifelse(diff(stock2[["Hold"]])==1,1,0))
stock2[["Exit"]]<-c(ifelse(diff(stock2[["Hold"]])==-1,-1,0),0)
Buy_date <- stock2[["Date"]][stock2[["Enter"]] == 1]
Sell_date <- stock2[["Date"]][stock2[["Exit"]] == -1]
if (length(Sell_date)<length(Buy_date)){
#Sell_date[length(Sell_date)+1]<-tail(stock2[["Date"]],n=2)[1]
Buy_date<-Buy_date[1:length(Buy_date)-1]
}
return(list(DatesBuy=Buy_date,DatesSell=Sell_date))
}
#transaction dates generate:
transactionDates<-transaction_dates(stock1_df,"EMACheck_up","EMACheck_down")
transactionDates
num_transaction1<-length(transactionDates[[1]])
Open_price<-function(df,x) {df[as.integer(rownames(df[df[["Date"]]==x,]))+1,][["Open"]]}
transactions_date<-function(df,x) {df[as.integer(rownames(df[df[["Date"]]==x,]))+1,][["Date"]]}
transactions_generate<-function(df,num_transaction)
{
price_buy<-sapply(1:num_transaction,function(x) {Open_price(df,transactionDates[[1]][x])})
price_sell<-sapply(1:num_transaction,function(x) {Open_price(df,transactionDates[[2]][x])})
Dates_buy<-as.Date(sapply(1:num_transaction,function(x) {transactions_date(df,transactionDates[[1]][x])}))
Dates_sell<-as.Date(sapply(1:num_transaction,function(x) {transactions_date(df,transactionDates[[2]][x])}))
transactions_df<-data.frame(DatesBuy=Dates_buy,DatesSell=Dates_sell,pricesBuy=price_buy,pricesSell=price_sell)
#transactions_df$return<-100*(transactions_df$pricesSell-transactions_df$pricesBuy)/transactions_df$pricesBuy
transactions_df$Stop_loss<-NA
return(transactions_df)
}
transaction_summary<-transactions_generate(stock1_df,num_transaction1)
transaction_summary$Return<-100*(transaction_summary$pricesSell-transaction_summary$pricesBuy)/transaction_summary$pricesBuy
transaction_summary

Your code fails on this line:
transactionDates<-transaction_dates(stock1_df,"EMACheck_up","EMACheck_down")
The reason is that the first 19 records of stock1_df contain NA values in the columns "EMACheck_up" and "EMACheck_down".
head(stock1_df)
EMACheck_up EMACheck_down
1 NA NA
2 NA NA
3 NA NA
4 NA NA
5 NA NA
6 NA NA
You can solve your issue by running na.locf before running the offending line of code.
stock1_df <- na.locf(stock1_df)
transactionDates <-
transaction_dates(stock1_df, "EMACheck_up", "EMACheck_down")
Skipping the first 19 rows (or first month) would also work.
You might want to look into quantstrat if you want to do more in backtesting strategies. But what you have now does the trick.

Related

how to make a sub vector from another vector that includes all values that are true in R programming

Im quite new to programming and am facing a certain issue. I have the below 2 vectors one has expenses and the other has revenues for 12 months:
'''
revenue <- c(14574.49, 7606.46, 8611.41, 9175.41, 8058.65, 8105.44, 11496.28, 9766.09, 10305.32, 14379.96, 10713.97, 15433.50)
expenses <- c(12051.82, 5695.07, 12319.20, 12089.72, 8658.57, 840.20, 3285.73, 5821.12, 6976.93, 16618.61, 10054.37, 3803.96)
'''
I have to be able to pull a list of the profit(revenue - expenses) where the revenues are greater than the mean revenue of the 12 months. I cant seem to figure it out.
This is what I used below to try to get this:
'''
for(i in 1:length(revenue)){
if(revenue > mean.revenue){
good_months <- c(revenue>mean.revenue)
}
}
good_months
'''
Problem with this is that it only pulls out either true or false for those months. How can I get only the True values?
Perhaps you can use this
> which(revenue > mean(revenue))
[1] 1 7 10 11 12
> (revenue - expenses)[revenue > mean(revenue)]
[1] 2522.67 8210.55 -2238.65 659.60 11629.54

R Load XML to dataframe, and include attributes

I am having trouble loading XML file into R data frame.
This is my XML structure [the data is made up]:
<?xml version="1.0" encoding="UTF-8"?>
-<CancerExtract>
-<CancerRegRec>
-<Demographic>
-<PatientName>
<PatSurname>Jones</PatSurname>
<PatFirstName>John</PatFirstName>
<PatSecondName>Peter</PatSecondName>
</PatientName>
-<PatientDetail Sex="1" IndigStatus="12">
<DOB>01012000</DOB>
<MedicareNo>xxxx776xxx66xx</MedicareNo>
<COB>1101</COB>
<Language>1201</Language>
</PatientDetail>
-<PatientAddress>
<StreetAddr>1 Address Rd</StreetAddr>
<Suburb>AwesomeCity</Suburb>
<Postcode>ZZ304</Postcode>
</PatientAddress>
</Demographic>
-<Tumour>
-<TreatingDoctor>
<TDSurname>Doctor</TDSurname>
<TDFirstName>The Good</TDFirstName>
<TDAddress>FixemUp ct</TDAddress>
<TDMediProvidNo>DR0001</TDMediProvidNo>
</TreatingDoctor>
-<HospitalEpisode>
<HospitalName>FixMeUp</HospitalName>
<CampusCode>0000</CampusCode>
<URN>123456</URN>
<AdmissionDate>01012020</AdmissionDate>
<DischargeDate>03012020</DischargeDate>
</HospitalEpisode>
-<TumourDetail Grade="1" ECOG="9">
<DiagnosisDate>01012050</DiagnosisDate>
<PrimarySite>C61</PrimarySite>
<Morph>81403</Morph>
<Investigations>8 8 7 10 3</Investigations>
<AdditInfo>Some free text can be available here</AdditInfo>
</TumourDetail>
<CStage Stage="9" StagingSystem="99"/>
-<GP>
<GPSurname>MyGP</GPSurname>
<GPFirstName>Peter</GPFirstName>
<GPAddress>100 GP street</GPAddress>
</GP>
-<RegDetail>
<RegName>Some name</RegName>
<RegDate>05122021</RegDate>
</RegDetail>
</Tumour>
</CancerRegRec>
-<CancerRegRec>
-<Demographic>
-<PatientName>
<PatSurname>Pt2</PatSurname>
<PatFirstName>Frits</PatFirstName>
<PatSecondName/>
</PatientName>
-<PatientDetail Sex="4" IndigStatus="22" SomeOtherVariable="random value">
<DOB>12121834</DOB>
<MedicareNo>xxxxxxxx00001</MedicareNo>
<COB>1201</COB>
<Language>1201</Language>
</PatientDetail>
-<PatientAddress>
<StreetAddr>1 church street</StreetAddr>
<Suburb>Cityname Here</Suburb>
<Postcode>7777YY</Postcode>
</PatientAddress>
</Demographic>
-<Tumour>
+<TreatingDoctor>
-<HospitalEpisode>
<HospitalName>HospitalName two </HospitalName>
<CampusCode>2166192</CampusCode>
<URN>10REWR8XX640</URN>
<AdmissionDate>23122025</AdmissionDate>
<DischargeDate>23122027</DischargeDate>
</HospitalEpisode>
-<TumourDetail EstDateFlag="1" PriorDiagFlag="Y" Laterality="8">
<DiagnosisDate>01121812</DiagnosisDate>
<WhereDiagnosed>At home</WhereDiagnosed>
<PrimarySite>C9000</PrimarySite>
<Morph>81403</Morph>
<Investigations>7 3 1</Investigations>
<MetSite>C792 C788</MetSite>
<AdditInfo>This is a second record. </AdditInfo>
</TumourDetail>
<CStage Stage="9" StagingSystem="99"/>
-<GP>
<GPSurname>Jones</GPSurname>
<GPFirstName>John</GPFirstName>
<GPAddress>Test street 12 Unit 1</GPAddress>
</GP>
-<RegDetail>
<RegName>Me Myself and I</RegName>
<RegDate>01011801</RegDate>
</RegDetail>
</Tumour>
</CancerRegRec>
</CancerExtract>
I created this R function to load the file and extract all data:
load_XML_File <- function(file){
load <- tryCatch(expr = { xml2::read_xml(file) },
warning = function(warning_condition) {
message(paste("\n\n\nWarning loading file: ", file))
message("\nHere's the original warning message:\n")
message(warning_condition)
return(NA)
},
error = function(error_condition) {
message(paste("\n\n\nError loading file: ", file))
message("\nHere's the original error message:\n")
message(error_condition)
return(NA)
},
finally = {
message(paste0("\nLoaded file ", file))
}
)
PerPt <- xml2::xml_find_all(load, ".//CancerRegRec")
tmp <- xml2::as_list(PerPt)
if(length(tmp) == 0){out <- NA}
if(length(tmp) >= 1){
for(i in 1:length(tmp)){
tt <- data.frame(t(data.frame(unlist(tmp[i]))))
rownames(tt) <- NULL
if(i == 1){out <- tt}
if(i > 1){out <- plyr::rbind.fill(out, tt)}
}
}
return(out)
}
This works well and is fast enough for my purpose, but does NOT extract the attributes.
How would I change my function so that also the attributes are included?
> load_XML_File(file)
Loaded file H:/TMP/testFile.xml
Demographic.PatientName.PatSurname Demographic.PatientName.PatFirstName Demographic.PatientName.PatSecondName Demographic.PatientDetail.DOB
1 Jones John Peter 01012000
2 Pt2 Frits <NA> 12121834
Demographic.PatientDetail.MedicareNo Demographic.PatientDetail.COB Demographic.PatientDetail.Language Demographic.PatientAddress.StreetAddr
1 xxxx776xxx66xx 1101 1201 1 Address Rd
2 xxxxxxxx00001 1201 1201 1 church street
Demographic.PatientAddress.Suburb Demographic.PatientAddress.Postcode Tumour.TreatingDoctor.TDSurname Tumour.TreatingDoctor.TDFirstName
1 AwesomeCity ZZ304 Doctor The Good
2 Cityname Here 7777YY Jansen Jan
Tumour.TreatingDoctor.TDAddress Tumour.TreatingDoctor.TDMediProvidNo Tumour.HospitalEpisode.HospitalName Tumour.HospitalEpisode.CampusCode
1 FixemUp ct DR0001 FixMeUp 0000
2 Jansen rd DVR0001 HospitalName two 2166192
Tumour.HospitalEpisode.URN Tumour.HospitalEpisode.AdmissionDate Tumour.HospitalEpisode.DischargeDate Tumour.TumourDetail.DiagnosisDate
1 123456 01012020 03012020 01012050
2 10REWR8XX640 23122025 23122027 01121812
Tumour.TumourDetail.PrimarySite Tumour.TumourDetail.Morph Tumour.TumourDetail.Investigations Tumour.TumourDetail.AdditInfo Tumour.GP.GPSurname
1 C61 81403 8 8 7 10 3 Some free text can be available here MyGP
2 C9000 81403 7 3 1 This is a second record. Jones
Tumour.GP.GPFirstName Tumour.GP.GPAddress Tumour.RegDetail.RegName Tumour.RegDetail.RegDate Tumour.TumourDetail.WhereDiagnosed Tumour.TumourDetail.MetSite
1 Peter 100 GP street Some name 05122021 <NA> <NA>
2 John Test street 12 Unit 1 Me Myself and I 01011801 At home C792 C788
It seems like the attributes are present on tmp.
PerPt <- xml2::xml_find_all(load, ".//CancerRegRec")
tmp <- xml2::as_list(PerPt)
This function visits each element of a list, recursively. It makes attributes into members of the element.
move_attr_to_member <- function(x) {
## capture names, and attributes but not names
names <- names(x)
attributes <- attributes(unname(x))
## recursive application
if (is.list(x))
x <- lapply(x, fun)
## return x (with attributes but not names removed) and attributes
attributes(x) <- NULL
names(x) <- names
c(x, attributes)
}
This could be used like
list_with_attrs_as_members <- move_attr_to_member(tmp)
A tibble is easily created with
dplyr::bind_rows(lapply(list_with_attrs_as_members, unlist))
I'd carefully check the output of move_attr_to_member() to make sure that it's doing the right thing!

Back testing for Stock Market with R

I am very new user for R and want to use R for back testing my Strategy. I try to combine some scripts found in web. However, it did not work according my idea. My problem is the transaction date cannot be generated according to my strategy design date.
library(quantmod)
library(lubridate)
stock1<-getSymbols("AAPL",src="yahoo",from="2016-01-01",auto.assign=F)
stock1<-na.locf(stock1)
stock1$EMA9<-EMA(Cl(stock1),n=9)
stock1$EMA19<-EMA(Cl(stock1),n=19)
stock1$EMACheck<-ifelse(stock1$EMA9>stock1$EMA19,1,0)
stock1$EMA_CrossOverUp<-ifelse(diff(stock1$EMACheck)==1,1,0)
stock1$EMA_CrossOverDown<-ifelse(diff(stock1$EMACheck)==-1,-1,0)
stock1<-stock1[index(stock1)>="2016-01-01",]
stock1_df<-data.frame(index(stock1),coredata(stock1))
colnames(stock1_df)<-c("Date","Open","High","Low","Close","Volume","Adj","EMA9","EMA19","EMACheck","EMACheck_up","EMACheck_down")
#To calculate the number of crossoverup transactions during the duration from 2016-01-01
sum(stock1_df$EMACheck_up==1 & index(stock1)>="2016-01-01",na.rm=T)
stock1_df$Date[stock1_df$EMACheck_up==1 & index(stock1)>="2016-01-01"]
sum(stock1_df$EMACheck_down==-1 & index(stock1)>="2016-01-01",na.rm=T)
stock1_df$Date[stock1_df$EMACheck_down==-1 & index(stock1)>="2016-01-01"]
#To generate the transcation according to the strategy
transaction_dates<-function(stock2,Buy,Sell)
{
Date_buy<-c()
Date_sell<-c()
hold<-F
stock2[["Hold"]]<-hold
for(i in 1:nrow(stock2)) {
if(hold == T) {
stock2[["Hold"]][i]<-T
if(stock2[[Sell]][i] == -1) {
#stock2[["Hold"]][i]<-T
hold<-F
}
} else {
if(stock2[[Buy]][i] == 1) {
hold<-T
stock2[["Hold"]][i]<-T
}
}
}
stock2[["Enter"]]<-c(0,ifelse(diff(stock2[["Hold"]])==1,1,0))
stock2[["Exit"]]<-c(ifelse(diff(stock2[["Hold"]])==-1,-1,0),0)
Buy_date <- stock2[["Date"]][stock2[["Enter"]] == 1]
Sell_date <- stock2[["Date"]][stock2[["Exit"]] == -1]
if (length(Sell_date)<length(Buy_date)){
#Sell_date[length(Sell_date)+1]<-tail(stock2[["Date"]],n=2)[1]
Buy_date<-Buy_date[1:length(Buy_date)-1]
}
return(list(DatesBuy=Buy_date,DatesSell=Sell_date))
}
#transaction dates generate:
stock1_df <- na.locf(stock1_df)
transactionDates<-transaction_dates(stock1_df,"EMACheck_up","EMACheck_down")
transactionDates
num_transaction1<-length(transactionDates[[1]])
Open_price<-function(df,x) {df[as.integer(rownames(df[df[["Date"]]==x,]))+1,][["Open"]]}
transactions_date<-function(df,x) {df[as.integer(rownames(df[df[["Date"]]==x,]))+1,][["Date"]]}
transactions_generate<-function(df,num_transaction)
{
price_buy<-sapply(1:num_transaction,function(x) {Open_price(df,transactionDates[[1]][x])})
price_sell<-sapply(1:num_transaction,function(x) {Open_price(df,transactionDates[[2]][x])})
Dates_buy<-as.Date(sapply(1:num_transaction,function(x) {transactions_date(df,transactionDates[[1]][x])}))
Dates_sell<-as.Date(sapply(1:num_transaction,function(x) {transactions_date(df,transactionDates[[2]][x])}))
transactions_df<-data.frame(DatesBuy=Dates_buy,DatesSell=Dates_sell,pricesBuy=price_buy,pricesSell=price_sell)
#transactions_df$return<-100*(transactions_df$pricesSell-transactions_df$pricesBuy)/transactions_df$pricesBuy
transactions_df$Stop_loss<-NA
return(transactions_df)
}
transaction_summary<-transactions_generate(stock1_df,num_transaction1)
transaction_summary$Return<-100*(transaction_summary$pricesSell-transaction_summary$pricesBuy)/transaction_summary$pricesBuy
transaction_summary
sum(transaction_summary$Return,na.rm=T)
Hi, I am very new user for R and want to use R for back testing my Strategy. I try to combine some scripts found in web. However, it did not work according my idea. My problem is the transaction date cannot be generated according to my strategy design date.
problem as this image
The code you have is to complicated for it's own good.
The issue lies in the fact that the functions Open_price and transactions_date look for use rownames to find a record number and then take the next one. But then instead of looking for the rownames again, it is used as an index. There it goes wrong.
If you look at the following result for the first date, it returns 40.
as.integer(rownames(stock1_df[stock1_df[["Date"]] == "2016-03-01", ]))
[1] 40
So the next record it would look for will be 41. But stock_df[41, ] is not the same as rowname 41. An issue with rownames is that if you filter / remove records from the data.frame the rownames don't change. To get the correct index number you should use which. If you look at the stock1_df, you can see that it returns 21 and we need record 22
which(stock1_df[["Date"]] == "2016-03-01")
[1] 21
I changed the Open_price and transactions_date functions to use the which function. This will now return the correct results.
Open_price <- function(df, x) {
df[which(df[["Date"]] == x) + 1, ][["Open"]]
}
transactions_date <- function(df, x) {
df[which(df[["Date"]] == x) + 1, ][["Date"]]
}
head(transaction_summary)
DatesBuy DatesSell pricesBuy pricesSell Stop_loss Return
1 2016-03-02 2016-04-25 100.51 105.00 NA 4.467215
2 2016-05-27 2016-06-20 99.44 96.00 NA -3.459374
3 2016-07-13 2016-09-12 97.41 102.65 NA 5.379322
4 2016-09-15 2016-11-02 113.86 111.40 NA -2.160547
5 2016-12-12 2017-06-13 113.29 147.16 NA 29.896728
6 2017-07-17 2017-09-19 148.82 159.51 NA 7.183166
A bit of advice, try to use spaces in your code. That makes it more readable. Look for example at this style guide. Your whole code be rewritten to only use stock1 without the need to turning it into a data.frame halfway your code. But for now the code does what it needs to do.

Building a tridiagonalsolver

Was given a task to code a tridiagonal solver.
However when i try to run the solver, i get no results.
I can't find my error in the code and would appreciate any help that i can get.
a<-cbind(-1,-1,-1)
b<-cbind(2.04,2.04,2.04,2.04)
c<-cbind(-1,-1,-1)
d<-c(40.8,0.8,0.8,200)
tridiagsolver<-function(a,b,c,d){
N<-length(b)
for (n in (2:N)){
ratio<-a[n]/b[n-1]
b[n]=b[n]-ratio*c[n-1]
d[n]=d[n]-ratio*d[n-1]
}
d[N]=d[N]/b[N]
for (n in (1:(N-1))){
d[N-n]=(d[N-n]-c[N-n]*d[N-n+1])/b[N-n]
}
return(d)
}
tridiagsolver(a,b,c,d)
> tridiagsolver(a,b,c,d)
[1] NA NA NA NA

Trouble with R function

I try to create a function. But when I change the sequence of it then it create NA values out. Any particular reason to it? Thanks
new<-function(x){
min2<-NULL
min1<-NULL
len<-length(unique(x))
for (i in 1:(len-1))
min2[i]<-sort(x,partial=(len-i+1))[(len-i+1)]
min1[i]<-sort(x,partial=(len-i)) [(len-i)]
return((min1))
}
x<-c(1,11,40,120)
new(x)
[1] 120 40 11
new<-function(x){
min2<-NULL
min1<-NULL
len<-length(unique(x))
for (i in 1:(len-1))
min1[i]<-sort(x,partial=(len-i)) [(len-i)]
min2[i]<-sort(x,partial=(len-i+1))[(len-i+1)]
return((min1))
}
x<-c(1,11,40,120)
new(x)
[1] NA NA 11
You forgot curly parentheses around the expression you want to repeat in you for loop:
new<-function(x){
min2<-NULL
min1<-NULL
len<-length(unique(x))
for (i in 1:(len-1)) {
min2[i]<-sort(x,partial=(len-i+1))[(len-i+1)]
min1[i]<-sort(x,partial=(len-i)) [(len-i)]
}
return(min1)
}

Resources