termstrc datastream import estim_nss dyncouponbonds - r

I am trying to use termstrc to calculate a time series of Nelson-Siegel parameter estimates. I am trying to get the code below to run, but I don't think I have the data in the right format. I'm not sure how to get the data organised properly as a "dyncouponbonds" object. The error I get is:
Error in (pos_cf[i] + 1):pos_cf[i + 1] : NA/NaN argument
In addition: Warning message:
In max(n_of_cf) : no non-missing arguments to max; returning -Inf
Apologies if I have not asked in the correct format; this is my first time using Stack Overflow.
importdatastream <- function(datafiles){
  #datafiles = c("france S.csv", "france AC.csv", "france CP.csv")
  rawdata <- read.csv(datafiles[1], dec=".",sep=",",colClasses = "character")
  rawdata_AC <- read.csv(datafiles[2], dec=".",sep=",",colClasses = "character", header=TRUE, check.names=FALSE)
  rawdata_CP <- read.csv(datafiles[3], dec=".",sep=",",colClasses = "character", header=TRUE, check.names=FALSE)
  DATES <- as.character(as.Date(names(rawdata_AC)[-(1:2)], format="%d.%m.%y"))
  DATES <- paste(substring(DATES,9,10),substring(DATES,6,7),substring(DATES,1,4),sep="")
  AC <- rawdata[,"ACCRUED"]
  CP <- rawdata[,"PRICE"]
  dslist <- list()
  for(i in 1:length(DATES)){
    TODAY <- DATES[i]
    AC <- rawdata_AC[-(1:2)][,i]
    CP <- rawdata_CP[-(1:2)][,i]
    datastreamlist <- function(rawdata, TODAY, AC, CP){
      data <- list()
      data$ISIN <- rawdata[,"ISIN"]
      data$MATURITYDATE <- as.Date(rawdata[,"MATURITYDATE"],format="%d%m%Y")
      data$ISSUEDATE <- as.Date(rawdata[,"ISSUEDATE"],format="%d%m%Y")
      data$COUPONRATE <- as.numeric(rawdata[,"COUPONRATE"])
      data$PRICE <- as.numeric(CP)
      data$ACCRUED <- as.numeric(AC)
      data$CASHFLOWS <- list()
      data$TODAY <- as.Date(TODAY,format="%d%m%Y")
      NEXTCOUPON <- ifelse(as.Date(paste(rawdata[,"COUPONDATE"],substring(TODAY,5,8),sep=""),format="%d%m%Y") > data$TODAY,
                           paste(rawdata[,"COUPONDATE"],substring(TODAY,5,8),sep=""),
                           paste(rawdata[,"COUPONDATE"],as.character(as.numeric(substring(TODAY,5,8))+1),sep=""))
      NCOUPON <- as.numeric(substring(rawdata[,"MATURITYDATE"],5,8)) - as.numeric(substring(NEXTCOUPON,5,8))
      # cash flows ISIN
      data$CASHFLOWS$ISIN <- vector()
      for(i in 1:length(NCOUPON)){
        data$CASHFLOWS$ISIN <- c(data$CASHFLOWS$ISIN,rep(data$ISIN[i],NCOUPON[i]+1))
      }
      # cash flows
      data$CASHFLOWS$CF <- vector()
      for(i in 1:length(NCOUPON)){
        data$CASHFLOWS$CF <- c(data$CASHFLOWS$CF,c(rep(data$COUPONRATE[i]*100,NCOUPON[i]),100+data$COUPONRATE[i]*100))
      }
      # cash flow dates
      data$CASHFLOWS$DATE <- vector()
      for(i in 1:length(NCOUPON)){
        data$CASHFLOWS$DATE <- c(data$CASHFLOWS$DATE,paste(rawdata[i,"COUPONDATE"],as.numeric(substring(NEXTCOUPON[i],5,8)) + seq(0,NCOUPON[i]),sep=""))
      }
      data$CASHFLOWS$DATE <- as.Date(data$CASHFLOWS$DATE,format="%d%m%Y")
      data
    }
    dslist[[i]] <- datastreamlist(rawdata, TODAY, AC, CP)
  }
  dslist
}
datafiles = c("france S.csv", "france AC.csv", "france CP.csv")
govbondsts <- importdatastream(datafiles)
class(govbondsts)="couponbonds"
#class(govbondsts)="dyncouponbonds"
ns_res <- estim_nss(govbondsts, c("FRANCE"), matrange="all" ,method = "ns", tauconstr = list(c(0.2, 7, 0.2)), optimtype = "allglobal")
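For reference, here is a minimal sketch of the shape I believe termstrc wants, judging from the package's bundled govbonds example data (the ISIN, dates and numbers below are invented placeholders): a "couponbonds" object is a named list of countries, each country being a list of parallel vectors plus a CASHFLOWS list, and a "dyncouponbonds" object is simply a list of such "couponbonds" objects, one per observation date.
# Minimal sketch, assuming the layout of termstrc's govbonds example data
# (the ISIN, dates and prices here are invented placeholders)
one_day <- list(FRANCE = list(
  ISIN         = "FR0000000001",
  MATURITYDATE = as.Date("2029-04-25"),
  ISSUEDATE    = as.Date("2019-04-25"),
  COUPONRATE   = 0.055,
  PRICE        = 101.2,
  ACCRUED      = 1.1,
  CASHFLOWS    = list(ISIN = rep("FR0000000001", 3),
                      CF   = c(5.5, 5.5, 105.5),   # coupons, then final coupon + redemption
                      DATE = as.Date(c("2027-04-25", "2028-04-25", "2029-04-25"))),
  TODAY        = as.Date("2026-06-30")
))
class(one_day) <- "couponbonds"
several_days <- list(one_day, one_day)   # one "couponbonds" object per observation date
class(several_days) <- "dyncouponbonds"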
NEW WORKING R CODE:
if (!("termstrc" %in% installed.packages())) install.packages("termstrc")
library(termstrc)
datafiles = c("france S.csv", "france AC.csv", "france CP.csv")
rawdata <- read.csv(datafiles[1], dec=".", sep=",", colClasses = "character")
rawdata_AC <- read.csv(datafiles[2], dec=".",sep=",",colClasses = "character", header=TRUE, check.names=FALSE)
rawdata_CP <- read.csv(datafiles[3], dec=".",sep=",",colClasses = "character", header=TRUE, check.names=FALSE)
DATES <- as.character(as.Date(names(rawdata_AC)[-(1:2)], format="%d.%m.%y"))
DATES <- paste(substring(DATES,9,10),substring(DATES,6,7),substring(DATES,1,4),sep="")
AC <- rawdata[,"ACCRUED"]
CP <- rawdata[,"PRICE"]
dslist <- list()
for(i in 1:length(DATES)) {
  TODAY <- DATES[i]
  AC <- rawdata_AC[-(1:2)][,i]
  CP <- rawdata_CP[-(1:2)][,i]
  datastreamlist <- function(rawdata, TODAY, AC, CP){
    data <- list()
    data$ISIN <- rawdata[,"ISIN"]
    data$MATURITYDATE <- as.Date(rawdata[,"MATURITYDATE"],format="%d%m%Y")
    data$ISSUEDATE <- as.Date(rawdata[,"ISSUEDATE"],format="%d%m%Y")
    data$COUPONRATE <- as.numeric(rawdata[,"COUPONRATE"])
    data$PRICE <- as.numeric(CP)
    data$ACCRUED <- as.numeric(AC)
    data$CASHFLOWS <- list()
    data$TODAY <- as.Date(TODAY,format="%d%m%Y")
    NEXTCOUPON <- ifelse(as.Date(paste(rawdata[,"COUPONDATE"],substring(TODAY,5,8),sep=""),format="%d%m%Y") > data$TODAY,
                         paste(rawdata[,"COUPONDATE"],substring(TODAY,5,8),sep=""),
                         paste(rawdata[,"COUPONDATE"],as.character(as.numeric(substring(TODAY,5,8))+1),sep=""))
    NCOUPON <- as.numeric(substring(rawdata[,"MATURITYDATE"],5,8)) - as.numeric(substring(NEXTCOUPON,5,8))
    # cash flows ISIN
    data$CASHFLOWS$ISIN <- vector()
    for(i in 1:length(NCOUPON)){
      data$CASHFLOWS$ISIN <- c(data$CASHFLOWS$ISIN,rep(data$ISIN[i],NCOUPON[i]+1))
    }
    # cash flows
    data$CASHFLOWS$CF <- vector()
    for(i in 1:length(NCOUPON)){
      data$CASHFLOWS$CF <- c(data$CASHFLOWS$CF,c(rep(data$COUPONRATE[i]*100,NCOUPON[i]),100+data$COUPONRATE[i]*100))
    }
    # cash flow dates
    data$CASHFLOWS$DATE <- vector()
    for(i in 1:length(NCOUPON)){
      data$CASHFLOWS$DATE <- c(data$CASHFLOWS$DATE,paste(rawdata[i,"COUPONDATE"],as.numeric(substring(NEXTCOUPON[i],5,8)) + seq(0,NCOUPON[i]),sep=""))
    }
    data$CASHFLOWS$DATE <- as.Date(data$CASHFLOWS$DATE,format="%d%m%Y")
    data
  }
  dslist[[i]] <- list(FRANCE=datastreamlist(rawdata, TODAY, AC, CP))
  class(dslist[[i]]) <- "couponbonds"
}
class(dslist) <- "dyncouponbonds"
dl_res <- estim_nss(dslist, c("FRANCE"), method = "dl", lambda = 1/3)
plot(dl_res)
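Assuming the dyncouponbonds object above is well formed, the Nelson-Siegel fit from the original attempt should run on it in the same way (an untested sketch, reusing the same arguments as before):
ns_res <- estim_nss(dslist, c("FRANCE"), matrange = "all", method = "ns",
                    tauconstr = list(c(0.2, 7, 0.2)), optimtype = "allglobal")
plot(ns_res)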

Related

Requires numeric/complex matrix/vector arguments

So I have a large data set that I have imported and split up. I've made sure to attach everything and tried to run code to determine the number of breakpoints using AIC.
rm(list=ls())
library(Matching)
library(segmented)
dinosaurs=read.csv("C:/Users/user/Desktop/NEW PLOTS FOR DINOS/centrum_input_fin.csv")
attach(dinosaurs)
names(dinosaurs)
dino_names <- names(dinosaurs)
#NEED TO EXPORT FILES (EXPORT THE ALL_DATA_PLUS_SORTED OUT)
all_data_plus_sorted<-NULL
for(j in 1:length(dino_names))
{
  with_gaps <- eval(parse(text = dino_names[j]))
  gaps <- which(is.na(with_gaps))
  non_gaps <- which(1:length(with_gaps) %in% gaps == FALSE)
  sorted_without_gaps <- sort(with_gaps[!is.na(with_gaps)], decreasing=TRUE)
  ordered_with_gaps <- rep(NA, length(with_gaps))
  for(k in 1:length(non_gaps))
  {
    ordered_with_gaps[non_gaps[k]] <- sorted_without_gaps[k]
  }
  to_export <- cbind(with_gaps, ordered_with_gaps)
  colnames(to_export) <- c(paste(dino_names[j],"_actual_with_gaps",sep=""), paste(dino_names[j],"_ordered_with_gaps",sep=""))
  all_data_plus_sorted <- cbind(all_data_plus_sorted, to_export)
}
all_data_plus_sorted
attach(as.data.frame(all_data_plus_sorted))
print(dinosaurs)
detach(as.data.frame(all_data_plus_sorted))
detach(dinosaurs)
#split species
Dyoplosaurus_acutosquameus_ROM734 <- Dyoplosaurus_acutosquameus_ROM734[!is.na(Dyoplosaurus_acutosquameus_ROM734)]
Staurikosaurus_pricei <- Staurikosaurus_pricei[!is.na(Staurikosaurus_pricei)]
Opistocoelocaudia_skarzynskii <- Opistocoelocaudia_skarzynskii[!is.na(Opistocoelocaudia_skarzynskii)]
Stegosaurus_stenops._NHMUKPVR36730 <- Stegosaurus_stenops._NHMUKPVR36730[!is.na(Stegosaurus_stenops._NHMUKPVR36730)]
Giraffatitan_brancai <- Giraffatitan_brancai[!is.na(Giraffatitan_brancai)]
Camptosaurus <- Camptosaurus[!is.na(Camptosaurus)]
Camptosaurus_prestwichii <- Camptosaurus_prestwichii[!is.na(Camptosaurus_prestwichii)]
A_greppini <- A_greppini[!is.na(A_greppini)]
Astrophocaudia_slaughteri_SMU61732 <- Astrophocaudia_slaughteri_SMU61732[!is.na(Astrophocaudia_slaughteri_SMU61732)]
Tastavinsaurus_sanzi_gen_MPZ999 <- Tastavinsaurus_sanzi_gen_MPZ999[!is.na(Tastavinsaurus_sanzi_gen_MPZ999)]
MOZ_Pv1221 <- MOZ_Pv1221[!is.na(MOZ_Pv1221)]
Mamenchisaurus <- Mamenchisaurus[!is.na(Mamenchisaurus)]
Bromtosaurus_CMNo3018 <- Bromtosaurus_CMNo3018[!is.na(Bromtosaurus_CMNo3018)]
Lufengosaurus_Hueni <- Lufengosaurus_Hueni[!is.na(Lufengosaurus_Hueni)]
Mamenchisaurus_hochuanensi <- Mamenchisaurus_hochuanensi[!is.na(Mamenchisaurus_hochuanensi)]
Spinosaurus_FSACKK11888 <- Spinosaurus_FSACKK11888[!is.na(Spinosaurus_FSACKK11888)]
Buitreraptor_MPCNPV370 <- Buitreraptor_MPCNPV370[!is.na(Buitreraptor_MPCNPV370)]
Buitreraptor_MPCA245 <- Buitreraptor_MPCA245[!is.na(Buitreraptor_MPCA245)]
Huabeisaurus_allocotus_HBV20001 <- Huabeisaurus_allocotus_HBV20001[!is.na(Huabeisaurus_allocotus_HBV20001)]
Tethyshadros_insularis_SC57021 <- Tethyshadros_insularis_SC57021[!is.na(Tethyshadros_insularis_SC57021)]
Compsognathus_longipes_CNJ79 <- Compsognathus_longipes_CNJ79[!is.na(Compsognathus_longipes_CNJ79)]
Archaeopteryx12 <- Archaeopteryx12[!is.na(Archaeopteryx12)]
Sinosauropteryx_NIGP127586 <- Sinosauropteryx_NIGP127586[!is.na(Sinosauropteryx_NIGP127586)]
Sinosauropteryx_NIGP_127587 <- Sinosauropteryx_NIGP_127587[!is.na(Sinosauropteryx_NIGP_127587)]
Tetonosaurus_tilletti_AMNH3040 <- Tetonosaurus_tilletti_AMNH3040[!is.na(Tetonosaurus_tilletti_AMNH3040)]
Bambiraptor_feinbergi_FIP001 <- Bambiraptor_feinbergi_FIP001[!is.na(Bambiraptor_feinbergi_FIP001)]
Seimosaurus.halli_NMMNH3690 <- Seimosaurus.halli_NMMNH3690[!is.na(Seimosaurus.halli_NMMNH3690)]
Diluvicursor_pickeringi_NMVP221080 <- Diluvicursor_pickeringi_NMVP221080[!is.na(Diluvicursor_pickeringi_NMVP221080)]
Zhejiungosuurus_lishuiensis_ZMNHM8718 <- Zhejiungosuurus_lishuiensis_ZMNHM8718[!is.na(Zhejiungosuurus_lishuiensis_ZMNHM8718)]
Tianyulong_confuciusi_STMN.263 <- Tianyulong_confuciusi_STMN.263[!is.na(Tianyulong_confuciusi_STMN.263)]
Lusotitan_atalaiensis <- Lusotitan_atalaiensis[!is.na(Lusotitan_atalaiensis)]
Nemegtonykus_citus_MPCD100203 <- Nemegtonykus_citus_MPCD100203[!is.na(Nemegtonykus_citus_MPCD100203)]
Elaphrosaurus_bambergi_MBR4960 <- Elaphrosaurus_bambergi_MBR4960[!is.na(Elaphrosaurus_bambergi_MBR4960)]
Nomingia_gobiensis_GIN100119 <- Nomingia_gobiensis_GIN100119[!is.na(Nomingia_gobiensis_GIN100119)]
Nomingia_gobiensis_MPCD100119 <- Nomingia_gobiensis_MPCD100119[!is.na(Nomingia_gobiensis_MPCD100119)]
Chirostenotes_pergracilis <- Chirostenotes_pergracilis[!is.na(Chirostenotes_pergracilis)]
Seismosaurus_hallorum_NMMNHP3690 <- Seismosaurus_hallorum_NMMNHP3690[!is.na(Seismosaurus_hallorum_NMMNHP3690)]
Heterodontosaurus_tucki_SAMPKK1332 <- Heterodontosaurus_tucki_SAMPKK1332[!is.na(Heterodontosaurus_tucki_SAMPKK1332)]
Jianianhualong_tengi_DLXH1218 <- Jianianhualong_tengi_DLXH1218[!is.na(Jianianhualong_tengi_DLXH1218)]
Yinlong_downsi_IVPPV18685 <- Yinlong_downsi_IVPPV18685[!is.na(Yinlong_downsi_IVPPV18685)]
Neimongosaurus_yangi_LHV0001 <- Neimongosaurus_yangi_LHV0001[!is.na(Neimongosaurus_yangi_LHV0001)]
Magnapaulia_laticaudus_LACM17715 <- Magnapaulia_laticaudus_LACM17715[!is.na(Magnapaulia_laticaudus_LACM17715)]
Ouranosaurus_nigeriensis <- Ouranosaurus_nigeriensis[!is.na(Ouranosaurus_nigeriensis)]
Dreadnoughtus_schrani_MPMPV1156 <- Dreadnoughtus_schrani_MPMPV1156[!is.na(Dreadnoughtus_schrani_MPMPV1156)]
Pectodens_zhenyuensis_IVPPV18578 <- Pectodens_zhenyuensis_IVPPV18578[!is.na(Pectodens_zhenyuensis_IVPPV18578)]
Dilophosaurus_wetherilli <- Dilophosaurus_wetherilli[!is.na(Dilophosaurus_wetherilli)]
Gobihadros_mongoliensis_MPCD100746 <- Gobihadros_mongoliensis_MPCD100746[!is.na(Gobihadros_mongoliensis_MPCD100746)]
Gobihadros_mongoliensis_MPCD100755 <- Gobihadros_mongoliensis_MPCD100755[!is.na(Gobihadros_mongoliensis_MPCD100755)]
Auroraceratops_rugosus_GJ07913 <- Auroraceratops_rugosus_GJ07913[!is.na(Auroraceratops_rugosus_GJ07913)]
Patagotitan_mayorum_MPEFPV <- Patagotitan_mayorum_MPEFPV[!is.na(Patagotitan_mayorum_MPEFPV)]
Eoraptor_lunensi_PVSJ512 <- Eoraptor_lunensi_PVSJ512[!is.na(Eoraptor_lunensi_PVSJ512)]
Corythosaurus_casuarius <- Corythosaurus_casuarius[!is.na(Corythosaurus_casuarius)]
Caihong._Juji_PMoLB00175 <- Caihong._Juji_PMoLB00175[!is.na(Caihong._Juji_PMoLB00175)]
Eosinopteryx_brevipenna_YFGPT5197 <- Eosinopteryx_brevipenna_YFGPT5197[!is.na(Eosinopteryx_brevipenna_YFGPT5197)]
Rahonavis_ostromi_UA8656 <- Rahonavis_ostromi_UA8656[!is.na(Rahonavis_ostromi_UA8656)]
Changyuraptor_yangi_HGB016 <- Changyuraptor_yangi_HGB016[!is.na(Changyuraptor_yangi_HGB016)]
Herrerasaurus_ischigualastensis_PVL2566 <- Herrerasaurus_ischigualastensis_PVL2566[!is.na(Herrerasaurus_ischigualastensis_PVL2566)]
Herrerasaurus_ischigualastensis_UNSJ53 <- Herrerasaurus_ischigualastensis_UNSJ53[!is.na(Herrerasaurus_ischigualastensis_UNSJ53)]
Ischioceratops_zhuchengensis <- Ischioceratops_zhuchengensis[!is.na(Ischioceratops_zhuchengensis)]
Koreaceratops_hwaseongensis <- Koreaceratops_hwaseongensis[!is.na(Koreaceratops_hwaseongensis)]
# CHOOSE SAMPLE TO ANALYSE
#_________________________________________________________________________________________________
# choose sample
name_to_test <- "Koreaceratops_hwaseongensis"
y_val <- eval(parse(text = paste(name_to_test,"_actual_with_gaps",sep="")))
x_val<-1:length(y_val)
# USE AIC TO DECIDE HOW MANY BREAKS TO USE
#_________________________________________________________________________________________________
# extract AIC for models with 1-3 breakpoints
my_max_it=10
all_mods<-NULL
for(h in 1:4)
{
mod1<-segmented(lm(y_val~x_val),seg.Z=~x_val,psi=NA,control=seg.control(K=h,quant=TRUE,it.max=my_max_it),model=TRUE,nboot=50)
all_mods<-rbind(all_mods,c(h,extractAIC(mod1)[2]))
}
all_mods
my_K<-subset(all_mods,all_mods[,2]==min(all_mods[,2]))[1]
When I run the last section of the code I get the error:
Error in crossprod(x, y) :
  requires numeric/complex matrix/vector arguments
Not too sure why, because I have put it in a data frame. Is it because I'm importing the file incorrectly? I'm not sure how to fix it.
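One thing worth checking (a hedged debugging sketch, not a guaranteed fix): whether eval(parse(...)) really returns a plain numeric vector at that point, given that all_data_plus_sorted was detached earlier. Pulling the column from the matrix directly by name sidesteps the attach()/detach() state:
# Hedged debugging sketch: grab the column straight from all_data_plus_sorted
# (the column name is built exactly as in the code above) and confirm it is a
# plain numeric vector with enough non-NA points before calling segmented().
y_val <- as.numeric(all_data_plus_sorted[, paste(name_to_test, "_actual_with_gaps", sep = "")])
str(y_val)          # should show "num [1:n] ...", not NULL or character
sum(!is.na(y_val))  # segmented() needs enough non-missing observations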

Error data must be of a vector type was null R

I don't know what I'm doing wrong. On RStudio Cloud everything works, but in RStudio Desktop an error appears.
myData <- read.csv('C:/Users/iwona/Desktop/myData.csv', header = TRUE)
print('wpisz numer województwa, dla którego zostanie przygotowana anliza')
h1 <- readline(prompt="0-Dolnoslaskie, 1-Kujawsko-Pomorskie, 2-Lubelskie, 3-Lubuskie, 4-Lodzkie,
5-Malopolskie, 6-Mazowieckie, 7-Opolskie, 8-Podkarpackie, 9-Podlaskie, 10-Pomorskie,
11-Slaskie, 12-Swietokrzyskie, 13-Warminsko-Mazurskie, 14-Wielkopolskie, 15-Zachodniopomorskie")
h <- as.numeric(h1)
months_prices <- vector()
prices <- vector()
dataSummary <- vector()
regions <- as.character(myData$Nazwa[5:20])
years <- 2006:2019
services <- vector()
#regions <- dataframe
serviceNames <- c('Ryz','Mieso wolowe bez kosci','Kurczeta patroszone','Kielbasa suszona','Ser dojrzewajacy','Herbata czarna, lisciasta','Podkoszulek meski bawelniany, bez rekawa','Oczyszczanie chemiczne garnituru meskiego 2-czesciowego','Wegiel kamienny','Olej napedowy')
#for(h in 0:15) # h for regions
#h <- 5
par(mfrow=c(5,2))
cumulative <- data.frame(years=integer(), prices=logical(), services=logical())
for(i in 0:9) # i for services
{
  for (j in 0:13) # j for years 2006-2019
  {
    for(k in 0:11) # k for months
    {
      months_prices[k+1] <- as.numeric(as.matrix(myData[5+h, 4+j+(i*14)+(k*140)])[1,])
    }
    prices[j+1] <- mean(months_prices)
    services[j+1] <- serviceNames[i+1]
    months_prices <- vector() # empty
  }
  pricesForService.data <- data.frame(years, prices, services)
  cumulative <- rbind(cumulative, pricesForService.data)
  tmp <- cumulative[cumulative$services == serviceNames[i+1], ]
  plot(tmp$years, tmp$prices,
       main = serviceNames[i+1],
       xlab = "Lata", ylab = "Ceny",
       type = "l",
       col = "blue")
  mtext(regions[h+1], side = 3, line = -1.5, outer = TRUE)
}
I get the following error:
Error in array(x, c(length(x), 1L), if (!is.null(names(x)))
list(names(x), : 'data' must be of a vector type, was 'NULL'
Thanks for your help.
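Since the same script works on RStudio Cloud, one thing worth checking (a hedged suggestion, not a confirmed diagnosis) is whether the CSV parses into the same shape on the desktop machine, because the column arithmetic in the inner loop assumes a fixed layout:
# Hedged debugging sketch: the largest column index the loops can produce is
# 4 + 13 + (9 * 14) + (11 * 140); if read.csv yields fewer columns locally
# (different separator, encoding or header handling), the indexing breaks.
max_col <- 4 + 13 + (9 * 14) + (11 * 140)
dim(myData)              # ncol(myData) should be at least max_col
ncol(myData) >= max_col  # FALSE would explain the indexing returning nothing usable
str(myData[1:6, 1:6])    # quick sanity check that the file parsed as expected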

R apply function to a list of price data

In the code below I am grabbing data for three symbols; then I want to apply a simple function (which is a trading strategy) to this data. Ideally, I would then run statistics on those returns, such as those native to PerformanceAnalytics.
library("quantmod")
library("PerformanceAnalytics")
options(scipen=999)
PriceData <- new.env()
Symbols <- c("SPY", "QQQ", "IWM")
StartDt <- as.Date("2015-01-01")
suppressWarnings(getSymbols(Symbols, src="yahoo", env=PriceData, from=StartDt))
x <- list()
for (i in 1:length(Symbols)) {
x[[i]] <- get(Symbols[i], pos=PriceData)
}
SYSTEM <- function(data){
ret<- Delt(Cl(x[[i]]),Op(x[[i]]),type = 'arithmetic')
mavga <- SMA(Cl(x[[i]]), n=10)
mavgb <- SMA(Cl(x[[i]]), n=20)
sig <- ifelse(mavga>mavgb,1,0)+ifelse(mavgb>mavga,-1,0)
sig <- lag(sig,1)
sig[is.na(sig)]=0
strategyreturn <- sig * ret
return(strategyreturn)
}
######I'm doing something wrong here######
y <- lappy(x,SYSTEM)
z <- do.call("cbind", y)
PerformanceTable <- function(returns){
scalar <- 252
CS <- t(Return.cumulative(returns, geometric = FALSE))
SR <- t(SharpeRatio.annualized(returns, scale=scalar, geometric = FALSE))
SOR <- t(SortinoRatio(returns))
MDD <- t(maxDrawdown(returns))
WP <- colSums(returns > 0)/colSums(returns != 0)
WP <- as.data.frame(WP)
ASD = t(sd.annualized(returns, scale=scalar))
Stat <- cbind(CS,SR,SOR,MDD,ASD,WP)
colnames(Stat) <- c("Profit","SharpeRatio", "Sortino","MaxDrawdown", "AnnStdDev", "WinPct")
print("Performance Table")
print(Stat)
return (Stat)
}
Perf <- PerformanceTable(y)
You are almost there; just replace x[[i]] with data in your function:
SYSTEM <- function(data){
ret<- Delt(Cl(data),Op(data),type = 'arithmetic')
mavga <- SMA(Cl(data), n=10)
mavgb <- SMA(Cl(data), n=20)
sig <- ifelse(mavga>mavgb,1,0)+ifelse(mavgb>mavga,-1,0)
sig <- lag(sig,1)
sig[is.na(sig)]=0
strategyreturn <- sig * ret
return(strategyreturn)
}
and add an "l" to lappy():
y <- lapply(x,SYSTEM)
z <- do.call("cbind", y)
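One further tweak worth considering (a suggestion on top of the fix above): PerformanceTable() presumably wants the merged xts object z rather than the list y, and naming the columns keeps the output readable:
# Hedged follow-up: pass the column-bound returns (z), not the list (y),
# and label the columns with the ticker symbols.
colnames(z) <- Symbols
Perf <- PerformanceTable(z)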

improve execution time in a for loop (R)

Here is a reproducible example of a for loop. Since I want to do 3000 iterations and my matrix is way bigger than this reproducible example, the computer crashes. Any ideas on how I can do it?
I have read that loops are not recommended in R and that the web proposes using vectors and apply functions instead, but I wasn't able to build my formula with those functions...
The matrix:
row.names <- c('A2003','B2010','C2011','D2010','E2001','F2005','F2009','G2003','G2007','H2004','H2010')
sp1 <- c(4,83,1,2,4,3,1,5,7,2,4)
sp2 <- c(5,0,2,3,10,5,0,2,4,3,1)
sp3 <- c(7,2,4,8,7,2,4,83,1,5,7)
sp4 <- c(0,2,4,2,4,12,1,5,7,2,4)
Site <- c('A','B','C','D','E','F','F','G','G','H','H')
Year <- c('2003','2010','2011','2010','2001','2005','2009','2003','2007','2004','2010')
Obs <- c(1,1,1,4,9,6,8,2,5,2,3)
ID <- c('A2003','B2010','C2011','D2010','E2001','F2005','F2009','G2003','G2007','H2004','H2010')
df<- data.frame(row.names, sp1, sp2, sp3, sp4, Site, Year, Obs, ID)
rownames(df) <- df[,1]
df[,1] <- NULL
df
df.1 <- subset(df, Obs == 1)
df.more <- subset(df, Obs >= 2)
df.1
df.more
The loop function:
require (vegan)
iterations <- 3000
out <- vector("list", iterations)
for(i in 1:iterations){
rnd.more <- do.call(rbind, lapply(split(df.more, df.more$Site),
function(df.more) df.more[sample(nrow(df.more), 1,replace=FALSE) , ])
)
rnd.df <- rbind(df.1,rnd.more)
rnd.df.bc <- as.matrix(vegdist(rnd.df[1:4], method="bray"))
rnd.df.bc[lower.tri(rnd.df.bc,diag=TRUE)] <- NA
triang <- rnd.df.bc[!is.na(rnd.df.bc)]
mean.bc <- mean(triang)
out[[i]] <- list(rnd = rnd.df, bc = rnd.df.bc, ave = mean.bc)
}
Extraction of the results:
all.rnd.df <- lapply(out, "[[", "rnd")
capture.output(all.rnd.df,file="all.rnd.df.txt")
all.rnd.df.bc <- lapply(out, "[[", "bc")
capture.output(all.rnd.df.bc,file="all.rnd.df.bc.txt")
all.triang <- lapply(out, "[[", "ave")
capture.output(all.triang,file="all.triang.txt")
precompute the sample indices:
idx <- lapply(1:iterations, function(x) {
tapply(1:nrow(df.more), as.character(df.more$Site), function(y) {
if(length(y) == 1) y else sample(y, 1)
})
})
idx <- lapply(idx, function(ids) c(1:nrow(df.1), ids + nrow(df.1)))
precompute a placeholder data.frame to index
rnd.df <- rbind(df.1, df.more)
now you just index the precomputed objects without having to calculate them every loop:
iterations <- 3000
out <- vector("list", iterations)
for(i in 1:iterations){
rnd.df.bc <- as.matrix(vegdist(rnd.df[idx[[i]] ,1:4], method="bray"))
rnd.df.bc[lower.tri(rnd.df.bc,diag=TRUE)] <- NA
triang <- rnd.df.bc[!is.na(rnd.df.bc)]
mean.bc <- mean(triang)
out[[i]] <- list(rnd = rnd.df, bc = rnd.df.bc, ave = mean.bc)
}
Benchmarks:
f1 = my method
f2 = OP's code
> microbenchmark(f1(), f2(), times=5L)
Unit: seconds
expr min lq mean median uq max neval
f1() 2.21069 4.877017 4.666875 5.27416 5.444411 5.528096 5
f2() 13.54813 13.554965 19.500247 14.51089 27.074520 28.812732 5
Make it parallel (this needs the foreach and doSNOW packages, and %dopar% rather than %do% so the iterations actually run on the cluster workers):
library(foreach)
library(doSNOW)
cl <- makeCluster(3)
registerDoSNOW(cl)
out <- foreach(i = 1:iterations, .packages=c('vegan')) %dopar%
{
  rnd.df.bc <- as.matrix(vegdist(rnd.df[idx[[i]] ,1:4], method="bray"))
  rnd.df.bc[lower.tri(rnd.df.bc,diag=TRUE)] <- NA
  triang <- rnd.df.bc[!is.na(rnd.df.bc)]
  mean.bc <- mean(triang)
  list(rnd = rnd.df, bc = rnd.df.bc, ave = mean.bc)
}
stopCluster(cl)
Not being familiar with the vegan package, I'm a little limited in what advice I can give you. For the most part, you've constructed your for loop well and as you can see below, you don't gain much by converting it to a function and running it through lapply.
I think your best bet is to parallelize your code. In the example below, if you convert your for loop to a function and use parLapply, you can shave a couple seconds off if you include the cluster build time. If you exclude the cluster build time, it's about 5 times faster on my 7 cores. Changes in computational time will vary by the number of cores you can run on. But I think that may be your best option for now.
library(parallel)
library(vegan)
row.names <- c('A2003','B2010','C2011','D2010','E2001','F2005','F2009','G2003','G2007','H2004','H2010')
sp1 <- c(4,83,1,2,4,3,1,5,7,2,4)
sp2 <- c(5,0,2,3,10,5,0,2,4,3,1)
sp3 <- c(7,2,4,8,7,2,4,83,1,5,7)
sp4 <- c(0,2,4,2,4,12,1,5,7,2,4)
Site <- c('A','B','C','D','E','F','F','G','G','H','H')
Year <- c('2003','2010','2011','2010','2001','2005','2009','2003','2007','2004','2010')
Obs <- c(1,1,1,4,9,6,8,2,5,2,3)
ID <- c('A2003','B2010','C2011','D2010','E2001','F2005','F2009','G2003','G2007','H2004','H2010')
df<- data.frame(row.names, sp1, sp2, sp3, sp4, Site, Year, Obs, ID)
rownames(df) <- df[,1]
df[,1] <- NULL
df
df.1 <- subset(df, Obs == 1)
df.more <- subset(df, Obs >= 2)
df.1
df.more
more.fun <- function(df.more, df.1)
{
rnd.more <- do.call(rbind, lapply(split(df.more, df.more$Site),
function(df.more) df.more[sample(nrow(df.more), 1,replace=FALSE) , ])
)
rnd.df <- rbind(df.1,rnd.more)
rnd.df.bc <- as.matrix(vegdist(rnd.df[1:4], method="bray"))
rnd.df.bc[lower.tri(rnd.df.bc,diag=TRUE)] <- NA
triang <- rnd.df.bc[!is.na(rnd.df.bc)]
mean.bc <- mean(triang)
list(rnd = rnd.df, bc = rnd.df.bc, ave = mean.bc)
}
start.orig <- Sys.time()
set.seed(pi)
iterations <- 3000
out <- vector("list", iterations)
for(i in 1:iterations){
rnd.more <- do.call(rbind, lapply(split(df.more, df.more$Site),
function(df.more) df.more[sample(nrow(df.more), 1,replace=FALSE) , ])
)
rnd.df <- rbind(df.1,rnd.more)
rnd.df.bc <- as.matrix(vegdist(rnd.df[1:4], method="bray"))
rnd.df.bc[lower.tri(rnd.df.bc,diag=TRUE)] <- NA
triang <- rnd.df.bc[!is.na(rnd.df.bc)]
mean.bc <- mean(triang)
out[[i]] <- list(rnd = rnd.df, bc = rnd.df.bc, ave = mean.bc)
}
end.orig <- Sys.time()
start.apply <- Sys.time()
fn = out <- lapply(1:3000, function(i) more.fun(df.more, df.1))
end.apply <- Sys.time()
start.parallel <- Sys.time()
cl <- makeCluster(7)
clusterEvalQ(cl, library(vegan))
clusterExport(cl, c("df.more", "df.1", "more.fun"))
start.parallel.apply <- Sys.time()
out <- parLapply(cl, 1:3000, function(i) more.fun(df.more, df.1))
end.parallel <- Sys.time()
#* Compare times
end.orig - start.orig
end.apply - start.apply
end.parallel - start.parallel
end.parallel - start.parallel.apply
(the time comparisons here are pretty crude)
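One small addition, not reflected in the timings above: if the sampling inside more.fun needs to be reproducible across the parallel workers, seed the cluster's RNG streams before calling parLapply:
# Hedged aside: set.seed() on the master does not control the workers;
# clusterSetRNGStream() gives each worker a reproducible L'Ecuyer stream.
clusterSetRNGStream(cl, iseed = 314)
out <- parLapply(cl, 1:3000, function(i) more.fun(df.more, df.1))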

Using RStudio when I click on Run it executes properly but use source and it gives me a error message

I have the following code, which optimizes and is supposed to export the results to a CSV. All works well except the last line of code:
write.csv(solutionsMatrix, file="SLATE/DK.A1.CSV")
When I click on Run it executes correctly, but when I click on Source it gives me an error message:
Error in source("~/.active-rstudio-document", echo = TRUE) :
~/.active-rstudio-document:76:34: unexpected symbol
75:
76: write.csv(solutionsMatrix, file="SLATE
With an up arrow pointing at the S in SLATE.
Why is it doing this? Thanks for looking.
setwd("C:/DFS/NFL/R DATA")
library('lpSolve')
data <-read.csv ("DRAFTKINGS.CSV")
#Convert salary to numeric
data$Salary <- as.numeric(gsub(",","",data$Salary), data$Salary)
data$MAX.TEAM <- as.numeric(gsub(",","",data$MaxTeam), data$MaxTeam)
#Add binary valeus for positions 'Constraint Vectors'
data <- cbind(data, X=1)
data <- cbind(data, QB=ifelse(data$Pos=="QB",1,0))
data <- cbind(data, RB=ifelse(data$Pos=="RB", 1,0))
data <- cbind(data, WR=ifelse(data$Pos=="WR", 1,0))
data <- cbind(data, TE=ifelse(data$Pos=="TE", 1,0))
data <- cbind(data, DEF=ifelse(data$Pos=="DEF", 1,0))
#Objective Function. sum of proj pts
f.obj <- data$DKA1
#Constraints
num_X <- 9
num_QB <- 1
min_RB <- 2
min_WR <- 3
min_TE <-1
num_DEF <-1
max_RB <- 3
max_WR <-4
max_TE <-2
max_team_cost <- 50000
max_player_from_a_team <-data [2, 5]
#Constraints for max players from team
clubs <- sort(unique(data$Team))
team_constraint_vector <- c()
team_constraint_dir <- c()
team_constraint_rhs <- c()
for(i in 1:length(clubs)){
temp <- data$Team==as.character(clubs[i])
temp[temp==T] <- 1
temp[temp==F] <- 0
team_constraint_vector <- c(team_constraint_vector, temp)
team_constraint_dir <- c(team_constraint_dir, "<=")
team_constraint_rhs <- c(team_constraint_rhs, max_player_from_a_team)
}
solutions <- list()
solutionsMatrix <- matrix(, nrow=0, ncol=21)
for(i in 1:1){
f.con <- matrix (c(data$X,data$QB , data$RB ,data$RB ,data$WR, data$WR,data$TE, data$TE,data$DEF , data$Salary, team_constraint_vector), nrow=(10+length(clubs)), byrow=TRUE)
f.dir <- c("=","=", ">=","<=",">=","<=",">=","<=", "=", "<=", team_constraint_dir)
f.rhs <- c(num_X,num_QB,min_RB,max_RB,min_WR, max_WR,min_TE, max_TE, num_DEF, max_team_cost, team_constraint_rhs)
x <- lp ("max", f.obj, f.con, f.dir, f.rhs, all.bin=TRUE)
x
solutions[[i]] <- data[which(x$solution==1),]
solutionsMatrix <- rbind(solutionsMatrix, c("DKA1, sum(solutions[[i]]$DKA1), sum(solutions[[i]]$Salary), toString(solutions[[i]]$Name[4]),toString(solutions[[i]]$Pos[4]), toString(solutions[[i]]$Name[5]),toString(solutions[[i]]$Pos[5]), toString(solutions[[i]]$Name[8]),toString(solutions[[i]]$Pos[8]), toString(solutions[[i]]$Name[9]),toString(solutions[[i]]$Pos[9]), toString(solutions[[i]]$Name[6]),toString(solutions[[i]]$Pos[6]), toString(solutions[[i]]$Name[7]), toString(solutions[[i]]$Pos[7]),toString(solutions[[i]]$Name[2]),toString(solutions[[i]]$Pos[2]), toString(solutions[[i]]$Name[3]),toString(solutions[[i]]$Pos[3]), toString(solutions[[i]]$Name[1]),toString(solutions[[i]]$Pos[1])))
}
solutions[[1]]
#Solutions Matrix Optimization
solutionsMatrix
write.csv(solutionsMatrix, file="SLATE/DK.A1.CSV")
###################################################################
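For what it's worth, the parse error does not really come from write.csv: in the solutionsMatrix line inside the for loop, the first element is written as "DKA1, without a closing quote, so when the file is sourced the parser keeps reading that string until it reaches the next double quote, which is the one in file="SLATE/DK.A1.CSV" on line 76. At that point SLATE sits outside any string, hence the "unexpected symbol" with the arrow under the S. Closing the quote, i.e. changing c("DKA1, sum(... to c("DKA1", sum(..., should make Source behave the same as Run.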
