termstrc datastream import estim_nss dyncouponbonds - r
I am trying to using termstrc to calculate a time series of nelson siegal parm estimates. I am trying to get the code below to run, but I don't think I have the data in the right format. I'm not sure how to get the data to be organised as a "dyncouponbonds" class properly. The error I get is:
Error in (pos_cf[i] + 1):pos_cf[i + 1] : NA/NaN argument
In addition: Warning message:
In max(n_of_cf) : no non-missing arguments to max; returning -Inf
Apologies if I have not asked in the correct format etc. First time using stackoverflow
importdatastream <- function(datafiles){
#datafiles = c("france S.csv", "france AC.csv", "france CP.csv")
rawdata <- read.csv(datafiles[1], dec=".",sep=",",colClasses = "character")
rawdata_AC <- read.csv(datafiles[2], dec=".",sep=",",colClasses = "character", header=TRUE, check.names=FALSE)
rawdata_CP <- read.csv(datafiles[3], dec=".",sep=",",colClasses = "character", header=TRUE, check.names=FALSE)
DATES <- as.character(as.Date(names(rawdata_AC)[-(1:2)], format="%d.%m.%y"))
DATES <- paste(substring(DATES,9,10),substring(DATES,6,7),substring(DATES,1,4),sep="")
AC <- rawdata[,"ACCRUED"]
CP <- rawdata[,"PRICE"]
dslist <- list()
for(i in 1:length(DATES)){
TODAY <- DATES[i]
AC <- rawdata_AC[-(1:2)][,i]
CP <- rawdata_CP[-(1:2)][,i]
datastreamlist <- function(rawdata, TODAY, AC, CP){
data <- list()
data$ISIN <- rawdata[,"ISIN"]
data$MATURITYDATE <- as.Date(rawdata[,"MATURITYDATE"],format="%d%m%Y")
data$ISSUEDATE <- as.Date(rawdata[,"ISSUEDATE"],format="%d%m%Y")
data$COUPONRATE <- as.numeric(rawdata[,"COUPONRATE"])
data$PRICE <- as.numeric(CP)
data$ACCRUED <- as.numeric(AC)
data$CASHFLOWS <- list()
data$TODAY <- as.Date(TODAY,format="%d%m%Y")
NEXTCOUPON <- ifelse(as.Date(paste(rawdata[,"COUPONDATE"],substring(TODAY,5,8),sep=""),format="%d%m%Y") > data$TODAY,
paste(rawdata[,"COUPONDATE"],substring(TODAY,5,8),sep=""),
paste(rawdata[,"COUPONDATE"],as.character(as.numeric(substring(TODAY,5,8))+1),sep=""))
NCOUPON <- as.numeric(substring(rawdata[,"MATURITYDATE"],5,8)) - as.numeric(substring(NEXTCOUPON,5,8))
# cash flows ISIN
data$CASHFLOWS$ISIN <- vector()
for(i in 1:length(NCOUPON)){
data$CASHFLOWS$ISIN <- c(data$CASHFLOWS$ISIN,rep(data$ISIN[i],NCOUPON[i]+1))
}
# cash flows
data$CASHFLOWS$CF <- vector()
for(i in 1:length(NCOUPON)){
data$CASHFLOWS$CF <- c(data$CASHFLOWS$CF,c(rep(data$COUPONRATE[i]*100,NCOUPON[i]),100+data$COUPONRATE[i]*100))
}
# cash flow dates
data$CASHFLOWS$DATE <- vector()
for(i in 1:length(NCOUPON)){
data$CASHFLOWS$DATE <- c(data$CASHFLOWS$DATE,paste(rawdata[i,"COUPONDATE"],as.numeric(substring(NEXTCOUPON[i],5,8)) + seq(0,NCOUPON[i]),sep=""))
}
data$CASHFLOWS$DATE <- as.Date(data$CASHFLOWS$DATE,format="%d%m%Y")
data
}
dslist[[i]] <- datastreamlist(rawdata, TODAY, AC, CP)
}
dslist
}
datafiles = c("france S.csv", "france AC.csv", "france CP.csv")
govbondsts <- importdatastream(datafiles)
class(govbondsts)="couponbonds"
#class(govbondsts)="dyncouponbonds"
ns_res <- estim_nss(govbondsts, c("FRANCE"), matrange="all" ,method = "ns", tauconstr = list(c(0.2, 7, 0.2)), optimtype = "allglobal")
NEW WORKING R CODE:
if (!("termstrc" %in% installed.packages())) install.packages("termstrc")
library(termstrc)
datafiles = c("france S.csv", "france AC.csv", "france CP.csv")
rawdata <- read.csv(datafiles[1], dec=".", sep=",", colClasses = "character")
rawdata_AC <- read.csv(datafiles[2], dec=".",sep=",",colClasses = "character", header=TRUE, check.names=FALSE)
rawdata_CP <- read.csv(datafiles[3], dec=".",sep=",",colClasses = "character", header=TRUE, check.names=FALSE)
DATES <- as.character(as.Date(names(rawdata_AC)[-(1:2)], format="%d.%m.%y"))
DATES <- paste(substring(DATES,9,10),substring(DATES,6,7),substring(DATES,1,4),sep="")
AC <- rawdata[,"ACCRUED"]
CP <- rawdata[,"PRICE"]
dslist <- list()
for(i in 1:length(DATES)) {
TODAY <- DATES[i]
AC <- rawdata_AC[-(1:2)][,i]
CP <- rawdata_CP[-(1:2)][,i]
datastreamlist <- function(rawdata, TODAY, AC, CP){
data <- list()
data$ISIN <- rawdata[,"ISIN"]
data$MATURITYDATE <- as.Date(rawdata[,"MATURITYDATE"],format="%d%m%Y")
data$ISSUEDATE <- as.Date(rawdata[,"ISSUEDATE"],format="%d%m%Y")
data$COUPONRATE <- as.numeric(rawdata[,"COUPONRATE"])
data$PRICE <- as.numeric(CP)
data$ACCRUED <- as.numeric(AC)
data$CASHFLOWS <- list()
data$TODAY <- as.Date(TODAY,format="%d%m%Y")
NEXTCOUPON <- ifelse(as.Date(paste(rawdata[,"COUPONDATE"],substring(TODAY,5,8),sep=""),format="%d%m%Y") > data$TODAY,
paste(rawdata[,"COUPONDATE"],substring(TODAY,5,8),sep=""),
paste(rawdata[,"COUPONDATE"],as.character(as.numeric(substring(TODAY,5,8))+1),sep=""))
NCOUPON <- as.numeric(substring(rawdata[,"MATURITYDATE"],5,8)) - as.numeric(substring(NEXTCOUPON,5,8))
# cash flows ISIN
data$CASHFLOWS$ISIN <- vector()
for(i in 1:length(NCOUPON)){
data$CASHFLOWS$ISIN <- c(data$CASHFLOWS$ISIN,rep(data$ISIN[i],NCOUPON[i]+1))
}
# cash flows
data$CASHFLOWS$CF <- vector()
for(i in 1:length(NCOUPON)){
data$CASHFLOWS$CF <- c(data$CASHFLOWS$CF,c(rep(data$COUPONRATE[i]*100,NCOUPON[i]),100+data$COUPONRATE[i]*100))
}
# cash flow dates
data$CASHFLOWS$DATE <- vector()
for(i in 1:length(NCOUPON)){
data$CASHFLOWS$DATE <- c(data$CASHFLOWS$DATE,paste(rawdata[i,"COUPONDATE"],as.numeric(substring(NEXTCOUPON[i],5,8)) + seq(0,NCOUPON[i]),sep=""))
}
data$CASHFLOWS$DATE <- as.Date(data$CASHFLOWS$DATE,format="%d%m%Y")
data
}
dslist[[i]] <- list(FRANCE=datastreamlist(rawdata, TODAY, AC, CP))
class(dslist[[i]]) <- "couponbonds"
}
class(dslist) <- "dyncouponbonds"
dl_res <- estim_nss(dslist, c("FRANCE"), method = "dl", lambda = 1/3)
plot(dl_res)
Related
Requires numeric/complex matric/vector argument
So I have a large data set that I have imported and split up. I've made sure to attach everything and tried to run a code to determine the number of breakpoints using AIC. rm(list=ls()) library(Matching) library(segmented) dinosaurs=read.csv("C:/Users/user/Desktop/NEW PLOTS FOR DINOS/centrum_input_fin.csv") attach(dinosaurs) names(dinosaurs) dino_names <- names(dinosaurs) #NEED TO EXPORT FILES (EXPORT THE ALL_DATA_PLUS_SORTED OUT) all_data_plus_sorted<-NULL for(j in 1:length(dino_names)) { with_gaps<-eval(parse(text = dino_names[j])) gaps <- which(is.na(with_gaps)) non_gaps <-which(1:length(with_gaps) %in%gaps==FALSE) sorted_without_gaps <- sort(with_gaps[!is.na(with_gaps)],decreasing=TRUE) ordered_with_gaps<-rep(NA,length(with_gaps)) for(k in 1:length(non_gaps)) { ordered_with_gaps[non_gaps[k]] <- sorted_without_gaps[k] } to_export<-cbind(with_gaps,ordered_with_gaps) colnames(to_export)<-c(paste(dino_names[j],"_actual_with_gaps",sep=""),paste(dino_names[j],"_ordered_with_gaps",sep="")) all_data_plus_sorted<- cbind(all_data_plus_sorted,to_export) } all_data_plus_sorted attach(as.data.frame(all_data_plus_sorted)) print(dinosaurs) detach(as.data.frame(all_data_plus_sorted)) detach(dinosaurs) #split species Dyoplosaurus_acutosquameus_ROM734 <- Dyoplosaurus_acutosquameus_ROM734[!is.na(Dyoplosaurus_acutosquameus_ROM734)] Staurikosaurus_pricei <- Staurikosaurus_pricei[!is.na(Staurikosaurus_pricei)] Opistocoelocaudia_skarzynskii <- Opistocoelocaudia_skarzynskii[!is.na(Opistocoelocaudia_skarzynskii)] Stegosaurus_stenops._NHMUKPVR36730 <- Stegosaurus_stenops._NHMUKPVR36730[!is.na(Stegosaurus_stenops._NHMUKPVR36730)] Giraffatitan_brancai <- Giraffatitan_brancai[!is.na(Giraffatitan_brancai)] Camptosaurus <- Camptosaurus[!is.na(Camptosaurus)] Camptosaurus_prestwichii <- Camptosaurus_prestwichii[!is.na(Camptosaurus_prestwichii)] A_greppini <- A_greppini[!is.na(A_greppini)] Astrophocaudia_slaughteri_SMU61732 <- Astrophocaudia_slaughteri_SMU61732[!is.na(Astrophocaudia_slaughteri_SMU61732)] Tastavinsaurus_sanzi_gen_MPZ999 <- Tastavinsaurus_sanzi_gen_MPZ999[!is.na(Tastavinsaurus_sanzi_gen_MPZ999)] MOZ_Pv1221 <- MOZ_Pv1221[!is.na(MOZ_Pv1221)] Mamenchisaurus <- Mamenchisaurus[!is.na(Mamenchisaurus)] Bromtosaurus_CMNo3018 <- Bromtosaurus_CMNo3018[!is.na(Bromtosaurus_CMNo3018)] Lufengosaurus_Hueni <- Lufengosaurus_Hueni[!is.na(Lufengosaurus_Hueni)] Mamenchisaurus_hochuanensi <- Mamenchisaurus_hochuanensi[!is.na(Mamenchisaurus_hochuanensi)] Spinosaurus_FSACKK11888 <- Spinosaurus_FSACKK11888[!is.na(Spinosaurus_FSACKK11888)] Buitreraptor_MPCNPV370 <- Buitreraptor_MPCNPV370[!is.na(Buitreraptor_MPCNPV370)] Buitreraptor_MPCA245 <- Buitreraptor_MPCA245[!is.na(Buitreraptor_MPCA245)] Huabeisaurus_allocotus_HBV20001 <- Huabeisaurus_allocotus_HBV20001[!is.na(Huabeisaurus_allocotus_HBV20001)] Tethyshadros_insularis_SC57021 <- Tethyshadros_insularis_SC57021[!is.na(Tethyshadros_insularis_SC57021)] Compsognathus_longipes_CNJ79 <- Compsognathus_longipes_CNJ79[!is.na(Compsognathus_longipes_CNJ79)] Archaeopteryx12 <- Archaeopteryx12[!is.na(Archaeopteryx12)] Sinosauropteryx_NIGP127586 <- Sinosauropteryx_NIGP127586[!is.na(Sinosauropteryx_NIGP127586)] Sinosauropteryx_NIGP_127587 <- Sinosauropteryx_NIGP_127587[!is.na(Sinosauropteryx_NIGP_127587)] Tetonosaurus_tilletti_AMNH3040 <- Tetonosaurus_tilletti_AMNH3040[!is.na(Tetonosaurus_tilletti_AMNH3040)] Bambiraptor_feinbergi_FIP001 <- Bambiraptor_feinbergi_FIP001[!is.na(Bambiraptor_feinbergi_FIP001)] Seimosaurus.halli_NMMNH3690 <- Seimosaurus.halli_NMMNH3690[!is.na(Seimosaurus.halli_NMMNH3690)] Diluvicursor_pickeringi_NMVP221080 <- Diluvicursor_pickeringi_NMVP221080[!is.na(Diluvicursor_pickeringi_NMVP221080)] Zhejiungosuurus_lishuiensis_ZMNHM8718 <- Zhejiungosuurus_lishuiensis_ZMNHM8718[!is.na(Zhejiungosuurus_lishuiensis_ZMNHM8718)] Tianyulong_confuciusi_STMN.263 <- Tianyulong_confuciusi_STMN.263[!is.na(Tianyulong_confuciusi_STMN.263)] Lusotitan_atalaiensis <- Lusotitan_atalaiensis[!is.na(Lusotitan_atalaiensis)] Nemegtonykus_citus_MPCD100203 <- Nemegtonykus_citus_MPCD100203[!is.na(Nemegtonykus_citus_MPCD100203)] Elaphrosaurus_bambergi_MBR4960 <- Elaphrosaurus_bambergi_MBR4960[!is.na(Elaphrosaurus_bambergi_MBR4960)] Nomingia_gobiensis_GIN100119 <- Nomingia_gobiensis_GIN100119[!is.na(Nomingia_gobiensis_GIN100119)] Nomingia_gobiensis_MPCD100119 <- Nomingia_gobiensis_MPCD100119[!is.na(Nomingia_gobiensis_MPCD100119)] Chirostenotes_pergracilis <- Chirostenotes_pergracilis[!is.na(Chirostenotes_pergracilis)] Seismosaurus_hallorum_NMMNHP3690 <- Seismosaurus_hallorum_NMMNHP3690[!is.na(Seismosaurus_hallorum_NMMNHP3690)] Heterodontosaurus_tucki_SAMPKK1332 <- Heterodontosaurus_tucki_SAMPKK1332[!is.na(Heterodontosaurus_tucki_SAMPKK1332)] Jianianhualong_tengi_DLXH1218 <- Jianianhualong_tengi_DLXH1218[!is.na(Jianianhualong_tengi_DLXH1218)] Yinlong_downsi_IVPPV18685 <- Yinlong_downsi_IVPPV18685[!is.na(Yinlong_downsi_IVPPV18685)] Neimongosaurus_yangi_LHV0001 <- Neimongosaurus_yangi_LHV0001[!is.na(Neimongosaurus_yangi_LHV0001)] Magnapaulia_laticaudus_LACM17715 <- Magnapaulia_laticaudus_LACM17715[!is.na(Magnapaulia_laticaudus_LACM17715)] Ouranosaurus_nigeriensis <- Ouranosaurus_nigeriensis[!is.na(Ouranosaurus_nigeriensis)] Dreadnoughtus_schrani_MPMPV1156 <- Dreadnoughtus_schrani_MPMPV1156[!is.na(Dreadnoughtus_schrani_MPMPV1156)] Pectodens_zhenyuensis_IVPPV18578 <- Pectodens_zhenyuensis_IVPPV18578[!is.na(Pectodens_zhenyuensis_IVPPV18578)] Dilophosaurus_wetherilli <- Dilophosaurus_wetherilli[!is.na(Dilophosaurus_wetherilli)] Gobihadros_mongoliensis_MPCD100746 <- Gobihadros_mongoliensis_MPCD100746[!is.na(Gobihadros_mongoliensis_MPCD100746)] Gobihadros_mongoliensis_MPCD100755 <- Gobihadros_mongoliensis_MPCD100755[!is.na(Gobihadros_mongoliensis_MPCD100755)] Auroraceratops_rugosus_GJ07913 <- Auroraceratops_rugosus_GJ07913[!is.na(Auroraceratops_rugosus_GJ07913)] Patagotitan_mayorum_MPEFPV <- Patagotitan_mayorum_MPEFPV[!is.na(Patagotitan_mayorum_MPEFPV)] Eoraptor_lunensi_PVSJ512 <- Eoraptor_lunensi_PVSJ512[!is.na(Eoraptor_lunensi_PVSJ512)] Corythosaurus_casuarius <- Corythosaurus_casuarius[!is.na(Corythosaurus_casuarius)] Caihong._Juji_PMoLB00175 <- Caihong._Juji_PMoLB00175[!is.na(Caihong._Juji_PMoLB00175)] Eosinopteryx_brevipenna_YFGPT5197 <- Eosinopteryx_brevipenna_YFGPT5197[!is.na(Eosinopteryx_brevipenna_YFGPT5197)] Rahonavis_ostromi_UA8656 <- Rahonavis_ostromi_UA8656[!is.na(Rahonavis_ostromi_UA8656)] Changyuraptor_yangi_HGB016 <- Changyuraptor_yangi_HGB016[!is.na(Changyuraptor_yangi_HGB016)] Herrerasaurus_ischigualastensis_PVL2566 <- Herrerasaurus_ischigualastensis_PVL2566[!is.na(Herrerasaurus_ischigualastensis_PVL2566)] Herrerasaurus_ischigualastensis_UNSJ53 <- Herrerasaurus_ischigualastensis_UNSJ53[!is.na(Herrerasaurus_ischigualastensis_UNSJ53)] Ischioceratops_zhuchengensis <- Ischioceratops_zhuchengensis[!is.na(Ischioceratops_zhuchengensis)] Koreaceratops_hwaseongensis <- Koreaceratops_hwaseongensis[!is.na(Koreaceratops_hwaseongensis)] # CHOOSE SAMPLE TO ANALYSE #_________________________________________________________________________________________________ # choose sample name_to_test <- "Koreaceratops_hwaseongensis" y_val <- eval(parse(text = paste(name_to_test,"_actual_with_gaps",sep=""))) x_val<-1:length(y_val) # USE AIC TO DECIDE HOW MANY BREAKS TO USE #_________________________________________________________________________________________________ # extract AIC for models with 1-3 breakpoints my_max_it=10 all_mods<-NULL for(h in 1:4) { mod1<-segmented(lm(y_val~x_val),seg.Z=~x_val,psi=NA,control=seg.control(K=h,quant=TRUE,it.max=my_max_it),model=TRUE,nboot=50) all_mods<-rbind(all_mods,c(h,extractAIC(mod1)[2])) } all_mods my_K<-subset(all_mods,all_mods[,2]==min(all_mods[,2]))[1] When i run the last section of the code i get the error Error in crossprod(x, y) : requires numeric/complex matrix/vector arguments Not too sure why because I have put it in a data frame, is it because I'm importing the file incorrectly? Not sure how to fix.
Error data must be of a vector type was null R
I don't know what I'm doing wrong. On Rstudio cloud everything works but in rstudio deskop appears an error. myData <- read.csv('C:/Users/iwona/Desktop/myData.csv', header = TRUE) print('wpisz numer województwa, dla którego zostanie przygotowana anliza') h1 <- readline(prompt="0-Dolnoslaskie, 1-Kujawsko-Pomorskie, 2-Lubelskie, 3-Lubuskie, 4-Lodzkie, 5-Malopolskie, 6-Mazowieckie, 7-Opolskie, 8-Podkarpackie, 9-Podlaskie, 10-Pomorskie, 11-Slaskie, 12-Swietokrzyskie, 13-Warminsko-Mazurskie, 14-Wielkopolskie, 15-Zachodniopomorskie") h <- as.numeric(h1) months_prices <- vector() prices <- vector() dataSummary <- vector() regions <- as.character(myData$Nazwa[5:20]) years <- 2006:2019 services <- vector() #regions <- dataframe serviceNames <- c('Ryz','Mieso wolowe bez kosci','Kurczeta patroszone','Kielbasa suszona','Ser dojrzewajacy','Herbata czarna, lisciasta','Podkoszulek meski bawelniany, bez rekawa','Oczyszczanie chemiczne garnituru meskiego 2-czesciowego','Wegiel kamienny','Olej napedowy') #for(h in 0:15) # h for regions #h <- 5 par(mfrow=c(5,2)) cumulative <- data.frame(years=integer(), prices=logical(), services=logical()) for(i in 0:9) # i for services { for (j in 0:13) # j for years 2006-2019 { for(k in 0:11) #k for months { months_prices[k+1] <- as.numeric(as.matrix(myData[5+h,4+j+(i*14)+(k*140)])[1,]) } prices[j+1] <- mean(months_prices) services[j+1] <- serviceNames[i+1] months_prices <- vector() # empty } pricesForService.data <- data.frame(years, prices, services) cumulative <- rbind(cumulative, pricesForService.data) tmp <- cumulative[cumulative$services == serviceNames[i+1], ] plot(tmp$years, tmp$prices, main = serviceNames[i+1], xlab="Lata", ylab="Ceny", type = "l", col = "blue") mtext(regions[h+1], side = 3, line = -1.5, outer = TRUE) } I get the following error: Error in array(x, c(length(x), 1L), if (!is.null(names(x))) list(names(x), : 'data' must be of a vector type, was 'NULL' Thank for your help
R apply function to a list of price data
in the code below I am grabbing data for three symbols, then I want to apply a simple function(which is a trading strategy) to this data. Ideally, I would then run statistics on those returns, such as those native to PerformanceAnalytics. library("quantmod") library("PerformanceAnalytics") options(scipen=999) PriceData <- new.env() Symbols <- c("SPY", "QQQ", "IWM") StartDt <- as.Date("2015-01-01") suppressWarnings(getSymbols(Symbols, src="yahoo", env=PriceData, from=StartDt)) x <- list() for (i in 1:length(Symbols)) { x[[i]] <- get(Symbols[i], pos=PriceData) } SYSTEM <- function(data){ ret<- Delt(Cl(x[[i]]),Op(x[[i]]),type = 'arithmetic') mavga <- SMA(Cl(x[[i]]), n=10) mavgb <- SMA(Cl(x[[i]]), n=20) sig <- ifelse(mavga>mavgb,1,0)+ifelse(mavgb>mavga,-1,0) sig <- lag(sig,1) sig[is.na(sig)]=0 strategyreturn <- sig * ret return(strategyreturn) } ######I'm doing something wrong here###### y <- lappy(x,SYSTEM) z <- do.call("cbind", y) PerformanceTable <- function(returns){ scalar <- 252 CS <- t(Return.cumulative(returns, geometric = FALSE)) SR <- t(SharpeRatio.annualized(returns, scale=scalar, geometric = FALSE)) SOR <- t(SortinoRatio(returns)) MDD <- t(maxDrawdown(returns)) WP <- colSums(returns > 0)/colSums(returns != 0) WP <- as.data.frame(WP) ASD = t(sd.annualized(returns, scale=scalar)) Stat <- cbind(CS,SR,SOR,MDD,ASD,WP) colnames(Stat) <- c("Profit","SharpeRatio", "Sortino","MaxDrawdown", "AnnStdDev", "WinPct") print("Performance Table") print(Stat) return (Stat) } Perf <- PerformanceTable(y)
You are almost there, just replace x[[i]] by data in your function: SYSTEM <- function(data){ ret<- Delt(Cl(data),Op(data),type = 'arithmetic') mavga <- SMA(Cl(data), n=10) mavgb <- SMA(Cl(data), n=20) sig <- ifelse(mavga>mavgb,1,0)+ifelse(mavgb>mavga,-1,0) sig <- lag(sig,1) sig[is.na(sig)]=0 strategyreturn <- sig * ret return(strategyreturn) } and add a l to lappy() y <- lapply(x,SYSTEM) z <- do.call("cbind", y)
improve execution time in a for loop (R)
Here is a reproducible example of a for loop. Since I want to do 3000 iterations and my matrix is way bigger than this reproducible example, the computer crushes. Any ideas on how can I do it? I have read that loops are not reccomended in R and instead the web proposes to use vectors and apply functions but I wasn't able to build my formula with that functions... The matrix: row.names <- c('A2003','B2010','C2011','D2010','E2001','F2005','F2009','G2003','G2007','H2004','H2010') sp1 <- c(4,83,1,2,4,3,1,5,7,2,4) sp2 <- c(5,0,2,3,10,5,0,2,4,3,1) sp3 <- c(7,2,4,8,7,2,4,83,1,5,7) sp4 <- c(0,2,4,2,4,12,1,5,7,2,4) Site <- c('A','B','C','D','E','F','F','G','G','H','H') Year <- c('2003','2010','2011','2010','2001','2005','2009','2003','2007','2004','2010') Obs <- c(1,1,1,4,9,6,8,2,5,2,3) ID <- c('A2003','B2010','C2011','D2010','E2001','F2005','F2009','G2003','G2007','H2004','H2010') df<- data.frame(row.names, sp1, sp2, sp3, sp4, Site, Year, Obs, ID) rownames(df) <- df[,1] df[,1] <- NULL df df.1 <- subset(df, Obs == 1) df.more <- subset(df, Obs >= 2) df.1 df.more The loop function: require (vegan) iterations <- 3000 out <- vector("list", iterations) for(i in 1:iterations){ rnd.more <- do.call(rbind, lapply(split(df.more, df.more$Site), function(df.more) df.more[sample(nrow(df.more), 1,replace=FALSE) , ]) ) rnd.df <- rbind(df.1,rnd.more) rnd.df.bc <- as.matrix(vegdist(rnd.df[1:4], method="bray")) rnd.df.bc[lower.tri(rnd.df.bc,diag=TRUE)] <- NA triang <- rnd.df.bc[!is.na(rnd.df.bc)] mean.bc <- mean(triang) out[[i]] <- list(rnd = rnd.df, bc = rnd.df.bc, ave = mean.bc) } Extraction of the results: all.rnd.df <- lapply(out, "[[", "rnd") capture.output(all.rnd.df,file="all.rnd.df.txt") all.rnd.df.bc <- lapply(out, "[[", "bc") capture.output(all.rnd.df.bc,file="all.rnd.df.bc.txt") all.triang <- lapply(out, "[[", "ave") capture.output(all.triang,file="all.triang.txt")
precompute the sample indices: idx <- lapply(1:iterations, function(x) { tapply(1:nrow(df.more), as.character(df.more$Site), function(y) { if(length(y) == 1) y else sample(y, 1) }) }) idx <- lapply(idx, function(ids) c(1:nrow(df.1), ids + nrow(df.1))) precompute a placeholder data.frame to index rnd.df <- rbind(df.1, df.more) now you just index the precomputed objects without having to calculate them every loop: iterations <- 3000 out <- vector("list", iterations) for(i in 1:iterations){ rnd.df.bc <- as.matrix(vegdist(rnd.df[idx[[i]] ,1:4], method="bray")) rnd.df.bc[lower.tri(rnd.df.bc,diag=TRUE)] <- NA triang <- rnd.df.bc[!is.na(rnd.df.bc)] mean.bc <- mean(triang) out[[i]] <- list(rnd = rnd.df, bc = rnd.df.bc, ave = mean.bc) } Benchmarks: f1 = my method f2 = OPs code > microbenchmark(f1(), f2(), times=5L) Unit: seconds expr min lq mean median uq max neval f1() 2.21069 4.877017 4.666875 5.27416 5.444411 5.528096 5 f2() 13.54813 13.554965 19.500247 14.51089 27.074520 28.812732 5 Make it parallel: cl <- makeCluster(3) registerDoSNOW(cl) out <- foreach(i = 1:iterations, .packages=c('vegan')) %do% { rnd.df.bc <- as.matrix(vegdist(rnd.df[idx[[i]] ,1:4], method="bray")) rnd.df.bc[lower.tri(rnd.df.bc,diag=TRUE)] <- NA triang <- rnd.df.bc[!is.na(rnd.df.bc)] mean.bc <- mean(triang) list(rnd = rnd.df, bc = rnd.df.bc, ave = mean.bc) } stopCluster(cl)
Not being familiar with the vegan package, I'm a little limited in what advice I can give you. For the most part, you've constructed your for loop well and as you can see below, you don't gain much by converting it to a function and running it through lapply. I think your best bet is to parallelize your code. In the example below, if you convert your for loop to a function and use parLapply, you can shave a couple seconds off if you include the cluster build time. If you exclude the cluster build time, it's about 5 times faster on my 7 cores. Changes in computational time will vary by the number of cores you can run on. But I think that may be your best option for now. library(parallel) library(vegan) row.names <- c('A2003','B2010','C2011','D2010','E2001','F2005','F2009','G2003','G2007','H2004','H2010') sp1 <- c(4,83,1,2,4,3,1,5,7,2,4) sp2 <- c(5,0,2,3,10,5,0,2,4,3,1) sp3 <- c(7,2,4,8,7,2,4,83,1,5,7) sp4 <- c(0,2,4,2,4,12,1,5,7,2,4) Site <- c('A','B','C','D','E','F','F','G','G','H','H') Year <- c('2003','2010','2011','2010','2001','2005','2009','2003','2007','2004','2010') Obs <- c(1,1,1,4,9,6,8,2,5,2,3) ID <- c('A2003','B2010','C2011','D2010','E2001','F2005','F2009','G2003','G2007','H2004','H2010') df<- data.frame(row.names, sp1, sp2, sp3, sp4, Site, Year, Obs, ID) rownames(df) <- df[,1] df[,1] <- NULL df df.1 <- subset(df, Obs == 1) df.more <- subset(df, Obs >= 2) df.1 df.more more.fun <- function(df.more, df.1) { rnd.more <- do.call(rbind, lapply(split(df.more, df.more$Site), function(df.more) df.more[sample(nrow(df.more), 1,replace=FALSE) , ]) ) rnd.df <- rbind(df.1,rnd.more) rnd.df.bc <- as.matrix(vegdist(rnd.df[1:4], method="bray")) rnd.df.bc[lower.tri(rnd.df.bc,diag=TRUE)] <- NA triang <- rnd.df.bc[!is.na(rnd.df.bc)] mean.bc <- mean(triang) list(rnd = rnd.df, bc = rnd.df.bc, ave = mean.bc) } start.orig <- Sys.time() set.seed(pi) iterations <- 3000 out <- vector("list", iterations) for(i in 1:iterations){ rnd.more <- do.call(rbind, lapply(split(df.more, df.more$Site), function(df.more) df.more[sample(nrow(df.more), 1,replace=FALSE) , ]) ) rnd.df <- rbind(df.1,rnd.more) rnd.df.bc <- as.matrix(vegdist(rnd.df[1:4], method="bray")) rnd.df.bc[lower.tri(rnd.df.bc,diag=TRUE)] <- NA triang <- rnd.df.bc[!is.na(rnd.df.bc)] mean.bc <- mean(triang) out[[i]] <- list(rnd = rnd.df, bc = rnd.df.bc, ave = mean.bc) } end.orig <- Sys.time() start.apply <- Sys.time() fn = out <- lapply(1:3000, function(i) more.fun(df.more, df.1)) end.apply <- Sys.time() start.parallel <- Sys.time() cl <- makeCluster(7) clusterEvalQ(cl, library(vegan)) clusterExport(cl, c("df.more", "df.1", "more.fun")) start.parallel.apply <- Sys.time() out <- parLapply(cl, 1:3000, function(i) more.fun(df.more, df.1)) end.parallel <- Sys.time() #* Compare times end.orig - start.orig end.apply - start.apply end.parallel - start.parallel end.parallel - start.parallel.apply (the time comparisons here are pretty crude)
Using RStudio when I click on Run it executes properly but use source and it gives me a error message
I have the following code which optimizes and is suppose to export the results to a csv. All works well but the last line of code write.csv(solutionsMatrix, file="SLATE/DK.A1.CSV") When i click on Run it executes correctly but when i click on Source it gives me a error message Error in source("~/.active-rstudio-document", echo = TRUE) : ~/.active-rstudio-document:76:34: unexpected symbol 75: 76: write.csv(solutionsMatrix, file="SLATE With a up arrow point at the S in SLATE Why is it doing this?? Thanks for looking setwd("C:/DFS/NFL/R DATA") library('lpSolve') data <-read.csv ("DRAFTKINGS.CSV") #Convert salary to numeric data$Salary <- as.numeric(gsub(",","",data$Salary), data$Salary) data$MAX.TEAM <- as.numeric(gsub(",","",data$MaxTeam), data$MaxTeam) #Add binary valeus for positions 'Constraint Vectors' data <- cbind(data, X=1) data <- cbind(data, QB=ifelse(data$Pos=="QB",1,0)) data <- cbind(data, RB=ifelse(data$Pos=="RB", 1,0)) data <- cbind(data, WR=ifelse(data$Pos=="WR", 1,0)) data <- cbind(data, TE=ifelse(data$Pos=="TE", 1,0)) data <- cbind(data, DEF=ifelse(data$Pos=="DEF", 1,0)) #Objective Function. sum of proj pts f.obj <- data$DKA1 #Constraints num_X <- 9 num_QB <- 1 min_RB <- 2 min_WR <- 3 min_TE <-1 num_DEF <-1 max_RB <- 3 max_WR <-4 max_TE <-2 max_team_cost <- 50000 max_player_from_a_team <-data [2, 5] #Constraints for max players from team clubs <- sort(unique(data$Team)) team_constraint_vector <- c() team_constraint_dir <- c() team_constraint_rhs <- c() for(i in 1:length(clubs)){ temp <- data$Team==as.character(clubs[i]) temp[temp==T] <- 1 temp[temp==F] <- 0 team_constraint_vector <- c(team_constraint_vector, temp) team_constraint_dir <- c(team_constraint_dir, "<=") team_constraint_rhs <- c(team_constraint_rhs, max_player_from_a_team) } solutions <- list() solutionsMatrix <- matrix(, nrow=0, ncol=21) for(i in 1:1){ f.con <- matrix (c(data$X,data$QB , data$RB ,data$RB ,data$WR, data$WR,data$TE, data$TE,data$DEF , data$Salary, team_constraint_vector), nrow=(10+length(clubs)), byrow=TRUE) f.dir <- c("=","=", ">=","<=",">=","<=",">=","<=", "=", "<=", team_constraint_dir) f.rhs <- c(num_X,num_QB,min_RB,max_RB,min_WR, max_WR,min_TE, max_TE, num_DEF, max_team_cost, team_constraint_rhs) x <- lp ("max", f.obj, f.con, f.dir, f.rhs, all.bin=TRUE) x solutions[[i]] <- data[which(x$solution==1),] solutionsMatrix <- rbind(solutionsMatrix, c("DKA1, sum(solutions[[i]]$DKA1), sum(solutions[[i]]$Salary), toString(solutions[[i]]$Name[4]),toString(solutions[[i]]$Pos[4]), toString(solutions[[i]]$Name[5]),toString(solutions[[i]]$Pos[5]), toString(solutions[[i]]$Name[8]),toString(solutions[[i]]$Pos[8]), toString(solutions[[i]]$Name[9]),toString(solutions[[i]]$Pos[9]), toString(solutions[[i]]$Name[6]),toString(solutions[[i]]$Pos[6]), toString(solutions[[i]]$Name[7]), toString(solutions[[i]]$Pos[7]),toString(solutions[[i]]$Name[2]),toString(solutions[[i]]$Pos[2]), toString(solutions[[i]]$Name[3]),toString(solutions[[i]]$Pos[3]), toString(solutions[[i]]$Name[1]),toString(solutions[[i]]$Pos[1]))) } solutions[[1]] #Solutions Matrix Optimization solutionsMatrix write.csv(solutionsMatrix, file="SLATE/DK.A1.CSV") ###################################################################