Histogram does not appear in R
library(downloader)
url <- "https://raw.githubusercontent.com/genomicsclass/dagdata/master/inst/extdata/femaleControlsPopulation.csv"
filename <- "femaleControlsPopulation"
download(url, destfile=filename)
dat <- read.csv(filename)
x <- unlist( read.csv(filename))
set.seed(1)
n <- 10000
averages5 <- vector("numeric", n)
for (i in 1:n){
x <- sample(x,5)
averages5[i] <- mean(x)
}
hist(averages5)
mean(abs(averages5-mean(x))>1)
My problem is that the histogram appears empty and the mean is 0. Can anyone help?
The problem is that inside your loop you overwrite x with sample(x, 5), so after the first iteration x holds only 5 values; every later sample is just a permutation of those same 5 numbers. That is why the histogram collapses to what looks like an empty plot and why mean(abs(averages5 - mean(x)) > 1) is 0. Keep x intact and store only the sample mean. This will do what you want:
url <- "https://raw.githubusercontent.com/genomicsclass/dagdata/master/inst/extdata/femaleControlsPopulation.csv"
dat <- read.table(file=url, header=TRUE)
x <- dat$Bodyweight
n <- 10000
averages5 <- replicate(n, mean(sample(x,5)))
hist(averages5)
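If you also want the proportion of sample averages that fall more than 1 unit from the population mean (the quantity computed at the end of the original code), here is a minimal line building on the corrected objects above:
# proportion of the 10000 averages that are more than 1 unit away from mean(x)
mean(abs(averages5 - mean(x)) > 1)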
Related
Requires numeric/complex matrix/vector argument
So I have a large data set that I have imported and split up. I've made sure to attach everything and tried to run a code to determine the number of breakpoints using AIC. rm(list=ls()) library(Matching) library(segmented) dinosaurs=read.csv("C:/Users/user/Desktop/NEW PLOTS FOR DINOS/centrum_input_fin.csv") attach(dinosaurs) names(dinosaurs) dino_names <- names(dinosaurs) #NEED TO EXPORT FILES (EXPORT THE ALL_DATA_PLUS_SORTED OUT) all_data_plus_sorted<-NULL for(j in 1:length(dino_names)) { with_gaps<-eval(parse(text = dino_names[j])) gaps <- which(is.na(with_gaps)) non_gaps <-which(1:length(with_gaps) %in%gaps==FALSE) sorted_without_gaps <- sort(with_gaps[!is.na(with_gaps)],decreasing=TRUE) ordered_with_gaps<-rep(NA,length(with_gaps)) for(k in 1:length(non_gaps)) { ordered_with_gaps[non_gaps[k]] <- sorted_without_gaps[k] } to_export<-cbind(with_gaps,ordered_with_gaps) colnames(to_export)<-c(paste(dino_names[j],"_actual_with_gaps",sep=""),paste(dino_names[j],"_ordered_with_gaps",sep="")) all_data_plus_sorted<- cbind(all_data_plus_sorted,to_export) } all_data_plus_sorted attach(as.data.frame(all_data_plus_sorted)) print(dinosaurs) detach(as.data.frame(all_data_plus_sorted)) detach(dinosaurs) #split species Dyoplosaurus_acutosquameus_ROM734 <- Dyoplosaurus_acutosquameus_ROM734[!is.na(Dyoplosaurus_acutosquameus_ROM734)] Staurikosaurus_pricei <- Staurikosaurus_pricei[!is.na(Staurikosaurus_pricei)] Opistocoelocaudia_skarzynskii <- Opistocoelocaudia_skarzynskii[!is.na(Opistocoelocaudia_skarzynskii)] Stegosaurus_stenops._NHMUKPVR36730 <- Stegosaurus_stenops._NHMUKPVR36730[!is.na(Stegosaurus_stenops._NHMUKPVR36730)] Giraffatitan_brancai <- Giraffatitan_brancai[!is.na(Giraffatitan_brancai)] Camptosaurus <- Camptosaurus[!is.na(Camptosaurus)] Camptosaurus_prestwichii <- Camptosaurus_prestwichii[!is.na(Camptosaurus_prestwichii)] A_greppini <- A_greppini[!is.na(A_greppini)] Astrophocaudia_slaughteri_SMU61732 <- Astrophocaudia_slaughteri_SMU61732[!is.na(Astrophocaudia_slaughteri_SMU61732)] Tastavinsaurus_sanzi_gen_MPZ999 <- Tastavinsaurus_sanzi_gen_MPZ999[!is.na(Tastavinsaurus_sanzi_gen_MPZ999)] MOZ_Pv1221 <- MOZ_Pv1221[!is.na(MOZ_Pv1221)] Mamenchisaurus <- Mamenchisaurus[!is.na(Mamenchisaurus)] Bromtosaurus_CMNo3018 <- Bromtosaurus_CMNo3018[!is.na(Bromtosaurus_CMNo3018)] Lufengosaurus_Hueni <- Lufengosaurus_Hueni[!is.na(Lufengosaurus_Hueni)] Mamenchisaurus_hochuanensi <- Mamenchisaurus_hochuanensi[!is.na(Mamenchisaurus_hochuanensi)] Spinosaurus_FSACKK11888 <- Spinosaurus_FSACKK11888[!is.na(Spinosaurus_FSACKK11888)] Buitreraptor_MPCNPV370 <- Buitreraptor_MPCNPV370[!is.na(Buitreraptor_MPCNPV370)] Buitreraptor_MPCA245 <- Buitreraptor_MPCA245[!is.na(Buitreraptor_MPCA245)] Huabeisaurus_allocotus_HBV20001 <- Huabeisaurus_allocotus_HBV20001[!is.na(Huabeisaurus_allocotus_HBV20001)] Tethyshadros_insularis_SC57021 <- Tethyshadros_insularis_SC57021[!is.na(Tethyshadros_insularis_SC57021)] Compsognathus_longipes_CNJ79 <- Compsognathus_longipes_CNJ79[!is.na(Compsognathus_longipes_CNJ79)] Archaeopteryx12 <- Archaeopteryx12[!is.na(Archaeopteryx12)] Sinosauropteryx_NIGP127586 <- Sinosauropteryx_NIGP127586[!is.na(Sinosauropteryx_NIGP127586)] Sinosauropteryx_NIGP_127587 <- Sinosauropteryx_NIGP_127587[!is.na(Sinosauropteryx_NIGP_127587)] Tetonosaurus_tilletti_AMNH3040 <- Tetonosaurus_tilletti_AMNH3040[!is.na(Tetonosaurus_tilletti_AMNH3040)] Bambiraptor_feinbergi_FIP001 <- Bambiraptor_feinbergi_FIP001[!is.na(Bambiraptor_feinbergi_FIP001)] Seimosaurus.halli_NMMNH3690 <- 
Seimosaurus.halli_NMMNH3690[!is.na(Seimosaurus.halli_NMMNH3690)] Diluvicursor_pickeringi_NMVP221080 <- Diluvicursor_pickeringi_NMVP221080[!is.na(Diluvicursor_pickeringi_NMVP221080)] Zhejiungosuurus_lishuiensis_ZMNHM8718 <- Zhejiungosuurus_lishuiensis_ZMNHM8718[!is.na(Zhejiungosuurus_lishuiensis_ZMNHM8718)] Tianyulong_confuciusi_STMN.263 <- Tianyulong_confuciusi_STMN.263[!is.na(Tianyulong_confuciusi_STMN.263)] Lusotitan_atalaiensis <- Lusotitan_atalaiensis[!is.na(Lusotitan_atalaiensis)] Nemegtonykus_citus_MPCD100203 <- Nemegtonykus_citus_MPCD100203[!is.na(Nemegtonykus_citus_MPCD100203)] Elaphrosaurus_bambergi_MBR4960 <- Elaphrosaurus_bambergi_MBR4960[!is.na(Elaphrosaurus_bambergi_MBR4960)] Nomingia_gobiensis_GIN100119 <- Nomingia_gobiensis_GIN100119[!is.na(Nomingia_gobiensis_GIN100119)] Nomingia_gobiensis_MPCD100119 <- Nomingia_gobiensis_MPCD100119[!is.na(Nomingia_gobiensis_MPCD100119)] Chirostenotes_pergracilis <- Chirostenotes_pergracilis[!is.na(Chirostenotes_pergracilis)] Seismosaurus_hallorum_NMMNHP3690 <- Seismosaurus_hallorum_NMMNHP3690[!is.na(Seismosaurus_hallorum_NMMNHP3690)] Heterodontosaurus_tucki_SAMPKK1332 <- Heterodontosaurus_tucki_SAMPKK1332[!is.na(Heterodontosaurus_tucki_SAMPKK1332)] Jianianhualong_tengi_DLXH1218 <- Jianianhualong_tengi_DLXH1218[!is.na(Jianianhualong_tengi_DLXH1218)] Yinlong_downsi_IVPPV18685 <- Yinlong_downsi_IVPPV18685[!is.na(Yinlong_downsi_IVPPV18685)] Neimongosaurus_yangi_LHV0001 <- Neimongosaurus_yangi_LHV0001[!is.na(Neimongosaurus_yangi_LHV0001)] Magnapaulia_laticaudus_LACM17715 <- Magnapaulia_laticaudus_LACM17715[!is.na(Magnapaulia_laticaudus_LACM17715)] Ouranosaurus_nigeriensis <- Ouranosaurus_nigeriensis[!is.na(Ouranosaurus_nigeriensis)] Dreadnoughtus_schrani_MPMPV1156 <- Dreadnoughtus_schrani_MPMPV1156[!is.na(Dreadnoughtus_schrani_MPMPV1156)] Pectodens_zhenyuensis_IVPPV18578 <- Pectodens_zhenyuensis_IVPPV18578[!is.na(Pectodens_zhenyuensis_IVPPV18578)] Dilophosaurus_wetherilli <- Dilophosaurus_wetherilli[!is.na(Dilophosaurus_wetherilli)] Gobihadros_mongoliensis_MPCD100746 <- Gobihadros_mongoliensis_MPCD100746[!is.na(Gobihadros_mongoliensis_MPCD100746)] Gobihadros_mongoliensis_MPCD100755 <- Gobihadros_mongoliensis_MPCD100755[!is.na(Gobihadros_mongoliensis_MPCD100755)] Auroraceratops_rugosus_GJ07913 <- Auroraceratops_rugosus_GJ07913[!is.na(Auroraceratops_rugosus_GJ07913)] Patagotitan_mayorum_MPEFPV <- Patagotitan_mayorum_MPEFPV[!is.na(Patagotitan_mayorum_MPEFPV)] Eoraptor_lunensi_PVSJ512 <- Eoraptor_lunensi_PVSJ512[!is.na(Eoraptor_lunensi_PVSJ512)] Corythosaurus_casuarius <- Corythosaurus_casuarius[!is.na(Corythosaurus_casuarius)] Caihong._Juji_PMoLB00175 <- Caihong._Juji_PMoLB00175[!is.na(Caihong._Juji_PMoLB00175)] Eosinopteryx_brevipenna_YFGPT5197 <- Eosinopteryx_brevipenna_YFGPT5197[!is.na(Eosinopteryx_brevipenna_YFGPT5197)] Rahonavis_ostromi_UA8656 <- Rahonavis_ostromi_UA8656[!is.na(Rahonavis_ostromi_UA8656)] Changyuraptor_yangi_HGB016 <- Changyuraptor_yangi_HGB016[!is.na(Changyuraptor_yangi_HGB016)] Herrerasaurus_ischigualastensis_PVL2566 <- Herrerasaurus_ischigualastensis_PVL2566[!is.na(Herrerasaurus_ischigualastensis_PVL2566)] Herrerasaurus_ischigualastensis_UNSJ53 <- Herrerasaurus_ischigualastensis_UNSJ53[!is.na(Herrerasaurus_ischigualastensis_UNSJ53)] Ischioceratops_zhuchengensis <- Ischioceratops_zhuchengensis[!is.na(Ischioceratops_zhuchengensis)] Koreaceratops_hwaseongensis <- Koreaceratops_hwaseongensis[!is.na(Koreaceratops_hwaseongensis)] # CHOOSE SAMPLE TO ANALYSE 
#_________________________________________________________________________________________________
# choose sample
name_to_test <- "Koreaceratops_hwaseongensis"
y_val <- eval(parse(text = paste(name_to_test, "_actual_with_gaps", sep="")))
x_val <- 1:length(y_val)
# USE AIC TO DECIDE HOW MANY BREAKS TO USE
#_________________________________________________________________________________________________
# extract AIC for models with 1-3 breakpoints
my_max_it = 10
all_mods <- NULL
for(h in 1:4) {
  mod1 <- segmented(lm(y_val ~ x_val), seg.Z = ~x_val, psi = NA,
                    control = seg.control(K = h, quant = TRUE, it.max = my_max_it),
                    model = TRUE, nboot = 50)
  all_mods <- rbind(all_mods, c(h, extractAIC(mod1)[2]))
}
all_mods
my_K <- subset(all_mods, all_mods[,2] == min(all_mods[,2]))[1]
When I run the last section of the code I get the error "Error in crossprod(x, y) : requires numeric/complex matrix/vector arguments". I am not sure why, because I have put the data in a data frame. Is it because I'm importing the file incorrectly? I'm not sure how to fix it.
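A common cause of this particular crossprod() error is that the response ends up non-numeric, for example when a column is read in as a factor or character and then picked up by eval(parse(...)) through attach(). A minimal diagnostic sketch, using the objects created above; the coercion is only needed if the check shows y_val is not numeric:
str(y_val)                      # what did eval(parse(...)) actually return?
class(x_val)                    # x_val should be a plain integer/numeric vector
# if the column came through as factor/character, coerce it before fitting:
if (!is.numeric(y_val)) y_val <- as.numeric(as.character(y_val))
# also confirm the NA padding has left enough real observations to fit a model:
sum(!is.na(y_val))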
R vector memory exhausted for calculating tensor products
I am calculating the tensor products of 60,000 pairs of 28*28 matrices in RStudio (version 3.5.2), and the console shows me "Error: vector memory exhausted (limit reached?)". I don't think my MacBook Pro would have such low capacity (16GB RAM). I tried the mclapply method for parallel computing but still don't work. Can anyone provide me some insights? Thanks a lot! If necessary, the following are my codes. I cannot run the last line. install.packages("keras") library(keras) install_keras() install_keras(method = "conda") library(keras) mnist <- dataset_mnist() str(mnist) trainx <- mnist$train$x trainy <- mnist$train$y testx <- mnist$test$x testy <- mnist$test$y trainxr <- trainx trainxg <- trainx trainxb <- trainx testxr <- testx testxg <- testx testxb <- testx #training data i <- 1 for(i in i:60000){ randomr <- sample (0:255, 1) randomg <- sample (0:255, 1) randomb <- sample (0:255, 1) trainxr[i,,] <- (randomr/255)*(trainx[i,,]/255) trainxg[i,,] <- (randomg/255)*(trainx[i,,]/255) trainxb[i,,] <- (randomb/255)*(trainx[i,,]/255) i <- i+1 } #testing data j <- 1 for(j in j:10000){ randomr <- sample (0:255, 1) randomg <- sample (0:255, 1) randomb <- sample (0:255, 1) testxr[j,,] <- (randomr/255)*(testx[j,,]/255) testxg[j,,] <- (randomg/255)*(testx[j,,]/255) testxb[j,,] <- (randomb/255)*(testx[j,,]/255) j <- j+1 } #for training k <- 1 for(k in k:60000){ randomminus <- sample (0:255, 1) matrixminus <- matrix((randomminus/255):(randomminus/255), nrow = 28, ncol = 28) trainxr[k,,] <- trainxr[k,,] - matrixminus trainxr[k,,] <- abs(trainxr[k,,]) trainxg[k,,] <- trainxg[k,,] - matrixminus trainxg[k,,] <- abs(trainxg[k,,]) trainxb[k,,] <- trainxb[k,,] - matrixminus trainxb[k,,] <- abs(trainxb[k,,]) k <- k+1 } #for testing l <- 1 for(l in l:10000){ randomminus <- sample (0:255, 1) matrixminus <- matrix((randomminus/255):(randomminus/255), nrow = 28, ncol = 28) trainxr[l,,] <- trainxr[l,,] - matrixminus trainxr[l,,] <- abs(trainxr[l,,]) trainxg[l,,] <- trainxg[l,,] - matrixminus trainxg[l,,] <- abs(trainxg[l,,]) trainxb[l,,] <- trainxb[l,,] - matrixminus trainxb[l,,] <- abs(trainxb[l,,]) l <- l+1 } #tensor product stepone <- matrix(1:1, nrow=21952, ncol=28) steptwo <- matrix(1:1, nrow=28, ncol=28) trainxtensor_a <- trainxr %x% trainxg
I'm guessing you intend to collapse the 2nd and 3rd dimensions in that tensor product. Perhaps you want something like this:
library(tensor)
trainxtensor_a <- tensor(trainxr, trainxg, c(2,3), c(2,3))
Although you should first try a smaller dataset to check that it is doing what you expect:
trainxtensor_a <- tensor(trainxr[1:5,,], trainxg[1:5,,], c(2,3), c(2,3))
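For context on why the original line exhausts memory, a rough back-of-the-envelope check (simple arithmetic, not from the original thread): %x% is kronecker(), whose result has the product of the two arrays' dimensions, while tensor() contracts dimensions 2 and 3 away and leaves a 60000 x 60000 matrix, which is itself already larger than 16 GB:
n <- 60000
# full Kronecker product of two 60000 x 28 x 28 arrays: dimensions multiply
kron_bytes <- n^2 * 28^2 * 28^2 * 8
kron_bytes / 1024^4          # on the order of 16,000 TB, never going to fit
# contracted result from tensor(): a 60000 x 60000 matrix of doubles
n * n * 8 / 1024^3           # ~27 GB, still more than 16 GB of RAM
So even with tensor() you will likely need to work on subsets or in batches on this machine.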
I want to use a list in a for loop. list[[i]] works fine, but why doesn't res <- vector("list", n) work?
I am using a for loop to loop through two different matrices. The code looks like this:
x <- matrix(rnorm(1806), 7, 258)
x2 <- matrix(rnorm(1032), 4, 258)
samp_size <- 3
iter <- 1000
subs <- matrix(sample(1:nrow(x), samp_size*iter, replace=T), ncol=samp_size, byrow=T)
subs2 <- matrix(sample(1:nrow(x2), samp_size*iter, replace=T), ncol=samp_size, byrow=T)
for(j in 1:nrow(subs)){
  ad <- x[subs[j,],]
  ad1 <- x2[subs2[j,],]
  rd <- rbind(ad, ad1)
  dis <- dist(rd, method="euclidean")
  #CV <- sd(unlist(rd), na.rm=TRUE)/mean(unlist(rd), na.rm=TRUE)*100
  dis2 <- dis[dis!=0]
  list[[j]] <- mean(dis2)
  l <- unlist(list)
  l <- na.omit(l)
  sdis <- dist(ad, method="euclidean")
  sdis2 <- sdis[sdis!=0]
  res <- vector("list", 1000)
  res[[j]] <- mean(sdis2)
  l2 <- unlist(res)
  l2 <- na.omit(l2)
}
list[[j]] works just fine. The result of each iteration is added to it as a list element based on the value of j. However, res ends up being full of NULL with only the last element containing a value. I am not sure what is happening here. I would like res[[j]] to behave the same way as list[[j]] does. Help much appreciated.
The problem is where you initialize res. Because res <- vector("list", 1000) sits inside the loop, the list is recreated (and emptied) on every iteration, so only the value assigned in the final iteration survives. Initialize it once, before the loop:
res <- vector("list", nrow(subs))   # or rep(list(NA), nrow(subs))
and delete the res <- vector("list", 1000) line from inside the loop.
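A minimal sketch of the corrected loop structure, using only the objects from the question (just the res handling changes):
res <- vector("list", nrow(subs))      # created once, before the loop
for(j in 1:nrow(subs)){
  ad  <- x[subs[j,],]
  sdis  <- dist(ad, method="euclidean")
  sdis2 <- sdis[sdis!=0]
  res[[j]] <- mean(sdis2)              # filled element by element
}
l2 <- na.omit(unlist(res))             # collapse once, after the loop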
R apply function to a list of price data
in the code below I am grabbing data for three symbols, then I want to apply a simple function(which is a trading strategy) to this data. Ideally, I would then run statistics on those returns, such as those native to PerformanceAnalytics. library("quantmod") library("PerformanceAnalytics") options(scipen=999) PriceData <- new.env() Symbols <- c("SPY", "QQQ", "IWM") StartDt <- as.Date("2015-01-01") suppressWarnings(getSymbols(Symbols, src="yahoo", env=PriceData, from=StartDt)) x <- list() for (i in 1:length(Symbols)) { x[[i]] <- get(Symbols[i], pos=PriceData) } SYSTEM <- function(data){ ret<- Delt(Cl(x[[i]]),Op(x[[i]]),type = 'arithmetic') mavga <- SMA(Cl(x[[i]]), n=10) mavgb <- SMA(Cl(x[[i]]), n=20) sig <- ifelse(mavga>mavgb,1,0)+ifelse(mavgb>mavga,-1,0) sig <- lag(sig,1) sig[is.na(sig)]=0 strategyreturn <- sig * ret return(strategyreturn) } ######I'm doing something wrong here###### y <- lappy(x,SYSTEM) z <- do.call("cbind", y) PerformanceTable <- function(returns){ scalar <- 252 CS <- t(Return.cumulative(returns, geometric = FALSE)) SR <- t(SharpeRatio.annualized(returns, scale=scalar, geometric = FALSE)) SOR <- t(SortinoRatio(returns)) MDD <- t(maxDrawdown(returns)) WP <- colSums(returns > 0)/colSums(returns != 0) WP <- as.data.frame(WP) ASD = t(sd.annualized(returns, scale=scalar)) Stat <- cbind(CS,SR,SOR,MDD,ASD,WP) colnames(Stat) <- c("Profit","SharpeRatio", "Sortino","MaxDrawdown", "AnnStdDev", "WinPct") print("Performance Table") print(Stat) return (Stat) } Perf <- PerformanceTable(y)
You are almost there; just replace x[[i]] with data in your function:
SYSTEM <- function(data){
  ret <- Delt(Cl(data), Op(data), type = 'arithmetic')
  mavga <- SMA(Cl(data), n=10)
  mavgb <- SMA(Cl(data), n=20)
  sig <- ifelse(mavga>mavgb,1,0) + ifelse(mavgb>mavga,-1,0)
  sig <- lag(sig,1)
  sig[is.na(sig)] = 0
  strategyreturn <- sig * ret
  return(strategyreturn)
}
and fix the typo lappy() -> lapply():
y <- lapply(x, SYSTEM)
z <- do.call("cbind", y)
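One more detail worth checking, which the original answer does not cover: PerformanceAnalytics functions expect an xts/matrix of returns rather than a list, so it is probably z, the column-bound returns, that should be passed to PerformanceTable. A small sketch, assuming the objects defined above:
colnames(z) <- Symbols        # label the return columns with the tickers
Perf <- PerformanceTable(z)   # pass the merged xts, not the list y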
improve execution time in a for loop (R)
Here is a reproducible example of a for loop. Since I want to do 3000 iterations and my matrix is much bigger than this reproducible example, the computer crashes. Any ideas on how I can do it? I have read that loops are not recommended in R and that one should instead use vectorised code and the apply functions, but I wasn't able to build my formula with those functions. The matrix:
row.names <- c('A2003','B2010','C2011','D2010','E2001','F2005','F2009','G2003','G2007','H2004','H2010')
sp1 <- c(4,83,1,2,4,3,1,5,7,2,4)
sp2 <- c(5,0,2,3,10,5,0,2,4,3,1)
sp3 <- c(7,2,4,8,7,2,4,83,1,5,7)
sp4 <- c(0,2,4,2,4,12,1,5,7,2,4)
Site <- c('A','B','C','D','E','F','F','G','G','H','H')
Year <- c('2003','2010','2011','2010','2001','2005','2009','2003','2007','2004','2010')
Obs <- c(1,1,1,4,9,6,8,2,5,2,3)
ID <- c('A2003','B2010','C2011','D2010','E2001','F2005','F2009','G2003','G2007','H2004','H2010')
df <- data.frame(row.names, sp1, sp2, sp3, sp4, Site, Year, Obs, ID)
rownames(df) <- df[,1]
df[,1] <- NULL
df
df.1 <- subset(df, Obs == 1)
df.more <- subset(df, Obs >= 2)
df.1
df.more
The loop:
require(vegan)
iterations <- 3000
out <- vector("list", iterations)
for(i in 1:iterations){
  rnd.more <- do.call(rbind, lapply(split(df.more, df.more$Site),
                      function(df.more) df.more[sample(nrow(df.more), 1, replace=FALSE), ]))
  rnd.df <- rbind(df.1, rnd.more)
  rnd.df.bc <- as.matrix(vegdist(rnd.df[1:4], method="bray"))
  rnd.df.bc[lower.tri(rnd.df.bc, diag=TRUE)] <- NA
  triang <- rnd.df.bc[!is.na(rnd.df.bc)]
  mean.bc <- mean(triang)
  out[[i]] <- list(rnd = rnd.df, bc = rnd.df.bc, ave = mean.bc)
}
Extraction of the results:
all.rnd.df <- lapply(out, "[[", "rnd")
capture.output(all.rnd.df, file="all.rnd.df.txt")
all.rnd.df.bc <- lapply(out, "[[", "bc")
capture.output(all.rnd.df.bc, file="all.rnd.df.bc.txt")
all.triang <- lapply(out, "[[", "ave")
capture.output(all.triang, file="all.triang.txt")
Precompute the sample indices:
idx <- lapply(1:iterations, function(x) {
  tapply(1:nrow(df.more), as.character(df.more$Site), function(y) {
    if(length(y) == 1) y else sample(y, 1)
  })
})
idx <- lapply(idx, function(ids) c(1:nrow(df.1), ids + nrow(df.1)))
Precompute a placeholder data.frame to index:
rnd.df <- rbind(df.1, df.more)
Now you just index the precomputed objects instead of rebuilding them in every iteration:
iterations <- 3000
out <- vector("list", iterations)
for(i in 1:iterations){
  rnd.df.bc <- as.matrix(vegdist(rnd.df[idx[[i]], 1:4], method="bray"))
  rnd.df.bc[lower.tri(rnd.df.bc, diag=TRUE)] <- NA
  triang <- rnd.df.bc[!is.na(rnd.df.bc)]
  mean.bc <- mean(triang)
  out[[i]] <- list(rnd = rnd.df, bc = rnd.df.bc, ave = mean.bc)
}
Benchmarks (f1 = this method, f2 = the OP's code):
> microbenchmark(f1(), f2(), times=5L)
Unit: seconds
 expr      min        lq      mean   median        uq       max neval
 f1()  2.21069  4.877017  4.666875  5.27416  5.444411  5.528096     5
 f2() 13.54813 13.554965 19.500247 14.51089 27.074520 28.812732     5
Make it parallel (note that %dopar% is what actually distributes the work; %do% runs sequentially):
library(foreach)
library(doSNOW)
cl <- makeCluster(3)
registerDoSNOW(cl)
out <- foreach(i = 1:iterations, .packages=c('vegan')) %dopar% {
  rnd.df.bc <- as.matrix(vegdist(rnd.df[idx[[i]], 1:4], method="bray"))
  rnd.df.bc[lower.tri(rnd.df.bc, diag=TRUE)] <- NA
  triang <- rnd.df.bc[!is.na(rnd.df.bc)]
  mean.bc <- mean(triang)
  list(rnd = rnd.df, bc = rnd.df.bc, ave = mean.bc)
}
stopCluster(cl)
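Whichever version you use, the per-iteration means can be pulled out in one step afterwards; a small usage sketch based on the structure of out above:
all.ave <- sapply(out, "[[", "ave")   # vector of 3000 mean Bray-Curtis values
summary(all.ave)
hist(all.ave)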
Not being familiar with the vegan package, I'm a little limited in what advice I can give you. For the most part, you've constructed your for loop well and as you can see below, you don't gain much by converting it to a function and running it through lapply. I think your best bet is to parallelize your code. In the example below, if you convert your for loop to a function and use parLapply, you can shave a couple seconds off if you include the cluster build time. If you exclude the cluster build time, it's about 5 times faster on my 7 cores. Changes in computational time will vary by the number of cores you can run on. But I think that may be your best option for now. library(parallel) library(vegan) row.names <- c('A2003','B2010','C2011','D2010','E2001','F2005','F2009','G2003','G2007','H2004','H2010') sp1 <- c(4,83,1,2,4,3,1,5,7,2,4) sp2 <- c(5,0,2,3,10,5,0,2,4,3,1) sp3 <- c(7,2,4,8,7,2,4,83,1,5,7) sp4 <- c(0,2,4,2,4,12,1,5,7,2,4) Site <- c('A','B','C','D','E','F','F','G','G','H','H') Year <- c('2003','2010','2011','2010','2001','2005','2009','2003','2007','2004','2010') Obs <- c(1,1,1,4,9,6,8,2,5,2,3) ID <- c('A2003','B2010','C2011','D2010','E2001','F2005','F2009','G2003','G2007','H2004','H2010') df<- data.frame(row.names, sp1, sp2, sp3, sp4, Site, Year, Obs, ID) rownames(df) <- df[,1] df[,1] <- NULL df df.1 <- subset(df, Obs == 1) df.more <- subset(df, Obs >= 2) df.1 df.more more.fun <- function(df.more, df.1) { rnd.more <- do.call(rbind, lapply(split(df.more, df.more$Site), function(df.more) df.more[sample(nrow(df.more), 1,replace=FALSE) , ]) ) rnd.df <- rbind(df.1,rnd.more) rnd.df.bc <- as.matrix(vegdist(rnd.df[1:4], method="bray")) rnd.df.bc[lower.tri(rnd.df.bc,diag=TRUE)] <- NA triang <- rnd.df.bc[!is.na(rnd.df.bc)] mean.bc <- mean(triang) list(rnd = rnd.df, bc = rnd.df.bc, ave = mean.bc) } start.orig <- Sys.time() set.seed(pi) iterations <- 3000 out <- vector("list", iterations) for(i in 1:iterations){ rnd.more <- do.call(rbind, lapply(split(df.more, df.more$Site), function(df.more) df.more[sample(nrow(df.more), 1,replace=FALSE) , ]) ) rnd.df <- rbind(df.1,rnd.more) rnd.df.bc <- as.matrix(vegdist(rnd.df[1:4], method="bray")) rnd.df.bc[lower.tri(rnd.df.bc,diag=TRUE)] <- NA triang <- rnd.df.bc[!is.na(rnd.df.bc)] mean.bc <- mean(triang) out[[i]] <- list(rnd = rnd.df, bc = rnd.df.bc, ave = mean.bc) } end.orig <- Sys.time() start.apply <- Sys.time() fn = out <- lapply(1:3000, function(i) more.fun(df.more, df.1)) end.apply <- Sys.time() start.parallel <- Sys.time() cl <- makeCluster(7) clusterEvalQ(cl, library(vegan)) clusterExport(cl, c("df.more", "df.1", "more.fun")) start.parallel.apply <- Sys.time() out <- parLapply(cl, 1:3000, function(i) more.fun(df.more, df.1)) end.parallel <- Sys.time() #* Compare times end.orig - start.orig end.apply - start.apply end.parallel - start.parallel end.parallel - start.parallel.apply (the time comparisons here are pretty crude)
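If reproducibility matters when running these simulations in parallel, note that set.seed() on the master does not control the workers' random streams. A small sketch using the parallel package's built-in RNG stream support, reusing the cluster setup above (this detail is not covered in the answers):
library(parallel)
library(vegan)
cl <- makeCluster(7)
clusterSetRNGStream(cl, iseed = 314)   # gives each worker its own reproducible stream
clusterEvalQ(cl, library(vegan))
clusterExport(cl, c("df.more", "df.1", "more.fun"))
out <- parLapply(cl, 1:3000, function(i) more.fun(df.more, df.1))
stopCluster(cl)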