Histogram does not appear in R
library(downloader)
url <- "https://raw.githubusercontent.com/genomicsclass/dagdata/master/inst/extdata/femaleControlsPopulation.csv"
filename <- "femaleControlsPopulation"
download(url, destfile=filename)
dat <- read.csv(filename)
x <- unlist( read.csv(filename))
set.seed(1)
n <- 10000
averages5 <- vector("numeric", n)
for (i in 1:n){
x <- sample(x,5)
averages5[i] <- mean(x)
}
hist(averages5)
mean(abs(averages5-mean(x))>1)
My problem is that the histogram appears empty and the mean is 0. Can anyone help?
The problem is that inside your loop you overwrite x with sample(x, 5), so after the first iteration x holds only 5 values; every later sample is just a permutation of those same 5 numbers. That is why the histogram collapses to what looks like an empty plot and why mean(abs(averages5 - mean(x)) > 1) is 0. Keep x intact and store only the sample mean. This will do what you want:
url <- "https://raw.githubusercontent.com/genomicsclass/dagdata/master/inst/extdata/femaleControlsPopulation.csv"
dat <- read.table(file=url, header=TRUE)
x <- dat$Bodyweight
n <- 10000
averages5 <- replicate(n, mean(sample(x,5)))
hist(averages5)
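If you also want the proportion of sample averages that fall more than 1 unit from the population mean (the quantity computed at the end of the original code), here is a minimal line building on the corrected objects above:
# proportion of the 10000 averages that are more than 1 unit away from mean(x)
mean(abs(averages5 - mean(x)) > 1)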
Related
Requires numeric/complex matrix/vector argument
So I have a large data set that I have imported and split up. I've made sure to attach everything and tried to run a code to determine the number of breakpoints using AIC. rm(list=ls()) library(Matching) library(segmented) dinosaurs=read.csv("C:/Users/user/Desktop/NEW PLOTS FOR DINOS/centrum_input_fin.csv") attach(dinosaurs) names(dinosaurs) dino_names <- names(dinosaurs) #NEED TO EXPORT FILES (EXPORT THE ALL_DATA_PLUS_SORTED OUT) all_data_plus_sorted<-NULL for(j in 1:length(dino_names)) { with_gaps<-eval(parse(text = dino_names[j])) gaps <- which(is.na(with_gaps)) non_gaps <-which(1:length(with_gaps) %in%gaps==FALSE) sorted_without_gaps <- sort(with_gaps[!is.na(with_gaps)],decreasing=TRUE) ordered_with_gaps<-rep(NA,length(with_gaps)) for(k in 1:length(non_gaps)) { ordered_with_gaps[non_gaps[k]] <- sorted_without_gaps[k] } to_export<-cbind(with_gaps,ordered_with_gaps) colnames(to_export)<-c(paste(dino_names[j],"_actual_with_gaps",sep=""),paste(dino_names[j],"_ordered_with_gaps",sep="")) all_data_plus_sorted<- cbind(all_data_plus_sorted,to_export) } all_data_plus_sorted attach(as.data.frame(all_data_plus_sorted)) print(dinosaurs) detach(as.data.frame(all_data_plus_sorted)) detach(dinosaurs) #split species Dyoplosaurus_acutosquameus_ROM734 <- Dyoplosaurus_acutosquameus_ROM734[!is.na(Dyoplosaurus_acutosquameus_ROM734)] Staurikosaurus_pricei <- Staurikosaurus_pricei[!is.na(Staurikosaurus_pricei)] Opistocoelocaudia_skarzynskii <- Opistocoelocaudia_skarzynskii[!is.na(Opistocoelocaudia_skarzynskii)] Stegosaurus_stenops._NHMUKPVR36730 <- Stegosaurus_stenops._NHMUKPVR36730[!is.na(Stegosaurus_stenops._NHMUKPVR36730)] Giraffatitan_brancai <- Giraffatitan_brancai[!is.na(Giraffatitan_brancai)] Camptosaurus <- Camptosaurus[!is.na(Camptosaurus)] Camptosaurus_prestwichii <- Camptosaurus_prestwichii[!is.na(Camptosaurus_prestwichii)] A_greppini <- A_greppini[!is.na(A_greppini)] Astrophocaudia_slaughteri_SMU61732 <- Astrophocaudia_slaughteri_SMU61732[!is.na(Astrophocaudia_slaughteri_SMU61732)] Tastavinsaurus_sanzi_gen_MPZ999 <- Tastavinsaurus_sanzi_gen_MPZ999[!is.na(Tastavinsaurus_sanzi_gen_MPZ999)] MOZ_Pv1221 <- MOZ_Pv1221[!is.na(MOZ_Pv1221)] Mamenchisaurus <- Mamenchisaurus[!is.na(Mamenchisaurus)] Bromtosaurus_CMNo3018 <- Bromtosaurus_CMNo3018[!is.na(Bromtosaurus_CMNo3018)] Lufengosaurus_Hueni <- Lufengosaurus_Hueni[!is.na(Lufengosaurus_Hueni)] Mamenchisaurus_hochuanensi <- Mamenchisaurus_hochuanensi[!is.na(Mamenchisaurus_hochuanensi)] Spinosaurus_FSACKK11888 <- Spinosaurus_FSACKK11888[!is.na(Spinosaurus_FSACKK11888)] Buitreraptor_MPCNPV370 <- Buitreraptor_MPCNPV370[!is.na(Buitreraptor_MPCNPV370)] Buitreraptor_MPCA245 <- Buitreraptor_MPCA245[!is.na(Buitreraptor_MPCA245)] Huabeisaurus_allocotus_HBV20001 <- Huabeisaurus_allocotus_HBV20001[!is.na(Huabeisaurus_allocotus_HBV20001)] Tethyshadros_insularis_SC57021 <- Tethyshadros_insularis_SC57021[!is.na(Tethyshadros_insularis_SC57021)] Compsognathus_longipes_CNJ79 <- Compsognathus_longipes_CNJ79[!is.na(Compsognathus_longipes_CNJ79)] Archaeopteryx12 <- Archaeopteryx12[!is.na(Archaeopteryx12)] Sinosauropteryx_NIGP127586 <- Sinosauropteryx_NIGP127586[!is.na(Sinosauropteryx_NIGP127586)] Sinosauropteryx_NIGP_127587 <- Sinosauropteryx_NIGP_127587[!is.na(Sinosauropteryx_NIGP_127587)] Tetonosaurus_tilletti_AMNH3040 <- Tetonosaurus_tilletti_AMNH3040[!is.na(Tetonosaurus_tilletti_AMNH3040)] Bambiraptor_feinbergi_FIP001 <- Bambiraptor_feinbergi_FIP001[!is.na(Bambiraptor_feinbergi_FIP001)] Seimosaurus.halli_NMMNH3690 <- 
Seimosaurus.halli_NMMNH3690[!is.na(Seimosaurus.halli_NMMNH3690)] Diluvicursor_pickeringi_NMVP221080 <- Diluvicursor_pickeringi_NMVP221080[!is.na(Diluvicursor_pickeringi_NMVP221080)] Zhejiungosuurus_lishuiensis_ZMNHM8718 <- Zhejiungosuurus_lishuiensis_ZMNHM8718[!is.na(Zhejiungosuurus_lishuiensis_ZMNHM8718)] Tianyulong_confuciusi_STMN.263 <- Tianyulong_confuciusi_STMN.263[!is.na(Tianyulong_confuciusi_STMN.263)] Lusotitan_atalaiensis <- Lusotitan_atalaiensis[!is.na(Lusotitan_atalaiensis)] Nemegtonykus_citus_MPCD100203 <- Nemegtonykus_citus_MPCD100203[!is.na(Nemegtonykus_citus_MPCD100203)] Elaphrosaurus_bambergi_MBR4960 <- Elaphrosaurus_bambergi_MBR4960[!is.na(Elaphrosaurus_bambergi_MBR4960)] Nomingia_gobiensis_GIN100119 <- Nomingia_gobiensis_GIN100119[!is.na(Nomingia_gobiensis_GIN100119)] Nomingia_gobiensis_MPCD100119 <- Nomingia_gobiensis_MPCD100119[!is.na(Nomingia_gobiensis_MPCD100119)] Chirostenotes_pergracilis <- Chirostenotes_pergracilis[!is.na(Chirostenotes_pergracilis)] Seismosaurus_hallorum_NMMNHP3690 <- Seismosaurus_hallorum_NMMNHP3690[!is.na(Seismosaurus_hallorum_NMMNHP3690)] Heterodontosaurus_tucki_SAMPKK1332 <- Heterodontosaurus_tucki_SAMPKK1332[!is.na(Heterodontosaurus_tucki_SAMPKK1332)] Jianianhualong_tengi_DLXH1218 <- Jianianhualong_tengi_DLXH1218[!is.na(Jianianhualong_tengi_DLXH1218)] Yinlong_downsi_IVPPV18685 <- Yinlong_downsi_IVPPV18685[!is.na(Yinlong_downsi_IVPPV18685)] Neimongosaurus_yangi_LHV0001 <- Neimongosaurus_yangi_LHV0001[!is.na(Neimongosaurus_yangi_LHV0001)] Magnapaulia_laticaudus_LACM17715 <- Magnapaulia_laticaudus_LACM17715[!is.na(Magnapaulia_laticaudus_LACM17715)] Ouranosaurus_nigeriensis <- Ouranosaurus_nigeriensis[!is.na(Ouranosaurus_nigeriensis)] Dreadnoughtus_schrani_MPMPV1156 <- Dreadnoughtus_schrani_MPMPV1156[!is.na(Dreadnoughtus_schrani_MPMPV1156)] Pectodens_zhenyuensis_IVPPV18578 <- Pectodens_zhenyuensis_IVPPV18578[!is.na(Pectodens_zhenyuensis_IVPPV18578)] Dilophosaurus_wetherilli <- Dilophosaurus_wetherilli[!is.na(Dilophosaurus_wetherilli)] Gobihadros_mongoliensis_MPCD100746 <- Gobihadros_mongoliensis_MPCD100746[!is.na(Gobihadros_mongoliensis_MPCD100746)] Gobihadros_mongoliensis_MPCD100755 <- Gobihadros_mongoliensis_MPCD100755[!is.na(Gobihadros_mongoliensis_MPCD100755)] Auroraceratops_rugosus_GJ07913 <- Auroraceratops_rugosus_GJ07913[!is.na(Auroraceratops_rugosus_GJ07913)] Patagotitan_mayorum_MPEFPV <- Patagotitan_mayorum_MPEFPV[!is.na(Patagotitan_mayorum_MPEFPV)] Eoraptor_lunensi_PVSJ512 <- Eoraptor_lunensi_PVSJ512[!is.na(Eoraptor_lunensi_PVSJ512)] Corythosaurus_casuarius <- Corythosaurus_casuarius[!is.na(Corythosaurus_casuarius)] Caihong._Juji_PMoLB00175 <- Caihong._Juji_PMoLB00175[!is.na(Caihong._Juji_PMoLB00175)] Eosinopteryx_brevipenna_YFGPT5197 <- Eosinopteryx_brevipenna_YFGPT5197[!is.na(Eosinopteryx_brevipenna_YFGPT5197)] Rahonavis_ostromi_UA8656 <- Rahonavis_ostromi_UA8656[!is.na(Rahonavis_ostromi_UA8656)] Changyuraptor_yangi_HGB016 <- Changyuraptor_yangi_HGB016[!is.na(Changyuraptor_yangi_HGB016)] Herrerasaurus_ischigualastensis_PVL2566 <- Herrerasaurus_ischigualastensis_PVL2566[!is.na(Herrerasaurus_ischigualastensis_PVL2566)] Herrerasaurus_ischigualastensis_UNSJ53 <- Herrerasaurus_ischigualastensis_UNSJ53[!is.na(Herrerasaurus_ischigualastensis_UNSJ53)] Ischioceratops_zhuchengensis <- Ischioceratops_zhuchengensis[!is.na(Ischioceratops_zhuchengensis)] Koreaceratops_hwaseongensis <- Koreaceratops_hwaseongensis[!is.na(Koreaceratops_hwaseongensis)] # CHOOSE SAMPLE TO ANALYSE 
#_________________________________________________________________________________________________
# choose sample
name_to_test <- "Koreaceratops_hwaseongensis"
y_val <- eval(parse(text = paste(name_to_test, "_actual_with_gaps", sep="")))
x_val <- 1:length(y_val)
# USE AIC TO DECIDE HOW MANY BREAKS TO USE
#_________________________________________________________________________________________________
# extract AIC for models with 1-3 breakpoints
my_max_it = 10
all_mods <- NULL
for(h in 1:4) {
  mod1 <- segmented(lm(y_val ~ x_val), seg.Z = ~x_val, psi = NA,
                    control = seg.control(K = h, quant = TRUE, it.max = my_max_it),
                    model = TRUE, nboot = 50)
  all_mods <- rbind(all_mods, c(h, extractAIC(mod1)[2]))
}
all_mods
my_K <- subset(all_mods, all_mods[,2] == min(all_mods[,2]))[1]
When I run the last section of the code I get the error "Error in crossprod(x, y) : requires numeric/complex matrix/vector arguments". I am not sure why, because I have put the data in a data frame. Is it because I'm importing the file incorrectly? I'm not sure how to fix it.
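A common cause of this particular crossprod() error is that the response ends up non-numeric, for example when a column is read in as a factor or character and then picked up by eval(parse(...)) through attach(). A minimal diagnostic sketch, using the objects created above; the coercion is only needed if the check shows y_val is not numeric:
str(y_val)                      # what did eval(parse(...)) actually return?
class(x_val)                    # x_val should be a plain integer/numeric vector
# if the column came through as factor/character, coerce it before fitting:
if (!is.numeric(y_val)) y_val <- as.numeric(as.character(y_val))
# also confirm the NA padding has left enough real observations to fit a model:
sum(!is.na(y_val))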
R vector memory exhausted for calculating tensor products
I am calculating the tensor products of 60,000 pairs of 28*28 matrices in RStudio (version 3.5.2), and the console shows me "Error: vector memory exhausted (limit reached?)". I don't think my MacBook Pro would have such low capacity (16GB RAM). I tried the mclapply method for parallel computing but still don't work. Can anyone provide me some insights? Thanks a lot! If necessary, the following are my codes. I cannot run the last line. install.packages("keras") library(keras) install_keras() install_keras(method = "conda") library(keras) mnist <- dataset_mnist() str(mnist) trainx <- mnist$train$x trainy <- mnist$train$y testx <- mnist$test$x testy <- mnist$test$y trainxr <- trainx trainxg <- trainx trainxb <- trainx testxr <- testx testxg <- testx testxb <- testx #training data i <- 1 for(i in i:60000){ randomr <- sample (0:255, 1) randomg <- sample (0:255, 1) randomb <- sample (0:255, 1) trainxr[i,,] <- (randomr/255)*(trainx[i,,]/255) trainxg[i,,] <- (randomg/255)*(trainx[i,,]/255) trainxb[i,,] <- (randomb/255)*(trainx[i,,]/255) i <- i+1 } #testing data j <- 1 for(j in j:10000){ randomr <- sample (0:255, 1) randomg <- sample (0:255, 1) randomb <- sample (0:255, 1) testxr[j,,] <- (randomr/255)*(testx[j,,]/255) testxg[j,,] <- (randomg/255)*(testx[j,,]/255) testxb[j,,] <- (randomb/255)*(testx[j,,]/255) j <- j+1 } #for training k <- 1 for(k in k:60000){ randomminus <- sample (0:255, 1) matrixminus <- matrix((randomminus/255):(randomminus/255), nrow = 28, ncol = 28) trainxr[k,,] <- trainxr[k,,] - matrixminus trainxr[k,,] <- abs(trainxr[k,,]) trainxg[k,,] <- trainxg[k,,] - matrixminus trainxg[k,,] <- abs(trainxg[k,,]) trainxb[k,,] <- trainxb[k,,] - matrixminus trainxb[k,,] <- abs(trainxb[k,,]) k <- k+1 } #for testing l <- 1 for(l in l:10000){ randomminus <- sample (0:255, 1) matrixminus <- matrix((randomminus/255):(randomminus/255), nrow = 28, ncol = 28) trainxr[l,,] <- trainxr[l,,] - matrixminus trainxr[l,,] <- abs(trainxr[l,,]) trainxg[l,,] <- trainxg[l,,] - matrixminus trainxg[l,,] <- abs(trainxg[l,,]) trainxb[l,,] <- trainxb[l,,] - matrixminus trainxb[l,,] <- abs(trainxb[l,,]) l <- l+1 } #tensor product stepone <- matrix(1:1, nrow=21952, ncol=28) steptwo <- matrix(1:1, nrow=28, ncol=28) trainxtensor_a <- trainxr %x% trainxg
I'm guessing you intend to collapse the 2nd and 3rd dimensions in that tensor product. Perhaps you want something like this:
library(tensor)
trainxtensor_a <- tensor(trainxr, trainxg, c(2,3), c(2,3))
Although you should first try a smaller dataset to check that it is doing what you expect:
trainxtensor_a <- tensor(trainxr[1:5,,], trainxg[1:5,,], c(2,3), c(2,3))
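For context on why the original line exhausts memory, a rough back-of-the-envelope check (simple arithmetic, not from the original thread): %x% is kronecker(), whose result has the product of the two arrays' dimensions, while tensor() contracts dimensions 2 and 3 away and leaves a 60000 x 60000 matrix, which is itself already larger than 16 GB:
n <- 60000
# full Kronecker product of two 60000 x 28 x 28 arrays: dimensions multiply
kron_bytes <- n^2 * 28^2 * 28^2 * 8
kron_bytes / 1024^4          # on the order of 16,000 TB, never going to fit
# contracted result from tensor(): a 60000 x 60000 matrix of doubles
n * n * 8 / 1024^3           # ~27 GB, still more than 16 GB of RAM
So even with tensor() you will likely need to work on subsets or in batches on this machine.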
I want to use a list in a for loop. list[[i]] works fine, but why doesn't res <- vector("list", n) work?
I am using a for loop to loop through two different matrices. The code looks like this:
x <- matrix(rnorm(1806), 7, 258)
x2 <- matrix(rnorm(1032), 4, 258)
samp_size <- 3
iter <- 1000
subs <- matrix(sample(1:nrow(x), samp_size*iter, replace=T), ncol=samp_size, byrow=T)
subs2 <- matrix(sample(1:nrow(x2), samp_size*iter, replace=T), ncol=samp_size, byrow=T)
for(j in 1:nrow(subs)){
  ad <- x[subs[j,],]
  ad1 <- x2[subs2[j,],]
  rd <- rbind(ad, ad1)
  dis <- dist(rd, method="euclidean")
  #CV <- sd(unlist(rd), na.rm=TRUE)/mean(unlist(rd), na.rm=TRUE)*100
  dis2 <- dis[dis!=0]
  list[[j]] <- mean(dis2)
  l <- unlist(list)
  l <- na.omit(l)
  sdis <- dist(ad, method="euclidean")
  sdis2 <- sdis[sdis!=0]
  res <- vector("list", 1000)
  res[[j]] <- mean(sdis2)
  l2 <- unlist(res)
  l2 <- na.omit(l2)
}
list[[j]] works just fine. The result of each iteration is added to it as a list element based on the value of j. However, res ends up being full of NULL with only the last element containing a value. I am not sure what is happening here. I would like res[[j]] to behave the same way as list[[j]] does. Help much appreciated.
The problem is where you initialize res. Because res <- vector("list", 1000) sits inside the loop, the list is recreated (and emptied) on every iteration, so only the value assigned in the final iteration survives. Initialize it once, before the loop:
res <- vector("list", nrow(subs))   # or rep(list(NA), nrow(subs))
and delete the res <- vector("list", 1000) line from inside the loop.
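A minimal sketch of the corrected loop structure, using only the objects from the question (just the res handling changes):
res <- vector("list", nrow(subs))      # created once, before the loop
for(j in 1:nrow(subs)){
  ad  <- x[subs[j,],]
  sdis  <- dist(ad, method="euclidean")
  sdis2 <- sdis[sdis!=0]
  res[[j]] <- mean(sdis2)              # filled element by element
}
l2 <- na.omit(unlist(res))             # collapse once, after the loop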
R apply function to a list of price data
in the code below I am grabbing data for three symbols, then I want to apply a simple function(which is a trading strategy) to this data. Ideally, I would then run statistics on those returns, such as those native to PerformanceAnalytics. library("quantmod") library("PerformanceAnalytics") options(scipen=999) PriceData <- new.env() Symbols <- c("SPY", "QQQ", "IWM") StartDt <- as.Date("2015-01-01") suppressWarnings(getSymbols(Symbols, src="yahoo", env=PriceData, from=StartDt)) x <- list() for (i in 1:length(Symbols)) { x[[i]] <- get(Symbols[i], pos=PriceData) } SYSTEM <- function(data){ ret<- Delt(Cl(x[[i]]),Op(x[[i]]),type = 'arithmetic') mavga <- SMA(Cl(x[[i]]), n=10) mavgb <- SMA(Cl(x[[i]]), n=20) sig <- ifelse(mavga>mavgb,1,0)+ifelse(mavgb>mavga,-1,0) sig <- lag(sig,1) sig[is.na(sig)]=0 strategyreturn <- sig * ret return(strategyreturn) } ######I'm doing something wrong here###### y <- lappy(x,SYSTEM) z <- do.call("cbind", y) PerformanceTable <- function(returns){ scalar <- 252 CS <- t(Return.cumulative(returns, geometric = FALSE)) SR <- t(SharpeRatio.annualized(returns, scale=scalar, geometric = FALSE)) SOR <- t(SortinoRatio(returns)) MDD <- t(maxDrawdown(returns)) WP <- colSums(returns > 0)/colSums(returns != 0) WP <- as.data.frame(WP) ASD = t(sd.annualized(returns, scale=scalar)) Stat <- cbind(CS,SR,SOR,MDD,ASD,WP) colnames(Stat) <- c("Profit","SharpeRatio", "Sortino","MaxDrawdown", "AnnStdDev", "WinPct") print("Performance Table") print(Stat) return (Stat) } Perf <- PerformanceTable(y)
You are almost there; just replace x[[i]] with data in your function:
SYSTEM <- function(data){
  ret <- Delt(Cl(data), Op(data), type = 'arithmetic')
  mavga <- SMA(Cl(data), n=10)
  mavgb <- SMA(Cl(data), n=20)
  sig <- ifelse(mavga>mavgb,1,0) + ifelse(mavgb>mavga,-1,0)
  sig <- lag(sig,1)
  sig[is.na(sig)] = 0
  strategyreturn <- sig * ret
  return(strategyreturn)
}
and fix the typo lappy() -> lapply():
y <- lapply(x, SYSTEM)
z <- do.call("cbind", y)
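One more detail worth checking, which the original answer does not cover: PerformanceAnalytics functions expect an xts/matrix of returns rather than a list, so it is probably z, the column-bound returns, that should be passed to PerformanceTable. A small sketch, assuming the objects defined above:
colnames(z) <- Symbols        # label the return columns with the tickers
Perf <- PerformanceTable(z)   # pass the merged xts, not the list y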
improve execution time in a for loop (R)
Here is a reproducible example of a for loop. Since I want to do 3000 iterations and my matrix is much bigger than this reproducible example, the computer crashes. Any ideas on how I can do it? I have read that loops are not recommended in R and that one should instead use vectorised code and the apply functions, but I wasn't able to build my formula with those functions. The matrix:
row.names <- c('A2003','B2010','C2011','D2010','E2001','F2005','F2009','G2003','G2007','H2004','H2010')
sp1 <- c(4,83,1,2,4,3,1,5,7,2,4)
sp2 <- c(5,0,2,3,10,5,0,2,4,3,1)
sp3 <- c(7,2,4,8,7,2,4,83,1,5,7)
sp4 <- c(0,2,4,2,4,12,1,5,7,2,4)
Site <- c('A','B','C','D','E','F','F','G','G','H','H')
Year <- c('2003','2010','2011','2010','2001','2005','2009','2003','2007','2004','2010')
Obs <- c(1,1,1,4,9,6,8,2,5,2,3)
ID <- c('A2003','B2010','C2011','D2010','E2001','F2005','F2009','G2003','G2007','H2004','H2010')
df <- data.frame(row.names, sp1, sp2, sp3, sp4, Site, Year, Obs, ID)
rownames(df) <- df[,1]
df[,1] <- NULL
df
df.1 <- subset(df, Obs == 1)
df.more <- subset(df, Obs >= 2)
df.1
df.more
The loop:
require(vegan)
iterations <- 3000
out <- vector("list", iterations)
for(i in 1:iterations){
  rnd.more <- do.call(rbind, lapply(split(df.more, df.more$Site),
                      function(df.more) df.more[sample(nrow(df.more), 1, replace=FALSE), ]))
  rnd.df <- rbind(df.1, rnd.more)
  rnd.df.bc <- as.matrix(vegdist(rnd.df[1:4], method="bray"))
  rnd.df.bc[lower.tri(rnd.df.bc, diag=TRUE)] <- NA
  triang <- rnd.df.bc[!is.na(rnd.df.bc)]
  mean.bc <- mean(triang)
  out[[i]] <- list(rnd = rnd.df, bc = rnd.df.bc, ave = mean.bc)
}
Extraction of the results:
all.rnd.df <- lapply(out, "[[", "rnd")
capture.output(all.rnd.df, file="all.rnd.df.txt")
all.rnd.df.bc <- lapply(out, "[[", "bc")
capture.output(all.rnd.df.bc, file="all.rnd.df.bc.txt")
all.triang <- lapply(out, "[[", "ave")
capture.output(all.triang, file="all.triang.txt")
Precompute the sample indices:
idx <- lapply(1:iterations, function(x) {
  tapply(1:nrow(df.more), as.character(df.more$Site), function(y) {
    if(length(y) == 1) y else sample(y, 1)
  })
})
idx <- lapply(idx, function(ids) c(1:nrow(df.1), ids + nrow(df.1)))
Precompute a placeholder data.frame to index:
rnd.df <- rbind(df.1, df.more)
Now you just index the precomputed objects instead of rebuilding them in every iteration:
iterations <- 3000
out <- vector("list", iterations)
for(i in 1:iterations){
  rnd.df.bc <- as.matrix(vegdist(rnd.df[idx[[i]], 1:4], method="bray"))
  rnd.df.bc[lower.tri(rnd.df.bc, diag=TRUE)] <- NA
  triang <- rnd.df.bc[!is.na(rnd.df.bc)]
  mean.bc <- mean(triang)
  out[[i]] <- list(rnd = rnd.df, bc = rnd.df.bc, ave = mean.bc)
}
Benchmarks (f1 = this method, f2 = the OP's code):
> microbenchmark(f1(), f2(), times=5L)
Unit: seconds
 expr      min        lq      mean   median        uq       max neval
 f1()  2.21069  4.877017  4.666875  5.27416  5.444411  5.528096     5
 f2() 13.54813 13.554965 19.500247 14.51089 27.074520 28.812732     5
Make it parallel (note that %dopar% is what actually distributes the work; %do% runs sequentially):
library(foreach)
library(doSNOW)
cl <- makeCluster(3)
registerDoSNOW(cl)
out <- foreach(i = 1:iterations, .packages=c('vegan')) %dopar% {
  rnd.df.bc <- as.matrix(vegdist(rnd.df[idx[[i]], 1:4], method="bray"))
  rnd.df.bc[lower.tri(rnd.df.bc, diag=TRUE)] <- NA
  triang <- rnd.df.bc[!is.na(rnd.df.bc)]
  mean.bc <- mean(triang)
  list(rnd = rnd.df, bc = rnd.df.bc, ave = mean.bc)
}
stopCluster(cl)
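Whichever version you use, the per-iteration means can be pulled out in one step afterwards; a small usage sketch based on the structure of out above:
all.ave <- sapply(out, "[[", "ave")   # vector of 3000 mean Bray-Curtis values
summary(all.ave)
hist(all.ave)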
Not being familiar with the vegan package, I'm a little limited in what advice I can give you. For the most part, you've constructed your for loop well and as you can see below, you don't gain much by converting it to a function and running it through lapply. I think your best bet is to parallelize your code. In the example below, if you convert your for loop to a function and use parLapply, you can shave a couple seconds off if you include the cluster build time. If you exclude the cluster build time, it's about 5 times faster on my 7 cores. Changes in computational time will vary by the number of cores you can run on. But I think that may be your best option for now. library(parallel) library(vegan) row.names <- c('A2003','B2010','C2011','D2010','E2001','F2005','F2009','G2003','G2007','H2004','H2010') sp1 <- c(4,83,1,2,4,3,1,5,7,2,4) sp2 <- c(5,0,2,3,10,5,0,2,4,3,1) sp3 <- c(7,2,4,8,7,2,4,83,1,5,7) sp4 <- c(0,2,4,2,4,12,1,5,7,2,4) Site <- c('A','B','C','D','E','F','F','G','G','H','H') Year <- c('2003','2010','2011','2010','2001','2005','2009','2003','2007','2004','2010') Obs <- c(1,1,1,4,9,6,8,2,5,2,3) ID <- c('A2003','B2010','C2011','D2010','E2001','F2005','F2009','G2003','G2007','H2004','H2010') df<- data.frame(row.names, sp1, sp2, sp3, sp4, Site, Year, Obs, ID) rownames(df) <- df[,1] df[,1] <- NULL df df.1 <- subset(df, Obs == 1) df.more <- subset(df, Obs >= 2) df.1 df.more more.fun <- function(df.more, df.1) { rnd.more <- do.call(rbind, lapply(split(df.more, df.more$Site), function(df.more) df.more[sample(nrow(df.more), 1,replace=FALSE) , ]) ) rnd.df <- rbind(df.1,rnd.more) rnd.df.bc <- as.matrix(vegdist(rnd.df[1:4], method="bray")) rnd.df.bc[lower.tri(rnd.df.bc,diag=TRUE)] <- NA triang <- rnd.df.bc[!is.na(rnd.df.bc)] mean.bc <- mean(triang) list(rnd = rnd.df, bc = rnd.df.bc, ave = mean.bc) } start.orig <- Sys.time() set.seed(pi) iterations <- 3000 out <- vector("list", iterations) for(i in 1:iterations){ rnd.more <- do.call(rbind, lapply(split(df.more, df.more$Site), function(df.more) df.more[sample(nrow(df.more), 1,replace=FALSE) , ]) ) rnd.df <- rbind(df.1,rnd.more) rnd.df.bc <- as.matrix(vegdist(rnd.df[1:4], method="bray")) rnd.df.bc[lower.tri(rnd.df.bc,diag=TRUE)] <- NA triang <- rnd.df.bc[!is.na(rnd.df.bc)] mean.bc <- mean(triang) out[[i]] <- list(rnd = rnd.df, bc = rnd.df.bc, ave = mean.bc) } end.orig <- Sys.time() start.apply <- Sys.time() fn = out <- lapply(1:3000, function(i) more.fun(df.more, df.1)) end.apply <- Sys.time() start.parallel <- Sys.time() cl <- makeCluster(7) clusterEvalQ(cl, library(vegan)) clusterExport(cl, c("df.more", "df.1", "more.fun")) start.parallel.apply <- Sys.time() out <- parLapply(cl, 1:3000, function(i) more.fun(df.more, df.1)) end.parallel <- Sys.time() #* Compare times end.orig - start.orig end.apply - start.apply end.parallel - start.parallel end.parallel - start.parallel.apply (the time comparisons here are pretty crude)
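If reproducibility matters when running these simulations in parallel, note that set.seed() on the master does not control the workers' random streams. A small sketch using the parallel package's built-in RNG stream support, reusing the cluster setup above (this detail is not covered in the answers):
library(parallel)
library(vegan)
cl <- makeCluster(7)
clusterSetRNGStream(cl, iseed = 314)   # gives each worker its own reproducible stream
clusterEvalQ(cl, library(vegan))
clusterExport(cl, c("df.more", "df.1", "more.fun"))
out <- parLapply(cl, 1:3000, function(i) more.fun(df.more, df.1))
stopCluster(cl)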