I'm trying to calculate time-average covariances (autocovariances) for a vector of length 500.
This is the equation we're using:
gamma(k) = (1/(n-k)) * sum from t = k+1 to n of (x_t - xbar)*(x_(t-k) - xbar)
The result I'm hoping for is a vector with an entry for each k from 0 to 500 (k = 0 would just be the variance of the whole set).
I've started with something like this, but I know I'll need to reference the lag (i) in the first (x - xMean) term as well:
x <- rnorm(500)
xMean <- mean(x)
i <- seq(1, 500)
dfGam <- data.frame(i)
dfGam$gamma <- (1/(500-dfGam$i))*(sum((x-xMean)*(x[-dfGam$i]-xMean)))
Is it possible to do this using vector math or will I need to use some sort of for loop?
Here's the for loop that I've come up with for the solution:
gamma_func <- function(input_vec) {
  input_mean <- mean(input_vec)
  n <- length(input_vec)
  output_vec <- numeric(n - 1)      # preallocate: one entry per lag
  for (val in seq_len(n - 1)) {     # val is the lag k
    iter2 <- seq(val + 1, n)        # t runs from k + 1 to n
    gamma_sum <- 0
    for (val2 in iter2) {
      gamma_sum <- gamma_sum +
        (input_vec[val2] - input_mean) * (input_vec[val2 - val] - input_mean)
    }
    output_vec[val] <- gamma_sum / length(iter2)   # divide by n - k
  }
  return(output_vec)
}
Thanks
Using data.table, mostly for the shift function to make x_{t - k}, you can do this:
library(data.table)
gammabar <- function(k, x){
xbar <- mean(x)
n <- length(x)
df <- data.table(xt = x, xtk = shift(x, k))[!is.na(xtk)]
df[, sum((xt - xbar)*(xtk - xbar))/n]
}
gammabar(k = 10, x)
# [1] -0.1553118
The filter [!is.na(xtk)] starts the sum at t = k + 1, because xtk will be NA for the first k indices due to being shifted by k.
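To get the whole vector the question asks for, you can sapply gammabar over the lags. Note that gammabar divides by n (the convention stats::acf also uses) rather than the question's n - k, so acf makes a convenient cross-check. A sketch, using the reproducible x below:
gam <- sapply(1:10, gammabar, x = x)   # lag 0 would just be the 1/n variance
acf.gam <- drop(acf(x, lag.max = 10, type = "covariance", plot = FALSE)$acf)[-1]
all.equal(gam, acf.gam)                # should be TRUE up to floating point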
Reproducible x
x <- c(0.376972124936433, 0.301548373935665, -1.0980231706536, -1.13040590360378,
-2.79653431987176, 0.720573498411587, 0.93912102300901, -0.229377746707471,
1.75913134696347, 0.117366786802848, -0.853122822287008, 0.909259181618213,
1.19637295955276, -0.371583903741348, -0.123260233287436, 1.80004311672545,
1.70399587729432, -3.03876460529759, -2.28897494991878, 0.0583034949929225,
2.17436525195634, 1.09818265352131, 0.318220322390854, -0.0731475581637693,
0.834268741278827, 0.198750636733429, 1.29784138432631, 0.936718306241348,
-0.147433193833294, 0.110431994640128, -0.812504663900505, -0.743702167768748,
1.09534507180741, 2.43537370755095, 0.38811846676708, 0.290627670295127,
-0.285598287083935, 0.0760147178373681, -0.560298603759627, 0.447188372143361,
0.908501134499943, -0.505059597708343, -0.301004012157305, -0.726035976548133,
-1.18007702699501, 0.253074712637114, -0.370711296884049, 0.0221795637601637,
0.660044122429767, 0.48879363533552)
I am creating an R function that calculates a bootstrapped bias-corrected and accelerated (BCa) interval, without using any pre-installed packages. My code seems to be working, but I am struggling with the code for the lower and upper limits of the interval. Any suggestions would be helpful.
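For reference, the standard BCa adjusted tail levels (Efron & Tibshirani) that the limits should be built from are, written as R expressions:
a1 <- pnorm(z0 + (z0 + qnorm(alpha/2)) / (1 - a*(z0 + qnorm(alpha/2))))
a2 <- pnorm(z0 + (z0 + qnorm(1 - alpha/2)) / (1 - a*(z0 + qnorm(1 - alpha/2))))
The interval is then the a1 and a2 empirical quantiles of the sorted bootstrap statistics, which is how the limits at the end of the function below are computed.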
BCa <- function(stat,X,k,level=0.95,...){
if(!is.numeric(k)||k<=0){
stop("The number of bootstrap resamples 'k' must be a numeric value greater than 0")
}
t.star <- stat(X,...)
t.k <- rep(NA,k)
for(i in 1:k){
Xi <- sample(X,replace=TRUE)
t.k[i] <- stat(Xi,...)
}
z0 <- qnorm(mean(t.k<t.star))
n <- length(X)
t.minus.j <- rep(NA,n)
for(j in 1:n){
Xj <- X[-j]
t.minus.j[j]<- stat(Xj,...)
}
t.bar.minus <- mean(t.minus.j)
t.diff <- t.bar.minus - t.minus.j
a <- sum(t.diff^3) / (6 * (sum(t.diff^2))^(3/2))  # acceleration factor
alpha <- 1-level
tsort <- sort(t.k, decreasing = FALSE)
a1 <- pnorm(z0 + (z0 + qnorm(alpha/2)) / (1 - a*(z0 + qnorm(alpha/2))))
a2 <- pnorm(z0 + (z0 + qnorm(1 - alpha/2)) / (1 - a*(z0 + qnorm(1 - alpha/2))))
L <- max(1, floor(a1*(k + 1)))    # index of the lower order statistic
U <- min(k, ceiling(a2*(k + 1)))  # index of the upper order statistic
lower.limit <- tsort[L]
upper.limit <- tsort[U]
return(list(t.star=t.star,ci=c(lower.limit,upper.limit)))
}
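A quick way to exercise the function; the data and statistic here are only an illustrative choice, not part of the question:
set.seed(1)
X <- rexp(50)                    # skewed data, where the BCa correction matters
BCa(stat = mean, X = X, k = 2000)
On the same data, boot::boot.ci with type = "bca" should give similar limits, which makes a handy cross-check.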
I have written code that randomly adds points to a numeric variable in order to increase the weighted mean score by 10%, storing the new scores in variable S1.
This is done by calculating the total number of points that need to be added to increase the mean by 10%. The next step is to randomly select respondents until the weighted sum of the selected responses reaches the target, skipping respondents whose score is already 10 so as not to exceed the maximum of the scale. The final stage is to check whether the sum just above or just below the target is closer, and use that sample to add points to.
The code works OK but doesn't look efficient. I am an R novice and have read that loops should be avoided as much as possible, but cannot work out an alternative. Is it possible to do what I am attempting, but more efficiently?
#Create random data
library(stats)
set.seed(21821)
ncust <- 1000
cust.df <- data.frame(cust.id=as.factor(c(1:ncust)))
wtvar <- rnorm(ncust, mean=1, sd=0.2)
V1 <- floor(rnorm(ncust, mean=7.5, sd=3))
V1[V1 > 10] <- 10
V1[V1 < 1] <- 1
cust.df$V1 <- V1
cust.df$wtvar <- wtvar
#Function to determine sample required
random.sample <- function() {
(pctadd <- (sum(cust.df$V1*cust.df$wtvar)*0.1)/sum(cust.df$V1[!cust.df$V1==10]*cust.df$wtvar[!cust.df$V1==10])) #percentage of resps (weighted) who need points added to make 10% increase
(numadd <- (sum(cust.df$V1*cust.df$wtvar)*0.1)) #sum of weights needed to make 10% increase
wgttot <- vector(mode="numeric", length=0)
idtot <- vector(mode="numeric", length=0)
id.ref <- cust.df$cust.id[!cust.df$V1==10]
repeat {
preidtot <- idtot
prewgttot <- wgttot
(t.id <- as.numeric(sample(id.ref, 1)))
(t.wgt <- cust.df$wtvar[cust.df$cust.id==t.id])
id.ref <- id.ref[!id.ref==t.id]
wgttot <- c(wgttot,t.wgt)
idtot <- c(idtot, t.id)
if (sum(wgttot) > numadd) break
}
prediff <- numadd - sum(prewgttot)
postdiff <- sum(wgttot) - numadd
if (prediff < postdiff) {
x <- preidtot
} else {
x <- idtot
}
return(x)
}
tempids <- random.sample()
#Apply sample rule
cust.df$S1 = ifelse(cust.df$cust.id %in% tempids, cust.df$V1 + 1, cust.df$V1)
#Check ~10% increase achieved
weighted.mean(cust.df$V1,cust.df$wtvar)
weighted.mean(cust.df$S1,cust.df$wtvar)
random.sample is your original version and random.sample1 is the loop-free version. random.sample1 does the same kind of thing as random.sample, but because (by your definition) the set of respondents to top up is not unique, the two functions return different samples; the resulting weighted sums therefore differ too, though both increase the weighted mean by approximately 10%. Check the code below to see how the result of random.sample1 is used, as it is indexed differently.
#Create random data and the original random.sample(): exactly as in the question above
random.sample1 <- function() {
  numadd <- sum(cust.df$V1 * cust.df$wtvar) * 0.1  # sum of weights needed to make 10% increase
  id.ref <- which(cust.df$V1 != 10)                # rows that can still take an extra point
  pos <- sample(id.ref, length(id.ref))            # random permutation of the eligible rows
  t.wgt <- cust.df$wtvar[pos]
  sumwgttot <- cumsum(t.wgt)                       # running weighted total as rows are added
  # keep the prefix whose cumulative weight is closest to the target
  return(pos[1:which.min(abs(sumwgttot - numadd))])
}
system.time(tempids <- random.sample())
## On my computer, it uses about 0.200s to finish the calculation.
system.time(tempids1 <- random.sample1())
## On my computer, the without loop version uses about 0.000s.
#Apply sample rule
cust.df$S1 = ifelse(cust.df$cust.id %in% tempids, cust.df$V1 + 1, cust.df$V1)
## Note that tempids1 is used differently: indexing by row position is
## more efficient than the %in% match used for tempids above.
cust.df$S2 = cust.df$V1
cust.df$S2[tempids1] = cust.df$V1[tempids1] + 1
#Check ~10% increase achieved
weighted.mean(cust.df$V1,cust.df$wtvar)
weighted.mean(cust.df$S1,cust.df$wtvar)
weighted.mean(cust.df$S2,cust.df$wtvar)
I want to set up a binomial-tree model for an interest rate. The rate is path dependent, and I want to return the interest rate (plus the discount factor and payoff) at every step in all 2^N scenarios. I need every individual rate because the rates are used to compute the discount factors. I know how to do this in a long-winded way; here I want a double loop (or something simpler) to get the results.
w is a matrix of 0/1 dummy variables representing all scenarios.
r is the interest rate: if there is a head (1), then r1 = r0 + u = r0 + 0.005; if there is a tail (0), then r1 = r0 - d.
D is the discount factor: D1 = 1/(1+r0), D2 = D1/(1+r1), ...
P is the payoff.
In this case the number of periods N is 10, so I can compute everything step by step. However, if N gets larger this approach becomes unmanageable, and I want a simpler way to compute it. Thank you.
#Real Price
N <- 10
r0 <- 0.06
K <- 0.05
u <- 0.005
d <- 0.004
q <- 0.5
w <- expand.grid(rep(list(0:1),N))
r <- D <- P <- matrix(0, nrow=2^N, ncol=N)
for(i in 1:dim(w)[1])
{
r[i,1] <- r0 + u*w[i,1] - d*(1-w[i,1])
r[i,2] <- r[i,1] + u*w[i,2] - d*(1-w[i,2])
r[i,3] <- r[i,2]+u*w[i,3]-d*(1-w[i,3])
r[i,4] <- r[i,3]+u*w[i,4]-d*(1-w[i,4])
r[i,5] <- r[i,4]+u*w[i,5]-d*(1-w[i,5])
r[i,6] <- r[i,5]+u*w[i,6]-d*(1-w[i,6])
r[i,7] <- r[i,6]+u*w[i,7]-d*(1-w[i,7])
r[i,8] <- r[i,7]+u*w[i,8]-d*(1-w[i,8])
r[i,9] <- r[i,8]+u*w[i,9]-d*(1-w[i,9])
r[i,10] <- r[i,9]+u*w[i,10]-d*(1-w[i,10])
D[i,1] <- 1/(1+r0)
D[i,2] <- D[i,1]/(1+r[i,1])
D[i,3] <- D[i,2]/(1+r[i,2])
D[i,4] <- D[i,3]/(1+r[i,3])
D[i,5] <- D[i,4]/(1+r[i,4])
D[i,6] <- D[i,5]/(1+r[i,5])
D[i,7] <- D[i,6]/(1+r[i,6])
D[i,8] <- D[i,7]/(1+r[i,7])
D[i,9] <- D[i,8]/(1+r[i,8])
D[i,10] <- D[i,9]/(1+r[i,9])
P[i,1] <- D[i,1]*pmax(K-r0,0)*(0.5^N)
P[i,2] <- D[i,2]*pmax(K-r[i,1],0)*(0.5^N)
P[i,3] <- D[i,3]*pmax(K-r[i,2],0)*(0.5^N)
P[i,4] <- D[i,4]*pmax(K-r[i,3],0)*(0.5^N)
P[i,5] <- D[i,5]*pmax(K-r[i,4],0)*(0.5^N)
P[i,6] <- D[i,6]*pmax(K-r[i,5],0)*(0.5^N)
P[i,7] <- D[i,7]*pmax(K-r[i,6],0)*(0.5^N)
P[i,8] <- D[i,8]*pmax(K-r[i,7],0)*(0.5^N)
P[i,9] <- D[i,9]*pmax(K-r[i,8],0)*(0.5^N)
P[i,10] <- D[i,10]*pmax(K-r[i,9],0)*(0.5^N)
}
true.price <- sum(P)
# > true.price
# [1] 0.00292045
You can just use a nested loop, looping over 2:(ncol(w)) within the i loop:
for(i in 1:nrow(w)) {
r[i, 1] <- r0 + u*w[i, 1] - d*(1-w[i, 1])
D[i, 1] <- 1/(1+r0)
P[i, 1] <- D[i, 1]*pmax(K-r0, 0)*(0.5^N)
for (j in 2:(ncol(w))) {
r[i,j] <- r[i, j-1] + u*w[i, j] - d*(1-w[i, j])
D[i,j] <- D[i, j-1]/(1+r[i, j-1])
P[i,j] <- D[i, j]*pmax(K-r[i, j-1], 0)*(0.5^N)
}
}
true.price <- sum(P)
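For completeness, the same recursions can also be written without any explicit loops, since each r path is a cumulative sum of the up/down moves and each D path is a cumulative product of one-period discounts. A sketch, assuming w, r0, u, d, K, and N as defined in the question:
wm <- as.matrix(w)
r <- r0 + t(apply(u*wm - d*(1 - wm), 1, cumsum))   # r[i, j]: rate after j moves on path i
rprev <- cbind(r0, r[, -N])                        # rate in force during step j
D <- t(apply(1/(1 + rprev), 1, cumprod))           # D[i, j]: discount factor to step j
P <- D * pmax(K - rprev, 0) * (0.5^N)              # discounted payoff at each step
true.price <- sum(P)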
This Matlab code computes the probability of ecosystem functioning as species are lost from an ecosystem. The code now has to be translated into R, but I am having trouble translating one matrix manipulation.
This is the Matlab code I have tried to translate into R:
for j=1:N+1
multi_matrix4(:,j)=matrix(:,1);
end
In R, I have put this code within the for-loop:
multi.matrix4 <- matrix[,1,drop=FALSE]
multi.matrix4 <- multi.matrix4[,j,drop=FALSE]
class(multi.matrix4)
This is the message from R that appears when the loop runs:
Error: subscript out of bounds
My question is: how can I do this kind of matrix manipulation in R?
The Matlab code, without the final plotting, is:
clear all
% No of permutations
sim=1000;
% Total No of ecosystem functions
N=3;
%Total dimensions
J=3;
% Total No of species in pool
total_species=4;
% No of species drawn from pool
species=4;
multi_matrix=zeros(total_species,N);
% "Threshold"
t=.5;
result=zeros(sim,J);
for i=1:sim
% %Uniformly increasing trait values
for j=1:N
matrix=rand(total_species,2);
matrix(:,1)=linspace(0,1,total_species);
matrix=sortrows(matrix,2);
multi_matrix4(:,j)=matrix(:,1);
end
%Complete covariance
matrix=rand(total_species,2);
matrix(:,1)=linspace(0,1,total_species);
matrix=sortrows(matrix,2);
for j=1:N+1
multi_matrix4(:,j)=matrix(:,1);
end
% Excess of high trait values
for j=1:N
matrix=rand(total_species,2);
X=1:total_species;X=X';
matrix(:,1)=1-exp(-0.02*X.^2);
matrix=sortrows(matrix,2);
multi_matrix4(:,j)=matrix(:,1);
end
% Deficiency of high trait values
for j=1:N
matrix=rand(total_species,2);
X=1:total_species;X=X';
% matrix(:,1)=exp((X./22.6).^3)-1;
matrix(:,1)=exp((X./13.55).^3)-1;
matrix=sortrows(matrix,2);
multi_matrix4(:,j)=matrix(:,1);
end
% Reading empirical data
warning off
% [NUMERIC,txt]=xlsread('Plant_6.xls','Sheet1');
Exp07_2 = [ 0 0.72 0.70 ; 1 1 0 ; 0.62 0 1 ; 0.36 0.69 0.61]
multi_matrix(1:total_species,1:N)=Exp07_2;
random=rand(1,N);
multi_matrix(total_species+1,1:N)=random;
multi_matrix2=sortrows(multi_matrix',total_species+1);
multi_matrix3=multi_matrix2';
multi_matrix4=multi_matrix3(1:total_species,:);
warning on
% adding a sorting column
random2=rand(total_species,1);
multi_matrix4(:,N+1)=random2;
sort_multi_matrix=sortrows(multi_matrix4,N+1);
% loop adding one function at a time
for j=1:J
loss_matrix=sort_multi_matrix(1:species,1:j);
max_value=loss_matrix>=t;
B=any(max_value',2);
C=all(B);
result(i,j)=sum(C);
end
end
% reporting
res=mean(result);
res'
The R-code looks like this:
rm(list = ls())
#No of permutation
sims <- 1000;
#Total number of ecosystem functions
N <- 3
#Total dimensions
J <- 3
#Total number of species in pool
total.species <- 4
#No of species drawn from pool
species <- 4
multi.matrix <- matrix(0, nrow=total.species, ncol=N)
class(multi.matrix)
# "Threshold"
t <- .5;
# The results are to be put in a matrix
result <- matrix(0, nrow=sims, ncol=J)
for (i in 1 : sims)
{
#Uniformly increasing trait values
for (j in 1 : N)
{
matrix <- matrix(runif(total.species*2),total.species)
class(matrix)
matrix[,1] <- seq(0,1, len=total.species) # test 2
class(matrix)
matrix <- matrix[order(matrix[,2]),]
class(matrix)
# multi.matrix4[,j,drop=FALSE] = matrix[,1,drop=FALSE]
multi.matrix4 <- matrix[,1,drop=FALSE]
multi.matrix4 <- multi.matrix4[,j,drop=FALSE]
class(multi.matrix4)
}
# Complete covariance
matrix <- matrix(runif(total.species*2),total.species)
class(matrix)
matrix[,1] <- seq(0, 1, len=total.species)
class(matrix)
matrix <- matrix[order(matrix[,2]),]
class(matrix)
for (j in 1:(N+1))
{multi.matrix4 <- matrix[,1,drop=FALSE]
multi.matrix4 <- multi.matrix4[,j,drop=FALSE]
class(multi.matrix4)
}
# Excess of high trait values
for (j in 1 : N)
{matrix <- matrix(runif(total.species*2),total.species)
class(matrix)
X <- 1:total.species
matrix[,1] <- 1 - exp(-0.02 * X^2) # Hie... p. 8
matrix <- matrix[order(matrix[,2]),]
# multi.matrix4[,j,drop=FALSE] <- matrix[,1,drop=FALSE]
# multi.matrix4[,j,drop=FALSE] <- matrix[,1]
multi.matrix4 <- matrix[,1,drop=FALSE]
multi.matrix4 <- multi.matrix4[,j,drop=FALSE]
class(multi.matrix4)
}
# Deficiency of high trait values
for (j in 1 : N)
{matrix <- matrix(runif(total.species*2),total.species)
class(matrix)
X <- 1:total.species
# matrix[,1] <- exp((X/22.6)^3)-1
matrix[,1] <- exp((X/13.55)^3)-1
class(matrix)
matrix <- matrix[order(matrix[,2]),]
class(matrix)
# multi.matrix4[,j,drop=FALSE] <- matrix[,1,drop=FALSE]
# multi.matrix4[,j,drop=FALSE] <- matrix[,1]
# multi.matrix4[,j] <- matrix[,1,drop=FALSE]
# class(multi.matrix4)
multi.matrix4 <- matrix[,1,drop=FALSE]
multi.matrix4 <- multi.matrix4[,j,drop=FALSE]
class(multi.matrix4)
}
# Reading empirical data
Exp_07_2 <- matrix(c(0,    0.72, 0.70,
                     1,    1,    0,
                     0.62, 0,    1,
                     0.36, 0.69, 0.61), nrow=4, byrow=TRUE)
Exp_07_2
class(Exp_07_2)
multi.matrix <- matrix(c(Exp_07_2),ncol=3)
class(multi.matrix)
multi.matrix <- multi.matrix[1:total.species, 1:N]
class(multi.matrix)
random <- runif(N)
multi.matrix <- rbind(multi.matrix, random)
multi.matrix2 <- t(multi.matrix)[order(t(multi.matrix)[, total.species+1]), ]
class(multi.matrix2)
multi.matrix3 <- t(multi.matrix2)
class(multi.matrix3)
multi.matrix4 <- multi.matrix3[1:total.species,,drop=FALSE]
class(multi.matrix4)
# Adding a sorting column
random2 <- runif(total.species)
multi.matrix4 <- cbind(multi.matrix4, random2)
sort.multi.matrix <- multi.matrix4[order(multi.matrix4[, N+1]), ]
# loop adding one function at a time
for (j in 1 : J)
{loss.matrix <- sort.multi.matrix[1:species, 1:j, drop=FALSE]
class(loss.matrix)
max.value <- loss.matrix >= t
B <- apply(max.value, 2, any)
C <- all(B)
result[i,j] <- sum(C)
}
}
# Reporting
res <- colMeans(result)   # Matlab's mean(result) takes column means
res
Though I don't have Matlab or R at hand, I suspect this is what is causing the problem:
In R you try to assign to a location in the matrix that does not exist, and it fails.
In Matlab you tried to assign to a location in the matrix that did not exist, and it forgave your strange choice, expanded the matrix, and succeeded.
Assuming this is the problem, the solution is simple: when creating the matrix in R, make sure that it is big enough to contain all the things you want to add to it later.
This is called initialization, and in most cases it is best practice. Even in Matlab it is generally recommended to initialize your variables properly in advance where possible, rather than letting them grow as you go.
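Applied to the snippet in the question, a minimal sketch of that fix (assuming total.species, N, and the variable called matrix as defined in the question's R code) is:
multi.matrix4 <- matrix(0, nrow = total.species, ncol = N + 1)  # as wide as it is ever indexed
for (j in 1:(N + 1)) {
  multi.matrix4[, j] <- matrix[, 1]   # direct translation of multi_matrix4(:,j) = matrix(:,1)
}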