I have two subarrays and would like to sample one of them at random. The subarrays consist of character labels from a larger pool of labels as follows:
K <- as.character(1:10)
Suppose I call the arrays K1 and K2 and assign 10 labels between them as follows:
K1 <- 1:8
K2 <- 9:10
I then use
get(paste0("K", i))
to retrieve the labels that were sampled using a 'for' loop.
The issue I am having is this: if K1 is sampled, get(paste0("K", i)) returns labels from K2 (which contain labels 9:10) instead of returning K1 as needed.
In other words, I believe get(past0("K", i)) may be overwriting results.
Any thoughts on why this may occur and how to go about circumventing the issue?
Below is a more extensive example:
K <- 2
N <- 100
Hstar <- 10
perms <- 10000
probs <- rep(1/Hstar, Hstar)
specs <- 1:N
pop <- array(dim = c(c(perms, N), K))
haps <- as.character(1:Hstar)
K1 <- 1:8 # subarray 1
K2 <- 9:10 # subarray 2
for(j in 1:perms){
for(i in 1:K){
if(i == 1){
pop[j, specs, i] <- sample(haps, size = N, replace = TRUE, prob = probs)
}
else{
pop[j ,, 1] <- sample(haps[K1], size = N, replace = TRUE, prob = probs[K1])
pop[j ,, 2] <- sample(haps[K2], size = N, replace = TRUE, prob = probs[K1])
}
}
}
HAC.mat <- array(dim = c(c(perms, N), K))
for(k in specs){
for(j in 1:perms){
for(i in 1:K){
ind.index <- sample(specs, size = k, replace = FALSE)
hap.plot <- pop[sample(1:nrow(pop), size = 1, replace = TRUE), ind.index, sample(i, size = 1, replace = TRUE)]
HAC.mat[j, k, i] <- length(unique(hap.plot))
}
}
}
means <- apply(HAC.mat, MARGIN = 2, mean)
lower <- apply(HAC.mat, MARGIN = 2, function(x) quantile(x, 0.025))
upper <- apply(HAC.mat, MARGIN = 2, function(x) quantile(x, 0.975))
d <- data.frame(specs, means, lower, upper)
par(mfrow = c(1, 2))
if(i == 1){
plot(specs, means, type = "n", xlab = "Specimens sampled", ylab = "Unique haplotypes", ylim = c(1, Hstar))
polygon(x = c(specs, rev(specs)), y = c(lower, rev(upper)), col = "gray")
lines(specs, means, lwd = 2)
HAC.bar <- barplot(N*probs, xlab = "Unique haplotypes", ylab = "Specimens sampled", names.arg = 1:Hstar)
}
else if(i > 1){
plot(specs, means, type = "n", xlab = "Specimens sampled", ylab = "Unique haplotypes", ylim = c(1, Hstar))
polygon(x = c(specs, rev(specs)), y = c(lower, rev(upper)), col = "gray")
lines(specs, means, lwd = 2)
HAC.bar <- barplot(N*probs[get(paste0("K", i))], xlab = "Unique haplotypes", ylab = "Specimens sampled", names.arg = get(paste0("K", i))) ## The issue may lie here
}
Any advice on what may be going on here is greatly appreciated.
Related
It wont print out the gray squares that are meant to represent the people susceptible and red squares that are meant to represent those infected
initial1=function(m,l){
x = matrix(0, nrow = m, ncol = l)
new_infections=x
close_contact=sample(c("S","I"), size=m*l, prob=c(0.9,0.1), replace=TRUE)
close_contact[TRUE] = 1
set.seed(12)
close_contact = sample(c(T, F), size = m*l, replace = TRUE)
close_contact = matrix(close_contact, nrow = m, ncol = l)
infected = x[-50, ] == 1
new_infections[-1, ][infected & close_contact] = 1
}
plot_infection= function(initial1){
sir_colors=c("White", "Grey", "Red")
x1=matrix(initial1, nrow = m)
x1
z=matrix(0, nrow=m, ncol=l)
z[x1=="I"]=1
par(mfrow = c(1,2))
image(z, col = sir_colors)
x2=matrix(initial2, nrow=m)
z1=matrix(0, nrow=m, ncol=l)
z1[x2=="S"]=1
image(z1, col=sir_colors)
}
plot_infection(initial1(m,l))
The following functions are much simpler and plot a graph according to the problem description.
initial1 <- function(m, l){
new_infections <- sample(c(S = 2L, I = 3L), size = m*l, prob = c(0.9, 0.1), replace = TRUE)
new_infections <- matrix(new_infections, nrow = m, ncol = l)
close_contact <- sample(c(TRUE, FALSE), size = m*l, replace = TRUE)
close_contact <- matrix(close_contact, nrow = m, ncol = l)
new_infections[close_contact] <- 1L
new_infections
}
plot_infection <- function(x){
sir_colors <- c("white", "grey", "red")
image(x, col = sir_colors)
}
m <- 50
l <- 50
set.seed(12)
init <- initial1(m, l)
plot_infection(init)
I have created the following function that I need to extract some information from. However, R is giving me some issues.
HAC.sim <- function(K = 1, N, Hstar, probs, perms = 10000){
specs <- 1:N
### Set up a container to hold the identity of each individual from each permutation
pop <- array(dim = c(c(perms, N), K))
### Create an ID for each haplotype
haps <- as.character(1:Hstar)
### Assign probabilities of occurrence to each haplotype, ensure they sum to 1
### This is where we assume we "know" the distribution of haplotypes
### Here, I have assumed they all occur with equal frequency, but you can change this to assume some dominant ones and some rare ones, whatever you want
probs <- rep(1/Hstar, Hstar)
# probs <- c(220/N, rep(3/N, 2), rep(2/N, 2), rep(1/N, 10))
### Generate permutations, we assume each permutation has N individuals, and we sample those individuals' haplotypes from our probabilities
# If K > 1, haplotypes are partitioned into equally-sized subpopulations/demes
# Can change number of haplotypes in each subpopulation and re-run simulation
# For each additional, K, add new Ki and new pop[j ,, i] in loop
for(j in 1:perms){
for(i in 1:K){
if(i == 1){
pop[j, specs, i] <- sample(haps, size = N, replace = TRUE, prob = probs)
}
else{
pop[j ,, 1] <- sample(haps[K1], size = N, replace = TRUE, prob = probs[K1])
pop[j ,, 2] <- sample(haps[K2], size = N, replace = TRUE, prob = probs[K2])
}
}
}
### Make a matrix to hold the 1:N individuals from each permutation
HAC.mat <- array(dim = c(c(perms, N), K))
for(k in specs){
for(j in 1:perms){
for(i in 1:K){
ind.index <- sample(specs, size = k, replace = FALSE) ## which individuals will we sample
hap.plot <- pop[sample(1:nrow(pop), size = 1, replace = TRUE), ind.index, sample(1:K, size = 1, replace = TRUE)] ## pull those individuals from a permutation
HAC.mat[j, k, i] <- length(unique(hap.plot)) ## how many haplotypes did we get for a given sampling intensity (k) from each ### permutation (j)
}
}
}
### Calculate the mean and CI for number of haplotypes at each sampling intensity (j)
means <- apply(HAC.mat, MARGIN = 2, mean)
lower <- apply(HAC.mat, MARGIN = 2, function(x) quantile(x, 0.025))
upper <- apply(HAC.mat, MARGIN = 2, function(x) quantile(x, 0.975))
### Plot the curve and frequency barplot
par(mfrow = c(1, 2))
for(i in 1:K){
if(i == 1){
plot(specs, means, type = "n", xlab = "Specimens sampled", ylab = "Unique haplotypes", ylim = c(1, Hstar))
polygon(x = c(specs, rev(specs)), y = c(lower, rev(upper)), col = "gray")
lines(specs, means, lwd = 2)
HAC.bar <- barplot(N*probs, xlab = "Unique haplotypes", ylab = "Specimens sampled", names.arg = 1:Hstar)
}
else{
plot(specs, means, type = "n", xlab = "Specimens sampled", ylab = "Unique haplotypes", ylim = c(1, max(HAC.mat)))
polygon(x = c(specs, rev(specs)), y = c(lower, rev(upper)), col = "gray")
lines(specs, means, lwd = 2)
HAC.bar <- barplot(N*probs[get(paste0("K", i))], xlab = "Unique haplotypes", ylab = "Specimens sampled", names.arg = get(paste0("K",i)))
}
}
d <- data.frame(specs, means)
## Measures of Closeness ##
list(c(cat("\n Number of haplotypes sampled: " , max(means))
cat("\n Number of haplotypes not sampled: " , Hstar - max(means))
cat("\n Proportion of haplotypes sampled: " , max(means)/Hstar)
cat("\n Proportion of haplotypes not sampled: " , (Hstar - max(means))/Hstar)), d)
}
HAC.sim(K = 1, N = 100, Hstar = 10)
I would like to be able to easily extract the data frame 'd' from the above function.
I need to pass d to a regression model object.
The output gives me almost what I need except there is a [[1]] appended to the end of the value of (Hstar - max(means))/Hstar), instead of on a new line. Plus, [[1]] is a NULL list object. Why am I getting this NULL list as an output, and how can I fix the problem?
Any help is appreciated.
I have tried to plot a simple graph using the following code;
plot(y = Et, x = t, type = "l", col = 1,
xlab = "Time", ylab= "Equity ($)",
main = "Figure 1–3: Randomly Generated Equity Curves")
grid()
abline(h = 10000)
lines(y = Et2, x = t, col = 2)
lines(y = Eb, x = t, col = 8)
Then I try to add a legend to the plot
legend(x = "topleft", col = c(1,2,8), lwd = 2, legend = c("Curve 1",
"Curve 2",
"SPY"))
And the result I obtain is the following;
Where am I going wrong?
EDIT 1: I restarted R studio and re-ran the plots and got the following result.
EDIT 2: Reproducible code:
library(quantmod)
options("getSymbols.warning4.0" = FALSE,
"getSymbols.auto.assign" = FALSE)
SPY <- getSymbols(c("SPY"), from = "2016-09-01")
SPY <- as.numeric(SPY$SPY.Close)
set.seed(123)
#create a time index
t <- 1:(length(SPY)-1)
#tradable capital vector
Vt <- c(rep(10000, length(t)))
#Benchmark return series
Rb <- rep(NA, length(t))
for(i in 2:length(t)) {
Rb[i] <- (SPY[i] / SPY[i - 1]) - 1
}
#Benchmark equity curve
Eb <- rep(NA, length(t))
Eb[1] <- Vt[1]
for(i in 2:length(t)) {
Eb[i] <- Eb[i - 1] * (1 + Rb[i])
}
#Randomy simulated return series 1
Rt <- rep(NA, length(t))
for(i in 2:length(t)) {
Rt[i] <- Rb[i] + rnorm(n = 1,
mean = 0.24/length(t),
sd = 2.5 * sd(Rb, na.rm = TRUE))
}
#Randomly simulated return series 2
Rt2 <- rep(NA, length(t))
for(i in 2:length(t)) {
Rt2[i] <- Rb[i] + rnorm(n = 1,
mean = 0.02/length(t),
sd = 0.75 * sd(Rb, na.rm = TRUE))
}
# Randomly Simulated Equity Curve 1
Et <- rep(NA, length(t))
Et <- Vt[1]
for(i in 2:length(t)) {
Et[i] <- Et[i-1] * (1 + Rt[i])
}
# Randomly Simulated Equity Curve 2
Et2 <- rep(NA, length(t))
Et2 <- Vt[1]
for(i in 2:length(t)) {
Et2[i] <- Et2[i-1] * (1 + Rt2[i])
}
#Plot of Et1 against the SPY Portfolio
plot(y = Et, x = t, type = "l", col = 1,
xlab = "Time", ylab= "Equity ($)",
main = "Figure 1-3: Randomly Generated Equity Curves")
grid()
abline(h = 10000)
lines(y = Et2, x = t, col = 2)
lines(y = Eb, x = t, col = 8)
legend(x = "topleft", col = c(1,2,8), lwd = 2, legend = c("Curve 1",
"Curve 2",
"SPY"))
The above code is what I have ran in order to produce the graphs above. If you are able to run it and get the same error let me know.
is there a simple way to define breaks instead of nbins for a 2d histogram (hist2d) in R?
I want to define the range for the x- and yaxis for a 2D histogram and the number of bins for each dimension.
My example:
# example data
x <- sample(-1:100, 2000, replace=T)
y <- sample(0:89, 2000, replace=T)
# create 2d histogram
h2 <- hist2d(x,y,nbins=c(23,19),xlim=c(-1,110), ylim=c(0,95),xlab='x',ylab='y',main='hist2d')
This results in this 2D histogram output 1
----------------------------
2-D Histogram Object
----------------------------
Call: hist2d(x = x, y = y, nbins = c(23, 19), xlab = "x", ylab = "y",
xlim = c(-1, 110), ylim = c(0, 95), main = "hist2d")
Number of data points: 2000
Number of grid bins: 23 x 19
X range: ( -1 , 100 )
Y range: ( 0 , 89 )
I need
X range: ( -1 , 110 )
Y range: ( 0 , 95 )
instead.
My attempt to define the xlim and ylim only extends the plot but does not define the axis range for the histogram. I know that there would be no data in the additional bins.
Is there a way to define
xbreaks = seq(-1,110,5)
ybreaks = seq(0,95,5)
instead of using nbins which divides the range from minimum to maximum into the given number of bins?
Thank you for your help
I changed the code a little bit and this version should work the with explicitly defining the breaks for both axes. First you have to load the function. Then you can give the x.breaks and y.breaks options with x.breaks=seq(0,10,0.1).
If same.scale is true, you only need x.breaks
The return value addionaly contains the number of bins and the relative counts.
Also, you can include a legend if wanted, by setting legend=TRUE. For that you need to have the package Fields
hist2d_breaks = function (x, y = NULL, nbins = 200,same.scale = FALSE, na.rm = TRUE,
show = TRUE, col = c("black", heat.colors(12)), FUN = base::length,
xlab, ylab,x.breaks,y.breaks, ...)
{
if (is.null(y)) {
if (ncol(x) != 2)
stop("If y is ommitted, x must be a 2 column matirx")
y <- x[, 2]
x <- x[, 1]
}
if (length(nbins) == 1)
nbins <- rep(nbins, 2)
nas <- is.na(x) | is.na(y)
if (na.rm) {
x <- x[!nas]
y <- y[!nas]
}
else stop("missinig values not permitted if na.rm=FALSE")
if(same.scale){
x.cuts = x.breaks;
y.cuts = x.breaks;
}else{
x.cuts <- x.breaks
y.cuts <- y.breaks
}
index.x <- cut(x, x.cuts, include.lowest = TRUE)
index.y <- cut(y, y.cuts, include.lowest = TRUE)
m <- tapply(x, list(index.x, index.y), FUN)
if (identical(FUN, base::length))
m[is.na(m)] <- 0
if (missing(xlab))
xlab <- deparse(substitute(xlab))
if (missing(ylab))
ylab <- deparse(substitute(ylab))
if (show){
if(legend){
image.plot(x.cuts, y.cuts, m, col = col, xlab = xlab, ylab = ylab,
...)
}else{
image(x.cuts, y.cuts, m, col = col, xlab = xlab, ylab = ylab,
...)
}
}
midpoints <- function(x) (x[-1] + x[-length(x)])/2
retval <- list()
retval$counts <- m
retval$counts_rel <- m/max(m)
retval$x.breaks = x.cuts
retval$y.breaks = y.cuts
retval$x = midpoints(x.cuts)
retval$y = midpoints(y.cuts)
retval$nobs = length(x)
retval$bins = c(length(x.cuts),length(y.cuts))
retval$call <- match.call()
class(retval) <- "hist2d"
retval
}
The call of (my data) then brings the following:
hist2d_breaks(df,x.breaks=seq(0,10,1),y.breaks=seq(-10,10,1),legend=TRUE)
brings up the following plot
2D Histogram with breaks
Revise the "hist2d" as follows
hist2d_range<-function (x, y = NULL, nbins = 200, same.scale = TRUE, na.rm = TRUE,
show = TRUE, col = c("black", heat.colors(12)), FUN = base::length,
xlab, ylab,range=NULL, ...)
{
if (is.null(y)) {
if (ncol(x) != 2)
stop("If y is ommitted, x must be a 2 column matirx")
y <- x[, 2]
x <- x[, 1]
}
if (length(nbins) == 1)
nbins <- rep(nbins, 2)
nas <- is.na(x) | is.na(y)
if (na.rm) {
x <- x[!nas]
y <- y[!nas]
}
else stop("missinig values not permitted if na.rm=FALSE")
if (same.scale) {
if(is.null(range))
{
x.cuts <- seq(from = min(x, y), to = max(x, y), length = nbins[1] +
1)
y.cuts <- seq(from = min(x, y), to = max(x, y), length = nbins[2] +
1)
}else{
x.cuts <- seq(from = range[1], to = range[2], length = nbins[1] + 1)
y.cuts <- seq(from = range[1], to = range[2], length = nbins[1] + 1)
}
}
else {
x.cuts <- seq(from = min(x), to = max(x), length = nbins[1] +
1)
y.cuts <- seq(from = min(y), to = max(y), length = nbins[2] +
1)
}
index.x <- cut(x, x.cuts, include.lowest = TRUE)
index.y <- cut(y, y.cuts, include.lowest = TRUE)
m <- tapply(x, list(index.x, index.y), FUN)
if (identical(FUN, base::length))
m[is.na(m)] <- 0
if (missing(xlab))
xlab <- deparse(substitute(xlab))
if (missing(ylab))
ylab <- deparse(substitute(ylab))
if (show)
image(x.cuts, y.cuts, m, col = col, xlab = xlab, ylab = ylab,
...)
midpoints <- function(x) (x[-1] + x[-length(x)])/2
retval <- list()
retval$counts <- m
retval$x.breaks = x.cuts
retval$y.breaks = y.cuts
retval$x = midpoints(x.cuts)
retval$y = midpoints(y.cuts)
retval$nobs = length(x)
retval$call <- match.call()
class(retval) <- "hist2d"
retval
}
This function has an additional argument "range".
The revised point is as follows.
if(is.null(range))
{
x.cuts <- seq(from = min(x, y), to = max(x, y), length = nbins[1] +
1)
y.cuts <- seq(from = min(x, y), to = max(x, y), length = nbins[2] +
1)
}else{
x.cuts <- seq(from = range[1], to = range[2], length = nbins[1] + 1)
y.cuts <- seq(from = range[1], to = range[2], length = nbins[1] + 1)
}
I need to compute the efficient frontier with different risk measure and to use a bootstrapping technique to simulate possible outcome. However, now I'm stuck: what I want to do is to generate via a loop (which will be integrated later into a function) multiple efficient frontier, each one associated to a possible future outcome, and to plot them on the same figure in such a way to see how they may change as the simulation goes on. Here is the loop that I wrote so far:
for (i in 1:B) {
idx <- sample(1:N, N, replace = TRUE)
new.x <- x[idx, ]
µ.b <- apply(X = new.x, 2, FUN = mean)
range.b[, i] <- seq(from = min(µ.b), to = max(µ.b), length.out = steps)
sigma.b <- apply(X = new.x, 2, FUN = sd)
riskCov.b[, i] <- sapply(range.b[, i], function(targetReturn) {
w <- MV_QP(new.x, targetReturn, Sigma)
sd(c(new.x %*% w))
})
xlim.b <- range(c(sigma.b, riskCov.b[, 1]), na.rm = TRUE)
ylim.b <- range(µ.b)
par(new = TRUE)
plot(x = riskCov.b[, i], y = range.b[, i], type = "l", xlim = xlim.b, ylim = ylim.b, xlab = "Risk", ylab = "Return", main = "Resampling EFs")
}
but the problem is that the elements on the x and y axis are rewriting each time the loop runs. How can this problem be solved?
I don't nknow if the optimization is correct. For ploting you can try the following:
for (i in 1:B) {
idx <- sample(1:N, N, replace = TRUE)
new.x <- x[idx, ]
µ.b <- apply(X = new.x, 2, FUN = mean)
range.b[, i] <- seq(from = min(µ.b), to = max(µ.b), length.out = steps)
#sigma.b <- apply(X = new.x, 2, FUN = sd)
riskCov.b[, i] <- sapply(range.b[, i], function(targetReturn) {
w <- MV_QP(new.x, targetReturn,Sigma=cov(new.x))
sd(c(new.x %*% w))
})
}
xlim.b <- range(c(apply(X = x, 2, FUN= sd), riskCov.b), na.rm = TRUE) *c(0.98,1.02)
ylim.b <- range(µ.b) *c(0.98,1.02)
#par(new = TRUE)
for (i in 1:B){
if (i==1) plot(x = riskCov.b[, i], y = range.b[, i], type = "l", xlim = xlim.b, ylim = ylim.b, xlab = "Risk", ylab = "Return", main = "Resampling EFs") else
lines(x = riskCov.b[, i], y = range.b[, i],col=rainbow(B)[i])
}
Depending on your data, you should end up with a similar plot: