Plotting the CDF and Quantile Functions Given the PDF in R

How would I plot the CDF and quantile functions in R if I have the PDF? Currently, I have the following (but I think there must be a better way to do it):
## Probability density function
p <- function(x) {
  result <- (x^2) / 9
  result[x < 0 | x > 3] <- 0
  result
}
plot(p, xlim = c(0, 3), main = "Probability Density Function")

## Cumulative distribution function
F <- function(a = 0, b) {
  result <- (b^3) / 27 - (a^3) / 27
  result[a < 0] <- 0
  result[b > 3] <- 1
  result
}
plot(function(x) F(0, x), xlim = c(0, 3), main = "Cumulative Distribution Function")

## Quantile function
Finv <- function(p) {
  3 * p^(1/3)
}

As @dash2 suggested, to get the CDF you need to integrate the PDF, i.e., find the area under the curve.
Here's a generic solution which should help. I am using a Gaussian distribution as an example; you should be able to feed it any density function.
Note that the quantiles reported are approximations only. Also, don't forget to look at the documentation for integrate().
# CDF function
CDF <- function(FUNC = p, plot = TRUE, area = 0.5, LOWER = -10, UPPER = 10, SIZE = 1000) {
  # Create data
  x <- seq(LOWER, UPPER, length.out = SIZE)
  y <- FUNC(x)
  # Cumulative area under the PDF at each grid point
  area.vec <- numeric(length(x))
  area.vec[1] <- 0
  for (i in 2:length(x)) {
    area.vec[i] <- integrate(FUNC, lower = x[1], upper = x[i])$value
  }
  # Quantile: grid point whose cumulative area is closest to the requested area
  quantile <- x[which.min(abs(area.vec - area))]
  # Plot if requested
  if (plot) {
    # PDF
    par(mfrow = c(1, 2))
    plot(x, y, type = "l", main = "PDF", col = "indianred", lwd = 2)
    grid()
    # CDF
    plot(x, area.vec, type = "l", main = "CDF", col = "slateblue",
         xlab = "X", ylab = "CDF", lwd = 2)
    # Quantile
    mtext(text = paste("Quantile at", area, "=", round(quantile, 3)), side = 3)
    grid()
    par(mfrow = c(1, 1))
  }
  invisible(quantile)
}
# Sample data
# PDF function: Gaussian distribution
p <- function(x, SD = 1, MU = 0) {
  y <- (1 / (SD * sqrt(2 * pi))) * exp(-0.5 * ((x - MU) / SD)^2)
  return(y)
}
# Call to function
CDF(p, area = 0.5, LOWER = -5, UPPER = 5)
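For the original x^2/9 density on [0, 3], a lighter-weight variant along the same lines (a sketch of mine, not part of the answer above; the helper names pdf3, cdf3 and qf3 are made up) is to wrap integrate() for the CDF and invert it numerically with uniroot() for the quantile function:
# Numeric CDF and quantile function for the x^2/9 density on [0, 3]
pdf3 <- function(x) ifelse(x >= 0 & x <= 3, x^2 / 9, 0)
cdf3 <- function(q) sapply(q, function(q) integrate(pdf3, 0, q)$value)
qf3  <- function(p) sapply(p, function(p)
  uniroot(function(q) cdf3(q) - p, interval = c(0, 3))$root)
curve(cdf3(x), from = 0, to = 3, main = "Cumulative Distribution Function")
curve(qf3(x),  from = 0, to = 1, main = "Quantile Function")
# Check against the closed form Finv(p) = 3 * p^(1/3)
qf3(0.5); 3 * 0.5^(1/3)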

Related

How to create a kernel density estimation with R?

I would like to program a kernel density estimate (with the Epanechnikov kernel [1], for example). I tried the code below [2], putting the manual estimate (blue) and the default density() estimate (red) on the same figure (see attached), but there is always a difference between the two density curves!
[1] The analytic form of the Epanechnikov kernel is kappa(u) = 1 - u^2 on the support |u| <= 1, with u = (x - x_i)/h.
[2] My trial code:
x <- faithful$eruptions
fit2 <- density(x, bw = 0.6, kernel = "epanechnikov")
xgrid <- seq(-1, 8, 0.1)
kernelEpan <- function(x, obs, h) sum((1 - ((x - obs) / h)^2) * (abs(x - obs) <= h)) / h
plot(xgrid, sapply(xgrid, FUN = kernelEpan, obs = faithful$eruptions, h = 0.6) / length(faithful$eruptions),
     type = "l", col = "blue")
lines(fit2, col = "red")
If you read the docs for bw in the density function, you will see:
bw : the smoothing bandwidth to be used. The kernels are scaled such that this is the standard deviation of the smoothing kernel.
This means that for your function's h parameter to match the behaviour of the bw parameter, you need to rescale h by multiplying it by sqrt(5) (the Epanechnikov kernel on [-1, 1] has standard deviation 1/sqrt(5)).
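As a quick numerical check (my addition, not part of the original answer), an Epanechnikov kernel rescaled to half-width bw * sqrt(5) does indeed have standard deviation bw:
# Verify that the kernel with half-width a = bw * sqrt(5) has variance bw^2
bw <- 0.6
a  <- bw * sqrt(5)
k  <- function(u) 0.75 / a * (1 - (u / a)^2) * (abs(u) <= a)    # normalised Epanechnikov kernel
integrate(function(u) u^2 * k(u), lower = -a, upper = a)$value  # ~ 0.36 = bw^2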
I would be tempted to vectorize your function, which allows you to normalize it accurately too:
kernelEpan <- function(xvals, obs, h) {
  h <- h * sqrt(5)                      # rescale so h matches density()'s bw
  dens <- sapply(xvals, function(x) {
    u <- abs(x - obs) / h
    u <- ifelse(u > 1, 1, u)            # observations outside the window contribute 0
    sum(1 - u^2)
  })
  dens / sum(dens * mean(diff(xvals)))  # normalise so the estimate integrates to 1
}
This allows:
fit1 <- kernelEpan(xgrid, obs = faithful$eruptions, h = 0.6)
fit2 <- density(x, bw = 0.6, kernel = "epanechnikov")
plot(xgrid, fit1, type = "l", col = "blue")
lines(fit2, col = "red")

Understanding "levels" in r contour function of bivariate distribution

I have trouble understanding how to set the levels in the plot of a bivariate distribution in R. The documentation states that I can choose the levels by setting a
numeric vector of levels at which to draw contour lines
Now I would like the contour to show the limit containing 95% of the density or mass. But if, in the example below (adapted from here), I set the vector as a <- c(.95, .90), the code runs without error but no contour lines are drawn. If instead I set the vector as a <- c(.01, .05), the plot is displayed. But I am not sure I understand what the labels "0.01" and "0.05" mean with respect to the density.
library(mnormt)
x <- seq(-5, 5, 0.25)
y <- seq(-5, 5, 0.25)
mu1 <- c(0, 0)
sigma1 <- matrix(c(2, -1, -1, 2), nrow = 2)
f <- function(x, y) dmnorm(cbind(x, y), mu1, sigma1)
z <- outer(x, y, f)
a <- c(.01,.05)
contour(x, y, z, levels = a)
But I am not sure I understand what the labels "0.01" and "0.05" mean with respect to the density.
It means the points where the density is equal to 0.01 and 0.05. From help("contour"):
numeric vector of levels at which to draw contour lines.
So these are the function values at which to draw the lines (contours), i.e., where the function (in this case the density) equals those levels. A simple example which may help is x + y:
y <- x <- seq(0, 1, length.out = 50)
z <- outer(x, y, `+`)
par(mar = c(5, 5, 1, 1))
contour(x, y, z, levels = c(0.5, 1, 1.5))
Now I would like the contour to show the limit containing 95% of the density or mass.
In your example, you can follow my answer here and draw the exact points:
# input
mu1 <- c(0, 0)
sigma1 <- matrix(c(2, -1, -1, 2), nrow = 2)
# we start from points on the unit circle
n_points <- 100
xy <- cbind(sin(seq(0, 2 * pi, length.out = n_points)),
            cos(seq(0, 2 * pi, length.out = n_points)))
# then we scale the dimensions
ev <- eigen(sigma1)
xy[, 1] <- xy[, 1] * 1
xy[, 2] <- xy[, 2] * sqrt(min(ev$values) / max(ev$values))
# then rotate
phi <- atan(ev$vectors[2, 1] / ev$vectors[1, 1])
R <- matrix(c(cos(phi), sin(phi), -sin(phi), cos(phi)), 2)
xy <- tcrossprod(R, xy)
# find the right length; you can change .95 to whichever
# quantile you want
chi_vals <- qchisq(.95, df = 2) * max(ev$values)
s <- sqrt(chi_vals)
par(mar = c(5, 5, 1, 1))
plot(s * xy[1, ] + mu1[1], s * xy[2, ] + mu1[2], lty = 1,
     type = "l", xlab = "x", ylab = "y")
The levels indicate where the lines are drawn with respect to the specific z value of the bivariate normal density. Since max(z) is 0.09188815, the levels a <- c(.95, .90) can't be drawn.
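To see where that maximum comes from (my addition, not part of the original answer): a bivariate normal density peaks at its mean, where it equals 1 / (2 * pi * sqrt(det(Sigma))):
1 / (2 * pi * sqrt(det(sigma1)))  # 0.09188815, so levels 0.90 and 0.95 lie above the whole surface
max(z)                            # same value, since the grid includes the mean (0, 0)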
To draw the line delimiting 95% of the mass I used the ellipse() function as suggested in this post (second answer from the top).
library(mixtools)
library(mnormt)
x <- seq(-5, 5, 0.25)
y <- seq(-5, 5, 0.25)
mu1 <- c(0, 0)
sigma1 <- matrix(c(2, -1, -1, 2), nrow = 2)
f <- function(x, y) dmnorm(cbind(x, y), mu1, sigma1)
z <- outer(x, y, f)
a <- c(0.01, 0.05)
contour(x, y, z, levels = a)
ellipse(mu = mu1, sigma = sigma1, alpha = 0.05, npoints = 250, col = "red")
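Alternatively (a sketch of my own, not taken from the answers above), for a bivariate normal you can compute the density level whose contour encloses 95% of the mass and pass it straight to contour(): that contour is the set of points whose squared Mahalanobis distance equals qchisq(0.95, df = 2), so the corresponding density value is:
level95 <- exp(-qchisq(0.95, df = 2) / 2) / (2 * pi * sqrt(det(sigma1)))
contour(x, y, z, levels = level95, labels = "95%", add = TRUE, col = "blue")  # overlays the red ellipse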
I also found another solution in the book "Applied Multivariate Statistics with R" by Daniel Zelterman.
# Figure 6.5: Bivariate confidence ellipse
library(datasets)
library(MASS)
library(MVA)
#> Loading required package: HSAUR2
#> Loading required package: tools
biv <- swiss[, 2:3]   # Extract bivariate data
bivCI <- function(s, xbar, n, alpha, m) {
  # returns m (x, y) coordinates of the 1 - alpha joint confidence ellipse of the mean
  x <- sin(2 * pi * (0:(m - 1)) / (m - 1))   # m points on a unit circle
  y <- cos(2 * pi * (0:(m - 1)) / (m - 1))
  cv <- qchisq(1 - alpha, 2)                 # chi-squared critical value
  cv <- cv / n                               # value of quadratic form
  for (i in 1:m) {
    pair <- c(x[i], y[i])                    # ith (x, y) pair
    q <- pair %*% solve(s, pair)             # quadratic form
    x[i] <- x[i] * sqrt(cv / q) + xbar[1]
    y[i] <- y[i] * sqrt(cv / q) + xbar[2]
  }
  return(cbind(x, y))
}
### pdf(file = "bivSwiss.pdf")
plot(biv, col = "red", pch = 16, cex.lab = 1.5)
lines(bivCI(var(biv), colMeans(biv), dim(biv)[1], .01, 1000), type = "l",
      col = "blue")
lines(bivCI(var(biv), colMeans(biv), dim(biv)[1], .05, 1000),
      type = "l", col = "green", lwd = 1)
lines(colMeans(biv)[1], colMeans(biv)[2], pch = 3, cex = .8, type = "p",
      lwd = 1)

Performing residual bootstrap using kernel regression in R

Kernel regression is a non-parametric technique for estimating the conditional expectation of a random variable. It uses local averaging of the response, Y, to find a possibly non-linear relationship between X and Y.
I have used the bootstrap for kernel density estimation and now want to use it for kernel regression as well. I have been told to use residual bootstrapping for kernel regression and have read a couple of papers on this. However, I am unsure how to perform it. The programming has been done in R using the FKSUM package. I have made an attempt at standard resampling on kernel regression:
library(FKSUM)
set.seed(1)
n <- 5000
sample.size <- 500
B.replications <- 200

x <- rbeta(n, 2, 2) * 10
y <- 3 * sin(2 * x) + 10 * (x > 5) * (x - 5)
y <- y + rnorm(n) + (rgamma(n, 2, 2) - 1) * (abs(x - 5) + 3)
# taking x.y to be the population
x.y <- data.frame(x, y)
xs <- seq(min(x), max(x), length = 1000)
ftrue <- 3 * sin(2 * xs) + 10 * (xs > 5) * (xs - 5)

# Sample from the population
seqx <- seq(1, 5000, by = 1)
sample.ind <- sample(seqx, size = sample.size, replace = FALSE)
sample.reg <- x.y[sample.ind, ]
x_s <- sample.reg$x
y_s <- sample.reg$y

fhat_loc_lin.pop <- fk_regression(x, y)
fhat_loc_lin.sample <- fk_regression(x = x_s, y = y_s)

plot(x, y, col = rgb(.7, .7, .7, .3), pch = 16, xlab = 'x',
     ylab = 'y', main = 'Local linear estimator with amise bandwidth')
lines(xs, ftrue, col = 2, lwd = 3)
lines(fhat_loc_lin.pop, lty = 2, lwd = 2)   # estimator fitted on the full population

# Bootstrap
n.B.sample <- sample.size   # bootstrap sample size
boot.reg.mat.X <- matrix(0, ncol = B.replications, nrow = n.B.sample)
boot.reg.mat.Y <- matrix(0, ncol = B.replications, nrow = n.B.sample)
fhat_loc_lin.boot <- matrix(0, ncol = B.replications, nrow = 100)
Temp.reg.y <- matrix(0, ncol = B.replications, nrow = 1000)

for (i in 1:B.replications) {
  sequence.x.boot <- seq(from = 1, to = n.B.sample, by = 1)
  sample.ind.boot <- sample(sequence.x.boot, size = sample.size, replace = TRUE)
  boot.reg.mat <- sample.reg[sample.ind.boot, ]
  boot.reg.mat.X <- boot.reg.mat$x
  boot.reg.mat.Y <- boot.reg.mat$y
  fhat_loc_lin.boot <- fk_regression(x = boot.reg.mat.X,
                                     y = boot.reg.mat.Y,
                                     h = fhat_loc_lin.sample$h)
  lines(y = fhat_loc_lin.boot$y, x = fhat_loc_lin.sample$x, col = c(i))
  Temp.reg.y[, i] <- fhat_loc_lin.boot$y
}

quan.reg.l <- vector()
quan.reg.u <- vector()
for (i in 1:length(xs)) {
  quan.reg.l[i] <- quantile(x = Temp.reg.y[i, ], probs = 0.025)
  quan.reg.u[i] <- quantile(x = Temp.reg.y[i, ], probs = 0.975)
}
# Lower bound
Temp.reg.2 <- quan.reg.l
lines(y = Temp.reg.2, x = fhat_loc_lin.boot$x, col = "red", lwd = 4, lty = 1)
# Upper bound
Temp.reg.3 <- quan.reg.u
lines(y = Temp.reg.3, x = fhat_loc_lin.boot$x, col = "navy", lwd = 4, lty = 1)
I'm asking the question here now since I haven't received any response on CV. Any help would be greatly appreciated!
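For what it's worth, here is a minimal residual-bootstrap sketch built on the same objects (my own outline of the usual recipe, not a tested FKSUM workflow; it assumes, as in the code above, that fk_regression() returns a list with x, y and h components, and it interpolates the fit at the observed x's with approx()):
# Residual bootstrap sketch: fit once, resample centred residuals, refit
fit      <- fk_regression(x_s, y_s)
fitted_s <- approx(fit$x, fit$y, xout = x_s)$y   # fitted values at the sampled x's
res      <- y_s - fitted_s
res      <- res - mean(res)                      # centre the residuals
boot.fits <- replicate(B.replications, {
  y.star <- fitted_s + sample(res, replace = TRUE)   # rebuild responses around the fit
  fk_regression(x_s, y.star, h = fit$h)$y            # refit with the same bandwidth
})
# pointwise 95% band on the estimation grid fit$x
band <- apply(boot.fits, 1, quantile, probs = c(0.025, 0.975))
lines(fit$x, band[1, ], col = "red",  lwd = 2)
lines(fit$x, band[2, ], col = "navy", lwd = 2)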

Calculate 5th quantile of curve generated from vectors of X, Y points

I have these curves below:
These curves were generated using a library called discreteRV.
library(discreteRV)

placebo.rate <- 0.5
mmm.rate <- 0.3
mmm.power <- power.prop.test(p1 = placebo.rate, p2 = mmm.rate, power = 0.8, alternative = "one.sided")
n <- as.integer(ceiling(mmm.power$n))
patients <- seq(from = 0, to = n, by = 1)

placebo_distribution <- dbinom(patients, size = n, prob = placebo.rate)
mmm_distribution <- dbinom(patients, size = n, prob = mmm.rate)

get_pmf <- function(p1, p2) {
  X1 <- RV(patients, p1, fractions = F)
  X2 <- RV(patients, p2, fractions = F)
  pmf <- joint(X1, X2, fractions = F)
  return(pmf)
}

extract <- function(string) {
  ints <- unlist(strsplit(string, ","))
  x1 <- as.integer(ints[1])
  x2 <- as.integer(ints[2])
  return(x1 - x2)
}

diff_prob <- function(pmf) {
  diff <- unname(sapply(outcomes(pmf), FUN = extract) / n)
  probabilities <- unname(probs(pmf))
  df <- data.frame(diff, probabilities)
  df <- aggregate(. ~ diff, data = df, FUN = sum)
  return(df)
}

most_likely_rate <- function(x) {
  x[which(x$probabilities == max(x$probabilities)), ]$diff
}

mmm_rate_diffs <- diff_prob(get_pmf(mmm_distribution, placebo_distribution))
placebo_rate_diffs <- diff_prob(get_pmf(placebo_distribution, placebo_distribution))

plot(mmm_rate_diffs$diff, mmm_rate_diffs$probabilities * 100, type = "l", lty = 2,
     xlab = "Rate difference", ylab = "# of trials per 100",
     main = paste("Trials with", n, "patients per treatment arm", sep = " "))
lines(placebo_rate_diffs$diff, placebo_rate_diffs$probabilities * 100, lty = 1, xaxs = "i")
abline(v = c(most_likely_rate(placebo_rate_diffs), most_likely_rate(mmm_rate_diffs)), lty = c(1, 2))
legend("topleft", legend = c("Alternative hypothesis", "Null hypothesis"), lty = c(2, 1))
Basically, I took two binomial discrete random variables, created a joint probability mass function, determined the probability of any given rate difference, and then plotted the results to show the distribution of those rate differences over 100 identical trials if either the null or the alternative hypothesis were true.
Now I want to illustrate the 5th percentile on the null-hypothesis curve. Unfortunately, I don't know how to do this. If I simply use quantile(x = placebo_rate_diffs$diff, probs = 0.05), I get -0.377027, which can't be correct looking at the graph. I want to calculate the 5th percentile like I would using pbinom(), but I don't know how to do that with a curve built from what are essentially just x and y vectors.
Maybe I could approximate these two curves as binomial, since they appear to be, but I am still not sure how to do that.
Any help would be appreciated.
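One possible approach (a sketch of mine, not an answer from the thread): treat the curve as a discrete distribution whose support is placebo_rate_diffs$diff with weights placebo_rate_diffs$probabilities, accumulate the probabilities into a CDF, and read off the smallest rate difference whose cumulative probability reaches 5%:
# 5th percentile of the null-hypothesis curve from its (x, probability) pairs
df  <- placebo_rate_diffs[order(placebo_rate_diffs$diff), ]   # sort by rate difference
cdf <- cumsum(df$probabilities)                               # discrete CDF
q05 <- df$diff[which(cdf >= 0.05)[1]]                         # first diff with CDF >= 5%
abline(v = q05, lty = 3)                                      # mark it on the existing plot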

Add the new regression line but keep the regression lines from previous runs in R

Background
I have a function called TPN (R code is below the picture). When you run this function, it produces two plots (see picture below). The bottom-row plot samples from the top-row plot and then adds a red regression line. Each time you run the TPN function, the bottom-row plot produces a new red-colored regression line.
Question
In the bottom-row plot, I was wondering if there is a way I could KEEP the regression lines from previous runs each time I run the TPN function (see picture below)?
That is, each time I run the TPN function, the regression line from the previous run is kept in place (probably in a color other than red, for distinction), and the new regression line is simply added to the bottom-row plot?
############## Input Values #################
TPN = function( each.sub.pop.n = 150,
                sub.pop.means = 20:10,
                predict.range = 10:0,
                sub.pop.sd = .75,
                n.sample = 2 ) {
  #############################################
  par( mar = c(2, 4.1, 2.1, 2.1) )
  m = matrix( c(1, 2), nrow = 2, ncol = 1 ); layout(m)
  set.seed(2460986)
  Vec.rnorm <- Vectorize(function(n, mean, sd) rnorm(n, mean, sd), 'mean')
  y <- c( Vec.rnorm(each.sub.pop.n, sub.pop.means, sub.pop.sd) )
  set.seed(NULL)
  x <- rep(predict.range, each = each.sub.pop.n)
  plot(x, y, ylim = range(y))        ## Top-Row Plot
  sample <- lapply(split(y, x), function(z) sample(z, n.sample, replace = TRUE))
  sample <- data.frame(y = unlist(sample),
                       x = as.numeric(rep(names(sample), each = n.sample)))
  x = sample$x ; y = sample$y
  plot(x, y, ylim = range(y))        #### BOTTOM-ROW PLOT
  abline(lm(y ~ x), col = 'red')     # Regression Line
}
## TEST HERE:
TPN()
It ain't that easy. I made another function and edited the first one as well.
To summarize what I have done:
I made the first function set par(new = TRUE) at the end. I also set the color of the points in the bottom-row plot to white, purely for formatting; you can get rid of col = 'white', bg = 'white' if you wish.
Then, in the second function, the top-row plot does not get plotted and a y-axis won't be added to the bottom-row plot on each "test".
Look below:
############## Input Values #################
TPN = function( each.sub.pop.n = 150,
                sub.pop.means = 20:10,
                predict.range = 10:0,
                sub.pop.sd = .75,
                n.sample = 2 ) {
  #############################################
  par( mar = c(2, 4.1, 2.1, 2.1) )
  m = matrix( c(1, 2), nrow = 2, ncol = 1 ); layout(m)
  set.seed(2460986)
  Vec.rnorm <- Vectorize(function(n, mean, sd) rnorm(n, mean, sd), 'mean')
  y <- c( Vec.rnorm(each.sub.pop.n, sub.pop.means, sub.pop.sd) )
  set.seed(NULL)
  x <- rep(predict.range, each = each.sub.pop.n)
  par(new = FALSE)
  plot(x, y, ylim = range(y))        ## Top-Row Plot
  sample <- lapply(split(y, x), function(z) sample(z, n.sample, replace = TRUE))
  sample <- data.frame(y = unlist(sample),
                       x = as.numeric(rep(names(sample), each = n.sample)))
  x = sample$x ; y = sample$y
  plot(x, y, ylim = range(y), col = 'white', bg = 'white')   #### BOTTOM-ROW PLOT
  abline(lm(y ~ x), col = 'red')     # Regression Line
  par(new = TRUE)
}
The second function does not plot the top-row plot:
############## Input Values #################
TPN2 = function( each.sub.pop.n = 150,
                 sub.pop.means = 20:10,
                 predict.range = 10:0,
                 sub.pop.sd = .75,
                 n.sample = 2 ) {
  #############################################
  par( mar = c(2, 4.1, 2.1, 2.1) )
  m = matrix( c(1, 2), nrow = 2, ncol = 1 ); layout(m)
  set.seed(2460986)
  Vec.rnorm <- Vectorize(function(n, mean, sd) rnorm(n, mean, sd), 'mean')
  y <- c( Vec.rnorm(each.sub.pop.n, sub.pop.means, sub.pop.sd) )
  set.seed(NULL)
  x <- rep(predict.range, each = each.sub.pop.n)
  # par(new = FALSE)               # comment-out
  # plot(x, y, ylim = range(y))    ## Top-Row Plot  # comment-out
  sample <- lapply(split(y, x), function(z) sample(z, n.sample, replace = TRUE))
  sample <- data.frame(y = unlist(sample),
                       x = as.numeric(rep(names(sample), each = n.sample)))
  x = sample$x ; y = sample$y
  plot(x, y, ylim = range(y), axes = FALSE, col = 'white', bg = 'white')   ## BOTTOM-ROW PLOT
  abline(lm(y ~ x), col = 'blue')  # Regression Line
  par(new = TRUE)
}
Then your test would be like this:
## TEST HERE:
TPN()
TPN2()
TPN2()
TPN2()
This is the output:
A simple way to do what you want is to change your main effect (currently none) to return an accumulation of previous regressions and your side effect (plotting) to loop through these previous regressions (in blue) in addition to the current one (in red).
Another tip: you can use abline()'s reg argument, i.e. abline(reg = lm(y ~ x)), and just accumulate the lm objects in a list. It's not necessary to store coefficients and intercepts separately as suggested in the other answer. Keeping the lm objects is also a good idea in case you want to go back and look at average R-squared, etc.; you couldn't do that using only the coefficients.
Your new function could look like:
TPN.accum <- function( each.sub.pop.n = 150,
                       sub.pop.means = 20:10,
                       predict.range = 10:0,
                       sub.pop.sd = .75,
                       n.sample = 2,
                       lm.history = list()   # the accumulator
                     ) {
  par( mar = c(2, 4.1, 2.1, 2.1) )
  m <- matrix( c(1, 2), nrow = 2, ncol = 1 ); layout(m)
  set.seed(2460986)
  Vec.rnorm <- Vectorize(function(n, mean, sd) rnorm(n, mean, sd), 'mean')
  y <- c( Vec.rnorm(each.sub.pop.n, sub.pop.means, sub.pop.sd) )
  set.seed(NULL)
  x <- rep(predict.range, each = each.sub.pop.n)
  plot(x, y, ylim = range(y))                   ### Top-Row Plot
  sample <- lapply(split(y, x), function(z) sample(z, n.sample, replace = TRUE))
  sample <- data.frame(y = unlist(sample),
                       x = as.numeric(rep(names(sample), each = n.sample)))
  x <- sample$x ; y <- sample$y
  lm.current <- lm(y ~ x)                       # the current regression
  plot(x, y, ylim = range(y))                   ### Bottom-Row Plot
  abline(reg = lm.current, col = 'red')         # plot current regression (red)
  for ( i in seq_along(lm.history) ) {
    abline(reg = lm.history[[i]], col = 'blue') # plot any previous regressions (blue)
  }
  return(c(lm.history, list(lm.current)))       # append current regression to accumulator
}
To initialize it and then run it repeatedly, just do something like:
tpn.history <- TPN.accum()
for (i in 1:5) tpn.history <- TPN.accum(lm.history=tpn.history)
And your output will look like:
I propose two possibilities:
Use par(mfg) to define which panel to draw on, so that you can add new points or lines to either of the two. For the color, I propose adding options saying whether this is the first or the last plot of the series.
Store the coefficients of the abline to be used on other plots.
Use par(mfg)
I used a transparent color so that we do not see the superimposition of every iteration. Depending on what you want to achieve, you can modify this.
############## Input Values #################
TPN <- function(each.sub.pop.n = 150,
                sub.pop.means = 20:10,
                predict.range = 10:0,
                sub.pop.sd = .75,
                n.sample = 2,
                plot = TRUE,
                first = FALSE,
                last = FALSE) {
  #############################################
  if (plot & first) {
    plot.new()
    m <- matrix( c(1, 2), nrow = 2, ncol = 1 ); layout(m)
    par( mar = c(2, 4.1, 2.1, 2.1) )
  }
  set.seed(2460986)
  Vec.rnorm <- Vectorize(function(n, mean, sd) rnorm(n, mean, sd), 'mean')
  y <- c( Vec.rnorm(each.sub.pop.n, sub.pop.means, sub.pop.sd) )
  set.seed(NULL)
  x <- rep(predict.range, each = each.sub.pop.n)
  if (plot) {
    par(mfg = c(1, 1))   ## Top-Row Plot
    if (first) {
      plot(x, y, ylim = range(y), col = "transparent")
    } else if (last) {
      plot(x, y, ylim = range(y))
    }
  }
  sample <- lapply(split(y, x), function(z) sample(z, n.sample, replace = TRUE))
  sample <- data.frame(y = unlist(sample),
                       x = as.numeric(rep(names(sample), each = n.sample)))
  x = sample$x ; y = sample$y
  if (plot) {
    par(mfg = c(2, 1))   #### BOTTOM-ROW PLOT
    if (first) {
      plot(x, y, ylim = range(y), col = "transparent")
    }
    if (last) {
      points(x, y)
    }
    abline(lm(y ~ x), col = c('blue', 'red')[(last) + 1])   # Regression Line
  }
}
## TEST HERE:
n <- 10
for (i in 1:n) {
  TPN(first = ifelse(i == 1, TRUE, FALSE), last = ifelse(i == n, TRUE, FALSE))
}
Store the abline coefficients
There is no need for transparent colors here because a new plot is created for each iteration.
############## Input Values #################
TPN <- function(each.sub.pop.n = 150,
                sub.pop.means = 20:10,
                predict.range = 10:0,
                sub.pop.sd = .75,
                n.sample = 2,
                plot = TRUE,
                coefs = FALSE,
                coefsup = NULL) {
  #############################################
  if (plot) {
    m <- matrix( c(1, 2), nrow = 2, ncol = 1 )
    layout(m)
    par( mar = c(2, 4.1, 2.1, 2.1) )
  }
  set.seed(2460986)
  Vec.rnorm <- Vectorize(function(n, mean, sd) rnorm(n, mean, sd), 'mean')
  y <- c( Vec.rnorm(each.sub.pop.n, sub.pop.means, sub.pop.sd) )
  set.seed(NULL)
  x <- rep(predict.range, each = each.sub.pop.n)
  if (plot) {
    plot(x, y, ylim = range(y))
  }
  sample <- lapply(split(y, x), function(z) sample(z, n.sample, replace = TRUE))
  sample <- data.frame(y = unlist(sample),
                       x = as.numeric(rep(names(sample), each = n.sample)))
  x = sample$x ; y = sample$y
  if (plot) {
    plot(x, y, ylim = range(y))
    # Add the previous lines if they exist
    if (!is.null(coefsup)) {
      apply(coefsup, 1, function(x) abline(a = x[1], b = x[2], col = "blue"))
    }
    abline(lm(y ~ x), col = 'red')   # Regression Line
  }
  if (coefs) {return(coef(lm(y ~ x)))}
}
# TEST with coefs
n <- 10
coefsup <- NULL
for (i in 1:n) {
  coefsup <- rbind(coefsup, TPN(coefs = TRUE, coefsup = coefsup))
}
In both cases, the output is what you expect:
