I'm trying to reproduce some research on the fitting LPPL on stock indices for bubble prediction and I'm having trouble with fitting the model to the data. I've been using the following papers for insight on this project: http://arxiv.org/pdf/1002.1010v2.pdf where they've already done some testing on the HSI, http://arxiv.org/pdf/0905.0220v1.pdf where I originally got my idea.
I've also tried to reproduce the findings from this stackoverflow post with no success (ran into multiple similar issues i.e. the max iterator has been reached, singular gradient matrix errors again): NLS And Log-Periodic Power Law (LPPL) in R
Not having much success with using daily prices to fit the model, I used weekly prices on the S&P following the advice in the conclusion of the HSI LPPL paper that the data should be "smoothed" in a way.
Here is the code I'm using. Advice on how to fix my issues would be much appreciated!
library(zoom)
library(minpack.lm)
library(tseries)
library(zoo)
#grab S&P500 historical
ts <- get.hist.quote(instrument="^GSPC",
start="2003-02-15", end="2007-10-31",
quote="Close", provider="yahoo", origin="1970-01-01",
compression="w", retclass="zoo")
df <- data.frame(ts)
df <- data.frame(Date=as.Date(rownames(df)),Y=df$Close)
df <- df[!is.na(df$Y),]
df$days <- as.numeric(df$Date - df[1,]$Date)
ts <- get.hist.quote(instrument="^GSPC",
start="1997-10-04", end="2011-10-12",
quote="Close", provider="yahoo", origin="1970-01-01",
compression="w", retclass="zoo")
df2 <- data.frame(ts)
df2 <- data.frame(Date=as.Date(rownames(df2)),Y=df2$Close)
df2 <- df2[!is.na(df2$Y),]
df2$days <- as.numeric(df2$Date - df2[1,]$Date)
f <- function(pars, xx)
with(pars,(a + ((tc - xx)^m) *b + c *(tc - xx)^m* cos(omega*log(tc - xx))+d *(tc - xx)^m* cos(omega*log(tc - xx))))
# residual function
resids <- function(p, observed, xx) {df$Y - f(p,xx)}
plot(df2$Date,df2$Y,type="l")
lines(df$Date,df$Y,type="l")
points(df$Date,df$Y,type="p")
pp = list(a=1662.239,b=-0.483332,tc=2050, m=0.97, omega=5, c=566, d=-566)
lines(df$Date,f(pars=pp,df$days),type="l")
nls.out <- nls.lm(par=pp, fn = resids, observed = df$Y, xx = df$days, control=nls.lm.control(maxiter=1000),lower = c(a = -Inf, b = -Inf, tc = 2008, m = 0.1, omega = 0.1, c = -Inf, d = -Inf), upper = c(a = Inf, b = -0.01, tc = 2050, m = 0.97, omega = 15, c = 3000, d = 3000))
par <- nls.out$par
par
lines(df$Date,f(par,df$days), col ="blue")
nls.out <- nls.lm(par=nls.out$par, fn = resids, observed = df$Y, xx = df$days, control=nls.lm.control(maxiter=1000),lower = c(a = -Inf, b = -Inf, tc = 2008, m = 0.1, omega = 3, c = -Inf, d = -Inf), upper = c(a = Inf, b = -0.01, tc = 2025, m = 0.999, omega = 10, c = Inf, d = Inf))
lines(df$Date,f(nls.out$par,df$days), col ="purple")
ppp = nls.out$par
lines(df$Date,f(ppp,df$days), col ="purple")
nls.final <- nls(Y~(a + ((tc - df$days)^m) * (b + c * cos(omega*log(tc - df$days))+d * cos(omega*log(tc - df$days)))), data=df, start=ppp, algorithm="port", control=nls.control(maxiter=1000, minFactor=1e-8), lower = c(a = -Inf, b = -Inf, tc = 2007, m = 0.01, omega = 6, c = -Inf, d = -Inf), upper = c(a = Inf, b = 0, tc = 2010, m = 0.999, omega = 10, c = Inf, d = Inf))
summary(nls.final) # display statistics of the fit
lines(df$Date,fitted(nls.final), col = "red")
# append fitted values to df
df$pred <- predict(nls.final, interval = "confidence")
summ = coef(summary(nls.final))
Related
I want to get a 95% confidence interval for the following question.
I have written function f_n in my R code. I first randomly sample 100 with Normal and then I define function h for lambda. Then I can get f_n. My question is that how to define a function of f_n-chi-square and use uniroot` to find Confidence interval.
# I first get 100 samples
set.seed(201111)
x=rlnorm(100,0,2)
Based on the answer by #RuiBarradas, I try the following code.
set.seed(2011111)
# I define function h, and use uniroot function to find lambda
h <- function(lam, n)
{
sum((x - theta)/(1 + lam*(x - theta)))
}
# sample size
n <- 100
# the parameter of interest must be a value in [1, 12],
#true_theta<-1
#true_sd<- exp(2)
#x <- rnorm(n, mean = true_theta, sd = true_sd)
x=rlnorm(100,0,2)
xmax <- max(x)
xmin <- min(x)
theta_seq = seq(from = 1, to = 12, by = 0.01)
f_n <- rep(NA, length(theta_seq))
for (i in seq_along(theta_seq))
{
theta <- theta_seq[i]
lambdamin <- (1/n-1)/(xmax - theta)
lambdamax <- (1/n-1)/(xmin - theta)
lambda = uniroot(h, interval = c(lambdamin, lambdamax), n = n)$root
f_n[i] = -sum(log(1 + lambda*(x - theta)))
}
j <- which.max(f_n)
max_fn <- f_n[j]
mle_theta <- theta_seq[j]
plot(theta_seq, f_n, type = "l",
main = expression(Estimated ~ theta),
xlab = expression(Theta),
ylab = expression(f[n]))
points(mle_theta, f_n[j], pch = 19, col = "red")
segments(
x0 = c(mle_theta, xmin),
y0 = c(min(f_n)*2, max_fn),
x1 = c(mle_theta, mle_theta),
y1 = c(max_fn, max_fn),
col = "red",
lty = "dashed"
)
I got the following plot of f_n.
For 95% CI, I try
LR <- function(theta, lambda)
{
2*sum(log(1 + lambda*(x - theta))) - qchisq(0.95, df = 1)
}
lambdamin <- (1/n-1)/(xmax - mle_theta)
lambdamax <- (1/n-1)/(xmin - mle_theta)
lambda <- uniroot(h, interval = c(lambdamin, lambdamax), n = n)$root
uniroot(LR, c(xmin, mle_theta), lambda = lambda)$root
The result is 0.07198144. Then the logarithm is log(0.07198144)=-2.631347.
But there is NA in the following code.
uniroot(LR, c(mle_theta, xmax), lambda = lambda)$root
So the 95% CI is theta >= -2.631347.
But the question is that the 95% CI should be a closed interval...
Here is a solution.
First of all, the data generation code is wrong, the parameter theta is in the interval [1, 12], and the data is generated with rnorm(., mean = 0, .). I change this to a true_theta = 5.
set.seed(2011111)
# I define function h, and use uniroot function to find lambda
h <- function(lam, n)
{
sum((x - theta)/(1 + lam*(x - theta)))
}
# sample size
n <- 100
# the parameter of interest must be a value in [1, 12],
true_theta <- 5
true_sd <- 2
x <- rnorm(n, mean = true_theta, sd = true_sd)
xmax <- max(x)
xmin <- min(x)
theta_seq <- seq(from = xmin + .Machine$double.eps^0.5,
to = xmax - .Machine$double.eps^0.5, by = 0.01)
f_n <- rep(NA, length(theta_seq))
for (i in seq_along(theta_seq))
{
theta <- theta_seq[i]
lambdamin <- (1/n-1)/(xmax - theta)
lambdamax <- (1/n-1)/(xmin - theta)
lambda = uniroot(h, interval = c(lambdamin, lambdamax), n = n)$root
f_n[i] = -sum(log(1 + lambda*(x - theta)))
}
j <- which.max(f_n)
max_fn <- f_n[j]
mle_theta <- theta_seq[j]
plot(theta_seq, f_n, type = "l",
main = expression(Estimated ~ theta),
xlab = expression(Theta),
ylab = expression(f[n]))
points(mle_theta, f_n[j], pch = 19, col = "red")
segments(
x0 = c(mle_theta, xmin),
y0 = c(min(f_n)*2, max_fn),
x1 = c(mle_theta, mle_theta),
y1 = c(max_fn, max_fn),
col = "red",
lty = "dashed"
)
LR <- function(theta, lambda)
{
2*sum(log(1 + lambda*(x - theta))) - qchisq(0.95, df = 1)
}
lambdamin <- (1/n-1)/(xmax - mle_theta)
lambdamax <- (1/n-1)/(xmin - mle_theta)
lambda <- uniroot(h, interval = c(lambdamin, lambdamax), n = n)$root
uniroot(LR, c(xmin, mle_theta), lambda = lambda)$root
#> [1] 4.774609
Created on 2022-03-25 by the reprex package (v2.0.1)
The one-sided CI95 is theta >= 4.774609.
I am trying to model the behavior of a made-up networks of 5 genes, but I have the problem that I get negative values, which it has not sense biologically speaking.
Is there a way to limit the values to zero?
I managed to do it when I represent the graph, but I don't know how to use the ifelse in the main equation.
Thank you very much-1
###################################################
###preliminaries
###################################################
library(deSolve)
library(ggplot2)
library(reshape2)
###################################################
### Initial values
###################################################
values <- c(A = 1,
B = 1,
D = 1,
E = 20,
R = 1)
###################################################
### Set of constants
###################################################
constants <- c(a = 1.2,
b = 0.5,
c = 1.2,
d = 1.5,
e = 0.3,
f = 0.5,
g = 1.5,
h = 0.9,
i = 1.3,
j = 1.3,
m = 0.8,
n = 0.6,
q = 1,
t = 0.0075,
u = 0.0009,
Pa = 100,
Pb = 0.05,
Pd = 0.1,
Pe = 10)
###################################################
### differential equations
###################################################
Dynamic_Model<-function(t, values, constants) {
with(as.list(c(values, constants)),{
dA <- Pa + a*D - j*A - R
dB <- Pb + b*A + e*E - m*B
dD <- Pd + d*B + f*E - g*A - n*D
dE <- Pe - h*B + i*E - q*E
dR <- t*A*B - u*D*E
list(c(dA, dB, dD, dE, dR))
})
}
###################################################
### time
###################################################
times <- seq(0, 200, by = 0.01)
###################################################
### print ## Ploting
###################################################
out <- ode(y = values, times = times, func = Dynamic_Model, parms = constants)
out2 <- ifelse(out<0, 0, out)
out.df = as.data.frame(out2)
out.m = melt(out.df, id.vars='time')
p <- ggplot(out.m, aes(time, value, color = variable)) + geom_point(size=0.5) + ggtitle("Dynamic Model")
I agree completely with #Lutz Lehmann, that the negative values are a result of the structure of the model.
The system of equations allows that derivatives still become negative, even if the states are already below zero, i.e. the states can further decrease. We don't have information about what the states are, so the following is only a technical demonstration. Here a dimensionless Monod-type feedback function fb is implemented as a safeguard. It is normally close to one. The km value should be small enough to act only for state values close to zero, and it should not be too small to avoid numerical errors. It can be formulated individually for each state. Other function types are also possible.
library(deSolve)
library(ggplot2)
library(reshape2)
values <- c(A = 1,
B = 1,
D = 1,
E = 20,
R = 1)
constants <- c(a = 1.2,
b = 0.5,
c = 1.2,
d = 1.5,
e = 0.3,
f = 0.5,
g = 1.5,
h = 0.9,
i = 1.3,
j = 1.3,
m = 0.8,
n = 0.6,
q = 1,
t = 0.0075,
u = 0.0009,
Pa = 100,
Pb = 0.05,
Pd = 0.1,
Pe = 10,
km = 0.001)
Dynamic_Model<-function(t, values, constants) {
with(as.list(c(values, constants)),{
fb <- function(x) x / (x+km) # feedback
dA <- (Pa + a*D - j*A - R) * fb(A)
dB <- (Pb + b*A + e*E - m*B) * fb(B)
dD <- (Pd + d*B + f*E - g*A - n*D) * fb(D)
dE <- (Pe - h*B + i*E - q*E) * fb(E)
dR <- (t*A*B - u*D*E) * fb(R)
list(c(dA, dB, dD, dE, dR))
})
}
times <- seq(0, 200, by = 0.1)
out <- ode(y = values, times = times, func = Dynamic_Model, parms = constants)
plot(out)
Additional hints:
Removal of negative values afterwards (out2 <- ifelse(out<0, 0, out)) is just wrong.
Removal of negative values in the model function, i.e.
use the ifelse in the main
would also be wrong as it can lead to a severe violation of mass balance.
the time steps don't need to be very small. They are automatically adapted anyway by the solver. Too small time steps make your model slow and you get more outputs as needed.
some of your parameters are quite large, so that the model becomes very stiff.
I have a training df with 2 columns like
a b
1 1000 20
2 1008 13
...
n ... ...
Now, as I am required to find a 95% CI for the estimate of 'b' based on a specific 'a' value, with a 'k' value of my choice and compare the CI result to other specific value of 'k's. My question is how can I perform bootstrap for this with 1000 bootstrap reps as I am required to use a fitted knn model for the training data with kernel = 'gaussian' and k can only be in range 1-20 ?
I have found that the best k for this model is k = 5, and had a go for bootstrap but it doesn't work
library(kknn)
library(boot)
boot.kn = function(formula, data, indices)
{
# Create a bootstrapped version
d = data[indices,]
# Fit a model for bs
fit.kn = fitted(train.kknn(formula,data, kernel= "gaussian", ks = 5))
# Do I even need this complicated block
target = as.character(fit.kn$terms[[2]])
rv = my.pred.stats(fit.kn, d[,target])
return(rv)
}
bs = boot(data=df, statistic=boot.kn, R=1000, formula=b ~ a)
boot.ci(bs,conf=0.95,type="bca")
Please inform me for more info if I'm not clear enough. Thank you.
Here is a way to regress b on a with the k-nearest neighbors algorithm.
First, a data set. This is a subset of the iris data set, keeping the first two columns. One row is removed to later be the new data.
i <- which(iris$Sepal.Length == 5.3)
df1 <- iris[-i, 1:2]
newdata <- iris[i, 1:2]
names(df1) <- c("a", "b")
names(newdata) <- c("a", "b")
Now load the packages to be used and determine the optimal value for k with package kknn.
library(caret)
library(kknn)
library(boot)
fit <- kknn::train.kknn(
formula = b ~ a,
data = df1,
kmax = 15,
kernel = "gaussian",
distance = 1
)
k <- fit$best.parameters$k
k
#[1] 9
And bootstrap predictions for the new point a <- 5.3.
boot.kn <- function(data, indices, formula, newdata, k){
d <- data[indices, ]
fit <- knnreg(formula, data = d)
predict(fit, newdata = newdata)
}
set.seed(2021)
R <- 1e4
bs <- boot(df1, boot.kn, R = R, formula = b ~ a, newdata = newdata, k = k)
ci <- boot.ci(bs, level = 0.95, type = "bca")
ci
#BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
#Based on 10000 bootstrap replicates
#
#CALL :
#boot.ci(boot.out = bs, type = "bca", level = 0.95)
#
#Intervals :
#Level BCa
#95% ( 3.177, 3.740 )
#Calculations and Intervals on Original Scale
Plot the results.
old_par <- par(mfrow = c(2, 1),
oma = c(5, 4, 0, 0) + 0.1,
mar = c(1, 1, 1, 1) + 0.1)
hist(bs$t, main = "Histogram of bootstrap values")
abline(v = 3.7, col = "red")
abline(v = mean(bs$t), col = "blue")
abline(v = ci$bca[4:5], col = "blue", lty = "dashed")
plot(b ~ a, df1)
points(5.3, 3.7, col = "red", pch = 19)
points(5.3, mean(bs$t), col = "blue", pch = 19)
arrows(x0 = 5.3, y0 = ci$bca[4],
x1 = 5.3, y1 = ci$bca[5],
col = "blue", angle = 90, code = 3)
par(old_par)
I have a probability plot with point-wise confidence intervals fitted to the data. Using the graph I want to compute or locate the 0.001 quantile from the confidence bands. I used the function locator () to find the location of the point. Is there any other method that can be used to find the x-value given the y-value on a graph?
The code I used is as follows.
times <- c (17.88, 28.92, 33.00, 41.52, 42.12, 45.60, 48.40,
51.84, 51.96, 54.12, 55.56, 67.80, 68.64, 68.64,
68.88, 84.12, 93.12, 98.64, 105.12, 105.84, 127.92,
128.04,173.40)
N <- length (times)
t <- c (5, 10, 15)
rank.times <- rank (times) # Use average ranks for the tied observations
ecdf.times <- (rank.times - 0.5) / N
quant.ecdf <- log (-log (1 - ecdf.times))
weibull.ml <- suppressWarnings (fitdist (times, "weibull"))
weibull.cdf <- pweibull (times, shape = weibull.ml$estimate[1],
scale = weibull.ml$estimate[2])
wei <- log (-log (1 - weibull.cdf))
wei.extra <- approxExtrap (log (times), wei, log (t), method = "linear")
quant.wei <- c (wei.extra$y, wei)
set.seed (123)
B <- 2000
wei.boot <- suppressWarnings (bootdist (weibull.ml, bootmethod = "param", niter = B))
boot.cdf.we <- matrix (NA, nrow = B, ncol = N)
for (i in 1:B){
boot.cdf.we[i, ] <- pweibull (times, shape = wei.boot$estim$shape[i],
scale = wei.boot$estim$scale[i])
}
p <- 0.025
upper.wei <- NULL
lower.wei <- NULL
for (i in 1:N) {
upper.wei[i] <- log (-log (1 - quantile (boot.cdf.we[,i], probs = p)))
lower.wei[i] <- log (-log (1 - quantile (boot.cdf.we[,i], probs = 1-p)))
}
extra.wei.l <- approxExtrap (log (times), lower.wei, log (t), method = "linear")
lower.weibull <- c (extra.wei.l$y, lower.wei)
extra.wei.u <- approxExtrap (log (times), upper.wei, log (t), method = "linear")
upper.weibull <- c (extra.wei.u$y, upper.wei)
times.ext <- c (t, times)
loc1 <- c (.001, .005, .02, .05, .1, .2, .4, .6, .8, .9, .95, .98, .995)
loc2 <- log (-log (1 - loc1))
loc3 <- c (5, 9.77, 20, 50, 100, 200)
plot (times, quant.ecdf, log = "x", axes = FALSE, xlab = "Millions of cycles",
ylab = "Proportion failing", pch = 16, type = "p", main = "Weibull - Complete",
xlim = c (5, 200), ylim = c (-6.95, 1.7))
lines (times.ext, quant.wei)
lines (times.ext, upper.weibull)
lines (times.ext, lower.weibull)
abline (h = loc2[1])
segments (9.77789, -7.5, 9.77789, loc2[1])
axis (1, at = loc3, labels = loc3)
axis (2, at = loc2, labels = loc1, las = 2)
Thanks in advance!!
I am trying sae.dnn.train() with 5000 cases, 55-inputs and 3 hidden layers.
Why function nn.predict returns NaN? (vector)
I am using the following command
Nrow <-5000
Ncol <- 55
v <- c(rnorm(Nrow*Ncol,1, 0.5))
x <- matrix(v, nrow=Nrow, ncol=Ncol)
y <- c(rep(1, Nrow/2), rep(0, Nrow/2))
dnn <- sae.dnn.train(x, y, hidden = c(100,90,80),activationfun = "tanh", learningrate = 0.6, momentum = 0.5, learningrate_scale = 1.0,output = "sigm", sae_output = "linear", numepochs = 10, batchsize = 100, hidden_dropout = 0, visible_dropout = 0)
yy <- nn.predict(dnn, x)