suppose you have this simulated data:
N = 1000; alpha_bar = 0.7; gamma_h = 16; gamma_c = 50
mu_hw = 4; sigma_hw = 0.1; mu_i = 4; sigma_i = 0.5; sigma_eps = 0.01
set.seed(1)
wage <- exp(rnorm(N, mean = mu_hw, sd = sigma_hw))
set.seed(1)
nlincome <- exp(rnorm(N, mu_i, sigma_i))
set.seed(1)
e_ <- rnorm(N, 0, sigma_eps)
obs_data <- data.frame(w = wage,
I = nlincome,
eps = e_) %>%
mutate(alpha = alpha_bar + eps,
h = alpha*gamma_h - ((1-alpha)*(I-gamma_c))/w) %>%
select(w,I,h)
And now assume that you've forgotten these parameters (alpha_bar, gamma_h, gamma_c, sigma_eps). So you want to estimate those by maximum likelihood using the obs_data dataframe.
From the model I am working, I know that epsilon has this form:
Furthermore, I have derived the Log-Likelihood function like this: (I wrote this)
So I know I have to minimize the negative log-likelihood function, so I have tried to do it like this:
normal_loglik <- function(par){
g_h <- par[1]; g_c <- par[2]; a_bar <- par[3]; sigma_e <- par[4]
d <- mutate(obs_data,
num = h + ((I-g_c)/w),
den = g_h + ((I-g_c)/w),
eps = (num/den)-a_bar,
arg_1 = 0.5*(eps/sigma_e)^2,
arg_2 = log(abs(den)),
opt = log(sigma_e * sqrt(2*pi)) + arg_1 + arg_2)
LL <- - sum(d$opt)
return(-LL)
}
opt_par <- optim(rep(1e-4,4),
normal_loglik,
method = "SANN")$par
real_param <- c(gamma_h, gamma_c, alpha_bar, sigma_eps)
# Comparison
print(real_param); print(opt_par)
It does run, but I get these estimates that are way too far away from the real ones:
Related
I tried to run code from https://cran.r-project.org/web/packages/MultiBD/vignettes/SIR-MCMC.pdf on my R software - they are still running but no output until now but I can run the following code on an online R compiler. This is the codes:
library(MultiBD)
data(Eyam)
Eyam
loglik_sir <- function(param, data) {
alpha <- exp(param[1]) # Rates must be non-negative
beta <- exp(param[2])
# Set-up SIR model
drates1 <- function(a, b) { 0 }
brates2 <- function(a, b) { 0 }
drates2 <- function(a, b) { alpha * b }
trans12 <- function(a, b) { beta * a * b }
sum(sapply(1:(nrow(data) - 1), # Sum across all time steps k
function(k) {
log(
dbd_prob( # Compute the transition probability matrix
t = data$time[k + 1] - data$time[k], # Time increment
a0 = data$S[k], b0 = data$I[k], # From: S(t_k), I(t_k)
drates1, brates2, drates2, trans12,
a = data$S[k + 1], B = data$S[k] + data$I[k] - data$S[k + 1],
computeMode = 4, nblocks = 80 # Compute using 4 threads
)[1, data$I[k + 1] + 1] # To: S(t_(k+1)), I(t_(k+1))
)
}))
}
logprior <- function(param) {
log_alpha <- param[1]
log_beta <- param[2]
dnorm(log_alpha, mean = 0, sd = 100, log = TRUE) +
dnorm(log_beta, mean = 0, sd = 100, log = TRUE)
}
library(MCMCpack)
alpha0 <- 3.39
beta0 <- 0.0212
post_sample <- MCMCmetrop1R(fun = function(param) { loglik_sir(param, Eyam) + logprior(param)
}, theta.init = log(c(alpha0, beta0)), mcmc = 500, burnin = 100)
plot(as.vector(post_sample[,1]), type = "l", xlab = "Iteration", ylab =
expression(log(alpha)))
plot(as.vector(post_sample[,2]), type = "l", xlab = "Iteration", ylab = expression(log(beta)))
library(ggplot2)
x = as.vector(post_sample[,1])
y = as.vector(post_sample[,2])
df <- data.frame(x, y)
ggplot(df,aes(x = x,y = y)) +
stat_density2d(aes(fill = ..level..), geom = "polygon", h = 0.26) +
scale_fill_gradient(low = "grey85", high = "grey35", guide = FALSE) +
xlab(expression(log(alpha))) +
ylab(expression(log(beta)))
quantile(exp(post_sample[,1]), probs = c(0.025,0.975))
quantile(exp(post_sample[,2]), probs = c(0.025,0.975))
I realize that this is the part of the codes that took long time to run but produce no output :
post_sample <- MCMCmetrop1R(fun = function(param) { loglik_sir(param, Eyam) + logprior(param)},
theta.init = log(c(alpha0, beta0)), mcmc = 500, burnin = 100)
I think my R software is the problem but what is it?
I wrote down this function for MLE estimation and then I apply it for different settings of parameters.
Finally, I bind all results for an output.
But is not working i have problem with the output and also I need to organize the output like the attached image using R program.
enter image description here
could some one help me please?
What should I fix and how can I print the results like the picture attached.
thank you in advance
rbssn<- function(n,alpha,beta)
{
if(!is.numeric(n)||!is.numeric(alpha)||!is.numeric(beta))
{stop("non-numeric argument to mathematical function")}
if(alpha<=0){ stop("alpha must be positive")}
if(beta<=0) { stop("beta must be positive") }
z <- rnorm(n,0,1)
r <- beta*((alpha*z*0.5)+sqrt((alpha*z*0.5)^2+1))^2
return(r)
}
#Function
mymle <- function(n,alpha,beta,rep)
{
theta=c(alpha,beta) # store starting values
#Tables
LHE=array(0, c(2,rep));
rownames(LHE)= c("MLE_alpha", "MLE_beta")
#Bias
bias= array(0, c(2,rep));
rownames(bias)= c("bias_alpha", "bias_beta")
#Simulation
set.seed(1)
#Loop
for(i in 1:rep){
myx <- exp(-rbssn(n, alpha, beta))
Score <- function(x) {
y <- numeric(2)
y[1] <- (-n/x[1])*(1+2/(x[1]^2)) - (1/(x[2]*x[1]^3))*sum(log(myx)) - (x[2]/(x[1]^3))*sum(1/log(myx))
y[2] <- -(n/(2*x[2])) + sum((1/(x[2]-log(myx)))) - (1/(2*(x[1]^2)*(x[2]^2)))*sum(log(myx)) + (1/(2*x[1]^2))*sum(1/(log(myx)))
y
}
Sin <- c(alpha,beta)
mle<- nleqslv(Sin, Score, control=list(btol=.01))[1]
LHE[i,]= mle
bias[i,]= c(mle[1]-theta[1], mle[2]-theta[2])
}
# end for i
#Format results
L <-round(apply(LHE, 1, mean), 3) # MLE of all the applied iterations
bs <-round(apply(bias,1, mean),3) # bias of all the applied iterations
row<- c(L, bs)
#Format a label
lab <- paste0('n= ',n,';',' alpha= ',alpha,';',' beta= ',beta)
row2 <- c(lab,row)
row2 <- as.data.frame(t(row2))
return(row2)
}
#Bind all
#Example 1
ex1 <- mymle(n = 20,alpha = 1,beta = 0.5,rep = 100)
ex2 <- mymle(n = 50,alpha = 2,beta = 0.5,rep = 100)
ex3 <- mymle(n = 100,alpha = 3,beta = 0.5,rep = 100)
#Example 2
ex4 <- mymle(n = 20,alpha = 0.5,beta = 0.5,rep = 100)
ex5 <- mymle(n = 50,alpha = 0.5,beta = 1,rep = 100)
ex6 <- mymle(n = 100,alpha = 0.5,beta = 1,rep = 100)
df <- rbind(ex1,ex2,ex3,ex4,ex5,ex6)
Any help will be appreciated.
I have the following script
Posdef <- function (n, ev = runif(n, 0, 10))
{
Z <- matrix(ncol=n, rnorm(n^2))
decomp <- qr(Z)
Q <- qr.Q(decomp)
R <- qr.R(decomp)
d <- diag(R)
ph <- d / abs(d)
O <- Q %*% diag(ph)
Z <- t(O) %*% diag(ev) %*% O
return(Z)
}
Sigma <- Posdef(n = 11)
mu <- runif(11,0,10)
data <- as.data.frame(mvrnorm(n=1000, mu, Sigma))
data[data < 0] <- 0 #setting a floor#
data[data > 10] <- 10 #setting a ceiling#
names(data) = c('criteria_1', 'criteria_2', 'criteria_3', 'criteria_4', 'criteria_5',
'criteria_6', 'criteria_7', 'criteria_8', 'criteria_9', 'criteria_10',
'outcome')
data$outcome <- ifelse(data$outcome > 5, 1, 0)
data <- data[, sapply(data, is.numeric)]
maxValue <- as.numeric(apply (data, 2, max))
minValue <- as.numeric(apply (data, 2, min))
data_scaled <- as.data.frame(scale(data, center = minValue,
scale = maxValue-minValue))
ind <- sample (1:nrow(data_scaled), 600)
train <- data_scaled[ind,]
test <- data_scaled[-ind,]
model <- glm (formula =
outcome ~ criteria_1 + criteria_2 + criteria_3 + criteria_4 + criteria_5 +
criteria_6 + criteria_7 + criteria_8 + criteria_9 + criteria_10,
family = "binomial",
data = train)
summary (model)
predicted_model <- predict(model, test)
neural_model <- neuralnet(formula =
outcome ~ criteria_1 + criteria_2 + criteria_3 + criteria_4 + criteria_5 +
criteria_6 + criteria_7 + criteria_8 + criteria_9 + criteria_10,
hidden = c(2,2) ,
threshold = 0.01,
stepmax = 1e+07,
startweights = NULL,
rep = 1,
learningrate = NULL,
algorithm = "rprop+",
linear.output=FALSE,
data= train)
plot (neural_model)
results <- compute (neural_model, test[1:10])
results <- results$net.result*(max(data$outcome)-
min(data$outcome))+ min(data$outcome)
Values <- (test$outcome)*(max(data$outcome)-
min(data$outcome)) + min(data$outcome)
MSE_nueral_model <- sum((results - Values)^2)/nrow(test)
MSE_model <- sum((predicted_model - test$outcome)^2)/nrow(test)
print(MSE_model - MSE_nueral_model)
R1 <- (MSE_model - MSE_nueral_model)
The purpose of this script is to generate some arbitrary multivariate distribution and then compare two methods. In this case its a neural net and logistic regression. The end result is a difference in mean square error.
Now my issue with creating a loop has been with generating the 1000 observations.
I am able to create a loop without the data simulation portion of the script, putting that into the loop seems to make things go haywire. I tried creating a column vector filled with NA's but all I ended up getting was a single value returned rather than a vector of length n populated by the MSE reductions for each iteration of the loop.
Any help would be greatly appreciated.
pval.dist.sim = function(n, sigma_x, rho, reps = 2500){
p = 5; sigma = sqrt(2)
beta = c(0.5, 0.5, 0, 0.25, 0)
mu = 10
# generate vector for pvals
pval.list = numeric(reps)
for(r in 1:reps){
# generate design matrix
X = gen_X(n = n, p = 5, rho = rho, sigma_x = sigma_x, mu = mu)
# generate the XtXinv portion of equation
XtXinv = qr.solve(crossprod(X))
sqrtXtXinv55 = sqrt(XtXinv[5,5])
y = X %*% beta + rnorm(n = n)
beta.hat = XtXinv %*% crossprod(X, y)
sE = sqrt(sum((y - X %*% beta.hat)^2)/(n-p))
t.val = beta.hat[3]/(sE * sqrtXtXinv55)
pval.list[r] = 2 * pt(-abs(t.val), df = n - p)
}
return(pval.list)
}
Above is the pval.dist simulation. I need to run this function to build my p.values to build my power curve
set.seed(3701)
# givens
p = 5; d = 2; mu = 10; sigmasqrd = 2; reps = 2500
n.list = seq(from=10, to=150, by=10)
# create a vector for the estimates of the power
est.power = numeric(length(n.list))
# create a vector for the left endpoints of the 95% CI
LB.list = numeric(length(n.list))
# create a vector for the right endpoints of the 95% CI
UB.list = numeric(length(n.list))
for(j in 1:length(n.list)){
# perform the test reps times
pvals = pval.dist.sim(n = n.list[j], sigma_x = 1.5, rho = 0.2, reps = reps )
# record the simulated estimate of the power
est.power[j] = mean(pvals<0.05)
# compute the 95% conf int
bounds = binom.test(x=sum(pvals < 0.05), n = reps, conf.level = 0.95)$conf.int[1:2]
LB.list[j] = bounds[1]
UB.list[j] = bounds[2]
}
## plot the power curve estimation
plot(n.list, est.power, t = "l", xlab = "n",ylab = "Power")
I am having the issue that my pvalues, when plugged in, are drastically low. I am getting values in the single digit percentage. What am I doing wrong?
I am trying to implement a reaction-diffusion PDE using reacTran in the deSolve package. However, the time-dependent reaction term is not working. Any suggestions on how to implement this would be greatly appreciated!
library(ReacTran)
library(deSolve)
N <- 1000
xgrid <- setup.grid.1D(x.up = 0, x.down = 10, N = N)
x <- xgrid$x.mid
D.coeff <- 1
k <- 1
Diffusion <- function (t, Y, parms){
tran <- tran.1D(C = Y, C.up = 0, C.down = 0, D = D.coeff, dx = xgrid)-k*t
reac <- -kt
return(list(tran$dC+reac))
}
# Set initial conditions as gaussian distribution
C0 <- 10 #Initial concentration (mg/L)
X0 <- 5 #Location of initial concentration (m)
sig <- .2 #Spread of Gaussian distribution
C <- rep(0,N) #matrix
Yini <- C+C0*exp(-((x-X0)/sig)^2)
parms1 <- list(D=D.coeff, k=k)
times <- seq(from = 0, to = 5, by = 0.01)
print(system.time(
out <- ode.1D(y = Yini, times = times, func = Diffusion,
parms = parms1, dimens = N)))