How can I find minimum value for this inequality? - r

n <- 100
r <- rnorm(n, 0.5, 0.01)
xbar <- mean(r)
mu <- seq(from = 0, to = 1, by=0.0001)
F.mu <- function(xbar, mu) {
hstar <- xbar * log(xbar/mu) + (1-xbar) * log((1-xbar)/(1-mu))
ifelse(xbar >= mu, 1, exp(-n*hstar))
}
Then I want to find the minimum value which satisfies F.mu > 0.05.

Something like this maybe:
x <- F.mu(xbar, mu)
mu[which(x == min(x[x > 0.05]))]
# 0.6211

You could run your function with former specified values, then take the minimum of all > 0.05.
set.seed(927254) # to make results comparable
n <- 100
r <- rnorm(n, 0.5, 0.01)
xbar <- mean(r)
mu <- seq(from=0.0, to=1, by=0.0001)
F.mu <- function(xbar.=xbar, mu.=mu){
hstar <-xbar*log(xbar/mu)+(1-xbar)*log((1-xbar)/(1-mu))
ifelse(xbar >= mu,1, exp(-n*hstar)) }
min(F.mu()[F.mu()>0.05])
# [1] 0.05018463

Related

How can I fix the while loop problem in R?

I wrote the code as like below, and sometime it gets proper value but sometime it could not give me the value for a long time.
I guess it looks like it has infinite problem with while function but I couldn't get it how to fix it.
I've already tried to search about the while loop but I guess I wrote proeprly but I couldn't get it why it sometime run properly and sometime run not.
Could you please give me advice or the proper modification?
Thank you.
rm(list=ls())
library(readxl)
library(dplyr)
library(ggplot2)
library(MASS)
# Mean Vector, Covariance Matrix Construction
mu <- c(0,0,0)
mu <- t(mu)
mu <- t(mu)
mu
# Construct 40 random variables for Phase II
mu2 <- c(1, 2, 1)
mu2 <- t(mu2)
mu2 <- t(mu2)
mu2
Sigma <- matrix(c(1, 0.9, 0.9, 0.9, 1, 0.9, 0.9, 0.9, 1), 3)
Sigma
getResult <- function(Result) {
# Construct 50 Random Variables for Phase I
Obs <- mvrnorm(50, mu = mu, Sigma = Sigma)
VecT2 <- apply(Obs, 2, mean)
VecT2 <- round(VecT2, 3)
ST2 <- cov(Obs)
ST2 <- round(ST2, 3)
Obs <- as.matrix(Obs)
T2All <- rep(0, nrow(Obs))
for(i in 1:nrow(Obs)) {
T2All[i] = t(Obs[i, ] - VecT2) %*% solve(ST2) %*% (Obs[i, ] - VecT2)
}
# Construct Control Limit
Alpha <- 0.005
M <- nrow(Obs)
M
p <- ncol(Obs)
p
UCL <- ((p * (M-1) * (M + 1))) / ((M - p) * M) * qf((1-Alpha), p, (M-p))
UCL <- round(UCL, 3)
Compare <- which(T2All > UCL)
# Repeat when is there are Out of Control in Phase I with eliminating it
while(isTRUE(Compare > UCL)) {
Obs <- Obs[-Compare,]
Alpha <- 0.005
M <- nrow(Obs)
p <- ncol(Obs)
UCL <- ((p * (M-1) * (M + 1))) / ((M - p) * M) * qf((1-Alpha), p, (M-p))
Compare <- which(T2All > UCL)
}
UCL <- round(UCL, 3)
# Prepare Observations two types of cases with Variable 20_1, Variable 20_2
Obs20_1 <- mvrnorm(20, mu = mu, Sigma = Sigma)
Obs20_2 <- mvrnorm(20, mu = mu2, Sigma = Sigma)
Obs40 <- rbind(Obs20_1, Obs20_2)
Obs40 <- as.matrix(Obs40)
T2 <- rep(0, nrow(Obs40))
for(i in 1:nrow(Obs40)) {
T2[i] = t(Obs40[i, ] - mu) %*% solve(Sigma) %*% (Obs40[i, ] - mu)
}
Result <- which(T2 > UCL)[1]
# Repeat when Out of Control occur in ARL0 section
while(isTRUE(Result < 20)) {
Obs20_1 <- mvrnorm(20, mu = mu, Sigma = Sigma)
Obs40 <- rbind(Obs20_1, Obs20_2)
Obs40 <- as.matrix(Obs40)
T2 <- rep(0, nrow(Obs40))
for(i in 1:nrow(Obs40)) {
T2[i] = t(Obs40[i, ] - mu) %*% solve(Sigma) %*% (Obs40[i, ] - mu)
}
Result <- which(T2 > UCL)[1]
}
Result
}
# Result
Final <- replicate(n = 200, expr = getResult(Result))
Final <- Final - 20
Final
mean(Final)
You could try using a for loop instead of a while loop.

Binary Logistic Regression with BFGS using package maxLik

I tried binary logistic regression with BFGS using maxlik, but i have included the feature as per the syntax i attached below, but the result is, but i get output like this
Maximum Likelihood estimation
BFGS maximization, 0 iterations
*Return code 100: Initial value out of range.
https://docs.google.com/spreadsheets/d/1fVLeJznB9k29FQ_BdvdCF8ztkOwbdFpx/edit?usp=sharing&ouid=109040212946671424093&rtpof=true&sd=true (this is my data)*
library(maxLik)
library(optimx)
data=read_excel("Book2.xlsx")
data$JKLaki = ifelse(data$JK==1,1,0)
data$Daerah_Samarinda<- ifelse(data$Daerah==1,1,0)
data$Prodi2 = ifelse(data$Prodi==2,1,0)
data$Prodi3 = ifelse(data$Prodi==3,1,0)
data$Prodi4 = ifelse(data$Prodi==4,1,0)
str(data)
attach(data)
ll<- function(param){
mu <- param[1]
beta <- param[-1]
y<- as.vector(data$Y)
x<- cbind(1, data$JKLaki, data$IPK, data$Daerah_Samarinda, data$Prodi2, data$Prodi3, data$Prodi4)
xb<- x%*%beta
pi<- exp(xb)
val <- -sum(y * log(pi) + (1 - y) * log(1 - pi),log=TRUE)
return(val)
}
gl<- funtion(param){
mu <- param[1]
beta <- param[-1]
y <- as.vector(data$Y)
x <- cbind(0, data$JKLaki,data$IPK,data$Daerah_Samarinda,data$Prodi2,data$Prodi3,data$Prodi4)
sigma <- x*beta
pi<- exp(sigma)/(1+exp(sigma))
v= y-pi
vx=as.matrix(x)%*%as.vector(v)
gg= colSums(vx)
return(-gg)}
mle<-maxLik(logLik=ll, grad=gl,hess=NULL,
start=c(mu=1, beta1=0, beta2=0, beta3=0, beta4=0, beta5=0, beta6=0,beta7=0), method="BFGS")
summary(mle)
can i get some help, i tired get this solution, please.
I have been able to optimize the log-likelihood with the following code :
library(DEoptim)
library(readxl)
data <- read_excel("Book2.xlsx")
data$JKLaki <- ifelse(data$JK == 1, 1, 0)
data$Daerah_Samarinda <- ifelse(data$Daerah == 1, 1, 0)
data$Prodi2 <- ifelse(data$Prodi == 2, 1, 0)
data$Prodi3 <- ifelse(data$Prodi == 3, 1, 0)
data$Prodi4 <- ifelse(data$Prodi == 4, 1, 0)
ll <- function(param, data)
{
mu <- param[1]
beta <- param[-1]
y <- as.vector(data$Y)
x <- cbind(1, data$JKLaki, data$IPK, data$Daerah_Samarinda, data$Prodi2, data$Prodi3, data$Prodi4)
xb <- x %*% beta
pi <- exp(mu + xb)
val <- -sum(y * log(pi) + (1 - y) * log(1 - pi))
if(is.nan(val) == TRUE)
{
return(10 ^ 30)
}else
{
return(val)
}
}
lower <- rep(-500, 8)
upper <- rep(500, 8)
obj_DEoptim_Iter1 <- DEoptim(fn = ll, lower = lower, upper = upper,
control = list(itermax = 5000), data = data)
lower <- obj_DEoptim_Iter1$optim$bestmem - 0.25 * abs(obj_DEoptim_Iter1$optim$bestmem)
upper <- obj_DEoptim_Iter1$optim$bestmem + 0.25 * abs(obj_DEoptim_Iter1$optim$bestmem)
obj_DEoptim_Iter2 <- DEoptim(fn = ll, lower = lower, upper = upper,
control = list(itermax = 5000), data = data)
obj_Optim <- optim(par = obj_DEoptim_Iter2$optim$bestmem, fn = ll, data = data)
$par
par1 par2 par3 par4 par5 par6 par7
-350.91045436 347.79576145 0.05337466 0.69032735 -0.01089112 0.47465162 0.38284804
par8
0.42125664
$value
[1] 95.08457
$counts
function gradient
501 NA
$convergence
[1] 1
$message
NULL

Subscript out of bounds in R language

genBi <- function(rho, mu1, mu2, s1, s2){
library(MASS)
mu <- c(mu1, mu2) #mean
sigma <- matrix(c(s1^2, s1*s2*rho, s1*s2*rho, s2^2),
2) #covariance matrix
bvn1 <- mvrnorm(4000, mu = mu, Sigma = sigma )
colnames(bvn1) <- c("x","y")
return(bvn1)
}
#get samples from Z with sample size 20,
getSlice <- function(data){
Z <- seq(0, 1, length.out = 200) #initialize Z
for(i in 0:199){
temp <- data[i * 20 + 1 : (i + 1) * 20, ]
R <- cor(temp[,1], temp[,2])
Z[i] <- 0.5 * log((1 + R)/(1 - R))
}
return(Z)
}
data <- genBi(0.6, 1, 1, 2, 2)
Z <- getSlice(data)
Return with error Error in temp[, 2] : subscript out of bounds. Please help identify the problem!

Vectorizing nested ifelse

I'm trying to fasten my function in R. It contains of three ifelse statements where one of it is nested. For the single one I conducted vectorization which reduced my computation time. Unfortunately I don't see how I can vectorize the nested one. Every way I apply it returns an error. Furthemore if there is any another quirk I can use to speed it up?
cont.run <- function(reps=10000, n=10000, d=0.005, l=10 ,s=0.1) {
r <- rep(0, reps)
theta <- rep(0, n)
for (t in 1:reps) {
epsilon <- rnorm(1, 0, d)
Zt = sum(ifelse(epsilon > theta, 1,
ifelse(epsilon < -theta, -1, 0)))
r[t] <- Zt / (l * n)
theta <- ifelse(runif(n) < s, abs(r[t]), theta)
}
return(mean(r))
}
system.time(cont.run())
I got:
cont.run <- function(reps=10000, n=10000, d=0.005, l=10 ,s=0.1) {
r <- rep(0, reps)
theta <- rep(0, n)
for (t in 1:reps) {
epsilon <- rnorm(1, 0, d)
Zt = rep(NA, length(theta))
Zt = sum(Zt[epsilon > theta, 1])
Zt = sum(Zt[epsilon < -theta, -1])
r[t] <- Zt / (l * n)
theta = rep(theta, length(s))
theta[runif(n) < s] = abs(r[t])
}
return(mean(r))
}
system.time(cont.run())
Here's a little bit improved code.
Main change is that we don't use double ifelse, but instead perform two sums on TRUE vectors (sum(epsilon > theta) - sum(epsilon < -theta)) (we don't care about zeroes here). I added a couple of other improvements (eg., replaced rep with numeric, moved some operations outside the for loop).
contRun <- function(reps = 1e4, n = 1e4, d = 5e-3, l = 10, s = 0.1) {
# Replace rep with numeric
r <- numeric(reps)
theta <- numeric(n)
# Define before loop
ln <- l * n
# Don't use t as it's a function in base R
for (i in 1:reps) {
epsilon <- rnorm(1, 0, d)
# Sum two TRUE vectors
r[i] <- (sum(epsilon > theta) - sum(epsilon < -theta)) / ln
# Define before ifelse
absr <- abs(r[i])
theta <- ifelse(runif(n) < s, absr, theta)
}
return(mean(r))
}
library(microbenchmark)
microbenchmark(cont.run(), contRun())
Unit: seconds
expr min lq mean median uq max neval
cont.run() 13.652324 13.749841 13.769848 13.766342 13.791573 13.853786 100
contRun() 6.533654 6.559969 6.581068 6.577265 6.596459 6.770318 100
PS. For this kind of computing you might one to set seed (set.seed() before the for loop) to make sure that you can reproduce your results.
Furthemore if there is any another quirk I can use to speed it up?
In addition to PoGibas answer, you can avoid calling ifelse and get a faster function as follows
contRun <- function(reps = 1e4, n = 1e4, d = 5e-3, l = 10, s = 0.1) {
# Replace rep with numeric
r <- numeric(reps)
theta <- numeric(n)
# Define before loop
ln <- l * n
# Don't use t as it's a function in base R
for (i in 1:reps) {
epsilon <- rnorm(1, 0, d)
# Sum two TRUE vectors
r[i] <- (sum(epsilon > theta) - sum(epsilon < -theta)) / ln
# Define before ifelse
absr <- abs(r[i])
theta <- ifelse(runif(n) < s, absr, theta)
}
mean(r)
}
contRun2 <- function(reps = 1e4, n = 1e4, d = 5e-3, l = 10, s = 0.1) {
r <- numeric(reps)
theta <- numeric(n)
ln <- l * n
for (i in 1:reps) {
epsilon <- rnorm(1, 0, d)
r[i] <- (sum(epsilon > theta) - sum(epsilon < -theta)) / ln
absr <- abs(r[i])
# avoid ifelse
theta[runif(n) < s] <- absr
}
mean(r)
}
contRun3 <- function(reps = 1e4, n = 1e4, d = 5e-3, l = 10, s = 0.1) {
r <- numeric(reps)
theta <- numeric(n)
ln <- l * n
for (i in 1:reps) {
epsilon <- rnorm(1, 0, d)
r[i] <- (sum(epsilon > theta) - sum(epsilon < -theta)) / ln
absr <- abs(r[i])
# replace runif
theta[sample(c(T, F), prob = c(s, 1 - s), size = n, replace = TRUE)] <- absr
}
mean(r)
}
# gives the same
set.seed(1)
o1 <- contRun()
set.seed(1)
o2 <- contRun2()
set.seed(1)
o3 <- contRun3()
all.equal(o1, o2)
#R [1] TRUE
all.equal(o1, o3) # likely will not match
#R [1] [1] "Mean relative difference: 0.1508537"
# but distribution is the same
set.seed(1)
c1 <- replicate(10000, contRun2(reps = 100, n = 100))
c2 <- replicate(10000, contRun3(reps = 100, n = 100))
par(mfcol = c(1, 2), mar = c(5, 4, 2, .5))
hist(c1, breaks = seq(-.015, .015, length.out = 26))
hist(c2, breaks = seq(-.015, .015, length.out = 26))
# the latter is faster
microbenchmark::microbenchmark(
contRun = {set.seed(1); contRun ()},
contRun2 = {set.seed(1); contRun2()},
contRun3 = {set.seed(1); contRun3()},
times = 5)
#R Unit: seconds
#R expr min lq mean median uq max neval
#R contRun 7.121264 7.371242 7.388159 7.384997 7.443940 7.619352 5
#R contRun2 3.811267 3.887971 3.892523 3.892158 3.921148 3.950070 5
#R contRun3 1.920594 1.920754 1.998829 1.999755 2.009035 2.144005 5
The only bottleneck now is runif in contRun2. Replacing it with sample yields quite an improvement.

Coverage probability for an unspecified CDF

I used the following r code to determine the coverage probability.
theta <- seq(0,1, length = 100)
CD_theta <- function(y, p, n){
1 - pbinom (y, size = n, prob = p) + 1/2*dbinom(y, size = n, prob = p)
}
y <- 5
n <- 100
phat <- y/n
mytheta <- CD_theta(5, theta, 100)
set.seed(650)
ci <- list()
n <- 100
B <- 1000
result = rep(NA, B)
all_confInt <- function(B) {
for (i in 1:B){
boot.sample <- sample(mytheta, replace = TRUE)
lower <- theta[which.min(abs(boot.sample - .025))]
upper <- theta[which.min(abs(boot.sample - .975))]
ci[[i]] <- data.frame(lowerCI = lower, upperCI = upper)
intervals <- unlist(ci)
}
return(intervals)
}
df <- data.frame(matrix(all_confInt(B), nrow=B, byrow=T))
colnames(df)[1] <- "Lower"
colnames(df)[2] <- "Upper"
names(df)
dim(df)
mean(df$Lower < phat & df$Upper > phat)*100
However, I obtained 6.4% which is too low. Why am I getting really lower percentage?. Is there any problem in the r function?

Resources