Difference between log likelihood by hand and logLike function - r

I'm trying to compare the value of the log likelihood function given by the logLik function and the value calculate by hand for a Gamma distribution. The value given by the logLik function is:
require(fitdistrplus)
x = rgamma(50,shape = 2, scale = 10)
Gamma_fitdist = fitdist(x,"gamma")
logLik(Gamma_fitdistr)
-189.4192
and for the loglikelihood function "by hand" is:
gmll <- function(scale,shape,datta){
a <- scale
b <- shape
n <- length(datta)
sumd <- sum(datta)
sumlogd <- sum(log(datta))
gmll <- n*a*log(b) + n*lgamma(a) + sumd/b - (a-1)*sumlogd
gmll
}
gmll(scale = 10, shape = 2, datta = x)
-246.6081
Why logLik function give me a different value? Thanks!

You've interverted scale and shape and there's a couple of sign errors in your code.
library(fitdistrplus)
set.seed(666)
x = rgamma(50, shape = 2, scale = 4)
Gamma_fitdist = fitdist(x,"gamma")
logLik(Gamma_fitdist)
# -150.3687
gmll <- function(scale,shape,datta){
a <- shape
b <- scale
n <- length(datta)
sumd <- sum(datta)
sumlogd <- sum(log(datta))
-n*a*log(b) - n*lgamma(a) - sumd/b + (a-1)*sumlogd
}
rate <- Gamma_fitdist$estimate[["rate"]]
shape <- Gamma_fitdist$estimate[["shape"]]
gmll(scale = 1/rate, shape = shape, datta = x)
# -150.3687

Related

Calculate stderr, t-value, p-value, predict value for linear regression

I'm fitting linear models with MatrixModels:::lm.fit.sparse and MatrixModels::glm4 (also sparse).
However, these functions return coeff, residuals and fitted.values only.
What's the fastest and easiest way to get/calculate another values such as stderr, t-value, p-value, predict value?
I use the data from MatrixModels:::lm.fit.sparse example.
I built a custom function summary_sparse to perform a summary for this model.
All matrix operations are performed with Matrix package.
Results are compared with dense type model.
Note lm.fit.sparse have to be evaluated with method = "chol" to get proper results.
Functions:
summary_sparse <- function(l, X) {
XXinv <- Matrix::chol2inv(Matrix::chol(Matrix::crossprod(X)))
se <- sqrt(Matrix::diag(XXinv*sum(l$residuals**2)/(nrow(X)-ncol(X))))
ts <- l$coef/se
pvals <- 2*c(1 - pnorm(abs(ts)))
list(coef = l$coef, se = se, t = ts, p = pvals)
}
predict_sparse <- function(X, coef) {
X %*% coef
}
Application:
dd <- expand.grid(a = as.factor(1:3),
b = as.factor(1:4),
c = as.factor(1:2),
d= as.factor(1:8))
n <- nrow(dd <- dd[rep(seq_len(nrow(dd)), each = 10), ])
set.seed(17)
dM <- cbind(dd, x = round(rnorm(n), 1))
## randomly drop some
n <- nrow(dM <- dM[- sample(n, 50),])
dM <- within(dM, { A <- c(2,5,10)[a]
B <- c(-10,-1, 3:4)[b]
C <- c(-8,8)[c]
D <- c(10*(-5:-2), 20*c(0, 3:5))[d]
Y <- A + B + A*B + C + D + A*D + C*x + rnorm(n)/10
wts <- sample(1:10, n, replace=TRUE)
rm(A,B,C,D)
})
X <- Matrix::sparse.model.matrix( ~ (a+b+c+d)^2 + c*x, data = dM)
Xd <- as(X,"matrix")
fmDense <- lm(dM[,"Y"]~Xd-1)
ss <- summary(fmDense)
r1 <- MatrixModels:::lm.fit.sparse(X, y = dM[,"Y"], method = "chol")
f <- summary_sparse(r1, X)
all.equal(do.call(cbind, f), ss$coefficients, check.attributes = F)
#TRUE
all.equal(predict_sparse(X, r1$coef)#x, predict(fmDense), check.attributes = F, check.names=F)
#TRUE

Function that will generate iter samples of size n from a gamma distribution with shape parameter alpha and rate parameter beta

The function needs to return the mean and standard deviation of each sample.
This is what I have:
sample_gamma <- function(alpha, beta, n, iter) {
mean = alpha/beta
var = alpha/(beta)^2
sd = sqrt(var)
gamma = rgamma(n,shape = alpha, scale = 1/beta)
sample_gamma = data.frame(mean = replicate(n = iter, expr = mean))
}
I'm very lost for this. I also need to create a data frame for this function.
Thank you for your time.
Edit:
sample_gamma <- function(alpha, beta, n, iter) {
output <- rgamma(iter, alpha, 1/beta)
output_1 <- matrix(output, ncol = iter)
means <- apply(output_1, 2, mean)
sds <- apply(output_1, 2, sd)
mystats <- data.frame(means, sds)
return(mystats)
}
This works except for the sds. It's returning NAs.
It's not really clear to me what you want. But say you want to create 10 samples of size 1000, alpha = 1, beta = 2. Then you can create a single stream of rgamma realizations, dimension them into a matrix, then get your stats with apply, and finally create a data frame with those vectors:
output <- rgamma(10*1000, 1, 1/2)
output <- matrix(output, ncol = 10)
means <- apply(output, 2, mean)
sds <- apply(output, 2, sd)
mystats <- data.frame(means, sds)
You could wrap your function around that code, replacing the hard values with parameters.

reiterating a script using r

I have the following script
Posdef <- function (n, ev = runif(n, 0, 10))
{
Z <- matrix(ncol=n, rnorm(n^2))
decomp <- qr(Z)
Q <- qr.Q(decomp)
R <- qr.R(decomp)
d <- diag(R)
ph <- d / abs(d)
O <- Q %*% diag(ph)
Z <- t(O) %*% diag(ev) %*% O
return(Z)
}
Sigma <- Posdef(n = 11)
mu <- runif(11,0,10)
data <- as.data.frame(mvrnorm(n=1000, mu, Sigma))
data[data < 0] <- 0 #setting a floor#
data[data > 10] <- 10 #setting a ceiling#
names(data) = c('criteria_1', 'criteria_2', 'criteria_3', 'criteria_4', 'criteria_5',
'criteria_6', 'criteria_7', 'criteria_8', 'criteria_9', 'criteria_10',
'outcome')
data$outcome <- ifelse(data$outcome > 5, 1, 0)
data <- data[, sapply(data, is.numeric)]
maxValue <- as.numeric(apply (data, 2, max))
minValue <- as.numeric(apply (data, 2, min))
data_scaled <- as.data.frame(scale(data, center = minValue,
scale = maxValue-minValue))
ind <- sample (1:nrow(data_scaled), 600)
train <- data_scaled[ind,]
test <- data_scaled[-ind,]
model <- glm (formula =
outcome ~ criteria_1 + criteria_2 + criteria_3 + criteria_4 + criteria_5 +
criteria_6 + criteria_7 + criteria_8 + criteria_9 + criteria_10,
family = "binomial",
data = train)
summary (model)
predicted_model <- predict(model, test)
neural_model <- neuralnet(formula =
outcome ~ criteria_1 + criteria_2 + criteria_3 + criteria_4 + criteria_5 +
criteria_6 + criteria_7 + criteria_8 + criteria_9 + criteria_10,
hidden = c(2,2) ,
threshold = 0.01,
stepmax = 1e+07,
startweights = NULL,
rep = 1,
learningrate = NULL,
algorithm = "rprop+",
linear.output=FALSE,
data= train)
plot (neural_model)
results <- compute (neural_model, test[1:10])
results <- results$net.result*(max(data$outcome)-
min(data$outcome))+ min(data$outcome)
Values <- (test$outcome)*(max(data$outcome)-
min(data$outcome)) + min(data$outcome)
MSE_nueral_model <- sum((results - Values)^2)/nrow(test)
MSE_model <- sum((predicted_model - test$outcome)^2)/nrow(test)
print(MSE_model - MSE_nueral_model)
R1 <- (MSE_model - MSE_nueral_model)
The purpose of this script is to generate some arbitrary multivariate distribution and then compare two methods. In this case its a neural net and logistic regression. The end result is a difference in mean square error.
Now my issue with creating a loop has been with generating the 1000 observations.
I am able to create a loop without the data simulation portion of the script, putting that into the loop seems to make things go haywire. I tried creating a column vector filled with NA's but all I ended up getting was a single value returned rather than a vector of length n populated by the MSE reductions for each iteration of the loop.
Any help would be greatly appreciated.

Fit student t to incomplete distribution

Thanks to a closed form formula (I work on risk neutral density, with this king of formula: RND formula, page 8), I have an incomplete distribution of this type:
My idea would be to fit this density with a student-t.
I already tried the MASS and fitdistrplus packages but just can't find how to perform my task. Everything I can do for now is to get the fitted parameters (m=1702.041, s=6.608536, df=15.18036), but from here I don't know how to get my fitted values for my distribution.
A sample of code:
temp = matrix(nrow=1000, ncol=3)
colnames(temp) = c("strikes", "first_density", "mulitply_first_density")
temp = as.data.frame(temp)
# we generate fake data
temp$strikes = seq(1000,2000,length=1000)
temp$first_density = runif(1000,max=0.006, min=1e-10)
# we multiply our first density to generate our sample
temp$mulitply_first_density = temp$first_density*1000000
# we generate our sample
vec = vector()
for (i in 1:nrow(temp))
{
vec = c(vec, rep(temp$strike[i], temp$mulitply_first_density[i]))
}
# we laod our library
library("MASS")
# we fir our parameters
fitted_parameters = fitdistr(vec, "t")
The formula for the t-density function using the location and scale parameters is given in the examples of the documentation as mydt.
#simulated data
set.seed(42)
x <- rt(1e4, 7, 10)
plot(density(x))
library(MASS)
fitted_parameters = fitdistr(x, "t", start = list(df = 10, m = 10, s = 5))
# df m s
# 3.81901649 10.56816146 2.66905346
#( 0.15295551) ( 0.03448627) ( 0.03361758)
mydt <- function(x, m, s, df) dt((x-m)/s, df)/s
curve(do.call(mydt, c(list(x), as.list(fitted_parameters$estimate))), add = TRUE, col = "red")
legend("topright", legend = c("kernel density estimate", "fitted t distribution"),
col = c("black", "red"), lty = 1)

Graphical output of density for the function gammamixEM (package mixtools)

I'm using the function gammamixEM from the package mixtools. How can I return the graphical output of density as in the function normalmixEM (i.e., the second plot in plot(...,which=2)) ?
Update:
Here is a reproducible example for the function gammamixEM:
x <- c(rgamma(200, shape = 0.2, scale = 14), rgamma(200,
shape = 32, scale = 10), rgamma(200, shape = 5, scale = 6))
out <- gammamixEM(x, lambda = c(1, 1, 1)/3, verb = TRUE)
Here is a reproducible example for the function normalmixEM:
data(faithful)
attach(faithful)
out <- normalmixEM(waiting, arbvar = FALSE, epsilon = 1e-03)
plot(out, which=2)
I would like to obtain this graphical output of density from the function gammamixEM.
Here you go.
out <- normalmixEM(waiting, arbvar = FALSE, epsilon = 1e-03)
x <- out
whichplots <- 2
density = 2 %in% whichplots
loglik = 1 %in% whichplots
def.par <- par(ask=(loglik + density > 1), "mar") # only ask and mar are changed
mix.object <- x
k <- ncol(mix.object$posterior)
x <- sort(mix.object$x)
a <- hist(x, plot = FALSE)
maxy <- max(max(a$density), .3989*mix.object$lambda/mix.object$sigma)
I just had to dig into the source code of plot.mixEM
So, now to do this with gammamixEM:
x <- c(rgamma(200, shape = 0.2, scale = 14), rgamma(200,
shape = 32, scale = 10), rgamma(200, shape = 5, scale = 6))
gammamixEM.out <- gammamixEM(x, lambda = c(1, 1, 1)/3, verb = TRUE)
mix.object <- gammamixEM.out
k <- ncol(mix.object$posterior)
x <- sort(mix.object$x)
a <- hist(x, plot = FALSE)
maxy <- max(max(a$density), .3989*mix.object$lambda/mix.object$sigma)
main2 <- "Density Curves"
xlab2 <- "Data"
col2 <- 2:(k+1)
hist(x, prob = TRUE, main = main2, xlab = xlab2,
ylim = c(0,maxy))
for (i in 1:k) {
lines(x, mix.object$lambda[i] *
dnorm(x,
sd = sd(x)))
}
I believe it should be pretty straight forward to continue this example a bit, if you want to add the labels, smooth lines, etc. Here's the source of the plot.mixEM function.

Resources