The model is Poisson likelihood and Gaussian prior. I worked out the posterior for the model and I think that I have it coded correctly but I'm having a lot of trouble trying to implement the algorithm. I know that it's just a simple matter of not defining my variables properly but I'm not seeing where the problems lie. The code that I have so far is:
# Poisson model
#
#
# Log of the unnormalized posterior density:
log.post.dens = function( theta, n, sum.y, mu0, sig0 )
{
alpha = (log.dpois(x, lamda=exp(theta)))*dnorm(x, mu0, sig0)
}
#++++++++++++++++++++++++++++++++++++++++++++++++++++++++
rw.sim = function( M, mu0, sig0, n, sum.y, sd.pd, theta.start )
{
# create theta array and initialize theta[1]
#
theta = rep( 0, M )
theta[1] = theta.start
acc.cnt = 0
for( ii in 2:M ) {
# Normal proposal distribution is centered at the current theta
#
theta.new = rnorm( 1, theta[ii-1], sd.pd )
log.alpha = log.post.dens( theta.new, n, sum.y, mu0, sig0 ) -
log.post.dens( theta[ii-1], n, sum.y, mu0, sig0 )
if( log.alpha > 0 || exp( log.alpha ) > runif( 1, 0, 1 ) )
{
theta[ii] = theta.new
acc.cnt = acc.cnt + 1
}
else
theta[ii] = theta[ii-1]
}
list( ac = acc.cnt, theta = theta )
}
#++++++++++++++++++++++++++++++++++++++++++++++++++++++++
n = 200
mu0 = log(10)
sig0 = 3
yy = rpois( n, exp( mu0 ))
sd.pd = 1
theta.start = mu0
M = 100000
print( paste("M =", M, " mu0 =", mu0, " sig0 =", sig0, "sd.pd =", sd.pd,
"start", theta.start ))
res = rw.sim( M, mu0, sig0, length(yy), sum( yy ), sd.pd, theta.start )
theta = res$theta
acc.rate = res$ac / M
corr = cor( theta[1:(M-1)], theta[2:M])
print( paste("acceptance rate =", acc.rate ))
print( paste("correlation =", corr ))
3
m = 1
if( m )
{
theta0 = theta
thin.const = 40
theta = theta[ seq( .1*length(theta), length(theta), thin.const )]
}
par( mfrow=c(2,2))
hist( theta, prob=T, breaks=32 )
x = seq( min( theta ), max( theta ), len=200 )
lines( x, dnorm( x, mu0, sig0 ), col = 2)
plot( theta, type=ālā )
acf( theta )
##pacf( theta )
#++++++++++++++++++++++++++++
#
# Posterior predictive density for data on a grid
#
hist( yy, prob=T)
lim1 = max(yy) + 2
xx = 0:lim1
ppd = rep( 0, lim1+1 )
for( ii in 1:(lim1+1) )
{
ppd[ii] = (1/M)*sum(yy)*((log.dpois(x, lamda=exp(theta)))*dnorm(x, mu0, sig0))
}
points( xx+.5, ppd, col=2 )
lines( xx+.5, ppd, col=2 )
As I said it's my defining of parameters that's off but I'm not sure how to fix it.
Related
Speaking briefly, I faced a strange performance difference in equal implementations of Deep embedded clustering (DEC) in R which I included links of implementation in the following.
My question,
According to the following results and figures( the R implementation is included), the code in R coverages in the stop condition by labeling all the observations(i.e. data samples) in one cluster (see Figure2).
Figure1. Labeling after first initializing with kmeans
Figure2 Converging in one cluster after the stop condition ( final labeling)
here is the complete code in R:
library(keras)
K <- keras::backend()
library(MLmetrics)
library(aricode)
#' Clustering layer for Deep Embedded Clustering -----------------------------------------------------------
createAutoencoderModel <- function( numberOfUnitsPerLayer,
activation = 'relu',
initializer = 'glorot_uniform' )
{
numberOfEncodingLayers <- length( numberOfUnitsPerLayer ) - 1
# input of AE
inputs <- layer_input( shape = numberOfUnitsPerLayer[1],name = 'input' )
encoder <- inputs
# internal layers in encoder
for( i in seq_len( numberOfEncodingLayers - 1 ) )
{
encoder <- encoder %>%
layer_dense( units = numberOfUnitsPerLayer[i+1],
activation = activation, kernel_initializer = initializer )
}
# hidden layer
encoder <- encoder %>%
layer_dense( units = tail( numberOfUnitsPerLayer, 1 ) )
autoencoder <- encoder
# internal layers in decoder
for( i in seq( from = numberOfEncodingLayers, to = 2, by = -1 ) )
{
autoencoder <- autoencoder %>%
layer_dense( units = numberOfUnitsPerLayer[i],
activation = activation, kernel_initializer = initializer )
}
# output
autoencoder <- autoencoder %>%
layer_dense( numberOfUnitsPerLayer[1], kernel_initializer = initializer, name = 'decoder' )
return( list(
autoencoderModel = keras_model( inputs = inputs, outputs = autoencoder ),
encoderModel = keras_model( inputs = inputs, outputs = encoder ) ) )
}
# Defination of Clustering layer ---------------------------------------------------------------------------
ClusteringLayer <- R6::R6Class( "ClusteringLayer",
inherit = KerasLayer,
lock_objects = FALSE,
public = list(
numberOfClusters = 10,
initialClusterWeights = NULL,
alpha = 1.0,
name = '',
initialize = function( numberOfClusters,
initialClusterWeights = NULL, alpha = 1.0, name = '' )
{
self$numberOfClusters <- numberOfClusters
self$initialClusterWeights <- initialClusterWeights
self$alpha <- alpha
self$name <- name
},
build = function( input_shape )
{
if( length( input_shape ) != 2 )
{
stop( paste0( "input_shape is not of length 2." ) )
}
self$clusters <- self$add_weight(
shape = list( self$numberOfClusters, input_shape[[2]] ),
initializer = 'glorot_uniform', name = 'clusters' )
if( ! is.null( self$initialClusterWeights ) )
{
self$set_weights( self$initialClusterWeights )
self$initialClusterWeights <- NULL
}
self$built <- TRUE
},
call = function( inputs, mask = NULL )
{
# Uses Student t-distribution (same as t-SNE)
# inputs are the variable containing the data, shape = ( numberOfSamples, numberOfFeatures )
K <- keras::backend()
q <- 1.0 / ( 1.0 + ( K$sum( K$square(
K$expand_dims( inputs, axis = 1L ) - self$clusters ), axis = 2L ) / self$alpha ) )
q <- q^( ( self$alpha + 1.0 ) / 2.0 )
q <- K$transpose( K$transpose( q ) / K$sum( q, axis = 1L ) )
return( q )
},
compute_output_shape = function( input_shape )
{
return( list( input_shape[[1]], self$numberOfClusters ) )
}
)
)
layer_clustering <- function( object,
numberOfClusters, initialClusterWeights = NULL,
alpha = 1.0, name = '' )
{
create_layer( ClusteringLayer, object,
list( numberOfClusters = numberOfClusters,
initialClusterWeights = initialClusterWeights,
alpha = alpha, name = name )
)
}
#' Deep embedded clustering (DEC) model class --------------------------------------------------------------
DeepEmbeddedClusteringModel <- R6::R6Class( "DeepEmbeddedClusteringModel",
inherit = NULL,
lock_objects = FALSE,
public = list(
numberOfUnitsPerLayer = NULL,
numberOfClusters = 10,
alpha = 1.0,
initializer = 'glorot_uniform',
convolutional = FALSE,
inputImageSize = NULL,
initialize = function( numberOfUnitsPerLayer,
numberOfClusters, alpha = 1.0, initializer = 'glorot_uniform',
convolutional = FALSE, inputImageSize = NULL )
{
self$numberOfUnitsPerLayer <- numberOfUnitsPerLayer
self$numberOfClusters <- numberOfClusters
self$alpha <- alpha
self$initializer <- initializer
self$convolutional <- convolutional
self$inputImageSize <- inputImageSize
ae <- createAutoencoderModel( self$numberOfUnitsPerLayer,
initializer = self$initializer )
self$autoencoder <- ae$autoencoderModel
self$encoder <- ae$encoderModel
# prepare DEC model
clusteringLayer <- self$encoder$output %>%
layer_clustering( self$numberOfClusters, name = "clustering" )
self$model <- keras_model( inputs = self$encoder$input, outputs = clusteringLayer )
},
pretrain = function( x, optimizer = 'adam', epochs = 200L, batchSize = 256L )
{
self$autoencoder$compile( optimizer = optimizer, loss = 'mse' )
self$autoencoder$fit( x, x, batch_size = batchSize, epochs = epochs )
},
loadWeights = function( weights )
{
self$model$load_weights( weights )
},
extractFeatures = function( x )
{
self$encoder$predict( x, verbose = 0 )
},
predictClusterLabels = function( x )
{
clusterProbabilities <- self$model$predict( x, verbose = 0 )
return( max.col( clusterProbabilities ) )
},
targetDistribution = function( q )
{
weight <- q^2 / colSums( q )
p <- t( t( weight ) / rowSums( weight ) )
return( p )
},
compile = function( optimizer = 'sgd', loss = 'kld', lossWeights = NULL )
{
self$model$compile( optimizer = optimizer, loss = loss, loss_weights = lossWeights )
},
fit = function( x, maxNumberOfIterations = 2e4, batchSize = 256L, tolerance = 1e-3, updateInterval = 10)
{
# Initialize clusters using k-means
km <- stats::kmeans( self$encoder$predict( x, verbose = 0 ),
centers = self$numberOfClusters, nstart = 20 )
currentPrediction <- km$cluster # fitted( km )
previousPrediction <- currentPrediction
self$model$get_layer( name = 'clustering' )$set_weights( list( km$centers ) )
# Deep clustering
loss <- 10000
index <- 0
indexArray <- 1:( dim( x )[1] )
for( i in seq_len( maxNumberOfIterations ) )
{
if( i %% updateInterval == 1 )
{
q <- self$model$predict( x, verbose = 0 )
p <- self$targetDistribution( q )
# Met stopping criterion
currentPrediction <- max.col( q )
plot(currentPrediction, col="blue")
title(main = 'Current prediction')
deltaLabel <- sum( currentPrediction != previousPrediction ) / length( currentPrediction )
cat( "Itr", i, ": ( out of", maxNumberOfIterations,
"): loss = [", unlist( loss ), "], deltaLabel =", deltaLabel,
", ACC= ", Accuracy(previousPrediction, currentPrediction),
", NMI= ", NMI(previousPrediction, currentPrediction), "\n", sep = ' ' )
previousPrediction <- currentPrediction
if( i > 1 && deltaLabel < tolerance )
{
print('Reached tolerance threshold. Stopping training......')
break
}
}
# train on batch
batchIndices <- indexArray[( index * batchSize + 1 ):min( ( index + 1 ) * batchSize, dim( x )[1] )]
loss <- self$model$train_on_batch( x = x[batchIndices,], y = p[batchIndices,] )
if( ( index + 1 ) * batchSize + 1 <= dim( x )[1] )
{
index <- index + 1
} else {
index <- 0
}
}
return( currentPrediction )
}
)
)
# loading dataset ---------------------------------------------------------------------------------------------
fmnist <- dataset_fashion_mnist()
numberOfTrainingData <- length( fmnist$train$y )
numberOfTestingData <- length( fmnist$test$y )
numberOfPixels <- prod( dim( fmnist$test$x[1,,] ) )
3
fmnist$train$xreshaped <- array_reshape( fmnist$train$x,
dim = c( numberOfTrainingData, numberOfPixels ), order = "C" )
fmnist$test$xreshaped <- array_reshape( fmnist$test$x,
dim = c( numberOfTestingData, numberOfPixels ), order = "C" )
x <- rbind( fmnist$test$xreshaped, fmnist$train$xreshaped )/255.0
y <- c( fmnist$test$y, fmnist$train$y )
numberOfClusters <- length( unique( fmnist$train$y ) )
initializer <- initializer_variance_scaling(
scale = 1/3, mode = 'fan_in', distribution = 'uniform' )
pretrainOptimizer <- optimizer_sgd( lr = 1.0, momentum = 0.9 )
decModel <- DeepEmbeddedClusteringModel$new(
numberOfUnitsPerLayer = c( numberOfPixels, 32, 32, 256, 10 ),
numberOfClusters = numberOfClusters, initializer = initializer )
decModel$pretrain( x = x, optimizer = optimizer_sgd( lr = 1.0, momentum = 0.9 ),
epochs = 10L, batchSize = 256L )
decModel$compile( optimizer = optimizer_sgd( lr = 1.0, momentum = 0.9 ), loss = 'kld' )
yPredicted <- decModel$fit( x, maxNumberOfIterations = 2e4, batchSize = 256,
tolerance = 1e-3, updateInterval = 10 )
Train on 70000 samples
Epoch 1/10
70000/70000 [==============================] - 4s 60us/sample - loss: 0.0795
Epoch 2/10
70000/70000 [==============================] - 3s 45us/sample - loss: 0.0651
Epoch 3/10
70000/70000 [==============================] - 3s 46us/sample - loss: 0.0470
Epoch 4/10
70000/70000 [==============================] - 3s 45us/sample - loss: 0.0430
Epoch 5/10
70000/70000 [==============================] - 3s 45us/sample - loss: 0.0402
Epoch 6/10
70000/70000 [==============================] - 3s 45us/sample - loss: 0.0359
Epoch 7/10
70000/70000 [==============================] - 3s 45us/sample - loss: 0.0345
Epoch 8/10
70000/70000 [==============================] - 3s 45us/sample - loss: 0.0337
Epoch 9/10
70000/70000 [==============================] - 3s 45us/sample - loss: 0.0326
Epoch 10/10
70000/70000 [==============================] - 3s 45us/sample - loss: 0.0311
<tensorflow.python.keras.callbacks.History>
Figure 3 Training with 10 epochs
Itr 1 : ( out of 20000 ): loss = [ 10000 ], deltaLabel = 0 , ACC= 1 , NMI= 1
Itr 11 : ( out of 20000 ): loss = [ 1.02756 ], deltaLabel = 0.8403571 , ACC= 0.1596429 , NMI= 0.2638058
Itr 21 : ( out of 20000 ): loss = [ 1.016267 ], deltaLabel = 0.3924 , ACC= 0.6076 , NMI= 0
Itr 31 : ( out of 20000 ): loss = [ 1.467916 ], deltaLabel = 0 , ACC= 1 , NMI= NaN
[1] "Reached tolerance threshold. Stopping training......"
Figure 4 fitting the DEC model
Which it was accordingly respect to Figure 5 and Figure 6 in Python
Figure 5 Initializing the labels by kmeans
Figure 6 final labeling after the model coverage (x-axis is the samples and y-axis is the labels)
Could you please let me know why this is happening?
I have tried other loss functions, still, the similar phenomena happen (for example "categorical_crossentropy")
Information about the machine:
Python 3.7 lunched by Spyder 4.1
Rstodio Version 1.2.5033
Dataset "mnist" for both implementations
The implementation in Python: https://www.dropbox.com/s/ii3k7rklz7z6446/DEC_original.py?dl=0
I want to find intersection of two spheres in R(which is a circle) and find max and min of coordinates on that circle.
The spheres are :
S1: x^2+y^2+z^2=16
S2: (x+1)^2+(y+1)^2+(z+1)^2=27
library(rgl)
s1 =spheres3d(x = 0, y = 0, z = 0, radius = 4)
s2 =spheres3d(x = -1, y = -1, z = -1, radius = sqrt(27))
I think the plane which the circle is on that will be: ( is there any way that R can find this from S1&S2?).
P1: x+y+z=4
So now I have a maximization problem (P1 subject to S1 and S2): So I did this code:
eval_f <- function( x ) {
return( list( "objective" = (x[1]+x[2]+x[3])-4,
"gradient" = c(1,1,1) ))}
# constraint functions
eval_g_eq <- function( x ) {
constr <- cbind(c( (x[1]+1)^2 + (x[2]+1)^2 + (x[3]+1)^2 - 27) , c(x[1]^2+x[2]^2+x[3]^2-16))
grad <- cbind ( c( 2.0*(x[1]+1),
2.0*(x[2]+1),
2.0*(x[3]+1)
),c(2*x[1],2*x[2],2*x[3]) )
return( list( "constraints"=constr, "jacobian"=grad ) )
}
# initial values
x0 <- c( 0, 0, 0 )
local_opts <- list( "algorithm" = "NLOPT_LD_MMA",
"xtol_rel" = 1.0e-7 )
opts <- list( "algorithm" = "NLOPT_LD_AUGLAG",
"xtol_rel" = 1.0e-7,
"maxeval" = 1000,
"local_opts" = local_opts )
res <- nloptr( x0=x0,
eval_f=eval_f,eval_g_eq=eval_g_eq,
opts=opts)
print( res )
Link to the manual - nloptr function.
But I think there is a mistake!
I am using optim() with the Nelder-Mead and BFGS to fit a rather
complicated function with 4 parameter
initial <- c(dep=2, z0=2, na=6, zjoint=5)
The function to be minimised is the sum of squares of the function and
an observed wind profile (functions can be seen below). I do this
individually for about 2000 wind profiles, so I end up with a
distribution for each parameter.
The function (wpLELDefault) has box bounds for the parameter,
0 <= dep, z0, na, zjoint
28 >= dep, z0, zjoint
but also the condition that
dep + z0 < 28
now the function wpLELDefault() is implemented in such a way, that it
returns NA if the parameter are out of the allowed range.
If I use Nelder-Mead the parameter distribution is very sensitive to the initial values for optim() and in a majority of cases
ending at the extreme sides or having a rough distribution with many spikes.
BFGS works much better (smoother parameter value distribution), but does seem to have often problems with the NA values, consequently not being able to fit many wind profiles.
Using L-BFGS-B with bounds poses the problem on how to specify the
non-box condition for dep+z0.
So my question:
What is the best way to approach this problem?
Are there more robust optimization routines to NA values returned by the function?
Which ones in R allow to specify non-box bounds? I would prefer a function which deals gracefully with returned NAs as I also want to fit another function with more complex bounds.
I looked at the CRAN Task View Optimization and Mathematical Programming, but I could not find anything (I must admit, my knowledge at the issue of optimization is rather limited).
The function wpLELDefault
wpLELDefault <- function(
z,
ua,
dep,
z0,
na, # = 7,
zjoint,
h, # = 28,
za, # = 37,
z0sol,# = 0.001,
noU = FALSE,
check = TRUE
){
vk <- 0.41
ok <- ifelse(
check,
parameterOK(
z = z,
ua = ua,
dep = dep,
z0 = z0,
na = na,
zjoint = zjoint,
h = h,
za = za,
z0sol = z0sol
),
TRUE
)
if (!isTRUE(ok)) {
stop(ok)
}
ustar <- ua * vk / log( (za - dep) / z0)
z0h <- z0 * exp( -6.27 * vk * ( ustar^(1/3) ) )
uzjoint <- (ustar / vk) * log( (h - dep)/z0 ) * exp( - na * (1 - zjoint/h ) )
ustarsol <- ifelse(
(zjoint == 0),
as.numeric(NA),
uzjoint * vk / log( zjoint / z0sol )
)
##
result <- list(
z = NA,
u = NA,
u.onlyTop = NA
)
if (!noU) {
result$z <- as.numeric(z)
##
result$u <- as.numeric(
sapply(
z,
function(z) {
if (z >= h) {
u <- ( ustar/vk ) * log( (z-dep) / z0 )
} else if (z >= zjoint) {
uh <- ( ustar/vk ) * log( (h-dep) / z0 )
u <- uh * exp( -na*(1-(z/h)) )
} else if (z >= 0) {
u <- ( ustarsol/vk ) * log( (z ) / z0sol )
} else {
u <- NA
}
return(u)
}
)
)
names(result$u) <- paste0("h", z)
##
result$u.onlyTop = as.numeric(
sapply(
z,
function(z) {
zd <- ((z-dep) / z0)
if (zd < 0){
u <- NA
} else {
u <- ( ustar/vk ) * log( (z-dep) / z0 )
}
if (!is.na(u)) {
if (u < 0) {
u <- NA
}
}
return(u)
}
)
)
}
##
result$parametrization <- "default"
result$dep <- as.numeric(dep)
result$z0 <- as.numeric(z0)
result$na <- as.numeric(na)
result$zjoint <- as.numeric(zjoint)
result$h <- as.numeric(h)
result$za <- as.numeric(za)
result$z0sol <- as.numeric(z0sol)
result$vk <- as.numeric(vk)
result$ua <- as.numeric(ua)
result$ustar <- as.numeric(ustar)
result$z0h <- as.numeric(z0h)
result$uzjoint <- as.numeric(uzjoint)
result$ustarsol <- as.numeric(ustarsol)
##
result$noU <- noU
result$check <- check
##
class(result) <- c("wpLEL")
return(result)
}
The function fitOptim.wpLEL.default.single
fitOptim.wpLEL.default.single <- function(
z,
u,
LAI,
initial = c(dep=25, z0=0.8*28, na=9, zjoint=0.2*2),
h = 28,
za = 37,
z0sol = 0.001,
...
) {
## Function to be minimised
wpLELMin <- function(par, z, u, ua, h, za, z0sol) {
if (
isTRUE(
parameterOK(
z = z,
ua = ua,
dep = par[1], # par$dep,
z0 = par[2], # par$z0,
na = par[3], # par$na,
zjoint = par[4], # par$zjoint
h = h,
za = za,
z0sol = z0sol
)
)
) {
p <- wpLELDefault(
z = z,
ua = ua,
dep = par[1], # par$dep,
z0 = par[2], # par$z0,
na = par[3], # par$na,
zjoint = par[4], # par$zjoint
h = h,
za = za,
z0sol = z0sol,
check = FALSE
)
result <- sum( ( (p$u - u)^2 ) / length(u) )
} else {
result <- NA
}
return( result )
}
ua <- u[length(u)]
result <- list()
result$method <- "fitOptim.wpLEL.default.single"
result$initial <- initial
result$dot <- list(...)
result$z <- z
result$u <- u
result$fit <- optim(
par = c(
initial["dep"],
initial["z0"],
initial["na"],
initial["zjoint"]
),
fn = wpLELMin,
z = z,
u = u,
ua = ua,
h = h,
za = za,
z0sol = z0sol,
...
)
result$wp <- wpLELDefault(
z = z,
ua = ua,
dep = result$fit$par["dep"],
z0 = result$fit$par["z0"],
na = result$fit$par["na"],
zjoint = result$fit$par["zjoint"],
h = h,
za = za,
z0sol = z0sol
)
class(result) <- c(class(result), "wpLELFit")
return(result)
}
Mu is distributed as N(0,1) and sig^2 is distributed as IGamma(a,b) with a = 1, b = 2. I'm trying to create a couple of graphs (histograms, scatterplots, ACF, PACF) but keep getting error messages of "Error in xy.coords(x, y, xlabel, ylabel, log) :
'x' and 'y' lengths differ"
It's probably a stupid question but I'm new to r. This is the program I've ran so far
# Gibbs sampler, MODEL 2: semi-conjugate normal model
#
# Density of inverse gamma distribution
#dinvgamma = function( x, shape, rate )
{
exp( shape * log( rate ) - lgamma( shape ) -( shape+1)*log(x) - rate/x )
}
nn = 200
yy = rnorm( nn, 1, 4 )
mu.0 = 1.0
sig2.0 = 4^2
a = 2
b = 1
M = 10000
mu = rep( 0, 10000 )
sig2 = rep( 1, 10000 )
mean.y = mean( yy )
var.y = var( yy )
mu[1] = mean.y
sig2[1] = var.y
for( ii in 2:M ) {
mu.star <- ((1/sig2.0)*mu.0 + (nn/sig2 [ii-1])*mean(yy))/((1/sig2.0)+(nn/sig2 [ii- 1]))
sig2.star <- 1/((1/sig2.0)+(nn/sig2[ii-1]))
# sample mu from its full cond.
#
mu[ii] = rnorm( 1, mu.star, sqrt( sig2.star ))
a.star <- a + (nn/2)
b.star <- (sum((yy-mu [ii])^2)/2) + b
# sample sig2 from its full cond.
#
sig2[ii] = 1 / rgamma( 1, a.star, rate = b.star )
## print( c( mu[ii], sig2[ii] ))
}
mu <- mu[9001:length(mu)]
sig2 <- sig2[9001:length(mu)]
#
# Time series
#
par(mfrow=c(2,2))
plot( 1:M, mu, type="l", xlab="Iteration", xlim=c(0, 50), ylim=c(0, 100),
main= "Mu" )
hist( mu, prob = T )
x = seq( min(mu), max( mu ), len=200 )
lines( x, dnorm( x, mean=mu.0, sd=sqrt(sig2.0) ), col=2, lwd=2)
acf( mu )
pacf( mu )
#++++++++++++++++++++++++++++++++++
par(new=TRUE)
par(mfrow=c(2,2))
plot( 1:M, sig2, type="l", xlab="Iteration", ylab="sig^2",xlim=c(0, 50), ylim=c(0, 100), main= "sig^2" )
hist( sig2, prob = T )
x = seq( 0, max( sig2 ), len=200 )
lines( x, dinvgamma( x, shape=a, rate=b ), col=2, lwd=2)
acf( sig2 )
pacf( sig2 )
par(new=TRUE)
plot(mu, sig2, main="Scatterplot Example", xlim=c(0, 50), ylim=c(0, 100),xlab="mu ", ylab="sig2 ", pch=19)
Any help at all would be greatly appreciated. I'm under no illusion that my code is efficient or even approaching it so any changes you may deem necessary please feel free to tell me
Your problem seems to be coming from redefining these two variables:
mu <- mu[9001:length(mu)]
sig2 <- sig2[9001:length(mu)]
I commented them out and the script ran fine producing the plot (presumably) as desired. Maybe take a look at how you are redefining mu and sig2
Background
Using R to predict the next values in a series.
Problem
The following code generates and plots a model for a curve with some uniform noise:
slope = 0.55
offset = -0.5
amplitude = 0.22
frequency = 3
noise = 0.75
x <- seq( 0, 200 )
y <- offset + (slope * x / 100) + (amplitude * sin( frequency * x / 100 ))
yn <- y + (noise * runif( length( x ) ))
gam.object <- gam( yn ~ s( x ) + 0 )
plot( gam.object, col = rgb( 1.0, 0.392, 0.0 ) )
points( x, yn, col = rgb( 0.121, 0.247, 0.506 ) )
The model reveals the trend, as expected. The trouble is predicting subsequent values:
p <- predict( gam.object, data.frame( x=201:210 ) )
The predictions do not look correct when plotted:
df <- data.frame( fit=c( fitted( gam.object ), p ) )
plot( seq( 1:211 ), df[,], col="blue" )
points( yn, col="orange" )
The predicted values (from 201 onwards) appear to be too low.
Questions
Are the predicted values, as shown, actually the most accurate predictions?
If not, how can the accuracy be improved?
What is a better way to concatenate the two data sets (fitted.values( gam.object ) and p)?
The simulated data is weird, because all the errors you add to the "true" y are greater than 0. (runif creates numbers on [0,1], not [-1,1].)
The problem disappears when an intercept term in the model is allowed.
For example:
gam.object2 <- gam( yn ~ s( x ))
p2 <- predict( gam.object2, data.frame( x=201:210 ))
points( 1:211, c( fitted( gam.object2 ), p2), col="green")
The reason for the systematic underestimation in the model without intercept could be that gam uses a sum-to-zero constraint on the estimated smooth functions. I think point 2 answers your first and second questions.
Your third question needs clarification because a gam-object is not a data.frame. The two data types do not mix.
A more complete example:
slope = 0.55
amplitude = 0.22
frequency = 3
noise = 0.75
x <- 1:200
y <- (slope * x / 100) + (amplitude * sin( frequency * x / 100 ))
ynoise <- y + (noise * runif( length( x ) ))
gam.object <- gam( ynoise ~ s( x ) )
p <- predict( gam.object, data.frame( x = 1:210 ) )
plot( p, col = rgb( 0, 0.75, 0.2 ) )
points( x, ynoise, col = rgb( 0.121, 0.247, 0.506 ) )
points( fitted( gam.object ), col = rgb( 1.0, 0.392, 0.0 ) )