Why does the Deep Embedded Clustering (DEC) R implementation result in one cluster? - r

Briefly, I am seeing a strange performance difference between two supposedly equivalent implementations of Deep Embedded Clustering (DEC): my R implementation (shown in full below) and a Python implementation (linked at the end).
My question:
According to the following results and figures, the R code reaches the stop condition by labeling all the observations (i.e. data samples) as one cluster (see Figure 2).
Figure 1. Labeling after the first initialization with k-means
Figure 2. Convergence to one cluster after the stop condition (final labeling)
Here is the complete code in R:
library(keras)
K <- keras::backend()
library(MLmetrics)
library(aricode)
#' Autoencoder model for Deep Embedded Clustering -----------------------------------------------------------
createAutoencoderModel <- function( numberOfUnitsPerLayer,
activation = 'relu',
initializer = 'glorot_uniform' )
{
numberOfEncodingLayers <- length( numberOfUnitsPerLayer ) - 1
# input of AE
inputs <- layer_input( shape = numberOfUnitsPerLayer[1],name = 'input' )
encoder <- inputs
# internal layers in encoder
for( i in seq_len( numberOfEncodingLayers - 1 ) )
{
encoder <- encoder %>%
layer_dense( units = numberOfUnitsPerLayer[i+1],
activation = activation, kernel_initializer = initializer )
}
# hidden layer
encoder <- encoder %>%
layer_dense( units = tail( numberOfUnitsPerLayer, 1 ) )
autoencoder <- encoder
# internal layers in decoder
for( i in seq( from = numberOfEncodingLayers, to = 2, by = -1 ) )
{
autoencoder <- autoencoder %>%
layer_dense( units = numberOfUnitsPerLayer[i],
activation = activation, kernel_initializer = initializer )
}
# output
autoencoder <- autoencoder %>%
layer_dense( numberOfUnitsPerLayer[1], kernel_initializer = initializer, name = 'decoder' )
return( list(
autoencoderModel = keras_model( inputs = inputs, outputs = autoencoder ),
encoderModel = keras_model( inputs = inputs, outputs = encoder ) ) )
}
# Definition of clustering layer ---------------------------------------------------------------------------
ClusteringLayer <- R6::R6Class( "ClusteringLayer",
inherit = KerasLayer,
lock_objects = FALSE,
public = list(
numberOfClusters = 10,
initialClusterWeights = NULL,
alpha = 1.0,
name = '',
initialize = function( numberOfClusters,
initialClusterWeights = NULL, alpha = 1.0, name = '' )
{
self$numberOfClusters <- numberOfClusters
self$initialClusterWeights <- initialClusterWeights
self$alpha <- alpha
self$name <- name
},
build = function( input_shape )
{
if( length( input_shape ) != 2 )
{
stop( paste0( "input_shape is not of length 2." ) )
}
self$clusters <- self$add_weight(
shape = list( self$numberOfClusters, input_shape[[2]] ),
initializer = 'glorot_uniform', name = 'clusters' )
if( ! is.null( self$initialClusterWeights ) )
{
self$set_weights( self$initialClusterWeights )
self$initialClusterWeights <- NULL
}
self$built <- TRUE
},
call = function( inputs, mask = NULL )
{
# Uses Student t-distribution (same as t-SNE)
# inputs are the variable containing the data, shape = ( numberOfSamples, numberOfFeatures )
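# Soft assignment computed below, following the DEC formulation:
#   q_ij = ( 1 + ||z_i - mu_j||^2 / alpha )^( -( alpha + 1 ) / 2 ) /
#          sum_j' ( 1 + ||z_i - mu_j'||^2 / alpha )^( -( alpha + 1 ) / 2 )
# where z_i is the embedded sample and mu_j is the j-th cluster centre.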
K <- keras::backend()
q <- 1.0 / ( 1.0 + ( K$sum( K$square(
K$expand_dims( inputs, axis = 1L ) - self$clusters ), axis = 2L ) / self$alpha ) )
q <- q^( ( self$alpha + 1.0 ) / 2.0 )
q <- K$transpose( K$transpose( q ) / K$sum( q, axis = 1L ) )
return( q )
},
compute_output_shape = function( input_shape )
{
return( list( input_shape[[1]], self$numberOfClusters ) )
}
)
)
layer_clustering <- function( object,
numberOfClusters, initialClusterWeights = NULL,
alpha = 1.0, name = '' )
{
create_layer( ClusteringLayer, object,
list( numberOfClusters = numberOfClusters,
initialClusterWeights = initialClusterWeights,
alpha = alpha, name = name )
)
}
#' Deep embedded clustering (DEC) model class --------------------------------------------------------------
DeepEmbeddedClusteringModel <- R6::R6Class( "DeepEmbeddedClusteringModel",
inherit = NULL,
lock_objects = FALSE,
public = list(
numberOfUnitsPerLayer = NULL,
numberOfClusters = 10,
alpha = 1.0,
initializer = 'glorot_uniform',
convolutional = FALSE,
inputImageSize = NULL,
initialize = function( numberOfUnitsPerLayer,
numberOfClusters, alpha = 1.0, initializer = 'glorot_uniform',
convolutional = FALSE, inputImageSize = NULL )
{
self$numberOfUnitsPerLayer <- numberOfUnitsPerLayer
self$numberOfClusters <- numberOfClusters
self$alpha <- alpha
self$initializer <- initializer
self$convolutional <- convolutional
self$inputImageSize <- inputImageSize
ae <- createAutoencoderModel( self$numberOfUnitsPerLayer,
initializer = self$initializer )
self$autoencoder <- ae$autoencoderModel
self$encoder <- ae$encoderModel
# prepare DEC model
clusteringLayer <- self$encoder$output %>%
layer_clustering( self$numberOfClusters, name = "clustering" )
self$model <- keras_model( inputs = self$encoder$input, outputs = clusteringLayer )
},
pretrain = function( x, optimizer = 'adam', epochs = 200L, batchSize = 256L )
{
self$autoencoder$compile( optimizer = optimizer, loss = 'mse' )
self$autoencoder$fit( x, x, batch_size = batchSize, epochs = epochs )
},
loadWeights = function( weights )
{
self$model$load_weights( weights )
},
extractFeatures = function( x )
{
self$encoder$predict( x, verbose = 0 )
},
predictClusterLabels = function( x )
{
clusterProbabilities <- self$model$predict( x, verbose = 0 )
return( max.col( clusterProbabilities ) )
},
targetDistribution = function( q )
{
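# Target distribution from the DEC paper:
#   p_ij = ( q_ij^2 / f_j ) / sum_j' ( q_ij'^2 / f_j' ),  with f_j = sum_i q_ij
# NB: matrix / vector division in R recycles the vector column-wise, so the
# column-wise and row-wise divisions can be written explicitly as
# sweep( q^2, 2, colSums( q ), "/" ) and weight / rowSums( weight ) if needed.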
weight <- q^2 / colSums( q )
p <- t( t( weight ) / rowSums( weight ) )
return( p )
},
compile = function( optimizer = 'sgd', loss = 'kld', lossWeights = NULL )
{
self$model$compile( optimizer = optimizer, loss = loss, loss_weights = lossWeights )
},
fit = function( x, maxNumberOfIterations = 2e4, batchSize = 256L, tolerance = 1e-3, updateInterval = 10)
{
# Initialize clusters using k-means
km <- stats::kmeans( self$encoder$predict( x, verbose = 0 ),
centers = self$numberOfClusters, nstart = 20 )
currentPrediction <- km$cluster # fitted( km )
previousPrediction <- currentPrediction
self$model$get_layer( name = 'clustering' )$set_weights( list( km$centers ) )
# Deep clustering
loss <- 10000
index <- 0
indexArray <- 1:( dim( x )[1] )
for( i in seq_len( maxNumberOfIterations ) )
{
if( i %% updateInterval == 1 )
{
q <- self$model$predict( x, verbose = 0 )
p <- self$targetDistribution( q )
# Met stopping criterion
currentPrediction <- max.col( q )
plot(currentPrediction, col="blue")
title(main = 'Current prediction')
deltaLabel <- sum( currentPrediction != previousPrediction ) / length( currentPrediction )
cat( "Itr", i, ": ( out of", maxNumberOfIterations,
"): loss = [", unlist( loss ), "], deltaLabel =", deltaLabel,
", ACC= ", Accuracy(previousPrediction, currentPrediction),
", NMI= ", NMI(previousPrediction, currentPrediction), "\n", sep = ' ' )
previousPrediction <- currentPrediction
if( i > 1 && deltaLabel < tolerance )
{
print('Reached tolerance threshold. Stopping training......')
break
}
}
# train on batch
batchIndices <- indexArray[( index * batchSize + 1 ):min( ( index + 1 ) * batchSize, dim( x )[1] )]
loss <- self$model$train_on_batch( x = x[batchIndices,], y = p[batchIndices,] )
if( ( index + 1 ) * batchSize + 1 <= dim( x )[1] )
{
index <- index + 1
} else {
index <- 0
}
}
return( currentPrediction )
}
)
)
# loading dataset ---------------------------------------------------------------------------------------------
fmnist <- dataset_fashion_mnist()
numberOfTrainingData <- length( fmnist$train$y )
numberOfTestingData <- length( fmnist$test$y )
numberOfPixels <- prod( dim( fmnist$test$x[1,,] ) )
fmnist$train$xreshaped <- array_reshape( fmnist$train$x,
dim = c( numberOfTrainingData, numberOfPixels ), order = "C" )
fmnist$test$xreshaped <- array_reshape( fmnist$test$x,
dim = c( numberOfTestingData, numberOfPixels ), order = "C" )
x <- rbind( fmnist$test$xreshaped, fmnist$train$xreshaped )/255.0
y <- c( fmnist$test$y, fmnist$train$y )
numberOfClusters <- length( unique( fmnist$train$y ) )
initializer <- initializer_variance_scaling(
scale = 1/3, mode = 'fan_in', distribution = 'uniform' )
pretrainOptimizer <- optimizer_sgd( lr = 1.0, momentum = 0.9 )
decModel <- DeepEmbeddedClusteringModel$new(
numberOfUnitsPerLayer = c( numberOfPixels, 32, 32, 256, 10 ),
numberOfClusters = numberOfClusters, initializer = initializer )
decModel$pretrain( x = x, optimizer = optimizer_sgd( lr = 1.0, momentum = 0.9 ),
epochs = 10L, batchSize = 256L )
decModel$compile( optimizer = optimizer_sgd( lr = 1.0, momentum = 0.9 ), loss = 'kld' )
yPredicted <- decModel$fit( x, maxNumberOfIterations = 2e4, batchSize = 256,
tolerance = 1e-3, updateInterval = 10 )
Train on 70000 samples
Epoch 1/10
70000/70000 [==============================] - 4s 60us/sample - loss: 0.0795
Epoch 2/10
70000/70000 [==============================] - 3s 45us/sample - loss: 0.0651
Epoch 3/10
70000/70000 [==============================] - 3s 46us/sample - loss: 0.0470
Epoch 4/10
70000/70000 [==============================] - 3s 45us/sample - loss: 0.0430
Epoch 5/10
70000/70000 [==============================] - 3s 45us/sample - loss: 0.0402
Epoch 6/10
70000/70000 [==============================] - 3s 45us/sample - loss: 0.0359
Epoch 7/10
70000/70000 [==============================] - 3s 45us/sample - loss: 0.0345
Epoch 8/10
70000/70000 [==============================] - 3s 45us/sample - loss: 0.0337
Epoch 9/10
70000/70000 [==============================] - 3s 45us/sample - loss: 0.0326
Epoch 10/10
70000/70000 [==============================] - 3s 45us/sample - loss: 0.0311
<tensorflow.python.keras.callbacks.History>
Figure 3 Training with 10 epochs
Itr 1 : ( out of 20000 ): loss = [ 10000 ], deltaLabel = 0 , ACC= 1 , NMI= 1
Itr 11 : ( out of 20000 ): loss = [ 1.02756 ], deltaLabel = 0.8403571 , ACC= 0.1596429 , NMI= 0.2638058
Itr 21 : ( out of 20000 ): loss = [ 1.016267 ], deltaLabel = 0.3924 , ACC= 0.6076 , NMI= 0
Itr 31 : ( out of 20000 ): loss = [ 1.467916 ], deltaLabel = 0 , ACC= 1 , NMI= NaN
[1] "Reached tolerance threshold. Stopping training......"
Figure 4 fitting the DEC model
For comparison, Figures 5 and 6 show the corresponding results from the Python implementation:
Figure 5. Initializing the labels with k-means
Figure 6. Final labeling after the model converges (x-axis: samples, y-axis: labels)
Could you please let me know why this is happening?
I have tried other loss functions (for example "categorical_crossentropy"), but the same phenomenon occurs.
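For reference, a minimal check of the labels against the ground truth (a sketch only; it assumes the objects x, y, decModel, yPredicted, and numberOfClusters created by the script above):
library(aricode)
# k-means on the pretrained embedding, as used for the initialization
kmInit <- stats::kmeans( decModel$extractFeatures( x ), centers = numberOfClusters, nstart = 20 )
cat( "NMI( truth, k-means init ) =", NMI( as.integer( y ), kmInit$cluster ), "\n" )
cat( "NMI( truth, final DEC )    =", NMI( as.integer( y ), yPredicted ), "\n" )
# cluster sizes after DEC; everything falling into one cluster reproduces the problem
print( table( yPredicted ) )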
Information about the machine:
Python 3.7 launched from Spyder 4.1
RStudio Version 1.2.5033
Dataset "mnist" for both implementations
The implementation in Python: https://www.dropbox.com/s/ii3k7rklz7z6446/DEC_original.py?dl=0

Related

How to calculate the Bayes error rate in R for data that has two classes?

I have data with two classes and 5000 observations, which I generated as follows:
# sample from beta dist. and normal
betadata1 <- function(seed,n.obs,boun1,boun2,k1,k2,k3,s1,s2,s3 ) {
set.seed(seed)
U =runif(n.obs)
rand.samples = rep(NA,n.obs)
for(i in 1:n.obs){
if(U[i]< boun1){
rand.samples[i] = rbeta(1,k1,s1)
}else if(U[i]< boun2){
rand.samples[i] = rbeta(1,k2,s2)
}else{
rand.samples[i] = rbeta(1,k3,s3)
}
}
class = rep( 1, n.obs )
data = data.frame( V1 = class,V2 = rand.samples )
}
betadata2 <- function(seed, n.obs,k1,s1 ) {
set.seed(seed)
#Sample N random uniforms U
U =runif( n.obs)
rand.samples = rep(NA, n.obs)
for(i in 1: n.obs){
rand.samples[i] = rnorm(1,k1,s1)
}
class = rep( 2, n.obs )
data = data.frame( V1 = class,V2 = rand.samples )
}
############
mix.data1 = betadata1(893123,3500,1,1,.2,20,25,.2,1,1)
mix.data2= betadata2(1170022,1500,.5,.2)
data.gen.training = rbind(mix.data1,mix.data2)
mix.data11 = betadata1(102938,840,1,1,.2,20,25,.2,1,1)
mix.data22= betadata2(910710999,360,.5,.2)
data.gen.test = rbind(mix.data11,mix.data22)
My question is: how do I calculate the Bayes error rate for this data, as in the following plot of the two classes?
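For reference, the textbook definition can be evaluated numerically (a sketch only; the densities and priors below are my assumptions): with known class densities f1, f2 and class priors pi1, pi2, the Bayes error is the integral of min( pi1*f1(x), pi2*f2(x) ) over x. Assuming class 1 follows Beta(0.2, 0.2) (with boun1 = 1 in the call above, only the first beta component is ever drawn), class 2 follows Normal(0.5, 0.2), and the priors are the training proportions 3500/5000 and 1500/5000:
pi1 <- 3500 / 5000
pi2 <- 1500 / 5000
f1 <- function(x) dbeta(x, 0.2, 0.2)               # class 1 density (assumed)
f2 <- function(x) dnorm(x, mean = 0.5, sd = 0.2)   # class 2 density (assumed)
# Bayes error = integral of the pointwise minimum of the prior-weighted densities
bayes.err <- integrate(function(x) pmin(pi1 * f1(x), pi2 * f2(x)), lower = -1, upper = 2)$value
bayes.err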

Intersection of two spheres (maximization ) in R

I want to find the intersection of two spheres in R (which is a circle) and find the max and min of the coordinates on that circle.
The spheres are :
S1: x^2+y^2+z^2=16
S2: (x+1)^2+(y+1)^2+(z+1)^2=27
library(rgl)
s1 =spheres3d(x = 0, y = 0, z = 0, radius = 4)
s2 =spheres3d(x = -1, y = -1, z = -1, radius = sqrt(27))
Subtracting S1 from S2 gives 2x + 2y + 2z + 3 = 11, so I think the plane that the circle lies on will be the following (is there any way that R can find this from S1 & S2?):
P1: x+y+z=4
So now I have a maximization problem (P1 subject to S1 and S2), for which I wrote this code:
library(nloptr)
eval_f <- function( x ) {
return( list( "objective" = (x[1]+x[2]+x[3])-4,
"gradient" = c(1,1,1) ))}
# constraint functions
eval_g_eq <- function( x ) {
constr <- cbind(c( (x[1]+1)^2 + (x[2]+1)^2 + (x[3]+1)^2 - 27) , c(x[1]^2+x[2]^2+x[3]^2-16))
grad <- cbind ( c( 2.0*(x[1]+1),
2.0*(x[2]+1),
2.0*(x[3]+1)
),c(2*x[1],2*x[2],2*x[3]) )
return( list( "constraints"=constr, "jacobian"=grad ) )
}
# initial values
x0 <- c( 0, 0, 0 )
local_opts <- list( "algorithm" = "NLOPT_LD_MMA",
"xtol_rel" = 1.0e-7 )
opts <- list( "algorithm" = "NLOPT_LD_AUGLAG",
"xtol_rel" = 1.0e-7,
"maxeval" = 1000,
"local_opts" = local_opts )
res <- nloptr( x0=x0,
eval_f=eval_f,eval_g_eq=eval_g_eq,
opts=opts)
print( res )
Link to the manual - nloptr function.
But I think there is a mistake!

Optimization with non-box bounds in R

I am using optim() with Nelder-Mead and BFGS to fit a rather complicated function with four parameters:
initial <- c(dep=2, z0=2, na=6, zjoint=5)
The function to be minimised is the sum of squared differences between the model function and an observed wind profile (the functions can be seen below). I do this individually for about 2000 wind profiles, so I end up with a distribution for each parameter.
The function (wpLELDefault) has box bounds for the parameters,
0 <= dep, z0, na, zjoint
28 >= dep, z0, zjoint
but also the condition that
dep + z0 < 28
Now, the function wpLELDefault() is implemented in such a way that it returns NA if the parameters are out of the allowed range.
If I use Nelder-Mead, the parameter distributions are very sensitive to the initial values for optim(), and in the majority of cases they end up at the extremes or show a rough distribution with many spikes.
BFGS works much better (smoother parameter value distributions), but it often seems to have problems with the NA values and consequently fails to fit many wind profiles.
Using L-BFGS-B with bounds poses the problem of how to specify the non-box condition for dep + z0.
So my question:
What is the best way to approach this problem?
Are there optimization routines that are more robust to NA values returned by the function?
Which ones in R allow specifying non-box bounds? I would prefer a function that deals gracefully with returned NAs, as I also want to fit another function with more complex bounds.
I looked at the CRAN Task View Optimization and Mathematical Programming, but I could not find anything (I must admit, my knowledge of optimization is rather limited).
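One route I have considered (a sketch only, not verified on my data): base R's constrOptim() accepts arbitrary linear inequality constraints of the form ui %*% theta - ci >= 0, which can encode the box bounds together with dep + z0 < 28. Here wpLELMin stands for the sum-of-squares objective defined inside fitOptim.wpLEL.default.single below, and the starting values must be strictly feasible:
theta0 <- c(dep = 2, z0 = 2, na = 6, zjoint = 5)   # strictly inside the feasible region
ui <- rbind(
diag(4),            # dep >= 0, z0 >= 0, na >= 0, zjoint >= 0
c(-1,  0, 0,  0),   # dep    <= 28
c( 0, -1, 0,  0),   # z0     <= 28
c( 0,  0, 0, -1),   # zjoint <= 28
c(-1, -1, 0,  0)    # dep + z0 <= 28
)
ci <- c(rep(0, 4), rep(-28, 4))
fit <- constrOptim(theta = theta0, f = wpLELMin, grad = NULL, ui = ui, ci = ci,
z = z, u = u, ua = ua, h = h, za = za, z0sol = z0sol)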
The function wpLELDefault
wpLELDefault <- function(
z,
ua,
dep,
z0,
na, # = 7,
zjoint,
h, # = 28,
za, # = 37,
z0sol,# = 0.001,
noU = FALSE,
check = TRUE
){
vk <- 0.41
ok <- ifelse(
check,
parameterOK(
z = z,
ua = ua,
dep = dep,
z0 = z0,
na = na,
zjoint = zjoint,
h = h,
za = za,
z0sol = z0sol
),
TRUE
)
if (!isTRUE(ok)) {
stop(ok)
}
ustar <- ua * vk / log( (za - dep) / z0)
z0h <- z0 * exp( -6.27 * vk * ( ustar^(1/3) ) )
uzjoint <- (ustar / vk) * log( (h - dep)/z0 ) * exp( - na * (1 - zjoint/h ) )
ustarsol <- ifelse(
(zjoint == 0),
as.numeric(NA),
uzjoint * vk / log( zjoint / z0sol )
)
##
result <- list(
z = NA,
u = NA,
u.onlyTop = NA
)
if (!noU) {
result$z <- as.numeric(z)
##
result$u <- as.numeric(
sapply(
z,
function(z) {
if (z >= h) {
u <- ( ustar/vk ) * log( (z-dep) / z0 )
} else if (z >= zjoint) {
uh <- ( ustar/vk ) * log( (h-dep) / z0 )
u <- uh * exp( -na*(1-(z/h)) )
} else if (z >= 0) {
u <- ( ustarsol/vk ) * log( (z ) / z0sol )
} else {
u <- NA
}
return(u)
}
)
)
names(result$u) <- paste0("h", z)
##
result$u.onlyTop = as.numeric(
sapply(
z,
function(z) {
zd <- ((z-dep) / z0)
if (zd < 0){
u <- NA
} else {
u <- ( ustar/vk ) * log( (z-dep) / z0 )
}
if (!is.na(u)) {
if (u < 0) {
u <- NA
}
}
return(u)
}
)
)
}
##
result$parametrization <- "default"
result$dep <- as.numeric(dep)
result$z0 <- as.numeric(z0)
result$na <- as.numeric(na)
result$zjoint <- as.numeric(zjoint)
result$h <- as.numeric(h)
result$za <- as.numeric(za)
result$z0sol <- as.numeric(z0sol)
result$vk <- as.numeric(vk)
result$ua <- as.numeric(ua)
result$ustar <- as.numeric(ustar)
result$z0h <- as.numeric(z0h)
result$uzjoint <- as.numeric(uzjoint)
result$ustarsol <- as.numeric(ustarsol)
##
result$noU <- noU
result$check <- check
##
class(result) <- c("wpLEL")
return(result)
}
The function fitOptim.wpLEL.default.single
fitOptim.wpLEL.default.single <- function(
z,
u,
LAI,
initial = c(dep=25, z0=0.8*28, na=9, zjoint=0.2*2),
h = 28,
za = 37,
z0sol = 0.001,
...
) {
## Function to be minimised
wpLELMin <- function(par, z, u, ua, h, za, z0sol) {
if (
isTRUE(
parameterOK(
z = z,
ua = ua,
dep = par[1], # par$dep,
z0 = par[2], # par$z0,
na = par[3], # par$na,
zjoint = par[4], # par$zjoint
h = h,
za = za,
z0sol = z0sol
)
)
) {
p <- wpLELDefault(
z = z,
ua = ua,
dep = par[1], # par$dep,
z0 = par[2], # par$z0,
na = par[3], # par$na,
zjoint = par[4], # par$zjoint
h = h,
za = za,
z0sol = z0sol,
check = FALSE
)
result <- sum( ( (p$u - u)^2 ) / length(u) )
} else {
result <- NA
}
return( result )
}
ua <- u[length(u)]
result <- list()
result$method <- "fitOptim.wpLEL.default.single"
result$initial <- initial
result$dot <- list(...)
result$z <- z
result$u <- u
result$fit <- optim(
par = c(
initial["dep"],
initial["z0"],
initial["na"],
initial["zjoint"]
),
fn = wpLELMin,
z = z,
u = u,
ua = ua,
h = h,
za = za,
z0sol = z0sol,
...
)
result$wp <- wpLELDefault(
z = z,
ua = ua,
dep = result$fit$par["dep"],
z0 = result$fit$par["z0"],
na = result$fit$par["na"],
zjoint = result$fit$par["zjoint"],
h = h,
za = za,
z0sol = z0sol
)
class(result) <- c(class(result), "wpLELFit")
return(result)
}

intersection of two step functions

I am trying to determine the point (x, y) where two functions intersect. The functions are step interpolations between sets of points. One function is weakly increasing (v1) and the other weakly decreasing (v2). I'm coding in R, but a general algorithm is also OK.
If it helps, this is to determine market equilibrium with sets of supply and demand points.
The length of the two vectors is different and their x's and y's will not be the same.
Some example data:
set.seed(4)
v1 = data.frame( y = cumsum( runif(10) ) ,
x = cumsum( runif(10) ) )
v2 = data.frame( y = 5-cumsum( runif(8) ) ,
x = cumsum( runif(8) ) )
plot(y=0,x=0,type="n",xlim=c(0,5),ylim=c(0,5),xlab="x",ylab="y")
lines( y=v1$y , x=v1$x , type="S" , col="blue" )
lines( y=v1$y , x=v1$x , type="p" , col="blue" )
lines( y=v2$y , x=v2$x , type="s" , col="red" )
lines( y=v2$y , x=v2$x , type="p" , col="red" )
In this example, the intersection is at (x=2.7275363 , y=2.510405), where the x is from v2 and y is from v1.
Thanks
I was facing the same issue, but depended on speed, so I used the wonderful Rcpp to speed the code up.
If anybody is interested, this is what I did:
library(dplyr) # for data manipulation only, not used for the algorithm!
library(ggplot2) # for data graphing only, not used for the algorithm!
# Load (i.e., Source the Cpp function)
Rcpp::sourceCpp("find_optimum.cpp")
# small helper function that plots the supply and demand as a step-function
plot_supply_demand <- function(supply, demand) {
supply_df <- supply %>%
bind_rows(data_frame(p = -Inf, q = 0)) %>%
arrange(p) %>%
mutate(agg_q = cumsum(q), side = "supply") %>%
bind_rows(data_frame(p = Inf, q = 0, agg_q = sum(supply$q), side = "supply"))
demand_df <- demand %>%
bind_rows(data_frame(p = Inf, q = 0)) %>%
arrange(desc(p)) %>%
mutate(agg_q = cumsum(q), side = "demand") %>%
bind_rows(data_frame(p = -Inf, q = 0, agg_q = sum(demand$q), side = "demand"))
ggplot(mapping = aes(x = p, y = agg_q, color = side)) +
geom_step(data = demand_df, direction = "vh") +
geom_step(data = supply_df)
}
# create two data_frames containing the disaggregated data (i.e., orders)
# by graphing the data, or by calculating it by hand we see the optimum at (10, 2)
supply_small = data_frame(p = c(8, 10),
q = c(1, 2))
demand_small = data_frame(p = c(12, 10, 8),
q = c(1, 1, 1))
plot_supply_demand(supply_small, demand_small) +
geom_point(aes(x = 10, y = 2), color = "red", size = 5)
find_optimum(supply_small$p, supply_small$q, demand_small$p, demand_small$q)
#> $price
#> [1] 10
#>
#> $quantity
#> [1] 2
Larger example
set.seed(12345678)
demand <- data_frame(p = runif(100, 80, 200), q = rnorm(100, 10, 2))
supply <- data_frame(p = runif(100, 0, 120), q = rnorm(100, 10, 2))
opt <- find_optimum(supply$p, supply$q, demand$p, demand$q)
opt
#> $price
#> [1] 102.5982
#>
#> $quantity
#> [1] 841.8772
plot_supply_demand(supply, demand) +
geom_point(aes(x = opt$price, y = opt$quantity), color = "red", size = 2)
To zoom in a bit on the optimum, we can use the following
plot_supply_demand(supply, demand) +
geom_point(aes(x = opt$price, y = opt$quantity), color = "red", size = 2) +
xlim(opt$price + c(-10, 10)) + ylim(opt$quantity + c(-50, 50))
#> Warning: Removed 92 rows containing missing values (geom_path).
#> Warning: Removed 93 rows containing missing values (geom_path).
Created on 2018-10-20 by the reprex package (v0.2.0).
Rcpp Function
And last but not least, the C++ function in find_optimum.cpp that does the heavy lifting:
#include <Rcpp.h>
#include <map>
// [[Rcpp::export]]
Rcpp::List find_optimum(Rcpp::NumericVector price_supply,
Rcpp::NumericVector quant_supply,
Rcpp::NumericVector price_demand,
Rcpp::NumericVector quant_demand) {
std::map<double, double> supply;
std::map<double, double> demand;
// fill the maps
for (int i = 0; i < price_supply.size(); ++i) {
supply[price_supply[i]] += quant_supply[i];
}
for (int i = 0; i < price_demand.size(); ++i) {
demand[price_demand[i]] += quant_demand[i];
}
if (supply.empty() || demand.empty())
return Rcpp::List::create(Rcpp::Named("price") = 0, Rcpp::Named("quantity") = 0);
auto sIt = supply.begin(), nextS = std::next(sIt, 1);
const auto endS = supply.end();
auto dIt = demand.rbegin(), nextD = std::next(dIt, 1);
const auto endD = demand.rend();
// quantity and prices at either side
double pS = sIt->first, pD = dIt->first;
double qS = 0, qD = 0;
// next prices
double nextPS = nextS->first, nextPD = nextD->first;
if (pD < pS)
return Rcpp::List::create(Rcpp::Named("price") = 0, Rcpp::Named("quantity") = 0);
// add the best price from each side!
qS += sIt->second;
qD += dIt->second;
while (pS < pD) {
if (nextS == endS && nextD == endD) {
pD = qD < qS ? pS : pD;
break;
}
while (qS <= qD && sIt != endS && nextS->first <= pD) {
++sIt;
++nextS;
pS = sIt->first;
qS += sIt->second;
}
if (sIt == endS) break;
if (nextD->first < pS) {
pD = qD < qS ? pS : pD;
break;
}
while (qD < qS && dIt != endD && nextD->first >= pS) {
++dIt;
++nextD;
pD = dIt->first;
qD += dIt->second;
}
if (dIt == endD) break;
}
double price = pD;
double vol = qS < qD ? qS : qD;
return Rcpp::List::create(Rcpp::Named("price") = price,
Rcpp::Named("quantity") = vol);
}
You're drawing your step lines differently in each case: for v1 you change the vertical first and then the horizontal (up and across), whereas for v2 you reverse the order (across then down). Assuming this is correct, your intersection point will be at or immediately after a point in v1 where the next point along the x axis has a lower y coordinate. We can find that by doing:
v1$v <- 1
v2$v <- 2
v3 <- rbind(v1,v2)
v3 <- v3[order(v3$x),]
v3$diff <- c( diff(v3$y),0)
ind <- which(v3$diff < 0 & v3$v ==1)[1]
There are now two distinct cases: the intersection could be on the horizontal or the vertical arm following this point from v1. It will be the former if the immediately preceding v2 is higher than the v1 after our found one; otherwise it will be in the vertical arm. This is clear if you draw it out - I'll try and attach an image if you don't see this.
previousV2 <- tail(which(v3$v[1:ind]==2),1)
nextV1 <- which(v3$v[-(1:ind)]==1)[1] + ind
if (v3$y[previousV2] > v3$y[nextV1]) {
x <- v3$x[ind+1]
y <- v3$y[nextV1]
} else {
x <- v3$x[ind]
y <- v3$y[previousV2]
}
Worryingly, this doesn't agree with your (x=2.7275363 , y=2.510405) answer, but when I plot it, mine appears on the intersection. So either: I haven't understood what you want; you've miscalculated; or there's a different scheme regarding the order of horizontal and vertical components. The above code should be adaptable to different schemes.
I seem to have something that works, but it's a lot more complicated than I was expecting.
First, let me define a helper function:
between <- function(x, a, b) {
if(missing(b)) {
if(length(a)==2) {
a<-t(a)
}
} else {
a <- unname(cbind(a,b))
}
a<-t(apply(a,1,sort))
a[,1] <= x & x <= a[,2]
}
This just helps to check whether a number is between two others. Now I will embed the two data frames to make sets of consecutive point pairs, then I check each possible combination for segments that overlap in just the right way. (It's important that v1 here is the "S" and v2 is the "s".)
sa<-embed(as.matrix(v1[,c("x","y")]),2)
sz<-embed(as.matrix(v2[,c("x","y")]),2)
xx<-outer(1:nrow(sa), 1:nrow(sz), function(a,z)
(between(sa[a,2], sz[z,c(2,4)]) & between(sz[z,1], sa[a,c(1,3)])) *1
+ (between(sz[z,4], sa[a,c(2,4)]) & between(sa[a,3], sz[z,c(1,3)]))*2
)
Now xx contains the matching set of points; I just need to extract the correct coordinates depending on which type of intersection occurred.
i <- which(xx!=0, arr.ind=T)
int.pt <- if(nrow(i)>0 && ncol(i)==2) {
if(xx[i]==1) {
c(sz[i[2],1], sa[i[1],2])
} else if (xx[i]==2) {
c(sa[i[1],3], sz[i[2],4])
}
} else {
c(NA,NA)
}
#optionally plot intersection
#if (all(!is.na(int.pt))) {
# points(int.pt[1],int.pt[2], pch=20, col="black")
# abline(v=int.pt[1], h=int.pt[2], lty=2)
#}
Perhaps there is a better way, but at least you have another method that seems to work to compare answers with.
I had another think about the problem. A key issue is that I need to find the intersection within an optimisation routine, so it has to be fast. So I came up with the following (included here in case others have the same problem in the future). It is a modified Bentley-Ottmann algorithm.
# create some data
supply = data.frame( p = cumsum( runif(1000) ) ,
q = cumsum( runif(1000) ) )
demand = data.frame( p = tail(supply,1)$p - cumsum( runif(1000) ) ,
q = cumsum( runif(1000) ) )
# create tables that identify coordinates of horizontal and vertical lines
demand.h = cbind( p = head(demand,-1)$p ,
q.lower = head(demand,-1)$q ,
q.upper = tail(demand,-1)$q )
supply.v = cbind( q = head(supply,-1)$q ,
p.lower = head(supply,-1)$p ,
p.upper = tail(supply,-1)$p )
demand.v = cbind( q = tail(demand,-1)$q ,
p.lower = tail(demand,-1)$p ,
p.upper = head(demand,-1)$p )
supply.h = cbind( p = tail(supply,-1)$p ,
q.lower = head(supply,-1)$q ,
q.upper = tail(supply,-1)$q )
# define a function
find.intersection = function( f.A , f.B ){
f.result = any( f.B[,2]<=f.A[1] & f.B[,3]>=f.A[1] &
f.A[2] <=f.B[,1] & f.A[3] >=f.B[,1] )
return( f.result )
}
# find the intersection
intersection.h = c( demand.h[ apply( demand.h ,
MARGIN=1 ,
FUN=find.intersection ,
supply.v ) , 1 ] ,
supply.v[ apply( supply.v ,
MARGIN=1 ,
FUN=find.intersection ,
demand.h ) , 1 ] )
intersection.v = c( supply.h[ apply( supply.h ,
MARGIN=1 ,
FUN=find.intersection ,
demand.v ) , 1 ] ,
demand.v[ apply( demand.v ,
MARGIN=1 ,
FUN=find.intersection ,
supply.h ) , 1 ] )
intersection = c( intersection.h , intersection.v )
# (optional) if you want to print the graph and intersection
plot(y=0,x=0,type="n",
xlim=c(intersection[2]-1,intersection[2]+1),
ylim=c(intersection[1]-1,intersection[1]+1),
xlab="q",ylab="p")
lines( y=supply$p , x=supply$q , type="S" , col="black" )
lines( y=supply$p , x=supply$q , type="p" , col="black" )
lines( y=demand$p , x=demand$q , type="s" , col="black" )
lines( y=demand$p , x=demand$q , type="p" , col="black" )
points(intersection[2],intersection[1], pch=20, col="red")
abline( v=intersection[2], h=intersection[1], lty=2 , col="red")

MC algorithm for a non-conjugate model

The model is a Poisson likelihood with a Gaussian prior. I worked out the posterior for the model and I think I have coded it correctly, but I'm having a lot of trouble implementing the algorithm. I know it's probably just a matter of not defining my variables properly, but I'm not seeing where the problems lie. The code that I have so far is:
# Poisson model
#
#
# Log of the unnormalized posterior density:
log.post.dens = function( theta, n, sum.y, mu0, sig0 )
{
alpha = (log.dpois(x, lamda=exp(theta)))*dnorm(x, mu0, sig0)
}
#++++++++++++++++++++++++++++++++++++++++++++++++++++++++
rw.sim = function( M, mu0, sig0, n, sum.y, sd.pd, theta.start )
{
# create theta array and initialize theta[1]
#
theta = rep( 0, M )
theta[1] = theta.start
acc.cnt = 0
for( ii in 2:M ) {
# Normal proposal distribution is centered at the current theta
#
theta.new = rnorm( 1, theta[ii-1], sd.pd )
log.alpha = log.post.dens( theta.new, n, sum.y, mu0, sig0 ) -
log.post.dens( theta[ii-1], n, sum.y, mu0, sig0 )
if( log.alpha > 0 || exp( log.alpha ) > runif( 1, 0, 1 ) )
{
theta[ii] = theta.new
acc.cnt = acc.cnt + 1
}
else
theta[ii] = theta[ii-1]
}
list( ac = acc.cnt, theta = theta )
}
#++++++++++++++++++++++++++++++++++++++++++++++++++++++++
n = 200
mu0 = log(10)
sig0 = 3
yy = rpois( n, exp( mu0 ))
sd.pd = 1
theta.start = mu0
M = 100000
print( paste("M =", M, " mu0 =", mu0, " sig0 =", sig0, "sd.pd =", sd.pd,
"start", theta.start ))
res = rw.sim( M, mu0, sig0, length(yy), sum( yy ), sd.pd, theta.start )
theta = res$theta
acc.rate = res$ac / M
corr = cor( theta[1:(M-1)], theta[2:M])
print( paste("acceptance rate =", acc.rate ))
print( paste("correlation =", corr ))
m = 1
if( m )
{
theta0 = theta
thin.const = 40
theta = theta[ seq( .1*length(theta), length(theta), thin.const )]
}
par( mfrow=c(2,2))
hist( theta, prob=T, breaks=32 )
x = seq( min( theta ), max( theta ), len=200 )
lines( x, dnorm( x, mu0, sig0 ), col = 2)
plot( theta, type='l' )
acf( theta )
##pacf( theta )
#++++++++++++++++++++++++++++
#
# Posterior predictive density for data on a grid
#
hist( yy, prob=T)
lim1 = max(yy) + 2
xx = 0:lim1
ppd = rep( 0, lim1+1 )
for( ii in 1:(lim1+1) )
{
ppd[ii] = (1/M)*sum(yy)*((log.dpois(x, lamda=exp(theta)))*dnorm(x, mu0, sig0))
}
points( xx+.5, ppd, col=2 )
lines( xx+.5, ppd, col=2 )
As I said, it's my definition of the variables and parameters that's off, but I'm not sure how to fix it.
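For reference, under the model as I understand it (y_i ~ Poisson(exp(theta)) and theta ~ N(mu0, sig0^2)), the log posterior up to an additive constant is theta * sum(y) - n * exp(theta) - (theta - mu0)^2 / (2 * sig0^2). A sketch of a function computing this, for comparison with my log.post.dens above:
log.post.dens.sketch = function( theta, n, sum.y, mu0, sig0 )
{
# log-likelihood term: theta * sum(y) - n * exp(theta); prior term: Gaussian in theta
theta * sum.y - n * exp( theta ) - ( theta - mu0 )^2 / ( 2 * sig0^2 )
}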
