viterbi decoding of matrices - r

Aim : global decoding of hidden sequence using matrix. The step-by-step code is below:
# equilibrium probs
equil=function(P){
e=eigen(t(P))$vectors[,1]
e/sum(e)
}
# simulate hidden sequence
hssim=function(n,lambda)
{
r=dim(lambda)[1]
states=1:r
s=vector("numeric",n)
pi.lam=equil(lambda)
s[1]=sample(states,1,FALSE,pi.lam)
for (t in 2:n) {
s[t]=sample(states,1,FALSE,prob=lambda[s[t-1],])
}
s
}
# simulate observed sequence
obssim=function(s,P)
{
n=length(s)
r=dim(P)[2]
q=dim(P)[1]
states=1:q
obs=vector("numeric",n)
for (t in 1:n) {
obs[t]=sample(states,1,FALSE,prob=P[,s[t]])
}
obs
}
lambda=rbind(c(0.999,0.0002,0.0003,0.0004,0.0001),c(0.001,0.9930,0.010,0.004,0.002),c(0.0004,0.0020,0.9900,0.0075,0.0001),c(0.0007,0.0030,0.0003,0.9940,0.0020),c(0.0010,0.0005,0.0040,0.0020,0.9925))
s=hssim(10000,lambda)
P=cbind(c(0.6,0.1,0.1,0.2),c(0.25,0.3,0.2,0.25),c(0.1,0.6,0.2,0.1),c(0.25,0.2,0.4,0.1),c(0.5,0.2,0.2,0.1))
obs=obssim(s,P)
# optional - converting to/from another alphabet...
letters=c("a","c","g","t")
numbers=1:4
convert=function(x,frm,to)
{
to[match(x,frm)]
}
obslets=convert(obs,numbers,letters)
# estimate emmision probs from observed and hidden sequence
Pest=function(s,obs,r,q)
{
est=matrix(0,nrow=q,ncol=r)
for (i in 1:r) {
est[,i]=table(obs[s==i])
est[,i]=est[,i]/sum(est[,i])
}
est
}
phat=Pest(s,obs,5,4)
# estimate lambda from hidden sequence
lambdaest=function(s,r)
{
n=length(s)
est=matrix(0,ncol=r,nrow=r)
for (t in 2:n) {
est[s[t-1],s[t]]=est[s[t-1],s[t]]+1
}
for (i in 1:r) {
est[i,]=est[i,]/sum(est[i,])
}
est
}
lamhat=lambdaest(s,5)
# global decoding algorithm
global=function(obs,lambda,P)
{
r=dim(lambda)[1]
q=dim(P)[1]
n=length(obs)
s=vector("numeric",n)
f=matrix(0,nrow=r,ncol=n)
# forwards
f0=equil(lambda)
f[,1]=P[obs[1],]*(f0%*%lambda)
for (i in 2:n) {
for (k in 1:r){
f[k,i]=P[obs[i],k]*max(f[,i-1]*lambda[,k])
}
f[,i]=f[,i]/sum(f[,i])
}
# backwards
s[n]=which.max(f[,n])
for (i in (n-1):1) {
s[i]=which.max(lambda[,s[i+1]]*f[,i])
}
s
}
globest=global(obs,lambda,P)
I created function to solve viterbi decoding of a hidden sequence and showed error,and so i couldnt plot the graph identify the difference.Can anyone fix this for me.

The problem is that function max does not work with complex data like your f matrix.
I suggest taking the module before applying max. Example:
x = c(1 + 1i, 2)
# What you are doing
max(x)
# >> Error in max(x) : invalid 'type' (complex) of argument
# Suggestion
max(Mod(x))
# >> 2

Related

How can I find the observations associated with rank j in this code?

I have been trying to find the observations associated with rank j in Ranked Set Sampling method. The problem is I don't know how to use the simulations to find the Xj values I'm supposed to work with further. Please help!
#The rankedsets function selects ranked sets from a target population. The selection of units in a set is without replacement, but the sets are selecting with replacement.
rankedsets<-function(X,m,s=m){
if(s==m){
x=sample(X,(m^2),replace=F)
n=matrix(x,ncol=m,nrow=m,byrow=T)
ms=matrix(0,ncol=m,nrow=m)
for (i in 1:m){
ms[i,]=sort(n[i,])
}
}else {
x=sample(X,(m*s),replace=F)
n=matrix(x,ncol=m,nrow=s,byrow=T)
ms=matrix(0,ncol=m,nrow=s)
for (i in 1:s){
ms[i,]=sort(n[i,])
}
}
return(ms)
}
#The rss function samples from a target population by using ranked set sampling method
rss<-function(X,m,r=1,sets=FALSE){
rss=numeric()
set=matrix(0,ncol=m,nrow=(m*r))
if (is.vector(X)){
a=0
for (j in 1:r){
ms=rankedsets(X,m)
for (i in 1:(m)){
set[i+a,]=ms[i,]
rss[i+a]=ms[i,i]
}
a=a+m
}
rss=matrix(rss,ncol=m,nrow=r,byrow=T)
cn=rn=numeric()
for (i in 1:r){
rn[i]=paste("r","=",i)
}
for (i in 1:m){
cn[i]=paste("m","=",i)
}
rownames(rss)=rn
colnames(rss)=cn
if (sets){
s=list(sets=set,sample=rss)
return(s)
} else {
return(rss)}
}else stop(" X must be a vector!",call.=F)
}
#RSS Data Generation
data=rnorm(10000,1,3)
rss(data,m=5,r=3,sets=TRUE)
I was trying using simulations but the code doesn't return Xj values:
sims = 1000
Xj = rep(NA, sims)
because I don't really know where I should put my for loop.

Return() Not Working While Print() does after building a function in R

I'm working with panel data in R and am endeavoring to build a function that returns every user ID where PCA==1. I've largely gotten this to work, with one small problem: it only returns the values when I end the function with print() but does not do so when I end the function with return(). As I want the ids in a vector so I can later subset the data to only include those IDs, that's a problem. Code reflected below - can anyone advise on what I'm doing wrong?
The version that works (but doesn't do what I want):
retrievePCA<-function(data) {
for (i in 1:dim(data)[1]) {
if (data$PCA[i] == 1) {
id<-data$CPSIDP[i]
print(id)
}
}
}
retrievePCA(data)
The version that doesn't:
retrievePCA<-function(data) {
for (i in 1:dim(data)[1]) {
if (data$PCA[i] == 1) {
id<-data$CPSIDP[i]
return(id)
}
}
}
vector<-retrievePCA(data)
vector
Your problem is a simple misunderstanding of what a function and returning from a function does.
Take the small example below
f <- function(x){
x <- x * x
return x
x <- x * x
return x
}
f(2)
[1] 4
4 is returned, 8 is not. That is because return exits the function returning the specific value. So in your function the function hits the first instance where PCA[i] == 1 and then exits the function. Instead you should create a vector, list or another alternative and return this instead.
retrievePCA<-function(data) {
ids <- vector('list', nrow(data))
for (i in 1:nrow(data)) {
if (data$PCA[i] == 1) {
ids[[i]] <-data$CPSIDP[i]
}
}
return unlist(ids)
}
However you could just do this in one line
data$CPSIDP[data$PCA == 1]

Likelihood for First order censored autoregressive process with covariate(see Jung Wook Park1 , Marc G. Genton2 and Sujit K. Ghosh3)

library(mvtnorm)
set.seed(14)
n=10000
sigmatrue<-1
rhotrue<-0.3
b1=0.05
b0=0
y<-arima.sim(model=list(ar=c(0.3)),n=10000 ,sd=sigmatrue)#kataskevi
#xronoseiras
x=rep(0,n)
for(i in 1:n){
x[i]=i
}
for(t in 1:n)
{
y[t]=y[t]+b0+b1*x[t]
}
est=arima(y,order=c(1,0,0),xreg=x,include.mean=TRUE,method="ML",kappa=1e+06)
cens<-rep(0, n)
c=(9/10)*(n*b1+b0)
for (i in 1:n) {
if(y[i]>c){
y[i]<-c
cens[i]<-1
}
}
ll<-function(p){
sigma=matrix(c(p[2]^2/(1-p[3]^2), p[2]^2*p[3]/(1-p[3]^2),p[2]^2*p[3]/(1-p[3]^2),p[2]^2/(1-p[3]^2)),ncol=2,nrow=2,byrow=TRUE)
likelihood<-rep(0,n)
for(t in 2 :n){
if(cens[t]==0 & cens[t-1]==0){
likelihood[t]<-dnorm(((y[t]-(p[1]+p[4]*t)-p[3]*(y[t-1]-(p[1]+p[4]*(t-1)))/p[2]) )/p[2])
}
else if(cens[t]==0 & cens[t-1]==1){
likelihood[t]<-(1/(1-pnorm((c-(p[1]+p[4]*t)*sqrt(1-p[3]^2)/p[2]))*sqrt(1-p[3]^2)/p[2]*dnorm(((y[t]-(p[1]+p[4]*t)*sqrt(1-p[3]^2))/p[2])*(1-pnorm(((c-(p[1]+p[4]*(t))-p[3]*(y[t]-(p[1]+p[4]*(t-1)))/p[2])))))))
}
else if(cens[t]==1 & cens[t-1]==0){
likelihood[t]<-1-pnorm(((c-(p[1]+p[4]*t)-p[3]*(y[t-1]-(p[1]+p[4]*(t-1)))/p[2])))
}
else
{
likelihood[t]<-(((pmvnorm(lower=c, upper=Inf , mean=c(p[1]+p[4]*(t-1),p[1]+p[4]*t),sigma=sigma))/(1-pnorm((c-(p[1]+p[4]*(t-1))*sqrt(1-p[3]^2)/p[2])))))
}
}
f0=(sqrt(1-p[3])/p[2]*dnorm(((y[1]-p[1]-p[4])*sqrt(1-p[3]^2))/p[2]))
likelihood[1]=f0
#Ta prosthesa
if (any(likelihood==0)){
likelihood[likelihood==0] = 0.000001 #poly mikros arithmos
}
if (any(likelihood==Inf)){
likelihood[likelihood==Inf] = 1 #poly megalos h 1, an milame gia pi8anothta
}
if (any(is.nan(likelihood))){
likelihood[is.nan(likelihood)] = 0.000001
}
minusloglike=-sum(log(likelihood))
#l1=list(Minusloglike=minusloglike,Loglikelihood=log(likelihood))
return(minusloglike)
}
fit<-optim(c(0,1,0.3,0.05),ll,method="L-BFGS-B",lower=c(-Inf,0.001,-0.999,-Inf),upper = c(Inf,Inf,0.999,Inf),hessian=TRUE)
fisher.info<-solve(fit$hessian)
fisher.info
prop.sigma<-sqrt(diag(fisher.info))
sigmas<-diag(prop.sigma)
upper<-fit$par+1.96*sigmas
lower<-fit$par-1.96*sigmas
interval<-data.frame(value=fit$par, lower=diag(lower),upper=diag(upper))
interval
I run this code(it is for censored first order autogressive process with covariate , i have 4 cases for x(t) ,x(t-1) either is censored or non-censored and i dont want the likelihood to go near zero and inf).I get error
Error in if (any(likelihood == Inf)) { :
missing value where TRUE/FALSE needed
Called from: fn(par, ...)
The program is working for n=100 but when n is larger than 100 i have this error. I think this error causes bad estimattes of the four parameters(b1,rho,sigma,b0).Does anyone know what can i do?
Thank you for your help.

How can I define a constant variable inside a recursive function in R?

I want to create functions that compute the Simple Moving Average (SMA) and the Exponential Moving Average (EMA).
My problem is in the EMA implementation. I would like to set a constant variable inside the recursive function that is equal to one of its argument.
Here is my code:
#SMA
sma <- function(P,t,n)
{
return(sum(P[(t-n):(t-1)])/n)
}
#EMA
recursive.ema <- function(P,t,n)
{
# Here I want to create a constant variable that keeps in memory the first
# value of t, i.e. the value of t before the first recursion, so I can use
# it as argument of sma function.
# Something similar to this: tmp <- t (t given from outside the function)
b <- 2/(n+1)
if (t == 1)
{
return(b*P[1] + (1-b)*sma(P,tmp,n))
}
return (b*P[t] + (1-b)*recursive.ema(P,t-1,n))
}
Here P is a vector, time series of prices, t is the index of the vector, the time in my model, and n is any positive number, corresponding to n lagged periods at time t.
Would this work ?
e <-new.env()
test<-TRUE
assign("test",test,e)
recursive.ema <- function(P,t,n)
{
test <-get("test",envir=e)
if (test) {
assign("t",t,envir=e)
assign("test",FALSE,envir=e)
} else {
t=get("t",envir=e)
}
b <- 2/(n+1)
if (t == 1)
{
return(b*P[1] + (1-b)*sma(P,tmp,n))
}
return (b*P[t] + (1-b)*recursive.ema(P,t-1,n))
}

How to identify starting values for constrOptim()?

I'm using constrOptim() in R to estimate parameters of a model. I get the error that my target function has a result of length 0 instead 1. The problem can be the starting values I chose. I'm using R the first time and I'm not familiar with the constrOptim algorithm itself. Can somebody help me with the optimization?
my input:
library(quantmod)
getSymbols('^GDAXI', src='yahoo', return.class='ts',from="2005-01-01", to="2015-01-31")
GDAXI.DE=GDAXI[ , "GDAXI.Close"]
log_r1=diff(log(GDAXI.DE[39:2575]))
sigma=matrix(nrow=length(log_r1)-1, ncol=1)
for(i in 2:length(log_r1))
{
sigma[i-1]=var(log_r1[1:i])
}
error=matrix(nrow=length(log_r1), ncol=1)
mu=mean(log_r1)
for(i in 1:length(log_r1))
{
error[i]=log_r1[i]-mu
}
the function i want to minimize:
LogLik=function(th)
{
input=garch(th)
sig=input
for(i in 1:length(sigma))
{
LL=(-1/2)*log(2*pi)-(1/2)*log(sig[i])-(1/2)*er[i]/sig[i]
}
}
the function calculating the input above:
sig=var(log_r1)
er=error
Sigma_garch=matrix(nrow=length(sigma), ncol=1)
garch=function(th)
{
omega.0=th[1]
alpha.0=th[2]
beta.0=th[3]
for(i in 1:length(sigma))
{
if(i==1)
{
Sigma_ga=sig
}else
{
Sigma_ga=Sigma_garch[i-1]
}
Sigma_garch[i]=omega.0+alpha.0*er[i]^2+beta.0*Sigma_ga
}
return(Sigma_garch)
}
my constraints and optimization procedure:
ui=matrix(c(diag(1,3),c(0,(-1),(-1))),4,3, byrow = T)
ci=c(0.01,0,0,(-0.99))
theta=c(0.02,0.01,0.95)
est=constrOptim(c(0.02,0.01,0.95),LogLik, grad=NULL, ui,ci,hessian=FALSE)
the error I'm getting: Error in optim(theta.old, fun, gradient, control = control, method = method, : Zielfunktion gibt Ergebnis der Länge 0 zurück, nicht Länge 1.
Does somebody can tell me what can be the probleme here?
Your target function, LogLik, doesnt return a value resulting in target function has a result of length 0 instead 1.
LogLik=function(th)
{
input=garch(th)
sig=input
for(i in 1:length(sigma))
{
LL=(-1/2)*log(2*pi)-(1/2)*log(sig[i])-(1/2)*er[i]/sig[i]
}
return(LL)
}

Resources