Applying a user defined R codes to a data set - r

I have R codes that I would like to apply to a data file. Please see the codes below:
library(plyr);
library(dplyr);
ability<- function(mdl, u, b, a, c){
J<- length(b)
if(mdl == 1 | mdl==2 | missing(c)) {
c<- rep(0,J)
}
if (mdl == 1 | missing(a)) { a<- rep(1,J)}
x<- sum(u)
if (x == 0) {
th<- -log(2*J)
}
if(x == J){
th<- log(2*J)
}
if (x == 0 | x == J) {
sumdem<- 0.0
for ( j in 1:J) {
pstar<- 1/(1 + exp(-a[j] * (th - b[j])))
phat<- c[j] + (1.0 - c[j])* pstar
sumdem<- sumdem - a[j]**2 * phat * (1.0 - phat) * (pstar / phat)**2
}
se <- 1/ sqrt(-sumdem)
}
if (x != 0 & x != J){
th<- log(x /(J-x))
S<- 10;
ccrit <- 0.001
for ( s in 1:S) {
sumnum <- 0.0
sumdem <- 0.0
for(j in 1:J){
pstar<- 1/(1 + exp(-a[j] * (th - b[j])))
phat<- c[j] + (1.0 - c[j])* pstar
sumnum<- sumnum + a[j] * (u[j] - phat) *
(pstar / phat)
sumdem <- sumdem - a[j]**2 * phat * (1.0 - phat) *
(pstar / phat)**2
}
delta<- sumnum / sumdem
th<- th - delta
if(abs(delta) < ccrit | s == S) {
se<- 1/sqrt(-sumdem)
break;
}
}
}
cat(paste("th=", th, "\n")); flush.console();
cat(paste("se=", se, "\n")); flush.console();
thse<- c(th, se);
return(thse);
}
u<-read.csv("C:\\PA\\Keystone\\Spring 018\\data\\out_sp16_ALGEBRA1.csv",header=TRUE,as.is=T);
b<- c(-0.5255,0.0645,-0.0685,0.4132,0.5103,0.6826,-0.524,-0.2108,-0.3115,0.583,-0.6093,1.1567,0.6096,-0.2789,0.1151,1.2127,-0.085,-0.2494,-0.2724,0.0433,-0.4693,0.1692,0.15,0.3255,0.6008,0.3654,-1.239,-0.7911,0.2886,0.2324,0.1885,0.6671,-0.1561,0.4412,1.4597,0.0981,0.2668,1.9483,2.0044,1.403,2.1106,1.0472);
a<- rep(1,length(b));
u<-u[,-c(1)]
u<-as.matrix(u)
apply()
The argument u is a data set that I converted to matrix and would like to apply the codes to all row of the matrix. a is a vector and b is a vector. I thought about using apply() but not giving me what I want.
Thanks for your time and input.

I am assuming that each row of the u represents different students and each column represents different items so using matrix indices may help(i.e sum(u[1,]) will compute sum for overall performance or all items combined for 1st student. also I did not get what you are asking specifically in order to guide what you should do.

Related

How to make Peirce projection algorithm allow for 360 degrees rotation?

I'm using this algorithm for Peirce world map projection in R. I'm able to do some fine maps, for instance using 28 as the value for the lambda_0 parameter in function toPeirceQuincuncial, since this angle creates less land distortion and breaks no important islands (besides Antarctica, obviously). The algorithm is used like this:
# constants
pi<-acos(-1.0)
twopi<-2.0*pi
halfpi<-0.5*pi
degree<-pi / 180
halfSqrt2<-sqrt(2) / 2
quarterpi<-0.25 * pi
mquarterpi<--0.25 * pi
threequarterpi<-0.75 * pi
mthreequarterpi<--0.75 * pi
radian<-180/pi
sqrt2<-sqrt(2)
sqrt8<-2. * sqrt2
halfSqrt3<-sqrt(3) / 2
PeirceQuincuncialScale<-3.7081493546027438 ;# 2*K(1/2)
PeirceQuincuncialLimit<-1.8540746773013719 ;# K(1/2)
ellFaux<-function(cos_phi,sin_phi,k){
x<-cos_phi * cos_phi
y<-1.0 - k * k * sin_phi * sin_phi
z<-1.0
rf<-ellRF(x,y,z)
return(sin_phi * rf)
}
ellRF<-function(x,y,z){
if (x < 0.0 || y < 0.0 || z < 0.0) {
print("Negative argument to Carlson's ellRF")
print("ellRF negArgument")
}
delx<-1.0;
dely<-1.0;
delz<-1.0
while(abs(delx) > 0.0025 || abs(dely) > 0.0025 || abs(delz) > 0.0025) {
sx<-sqrt(x)
sy<-sqrt(y)
sz<-sqrt(z)
len<-sx * (sy + sz) + sy * sz
x<-0.25 * (x + len)
y<-0.25 * (y + len)
z<-0.25 * (z + len)
mean<-(x + y + z) / 3.0
delx<-(mean - x) / mean
dely<-(mean - y) / mean
delz<-(mean - z) / mean
}
e2<-delx * dely - delz * delz
e3<-delx * dely * delz
return((1.0 + (e2 / 24.0 - 0.1 - 3.0 * e3 / 44.0) * e2+ e3 / 14) / sqrt(mean))
}
toPeirceQuincuncial<-function(lambda,phi,lambda_0=20.0){
# Convert latitude and longitude to radians relative to the
# central meridian
lambda<-lambda - lambda_0 + 180
if (lambda < 0.0 || lambda > 360.0) {
lambda<-lambda - 360 * floor(lambda / 360)
}
lambda<-(lambda - 180) * degree
phi<-phi * degree
# Compute the auxiliary quantities 'm' and 'n'. Set 'm' to match
# the sign of 'lambda' and 'n' to be positive if |lambda| > pi/2
cos_phiosqrt2<-halfSqrt2 * cos(phi)
cos_lambda<-cos(lambda)
sin_lambda<-sin(lambda)
cos_a<-cos_phiosqrt2 * (sin_lambda + cos_lambda)
cos_b<-cos_phiosqrt2 * (sin_lambda - cos_lambda)
sin_a<-sqrt(1.0 - cos_a * cos_a)
sin_b<-sqrt(1.0 - cos_b * cos_b)
cos_a_cos_b<-cos_a * cos_b
sin_a_sin_b<-sin_a * sin_b
sin2_m<-1.0 + cos_a_cos_b - sin_a_sin_b
sin2_n<-1.0 - cos_a_cos_b - sin_a_sin_b
if (sin2_m < 0.0) {
sin2_m<-0.0
}
sin_m<-sqrt(sin2_m)
if (sin2_m > 1.0) {
sin2_m<-1.0
}
cos_m<-sqrt(1.0 - sin2_m)
if (sin_lambda < 0.0) {
sin_m<--sin_m
}
if (sin2_n < 0.0) {
sin2_n<-0.0
}
sin_n<-sqrt(sin2_n)
if (sin2_n > 1.0) {
sin2_n<-1.0
}
cos_n<-sqrt(1.0 - sin2_n)
if (cos_lambda > 0.0) {
sin_n<--sin_n
}
# Compute elliptic integrals to map the disc to the square
x<-ellFaux(cos_m,sin_m,halfSqrt2)
y<-ellFaux(cos_n,sin_n,halfSqrt2)
# Reflect the Southern Hemisphere outward
if(phi < 0) {
if (lambda < mthreequarterpi) {
y<-PeirceQuincuncialScale - y
} else if (lambda < mquarterpi) {
x<--PeirceQuincuncialScale - x
} else if (lambda < quarterpi) {
y<--PeirceQuincuncialScale - y
} else if (lambda < threequarterpi) {
x<-PeirceQuincuncialScale - x
} else {
y<-PeirceQuincuncialScale - y
}
}
# Rotate the square by 45 degrees to fit the screen better
X<-(x - y) * halfSqrt2
Y<-(x + y) * halfSqrt2
res<-list(X,Y)
return(res)
}
library(rgdal)
ang <- 28
p <- readOGR('~/R/shp','TM_WORLD_BORDERS-0.3') # read world shapefile
for (p1 in 1:length(p#polygons)) {
for (p2 in 1:length(p#polygons[[p1]]#Polygons)) {
for (p3 in 1:nrow(p#polygons[[p1]]#Polygons[[p2]]#coords)) {
pos <- toPeirceQuincuncial(p#polygons[[p1]]#Polygons[[p2]]#coords[p3,1],
p#polygons[[p1]]#Polygons[[p2]]#coords[p3,2],ang)
p#polygons[[p1]]#Polygons[[p2]]#coords[p3,1] <- pos[[1]][1]
p#polygons[[p1]]#Polygons[[p2]]#coords[p3,2] <- pos[[2]][1]
}
}
}
z <- toPeirceQuincuncial(0,-90,ang)[[1]][1]
p#bbox[,1] <- -z
p#bbox[,2] <- z
# plotting the map
par(mar=c(0,0,0,0),bg='#a7cdf2',xaxs='i',yaxs='i')
plot(p,col='gray',lwd=.5)
for (lon in 15*1:24) { # meridians
pos <- 0
posAnt <- 0
for (lat in -90:90) {
if (length(pos) == 2) {
posAnt <- pos
}
pos <- toPeirceQuincuncial(lon,lat,ang)
if (length(posAnt) == 2) {
segments(pos[[1]][1],pos[[2]][1],posAnt[[1]][1],posAnt[[2]][1],col='white',lwd=.5)
}
}
}
lats <- 15*1:5
posS <- matrix(0,length(lats),2)
posST <- 0
pos0 <- 0
posN <- matrix(0,length(lats),2)
posNT <- 0
for (lon in 0:360) {
posAntS <- posS
posAntST <- posST
posAnt0 <- pos0
posAntN <- posN
posAntNT <- posNT
pos0 <- unlist(toPeirceQuincuncial(lon,0,ang))
posST <- unlist(toPeirceQuincuncial(lon,-23.4368,ang))
posNT <- unlist(toPeirceQuincuncial(lon,23.4368,ang))
for (i in 1:length(lats)) {
posS[i,] <- unlist(toPeirceQuincuncial(lon,-lats[i],ang))
posN[i,] <- unlist(toPeirceQuincuncial(lon,lats[i],ang))
}
if (lon > 0) {
segments(pos0[1],pos0[2],posAnt0[1],posAnt0[2],col='red',lwd=1)
segments(posNT[1],posNT[2],posAntNT[1],posAntNT[2],col='yellow')
for (i in 1:length(lats)) {
segments(posN[i,1],posN[i,2],posAntN[i,1],posAntN[i,2],col='white',lwd=.5)
}
if (!(lon %in% round(90*(0:3+.5)+ang))) {
for (i in 1:length(lats)) {
segments(posS[i,1],posS[i,2],posAntS[i,1],posAntS[i,2],col='white',lwd=.5)
}
segments(posST[1],posST[2],posAntST[1],posAntST[2],col='yellow')
} else {
for (i in 1:length(lats)) {
posS[i,] <- unlist(toPeirceQuincuncial(lon-0.001,-lats[i],ang))
segments(posS[i,1],posS[i,2],posAntS[i,1],posAntS[i,2],col='white',lwd=.5)
posS[i,] <- unlist(toPeirceQuincuncial(lon,-lats[i],ang))
}
posST <- unlist(toPeirceQuincuncial(lon-0.001,-23.4368,ang))
segments(posST[1],posST[2],posAntST[1],posAntST[2],col='yellow')
posST <- unlist(toPeirceQuincuncial(lon,-23.4368,ang))
}
}
}
Playing with different values for lambda_0, I've found out that I apparently cannot choose any value I want. It seems that the function will only work with half the possibilities I thought it did.
Numbers indicate values of lambda_0. As you can see, North America moves clockwise from right to left between 80° and 200°, and starts the same movement again between 240° and 40°.
How can I change the algorithm to allow for any angle I want (for instance, North America pointing up)?
I made a simple hack that "solved" the problem. First I've found out which angles were a repetition of the others: between 46 and 225 degrees. Then, for these angles, I just had to flip the bounding box of the shapefile:
z <- toPeirceQuincuncial(0,-90,ang)[[1]][1]
if (ang > 45 & ang < 226) {
p#bbox[,1] <- z
p#bbox[,2] <- -z
} else {
p#bbox[,1] <- -z
p#bbox[,2] <- z
}
Other thing: I improved the R code of toPeirceQuincuncial, since it was converting coordinate by coordinate. Since R is a vector language, I adapted it to convert a group of coordinates at once, which made the code extremely fast. Now lambda and phi may both be vectors (same size, of course), while lambda_0 is still a single number.
toPeirceQuincuncial2<-function(lambda,phi,lambda_0=20.0){
# Convert latitude and longitude to radians relative to the
# central meridian
lambda<-lambda - lambda_0 + 180
lambda[which(lambda<0.0 | lambda>360.0)] <-
lambda[which(lambda<0.0 | lambda>360.0)] - 360*floor(lambda[which(lambda<0.0 | lambda>360.0)]/360)
lambda<-(lambda - 180) * degree
phi<-phi * degree
# Compute the auxiliary quantities 'm' and 'n'. Set 'm' to match
# the sign of 'lambda' and 'n' to be positive if |lambda| > pi/2
cos_phiosqrt2<-halfSqrt2 * cos(phi)
cos_lambda<-cos(lambda)
sin_lambda<-sin(lambda)
cos_a<-cos_phiosqrt2 * (sin_lambda + cos_lambda)
cos_b<-cos_phiosqrt2 * (sin_lambda - cos_lambda)
sin_a<-sqrt(1.0 - cos_a * cos_a)
sin_b<-sqrt(1.0 - cos_b * cos_b)
cos_a_cos_b<-cos_a * cos_b
sin_a_sin_b<-sin_a * sin_b
sin2_m<-1.0 + cos_a_cos_b - sin_a_sin_b
sin2_n<-1.0 - cos_a_cos_b - sin_a_sin_b
sin2_m[which(sin2_m < 0.0)] <- 0.0
sin_m<-sqrt(sin2_m)
sin2_m[which(sin2_m > 1.0)] <- 1.0
cos_m<-sqrt(1.0 - sin2_m)
sin_m[which(sin_lambda < 0.0)] <- -sin_m[which(sin_lambda < 0.0)]
sin2_n[which(sin2_n < 0.0)] <- 0.0
sin_n<-sqrt(sin2_n)
sin2_n[which(sin2_n > 1.0)] <- 1.0
cos_n<-sqrt(1.0 - sin2_n)
sin_n[which(cos_lambda > 0.0)] <- -sin_n[which(cos_lambda > 0.0)]
# Compute elliptic integrals to map the disc to the square
x<-ellFaux(cos_m,sin_m,halfSqrt2)
y<-ellFaux(cos_n,sin_n,halfSqrt2)
# Reflect the Southern Hemisphere outward
y[which(phi<0 & lambda<mthreequarterpi)] <- PeirceQuincuncialScale - y[which(phi<0 & lambda<mthreequarterpi)]
x[which(phi<0 & lambda>=mthreequarterpi & lambda<mquarterpi)] <- -PeirceQuincuncialScale - x[which(phi<0 & lambda>=mthreequarterpi & lambda<mquarterpi)]
y[which(phi<0 & lambda>=mquarterpi & lambda<quarterpi)] <- -PeirceQuincuncialScale - y[which(phi<0 & lambda>=mquarterpi & lambda<quarterpi)]
x[which(phi<0 & lambda>=quarterpi & lambda<threequarterpi)] <- PeirceQuincuncialScale - x[which(phi<0 & lambda>=quarterpi & lambda<threequarterpi)]
y[which(phi<0 & lambda>=threequarterpi)] <- PeirceQuincuncialScale - y[which(phi<0 & lambda>=threequarterpi)]
# Rotate the square by 45 degrees to fit the screen better
X<-(x - y) * halfSqrt2
Y<-(x + y) * halfSqrt2
res<-list(X,Y)
return(res)
}

Create sequence in R

Hi I was wondering if someone knows how to realize this sequence in R?
Consider a sequence with following requirement.
a1=1
an=an-1+3 (If n is a even number)
an=2×an-1-5 (If n is a odd number)
e.g. 1,4,3,6,7,10,15,...
a30=?
Try the following.
It will return the entire sequence, not just the last element.
seq_chih_peng <- function(n){
a <- integer(n)
a[1] <- 1
for(i in seq_along(a)[-1]){
if(i %% 2 == 0){
a[i] <- a[i - 1] + 3
}else{
a[i] <- 2*a[i - 1] - 5
}
}
a
}
seq_chih_peng(30)
Note that I do not include code to check for input errors such as passing n = 0 or a negative number.
If you want to do it recursively, you just have the write the equations in your function as follows:
sequence <- function(n) {
if (n == 1) return(1)
else if (n > 1) {
if (n %% 2 == 1) {
return(2 * sequence(n - 1) - 5)
}else{
return(sequence(n - 1) + 3)
}
}else{
stop("n must be stricly positive")
}
}
sequence(30)
# returns 32770

Missing value where TRUE FALSE value needed R ... not a duplicate! Aliens vs. Vampires Simulation

Beginning R programmer here. I'm running a simulation of Aliens vs. Vampires and their domination of the human race.
Two stipulations to this simulation:
Aliens can abduct vampires. The amount is proportional to vampires current percentage of total world population (including humans).
Vampires have health requirements. They not only can transform humans into vampires but they also need some to feed on and kill...
I'm having trouble understanding this portion of code and why it is not working:
if (A[i-1] + A.prime * delta.t <= 0){
A[i]<-0
} else {
A[i] <- A[i-1] + A.prime * delta.t
}
It is returning this error:
Error in if (A[i - 1] + A.prime * delta.t <= 0) { : missing value
where TRUE/FALSE needed
Here is the entire code, I'm sure it is a simple code error and not a logical one. Any help would be appreciated!
# Aliens v. Vampires -- version 2.1
# (logistic growth with human reproduction)
# CPSC 420 -- spring 2016
# Set up time.
delta.t <- 1 # years
time <- seq(1940,3000,delta.t)
# Utility functions to convert between i and t.
itot <- function(i) (i-1)*delta.t + 1940
ttoi <- function(t) (t-1940)/delta.t + 1
# Simulation parameters.
init.human.pop <- 7e9
alien.abduction.rate <- 30000 # (beings/year)/year
bite.rate <- .1 # (people/year)/vampire
birth.rate <- .01 # (people/year)/person
sacrifice.rate<- .1 #people/year/vampire
#There is no specific rate for vampires being abducted per year because it is a portion of the alien abduction rate
A <- vector()
V <- vector()
H <- vector()
earth.population <- vector()
V.abductions<-vector()
sacrifice<- vector()
# Initial conditions. (No aliens until 1940, and only one lonely vampire.)
A[1] <- 0
V[1] <- 1
H[1] <- init.human.pop
earth.population[1] <- V[1] + H[1]
V.abductions[1]<-0
sacrifice[1]<-0
# Simulate.
for (i in 2:length(time)) {
logistic.factor <- H[i-1]/earth.population[i-1]
# Compute flows.
VA.prime<-alien.abduction.rate * (time[i]-1940) * (V[i-1]/earth.population[i-1]) #vampires abducted/year
sacrifice.prime<- sacrifice.rate * (V[i-1]) #sacrifice/year
V.prime <- bite.rate * V[i-1] * logistic.factor - VA.prime# people/year
A.prime <- alien.abduction.rate * (time[i] - 1940) * (logistic.factor) + VA.prime # beings/year
H.prime <- H[i-1] * birth.rate - (V.prime + (A.prime - VA.prime) + sacrifice.prime) # people/year
earth.population.prime <- -A.prime - sacrifice.prime # people/year
if(H[i-1] - sacrifice[i-1]<0){
V.prime <- V.prime - (abs(sacrifice[i-1] - H[i-1])*sacrifice.rate)
} else {
V.prime <- bite.rate * V[i-1] * logistic.factor - VA.prime# people/year
}
# Compute stocks.
if (A[i-1] + A.prime * delta.t <= 0){
A[i]<-0
} else {
A[i] <- A[i-1] + A.prime * delta.t
}
# people abducted
if( V[i-1] + V.prime * delta.t <= 0 ) {
V[i] <-0
} else {
V[i] <- V[i-1] + V.prime * delta.t
} # vampires
if(H[i-1] + H.prime * delta.t <= 0 ){
H[i]<-0
} else {
H[i]<-H[i-1] + H.prime * delta.t
} #humans left
if(V[i] + H[i] <= 0 ){
earth.population[i]<-0
}else{
earth.population[i] <- V[i] + H[i]
}
# people
V.abductions[i]<- V.abductions[i-1] + VA.prime * delta.t #total vampires abducted thus far
sacrifice[i]<-sacrifice[i-1] + sacrifice.prime * delta.t #total victims sacrificed to vampires thus far
}
# Plot results.
all.values <- c(A,V,H,earth.population)
plot(time,A,type="l",col="green",lwd=2,
ylim=c(min(all.values),max(all.values)),
main="Aliens v. Vampires apocalypse -- oh my!!",
xlab="year",
ylab="# of victims")
lines(time,V,col="red",lwd=2)
lines(time,H,col="black",lwd=1)
lines(time,earth.population,col="brown",lty="dotted",lwd=3)
lines(time,V.abductions,col="purple",lty="dashed",lwd=1)
lines(time,sacrifice,col="blue", lty = 3, lwd=1)
legend("topleft",legend=c("Alien abductions","Vampire bites","Humans","Earthlings", "Vampires Abducted","Human Victims"),
fill=c("green","red","black","brown", "purple","blue"))
The statement A[i-1] + A.prime * delta.t <= 0 evaluates to NA, and that raises an error. The if wants to know if it's TRUE or FALSE, and NA is neither.
A.prime is NaN, so that's where your problem starts. A.prime tests whether A is a prime number, but since it's a vector, this statement doesn't produce any value.
At some point (step i=336), earth.population has dropped to 0 so logistic.factor is division by 0 . Maybe you want to add a condition to enter the loop.

R using "extract" Function: all cases discarded after aggregation

I'm trying to use the "extract" Function in R, which allows for Policy Mood analysis (documentation can be found here: http://www.unc.edu/~jstimson/Software_files/Doc.pdf). All of my cases are discarded, however, and I can't figure out whether the data or the function is the problem.
Here is a sample dataset with the code needed to execute the function (the "extract" function code is included further below):
library(car)
#create sample data frame
mat <- matrix(, nrow = 10, ncol = 0)
mat <- as.data.frame(mat)
mat$year <- c(1998,1999,2000,2001,2002,1996,1997,1998,2000,2002) #survey years
mat$varname <- c("ESS","ESS","ESS","ESS","ESS","ISSP","ISSP","ISSP","ISSP","ISSP") #survey names
mat$index <- c(10,20,30,20,30,10,20,30,20,10) #approval rating in survey
mat$ncases <- c(1000,1120,1300,800,1000,1200,1300,1400,1100,1000) #number of survey respondents
source("Extract.r") #loading Extract for estimating Mood
mat$date<-ISOdate(mat$year,1,1) #generate date variable
output<-extract(mat$varname,mat$date,mat$index,mat$ncases) #estimate mood series
The final line returns the following:
[1] "Series ESS discarded. After aggregation cases = 0"
[1] "Series ISSP discarded. After aggregation cases = 0"
Error in issue[, v] : subscript out of bounds
So the surveys are all being eliminated as unusable at aggregation. My best guess, looking at the function code below, is that the cases are being dropped for having uncomputable or zero standard deviations - but I don't understand why that would be the case.
Finally, here is the Extract.r code for the function. The code chunk I believe may be causing problems begins at "#NOW REDUCE ISSUE MATRIX TO ELIMINATE UNUSABLE SERIES (WN<2)":
display<-function(out,filename=NULL) {
if (is.null(filename)) filename=""
d<-out$dimensions
p<-out$period
m<-out$latent1
if (d==2) m2<-out$latent2
T<-out$T
mo=100*(p-as.integer(p))
for (t in 1:T) {
yr<-format(as.integer(p[t]),nsmall=0)
month<-format(mo[t],digits=2)
lat1<-format(m[t],nsmall=3)
if (d==1) {
cat(c(yr,month,lat1),fill=TRUE,file=filename,append=TRUE)
} else {
lat2<-format(m2[t],nsmall=3)
cat(c(yr,month,lat1,lat2),fill=TRUE,file=filename,append=TRUE)
}
}
}
##########################################################################################
plot.Zextract<-function(outobject) {
dim<- outobject$dimensions
T<- outobject$T
vect1<-outobject$latent1
t<-seq(1:T)
if (dim>1) {
vect2<-outobject$latent2
miny<-min(vect1)
if (miny>min(vect2)) miny<-min(vect2)
maxy<-max(vect1)
if (maxy<max(vect2)) maxy<-max(vect2)
dummy<-rep(miny,T-1) #dummy is a fake variable used to reset axes to handle min/max of both series
dummy[T]<-maxy
leg.text<-c("","Dimension 1","Dimension 2")
plot(t,dummy,type="l",lty=0,main="Final Estimation Results: Two Dimensions",xlab="Time Point",ylab="Latent Variables")
lines(t,vect1,col=1)
lines(t,vect2,col=2)
legend(1,maxy,leg.text,col=c(0,1,2),lty=c(0,1,1))
} else {
plot(t,vect1,type="l",main="Final Estimation Results",xlab="Time Point",ylab="Latent Variable")
if (dim == 2) lines(t,vect2,col=2)
}
}
##########################################################################################
summary.Zextract<- function(outobject) {
T=outobject$T
nvar=outobject$nvar
dim<- outobject$dimensions
vn<- c(outobject$varname,"Variable Name")
vn<- format(vn,justify="right")
nc<- format(outobject$N,justify="right")
ld<- format(outobject$loadings1,digits=3,justify="right")
mean<- format(outobject$means,digits=6,justify="right")
sd<- format(outobject$std.deviations,digits=6,justify="right")
cat("Variable Loadings and Descriptive Information: Dimension 1\n")
cat(paste(vn[nvar+1],"Cases","Loading"," Mean ","Std Dev","\n"))
for (v in 1:nvar) {
cat(paste(vn[v]," ",nc[v]," ",ld[v],mean[v],sd[v],"\n"))
}
if (dim == 2) {
ld<- format(outobject$loadings2,digits=3,justify="right")
cat("\nVariable Loadings and Descriptive Information: Dimension 2\n")
cat(paste(vn[nvar+1],"Cases","Loading"," Mean ","Std Dev","\n"))
for (v in 1:nvar) {
cat(paste(vn[v]," ",nc[v]," ",ld[v],mean[v],sd[v],"\n"))
}
}
}
##########################################################################################
findper<-function(unit,curdate,mind,miny,minper,aggratio) { #returns intFindPer
datcurdate<-curdate
class(datcurdate)<-"Date"
mo <- findmonth(datcurdate)
qu <- 1 + as.integer((mo - 1)/3)
dy <- findday(datcurdate)
yr <- findyear(datcurdate)
arinv<- 1/aggratio
if (unit == "D") intFindPer <- curdate - mind +1 #curdate - mindate + 1
if (unit == "A" || unit == "O") intFindPer <- as.integer((yr - miny) / aggratio) + 1
if (unit == "Q") part <- qu
if (unit == "M") part <- mo
if (unit == "Q" || unit == "M") intFindPer <- (yr - miny - 1) * arinv + part + (arinv - (minper - 1))
return(intFindPer)
} #findper
##########################################################################################
findday<-function(DateVar) {
z<-as.POSIXlt(DateVar)
v<-unlist(z)
findday<-as.integer(v[4])
} #end findday
##########################################################################################
findmonth<-function(DateVar) {
z<-as.POSIXlt(DateVar)
v<-unlist(z)
findmonth<-as.integer(v[5])+1
} #end findmonth
##########################################################################################
findyear<-function(DateVar) {
z<-as.POSIXlt(DateVar)
v<-unlist(z)
findyear<-as.integer(v[6])+1900
} #end findyear
##########################################################################################
aggregate<- function(varname,date,index,ncases,mindate,maxdate,nperiods,nvar,aggratio,unit,miny,minper) { #
#READ A NEW RECORD, CALCULATE PERIOD, AND SET UP AGGREGATION INTO MAT.ISSUE[NPERIODS,NVAR]
vl<- character(nvar)
mind<- as.integer(mindate)/86400
maxd<- as.integer(maxdate)/86400
vfac<- factor(varname) #make a factor vector
vlev<- levels(vfac) #find unique categories
Mat.Issue<- array(dim=c(nperiods,nvar))
nrec<-length(varname) #added for R compatibility
lp<- 0
per<- 0
x<- 0
c<- 0
nkeep<- 0
lv<- "0"
for (record in 1:nrec) { # MASTER LOOP THROUGH INPUT DATA, 1 TO NREC
if (ncases[record] == 0 || is.na(ncases[record])) ncases[record] <- 1000
mo <- findmonth(date[record])
qu <- 1 + as.integer((mo - 1)/3)
dy <- findday(date[record])
yr <- findyear(date[record])
curdate<- as.integer(date[record])
if (curdate >= mind && curdate <= maxd) { #is date within range?
nkeep <- nkeep + 1
if (nkeep==1) { #startup routine for first good case
firstcase<- TRUE
lp <- findper(unit,curdate,mind,miny,minper,aggratio)
lv <- varname[record]
x <- index[record] * ncases[record] #start new sums for case 1
c <- ncases[record]
for (i in 1:nvar) {
if (lv==vlev[i]) v=i #determine v by matching to position of labels vector
} #end for
} else {
firstcase<- FALSE
} #end if
if (firstcase == FALSE) { #skip over the rest for first good case
per<- findper(unit,curdate,mind,miny,minper,aggratio) #here we translate date into agg category
if ((varname[record] != lv) || (per !=lp)) { #found a new period or variable name
if (lp > 0 && lp <= nperiods) {
Mat.Issue[lp, v] <- x / c #recompute for either period or var change
x<- 0
c<- 0
}
if (varname[record] != lv) { #new var only
for (i in 1:nvar) {
if (varname[record]==vlev[i]) v=i #determine v by matching to position of labels vector
} #end for
vl[v]<- varname[record] #this will only catch names that have good cases
lv<-vl[v] #reassign new varname to lastvar
} # new var
lp <- findper(unit,curdate,mind,miny,minper,aggratio)
x <- index[record] * ncases[record] #start new sums for current case
c <- ncases[record]
} else {
x<- x + index[record] * ncases[record] #a continuing case, increment sums
c<- c + ncases[record]
}
} # end of first case special loop
} #end of date test loop
} #newrec: next record
vl<- vlev #overwrite previous assignment which had good names only
agglist<- list(lab=vl,iss=Mat.Issue)
return(agglist) #list includes labels and issue matrix
} #end aggregate function
##########################################################################################
esmooth<- function(mood, fb, alpha){
##########################################################################################
smooth<- function(alpha) { #for time series "series" and alpha "alpha[1]" compute sum of squared forecast error
ferror<- numeric(1)
T<- length(series)
xvect<- numeric(T)
xvect[1] <- series[1]
for (t in 2:T) {
xvect[t] <- alpha[1] * series[t] + (1 - alpha[1]) * xvect[t - 1]
}
sumsq <- 0
for (t in 3:T) {
ferror <- series[t] - xvect[t - 1]
sumsq <- sumsq + ferror ^ 2
}
return(sumsq) #this is the value of the function for a particular parameter alpha[1]
} # END OF FUNCTION SMOOTH
##########################################################################################
series<- mood[fb,] #create series to be smoothed
sm.out<- optim(c(.75),smooth,method="L-BFGS-B",lower=0.5,upper=1) #call smoother
alpha<- sm.out$par #assign result to alpha
#NOW SMOOTH USING ALPHA
T<- length(series)
for (t in 2:T) {
mood[fb,t] <- alpha * series[t] + (1 - alpha) * mood[fb,t - 1]
}
return(alpha)
} #END OF FUNCTION ESMOOTH
##########################################################################################
residmi<- function(issue,v,mood) { #function regresses issue(v) on mood and then residualizes it
o<- lm(issue[,v] ~ mood[3,]) #regress issue on mood to get a,b
issue[,v]<- 100 + issue[,v] - (o$coef[1]+o$coef[2]*mood[3,]) #100 + Y - (a+bx)
return(issue[,v])
}
##########################################################################################
iscorr<- function(issue,mood) { #compute issue-scale correlations
Nv<- length(issue[1,])
Np<- length(issue[,1])
Rvector<- numeric(Nv)
for (v in 1:Nv) {
N<- Np - sum(is.na(issue[,v]))
if (N > 1) Rvector[v]<- cor(issue[,v],mood[3,],use="complete.obs",method="pearson")
}
return(Rvector)
} #end function iscorr
##########################################################################################
dominate<- function(fb,issue,nperiods,nvar,mood,valid,smoothing,alpha) {
nitems<- numeric(nperiods)
if (fb==2) alpha1<-alpha
if (fb==1) {
unexp<-numeric(1)
everlap<- integer(1)
alpha<- 1
alpha1<- 1
}
if (fb == 1) {
startper <- 1
mood[fb, startper] <- 100
firstj <- 2
lastj <- nperiods
stepj <- 1
jprev <- 1
} else {
startper <- nperiods
mood[fb, startper] <- mood[1, nperiods] #reuse forward metric
firstj <- nperiods - 1
lastj <- 1
stepj <- -1
jprev <- nperiods
} # end if
for (j in seq(firstj,lastj,by=stepj)) {
mood[fb, j] <- 0
everlap <- 0 ## of years which have contributed sums to mood
if (fb == 1) {
firstj2 <- 1
lastj2 <- j - 1
} else {
firstj2 <- j + 1
lastj2 <- nperiods
} # end if
for (j2 in firstj2:lastj2) {
sum <- 0 #has already been estimated
consum <- 0 #sum of communalities across issues
overlap <- 0
for (v in 1:nvar) {
xj <- issue[j, v] #xj is base year value
sngx2 <- issue[j2, v] #sngx2 is comparison year value
if (!is.na(xj) && !is.na(sngx2)) {
overlap <- overlap + 1 #numb of issues contributing to sum
ratio <- xj / sngx2
if (csign[v] < 0) ratio <- 1 / ratio
sum <- sum + valid[v] * ratio * mood[fb, j2]
consum <- consum + valid[v]
} # end if
} #next v
if (overlap > 0) {
everlap <- everlap + 1
mood[fb, j] <- mood[fb, j] + sum / consum
} # end if
} #next j2
nitems[j] <- everlap
if (everlap > 0) mood[fb, j] <- mood[fb, j] / everlap else mood[fb, j] <- mood[fb, jprev] #if undefined, set to lag(mood)
jprev <- j #last value of j, whether lead or lag
} #next j
if (smoothing == TRUE) {
alpha<- esmooth(mood, fb, alpha) #NOW SMOOTH USING ALPHA
mood.sm<- mood[fb,] #set up alternate vector mood.sm
for (t in 2:nperiods) {
mood.sm[t]<- alpha*mood[fb,t]+(1-alpha)*mood.sm[t-1]
} #end for
mood[fb,]<- mood.sm #now assign back smoothed version
} else {
alpha1 <- 1
alpha <- 1
}
if (smoothing == TRUE && fb == 1) alpha1 <- alpha
dominate.out<- list(alpha1=alpha1,alpha=alpha,latent=mood[fb,]) #output object
return(dominate.out)
# return(mood[fb,])
} #end dominate algorithm
##########################################################################################
#begindt<-NA #ISOdate(2004,6,1)
#enddt<-NA #ISOdate(2004,10,31)
##########################################################################################
## MAIN EXTRACT CODE BEGINS HERE #########################################################
extract<- function(varname,date,index,ncases=NULL,unit="A",mult=1,begindt=NA,enddt=NA,npass=1,smoothing=TRUE,endmonth=12) {
formula<-match.call(extract)
nrecords<- length(varname)
if (is.null(ncases)) ncases<- rep(0,nrecords)
moddate<- date #create temporary date vector, leaving original unmodified
if ((unit=="A" || unit=="O") && endmonth<12) {
for (i in 1:nrecords) { #first loop through raw data file
month<- findmonth(moddate[i])
year<- findyear(moddate[i])
if (month>endmonth) moddate[i]<- ISOdate(year+1,1,1) #modified date become 1/1 of next year
} #end loop through data
} # end if
if (is.na(begindt)) minper<-findmonth(min(moddate)) else minper<-findmonth(begindt)
if (is.na(begindt)) miny<-findyear(min(moddate)) else miny<-findyear(begindt)
if (is.na(begindt)) minday<-findday(min(moddate)) else minday<-findday(begindt)
if (is.na(enddt)) maxper<-findmonth(max(moddate)) else maxper<-findmonth(enddt)
if (is.na(enddt)) maxy<-findyear(max(moddate)) else maxy<-findyear(enddt)
if (is.na(enddt)) maxday<-findday(max(moddate)) else maxday<-findday(enddt)
if (unit=="Q") {
minper<- as.integer((minper-1)/3)+1
maxper<- as.integer((maxper-1)/3)+1
}
mindate<- ISOdate(miny,minper,minday,0,0,0,tz="GMT")
maxdate<- ISOdate(maxy, maxper, maxday,0,0,0,tz="GMT") #86400=24*60*60
#SETCONS:
latent<- numeric(1)
aggratio<- 0
fb<- 1 #initialize
auto<- "start" #meaningless value
alpha<- 1
alpha1<- 1
pass<- 1
holdtola<- 0.001
tola<- holdtola
iter<- 0
lastconv<- 99999
wtmean<- 0 #for it=1
wtstd<- 1
fract<- 1
if (unit=="A") {
nperiods<- maxy-miny+1
aggratio<- 1
months<- 12
}
if (unit=="O") {
years<- mult
months<- years*12
aggratio<- 2
odd<- (maxy-miny+1) %% mult #mod
nperiods=as.integer((maxy-miny)/mult) + odd
}
if (unit=="M") {
fract<- 100
nperiods<- (maxy-miny)*12
nperiods<- nperiods-12 + (12-minper+1) + maxper
aggratio<- 1/12
months<- 1
}
if (unit=="Q") {
aggratio<- 1/4
months<- 3
nperiods<- as.integer((maxy-miny)/aggratio)
nperiods<- nperiods-4 + (4-minper+1) + maxper
fract<- 10
}
if (unit=="D") {
months=1
nperiods<- (as.integer(maxdate)-as.integer(mindate))/86400 + 1 #86400=24*60*60
}
arinv<- 1/aggratio
aggratio<- months/12
nrecords<- length(index)
#HERE WE SET UP FUNDAMENTAL DIMENSIONS AND DECLARE VECTORS
if (fb != 2) mood<- array(dim=c(3,nperiods))
vfac<- factor(varname) #make a factor vector
vlev<- levels(vfac) #find unique categories
nvar<- length(vlev) #how many are there?, includes unusable series
valid<- numeric(nvar)
csign<<- numeric(nvar)
vl<- character(nvar)
r<- numeric(nvar)
oldr<- rep(1,nvar) # r=1 for all v initially
issue<- array(dim=c(nperiods,nvar))
count<- numeric(nperiods)
vl<- numeric(nvar)
period<- numeric(nperiods)
converge<- 0
evalue<- 0
# create numeric variable period, eg, yyyy.0m
if (unit=="D") {
period<-seq(1:nperiods)
} else {
if (months >= 12) {
for (l in 1:nperiods) {
p <- (l - 1) * aggratio
period[l] <- miny + p
} #next l
} else {
y <- 0
i <- 0
my <- miny
if (minper == 1) my <- my - 1
for (l in 1:nperiods) {
i<- 1 + ((l-1) %% arinv)
mq <- minper + i - 1
mq<- 1 + ((mq-1) %% arinv)
if (mq == 1) y <- y + 1 #first month or quarter, increment year
period[l] <- my + y + mq / fract
} # end for
} #end else
} # end if
agglist<- aggregate(varname,moddate,index,ncases,mindate,maxdate,nperiods,nvar,aggratio,unit,miny,minper) # call aggregate to produce issue matrix
vl<- agglist$lab #extract two elements of the list from aggregate call
issue<- agglist$iss
rm(agglist) #don't need this anymore
#NOW REDUCE ISSUE MATRIX TO ELIMINATE UNUSABLE SERIES (WN<2)
ndrop<- 0
nissue<- numeric(nperiods)
std<- numeric(nperiods)
for (v in 1:nvar) {
std[v]<- 0 #default
nissue[v]<- sum(!is.na(issue[,v])) #criterion is 2 cases for npass=1 or 3 for npass=2
if (nissue[v]>npass) std[v]<- sqrt(var(issue[,v],na.rm=TRUE)) #this is just a test for variance >0
if (std[v]<.001) { #case dropped if std uncomputable (NA) or actually zero (constant)
ndrop<- ndrop+1
print(paste("Series",vl[v],"discarded. After aggregation cases =",nissue[v]))
}
}
nvarold<- nvar
nvar<- nvar-ndrop
pointer<- 1
found<- FALSE
for (v in 1:nvar) { #now reduced nvar
while (found==FALSE && pointer<=nvarold) { #find first valid column and push down
if (std[pointer]>.001) { #good case, transfer
issue[,v]<- issue[,pointer]
vl[v]<- vl[pointer]
pointer<- pointer+1
found<- TRUE
} else {
pointer<- pointer+1 #bad case, increment pointer
} #end if
} #end while
found<- FALSE
} #for
length(vl)<- nvar #reduce
length(issue)<- nperiods*nvar #chop off unused columns
attr(issue,"dim")<- c(nperiods,nvar)
N<- numeric(nvar)
#export<<-list(nperiods,nvar,issue)
for (pass in 1:npass) { #newpass: RESTART FOR SECOND DIMENSION CASE
if (pass == 2) { #reset iteration control parameters
iter <- 0
tola = holdtola
lastconv <- 99999
converge<- lastconv
conv<- converge
} else {
av<- numeric(nvar)
std<- numeric(nvar)
# ngood<- 0
for (v in 1:nvar) { #compute av and std by issue nvar now reduced to good cases
wn<- as.integer(nperiods-sum(is.na(issue[,v])))
av[v] <- mean(issue[,v],na.rm=TRUE)
std[v]<- sqrt(var(issue[,v],na.rm=TRUE) * ((wn - 1)/wn)) #convert to population standard deviation
issue[,v]<- 100 + 10 * (issue[,v] - av[v])/std[v] #standardize
# ngood<- ngood+1
}#end for
}
#READY FOR ESTIMATION, SET UP AND PRINT OPTIONS INFO
out<- as.character(10) #initial length only
out[1]<- print(paste("Estimation report:"))
if (pass == 1) {
if (months >= 12) {
out[2]<- print(paste("Period:", miny, " to", maxy," ", nperiods, " time points"))
} else {
out[2]<- print(paste("Period:", miny, minper, " to", maxy, maxper, nperiods, " time points"))
}
out[3]<- print(paste("Number of series: ", nvar+ndrop))
out[4]<- print(paste("Number of usable series: ", nvar))
out[5]<- print(paste("Exponential smoothing: ",smoothing))
}
out[6]<- print(paste("Iteration history: Dimension ",pass))
print(" ")
out[7]<- print("Iter Convergence Criterion Reliability Alphaf Alphab")
outcount<- 7
for (p in 1:nperiods) {
count[p]<- sum(!is.na(issue[p,]))
}
valid<- rep(1,times=nvar)
csign<<- rep(1,times=nvar)
auto <- "y" #iterative estimation on by default
quit <- 0 #false implies go ahead and estimate
while (iter == 0 || converge > tola) { #MASTER CONTROL LOOP WHICH ITERATES UNTIL SOLUTION REACHED
for (fb in 1:2) { # MASTER fb LOOP fb=1 is forward, 2 backward
dominate.out<- dominate(fb,issue,nperiods,nvar,mood,valid,smoothing,alpha) #master estimation routine
alpha1<- dominate.out$alpha1
alpha<- dominate.out$alpha
mood[fb,]<- dominate.out$latent
} #next fb
fb <- 3 #average mood from here on
for (p in 1:nperiods) { # AVERAGE
mood[fb, p] <- (mood[1, p] + mood[2, p]) / 2
} #next p
moodmean<-mean(mood[3,])
sdmood<-sd(mood[3,])
for (p in 1:nperiods) { #PLACEMENT OF THIS LOOP MAY NOT BE RIGHT
mood[fb,p] <- ((mood[fb,p] - moodmean) * wtstd / sdmood) + wtmean
} #end for
#plot commands
t<- seq(1:nperiods) #time counter used for plot below
lo<- 50 #force scale of iterative plot to large range
hi<- 150
if (min(mood[3,]) < lo) lo=min(mood[3,]) #whichever larger, use
if (max(mood[3,]) > hi) hi=max(mood[3,])
dummy<- rep(lo,nperiods) #dummy is fake variable used to set plot y axis to 50,150
dummy[nperiods]<- hi
if (iter==0) {
plot(t,dummy,type="l",lty=0,xlab="Time Period",ylab="Estimate by iteration",main="Estimated Latent Dimension") #create box, no visible lines
} else {
lines(t,mood[3,],col=iter)
}
iter <- iter + 1
if (auto == "y") r<- iscorr(issue,mood) else auto <- "y" #recompute correlations
wtmean<- 0
wtstd<- 0
vsum<- 0
goodvar<- 0
converge<- 0 #start off default
evalue<- 0
totalvar<- 0
for (v in 1:nvar) {
wn<- nperiods-sum(is.na(issue[,v]))
if (!is.na(sign(r[v]))) csign[v]<<- sign(r[v])
wn<- nperiods-sum(is.na(issue[,v]))
if (wn>1) { #sum over variables actually used
vratio <- wn / nperiods
evalue <- evalue + vratio * r[v]^2
totalvar <- totalvar + vratio
} #end if
#convergence tests
if (wn > 3) {
conv <- abs(r[v] - oldr[v]) #conv is convergence test for item=v
conv <- conv * (wn / nperiods) #weight criterion by number of available periods
if (conv > converge) converge <- conv #converge is the global max of conv
} #end if
if (!is.na(r[v])) oldr[v] <- r[v]
if (!is.na(r[v])) valid[v] <- r[v]^2
if (!is.na(av[v])) wtmean <- wtmean + av[v] * valid[v]
if (!is.na(std[v])) wtstd <- wtstd + std[v] * valid[v]
if (!is.na(r[v])) vsum <- vsum + valid[v]
} #end v loop
if (vsum > 0) wtmean <- wtmean / vsum
if (vsum > 0) wtstd <- wtstd / vsum
if (pass == 1) {
mean1 <- wtmean
std1 <- wtstd
e1=evalue
} else {
wtmean <- mean1
wtstd <- std1 #*unexp
} #end if
fbcorr <- cor(mood[1,],mood[2,]) #fnfrontback
if (quit != 1) {
outcount<- outcount+1
cv<- format(round(converge,4),nsmall=4)
itfmt<-format(round(iter),justify="right",length=4)
out[outcount]<- print(paste(itfmt," ",cv," ",round(tola,4)," ",round(fbcorr,3),round(alpha1,4),round(alpha,4)))
}
if (converge > lastconv) tola <- tola * 2
lastconv <- converge
auto = "y" #skip corr on iter=1, set auto on
if (iter >= 50) break #get out of while loop
} #END MASTER WHILE ITERATION CONTROL LOOP
if (auto == "y" && converge<tola) { #IF WE REACH THIS CODE WE HAVE A FINAL SOLUTION TO BE REPORTED
if (pass == 1) out1<- out #hold output for 2 dimensional solution
auto <- "Q"
quit <- 1 #flag solution reached, last time through
r<- iscorr(issue,mood) #final iteration correlations
if (pass == 1) r1<- r #hold correlations for 2 dimensional solution
if (pass > 1) {
unexp <- totalvar
totalvar <- unexp * totalvar
evalue <- evalue * unexp
} # end if
if (pass == 1) {
expprop <- evalue / totalvar
tot1 <- totalvar
} else {
erel <- evalue / totalvar #% exp relative
totalvar <- (1 - expprop) * tot1 #true var=original var discounted by %exp
evalue <- erel * totalvar #rescale to retain %exp relationship
expprop <- evalue / tot1 #now reduce eral to expprop
} # end if
for (v in 1:nvar) {
N[v]<- sum(!is.na(issue[,v]))
}
var.out<- list(varname=vl,loadings=r,means=av,std.deviations=std)
print(" ")
outcount<- outcount+1
out[outcount]<- print(paste("Eigen Estimate ", round(evalue,2), " of possible ",round(tot1,2)))
outcount<- outcount+1
out[outcount]<- print(paste(" Percent Variance Explained: ",round(100 * expprop,2)))
if (pass != 2 && npass>1) {
for (v in 1:nvar) {
valid[v] <- 0 #reset all, regmoodissue will set good=1
if (csign[v] != 0) issue[,v]<- residmi(issue,v,mood) #regmoodissue()
} #v loop
} # if
#begin prn output routine # mood[fb,] is now our estimate, WHAT ABOUT A SECOND DIMENSION
latent<- mood[fb,] #vector holds values for output
if (pass == 1) latent1<- latent #hold first dimension
print(" ")
out[outcount+1]<- print(paste("Final Weighted Average Metric: Mean: ",round(wtmean,2)," St. Dev: ",round(wtstd,2)))
#for Zelig output
if (npass==1) {
extract.out<- list(formula=formula,T=nperiods,nvar=nvar,unit=unit,dimensions=npass,period=period,varname=vl,N=N,means=av,std.deviations=std,setup1=out1,loadings1=r1,latent1=latent1)
} else {
for (i in 6:outcount) {
out[i-5]=out[i]
}
length(out)<- outcount-5
extract.out<- list(formula=formula,T=nperiods,nvar=nvar,unit=unit,dimensions=npass,period=period,varname=vl,N=N,means=av,std.deviations=std,setup1=out1,loadings1=r1,latent1=latent1,setup2=out,loadings2=r,latent2=latent)
}
} #end if auto="y"
} #end of for pass=1,2 loop
par(col=1) #reset on termination
class(extract.out)<- "Zextract"
return(extract.out)
} #end of extract
Anyone have any ideas what's going wrong here? Thanks in advance for your time!
The issue here seems to be dates inside the aggregate function (note, not the base R aggregate). Namely, a minimum and maximum date are calculated as:
mind<- as.integer(mindate)/86400
maxd<- as.integer(maxdate)/86400
These are then compared (inside a loop for each date) against the curdate variable, using an if statement:
if (curdate >= mind && curdate <= maxd) { #is date within range?
The problem, though, is that curdate<- as.integer(date[record]) (the line immediately before the if statement) is not on the same time scale. Changing it to: curdate<- as.integer(date[record])/86400 seems to solve the problem.

Mixture modeling - troublee with infinite values from exp() and log()

I'm writing a function for Gaussian mixture models with spherical covariance structures--ie $\Sigma_k = \sigma_k^2 I$. This particular function is similar to the mclust package with identifier VII.
http://en.wikipedia.org/wiki/Mixture_model
Anyways, the problem I'm having is running into infinite values for the weight matrix. Definition: Let W be an n x m matrix where n = 1, ..., n (number of obs) and m = 1, ..., m (number of mixtues). Each element of W (ie w_ij) can essentially be defined as a specific form of:
w_im = \frac{a / b * exp(c)}{\sum_i=1^m [a_i / b_i * exp(c_i)]}
Computing this numerically is giving me infinite values. So I'm trying to use the log-identity log(x+y) = log(x) + log(1 + y/x). But the issue is that it's not as simple as log(x+y) but rather log(\sum_i=1^m [a_i / b_i * exp(c_i)]).
Here's some code define:
n_im = a / b * exp(c) ;
d_.m = \sum_i=1^m [a_i / b_i * exp(c_i)] ; and
c_mat[i,j] as the value of the exponent for the [i,j]th term.
n_mat[, i] <- log(a[i]) - log(b[i]) - c[,i] # numerator of w_im
internal_vec1[i] <- (a[i] * b[1])/ (a[1] * b[i]) # an internal for the step below
c_mat2 <- cbind(rep(1, n), c_mat[,1] - c_mat[,-1]) # since e^a / e^b = e^(a-b)
for (i in 1:n) {
d_vec[i] <- n_mat[i,1] + log(sum(internal_vec1 * exp(c_mat2[i,)))
} ## still getting infinite values
I'm trying to define the problem as briefly as possible. the entire function is obviously much larger than this. But, since the problem I'm running into is specifically dealing with infinite (and 1/infinity) values, I'm hoping this snippet is sufficient. Anyone with a coding trick here?
Here is the solution!! (I've spent way too damn long on this)
**The first function log_plus() solves the simple problem where you want log(\sum_{i=1)^n x_i)
**The second function log_plus2() solves the more complicated problem described above where you want log(\sum_{i=1}^n [a_i / b_i * exp(c_i)])
log_plus <- function(xvec) {
m <- length(xvec)
x <- log(xvec[1])
for (j in 2:m) {
sum_j <- sum(xvec[1:j-1])
x <- x + log(1 + xvec[j]/sum_j)
}
return(x)
}
log_plus2 <- function(a, b, c) {
# assumes intended input of form sum(a/b * e^c)
if ((length(a) != length(b)) || (length(a) != length(c))) {
stop("Input equal length vectors")
}
if (!(all(c > 0) || all(c < 0))) {
stop("All values of c must be either > 0 or < 0.")
}
m <- length(a)
# initilialize log sum
x <- log(a[1]) - log(b[1]) + c[1]
# aggregate / loop log sum
for (j in 2:m) {
# build denominator
b2 <- b[1:j-1]
for (i in 1:j-1) {
d1 <- 0
c2 <- c[1:i]
if (all(c2 > 0)) {
c_min <- min(c2[1:j-1])
c2 <- c2 - c_min
} else if (all(c2 < 0)) {
c_min <- max(c2[1:j-1])
c2 <- c2 - c_min
}
d1 <- d1 + a[i] * prod(b2[-i]) * exp(c2[i])
}
den <- b[j] * (d1)
num <- a[j] * prod(b[1:j-1]) * exp(c[j] - c_min)
x <- x + log(1 + num / den)
}
return(x)
}

Resources