I'm new here and in general to programming - was hoping for some help.
I have the following code for backtracking an Extended Kalman Filter, which gives me the MSE for specific parameters. The problem is when I run the code, at the end, the matrix only stores the last set of values instead of all of them.
If you need to run the code on your PC, just replace the file name with any data set you have on hand. It should still work.
start.time <- Sys.time()
library(invgamma)
w = read.csv("Reddy.csv")
q = ts(w[2])
num = length(q)
f = function(x){
f1 = sqrt(x)
return(f1)
}
h = function(x){
h1 = x**3
return(h1)
}
ae1 = seq(24,26)
ae2 = seq(24,26)
be1 = seq(1,3)
be2 = seq(1,3)
a = seq(1,3)
b = seq(1,3)
MSE = matrix(nrow = length(ae1)*length(ae2)*length(be1)*length(be2)*length(a)*length(b), ncol =7)
for (i in ae1){
for (j in ae2){
for (k in be1){
for (l in be2){
for (m in a){
for (n in b){
d = rep(0,num)
for(o in 2:num){
xt = rep(0,num)
yt = rep(0,num)
fx = rep(0,num)
hx = rep(0,num)
e = rinvgamma(num,i,k)
g = rinvgamma(num,j,l)
fx[o] = f(xt[o-1])
xt[o] = m*fx[o] + e[o-1]
hx[o] = h(xt[o])
yt[o]= n*hx[o] +g[o]
d[o] = (yt[o] - q[o])**2
}
MSE[,1] = mean(d)
MSE[,2] = i
MSE[,3] = j
MSE[,4] = k
MSE[,5] = l
MSE[,6] = m
MSE[,7] = n
t = rbind(mean(d),i,j,k,l,m,n)
print(t)
}
}
}
}
}
}
end.time <- Sys.time()
time.taken <- end.time - start.time
time.taken
m = which.min(MSE[1])
Ideally, my matrix would have the first row as the MSE, the 2nd to 7th column would have the corresponding i,j,k,l,m,n values respectively and each iteration would get logged into a new row entry. Here, it seems to rewrite the entire matrix each time.
When you use
MSE[,2] = i
You actually call the entire column, and therefore the code is rewriting that column.
I have updated the code with a counter that'll help.
start.time <- Sys.time()
library(invgamma)
w = read.csv("Reddy.csv")
q = ts(w[2])
num = length(q)
f = function(x){
f1 = sqrt(x)
return(f1)
}
h = function(x){
h1 = x**3
return(h1)
}
ae1 = seq(24,26)
ae2 = seq(24,26)
be1 = seq(1,3)
be2 = seq(1,3)
a = seq(1,3)
b = seq(1,3)
count = 0
MSE = matrix(nrow = length(ae1)*length(ae2)*length(be1)*length(be2)*length(a)*length(b), ncol =7)
for (i in ae1){
for (j in ae2){
for (k in be1){
for (l in be2){
for (m in a){
for (n in b){
d = rep(0,num)
for(o in 2:num){
xt = rep(0,num)
yt = rep(0,num)
fx = rep(0,num)
hx = rep(0,num)
e = rinvgamma(num,i,k)
g = rinvgamma(num,j,l)
fx[o] = f(xt[o-1])
xt[o] = m*fx[o] + e[o-1]
hx[o] = h(xt[o])
yt[o]= n*hx[o] +g[o]
d[o] = (yt[o] - q[o])**2
}
count <- count + 1
MSE[count,1] = mean(d)
MSE[count,2] = i
MSE[count,3] = j
MSE[count,4] = k
MSE[count,5] = l
MSE[count,6] = m
MSE[count,7] = n
t = rbind(mean(d),i,j,k,l,m,n)
print(t)
}
}
}
}
}
}
end.time <- Sys.time()
time.taken <- end.time - start.time
time.taken
m = which.min(MSE[1])
Related
I want to implement RRHO analysis described in this manuscript https://academic.oup.com/nar/article/38/17/e169/1033168,
Maybe it's more clear and easy to see following R code to implement RRHO analysis. calculate_hyper_overlap function is what I try to do.
## Compute the overlaps between two *character* atomic vector:
hyper_test <- function(sample1, sample2, n) {
count <- length(intersect(sample1, sample2))
m <- length(sample1)
k <- length(sample2)
# under-enrichment
if (count <= m * k / n) {
sign <- -1L
pvalue <- stats::phyper(
q = count, m = m, n = n - m,
k = k, lower.tail = TRUE, log.p = FALSE
)
} else {
# over-enrichment
sign <- 1L
pvalue <- stats::phyper(
q = count, m = m, n = n - m,
k = k, lower.tail = FALSE, log.p = FALSE
)
}
c(count = count, pvalue = pvalue, sign = sign)
}
calculate_hyper_overlap <- function(sample1, sample2, n, stepsize) {
row_ids <- seq.int(stepsize, length(sample1), by = stepsize)
col_ids <- seq.int(stepsize, length(sample2), by = stepsize)
indexes <- expand.grid(
row_ids = row_ids,
col_ids = col_ids
)
overlaps <- apply(as.matrix(indexes), 1L, function(x) {
hyper_test(
sample1[seq_len(x[["row_ids"]])],
sample2[seq_len(x[["col_ids"]])],
n = n
)
}, simplify = FALSE)
overlaps <- data.table::transpose(overlaps)
number_of_obj <- length(row_ids)
matrix_counts <- matrix(
overlaps[[1L]],
nrow = number_of_obj
)
matrix_pvals <- matrix(
overlaps[[2L]],
nrow = number_of_obj
)
matrix_signs <- matrix(
overlaps[[3L]],
nrow = number_of_obj
)
list(
counts = matrix_counts,
pvalue = matrix_pvals,
signs = matrix_signs
)
}
The Rcpp code I use is here:
// [[Rcpp::export]]
List calculate_hyper_overlap_cpp(CharacterVector sample1, CharacterVector sample2, int n, int stepsize)
{
int list1_len = floor((sample1.size() - stepsize) / stepsize) + 1;
int list2_len = floor((sample2.size() - stepsize) / stepsize) + 1;
IntegerMatrix counts(list1_len, list2_len);
NumericMatrix pvalue(list1_len, list2_len);
IntegerMatrix signs(list1_len, list2_len);
for (int i = 0; i < list1_len; i++)
{
for (int j = 0; j < list2_len; j++)
{
CharacterVector list1 = sample1[Range(0, (i + 1) * stepsize - 1)];
CharacterVector list2 = sample2[Range(0, (j + 1) * stepsize - 1)];
int count = intersect(list1, list2).size();
counts(i, j) = count;
int m = list1.size(), k = list2.size();
if (count <= m * k / n)
// under-enrichment
{
pvalue(i, j) = R::phyper(count, m, n - m, k, true, false);
signs(i, j) = -1;
}
else
// over-enrichment
{
pvalue(i, j) = R::phyper(count, m, n - m, k, false, false);
signs(i, j) = 1;
}
}
}
return List::create(Named("counts") = counts,
Named("pvalue") = pvalue,
Named("signs") = signs);
}
here is the test:
n <- 200
sample1 <- rnorm(n)
sample2 <- rnorm(n)
names(sample1) <- names(sample2) <- paste0("gene", seq_len(n))
bench_res <- bench::mark(
res1 <- calculate_hyper_overlap_cpp(
names(sample1), names(sample2),
n = n, stepsize = 3L
),
res2 <- calculate_hyper_overlap(
names(sample1), names(sample2),
n = n, stepsize = 3L
),
check = FALSE
)
dplyr::select(bench_res, where(~ !is.list(.x)))
The test results
The first line is the time by Rcpp code and the second by raw R code
set.seed(650)
library(maxLik)
y = c(rnorm(15,1,1), rnorm(15,3,1))
dat = data.frame(y)
B = 3 # number bootstrap sample
n = length(dat$y)
n1 = 15
boot.samples = matrix(sample(dat$y, size = B * n, replace = TRUE), n, B)
ml = list()
boot.l = 0
va.l = NULL
for (j in 1:B) {
boot.l = boot.samples[, j]
for (i in 1:n) {
LLl <- function(param) {
mul <- param[1]
sigmal <- param[2]
sum(log(dnorm(dat[1:i, ], mul, sigmal)))
}
ml[[i]] = coef(maxLik(logLik = LLl, start = c(mul = 1, sigmal = 1)))
}
va.l = matrix(unlist(ml), n-1, B*2, byrow = TRUE)
}
va.l
The following are my output
However, when I print the list I have the following output.
My question is how can I have mul estimates for j=1 in the 1st column, sigmal estimates for j = 1 in the second column and mul estimates for j=2 in the 3rd column, sigmal estimates for j = 2 in the 4th column and so on?
Are there any other way do this? Thank you for your help.
I have a problem with results of loop in loop function. It counts inside loop only once and choose the best solution for the first raw and then stop.
I would like to remember the best solution for every row of the matrix zmienne. What am I doing wrong?
schaffer <- function(xx)
{x1 <- xx[1]
x2 <- xx[2]
fact1 <- (sin(x1^2-x2^2))^2 - 0.5
fact2 <- (1 + 0.001*(x1^2+x2^2))^2
y <- 0.5 + fact1/fact2
return(y)
}
gradient_descent <- function(func, step, niter) {
N <- 3 #N- number of random points
zmienne <- matrix(runif(N*2, min = -100, max = 100), N, 2)
print(zmienne)
h = 0.001;
iter_count = 0;
for (i in 1:N) {
x_0 <- zmienne[i,]
x_n = x_0;
for (j in 1:niter) {
func_grad = (func(x_n+h) - func(x_n))/h;
if (abs(func_grad) < 0.0001) { break; }
x_n = x_n - step * func_grad;
print(x_n)
iter_count = iter_count + 1
}
}
return(list(iterations = niter, best_value = func_grad, best_state = x_n, x0=x_0))
}
solution_m1 <- gradient_descent(schaffer, 0.1, 20)
solution_m1
I think this is what you want:
gradient_descent <- function(func, step, niter) {
N <- 3 #N- number of random points
zmienne <- matrix(runif(N*2, min = -100, max = 100), N, 2)
print(zmienne)
h = 0.001;
iter_count = 0;
best.vals <- NULL
for (i in 1:N) {
x_0 <- zmienne[i,]
x_n = x_0;
for (j in 1:niter) {
func_grad = (func(x_n+h) - func(x_n))/h;
if (abs(func_grad) < 0.0001) { break; }
x_n = x_n - step * func_grad;
print(x_n)
iter_count = iter_count + 1
}
best.vals <- c(best.vals, func_grad)
}
return(list(iterations = iter_count, best_value = best.vals, best_state = x_n, x0=x_0))
}
solution_m1 <- gradient_descent(schaffer, 0.1, 20)
solution_m1
The return should not be inside the inside loop but at then end of the function.
I am trying to develop a method for variable selection using genetic algorithm and partial least squares in R. For this purpose I am using the package GA and the package plsdepot. However there seems to be some error in my code which I am unable to identify.I am using the GA package for the first time and very new to the ideas of genetic algorithm and partial least squares regression. Any help will be greatly appreciated
library(plsdepot)
library(GA)
mod <- plsreg1(Data[, 3:137], Data[, 2, drop = FALSE], comps = 100,crosval=TRUE)
x <- Data[,3:137]
y <- Data[,2]
fitness <- function(string) {
inc <- which(string == 1)
X <- cbind(1, x[,inc])
model <- plsreg1(y, X, comps =2,crosval=TRUE)
q2list<-model$Q2[3]
best_comp<-which(q2list==max(q2list))
model_final<-plsreg1(y, X, comps =best_comp,crosval=TRUE)
q2list_final<-model_final$Q2[3]
max(q2list_final)
}
GA <- ga("binary", fitness = fitness, nBits = ncol(x),names = colnames(x), monitor = plot)
When the code is run i am getting the following error in console
> GA <- ga("binary", fitness = fitness, nBits = ncol(x),names = colnames(x), monitor = plot)
Error in plsreg1(y, X, comps = 2, crosval = TRUE) :
predictors must contain more than one column
Called from: fitness(Pop[i, ], ...)
Browse[1]> Q
on checking if x is empty or not
> ncol(x)
[1] 135
In terms of errors I think the main problem seems to be the X matrix is staying blank as its somehow unable to read the x matrix.
When I am running the GA code source viewer is opening up and this is what its showing
function (predictors, response, comps = 2, crosval = TRUE)
{
X = as.matrix(predictors)
n = nrow(X)
p = ncol(X)
if (p < 2)
stop("\npredictors must contain more than one column") 'This is where the error is happening as p is < 2'
if (is.null(colnames(X)))
colnames(X) = paste(rep("X", p), 1:p, sep = "")
if (is.null(rownames(X)))
rownames(X) = 1:n
Y = as.matrix(response)
if (ncol(Y) != 1)
stop("\nresponse must be a single variable")
if (any(is.na(response)))
stop("\nresponse must not contain missing values")
if (nrow(X) != nrow(Y))
stop("\npredictors and response have different number of rows")
if (is.null(colnames(Y)))
colnames(Y) = "Y"
if (is.null(rownames(Y)))
rownames(Y) = 1:n
if (any(is.na(X)))
na.miss = TRUE
else na.miss = FALSE
if (!is.null(comps)) {
nc = comps
if (mode(nc) != "numeric" || length(nc) != 1 || nc <=
1 || (nc%%1) != 0 || nc > min(n, p))
nc = min(n, p)
if (nc == n)
nc = n - 1
}
else {
if (na.miss) {
crosval = FALSE
nc = 2
}
else {
if (n >= 10)
crosval = TRUE
else crosval = FALSE
nc = min(n, p)
}
}
if (!is.logical(crosval))
crosval = FALSE
Xx = scale(X)
Yy = scale(Y)
X.old = Xx
Y.old = Yy
Th = matrix(NA, n, nc)
Ph = matrix(NA, p, nc)
Wh = matrix(NA, p, nc)
Uh = matrix(NA, n, nc)
ch = rep(NA, nc)
Hot = matrix(NA, n, nc)
hlim = rep(NA, nc)
if (crosval) {
RSS = c(n - 1, rep(NA, nc))
PRESS = rep(NA, nc)
Q2 = rep(NA, nc)
sets_size = c(rep(n%/%10, 9), n - 9 * (n%/%10))
obs = sample(1:n, size = n)
segments = vector("list", length = 10)
ini = cumsum(sets_size) - sets_size + 1
fin = cumsum(sets_size)
for (k in 1:10) segments[[k]] = obs[ini[k]:fin[k]]
}
w.old = rep(1, p)
t.new = rep(1, n)
p.new = rep(NA, p)
h = 1
repeat {
if (na.miss) {
for (j in 1:p) {
i.exist = which(complete.cases(X[, j]))
w.old[j] = sum(X.old[i.exist, j] * Y.old[i.exist])
}
w.new = w.old/sqrt(sum(w.old^2))
for (i in 1:n) {
j.exist = which(complete.cases(X[i, ]))
t.new[i] = sum(X.old[i, j.exist] * w.new[j.exist])
}
for (j in 1:p) {
i.exist = intersect(which(complete.cases(X[,
j])), which(complete.cases(t.new)))
p.new[j] = sum(X.old[i.exist, j] * t.new[i.exist])/sum(t.new[i.exist]^2)
}
c.new = t(Y.old) %*% t.new/sum(t.new^2)
u.new = Y.old/as.vector(c.new)
}
if (!na.miss) {
w.old = t(X.old) %*% Y.old/sum(Y.old^2)
w.new = w.old/sqrt(sum(w.old^2))
t.new = X.old %*% w.new
p.new = t(X.old) %*% t.new/sum(t.new^2)
c.new = t(Y.old) %*% t.new/sum(t.new^2)
u.new = Y.old/as.vector(c.new)
if (crosval) {
RSS[h + 1] = sum((Y.old - t.new %*% c.new)^2)
press = rep(0, 10)
for (i in 1:10) {
aux = segments[[i]]
Xy.aux = t(X.old[-aux, ]) %*% Y.old[-aux]
wh.si = Xy.aux %*% sqrt(solve(t(Xy.aux) %*%
Xy.aux))
th.si = X.old[-aux, ] %*% wh.si
ch.si = t(Y.old[-aux]) %*% th.si %*% solve(t(th.si) %*%
th.si)
ch.si = as.vector(ch.si)
Yhat.si = ch.si * X.old[aux, ] %*% wh.si
press[i] = sum((Y.old[aux] - Yhat.si)^2)
}
PRESS[h] = sum(press)
Q2[h] = 1 - PRESS[h]/RSS[h]
}
}
Y.old = Y.old - (t.new %*% c.new)
X.old = X.old - (t.new %*% t(p.new))
Th[, h] = t.new
Ph[, h] = p.new
Wh[, h] = w.new
Uh[, h] = u.new
ch[h] = c.new
Hot[, h] = (n/(n - 1)) * t.new^2/(sum(t.new^2)/(n - 1))
hlim[h] = qf(0.95, h, n - h) * (h * (n^2 - 1))/(n * (n -
h))
if (is.null(comps) && crosval) {
if (Q2[h] < 0.0975 || h == nc)
break
}
else {
if (h == nc)
break
}
h = h + 1
}
if (crosval) {
q2cum = rep(NA, h)
for (k in 1:h) q2cum[k] = prod(PRESS[1:k])/prod(RSS[1:k])
Q2cum = 1 - q2cum
Q2cv = cbind(PRESS[1:h], RSS[1:h], Q2[1:h], rep(0.0975,
h), Q2cum)
dimnames(Q2cv) = list(1:h, c("PRESS", "RSS", "Q2", "LimQ2",
"Q2cum"))
if (is.null(comps))
h = h - 1
}
if (!crosval)
Q2cv = NULL
Th = Th[, 1:h]
Ph = Ph[, 1:h]
Wh = Wh[, 1:h]
Uh = Uh[, 1:h]
ch = ch[1:h]
Ws = Wh %*% solve(t(Ph) %*% Wh)
Bs = as.vector(Ws %*% ch)
if (!na.miss) {
Br = Bs * (rep(apply(Y, 2, sd), p)/apply(X, 2, sd))
cte = as.vector(colMeans(Y) - Br %*% apply(X, 2, mean))
y.hat = as.vector(X %*% Br + cte)
cor.xyt = cor(cbind(Xx, y = Yy), Th)
}
else {
mu.x <- attributes(Xx)$"scaled:center"
sd.x <- attributes(Xx)$"scaled:scale"
X.hat = Th %*% t(Ph) %*% diag(sd.x, p, p) + matrix(rep(mu.x,
each = n), n, p)
Br = Bs * (rep(apply(Y, 2, sd), p)/sd.x)
cte = as.vector(colMeans(response) - Br %*% mu.x)
y.hat = as.vector(X.hat %*% Br + cte)
cor.xyt = matrix(NA, p + 1, h)
for (j in 1:p) {
i.exist <- which(complete.cases(X[, j]))
cor.xyt[j, ] = cor(Xx[i.exist, j], Th[i.exist, ])
}
cor.xyt[p + 1, ] = cor(Yy, Th)
}
resid = as.vector(Y - y.hat)
R2 = as.vector(cor(Th, Yy))^2
R2Xy = t(apply(cor.xyt^2, 1, cumsum))
T2hot = rbind(hlim[1:h], t(apply(Hot[, 1:h], 1, cumsum)))
dimnames(Wh) = list(colnames(X), paste(rep("w", h), 1:h,
sep = ""))
dimnames(Ws) = list(colnames(X), paste(rep("w*", h), 1:h,
sep = ""))
dimnames(Th) = list(rownames(X), paste(rep("t", h), 1:h,
sep = ""))
dimnames(Ph) = list(colnames(X), paste(rep("p", h), 1:h,
sep = ""))
dimnames(Uh) = list(rownames(Y), paste(rep("u", h), 1:h,
sep = ""))
names(ch) = paste(rep("c", h), 1:h, sep = "")
dimnames(T2hot) = list(c("T2", rownames(X)), paste(rep("H",
h), 1:h, sep = ""))
names(Bs) = colnames(X)
names(Br) = colnames(X)
names(resid) = rownames(Y)
names(y.hat) = rownames(Y)
names(R2) = paste(rep("t", h), 1:h, sep = "")
colnames(R2Xy) = paste(rep("t", h), 1:h, sep = "")
dimnames(cor.xyt) = list(c(colnames(X), colnames(Y)), colnames(Th))
res = list(x.scores = Th, x.loads = Ph, y.scores = Uh, y.loads = ch,
cor.xyt = cor.xyt, raw.wgs = Wh, mod.wgs = Ws, std.coefs = Bs,
reg.coefs = c(Intercept = cte, Br), R2 = R2, R2Xy = R2Xy,
y.pred = y.hat, resid = resid, T2 = T2hot, Q2 = Q2cv,
y = response)
class(res) = "plsreg1"
return(res)
}
I used Rglpk to solve a linear programming problem, but its results seems weird. I changed it to lpSolve, and the two results are different.
Please comment the Rglpk and uncomment lpSolve statements to change the solver to lpSolve.
# Lo, S.-F., & Lu, W.-M. (2009). An integrated performance evaluation of financial holding companies in Taiwan.
# European Journal of Operational Research, 198(1), 341–350. doi:10.1016/j.ejor.2008.09.006
sbm = function(X,Y)
{
# Here X is N * m matrix, Y is N*s matrix.
library(Rglpk)
# require(lpSolve)
N = nrow(X)
m = ncol(X)
s = ncol(Y)
# variables are
# t
# gamma_j,j=1..N
# s_i^(-),i=1..m
# s_r^(+),r=1..s
efficiency = numeric(N)
max_positive_y = apply(Y[,1:s], MARGIN = 2, function(x) max(x[x>0]))
min_positive_y = apply(Y[,1:s], MARGIN = 2, function(x) min(x[x>0]))
dir = rep("==",1+m+s+1)
rhs = c(1,rep(0,m),rep(0,s),0)
for(i in 1:N)
{
x = X[i,]
y = Y[i,]
#variables
coef_t = 1
coef_gamma = rep(0,N)
coef_s_i = -1/(m * x)
coef_s_r = rep(0,s)
obj = c(coef_t,coef_gamma,coef_s_i,coef_s_r)
coef_constraint1_s=y
for(r in 1:s)
{
if(y[r]<0){
coef_constraint1_s[r] =
min_positive_y[r] * (max_positive_y[r] - min_positive_y[r])/
(max_positive_y[r] - y[r])
}
}
constraint1 = c(1, rep(0,N), rep(0,m) , 1/(s*coef_constraint1_s))
constraint2 = cbind(-x, t(X), diag(m), matrix(0,m,s))
constraint3 = cbind(-y, t(Y), matrix(0,s,m), -diag(s))
constraint4 = c(-1, rep(1,N), rep(0,m), rep(0,s))
mat = rbind(constraint1,constraint2,constraint3,constraint4)
results = Rglpk_solve_LP(obj = obj,mat = mat,dir = dir,rhs = rhs,max = FALSE)
efficiency[i] = results$optimum
# results <- lp("min", obj, mat, dir, rhs)
# efficiency[i] = results$objval
}
efficiency
}