Generate a sequence by formula in R

I want to generate a sequence:
X_n = |X_{n-1} - epsilon_n|,
where epsilon_n has an exponential distribution.
For example:
epsilon <- rexp(100, rate = 0.3)

Use `Reduce()`:
X0 <- 10
set.seed(42)
epsilon <- rexp(100, rate = 0.3)
eps <- c(X0, epsilon)   # prepend the initial value to the noise sequence
X <- Reduce(function(x, y) abs(x - y), eps, accumulate = TRUE)
plot(X)
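To see how the accumulation works, here is a tiny hand-checkable example (my own illustration, using a three-element input): `Reduce()` keeps the running value and, with accumulate = TRUE, emits every intermediate result.
Reduce(function(x, y) abs(x - y), c(10, 1, 2), accumulate = TRUE)
# [1] 10  9  7    # 10, then |10 - 1| = 9, then |9 - 2| = 7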

## n is the length of the sequence, X0 is the initial value,
## default exponential rate is 0.3
xSeq <- function(n, X0, rate = 0.3) {
  vOut <- rep(0, n)
  vOut[1] <- X0
  eSeq <- rexp(n - 1, rate)
  for (i in 2:n) {
    vOut[i] <- abs(vOut[i - 1] - eSeq[i - 1])
  }
  return(vOut)
}
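For example, to generate and plot a sequence of length 100 from the same starting value as above (using the xSeq() just defined; rate keeps its default of 0.3):
set.seed(42)
plot(xSeq(100, X0 = 10))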


How to fix `Error in hist.default(res) : 'x' must be numeric`?

Following this question: How to get the value of `t` so that my function `h(t) = epsilon` for a fixed `epsilon`?
I first sample the 500 eigenvectors v (and eigenvalues l) of a random matrix G, and then generate 1000 random initial vectors of dimension 500, which I normalize and store in xmats.
#make this example reproducible
set.seed(100001)
n <- 500
#Sample GOE random matrix
A <- matrix(rnorm(n*n, mean=0, sd=1), n, n)
G <- (A + t(A))/sqrt(2*n)
ev <- eigen(G)
l <- ev$values
v <- ev$vectors
# parameters of the multivariate normal distribution
mean <- rep(0, n)
var <- diag(n)
# simulate the multivariate normal distribution
initial <- MASS::mvrnorm(n = 1000, mu = mean, Sigma = var) # 1000 random vectors
# normalize each initial vector; the normalized vectors are uniformly distributed on the unit sphere
xmats <- lapply(1:1000, function(i) initial[i, ]/norm(initial[i, ], type = "2"))
Then I compute `res`:
h1t <- function(t, x_0) {
  h10 <- c(x_0 %*% v[, n])
  denom <- vapply(t, function(.t) {
    sum((x_0 %*% v)^2 * exp(-4*(l - l[n]) * .t))
  }, numeric(1L))
  abs(h10) / sqrt(denom)
}
find_t <- function(x, epsilon = 0.01, range = c(-50, 50)) {
  uniroot(function(t) h1t(t, x) - epsilon, range,
          tol = .Machine$double.eps)$root
}
I want to get `res`:
res <- lapply(xmats, find_t)
However, for some initial vectors it throws the error
Error in uniroot(function(t) h1t(t, x) - epsilon, range, tol = .Machine$double.eps) : f() values at end points not of opposite sign
that is, h1t(t, x) - epsilon does not change sign over the search range. As for the title: `res` is a list, so hist(res) fails with `'x' must be numeric`; hist(unlist(res)) works well.
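If the goal is simply to skip the initial vectors for which the root is not bracketed by `range`, a minimal sketch (my own wrapper, not from the original post) is to catch the uniroot error and return NA, which hist() drops along with other non-finite values:
res <- lapply(xmats, function(x) {
  tryCatch(find_t(x), error = function(e) NA_real_)  # NA where no sign change in range
})
hist(unlist(res))
Widening `range` in find_t() is the other obvious fix when the sign change lies outside c(-50, 50).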

Cannot make sense of the error while using OptimParallel in R

I'm trying to run the function below using optimParallel in R on a certain data set. The code is as follows:
install.packages("optimParallel")
install.packages('parallel')
library(parallel)
library(optimParallel)
library(doParallel)
library(data.table)
library(Rlab)
library(HDInterval)
library(mvtnorm)
library(matrixStats)
library(dplyr)
library(cold)
## Bolus data:
data("bolus")
d1 <- bolus
d1$group <- ifelse(d1$group == "2mg",1,0)
colnames(d1) <- c("index",'group',"time","y")
d2 <- d1 %>% select(index, y, group, time)
colnames(d2) <- c('index','y','x1','x2') ### Final data
## Modification of the objective function:
## Another approach:
dpd_poi <- function(x, fixed = c(rep(FALSE, 5))) {
  params <- fixed
  dpd_1 <- function(p) {
    params[!fixed] <- p
    alpha <- params[1]
    beta_0 <- params[2]
    beta_1 <- params[3]
    beta_2 <- params[4]
    rho <- params[5]
    add_pi <- function(d) {
      k <- beta_0 + (d[3]*beta_1) + (d[4]*beta_2)
      k1 <- exp(k) ## for Poisson regression
      d <- cbind(d, k1)
    }
    dat_split <- split(x, f = x$index)
    result <- lapply(dat_split, add_pi)
    result <- rbindlist(result)
    result <- as.data.frame(result)
    colnames(result) <- c('index','y','x1','x2','lamb')
    result_split <- split(result, f = result$index)
    expression <- function(d) {
      bin <- as.data.frame(combn(d$y, 2))
      pr <- as.data.frame(combn(d$lamb, 2))
      ## Evaluation of the probabilities:
      f_jk <- function(u, v) {
        dummy_func <- function(x, y) {
          ppois(x, lambda = y)
        }
        dummy_func_1 <- function(x, y) {
          ppois(x - 1, lambda = y)
        }
        k <- mapply(dummy_func, u, v)
        k_1 <- mapply(dummy_func_1, u, v)
        inv1 <- inverseCDF(as.matrix(k), pnorm)
        inv2 <- inverseCDF(as.matrix(k_1), pnorm)
        mean <- rep(0, 2)
        lower <- inv2
        upper <- inv1
        corr <- diag(2)
        corr[lower.tri(corr)] <- rho
        corr[upper.tri(corr)] <- rho
        prob <- pmvnorm(lower = lower, upper = upper, mean = mean, corr = corr)
        prob <- (1 + (1/alpha))*(prob^alpha)
        ## First expression: (changes for Poisson regression)
        lam <- as.vector(t(v))
        v1 <- rpois(1000, lambda = lam[1])
        v2 <- rpois(1000, lambda = lam[2])
        all_possib <- as.data.frame(rbind(v1, v2))
        new_func <- function(u) {
          k <- mapply(dummy_func, u, v)
          k_1 <- mapply(dummy_func_1, u, v)
          inv1_1 <- inverseCDF(as.matrix(k), pnorm)
          inv2_1 <- inverseCDF(as.matrix(k_1), pnorm)
          mean1 <- rep(0, 2)
          lower1 <- inv2_1
          upper1 <- inv1_1
          corr1 <- diag(2)
          corr1[lower.tri(corr1)] <- rho
          corr1[upper.tri(corr1)] <- rho
          prob1 <- pmvnorm(lower = lower1, upper = upper1, mean = mean1, corr = corr1)
          prob1 <- prob1^(alpha)
        }
        val <- apply(all_possib, 2, new_func)
        val_s <- mean(val) ## approximation
        return(val_s - prob)
      }
      final_res <- mapply(f_jk, bin, pr)
      final_value <- sum(final_res)
    }
    u <- sapply(result_split, expression)
    return(sum(u))
  }
}
## run the objective function:
val <- dpd_poi(d2, c(0.5, FALSE, FALSE, FALSE, FALSE))  # returns the inner function dpd_1 (val must exist before it is exported)
cl <- makeCluster(25)
setDefaultCluster(cl = cl)
clusterExport(cl, c('d2', 'val'))
clusterEvalQ(cl, c(library(data.table), library(Rlab), library(HDInterval), library(mvtnorm), library(matrixStats), library(dplyr), library(cold)))
optimParallel(par = c(beta_0 = 1, beta_1 = 0.1, beta_2 = 1, rho = 0.2), fn = val, method = "L-BFGS-B",
              lower = c(-10, -10, -10, 0), upper = c(Inf, Inf, Inf, 1))
stopCluster(cl)
After running for some time, it returns the following error:
Error in checkForRemoteErrors(val) :
  9 nodes produced errors; first error: missing value where TRUE/FALSE needed
However, when I make a minor change in the objective function (drawing 2 random numbers from rpois instead of 1000) and run the same code using optim, it converges and gives me a proper result. Since this is a Monte Carlo approximation, it does not make sense to draw so few Poisson variables, and I have to use optimParallel, otherwise it takes far too long to converge. I can also reproduce this with simulated data.
I'm unable to figure out where the issue lies, and I'd appreciate any help.
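One way to make the failure visible: optimParallel evaluates fn on the workers, so errors surface only through checkForRemoteErrors(). A hedged sketch (val_safe is my own hypothetical wrapper, not part of the original code): wrap the objective so a failing evaluation returns a large finite penalty instead of erroring, and start the cluster with outfile = "" so worker messages reach the master console:
cl <- makeCluster(25, outfile = "")  # outfile = "" echoes worker output to the console
val_safe <- function(p) {
  tryCatch(val(p), error = function(e) {
    message("objective failed at p = ", paste(round(p, 4), collapse = ", "),
            ": ", conditionMessage(e))
    1e10  # large finite penalty; returning NA would itself break L-BFGS-B
  })
}
optimParallel(par = c(beta_0 = 1, beta_1 = 0.1, beta_2 = 1, rho = 0.2),
              fn = val_safe, method = "L-BFGS-B",
              lower = c(-10, -10, -10, 0), upper = c(Inf, Inf, Inf, 1))
The logged parameter values often reveal inputs for which pmvnorm() or inverseCDF() returns NaN, a typical source of "missing value where TRUE/FALSE needed".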

coding gradient descent in R

I am trying to code gradient descent in R. The goal is to collect a data frame of each estimate so I can plot the algorithm's search through the parameter space.
I am using the built-in dataset data(cars) in R. Unfortunately something is way off in my function. The estimates just increase linearly with each iteration! But I cannot figure out where I err.
Any tips?
Code:
GradientDescent <- function(b0_start, b1_start, x, y, niter=10, alpha=0.1) {
  # initialize
  gradient_b0 <- 0
  gradient_b1 <- 0
  x <- as.matrix(x)
  y <- as.matrix(y)
  N <- length(y)
  results <- matrix(nrow=niter, ncol=2)
  # gradient
  for (i in 1:N) {
    gradient_b0 <- gradient_b0 + (-2/N) * (y[i] - (b0_start + b1_start*x[i]))
    gradient_b1 <- gradient_b1 + (-2/N) * x[i] * (y[i] - (b0_start + b1_start*x[i]))
  }
  # descent
  b0_hat <- b0_start
  b1_hat <- b1_start
  for (i in 1:niter) {
    b0_hat <- b0_hat - (alpha*gradient_b0)
    b1_hat <- b1_hat - (alpha*gradient_b1)
    # collect
    results[i,] <- c(b0_hat, b1_hat)
  }
  # return
  df <- data.frame(results)
  colnames(df) <- c("b0", "b1")
  return(df)
}
> test <- GradientDescent(0,0,cars$speed, cars$dist, niter=1000)
> head(test,2); tail(test,2)
b0 b1
1 8.596 153.928
2 17.192 307.856
b0 b1
999 8587.404 153774.1
1000 8596.000 153928.0
Here is a solution (slope only, no intercept) for the cars dataset:
# dependent and independent variables
y <- cars$dist
x <- cars$speed
# number of iterations
iter_n <- 100
# initial value of the parameter
theta1 <- 0
# learning rate
alpha <- 0.001
m <- nrow(cars)
yhat <- theta1*x
# a tibble to record the parameter updates and cost
library(tibble)
results <- data_frame(theta1 = as.numeric(),
                      cost = NA,
                      iteration = 1)
# run the gradient descent
for (i in 1:iter_n) {
  theta1 <- theta1 - alpha * ((1 / m) * (sum((yhat - y) * x)))
  yhat <- theta1*x
  cost <- (1/m)*sum((yhat-y)^2)
  results[i, 1] <- theta1
  results[i, 2] <- cost
  results[i, 3] <- i
}
# print the parameter value after the defined iterations
print(theta1)
# 2.909132
Checking whether cost is decreasing:
library(ggplot2)
ggplot(results, aes(x = iteration, y = cost))+
geom_line()+
geom_point()
I wrote a more detailed blog post here.
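For reference, the bug in the original GradientDescent() is that the gradient is computed once, from the starting values, and that constant step is then applied niter times, which is exactly why the estimates grow linearly. A minimal sketch (my own rewrite, not from the answer above) of the two-parameter version with the gradient recomputed inside the descent loop, vectorized, and with a smaller alpha since speed is not standardized:
GradientDescent <- function(b0_start, b1_start, x, y, niter = 10, alpha = 0.1) {
  N <- length(y)
  b0_hat <- b0_start
  b1_hat <- b1_start
  results <- matrix(nrow = niter, ncol = 2)
  for (i in 1:niter) {
    resid <- y - (b0_hat + b1_hat * x)          # residuals at the CURRENT estimates
    b0_hat <- b0_hat - alpha * (-2 / N) * sum(resid)
    b1_hat <- b1_hat - alpha * (-2 / N) * sum(x * resid)
    results[i, ] <- c(b0_hat, b1_hat)
  }
  setNames(data.frame(results), c("b0", "b1"))
}
test <- GradientDescent(0, 0, cars$speed, cars$dist, niter = 100000, alpha = 0.001)
tail(test, 1)  # approaches coef(lm(dist ~ speed, data = cars)): roughly -17.58 and 3.93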

Stochastic gradient descent from gradient descent implementation in R

I have a working implementation of multivariable linear regression using gradient descent in R. I'd like to see if I can use what I have to run a stochastic gradient descent, and I'm not sure whether what I have is really inefficient. For example, for each value of α I want to perform 500 SGD iterations and be able to specify the number of randomly picked samples in each iteration, so I can see how the number of samples influences the results. I'm having trouble, though, with the mini-batching, and I want to be able to easily plot the results.
This is what I have so far:
# Read and process the datasets
# download the files from GitHub
download.file("https://raw.githubusercontent.com/dbouquin/IS_605/master/sgd_ex_data/ex3x.dat", "ex3x.dat", method="curl")
x <- read.table('ex3x.dat')
# we can standardize the x values using scale()
x <- scale(x)
download.file("https://raw.githubusercontent.com/dbouquin/IS_605/master/sgd_ex_data/ex3y.dat", "ex3y.dat", method="curl")
y <- read.table('ex3y.dat')
# combine the datasets
data3 <- cbind(x,y)
colnames(data3) <- c("area_sqft", "bedrooms","price")
str(data3)
head(data3)
################ Regular Gradient Descent
# http://www.r-bloggers.com/linear-regression-by-gradient-descent/
# vector populated with 1s for the intercept coefficient
x1 <- rep(1, length(data3$area_sqft))
# appends to dfs
# create x-matrix of independent variables
x <- as.matrix(cbind(x1,x))
# create y-matrix of dependent variables
y <- as.matrix(y)
L <- length(y)
# cost gradient function: independent variables and values of thetas
cost <- function(x, y, theta) {
  gradient <- (1/L) * (t(x) %*% ((x %*% t(theta)) - y))
  return(t(gradient))
}
# GD simultaneous update algorithm
# https://www.coursera.org/learn/machine-learning/lecture/8SpIM/gradient-descent
GD <- function(x, alpha) {
  theta <- matrix(c(0,0,0), nrow=1)
  for (i in 1:500) {
    theta <- theta - alpha*cost(x,y,theta)
    theta_r <- rbind(theta_r,theta)
  }
  return(theta_r)
}
# gradient descent α = (0.001, 0.01, 0.1, 1.0) - defined for 500 iterations
alphas <- c(0.001,0.01,0.1,1.0)
# Plot price, area in square feet, and the number of bedrooms
# create empty vector theta_r
theta_r<-c()
for (i in 1:length(alphas)) {
  result <- GD(x, alphas[i])
  # red = price
  # blue = sq ft
  # green = bedrooms
  plot(result[,1], ylim=c(min(result),max(result)), col="#CC6666", ylab="Value", lwd=0.35,
       xlab=paste("alpha=", alphas[i]), xaxt="n") # suppress the automatic x-axis title
  lines(result[,2], type="b", col="#0072B2", lwd=0.35)
  lines(result[,3], type="b", col="#66CC99", lwd=0.35)
}
Is it more practical to find a way to use sgd()? I can't seem to figure out how to get the level of control I'm looking for with the sgd package.
Sticking with what you have now:
## all of this is the same
download.file("https://raw.githubusercontent.com/dbouquin/IS_605/master/sgd_ex_data/ex3x.dat", "ex3x.dat", method="curl")
x <- read.table('ex3x.dat')
x <- scale(x)
download.file("https://raw.githubusercontent.com/dbouquin/IS_605/master/sgd_ex_data/ex3y.dat", "ex3y.dat", method="curl")
y <- read.table('ex3y.dat')
data3 <- cbind(x,y)
colnames(data3) <- c("area_sqft", "bedrooms","price")
x1 <- rep(1, length(data3$area_sqft))
x <- as.matrix(cbind(x1,x))
y <- as.matrix(y)
L <- length(y)
cost <- function(x, y, theta) {
  gradient <- (1/L) * (t(x) %*% ((x %*% t(theta)) - y))
  return(t(gradient))
}
I added y to your GD function and created a wrapper function, myGoD, that calls yours but first subsets the data:
GD <- function(x, y, alpha) {
  theta <- matrix(c(0,0,0), nrow=1)
  theta_r <- NULL
  for (i in 1:500) {
    theta <- theta - alpha*cost(x,y,theta)
    theta_r <- rbind(theta_r,theta)
  }
  return(theta_r)
}
myGoD <- function(x, y, alpha, n = nrow(x)) {
  idx <- sample(nrow(x), n)
  y <- y[idx, , drop = FALSE]
  x <- x[idx, , drop = FALSE]
  GD(x, y, alpha)
}
Check to make sure it works, and try different values of n:
all.equal(GD(x, y, 0.001), myGoD(x, y, 0.001))
# [1] TRUE
set.seed(1)
head(myGoD(x, y, 0.001, n = 20), 2)
# x1 V1 V2
# V1 147.5978 82.54083 29.26000
# V1 295.1282 165.00924 58.48424
set.seed(1)
head(myGoD(x, y, 0.001, n = 40), 2)
# x1 V1 V2
# V1 290.6041 95.30257 59.66994
# V1 580.9537 190.49142 119.23446
Here is how you can use it:
alphas <- c(0.001,0.01,0.1,1.0)
ns <- c(47, 40, 30, 20, 10)
par(mfrow = n2mfrow(length(alphas)))
for (i in 1:length(alphas)) {
  # result <- myGoD(x, y, alphas[i]) ## original
  result <- myGoD(x, y, alphas[i], ns[i])
  # red = price
  # blue = sq ft
  # green = bedrooms
  plot(result[,1], ylim=c(min(result),max(result)), col="#CC6666", ylab="Value", lwd=0.35,
       xlab=paste("alpha=", alphas[i]), xaxt="n") # suppress the automatic x-axis title
  lines(result[,2], type="b", col="#0072B2", lwd=0.35)
  lines(result[,3], type="b", col="#66CC99", lwd=0.35)
}
You don't need the wrapper function; you can just change your GD slightly. It is always good practice to explicitly pass arguments to your functions rather than relying on scoping. Before, you were assuming that y would be pulled from your global environment; here y must be supplied or you will get an error. This will avoid many headaches and mistakes down the road.
GD <- function(x, y, alpha, n = nrow(x)) {
  idx <- sample(nrow(x), n)
  y <- y[idx, , drop = FALSE]
  x <- x[idx, , drop = FALSE]
  theta <- matrix(c(0,0,0), nrow=1)
  theta_r <- NULL
  for (i in 1:500) {
    theta <- theta - alpha*cost(x,y,theta)
    theta_r <- rbind(theta_r,theta)
  }
  return(theta_r)
}
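One caveat, to connect this back to the mini-batching question: both versions above draw a single subsample and then run all 500 iterations on it. For stochastic or mini-batch gradient descent in the usual sense, the sampling has to move inside the loop so each step sees a fresh batch. A sketch under that reading (SGD and its batch-size-aware gradient are my own additions, not from the answer above):
SGD <- function(x, y, alpha, n = nrow(x), niter = 500) {
  grad <- function(xb, yb, theta) {  # like cost(), but divides by the batch size
    t((1 / nrow(xb)) * (t(xb) %*% ((xb %*% t(theta)) - yb)))
  }
  theta <- matrix(0, nrow = 1, ncol = ncol(x))
  theta_r <- matrix(NA_real_, niter, ncol(x))
  for (i in 1:niter) {
    idx <- sample(nrow(x), n)        # fresh mini-batch every iteration
    theta <- theta - alpha * grad(x[idx, , drop = FALSE], y[idx, , drop = FALSE], theta)
    theta_r[i, ] <- theta
  }
  theta_r
}
res <- SGD(x, y, alpha = 0.01, n = 10)  # 500 steps with 10-observation batches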

Speeding up matrix calculations

I have some matrix calculations in my code that are taking a long time to run. So far the only way I can think of to speed them up is to use foreach instead of a for loop, but I feel like more can be done. Is there some way of vectorizing things, or some alternative to the for loop, that I'm missing?
Thanks!
require(foreach)
require(mvtnorm)
# some dummy input values
omega.input.jP <- matrix(rnorm(3000*5, 0.1, 0.1), 3000, 5)
nsteps.obs <- ncol(omega.input.jP)
sigma.j <- rnorm(3000, 0.02, 0.05)
rho1.j <- rnorm(3000, 0.8, 0.1)
rho2.j <- rnorm(3000, 0.05, 0.1)
y.lastobs <- 0.3
mu.input.jP <- matrix(NA, nrow(omega.input.jP), ncol(omega.input.jP))

GetSigmaMu <- function( # Get Sigma.mu, a nsteps x nsteps matrix, for an AR(2) process
  nsteps,
  sigma_ar,
  rho1,
  rho2
) {
  rho <- c(rho1, rho2)
  cor <- ARMAacf(ar = rho, pacf = FALSE, lag.max = nsteps) # phi's, first element is phi0 = 1
  var <- sigma_ar^2/(1 - sum(rho*cor[2:3])) # stationary variance; cor[2:3] gives the first two phi's, cor[1] gives phi0 = 1 # change JR, 20140304
  cov <- cor*var
  Sigma.mu <- matrix(NA, nsteps, nsteps)
  for (i in 1:nsteps) {
    for (k in 1:nsteps) {
      Sigma.mu[i,k] <- cov[abs(i-k)+1]
    }
  }
  return(Sigma.mu)
}

GetConditionalMu <- function( # Get values of mu given y
  omega,
  Sigma.mu,
  y,
  method = "svd" # method used to get eigenvalues in the matrix; the default does not work, so "svd" is used instead
) {
  nsteps <- length(omega)
  one <- rep(1, nsteps)
  mean.mu.cond <- c(omega + (1/(sum(Sigma.mu)))*(Sigma.mu %*% one)*c(nsteps*y - t(one) %*% omega))
  Sigma.mu.cond <- Sigma.mu - (1/(sum(Sigma.mu)))*(Sigma.mu %*% one %*% t(one) %*% Sigma.mu)
  mu.cond <- rmvnorm(1, mean.mu.cond, Sigma.mu.cond, method = method)
  return(mu.cond)
}

# note: j indexes the sample (3000 samples in total) and P denotes the time step (5 time steps here)
mu.input.jP <- foreach (j = 1:nrow(mu.input.jP), .combine = "rbind") %do% {
  omega <- omega.input.jP[j, ]
  Sigma.mu <- GetSigmaMu(nsteps = nsteps.obs, sigma_ar = sigma.j[j], rho1 = rho1.j[j], rho2 = rho2.j[j])
  mu.input.P <- GetConditionalMu(omega = omega, Sigma.mu = Sigma.mu, y = y.lastobs)
  return(mu.input.P)
}
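One immediate vectorization, offered as a sketch rather than a full answer: the double loop in GetSigmaMu() fills Sigma.mu[i, k] with cov[abs(i - k) + 1], which is exactly a symmetric Toeplitz matrix, so stats::toeplitz() can replace it:
GetSigmaMu <- function(nsteps, sigma_ar, rho1, rho2) {
  rho <- c(rho1, rho2)
  cor <- ARMAacf(ar = rho, pacf = FALSE, lag.max = nsteps)
  var <- sigma_ar^2 / (1 - sum(rho * cor[2:3]))  # stationary variance, as above
  toeplitz((cor * var)[1:nsteps])                # Sigma.mu[i, k] = cov[abs(i - k) + 1]
}
Beyond that, switching %do% to %dopar% (with a registered doParallel backend) parallelizes the outer loop over the 3000 samples, since each iteration is independent.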
