Can't Convert `for` Loop to `apply` Function - r

I've converted for loops I've found in the past into apply functions e.g.:
y <- 6:10
z <- 1:5
for(i in 1:length(y)){
z[i] <- y[i] * y[i]^2
}
z
z <- sapply(X = 1:length(y), FUN = function(i){
y[i] * y[i]^2
})
z
But the following loop has been giving me issues today:
lambda <- 2.9
n <- 20
z <- 1:n
x <- 0.02
z[1] <- x
for(i in 1:(n - 1)){
z[i + 1] <- lambda * z[i] * (1 - z[i])
}
z
z <- 1:n
z[1] <- x
sapply(X = 1:(n - 1), FUN = function(i){
lambda * z[i] * (1 - z[i])
})
Does anyone see the bug?

Because you need result from previous step (eg. z[i+1]=f(z[i] ), you can not use sapply which results are available at the end of all steps.
For this you can use accumulate from package purrr:
purrr::accumulate(1:(n-1), ~(lambda * . * (1 - .)), .init=x)
[1] 0.0200000 0.0568400 0.1554667 0.3807608 0.6837678 0.6270652 0.6781778 0.6329327
[9] 0.6737538 0.6374479 0.6702134 0.6409794 0.6673619 0.6437710 0.6650567 0.6459932
[17] 0.6631894 0.6477708 0.6616750 0.6491974
where ~ defines a function taking 2 parameters (. and .y), . being reinjected from previous step and .y is the next step value
To better understand try:
purrr::accumulate(1:n, ~.)
purrr::accumulate(1:n, ~.y)

Related

append list for loop

I cannot figure out what i'm doing wrong with appending the results of a loop (as a tibble) into a list.
Below is code. I believe it has to do with the d in distance and the figures not being positive?
Error in datalist[[d]] <- dat :
attempt to select less than one element in integerOneIndex
library(tidyverse)
x <- c(10,5)
y <- c(1,3)
distance <- c(1,2,3) # distance away from the road
old<-data.frame(x,y)
datalist = list()
datalist2 = list()
for (d in 1: length(distance)) {
# Given a vector (defined by 2 points) and the distance,
# calculate a new vector that is distance away from the original
segment.shift <- function(x, y, d){
# calculate vector
v <- c(x[2] - x[1],y[2] - y[1])
# normalize vector
v <- v/sqrt((v[1]**2 + v[2]**2))
# perpendicular unit vector
vnp <- c( -v[2], v[1] )
return(list(x = c( x[1] + d*vnp[1], x[2] + d*vnp[1]),
y = c( y[1] + d*vnp[2], y[2] + d*vnp[2])))
}
# allocate memory for the bike path
xn <- numeric( (length(x) - 1) * 2 )
yn <- numeric( (length(y) - 1) * 2 )
for ( i in 1:(length(x) - 1) ) {
xs <- c(x[i], x[i+1])
ys <- c(y[i], y[i+1])
new.s <- segment.shift( xs, ys, d )
xn[(i-1)*2+1] <- new.s$x[1] ; xn[(i-1)*2+2] <- new.s$x[2]
yn[(i-1)*2+1] <- new.s$y[1] ; yn[(i-1)*2+2] <- new.s$y[2]
}
dat1<-as_tibble()
dat1<-as.data.frame(xn)%>%mutate(id=row_number())%>%mutate(Dist_Col=paste(d,"left",sep="_"))
#datalist[[d]] <- dat1 # add it to your list
dat2<-as_tibble()
dat2<-as.data.frame(yn)%>%mutate(id=row_number())%>%mutate(Dist_Col=paste(d,"left",sep="_"))
#datalist2[[d]] <- dat2 # add it to your list
###Now do right side
# allocate memory for the bike path
xn <- numeric( (length(x) - 1) * 2 )
yn <- numeric( (length(y) - 1) * 2 )
for ( i in 1:(length(x) - 1) ) {
xs <- c(x[i], x[i+1])
ys <- c(y[i], y[i+1])
new.s <- segment.shift( xs, ys, -d )
xn[(i-1)*2+1] <- new.s$x[1] ; xn[(i-1)*2+2] <- new.s$x[2]
yn[(i-1)*2+1] <- new.s$y[1] ; yn[(i-1)*2+2] <- new.s$y[2]
}
dat3<-as_tibble()
dat3<-as.data.frame(xn)%>%mutate(id=row_number())%>%mutate(Dist_Col=paste(d,"right",sep="_"))
datcomb<- full_join(dat1,dat3)
datalist[[d]] <- datcomb # add it to your list
dat4<-as_tibble()
dat4<-as.data.frame(yn)%>%mutate(id=row_number())%>%mutate(Dist_Col=paste(d,"right",sep="_"))
dat2comb<- full_join(dat2,dat4)
datalist2[[d]] <- dat2comb # add it to your list
}
big_data = do.call(rbind, datalist)
big_data2 = do.call(rbind, datalist2)
comb_data<- full_join(big_data,big_data2)
ggplot()+geom_line(data=old,aes(x,y),color='black')+geom_line(data=comb_data,aes(xn,yn,group=Dist_Col),color='red')
see updated code above which plot parallel lines on both sides of the original line.
I resolved the issue finally with
for (d in 1:length(distance))
plot update

how to calculate the Correlation coefficient using R using a function?

Hello!
I'm trying to write a function deriving the formula for Pearson's coefficient of correlation . I wrote the following code but when I try to pass the values, it returns empty output. Please point me to my error, I'm clueless!
Much appreciated.
correlation = function(X, Y, n = length(X)){
sum_X = 0
sum_Y = 0
sum_XY = 0
squareSum_X = 0
squareSum_Y = 0
i = 0
while (i < n ) {
# sum of elements of array X.
sum_X = sum_X + X[i]
# sum of elements of array Y.
sum_Y = sum_Y + Y[i]
# sum of X[i] * Y[i].
sum_XY = sum_XY + X[i] * Y[i]
# sum of square of array elements.
squareSum_X = squareSum_X + X[i] * X[i]
squareSum_Y = squareSum_Y + Y[i] * Y[i]
i =+ 1
}
# combine all into a final formula
final = (n * sum_XY - (sum_X * sum_Y))/ (sqrt((n * squareSum_X - sum_X * sum_X)* (n * squareSum_Y -
sum_Y * sum_Y)))
return (final)
}
R is a 1-indexed language. Start with i = 1 and change to while(i <= n) (and fix the iteration counter as noted in the comments: i = i + 1. Then your function works correctly.
n <- 100
x <- rnorm(n)
y <- rnorm(n)
round(correlation(x, y), 4) == round(cor(x, y), 4) # TRUE
Note, however, that R is also great for vectorized operations, and you can skip the explicit loop altogether. Something like this is a step towards getting more efficient:
correlation2 <- function(X, Y){
n <- length(X)
sum_X <- sum(X)
sum_Y <- sum(Y)
sum_XY <- sum(X * Y)
squareSum_X <- sum(X * X)
squareSum_Y <- sum(Y * Y)
final <- (n * sum_XY - (sum_X * sum_Y)) / (sqrt((n * squareSum_X - sum_X * sum_X)* (n * squareSum_Y - sum_Y * sum_Y)))
return (final)
}
round(correlation2(x, y), 4) == round(cor(x, y), 4) # TRUE
Or even just:
correlation3 <- function(X, Y){
n = length(X)
sum_x = sum(X)
sum_y = sum(Y)
(n * sum(X * Y) - sum_x * sum_y) /
(sqrt((n * sum(x^2) - sum_x^2) * (n * sum(Y^2) - sum_y^2)))
}

Iterative optimization of alternative glm family

I'm setting up an alternative response function to the commonly used exponential function in poisson glms, which is called softplus and defined as $\frac{1}{c} \log(1+\exp(c \eta))$, where $\eta$ corresponds to the linear predictor $X\beta$
I already managed optimization by setting parameter $c$ to arbitrary fixed values and only searching for $\hat{\beta}$.
BUT now for the next step I have to optimize this parameter $c$ as well (iteratively changing between updated $\beta$ and current $c$).
I tried to write a log-lik function, score function and then setting up a Newton Raphson optimization (using a while loop)
but I don't know how to seperate the updating of c in an outer step and updating \beta in an inner step..
Are there any suggestions?
# Response function:
sp <- function(eta, c = 1 ) {
return(log(1 + exp(abs(c * eta)))/ c)
}
# Log Likelihood
l.lpois <- function(par, y, X){
beta <- par[1:(length(par)-1)]
c <- par[length(par)]
l <- rep(NA, times = length(y))
for (i in 1:length(l)){
l[i] <- y[i] * log(sp(X[i,]%*%beta, c)) - sp(X[i,]%*%beta, c)
}
l <- sum(l)
return(l)
}
# Score function
score <- function(y, X, par){
beta <- par[1:(length(par)-1)]
c <- par[length(par)]
s <- matrix(rep(NA, times = length(y)*length(par)), ncol = length(y))
for (i in 1:length(y)){
s[,i] <- c(X[i,], 1) * (y[i] * plogis(c * X[i,]%*%beta) / sp(X[i,]%*%beta, c) - plogis(c * X[i,]%*%beta))
}
score <- rep(NA, times = nrow(s))
for (j in 1:length(score)){
score[j] <- sum(s[j,])
}
return(score)
}
# Optimization function
opt <- function(y, X, b.start, eps=0.0001, maxiter = 1e5){
beta <- b.start[1:(length(b.start)-1)]
c <- b.start[length(b.start)]
b.old <- b.start
i <- 0
conv <- FALSE
while(conv == FALSE){
eta <- X%*%b.old[1:(length(b.old)-1)]
s <- score(y, X, b.old)
h <- numDeriv::hessian(l.lpois,b.old,y=y,X=X)
invh <- solve(h)
# update
b.new <- b.old + invh %*% s
i <- i + 1
# Test
if(any(is.nan(b.new))){
b.new <- b.old
warning("convergence failed")
break
}
# convergence reached?
if(sqrt(sum((b.new - b.old)^2))/sqrt(sum(b.old^2)) < eps | i >= maxiter){
conv <- TRUE
}
b.old <- b.new
}
eta <- X%*%b.new[1:(length(b.new)-1)]
# covariance
invh <- solve(numDeriv::hessian(l.lpois,b.new,y=y,X=X))
fitted <- sp(eta, b.new[length(b.new)])
result <- list("coefficients" = c(beta = b.new),
"fitted.values" = fitted,
"covariance" = invh)
}
# Running fails ..
n <- 100
x <- runif(n, 0, 1)
Xdes <- cbind(1, x)
eta <- 1 + 2 * x
y <- rpois(n, sp(eta, c = 1))
opt(y,Xdes,c(0,1,1))
You have 2 bugs:
line 25:
(y[i] * plogis(c * X[i,]%*%beta) / sp(X[i,]%*%beta, c) - plogis(c * X[i,]%*%beta))
this returns matrix so you must convert to numeric:
as.numeric(y[i] * plogis(c * X[i,]%*%beta) / sp(X[i,]%*%beta, c) - plogis(c * X[i,]%*%beta))
line 23:
) is missing:
you have:
s <- matrix(rep(NA, times = length(y)*length(par), ncol = length(y))
while it should be:
s <- matrix(rep(NA, times = length(y)*length(par)), ncol = length(y))

How to use a matrix as an input in a User-Defined Function and Loop it in R?

Here is the current script I have:
delta <- 1/52
T <- 0.5
S0 <- 25
sigma <- 0.30
K <- 25
r <- 0.05
n <- 1000000
m <- T/delta
S <- numeric(m + 1)
S[1] <- S0
#Payoff asian option
asian_option_price <- function() {
for(j in 1:m) {
W <- rnorm(1)
S[j + 1] <- S[j] * exp((r - 0.5 * sigma^2) * delta + sigma * sqrt(delta) * W)
}
Si.bar <- mean(S)
exp(-r * T) * max(Si.bar - K, 0)
}
#Loops
C <- raply(n, asian_option_price(), .progress = "text")
My issue is that I need to use "-W" for a second simulation right after this one is done. The way the script is made, "W" is inside my loop which makes it impossible (i think) to use the corresponding "-W" after that. I think I need to use an independent matrix filled with rnorm() mat(x) = matrix(rnorm(m*n,mean=0,sd=1), m, n) so that I can simply use -mat(x) in my second simulation. I don't get how to take "W" out of my loop and still use it's corresponding matrix. Any help would be very useful. Thanks!
Your idea to preallocate all the random numbers is correct. You could then loop over the individual entries. However, it is faster to go for a vectorized approach:
delta <- 1/52
T <- 0.5
S0 <- 25
sigma <- 0.30
K <- 25
r <- 0.05
n <- 100000
m <- ceiling(T/delta)
W <- matrix(rnorm(n*m), nrow = m, ncol = n)
S <- apply(exp((r - 0.5 * sigma^2) * delta + sigma * sqrt(delta) * W), 2, cumprod)
S <- S0 * rbind(1, S)
Si_bar <- apply(S, 2, mean)
mean(pmax(Si_bar -K, 0)) * exp(-r*T)

Error in seq.default(a, length = max(0, b - a - 1)) : length must be non-negative number

I tried running the code below.
set.seed(307)
y<- rnorm(200)
h2=0.3773427
t=seq(-3.317670, 2.963407, length.out=500)
fit=density(y, bw=h2, n=1024, kernel="epanechnikov")
integrate.xy(fit$x, fit$y, min(fit$x), t[407])
However, i recived the following message:
"Error in seq.default(a, length = max(0, b - a - 1)) :
length must be non-negative number"
I am not sure what's wrong.
I do not encounter any problem when i use t[406] or t[408] as follow:
integrate.xy(fit$x, fit$y, min(fit$x), t[406])
integrate.xy(fit$x, fit$y, min(fit$x), t[408])
Does anyone know what's the problem and how to fix it? Appreciate your help please. Thanks!
I went through the source code for the integrate.xy function, and there seems to be a bug relating to the usage of the xtol argument.
For reference, here is the source code of integrate.xy function:
function (x, fx, a, b, use.spline = TRUE, xtol = 2e-08)
{
dig <- round(-log10(xtol))
f.match <- function(x, table) match(signif(x, dig), signif(table,
dig))
if (is.list(x)) {
fx <- x$y
x <- x$x
if (length(x) == 0)
stop("list 'x' has no valid $x component")
}
if ((n <- length(x)) != length(fx))
stop("'fx' must have same length as 'x'")
if (is.unsorted(x)) {
i <- sort.list(x)
x <- x[i]
fx <- fx[i]
}
if (any(i <- duplicated(x))) {
n <- length(x <- x[!i])
fx <- fx[!i]
}
if (any(diff(x) == 0))
stop("bug in 'duplicated()' killed me: have still multiple x[]!")
if (missing(a))
a <- x[1]
else if (any(a < x[1]))
stop("'a' must NOT be smaller than min(x)")
if (missing(b))
b <- x[n]
else if (any(b > x[n]))
stop("'b' must NOT be larger than max(x)")
if (length(a) != 1 && length(b) != 1 && length(a) != length(b))
stop("'a' and 'b' must have length 1 or same length !")
else {
k <- max(length(a), length(b))
if (any(b < a))
stop("'b' must be elementwise >= 'a'")
}
if (use.spline) {
xy <- spline(x, fx, n = max(1024, 3 * n))
if (xy$x[length(xy$x)] < x[n]) {
if (TRUE)
cat("working around spline(.) BUG --- hmm, really?\n\n")
xy$x <- c(xy$x, x[n])
xy$y <- c(xy$y, fx[n])
}
x <- xy$x
fx <- xy$y
n <- length(x)
}
ab <- unique(c(a, b))
xtol <- xtol * max(b - a)
BB <- abs(outer(x, ab, "-")) < xtol
if (any(j <- 0 == apply(BB, 2, sum))) {
y <- approx(x, fx, xout = ab[j])$y
x <- c(ab[j], x)
i <- sort.list(x)
x <- x[i]
fx <- c(y, fx)[i]
n <- length(x)
}
ai <- rep(f.match(a, x), length = k)
bi <- rep(f.match(b, x), length = k)
dfx <- fx[-c(1, n)] * diff(x, lag = 2)
r <- numeric(k)
for (i in 1:k) {
a <- ai[i]
b <- bi[i]
r[i] <- (x[a + 1] - x[a]) * fx[a] + (x[b] - x[b - 1]) *
fx[b] + sum(dfx[seq(a, length = max(0, b - a - 1))])
}
r/2
}
The value given to the xtol argument, is being overwritten in the line xtol <- xtol * max(b - a). But the value of the dig variable is calculated based on the original value of xtol, as given in the input to the function. Because of this mismatch, f.match function, in the line bi <- rep(f.match(b, x), length = k), returns no matches between x and b (i.e., NA). This results in the error that you have encountered.
A simple fix, at least for the case in question, would be to remove the xtol <- xtol * max(b - a) line. But, you should file a bug report with the maintainer of this package, for a more rigorous fix.

Resources