box muller equations returning NA values - r

#generating 100 uniformly distributed numbers
u1 <- runif(100,0,1)
u2 <- runif(100,0,1)
x1 <- function(x, y) {
return(sqrt(-2 * log(x) * cos(2 * pi * y)))
}
x2 <- function(x, y) {
return(sqrt(-2 * log(x) * sin(2 * pi * y)))
}
#applying x1
x1_vals <- mapply(x1, u1, u2)
#applying x2
x2_vals <- mapply(x2, u1, u2)
Hi I want to write a box muller function, this is part of my attempt above, I'm trying to avoid using loops as much as possible(for loops especially)
However, I keep getting NA values for my x1/x2 functions. I can't figure out whats wrong.

Here is the Box-Muller formula corrected.
x1 <- function(x, y) sqrt(-2 * log(x)) * cos(2 * pi * y)
x2 <- function(x, y) sqrt(-2 * log(x)) * sin(2 * pi * y)
u1 <- runif(1000,0,1)
u2 <- runif(1000,0,1)
# applying x1
x1_vals <- x1(u1, u2)
all(is.finite(x1_vals))
#> [1] TRUE
# applying x2
x2_vals <- x2(u1, u2)
all(is.finite(x2_vals))
#> [1] TRUE
old_par <- par(mfrow = c(1, 2))
hist(x1_vals, freq = FALSE)
curve(dnorm, from = -4, to = 4, add = TRUE)
hist(x2_vals, freq = FALSE)
curve(dnorm, from = -4, to = 4, add = TRUE)
par(old_par)
Created on 2022-09-28 with reprex v2.0.2

Those functions are already vectorized. Don't need to wrap mapply around them. Just give the two vectors to x1 and x2
x1(u1,u2)
[1] NaN NaN NaN NaN 0.3088330
[6] NaN 0.7866889 NaN NaN 1.7102801
[11] 2.1886770 NaN 1.5473627 1.0644378 0.8499208
snipped remaining 100 values
Vectorization is a feature of the R language. If the expressions in the function body can all take vectors and return vectors of equal length then no loop wrapper is needed.
You are getting NA's because the domain of the arguments to sin and cos are causing both positive and negative values to be given to sqrt.

Related

Error when calling a user-defined function within a while loop, but not when called directly

When I call the user defined function sRGB_to_CAM16UCS in the console, it displays the result as intended. But when I try to call it within a while loop it throws an error. Can somebody help me understand the error?
library(purrr)
library(tibble)
library(tidyr)
sRGB_to_CAM16UCS <- function(R255, G255, B255){
# Convert sRGB to 1931 CIE XYZ [IEC 61966-2–1:2003(E)]
## Convert to the range of 0 to 1
R1 <- R255 / 255
G1 <- G255 / 255
B1 <- B255 / 255
## Gamma Expansion of sRGB values
gamma_inverse <- function(RGB1){
if (RGB1 < -0.04045 | RGB1 > 0.04045){
((RGB1 + 0.055)/1.055)^2.4
} else {
RGB1/12.92
}
}
R_li <- gamma_inverse(R1)
G_li <- gamma_inverse(G1)
B_li <- gamma_inverse(B1)
# Convert linear RGB values to CIE XYZ
X <- 41.24 * R_li + 35.76 * G_li + 18.05 * B_li
Y <- 21.26 * R_li + 71.52 * G_li + 07.22 * B_li
Z <- 01.93 * R_li + 11.92 * G_li + 95.05 * B_li
# Convert XYZ to CAM16
## User defined Parameters
X_w <- 96.4212
Y_w <- 100
Z_w <- 82.5188
L_A <- 40
Y_b <- 20
surround <- 2
discounting <- FALSE
## Predefined functions and constants
### M16
M16 <- matrix(c(0.401288,-0.250268,-0.002079,
0.650173, 1.204414, 0.048952,
-0.051461, 0.045854, 0.953127), nrow = 3, ncol=3)
### lerp
lerp <- function(a,b,c){
(1 - c) * a + c * b
}
### Crop
crop <- function(a,b,c){
pmin(pmax(c, a), b)
}
### Define adapt
adapt <- function(component){
con <- (F_L * abs(component) * 0.01)^0.42
sign(component) * 400 * con / (con + 27.13)
}
### Define unadapt
unadapt <- function(component){
sign(component) * 100 / F_L * ((27.13* abs(component))/(400-abs(component)))^2.38095238095
}
# Calculations
## Calculate "c"
if (surround >=1){
c <- lerp(0.59, 0.69, surround-1)
}else{
c <- lerp(0.525, 0.59, surround)
}
## Calculate "F" and "N_c"
if (c >= 0.59){
N_c <- lerp(0.9, 1.0, (c - 0.59)/.1)
} else {
N_c <- lerp(0.8, 0.9, (c - 0.525)/0.065)
}
## Calculate "k"
k <- 1/(5*L_A + 1)
## Calculate F_L
F_L <- k^4 * L_A + 0.1 * (1-k^4)^2 * (5 * L_A)^0.33333333333
## Calculate n
n <- Y_b / Y_w
## Calculate z
z <- 1.48 + sqrt(n)
## Calculate N_bb
N_bb <- 0.725 * n^-0.2
## Calculate D
if (discounting == FALSE){
D <- crop(0,1,N_c* (1 - 1/3.6 * exp((-L_A - 42)/92)))
}else {
D <- 1
}
## Calculate the "RGB_w"
RGB_w <- matrix(c(M16[1,1] * X_w + M16[1,2] * Y_w + M16[1,3] * Z_w,
M16[2,1] * X_w + M16[2,2] * Y_w + M16[2,3] * Z_w,
M16[3,1] * X_w + M16[3,2] * Y_w + M16[3,3] * Z_w), nrow = 3, ncol=1)
# Calculate the "D_RGB"
D_RGB <- apply(RGB_w, c(1, 2), function(x)((1 - D) * 1 + D * Y_w/x))
# Calculate the "D_RGB_inv"
D_RGB_inv <- apply(D_RGB,c(1, 2),function(x)1/x)
# Calculate the "RGB_cw"
RGB_cw <- RGB_w*D_RGB
# Calculate RGB_aw
RGB_aw <- apply(RGB_cw, c(1,2), adapt)
# Calculate A_w
A_w <- N_bb * ( 2 * RGB_aw[1,1] + RGB_aw[2,1] + 0.05 * RGB_aw[3,1])
# Calculate RGB_a
R_a <- adapt((M16[1,1] * X + M16[1,2] * Y + M16[1,3] * Z) * D_RGB[1,1])
B_a <- adapt((M16[2,1] * X + M16[2,2] * Y + M16[2,3] * Z) * D_RGB[2,1])
G_a <- adapt((M16[3,1] * X + M16[3,2] * Y + M16[3,3] * Z) * D_RGB[3,1])
# Calculate Hue
a <- R_a + (-12 * G_a + B_a) / 11
b <- (R_a + G_a - 2 * B_a) / 9
h_rad <- atan2(b, a)
h_ucs <- h_rad*(180.0/pi)
# Calculate Lightness (J)
e_t <- 0.25 * (cos(h_rad + 2) + 3.8)
A <- N_bb * (2*R_a + G_a + 0.05*B_a)
J <- 100 * ((A / A_w)^(c*z))
J_ucs <- 1.7 * J / (1 + 0.007 * J)
# Calculate brightness (Q)
Q <- 4/c * sqrt(J/100) * (A_w + 4) * (F_L^0.25)
# Calculate chroma (C)
t <- (5000 / 13 * N_c * N_bb * e_t * sqrt(a*a + b*b)) / (R_a + G_a + 1.05 * B_a + 0.305)
alpha <- t^0.9*(1.64 - 0.29^n)^0.73
C <- alpha * sqrt(J/100)
# Calculate colorfulness (M)
M <- C * F_L^0.25
M_ucs <- log(1 + 0.0228 * M) / 0.0228
# Calculate redness-greenness(a)
a_ucs <- M * cos(h_rad)
# Calculate yellowness-blueness(b)
b_ucs <- M * sin(h_rad)
# Calculate Saturation (s)
s <- 50 * sqrt(alpha*c /(A_w + 4))
return(tibble(R255, G255, B255, h_ucs, J_ucs ,M_ucs ,a_ucs, b_ucs))
}
nc <- 5
rgb_vals <- tibble(r1 = rdunif(nc, b=255, a=0), g1 = rdunif(nc, b=255, a=0), b1 = rdunif(nc, b=255, a=0))
test <- 1
test_df <- tibble(h_ucs = numeric(), J_ucs = numeric(), M_ucs = numeric(), a_ucs = numeric(), b_ucs = numeric())
while(test <= nrow(rgb_vals)){
test_r <- sRGB_to_CAM16UCS(rgb_vals[test, 1],rgb_vals[test, 2],rgb_vals[test, 3])
test_df <- rbind(test_df, test_r)
test <- test + 1
}
output <- cbind(rgb_vals, test_df)
print(output)
openxlsx::write.xlsx(output, "rgb2camucs.xlsx")
threw an error as following
Error in atan2(b, a) : non-numeric argument to mathematical function
>
> output <- cbind(rgb_vals, test_df)
Error in data.frame(..., check.names = FALSE) :
arguments imply differing number of rows: 5, 0
> print(output)
Error in print(output) : object 'output' not found
UPDATE: If I wrap the a and b with as.numeric() function, it throws the following error message:
Error:
! Column names `r1`, `r1`, `r1`, `r1`, `r1`, and 1 more must not be duplicated.
Use .name_repair to specify repair.
Caused by error in `repaired_names()`:
! Names must be unique.
x These names are duplicated:
* "r1" at locations 1, 2, 3, 5, 6, etc.
Run `rlang::last_error()` to see where the error occurred.
>
This is because b and a are indeed non-numeric arguments. They are data.frames.
Replacing that line with h_rad <- atan2(b$r1, a$r1) makes it work as class(b$r1) results in numeric. Note that the last line of code where output is exported to an XLSX does not work.
You can also "unpack" your 1x1 data.frame using double brackets before calling your method, i.e., say test_r <- sRGB_to_CAM16UCS(rgb_vals[[test, 1]],rgb_vals[[test, 2]],rgb_vals[[test, 3]]) instead of test_r <- sRGB_to_CAM16UCS(rgb_vals[test, 1],rgb_vals[test, 2],rgb_vals[test, 3]) near the end of your code. This way you are passing the plain numbers to your function. This way, also the export to XLSX works.
I found this using RStudio's debugger, setting a breakpoint to that line and then entering class(b) into the console on the bottom.

loop though a matrix in R

I try to loop trough a matrix but cant find a easy and elegant way instead of writing many (>10) equations... Can anyone help me please?
My Matrix looks like this:
and I want to calculate the following:
(0 * 0 * 4/24) + (0 * 1 * 6/24) + (0 * 2 * 3/24) + (1 * 0 * 3/24) + (1 * 1 * 4/24) + (1 * 2 * 4/24)
instead of using
__
btw: my code for the matrix
vals<- c(4/24, 6/24, 3/24, 3/24, 4/24, 4/24)
x <- c(0,1)
y <- c(0,1,2)
df <- matrix(vals, byrow = TRUE, nrow = 2, ncol = 3,
dimnames = list(x,y))
instead of calculation each step manually, I think there should be a for-loop method, but cant figure it out..
A possible solution:
c(x %*% df %*% y)
#> [1] 0.5
Another possible solution, based on outer:
sum(outer(x, y, Vectorize(\(x,y) x*y*df[x+1,y+1])))
#> [1] 0.5
x <- c(0, 1)
y <- c(0, 1, 2)
vals<- c(4/24, 6/24, 3/24, 3/24, 4/24, 4/24)
mat <- matrix(vals, byrow = TRUE, nrow = 2, ncol = 3,
dimnames = list(x,y)) ## not a data frame; don't call it "df"
There is even a better way than a for loop:
sum(tcrossprod(x, y) * mat)
#[1] 0.5
sum((x %o% y) * df)
Explanation:
x %o% y gets the outer product of vectors x and y which is:
#> [,1] [,2] [,3]
#> [1,] 0 0 0
#> [2,] 0 1 2
Since that has the same dimensions as df, you can multiply the corresponding elements and get the sum: sum((x %o% y) * df)
If you are new to R (as I am), here is the loop approach.
result = 0
for (i in 1:length(x)) {
for (j in 1:length(y)) {
result = result + x[i] * y[j] * df[i, j]
}
}
result

Multiplying a 5X3 matrix and 3X1 vector in R

I am starting to learn R and trying to multiply a 5X3 matrix with a 3X1 column vector in R; However while creating a new variable to perform the operation, R throws the error "non-conformable arrays". Can someone please point out my mistake in the code below -
*#5X3 Matrix*
X <- matrix(c(1,25.5,1.23,1,40.8,1.89,1,30.2,1.55,1,4.3,1.18,1,10.7,1.68),nrow=5,ncol=3,byrow=TRUE)
*3X1 Column vector*
b1 <- matrix(c(23,0.1,-8), nrow = 3, ncol = 1, byrow = TRUE)
v1 <- X * b1
v1
Appreciate your help :)
You need the matrix-multiplication operator %*%:
X <- matrix(c(1,25.5,1.23,1,40.8,1.89,1,30.2,1.55,1,4.3,1.18,1,10.7,1.68),nrow=5,ncol=3,byrow=TRUE)
b1 <- matrix(c(23,0.1,-8), nrow = 3, ncol = 1, byrow = TRUE)
v1 <- X %*% b1
v1
#> [,1]
#> [1,] 15.71
#> [2,] 11.96
#> [3,] 13.62
#> [4,] 13.99
#> [5,] 10.63
Normally one would use the first alternative below but the others are possible too. The first four alternatives below give a column vector as the result while the others give a plain vector without dimensions. The first three work even if b1 has more than one column. The remainder assume b1 has one column but could be generalized.
X %*% b1
crossprod(t(X), b1)
library(einsum)
einsum("ij,jk -> ik", X, b1)
out <- matrix(0, nrow(X), ncol(b1))
for(i in 1:nrow(X)) {
for(k in 1:ncol(X)) out[i] <- out[i] + X[i, k] * b1[k, 1]
}
out
colSums(t(X) * c(b1))
apply(X, 1, crossprod, b1)
sapply(1:nrow(X), function(i) sum(X[i, ] * b1))
rowSums(mapply(`*`, as.data.frame(X), b1))
rowSums(sapply(1:ncol(X), function(j) X[, j] * b1[j]))
X[, 1] * b1[1, 1] + X[, 2] * b1[2, 1] + X[, 3] * b1[3, 1]
Note
The input shown in the question is:
X <- matrix(c(1,25.5,1.23,1,40.8,1.89,1,30.2,1.55,1,4.3,1.18,1,10.7,1.68),nrow=5,ncol=3,byrow=TRUE)
b1 <- matrix(c(23,0.1,-8), nrow = 3, ncol = 1, byrow = TRUE)

Automatically "recognizing" (calculus) derivatives

I am using the R programming language. Using the "optim" library and the "BFGS" optimization algorithm, I am interested in optimizing the following function (also called the "Rosenbrock Function"):
If you define this function and the derivative of this function, it is pretty straightforward to optimize with the "optim" library and the BFGS algorithm (note: the BFGS algorithm requires knowledge of the function's derivative):
fr <- function(x) { ## Rosenbrock Banana function
x1 <- x[1]
x2 <- x[2]
100 * (x2 - x1 * x1)^2 + (1 - x1)^2
}
grr <- function(x) { ## Gradient of 'fr'
x1 <- x[1]
x2 <- x[2]
c(-400 * x1 * (x2 - x1 * x1) - 2 * (1 - x1),
200 * (x2 - x1 * x1))
}
res <- optim(c(-1.2,1), fr, grr, method = "BFGS")
> res
$par
[1] 1 1
$value
[1] 9.594956e-18
$counts
function gradient
110 43
$convergence
[1] 0
$message
NULL
Suppose you are working with a high dimensional complicated function - the derivative of this function will be difficult to manually evaluate and then write a function for this derivative (i.e. an additional place where you can make a mistake). Are there any "automatic" ways in R, such that if you write a mathematical function - R can automatically "infer" the derivative of this function?
For instance, in a new R session - would there have been some way to run the BFGS algorithm without explicitly defining the derivative?
fr <- function(x) { ## Rosenbrock Banana function
x1 <- x[1]
x2 <- x[2]
100 * (x2 - x1 * x1)^2 + (1 - x1)^2
}
#pseudo code
res <- optim(c(-1.2,1), fr, ??? , method = "BFGS")
Does anyone know if something like this is possible? Can R automatically infer the derivative?
I thought of an approach where you could use a preexisting "numerical differentiation" function in R to approximate the derivative at each iteration, and then feed this approximation into the BFGS algorithm, but that sounds very complicated and unnecessary.
It would have been nice if R could somehow automatically infer the derivative of a function.
References:
https://www.rdocumentation.org/packages/stats/versions/3.6.2/topics/optim
This is already built into the optim() function. If you don't specify the derivative it will be calculated numerically e.g.
fr <- function(x) { ## Rosenbrock Banana function
x1 <- x[1]
x2 <- x[2]
100 * (x2 - x1 * x1)^2 + (1 - x1)^2
}
optim(c(-1.2,1), fr, method = "BFGS")
# $par
# [1] 0.9998044 0.9996084
#
# $value
# [1] 3.827383e-08
#
# $counts
# function gradient
# 118 38
#
# $convergence
# [1] 0
#
# $message
# NULL
Note that (at least for this case) the solution is very close to that found using the analytical derivatives.

Weighted Pearson's Correlation?

I have a 2396x34 double matrix named y wherein each row (2396) represents a separate situation consisting of 34 consecutive time segments.
I also have a numeric[34] named x that represents a single situation of 34 consecutive time segments.
Currently I am calculating the correlation between each row in y and x like this:
crs[,2] <- cor(t(y),x)
What I need now is to replace the cor function in the above statement with a weighted correlation. The weight vector xy.wt is 34 elements long so that a different weight can be assigned to each of the 34 consecutive time segments.
I found the Weighted Covariance Matrix function cov.wt and thought that if I first scale the data it should work just like the cor function. In fact you can specify for the function to return a correlation matrix as well. Unfortunately it does not seem like I can use it in the same manner because I cannot supply my two variables (x and y) separately.
Does anyone know of a way I can get a weighted correlation in the manner I described without sacrificing much speed?
Edit: Perhaps some mathematical function could be applied to y prior to the cor function in order to get the same results that I'm looking for. Maybe if I multiply each element by xy.wt/sum(xy.wt)?
Edit #2 I found another function corr in the boot package.
corr(d, w = rep(1, nrow(d))/nrow(d))
d
A matrix with two columns corresponding to the two variables whose correlation we wish to calculate.
w
A vector of weights to be applied to each pair of observations. The default is equal weights for each pair. Normalization takes place within the function so sum(w) need not equal 1.
This also is not what I need but it is closer.
Edit #3
Here is some code to generate the type of data I am working with:
x<-cumsum(rnorm(34))
y<- t(sapply(1:2396,function(u) cumsum(rnorm(34))))
xy.wt<-1/(34:1)
crs<-cor(t(y),x) #this works but I want to use xy.wt as weight
Unfortunately the accepted answer is wrong when y is a matrix of more than one row. The error is in the line
vy <- rowSums( w * y * y )
We want to multiply the columns of y by w, but this will multiply the rows by the elements of w, recycled as necessary. Thus
> f(x, y[1, , drop = FALSE], xy.wt)
[1] 0.103021
is correct, because in this case the multiplication is performed element-wise, which is equivalent to column-wise multiplication here, but
> f(x, y, xy.wt)[1]
[1] 0.05463575
gives a wrong answer due to the row-wise multiplication.
We can correct the function as follows
f2 <- function( x, y, w = rep(1,length(x))) {
stopifnot(length(x) == dim(y)[2] )
w <- w / sum(w)
# Center x and y, using the weighted means
x <- x - sum(x * w)
ty <- t(y - colSums(t(y) * w))
# Compute the variance
vx <- sum(w * x * x)
vy <- colSums(w * ty * ty)
# Compute the covariance
vxy <- colSums(ty * x * w)
# Compute the correlation
vxy / sqrt(vx * vy)
}
and check the results against those produced by corr from the boot package:
> res1 <- f2(x, y, xy.wt)
> res2 <- sapply(1:nrow(y),
+ function(i, x, y, w) corr(cbind(x, y[i,]), w = w),
+ x = x, y = y, w = xy.wt)
> all.equal(res1, res2)
[1] TRUE
which in itself gives another way that this problem could be solved.
You can go back to the definition of the correlation.
f <- function( x, y, w = rep(1,length(x))) {
stopifnot( length(x) == dim(y)[2] )
w <- w / sum(w)
# Center x and y, using the weighted means
x <- x - sum(x*w)
y <- y - apply( t(y) * w, 2, sum )
# Compute the variance
vx <- sum( w * x * x )
vy <- rowSums( w * y * y ) # Incorrect: see Heather's remark, in the other answer
# Compute the covariance
vxy <- colSums( t(y) * x * w )
# Compute the correlation
vxy / sqrt(vx * vy)
}
f(x,y)[1]
cor(x,y[1,]) # Identical
f(x, y, xy.wt)
Here is a generalization to compute the weighted Pearson correlation between two matrices (instead of a vector and a matrix, as in the original question):
matrix.corr <- function (a, b, w = rep(1, nrow(a))/nrow(a))
{
# normalize weights
w <- w / sum(w)
# center matrices
a <- sweep(a, 2, colSums(a * w))
b <- sweep(b, 2, colSums(b * w))
# compute weighted correlation
t(w*a) %*% b / sqrt( colSums(w * a**2) %*% t(colSums(w * b**2)) )
}
Using the above example and the correlation function from Heather, we can verify it:
> sum(matrix.corr(as.matrix(x, nrow=34),t(y),xy.wt) - f2(x,y,xy.wt))
[1] 1.537507e-15
In terms of calling syntax, this resembles the unweighted cor:
> a <- matrix( c(1,2,3,1,3,2), nrow=3)
> b <- matrix( c(2,3,1,1,7,3,5,2,8,1,10,12), nrow=3)
> matrix.corr(a,b)
[,1] [,2] [,3] [,4]
[1,] -0.5 0.3273268 0.5 0.9386522
[2,] 0.5 0.9819805 -0.5 0.7679882
> cor(a, b)
[,1] [,2] [,3] [,4]
[1,] -0.5 0.3273268 0.5 0.9386522
[2,] 0.5 0.9819805 -0.5 0.7679882

Resources