This calculates the log of (x-1)! to return the lgamma(x) value of an integer but my function log_gamma works only till x = 171 for x > 171 it returns Inf. How can I solve this problem?
log_gamma <- function(x){
y <- 1
if (x < 1)(
return("Infinity")
)
if (x == 1)(
return(0)
)
x <- x-1
for (i in 1:x){
y <- y*i
}
return(log(y))
}
Your current solution first computes 171! which is a pretty big number. Instead, use the fact that log(a*b) = log(a) + log(b) to compute this as a sum.
log_gamma <- function(x){
y <- 1
if (x < 1)(
return("Infinity")
)
if (x == 1)(
return(0)
)
x <- x-1
for (i in 1:x){
y <- y + log(i)
}
return(y)
}
log_gamma(171)
[1] 707.5731
log_gamma(172)
[1] 712.7147
log_gamma(1000)
[1] 5906.22
Related
I'm trying to get the following function to work for windsorizing attributes but I can't get the if elseif working in a function. It gives the following error: "the condition has length > 1 and only the first element will be used". I'm hoping someone can suggest a solution or alternative.
Example:
x <- data.frame(runif(100, 0, 100))
colnames(x) <- "test"
WINSORIZE <- function(x){
WIN_MEAN <- mean(x)
WIN_SD <- sd(x)
WIN_UPPER <- sum(WIN_MEAN + (3 * WIN_SD))
WIN_LOWER <- sum(WIN_MEAN - (3 * WIN_SD))
if(x > WIN_UPPER){
WIN_UPPER
} else if (x < WIN_LOWER) {WIN_LOWER
} else x
}
WINSORIZE(x$test)
Solution
Use R's immanent vectorized ability. Select by [ and change the value by <- assignment.
This solution is very R-ish:
winsorize <- function(x) {
m <- mean(x)
s <- sd(x)
u <- m + 3 * s
l <- m - 3 * s
x[ x > u ] <- u # select elements > u and assign to them u in situ
x[ x < l ] <- l # select elements < l and assign to them l in situ
x # return the resulting vector
}
And also this solution is very R-ish with the already vectorized ifelse() function:
winsorize <- function(x) {
m <- mean(x)
s <- sd(x)
u <- m + 3 * s
l <- m - 3 * s
ifelse( x > u, u, ifelse( x < l, l, x))
}
Solution with sapply()
Another possibility is to use sapply(x, ...) to apply your if-else constructs on each element of x.
winsorize <- function(x){
m <- mean(x)
s <- sd(x)
upper <- m + 3 * s
lower <- m - 3 * s
# apply your if-else construct on each individual element (el) of x
# using `sapply()`
sapply(x, function(el) if(el > upper){
upper
} else if (el < lower) {
lower
} else {
el})
}
Or the same with ifelse():
winsorize <- function(x){
m <- mean(x)
s <- sd(x)
upper <- m + 3 * s
lower <- m - 3 * s
sapply(x, function(el)
ifelse(el > upper, upper, ifelse(el < lower, lower, el))
}
Solution with Vectorize()
Or make a function out of your if-else construct, vectorize this function using Vectorize() before you apply it on x:
winsorize <- function(x){
m <- mean(x)
s <- sd(x)
upper <- m + 3 * s
lower <- m - 3 * s
# define function for one element
winsorize.one.element <- function(el) {
if(el > upper){ upper } else if (el < lower) { lower } else { el}
}
# Vectorize this function
winsorize.elements <- Vectorize(winsorize.one.element)
# Apply the vectorized function to the vector and return the result
winsorize.elements(x)
}
This winsorize.one.element function can be written neater by ifelse,
but although ifelse is vectorized
I have a large loop that will take too long (~100 days). I'm hoping to speed it up with the snow library, but I'm not great with apply statements. This is only part of the loop, but if I can figure this part out, the rest should be straightforward. I'm ok with a bunch of apply statements or loops, but one apply statement using a function to get object 'p' would be ideal.
Original data
dim(m1) == x x # x >>> 0
dim(m2) == y x # y >>> 0, y > x, y > x-10
dim(mout) == x x
thresh == x-10 #specific to my data, actual number probably unimportant
len(v1) == y #each element is a random integer, min==1, max==thresh
len(v2) == y #each element is a random integer, min==1, max==thresh
Original loop
p <- rep(NA,y)
for (k in 1:y){
mout <- m1 * matrix(m2[k,],x,x)
mout <- mout/sum(mout)
if (v1[k] < thresh + 1){
if(v2[k] < thresh + 1){
p[k] <- out[v1[k],v2[k]]
}
if(v2[k] > thresh){
p[k] <- sum(mout[v1[k],(thresh+1):x])
}
}
#do stuff with object 'p'
}
library(snow)
dostuff <- function(k){
#contents of for-loop
mout <- m1 * matrix(m2[k,],x,x)
mout <- mout/sum(mout)
if (v1[k] < thresh + 1){
if(v2[k] < thresh + 1){
p <- out[v1[k],v2[k]]
}
if(v2[k] > thresh){
p <- sum(mout[v1[k],(thresh+1):x])
}
}
#etc etc
return(list(p,
other_vars))
}
exports = c('m1',
'm2',
'thresh',
'v1',
'x' ,
'v2')
cl = makeSOCKcluster(4)
clusterExport(cl,exports)
loop <- as.array(1:y)
out <- parApply(cl,loop,1,dostuff)
p <- rep(NA,y)
for(k in 1:y){
p[k] <- out[[k]][[1]]
other_vars[k] <- out[[k]][[2]]
}
I tried running the code below.
set.seed(307)
y<- rnorm(200)
h2=0.3773427
t=seq(-3.317670, 2.963407, length.out=500)
fit=density(y, bw=h2, n=1024, kernel="epanechnikov")
integrate.xy(fit$x, fit$y, min(fit$x), t[407])
However, i recived the following message:
"Error in seq.default(a, length = max(0, b - a - 1)) :
length must be non-negative number"
I am not sure what's wrong.
I do not encounter any problem when i use t[406] or t[408] as follow:
integrate.xy(fit$x, fit$y, min(fit$x), t[406])
integrate.xy(fit$x, fit$y, min(fit$x), t[408])
Does anyone know what's the problem and how to fix it? Appreciate your help please. Thanks!
I went through the source code for the integrate.xy function, and there seems to be a bug relating to the usage of the xtol argument.
For reference, here is the source code of integrate.xy function:
function (x, fx, a, b, use.spline = TRUE, xtol = 2e-08)
{
dig <- round(-log10(xtol))
f.match <- function(x, table) match(signif(x, dig), signif(table,
dig))
if (is.list(x)) {
fx <- x$y
x <- x$x
if (length(x) == 0)
stop("list 'x' has no valid $x component")
}
if ((n <- length(x)) != length(fx))
stop("'fx' must have same length as 'x'")
if (is.unsorted(x)) {
i <- sort.list(x)
x <- x[i]
fx <- fx[i]
}
if (any(i <- duplicated(x))) {
n <- length(x <- x[!i])
fx <- fx[!i]
}
if (any(diff(x) == 0))
stop("bug in 'duplicated()' killed me: have still multiple x[]!")
if (missing(a))
a <- x[1]
else if (any(a < x[1]))
stop("'a' must NOT be smaller than min(x)")
if (missing(b))
b <- x[n]
else if (any(b > x[n]))
stop("'b' must NOT be larger than max(x)")
if (length(a) != 1 && length(b) != 1 && length(a) != length(b))
stop("'a' and 'b' must have length 1 or same length !")
else {
k <- max(length(a), length(b))
if (any(b < a))
stop("'b' must be elementwise >= 'a'")
}
if (use.spline) {
xy <- spline(x, fx, n = max(1024, 3 * n))
if (xy$x[length(xy$x)] < x[n]) {
if (TRUE)
cat("working around spline(.) BUG --- hmm, really?\n\n")
xy$x <- c(xy$x, x[n])
xy$y <- c(xy$y, fx[n])
}
x <- xy$x
fx <- xy$y
n <- length(x)
}
ab <- unique(c(a, b))
xtol <- xtol * max(b - a)
BB <- abs(outer(x, ab, "-")) < xtol
if (any(j <- 0 == apply(BB, 2, sum))) {
y <- approx(x, fx, xout = ab[j])$y
x <- c(ab[j], x)
i <- sort.list(x)
x <- x[i]
fx <- c(y, fx)[i]
n <- length(x)
}
ai <- rep(f.match(a, x), length = k)
bi <- rep(f.match(b, x), length = k)
dfx <- fx[-c(1, n)] * diff(x, lag = 2)
r <- numeric(k)
for (i in 1:k) {
a <- ai[i]
b <- bi[i]
r[i] <- (x[a + 1] - x[a]) * fx[a] + (x[b] - x[b - 1]) *
fx[b] + sum(dfx[seq(a, length = max(0, b - a - 1))])
}
r/2
}
The value given to the xtol argument, is being overwritten in the line xtol <- xtol * max(b - a). But the value of the dig variable is calculated based on the original value of xtol, as given in the input to the function. Because of this mismatch, f.match function, in the line bi <- rep(f.match(b, x), length = k), returns no matches between x and b (i.e., NA). This results in the error that you have encountered.
A simple fix, at least for the case in question, would be to remove the xtol <- xtol * max(b - a) line. But, you should file a bug report with the maintainer of this package, for a more rigorous fix.
Suppose I have a vector x which I want to convolve with itself n times. What is the good way to do this in R?
Suppose that we already have a function conv(u,v) that convolves two vectors.
I can do this:
autoconv<-function(x,n){
r<-1;
for(i in 1:n){
r<-conv(r,x);
}
return(r);
}
is there a more efficient way?
Take the Fast Fourier Transform (fft) of x, raise it to the kth power and take the inverse fft. Then compare that to performing convolutions of k copies of x. No packages are used.
# set up test data
set.seed(123)
k <- 3 # no of vectors to convolve
n <- 32 # length of x
x <- rnorm(n)
# method 1 using fft and inverse fft
yy <- Re(fft(fft(x)^k, inverse = TRUE) / n)
# method 2 using repeated convolutions
y <- x
if (k >= 2) for(i in 2:k) y <- convolve(x, y, FALSE)
# check that the two methods give the same result
all.equal(y, yy)
## TRUE
autoconv <- function(x, n){
if(n == 0){
return(1)
} else if(n == 1){
return(x)
} else {
i <- 2
xi <- conv(x,x)
while(n %% i != 0){
i <- i + 1
xi <- conv(xi,x)
}
return(autoconv(xi,n/i))
}
}
This will call conv() once for each prime factor of n, rather than n times.
I can't seem to make apply function access/modify a variable that is declared outside... what gives?
x = data.frame(age=c(11,12,13), weight=c(100,105,110))
x
testme <- function(df) {
i <- 0
apply(df, 1, function(x) {
age <- x[1]
weight <- x[2]
cat(sprintf("age=%d, weight=%d\n", age, weight))
i <- i+1 #this could not access the i variable in outer scope
z <- z+1 #this could not access the global variable
})
cat(sprintf("i=%d\n", i))
i
}
z <- 0
y <- testme(x)
cat(sprintf("y=%d, z=%d\n", y, z))
Results:
age=11, weight=100
age=12, weight=105
age=13, weight=110
i=0
y=0, z=0
Using the <<- operator you can write to variables in outer scopes:
x = data.frame(age=c(11,12,13), weight=c(100,105,110))
x
testme <- function(df) {
i <- 0
apply(df, 1, function(x) {
age <- x[1]
weight <- x[2]
cat(sprintf("age=%d, weight=%d\n", age, weight))
i <<- i+1 #this could not access the i variable in outer scope
z <<- z+1 #this could not access the global variable
})
cat(sprintf("i=%d\n", i))
i
}
z <- 0
y <- testme(x)
cat(sprintf("y=%d, z=%d\n", y, z))
The result here:
age=11, weight=100
age=12, weight=105
age=13, weight=110
i=3
y=3, z=3
Note that the usage of <<- is dangerous, as you break up scoping. Do this only if really necessary and if you do, document that behavior clearly (at least in bigger scripts)
try the following inside your apply. Experiment with the value of n. I believe that for i it should be one less than for z.
assign("i", i+1, envir=parent.frame(n=2))
assign("z", z+1, envir=parent.frame(n=3))
testme <- function(df) {
i <- 0
apply(df, 1, function(x) {
age <- x[1]
weight <- x[2]
cat(sprintf("age=%d, weight=%d\n", age, weight))
## ADDED THESE LINES
assign("i", i+1, envir=parent.frame(2))
assign("z", z+1, envir=parent.frame(3))
})
cat(sprintf("i=%d\n", i))
i
}
OUTPUT
> z <- 0
> y <- testme(x)
age=11, weight=100
age=12, weight=105
age=13, weight=110
i=3
> cat(sprintf("y=%d, z=%d\n", y, z))
y=3, z=3