I have defined the next function
inequalizer <- function(x,caracter) {
if(caracter=="X") {
function(y) {y[1] < x}
} else if(caracter=="Y") {
function(y) {y[2] < x}
} else {
function(y) {y[3] < x}
}
}
which returns one function depending on the input parameters x and caracter. I have another function where I call this function recursively , whose arguments depend on some initial data.
This function returned by "inequalizer" is saved as
function(y) {y[2] < x}
<bytecode: 'code'>
<environment: 'code'>
I want to know if there is some way to save it with the literal argument passed to x. So if those parameters are x=1 caracter="Y" I would get
function(y) {y[2] < 1}
<bytecode: 'code'>
<environment: 'code'>
Maybe store x as attr
inequalizer <- function(x, caracter) {
if(caracter=="X") {
foo = function(y) {y[1] < x}
attr(foo, "x") = x
foo
} else if(caracter=="Y") {
foo = function(y) {y[2] < x}
attr(foo, "x") = x
foo
} else {
foo = function(y) {y[3] < x}
attr(foo, "x") = x
foo
}
}
myf = inequalizer(5, "X")
myf
#function(y) {y[1] < x}
#<environment: 0x000000001c12e2d0>
#attr(,"x")
#[1] 5
Related
I have defined multiple different functions, each containing one equation each, like so:
catalanFormula <- function(n){
return( (factorial(2 * n)) / ((factorial(n + 1)) * factorial(n)) )
}
triangularFormula <- function(n){
return( (n * (n+1)) / 2 )
}
I am then plotting each function using the plot() function from base R. What I want to do is be able to include the equation in my plot as a label or text, but in a way that is easily understandable. I know I can use LaTeX to write each equation manually, but I was wondering if there is a package or method anyone knows of that can deparse a mathematical equation in R and turn it into a standard readable mathematical equation. The only math occurring is factorials, multiplication, division, exponents, addition, and subtraction.
For example, I want to convert
(factorial(2 * n)) / ((factorial(n + 1)) * factorial(n))
into something that looks like this.
Then, be able to use the output within the text() function of base R plotting.
With the Ryacas package you can transform a math expression to its corresponding LaTeX code. But the factorial must be given as !:
library(Ryacas)
eq <- yac_symbol("(2 * n)! / ((n + 1)! * n!)")
tex(eq)
# "\\frac{\\left( 2 n\\right) !}{\\left( n + 1\\right) ! n!}"
Another possibility is to call Python from R and use the pytexit library:
from pytexit import py2tex
py2tex("math.factorial(2 * n) / ((factorial(n + 1)) * factorial(n))")
# $$\frac{\operatorname{factorial}\left(2n\right)}{\operatorname{factorial}\left(n+1\right) \operatorname{factorial}\left(n\right)}$$
You can also take a look at PyLaTeX and lax.
Here's a base R function that will walk the abstract syntax tree to replace the factorial and / functions with the corresponding ?plotmath markup so you can add them to R plots.
returnToPlotmath <- function(fun) {
swap <- function(x) {
if (class(x) %in% c("call","(")) {
x <- as.list(x)
if (as.character(x[[1]])=="/") {
x[[1]] = quote(frac)
x[[2]] = swap(x[[2]])
x[[3]] = swap(x[[3]])
} else if (as.character(x[[1]])=="factorial") {
x[[1]] = quote(`*`)
if (is.call(x[[2]])) {
x[[2]] = as.call(list(quote(`(`), x[[2]]))
} else {
x[[2]] = swap(x[[2]])
}
x[[3]] = "!"
} else if (as.character(x[[1]])=="*") {
if(is.call(x[[2]]) | is.call(x[[3]])) {
x[[1]] = quote(`%.%`)
x[[2]] = swap(x[[2]])
x[[3]] = swap(x[[3]])
}
} else {
x[[2]] = swap(x[[2]])
if (length(x)==3) x[[3]] = swap(x[[3]])
}
return(as.call(x))
} else {
return(x)
}
}
body_exprs <- body(fun)[[2]]
swap(body_exprs[[length(body_exprs)]])
}
This does assume that the return is the last statement in the function body {} block. You can get the expression with
returnToPlotmath(catalanFormula)
# frac(((2 * n) * "!"), (((n + 1) * "!") %.% (n * "!")))
returnToPlotmath(triangularFormula)
# frac((n %.% (n + 1)), 2)
And you can add them to plot titles and such
plot(main=returnToPlotmath(catalanFormula), 1, 1)
plot(main=returnToPlotmath(triangularFormula), 1, 1)
This solution is highly specific to the functions you need to transform. But it could be extended if needed.
The expr2latex() function from simsalapar parses expressions to LaTeX, and can be extended to include the factorial symbol with a minor addition to the code:
expr2latex2 <- function(expr) {
L <- length(expr)
c.BinTable <- simsalapar:::c.BinTable
if(!L) "" else {
Symb <- is.symbol(expr)
F <- if(Symb) expr else expr[[1]]
cF <- simsalapar:::mDeparse(F)
FF <- simsalapar:::renderAtom(F, Len=L, d.a = cF)
if(Symb && L != 1)
stop("is.symbol(.), but length(.) = ", L, " != 1")
else if(!Symb && typeof(expr) != "language" && L != 1)
stop("is not language nor symbol), but length(.) = ", L, " != 1")
switch(L,
## length 1:
FF,
{ ## length 2: e.g. "- 1", "+ x", "!TRUE", "~ ff",
#browser()
rhs <- expr2latex2(expr[[2]])
if (cF == "bold") paste0("\\mathbf{", rhs, "}")
else if(cF == "italic") paste0("\\mathit{", rhs, "}")
else if(cF == "factorial") paste0("(",rhs,")!") #extra case added in for factorial
else if(!simsalapar:::isOp(cF)) # not a binary operator ==> "function call":
paste0(FF,"(",rhs,")") ## e.g. "O(n)"
else if(cF == "{") paste0("{", rhs, "}")
else if(cF == "(") paste0("(", rhs, ")")
else paste(FF, rhs)
},
{ ## length 3:
lhs <- expr2latex2(expr[[2]])
rhs <- expr2latex2(expr[[3]])
if(cF == "[") ## subscript
paste0(lhs, "_{", rhs, "}")
else if(cF == "~") ## space
paste(lhs, "\\", rhs)
## not treated, as plotmath() does neither :
## else if(cF == "[[")
## paste0(lhs, "[[", rhs, "]]")
else if(cF %in% c.BinTable)
paste(lhs, simsalapar:::getTab(cF, simsalapar:::BinTable), rhs)
else if(cF %in% c.RelTable)
paste(lhs, simsalapar:::getTab(cF, simsalapar:::RelTable), rhs)
else if(simsalapar:::isOp(cF)) ## e.g. U + x
paste(lhs, FF, rhs)
else ## log(x, 2)
paste0(FF, "(", lhs, ",", rhs, ")")
},
## length >=4 : F(a, b, c, ...)
stop("length(expr) = ",L," (>= 4); not yet implemented") # TODO MM
)## end{switch}
}
}
original function
expr2latex( quote( (factorial(2 * n)) / ((factorial(n + 1)) * factorial(n)) ) )
#[1] "(factorial(2 n)) / ((factorial(n + 1)) factorial(n))"
revised treatment of factorials
expr2latex2( quote( (factorial(2 * n)) / ((factorial(n + 1)) * factorial(n)) ) )
#[1] "((2 n)!) / (((n + 1)!) (n)!)"
I found myself often writing code such as
#' #param x input vector
#' #param ... passed to [slow_fun()]
fast_fun <- function(x, ...) {
u <- unique(x)
i <- match(x, u)
v <- slow_fun(u, ...)
v[i]
}
To accelerate a slow vectorized "pure" function where each input entry could theoretically be computed individually and where input is expected to contain many duplicates.
Now I wonder whether this is the best way to achieve such a speedup or is there some function (preferrably in base R or the tidyverse) which does something like unique and match at the same time?
Benchmarks so far
Thanks for the provided answers. I've written a small benchmark suite to compare the approaches:
method <- list(
brute = slow_fun,
unique_match = function(x, ...) {
u <- unique(x)
i <- match(x, u)
v <- slow_fun(u, ...)
v[i]
},
unique_factor = function(x, ...) {
if (is.character(x)) {
x <- factor(x)
i <- as.integer(x)
u <- levels(x)
} else {
u <- unique(x)
i <- as.integer(factor(x, levels = u))
}
v <- slow_fun(u, ...)
v[i]
},
unique_match_df = function(x, ...) {
u <- unique(x)
i <- if (is.numeric(x)) {
match(data.frame(t(round(x, 10))), data.frame(t(round(u, 10))))
} else {
match(data.frame(t(x)), data.frame(t(u)))
}
v <- slow_fun(u, ...)
v[i]
},
rcpp_uniquify = function(x, ...) {
iu <- uniquify(x)
v <- slow_fun(iu[["u"]], ...)
v[iu[["i"]]]
}
)
exprs <- lapply(method, function(fun) substitute(fun(x), list(fun = fun)))
settings$bench <- lapply(seq_len(nrow(settings)), function(i) {
cat("\rBenchmark ", i, " / ", nrow(settings), sep = "")
x <- switch(
settings$type[i],
integer = sample.int(
n = settings$n_distinct[i],
size = settings$n_total[i],
replace = TRUE
),
double = sample(
x = runif(n = settings$n_distinct[i]),
size = settings$n_total[i],
replace = TRUE
),
character = sample(
x = stringi::stri_rand_strings(
n = settings$n_distinct[i],
length = 20L
),
size = settings$n_total[i],
replace = TRUE
)
)
microbenchmark::microbenchmark(
list = exprs
)
})
library(tidyverse)
settings %>%
mutate(
bench = map(bench, summary)
) %>%
unnest(bench) %>%
group_by(n_distinct, n_total, type) %>%
mutate(score = median / min(median)) %>%
group_by(expr) %>%
summarise(mean_score = mean(score)) %>%
arrange(mean_score)
Currently, the rcpp-based approach is best in all tested settings on my machine but barely manages to exceed the unique-then-match method.
I suspect a greater advantage in performance the longer x becomes, because unique-then-match needs two passes over the data while uniquify() only needs one pass.
|expr | mean_score|
|:---------------|----------:|
|rcpp_uniquify | 1.018550|
|unique_match | 1.027154|
|unique_factor | 5.024102|
|unique_match_df | 36.613970|
|brute | 45.106015|
Maybe you can try factor + as.integer like below
as.integer(factor(x))
I found a cool, and fast, answer recently,
match(data.frame(t(x)), data.frame(t(y)))
As always, beware when working with floats. I recommend something like
match(data.frame(t(round(x,10))), data.frame(t(round(y))))
in such cases.
I've finally managed to beat unique() and match() using Rcpp to hand-code the algorithm in C++ using a std::unordered_map as core bookkeeping data structure.
Here is the source code, which can be used in R by writing it into a file and running Rcpp::sourceCpp on it.
#include <Rcpp.h>
using namespace Rcpp;
template <int T>
List uniquify_impl(Vector<T> x) {
IntegerVector idxes(x.length());
typedef typename Rcpp::traits::storage_type<T>::type storage_t;
std::unordered_map<storage_t, int> unique_map;
int n_unique = 0;
// 1. Pass through x once
for (int i = 0; i < x.length(); i++) {
storage_t curr = x[i];
int idx = unique_map[curr];
if (idx == 0) {
unique_map[curr] = ++n_unique;
idx = n_unique;
}
idxes[i] = idx;
}
// 2. Sort unique_map by its key
Vector<T> uniques(unique_map.size());
for (auto &pair : unique_map) {
uniques[pair.second - 1] = pair.first;
}
return List::create(
_["u"] = uniques,
_["i"] = idxes
);
}
// [[Rcpp::export]]
List uniquify(RObject x) {
switch (TYPEOF(x)) {
case INTSXP: {
return uniquify_impl(as<IntegerVector>(x));
}
case REALSXP: {
return uniquify_impl(as<NumericVector>(x));
}
case STRSXP: {
return uniquify_impl(as<CharacterVector>(x));
}
default: {
warning(
"Invalid SEXPTYPE %d (%s).\n",
TYPEOF(x), type2name(x)
);
return R_NilValue;
}
}
}
I am trying to write a very simple function wrapper in R, that will accept f and return g where g returns zero whenever the first argument is negative. I have the following code
wrapper <- function(f) {
function(x, ...) {
if( x <= 0 ) { 0 }
else { f(x, ...) }
}
}
Thge wrapper works as expected, but is there are way to maintain the function signature
> wdnorm <- wrapper(dnorm)
> args(dnorm)
function (x, mean = 0, sd = 1, log = FALSE)
NULL
> args(wdnorm)
function (x, ...)
NULL
I would like to do something like this (but obviously it doesn't work)
args(g) <- args(f)
is this possible in R?
Here is what you want. Tho, do you really need this?
wrapper <- function(f) {
f2 = function(x) {
if (x <= 0) { 0 }
else { do.call(f, as.list( match.call())[-1]) }
}
formals(f2) = formals(f)
f2
}
wdnorm <- wrapper(dnorm)
args(dnorm)
args(wdnorm)
wdnorm(-5)
wdnorm(5)
output
> args(dnorm)
function (x, mean = 0, sd = 1, log = FALSE)
NULL
> args(wdnorm)
function (x, mean = 0, sd = 1, log = FALSE)
NULL
> wdnorm(-5)
[1] 0
> wdnorm(5)
[1] 1.48672e-06
>#R version 3.1.1 (2014-07-10) -- "Sock it to Me"
> library(pryr)
> f1 <- function() { cat("hi1\n"); 1 }
> f2 <- function() { cat("hi2\n"); 2 }
This is what I would like to have but with e = globalenv:
> e <- new.env()
> delayedAssign("x", f1(), assign.env = e)
> substitute(x, e)
f1()
If I try substitute it does not work (according to doc, there is no substitute in globalenv):
> delayedAssign("x", f1(), assign.env = globalenv())
> substitute(x, globalenv())
x
subs from pryr doesn not work either because it evaluates the promise:
> subs(x, globalenv())
hi1
[1] 1
in fact it evaluates all promises, because it converts globalenv to a list:
> delayedAssign("x", f1(), assign.env = globalenv())
> delayedAssign("y", f2(), assign.env = globalenv())
> subs(x, globalenv())
hi1
hi2
[1] 1
>
To sum up: Is it possible to have something like substitute that works with promises in all environments including globalenv?
UPDATE
I could not figure out how to do this within R, so I wrote (more precisely lifted from the R code) the following C func
SEXP get_no_eval(SEXP symbol, SEXP envir) {
SEXP t;
if (!Rf_isString(symbol) || Rf_length(symbol)!=1)
Rf_error("symbol is not a string");
if (!Rf_isEnvironment(envir))
Rf_error("envir is not an environment");
if (envir==R_NilValue)
Rf_error("envir is NULL");
t = Rf_findVar(Rf_install(CHAR(STRING_ELT(symbol, 0))), envir);
if (t != R_UnboundValue) {
if (TYPEOF(t) == PROMSXP) {
do {
t = PREXPR(t);
} while(TYPEOF(t) == PROMSXP);
if (NAMED(t) < 2) SET_NAMED(t, 2);
return t;
}
else if (TYPEOF(t) == DOTSXP)
Rf_error("'...' used in an incorrect context");
return t;
}
Rf_error("symbol not found");
return R_NilValue;
}
Now, continuing my example from above I can do:
> delayedAssign("x", f1(), assign.env = globalenv())
> get_no_eval("x", globalenv())
f1()
Not exactly substitute but works for what I needed.
Using:
mean (x, trim=0.05)
Removes 2.5% from each side of the distribution, which is fine for symmetrical two-tailed data. But if I have one tailed or highly asymmetric data I would like to be able to remove just one side of the distribution. Is there a function for this or do I have write myself a new one? If so, how?
Just create a modified mean.default. First look at mean.default:
mean.default
Then modify it to accept a new argument:
mean.default <-
function (x, trim = 0, na.rm = FALSE, ..., side="both")
{
if (!is.numeric(x) && !is.complex(x) && !is.logical(x)) {
warning("argument is not numeric or logical: returning NA")
return(NA_real_)
}
if (na.rm)
x <- x[!is.na(x)]
if (!is.numeric(trim) || length(trim) != 1L)
stop("'trim' must be numeric of length one")
n <- length(x)
if (trim > 0 && n) {
if (is.complex(x))
stop("trimmed means are not defined for complex data")
if (any(is.na(x)))
return(NA_real_)
if (trim >= 0.5)
return(stats::median(x, na.rm = FALSE))
lo <- if( side=="both" || side=="right" ){ floor(n * trim) + 1 }else{1}
hi <- if( side=="both" || side=="left" ){ n + 1 - (floor(n * trim) + 1 ) }else{ n}
x <- sort.int(x, partial = unique(c(lo, hi)))[lo:hi]
cat(c(length(x), lo , hi) )
}
.Internal(mean(x))
}
I don't know of a function. Something like the following would trim off the upper tail of the distribution before taking the mean.
upper.trim.mean <- function(x,trim) {
x <- sort(x)
mean(x[1:floor(length(x)*(1-trim))])
}
This should account for either side, or both sides for trimming.
trim.side.mean <- function(x, trim, type="both"){
if (type == "both") {
mean(x,trim)}
else if (type == "right") {
x <- sort(x)
mean(x[1:floor(length(x)*(1-trim))])}
else if (type == "left"){
x <- sort(x)
mean(x[max(1,floor(length(x)*trim)):length(x)])}}
one.sided.trim.mean <- function(x, trim, upper=T) {
if(upper) trim = 1-trim
data <- mean(x[x<quantile(x, trim)])
}
I found that all the answers posted do not match when checked manually. So I created one of my own. Its long but simple enough to understand
get_trim <- function(x,trim,type)
{
x <- sort(x)
ans<-0
if (type=="both")
{
for (i in (trim+1):(length(x)-trim))
{
ans=ans+x[i];
}
print(ans/(length(x)-(2*trim)))
}
else if(type=="left")
{
for (i in (trim+1):(length(x)))
{
ans=ans+x[i];
}
print(ans/(length(x)-trim))
}
else if (type=="right")
{
for (i in 1:(length(x)-trim))
{
ans=ans+x[i];
}
print(ans/(length(x)-trim))
}
}