r how to keep print method for custom class - r

i have defined a method for printing a vector with the class test:
print.test <- function(x, ...) {
x <- formatC(
as.numeric(x),
format = "f",
big.mark = ".",
decimal.mark = ",",
digits = 1
)
x[x == "NA"] <- "-"
x[x == "NaN"] <- "-"
print.default(x)
}
which works fine for the following
a <- c(1000.11, 2000.22, 3000.33)
class(a) <- c("test", class(a))
print(a)
[1] "1.000,11" "2.000,22" "3.000,33"
this also works:
round(a)
[1] "1.000,0" "2.000,0" "3.000,0"
this does not:
median(a)
[1] 2000.22
class(median(a))
[1] "numeric"
now my question is: do i need to write a custom method for this class to use median e.g. and if so what would it look like or is there another way (as i simply would like this class to print the data in a certain format)?

The problem is that median.default returns an object of class numeric therefore autoprinting of the returned object does not call your custom print method.
The following will do so.
median.test <- function(x, na.rm = FALSE, ...){
y <- NextMethod(x, na.rm = na.rm, ...)
class(y) <- c("test", class(y))
y
}
median(a)
#[1] "2.000,2"
As for the handling of NA values, I will first define another method for a base R function. It is not strictly needed but save some code lines if objects of class test are used frequently.
c.test <- function(x, ...){
y <- NextMethod(x, ...)
class(y) <- c("test", class(y))
y
}
b <- c(a, NA)
class(b)
#[1] "test" "numeric"
median(b)
#[1] "-"
median(b, na.rm = TRUE)
#[1] "2.000,2"
EDIT.
The following defines a generic function wMedian, a default method and a method for objects of class "currency", as requested by the OP in a comment.
Note that there must be a method print.currency, which I don't redefine since it's exactly the same as print.test above. As for the other methods, I have made them simpler with the help of a new function, as.currency.
median.currency <- function(x, na.rm = FALSE, ...){
y <- NextMethod(x, na.rm = na.rm, ...)
as.currency(y)
}
c.currency <- function(x, ...){
y <- NextMethod(x, ...)
as.currency(y)
}
as.currency <- function(x){
class(x) <- c("currency", class(x))
x
}
wMedian <- function(x, ...) UseMethod("wMedian")
wMedian.default <- function(x, ...){
matrixStats::weightedMedian(x, ...)
}
wMedian.currency <- function(x, w = NULL, idxs = NULL, na.rm = FALSE, interpolate = is.null(ties), ties = NULL, ...) {
y <- NextMethod(x, w = w, idxs = idxs, na.rm = na.rm, interpolate = interpolate, ties = ties, ... )
as.currency(y)
}
set.seed(1)
x <- rnorm(10)
wMedian(x, w = (1:10)/10)
#[1] 0.4084684
wMedian(as.currency(x), w = (1:10)/10)
#[1] "0,4"

Related

Can't convert from <double> to <integer> due to loss of precision

Tried running this code and I am getting this error message:
"Must subset columns with a valid subscript vector. Can't convert from double to integer due to loss of precision." Could someone help either fix or convert so it recognizes the dataframe columns appropriately
data1 <- wins.df(data1, data1$q, wins.limits = c(.01, .99), append.wins.label = FALSE, verbose = TRUE)
Here is the function:
wins.df <-
function(X,
var,
wins.limits = c(.01, .99),
append.wins.label = TRUE,
verbose = TRUE) {
Y <- X
x <- X[, var]
x.w <- wins(x, wins.limits)
var.w <- var
if (append.wins.label)
var.w <- paste(var, ".w", sep = "")
Y[, var.w] <- x.w
if (verbose) {
print(summary(Y[, var.w])) print(summary(X[, var]))
}
return(Y)
}

R-caret-plyr : how to modify downSample function to create sampled data of different proportions

Below is the downSample function of caret that I found here .
downSample <- function(x, y, list = FALSE, yname = "Class")
{
xc <- class(x)
if(!is.data.frame(x)) x <- as.data.frame(x)
if(!is.factor(y))
{
warning("Down-sampling requires a factor variable as the response. The original data was returned.")
return(list(x = x, y = y))
}
minClass <- min(table(y))
x$.outcome <- y
x <- ddply(x, .(y),
function(dat, n) dat[sample(seq(along = dat$.outcome), n),,drop = FALSE],
n = minClass)
y <- x$.outcome
x <- x[, !(colnames(x) %in% c("y", ".outcome")), drop = FALSE]
if(list)
{
if(xc[1] == "matrix") x <- as.matrix(x)
out <- list(x = x, y = y)
} else {
out <- cbind(x, y)
colnames(out)[ncol(out)] <- yname
}
out
}
suppose that my data set is iris :
data(iris)
x <- iris[, -5]
y <- iris[, 5]
to make the response variable a hugely unbalanced binary one :
y[-c(130, 146)] <- "setosa"
There are now therefore two instances of "virginica" and 148 instances of "setosa". I would like to modify the function downSample so that, in the end, instead of returning a subsampled data set with 50% of minClass, it returns a subsampled data set with for instance 30% (k) of minor class and 70% of major class. Because using the downSample function for n instances in the minClass it selects n instances of the other class to get a fully balanced data set. But in my case I loose a lot of data so I just want to balance it a bit not fully.
Let's suppose that k = 20 % i.e. in the end I want 20% of minClaas and 80% of the other class. I have already tried to modify this part of function :
x <- ddply(x, .(y), function(dat, n)
dat[sample(seq(along = dat$.outcome), n),, drop = FALSE], n = minClass)
by changing n to 4*n but I did not achieve it. There is this error :
Error in size <= n/2 :
comparison (4) is possible only for atomic and list types
Your help would be appreciated.
A simple way to perform this is to change the n = minClass part of the ddply call.
downSample_custom <- function(x, y, list = FALSE, yname = "Class", frac = 1){ #add argument frac which is in the 0 - 1 range
xc <- class(x)
if(!is.data.frame(x)) x <- as.data.frame(x)
if(!is.factor(y))
{
warning("Down-sampling requires a factor variable as the response. The original data was returned.")
return(list(x = x, y = y))
}
minClass <- min(table(y))
x$.outcome <- y
x <- ddply(x, .(y),
function(dat, n) dat[sample(seq(along = dat$.outcome), n),,drop = FALSE],
n = minClass*frac) #change the n to this
y <- x$.outcome
x <- x[, !(colnames(x) %in% c("y", ".outcome")), drop = FALSE]
if(list)
{
if(xc[1] == "matrix") x <- as.matrix(x)
out <- list(x = x, y = y)
} else {
out <- cbind(x, y)
colnames(out)[ncol(out)] <- yname
}
out
}
Does it work:
library(plyr)
imbalanced y:
set.seed(1)
y <- as.factor(sample(c("M", "F"),
prob = c(0.1, 0.9),
size = 10000,
replace = TRUE))
x <- rnorm(10000)
table(downSample_custom(x, y)[,2])
output:
F M
1044 1044
table(downSample_custom(x, y, frac = 0.5)[,2])
output:
F M
522 522
table(downSample_custom(x, y, frac = 0.2)[,2])
output
F M
208 208
using frac > 1 returns an error:
downSample_custom(x, y, frac = 2)
output
Error in sample.int(length(x), size, replace, prob) :
cannot take a sample larger than the population when 'replace = FALSE'
EDIT: answer to the updated question.
This can be achieved for instance by sampling the indexes of each class separately. Here is an example that works only for two class problems:
downSample_custom <- function(x, y, yname = "Class", frac = 1){
lev <- levels(y)
minClass <- min(table(y))
lev_min <- levels(y)[which.min(table(y))]
inds_down <- sample(which(y == lev[lev != lev_min]), size = minClass * frac) #sample the indexes of the more abundant class according to minClass * frac
inds_minClass <- which(y == lev[lev == lev_min]) #take all the indexes of the lesser abundant class
out <- data.frame(x, y)
out <- out[sort(c(inds_down, inds_minClass)),]
colnames(out)[ncol(out)] <- yname
return(out)
}
how it looks in practice:
table(downSample_custom(x, y)[,2])
output:
F M
1044 1044
table(downSample_custom(x, y, frac = 5)[,2])
output:
F M
5220 1044
head(downSample_custom(x, y, frac = 5))
output:
x Class
1 -1.5163733 F
2 0.6291412 F
4 1.1797811 M
5 1.1176545 F
6 -1.2377359 F
7 -1.2301645 M

Remove an argument / element from ellipsis

I want to drop an element from an ellipsis because I want to set it manaully inside my function. The example below illustrates the point as the error is sensible given I'm passing na.rm in manually and via ellipsis. How can I remove na.rm from the ellipsis so that the error isn't thrown?
mymean <- function(x, ...){
dots <- list(...)
if (is.null(dots$na.rm)) {
na_arg <- TRUE
} else {
na_arg <- dots$na.rm
}
mean(x, na.rm = na_arg, ...)
}
set.seed(10); a <- c(rnorm(20), NA)
mymean(a)
## [1] -0.06053267
mymean(a, trim = .5)
## [1] -0.08275319
mymean(a, na.rm = TRUE, trim = .1)
## Error in mean.default(x, na.rm = na_arg, ...) :
## formal argument "na.rm" matched by multiple actual arguments
1) lm Look at the source code of lm. Following that:
mymean1 <- function(x, ...){
mc <- match.call()
mc[[1]] <- as.name("mean")
if (is.null(mc$na.rm)) mc$na.rm <- TRUE
eval(mc, parent.frame())
}
mymean1(a, na.rm = TRUE, trim = .1)
## [1] -0.01886715
mymean1(a, trim = .1)
## [1] -0.01886715
2) do.call This would also work although there is a subtle difference, namely ... gets evaluated. In this and most cases it won't matter but it might in some less usual cases.
mymean2 <- function(x, ...) {
dots <- list(...)
if (! "na.rm" %in% names(dots)) dots$na.rm <- TRUE
do.call("mean", c(list(x), dots))
}
mymean2(a, na.rm = TRUE, trim = .1)
## [1] -0.01886715
mymean2(a, trim = .1)
## [1] -0.01886715
3) ordinary arg However, it would be easier to just write:
mymean3 <- function(x, na.rm = TRUE, ...) mean(x, na.rm = na.rm, ...)
mymean3(a, na.rm = TRUE, trim = .1)
## [1] -0.01886715
mymean3(a, trim = .1)
## [1] -0.01886715
This is the usual way wrappers with different defaults are written. For example see the read.csv source code.

Running aggregate function within dmapply (ddR package)

I would like to run the aggregate function within the dmapply function as offered through the ddR package.
Desired results
The desired results reflect a simple output generated via aggregate in base:
aggregate(
x = mtcars$mpg,
FUN = function(x) {
mean(x, na.rm = TRUE)
},
by = list(trans = mtcars$am)
)
which produces:
trans x
1 0 17.14737
2 1 24.39231
Attempt - ddmapply
I would like to arrive at the same results while utilising ddmapply, as attempted below:
# ddR
require(ddR)
# ddR object creation
distMtcars <- as.dframe(mtcars)
# Aggregate / ddmapply
dmapply(
FUN = function(x, y) {
aggregate(FUN = mean(x, na.rm = TRUE),
x = x,
by = list(trans = y))
},
distMtcars$mpg,
y = distMtcars$am,
output.type = "dframe",
combine = "rbind"
)
The code fails:
Error in match.fun(FUN) : 'mean(x, na.rm = TRUE)' is not a
function, character or symbol Called from: match.fun(FUN)
Updates
Fixing error pointed out by #Mike removes the error, however, does not produce the desired result. The code:
# Avoid namespace conflict with other packages
ddR::collect(
dmapply(
FUN = function(x, y) {
aggregate(
FUN = function(x) {
mean(x, na.rm = TRUE)
},
x = x,
by = list(trans = y)
)
},
distMtcars$mpg,
y = distMtcars$am,
output.type = "dframe",
combine = "rbind"
)
)
yields:
[1] trans x
<0 rows> (or 0-length row.names)
It works fine for me if you change your aggregate function to be consistent with the one you call earlier: FUN = function(x) mean(x, na.rm = T). The reason it can't find mean(x, na.rm = T) is because it isn't a function (it's a function call), rather mean is a function.
Also it will give you NA results unless you change your x = distMtcars$mpg to x = collect(distMtcars)$mpg. Same for y. With all that said, I think this should work for you:
res <-dmapply(
FUN = function(x, y) {
aggregate(FUN = function(x) mean(x, na.rm = TRUE),
x = x,
by = list(trans = y))
},
x = list(collect(distMtcars)$mpg),
y = list(collect(distMtcars)$am),
output.type = "dframe",
combine = "rbind"
)
Then you can do collect(res) to see the result.
collect(res)
# trans x
#1 0 17.14737
#2 1 24.39231

Clean, simple function factories in R

Short example. I am exploring the behavior of a function by testing it with different "specs", f(spec). I wrote down one spec by hand, spec1, and am creating new specs as variations on it. To do this, I decided to write a function:
spec1 = list(fy = list(a = 1), fx = list(f1 = function(x) 10-x, f2 = function(x) 2-x))
make_spec = function(f = function(x) 10-x, xtheta = 2)
list(fy = list(a = 1), fx = list(f1 = f, f2 = function(x) xtheta-x))
res1 = make_spec()
# first problem: they don't match
all.equal(res1,spec1)
# [1] "Component “fx”: Component “f2”: target, current do not match when deparsed"
# ^ this happens, even though...
res1$fx$f2(4) == spec1$fx$f2(4)
# TRUE
# second problem: res1 is fugly
res1
# $fy
# $fy$a
# [1] 1
#
#
# $fx
# $fx$f1
# function (x)
# 10 - x
# <environment: 0x000000000f8f2e20>
#
# $fx$f2
# function (x)
# xtheta - x
# <environment: 0x000000000f8f2e20>
str(res1)
# even worse
My goals for make_spec are...
all.equal(spec1, res1) and/or identical(spec1, res1)
for str(res1) to be human-readable (no <environment: ptr> tags or srcfilecopy)
to avoid substitute and eval altogether if possible (not a high priority)
to avoid writing out the second arg of substitute (see "full" example below)
Is there an idiomatic way to achieve some or all of these goals?
Full example. I'm not sure if the example above fully covers my use case, so here's the latter:
spec0 = list(
v_dist = list(
pdf = function(x) 1,
cdf = function(x) x,
q = function(x) x,
supp = c(0,1)
)
,
ucondv_dist = {
ucondv_dist = list()
ucondv_dist$condmean = function(v) 10-v
ucondv_dist$pdf = function(u,v) dnorm(u, ucondv_dist$condmean(v), 50)
ucondv_dist$cdf = function(u,v) pnorm(u, ucondv_dist$condmean(v), 50)
ucondv_dist
}
)
make_spec = function(ycondx_condmean = function(x) 10-x, ycondx_sd = 50){
s = substitute(list(
x_dist = list(
pdf = function(x) 1,
cdf = function(x) x,
q = function(x) x,
supp = c(0,1)
)
,
ycondx_dist = {
ycondx_dist = list()
ycondx_dist$condmean = ycondx_condmean
ycondx_dist$pdf = function(u,v) dnorm(u, ycondx_dist$condmean(v), ycondx_sd)
ycondx_dist$cdf = function(u,v) pnorm(u, ycondx_dist$condmean(v), ycondx_sd)
ycondx_dist
}
)
, list(ycondx_condmean=ycondx_condmean, ycondx_sd = ycondx_sd))
eval(s, .GlobalEnv)
}
res0 = make_spec()
Side note. I don't know if "function factory" is the right term here, since I am not a computer scientist, but it seems related. I found only a paragraph on the concept related to R.
The enclosing environments of the functions are different leading to the difference in output/difference in deparsing. So, there are two things to do to get the desired output:
make the environments the same
substitute the variables from the enclosing environments into the function bodies.
However, doing it this way you get a double dose of the eval/substitute you didn't want, so maybe there would be an alternative.
make_spec <- function(f = function(x) 10-x, xtheta = 2) {
e <- parent.frame()
fixClosure <- function(func)
eval(eval(substitute(substitute(func)), parent.frame()), e)
list(fy = list(a = 1), fx = list(
f1 = fixClosure(f),
f2 = fixClosure(function(x) xtheta-x)
))
}
spec1 <- list(fy = list(a = 1), fx = list(f1 = function(x) 10-x, f2 = function(x) 2-x))
res1 <- make_spec()
all.equal(res1, spec1)
[1] TRUE

Resources