I tried to run this code (with no success):
for (i in chanel_code) {
assign(paste("prospect_",i,sep="", collapse = NULL,recycle0 = FALSE),(aggregate(na_adss_score ~ month_year + na_appl_status, paste("new_account_",i, sep="",collapse = NULL, recycle0 = FALSE),mean) %>%
mutate(aggregate(na_pcn_no ~ month_year + na_appl_status, paste("new_account_",i, sep="",collapse = NULL, recycle0 = FALSE), length))))
}
Error in eval(predvars, data, env) :
argument 'envir' incorrect de type 'character'
would you mind please helping me.
thanks in advance
You don't need assign() or all the arguments for paste(). Instead, subset your data with i and use paste0().
Assuming your dataset is called new_account_BA and new_account_BA$chanel_codes contains chanel_codes:
chanel_code=c("BA","CS","DM","DS","EN","IA","MG","PS","TM")
results <- list()
for (i in chanel_code) {
x <- new_account_BA[new_account_BA$chanel_code == paste0("prospect_",i),]
results[[chanel_code]] <- cbind(
aggregate(na_adss_score ~ month_year + na_appl_status, x = x, mean),
aggregate(na_pcn_no ~ month_year + na_appl_status, x = x, length))
}
Related
Tried running this code and I am getting this error message:
"Must subset columns with a valid subscript vector. Can't convert from double to integer due to loss of precision." Could someone help either fix or convert so it recognizes the dataframe columns appropriately
data1 <- wins.df(data1, data1$q, wins.limits = c(.01, .99), append.wins.label = FALSE, verbose = TRUE)
Here is the function:
wins.df <-
function(X,
var,
wins.limits = c(.01, .99),
append.wins.label = TRUE,
verbose = TRUE) {
Y <- X
x <- X[, var]
x.w <- wins(x, wins.limits)
var.w <- var
if (append.wins.label)
var.w <- paste(var, ".w", sep = "")
Y[, var.w] <- x.w
if (verbose) {
print(summary(Y[, var.w])) print(summary(X[, var]))
}
return(Y)
}
Let me preface this by saying that there are similar questions to mine on Stackoverflow, but I have not seen them answered to my satisfaction, and the answers that were given don't help me with the problem I'm having. Also this is a long question but I have tried to make each part simple and easy to understand.
Here is a proof of concept that you can assign formulas to variables in the global environment, and pass the formula variable to the lm function and use predict to make predictions. I do it in several ways to be thorough:
fake_data_1 <- data.frame(
ecks = c(-19:20,-19:20,-19:20),
why = c((-19:20)^2, (-19:20)^3/40, abs(-19:20))
)
fake_data_2 <- data.frame(
ecks =runif(22)
)
#using basic formula
formula_used <- why ~ ecks
lm_model <- lm(formula = formula_used, data = fake_data_1)
predict(lm_model, newdata = fake_data_2)
#converting string to formula
formula_used <- as.formula("why ~ ecks")
lm_model <- lm(formula = formula_used, data = fake_data_1)
predict(lm_model, newdata = fake_data_2)
#can use a basic string as well
formula_used <- "why ~ ecks"
lm_model <- lm(formula = formula_used, data = fake_data_1)
predict(lm_model, newdata = fake_data_2)
Here is proof of concept that it is possible to perform these processes inside of functions:
#can run this as a function
make_prediction <- function(data_in,y_var,x_var,new_data){
formula_used <- as.formula(paste(y_var, x_var, sep = " ~ "))
lm_model <- lm(formula = formula_used,data = data_in)
predict(lm_model, newdata = data_in)
}
make_prediction(data_in = fake_data_1, y_var = "why", x_var = "ecks", new_data = fake_data_2)
#can explicitly set the environment of the formula: will make sense why I show this later
make_prediction_2 <- function(data_in,y_var,x_var,new_data){
local_env = environment()
formula_used <- as.formula(paste(y_var, x_var, sep = " ~ "),env = local_env)
lm_model <- lm(formula = formula_used,data = data_in)
predict(lm_model, newdata = new_data)
}
make_prediction_2(data_in = fake_data_1, y_var = "why", x_var = "ecks",new_data = fake_data_2)
As I say in the comment, it will make sense why I try the explicit assignment of environment later.
Now I am trying to use the lme function from the nlme package to make predictions. As an aside I don't understand the statistics of this function, am just using it based off of code that someone else in my lab wrote.
Here is proof of concept that you can use this function to make predictions with a formula assigned to a variable (not dealing with formula called "random" for now:
library(nlme)
#fake data for making model
fake_data_complicated_1 <- data.frame(ecks = c(-19:20,-19:20,-19:20),
why = c((-19:20)^3, (-19:20)^4/40, abs(-19:20)*100),
treatment = c(rep("a",times = 40),
rep("b", times = 40),
rep("control", times = 40)),
ID = c(rep(c("q","w","e","r"),times = 10),
rep(c("t","y","u","i"),times = 10),
rep(c("h","j","k","l"),times = 10))
)
#fake data for making prediction
fake_data_complicated_2 <- data.frame(ecks = runif(120),
treatment = c(rep("a",times = 40),
rep("b", times = 40),
rep("control", times = 40)),
ID = c(rep(c("q","w","e","r"),times = 10),
rep(c("t","y","u","i"),times = 10),
rep(c("h","j","k","l"),times = 10))
)
Can do it with a basic formula:
#can use basic formula as before
fixed_formula <- why ~ ecks * treatment
random_formula <- ~1|ID #not sure what this does in the model but that's not importante
lme_model <- lme(fixed = fixed_formula,
random = random_formula,
data = fake_data_complicated_1)
predict(lme_model, newdata = fake_data_complicated_2)
Can convert string to formula:
#can use a pasted/converted formula as before
fixed_formula <- as.formula(
paste("why", paste("ecks", "treatment", sep = " * "), sep = " ~ ")
)
lme_model <- lme(fixed = fixed_formula,
random = random_formula,
data = fake_data_complicated_1)
predict(lme_model, newdata = fake_data_complicated_2)
As another aside the lme function won't take a raw string, but that's not my main problem:
#can't use a raw string, this code generates an error
# fixed_formula <- paste("why", paste("ecks", "treatment", sep = " * "), sep = " ~ ")
#
#
# lme_model <- lme(fixed = fixed_formula,
# random = random_formula,
# data = fake_data_complicated_1)
#
#
# predict(lme_model, newdata = fake_data_complicated_2)
Here is the problem: when I try to put this lme code into a function I get an object 'xxxxx' not found error:
#this function does not work!
make_prediction_nlm <- function(data_in,y_var,x_var,treatment_var ,id_var,new_data){
formula_used_nlm <- as.formula(paste(y_var, paste(x_var, treatment_var, sep = " * "), sep = " ~ "))
random_used <- as.formula(paste("~1|",id_var,sep = ""))
lme_model <- lme(fixed = formula_used_nlm,
random = random_used,
data = data_in)
predict(lme_model, newdata = new_data)
}
make_prediction_nlm(data_in = fake_data_complicated_1,
y_var = "why",
x_var = "ecks",
treatment_var = "treatment",
id_var = "ID",
new_data = fake_data_complicated_1)
Specifically the error is Error in eval(mCall$fixed) : object 'formula_used_nlm' not found
An answer here: Object not found error when passing model formula to another function suggests that as I did above, I explicitly set the environment of the formula in the function. I tried that and it did not work, generating the same error:
#neither does this one!
make_prediction_2 <- function(data_in,y_var,x_var,treatment_var ,id_var){
local_env = environment()
formula_used_nlm <- as.formula(paste(y_var, paste(x_var, treatment_var, sep = " * "), sep = " ~ "),
env = local_env)
random_used <- as.formula(paste("~1|",id_var,sep = ""), env = local_env)
lme_model <- lme(fixed = formula_used_nlm,
random = random_used,
data = data_in)
predict(lme_model, newdata = data_in)
}
make_prediction_2(data_in = fake_data_complicated_1,
y_var = "why",
x_var = "ecks",
treatment_var = "treatment",
id_var = "ID")
I could perhaps getting around this by using a macro instead of a function, but that's not something I want to wade into if I can help it, if it would even work. For now I will just be copying and pasting code rather than writing a function. Thanks to those of you who read through this.
For some reason the lme function expects a literal formula to be in the call. It does not expect to see a variable there. It uses nonstandard evaluation to try to separate the response from the fixed effect terms. In this case, it really doesn't have to do with the environment of the formula.
The easiest way around this would be to inject the formulas into the call with do.call. This should work
make_prediction_nlm <- function(data_in,y_var,x_var,treatment_var ,id_var,new_data){
formula_used_nlm <- as.formula(paste(y_var, paste(x_var, treatment_var, sep = " * "), sep = " ~ "))
random_used <- as.formula(paste("~1|",id_var,sep = ""))
lme_model <- do.call("lme", list(fixed = formula_used_nlm,
random = random_used,
data = quote(data_in)))
predict(lme_model, newdata = new_data)
}
This only really affects the predict function when you pass newdata= because it goes back to see what the original call was.
If you look at nlme:::predict.lme (a hidden function in the nlme package namespace) you'll note this line:
fixed <- eval(eval(mCall$fixed)[-2])
The function is trying to extract the fixed component, remove the left-hand side (which is what the [-2] does), and then re-evaluate.
#MrFlick's solution works, and may be more principled than the one I found, which is to insert the line
lme_model$call$fixed <- formula_used_nlm
right before the predict() call in your function. This explicitly replaces the symbol with the evaluated value ...
It's also possible if the fixed <- eval(...) line were replaced with an appropriate variation of eval() that worked in the parent frame or the environment of the formula or ...
I fitted a mixed model of the following form:
global.mod <- lmer(log(y) ~
x1 + x2 + x3 + x4 + x5 + (1 + x1|a/b),
REML = FALSE, data = lmerDat,
na.action = 'na.fail', control = lmerControl(optimizer="bobyqa",optCtrl=list(maxfun=2e4)))
I then use predge to create combinations of model below which works fine.
require(parallel) || require(snow)
clusterType <- if(length(find.package("snow", quiet = TRUE))) "SOCK" else "PSOCK"
clust <- try(makeCluster(getOption("cl.cores", 4), type = clusterType))
clusterEvalQ(clust, library(lme4))
clusterExport(clust, "lmerDat")
model.set <- pdredge(global.mod, clust,
m.lim = c(2, NA), rank = AIC, extra = "adjR^2", trace = 2)
I then tried to specify my models in a slightly different format as below:
PredictorVariables <- names(lmerDat)[c(5:9)] # this is x1 till x5
fixed.part <- paste("log(y) ~", paste(PredictorVariables, collapse=" + "))
random.part <- paste('(1 + x1|a/b)')
Formula <- formula(paste(fixed.part, random.part, sep = " + "))
global.mod <- lmer(Formula, data = lmerDat, na.action = 'na.fail', control =
lmerControl(optimizer="bobyqa",optCtrl=list(maxfun=2e4)), REML = FALSE)
require(parallel) || require(snow)
clusterType <- if(length(find.package("snow", quiet = TRUE))) "SOCK" else "PSOCK"
clust <- try(makeCluster(getOption("cl.cores", 4), type = clusterType))
clusterEvalQ(clust, library(lme4))
clusterExport(clust, "lmerDat")
model.set <- pdredge(global.mod, clust,
m.lim = c(2, NA), rank = AIC, extra = "adjR^2", trace = 2)
However, this gives me the follow error
Error in sprintf(gettext(fmt, domain = domain), ...) :
invalid format '%d'; use format %f, %e, %g or %a for numeric objects
Unfortunately the latter is how I want to do the model specification since apriori I do not know what will be the names of the predictors in names(lmerDat)[c(5:9)]. Could anyone help me understand the error and how to resolve it?
EDIT
my traceback output is as follows (please note that this is on my original data instead of the dummy data above).
7: sprintf(gettext(fmt, domain = domain), ...)
6: gettextf(Message, ..., domain = domain)
5: structure(list(message = as.character(message), call = call),
class = class)
4: simpleError(gettextf(Message, ..., domain = domain), Call)
3: stop(simpleError(gettextf(Message, ..., domain = domain), Call))
2: cry(, "number of non-fixed predictors [%d] exceeds the allowed maximum of %d (with %d variants)",
nov, novMax, nVariants)
1: pdredge(global.mod, clust)
I am reviewing code that was given to me as part of a homework. I am familiar with functions in R, but this person is using !! before the 'y' argument inside the function, which I had never seen before. I am wondering what is the function of !!.
I have tried google and the help function in R studio.
my_table <- function(df = NULL, y = NULL, grp = NULL,
grpname = " ", dgts = 1, tbltitle = " ") {
y <- enquo(y)
grp <- enquo(grp)
mysummary <- df %>% group_by(!!grp) %>% summarize(
n = sum(!is.na(!!y)),
mean = format(round(mean(!!y, na.rm=T), dgts), nsmall=dgts),
sd = format(round(sd(!!y, na.rm=T), dgts), nsmall=dgts),
se = format(round(sd(!!y, na.rm=T)/sqrt(sum(!is.na(!!y))), dgts),
nsmall=dgts),
kable(mysummary)
}
This works.
ok <- function(data)
{
lattice:::bwplot(size ~ mxPH, data=data)
}
> ok(algae)
## no error
I'd like to specify size and mxPH by its column numbers. However, the following didn't work.
1st attempt
aa <- function(data, n1, n2)
{
names <- names(data)
lattice:::bwplot(names[n1] ~ names[n2], data=data)
}
> aa(algae,2,4)
Warning message:
In function (x, y, box.ratio = 1, box.width = box.ratio/(1 + box.ratio), :
NAs introduced by coercion
2nd Attempt (small modification to the 1st one)
bb <- function(data, n1, n2)
{
names <- factor(names(data))
lattice:::bwplot(names[n1] ~ names[n2], data=data)
}
> bb(algae,2,4)
## no error
3rd Attempt (small modification to the 2nd one)
cc <- function(data, n1, n2)
{
names <- factor(names(data))
lattice:::bwplot(deparse(substitute(names[n1])) ~ deparse(substitute(names[n2])), data=data)
}
> cc(algae,2,4)
Warning message:
In function (x, y, box.ratio = 1, box.width = box.ratio/(1 + box.ratio), :
NAs introduced by coercion
Data
algae <- utils:::read.table('http://www.liaad.up.pt/~ltorgo/DataMiningWithR/DataSets/Analysis.txt',
header=F,
dec='.',
col.names=c('season','size','speed','mxPH','mnO2','Cl','NO3','NH4','oPO4','PO4','Chla','a1','a2','a3','a4','a5','a6','a7'),
na.strings=c('XXXXXXX'))
library(lattice)
bwplot.column <- function(data, n1, n2, xlab = NULL, ylab = NULL, ...)
{
if (is.null(ylab)) {
ylab <- names(data)[n1]
}
if (is.null(xlab)) {
xlab <- names(data)[n2]
}
bwplot(data[,n1] ~ data[,n2], xlab = xlab, ylab = ylab, data = data, ...)
}
You can refer to the specific columns as Michael demonstrates, or you can also paste the formula together manually:
fun <- function(dat,n1,n2){
nms <- colnames(dat)
f <- as.formula(paste(nms[n1],"~",nms[n2]))
bwplot(f,data = dat)
}
fun(algae,2,4)