Create argument list using lapply for do.call - r

I'm trying to pass a set of modified arguments from a larger function to arguments in a nested function. This is an argument supplied from the larger function:
time_dep_covariates_list = c(therapy_start = "Start of Therapy",
therapy_end = "End of Therapy")
I have these sets of constant arguments:
tmerge_args_1 <- alist(data1 = analytic_dataset,
data2 = analytic_dataset,
id = patientid,
tstop = adv_dx_to_event,
death_censor = event(adv_dx_to_event))
And I want to append these modified arguments to that argument list:
tmerge_args_2 <- lapply(1:length(time_dep_covariates_list), function(x){
tmerge_args <<- c(tmerge_args, alist('var' = tdc(var)) )
paste0(names(time_dep_covariates_list[x])," =
tdc(",names(time_dep_covariates_list[x]), ")")
})
> tdc_args
[[1]]
[1] "therapy_start = tdc(therapy_start)"
[[2]]
[1] "therapy_end = tdc(therapy_end)"
I want to create a do.call that handles the arguments like so:
count_process_form <- do.call(tmerge, args = c(tmerge_args_1,
tmerge_args_2)
That would be identical to the following:
tmerge(data1 = analytic_dataset, data2 = analytic_dataset,
id = patientid, tstop = adv_dx_to_event,
therapy_start = tdc(therapy_start), therapy_end = tdc(therapy_end)
It works fine with tmerge_args_1 by itself, but as the args_2 are character and not language elements, I get this error:
Error in (function (data1, data2, id, ..., tstart, tstop, options) :
all additional argments [sic] must have a name:
How can I modify the list I'm creating for args_2 so they're stored as arguments that do.call can understand? Or am I approaching this all wrong?
Thanks!
Here is a reproducible example:
analytic_dataset= data_frame(patientid = sample(1:1000,5),
adv_dx_to_event = sample(100:200, 5),
death_censor = sample(0:1,5, replace = T),
therapy_start = sample(1:20,5),
therapy_stop = sample(40:100,5))
The below would be passed in from a function:
time_dep_covariates_list = c(therapy_start = "Start of Therapy",
therapy_end = "End of Therapy")
tmerge_args_1 <- alist(data1 = analytic_dataset,
data2 = analytic_dataset,
id = patientid,
tstop = adv_dx_to_event,
death_censor = event(adv_dx_to_event))
do.call(tmerge,tmerge_args_1) #this works
tmerge_args_2 <- lapply(1:length(time_dep_covariates_list), function(x){
tmerge_args <<- c(tmerge_args, alist('var' = tdc(var)) )
paste0(names(time_dep_covariates_list[x])," = tdc(",names(time_dep_covariates_list[x]), ")")
})
do.call(tmerge,tmerge_args_1,tmerge_args_2) # this doesn't```

Related

Remove columns with many NA values using mlr3pipelines

I am trying to remove columns where proportion of NA value are greater than na_cutoff threshold using mlr3pipelines.
Here is my try:
library(mlr3)
library(mlr3pipelines)
task = tsk("iris")
dt = task$data()
dt[1:50, Sepal.Width := NA]
task_ = as_task_classif(dt, target = "Species")
graph = po("removeconstants", id = "removeconstants", ratio = 0.01) %>>%
po("select", id = "drop_na_cols")
ps = ParamSet$new(list(ParamDbl$new("na_cutoff", lower = 0, upper = 1, default = 0.2)))
graph$param_set$add(ps)
graph$param_set
graph$param_set$trafo = function(x, param_set) {
na_cutoff = x$na_cutoff
print(na_cutoff)
x$drop_na_cols.selector = function(task) {
fn = task$feature_names
data = task$data(cols = fn)
drop <- which(colMeans(is.na(data)) > na_cutoff)
fn[-drop]
}
x$na_cutoff = NULL
x
}
train_res = graph$train(task_)
train_res$drop_na_cols.output$data()
The problem is that last column is not removed even it should be.
In general, trafos are not meant for parameter sets.
I.e. internally, when the Graph accesses the parameters, the parameter transformation is not applied.
They are intended to create search spaces for black-box optimization, including hyperparameter optimization of ML models.
Also, you modifying the parameter set of an existing Graph is a bad idea.
The way to go I believe is to use the PipeOpSelect with a custom selector: https://mlr3pipelines.mlr-org.com/reference/Selector.html
Following this issue https://github.com/mlr-org/mlr3pipelines/issues/313
I thought the recommended way to do this is through trafo on select pipe.
Nevertheless, I have just created new pipeop that removes columns with many NA values:
library(mlr3pipelines)
library(mlr3verse)
library(mlr3misc)
library(R6)
PipeOpDropNACol = R6::R6Class(
"PipeOpDropNACol",
inherit = mlr3pipelines::PipeOpTaskPreprocSimple,
public = list(
initialize = function(id = "drop.nacol", param_vals = list()) {
ps = ParamSet$new(list(
ParamDbl$new("cutoff", lower = 0, upper = 1, default = 0.05, tags = c("dropnacol_tag"))
))
ps$values = list(cutoff = 0.2)
super$initialize(id, param_set = ps, param_vals = param_vals)
}
),
private = list(
.get_state = function(task) {
pv = self$param_set$get_values(tags = "dropnacol_tag")
print(pv$cutoff)
features_names = task$feature_names
data = task$data(cols = features_names)
print(data)
many_na = sapply(data, function(column) (sum(is.na(column))) / length(column) > pv$cutoff)
print(many_na)
list(cnames = colnames(data)[-many_na])
},
.transform = function(task) {
task$select(self$state$cnames)
}
)
)
# no group variable
task = tsk("iris")
dt = task$data()
dt[1:50, Sepal.Width := NA]
task = as_task_classif(dt, target = "Species")
gr = Graph$new()
gr$add_pipeop(PipeOpDropNACol$new())
result = gr$train(task)
result[[1]]$data()
gr$predict(task)

Error in m == q : R comparison (1) is possible only for atomic and list types

The whole function which i need to convert the for loop in to apply for optimization
plans_achievements <- function(pa_m,pa_q){
if(nrow(pa_m)==0 & nrow(pa_q==0)){
df = data.frame(a = c(""), b = c("No Data Available"))
colnames(df)=""
}else{
pa_m= pa_m%>% select(inc,month_year,Plans,Achievements,quarter_year)
colnames(pa_mon)[2] = "Period"
pa_q= pa_q%>% select(inc,quarter_year,Plans,Achievements)
colnames(pa_qtr)[2] = "Period"
df = data.frame(inc=c(""),Period=c(""),Plans=c(""),Achievements=c(""))
for (q in unique(pa_q$Period)){
df1 = pa_q[pa_q$Period==q,]
df1$Period = paste0("<span style=\"color:#288D55\">",df1$Period,"</span>")
df1$Plans = paste0("<span style=\"color:#288D55\">",df1$Plans,"</span>")
df1$Achievements = paste0("<span style=\"color:#288D55\">",df1$Achievements,"</span>")
df = rbind(df,df1)
for (m in unique(pa_m$quarter_year)){
if(m==q){
df2 = pa_m[pa_m$quarter_year==q,][-5]
df = rbind(df,df2)
}
}
}
df = df[-1,]
}
return(df)
}
The apply which i tried
my_fun <- function(q){
df1 = pa_qtr[pa_qtr$Period==q,]
df1$Period = paste0("<span style=\"color:#288D55\">",df1$Period,"</span>")
df1$Plans = paste0("<span style=\"color:#288D55\">",df1$Plans,"</span>")
df1$Achievements = paste0("<span style=\"color:#288D55\">",df1$Achievements,"</span>")
df = rbind(df,df1)
}
df = do.call(rbind,lapply(unique(pa_qtr$Period), my_fun))
my_fun2 <- function(m,my_fun){
if (m == q) {
df2 = pa_mon[pa_mon$qtr_yr == q, ][-5]
df = rbind(df,df2)
}
}
df = do.call(cbind,lapply(unique(pa_mon$qtr_yr), my_fun2))
DT::datatable(plans_achievements(pa_m[pa_m$inc=="vate",],pa_q[pa_q$inc=="vate",]), rownames = F,escape = FALSE,selection=list(mode="single",target="row"),options = list(pageLength = 50,scrollX = TRUE,dom = 'tp',ordering=F,columnDefs = list(list(visible=FALSE, targets=c(0)),list(className = 'dt-left', targets = '_all'))))
Why you get the error comparison is possible only for atomic and list types
I will answer your original question first:
You get the error because you haven't defined q as a variable inside the function my_fun2. Since you haven't defined this variable, R will look for it in the global environment. There R will find the function q() (used to quit R). So you get the error message comparison (1) is possible only for atomic and list types because R thinks you are trying to compare a number m with the function q.
Here is a small example to make it easy to see:
# Run this in a clean environment
m <- 1
m == b # Understandable error message - "b" is not found
m == q # Your error - because R thinks you are comparing m to a function
You fix this error by making sure that q is defined inside your function. Either by creating it inside the function, or by supplying it as an input argument.
A possible solution for your problem
As I understand your code, you want to format, merge and sort the values in pa_q and pa_m, to display them in a html table.
Under is a possible solution, using tidyverse and vectorized operations, rather than a loop or apply functions. Vectorized functions are typically your fastest option in R, as I know you want to optimize your code.
library(dplyr)
plans_achievements <- function(pa_m, pa_q) {
# I've modified the logic a bit: there is no need to wrap the full function in
# an else statement, since we can return early if the data has no rows
if (nrow(pa_m) == 0 && nrow(pa_q == 0)) {
df = data.frame(a = c(""), b = c("No Data Available"))
colnames(df) = ""
return(df)
}
pa_q <-
pa_q %>%
# Select and rename the columns vi need
select(inc, Period = quarter_year, Plans, Achievements, date) %>%
# Format the values
mutate(
Period = paste0("<span style=\"color:#288D55\">", Period,"</span>"),
Plans = paste0("<span style=\"color:#288D55\">", Plans,"</span>"),
Achievements = paste0("<span style=\"color:#288D55\">", Achievements,"</span>")
)
pa_m <-
pa_m %>%
# Select and rename the columns we need
select(inc, Period = month_year, Plans, Achievements, date) #%>%
# Combine the datasets
bind_rows(
pa_q,
pa_m
) %>%
# Make sure that R understand date as a date value
mutate(
date = lubridate::dmy(date)
) %>%
# Sort by date
arrange(desc(date)) %>%
# Remove columns we do not need
select(-date, -inc)
}
DT::datatable(
plans_achievements(
pa_m[pa_m$inc=="vate",],
pa_q[pa_q$inc=="vate",]
),
rownames = FALSE,
escape = FALSE,
selection = list(mode = "single", target = "row"),
options = list(
pageLength = 50,
scrollX = TRUE,
dom = 'tp',
ordering = FALSE,
columnDefs = list(
list(className = 'dt-left', targets = '_all')
)
)
)
Hopefully this solves your problem.

How can i get my function arguments to be of the corrct type for datatables in r?

I have a data table called data:
[![This is a screengrab of the data table][1]][1]
The goal is to write a function that mimics the following code:
data[Region == "Northeast",mean(Awareness, na.rm = TRUE), by = Product][order(-rank(V1))][1:5,.(Product)]
So far I have:
topx_engagement = function(state_of_engagement, respondent_variable, respondent_variable_sub, rank_length = 3){
respondent_variable_sub = as.character(respondent_variable_sub)
data[eval(respondent_variable == respondent_variable_sub), mean(get(state_of_engagement), na.rm = TRUE), by = Product][order(-rank(V1))][1:rank_length,.(Product)]
}
The function topx_engagement should allow a user to enter respondent_variable which coincides with Region, a respondent_variable_sub which coincides with "Northeast" and subsequent arguments. I would like to focus on the preceding arguments as all the other work fine.
Currently, when I call:
topx_engagement(state_of_engagement = Awareness, respondent_variable = Region, respondent_variable_sub = Northeast, rank_length = 3)
I get an error:
Error in topx_engagement(state_of_engagement = Awareness, respondent_variable = Region, :
object 'Northeast' not found
Alternately, running
topx_engagement = function(state_of_engagement, respondent_variable, respondent_variable_sub, rank_length = 3){
#respondent_variable_sub = as.character(respondent_variable_sub)
data[eval(respondent_variable == respondent_variable_sub), mean(get(state_of_engagement), na.rm = TRUE), by = Product][order(-rank(V1))][1:rank_length,.(Product)]
}
topx_engagement(state_of_engagement = Awareness, respondent_variable = Region, respondent_variable_sub = Northeast, rank_length = 3)
throws an error of Error in eval(.massagei(isub[[2L]]), parent.frame(), parent.frame()) : object 'Region' not found
I need assistance with getting the function inputs in the right format.
[1]: https://i.stack.imgur.com/hseWS.png
We may use deparse/substitute to convert to character string if we are passing unquote arguments
topx_engagement = function(state_of_engagement,
respondent_variable, respondent_variable_sub, rank_length = 3){
state_of_engagement <- deparse(substitute(state_of_engagement))
respondent_variable <- deparse(substitute(respondent_variable))
respondent_variable_sub <- deparse(substitute(respondent_variable_sub))
data[eval(as.name(respondent_variable)) == respondent_variable_sub,
mean(get(state_of_engagement), na.rm = TRUE),
by = Product][order(-rank(V1))][1:rank_length,.(Product)]
}
then call it as
topx_engagement(state_of_engagement = Awareness,
respondent_variable = Region,
respondent_variable_sub = Northeast, rank_length = 3)

Create a script with 2 vectors in R

I'm using Heatmap from the package complexheatmap
in the script, I need to create a variable ha_column that I will incorporate into my script.
ha_column = HeatmapAnnotation (df = data.frame(type1=c(rep("name1",5), rep("name2",5),rep("name3",5), col = list(type1=c("name1" = "#DCDCDC", "name2" = "#DC928B", "name2"="#BA72D3")))))
I have 2 vectors:
vectors1=c("name1","name2","name3)
vectors2=c("#DCDCDC","#DC928B","#BA72D3")
and the idea is to reproduce the above script with these two vectors.
I tried:
paste0("ha_column = HeatmapAnnotation(df = data.frame(type1 = c(rep(",vectors1,", 5),col = list(type1 = c(",vectors1,"=",vectors2,")))")
bu it only paste line by line such as:
[1] "ha_column = HeatmapAnnotation(df = data.frame(type1 = c(rep(name1, 5),col = list(type1 = c(name1=#DCDCDC)))"
[2] "ha_column = HeatmapAnnotation(df = data.frame(type1 = c(rep(name2, 5),col = list(type1 = c(name2=#DC928B)))"
[3] "ha_column = HeatmapAnnotation(df = data.frame(type1 = c(rep(name3, 5),col = list(type1 = c(name3=#BA72D3)))"
instead of doing what I want ...
Does anyone have an idea?
Thanks for your time.
It's generally not a good idea to build code as a string. Instead think of building a function to do what you want.
You could do something line
ha_column_fun = function(names, colors) {
HeatmapAnnotation(
df = data.frame(type1 = rep(names, each=5)),
col = list(type1=setNames(colors, names))
)
}
And then you could call it with
ha_column = ha_column_fun(vectors1, vectors2)

How to pass user input inside an eventReactive expression Shiny/R

I am having trouble with creating a new variable in dplyr::mutate using a user inputted variable through selectInput in the UI.
gwrdata <- eventReactive(input$rungwr, {
sp_shape <- as(data(), "Spatial")
bwG <- gwr.sel(formula(), data = sp_shape, gweight = gwr.Gauss, verbose = FALSE)
gwrG <- gwr(formula(), data = sp_shape, bandwidth = bwG,
gweight = gwr.Gauss, hatmatrix = TRUE)
sf_gwr <- st_as_sf(gwrG$SDF)
bins <- 3
browser()
sf_gwr <- mutate(sf_gwr, parBin = cut2(sf_gwr[, input$inVar],
g = bins, levels.mean = TRUE))
sf_gwr <- mutate(sf_gwr, sigBin = cut2(localR2, g = bins, levels.mean = TRUE))
bvColors = c("#e8e8e8", "#dfb0d6", "#be64ac", "#ace4e4",
"#a5add3", "#8c62aa", "#5ac8c8", "#5698b9", "#3b4994")
levels(sf_gwr$parBin) <- 1:bins
levels(sf_gwr$sigBin) <- 1:bins
sf_gwr <- mutate(sf_gwr, value = paste(parBin, '-', sigBin, sep = ''))
sf_gwr
})
There are two mutate functions. The process works fine if it is hard coded like it is with localR2. However when using the sf_gwr[,input$inVar] the following error is given.
Evaluation error: no applicable method for 'st_geometry<-' applied to an object of class "list".
Does this have something to do with the subsetting in the mutate function?

Resources