Tuning Tidymodels’ Recipe and Model Parameters Simultaneously - r

We can use tidymodels to tune both recipe parameters and model parameters simultaneously, right? I'm struggling to understand what corrective action I should take based on this message: "Error: Some tuning parameters require finalization but there are recipe parameters that require tuning. Please use `parameters()` to finalize the parameter ranges." Any help would be most appreciated.
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(tidymodels))
suppressPackageStartupMessages(library(themis))
suppressPackageStartupMessages(library(finetune))
suppressPackageStartupMessages(library(doParallel))
suppressPackageStartupMessages(library(titanic))
registerDoParallel()
set.seed(123)
train.df <- titanic_train %>%
  mutate(Survived = factor(ifelse(Survived == 1, 'Y', 'N')),
         Pclass = factor(Pclass, ordered = TRUE),
         Sex = factor(Sex),
         Embarked = factor(ifelse(Embarked == '', NA, Embarked))) %>%
  select(-c(Name, Ticket, Cabin))
summary(train.df)
#>   PassengerId   Survived Pclass      Sex           Age            SibSp
#>  Min.   :  1.0   N:549   1:216   female:314   Min.   : 0.42   Min.   :0.000
#>  1st Qu.:223.5   Y:342   2:184   male  :577   1st Qu.:20.12   1st Qu.:0.000
#>  Median :446.0           3:491                Median :28.00   Median :0.000
#>  Mean   :446.0                                Mean   :29.70   Mean   :0.523
#>  3rd Qu.:668.5                                3rd Qu.:38.00   3rd Qu.:1.000
#>  Max.   :891.0                                Max.   :80.00   Max.   :8.000
#>                                               NA's   :177
#>      Parch             Fare         Embarked
#>  Min.   :0.0000   Min.   :  0.00   C   :168
#>  1st Qu.:0.0000   1st Qu.:  7.91   Q   : 77
#>  Median :0.0000   Median : 14.45   S   :644
#>  Mean   :0.3816   Mean   : 32.20   NA's:  2
#>  3rd Qu.:0.0000   3rd Qu.: 31.00
#>  Max.   :6.0000   Max.   :512.33
#>
cv.folds <- vfold_cv(train.df, v = 4, strata = Survived)
cv.folds
#> # 4-fold cross-validation using stratification
#> # A tibble: 4 x 2
#> splits id
#> <list> <chr>
#> 1 <split [667/224]> Fold1
#> 2 <split [668/223]> Fold2
#> 3 <split [669/222]> Fold3
#> 4 <split [669/222]> Fold4
#########################################################
# Logistic Regression Model -- This Works
# Tuning Recipe Parameters: Yes
# Tuning Model Hyperparameters: No
recipe.logistic.regression <-
  recipe(Survived ~ ., data = train.df) %>%
  update_role(PassengerId, new_role = 'ID') %>%
  step_dummy(all_nominal(), -all_outcomes()) %>%
  step_impute_knn(all_predictors(), neighbors = tune()) %>%
  step_normalize(all_predictors()) %>%
  step_downsample(Survived, seed = 456)
spec.logistic.regression <-
  logistic_reg() %>%
  set_engine("glm")
wf.logistic.regression <-
  workflow() %>%
  add_recipe(recipe.logistic.regression) %>%
  add_model(spec.logistic.regression)
wf.logistic.regression
#> == Workflow ====================================================================
#> Preprocessor: Recipe
#> Model: logistic_reg()
#>
#> -- Preprocessor ----------------------------------------------------------------
#> 4 Recipe Steps
#>
#> * step_dummy()
#> * step_impute_knn()
#> * step_normalize()
#> * step_downsample()
#>
#> -- Model -----------------------------------------------------------------------
#> Logistic Regression Model Specification (classification)
#>
#> Computational engine: glm
rs.logistic.regression <- tune_race_anova(
  wf.logistic.regression,
  resamples = cv.folds,
  grid = 25,
  metrics = metric_set(accuracy),
  control = control_race(verbose = TRUE, verbose_elim = TRUE,
                         parallel_over = "everything",
                         save_pred = TRUE,
                         save_workflow = TRUE)
)
#> i Racing will maximize the accuracy metric.
#> i Resamples are analyzed in a random order.
#> i Fold4: 1 eliminated; 9 candidates remain.
show_best(rs.logistic.regression)
#> # A tibble: 5 x 7
#> neighbors .metric .estimator mean n std_err .config
#> <int> <chr> <chr> <dbl> <int> <dbl> <chr>
#> 1 9 accuracy binary 0.791 4 0.0193 Preprocessor01_Model1
#> 2 2 accuracy binary 0.788 4 0.0186 Preprocessor08_Model1
#> 3 4 accuracy binary 0.788 4 0.0190 Preprocessor09_Model1
#> 4 1 accuracy binary 0.787 4 0.0205 Preprocessor05_Model1
#> 5 10 accuracy binary 0.787 4 0.0205 Preprocessor10_Model1
#########################################################
# Random Forest Model A -- This Works
# Tuning Recipe Parameters: No
# Tuning Model Hyperparameters: Yes
recipe.random.forest.a <-
  recipe(Survived ~ ., data = train.df) %>%
  update_role(PassengerId, new_role = 'ID') %>%
  step_impute_knn(all_predictors(),
                  neighbors = 5) %>% # <-- Manually setting value for neighbors
  step_downsample(Survived, seed = 456)
spec.random.forest.a <-
  rand_forest(mtry = tune(),
              min_n = tune(),
              trees = tune()) %>%
  set_mode("classification") %>%
  set_engine("ranger")
wf.random.forest.a <-
  workflow() %>%
  add_recipe(recipe.random.forest.a) %>%
  add_model(spec.random.forest.a)
wf.random.forest.a
#> == Workflow ====================================================================
#> Preprocessor: Recipe
#> Model: rand_forest()
#>
#> -- Preprocessor ----------------------------------------------------------------
#> 2 Recipe Steps
#>
#> * step_impute_knn()
#> * step_downsample()
#>
#> -- Model -----------------------------------------------------------------------
#> Random Forest Model Specification (classification)
#>
#> Main Arguments:
#> mtry = tune()
#> trees = tune()
#> min_n = tune()
#>
#> Computational engine: ranger
rs.random.forest.a <- tune_race_anova(
  wf.random.forest.a,
  resamples = cv.folds,
  grid = 25,
  metrics = metric_set(accuracy),
  control = control_race(verbose = TRUE, verbose_elim = TRUE,
                         parallel_over = "everything",
                         save_pred = TRUE,
                         save_workflow = TRUE)
)
#> i Creating pre-processing data to finalize unknown parameter: mtry
#> i Racing will maximize the accuracy metric.
#> i Resamples are analyzed in a random order.
#> i Fold4: 4 eliminated; 21 candidates remain.
show_best(rs.random.forest.a)
#> # A tibble: 5 x 9
#> mtry trees min_n .metric .estimator mean n std_err .config
#> <int> <int> <int> <chr> <chr> <dbl> <int> <dbl> <chr>
#> 1 4 837 18 accuracy binary 0.818 4 0.00685 Preprocessor1_Model~
#> 2 4 1968 16 accuracy binary 0.817 4 0.00738 Preprocessor1_Model~
#> 3 4 1439 25 accuracy binary 0.817 4 0.00664 Preprocessor1_Model~
#> 4 3 1769 10 accuracy binary 0.816 4 0.0130 Preprocessor1_Model~
#> 5 3 1478 13 accuracy binary 0.816 4 0.0109 Preprocessor1_Model~
#########################################################
# Random Forest Model B -- This Does Not Work
# Tuning Recipe Parameters: Yes
# Tuning Model Hyperparameters: Yes
recipe.random.forest.b <-
  recipe(Survived ~ ., data = train.df) %>%
  update_role(PassengerId, new_role = 'ID') %>%
  step_impute_knn(all_predictors(),
                  neighbors = tune()) %>% # <-- Tuning neighbors
  step_downsample(Survived, seed = 456)
spec.random.forest.b <-
  rand_forest(mtry = tune(),
              min_n = tune(),
              trees = tune()) %>%
  set_mode("classification") %>%
  set_engine("ranger")
wf.random.forest.b <-
  workflow() %>%
  add_recipe(recipe.random.forest.b) %>%
  add_model(spec.random.forest.b)
wf.random.forest.b
#> == Workflow ====================================================================
#> Preprocessor: Recipe
#> Model: rand_forest()
#>
#> -- Preprocessor ----------------------------------------------------------------
#> 2 Recipe Steps
#>
#> * step_impute_knn()
#> * step_downsample()
#>
#> -- Model -----------------------------------------------------------------------
#> Random Forest Model Specification (classification)
#>
#> Main Arguments:
#> mtry = tune()
#> trees = tune()
#> min_n = tune()
#>
#> Computational engine: ranger
rs.random.forest.b <- tune_race_anova(
  wf.random.forest.b,
  resamples = cv.folds,
  grid = 25,
  metrics = metric_set(accuracy),
  control = control_race(verbose = TRUE, verbose_elim = TRUE,
                         parallel_over = "everything",
                         save_pred = TRUE,
                         save_workflow = TRUE)
)
#> Error: Some tuning parameters require finalization but there are recipe parameters that require tuning. Please use `parameters()` to finalize the parameter ranges.
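For reference, a minimal sketch of the corrective action the message points to (an untested assumption, not verified output): build the workflow's parameter set with parameters(), give mtry a concrete range by hand, since its upper bound depends on the number of predictor columns and cannot be finalized automatically while the recipe has its own tuning parameter, and pass the finalized set to tune_race_anova() via param_info. The range below assumes the 7 predictors that remain once PassengerId is given the ID role.
params.random.forest.b <-
  parameters(wf.random.forest.b) %>%
  update(mtry = mtry(c(1, 7)))  # finalize mtry's range manually (7 predictors assumed)
rs.random.forest.b <- tune_race_anova(
  wf.random.forest.b,
  resamples = cv.folds,
  param_info = params.random.forest.b,  # supply the finalized parameter set
  grid = 25,
  metrics = metric_set(accuracy),
  control = control_race(verbose = TRUE, verbose_elim = TRUE,
                         parallel_over = "everything",
                         save_pred = TRUE,
                         save_workflow = TRUE)
)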
#########################################################
sessionInfo()
#> R version 4.1.0 (2021-05-18)
#> Platform: x86_64-w64-mingw32/x64 (64-bit)
#> Running under: Windows 10 x64 (build 19041)
#>
#> Matrix products: default
#>
#> locale:
#> [1] LC_COLLATE=English_United States.1252
#> [2] LC_CTYPE=English_United States.1252
#> [3] LC_MONETARY=English_United States.1252
#> [4] LC_NUMERIC=C
#> [5] LC_TIME=English_United States.1252
#>
#> attached base packages:
#> [1] parallel stats graphics grDevices utils datasets methods
#> [8] base
#>
#> other attached packages:
#> [1] titanic_0.1.0 doParallel_1.0.16 iterators_1.0.13 foreach_1.5.1
#> [5] finetune_0.1.0 themis_0.1.4 yardstick_0.0.8 workflowsets_0.1.0
#> [9] workflows_0.2.3 tune_0.1.6 rsample_0.1.0 recipes_0.1.16
#> [13] parsnip_0.1.7 modeldata_0.1.1 infer_0.5.4 dials_0.0.9
#> [17] scales_1.1.1 broom_0.7.9 tidymodels_0.1.3 forcats_0.5.1
#> [21] stringr_1.4.0 dplyr_1.0.7 purrr_0.3.4 readr_2.0.0
#> [25] tidyr_1.1.3 tibble_3.1.3 ggplot2_3.3.5 tidyverse_1.3.1
#>
#> loaded via a namespace (and not attached):
#> [1] minqa_1.2.4 colorspace_2.0-2 ellipsis_0.3.2 class_7.3-19
#> [5] fs_1.5.0 rstudioapi_0.13 listenv_0.8.0 furrr_0.2.3
#> [9] ParamHelpers_1.14 prodlim_2019.11.13 fansi_0.5.0 lubridate_1.7.10
#> [13] ranger_0.13.1 xml2_1.3.2 codetools_0.2-18 splines_4.1.0
#> [17] knitr_1.33 jsonlite_1.7.2 nloptr_1.2.2.2 pROC_1.17.0.1
#> [21] dbplyr_2.1.1 compiler_4.1.0 httr_1.4.2 backports_1.2.1
#> [25] assertthat_0.2.1 Matrix_1.3-4 cli_3.0.1 htmltools_0.5.1.1
#> [29] tools_4.1.0 gtable_0.3.0 glue_1.4.2 RANN_2.6.1
#> [33] parallelMap_1.5.1 fastmatch_1.1-3 Rcpp_1.0.7 cellranger_1.1.0
#> [37] styler_1.5.1 DiceDesign_1.9 vctrs_0.3.8 nlme_3.1-152
#> [41] timeDate_3043.102 mlr_2.19.0 gower_0.2.2 xfun_0.25
#> [45] globals_0.14.0 lme4_1.1-27.1 rvest_1.0.1 lifecycle_1.0.0
#> [49] future_1.21.0 MASS_7.3-54 ipred_0.9-11 hms_1.1.0
#> [53] BBmisc_1.11 yaml_2.2.1 rpart_4.1-15 stringi_1.7.3
#> [57] highr_0.9 checkmate_2.0.0 lhs_1.1.1 boot_1.3-28
#> [61] hardhat_0.1.6 lava_1.6.9 rlang_0.4.11 pkgconfig_2.0.3
#> [65] evaluate_0.14 lattice_0.20-44 tidyselect_1.1.1 parallelly_1.27.0
#> [69] plyr_1.8.6 magrittr_2.0.1 R6_2.5.0 generics_0.1.0
#> [73] DBI_1.1.1 pillar_1.6.2 haven_2.4.3 withr_2.4.2
#> [77] survival_3.2-11 nnet_7.3-16 ROSE_0.0-4 modelr_0.1.8
#> [81] crayon_1.4.1 unbalanced_2.0 utf8_1.2.2 tzdb_0.1.2
#> [85] rmarkdown_2.10 grid_4.1.0 readxl_1.3.1 data.table_1.14.0
#> [89] FNN_1.1.3 reprex_2.0.1 digest_0.6.27 munsell_0.5.0
#> [93] GPfit_1.0-8
Created on 2021-08-07 by the reprex package (v2.0.1)

Related

Get the mean for every iteration

I'm new to R and hoping someone can help me.
I am trying to get the running mean of the first n values of i on the nth iteration (for example, the first value on the first iteration, the first two values on the second iteration, and so on).
How do I go about doing this?
Here is the sample data:
set.seed(1234)
i <- sample(200,100)
An alternative, possibly simpler, solution:
set.seed(1234)
i <- sample(200,100)
cumsum(i)/(1:100)
#> [1] 28.00000 54.00000 86.00000 89.75000 94.00000 101.16667 105.71429
#> [8] 113.25000 116.66667 118.20000 116.36364 115.25000 113.30769 110.21429
#> [15] 108.13333 108.62500 103.05882 104.33333 102.10526 97.20000 101.66667
#> [22] 103.81818 101.04348 100.70833 101.56000 105.11538 103.66667 105.96429
#> [29] 106.55172 104.60000 104.70968 105.53125 104.96970 103.08824 103.42857
#> [36] 102.55556 104.10811 102.47368 100.94872 98.47500 98.92683 101.00000
#> [43] 99.79070 99.84091 98.75556 99.52174 100.76596 101.87500 100.95918
#> [50] 101.66000 100.17647 101.03846 102.37736 100.62963 100.54545 99.14286
#> [57] 98.01754 99.20690 100.38983 100.15000 101.00000 99.53226 99.68254
#> [64] 100.34375 100.07692 101.39394 100.17910 99.75000 99.18841 99.85714
#> [71] 100.35211 100.72222 102.04110 101.02703 100.69333 101.53947 102.44156
#> [78] 101.89744 101.43038 100.61250 100.83951 102.04878 101.04819 99.95238
#> [85] 99.12941 98.70930 97.77011 98.44318 98.92135 98.46667 97.45055
#> [92] 97.31522 97.75269 97.05319 96.84211 97.02083 97.81443 97.93878
#> [99] 98.92929 99.55000
Created on 2022-03-04 by the reprex package (v2.0.1)
Here's a one-liner to get the result:
sapply(1:100, function(x) mean(i[seq(x)]))
#> [1] 28.00000 54.00000 86.00000 89.75000 94.00000 101.16667 105.71429
#> [8] 113.25000 116.66667 118.20000 116.36364 115.25000 113.30769 110.21429
#> [15] 108.13333 108.62500 103.05882 104.33333 102.10526 97.20000 101.66667
#> [22] 103.81818 101.04348 100.70833 101.56000 105.11538 103.66667 105.96429
#> [29] 106.55172 104.60000 104.70968 105.53125 104.96970 103.08824 103.42857
#> [36] 102.55556 104.10811 102.47368 100.94872 98.47500 98.92683 101.00000
#> [43] 99.79070 99.84091 98.75556 99.52174 100.76596 101.87500 100.95918
#> [50] 101.66000 100.17647 101.03846 102.37736 100.62963 100.54545 99.14286
#> [57] 98.01754 99.20690 100.38983 100.15000 101.00000 99.53226 99.68254
#> [64] 100.34375 100.07692 101.39394 100.17910 99.75000 99.18841 99.85714
#> [71] 100.35211 100.72222 102.04110 101.02703 100.69333 101.53947 102.44156
#> [78] 101.89744 101.43038 100.61250 100.83951 102.04878 101.04819 99.95238
#> [85] 99.12941 98.70930 97.77011 98.44318 98.92135 98.46667 97.45055
#> [92] 97.31522 97.75269 97.05319 96.84211 97.02083 97.81443 97.93878
#> [99] 98.92929 99.55000
Created on 2022-03-04 by the reprex package (v2.0.1)
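If dplyr is loaded, its cummean() gives the same running mean directly; note also that the cumsum() approach does O(n) work in total, while the sapply() one-liner recomputes each mean from scratch. A small sketch using the same seed:
set.seed(1234)
i <- sample(200, 100)
dplyr::cummean(i)  # running mean, equivalent to cumsum(i) / seq_along(i)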

Extract names of genes expressed by at least 10% of cells in a cluster

I have a Seurat object with defined clusters. I need to extract a list of all genes that are expressed by at least 10% of cells in my cluster. I need to repeat it for every cluster that I have, separately.
I know one code that could potentially extract genes expressed by at least 10% of cells from the whole Seurat:
genes.to.keep <- Matrix::rowSums(Monocyte.integrated@assays$RNA@counts > 0) >= floor(0.1 * ncol(Monocyte.integrated@assays$RNA@counts))
counts.sub <- Monocyte.integrated@assays$RNA@counts[genes.to.keep, ]
But this is not quite what I want, and I'm not sure how to modify it to work per cluster (assuming it is correct in the first place).
I store the cluster names in the metadata variable called "cluster_names".
I would appreciate any help.
BW
You could use lapply to iterate over the factor levels of your clusters to subset and filter them individually and use setNames to name the resulting list. Below is a reproducible example:
library(Seurat)
data("pbmc_small")
pbmc_small <- FindClusters(pbmc_small, resolution = 1)
names(pbmc_small@meta.data)[names(pbmc_small@meta.data) == "seurat_clusters"] <- "cluster_names"
levels(pbmc_small$cluster_names) <- paste0("cluster_", seq_along(levels(pbmc_small$cluster_names)))
setNames(lapply(levels(pbmc_small$cluster_names), function(x) {
  p <- subset(pbmc_small, cluster_names == x)
  rownames(p)[Matrix::rowSums(p@assays$RNA@counts > 0) >= .1 * dim(p)[2]]
}), levels(pbmc_small$cluster_names))
#> $cluster_1
#> [1] "CD79B" "HLA-DRA" "LTB" "SP100" "PPP3CC" "CXCR4"
#> [7] "STX10" "SNHG7" "CD3D" "NOSIP" "SAFB2" "CD2"
#> [13] "IL7R" "PIK3IP1" "MPHOSPH6" "KHDRBS1" "MAL" "CCR7"
#> [19] "THYN1" "TAF7" "LDHB" "TMEM123" "EPC1" "EIF4A2"
#> [25] "CD3E" "TMUB1" "BLOC1S4" "SRSF7" "ACAP1" "TNFAIP8"
#> [31] "CD7" "TAGAP" "DNAJB1" "ASNSD1" "S1PR4" "CTSW"
#> [37] "GZMK" "NKG7" "IL32" "DNAJC2" "LYAR" "CST7"
#> [43] "LCK" "CCL5" "HNRNPH1" "SSR2" "GIMAP1" "MMADHC"
#> [49] "CD8A" "GYPC" "HNRNPF" "RPL7L1" "KLRG1" "CRBN"
#> [55] "SATB1" "PMPCB" "NRBP1" "TCF7" "HNRNPA3" "S100A8"
#> [61] "S100A9" "LYZ" "FCN1" "TYROBP" "NFKBIA" "TYMP"
#> [67] "CTSS" "TSPO" "CTSB" "LGALS1" "BLVRA" "LGALS3"
#> [73] "IFI6" "HLA-DPA1" "CST3" "GSTP1" "EIF3G" "VPS28"
#> [79] "ZFP36L1" "ANXA2" "HSP90AA1" "LST1" "AIF1" "PSAP"
#> [85] "YWHAB" "MYO1G" "SAT1" "RGS2" "FCGR3A" "S100A11"
#> [91] "FCER1G" "IFITM2" "COTL1" "LGALS9" "CD68" "RHOC"
#> [97] "CARD16" "COPS6" "PPBP" "GPX1" "TPM4" "PF4"
#> [103] "SDPR" "NRGN" "SPARC" "GNG11" "CLU" "HIST1H2AC"
#> [109] "NCOA4" "GP9" "FERMT3" "ODC1" "CD9" "RUFY1"
#> [115] "TUBB1" "TALDO1" "TREML1" "NGFRAP1" "PGRMC1" "CA2"
#> [121] "ITGA2B" "MYL9" "TMEM40" "PARVB" "PTCRA" "ACRBP"
#> [127] "TSC22D1" "VDAC3" "GZMB" "GZMA" "GNLY" "FGFBP2"
#> [133] "AKR1C3" "CCL4" "PRF1" "GZMH" "XBP1" "GZMM"
#> [139] "PTGDR" "IGFBP7" "TTC38" "KLRD1" "ARHGDIA" "IL2RB"
#> [145] "CLIC3" "PPP1R18" "CD247" "ALOX5AP" "XCL2" "C12orf75"
#> [151] "RARRES3" "PCMT1" "LAMP1" "SPON2"
#>
#> $cluster_2
#> [1] "CD79B" "CD79A" "HLA-DRA" "HLA-DQB1"
#> [5] "HVCN1" "HLA-DMB" "LTB" "SP100"
#> [9] "NCF1" "EAF2" "FAM96A" "CXCR4"
#> [13] "STX10" "SNHG7" "NT5C" "NOSIP"
#> [17] "IL7R" "KHDRBS1" "TAF7" "LDHB"
#> [21] "TMEM123" "EIF4A2" "TMUB1" "BLOC1S4"
#> [25] "SRSF7" "TNFAIP8" "TAGAP" "DNAJB1"
#> [29] "S1PR4" "NKG7" "IL32" "DNAJC2"
#> [33] "LYAR" "CCL5" "SSR2" "GIMAP1"
#> [37] "MMADHC" "HNRNPF" "RPL7L1" "HNRNPA3"
#> [41] "S100A8" "S100A9" "LYZ" "CD14"
#> [45] "FCN1" "TYROBP" "ASGR1" "NFKBIA"
#> [49] "TYMP" "CTSS" "TSPO" "RBP7"
#> [53] "CTSB" "LGALS1" "FPR1" "VSTM1"
#> [57] "BLVRA" "MPEG1" "BID" "SMCO4"
#> [61] "CFD" "LINC00936" "LGALS2" "MS4A6A"
#> [65] "FCGRT" "LGALS3" "NUP214" "SCO2"
#> [69] "IL17RA" "IFI6" "HLA-DPA1" "FCER1A"
#> [73] "CLEC10A" "HLA-DMA" "RGS1" "HLA-DPB1"
#> [77] "HLA-DQA1" "RNF130" "HLA-DRB5" "HLA-DRB1"
#> [81] "CST3" "IL1B" "POP7" "HLA-DQA2"
#> [85] "GSTP1" "EIF3G" "VPS28" "LY86"
#> [89] "ZFP36L1" "ANXA2" "GRN" "CFP"
#> [93] "HSP90AA1" "LST1" "AIF1" "PSAP"
#> [97] "YWHAB" "MYO1G" "SAT1" "RGS2"
#> [101] "SERPINA1" "IFITM3" "FCGR3A" "LILRA3"
#> [105] "S100A11" "FCER1G" "TNFRSF1B" "IFITM2"
#> [109] "WARS" "IFI30" "MS4A7" "C5AR1"
#> [113] "HCK" "COTL1" "LGALS9" "CD68"
#> [117] "RP11-290F20.3" "RHOC" "CARD16" "LRRC25"
#> [121] "COPS6" "ADAR" "GPX1" "TPM4"
#> [125] "NRGN" "NCOA4" "FERMT3" "ODC1"
#> [129] "TALDO1" "PARVB" "VDAC3" "GZMB"
#> [133] "XBP1" "IGFBP7" "ARHGDIA" "PPP1R18"
#> [137] "ALOX5AP" "RARRES3" "PCMT1" "SPON2"
#>
#> $cluster_3
#> [1] "MS4A1" "CD79B" "CD79A" "HLA-DRA"
#> [5] "TCL1A" "HLA-DQB1" "HVCN1" "HLA-DMB"
#> [9] "LTB" "LINC00926" "FCER2" "SP100"
#> [13] "NCF1" "PPP3CC" "EAF2" "PPAPDC1B"
#> [17] "CD19" "KIAA0125" "CYB561A3" "CD180"
#> [21] "RP11-693J15.5" "FAM96A" "CXCR4" "STX10"
#> [25] "SNHG7" "NT5C" "BANK1" "IGLL5"
#> [29] "CD200" "FCRLA" "CD3D" "NOSIP"
#> [33] "CD2" "IL7R" "PIK3IP1" "KHDRBS1"
#> [37] "THYN1" "TAF7" "LDHB" "TMEM123"
#> [41] "CCDC104" "EPC1" "EIF4A2" "CD3E"
#> [45] "SRSF7" "ACAP1" "TNFAIP8" "CD7"
#> [49] "TAGAP" "DNAJB1" "S1PR4" "CTSW"
#> [53] "GZMK" "NKG7" "IL32" "DNAJC2"
#> [57] "LYAR" "CST7" "LCK" "CCL5"
#> [61] "HNRNPH1" "SSR2" "GIMAP1" "MMADHC"
#> [65] "CD8A" "PTPN22" "GYPC" "HNRNPF"
#> [69] "RPL7L1" "CRBN" "SATB1" "SIT1"
#> [73] "PMPCB" "NRBP1" "TCF7" "HNRNPA3"
#> [77] "S100A9" "LYZ" "FCN1" "TYROBP"
#> [81] "NFKBIA" "TYMP" "CTSS" "TSPO"
#> [85] "CTSB" "LGALS1" "BLVRA" "MPEG1"
#> [89] "BID" "CFD" "LINC00936" "LGALS2"
#> [93] "MS4A6A" "FCGRT" "LGALS3" "SCO2"
#> [97] "HLA-DPA1" "FCER1A" "CLEC10A" "HLA-DMA"
#> [101] "RGS1" "HLA-DPB1" "HLA-DQA1" "RNF130"
#> [105] "HLA-DRB5" "HLA-DRB1" "CST3" "IL1B"
#> [109] "POP7" "HLA-DQA2" "CD1C" "GSTP1"
#> [113] "EIF3G" "VPS28" "LY86" "ZFP36L1"
#> [117] "ZNF330" "ANXA2" "GRN" "CFP"
#> [121] "HSP90AA1" "FUOM" "LST1" "AIF1"
#> [125] "PSAP" "YWHAB" "MYO1G" "SAT1"
#> [129] "RGS2" "SERPINA1" "IFITM3" "FCGR3A"
#> [133] "S100A11" "FCER1G" "TNFRSF1B" "IFITM2"
#> [137] "WARS" "IFI30" "MS4A7" "HCK"
#> [141] "COTL1" "LGALS9" "CD68" "RHOC"
#> [145] "CARD16" "LRRC25" "COPS6" "ADAR"
#> [149] "GPX1" "TPM4" "NCOA4" "FERMT3"
#> [153] "ODC1" "RUFY1" "TALDO1" "VDAC3"
#> [157] "GZMA" "GNLY" "FGFBP2" "PRF1"
#> [161] "XBP1" "GZMM" "PTGDR" "ARHGDIA"
#> [165] "PPP1R18" "CD247" "ALOX5AP" "XCL2"
#> [169] "C12orf75" "RARRES3" "PCMT1" "SPON2"
Created on 2021-03-26 by the reprex package (v1.0.0)
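As a usage note: assign the list to a name to work with individual clusters afterwards (expressed_genes is a hypothetical name):
expressed_genes <- setNames(lapply(levels(pbmc_small$cluster_names), function(x) {
  p <- subset(pbmc_small, cluster_names == x)
  rownames(p)[Matrix::rowSums(p@assays$RNA@counts > 0) >= .1 * dim(p)[2]]
}), levels(pbmc_small$cluster_names))
expressed_genes[["cluster_2"]]  # genes expressed in at least 10% of cluster_2 cells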

Why does the R session break when I try to make a prediction with a lightGBM model saved as ".rds"?

After fitting a lightGBM model with tidymodels and treesnip, I can take the fitted workflow and make predictions on new data without any problems. However, after saving the fitted model in ".rds" format, closing the session, and loading the ".rds" model in a new session, the R session crashes when I try to generate a prediction.
This only happens with the lightGBM model; it does not happen with any other type of model. Here is a reproducible example:
The lightgbm package was installed as follows:
PKG_URL <- "https://github.com/microsoft/LightGBM/releases/download/v3.0.0/lightgbm-3.0.0-r-cran.tar.gz"
remotes::install_url(PKG_URL)
library(dplyr)
library(parsnip)
library(rsample)
library(yardstick)
library(recipes)
library(workflows)
library(dials)
library(tune)
library(treesnip)
data <- bind_rows(iris, iris, iris, iris, iris, iris, iris)
set.seed(2)
initial_split <- initial_split(data, prop = 0.75)
train <- training(initial_split)
test <- testing(initial_split)
initial_split
#> <Analysis/Assess/Total>
#> <788/262/1050>
recipe <- recipe(Sepal.Length ~ ., data = data) %>%
  step_dummy(all_nominal(), -all_outcomes())
model <- boost_tree(
  mtry = 3,
  trees = 1000,
  min_n = tune(),
  tree_depth = tune(),
  loss_reduction = tune(),
  learn_rate = tune(),
  sample_size = 0.75
) %>%
  set_mode("regression") %>%
  set_engine("lightgbm")
wf <- workflow() %>%
  add_model(model) %>%
  add_recipe(recipe)
wf
#> ══ Workflow ════════════════════════════════════════════════════════════════════
#> Preprocessor: Recipe
#> Model: boost_tree()
#>
#> ── Preprocessor ────────────────────────────────────────────────────────────────
#> 1 Recipe Step
#>
#> ● step_dummy()
#>
#> ── Model ───────────────────────────────────────────────────────────────────────
#> Boosted Tree Model Specification (regression)
#>
#> Main Arguments:
#> mtry = 3
#> trees = 1000
#> min_n = tune()
#> tree_depth = tune()
#> learn_rate = tune()
#> loss_reduction = tune()
#> sample_size = 0.75
#>
#> Computational engine: lightgbm
# resamples
resamples <- vfold_cv(train, v = 3)
# grid
grid <- parameters(model) %>%
  finalize(train) %>%
  grid_random(size = 10)
head(grid)
#> # A tibble: 6 x 4
#> min_n tree_depth learn_rate loss_reduction
#> <int> <int> <dbl> <dbl>
#> 1 2 4 0.000282 0.0000402
#> 2 13 10 0.00333 13.0
#> 3 32 11 0.000000585 0.000106
#> 4 32 7 0.000258 0.163
#> 5 31 13 0.0000000881 0.000479
#> 6 19 14 0.000000167 0.00174
# grid search
tune_grid <- wf %>%
  tune_grid(
    resamples = resamples,
    grid = grid,
    control = control_grid(verbose = FALSE),
    metrics = metric_set(rmse)
  )
# select the best hyperparameters found
best_params <- select_best(tune_grid, "rmse")
wf <- wf %>% finalize_workflow(best_params)
wf
#> ══ Workflow ════════════════════════════════════════════════════════════════════
#> Preprocessor: Recipe
#> Model: boost_tree()
#>
#> ── Preprocessor ────────────────────────────────────────────────────────────────
#> 1 Recipe Step
#>
#> ● step_dummy()
#>
#> ── Model ───────────────────────────────────────────────────────────────────────
#> Boosted Tree Model Specification (regression)
#>
#> Main Arguments:
#> mtry = 3
#> trees = 1000
#> min_n = 13
#> tree_depth = 10
#> learn_rate = 0.00333377440294304
#> loss_reduction = 13.0320661814971
#> sample_size = 0.75
#>
#> Computational engine: lightgbm
# last fit
last_fit <- last_fit(wf, initial_split)
# metrics
collect_metrics(last_fit)
#> # A tibble: 2 x 3
#> .metric .estimator .estimate
#> <chr> <chr> <dbl>
#> 1 rmse standard 0.380
#> 2 rsq standard 0.837
# fit to predict new data
model_fit <- fit(wf, data)
#> [LightGBM] [Warning] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000020 seconds.
#> You can set `force_row_wise=true` to remove the overhead.
#> And if memory is not enough, you can set `force_col_wise=true`.
#> [LightGBM] [Info] Total Bins 95
#> [LightGBM] [Info] Number of data points in the train set: 1050, number of used features: 5
#> [LightGBM] [Info] Start training from score 5.843333
#> [LightGBM] [Warning] No further splits with positive gain, best gain: -inf
#> [LightGBM] [Warning] No further splits with positive gain, best gain: -inf
#> [LightGBM] [Warning] No further splits with positive gain, best gain: -inf
#> [LightGBM] [Warning] No further splits with positive gain, best gain: -inf
#> [LightGBM] [Warning] No further splits with positive gain, best gain: -inf
#> [LightGBM] [Warning] No further splits with positive gain, best gain: -inf
#> [LightGBM] [Warning] No further splits with positive gain, best gain: -inf
#> [LightGBM] [Warning] No further splits with positive gain, best gain: -inf
#> [LightGBM] [Warning] No further splits with positive gain, best gain: -inf
#> [LightGBM] [Warning] No further splits with positive gain, best gain: -inf
#> [LightGBM] [Warning] No further splits with positive gain, best gain: -inf
#> [LightGBM] [Warning] No further splits with positive gain, best gain: -inf
#> [LightGBM] [Warning] No further splits with positive gain, best gain: -inf
#> [LightGBM] [Warning] No further splits with positive gain, best gain: -inf
#> [LightGBM] [Warning] No further splits with positive gain, best gain: -inf
#> [LightGBM] [Warning] No further splits with positive gain, best gain: -inf
#> [LightGBM] [Warning] No further splits with positive gain, best gain: -inf
#> [LightGBM] [Warning] No further splits with positive gain, best gain: -inf
#> [LightGBM] [Warning] No further splits with positive gain, best gain: -inf
.................................................................................
predicciones = predict(model_fit, iris)
head(predicciones)
#> # A tibble: 6 x 1
#> .pred
#> <dbl>
#> 1 5.13
#> 2 5.12
#> 3 5.12
#> 4 5.12
#> 5 5.13
#> 6 5.25
# save model
saveRDS(model_fit, "model_fit.rds")
After saving the model, I close the session and in a new session load the model.
model <- readRDS("model_fit.rds")
predicciones = predict(model, iris)
When I try to generate the prediction, the R session crashes. An alternative that mostly works is to pull the model fit out of the workflow and save it with lightgbm's own method; however, I then lose everything else stored in the workflow. I will be attentive to any help or suggestions.
pull_lightgbm <- pull_workflow_fit(model_fit)  # extract the parsnip fit from the workflow
library(lightgbm)
lgb.save(pull_lightgbm$fit, "lightgbm.model")  # save with lightgbm's native serializer
model <- lgb.load("lightgbm.model")            # reload in a new session
sessionInfo()
#> R version 4.0.3 (2020-10-10)
#> Platform: x86_64-apple-darwin17.0 (64-bit)
#> Running under: macOS Mojave 10.14.6
#>
#> Matrix products: default
#> BLAS: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
#> LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
#>
#> locale:
#> [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] treesnip_0.1.0.9000 tune_0.1.1 dials_0.0.9
#> [4] scales_1.1.1 workflows_0.2.1 recipes_0.1.14
#> [7] yardstick_0.0.7 rsample_0.0.8 parsnip_0.1.4
#> [10] dplyr_1.0.2
#>
#> loaded via a namespace (and not attached):
#> [1] Rcpp_1.0.5 lubridate_1.7.9 lattice_0.20-41 tidyr_1.1.2
#> [5] listenv_0.8.0 class_7.3-17 assertthat_0.2.1 digest_0.6.27
#> [9] ipred_0.9-9 foreach_1.5.1 parallelly_1.21.0 R6_2.5.0
#> [13] plyr_1.8.6 evaluate_0.14 ggplot2_3.3.2 highr_0.8
#> [17] pillar_1.4.6 rlang_0.4.8 DiceDesign_1.8-1 furrr_0.2.1
#> [21] rpart_4.1-15 Matrix_1.2-18 rmarkdown_2.5 splines_4.0.3
#> [25] gower_0.2.2 stringr_1.4.0 munsell_0.5.0 compiler_4.0.3
#> [29] xfun_0.19 pkgconfig_2.0.3 globals_0.13.1 htmltools_0.5.0
#> [33] nnet_7.3-14 tidyselect_1.1.0 tibble_3.0.4 prodlim_2019.11.13
#> [37] codetools_0.2-16 GPfit_1.0-8 fansi_0.4.1 future_1.20.1
#> [41] crayon_1.3.4 withr_2.3.0 MASS_7.3-53 grid_4.0.3
#> [45] gtable_0.3.0 lifecycle_0.2.0 magrittr_1.5 pROC_1.16.2
#> [49] cli_2.1.0 stringi_1.5.3 timeDate_3043.102 ellipsis_0.3.1
#> [53] lhs_1.1.1 generics_0.1.0 vctrs_0.3.4 lava_1.6.8.1
#> [57] iterators_1.0.13 tools_4.0.3 glue_1.4.2 purrr_0.3.4
#> [61] parallel_4.0.3 survival_3.2-7 yaml_2.2.1 colorspace_1.4-1
#> [65] knitr_1.30
Created on 2020-11-08 by the reprex package (v0.3.0)
For others that may come across this post in the future:
The bonsai package follows up on the treesnip package and fixes many of the issues with LightGBM that you may be seeing.
The development version of the lightgbm R package supports saving with saveRDS()/readRDS() as normal, and will be hitting CRAN in the next few months, so this will "just work" soon.
Generally, if you're not sure whether a model object requires native serialization methods to safely save/reload in new sessions, you can use the bundle package to prepare model objects for saving.
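For illustration, a minimal sketch of the bundle approach (assuming current bundle and tidymodels releases):
library(bundle)
# bundle() prepares the fitted workflow for serialization, including engines
# (like lightgbm) whose objects do not survive a plain saveRDS() round trip.
model_bundle <- bundle(model_fit)
saveRDS(model_bundle, "model_fit.rds")
# In a fresh R session: restore a predict-ready workflow.
model_bundle <- readRDS("model_fit.rds")
model_restored <- unbundle(model_bundle)
predict(model_restored, iris)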

ARIMA fitted model gives NULL

I am trying to plot the residuals vs. the fitted values, but when I use the fitted function on my ARMA model, the output I receive is NULL.
The data I am using is 500 values between roughly -5 and 5.
The data should be modelled well by an ARMA(1,1) process.
I am not sure what the problem is in the following code.
model <- arima(data$Z, order = c(1,0,1), include.mean=FALSE)
fitted(model)
Use the following code. Base R's fitted() has no method for the class returned by arima() (the object does not store fitted values), so it returns NULL; loading the forecast package supplies a fitted() method for these models.
library(forecast)
#> Warning: package 'forecast' was built under R version 3.5.3
z <- runif(500, -5.0, 5)
model <- arima(z, order = c(1, 0, 1), include.mean = FALSE)
fitted(model)
#> Time Series:
#> Start = 1
#> End = 500
#> Frequency = 1
#> [1] -0.0015806455 0.0799286719 -0.1409297625 0.0479671123 -0.1228818961
#> [6] 0.0940340261 -0.0395403451 0.0930088194 -0.0504654231 0.0154369074
#> [11] 0.0133157834 -0.0398617697 0.0848056118 -0.1391454237 0.1515008682
#> [16] -0.1467538990 0.1805508412 -0.1879896786 0.0793030786 -0.1378767013
#> [21] 0.0249573249 0.0287911357 -0.0351466073 -0.0204974526 0.0461081760
#> [26] 0.0026239567 0.0460184801 -0.0203468288 0.0828714994 -0.1221614534
#> [31] 0.0877768930 -0.1300021809 0.1775641943 -0.1583465561 0.0598343159
#> [36] 0.0383818418 -0.0412695391 -0.0322236465 -0.0045104996 0.0464239480
#> [41] -0.0873485626 0.1217045601 -0.0466749971 -0.0100498122 0.0800410409
#> [46] -0.0299737152 -0.0614290196 0.0263853310 -0.0265231697 0.0694531484
#> [51] 0.0298069473 -0.0408218386 -0.0140498359 -0.0338596582 0.0378135790
#> [56] 0.0005786616 0.0066221013 -0.0229934639 -0.0408114564 0.1034192284
#> [61] -0.0377462959 -0.0257183236 -0.0322490101 -0.0111188196 0.0407765161
#> [66] 0.0503798846 -0.0390813201 0.0948137913 -0.1497653064 0.0903615396
#> [71] -0.0827762735 0.0019291654 -0.0496267125 0.0970206197 -0.0931098112
#> [76] 0.0735280460 0.0086683535 -0.0199644624 -0.0002643464 0.0869008538
#> [81] -0.0204382045 -0.0639750387 -0.0111928636 -0.0319269965 0.0897082975
#> [86] -0.1231369993 0.0746107817 -0.0543711631 0.0056392789 -0.0642910157
#> [91] 0.0706781787 0.0120862153 0.0159663078 -0.0730658685 0.0837554717
#> [96] 0.0197429018 -0.0560623745 0.0776559650 -0.0808164436 -0.0082439969
#> [101] -0.0357098828 0.0132052455 -0.0815812696 0.1186676628 -0.1277749333
#> [106] 0.1277066903 -0.0914505386 0.0533966779 0.0102037355 0.0279883047
#> [111] -0.0811406552 0.1212558120 -0.0877936586 0.1084079690 -0.1269089632
#> [116] 0.1391932820 -0.0159836164 0.0100766075 0.0028410998 0.0786503805
#> [121] -0.0516762816 0.0152099611 -0.0599428484 0.1284742491 -0.0168351682
#> [126] 0.0648963409 0.0019635567 0.0818920976 -0.0573381183 0.0346615048
#> [131] -0.0372407913 -0.0482556686 0.0374608687 -0.0196944986 -0.0259857030
#> [136] -0.0661423447 0.0449849608 -0.0088458317 0.0012445222 -0.0368579185
#> [141] -0.0248778616 -0.0081077663 0.0744412577 -0.1315420519 0.0386156339
#> [146] 0.0231558591 0.0494305331 -0.1055739416 0.0748404861 -0.0532585073
#> [151] 0.0474897484 0.0152686161 0.0462263086 -0.0051924179 0.0583029703
#> [156] -0.0013862901 -0.0456514139 0.0310454056 -0.0003521619 -0.0049355367
#> [161] -0.0523830774 0.0596767804 -0.1155786218 0.1455602295 -0.0517246838
#> [166] 0.0866360162 -0.0631074760 0.0991277528 -0.1073100396 0.0881465371
#> [171] 0.0176332718 -0.0523389260 -0.0300377628 0.1071425810 -0.0447753946
#> [176] -0.0140462900 0.0089771025 -0.0607728545 0.0354816226 -0.0583115285
#> [181] 0.0441725572 -0.1010844880 0.1277178696 -0.0858740586 0.1352428209
#> [186] -0.1692240491 0.0833982337 -0.1343390578 0.0863563683 0.0079788036
#> [191] 0.0451385447 -0.0305476615 0.0724859272 -0.0319277030 0.0875179824
#> [196] -0.1143069402 0.1428043464 -0.0542025365 0.0691813621 -0.1378836483
#> [201] 0.1548367687 -0.1547689318 0.0466587034 -0.0270980299 0.0617565456
#> [206] -0.1153437078 0.0819353251 -0.0886510700 0.0086937995 -0.0097789550
#> [211] -0.0618482380 0.1113973826 -0.0825907319 0.0645605858 -0.0099850724
#> [216] -0.0315631929 0.0921659522 -0.1340849744 0.1033218258 -0.1417316946
#> [221] 0.0868881723 -0.1081003833 0.0686838112 0.0146099431 0.0520422776
#> [226] 0.0078375045 -0.0296319158 0.0563414844 0.0421604164 -0.0997564843
#> [231] 0.1578777288 -0.1957675156 0.2243556112 -0.2127929966 0.1288517674
#> [236] -0.1607573440 0.0567032448 -0.0760473709 0.0282155748 -0.0910475040
#> [241] 0.0075793433 0.0309329174 -0.1072956621 0.1580420902 -0.1328846885
#> [246] 0.0884293296 -0.1356508126 0.1722392449 -0.2106045376 0.1054175969
#> [251] -0.0015348903 -0.0373736007 0.0167893100 0.0052500910 -0.0042228543
#> [256] 0.0669646749 0.0186587322 -0.0342439539 -0.0287081617 0.0757394852
#> [261] -0.1300820561 0.0113874056 -0.0732004266 -0.0127913096 0.0443308870
#> [266] -0.0106436071 -0.0434872013 0.0253017841 -0.0152324172 -0.0029074241
#> [271] -0.0832628166 0.0830016957 -0.0670986967 0.0660973240 0.0062552073
#> [276] 0.0537228356 -0.1080867153 0.1092415667 -0.1497847261 0.0859415492
#> [281] -0.1475177902 0.0654457064 -0.0026609979 0.0088159232 0.0707379173
#> [286] -0.1208832375 0.1171907317 -0.0067955664 -0.0662620888 0.0613072133
#> [291] -0.0688126032 0.1002880427 0.0018881851 0.0381840867 -0.0569733203
#> [296] 0.0434666013 0.0255480141 -0.0962203190 0.0012360699 -0.0811855149
#> [301] 0.1181875355 -0.0113710015 0.0075110430 -0.0522479209 -0.0017592812
#> [306] 0.0526061777 -0.0169970424 -0.0076249015 0.0845160198 -0.0902542228
#> [311] 0.0825594728 -0.0687970535 0.0373812783 0.0482434223 -0.0064498737
#> [316] -0.0533391773 0.1073577827 -0.1531742787 0.0639086879 0.0389546639
#> [321] -0.0161981486 0.0975635033 -0.1363791170 0.0140204015 -0.0522590460
#> [326] 0.0649534485 -0.0132355802 -0.0253227616 -0.0664868743 -0.0359240445
#> [331] 0.0146378920 -0.0574512043 -0.0016882519 0.0247159085 -0.0790481636
#> [336] 0.0418239200 0.0188440234 -0.0397519834 0.0722264546 -0.1126478393
#> [341] 0.0993232677 -0.0379553899 0.0368922160 0.0588238729 -0.0838114665
#> [346] -0.0189325360 0.0739032318 -0.0428047888 -0.0466670324 -0.0175479638
#> [351] -0.0441230892 0.1131879514 -0.1219213716 0.1508840663 -0.0961787428
#> [356] 0.1441710031 -0.1831779503 0.1654813243 -0.0949113008 0.1520370285
#> [361] -0.1727357031 0.1351809646 -0.1345324996 0.1686684178 -0.1549104829
#> [366] 0.1862771569 -0.0672782655 0.0734554425 -0.0258031629 -0.0484269379
#> [371] -0.0198109969 -0.0554243649 0.1046794323 -0.1405714927 0.1797461702
#> [376] -0.1419333618 0.1889498847 -0.0645619368 0.0117013953 -0.0567398582
#> [381] 0.0310466272 -0.0872145874 0.0428480299 0.0124185088 -0.0002800209
#> [386] -0.0187980372 -0.0429250516 -0.0115772653 0.0135315958 -0.0252087526
#> [391] -0.0365567495 -0.0225046419 0.0050644310 -0.0611879250 0.0476489402
#> [396] -0.0588489005 0.0560405677 0.0188174734 -0.0203073820 0.0646727336
#> [401] 0.0150454588 -0.0858822185 0.0658706992 0.0307391635 -0.0585834988
#> [406] -0.0248515288 0.0512787227 -0.0030806330 -0.0127171414 0.0043611259
#> [411] -0.0810023715 0.0037203293 0.0149482900 0.0271406549 0.0665726775
#> [416] -0.0854764974 0.0508187103 -0.0620854299 -0.0305801406 0.0974746898
#> [421] -0.0694405928 0.0658222850 -0.0258447983 0.0536554925 -0.0167506813
#> [426] -0.0427748538 0.1163436514 -0.1652023366 0.0534030991 0.0321732584
#> [431] 0.0462210191 0.0039255339 -0.0237493688 0.0874260572 -0.0285420511
#> [436] 0.0628541394 -0.0885130445 0.0668230909 -0.1043385046 0.1072994580
#> [441] -0.1529411364 0.1065325407 -0.0241914848 0.0596283347 -0.1161781497
#> [446] 0.0754220131 -0.1024254740 0.0890813186 -0.1442837568 0.1275026643
#> [451] -0.0156066013 -0.0495071292 -0.0030282481 -0.0748786377 0.1329556363
#> [456] -0.0759620198 0.0869286497 0.0220572915 0.0626362825 -0.0400019048
#> [461] 0.0590547079 -0.0949463899 0.0772250687 0.0297254457 -0.0886943700
#> [466] 0.0201779536 -0.0878654822 -0.0020070518 -0.0185832580 -0.0488934965
#> [471] 0.0325913155 -0.0730349390 0.0001271660 -0.0604520442 0.0575821964
#> [476] -0.0523677730 0.0046346989 -0.0065106330 0.0447399374 0.0391716272
#> [481] 0.0299163020 0.0626436810 -0.0413999734 0.0237195869 0.0638785024
#> [486] -0.1326918031 0.0186015266 0.0726652337 -0.0772833974 -0.0182879433
#> [491] 0.0249745768 0.0336220956 -0.0513471211 0.0202261267 -0.0442003287
#> [496] 0.0826917008 -0.0668356103 0.1329418861 -0.0392132173 0.0669457471
plot(residuals(model))
checkresiduals(model)
#>
#> Ljung-Box test
#>
#> data: Residuals from ARIMA(1,0,1) with zero mean
#> Q* = 3.8204, df = 8, p-value = 0.873
#>
#> Model df: 2. Total lags used: 10
Created on 2019-11-10 by the reprex package (v0.3.0)
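Alternatively, without loading forecast, the one-step fitted values of an arima() fit can be recovered by hand, since they are just the observed series minus the residuals (a sketch reusing z and model from above):
fitted_vals <- z - residuals(model)  # observed minus one-step-ahead residuals
plot(fitted_vals, residuals(model),
     xlab = "Fitted values", ylab = "Residuals")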

How to enumerate all S4 methods implemented by a package?

I'm looking for a way to query all S4 methods implemented by a particular package (given through its namespace environment). I think I could enumerate all objects whose names start with .__T__, but I would prefer a documented and/or less hackish way.
> ls(asNamespace("RSQLite"), all.names = TRUE, pattern = "^[.]__T__")
[1] ".__T__dbBegin:DBI" ".__T__dbBeginTransaction:RSQLite"
[3] ".__T__dbBind:DBI" ".__T__dbClearResult:DBI"
[5] ".__T__dbColumnInfo:DBI" ".__T__dbCommit:DBI"
[7] ".__T__dbConnect:DBI" ".__T__dbDataType:DBI"
[9] ".__T__dbDisconnect:DBI" ".__T__dbExistsTable:DBI"
[11] ".__T__dbFetch:DBI" ".__T__dbGetException:DBI"
[13] ".__T__dbGetInfo:DBI" ".__T__dbGetPreparedQuery:RSQLite"
[15] ".__T__dbGetQuery:DBI" ".__T__dbGetRowCount:DBI"
[17] ".__T__dbGetRowsAffected:DBI" ".__T__dbGetStatement:DBI"
[19] ".__T__dbHasCompleted:DBI" ".__T__dbIsValid:DBI"
[21] ".__T__dbListFields:DBI" ".__T__dbListResults:DBI"
[23] ".__T__dbListTables:DBI" ".__T__dbReadTable:DBI"
[25] ".__T__dbRemoveTable:DBI" ".__T__dbRollback:DBI"
[27] ".__T__dbSendPreparedQuery:RSQLite" ".__T__dbSendQuery:DBI"
[29] ".__T__dbUnloadDriver:DBI" ".__T__dbWriteTable:DBI"
[31] ".__T__fetch:DBI" ".__T__isSQLKeyword:DBI"
[33] ".__T__make.db.names:DBI" ".__T__show:methods"
[35] ".__T__sqlData:DBI" ".__T__SQLKeywords:DBI"
I think showMethods is the only thing available in methods, but it does not actually return the functions as an object; it just prints them to the screen.
The following will return a list of the methods defined in an environment. Adapted from covr::replacements_S4(), which is used to modify all methods in a package to track coverage.
S4_methods <- function(env) {
  generics <- methods::getGenerics(env)
  res <- Map(generics@.Data, generics@package, USE.NAMES = FALSE,
             f = function(name, package) {
               what <- methods::methodsPackageMetaName("T", paste(name, package, sep = ":"))
               table <- get(what, envir = env)
               mget(ls(table, all.names = TRUE), envir = table)
             })
  res[lengths(res) > 0]
}
m <- S4_methods(asNamespace("DBI"))
length(m)
#> [1] 21
m[1:3]
#> [[1]]
#> [[1]]$DBIObject
#> function(dbObj, obj, ...) {
#> dbiDataType(obj)
#> }
#> <environment: namespace:DBI>
#> attr(,"target")
#> An object of class "signature"
#> dbObj
#> "DBIObject"
#> attr(,"defined")
#> An object of class "signature"
#> dbObj
#> "DBIObject"
#> attr(,"generic")
#> [1] "dbDataType"
#> attr(,"generic")attr(,"package")
#> [1] "DBI"
#> attr(,"class")
#> [1] "MethodDefinition"
#> attr(,"class")attr(,"package")
#> [1] "methods"
#>
#>
#> [[2]]
#> [[2]]$character
#> function(drvName, ...) {
#> findDriver(drvName)(...)
#> }
#> <environment: namespace:DBI>
#> attr(,"target")
#> An object of class "signature"
#> drvName
#> "character"
#> attr(,"defined")
#> An object of class "signature"
#> drvName
#> "character"
#> attr(,"generic")
#> [1] "dbDriver"
#> attr(,"generic")attr(,"package")
#> [1] "DBI"
#> attr(,"class")
#> [1] "MethodDefinition"
#> attr(,"class")attr(,"package")
#> [1] "methods"
#>
#>
#> [[3]]
#> [[3]]$`DBIConnection#character`
#> function(conn, statement, ...) {
#> rs <- dbSendStatement(conn, statement, ...)
#> on.exit(dbClearResult(rs))
#> dbGetRowsAffected(rs)
#> }
#> <environment: namespace:DBI>
#> attr(,"target")
#> An object of class "signature"
#> conn statement
#> "DBIConnection" "character"
#> attr(,"defined")
#> An object of class "signature"
#> conn statement
#> "DBIConnection" "character"
#> attr(,"generic")
#> [1] "dbExecute"
#> attr(,"generic")attr(,"package")
#> [1] "DBI"
#> attr(,"class")
#> [1] "MethodDefinition"
#> attr(,"class")attr(,"package")
#> [1] "methods"
I think you want the showMethods function, as in:
showMethods(where=asNamespace("RSQLite"))
The output is:
Function: dbBegin (package DBI)
conn="SQLiteConnection"
Function: dbBeginTransaction (package RSQLite)
conn="ANY"
Function: dbClearResult (package DBI)
res="SQLiteConnection"
res="SQLiteResult"
Function: dbColumnInfo (package DBI)
res="SQLiteResult"
and this goes on for many more rows. ?showMethods describes additional arguments for tailoring the results.
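If you need the listing as text rather than printed to the console, you can capture it (a sketch; capture.output() simply collects what showMethods() prints):
methods_txt <- capture.output(showMethods(where = asNamespace("RSQLite")))
head(methods_txt)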
