I want to use map() to apply summary() on the result of lm(). As long as I run the lm() on all nested groups ("VC" and "OJ"), it works. But how do I do that if lm() was not applicable for one group (e.g., "VC")? I tried with map_if() and map_at() to get lin.mod.res in that case but failed. Any idea?
library(tibble)
library(dplyr)
#>
#> Attache Paket: 'dplyr'
#> Die folgenden Objekte sind maskiert von 'package:stats':
#>
#> filter, lag
#> Die folgenden Objekte sind maskiert von 'package:base':
#>
#> intersect, setdiff, setequal, union
library(tidyr)
library(purrr)
as_tibble(ToothGrowth) %>%
group_by(supp) %>%
nest() %>%
mutate(
lin.mod = map(data, ~ (lm(len ~ dose, .))),
lin.mod.res = map(lin.mod, ~ summary(.))
)
#> # A tibble: 2 x 4
#> # Groups: supp [2]
#> supp data lin.mod lin.mod.res
#> <fct> <list> <list> <list>
#> 1 VC <tibble [30 x 2]> <lm> <smmry.lm>
#> 2 OJ <tibble [30 x 2]> <lm> <smmry.lm>
as_tibble(ToothGrowth) %>%
group_by(supp) %>%
nest() %>%
mutate(
lin.mod = map_if(data, supp != "VC", ~ (lm(len ~ dose, .)), .else = "NA")
)
#> # A tibble: 2 x 3
#> # Groups: supp [2]
#> supp data lin.mod
#> <fct> <list> <list>
#> 1 VC <tibble [30 x 2]> <NULL>
#> 2 OJ <tibble [30 x 2]> <lm>
Created on 2022-10-21 with reprex v2.0.2
It can be a function
as_tibble(ToothGrowth) %>%
group_by(supp) %>%
nest() %>%
mutate(
lin.mod = map_if(data, supp != "VC", ~ (lm(len ~ dose, .)), .else = ~ NA)
)
-output
# A tibble: 2 × 3
# Groups: supp [2]
supp data lin.mod
<fct> <list> <list>
1 VC <tibble [30 × 2]> <lgl [1]>
2 OJ <tibble [30 × 2]> <lm>
Related
I try to transfer the problem from this post to a setting where you use different formulas in the lm()
function in R.
Here a basic setup to reproduce the problem:
library(dplyr)
library(broom)
library(purrr)
library(tidyr)
# Generate data
set.seed(324)
dt <- data.frame(
t = sort(rep(c(1,2), 50)),
w1 = rnorm(100),
w2 = rnorm(100),
x1 = rnorm(100),
x2 = rnorm(100)
)
# Generate formulas
fm <- map(1:2, ~as.formula(paste0("w", .x, "~ x", .x)))
Now I try to run different regressions for each group t with models specified in formulas object fm :
# Approach 1:
dt %>% group_by(t) %>%
do(fit = tidy(map(fm, ~lm(.x, data = .)))) %>%
unnest(fit)
# Approach 2
dt %>% nest(-t) %>%
mutate(
fit = map(fm, ~lm(.x, data = .)),
tfit = tidy(fit)
)
This produces an error indicating that the formula cannot be converted to a data.frame . What am I doing wrong?
This needs map2 instead of map as the data column from nest is also a list of data.frame, and thus we need to loop over the corresponding elements of 'fm' list and data (map2 does that)
library(tidyr)
library(purrr)
library(dplyr)
library(broom)
out <- dt %>%
nest(data = -t) %>%
mutate(
fit = map2(fm, data, ~lm(.x, data = .y)),
tfit = map(fit, tidy))
-output
> out
# A tibble: 2 × 4
t data fit tfit
<dbl> <list> <list> <list>
1 1 <tibble [50 × 4]> <lm> <tibble [2 × 5]>
2 2 <tibble [50 × 4]> <lm> <tibble [2 × 5]>
> bind_rows(out$tfit)
# A tibble: 4 × 5
term estimate std.error statistic p.value
<chr> <dbl> <dbl> <dbl> <dbl>
1 (Intercept) 0.0860 0.128 0.670 0.506
2 x1 0.262 0.119 2.19 0.0331
3 (Intercept) -0.00285 0.152 -0.0187 0.985
4 x2 -0.115 0.154 -0.746 0.459
Or may also use
> imap_dfr(fm, ~ lm(.x, data = dt %>%
filter(t == .y)) %>%
tidy)
# A tibble: 4 × 5
term estimate std.error statistic p.value
<chr> <dbl> <dbl> <dbl> <dbl>
1 (Intercept) 0.0860 0.128 0.670 0.506
2 x1 0.262 0.119 2.19 0.0331
3 (Intercept) -0.00285 0.152 -0.0187 0.985
4 x2 -0.115 0.154 -0.746 0.459
If we want to have all the combinations of 'fm' for each level of 't', then use crossing
dt %>%
nest(data = -t) %>%
crossing(fm) %>%
mutate(fit = map2(fm, data, ~ lm(.x, data = .y)),
tfit = map(fit, tidy))
-output
# A tibble: 4 × 5
t data fm fit tfit
<dbl> <list> <list> <list> <list>
1 1 <tibble [50 × 4]> <formula> <lm> <tibble [2 × 5]>
2 1 <tibble [50 × 4]> <formula> <lm> <tibble [2 × 5]>
3 2 <tibble [50 × 4]> <formula> <lm> <tibble [2 × 5]>
4 2 <tibble [50 × 4]> <formula> <lm> <tibble [2 × 5]>
I'm trying to create a function that will map across a nested tibble. This function needs to take a vector of parameters that will vary for each row.
When I call purrr:map2() on the nested data, purrr tries to loop over all values of the parameter vector and all rows in the dataset. What can I do to pass the entire vector as a single argument?
library(tidyverse)
myf <- function(x, params) {
print(params)
x %>%
mutate(new_mpg = mpg + rnorm(n(), params[1], params[2])) %>%
summarise(old = mean(mpg), new = mean(new_mpg)) %>%
as.list()
}
# Calling function with params defined is great!
myf(mtcars, params = c(5, 10))
#> [1] 5 10
#> $old
#> [1] 20.09062
#>
#> $new
#> [1] 25.62049
# Cannot work in purr as vector, tries to loop over param
mtcars %>%
group_by(cyl) %>% # from base R
nest() %>%
mutate(
newold = map2(data, c(5, 10), myf),
)
#> [1] 5
#> Warning in rnorm(n(), params[1], params[2]): NAs produced
#> [1] 10
#> Warning in rnorm(n(), params[1], params[2]): NAs produced
#> Error: Problem with `mutate()` column `newold`.
#> ℹ `newold = map2(data, c(5, 10), myf)`.
#> ℹ `newold` must be size 1, not 2.
#> ℹ The error occurred in group 1: cyl = 4.
# New function wrapper with hard-coded params
myf2 <- function(x){
myf(x, c(5, 10))
}
# works great! but not what I need
mtcars %>%
group_by(cyl) %>% # from base R
nest() %>%
mutate(
mean = 5,
sd = 10,
newold = map(data, myf2),
)
#> [1] 5 10
#> [1] 5 10
#> [1] 5 10
#> # A tibble: 3 × 5
#> # Groups: cyl [3]
#> cyl data mean sd newold
#> <dbl> <list> <dbl> <dbl> <list>
#> 1 6 <tibble [7 × 10]> 5 10 <named list [2]>
#> 2 4 <tibble [11 × 10]> 5 10 <named list [2]>
#> 3 8 <tibble [14 × 10]> 5 10 <named list [2]>
Created on 2021-11-29 by the reprex package (v2.0.0)
Skip the group_by() step and just use nest() - otherwise your data will remain grouped after nesting and need to be ungrouped. To get your function to work, just pass the parameters as a list.
library(tidyverse)
mtcars %>%
nest(data = -cyl) %>%
mutate(
newold = map2_df(data, list(c(5, 10)), myf)
) %>%
unpack(newold)
# A tibble: 3 x 4
cyl data old new
<dbl> <list> <dbl> <dbl>
1 6 <tibble [7 x 10]> 19.7 30.7
2 4 <tibble [11 x 10]> 26.7 31.1
3 8 <tibble [14 x 10]> 15.1 17.0
You don't need map2. I think what you need is map.
mtcars %>%
group_by(cyl) %>% # from base R
nest() %>%
mutate(
newold = map(data, myf, params = c(5, 10)),
)
# [1] 5 10
# [1] 5 10
# [1] 5 10
# # A tibble: 3 x 3
# # Groups: cyl [3]
# cyl data newold
# <dbl> <list> <list>
# 1 6 <tibble [7 x 10]> <named list [2]>
# 2 4 <tibble [11 x 10]> <named list [2]>
# 3 8 <tibble [14 x 10]> <named list [2]>
If you have multiple sets of params. You can ungroup your data frame, add a list column with your params, and use map2.
mtcars %>%
group_by(cyl) %>%
nest() %>%
ungroup() %>%
# Add different sets of params
mutate(Params = list(a = c(5, 10), b = c(6, 11), c = c(7, 12))) %>%
mutate(
newold = map2(data, Params, myf)
)
# [1] 5 10
# [1] 6 11
# [1] 7 12
# # A tibble: 3 x 4
# cyl data Params newold
# <dbl> <list> <named list> <list>
# 1 6 <tibble [7 x 10]> <dbl [2]> <named list [2]>
# 2 4 <tibble [11 x 10]> <dbl [2]> <named list [2]>
# 3 8 <tibble [14 x 10]> <dbl [2]> <named list [2]>
I'm trying to run a data wrangling procedure inside a tibble using tools from {purrr} package. My method is to organize everything I need inside a tibble:
the input data inside a column
the function to apply upon the input data gets its own column too
My problem: how can I use purrr's mapping functions to say "take the function stored in column x and apply it over the data in column y"?
Below is a minimal example, based on mtcars and iris. I want to summarise each data set, in the same workflow: first subset columns, then do some aggregation. For the aggregation part, I preemptively set up 2 functions, one for each data.
summarise_iris()
summarise_mtcars()
Then I organize all I need inside a tibble (see trb object below).
The first part, the subsetting, works well. As can be seen in trb_1 below, dat_selected is a new column I mutated, which stores the output of the subset step.
However, the second part is not working. I want to take the function in column summarise_func and apply it over the data stored in column dat_selected. But it's not working. Why not? I purposely used map() because it maps only 1 input to the function.
library(purrr)
library(tibble)
library(dplyr, warn.conflicts = FALSE)
summarise_iris <- function(.dat) {
.dat %>%
group_by(Species) %>%
summarise(across(starts_with("Sepal"), ~ mean(.x, na.rm = TRUE)))
}
# to test: iris %>% summarise_iris()
summarise_mtcars <- function(.dat) {
.dat %>%
group_by(am) %>%
summarise(mpg_median = median(mpg))
}
# to test: mtcars %>% summarise_mtcars()
trb <-
tribble(~original_data, ~cols_to_select, ~summarise_func,
mtcars, c("am", "disp", "mpg"), ~summarise_mtcars(.),
iris, c("Species", "Sepal.Length", "Sepal.Width"), ~summarise_iris(.)
)
trb_1 <-
trb %>%
mutate(dat_selected = map2(.x = original_data, .y = cols_to_select, .f = ~select(.x, all_of(.y))))
trb_1
#> # A tibble: 2 x 4
#> original_data cols_to_select summarise_func dat_selected
#> <list> <list> <list> <list>
#> 1 <df [32 x 11]> <chr [3]> <formula> <df [32 x 3]>
#> 2 <df [150 x 5]> <chr [3]> <formula> <df [150 x 3]>
trb_1 %>%
mutate(dat_summarised = map(.x = dat_selected, .f = summarise_func))
#> Error: Problem with `mutate()` column `dat_summarised`.
#> i `dat_summarised = map(.x = dat_selected, .f = summarise_func)`.
#> x Index 1 must have length 1, not 2
Created on 2021-12-02 by the reprex package (v2.0.1.9000)
How can I achieve the desired output (see below) using the in-table method I'm trying to incorporate? I.e.:
trb_1 %>%
mutate(dat_summarised = map(.x = dat_selected, .f = summarise_func))
## to give the desired output that's equivalent to what we get if we run:
summar_mtcars <- mtcars %>% summarise_mtcars()
summar_iris <- iris %>% summarise_iris()
trb_1 %>%
tibble::add_column(dat_summarised = list(summar_mtcars, summar_iris))
## # A tibble: 2 x 5
## original_data cols_to_select summarise_func dat_selected dat_summarised
## <list> <list> <list> <list> <list>
## 1 <df [32 x 11]> <chr [3]> <formula> <df [32 x 3]> <tibble [2 x 2]>
## 2 <df [150 x 5]> <chr [3]> <formula> <df [150 x 3]> <tibble [3 x 3]>
UPDATE
I don't know if the following is in the right direction, but based on this answer, I thought to utilize rlang::as_function() such that:
trb_1 %>%
mutate(dat_summarised = map(.x = dat_selected, .f = ~rlang::as_function(summarise_func)))
But it gives a different error now:
x Can't convert a list to function
I think you can take a simpler approach. First, we don't need to select columns, that's inherent to summarize anyway. Let's create columns that define the columns to group by, the columns to summarize, and functions to use.
library(purrr)
library(tibble)
library(dplyr, warn.conflicts = FALSE)
trb <-
tribble(~original_data, ~cols_to_group, ~cols_to_summarize, ~summarise_func,
mtcars, "am", "mpg", \(x) mean(x, na.rm = T),
iris, "Species", ~starts_with("Sepal"), median
)
The \(x) mean(x, na.rm = TRUE) syntax is the new anonymous function syntax in R 4.1. If using an earlier version, just change to function(x) mean(...)
Now we can define a function (to eventually use in pmap that accepts the data, grouping columns, columns to analyse, and the summarize functions.
summarize_fun <- function(
.dat, .group_cols, .summ_cols, .funs
) {
.dat %>%
group_by(across(!!.group_cols)) %>%
summarize(across(!!.summ_cols, .funs))
}
And now we can just use these within mutate(pmap(...)) to get the result we want. I rely on !! for unquoting expressions because that works for passing in things like ~starts_with("Sepal"), which don't work with {{ }} to my knowledge.
trb_final <- trb %>%
mutate(dat_summarized = pmap(
list(
.dat=original_data,
.group_cols=cols_to_group,
.summ_cols=cols_to_summarize,
.funs=summarise_func
),
summarize_fun
))
trb_final
#> # A tibble: 2 × 5
#> original_data cols_to_group cols_to_summarize summarise_func dat_summarized
#> <list> <chr> <list> <list> <list>
#> 1 <df [32 × 11]> am <chr [1]> <fn> <tibble [2 × 2]>
#> 2 <df [150 × 5]> Species <formula> <fn> <tibble [3 × 3]>
trb_final$dat_summarized
#> [[1]]
#> # A tibble: 2 × 2
#> am mpg
#> <dbl> <dbl>
#> 1 0 17.1
#> 2 1 24.4
#>
#> [[2]]
#> # A tibble: 3 × 3
#> Species Sepal.Length Sepal.Width
#> <fct> <dbl> <dbl>
#> 1 setosa 5 3.4
#> 2 versicolor 5.9 2.8
#> 3 virginica 6.5 3
General functions
If instead as in the comments, we want just to apply generic functions to summarize, then just rely on pmap with 2 arguments, the data and the summarizing function.
summarize_mtcars <- function(.dat) {
.dat %>%
group_by(am) %>%
summarise(mpg_median = median(mpg))
}
summarize_iris <- function(.dat) {
.dat %>%
group_by(Species) %>%
summarise(across(starts_with("Sepal"), ~ mean(.x, na.rm = TRUE)))
}
Now we can just define our data frame to analyze using the original data and the two summarize_... functions we defined for the datasets.
trb <-
tribble(~original_data, ~summarize_func,
mtcars, summarize_mtcars,
iris, summarize_iris
)
And then just use pmap as before (can also use map2 of course).
trb_final <- trb %>%
mutate(dat_summarized = pmap(
list(
original_data,
summarize_func
),
\(.d, .f) .f(.d)
))
trb_final
#> # A tibble: 2 × 3
#> original_data summarize_func dat_summarized
#> <list> <list> <list>
#> 1 <df [32 × 11]> <fn> <tibble [2 × 2]>
#> 2 <df [150 × 5]> <fn> <tibble [3 × 3]>
trb_final$dat_summarized
#> [[1]]
#> # A tibble: 2 × 2
#> am mpg_median
#> <dbl> <dbl>
#> 1 0 17.3
#> 2 1 22.8
#>
#> [[2]]
#> # A tibble: 3 × 3
#> Species Sepal.Length Sepal.Width
#> <fct> <dbl> <dbl>
#> 1 setosa 5.01 3.43
#> 2 versicolor 5.94 2.77
#> 3 virginica 6.59 2.97
I would store the functions as strings:
trb <-
tribble(~original_data, ~cols_to_select, ~summarise_func,
mtcars, c("am", "disp", "mpg"), "summarise_mtcars",
iris, c("Species", "Sepal.Length", "Sepal.Width"), "summarise_iris"
)
Then you can simply use do.call in your map call. Or you convert your functions to strings on the fly with mutate:
trb_2 <- trb_1 %>%
mutate(summarise_func = as.character(summarise_func)) %>%
mutate(dat_summarised = map2(summarise_func, original_data, ~ do.call(what = .x, args = list(.dat = .y))))
trb_2
#> # A tibble: 2 × 5
#> original_data cols_to_select summarise_func dat_selected dat_summarised
#> <list> <list> <chr> <list> <list>
#> 1 <df [32 × 11]> <chr [3]> summarise_mtcars <df [32 × 3]> <tibble [2 × 2]>
#> 2 <df [150 × 5]> <chr [3]> summarise_iris <df [150 × 3]> <tibble [3 × 3]>
Created on 2021-12-02 by the reprex package (v2.0.1)
Update: Storing functions or rather function names as strings can be problematic if the underlying function changes (I get that now). The problem is getting the function into the tibble in the first place. What you do in the question is storing it as a formula. A better way is (imo) to store it in a list column:
trb <-
tribble(~original_data, ~cols_to_select, ~summarise_func,
mtcars, c("am", "disp", "mpg"), list(fun = summarise_mtcars),
iris, c("Species", "Sepal.Length", "Sepal.Width"), list(fun = summarise_iris)
)
With a slight adaptation, this original answer then works like this:
trb_3 <- trb_1 %>%
mutate(dat_summarised = map2(summarise_func, original_data, ~ do.call(what = .x$fun, args = list(.dat = .y))))
trb_3
#> # A tibble: 2 × 5
#> original_data cols_to_select summarise_func dat_selected dat_summarised
#> <list> <list> <list> <list> <list>
#> 1 <df [32 × 11]> <chr [3]> <named list [1]> <df [32 × 3]> <tibble [2 × 2]>
#> 2 <df [150 × 5]> <chr [3]> <named list [1]> <df [150 × 3]> <tibble [3 × 3]>
Created on 2021-12-02 by the reprex package (v2.0.1)
Some code:
mymtcars <- mtcars %>% head %>% rownames_to_column('model') %>% group_by(vs) %>% nest
mymtcars
vs data
<dbl> <list>
1 0 <tibble [3 × 11]>
2 1 <tibble [3 × 11]>
I can fit a linear model on this list column df like so:
mymtcars %>%
+ mutate(mod = map(.x = data, ~ lm(.x$mpg ~ .x$cyl)))
# A tibble: 2 x 3
# Groups: vs [2]
vs data mod
<dbl> <list> <list>
1 0 <tibble [3 × 11]> <lm>
2 1 <tibble [3 × 11]> <lm>
What if my function name is a field?
mymtcars2 <- mtcars %>% head %>% rownames_to_column('model') %>% group_by(vs) %>% nest %>% crossing(func = c('lm'))
> mymtcars2
# A tibble: 2 x 3
vs data func
<dbl> <list> <chr>
1 0 <tibble [3 × 11]> lm
2 1 <tibble [3 × 11]> lm
I gave it a try with:
mymtcars2 %>%
+ mutate(mod = map2(.x = data, .y = func, ~ .y(.x$mpg ~ .x$cyl)))
Error: Problem with `mutate()` input `mod`.
x could not find function ".y"
ℹ Input `mod` is `map2(.x = data, .y = func, ~.y(.x$mpg ~ .x$cyl))`.
How can I pass the function to call in map and then call it in the above block?
May be using match.fun inside map2 like below:
models <- mymtcars2 %>%
mutate(mod = map2(.x = data, .y = func, ~ match.fun(.y)(.x$mpg ~ .x$cyl)))
Output:
[[1]]
Call:
match.fun(.y)(formula = .x$mpg ~ .x$cyl)
Coefficients:
(Intercept) .x$cyl
36.926733 -2.728218
[[2]]
Call:
match.fun(.y)(formula = .x$mpg ~ .x$cyl)
Coefficients:
(Intercept) .x$cyl
41.9400 -3.8025
I also found that I can use get:
mymtcars2 %>%
mutate(mod = map2(.x = data, .y = func, ~ get(.y)(.x$mpg ~ .x$cyl)))
Am unsure of when to use one over the other.
A different option could be:
mymtcars2 %>%
mutate(mod = map2(.x = data,
.y = func,
~ exec(.y, mpg ~ cyl, data = .x)))
vs data func mod
<dbl> <list> <chr> <list>
1 0 <tibble [3 × 11]> lm <lm>
2 1 <tibble [3 × 11]> lm <lm>
Since {dplyr} >= 1.0 this kind of problems can be solved with dplyr::rowwise. We can use it either with a classic do.call, in which case we have to wrap the arguments in list(), or with rlang::exec. With dlpyr::rowwise we don't need map2 which makes things more readable since there is no lambda function with .x .y. However, since the output column stores lm objects (and not an atomic vector), the result has to be wrapped in mod = list(...).
library(tidyverse)
mymtcars2 %>%
rowwise %>%
mutate(mod = list(do.call(func, list(mpg ~ cyl, data = data))))
#> # A tibble: 2 x 4
#> # Rowwise:
#> vs data func mod
#> <dbl> <list> <chr> <list>
#> 1 0 <tibble [3 × 11]> lm <lm>
#> 2 1 <tibble [3 × 11]> lm <lm>
mymtcars2 %>%
rowwise %>%
mutate(mod = list(exec(func, mpg ~ cyl, data = data)))
#> # A tibble: 2 x 4
#> # Rowwise:
#> vs data func mod
#> <dbl> <list> <chr> <list>
#> 1 0 <tibble [3 × 11]> lm <lm>
#> 2 1 <tibble [3 × 11]> lm <lm>
Created on 2021-08-28 by the reprex package (v0.3.0)
I'm interested in learning tidymodels and have tried to apply it to some exercises in Appied Predictive Modeling. This is Exercise 6.2. I would like to specify a Partial Least Squares (PLS) model to the permeability data set.
I have the following code that works all the way up to the tune grid. I've modeled my analysis off of Julia Silge's - Lasso regression with tidymodels and The Office found here.
You can see my script and the tune_grid error message below.
library(tidymodels)
library(tidyverse)
library(skimr)
library(plsmod)
library(caret)
library(AppliedPredictiveModeling)
data(permeability)
dim(fingerprints)
fingerprints <- fingerprints[, -nearZeroVar(fingerprints)]
dim(fingerprints)
df <- cbind(fingerprints, permeability)
df <- as_tibble(df)
perm_split <- initial_split(df)
perm_train <- training(perm_split)
perm_test <- testing(perm_split)
perm_rec<- recipe(permeability ~ ., data=perm_train) %>%
step_center(all_numeric(),-all_outcomes()) %>%
step_scale(all_numeric(),-all_outcomes())
perm_prep <- perm_rec %>%
prep()
perm_prep
pls_spec <- pls(num_comp = 4) %>%
set_mode("regression") %>%
set_engine("mixOmics")
wf <- workflow() %>%
add_recipe(perm_prep)
pls_fit <- wf %>%
add_model(pls_spec) %>%
fit(data=perm_train)
pls_fit %>%
pull_workflow_fit() %>%
tidy()
set.seed(123)
perm_folds <- vfold_cv(perm_train, v=10)
pls_tune_spec <- pls(num_comp = tune()) %>%
set_mode("regression") %>%
set_engine("mixOmics")
comp_grid <- expand.grid(num_comp = seq(from = 1, to = 20, by = 1))
doParallel::registerDoParallel()
set.seed(4763)
pls_grid <- tune_grid(
wf %>% add_model(pls_tune_spec),
resamples = perm_folds,
grid = comp_grid
)
At this point I'm getting the following error:
All models failed in tune_grid(). See the .notes column.
Two questions:
Why is my tune grid failing and how can I fix it?
How does one see the .note column.
I am guessing that you may be using a Windows computer, because we currently have a bug in the CRAN version of tune for parallel processing on Windows. Try either:
training sequentially without parallel processing, or
installing the development version of tune where we have fixed this bug, via devtools::install_github("tidymodels/tune")
You should see results like this:
library(tidymodels)
library(plsmod)
library(AppliedPredictiveModeling)
data(permeability)
df <- cbind(fingerprints, permeability)
df <- as_tibble(df)
set.seed(123)
perm_split <- initial_split(df)
perm_train <- training(perm_split)
perm_test <- testing(perm_split)
set.seed(234)
perm_folds <- vfold_cv(perm_train, v=10)
perm_rec <- recipe(permeability ~ ., data = perm_train) %>%
step_nzv(all_predictors()) %>%
step_center(all_numeric(), -all_outcomes()) %>%
step_scale(all_numeric(), -all_outcomes())
pls_spec <- pls(num_comp = tune()) %>%
set_mode("regression") %>%
set_engine("mixOmics")
comp_grid <- tibble(num_comp = seq(from = 1, to = 20, by = 5))
doParallel::registerDoParallel()
workflow() %>%
add_recipe(perm_rec) %>%
add_model(pls_spec) %>%
tune_grid(
resamples = perm_folds,
grid = comp_grid
)
#>
#> Attaching package: 'rlang'
#> The following objects are masked from 'package:purrr':
#>
#> %#%, as_function, flatten, flatten_chr, flatten_dbl, flatten_int,
#> flatten_lgl, flatten_raw, invoke, list_along, modify, prepend,
#> splice
#>
#> Attaching package: 'vctrs'
#> The following object is masked from 'package:tibble':
#>
#> data_frame
#> The following object is masked from 'package:dplyr':
#>
#> data_frame
#> Loading required package: MASS
#>
#> Attaching package: 'MASS'
#> The following object is masked from 'package:dplyr':
#>
#> select
#> Loading required package: lattice
#>
#> Loaded mixOmics 6.12.2
#> Thank you for using mixOmics!
#> Tutorials: http://mixomics.org
#> Bookdown vignette: https://mixomicsteam.github.io/Bookdown
#> Questions, issues: Follow the prompts at http://mixomics.org/contact-us
#> Cite us: citation('mixOmics')
#>
#> Attaching package: 'mixOmics'
#> The following object is masked from 'package:plsmod':
#>
#> pls
#> The following object is masked from 'package:tune':
#>
#> tune
#> The following object is masked from 'package:purrr':
#>
#> map
#> # Tuning results
#> # 10-fold cross-validation
#> # A tibble: 10 x 4
#> splits id .metrics .notes
#> <list> <chr> <list> <list>
#> 1 <split [111/13]> Fold01 <tibble [8 × 5]> <tibble [0 × 1]>
#> 2 <split [111/13]> Fold02 <tibble [8 × 5]> <tibble [0 × 1]>
#> 3 <split [111/13]> Fold03 <tibble [8 × 5]> <tibble [0 × 1]>
#> 4 <split [111/13]> Fold04 <tibble [8 × 5]> <tibble [0 × 1]>
#> 5 <split [112/12]> Fold05 <tibble [8 × 5]> <tibble [0 × 1]>
#> 6 <split [112/12]> Fold06 <tibble [8 × 5]> <tibble [0 × 1]>
#> 7 <split [112/12]> Fold07 <tibble [8 × 5]> <tibble [0 × 1]>
#> 8 <split [112/12]> Fold08 <tibble [8 × 5]> <tibble [0 × 1]>
#> 9 <split [112/12]> Fold09 <tibble [8 × 5]> <tibble [0 × 1]>
#> 10 <split [112/12]> Fold10 <tibble [8 × 5]> <tibble [0 × 1]>
Created on 2020-11-12 by the reprex package (v0.3.0.9001)
If you have an object like pls_grid with notes, you should be able to get to the column via pls_grid$.notes, or to see the first example via pls_grid$.notes[[1]].