Thanks to this site, I'm using the R purrr package to aggregate data based on multiple columns. The aggregation is working how I want, but the output is not. Here is a sample using the mtcars dataset.
library(dplyr)
library(purrr)
# Use the built-in mtcars data frame as the working data set
data <- mtcars
# Every column takes a turn as the grouping variable
variable1 <- colnames(data)
# For each column: group by it and average mpg. The list is named by column
# so that bind_rows() can record the source column in `variable`.
t1 <- variable1 %>%
  set_names() %>%
  map(function(column) {
    grouped <- group_by_at(data, column)
    summarize(grouped, number = mean(mpg))
  }) %>%
  bind_rows(.id = "variable")
Where I expect three columns (predictor variable, levels within each of those variables, aggregation), I have 8. See the image below:
How can I take my code up at the top and turn out a tidy dataset?
A simple way to do this is to reshape your data to long form, which lets you aggregate with ordinary dplyr:
library(tidyverse)

# Reshape to long form: every predictor column is stacked into a
# `variable` (column name) / `value` (level) pair, with mpg kept alongside.
# pivot_longer() is the current tidyr replacement for the superseded gather().
mpg_means <- mtcars %>%
  pivot_longer(-mpg, names_to = "variable", values_to = "value") %>%
  # One group per predictor level, then the mean mpg within it
  group_by(variable, value) %>%
  summarise(mean_mpg = mean(mpg))

mpg_means
#> # A tibble: 146 x 3
#> # Groups: variable [?]
#> variable value mean_mpg
#> <chr> <dbl> <dbl>
#> 1 am 0. 17.1
#> 2 am 1. 24.4
#> 3 carb 1. 25.3
#> 4 carb 2. 22.4
#> 5 carb 3. 16.3
#> 6 carb 4. 15.8
#> 7 carb 6. 19.7
#> 8 carb 8. 15.0
#> 9 cyl 4. 26.7
#> 10 cyl 6. 19.7
#> # ... with 136 more rows
Note that while mtcars is entirely numeric, if you have different types, converting to long form will coerce variable types. The calculations will be the same, but it may cause issues later. To resolve it, use an output format that can handle diverse types, e.g.
mpg_means_in_list_cols <- mtcars %>%
  as_tibble() %>%                        # tibbles print list columns compactly
  summarise_all(list) %>%                # each column becomes a one-element list holding itself
  gather(group, group_values, -mpg) %>%  # one row per grouping column, mpg repeated
  mutate(
    # Rebuild a two-column data frame from each mpg/grouping pair and
    # average mpg within every level of the grouping values.
    mpg_means = map2(mpg, group_values, function(mpg_values, levels) {
      pairs <- tibble(mpg = mpg_values, group_value = levels)
      summarise(group_by(pairs, group_value), mean_mpg = mean(mpg))
    })
  )

mpg_means_in_list_cols
#> # A tibble: 10 x 4
#> mpg group group_values mpg_means
#> <list> <chr> <list> <list>
#> 1 <dbl [32]> cyl <dbl [32]> <tibble [3 × 2]>
#> 2 <dbl [32]> disp <dbl [32]> <tibble [27 × 2]>
#> 3 <dbl [32]> hp <dbl [32]> <tibble [22 × 2]>
#> 4 <dbl [32]> drat <dbl [32]> <tibble [22 × 2]>
#> 5 <dbl [32]> wt <dbl [32]> <tibble [29 × 2]>
#> 6 <dbl [32]> qsec <dbl [32]> <tibble [30 × 2]>
#> 7 <dbl [32]> vs <dbl [32]> <tibble [2 × 2]>
#> 8 <dbl [32]> am <dbl [32]> <tibble [2 × 2]>
#> 9 <dbl [32]> gear <dbl [32]> <tibble [3 × 2]>
#> 10 <dbl [32]> carb <dbl [32]> <tibble [6 × 2]>
While this is decidedly not as pretty, it's capable of holding many types tidily. To extract the result above, just add %>% unnest(mpg_means). As-is, grouping variables are each held in a list element of group_values and in aggregated form in the first column of each mpg_means tibble.
When grouping your data within the map, you can rename the grouping variable to "level", since those values will form the column containing the levels of the grouping variable in the final data set.
When you have mixed types of grouping variables (e.g. both numeric and character), you'll also need to coerce the grouping variable to character in order to be able to bind the results together.
With those additions, you should get what you expect. (You can also skip the bind_rows by using map_df instead of map, to save a little bit of code, like I've done below.)
reprex::reprex_info()
#> Created by the reprex package v0.1.1.9000 on 2018-02-09
library(purrr)
library(dplyr)

data <- iris
vars <- names(data)

# For every column of `data`: group by that column (exposed under the common
# name "level"), average Sepal.Length per level, and store the level as
# character so results from numeric and factor columns can be row-bound.
# `.id` records which column produced each row.
map_df(
  set_names(vars),
  function(var) {
    grouping <- set_names(var, "level")
    grouped <- group_by_at(data, grouping)
    averaged <- summarize_at(grouped, "Sepal.Length", "mean")
    mutate_at(averaged, "level", as.character)
  },
  .id = "variable"
)
#> # A tibble: 126 x 3
#> variable level Sepal.Length
#> <chr> <chr> <dbl>
#> 1 Sepal.Length 4.3 4.3
#> 2 Sepal.Length 4.4 4.4
#> 3 Sepal.Length 4.5 4.5
#> 4 Sepal.Length 4.6 4.6
#> 5 Sepal.Length 4.7 4.7
#> 6 Sepal.Length 4.8 4.8
#> 7 Sepal.Length 4.9 4.9
#> 8 Sepal.Length 5 5.0
#> 9 Sepal.Length 5.1 5.1
#> 10 Sepal.Length 5.2 5.2
#> # ... with 116 more rows
You could also wrap the process in a function, and allow multiple variables to summarise with multiple functions. You'd have to spend a moment to come up with an evocative name though (I cheated and just used foo here).
#' Summarise columns within the levels of every column of a data frame
#'
#' Each column of `data` is used in turn as the grouping variable. Within
#' each of its levels the columns in `vars` are summarised with `funs`, and
#' the per-column results are stacked into one tibble.
#'
#' @param data A data frame.
#' @param vars Columns to summarise, passed on to `summarize_at()`
#'   (e.g. `vars(Sepal.Length, Sepal.Width)`).
#' @param funs Summary function(s), passed on to `summarize_at()`
#'   (e.g. a named list such as `list(mean = mean, sd = sd)`).
#' @return A tibble with `variable` (the grouping column's name), `level`
#'   (its value, as character) and the summary columns.
foo <- function(data, vars, funs) {
  grps <- names(data)
  set_names(grps) %>%
    map_df(function(grp) {
      # Name the grouping column "level" so every result shares one column name
      grp <- set_names(grp, "level")
      data %>%
        group_by_at(grp) %>%
        summarize_at(vars, funs) %>%
        # Character levels let numeric and factor groupings be bound together
        mutate_at("level", as.character)
    }, .id = "variable")
}

# A named list of functions replaces the deprecated funs() helper and yields
# the same `<column>_<function>` output names.
foo(iris, vars(Sepal.Length, Sepal.Width), list(mean = mean, sd = sd))
#> # A tibble: 126 x 6
#> variable level Sepal.Length_mean Sepal.Width_mean Sepal.Length_sd
#> <chr> <chr> <dbl> <dbl> <dbl>
#> 1 Sepal.Length 4.3 4.3 3.000000 NaN
#> 2 Sepal.Length 4.4 4.4 3.033333 0
#> 3 Sepal.Length 4.5 4.5 2.300000 NaN
#> 4 Sepal.Length 4.6 4.6 3.325000 0
#> 5 Sepal.Length 4.7 4.7 3.200000 0
#> 6 Sepal.Length 4.8 4.8 3.180000 0
#> 7 Sepal.Length 4.9 4.9 2.950000 0
#> 8 Sepal.Length 5 5.0 3.120000 0
#> 9 Sepal.Length 5.1 5.1 3.477778 0
#> 10 Sepal.Length 5.2 5.2 3.425000 0
#> # ... with 116 more rows, and 1 more variables: Sepal.Width_sd <dbl>
Related
Edited for Clarity
I frequently do stratified analyses. However, to avoid spending Type I error on hypothesis tests
that aren't of interest, I would like to remove certain values before using p.adjust().
library(purrr)
library(dplyr, warn.conflicts = FALSE)
library(broom)
library(tidyr)
# Fit mpg ~ wt separately within each cyl group and keep one row per
# model coefficient.
mtcars_fit <- mtcars %>%
  # Use the bare column name here: group_by("cyl") would create a constant
  # column named "cyl" instead of grouping by the cyl column. To group by a
  # column given as a string, use group_by(across(all_of("cyl"))).
  group_by(cyl) %>%
  nest() %>%
  mutate(
    model = map(data, ~ lm(mpg ~ wt, data = .)),
    coeff = map(model, tidy, conf.int = FALSE)
  ) %>%
  unnest(coeff) %>%
  select(-statistic)

# Note: the result is still grouped by cyl (see the "Groups" header below).
mtcars_fit
#> # A tibble: 6 × 7
#> # Groups: cyl [3]
#> cyl data model term estimate std.error p.value
#> <dbl> <list> <list> <chr> <dbl> <dbl> <dbl>
#> 1 6 <tibble [7 × 10]> <lm> (Intercept) 28.4 4.18 0.00105
#> 2 6 <tibble [7 × 10]> <lm> wt -2.78 1.33 0.0918
#> 3 4 <tibble [11 × 10]> <lm> (Intercept) 39.6 4.35 0.00000777
#> 4 4 <tibble [11 × 10]> <lm> wt -5.65 1.85 0.0137
#> 5 8 <tibble [14 × 10]> <lm> (Intercept) 23.9 3.01 0.00000405
#> 6 8 <tibble [14 × 10]> <lm> wt -2.19 0.739 0.0118
# Goal: adjust only the wt p-values for multiple comparisons, leaving the
# intercept rows out so they do not consume any of the Type I error budget.
# Intercept p-values are blanked to NA before p.adjust(), which ignores NAs
# and returns NA for them.
# NOTE: mtcars_fit is still grouped by cyl, so mutate() evaluates p.adjust()
# separately within each cyl group.
mtcars_adjusted <- mtcars_fit %>%
  mutate(
    p.value_adj = p.adjust(
      if_else(term != "(Intercept)", p.value, NA_real_),
      method = "fdr"
    ),
    .after = "p.value"
  )

mtcars_adjusted
#> # A tibble: 6 × 8
#> # Groups: cyl [3]
#> cyl data model term estimate std.error p.value p.val…¹
#> <dbl> <list> <list> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 6 <tibble [7 × 10]> <lm> (Intercept) 28.4 4.18 1.05e-3 NA
#> 2 6 <tibble [7 × 10]> <lm> wt -2.78 1.33 9.18e-2 0.0918
#> 3 4 <tibble [11 × 10]> <lm> (Intercept) 39.6 4.35 7.77e-6 NA
#> 4 4 <tibble [11 × 10]> <lm> wt -5.65 1.85 1.37e-2 0.0137
#> 5 8 <tibble [14 × 10]> <lm> (Intercept) 23.9 3.01 4.05e-6 NA
#> 6 8 <tibble [14 × 10]> <lm> wt -2.19 0.739 1.18e-2 0.0118
#> # … with abbreviated variable name ¹p.value_adj
As this discussion on StackOverflow indicates that dplyr and p.adjust() often don't work well together, I applied the function outside the pipe as suggested.
# Cross-check: keep only the non-intercept rows and adjust their p-values
# outside the pipe, on the plain vector of all remaining p-values.
p.adj <- filter(mtcars_fit, term != "(Intercept)")
p.adj$p.value_adj <- p.adjust(p.adj$p.value, method = "fdr")

p.adj
#> # A tibble: 3 × 8
#> # Groups: cyl [3]
#> cyl data model term estimate std.error p.value p.value_adj
#> <dbl> <list> <list> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 6 <tibble [7 × 10]> <lm> wt -2.78 1.33 0.0918 0.0918
#> 2 4 <tibble [11 × 10]> <lm> wt -5.65 1.85 0.0137 0.0206
#> 3 8 <tibble [14 × 10]> <lm> wt -2.19 0.739 0.0118 0.0206
Created on 2022-08-18 by the reprex package (v2.0.1)
The result is that the adjusted p-values are different, so I am unsure which is correct. The fact that I adjusted the p-values in two different ways -- in the objects mtcars_adjusted and p.adj -- and got different adjusted p-values is concerning. The adjusted p-values for each object:
mtcars_adjusted: 0.0918, 0.0137, 0.0118
p.adj: 0.0918, 0.0206, 0.0206.
In the resulting dataset I want to keep the intercept estimates, but without including their p-values in the adjustment. It would look something like mtcars_adjusted, but I want to make sure the p-values are adjusted accurately. How would I go about doing this?
Implementing your adjustment within the pipe chain
You don't need to adjust your p-values outside of mutate() in your example. Below, I show the identical result can be produced within the piping chain.
# Reference result: adjust the "wt" p-values outside the pipe (your approach)
p.adj <- mtcars_fit %>%
  filter(term != "(Intercept)") %>%
  mutate(p.value_adj = NA_real_)
p.adj$p.value_adj <- p.adjust(p.adj$p.value, method = "fdr")

# Alternative approach: the same adjustment done entirely inside the pipe.
# After ungroup(), mutate() sees all remaining p-values at once, so
# p.adjust() works on the `p.value` column directly instead of reaching back
# into the external `p.adj` object.
p.adj_alt <- mtcars_fit %>%
  ungroup() %>%
  filter(term != "(Intercept)") %>%
  mutate(p.value_adj = p.adjust(p.value, method = "fdr"))

# Show they are identical once ungrouped (which you should do once you are
# done with all by-group operations)
identical(ungroup(p.adj), p.adj_alt)
#> [1] TRUE
Whether you are accomplishing what you intended with your "outside of the pipe" approach is a different question than what you asked in your post, but I encourage you to make sure it is.
Adding the intercepts
Once you have your adjusted estimates, you can add in the intercept rows by filter()ing them from the original object and passing them with your adjusted data to bind_rows(). You can also combine the two p-values columns into a single column if you'd like using coalesce().
# Get intercepts, bind into a single data.frame, and create a coalesced
# column that combines the (un)adjusted p-values: the adjusted value where
# one exists (wt rows), otherwise the unadjusted one (intercept rows).
# p.value_adj must come first in coalesce(); p.value is never NA, so listing
# it first would simply copy the unadjusted p-values.
mtcars_fit %>%
  filter(term == "(Intercept)") %>%
  bind_rows(p.adj) %>%
  ungroup() %>%
  mutate(p.value_combined = coalesce(p.value_adj, p.value))
#> # A tibble: 6 × 9
#> cyl data model term estim…¹ std.e…² p.value p.val…³ p.val…⁴
#> <dbl> <list> <list> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 6 <tibble [7 × 10]> <lm> (Inte… 28.4 4.18 1.05e-3 NA 1.05e-3
#> 2 4 <tibble [11 × 10]> <lm> (Inte… 39.6 4.35 7.77e-6 NA 7.77e-6
#> 3 8 <tibble [14 × 10]> <lm> (Inte… 23.9 3.01 4.05e-6 NA 4.05e-6
#> 4 6 <tibble [7 × 10]> <lm> wt -2.78 1.33 9.18e-2 0.0918 9.18e-2
#> 5 4 <tibble [11 × 10]> <lm> wt -5.65 1.85 1.37e-2 0.0206 2.06e-2
#> 6 8 <tibble [14 × 10]> <lm> wt -2.19 0.739 1.18e-2 0.0206 2.06e-2
#> # … with abbreviated variable names ¹estimate, ²std.error, ³p.value_adj,
#> # ⁴p.value_combined
I have a dataframe of 4 columns: Dataset, X, Y, Group.
The task is to fit a linear model to each of the five groups (The group column contains 5 groups: a, b, c, d, e) in the dataframe and then compare the slope with the dataframe test_2. For the test_2 I have already fitted a model, as there was no group separation like in the test_1. For the test_1 we have been suggested to use the function nest_by to compute a group-wise linear models
I have tried to fit a model with the function nest_by
Input:
# Fit one linear model per Group. nest_by() returns a rowwise tibble whose
# `data` list-column holds the rows of the current group, so lm() must be
# given `data`. Passing the full `test_1` would refit the same pooled model
# in every row.
model <- test_1 %>%
  nest_by(Group) %>%
  mutate(model = list(lm(y ~ x, data = data)))
model
Output:
A tibble: 5 x 3
# Rowwise: Group
Group data model
<fct> <list<tibble[,3]>> <list>
1 a [58 x 3] <lm>
2 b [35 x 3] <lm>
3 c [47 x 3] <lm>
4 d [44 x 3] <lm>
5 e [38 x 3] <lm>
I do not know now how to proceed. I thought that I could ungroup them and do a summary(), but would be similar to just fit a model separately with the function filter() and create 5 separated models.
Yes, you can proceed further by using tidy() from the broom package, which is a better option than summary() because it returns a data frame, and then calling unnest().
For example, for mtcars, for each cyl group, we can do the following,
library(tidyr)
library(dplyr)
library(purrr)
library(broom)
# One row per cyl: that group's rows live in `data`, its fitted model in `model`
mtcars_model <- mutate(
  nest(mtcars, data = -cyl),
  model = map(data, ~ lm(mpg ~ wt, data = .))
)

# Tidy each model into a coefficient table, then unnest so every
# coefficient gets its own row next to its cyl value.
unnest(
  mutate(mtcars_model, tidy_summary = map(model, tidy)),
  tidy_summary
)
#> # A tibble: 6 × 8
#> cyl data model term estimate std.error statistic p.value
#> <dbl> <list> <list> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 6 <tibble [7 × 10]> <lm> (Interce… 28.4 4.18 6.79 1.05e-3
#> 2 6 <tibble [7 × 10]> <lm> wt -2.78 1.33 -2.08 9.18e-2
#> 3 4 <tibble [11 × 10]> <lm> (Interce… 39.6 4.35 9.10 7.77e-6
#> 4 4 <tibble [11 × 10]> <lm> wt -5.65 1.85 -3.05 1.37e-2
#> 5 8 <tibble [14 × 10]> <lm> (Interce… 23.9 3.01 7.94 4.05e-6
#> 6 8 <tibble [14 × 10]> <lm> wt -2.19 0.739 -2.97 1.18e-2
Created on 2022-07-09 by the reprex package (v2.0.1)
For additional Information with examples, check here
You can comfortably create nested models with map() resulting in list columns:
# Nest by cyl, then store an aov fit and a Dunn test per group as list columns
df <- mtcars %>%
  nest(data = -cyl) %>%
  mutate(
    aov = map(data, ~ aov(mpg ~ hp, data = .x)),
    dunned = map(data, ~ rstatix::dunn_test(mpg ~ hp, data = .x))
  )

df
# A tibble: 3 × 4
cyl data aov dunned
<dbl> <list> <list> <list>
1 6 <tibble [7 × 10]> <aov> <rstatix_test [6 × 9]>
2 4 <tibble [11 × 10]> <aov> <rstatix_test [45 × 9]>
3 8 <tibble [14 × 10]> <aov> <rstatix_test [36 × 9]>
However, dissolving these list columns with unnest() works only sometimes, e.g. here:
# Expand the data-frame-valued `dunned` column into regular rows and columns
unnest(df, dunned)
# A tibble: 87 × 12
cyl data aov .y. group1 group2 n1 n2 statistic p
<dbl> <list> <lis> <chr> <chr> <chr> <int> <int> <dbl> <dbl>
1 6 <tibble… <aov> mpg 105 110 1 3 1.62 0.106
2 6 <tibble… <aov> mpg 105 123 1 2 0 1
3 6 <tibble… <aov> mpg 105 175 1 1 0.661 0.509
However, it doesn't work in other cases:
# None of these can expand a list column of model objects:
unnest(df, aov) # Error: Input must be a vector, not a <aov/lm> object.
unnest_wider(df, aov) # Error: Input must be list of vectors
unnest_legacy(df, aov) # Error: Each column must either be a list of vectors or a list of data frames [lm]
Before tidyr 1.0, unnest() would work on the above objects, but now it doesn't (see unnest_legacy(aov)). Why?
My guess is it depends on the output format (e.g. if it is a dataframe, denoted by [6 × 9]), so these cases are no problem.
Question:
How can you dissolve list columns (created by common models such as aov and lm) for which none of the unnest/unnest_wider/unnest_legacy options work?
Expected Result:
I'm looking for a solution which is more general than a "trick" I know for the aov case:
# Fit an aov per cyl group and convert each fit to a tidy data frame,
# which (unlike the model object itself) can be unnested.
df <- mtcars %>%
  nest(data = -cyl) %>%
  mutate(
    aov = map(data, ~ aov(mpg ~ hp, data = .x)),
    tidied = map(aov, broom::tidy)
  )
which unfolds easily:
# `tidied` holds data frames, so it expands cleanly
unnest(df, tidied)
# A tibble: 6 × 9
cyl data aov term df sumsq meansq statistic
<dbl> <list> <list> <chr> <dbl> <dbl> <dbl> <dbl>
1 6 <tibble… <aov> hp 1 0.205 0.205 0.0821
2 6 <tibble… <aov> Resi… 5 12.5 2.49 NA
3 4 <tibble… <aov> hp 1 55.7 55.7 3.40
4 4 <tibble… <aov> Resi… 9 148. 16.4 NA
I want to create a data frame where I summarize values like number of observations, mean and median, and I want to nest its ggplot histograms. For this, I will use the iris dataset.
This is my first attempt:
# Stack the four measurement columns into Vars/Values pairs, then report
# the count, mean and median (rounded to 2 decimals) for each variable.
pivot_longer(iris, cols = -Species, names_to = "Vars", values_to = "Values") %>%
  group_by(Vars) %>%
  summarise(
    obs = n(),
    mean = round(mean(Values), digits = 2),
    median = round(median(Values), digits = 2)
  )
So it gives me:
# A tibble: 4 x 4
Vars obs mean median
<chr> <int> <dbl> <dbl>
1 Petal.Length 150 3.76 4.35
2 Petal.Width 150 1.2 1.3
3 Sepal.Length 150 5.84 5.8
4 Sepal.Width 150 3.06 3
This is the expected table:
# A tibble: 4 x 5
Vars obs mean median plot
<chr> <int> <dbl> <dbl> <list>
1 Petal.Length 150 3.76 4.35 <gg>
2 Petal.Width 150 1.2 1.3 <gg>
3 Sepal.Length 150 5.84 5.8 <gg>
4 Sepal.Width 150 3.06 3 <gg>
This is what I have tried:
# Nest the long-format data by variable, then build a one-row summary
# tibble and a histogram for each variable's nested data frame.
iris %>%
  pivot_longer(cols = -Species, names_to = "Vars", values_to = "Values") %>%
  group_by(Vars) %>%
  nest() %>%
  mutate(
    metrics = lapply(data, function(df) {
      summarise(df, obs = n(), mean = mean(Values), median = median(Values))
    }),
    plots = lapply(data, function(df) {
      ggplot(df, aes(Values)) + geom_histogram()
    })
  )
Almost there, I see this:
# A tibble: 4 x 4
# Groups: Vars [4]
Vars data metrics plots
<chr> <list> <list> <list>
1 Sepal.Length <tibble [150 × 2]> <tibble [1 × 3]> <gg>
2 Sepal.Width <tibble [150 × 2]> <tibble [1 × 3]> <gg>
3 Petal.Length <tibble [150 × 2]> <tibble [1 × 3]> <gg>
4 Petal.Width <tibble [150 × 2]> <tibble [1 × 3]> <gg>
But I don't know how to see the expected tibble with the obs, mean, median and plots columns without the data and metrics columns. Any help will be greatly appreciated.
We may use cur_data() in summarise() to access the current group's rows, and store the plot in a list column by wrapping the ggplot call in list():
library(dplyr)
library(ggplot2)
library(tidyr)
# Summarise each variable and attach its histogram in the same summarise()
# call. cur_data() supplies the current group's rows to ggplot(), and list()
# wraps the plot so it can be stored as a list-column entry.
# NOTE(review): newer dplyr releases deprecate cur_data() in favour of
# pick(); confirm against the installed version.
out <- iris %>%
  pivot_longer(cols = -Species, names_to = "Vars", values_to = "Values") %>%
  group_by(Vars) %>%
  summarise(
    obs = n(),
    mean = round(mean(Values), digits = 2),
    median = round(median(Values), digits = 2),
    plots = list(ggplot(cur_data(), aes(Values)) + geom_histogram())
  )
-output
out
# A tibble: 4 × 5
Vars obs mean median plots
<chr> <int> <dbl> <dbl> <list>
1 Petal.Length 150 3.76 4.35 <gg>
2 Petal.Width 150 1.2 1.3 <gg>
3 Sepal.Length 150 5.84 5.8 <gg>
4 Sepal.Width 150 3.06 3 <gg>
# Drop the listed columns from model_table and nest the remaining ones into
# one `data` tibble per ano_fiscal value.
# The result stays grouped by ano_fiscal (see the "Groups" header in the
# printed output).
nested_numeric <- model_table %>%
group_by(ano_fiscal) %>%
select(-c("ano_estudo", "payout", "div_ratio","ebitda", "name.company",
"alavancagem","div_pl", "div_liq", "div_total")) %>%
nest()
nested_numeric
# A tibble: 7 x 2
# Groups: ano_fiscal [7]
ano_fiscal data
<dbl> <list>
1 2012 <tibble [34 x 10]>
2 2013 <tibble [35 x 10]>
3 2014 <tibble [35 x 10]>
4 2015 <tibble [35 x 10]>
5 2016 <tibble [35 x 10]>
6 2017 <tibble [35 x 10]>
7 2018 <tibble [35 x 10]>
df_ipca$idx
[1] 0.9652515 0.9741318 0.9817300 0.9911546 0.9941281 0.9985022 1.0000000
The list-column named "data" consists of numeric variables. I want to multiply them by a deflator index (i.e. adjust them for inflation).
this works fine
# Multiply each year's tibble by that year's index and row-bind the results
map2_df(
  nested_numeric$data,
  df_ipca$idx,
  function(values, index) values * index
)
or even
# Same multiplication, keeping one adjusted tibble per year in a list
map2(
  nested_numeric$data,
  df_ipca$idx,
  function(values, index) values * index
)
but I'm trying to create a new list-column named "adjusted_data" with the result of this operation:
# Same nesting pipeline as before, extended with a mutate() that tries to
# build the `adjusted_data` list column via map2().
# The tibble is still grouped by ano_fiscal when mutate() runs (group_by()
# above, no ungroup()); this is the call that raises the
# "must be length 1 (the group size), not 7" error quoted below.
nested_numeric <- model_table %>%
group_by(ano_fiscal) %>%
select(-c("ano_estudo", "payout", "div_ratio","ebitda", "name.company",
"alavancagem","div_pl", "div_liq", "div_total")) %>%
nest() %>%
mutate( adjusted_data = data %>% {
map2(., df_ipca$idx, ~ .x * .y)})
Gives me this error:
Error: Column `adjusted_data` must be length 1 (the group size), not 7
I hope my problem is clear enough because I'm trying to adjust for inflation a data frame with values nested by years.
I thought that using map2 within a mutate would be enough... I've tried everything and couldn't figure out what I'm doing wrong.
I've read similar questions with pipes within map2 here, but still...
Please help :)
Thank you!
A simple solution (which however does break up your pipes) is to just do
# Build the adjusted tibbles outside the pipe and attach them as a new list column
nested_numeric$adjusted_data <- map2(
  nested_numeric$data,
  df_ipca$idx,
  function(values, index) values * index
)
For example, using the iris data:
library(tidyverse)

# Stand-in deflator: one random index per Species group
df_ipca <- data.frame(idx = runif(3))

# Store the nested tibble under its own name so the built-in `iris` dataset
# is not overwritten for the rest of the session.
iris_nested <- iris %>%
  group_by(Species) %>%
  nest()

# Multiply each group's tibble by its index and attach the result as a new
# list column.
iris_nested$adjusted_data <- map2(iris_nested$data, df_ipca$idx, ~ .x * .y)
iris_nested
#> # A tibble: 3 x 3
#> # Groups: Species [3]
#> Species data adjusted_data
#> <fct> <list> <list>
#> 1 setosa <tibble [50 × 4]> <df[,4] [50 × 4]>
#> 2 versicolor <tibble [50 × 4]> <df[,4] [50 × 4]>
#> 3 virginica <tibble [50 × 4]> <df[,4] [50 × 4]>
Using solution with mutate
If you want to do the map2 inside mutate, after you have grouped and nested your data, you first have to ungroup() before calling mutate. Otherwise mutate evaluates map2 within each group, where data has length 1 but df_ipca$idx has length 7, so the result has 7 elements for a group of size 1 -- which is exactly the error you see. After ungroup(), map2 loops over the entire data column, which is what you want:
# Drop the grouping first so map2() pairs the whole `data` column with the
# index vector, one tibble per index value.
mutate(
  ungroup(nested_numeric),
  adjusted_data = map2(data, df_ipca$idx, function(values, index) values * index)
)
For example, using the iris data:
library(tidyverse)

# Stand-in deflator: one random index per Species group
df_ipca <- data.frame(idx = runif(3))

# Nest by Species, drop the grouping, then scale each group's tibble by its
# own index inside mutate().
iris_nested <- iris %>%
  group_by(Species) %>%
  nest() %>%
  ungroup() %>%
  mutate(
    adjusted_data = map2(
      data,
      df_ipca$idx,
      function(values, index) values * index
    )
  )
# First rows of each group's original data
iris_nested$data %>% map(head)
#> [[1]]
#> # A tibble: 6 x 4
#> Sepal.Length Sepal.Width Petal.Length Petal.Width
#> <dbl> <dbl> <dbl> <dbl>
#> 1 5.1 3.5 1.4 0.2
#> 2 4.9 3 1.4 0.2
#> 3 4.7 3.2 1.3 0.2
#> 4 4.6 3.1 1.5 0.2
#> 5 5 3.6 1.4 0.2
#> 6 5.4 3.9 1.7 0.4
#>
#> [[2]]
#> # A tibble: 6 x 4
#> Sepal.Length Sepal.Width Petal.Length Petal.Width
#> <dbl> <dbl> <dbl> <dbl>
#> 1 7 3.2 4.7 1.4
#> 2 6.4 3.2 4.5 1.5
#> 3 6.9 3.1 4.9 1.5
#> 4 5.5 2.3 4 1.3
#> 5 6.5 2.8 4.6 1.5
#> 6 5.7 2.8 4.5 1.3
#>
#> [[3]]
#> # A tibble: 6 x 4
#> Sepal.Length Sepal.Width Petal.Length Petal.Width
#> <dbl> <dbl> <dbl> <dbl>
#> 1 6.3 3.3 6 2.5
#> 2 5.8 2.7 5.1 1.9
#> 3 7.1 3 5.9 2.1
#> 4 6.3 2.9 5.6 1.8
#> 5 6.5 3 5.8 2.2
#> 6 7.6 3 6.6 2.1
# First rows of each group's index-adjusted data
iris_nested$adjusted_data %>% map(head)
#> [[1]]
#> Sepal.Length Sepal.Width Petal.Length Petal.Width
#> 1 1.0206142 0.7004215 0.2801686 0.04002409
#> 2 0.9805901 0.6003613 0.2801686 0.04002409
#> 3 0.9405660 0.6403854 0.2601566 0.04002409
#> 4 0.9205540 0.6203733 0.3001807 0.04002409
#> 5 1.0006022 0.7204336 0.2801686 0.04002409
#> 6 1.0806503 0.7804697 0.3402047 0.08004817
#>
#> [[2]]
#> Sepal.Length Sepal.Width Petal.Length Petal.Width
#> 1 0.3256959 0.1488896 0.2186816 0.06513919
#> 2 0.2977791 0.1488896 0.2093760 0.06979199
#> 3 0.3210431 0.1442368 0.2279872 0.06979199
#> 4 0.2559039 0.1070144 0.1861120 0.06048639
#> 5 0.3024319 0.1302784 0.2140288 0.06979199
#> 6 0.2652095 0.1302784 0.2093760 0.06048639
#>
#> [[3]]
#> Sepal.Length Sepal.Width Petal.Length Petal.Width
#> 1 2.399749 1.257011 2.285475 0.9522814
#> 2 2.209293 1.028464 1.942654 0.7237339
#> 3 2.704479 1.142738 2.247384 0.7999164
#> 4 2.399749 1.104646 2.133110 0.6856426
#> 5 2.475932 1.142738 2.209293 0.8380076
#> 6 2.894935 1.142738 2.514023 0.7999164
In fact, you can also omit the group_by() and ungroup() calls by providing the non-nested column (in your case, ano_fiscal) to nest():
# nest(data = -Species) nests everything except Species without creating
# groups, so no group_by()/ungroup() pair is needed before mutate().
mutate(
  nest(iris, data = -Species),
  adjusted_data = map2(data, df_ipca$idx, function(values, index) values * index)
)
which should give the same result as before. Note to avoid having a warning, you should name the -Species argument inside nest().