Adding summary row to dplyr output - r

I found a few solutions on here but none seem to work to add a summary row to dplyr output.
#mock up data
df <- data.frame("Market" = sample(c("East", "North", "West"), 100, replace = TRUE, prob = c(0.33, 0.33, 0.34)),
"var1" = sample(c("Y", "N"), 100, replace = TRUE, prob = c(0.4, 0.6)),
"var2" = sample(c("Y", "N"), 100, replace = TRUE, prob = c(0.7, 0.3)),
"var3" = sample(c("Y", "N"), 100, replace = TRUE, prob = c(0.5, 0.5)))
Here is the code:
df_report <- df %>%
group_by(Market) %>%
filter(Market == "East" | Market == "West") %>%
summarise(n = n(),
var1_y = sum(var1 == "Y"),
var1_n = sum(var1 == "N")) %>%
mutate(total = var1_y + var1_n,
var1_y_pct = (var1_y/total),
var1_n_pct = (var1_n/total),
pct_total = total/sum(total))
Here is the output:
# A tibble: 2 x 8
Market n var1_y var1_n total var1_y_pct var1_n_pct pct_total
<fct> <int> <int> <int> <int> <dbl> <dbl> <dbl>
1 East 29 13 16 29 0.448 0.552 0.453
2 West 35 16 19 35 0.457 0.543 0.547
Here are the two solutions I tried:
Option 1
df_report %>%
add_row(Market = "Total", n = sum(n), var1_y = sum(var1_y), var1_n = sum(var1_n),
total = sum(total), var1_y_pct = sum(var1_y_pct), var1_n_pct = sum(varn_y_pct), pct_total = sum(pct_total))
Option 2
df_report %>%
rbind(c("Total", sum(n), sum(var1_y), sum(var1_n), sum(total), sum(var1_y_pct), sum(varn_y_pct), sum(pct_total)))
Both give me the same error: Error in sum(n) : invalid 'type' (closure) of argument
I'm unable to determine why these solutions, while working for others and seeming very reasonable, are not working for me.

You should try
df_report %>% janitor::adorn_totals("row")
Which produces
Market n var1_y var1_n total var1_y_pct var1_n_pct pct_total
East 30 11 19 30 0.3666667 0.6333333 0.4285714
West 40 19 21 40 0.4750000 0.5250000 0.5714286
Total 70 30 40 70 0.8416667 1.1583333 1.0000000
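Both of your attempts fail because add_row() and rbind() do not evaluate their arguments inside the data frame, so the n in sum(n) resolves to the function dplyr::n() (a closure) rather than to the column n. The long way of doing this is to use summarise(), which does evaluate its arguments inside the data (also watch out: your code has a typo, varn_y_pct instead of var1_n_pct). Then bind the rows.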
# Build a one-row "Total" summary of the report and append it.
# Every column from n through pct_total is summed as-is, so the two *_pct
# entries of the total row are sums of the per-market proportions.
row_to_add <- summarise(
  df_report,
  Market = "Total",
  across(n:pct_total, sum)
)
bind_rows(df_report, row_to_add)

Related

calculate summary for more than one column [duplicate]

This question already has answers here:
Summarizing multiple columns with dplyr? [duplicate]
(5 answers)
Closed 1 year ago.
I am trying to calculate the quartiles, median and mean for a group of columns, but it is calculating them for only one column. What am I doing wrong here?
df <- data.frame(Name = c("ABC", "DCA", "GOL",NA, "MNA",NA, "VAN"),
Goal =c("published", "pending", "not designed",NA, "pending", "pending", "not designed"),
Target_1 = c(3734, 2639, 2604, NA, 2793, 2688, 2403),
Target_2 = c(3322, 2016, 2310, NA, 3236, 3898, 2309),
Target_3 = c(3785, 2585, 3750, NA, 2781, 3589, 2830))
df_summary <- df %>% select(contains("Target")) %>% summarise(
q25 = round(quantile(., type=6, probs = seq(0, 1, 0.25), na.rm=TRUE)[2],digits = 0),
Median = round(quantile(., type=6, probs = seq(0, 1, 0.25), na.rm=TRUE)[3],digits = 0),
Mean = round( mean(., na.rm=TRUE),digits = 0),
q75 = round(quantile(., type=6, probs = seq(0, 1, 0.25), na.rm=TRUE)[4],digits = 0),
N = sum(!is.na(.)))
Use across to apply a function to multiple columns.
library(dplyr)
library(tidyr)
# Apply the same set of statistics to every column whose name contains
# "Target". across() names each result <column>_<statistic>.
df %>%
  summarise(
    across(
      contains("Target"),
      list(
        # type = 6 selects the quantile algorithm; round() defaults to 0 digits
        q25 = ~ round(quantile(.x, probs = 0.25, type = 6, na.rm = TRUE)),
        Median = ~ round(quantile(.x, probs = 0.5, type = 6, na.rm = TRUE)),
        Mean = ~ round(mean(.x, na.rm = TRUE)),
        q75 = ~ round(quantile(.x, probs = 0.75, type = 6, na.rm = TRUE)),
        # number of non-missing observations
        N = ~ sum(!is.na(.x))
      )
    )
  )
# Target_1_q25 Target_1_Median Target_1_Mean Target_1_q75 Target_1_N Target_2_q25
#1 2554 2664 2810 3028 6 2236
# Target_2_Median Target_2_Mean Target_2_q75 Target_2_N Target_3_q25 Target_3_Median
#1 2773 2848 3466 6 2732 3210
# Target_3_Mean Target_3_q75 Target_3_N
#1 3220 3759 6
Or maybe long format is a better way to display the values.
# Stack the "Target" columns into name/value pairs, then compute one row of
# statistics per original column.
df %>%
  pivot_longer(cols = contains("Target")) %>%
  group_by(name) %>%
  summarise(
    q25 = round(quantile(value, probs = 0.25, type = 6, na.rm = TRUE)),
    Median = round(quantile(value, probs = 0.5, type = 6, na.rm = TRUE)),
    Mean = round(mean(value, na.rm = TRUE)),
    q75 = round(quantile(value, probs = 0.75, type = 6, na.rm = TRUE)),
    # number of non-missing observations
    N = sum(!is.na(value))
  )
# name q25 Median Mean q75 N
# <chr> <dbl> <dbl> <dbl> <dbl> <int>
#1 Target_1 2554 2664 2810 3028 6
#2 Target_2 2236 2773 2848 3466 6
#3 Target_3 2732 3210 3220 3759 6
Using map:
df %>%
select(contains('Target'))%>%
map_dfr(~c(quantile(.x, type=6, probs = c(.25, .5,.75), na.rm = TRUE),
mean = mean(.x, na.rm = TRUE),
N = length(na.omit(.x))), .id = 'grp')
grp `25%` `50%` `75%` mean N
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Target_1 2554. 2664. 3028. 2810. 6
2 Target_2 2236. 2773 3466 2848. 6
3 Target_3 2732 3210. 3759. 3220 6
Whatever you are doing seems like a summary:
df %>%
select(contains('Target'))%>%
summary()
Another way could be:
df %>%
summarise(across(contains('Target'),
~list(quantile(.x, type=6, probs = c(.25, .5,.75), na.rm = TRUE),
mean(.x, na.rm = TRUE),
length(na.omit(.x))))
)%>%
unnest(everything())
A tibble: 5 x 3
Target_1 Target_2 Target_3
<dbl> <dbl> <dbl>
1 2554. 2236. 2732
2 2664. 2773 3210.
3 3028. 3466 3759.
4 2810. 2848. 3220
5 6 6 6
If you were to include pivoting:
df %>%
pivot_longer(contains('Target')) %>%
group_by(name) %>%
summarise(a = list(quantile(value, type=6, probs = c(.25, .5,.75), na.rm = TRUE)),
mean = mean(value, na.rm = TRUE), N = length(na.omit(value)))%>%
unnest_wider(a)
# A tibble: 3 x 6
name `25%` `50%` `75%` mean N
<chr> <dbl> <dbl> <dbl> <dbl> <int>
1 Target_1 2554. 2664. 3028. 2810. 6
2 Target_2 2236. 2773 3466 2848. 6
3 Target_3 2732 3210. 3759. 3220 6

Error in Weighted.Mean. Why can't it find column?

I have a dataframe that looks like this
df <- data.frame(Region = c("Asia","Asia","Africa","Europe","Europe"),
Emp = c(120,40,10,67,110),
Sales18 = c(12310, 4510, 1140, 5310, 16435),
Sales19 = c(15670, 6730, 1605, 6120, 1755))
I am running a code where I group by region and then take average and weighted average for all 'sales' columns by 'Emp'
Result <- df %>% group_by(Region) %>%
summarise(sales18 = mean(Sales18, na.rm = T),
sales19 = mean(Sales19, na.rm = T),
weightedsales18 = weighted.mean(Sales18, .data[[Emp]], na.rm = T),
weightedsales19 = weighted.mean(Sales19, .data[[Emp]], na.rm = T))
However, I get the following error:
Error in splice(dot_call(capture_dots, frame_env = frame_env, named = named, :
object 'Emp' not found
Can't figure out what I am doing wrong
An option could be:
library(tidyverse)
df <- data.frame(Region = c("Asia","Asia","Africa","Europe","Europe"),
Emp = c(120,40,10,67,110),
Sales18 = c(12310, 4510, 1140, 5310, 16435),
Sales19 = c(15670, 6730, 1605, 6120, 1755))
# For every column starting with "Sales", compute the Emp-weighted mean and
# the plain mean within each region. Emp is referenced by bare name because
# the across() lambdas are evaluated inside the data.
# na.rm = TRUE matches the original question's code, so a missing sales value
# does not turn the whole group's result into NA.
df %>%
  group_by(Region) %>%
  summarise(
    across(
      .cols = starts_with("Sales"),
      .fns = list(
        w_mean = ~ weighted.mean(.x, w = Emp, na.rm = TRUE),
        mean = ~ mean(.x, na.rm = TRUE)
      ),
      # result columns are named <column>_<function>, e.g. Sales18_w_mean
      .names = "{.col}_{.fn}"
    )
  )
#> # A tibble: 3 x 5
#> Region Sales18_w_mean Sales18_mean Sales19_w_mean Sales19_mean
#> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 Africa 1140 1140 1605 1605
#> 2 Asia 10360 8410 13435 11200
#> 3 Europe 12224. 10872. 3407. 3938.
Created on 2021-05-25 by the reprex package (v2.0.0)
This works. The data masking already takes place, you don't need the .data pronoun.
library(tidyverse)
df <- data.frame(Region = c("Asia","Asia","Africa","Europe","Europe"),
Emp = c(120,40,10,67,110),
Sales18 = c(12310, 4510, 1140, 5310, 16435),
Sales19 = c(15670, 6730, 1605, 6120, 1755))
# Per region: plain and Emp-weighted mean of each sales column.
# Columns are referenced by bare name; summarise() already evaluates its
# arguments inside the data, so the .data pronoun is not needed.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
Result <- df %>%
  group_by(Region) %>%
  summarise(
    sales18 = mean(Sales18, na.rm = TRUE),
    sales19 = mean(Sales19, na.rm = TRUE),
    weightedsales18 = weighted.mean(Sales18, Emp, na.rm = TRUE),
    weightedsales19 = weighted.mean(Sales19, Emp, na.rm = TRUE)
  )
Result
#> # A tibble: 3 x 5
#> Region sales18 sales19 weightedsales18 weightedsales19
#> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 Africa 1140 1605 1140 1605
#> 2 Asia 8410 11200 10360 13435
#> 3 Europe 10872. 3938. 12224. 3407.
Created on 2021-05-25 by the reprex package (v2.0.0)
An unquoted Emp inside [[ tells R to look for a character variable called Emp that holds the name of the column containing the weights, like here:
df <- data.frame(Region = c("Asia","Asia","Africa","Europe","Europe"),
x = c(120,40,10,67,110),
Sales18 = c(12310, 4510, 1140, 5310, 16435),
Sales19 = c(15670, 6730, 1605, 6120, 1755))
# Emp is a character variable holding the NAME of the weight column ("x"),
# so .data[[Emp]] looks up column x of the data frame.
Emp <- "x"
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
df %>%
  group_by(Region) %>%
  summarise(
    sales18 = mean(Sales18, na.rm = TRUE),
    sales19 = mean(Sales19, na.rm = TRUE),
    weightedsales18 = weighted.mean(Sales18, .data[[Emp]], na.rm = TRUE),
    weightedsales19 = weighted.mean(Sales19, .data[[Emp]], na.rm = TRUE)
  )
# A tibble: 3 x 5
Region sales18 sales19 weightedsales18 weightedsales19
<chr> <dbl> <dbl> <dbl> <dbl>
1 Africa 1140 1605 1140 1605
2 Asia 8410 11200 10360 13435
3 Europe 10872. 3938. 12224. 3407.
Since you have no such Emp variable, R throws an error.
What to do? Just quote Emp inside [[:
df <- data.frame(Region = c("Asia","Asia","Africa","Europe","Europe"),
Emp = c(120,40,10,67,110),
Sales18 = c(12310, 4510, 1140, 5310, 16435),
Sales19 = c(15670, 6730, 1605, 6120, 1755))
# Per region: plain and Emp-weighted mean of each sales column.
# The weight column is looked up by its quoted name through the .data pronoun.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
df %>%
  group_by(Region) %>%
  summarise(
    sales18 = mean(Sales18, na.rm = TRUE),
    sales19 = mean(Sales19, na.rm = TRUE),
    weightedsales18 = weighted.mean(Sales18, .data[["Emp"]], na.rm = TRUE),
    weightedsales19 = weighted.mean(Sales19, .data[["Emp"]], na.rm = TRUE)
  )
# A tibble: 3 x 5
Region sales18 sales19 weightedsales18 weightedsales19
<chr> <dbl> <dbl> <dbl> <dbl>
1 Africa 1140 1605 1140 1605
2 Asia 8410 11200 10360 13435
3 Europe 10872. 3938. 12224. 3407.

Is there any function in R that can find : average sales contribution of total sales?

Assume a company that we have info about Total sales and the amount of sales in three counties CA , TX and WI.
How can i calculate : the average sales contribution of the three states of total company sales
I need furthermore to find : the same average percentages for each year, month of the year and day of the week.
EDITED !!!
structure(list(CA = c(11047, 9925, 11322, 12251, 16610, 14696
), TX = c(7381, 5912, 9006, 6226, 9440, 9376), WI = c(6984, 3309,
8883, 9533, 11882, 8664), Total = c(25412, 19146, 29211, 28010,
37932, 32736), date = structure(c(1296518400, 1296604800, 1296691200,
1296777600, 1296864000, 1296950400), tzone = "UTC", class = c("POSIXct",
"POSIXt")), event_type = c("NA", "NA", "NA", "NA", "NA", "Sporting"
), snap_CA = c(1, 1, 1, 1, 1, 1), snap_TX = c(1, 0, 1, 0, 1,
1), snap_WI = c(0, 1, 1, 0, 1, 1)), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"))
if I understood your problem correctly a possible solution would be this:
library(dplyr)
library(lubridate)
# NOTE: `df` is not created in this snippet; presumably it is the sales data
# frame posted in the question, so assign that structure(...) output to `df`
# before running this.
# Adds calendar fields derived from `date` and each state's share of the
# row's Total (P_CA, P_TX, P_WI).
df1 <- df %>%
  dplyr::mutate(
    YEAR = lubridate::year(date),
    MONTH = lubridate::month(date),
    WEEKDAY = lubridate::wday(date),
    dplyr::across(c(CA, TX, WI), ~ .x / Total, .names = "P_{.col}")
  )
# Average per Year
df1 %>%
dplyr::group_by(YEAR) %>%
dplyr::summarise(AV_CA = mean(P_CA, na.rm = TRUE),
AV_TX = mean(P_TX, na.rm = TRUE),
AV_WI = mean(P_WI, na.rm = TRUE))
YEAR AV_CA AV_TX AV_WI
<dbl> <dbl> <dbl> <dbl>
1 2011 0.444 0.278 0.278
# Average per Month
df1 %>%
dplyr::group_by(MONTH) %>%
dplyr::summarise(AV_CA = mean(P_CA, na.rm = TRUE),
AV_TX = mean(P_TX, na.rm = TRUE),
AV_WI = mean(P_WI, na.rm = TRUE))
MONTH AV_CA AV_TX AV_WI
<dbl> <dbl> <dbl> <dbl>
1 2 0.444 0.278 0.278
# Average per Weekday
df1 %>%
dplyr::group_by(WEEKDAY) %>%
dplyr::summarise(AV_CA = mean(P_CA, na.rm = TRUE),
AV_TX = mean(P_TX, na.rm = TRUE),
AV_WI = mean(P_WI, na.rm = TRUE))
WEEKDAY AV_CA AV_TX AV_WI
<dbl> <dbl> <dbl> <dbl>
1 1 0.449 0.286 0.265
2 3 0.435 0.290 0.275
3 4 0.518 0.309 0.173
4 5 0.388 0.308 0.304
5 6 0.437 0.222 0.340
6 7 0.438 0.249 0.313
For this dummy data the shares will all sum up to 100%, but with a larger dataset this might not be true.

Calculation of "average sales share " with dplyr::mutate

My data concerns a company and includes Total Sales and the amount of sales in three counties CA , TX and WI.
Data :
> dput(head(WalData))
structure(list(CA = c(11047, 9925, 11322, 12251, 16610, 14696
), TX = c(7381, 5912, 9006, 6226, 9440, 9376), WI = c(6984, 3309,
8883, 9533, 11882, 8664), Total = c(25412, 19146, 29211, 28010,
37932, 32736), date = structure(c(1296518400, 1296604800, 1296691200,
1296777600, 1296864000, 1296950400), tzone = "UTC", class = c("POSIXct",
"POSIXt")), event_type = c("NA", "NA", "NA", "NA", "NA", "Sporting"
), snap_CA = c(1, 1, 1, 1, 1, 1), snap_TX = c(1, 0, 1, 0, 1,
1), snap_WI = c(0, 1, 1, 0, 1, 1)), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"))
With the following code i am trying to calculate the average sales share of the three states on the company's total sales.
In addition, i need the same average percentages for each year, month of the year and day of the week.
install.packages("dplyr")
install.packages("lubridate")
library(dplyr)
library(lubridate)
df1 <- df %>%
dplyr::mutate(YEAR = lubridate::year(date),
MONTH = lubridate::month(date),
WEEKDAY = lubridate::wday(date),
P_CA = CA / Total,
P_TX = TX / Total,
P_WI = WI / Total)
# Average per Year
df1 %>%
dplyr::group_by(YEAR) %>%
dplyr::summarise(AV_CA = mean(P_CA, na.rm = TRUE),
AV_TX = mean(P_TX, na.rm = TRUE),
AV_WI = mean(P_WI, na.rm = TRUE))
# Average per Month
df1 %>%
dplyr::group_by(MONTH) %>%
dplyr::summarise(AV_CA = mean(P_CA, na.rm = TRUE),
AV_TX = mean(P_TX, na.rm = TRUE),
AV_WI = mean(P_WI, na.rm = TRUE))
# Average per Weekday
df1 %>%
dplyr::group_by(WEEKDAY) %>%
dplyr::summarise(AV_CA = mean(P_CA, na.rm = TRUE),
AV_TX = mean(P_TX, na.rm = TRUE),
AV_WI = mean(P_WI, na.rm = TRUE))
Output :
> df1 <- df %>%
+ dplyr::mutate(YEAR = lubridate::year(date),
+ MONTH = lubridate::month(date),
+ WEEKDAY = lubridate::wday(date),
+ P_CA = CA / Total,
+ P_TX = TX / Total,
+ P_WI = WI / Total)
Error in UseMethod("mutate_") :
no applicable method for 'mutate_' applied to an object of class "function"
> # Average per Year
> df1 %>%
+ dplyr::group_by(YEAR) %>%
+ dplyr::summarise(AV_CA = mean(P_CA, na.rm = TRUE),
+ AV_TX = mean(P_TX, na.rm = TRUE),
+ AV_WI = mean(P_WI, na.rm = TRUE))
Error in eval(lhs, parent, parent) : object 'df1' not found
It comes with an error : Error in UseMethod("mutate_") :
no applicable method for 'mutate_' applied to an object of class "function"
I cant figure out whats wrong , i double checked the code and the correctness of the data .
Please give a solution .
The issue is that df has not been created as an object in the global environment, so R finds the function named df instead (see ?df):
df(x, df1, df2, ncp, log = FALSE)
Basically, the error is based on applying mutate on a function df rather than an object
Checking on a fresh R session with no objects created
df %>%
dplyr::mutate(YEAR = lubridate::year(date),
MONTH = lubridate::month(date),
WEEKDAY = lubridate::wday(date),
P_CA = CA / Total,
P_TX = TX / Total,
P_WI = WI / Total)
Error in UseMethod("mutate_") :
no applicable method for 'mutate_' applied to an object of class "function"
Now, we define 'df' as
df <- WalData
df %>%
dplyr::mutate(YEAR = lubridate::year(date),
MONTH = lubridate::month(date),
WEEKDAY = lubridate::wday(date),
P_CA = CA / Total,
P_TX = TX / Total,
P_WI = WI / Total)
# A tibble: 6 x 15
# CA TX WI Total date event_type snap_CA snap_TX snap_WI YEAR MONTH WEEKDAY P_CA P_TX P_WI
# <dbl> <dbl> <dbl> <dbl> <dttm> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 11047 7381 6984 25412 2011-02-01 00:00:00 NA 1 1 0 2011 2 3 0.435 0.290 0.275
#2 9925 5912 3309 19146 2011-02-02 00:00:00 NA 1 0 1 2011 2 4 0.518 0.309 0.173
#3 11322 9006 8883 29211 2011-02-03 00:00:00 NA 1 1 1 2011 2 5 0.388 0.308 0.304
#4 12251 6226 9533 28010 2011-02-04 00:00:00 NA 1 0 0 2011 2 6 0.437 0.222 0.340
#5 16610 9440 11882 37932 2011-02-05 00:00:00 NA 1 1 1 2011 2 7 0.438 0.249 0.313
#6 14696 9376 8664 32736 2011-02-06 00:00:00 Sporting 1 1 1 2011 2 1 0.449 0.286 0.265

R tidyeval passing a list containing multiple character vectors to dplyr functions

In this problem I want to map a function over multiple data inputs to create output showing frequencies of item responses.
First I create two data sets using the psych package:
suppressMessages(library(here))
suppressMessages(library(tidyverse))
suppressMessages(library(psych))
set.seed(123)
data_input_sim <-
as_tibble(sim.poly.ideal(nvar = 50, n = 1000, cat = 4, )[["items"]]) %>%
mutate_all(
~ case_when(
.x == 0 ~ "never",
.x == 1 ~ "occasionally",
.x == 2 ~ "frequently",
.x == 3 ~ "always"
)
) %>%
rename_all( ~ str_c("i", str_pad(
as.character(1:50), 2, side = "left", pad = "0"
))) %>%
mutate(
ID = 100001:101000,
age = sample(c(5:12), 1000, replace = TRUE),
age_range = case_when(
age <=8 ~ "5 to 8 yo",
T ~ "9 to 12 yo"
),
gender = sample(
c("female", "male"),
1000,
replace = TRUE,
prob = c(0.53, 0.47)
),
educ = sample(
c("no_HS", "HS_grad", "some_college", "BA_plus"),
1000,
replace = TRUE,
prob = c(0.119, 0.263, 0.306, 0.311)
),
ethnic = sample(
c("hispanic", "asian", "black", "white", "other"),
1000,
replace = TRUE,
prob = c(0.239, 0.048, 0.136, 0.521, .056)
),
region = sample(
c("northeast", "south", "midwest", "west"),
1000,
replace = TRUE,
prob = c(0.166, 0.383, 0.212, 0.238)
),
clin_status = sample(
c("typ", "clin"),
1000,
replace = TRUE,
prob = c(0.8, 0.2)
)
) %>%
select(ID:clin_status, i01:i50)
data_input_bfi <- bfi %>%
drop_na() %>%
sample_n(1000) %>%
mutate(
ID = 200001:201000,
age_range = case_when(
age <= 18 ~ "18 yo or younger",
between(age, 19, 24) ~ "19 to 24 yo",
between(age, 25, 39) ~ "25 to 39 yo",
T ~ "40 yo or older"
),
gender = case_when(gender == 1 ~ "male",
gender == 2 ~ "female"),
educ = case_when(
education == 1 ~ "no_HS",
education == 2 ~ "HS_grad",
education == 3 ~ "some_college",
T ~ "BA_plus"
),
ethnic = sample(
c("hispanic", "asian", "black", "white", "other"),
1000,
replace = TRUE,
prob = c(0.239, 0.048, 0.136, 0.521, .056)
),
region = sample(
c("northeast", "south", "midwest", "west"),
1000,
replace = TRUE,
prob = c(0.166, 0.383, 0.212, 0.238)
),
clin_status = sample(
c("typ", "clin"),
1000,
replace = TRUE,
prob = c(0.8, 0.2)
)
) %>%
mutate_at(
vars(A1:O5),
~
case_when(
.x == 1 ~ "very_inaccurate",
.x == 2 ~ "moderately_inaccurate",
.x == 3 ~ "slightly_inaccurate",
.x == 4 ~ "slightly_accurate",
.x == 5 ~ "moderately_accurate",
.x == 6 ~ "very_accurate",
)
) %>%
select(ID, age:clin_status, A1:O5)
Then I extract and sequence elements unique to each data set: the suffix of its name, the names of its item columns, and the names of its item categories:
data_name_suffix <- c("sim", "bfi")
sim_item_cols <- str_c("i", str_pad(as.character(1:50), 2, side = "left", pad = "0"))
bfi_item_cols <- cross(list(c("A", "C", "E", "N", "O"), seq(1:5))) %>%
map_chr(str_c, collapse = "") %>%
sort()
sim_item_cats <- c("never", "occasionally","frequently", "always")
bfi_item_cats <- c("very_inaccurate", "moderately_inaccurate", "slightly_inaccurate",
"slightly_accurate", "moderately_accurate", "very_accurate")
data_name_suffix is a two-element character vector; I then create two-element lists (using quos()) to hold the item column and category names:
item_cols <- quos(sim_item_cols, bfi_item_cols)
item_cats <- quos(sim_item_cats, bfi_item_cats)
Now I attempt to map the output-creating function over the three inputs, using purrr::pmap():
pmap_df(
list(data_name_suffix,
item_cols,
item_cats),
~
eval(as.name(str_c("data_input_", data_name_suffix))) %>%
select(!!!item_cols) %>%
gather(var, value) %>%
group_by(var, value) %>%
count(var, value) %>%
ungroup() %>%
spread(value, n) %>%
arrange(match(var, !!!item_cols)) %>%
select(var, !!!item_cats) %>%
assign(str_c("freq_item_val_", data_name_suffix), ., envir = .GlobalEnv)
)
And it returns this error:
Error: Unknown columns `A1`, `A2`, `A3`, `A4`, `A5` and ...
Which suggests to me that R is seeing the list item_cols as a single long character vector, rather than two separate character vectors to iterate over.
And here we reach the limit of my understanding of and experience with tidyeval techniques. I suspect that I'm doing something wrong with quos() and !!!.
Thanks in advance for any help, and I hope whoever reads this is safe and healthy during this surreal time.
Here, we could use mget to get the values of the objects
library(stringr)
library(purrr)
library(dplyr)
library(tidyr)
# Iterate in parallel over three lists: the data frames (fetched by name with
# mget), the item-column quosures and the item-category quosures. Inside the
# lambda, ..1, ..2 and ..3 are the current elements of those three lists.
# The result is a list of frequency tables, one per data frame.
list(mget(str_c('data_input_', data_name_suffix)),
item_cols,
item_cats) %>%
pmap(~ ..1 %>%
# keep only the item columns of the current data set
select(!!! ..2) %>%
pivot_longer(everything(), names_to = 'var', values_to = 'value') %>%
# frequency of each response value per item
count(var, value) %>%
# one row per item, one column per response category
pivot_wider(names_from = value, values_from = n) %>%
# put the rows in the order given by the item-column vector
arrange(match(var, !!!..2)) %>%
# put the category columns in the order given by the category vector
select(var, !!! ..3) )
#$data_input_sim
# A tibble: 50 x 5
# var never occasionally frequently always
# <chr> <int> <int> <int> <int>
# 1 i01 465 366 141 28
# 2 i02 489 336 147 28
# 3 i03 457 367 146 30
# 4 i04 433 385 162 20
# 5 i05 418 362 171 49
# 6 i06 420 369 169 42
# 7 i07 405 367 182 46
# 8 i08 361 401 194 44
# 9 i09 346 391 211 52
#10 i10 334 425 203 38
# … with 40 more rows
#$data_input_bfi
# A tibble: 25 x 7
# var very_inaccurate moderately_inaccurate slightly_inaccurate slightly_accurate moderately_accurate very_accurate
# <chr> <int> <int> <int> <int> #<int> <int>
# 1 A1 334 278 151 130 75 32
# 2 A2 18 49 48 197 365 323
# 3 A3 32 51 72 210 353 282
# 4 A4 48 69 60 159 243 421
# 5 A5 26 66 89 207 340 272
# 6 C1 17 48 82 213 383 257
# 7 C2 26 85 98 212 361 218
# 8 C3 35 80 102 272 322 189
# 9 C4 296 270 166 163 83 22
#10 C5 197 212 118 207 167 99
# … with 15 more rows
NOTE: using assign() to create multiple objects in the global environment is not recommended. Instead, keep the output in a list and make changes to each of the list elements (if needed) by looping over it with map.

Resources