How do I subset a column of the current group in dplyr? - r

I am having difficulty parameterizing some code I wrote. It works fine when not in a function like this:
new_df = group_by(groupby1, groupby2) %>%
mutate(new_value=
slider_helper(
slide(cur_data()[, c('string1', 'string2'], ~.x, .before = Inf, .after = -1),
cur_data()$string2,
'string1',
beta
But when I try to create a function where you can pass strings for the variables to group by and the variables to slide over like so:
my_fun <- function(df, groupby1, groupby2, string1, string2, beta) {
return(df %>%
group_by({{groupby1}}, {{groupby2}}) %>%
mutate(new_value=
slider_helper(
slide(cur_data()[, c({{string1}}, {{string2}}], ~.x, .before = Inf, .after = -1),
cur_data()[[{{string2}}]],
{{string1}},
beta)))
}
I get this vague stack trace:
The error occurred in group 1: "groupby1" = "groupby1", "groupby2" = "groupby2".
Caused by error in `.subset()`:
! invalid subscript type 'closure'
What is the proper way to parameterize dplyr functions to work with column names passed as strings?
EDIT:
Here is a reproducible example
slider_helper <- function(left, right, string1, beta) {
cbind_helper <- function(left, right) {
todaysDate = rep(right, nrow(left))
return(cbind(left, todaysDate))
}
date_helper <- function(today, date) {
return(1/as.integer(today - date))
}
df = data.frame(t(mapply(cbind_helper, left, right)))
df$val1= mapply(date_helper, df[,'todaysDate'], df[, date])
df$val1_product= mapply('%*%', df$val1, df[[target]]) / sapply(df$val1, FUN=sum, na.rm=T)
df$val2= 1/seq(nrow(df), 1)
df$val2_product= sapply(mapply('*', df$val2, df[[target]]), FUN=sum, na.rm=T) / sum(df$val2, na.rm=T)
w_sum = beta * df$val2_product+ (1-beta) * df$val2_product
return(w_sum)
}
my_fun <- function(df, groupby1, groupby2, string1, string2, beta) {
return(df %>%
group_by({{groupby1}}, {{groupby2}}) %>%
mutate(new_value=
slider_helper(
slide(cur_data()[, c({{string1}}, {{string2}})], ~.x, .before = Inf, .after = -1),
cur_data()[[{{string2}}]],
{{string1}},
beta)))
}
df= data.frame(sample(1:2, 20, replace=T), sample(1:2, 20, replace=T), seq(from=-1, to=.9, by = .1), seq.Date(from=as.Date('2011-01-01'), to=as.Date('2011-01-20'), by = 1))
colnames(df) = c('groupby1', 'groupby2', 'string1', 'string2')
my_fun(testy, 'groupby1', 'groupby2', 'string1', 'string2', 0.5)

Without sample data and more context this is kind of a guess.
But, if you are using a simple function then you need to remove the {{}}. The function does not need those brackets.
my_fun <- function(df, groupby1, groupby2, string1, string2, beta) {
return(df %>%
group_by(groupby1, groupby2) %>%
mutate(new_value=
slider_helper(
slide(cur_data()[, c(string1, string2)], ~.x, .before = Inf, .after = -1),
cur_data()[[string2]],
string1,
beta)))
}

Related

argument "" is missing, with no default when using map_dfc in R

I am trying to modify Monte Carlo code from Reproduceable Finance with R to handle withdrawals and inflation of these withdrawals.
to do so I have changed a simulation accumulate function to subtract a withdrawl amount that has been increased do to inflation prior to calling the function.
simulation_accum_withdrawls <- function(init_value, N, mean, stdev, wAmt) {
tibble(c(init_value, 1 + rnorm(N, mean, stdev),wAmt)) %>%
`colnames<-`("returns") %>%
mutate(growth =
accumulate(returns, function(x, y, wAmt) x * y - wAmt)) %>%
select(growth)
}
the function is run from the following code
monte_carlo_sim <-
map_dfc(starts, simulation_accum_withdrawls,
N = sim_duration_months,
mean = geo_mean_port_return,
stdev = geo_stddev_port_return,
wAmt = inflated_withdrawls)
which throws off the following error.
Error in map():
ℹ In index: 1.
Caused by error in mutate():
! Problem while computing growth = accumulate(returns, function(x, y, wAmt) x * y - wAmt).
Caused by error in fn():
! argument "wAmt" is missing, with no default
Backtrace:
purrr::map_dfc(...)
purrr::accumulate(returns, function(x, y, wAmt) x * y - wAmt)
purrr:::reduce_impl(...)
fn(out, elt, ...)
Error in map(.x, .f, ...) :
The inflated_withdrawls data frame does exist and is not empty i.e. see below
str(inflated_withdrawls)
'data.frame': 38 obs. of 1 variable:
$ wAmt: num 1805391 0 0 0 0 ...
Any advice or insight on how to debug this would be appreciated.
Thanks
Here is an executable example per the request.
library(tidyverse)
library(highcharter)
library(tidyquant)
library(timetk)
library(broom)
library(highcharter)
library(purrr)
library(knitr)
library(readxl)
accum_inflation <- function(init_value, inflation=0.0) {
tibble(c(init_value, 1 + inflation)) %>%
`colnames<-`("acc_inf") %>%
mutate(acc_inf =
accumulate(acc_inf, function(x, y) x * y ))
}
accum_wd <- function(wAmt, acc_inf=0.0) {
growth = wAmt * acc_inf
}
simulation_accum_1 <- function(init_value, N, mean, stdev,inf_wd) {
tibble(c(init_value, 1 + rnorm(N, mean, stdev), inf_wd)) %>%
`colnames<-`("returns") %>%
mutate(growth =
accumulate(returns, function(x, y) x * y - inf_wd)) %>%
select(growth)
}
simulation_accum_withdrawls <- function(init_value, N, mean, stdev, wAmt=0) {
tibble(c(init_value, 1 + rnorm(N, mean, stdev),wAmt)) %>%
`colnames<-`("returns") %>%
mutate(growth =
accumulate(returns, function(x, y, wAmt) x * y + wAmt)) %>%
select(growth)
}
test_df <- data.frame(wAmt =c(100000,-180000,-180000,-180000),
date=seq.Date(as.Date("2024-01-01"), by = "year", length.out = 4)
)
test_df <- test_df %>% pad_by_time(date, .by = "month", .pad_value = 0)
inf_df <- rnorm(nrow(test_df),0.03/12,0.042/12)
inflated_returns <- accum_inflation(1, inf_df)
edatep1 <- ymd(edate)+months(1)
test_df<- rows_append(test_df,tibble(wAmt = 0, date = edatep1))
inflated_withdrawls <- accum_wd(test_df[1], inflated_returns)
nav=1000000
sim_duration_months = 40
total_sims = 10
geo_mean_port_return = 0.00443402454379282
geo_stddev_port_return = 0.0237813751473552
sims <- total_sims
starts <-
rep(nav, sims) %>%
set_names(paste("sim", 1:sims, sep = ""))
monte_carlo_sim <-
map_dfc(starts, simulation_accum_withdrawls,
N = sim_duration_months,
mean = geo_mean_port_return,
stdev = geo_stddev_port_return,
wAmt = inflated_withdrawls)

Applying text_transform() to row groups in R gt

In the {gt} package I want to use text_transform() on the row group titles in order to render the HTML but I'm getting the `no applicable method for 'resolve_location' error.
In my example below, you can see that text_transform() works if the locations argument is cells_body() (which is not what I actually want) but not if it's cells_row_groups() which is what I want.
Thoughts?
Zev
# As an experiment, I put HTML in both a value and in the groups, though
# in the real data there is only HTML in groups.
tbl <- tibble(values = c("test<sup>2</sup>", 2:4), groups = c("x<sup>2</sup>", "x<sup>2</sup>", "y", "y"))
unescape_html <- function(str){
xml2::xml_text(xml2::read_html(paste0("<x>", str, "</x>")))
}
# Error, no applicable method for resolve_location
tbl |>
gt::gt(groupname_col = "b") |>
gt::text_transform(
locations = gt::cells_row_groups(),
fn = function(x){
x <- purrr::map_chr(x, unescape_html)
paste("<span style=color:red;>", x, "</span>")
}
)
# This works so it shows that I'm close :)
tbl |>
gt::gt(groupname_col = "b") |>
gt::text_transform(
locations = gt::cells_body(columns = 1),
fn = function(x){
x <- purrr::map_chr(x, unescape_html)
paste("<span style=color:red;>", x, "</span>")
}
)
After some trial and error and a look at the rendered html code I figured out a solution using gt::html:
tbl <- tibble::tibble(values = c("test<sup>2</sup>", 2:4), groups = c("x<sup>2</sup>", "x<sup>2</sup>", "y", "y"))
unescape_html <- function(str){
xml2::xml_text(xml2::read_html(paste0("<x>", str, "</x>")))
}
tbl |>
gt::gt(groupname_col = "groups") |>
gt::text_transform(
locations = gt::cells_row_groups(),
fn = function(x) {
purrr::map(x, ~ gt::html(paste("<span style=color:blue;>", .x, "</span>")))
}
) |>
gt::text_transform(
locations = gt::cells_body(columns = 1),
fn = function(x){
x <- purrr::map_chr(x, unescape_html)
paste("<span style=color:red;>", x, "</span>")
}
)

Using group_by in function

I have a function written to calculate the confidence interval of a ratio of averages between two vectors using jackknife standard errors
jackknife_CI = function(x, y, alpha = .05) {
xl = (sum(x,na.rm=T) - x) / (length(x) - 1)
yl = (sum(y,na.rm=T) - y) / (length(y) - 1)
n = length(x) + length(y)
jack_se = (sd(c(xl / mean(y,na.rm=T), mean(x,na.rm=T) / yl),na.rm=T) * (n - 1)) / sqrt(n)
mean(x, na.rm = T) / mean(y, na.rm = T) + jack_se * qnorm(c(alpha/2,1-alpha/2))
}
I want to then use it with the ToothGrowth dataset in the following way:
df1 =
ToothGrowth %>%
filter(supp == "OJ") %>%
rename(len_x = len) %>%
select(dose,len_x)
df2 =
ToothGrowth %>%
filter(supp == "VC") %>%
rename(len_y = len) %>%
select(dose, len_y)
df = cbind(df1,df2)
df = df[,-3]
jack_CI = df %>% group_by(dose) %>% jackknife_CI(x = len_x, y = len_y)
My problem is that the last line results in the error:
Error in jackknife_CI(., x = len_x, y = len_y) : object 'len_x' not found
How do I get around this?
The last line need to be:
jack_CI = jackknife_CI(x = df$len_x, y = df$len_y)
The way you are running it is being interpreted as follows:
jack_CI = jackknife_CI(group_by(df, dose), x = len_x, y = len_y)
Which is causing a couple issues:
jackknife_CI is not expecting the first argument to be the dataframe. (because of pipe operator)
len_x and len_y are not recognized outside of the dataframe.
If you want to run the function on each group you can do:
df %>% group_by(dose) %>%
do({
ci <- jackknife_CI(.$len_x, .$len_y)
tibble(low = ci[1], hi = ci[2])
})
I use do because the function returns two values. Otherwise you would be able to just use summarize. Each group is being passed to do which is then returning a tibble (note the last line in do) which are then being stacked to return the result. I am referring to each group inside of do with .$variable_name where the dot references the value being passed (in this case the dataframe for each group)

Function do ({}) in R does not allow me to name "ode" to be able to add the variables inside ggplot mapings

I am implementing the SIR model in R, and I need to vary beta and gamma for it.
library(deSolve)
par(mar = rep(2, 4))
N = 1000
vi <- c(S = N-1,I = 1,R = 0)
SIR <- function(t, vi, pm) {
with(as.list(c(vi, pm)), {
ds <- -beta* S* (I/N)
di <- beta* S* (I/N) - gamma * I
dr <- gamma * I
return(list(c(ds, di, dr)))
})
}
t <- seq(0, 50, by = 1)
betavals <- c(1,5,8)
ipvals <- c(2,20,50)
gammavals <- 1/ipvals
However, when wanting to apply the function for my different Beta and gamma values, the do ({}) function does not allow me to name my function "ode" and thus be able to print in ggplot (aes (x = t, y = value ) both I, S and R.
library(tidyverse)
expand.grid(beta=betavals,gamma=gammavals)%>%
group_by(beta,gamma) %>%
do(
{
ode(func=SIR,y=vi,times=t,
parms=c(beta=.$beta,gamma=.$gamma)) %>%
as.data.frame() -> out
}
) out %>%
gather(variable,value,-time)%>%
ggplot(aes(x=time,y=value,color=variable))+ #value is I,S,R
geom_line()+
facet_grid(beta~gamma,scales='free_y',labeller=label_both)+
theme_bw()
When doing so I get this error
<Error: unexpected symbol in:
" }
) out">
You do not have access to the out variable outside the do function. We can continue using the same chain operation to get data in the long format. gather has been retired, so I replace it with pivot_longer.
library(tidyverse)
library(deSolve)
expand.grid(beta=betavals,gamma=gammavals)%>%
group_by(beta,gamma) %>%
do(
{
ode(func=SIR,y=vi,times=t,
parms=c(beta=.$beta,gamma=.$gamma)) %>%
as.data.frame()
}
) %>%
ungroup %>%
pivot_longer(cols = S:R) %>%
mutate(name = factor(name, c('S', 'I', 'R'))) %>%
ggplot(aes(x=time,y=value,color=name))+
geom_line() +
facet_grid(beta~gamma,scales='free_y',labeller=label_both)+
theme_bw()

What is the function of !! in front of an argument inside a function?

I am reviewing code that was given to me as part of a homework. I am familiar with functions in R, but this person is using !! before the 'y' argument inside the function, which I had never seen before. I am wondering what is the function of !!.
I have tried google and the help function in R studio.
my_table <- function(df = NULL, y = NULL, grp = NULL,
grpname = " ", dgts = 1, tbltitle = " ") {
y <- enquo(y)
grp <- enquo(grp)
mysummary <- df %>% group_by(!!grp) %>% summarize(
n = sum(!is.na(!!y)),
mean = format(round(mean(!!y, na.rm=T), dgts), nsmall=dgts),
sd = format(round(sd(!!y, na.rm=T), dgts), nsmall=dgts),
se = format(round(sd(!!y, na.rm=T)/sqrt(sum(!is.na(!!y))), dgts),
nsmall=dgts),
kable(mysummary)
}

Resources