Macro function in R

Macro function in R - r

I'm new to R coming from SAS. I am trying to repeat this code for a set of datasets called mort_1969, mort_1970,..., mort_n. How can I modify it so I don't have to rerun it each time, replacing the year variable?
mrt <- read_csv("mort1985.csv")
mrt <- mrt %>%
rename(nchs_code = countyrs)
crosswalk <- read_csv("crosswalk.csv")
crosswalk <- crosswalk %>%
select(-X1)
mrt_cw <- left_join(mrt, crosswalk, by = "nchs_code")
mrt_cw1 <- mrt_cw %>%
filter(cityrs == 999) %>%
select(fips, racer3) %>%
mutate(count = 1) %>%
group_by(fips, racer3) %>%
summarise(mrt_c = sum(count)) %>%
mutate(racer3=replace(racer3, racer3==1, "white_pop"),
racer3=replace(racer3, racer3==2, "black_pop"),
racer3=replace(racer3, racer3==3, "other_race_pop"))
mrt_cw2 <- pivot_wider(
data=mrt_cw1,
names_from = racer3,
values_from = mrt_c
)
#Convert NAs to 0 and drop original variables
mrt_cw3 <- mrt_cw2 %>%
mutate(white_m = ifelse(is.na(white_pop), 0, white_pop),
black_m = ifelse(is.na(black_pop), 0, black_pop),
other_race_m = ifelse(is.na(other_race_pop), 0, other_race_pop)) %>%
select(-white_pop, -black_pop, -other_race_pop) %>%
mutate(total_m = white_m + black_m + other_race_m)
pop <- read_csv('population_file.csv')
##########################ADD YEAR
pop <- pop %>%
filter(year == 1985) %>%
select(-X1, -year)
mrt_cw4 <- left_join(mrt_cw3, pop, by = "fips")
mrt_cw5 <- mrt_cw4 %>%
mutate(ttl_rate = (total_m/total_pop)*100,
blk_rate = (black_m/black_pop)*100,
wht_rate = (white_m/white_pop)*100,
otr_rate = (other_race_m/other_races_pop)*100)
mrt_cw6 <- mrt_cw5 %>%
mutate(high_flag = ifelse(ttl_rate >= 100 | blk_rate >= 100 |
wht_rate >= 100 | otr_rate >= 100, 1, 0))
##########################ADD YEAR
#Save file
write.csv(mrt_cw6, "mort_85.csv")

Related

Change row group labels in gt table (with superscript/subscript and line breaks). Customising row group labels in R

I have the following data and table:
library(gt)
library(dplyr)
a <- rnorm(21, mean = 112, sd =12)
colour <- rep(c("Blue", "Red", "Green"), 7)
data <- data.frame(colour, a) %>%
group_by(colour) %>%
summarise(mean = mean(a), sd = sd(a), n = n()) %>%
mutate(grp = html("[H<sub>2</sub>O]")) %>%
rename(cat = colour)
b <- rnorm(21, mean = 60, sd =12)
day <- rep(c("2", "4", "6"), 7)
data2 <- data.frame(day, b) %>%
group_by(day) %>%
summarise(mean = mean(a), sd = sd(a), n = n()) %>%
mutate(grp = html("[H<sub>2</sub>O] Additition <br> (Days)")) %>%
rename(cat = day)
bind_rows(data, data2) %>%
group_by(grp) %>%
gt(rowname_col = "cat")
bind_rows(data, data2) %>%
group_by(grp) %>%
gt() %>%
tab_options(row_group.as_column = TRUE)
The row group labels appear literally as '[H<sub>2<\sub>O]', rather than [H2O] etc. It is likely that I am using HTML wrong and it needs to be used with another package/function. I have also tried using cols_label but doesn't recognise these as columns in the dataframe.
Is there also a way to have the row groups column vertically centered, rather than at the top where is currently is? How do you bold these row groups?

The html function won't work outside of a gt table, so you'll have to create the row groups using tab_row_group and add the html labels there.
data <- data.frame(colour, a) %>%
group_by(colour) %>%
summarise(mean = mean(a), sd = sd(a), n = n()) %>%
mutate(grp = "color") %>%
rename(cat = colour)
data2 <- data.frame(day, b) %>%
group_by(day) %>%
summarise(mean = mean(a), sd = sd(a), n = n()) %>%
mutate(grp = "day") %>%
rename(cat = day)
bind_rows(data, data2) %>%
gt() %>%
tab_row_group(
label = html("[H<sub>2</sub>O]"),
rows = grp == "color"
) %>%
tab_row_group(
label = html("[H<sub>2</sub>O] Additition <br> (Days)"),
rows = grp == "day"
) %>%
cols_hide(grp)

An error keeps appearing that the sample doesn't work in R Studio cloud and I don't know why

sharp_null_thought_experiment <-
function() {
final_data %>%
mutate(
OUTCOME_Z_0 = rnorm(n(), sd = 0.5007117),
OUTCOME_Z_1 = OUTCOME_Z_0,
Z = sample(rep(c(0, 1), times = c(sum(final_data$treatment_group=="control"), sum(final_data$treatment_group=="treatment"))), size = n()),
OUTCOME = if_else(Z == 0, OUTCOME_Z_0, OUTCOME_Z_1)
) %>%
difference_in_means(OUTCOME ~ Z, data = .) %>%
tidy
}
sampling_distribution_sharp_null <- rerun(1000, sharp_null_thought_experiment()) %>%
bind_rows
sampling_distribution_sharp_null %>%
summarise(mean(estimate>=results$estimate))

Skip "zero" level of dichotomous variables in expss tables

I want to create a summary table for some dichotomous variables using the expss package. Since the variables are dichotomous, one of the two levels would the sufficient to "show the picture".
I tried to use the function tab_net_cell, but was not able to get the right results. Here is some example code with BrCa (Breast cancer) with 1 or 0. I only want to show the number of patients with but not without breast cancer.
df <- data.frame(BrCa = c(1,1,1,0,0,0,NA,NA,0,0))
df$group <- c(1,2,1,2,1,2,1,2,1,2)
df %>%
expss::tab_cols(group) %>%
expss::tab_cells(BrCa) %>%
expss::tab_stat_cpct(total_row_position = "none",label = "%") %>%
expss::tab_stat_cases(total_row_position = "none",label = "N") %>%
expss::tab_pivot(stat_position = "inside_rows")
df %>%
expss::tab_cols(group) %>%
expss::tab_cells(BrCa) %>%
expss::tab_net_cells("BrCa" = eq(1)) %>%
expss::tab_stat_cpct(total_row_position = "none",label = "%") %>%
expss::tab_stat_cases(total_row_position = "none",label = "N") %>%
expss::tab_pivot(stat_position = "inside_rows")

The simplest way is to filter resulted table:
df <- data.frame(BrCa = c(1,1,1,0,0,0,NA,NA,0,0))
df$group <- c(1,2,1,2,1,2,1,2,1,2)
df %>%
expss::tab_cols(group) %>%
expss::tab_cells(BrCa) %>%
expss::tab_stat_cpct(total_row_position = "none",label = "%") %>%
expss::tab_stat_cases(total_row_position = "none",label = "N") %>%
expss::tab_pivot(stat_position = "inside_rows") %>%
expss::where(grepl(1, row_labels))
Another way is to use mean and sum instead of cpct and cases:
df %>%
expss::tab_cols(group) %>%
expss::tab_cells(BrCa*100) %>%
expss::tab_stat_mean(label = "%") %>%
expss::tab_stat_sum(label = "N") %>%
expss::tab_pivot(stat_position = "inside_rows")

Pretty tables with cumulative count / percentage and group totals using R "tables" package

I am trying to produce a formatted html table which has columns for frequency, cumulative frequency, column percentage, and cumulative column percentage. The table should also have the data subsetted by a grouping variable, and including a group total.
I can almost achieve this using a combination of dplyr and tidyr, but the output is a dataframe which doesn't look so pretty. I wonder if there is an easier way using the tables::tabulate command?
# Sample data
dat <- data.frame(
id = 1:100,
group = factor(sample(c("A", "B"), 100, replace = TRUE)),
sessions = factor(sample(1:10, 100, replace = TRUE))
)
# dplyr/tidyr solution
library(dplyr)
library(tidyr)
dat %>%
group_by(group, sessions) %>%
tally() %>%
spread(key = group, value = n) %>%
mutate(All = rowSums(.[-1])) %>%
gather(key = group, value = n, -sessions) %>%
group_by(group) %>%
mutate(
cum_n = cumsum(n),
p = round(n / sum(n)*100,1),
cum_p = round(cum_n / sum(n)*100,1),
) %>%
data.frame() %>%
reshape(timevar = "group", idvar = "sessions", direction = "wide")
# As far as I get using tables::tabulate
library(tables)
tabular(
Factor(sessions, "Sessions") ~
(Heading()*group + 1) *
(
(n = 1) +
# (cum_n = ??) +
Heading("%")*Percent(denom = "col")*Format(digits = 2)
# + Heading("cum_%")*??*Format(digits = 2)
),
data = dat
)

I would recommend using knitr::kable and kableExtra, amazing packages for producing tables. You can also set it up for multiple format outputs, for example using the same code to produce html and latex for pdf.
library(dplyr)
library(tidyr)
library(knitr)
library(kableExtra)
dat %>%
group_by(group, sessions) %>%
tally() %>%
spread(key = group, value = n) %>%
mutate(All = rowSums(.[-1])) %>%
gather(key = group, value = n, -sessions) %>%
group_by(group) %>%
mutate(
cum_n = cumsum(n),
p = round(n / sum(n)*100,1),
cum_p = round(cum_n / sum(n)*100,1),
) %>%
data.frame() %>%
reshape(timevar = "group", idvar = "sessions", direction = "wide") %>%
kable("html") %>%
kable_styling(bootstrap_options = c("striped", "hover"))

R - dplyr bootstrap issue

I have an issue understanding how to use the dplyr bootstrap function properly.
What I want is to generate a bootstrap distribution from two randomly assigned groups and compute the difference in means, like this for example :
library(dplyr)
library(broom)
data(mtcars)
mtcars %>%
mutate(treat = sample(c(0, 1), 32, replace = T)) %>%
group_by(treat) %>%
summarise(m = mean(disp)) %>%
summarise(m = m[treat == 1] - m[treat == 0])
The issue is that I need to repeat this operation 100, 1000, or more times.
Using replicate, I can do
frep = function(mtcars) mtcars %>%
mutate(treat = sample(c(0, 1), 32, replace = T)) %>%
group_by(treat) %>%
summarise(m = mean(disp)) %>%
summarise(m = m[treat == 1] - m[treat == 0])
replicate(1000, frep(mtcars = mtcars), simplify = T) %>% unlist()
and get the distribution
I don't really get how to use bootstraphere. How should I start ?
mtcars %>%
bootstrap(10) %>%
mutate(treat = sample(c(0, 1), 32, replace = T))
mtcars %>%
bootstrap(10) %>%
do(tidy(treat = sample(c(0, 1), 32, replace = T)))
It's not really working. Where should I put the bootstrap pip ?
Thanks.

In the do step, we wrap with data.frame and create the 'treat' column, then we can group by 'replicate' and 'treat' to get the summarised output column
mtcars %>%
bootstrap(10) %>%
do(data.frame(., treat = sample(c(0,1), 32, replace=TRUE))) %>%
group_by(replicate, treat) %>%
summarise(m = mean(disp)) %>%
summarise(m = m[treat == 1] - m[treat == 0])
#or as 1 occurs second and 0 second, we can also use
#summarise(m = last(m) - first(m))

Develop Reference

r css asp.net wordpress firebase qt symfony nginx http apache-flex

Macro function in R - r

Related

Change row group labels in gt table (with superscript/subscript and line breaks). Customising row group labels in R

An error keeps appearing that the sample doesn't work in R Studio cloud and I don't know why

Skip "zero" level of dichotomous variables in expss tables

Pretty tables with cumulative count / percentage and group totals using R "tables" package

R - dplyr bootstrap issue

Categories

Resources