using plotmath in ggrepel labels

using plotmath in ggrepel labels - r

I am trying to create a plot where I want to show all coefficients from my linear model and their respective statistical details attached at each point using ggrepel package. I have managed to create the basic plot, but what I haven't been able to figure out is how to use plotmath while creating labels. So, for example, in the plot produced below, I would like to use italics for the t-value (t) and p-value (p). Additionally, if I were to include estimates, I might also want to include the greek letter beta (β) in the label.
# loading needed libraries
library(ggrepel)
#> Loading required package: ggplot2
library(ggplot2)
library(GGally)
library(tidyverse)
# creating a dataframe containing results
(label_df <- broom::tidy(x = stats::lm(data = mtcars, wt ~ am*cyl), conf.int = TRUE) %>%
dplyr::filter(.data = ., term != "(Intercept)") %>%
dplyr::select(.data = ., term, estimate, conf.low, conf.high, statistic, p.value) %>%
purrrlyr::by_row(
.d = .,
..f = ~ paste(
"t = ",
round(.$statistic, digits = 3),
", p = ",
round(.$p.value, digits = 3),
sep = ""
),
.collate = "rows",
.to = "label",
.labels = TRUE
)
)
#> # tibble [3 x 7]
#> term estimate conf.low conf.high statistic p.value label
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
#> 1 am -0.956 -2.58 0.668 -1.21 0.238 t = -1.206, p = 0~
#> 2 cyl 0.304 0.135 0.473 3.68 0.000989 t = 3.678, p = 0.~
#> 3 am:cyl 0.0328 -0.234 0.300 0.252 0.803 t = 0.252, p = 0.~
# creating the model coefficient plot using ggcoef
plot <- GGally::ggcoef(x = stats::lm(data = mtcars, wt ~ am*cyl), exclude_intercept = TRUE)
# adding labels using ggrepel
plot +
ggrepel::geom_label_repel(
data = label_df,
mapping = ggplot2::aes(x = estimate, y = term, label = label),
size = 3,
box.padding = grid::unit(x = 0.75, units = "lines"),
fontface = "bold",
direction = "y",
color = "black",
label.size = 0.25,
segment.color = "black",
segment.size = 0.5,
segment.alpha = NULL,
min.segment.length = 0.5,
max.iter = 2000,
point.padding = 0.5,
force = 2,
na.rm = TRUE
)
If I use something like base::substitute or base::bquote to create the label inside purrrlyr, I get the following error:
.f must return either data frames or vectors for non-list collation
I can get rid of this error by converting it to character type but then the labels get all messed-up.
# creating a dataframe containing results
(label_df <- broom::tidy(x = stats::lm(data = mtcars, wt ~ am*cyl), conf.int = TRUE) %>%
dplyr::filter(.data = ., term != "(Intercept)") %>%
dplyr::select(.data = ., term, estimate, conf.low, conf.high, statistic, p.value) %>%
purrrlyr::by_row(
.d = .,
..f = ~ as.character(bquote(
"t = "~.(round(.$statistic, digits = 3))~
", p = "~
.(round(.$p.value, digits = 3))
)),
.collate = "rows",
.to = "label",
.labels = TRUE
)
)
#> # tibble [9 x 8]
#> term estimate conf.low conf.high statistic p.value .row label
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <chr>
#> 1 am -0.956 -2.58 0.668 -1.21 0.238 1 ~
#> 2 am -0.956 -2.58 0.668 -1.21 0.238 1 "\"t = \" ~~
#> 3 am -0.956 -2.58 0.668 -1.21 0.238 1 0.238
#> 4 cyl 0.304 0.135 0.473 3.68 0.000989 2 ~
#> 5 cyl 0.304 0.135 0.473 3.68 0.000989 2 "\"t = \" ~~
#> 6 cyl 0.304 0.135 0.473 3.68 0.000989 2 0.001
#> 7 am:cyl 0.0328 -0.234 0.300 0.252 0.803 3 ~
#> 8 am:cyl 0.0328 -0.234 0.300 0.252 0.803 3 "\"t = \" ~~
#> 9 am:cyl 0.0328 -0.234 0.300 0.252 0.803 3 0.803
Created on 2018-06-13 by the reprex package (v0.2.0).

Based on discussion in the comments, you need to use mathematical annotations correctly to avoid errors, see link.
The below label format works for me, and includes beta-estimates with the Greek symbol. list is needed to obtain commas in plotmath.
(label_df <- broom::tidy(x = stats::lm(data = mtcars, wt ~ am*cyl), conf.int = TRUE) %>%
dplyr::filter(.data = ., term != "(Intercept)") %>%
dplyr::select(.data = ., term, estimate, conf.low, conf.high, statistic, p.value) %>%
purrrlyr::by_row(
.d = .,
..f = ~ paste(
"list(italic(t)==",
round(.$statistic, digits = 3),
", ~italic(p)==",
round(.$p.value, digits = 3),
", ~beta==",
round(.$estimate, digits = 3),
")",
sep = ""
),
.collate = "rows",
.to = "label",
.labels = TRUE
)
)

Related

Extract categorical coeffients and all p-values from a mixed model into a data table

Here is a reproduceable code and sample data
I want to achieve a final data table with 3 columns: 1. exposure quantile 2. OR/RR 3. PV
set.seed(42)
n <- 100
dat = data.frame(ID = rep(c(1:25),times=4 ) ,
Score = rnorm(n, mean=0.3, sd=0.8))
dat = dat %>%
group_by(ID)%>%
dplyr::mutate(exposure1 = rep(c(rnorm(1, mean=6, sd=1.8))),
exposure2 = rep(c(rnorm(1, mean=3, sd=0.6))),
age = rep(c(rnorm(1, mean=40, sd=15))))%>%
ungroup()%>%
dplyr::mutate(exposure1_quantile = cut(exposure1, breaks = 4, labels = c("Q1","Q2","Q3","Q4")),
exposure2_quantile = cut(exposure2, breaks = 4, labels = c("Q1","Q2","Q3","Q4")))
exposures_var = c("exposure1_quantile","exposure2_quantile")
exposure_var_labels("exposure1 Q1","exposure1 Q2 ", "exposure 1 Q3",
"exposure2 Q1","exposure2 Q2 ", "exposure2 Q3")
age="age"
outcome = "Score"
exposure_data_table = c()
for(i in 1:length(exposures_var)){
exp = exposures_var[i]
fixed_effects_formula = paste0(outcome, "~",exp,"+",age)
fixed_effects_formula = as.formula(fixed_effects_formula)
mixedmodel = lme(fixed =fixed_effects_formula, random = ~1|ID, data=dat, method = "ML")
for(m in 2:4){
v = mixedmodel$coefficients$fixed[m]
vector = c(exp , v)
#P=p value for every quantile (HOW TO ADD?)
#exposure_name = exposure_var_labels[?] (HOW TO ADD LABEL)
exposure_data_table = rbind(exposure_data_table, vector)
}
}
exposure_data_table = as.data.table(exposure_data_table)
colnames(exposure_data_table)=c("Exposure","RR")#,"pv")
view(exposure_data_table)
I first used anova to try and get the pvalue but it didnt work.

I think a tidymodels approach using lme would work well here:
library(nlme)
library(tidymodels)
library(multilevelmod)
library(data.table)
lme_spec <-
linear_reg() %>%
set_engine("lme", random = ~ 1 | ID)
Map(function(exp) {
fixed_effects_formula <- as.formula(paste0("Score~",exp,"+ age +", 0))
lme_spec %>%
fit(fixed_effects_formula, data = dat) %>%
broom.mixed::tidy() %>%
filter(effect == "fixed", grepl("exposure", term)) %>%
select(term, estimate, std.error, p.value)
}, exposures_var) %>%
bind_rows() %>%
as.data.table()
#> term estimate std.error p.value
#> 1: exposure1_quantileQ1 -0.16147364 0.3532834 0.6525497
#> 2: exposure1_quantileQ2 0.22318505 0.2719366 0.4214784
#> 3: exposure1_quantileQ3 0.24976757 0.3484126 0.4817411
#> 4: exposure1_quantileQ4 0.14177064 0.4020702 0.7280757
#> 5: exposure2_quantileQ1 0.28976458 0.4191198 0.4972840
#> 6: exposure2_quantileQ2 0.19907863 0.2699164 0.4693496
#> 7: exposure2_quantileQ3 0.35040767 0.2827229 0.2295436
#> 8: exposure2_quantileQ4 -0.09587234 0.3533819 0.7889412
Created on 2022-08-07 by the reprex package (v2.0.1)

Trying to annotate a plot with regression results in R markdown

I am trying to add a footnote to the bottom of my plot with betas, standard errors, and p values directly from the model summary I saved. However, it keeps telling me there is an unexpected error in the parse text. Any help would be greatly appreciated!
exact error:
Error in parse(text = text[[i]]) : :1:26: unexpected input
1: 'Main effect of age: ' $
^
minimal reproducible example:
id<-rep(1:50)
tst<-c(sample(7:9,50, replace = T))
mydf<-data.frame(id,tst)
mydf$age<-sample(40:90,50, replace = T)
mydf$bmi<-sample(20:30,50, replace = T)
mydf$sex<-sample(1:2,50, replace = T)
##Overall model##
model <- lm( tst ~ age*sex + bmi , data = mydf)
summary(model)
model.df<-ggpredict(model, terms = c("age", "sex"))
model.plot<-plot(model.df)+theme(legend.position="none")+
theme(plot.title = element_text(hjust = 0.5))+
annotate("text", x = 0, y = 0.05, parse = TRUE, size = 4,
label = " 'Main effect of age: ' $\beta == %.2g ",
coef(model)[2])
(model.plot)

Seems like parsing your syntax for parsing is wrong. Also, your code would add the text to each facet - not sure if that was the intended outcome (if so, just use parse=FALSE and the paste0(...) expression from below for your annotation. If you wish a global footnote, you could a caption like so:
library(ggiraphExtra)
library(ggplot2)
set.seed(1234)
mydf <- data.frame(
id = 1:50,
tst = sample(7:9, 50, replace = T),
age = sample(40:90,50, replace = T),
bmi = sample(20:30,50, replace = T),
sex = sample(1:2,50, replace = T)
)
##Overall model##
model <- lm( tst ~ age*sex + bmi , data = mydf)
summary(model)
#>
#> Call:
#> lm(formula = tst ~ age * sex + bmi, data = mydf)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -1.24030 -0.67286 -0.07152 0.62489 1.27281
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 4.99309 1.66745 2.994 0.00446 **
#> age 0.01975 0.02389 0.827 0.41274
#> sex 1.21860 0.95986 1.270 0.21077
#> bmi 0.06602 0.03805 1.735 0.08955 .
#> age:sex -0.01852 0.01532 -1.209 0.23307
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 0.7507 on 45 degrees of freedom
#> Multiple R-squared: 0.125, Adjusted R-squared: 0.04721
#> F-statistic: 1.607 on 4 and 45 DF, p-value: 0.189
model.df <- ggPredict(model, terms = c("age", "sex"))
model.plot <- model.df +
theme(legend.position="none",
plot.title = element_text(hjust = 0.5)) +
labs(caption = paste0(
"Main effect of age: β = ",
sprintf("%.2g", coef(model)[2])))
model.plot
Created on 2022-06-30 by the reprex package (v2.0.1)

report t.test value efficiently in R

I have 2- different groups namely, am.0 and am.1 and I would like to create a data frame that shows mean(sd) of each group as well as diff(sd) with it t.test sign ***. Many thanks in advance.
mtcars$am <- factor(mtcars$am)
my.mtcars <- mtcars %>%
dplyr::group_by(am)%>%
dplyr::summarise(
mean = mean(mpg),
sd = sd(mpg)); my.mtcars
my.mtcars$sd <- paste0("(", round(my.mtcars$sd,2), ")") ; my.mtcars
Expected Answer is a data frame
am.0 am.1 diff (SD)
mpg 17.1 (3.83) 24.4 (6.17) 17.1-24.4 (SD)**
disp ... ... ...
gear ... ... ...
Where SD is the standard error of the difference between two independent samples,
SD = sqrt(s_1^2/n1 + s_2^2/n2)

This is my rather brute attempt:
my_signif = function(x, digits) floor(x) + signif(x %% 1, digits)
t.test.df <- function(x,y,df)
{
t = t.test(eval(parse(text=x))~eval(parse(text=y)), df)
p = t$p.value
sig = ifelse(p < 0.001,"***", ifelse(p < 0.01, "**", ifelse(p < 0.05, "*", "")))
est1 = my_signif(t$estimate[1],2)
est2 = my_signif(t$estimate[2],2)
sd1 = my_signif(sd(df[[x]][df[[y]] == levels(df[[y]])[1]]),2)
sd2 = my_signif(sd(df[[x]][df[[y]] == levels(df[[y]])[2]]),2)
out = data.frame(a = paste0(est1, " (",sd1,")"), b = paste0(est2, " (",sd2,")"), c = paste0(est1 - est2, " (",sig,")"),row.names = x)
colnames(out) = c(paste0(y,levels(df[[y]])[1]), paste0(y,levels(df[[y]])[2]), "diff")
out
}
t.test.df("mpg","am",mtcars)
output:
> t.test.df("mpg","am",mtcars)
am0 am1 diff
mpg 17.15 (3.83) 24.39 (6.17) -7.24 (**)
Further:
t.test.df2 <- function(cols,y,df) do.call(rbind,lapply(cols, function(x) t.test.df(x,y,df)))
output:
cols = c("mpg", "disp", "gear")
t.test.df2(cols,"am",mtcars)
am0 am1 diff
mpg 17.15 (3.83) 24.39 (6.17) -7.24 (**)
disp 290.38 (110.17) 143.53 (87.2) 146.85 (***)
gear 3.21 (0.42) 4.38 (0.51) -1.17 (***)

Selecting the degree of spline function based on a set R^2 threshold value

I am developing a shiny app in which I am plotting scatterplot and a spline fit function on it, the degree of spline function can be changed by a slider whose values varies from 2-12, shown below:
ui <- tabPanel(sidebarLayout(
sidebarPanel(sliderInput('degree', 'Degree of the Polynomial:', min = 2, max = 12, value = 3, step = 1)),
mainPanel(plotlyOutput("plot"))))
Below is the server side code:
server <- function(input, output, session){
observeEvent(input$degree, {
output$plot <- renderPlotly({
m <- lm(formula = y ~ splines::bs(x, df = input$degree), df4)
#plot
g <- ggplot(data = df4, aes_string(x = df4$x, y = df4$y)) + theme_bw() +
geom_point(colour = "blue", size = 0.1)+
geom_smooth(formula = y ~ splines::bs(x, df = input$degree), method = "lm", color = "green3", level = 1, size = 1)
h <- g + xlab("X (mm)") + ylab("Z (um)")
ggplotly(h) %>% add_annotations(text= sprintf("R^2: %f", summary(m)[8]), xref="paper", yref="paper", x=0.05,y=0.9)
})
})
}
df4 is the dataset that has been used to plot the scatterplot, which looks like this:
Now I want the value of the degree of spline fit to get selected automatically based on the R^2 value.
For example, if 0.8 is the set threshold for the R^2 value, then that degree of spline function should get automatically selected as the default value of the slider, where the value of R^2 crosses the threshold of 0.8 for the first time.
All in all, I want the default set value of the slider (which is set to 3 here) to be dynamic based on the set threshold value of R^2.

This should do it. You need to estimate the model outside of the rendered output so you can identify the correct degree. Then, you need to use renderUI() to build the slider so you can pass the identified value of degree to the value argument. Then, you can make the plot without being inside the event observer because it's already a reactive function and observing the degree input slider.
ui <- fluidPage(sidebarLayout(
sidebarPanel(uiOutput("slider")),
mainPanel(plotlyOutput("plot"))))
server <- function(input, output, session){
library(ggplot2)
library(plotly)
library(splines)
set.seed(1)
## set number of observations
n <- 400
## generate x in [0,1]
x <- 0:(n-1)/(n-1)
## create compled function of x
f <- 0.2*x^11*(10*(1-x))^6+10*(10*x)^3*(1-x)^10
## create y = f(x) + random noise
y <- f + rnorm(n, 0, sd = 2)
df4 <- data.frame(x=x, y=y)
deg <- 2
r2 <- 0
while(r2 < .8){
deg <- deg + 1
m <- lm(formula = y ~ splines::bs(x, df = deg), df4)
r2 <- summary(m)$r.squared
}
output$slider <- renderUI(sliderInput('degree',
'Degree of the Polynomial:',
min = 2,
max = 300,
value = deg,
step = 1) )
output$plot <- renderPlotly({
#plot
m <- lm(formula = y ~ splines::bs(x, df = input$degree), df4)
g <- ggplot(data = df4, aes(x = x, y = y)) + theme_bw() +
geom_point(colour = "blue", size = 0.1)+
geom_smooth(formula = y ~ splines::bs(x, df = input$degree), method = "lm", color = "green3", level = 1, size = 1)
h <- g + xlab("X (mm)") + ylab("Z (um)")
ggplotly(h) %>% add_annotations(text= sprintf("R^2: %f", summary(m)[8]), xref="paper", yref="paper", x=0.05,y=0.9)
})
}
shinyApp(ui, server)
EDIT add file upload
I added a file upload button and text box along with variable choosers for the x- and y- variables from the names in the uploaded dataset.
ui <- fluidPage(sidebarLayout(
sidebarPanel(
fileInput('file1', 'Choose file to upload',
accept = c(
'text/csv',
'text/comma-separated-values',
'text/tab-separated-values',
'text/plain',
'.csv',
'.tsv'
)
),
uiOutput("xvar"),
uiOutput("yvar"),
uiOutput("slider")),
mainPanel(plotlyOutput("plot"))))
server <- function(input, output, session){
library(ggplot2)
library(plotly)
library(splines)
df4 <- reactive({
req(input$file1)
inFile <- input$file1
read.csv(inFile$datapath, header = TRUE)
})
output$xvar <- renderUI({
req(df4())
selectInput("xvar", "X-variable", choices=names(df4()), selected = NULL)
})
output$yvar <- renderUI({
req(df4())
selectInput("yvar", "Y-variable", choices=names(df4()), selected = NULL)
})
deg <- reactive({
req(input$yvar)
degr <- 2
r2 <- 0
while(r2 < .8){
degr <- degr + 1
form <- paste(input$yvar, "~ splines::bs(", input$xvar, ", df = ", degr, ")")
m <- lm(formula = form, df4())
r2 <- summary(m)$r.squared
}
degr
})
output$slider <- renderUI({
req(deg())
sliderInput('degree',
'Degree of the Polynomial:',
min = 2,
max = 300,
value = deg(),
step = 1) })
output$plot <- renderPlotly({
req(deg())
#plot
form <- paste(input$yvar, "~ splines::bs(", input$xvar, ", df = ", input$degree, ")")
m <- lm(formula = form, df4())
g <- ggplot(data = df4(), aes_string(x = input$xvar, y = input$yvar)) + theme_bw() +
geom_point(colour = "blue", size = 0.1)+
geom_smooth(formula = y ~ splines::bs(x, df = input$degree), method = "lm", color = "green3", level = 1, size = 1)
h <- g + xlab("X (mm)") + ylab("Z (um)")
ggplotly(h) %>% add_annotations(text= sprintf("R^2: %f", summary(m)[8]), xref="paper", yref="paper", x=0.05,y=0.9)
})
}
shinyApp(ui, server)

This is tricky without some sample data, but suppose we had the following data set:
set.seed(1)
df4 <- data.frame(x = 1:10, y = rnorm(10, (1:10)/10))
df4
#> x y
#> 1 1 -0.5264538
#> 2 2 0.3836433
#> 3 3 -0.5356286
#> 4 4 1.9952808
#> 5 5 0.8295078
#> 6 6 -0.2204684
#> 7 7 1.1874291
#> 8 8 1.5383247
#> 9 9 1.4757814
#> 10 10 0.6946116
When plotted, it looks like this:
plot(df)
so it has a slight upwards trend.
If we want to find the number of splines that gives a fit with r squared > 0.8 we can do:
library(splines)
i <- 3
while(summary(lm(formula = y ~ bs(x, df = i), df4))$r.squared < 0.8) i <- i + 1
So now i is the lowest number of splines that gives an r squared of 0.8 or more:
i
#> [1] 8
And we can fit i into our fixed model:
fit <- lm(formula = y ~ splines::bs(x, df = i), df4)
summary(fit)
#>
#> Call:
#> lm(formula = y ~ splines::bs(x, df = i), data = df4)
#>
#> Residuals:
#> 1 2 3 4 5 6 7 8
#> 0.00008 -0.00216 0.01512 -0.04776 0.08208 -0.08208 0.04776 -0.01512
#> 9 10
#> 0.00216 -0.00008
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) -0.5265 0.1360 -3.871 0.1609
#> splines::bs(x, df = i)1 4.4178 0.4344 10.170 0.0624 .
#> splines::bs(x, df = i)2 -4.1409 0.4194 -9.874 0.0643 .
#> splines::bs(x, df = i)3 5.2151 0.3247 16.064 0.0396 *
#> splines::bs(x, df = i)4 -1.3020 0.3068 -4.244 0.1473
#> splines::bs(x, df = i)5 2.3384 0.3245 7.206 0.0878 .
#> splines::bs(x, df = i)6 1.9458 0.4199 4.634 0.1353
#> splines::bs(x, df = i)7 2.0650 0.4309 4.792 0.1310
#> splines::bs(x, df = i)8 1.2212 0.1924 6.349 0.0995 .
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 0.136 on 1 degrees of freedom
#> Multiple R-squared: 0.9974, Adjusted R-squared: 0.9769
#> F-statistic: 48.6 on 8 and 1 DF, p-value: 0.1105
and
lines(10:100/10, predict(fit, newdata = list(x = 10:100/10)), col = "red")
Created on 2020-11-30 by the reprex package (v0.3.0)

Optimize function in r

Here is my code:
cee = abs(qnorm(.5*0.1)) # Bonferroni threshold for achieving study-wide significance = 0.1
p.value = (simAll %>% select("p.value"))
p.value1 <- as.numeric(unlist(p.value))
# we use "cee" so R does not get confused with the function 'c'
betahat = log(OR) # Reported OR
z = sign(betahat)*abs(qnorm(0.5*p.value1)) # Reported p-value = 5.7e-4, which we convert to a z-value
###################################################
# THE PROPOSED APPROACH #
###################################################
se = betahat/z # standard error of betahat
mutilde1 = optimize(f=conditional.like,c(-20,20),maximum=T,z=z,cee=cee)$maximum # the conditional mle
The p.value is the p-values for 1000 simulations, same as OR, for the "se“ part, I can get 1000 different se values there. But for the mutilde1 line, there is an error exist: "Error in optimize(f = conditional.like, c(-20, 20), maximum = T, z = z, :
invalid function value in 'optimize'"
How can I fix the issue?
The conditional.like() function:
conditional.like=function(mu,cee,z){
like=dnorm(z-mu)/(pnorm(mu-cee)+pnorm(-cee-mu))
return((abs(z)>cee)*like) }
The simALL is a table looks like this (total 1000 lines)：
# A tibble: 1,000 x 6
id term estimate std.error statistic p.value
<int> <chr> <dbl> <dbl> <dbl> <dbl>
1 1 .x 0.226 0.127 1.78 0.0747
2 2 .x 0.137 0.127 1.08 0.280
3 3 .x 0.304 0.127 2.38 0.0171
4 4 .x 0.497 0.128 3.87 0.000111
OR (total 1000 lines):
> OR
[1] 1.5537098 1.0939850 1.4491432 1.6377551 1.1646904 1.3387534 1.6377551 1.5009351 1.7918552
Also, here is my overall code:
library(tidyverse)
library(broom)
# create a tibble with an id column for each simulation and x wrapped in list()
sim <- tibble(id = 1:1000,
x = list(rbinom(1000,1,0.5))) %>%
# to generate z, pr, y, k use map and map2 from the purrr package to loop over the list column x
# `~ ... ` is similar to `function(.x) {...}`
# `.x` represents the variable you are using map on
mutate(z = map(x, ~ log(1.3) * .x),
pr = map(z, ~ 1 / (1 + exp(-.x))),
y = map(pr, ~ rbinom(1000, 1, .x)),
k = map2(x, y, ~ glm(.y ~ .x, family="binomial")),
# use broom::tidy to get the model summary in form of a tibble
sum = map(k, broom::tidy)) %>%
# select id and sum and unnest the tibbles
select(id, sum) %>%
unnest(cols = c(sum))
simOR <- sim %>%
# drop the intercepts and every .x with a p < 0.05
filter(term !="(Intercept)",
p.value < 0.05)
sim
j1=exp(simOR %>% select("estimate"))
OR1=as.numeric(unlist(j1))
mean(OR1)
simAll <- sim %>%
filter(term !="(Intercept)")
j <- exp(simAll %>% select("estimate"))
OR2 <- as.numeric(unlist(j))
mean(OR2)
simOR2 <- sim %>%
filter(term !="(Intercept)",
p.value < 0.005)
j2 <- exp(simOR2 %>% select("estimate"))
OR3 <- as.numeric(unlist(j2))
mean(OR3)
#op <- par(mfrow = c(3, 1))
hga=hist(OR2, main = NULL, freq = T, breaks = 10) #OR2:Overall OR
hgb=hist(OR1, freq = T,col=2,breaks=10, main="OR:p-value<0.05") #OR1:p-value<0.05
hgc=hist(OR3, freq = T,col=2,breaks=10, main="OR:p-value<0.005") #OR3:p-value<0.005
plot(hga,col=rgb(0,1,0,0.5),main = "OR",xlim=c(0.8,2),ylim=c(0,250))
plot(hgb, add = TRUE,col=rgb(0,0,0.8,0.5),xlim=c(0.8,2),ylim=c(0,250))
plot(hgc, add = TRUE,col=rgb(1,0,0,0.5),xlim=c(0.8,2))
abline(v = mean(OR2), lwd = 4, col = 3)
abline(v = mean(OR3), lwd = 4, col=2)
text(1.65,240,"1.31",col=1)
arrows(1.5,240,1.31,240,length=0.1,col=1,lwd=2)
abline(v = mean(OR1), lwd = 4, col=4)
text(2.1,220,"1.43",col=4)
arrows(1.98,220,1.43,220,length=0.1,col=4,lwd=2)
text(2.1,220,"1.55",col=2)
arrows(1.98,220,1.55,220,length=0.1,col=2,lwd=2)
#########################################
## THE FUNCTIONS BELOW ARE USED TO OBTAIN THE
## BIAS-CORRECTED ESTIMATES
#########################################
conditional.like=function(mu,cee,z){
like=dnorm(z-mu)/(pnorm(mu-cee)+pnorm(-cee-mu))
return((abs(z)>cee)*like) }
conditional.like.z=function(mu,cee,z){
return(conditional.like(mu,cee,z)*mu)
}
#########################################
## THE FUNCTIONS BELOW ARE USED TO OBTAIN THE
## BIAS-CORRECTED CONFIDENCE INTERVAL
#########################################
ptruncnorm.lower=function(z,mu,cee,alpha){
A=pnorm(-cee+mu)+pnorm(-cee-mu)
term1=pnorm(z-mu)
term2=pnorm(-cee-mu)
term3=pnorm(-cee-mu)+pnorm(z-mu)-pnorm(cee-mu)
result=(1/A)*(term1*(z<= -cee)+term2*(abs(z)<cee)+term3*(z>=cee))
return(result-(alpha/2))
}
ptruncnorm.upper=function(z,mu,cee,alpha){
A=pnorm(-cee+mu)+pnorm(-cee-mu)
term1=pnorm(z-mu)
term2=pnorm(-cee-mu)
term3=pnorm(-cee-mu)+pnorm(z-mu)-pnorm(cee-mu)
result=(1/A)*(term1*(z<= -cee)+term2*(abs(z)<cee)+term3*(z>=cee))
return(result-(1-alpha/2))
}
find.lowerz=function(mu,z,cee,alpha){
lowerz=uniroot(ptruncnorm.lower,lower=-20,upper=20,mu=mu,cee=cee,alpha=alpha)$root
return(lowerz-z)
}
find.upperz=function(mu,z,cee,alpha){
upperz=uniroot(ptruncnorm.upper,lower=-20,upper=20,mu=mu,cee=cee,alpha=alpha)$root
return(upperz-z)
}
getCI=function(z,cee,alpha){
uppermu=uniroot(find.lowerz,interval=c(-15,15),cee=cee,z=z,alpha=alpha)$root
lowermu=uniroot(find.upperz,interval=c(-15,15),cee=cee,z=z,alpha=alpha)$root
out=list(lowermu,uppermu)
names(out)=c("lowermu","uppermu")
return(out)
}
source("GW-functions.R")# YOU READ IN THE FUNCTIONS FOR OUR METHOD
cee=abs(qnorm(.5*0.1)) # Bonferroni threshold for achieving study-wide significance = 0.1
p.value=(simAll %>% select("p.value"))
p.value1 <- as.numeric(unlist(p.value))
# we use "cee" so R does not get confused with the function 'c'
betahat=log(OR) # Reported OR
z=sign(betahat)*abs(qnorm(0.5*p.value1)) # Reported p-value = 5.7e-4, which we convert to a z-value
###################################################
# THE PROPOSED APPROACH #
###################################################
se=betahat/z # standard error of betahat
mutilde1=optimize(f=conditional.like,c(-20,20),maximum=T,z=z,cee=cee)$maximum