Is there a way to report prevalence ratio using gtsummary? - r

How would I report PR instead of OR?
library(gtsummary)
library(dplyr)
set.seed(2022)
trial_data <- tibble(
"outcome" = factor(rbinom(1000, 1, 0.20),
labels = c("No", "Yes")),
"var1" = factor(rbinom(1000, 2, 0.25),
labels = c("Low", "Middle", "High")),
"var2" = factor(rbinom(1000, 2, 0.20),
labels = c("Primary", "Secondary", "Tertiary")),
"var3" = factor(rbinom(1000, 1, 0.10),
labels = c("No", "Yes")),
"var4" = round(rnorm(1000, 20, 5)),
)
trial_data %>% count(var3)
# Logistic regression with odds ratio
trial_data %>%
tbl_uvregression(
method = glm,
y = "outcome",
method.args = list(family = binomial),
exponentiate = TRUE,
pvalue_fun = function(x) style_pvalue(x, digits = 2)
)

Related

Fitting age adjusted model in R

I would like to fit a logistic regression model in R but adjust for age only. How can this be done? I know there is a way to fit univariable logistic model using gtsummary tbl_uvregression function. Can we tweak this?
library(tidyverse)
library(gtsummary)
set.seed(100)
trialdata <- tibble(
outcome = factor(rbinom(1000, 1, 0.10),
labels = c("No", "Yes")),
age = floor(rnorm(1000, 16, 3)),
score = floor(runif(1000, 30, 99)),
school = factor(rbinom(1000, 1, 0.35),
labels = c("No", "Yes")),
education = factor(rbinom(1000, 2, 0.20),
labels = c("Primary", "Secondary", "Tertiary"))
)
tbl_uv_ex1 <-
trialdata %>%
tbl_uvregression(
method = glm,
y = outcome,
method.args = list(family = binomial),
exponentiate = TRUE
)
tbl_uv_ex1

change second y axis color in base R

Change secondary line axis color changes send color for ggplot, but I chose to go with base R, and would like to be able to select the second y axis color.
I have the following data:
df = structure(list(A = c("Q4-17", "Q1-18", "Q2-18", "Q3-18", "Q4-18",
"Q1-19", "Q2-19", "Q3-19", "Q4-19", "Q1-20", "Q2-20", "Q3-20",
"Q4-20", "Q1-21", "Q2-21", "Q3-21", "Q4-21", "Q1-22", "Q2-22",
"Q3-22"), B = c(69.45, 71.1, 74.94, 73.87, 93.61, 91.83,
95.38, 109.8, 133.75, 125.26, 118.22, 145.65, 144.9757185, 155.3464032,
184.367033, 179.8121721, 187.235487, 189.1684376, 184.3864519,
161.5300056), C = c(70.73, 71.73, 74.33, 73.27,
95.94, 94.38, 95.38, 109.8, 115.32, 116.92, 115.9, 113.87, 106.108147,
96.84273563, 111.5150869, 110.1228567, 110.7448835, 194.9684376,
187.7241152, 167.7665553), D = c(260.3, 216.02, 203.72,
203.52, 300.96, 320.77, 330.5, 413.52, 436.7, 474.96, 463.6,
501.87, 493.8865461, 497.1760767, 514.9903459, 503.7601267, 510.8362938,
614.9915546, 603.5761107, 593.660831), E = c(NA,
NA, NA, NA, NA, NA, NA, NA, 39.237, 35.621, 32.964, NA, 152.137,
140.743023, 167.809, 170.877, 117.517, 102.691723, 88.8, 76.2445528
)), class = "data.frame", row.names = c(NA, -20L))
df = df %>%
rowwise() %>%
mutate(sums = sum(D,E, na.rm = TRUE))
df = df[8:nrow(df),]
and this to generate my plot
x <- seq(1,nrow(df),1)
y1 <- df$B
y2 <- df$D
par(mar = c(5, 4, 4, 4) + 0.3)
plot(x, y1, col = "#000000",
type = "l",
main = "title",
ylim = c(0, max(df[,2:3])),
ylab = "Y1",
xlab = "",
xaxt = "n")
axis(1,
at = seq(from = 13, by = -4, length.out = 4),
labels = df$A[seq(from = 13, by = -4, length.out = 4)])
lines(x, df$C, lty = "dashed", col = "#adadad", lwd = 2)
par(new = TRUE)
plot(x, df$sums, col = "#ffa500",
axes = FALSE, xlab = "", ylab = "", type = "l")
axis(side = 4, at = pretty(range(y2)),
ylim = c(0,max(df[,3:5], na.rm = TRUE)),
col = "#00aa00") # Add colour selection of 2nd axis
par(new = TRUE)
plot(x, df$D , col = "#0000ff",
axes = FALSE, xlab = "", ylab = "", type = "l", lwd = 1)
mtext("y2", side = 4, line = 3)
but this does not colour my complete second y axis, nor labels, nor title
does any one have any suggestions to be able to set entire y2 axis to be #00AA00 - ticks, labels, and title?

Survival Analysis in R -- why it is always a straight line

I am encountering a problem while trying to generate a survival curve. I don't know and understand why the curve which represents the survival probability of each year is sometimes a straight line. Ideally, it should be an up-and-down line, eventually lying around 55%.
I have included codes below, and please kindly refer to them. I have also attached a sample wrong plot to understand my description. Hopefully, it can help you understand what I am describing.
Any suggestions will be very appreciated. :)
Plot:
Codes:
library(survival)
library(survminer)
library(dplyr)
library(ggplot2)
library(readxl)
library(tidyverse)
data_all <- data.frame(Years_Diff_Surg_Death = c(8.919917864,
8.895277207, 8.881587953, 8.821355236, 8.728268309, 8.709103354), Survival = c(1L, 0L, 1L, 1L, 1L, 1L))
data_all <- data.frame(Years_Diff_Surg_Death = c(8.919917864,
8.895277207, 8.881587953, 8.821355236, 8.728268309, 8.709103354), Survival = c(1L, 0L, 1L, 1L, 1L, 1L))
data_2013 <- data.frame(Years_Diff_Surg_Death = c("36.99383984", "2.584531143", "36.91991786", "36.89527721", "36.88158795", "36.82135524"), YEARS_OF_SURGERY = c("2013","2013","2013","2013","2013","2013"), Survival = c("1","0", "1", "1", "1", "1"))
data_2014 <- data.frame(Years_Diff_Surg_Death = c(0.542094456, 5.196440794, 35.95619439, 35.91786448, 35.86584531, 35.8275154), YEARS_OF_SURGERY = c(2014, 2014, 2015, 2014, 2014, 2014, 2016), Survival = c(0, 0, 1, 1, 1, 1))
data_2015 <- data.frame(Years_Diff_Surg_Death = c(34.4476386, 34.25598905,0.621492129, 34.38740589, 34.33264887, 1.081451061), YEARS_OF_SURGERY = c(2015, 2015, 2015, 2015, 2015, 2015), Survival = c(1, 1, 0, 1, 1, 0))
data_2016 <- data.frame(Years_Diff_Surg_Death = c(2.902121834, 0.950034223, 33.9301848, 33.91101985, 33.87268994, 33.85352498), YEARS_OF_SURGERY = c(2016,2016,2016, 2016, 2016, 2016), Survival = c(0, 0, 1, 1, 1, 1))
data_2017 <- data.frame(Years_Diff_Surg_Death = c(32.99110198, 3.348391513, 32.95277207,32.91170431, 32.87611225, 0.791238877), YEARS_OF_SURGERY = c(2017, 2017, 2017, 2017, 2017, 2017), Survival = c(1, 0, 1, 1, 1, 0))
fit_all <- survfit(Surv(Years_Diff_Surg_Death, Survival) ~ 1, data = data_all)
fit_2013 <- survfit(Surv(Years_Diff_Surg_Death, Survival) ~ YEARS_OF_SURGERY, data = data_2013)
fit_2014 <- survfit(Surv(Years_Diff_Surg_Death, Survival) ~ YEARS_OF_SURGERY, data = data_2014)
fit_2015 <- survfit(Surv(Years_Diff_Surg_Death, Survival) ~ YEARS_OF_SURGERY, data = data_2015)
fit_2016 <- survfit(Surv(Years_Diff_Surg_Death, Survival) ~ YEARS_OF_SURGERY, data = data_2016)
fit_2017 <- survfit(Surv(Years_Diff_Surg_Death, Survival) ~ YEARS_OF_SURVERY, data = data_2017)
fit_comb <- list(s_2013 = fit_2013,
s_2014 = fit_2014,
s_2015 = fit_2015,
s_2016 = fit_2016,
s_2017 = fit_2017,
s_all= fit_all)
ggsurvplot(fit_all, conf.int = TRUE,
ylim = c(0,1),
xlim = c(0,5),
break.x.by = 1,
title = "Years of Death After Surgery via Survival",
xlab = ("Years"),
legend = "none")
ggsurvplot(fit_2013, conf.int = TRUE,
ylim = c(0,1),
xlim = c(0,5),
break.x.by = 1,
title = ("Years of Death After Surgery via Survival"),
xlab = ("Years"),
legend = "none",
risk.table = F)
ggsurvplot(fit_2014, conf.int = TRUE,
ylim = c(0,1),
xlim = c(0,5),
break.x.by = 1,
title = ("Years of Death After Surgery via Survival"),
xlab = ("Years"),
legend = "none",
risk.table = F)
ggsurvplot(fit_2015, conf.int = TRUE,
ylim = c(0,1),
xlim = c(0,5),
break.x.by = 1,
title = ("Years of Death After Surgery via Survival"),
xlab = ("Years"),
legend = "none",
risk.table = F)
ggsurvplot(fit_2016, conf.int = TRUE,
ylim = c(0,1),
xlim = c(0,5),
break.x.by = 1,
title = ("Years of Death After Surgery via Survival"),
xlab = ("Years"),
legend = "none",
risk.table = F)
ggsurvplot(fit_2017, conf.int = TRUE,
ylim = c(0,1),
xlim = c(0,5),
break.x.by = 1,
title = ("Years of Death After Surgery via Survival"),
xlab = ("Years"),
legend = "none",
risk.table = F)
ggsurvplot_combine(fit_comb,
data_ECV,
xlab = ("Years"),
xlim = c(0,5),
break.x.by = 1)

R Highcharter specify continuous x and y axis

I am a bit confused with use of highcharter hc_add_series function.
I am trying to create a plot where I need to specify both x and y axis, where x axis are continuous. I have a data-frame, for example:
df_plot <- cbind(
seq(0, 1, by = 0.1),
sample(seq(from = 100, to = 300, by = 10), size = 11, replace = TRUE),
sample(seq(from = 1, to = 100, by = 9), size = 11, replace = TRUE),
sample(seq(from = 50, to = 60, by = 2), size = 11, replace = TRUE),
sample(seq(from = 100, to = 130, by = 1), size = 1, replace = TRUE)
) %>%
as.data.frame()
names(df_plot) <- c("x", "a", "b", "c", "d")
I saw this example that works
highchart() %>%
hc_add_series(data = purrr::map(4:8, function(x) list(x, x)), color = "blue")
So i tried:
df_plot1 <- Map(cbind, split.default(df_plot[-1], names(df_plot)[-1]), x=df_plot[1])
highchart() %>%
hc_add_series(data = df_plot1[[1]]) %>%
hc_add_series(data = df_plot1[[2]], yAxis = 1) %>%
hc_yAxis_multiples(
list(lineWidth = 3, lineColor='#7cb5ec', title=list(text="First y-axis")),
list(lineWidth = 3, lineColor="#434348", title=list(text="Second y-axis")))
However, I am getting "No data to display" on the plot, so I obviously went wrong somewhere.
Also, I cannot use hchart function, as I need have multiple y axis
After reading docs about split.default it Divide into Groups and Reassemble, however you need to access the variable you want to plot, e.g. df_plot1[[1]$a, like so:
library(highcharter)
df_plot <- cbind(
seq(0, 1, by = 0.1),
sample(seq(from = 100, to = 300, by = 10), size = 11, replace = TRUE),
sample(seq(from = 1, to = 100, by = 9), size = 11, replace = TRUE),
sample(seq(from = 50, to = 60, by = 2), size = 11, replace = TRUE),
sample(seq(from = 100, to = 130, by = 1), size = 1, replace = TRUE)
) %>% as.data.frame()
names(df_plot) <- c("x", "a", "b", "c", "d")
df_plot1 <- Map(cbind, split.default(df_plot[-1], names(df_plot)[-1]), x=df_plot[1])
highchart() %>%
hc_xAxis(categories = df_plot1[[1]]$x) %>%
hc_add_series(data = df_plot1[[1]]$a) %>%
hc_add_series(data = df_plot1[[2]]$b, yAxis = 1) %>%
hc_yAxis_multiples(
list(lineWidth = 3, lineColor='#7cb5ec', title=list(text="First y-axis")),
list(lineWidth = 3, lineColor="#434348", title=list(text="Second y-axis")))
not sure if this can help you,
library(tidyr)
df_plot2 <- gather(df_plot, group, y, -x)
hchart(df_plot2, "line", hcaes(x, y, group = group))
hchart(df_plot2, "line", hcaes(x, y, group = group), yAxis = 0:3) %>%
hc_yAxis_multiples(
list(lineWidth = 3, title=list(text="First y-axis")),
list(lineWidth = 3, title=list(text="Second y-axis")),
list(lineWidth = 3, title=list(text="3rd y-axis")),
list(lineWidth = 3, title=list(text="4th y-axis"))
)

Combine lattice xyplot and histogram

Could someone help me please to upgrade my plot?
a) In the plot, there should be print only one y-scale per row.
b) To print a more comfortable legend, that means
1) change the order of symbols and description,
2) print line in the same x-position like superpose.symbols,
3) and print symbols for the histogram.
d1 <- data.frame(x=c(NA, 13:20, NA), y = 25, z = c(rep('march', 5),
rep("april", 5)), color = c(c(rep(c("red", "green"), 2), "red"),
c(rep(c("blue", "yellow"), 2), "blue")), stringsAsFactors = FALSE)
d2 <- data.frame(x=c(NA, 20:27, NA), y = 23, z = c(rep('may', 5),
rep("june", 5)), color = c(c(rep(c("blue", "red"), 2), "red"),
c(rep(c("blue", "yellow"), 2), "blue")), stringsAsFactors = FALSE)
d1<-rbind(d1,d2)
sup.sym <- trellis.par.get("superpose.symbol")
sup.sym$alpha<-c(1, 0, 0, 0, 0, 0, 0)
sup.sym$col<-c(1,2,3,4,5,6,7)
sup.lin <- trellis.par.get("superpose.line")
sup.lin$col<-c(1,2,7,5,5,6,7)
sup.lin$alpha<-c(0, 1, 1, 1, 0, 0, 0)
settings<-list(superpose.symbol = sup.sym,superpose.line = sup.lin)
xyplot(y ~ x | factor(z), data = d1
,ylim = list( c(22, 26),c(22, 26), c(0, 1),c(0, 1) )
,layout=c(2,2)
,scales = list(y = list( relation = "free" ))
,par.settings = settings
,auto.key = list(text = c("A","B","C", "D")
,space = "right"
,lines = TRUE
)
,panel = function(x, y, subscripts) {
if(panel.number()>2){
panel.histogram(x,breaks=3)
}else{
panel.xyplot(x = x, y = y,
subscripts=subscripts,
col = d1[subscripts, "color"])
}
})

Resources