Survival Analysis in R -- why it is always a straight line - r

I am encountering a problem while trying to generate a survival curve. I don't know and understand why the curve which represents the survival probability of each year is sometimes a straight line. Ideally, it should be an up-and-down line, eventually lying around 55%.
I have included codes below, and please kindly refer to them. I have also attached a sample wrong plot to understand my description. Hopefully, it can help you understand what I am describing.
Any suggestions will be very appreciated. :)
Plot:
Codes:
library(survival)
library(survminer)
library(dplyr)
library(ggplot2)
library(readxl)
library(tidyverse)
data_all <- data.frame(Years_Diff_Surg_Death = c(8.919917864,
8.895277207, 8.881587953, 8.821355236, 8.728268309, 8.709103354), Survival = c(1L, 0L, 1L, 1L, 1L, 1L))
data_all <- data.frame(Years_Diff_Surg_Death = c(8.919917864,
8.895277207, 8.881587953, 8.821355236, 8.728268309, 8.709103354), Survival = c(1L, 0L, 1L, 1L, 1L, 1L))
data_2013 <- data.frame(Years_Diff_Surg_Death = c("36.99383984", "2.584531143", "36.91991786", "36.89527721", "36.88158795", "36.82135524"), YEARS_OF_SURGERY = c("2013","2013","2013","2013","2013","2013"), Survival = c("1","0", "1", "1", "1", "1"))
data_2014 <- data.frame(Years_Diff_Surg_Death = c(0.542094456, 5.196440794, 35.95619439, 35.91786448, 35.86584531, 35.8275154), YEARS_OF_SURGERY = c(2014, 2014, 2015, 2014, 2014, 2014, 2016), Survival = c(0, 0, 1, 1, 1, 1))
data_2015 <- data.frame(Years_Diff_Surg_Death = c(34.4476386, 34.25598905,0.621492129, 34.38740589, 34.33264887, 1.081451061), YEARS_OF_SURGERY = c(2015, 2015, 2015, 2015, 2015, 2015), Survival = c(1, 1, 0, 1, 1, 0))
data_2016 <- data.frame(Years_Diff_Surg_Death = c(2.902121834, 0.950034223, 33.9301848, 33.91101985, 33.87268994, 33.85352498), YEARS_OF_SURGERY = c(2016,2016,2016, 2016, 2016, 2016), Survival = c(0, 0, 1, 1, 1, 1))
data_2017 <- data.frame(Years_Diff_Surg_Death = c(32.99110198, 3.348391513, 32.95277207,32.91170431, 32.87611225, 0.791238877), YEARS_OF_SURGERY = c(2017, 2017, 2017, 2017, 2017, 2017), Survival = c(1, 0, 1, 1, 1, 0))
fit_all <- survfit(Surv(Years_Diff_Surg_Death, Survival) ~ 1, data = data_all)
fit_2013 <- survfit(Surv(Years_Diff_Surg_Death, Survival) ~ YEARS_OF_SURGERY, data = data_2013)
fit_2014 <- survfit(Surv(Years_Diff_Surg_Death, Survival) ~ YEARS_OF_SURGERY, data = data_2014)
fit_2015 <- survfit(Surv(Years_Diff_Surg_Death, Survival) ~ YEARS_OF_SURGERY, data = data_2015)
fit_2016 <- survfit(Surv(Years_Diff_Surg_Death, Survival) ~ YEARS_OF_SURGERY, data = data_2016)
fit_2017 <- survfit(Surv(Years_Diff_Surg_Death, Survival) ~ YEARS_OF_SURVERY, data = data_2017)
fit_comb <- list(s_2013 = fit_2013,
s_2014 = fit_2014,
s_2015 = fit_2015,
s_2016 = fit_2016,
s_2017 = fit_2017,
s_all= fit_all)
ggsurvplot(fit_all, conf.int = TRUE,
ylim = c(0,1),
xlim = c(0,5),
break.x.by = 1,
title = "Years of Death After Surgery via Survival",
xlab = ("Years"),
legend = "none")
ggsurvplot(fit_2013, conf.int = TRUE,
ylim = c(0,1),
xlim = c(0,5),
break.x.by = 1,
title = ("Years of Death After Surgery via Survival"),
xlab = ("Years"),
legend = "none",
risk.table = F)
ggsurvplot(fit_2014, conf.int = TRUE,
ylim = c(0,1),
xlim = c(0,5),
break.x.by = 1,
title = ("Years of Death After Surgery via Survival"),
xlab = ("Years"),
legend = "none",
risk.table = F)
ggsurvplot(fit_2015, conf.int = TRUE,
ylim = c(0,1),
xlim = c(0,5),
break.x.by = 1,
title = ("Years of Death After Surgery via Survival"),
xlab = ("Years"),
legend = "none",
risk.table = F)
ggsurvplot(fit_2016, conf.int = TRUE,
ylim = c(0,1),
xlim = c(0,5),
break.x.by = 1,
title = ("Years of Death After Surgery via Survival"),
xlab = ("Years"),
legend = "none",
risk.table = F)
ggsurvplot(fit_2017, conf.int = TRUE,
ylim = c(0,1),
xlim = c(0,5),
break.x.by = 1,
title = ("Years of Death After Surgery via Survival"),
xlab = ("Years"),
legend = "none",
risk.table = F)
ggsurvplot_combine(fit_comb,
data_ECV,
xlab = ("Years"),
xlim = c(0,5),
break.x.by = 1)

Related

change second y axis color in base R

Change secondary line axis color changes send color for ggplot, but I chose to go with base R, and would like to be able to select the second y axis color.
I have the following data:
df = structure(list(A = c("Q4-17", "Q1-18", "Q2-18", "Q3-18", "Q4-18",
"Q1-19", "Q2-19", "Q3-19", "Q4-19", "Q1-20", "Q2-20", "Q3-20",
"Q4-20", "Q1-21", "Q2-21", "Q3-21", "Q4-21", "Q1-22", "Q2-22",
"Q3-22"), B = c(69.45, 71.1, 74.94, 73.87, 93.61, 91.83,
95.38, 109.8, 133.75, 125.26, 118.22, 145.65, 144.9757185, 155.3464032,
184.367033, 179.8121721, 187.235487, 189.1684376, 184.3864519,
161.5300056), C = c(70.73, 71.73, 74.33, 73.27,
95.94, 94.38, 95.38, 109.8, 115.32, 116.92, 115.9, 113.87, 106.108147,
96.84273563, 111.5150869, 110.1228567, 110.7448835, 194.9684376,
187.7241152, 167.7665553), D = c(260.3, 216.02, 203.72,
203.52, 300.96, 320.77, 330.5, 413.52, 436.7, 474.96, 463.6,
501.87, 493.8865461, 497.1760767, 514.9903459, 503.7601267, 510.8362938,
614.9915546, 603.5761107, 593.660831), E = c(NA,
NA, NA, NA, NA, NA, NA, NA, 39.237, 35.621, 32.964, NA, 152.137,
140.743023, 167.809, 170.877, 117.517, 102.691723, 88.8, 76.2445528
)), class = "data.frame", row.names = c(NA, -20L))
df = df %>%
rowwise() %>%
mutate(sums = sum(D,E, na.rm = TRUE))
df = df[8:nrow(df),]
and this to generate my plot
x <- seq(1,nrow(df),1)
y1 <- df$B
y2 <- df$D
par(mar = c(5, 4, 4, 4) + 0.3)
plot(x, y1, col = "#000000",
type = "l",
main = "title",
ylim = c(0, max(df[,2:3])),
ylab = "Y1",
xlab = "",
xaxt = "n")
axis(1,
at = seq(from = 13, by = -4, length.out = 4),
labels = df$A[seq(from = 13, by = -4, length.out = 4)])
lines(x, df$C, lty = "dashed", col = "#adadad", lwd = 2)
par(new = TRUE)
plot(x, df$sums, col = "#ffa500",
axes = FALSE, xlab = "", ylab = "", type = "l")
axis(side = 4, at = pretty(range(y2)),
ylim = c(0,max(df[,3:5], na.rm = TRUE)),
col = "#00aa00") # Add colour selection of 2nd axis
par(new = TRUE)
plot(x, df$D , col = "#0000ff",
axes = FALSE, xlab = "", ylab = "", type = "l", lwd = 1)
mtext("y2", side = 4, line = 3)
but this does not colour my complete second y axis, nor labels, nor title
does any one have any suggestions to be able to set entire y2 axis to be #00AA00 - ticks, labels, and title?

Is there a way to report prevalence ratio using gtsummary?

How would I report PR instead of OR?
library(gtsummary)
library(dplyr)
set.seed(2022)
trial_data <- tibble(
"outcome" = factor(rbinom(1000, 1, 0.20),
labels = c("No", "Yes")),
"var1" = factor(rbinom(1000, 2, 0.25),
labels = c("Low", "Middle", "High")),
"var2" = factor(rbinom(1000, 2, 0.20),
labels = c("Primary", "Secondary", "Tertiary")),
"var3" = factor(rbinom(1000, 1, 0.10),
labels = c("No", "Yes")),
"var4" = round(rnorm(1000, 20, 5)),
)
trial_data %>% count(var3)
# Logistic regression with odds ratio
trial_data %>%
tbl_uvregression(
method = glm,
y = "outcome",
method.args = list(family = binomial),
exponentiate = TRUE,
pvalue_fun = function(x) style_pvalue(x, digits = 2)
)

point labels in R scatter plot

I have the following toy data
Xeafield (1999) (PER) ,1,0.5745375408
Lancelot et al. (1989),0.9394939494,0.4733405876
LemLM Xeafield (1997) (TER) ,0.6265126513,0.2959738847
Almore and Flemin (2001) (KER),0.4218921892,0.5745375408
Malek et al. (2006) (HER) ,0.4125412541,1
Charles and Osborne (2003),0.0308030803,0.1414581066
And trying a simple 2D plot in R with points labeled using the 1st column.
pdf('data.pdf', width = 7, height = 8)
d1 <- read.csv("data.csv", header=F, dec=".",sep = ",")
plot(as.matrix(d1[,2]), as.matrix(d1[,3]), col= "blue", pch = 19, cex = 1, lty = "solid", lwd = 2, ylim=c(0,1), xaxt = "n",yaxt = "n")
text(as.matrix(d1[,2]), as.matrix(d1[,3]), labels=as.matrix(d1[,1]), cex= 0.7, pos=3)
x_axis_range <- c(0,1)
x_axis_labels <- c("Small","Large")
axis(1,at = x_axis_range, labels = x_axis_labels)
y_axis_range <- c(0,1)
y_axis_labels <- c("Slow","Fast")
axis(2,at = y_axis_range, labels = y_axis_labels)
title(xlab="Memory", ylab="Speed",cex.lab=1)
dev.off()
But the plot doesn't come out right. A few issues I have: the axis label are messed up (it shows as.matrix ..., instead of the label I specified), and the margin of the plot is to small that node labels are cutoff. I am new to using R and plot, appreciate your help.
A simple solution for your problem is to define axis labels and axis ranges in the plot function.
d1 <- structure(list(V1 = structure(c(6L, 3L, 4L, 1L, 5L, 2L), .Label = c("Almore and Flemin (2001) (KER)",
"Charles and Osborne (2003)", "Lancelot et al. (1989)", "LemLM Xeafield (1997) (TER) ",
"Malek et al. (2006) (HER) ", "Xeafield (1999) (PER) "), class = "factor"),
V2 = c(1, 0.9394939494, 0.6265126513, 0.4218921892, 0.4125412541,
0.0308030803), V3 = c(0.5745375408, 0.4733405876, 0.2959738847,
0.5745375408, 1, 0.1414581066)), .Names = c("V1", "V2", "V3"
), class = "data.frame", row.names = c(NA, -6L))
# Use xlab and ylab for axis labels and
# and xlim and ylim for setting axis ranges
plot(as.matrix(d1[,2]), as.matrix(d1[,3]), col= "blue", pch = 19,
cex = 1, lty = "solid", lwd = 2, ylim=c(-0.1,1.1), xaxt = "n",yaxt = "n",
xlab="Memory", ylab="Speed",cex.lab=1, xlim=c(-0.1,1.1))
text(as.matrix(d1[,2]), as.matrix(d1[,3]),
labels=as.matrix(d1[,1]), cex= 0.7, pos=3)
x_axis_range <- c(0,1)
x_axis_labels <- c("Small","Large")
axis(1,at = x_axis_range, labels = x_axis_labels)
y_axis_range <- c(0,1)
y_axis_labels <- c("Slow","Fast")
axis(2,at = y_axis_range, labels = y_axis_labels)

How to assign name to every circle in a Venn diagram using R (Venndiagram package)

I would assign a name for every circle in a Venn diagram. I have tried to change options in category but seems this is the only set I can use. I attach my code, please where is the wrong part?
goterm3 = c(1,2,3,4,5,6)
goterm2 =c(2,2,3,4,3,5)
goterm1=c(4,5,3,2,4,3,2,4)
int12 = intersect(goterm1, goterm2)
int13 = intersect(goterm1, goterm3)
int23 = intersect(goterm2, goterm3)
intall = intersect(int12, goterm3)
require(VennDiagram)
venn.plot = draw.triple.venn(length(goterm1), length(goterm2), length(goterm3),
length(int12), length(int23), length(int13),length(intall),
category = rep("ORG1, ORG2,Org",3) ,rotation = 1, reverse = FALSE, euler.d = FALSE,
scaled = FALSE, lwd = rep(2, 3), lty = rep("solid", 3),
col = rep("black", 3), fill = c("blue", "red", "green"),
alpha = rep(0.5, 3),
label.col = rep("black", 7), cex = rep(1, 7), fontface = rep("plain", 7),
fontfamily = rep("serif", 7), cat.pos = c(0, 0, 180),
cat.dist = c(0.05, 0.05, 0.025), cat.col = rep("black", 3),
cat.cex = rep(1, 3), cat.fontface = rep("plain", 3),
cat.fontfamily = rep("serif", 3),
cat.just = list(c(0.5, 1), c(0.5, 1), c(0.5, 0)), cat.default.pos = "outer",
cat.prompts = FALSE, rotation.degree = 0, rotation.centre = c(0.5, 0.5),
ind = TRUE, sep.dist = 0.05, offset = 0)
This is what I get and it does have the same labels as your categories (after I unmangled the string values for the categories:
category = c("ORG1", "ORG2","Org") # no rep needed and proper quotes

Combine lattice xyplot and histogram

Could someone help me please to upgrade my plot?
a) In the plot, there should be print only one y-scale per row.
b) To print a more comfortable legend, that means
1) change the order of symbols and description,
2) print line in the same x-position like superpose.symbols,
3) and print symbols for the histogram.
d1 <- data.frame(x=c(NA, 13:20, NA), y = 25, z = c(rep('march', 5),
rep("april", 5)), color = c(c(rep(c("red", "green"), 2), "red"),
c(rep(c("blue", "yellow"), 2), "blue")), stringsAsFactors = FALSE)
d2 <- data.frame(x=c(NA, 20:27, NA), y = 23, z = c(rep('may', 5),
rep("june", 5)), color = c(c(rep(c("blue", "red"), 2), "red"),
c(rep(c("blue", "yellow"), 2), "blue")), stringsAsFactors = FALSE)
d1<-rbind(d1,d2)
sup.sym <- trellis.par.get("superpose.symbol")
sup.sym$alpha<-c(1, 0, 0, 0, 0, 0, 0)
sup.sym$col<-c(1,2,3,4,5,6,7)
sup.lin <- trellis.par.get("superpose.line")
sup.lin$col<-c(1,2,7,5,5,6,7)
sup.lin$alpha<-c(0, 1, 1, 1, 0, 0, 0)
settings<-list(superpose.symbol = sup.sym,superpose.line = sup.lin)
xyplot(y ~ x | factor(z), data = d1
,ylim = list( c(22, 26),c(22, 26), c(0, 1),c(0, 1) )
,layout=c(2,2)
,scales = list(y = list( relation = "free" ))
,par.settings = settings
,auto.key = list(text = c("A","B","C", "D")
,space = "right"
,lines = TRUE
)
,panel = function(x, y, subscripts) {
if(panel.number()>2){
panel.histogram(x,breaks=3)
}else{
panel.xyplot(x = x, y = y,
subscripts=subscripts,
col = d1[subscripts, "color"])
}
})

Resources