Add scatter points on top of ggsurvplot - r

After calling ggsurvplot(...) I want to superimpose some points from another data frame df containing two columns time and survival. I'm looking for tips on accomplishing this.
Edit: some code as an example
require("survival")
require("survminer")
fit<- survfit(Surv(time, status) ~ sex, data = lung)
# Basic survival curves
ggsurvplot(fit, data = lung)
# Example points
x <- fit$time
y <- fit$n.risk
How would I superimpose points(x, y) on the ggsurvplot plot?

The ggplot-type object is part of the object returned by ggsurvplot() and can be addressed as $plot:
ggplot1 <- ggsurvplot(fit, data = lung)$plot
You can work with it as with a usual ggplot object and add other layers. For your specific example, however, it is not clear how you want to define the Y coordinate of your points: fit$n.risk is a count between 1 and 138, while the Y axis of the plot runs from 0 to 1. Here is one option, which rescales n.risk by its maximum:
# Extract the ggplot object from the list returned by ggsurvplot()
ggplot1 <- ggsurvplot(fit, data = lung)$plot
# nRiskRel is n.risk divided by its maximum, so its largest value is 1
df1 <- data.frame(time=fit$time, nRisk=fit$n.risk, nRiskRel=fit$n.risk/max(fit$n.risk))
# Add the points as an extra layer that reads from df1 instead of the plot's own data
ggplot1 + geom_point(aes(x=time, y=nRiskRel), data = df1, alpha=0.5, size=3)
You may want to add colors etc.

Related

How to use ggrepel with a survival plot (ggsurvplot)?

I would like to add the label of each survival curve at the end of the lines.
I am using ggsurvplot from the Survminer package (which is based on ggplot2).
I don't know how to do it with ggrepel. I didn't find any example with survival data:
require("survival")
library(survminer)
library(ggrepel)
fit<- survfit(Surv(time, status) ~ sex, data = lung)
p=ggsurvplot(fit, data = lung)$plot;
p + geom_label_repel()
The code above throws an error.
The object p you have created contains enough information to generate the labels. p$data is a data frame, and contains a column called strata which you can use here. You need to map the label aesthetic to this column. You will also need to filter a copy of the data to pass to the geom_label_repel layer that contains only the maximum time value for each stratum:
# group_by() and filter() come from dplyr, which the code above does not load
library(dplyr)
# Label each curve once, at the last time point of its stratum
p + geom_label_repel(
  aes(label = strata),
  data = p$data %>%
    group_by(strata) %>%
    filter(time == max(time)) %>%
    ungroup()
)

plotting log(10) lengths differ

I am having difficulty plotting a log(10) formula on to existing data points. I derived a logarithmic function based on a list of data where "Tout_F_6am" is my independent variable and "clo" is my dependent variable.
When I go to plot it, I get an error saying that the lengths of x and y differ. Can someone please help me figure out what's going wrong?
logKT=lm(log10(clo)~ Tout_F_6am,data=passive)
summary(logKT) #r2=0.12
coef(logKT)
plot(passive$Tout_F_6am,passive$clo) #plot data points
x=seq(53,84, length=6381)#match length of x variable
y=logKT
lines(x,y,type="l",lwd=2,col="red")
length(passive$Tout_F_6am) #6381
length(passive$clo) #6381
Additionally, can the formula curve(-0.0219-0.005*log10(x),add=TRUE,col=2)be written as eq=(10^-0.022)*(10^-0.005*x)? thanks!
The problem is that you are trying to plot the model object, not the predictions from the model. Try something like this:
Define the explanatory values you want to plot, in a data frame (or tibble). It doesn't have to be as many as there are data points.
library(dplyr)
explanatory_data <- tibble(
Tout_F_6am = seq(53, 84, 0.1)
)
Add a column of predicted values using predict(). This takes a model and your explanatory data. Because the model was fitted to log10(clo), predict() returns values on the log10 scale, so you have to back-transform them with 10^.
# Predict on the model's log10(clo) scale, then back-transform with 10^ to
# obtain clo itself.
prediction_data <- explanatory_data %>%
mutate(
log10_clo = predict(logKT, explanatory_data),
clo = 10 ^ log10_clo
)
Finally, draw your plot.
plot(clo ~ Tout_F_6am, data = prediction_data, log="y", type = "l")
The plotting is actually easier using ggplot2. This should give you more or less what you want.
library(ggplot2)
ggplot(passive, aes(Tout_F_6am, clo)) +
geom_point() +
geom_smooth(method = "lm") +
scale_y_log10()

Place different QQ plot (with different datasets) in the same coordinate system

I can only get the qq plot one by one with different datasets..
library(fitdistrplus)
x1<-c(1300,541,441,35,278,167,276,159,126,60.8,160,5000,264.6,379,170,251.3,155.84,187.01,850)
x2<-c(25,500,42,100,10,8.2,76,2.2,7.86,50)
y1<-log10(x1)
y2<-log10(x2)
x1.logis <- fitdist(y1, "logis", method="mle")
x2.logis <- fitdist(y2, "logis", method="mle")
ppcomp(x1.logis, addlegend=FALSE)
ppcomp(x2.logis, addlegend=FALSE)
How can i place the two qq plot in same coordinate system?
Use ggplot2. You need to extract the fitted values from each fitdist object and put them in a new data frame. Use ggplot2 layers to add the fitted values from the two data sets and then add an abline.
library(ggplot2)
# Build the plotting points for one fitted logistic distribution:
#   x   - fitted CDF evaluated at the data, sorted
#   p   - empirical probabilities i / n
#   set - label identifying which data set the rows belong to
pp_points <- function(fit, set) {
  n <- length(fit$data)
  data.frame(
    x = sort(plogis(fit$data,
                    location = fit$estimate[1],
                    scale = fit$estimate[2])),
    p = seq_len(n) / n,
    set = set
  )
}
fittedx1 <- pp_points(x1.logis, "1")
fittedx2 <- pp_points(x2.logis, "2")
# Combine the two data sets; the `set` column lets ggplot give each data set
# its own color
fitted <- rbind(fittedx1, fittedx2)
# Now plot
ggplot(fitted) +
  geom_point(aes(p, x, col = set), shape = 1, size = 3) +
  geom_abline(intercept = 0, slope = 1)

Change colors of select lines in ggplot2 coefficient plot in R

I would like to change the color of coefficient lines based on whether the point estimate is negative or positive in a ggplot2 coefficient plot in R. For example:
require(coefplot)
set.seed(123)
dat <- data.frame(x = rnorm(100), z = rnorm(100))
mod1 <- lm(y1 ~ x + z, data = dat)
coefplot.lm(mod1)
Which produces the following plot:
In this plot, I would like to change the "x" variable to red when plotted. Any ideas? Thanks.
I don't think you can do this with a plot produced by coefplot.lm. The coefplot package uses ggplot2 as its plotting system, which is good in itself, but it does not let you control the colors as easily as you would like. To achieve the desired colors, you need a variable in your dataset that color-codes the values, and you need to map it with color = <that variable> in the aes() call of the layer that draws the dots and their confidence intervals. Apparently, this is impossible to do with the output of the coefplot.lm function. You may be able to change the colors using ggplot2's ggplot_build() function, but I would say it's easier to write your own function for this task.
I've done this once to plot odds. If you want, you may use my code. Feel free to change it. The idea is the same as in coefplot. First, we extract coefficients from a model object and prepare the data set for plotting; second, actually plot.
The code for extracting coefficients and data set preparation
# Extract exponentiated coefficients from a fitted model and arrange them in a
# data frame ready for plotting.
#
# Args:
#   x: a fitted model for which coef(), confint.default() and summary() work
#      (e.g. the result of lm() or glm()).
#
# Returns:
#   A data frame with one row per coefficient except the first one, with
#   columns:
#     OR, lower, upper - exp() of the estimate and of its confint.default()
#                        bounds
#     vars             - coefficient name
#     col              - "blue" when OR > 1, otherwise "red"
#     pvalue           - p-value from the summary coefficient table
df_plot_odds <- function(x) {
  tmp <- data.frame(cbind(exp(coef(x)), exp(confint.default(x))))
  odds <- tmp[-1, ]  # drop the first row (assumed to be the intercept)
  names(odds) <- c("OR", "lower", "upper")
  odds$vars <- row.names(odds)
  odds$col <- ifelse(odds$OR > 1, "blue", "red")
  # The p-value is the 4th column of the coefficient table. Its name depends
  # on the model type ("Pr(>|t|)" for lm, "Pr(>|z|)" for a binomial glm), so
  # select it by position instead of by name.
  coef_table <- coef(summary(x))
  odds$pvalue <- coef_table[-1, 4]
  odds
}
Plot the output of the extract function
# Draw a horizontal coefficient plot (point estimate plus error bar) from the
# data frame produced by df_plot_odds().
#
# Args:
#   df_plot_odds: data frame with columns vars, OR, lower, upper and col,
#                 where col holds the strings "blue" or "red".
#   xlab: label for the horizontal axis of the final (flipped) plot.
#   ylab: label for the vertical axis of the final (flipped) plot.
#   asp:  aspect ratio of the plotting panel.
#
# Returns:
#   A ggplot object.
plot_odds <- function(df_plot_odds, xlab = "Odds Ratio", ylab = "", asp = 1) {
  if (!requireNamespace("ggplot2", quietly = TRUE)) {
    stop("Package 'ggplot2' is required by plot_odds().", call. = FALSE)
  }
  ggplot2::ggplot(
    df_plot_odds,
    ggplot2::aes(x = vars, y = OR, ymin = lower, ymax = upper)
  ) +
    ggplot2::geom_errorbar(ggplot2::aes(color = col), width = 0.1) +
    ggplot2::geom_point(ggplot2::aes(color = col), size = 3) +
    ggplot2::geom_hline(yintercept = 1, linetype = 2) +
    # Name the values so each code keeps its own color even when only one of
    # "blue"/"red" occurs in the data; unnamed values are assigned in order.
    ggplot2::scale_color_manual(
      "Effect",
      breaks = c("blue", "red"),
      labels = c("Positive", "Negative"),
      values = c(blue = "blue", red = "red")
    ) +
    ggplot2::coord_flip() +
    ggplot2::theme_bw() +
    ggplot2::theme(legend.position = "none", aspect.ratio = asp) +
    # Labels are swapped on purpose because of the coord_flip() above
    ggplot2::ylab(xlab) +
    ggplot2::xlab(ylab)
}
Plotting your example
set.seed(123)
dat <- data.frame(x = rnorm(100),y = rnorm(100), z = rnorm(100))
mod1 <- lm(y ~ x + z, data = dat)
df <- df_plot_odds(mod1)
plot <- plot_odds(df)
plot
Which yields
Note that I chose theme_bw() as the default theme. The output is a ggplot2 object, so you can customize it further.

plotting multiple plots in ggplot2 on same graph that are unrelated

How would one use the smooth.spline() method in a ggplot2 scatterplot?
If my data is in the data frame called data, with two columns, x and y.
The smooth.spline would be sm <- smooth.spline(data$x, data$y). I believe I should use geom_line(), with sm$x and sm$y as the xy coordinates. However, how would one plot a scatterplot and a lineplot on the same graph that are completely unrelated? I suspect it has something to do with the aes() but I am getting a little confused.
You can use different data(frames) in different geoms and call the relevant variables using aes or you could combine the relevant variables from the output of smooth.spline
# example data
set.seed(1)
dat <- data.frame(x = rnorm(20, 10,2))
dat$y <- dat$x^2 - 20*dat$x + rnorm(20,10,2)
# spline
s <- smooth.spline(dat)
# plot - draw the original points from `dat` and the fitted curve from the
# values returned by smooth.spline. The fit gets its own data frame because
# s$x holds only the distinct x values, so it can be shorter than the original
# data when x contains ties.
library(ggplot2)
ggplot(dat, aes(x, y)) +
  geom_point() +
  geom_line(data = data.frame(x = s$x, y = s$y))
# or you could use geom_smooth
ggplot(dat, aes(x , y)) + geom_point() + geom_smooth()

Resources