Survival function with dropdown menu for each gene - r

I am trying to make a shiny app where you can select different miRNA in my input then plot the survival curve using ggsurvplot. There is something wrong with the functions within fitSurv, but I am not sure where I am doing it wrong.
library(dplyr)
require(survminer)
library(tidyverse)
require(reshape2)
library(shiny)
library(tidyr)
require(survival)
example data:
df.miRNA.cpm <- structure(list(`86` = c(5.57979757386892, 17.0240095264258, 4.28380151026145,
13.0457611762755, 12.5531123449841), `175` = c(5.21619202802748,
15.2849097474841, 2.46719979911461, 10.879496005461, 9.66416497290915
), `217` = c(5.42796072966512, 17.1413407297933, 5.15230233060323,
12.2646127361351, 12.1031024927547), `394` = c(-1.1390337316217,
15.1021660424984, 4.63168157763046, 11.1299079134792, 9.55572588729967
), `444` = c(5.06134249676025, 14.5442494311861, -0.399445049232868,
7.45775961504073, 9.92629675808998)), row.names = c("hsa_let_7a_3p",
"hsa_let_7a_5p", "hsa_let_7b_3p", "hsa_let_7b_5p", "hsa_let_7c_5p"
), class = "data.frame")
df.miRNA.cpm$miRNA <- rownames(df.miRNA.cpm)
ss.survival.shiny.miRNA.miRNA <- structure(list(ID = c("86", "175", "217", "394", "444"), TimeDiff = c(71.0416666666667,
601.958333333333, 1130, 1393, 117.041666666667), Status = c(1L,
1L, 0L, 0L, 1L)), row.names = c(NA, 5L), class = "data.frame")
Joint the two example data frames:
data_prep.miRNA <- df.miRNA.cpm %>%
tidyr::pivot_longer(-miRNA, names_to = "ID") %>%
left_join(ss.survival.shiny.miRNA.miRNA)
Example of the joined data:
> data_prep.miRNA
# A tibble: 153,033 x 5
miRNA ID value TimeDiff Status
<chr> <chr> <dbl> <dbl> <int>
1 hsa_let_7a_3p 86 5.58 71.0 1
2 hsa_let_7a_3p 175 5.22 602. 1
3 hsa_let_7a_3p 217 5.43 1130 0
4 hsa_let_7a_3p 394 -1.14 1393 0
5 hsa_let_7a_3p 444 5.06 117. 1
6 hsa_let_7a_3p 618 4.37 1508 0
7 hsa_let_7a_3p 640 2.46 1409 0
8 hsa_let_7a_3p 829 0.435 919. 0
9 hsa_let_7a_3p 851 -1.36 976. 0
10 hsa_let_7a_3p 998 3.87 1196. 0
# … with 153,023 more rows
For a selected MicroRNA this works:
fitSurv <- survfit(Surv(data$TimeDiff, data$Status) ~ paste(cut(value , quantile(value , probs = c(0, 0.8)), include.lowest=T)), data = data_prep.miRNA[grep("hsa_let_7a_3p",data_prep.miRNA$miRNA),])
Shiny:
ui.miRNA <- fluidPage(
selectInput("MicroRNA", "miRNA", choices = unique(data_prep.miRNA$miRNA)),
plotOutput("myplot"))
server <- function(input, output, session) {
data_selected <- reactive({
filter(data_prep.miRNA, miRNA %in% input$MicroRNA)
})
output$myplot <- renderPlot({
fitSurv <- survfit(Surv("TimeDiff", "Status") ~ paste(cut("value" , quantile("value" , probs = c(0, 0.8)), include.lowest=T)), data = data_selected)
ggsurvplot(fitSurv ,title="", xlab="Time (Yrs)", ylab="Survival prbability",
font.main = 8,
font.x = 8,
font.y = 8,
font.tickslab = 8,
font.legend=8,
pval.size = 3,
pval.coord = c(1000,1),
size=0.4,
legend = "right",
censor.size=2,
break.time.by = 365,
pval =T,#"p=0.003",#"p=0.41",
#xscale=365,
#palette = c("#E7B800", "#2E9FDF"),
#ggtheme = theme_bw(),
risk.table = F,
xscale=365.25,
xlim=c(0,7*365))
})
}
shinyApp(ui.miRNA, server)

There are several mistakes in this statement:
fitSurv <-
survfit(Surv("TimeDiff", "Status") ~ paste(cut("value", quantile("value", probs = c(0, 0.8)), include.lowest=T)),
data = data_selected)
First, data_selected is a reactive conductor, not a dataframe. If you want the dataframe returned by this reactive conductor, you have to use parentheses: data_selected().
Next, you must not quote the variables: TimeDiff and not "TimeDiff", etc.
The paste command is useless.
Your cut produces only one category and the NA category. To get two intervals as categories, use probs = c(0, 0.8, 1) in quantile.
Finally it is not a good idea to use T for TRUE, because T can be set to any R object, while TRUE is a reserved work.
To conclude, here is the corrected code:
fitSurv <-
survfit(Surv(TimeDiff, Status) ~ cut(value, quantile(value, probs = c(0, 0.8, 1)), include.lowest=TRUE),
data = data_selected())

Related

apply any function to elements which returned from which.max function in r

I want to apply any function to elements which I found its location with which.max function. For example, my sample data is below:
$Apr
$Apr$`04-2036`
date value
92 04-01-2036 0.00
93 04-02-2036 3.13
94 04-03-2036 20.64
$Apr$`04-2037`
date value
457 04-01-2037 5.32
458 04-02-2037 82.47
459 04-03-2037 15.56
$Dec
$Dec$`04-2039`
date value
1431 12-01-2039 3
1432 12-02-2039 0
1433 12-03-2039 11
$Dec$`04-2064`
date value
10563 12-01-2064 0
10564 12-02-2064 5
10565 12-03-2064 0
data<-structure(list(Apr = structure(list(`04-2036` = structure(list(
date = c("04-01-2036", "04-02-2036", "04-03-2036"), value = c(0,
3.13, 20.64)), .Names = c("date", "value"), row.names = 92:94, class = "data.frame"),
`04-2037` = structure(list(date = c("04-01-2037", "04-02-2037",
"04-03-2037"), value = c(5.32, 82.47, 15.56)), .Names = c("date",
"value"), row.names = 457:459, class = "data.frame")), .Names = c("04-2036",
"04-2037")), Dec = structure(list(`04-2039` = structure(list(
date = c("12-01-2039", "12-02-2039", "12-03-2039"), value = c(3,
0, 11)), .Names = c("date", "value"), row.names = 1431:1433, class = "data.frame"),
`04-2064` = structure(list(date = c("12-01-2064", "12-02-2064",
"12-03-2064"), value = c(0, 5, 0)), .Names = c("date", "value"
), row.names = 10563:10565, class = "data.frame")), .Names = c("04-2039",
"04-2064"))), .Names = c("Apr", "Dec"))
I have found locations of maximum values for each element in lists of list using the functions below.
drop<-function(y){
lapply(y, function(x)(x[!(names(x) %in% c("date"))]))
}
q1<-lapply(data, drop)
q2<-lapply(q1, function(x) unlist(x,recursive = FALSE))
daily_max<-lapply(q2, function(x) lapply(x, max))
dailymax <- data.frame(matrix(unlist(daily_max), nrow=length(daily_max), byrow=TRUE))
row.names(dailymax)<-names(daily_max)
apply(dailymax, 1, which.max)
Locations of max. values of each element is computed as it is seen below;
Apr Dec
2 1
Now, I want to apply any function to these elements automatically for all my data(it is Apr 2 = Apr$04-2037 and Dec$2039).
You can subset and keep only the max value data in each list.
max_value <- apply(dailymax, 1, which.max)
Map(`[[`, data, max_value)
#$Apr
# date value
#457 04-01-2037 5.32
#458 04-02-2037 82.47
#459 04-03-2037 15.56
#$Dec
# date value
#1431 12-01-2039 3
#1432 12-02-2039 0
#1433 12-03-2039 11
Let's say you want to apply the function fn to this list.
fn <- function(x) {x$value <- x$value * 2;x}
You can change the Map function as -
Map(function(x, y) fn(x[[y]]), data, max_value)
#$Apr
# date value
#457 04-01-2037 10.64
#458 04-02-2037 164.94
#459 04-03-2037 31.12
#$Dec
# date value
#1431 12-01-2039 6
#1432 12-02-2039 0
#1433 12-03-2039 22

What is wrong with my custom colour palette in this plot?

Using ggsurvplot to draw some Kaplan-Meier curves.
5 curves should be plotted and I want control over their colours.
Here is the output of the survfit being plotted:
> elective_30Decadesurv
Call: survfit(formula = elective30Surv ~ electives$Decade)
n events median 0.95LCL 0.95UCL
electives$Decade=50 14 0 NA NA NA
electives$Decade=60 173 2 NA NA NA
electives$Decade=70 442 5 NA NA NA
electives$Decade=80 168 4 NA NA NA
electives$Decade=90 2 0 NA NA NA
Here is a working plot using the default colour palette, "hue":
> ggsurvplot(elective_30Decadesurv,
data = electives,
palette = "hue",
title = "30 day survival after elective EVAR",
legend = "none",
legend.title = "Decade",
legend.labs = c("5th",
"6th",
"7th",
"8th",
"9th"
),
censor.shape = 124,
ggtheme = survPlotTheme,
risk.table = "nrisk_cumevents",
risk.table.y.text.col = TRUE,
risk.table.fontsize = 3,
risk.table.height = 0.3,
break.time.by = 5,
ylim = c(0.95,
1
),
pval = TRUE,
pval.size = 3,
pval.coord = c(1,
0.96
)
)
See plot in section 3.1.4 of this webpage for the output of the above
The Decade group has 5 entries, so I'm trying to provide five colours to palette.
However, both:
> ggsurvplot(elective_30Decadesurv,
data = electives,
palette = c("#440154",
"#3B528B",
"#21908C",
"#5DC863",
"#5DC863"
),
title = "30 day survival after elective EVAR",
legend = "none",
legend.title = "Decade",
legend.labs = c("5th",
"6th",
"7th",
"8th",
"9th"
),
censor.shape = 124,
ggtheme = survPlotTheme,
risk.table = "nrisk_cumevents",
risk.table.y.text.col = TRUE,
risk.table.fontsize = 3,
risk.table.height = 0.3,
break.time.by = 5,
ylim = c(0.95,
1
),
pval = TRUE,
pval.size = 3,
pval.coord = c(1,
0.96
)
)
And:
> fiveColours <- c("#440154",
"#3B528B",
"#21908C",
"#5DC863",
"#5DC863"
)
> ggsurvplot(elective_30Decadesurv,
data = electives,
palette = fiveColours,
title = "30 day survival after elective EVAR",
legend = "none",
legend.title = "Decade",
legend.labs = c("5th",
"6th",
"7th",
"8th",
"9th"
),
censor.shape = 124,
ggtheme = survPlotTheme,
risk.table = "nrisk_cumevents",
risk.table.y.text.col = TRUE,
risk.table.fontsize = 3,
risk.table.height = 0.3,
break.time.by = 5,
ylim = c(0.95,
1
),
pval = TRUE,
pval.size = 3,
pval.coord = c(1,
0.96
)
)
Give the same error:
Error in names(.cols) <- grp.levels :
'names' attribute [5] must be the same length as the vector [4]
What vector is length [4]?
Is 'names' attribute my colour vector?
If I take one of the colours out of the custom palette, eg fiveColours <- c("#440154","#3B528B","#21908C","#5DC863") I get this error:
Error: Insufficient values in manual scale. 5 needed but only 4 provided.
Which implies the number of colours provided is correct but something else is causing the issue.
I've troubleshot to the limits of my own ability. Help please!
FYI:
> electives %>% select(Decade) %>% group_by(Decade) %>% summarise(n())
# A tibble: 5 x 2
Decade `n()`
<fct> <int>
1 50 14
2 60 173
3 70 442
4 80 168
5 90 2
Should prove the length of the Decade variable and here is how the survival object and survfit were generated:
> elective5Surv <- Surv(electives$surv5Y, electives$dead5Y)
> elective_5Decadesurv <- survfit(elective5Surv ~ electives$Decade)
Ok, I have sorted my own mistake by proof-reading!
Of the five hex colours I’d provided, two were identical (not on purpose.)
I changed the fifth colour to a different hex value (what it was meant to be in the first place) and it works now.
Thanks, Rui, for your response earlier, it helped me down the path!

Multiple line chart using plotly r

I have a data frame which I am trying to plot using plotly as multiple line chart.Below is how the dataframe looks like:
Month_considered pct.x pct.y pct
<fct> <dbl> <dbl> <dbl>
1 Apr-17 79.0 18.4 2.61
2 May-17 78.9 18.1 2.99
3 Jun-17 77.9 18.7 3.42
4 Jul-17 77.6 18.5 3.84
5 Aug-17 78.0 18.3 3.70
6 Sep-17 78.0 18.9 3.16
7 Oct-17 77.6 18.9 3.49
8 Nov-17 77.6 18.4 4.01
9 Dec-17 78.5 18.0 3.46
10 Jan-18 79.3 18.4 2.31
11 2/1/18 78.9 19.6 1.48
When I iterate through to plot multiple lines below is the code used.
colNames <- colnames(delta)
p <-
plot_ly(
atc_seg_master,
x = ~ Month_considered,
type = 'scatter',
mode = 'line+markers',
line = list(color = 'rgb(205, 12, 24)', width = 4)
)
for (trace in colNames) {
p <-
p %>% plotly::add_trace(y = as.formula(paste0("~`", trace, "`")), name = trace)
}
p %>%
layout(
title = "Trend Over Time",
xaxis = list(title = ""),
yaxis = list (title = "Monthly Count of Products Sold")
)
p
This is how the output looks like
My question is how to remove trace 0 and month_considered to remove from the chart even though its not in colnames which I loop through to add the lines.
It looks like you were getting tripped up by two things:
When you initially defined p and included the data and x arguments, a trace was created -- trace 0. You can define a plot without providing any data or x values to start by just using p <- plot_ly() along with any desired layout features.
When you loop through the column names, your x axis column, Month_Considered is part of the set. You can exclude this by using setdiff() (part of base R) to create a vector with all of your column names except for Months_Considered
Putting those two things together, one way (of many possible) to accomplish what you're going for is as follows:
library(plotly)
df <- data.frame(Month_Considered = seq.Date(from = as.Date("2017-01-01"), by = "months", length.out = 12),
pct.x = seq(from = 70, to = 80, length.out = 12),
pct.y = seq(from = 30, to = 40, length.out = 12),
pct = seq(from = 10, to = 20, length.out = 12))
## Define a blank plot with the desired layout (don't add any traces yet)
p <- plot_ly()%>%
layout(title = "Trend Over Time",
xaxis = list(title = ""),
yaxis = list (title = "Monthly Count of Products Sold") )
## Make sure our list of columns to add doesnt include the Month Considered
ToAdd <- setdiff(colnames(df),"Month_Considered")
## Add the traces one at a time
for(i in ToAdd){
p <- p %>% add_trace(x = df[["Month_Considered"]], y = df[[i]], name = i,
type = 'scatter',
mode = 'line+markers',
line = list(color = 'rgb(205, 12, 24)', width = 4))
}
p

R Shiny Reactive dplyr filters to form new dataframe to plot from

I am trying to build a reactive plot that uses dplyr filter functions reactive to user inputs to pull the correct x,y coordinates to plot.
My UI:
choices <- c("Web" = 1,"Huddle" = 3, "Other" = 5, "Test" = 7)
role <- c("Student" = 1, "Not" = 2)
range <- c("2016"=2,"July 2017"=1)
ui <- dashboardPage(
dashboardHeader(title="Membership Satisfaction"),
dashboardSidebar(
sidebarMenu(
menuItem("Value Dashboard", tabName = "dashboard", icon =
icon("dashboard")),
menuItem("Services Dashboard", tabName = "service", icon =
icon("dashboard")),
menuItem("Demographics Dashboard", tabName = "demos", icon =
icon("dashboard"))
)
),
dashboardBody(
tabItems(
tabItem(tabName = "demos",
sidebarPanel(
checkboxGroupInput("inpt","Select variables to plot", choices =
choices),
checkboxGroupInput("role",
"Select Primary Role of Interest",
choices = role),
checkboxGroupInput("yrs",
"Select year(S) of Interest",
choices = range)),
fluidPage(
plotOutput("test")
)))))
and my server:
server <- function(input,output){
x <- reactive({
example1 %>%
dplyr::filter(Product == as.integer(input$inpt))%>%
dplyr::filter(year == as.integer(input$range)) %>%
dplyr::filter(status == as.integer(input$role)) %>%
pull(-2)
})
y <- reactive({
example1 %>%
dplyr::filter(Product == as.integer(input$inpt+1))%>%
dplyr::filter(year == as.integer(input$range)) %>%
dplyr::filter(status == as.integer(input$role)) %>%
pull(-1)
})
z <- reactive({data.frame(x= x(), y = y())})
output$test <- renderPlot({
ggplot(z(), aes(x,y))+
geom_point(colour ="green", shape = 17, size=5 )+
labs(x = "Mean Satisfaction", y = "Mean Importance") +
xlim(0,5) + ylim(0,5)+
geom_vline(xintercept=2.5) + geom_hline(yintercept = 2.5)})
}
shinyApp (ui = ui, server = server)
Notes:
Example1 is a dataframe with summarized averages by group, year, and product.
X and Y variables filter to get a particular product code and stores just the mean values. Y is a corresponding and always product(x+1). It gets the average for y and stores those vales. Variable Z then creates a dataframe of x,y coordinate points to plot.
Example1 data head:
> example1
# A tibble: 24 x 5
# Groups: year, status [12]
year status Product AvgComImpt AvgComSat
<dbl> <dbl> <fct> <dbl> <dbl>
1 1. 1. 1 NaN 2.70
2 1. 1. 2 3.13 NaN
3 1. 2. 1 NaN 2.43
4 1. 2. 2 3.33 NaN
5 1. 3. 1 NaN 2.40
6 1. 3. 2 3.60 NaN
7 1. 5. 1 NaN 3.03
8 1. 5. 2 3.30 NaN
9 1. 7. 1 NaN 3.50
10 1. 7. 2 4.00 NaN
> dput(head(example1))
structure(list(year = c(1, 1, 1, 1, 1, 1), status = c(1, 1, 2,
2, 3, 3), Product = structure(c(1L, 2L, 1L, 2L, 1L, 2L), .Label = c("1",
"2"), class = "factor"), AvgComImpt = c(NaN, 3.12844036697248,
NaN, 3.33333333333333, NaN, 3.6), AvgComSat = c(2.7037037037037,
NaN, 2.42857142857143, NaN, 2.4, NaN)), .Names = c("year", "status",
"Product", "AvgComImpt", "AvgComSat"), row.names = c(NA, -6L), class =
c("grouped_df",
"tbl_df", "tbl", "data.frame"), vars = c("year", "status"), drop = TRUE,
indices = list(
0:1, 2:3, 4:5), group_sizes = c(2L, 2L, 2L), biggest_group_size = 2L, labels
= structure(list(
year = c(1, 1, 1), status = c(1, 2, 3)), row.names = c(NA,
-3L), class = "data.frame", vars = c("year", "status"), drop = TRUE, .Names
= c("year",
"status")))
In theory it's supposed to work like this:
X would get this:
[1] 3.128440 3.333333 3.600000 3.296296 4.000000 4.000000 4.000000 4.234131
4.254562 4.386861 4.090909 4.387218
Y would get this:
[1] 2.703704 2.428571 2.400000 3.034483 3.500000 2.666667 4.000000 3.856167
4.045455 3.825000 3.818182 3.996377
and then my Z would get this:
1 3.128440 2.703704
2 3.333333 2.428571
3 3.600000 2.400000
4 3.296296 3.034483
5 4.000000 3.500000
6 4.000000 2.666667
7 4.000000 4.000000
8 4.234131 3.856167
9 4.254562 4.045455
10 4.386861 3.825000
11 4.090909 3.818182
12 4.387218 3.996377
when i run it however, it says in the app:
"Error: Results must have length 2, not 0."
When i make a selection on the variables to plot it changes to:
"Error: Results must have length 1, not 0."
I'm wondering if it has anything to do with if there's no input selected and the error throws because the filter grabs the user input. But if that was the case, I'd think that making selections in all input areas would at least give a plot for that - but it doesn't. If this is what I'm doing wrong, I have no idea where to start... any help would be appreciated!
Any inputs/ideas on how I can get this code to work? THANKS!!!!

Draw a graph in R with header elaborate on two columns

I have a table with header expanded on two columns. How to draw a 3D graph on this table OR what would be a way to draw a graph on tables having elaborated headers. Kindly suggest me alternate ways to achieve this (if any)
Crime Table:
year
2014 2015 2016
Reported Detected Reported Detected Reported Detected
Murder 221 208 178 172 26 20
Murder(Gain) 20 16 11 9 1 1
Dacoity 51 45 44 36 5 1
Robbery 538 316 351 201 23 10
Chain Snatching 528 394 342 229 23 0
Code:
library(tables)
#CLASS 1 CRIMES 2014
c14 <- structure(list(`Reported` = c(221, 20, 51,
538, 528), `Detected` = c(208, 16, 45, 316, 394)), .Names = c("Reported",
"Detected"), row.names = c("Murder", "Murder(Gain)", "Dacoity", "Robbery", "Chain Snatching"), class = "data.frame")
c14
#CLASS 1 CRIMES 2015
c15 <- structure(list(`Reported` = c(178, 11, 44,
351, 342), `Detected` = c(172, 9,
36, 201, 229)), .Names = c("Reported",
"Detected"), row.names = c("Murder", "Murder(Gain)", "Dacoity",
"Robbery", "Chain Snatching"), class = "data.frame")
c15
#CLASS 1 CRIMES 31-01-2016
c16 <- structure(list(`Reported` = c(26, 1, 5,
23, 23), `Detected` = c(20, 1,
1, 10, 0)), .Names = c("Reported",
"Detected"), row.names = c("Murder", "Murder(Gain)", "Dacoity",
"Robbery", "Chain Snatching"), class = "data.frame")
c16
# rbind with rownames as a column
st <- rbind(
data.frame(c14, year = '2014', what = factor(rownames(c14), levels = rownames(c14)),
row.names= NULL, check.names = FALSE),
data.frame(c15,year = '2015',what = factor(rownames(c15), levels = rownames(c15)),
row.names = NULL,check.names = FALSE),
data.frame(c16,year = '2016',what = factor(rownames(c16), levels = rownames(c16)),
row.names = NULL,check.names = FALSE)
)
crimetable <- tabular(Heading()*what ~ year*(`Reported` +`Detected`)*Heading()*(identity),data=st)
crimetable
As I hate 3D plots for 3-way tables and I like ggplot2, I suggest this:
Gather your data into "long" format:
library(tidyr)
st_long = gather(st, type, count, -c(year, what))
head(st_long, 3)
# year what type count
# 1 2014 Murder Reported 221
# 2 2014 Murder(Gain) Reported 20
# 3 2014 Dacoity Reported 51
As you can see, both Detected and Reported columns are now included in the same column called type. This is useful for ggplot2, as it can easily create facets. Facets are separate elements within the plot that share the same aesthetic components but work with on different groups of data:
library(ggplot2)
ggplot(st_long, aes(year, count, group = what, color = what)) +
geom_line() +
facet_wrap(~ type)
(I am not saying that line plot is the only/best plot here, but it is often used when comparing frequencies across different time-points.)

Resources