I have a data frame which I am trying to plot using plotly as multiple line chart.Below is how the dataframe looks like:
Month_considered pct.x pct.y pct
<fct> <dbl> <dbl> <dbl>
1 Apr-17 79.0 18.4 2.61
2 May-17 78.9 18.1 2.99
3 Jun-17 77.9 18.7 3.42
4 Jul-17 77.6 18.5 3.84
5 Aug-17 78.0 18.3 3.70
6 Sep-17 78.0 18.9 3.16
7 Oct-17 77.6 18.9 3.49
8 Nov-17 77.6 18.4 4.01
9 Dec-17 78.5 18.0 3.46
10 Jan-18 79.3 18.4 2.31
11 2/1/18 78.9 19.6 1.48
When I iterate through to plot multiple lines below is the code used.
colNames <- colnames(delta)
p <-
plot_ly(
atc_seg_master,
x = ~ Month_considered,
type = 'scatter',
mode = 'line+markers',
line = list(color = 'rgb(205, 12, 24)', width = 4)
)
for (trace in colNames) {
p <-
p %>% plotly::add_trace(y = as.formula(paste0("~`", trace, "`")), name = trace)
}
p %>%
layout(
title = "Trend Over Time",
xaxis = list(title = ""),
yaxis = list (title = "Monthly Count of Products Sold")
)
p
This is how the output looks like
My question is how to remove trace 0 and month_considered to remove from the chart even though its not in colnames which I loop through to add the lines.
It looks like you were getting tripped up by two things:
When you initially defined p and included the data and x arguments, a trace was created -- trace 0. You can define a plot without providing any data or x values to start by just using p <- plot_ly() along with any desired layout features.
When you loop through the column names, your x axis column, Month_Considered is part of the set. You can exclude this by using setdiff() (part of base R) to create a vector with all of your column names except for Months_Considered
Putting those two things together, one way (of many possible) to accomplish what you're going for is as follows:
library(plotly)
df <- data.frame(Month_Considered = seq.Date(from = as.Date("2017-01-01"), by = "months", length.out = 12),
pct.x = seq(from = 70, to = 80, length.out = 12),
pct.y = seq(from = 30, to = 40, length.out = 12),
pct = seq(from = 10, to = 20, length.out = 12))
## Define a blank plot with the desired layout (don't add any traces yet)
p <- plot_ly()%>%
layout(title = "Trend Over Time",
xaxis = list(title = ""),
yaxis = list (title = "Monthly Count of Products Sold") )
## Make sure our list of columns to add doesnt include the Month Considered
ToAdd <- setdiff(colnames(df),"Month_Considered")
## Add the traces one at a time
for(i in ToAdd){
p <- p %>% add_trace(x = df[["Month_Considered"]], y = df[[i]], name = i,
type = 'scatter',
mode = 'line+markers',
line = list(color = 'rgb(205, 12, 24)', width = 4))
}
p
Related
I am trying to make a shiny app where you can select different miRNA in my input then plot the survival curve using ggsurvplot. There is something wrong with the functions within fitSurv, but I am not sure where I am doing it wrong.
library(dplyr)
require(survminer)
library(tidyverse)
require(reshape2)
library(shiny)
library(tidyr)
require(survival)
example data:
df.miRNA.cpm <- structure(list(`86` = c(5.57979757386892, 17.0240095264258, 4.28380151026145,
13.0457611762755, 12.5531123449841), `175` = c(5.21619202802748,
15.2849097474841, 2.46719979911461, 10.879496005461, 9.66416497290915
), `217` = c(5.42796072966512, 17.1413407297933, 5.15230233060323,
12.2646127361351, 12.1031024927547), `394` = c(-1.1390337316217,
15.1021660424984, 4.63168157763046, 11.1299079134792, 9.55572588729967
), `444` = c(5.06134249676025, 14.5442494311861, -0.399445049232868,
7.45775961504073, 9.92629675808998)), row.names = c("hsa_let_7a_3p",
"hsa_let_7a_5p", "hsa_let_7b_3p", "hsa_let_7b_5p", "hsa_let_7c_5p"
), class = "data.frame")
df.miRNA.cpm$miRNA <- rownames(df.miRNA.cpm)
ss.survival.shiny.miRNA.miRNA <- structure(list(ID = c("86", "175", "217", "394", "444"), TimeDiff = c(71.0416666666667,
601.958333333333, 1130, 1393, 117.041666666667), Status = c(1L,
1L, 0L, 0L, 1L)), row.names = c(NA, 5L), class = "data.frame")
Joint the two example data frames:
data_prep.miRNA <- df.miRNA.cpm %>%
tidyr::pivot_longer(-miRNA, names_to = "ID") %>%
left_join(ss.survival.shiny.miRNA.miRNA)
Example of the joined data:
> data_prep.miRNA
# A tibble: 153,033 x 5
miRNA ID value TimeDiff Status
<chr> <chr> <dbl> <dbl> <int>
1 hsa_let_7a_3p 86 5.58 71.0 1
2 hsa_let_7a_3p 175 5.22 602. 1
3 hsa_let_7a_3p 217 5.43 1130 0
4 hsa_let_7a_3p 394 -1.14 1393 0
5 hsa_let_7a_3p 444 5.06 117. 1
6 hsa_let_7a_3p 618 4.37 1508 0
7 hsa_let_7a_3p 640 2.46 1409 0
8 hsa_let_7a_3p 829 0.435 919. 0
9 hsa_let_7a_3p 851 -1.36 976. 0
10 hsa_let_7a_3p 998 3.87 1196. 0
# … with 153,023 more rows
For a selected MicroRNA this works:
fitSurv <- survfit(Surv(data$TimeDiff, data$Status) ~ paste(cut(value , quantile(value , probs = c(0, 0.8)), include.lowest=T)), data = data_prep.miRNA[grep("hsa_let_7a_3p",data_prep.miRNA$miRNA),])
Shiny:
ui.miRNA <- fluidPage(
selectInput("MicroRNA", "miRNA", choices = unique(data_prep.miRNA$miRNA)),
plotOutput("myplot"))
server <- function(input, output, session) {
data_selected <- reactive({
filter(data_prep.miRNA, miRNA %in% input$MicroRNA)
})
output$myplot <- renderPlot({
fitSurv <- survfit(Surv("TimeDiff", "Status") ~ paste(cut("value" , quantile("value" , probs = c(0, 0.8)), include.lowest=T)), data = data_selected)
ggsurvplot(fitSurv ,title="", xlab="Time (Yrs)", ylab="Survival prbability",
font.main = 8,
font.x = 8,
font.y = 8,
font.tickslab = 8,
font.legend=8,
pval.size = 3,
pval.coord = c(1000,1),
size=0.4,
legend = "right",
censor.size=2,
break.time.by = 365,
pval =T,#"p=0.003",#"p=0.41",
#xscale=365,
#palette = c("#E7B800", "#2E9FDF"),
#ggtheme = theme_bw(),
risk.table = F,
xscale=365.25,
xlim=c(0,7*365))
})
}
shinyApp(ui.miRNA, server)
There are several mistakes in this statement:
fitSurv <-
survfit(Surv("TimeDiff", "Status") ~ paste(cut("value", quantile("value", probs = c(0, 0.8)), include.lowest=T)),
data = data_selected)
First, data_selected is a reactive conductor, not a dataframe. If you want the dataframe returned by this reactive conductor, you have to use parentheses: data_selected().
Next, you must not quote the variables: TimeDiff and not "TimeDiff", etc.
The paste command is useless.
Your cut produces only one category and the NA category. To get two intervals as categories, use probs = c(0, 0.8, 1) in quantile.
Finally it is not a good idea to use T for TRUE, because T can be set to any R object, while TRUE is a reserved work.
To conclude, here is the corrected code:
fitSurv <-
survfit(Surv(TimeDiff, Status) ~ cut(value, quantile(value, probs = c(0, 0.8, 1)), include.lowest=TRUE),
data = data_selected())
This question already has answers here:
Format axis tick labels to percentage in plotly
(2 answers)
Closed 2 years ago.
I have a df which can have 2 or more columns with the first one month always fixed.I am trying to plot them using plotly r. As of now it has three columns: month,apple,orange. Based on analysis it can have another column banana. Below is the code I am using right now but it even takes the column month for y-axis. How do I fix this:
> sample_test
month apple orange
2 Aug-17 2 1
3 Dec-17 2 1
4 Feb-18 2 1
5 Jan-18 2 1
6 Jul-17 2 1
7 Jun-17 2 1
8 May-17 2 1
9 Nov-17 2 1
10 Oct-17 2 1
11 Sep-17 2 1
p<- plot_ly(sample_test, x = sample_test$month, name = 'alpha', type = 'scatter', mode = 'lines',
line = list(color = 'rgb(24, 205, 12)', width = 4)) %>%
layout(#title = "abbb",
xaxis = list(title = "Time"),
yaxis = list (title = "Percentage"))
for(trace in colnames(sample_test)){
p <- p %>% plotly::add_trace(y = as.formula(paste0("~`", trace, "`")), name = trace)
}
p
The output looks like this :
Does this help?
sample_test <- read.table(
text = ' month apple orange
2 Aug-17 2 1
3 Dec-17 2 1
4 Feb-18 2 1
5 Jan-18 2 1
6 Jul-17 2 1
7 Jun-17 2 1
8 May-17 2 1
9 Nov-17 2 1
10 Oct-17 2 1
11 Sep-17 2 1'
)
sample_test$month <- as.Date(paste('01', sample_test$month, sep = '-'), format = '%d-%b-%y')
library(plotly)
p <- plot_ly(sample_test, type = 'scatter', mode = 'lines',
line = list(color = 'rgb(24, 205, 12)', width = 4)) %>%
layout(#title = "abbb",
xaxis = list(title = "Time"),
yaxis = list (title = "Percentage", tickformat = '%'))
for(trace in colnames(sample_test)[2:ncol(sample_test)]){
p <- p %>% plotly::add_trace(x = sample_test[['month']], y = sample_test[[trace]], name = trace)
}
p
There are couple of things to note here -
While dealing with dates, it's best to format them as dates. This can save a lot of headache later on. It is also useful as most if not all functions that require dealing with dates have methods built to handle them.
While adding traces in a for loop, always reference the vector to be plotted explicitly like data$vector or data[['vector']] and not like y = ~vector, because plotly for some reason ends up plotting just one trace over and over again.
You can specify a trace for the first y element, which will give you your raw counts. Next you can add a format for your y-axis using tickformat, which will convert to percentages.
sample_test <- data.frame(month = c("Aug-17", "Dec-17", "Feb-18"), apple = c(2,2,2), orange = c(1,1,1))
p <- plot_ly(sample_test, x = sample_test$month, y = ~apple, name = 'alpha', type = 'scatter', mode = 'lines',
line = list(color = 'rgb(24, 205, 12)', width = 4)) %>%
layout(xaxis = list(title = "Time")) %>%
layout(yaxis = list(tickformat = "%", title = "Percentage"))
Although for some reason this appears to just multiply by 100 and add a % label for some reason, rather than actually calculate a percentage. From this SO answer, looks like that's all it does. I don't really use plotly, but in ggplot you can do this if you reshape your data to long and map your categorical variable (in this case fruit) as a percent.
Edit: per OP's comment, removed month from being traced.
p <- plot_ly(type = 'scatter', mode = 'lines') %>%
layout(yaxis = list(tickformat = "%", title = "Percentage"))
colNames <- names(sample_test)
colNames <- colNames[-which(colNames == 'month')]
for(trace in colNames){
p <- p %>% plotly::add_trace(data = sample_test, x = ~ month, y = as.formula(paste0("~`", trace, "`")), name = trace)
print(paste0("~`", trace, "`"))
}
p
I'm having trouble with plotly in R and 'parcoords'. I'm trying to plot using colorscale defined by Persona. Persona has values of 1 through 4 and I expect each number to have it's own color. The plot scales fine but there are no lines representing the values for each variable.
Here is the code
options(viewer=NULL)
p <- df %>%
plot_ly(type = 'parcoords',
line = list(color = ~Persona,
colorscale = list(c(0,'red'),c(0.5,'green'),c(1,'blue'),c(1.5,'yellow'))) ,
dimensions = list(
list(range = c(15,55),
label = 'Rescuer Count', values = ~RescuerCount),
list(range = c(15,50),
label = 'Rescuer Share', values = ~RescuerShare),
list(range = c(5,95),
label = 'Avg Serviced Zip Codes', values = ~AvgServZips),
list(range = c(10,925),
label = 'Avg Number of Rescues', values = ~ AAvgNumofRescues),
list(range = c(310,16000),
label = 'Avg Rescuer Earnings', values = ~ AAEarnings),
list(range = c(1,55),
label = 'Persona Share of Earnings', values = ~ EarnShare),
list(range = c(30,95),
label = ' Login Percentage', values = ~ LoginPrct),
list(range = c(7,95),
label = 'Prct of Login Days W/Offer', values = ~ PrctLoginDaysWO),
list(range = c(1,5),
label = 'Avg Acceptance Rate', values = ~ AvgAcceptRate),
list(range = c(150,1975),
label = 'Annualized Number of Offers', values = ~ ANumofOffers)
)
)
print(p)
Data Table is here
Persona RescuerCount RescuerShare AvgServZips AAvgNumofRescues AAEarnings EarnShare LoginPrct PrctLoginDaysWO AvgAcceptRate ANumofOffers
1 16 15 45 389 6706 27 71 91 30 1314
2 13 15 90 915 15805 51 91 94 47 1954
3 30 27 28 147 2429 18 55 86 22 679
4 51 46 6 20 319 4 34 75 13 152
resulting plot
Please Help
I would like to create a Plotly graph in R that is colored green when it is positive and red when it is negative.
I attempted to do this using two separate traces producing the fist plot below which is discontinuous. I then attempted to create a colored trace using the color column which I created by the code below. These are the only implementations that I can think of using the current version of plotly.
> str(results)
'data.frame': 804 obs. of 7 variables:
$ date : Date, format: "2014-03-06" "2014-03-07" "2014-03-10" ...
$ 5yr : num 32.9 32.5 32.9 32.8 32.8 ...
$ 3y5 : num 32.4 32.1 32.5 32.4 32.4 ...
$ spread: num -0.488 -0.431 -0.438 -0.388 -0.452 ...
$ pos : num NA NA NA NA NA NA NA NA NA NA ...
$ neg : num -0.488 -0.431 -0.438 -0.388 -0.452 ...
$ color : chr "red" "red" "red" "red" ...
results$spread <- results[,3] - results[,2]
results$neg <- ifelse(results$spread < 0 , results$spread, NA)
results$pos <- ifelse(results$spread >= 0 , results$spread, NA)
plot_ly(results,
x = ~dates,
y = ~pos,
type = 'scatter',
mode = 'lines',
line = list(color = 'green')) %>%
add_trace(results,
x = ~dates,
y = ~neg,
type = 'scatter',
mode = 'lines',
line = list(color = 'red')) %>%
layout(xaxis = list(title = 'Date'),
yaxis = list(title = 'Price'))
plot_ly(results,
x = ~dates,
y = ~spread,
type = 'scatter',
mode = 'lines',
color = ~color) %>%
layout(xaxis = list(title = 'Date'),
yaxis = list(title = 'Price'))
This was an interesting one. But after a while I realized you can get what you want by inserting a zero value at every zero crossing of your plot:
I think the code is self-explanatory (with the comments)
Here is the code - (with some faked data):
library(plotly)
#fake up some data
set.seed(123)
n <- 100
sdate <- as.Date("2014-03-06")
dt <- seq.Date(sdate,by="days",length.out=n)
results <- data.frame(dates=dt,v1=rnorm(n,32.6,0.2),v2=rnorm(n,32.6,0.2))
results$spread <- results[,3] - results[,2]
# find all the zero crossings
spd <- results$spread
lagspd <- c(spd[1],spd[1:(length(spd)-1)])
crs <- sign(spd)!=sign(lagspd)
results$crs <- crs
# now insert a zero row where there is a crossing
insertZeroRow <- function(df,i){
n <- nrow(df)
ndf1 <- df[1:i,] # note these overlap by 1
ndf2 <- df[i:n,] # that is the row we insert
ndf1$spread[i] <- 0
ndf <- rbind(ndf1,ndf2)
}
i <- 1
while(i<nrow(results)){
if (results$crs[i]){
results <- insertZeroRow(results,i)
i <- i+1
}
i <- i+1
}
# plot it now
results$neg <- ifelse(results$spread <= 0 , results$spread, NA)
results$pos <- ifelse(results$spread >= 0 , results$spread, NA)
plot_ly(results,
x = ~dates,
y = ~pos,
type = 'scatter',
mode = 'lines',
line = list(color = 'green')) %>%
add_trace(results,
x = ~dates,
y = ~neg,
type = 'scatter',
mode = 'lines',
line = list(color = 'red')) %>%
layout(xaxis = list(title = 'Date'),
yaxis = list(title = 'Price'))
And here is the result:
Note you could make it better by interpolating the dates and spread value to get the correct x-axis crossing point, but I think it would not make a huge difference in most cases. If you did that you would need a date type that can represent hours of the day too (like as.POSIXct), in order to be able to specify the correct x-axis value.
Update:
Just to clear up any confusion, adding the zero rows are necessary. If you comment out the insertZeroRow call, you get this:
basically you can change your first implementation in this part of code:
results$spread <- results[,3] - results[,2]
results$neg <- ifelse(results$spread < 0 , results$spread, NA)
results$pos <- ifelse(results$spread >= 0 , results$spread, NA)
adding = in the second line of code:
results$spread <- results[,3] - results[,2]
results$neg <- ifelse(results$spread <= 0 , results$spread, NA)
results$pos <- ifelse(results$spread >= 0 , results$spread, NA)
try, it should work removing the discontinuities
I am trying to use r plotly to plot a chart that has following features:
Date objects as X-variable
2 line plots in one charts with 2 Y-axis: one on the left, the other on the right
Date Amount1 Amount2
2/1/2017 19251130 21698.94
2/2/2017 26429396 10687.37
2/5/2017 669252 0.00
2/6/2017 25944054 11885.10
2/7/2017 27895562 14570.39
2/8/2017 20842279 20080.56
2/9/2017 25485527 9570.51
2/10/2017 17008478 14847.49
2/11/2017 172562 0.00
2/12/2017 379397 900.00
2/13/2017 25362794 18390.80
2/14/2017 26740881 11490.94
2/15/2017 20539413 22358.26
2/16/2017 22589808 12450.45
2/17/2017 18290862 3023.45
2/19/2017 1047087 775.00
2/20/2017 4159070 4100.00
2/21/2017 28488401 22750.35
and the code I use is:
ay <- list(
#tickfont = list(color = "red"),
overlaying = "y",
side = "right"
)
p <- plot_ly() %>%
add_lines(x = df$Date, y = df$Amount1, name = "Amount1",type = "scatter", mode = "lines") %>%
add_lines(x = df$Date, y = df$Amount2, name = "Amount2", yaxis = "y2",type = "scatter", mode = "lines") %>%
layout(
title = "Chart Summary", yaxis2 = ay,
xaxis = list(title="Date")
)
The output chart looks fine but the date intervals on the X-axis is looking bad. I am wondering what is the solution to this, and if I want to have 2 histograms in one chart using the data above, what is the optimal way to do it?
Thank you for help!
Is your Date column a string or date?
If it is a string, convert it to date and let Plotly take care of it.
df$Date <- as.Date(df$Date , "%m/%d/%Y")
Full code
library('plotly')
txt <- "Date Amount1 Amount2
2/1/2017 19251130 21698.94
2/2/2017 26429396 10687.37
2/5/2017 669252 0
2/6/2017 25944054 11885.1
2/7/2017 27895562 14570.39
2/8/2017 20842279 20080.56
2/9/2017 25485527 9570.51
2/10/2017 17008478 14847.49
2/11/2017 172562 0
2/12/2017 379397 900
2/13/2017 25362794 18390.8
2/14/2017 26740881 11490.94
2/15/2017 20539413 22358.26
2/16/2017 22589808 12450.45
2/17/2017 18290862 3023.45
2/19/2017 1047087 775
2/20/2017 4159070 4100
2/21/2017 28488401 22750.35"
df$Date <- as.Date(df$Date , "%m/%d/%Y")
ay <- list(
#tickfont = list(color = "red"),
overlaying = "y",
side = "right"
)
p <- plot_ly() %>%
add_lines(x = df$Date, y = df$Amount1, name = "Amount1",type = "scatter", mode = "lines") %>%
add_lines(x = df$Date, y = df$Amount2, name = "Amount2", yaxis = "y2",type = "scatter", mode = "lines") %>%
layout(
title = "Chart Summary", yaxis2 = ay,
xaxis = list(title="Date", ticks=df$Date)
)
p