plotly R reordering factor based on numeric variable - r

I need to reorder the x-axis categorical variable with a numeric variable so the plot is simpler to follow with plotly, in R.
Here is some dummy data:
agg = structure(list(catvar = c("a", "b", "c", "d", "e", "f", "g",
"h", "i", "j", "k", "l", "m", "n", "o"), v1 = c(1.1, 1.3, 1,
0.8, 1.2, 1.4, 0.7, 2.5, 0.9, 2.5, 1.7, 0.9, 1.7, 1.1, 0.9),
v2 = c(0.1, 0.1, 0.1, 0, 0.1, 0.1, 0, 0.2, 0, 0.2, 0.1, 0.1,
0.1, 0.1, 0.1), v3 = c(7.3, 8.2, 6.4, 6, 7.5, 8.4, 5.8, 12.4,
6.4, 13.1, 9.3, 6.2, 9.4, 7.1, 6.3)), .Names = c("catvar",
"v1", "v2", "v3"), class = "data.frame", row.names = c(NA, -15L
))
Here is what I've been trying out. It seems recently a new feature was added to handle this but it's not working.
# the reordering is handled here:
ax <- list(
type = "category",
categoryorder = "array",
categoryarray = agg$catvar[order(agg[['v1']])],
showgrid = TRUE,
showticklabels = TRUE
)
p <- plot_ly(agg, x =~catvar,
y =~v2, name = 'v2',
type = 'scatter', mode = 'lines') %>%
add_trace(y =~v1, name = 'v1') %>%
add_trace(y =~v3, name = 'v3') %>%
layout(
xaxis = ax,
hovermode = 'x+y')
p
All I need is to re-order the x-axis (catvar) based on any of the numeric varaibles in the data.frame, these are v1, v2, v3.

You need to add to your code a command for reordering your dataset according to v1.
agg2 <- agg[order(agg[['v1']]),]
p <- plot_ly(agg2, x =~catvar,
y =~v2, name = 'v2',
type = 'scatter', mode = 'points') %>%
add_trace(y =~v1, name = 'v1') %>%
add_trace(y =~v3, name = 'v3') %>%
layout(
xaxis = ax,
hovermode = 'x+y')
p

Related

ggpattern change stripe angle for one element

How can I change the angle of the stripe pattern in only one element? For example, I want only the stripe pattern of "V" to be at -30 degrees.
Also, there seems to be an issue with my "NA" values / none pattern and no pattern appears after a NA value.
library(swimplot)
library(ggpattern)
library(tidyverse)
df <- data.frame(
study_id = c(3, 3, 3,3), primary_therapy = c("Si", "Si", "Si", "Si"),
additional_therapy = c("NA", "S", "NA", "V+S"), end_yr = c(0.08, 0.39, 3.03, 3.4)
)
swimmer_plot(
df = df, id = "study_id",
end = "end_yr", name_fill = "primary_therapy",
width = 0.85, color = NA) +
geom_col_pattern(aes(study_id, end_yr,
pattern = additional_therapy), color=NA,
fill = NA,
show.legend=FALSE, width=0.85,
pattern_spacing = 0.01, pattern_fill="black", pattern_color=NA,
pattern_size = 0.5, pattern_density=0.1,
pattern_linetype = 0.5, pattern_orientation="vertical") +
scale_pattern_manual(name="Additional Therapy", values = c("S"="stripe","V"="stripe","V+S"="crosshatch","NA"="none"))
For demonstration purposes, I changed your dataframe so you can see how the levels of additional_therapy get plotted, since your example dataframe didn't include any appearances of the level "V".
To achieve your goal of changing the stripe element for one level of additional_therapy, you need to add the argument pattern_angle back into geom_col_pattern and then add an extra line for scale_pattern_angle_manual() to specify which levels' patterns get set at which angles.
library(swimplot)
library(ggpattern)
library(tidyverse)
df <- data.frame(
study_id = c(3, 3, 3, 3, 3),
primary_therapy = c("Si", "Si", "Si", "Si", "Si"),
additional_therapy = c("NA", "NA", "S", "V", "V+S"),
end_yr = c(0.08, 1.11, 2.11, 3.03, 3.4)
)
# Convert additional_therapy to ordered factor (optional but highly recommended)
# This just determines the order that the items in the Additional Therapy legend appear in
df <- df %>% mutate(additional_therapy = factor(additional_therapy, levels = c("S", "V", "V+S", "NA")))
swimmer_plot(
df = df, id = "study_id",
end = "end_yr", name_fill = "primary_therapy",
width = 0.85, color = NA) +
geom_col_pattern(aes(study_id, end_yr,
pattern = additional_therapy,
pattern_angle = additional_therapy
),
color=NA,
fill = NA,
show.legend=TRUE, # so you can see the legend
width=0.85,
pattern_spacing = 0.01,
pattern_fill="black",
pattern_color=NA,
pattern_size = 0.5,
pattern_density=0.1,
pattern_linetype = 0.5,
pattern_orientation="vertical") +
scale_pattern_manual(name="Additional Therapy", values = c("S"="stripe","V"="stripe","V+S"="crosshatch","NA"="none")) +
scale_pattern_angle_manual(name="Additional Therapy", values = c(30, -30, 30, 30))
Unfortunately, to the best of my knowledge, there is a problem with ggpattern that is causing the issue with the x-axis. I discussed it in another of your questions here. I confirmed that it wasn't an issue with swimplot.

Is it possible to create bubble chart using Echarts4R

I'm trying to create a bubble chart that look like this using e_scatter.
This is what the data looks like and the state I am able to recreate now
data <- data.frame(
group = c("Upper", "Upper", "Upper", "Upper", "Upper", "Upper", "Upper", "Upper", "Lower", "Lower"),
category = c("A", "B", "C", "D", "E", "F", "G", "H", "a", "b"),
size = c(0.74, 0.72, 0.68, 0.67, 0.63, 0.54, 0.53, 0.49, 0.02, 0.02),
sample_x = c(0, 0.2, 0.4, 0.6, 0.8, 0.8, 0.6, 0.4, 0.2, 0),
sample_y = c(1, 2, 3, 2, 1, -1, -2, -3, -2, -1)
)
data |>
group_by(group) |>
e_charts(sample_x) |>
e_scatter(sample_y, size)
Originally the data only have group, category and value columns
Because I don't have x and y in the data, is it possible to recreate the bubble chart using e_scatter?
Or am I using the wrong chart type for this?
(I did try by adding e_polar but I couldn't scale the size of the dot using size column)

Coloured Box Plot With Precomputed Quartiles

I'm trying to colour a r-plotly boxplot with custom values, but it remains in the default blue colour.
For example - see the code in the official tutorial: https://plotly.com/r/box-plots/#box-plot-with-precomputed-quartiles. When I add the code for coloring, nothing happens with the colours:
fig <- plot_ly(y = list(1,2,3,4,5,6,7,8,9), type = "box", q1=list(1, 2, 3), median=list(4, 5, 6),
q3=list(7, 8, 9 ), lowerfence=list(-1, 0, 1),
upperfence=list(5, 6, 7), mean=list(2.2, 2.8, 3.2 ),
sd=list(0.2, 0.4, 0.6), notchspan=list(0.2, 0.4, 0.6),
color = list(1,1,2),
colors = list("red2", "grey"))
fig
What am I doing wrong?
The example has several issues, so I don't know if I understood the aim of the OP correctly. In the following, I removed the dummy ydata and replaced it with ID characters, used vectors instead of lists (except for y) and a formula with ~ for the color mapping:
library("plotly")
fig <- plot_ly(
y = c("A", "B", "C"),
type = "box",
q1 = c(1, 2, 3),
median = c(4, 5, 6),
q3 = c(7, 8, 9 ),
lowerfence = c(-1, 0, 1),
upperfence = c(5, 6, 7),
mean = c(2.2, 2.8, 3.2 ),
sd = c(0.2, 0.4, 0.6),
notchspan = c(0.2, 0.4, 0.6),
color = ~ c("a", "a", "c"),
colors = c("red2", "grey")
)
fig
For the mean and sd values, I left the original dummy values as in the original example.

How do I find best starting values for nlimb optimization?

I am trying to run the following code with the attached dataset. How do I solve the error of hessian matrix inversion?
library(stats4)
library(bbmle)
library(stats)
library(numDeriv)
library('bbmle')
x<- c(1.1, 1.4, 1.3, 1.7,1.9, 1.8, 1.6, 2.2, 1.7, 2.7, 4.1, 1.8, 1.5, 1.2, 1.4, 3, 1.7, 2.3, 1.6, 2.0)
hist(x)
fEHLKUMW<-function(a,b,alpha,vartheta)
{
-sum(log( (2*a*b*alpha*vartheta*(x^(vartheta-1))*(exp(-x^(vartheta)))*((1- (exp(-x^(vartheta))))^(a-1))*((1-((1-((1-(exp(-x^(vartheta))))^a))^b))^(alpha-1)))/((1-((1-(exp(-x^(vartheta))))^a))^(b*(alpha+1)))
))
}
EHLKUMW.result<-mle2(fEHLKUMW,hessian = NULL,start=list(a=0.01,b=0.01,alpha=.3,vartheta=0.01),optimizer="nlminb",lower=0)
summary(EHLKUMW.result)
I get the error as;
**
Warning messages:
1: In nlminb(start = start, objective = objectivefunction, hessian = NULL, :
NA/NaN function evaluation
2: In mle2(fEHLKUML, hessian = NULL, start = list(a = 1, b = 0.4, c = 0.5, :
couldn't invert Hessian
**
This is a very open question I think, so I'll present some tools and an approach to this, and maybe others can comment, etc.
First, the main part:
library(bbmle)
library(stats)
library(numDeriv)
library(bbmle)
x<- c(1.1, 1.4, 1.3, 1.7,1.9, 1.8, 1.6, 2.2, 1.7, 2.7, 4.1, 1.8, 1.5, 1.2, 1.4, 3, 1.7, 2.3, 1.6, 2.0)
hist(x)
fEHLKUMW <- function(a,b,alpha,vartheta) {
-sum(log( (2*a*b*alpha*vartheta*(x^(vartheta-1))*(exp(-x^(vartheta)))*((1- (exp(-x^(vartheta))))^(a-1))*((1-((1-((1-(exp(-x^(vartheta))))^a))^b))^(alpha-1)))/((1-((1-(exp(-x^(vartheta))))^a))^(b*(alpha+1)))
))
}
Now, we can of course run it like you've done:
EHLKUMW.result <- mle2(
fEHLKUMW,
hessian = NULL,
start = list(
a = 0.01,
b = 0.01,
alpha = .3,
vartheta = 0.01
),
optimizer = "nlminb",
lower = 0
)
But we can also run it with a distribution on each of these parameters, to get a new input all the time:
EHLKUMW.result <- mle2(
fEHLKUMW,
hessian = NULL,
start =
list(
# a = 0.01,
# a = rt(1, 10, ncp = 0.01),
a = truncdist::rtrunc(1, spec = "t", a = 0, ncp = 0.01, df = 10),
# b = 0.01,
b = truncdist::rtrunc(1, spec = "t", a = 0, ncp = 0.01, df = 10),
# alpha = .3,
alpha = truncdist::rtrunc(1, spec = "t", a = 0, ncp = 0.3, df = 10),
# vartheta = 0.01
vartheta = truncdist::rtrunc(1, spec = "t", a = 0, ncp = 0.01, df = 10)
),
optimizer = "nlminb",
lower = 0
)
I've chosen to use trundist to get a t-distribution, centralised around
the values you provided, and lower is 0 through the a = -argument.
If you know what the upper limit of these parameters, this can be done with b = -argument.
The output that I think is most relevant are the attained logLik and the coef.
library(tidyverse)
exec(fEHLKUMW, !!!list(
a = 0.01,
b = 0.01,
alpha = .3,
vartheta = 0.01
))
replicate(
250,
exec(fEHLKUMW, !!!list(
# a = 0.01,
# a = rt(1, 10, ncp = 0.01),
a = truncdist::rtrunc(1, spec = "t", a = 0, ncp = 0.01, df = 10),
# b = 0.01,
b = truncdist::rtrunc(1, spec = "t", a = 0, ncp = 0.01, df = 10),
# alpha = .3,
alpha = truncdist::rtrunc(1, spec = "t", a = 0, ncp = 0.3, df = 10),
# vartheta = 0.01
vartheta = truncdist::rtrunc(1, spec = "t", a = 0, ncp = 0.01, df = 10)
)))
I used this to fine-tune the distributions the parameters
The following is a multiple runs and their output compared to
logLik.
tibble(n = seq_len(100),
output = map(n, ~mle2(
fEHLKUMW,
hessian = NULL,
start =
list(
# a = 0.01,
# a = rt(1, 10, ncp = 0.01),
a = truncdist::rtrunc(1, spec = "t", a = 0, ncp = 0.01, df = 10),
# b = 0.01,
b = truncdist::rtrunc(1, spec = "t", a = 0, ncp = 0.01, df = 10),
# alpha = .3,
alpha = truncdist::rtrunc(1, spec = "t", a = 0, ncp = 0.3, df = 10),
# vartheta = 0.01
vartheta = truncdist::rtrunc(1, spec = "t", a = 0, ncp = 0.01, df = 10)
),
optimizer = "nlminb",
lower = 0
))) ->
outputs_df
This code gives a nice print
outputs_df %>%
mutate(coef = output %>% map(coef),
logLik = output %>% map_dbl(logLik)) %>%
unnest_wider(coef) %>%
arrange(logLik) %>%
print(n=Inf)

Error with using unlist, lapply and grepl in data.tables R

This question is an extension of this particular question. I have this particular data.table. I'm using data.table, mc2d, and e1071 libraries
library("data.table")
library("mc2d")
library("e1071")
col <- c("COST","TIME")
dt <- structure(
list(
ID = c("a", "b", "c", "d", "e", "f", "g", "h", "i", "j"),COST_PR_L = c(NA, 0.4, 0.31, 0.4, 0.5, 0.17, 1, 0.5, 0.5, 0.5),COST_PR_U = c(7.5, 2, 2.67, 1.67, 2.4,2, 1.5, 2, 2, 1.67),COST_PO_L = c(NA, 0.33, 0.25, 0.44,0.5, 0.25, 1, 0.5, 0.5, 0.5),COST_PO_U = c(3, 1.43, 3.33,1.8, 2.4, 3.6, 1.45, 2, 1.5, 1.67), TIME_PR_L = c(NA, 0.5,0.4, 0.5, 0.5, NA, 0.67, 0.5, 0.5, 0.5), TIME_PR_U = c(2,2.5, 3, 1.5, 2, NA, 1.5, 2, 1.67, 2), TIME_PO_L = c(NA,0.4, 0.25, 0.56, 0.5, NA, 0.6, 0.5, 0.5, 0.5), TIME_PO_U = c(2,2, 5, 1.67, 2.5, NA, 1.5, 2, 1.67, 2)
),.Names = c("ID","COST_PR_L", "COST_PR_U","COST_PO_L","COST_PO_U","TIME_PR_L","TIME_PR_U","TIME_PO_L","TIME_PO_U"),class = c("data.table","data.frame"),row.names = c(NA,-10L))
When I run this particular operation on it,
dt[, unlist(lapply(col, function(xx) {
y = colnames(dt)[grepl(pattern = xx, x = colnames(dt))]
vars1 = y[grepl(pattern = "PR", x = y)]
vars2 = y[grepl(pattern = "PO", x = y)]
mn = get(vars1[1])
mx = get(vars1[2])
sk1 = ifelse(mn !=0 && mx !=0,skewness(rpert(1000, min = mn , mode = 1, max= mx )),-1)
mn = get(vars2[1])
mx = get(vars2[2])
sk2 = ifelse(mn !=0 && mx !=0,skewness(rpert(1000, min = mn , mode = 1, max= mx )),-1)
return(list(sk1, sk2))
}), recursive = FALSE)
, by = "ID"]
I get the following error
Error in [.data.table(dt, , unlist(lapply(col, function(xx) { :
Column 1 of result for group 2 is type 'double' but expecting type
'logical'. Column types must be consistent for each group.
However, If I remove the unlist in the code, It seems to calculate the answer. What is unlist doing that is messing it up?

Resources