I have a probability plot with point-wise confidence intervals fitted to the data. Using the graph I want to compute or locate the 0.001 quantile from the confidence bands. I used the function locator () to find the location of the point. Is there any other method that can be used to find the x-value given the y-value on a graph?
The code I used is as follows.
times <- c (17.88, 28.92, 33.00, 41.52, 42.12, 45.60, 48.40,
51.84, 51.96, 54.12, 55.56, 67.80, 68.64, 68.64,
68.88, 84.12, 93.12, 98.64, 105.12, 105.84, 127.92,
128.04,173.40)
N <- length (times)
t <- c (5, 10, 15)
rank.times <- rank (times) # Use average ranks for the tied observations
ecdf.times <- (rank.times - 0.5) / N
quant.ecdf <- log (-log (1 - ecdf.times))
weibull.ml <- suppressWarnings (fitdist (times, "weibull"))
weibull.cdf <- pweibull (times, shape = weibull.ml$estimate[1],
scale = weibull.ml$estimate[2])
wei <- log (-log (1 - weibull.cdf))
wei.extra <- approxExtrap (log (times), wei, log (t), method = "linear")
quant.wei <- c (wei.extra$y, wei)
set.seed (123)
B <- 2000
wei.boot <- suppressWarnings (bootdist (weibull.ml, bootmethod = "param", niter = B))
boot.cdf.we <- matrix (NA, nrow = B, ncol = N)
for (i in 1:B){
boot.cdf.we[i, ] <- pweibull (times, shape = wei.boot$estim$shape[i],
scale = wei.boot$estim$scale[i])
}
p <- 0.025
upper.wei <- NULL
lower.wei <- NULL
for (i in 1:N) {
upper.wei[i] <- log (-log (1 - quantile (boot.cdf.we[,i], probs = p)))
lower.wei[i] <- log (-log (1 - quantile (boot.cdf.we[,i], probs = 1-p)))
}
extra.wei.l <- approxExtrap (log (times), lower.wei, log (t), method = "linear")
lower.weibull <- c (extra.wei.l$y, lower.wei)
extra.wei.u <- approxExtrap (log (times), upper.wei, log (t), method = "linear")
upper.weibull <- c (extra.wei.u$y, upper.wei)
times.ext <- c (t, times)
loc1 <- c (.001, .005, .02, .05, .1, .2, .4, .6, .8, .9, .95, .98, .995)
loc2 <- log (-log (1 - loc1))
loc3 <- c (5, 9.77, 20, 50, 100, 200)
plot (times, quant.ecdf, log = "x", axes = FALSE, xlab = "Millions of cycles",
ylab = "Proportion failing", pch = 16, type = "p", main = "Weibull - Complete",
xlim = c (5, 200), ylim = c (-6.95, 1.7))
lines (times.ext, quant.wei)
lines (times.ext, upper.weibull)
lines (times.ext, lower.weibull)
abline (h = loc2[1])
segments (9.77789, -7.5, 9.77789, loc2[1])
axis (1, at = loc3, labels = loc3)
axis (2, at = loc2, labels = loc1, las = 2)
Thanks in advance!!
Related
I want to plot function res for different value of epsilon=0.1, 0.2,0.3,0.9 in the same plot in R.
My setting is that
#make this example reproducible
set.seed(1001)
n <- 500
#Sample GOE random matrix
A <- matrix(rnorm(n*n, mean=0, sd=1), n, n)
G <- (A + t(A))/sqrt(2*n)
ev <- eigen(G)
l <- ev$values
v <- ev$vectors
#size of multivariate distribution
mean <- rep(0, n)
var <- diag(n)
#simulate bivariate normal distribution
initial <- MASS::mvrnorm(n=1000, mu=mean, Sigma=var) #ten random vectors
#normalized the first possible initial value, the initial data uniformly distributed on the sphere
xmats <- lapply(1:1000, function(i) initial[i, ]/norm(initial[i, ], type="2"))
#define my function
h1t <- function(t,x_0) {
h10 <- c(x_0 %*% v[, n])
denom <- vapply(t, function(.t) {
sum((x_0 %*% v)^2 * exp(-4*(l - l[n]) * .t))
}, numeric(1L))
abs(h10) / sqrt(denom)
}
For 1000 initial value x_0 from normal distribution (I put them in a matrix xmats), I can plot all value of t so that h1t=epsilon for epsilon=0.9.
#set epsilon=0.9
find_t <- function(x, epsilon = 0.9, range = c(-500, 500)) {
uniroot(function(t) h1t(t, x) - epsilon, range,
tol = .Machine$double.eps)$root
}
res <- lapply(xmats, find_t)
plot(density(unlist(res)), xlim = c(0, 300),col = "red",
main = "Fix epsilon=0.9. Density of tau_epsilon for different initial value for n=500")
I got:
Question: How do I plot the graph of res for different epsilon in the same plot?
You can do this by applying your function to the parameter. I called the result find_t03 and call lines after the plot call with this new result.
I have added ylim to the plot and also added a break in the title.
#set epsilon=0.9
find_t <- function(x, epsilon = 0.9, range = c(-500, 500)) {
uniroot(function(t) h1t(t, x) - epsilon, range,
tol = .Machine$double.eps)$root
}
#set epsilon=0.3
find_t03 <- function(x, epsilon = 0.3, range = c(-500, 500)) {
uniroot(function(t) h1t(t, x) - epsilon, range,
tol = .Machine$double.eps)$root
}
res <- lapply(xmats, find_t)
res03 <- lapply(xmats, find_t03)
plot(density(unlist(res)), xlim = c(0, 200),
ylim=c(0, 0.2),col = "red",
main = paste0("Fix epsilon=0.9 (red) and 0.3 (black).",
"\n", "Density of tau_epsilon for different initial value for n=500"))
lines(density(unlist(res03)), ylim = c(0, 1000))
I am running Fuzzy C-Means Clustering using e1071 package. I want to decide the optimum number of clusters based on fuzzy performance index (FPI) (extent of fuzziness) and normalized classification entropy (NCE) (degree of disorganization of specific class) given in the following formula
where c is the number of clusters and n is the number of observations, μik is the fuzzy membership and loga is the natural logarithm.
I am using the following code
library(e1071)
x <- rbind(matrix(rnorm(100,sd=0.3),ncol=2),
matrix(rnorm(100,mean=1,sd=0.3),ncol=2))
cl <- cmeans(x,2,20,verbose=TRUE,method="cmeans")
cl$membership
I have been able to extract the μik i.e. fuzzy membership. Now, cmeans has to for different number of clusters e.g. 2 to 6 and the FPI and NCE has to be calculated to have a plot like the following
How can it be achieved in R?
Edit
I have tried the code provided by #nya for iris dataset using the following code
df <- scale(iris[-5])
FPI <- function(cmem){
c <- ncol(cmem)
n <- nrow(cmem)
1 - (c / (c - 1)) * (1 - sum(cmem^2) / n)
}
NCE <- function(cmem){
c <- ncol(cmem)
n <- nrow(cmem)
(n / (n - c)) * (- sum(cmem * log(cmem)) / n)
}
# prepare variables
cl <- list()
fpi <- nce <- NULL
# cycle through the desired number of clusters
for(i in 2:6){
cl[[i]] <- cmeans(df, i, 20, method = "cmeans")
fpi <- c(fpi, FPI(cl[[i]]$membership))
nce <- c(nce, NCE(cl[[i]]$membership))
}
# add space for the second axis label
par(mar = c(5,4,1,4) + .1)
# plot FPI
plot(2:6, fpi, lty = 2, pch = 18, type = "b", xlab = "Number of clusters", ylab = "FPI")
# plot NCE, manually adding the second axis
par(new = TRUE)
plot(2:6, nce, lty = 1, pch = 15, type = "b", xlab = "", ylab = "", axes = FALSE)
axis(4, at = pretty(range(nce)))
mtext("NCE", side = 4, line = 3)
# add legend
legend("top", legend = c("FPI", "NCE"), pch = c(18,15), lty = c(2,1), horiz = TRUE)
The minimum values of fuzzy performance index(FPI) and normalized classification entropy (NCE) were considered to decide the optimum number of clusters. NCE is always increasing and FPI is showing the decreasing value. Ideally it should have been
With available equations, we can program our own functions. Here, the two functions use equations present in the paper you suggested and one of the references the authors cite.
FPI <- function(cmem, method = c("FuzME", "McBrathney", "Rahul")){
method = match.arg(method)
C <- ncol(cmem)
N <- nrow(cmem)
# Rahul et al. 2019. https://doi.org/10.1080/03650340.2019.1578345
if(method == "Rahul"){
res <- 1 - (C / (C - 1)) * (1 - sum(cmem^2) / N)
}
# McBrathney & Moore 1985 https://doi.org/10.1016/0168-1923(85)90082-6
if(method == "McBrathney"){
F <- sum(cmem^2) / N
res <- 1 - (C * F - 1) / (F - 1)
}
# FuzME https://precision-agriculture.sydney.edu.au/resources/software/
# MATLAB code file fvalidity.m, downloaded on 11 Nov, 2021
if(method == "FuzME"){
F <- sum(cmem^2) / N
res <- 1 - (C * F - 1) / (C - 1)
}
return(res)
}
NCE <- function(cmem, method = c("FuzME", "McBrathney", "Rahul")){
method = match.arg(method)
C <- ncol(cmem)
N <- nrow(cmem)
if(method == "Rahul"){
res <- (N / (N - C)) * (- sum(cmem * log(cmem)) / N)
}
if(method %in% c("FuzME", "McBrathney")){
H <- -1 / N * sum(cmem * log(cmem))
res <- H / log(C)
}
return(res)
}
Then use those to calculate the indices from the degrees of membership from the cmeans function from the iris dataset.
# prepare variables
cl <- list()
fpi <- nce <- NULL
# cycle through the desired number of clusters
for(i in 2:6){
cl[[i]] <- e1071::cmeans(iris[, -5], i, 20, method = "cmeans")
fpi <- c(fpi, FPI(cl[[i]]$membership, method = "M"))
nce <- c(nce, NCE(cl[[i]]$membership, method = "M"))
}
Last, plot with two different axes in one plot.
# add space for the second axis label
par(mar = c(5,4,1,4) + .1)
# plot FPI
plot(2:6, fpi, lty = 2, pch = 18, type = "b", xlab = "Number of clusters", ylab = "FPI")
# plot NCE, manually adding the second axis
par(new = TRUE)
plot(2:6, nce, lty = 1, pch = 15, type = "b", xlab = "", ylab = "", axes = FALSE)
axis(4, at = pretty(range(nce)))
mtext("NCE", side = 4, line = 3)
# add legend
legend("top", legend = c("FPI", "NCE"), pch = c(18,15), lty = c(2,1), horiz = TRUE)
EDIT1: Updated the functions according to optional equations from two different publications and calculated the example on the iris dataset.
EDIT2: Added code for the FPI and NCE calculations specified in the FuzME MATLAB code available here.
Hope this could help
library(dplyr)
library(ggplot2)
f <- function(cl) {
C <- length(cl$size)
N <- sum(cl$size)
mu <- cl$membership
fpi <- 1 - C / (C - 1) * (1 - sum((mu)^2) / N)
nce <- N / (N - C) * (-sum(log(mu) * mu) / N)
c(FPI = fpi, NCE = nce)
}
data.frame(t(rbind(
K = 2:6,
sapply(
K,
function(k) f(cmeans(x, k, 20, verbose = TRUE, method = "cmeans"))
)
))) %>%
pivot_longer(cols = FPI:NCE, names_to = "Index") %>%
ggplot(aes(x = K, y = value, group = Index)) +
geom_line(aes(linetype = Index, color = Index)) +
geom_point() +
scale_y_continuous(
name = "FPI",
sec.axis = sec_axis(~., name = "NCE")
) +
theme(legend.position = "top")
I am trying to mimic the graphpad ELISA analysis using R, however I am having a bit of difficulty getting a P-Value and an R^2 value.
I have followed the tutorial: http://weightinginbayesianmodels.github.io/poctcalibration/calib_tut4_curve_ocon.html#unweighted-nonlinear-regression-in-r
It got me a majority of the information needed using a package called "minpack.lm", however I am not sure how to approach getting the R^2 and P value from here.
ODCalc1 <- c(.007, .072, .328, .988, 1.534, 1.983)
ODCalc2 <- c(.006, .074, .361, .858, 1.612, 1.993)
ODCalc <- (ODCalc1 + ODCalc2)/2
concentration <- log10(c(1, 36, 180, 540, 1080, 1800))
ocon <- data.frame(10^(concentration), "rep", ODCalc, stringsAsFactors = F)
ocon$X.rep. <- as.numeric(ocon$X.rep.)
ocon$X.rep. <- 1
names(ocon) <- c("conc", "rep", "od")
# Plot the O'Connell data
par(mfrow = c(1, 2), cex.main = 1, mar = c(4, 4, 1, 2), oma = c(0.5, 0.5, 2.5, 0))
plot(ocon$conc, ocon$od, pch = 21, bg = "grey", ylab = "Response (od)",
xlab = "Concentration")
grid()
# Plot on the log(x) scale
plot(log(ocon$conc), ocon$od, pch = 21, bg = "grey", ylab = "Response (od)",
xlab = "log(concentration)")
grid()
title("O'Connell's ELISA: concentration on absolute (left) and log (right) scales",
outer = T)
par(mfrow = c(1, 1))
# ------------ Function: 4PL curve function ---------------------------------
M.4pl <- function(x, small.x.asymp, inf.x.asymp, inflec, hill){
f <- small.x.asymp + ((inf.x.asymp - small.x.asymp)/
(1 + (x / inflec)^hill))
return(f)
}
# ------------- end ---------------------------------------------------------
start.ocon <- c(small.x.asymp = 0.1, inf.x.asymp = 1, inflec = 3000, hill = -1)
library(minpack.lm)
uw.4pl <- nlsLM(od ~ M.4pl(conc, small.x.asymp, inf.x.asymp, inflec, hill),
data = ocon,
start = start.ocon)
data.4pl <- summary(uw.4pl)
bottom.4pl <- data.4pl$parameters[1,1]
top.4pl <- data.4pl$parameters[2,1]
IC50.4pl <- data.4pl$parameters[3,1]
HillSlope.4pl <- abs(data.4pl$parameters[4,1])
RSS.p <- sum(residuals(uw.4pl)^2)
TSS <- sum((ocon$od - mean(ocon$od))^2)
r.squared <- 1-(RSS.p/TSS) # is this the proper way to get an r^2 value? It does not match what graphpad has which is an issue.
# I have also read this should work, but since the model is a linear model instead of a Sigmoidal, 4PL, X is log (concentration) model
model <- lm(concentration ~ poly(ODCalc, degree = 4, raw=T))
summary(model) # R^2 is not the correct value I am looking for.
# Not sure if sample data is needed but these were the values we were using to produce the values below
sample.od.values1 <- c(0.275, 1.18, 0.085, 0.054, 0.119)
sample.od.values2 <- c(0.263, 1.149, 0.068, 0.062, 0.109)
sample.od.values <- (sample.od.values1+sample.od.values2)/2
Values to prove the methods are the same:
bottom.4pl = 0.01657
top.4pl = 3.002
HillSlope = 1.222
R^2 = 0.9978
R^2(adjusted) = 0.9969
P-Value = 0.5106
Thank you in advance for any helpful tips!
Since R^2 measures linear association it is normally used for linear regression but ignoring that this seems to give the numbers you want or at least numbers that are close to those. For the adjusted R squared formula see https://en.wikipedia.org/wiki/Coefficient_of_determination#Adjusted_R2 and for the p-value I have assumed that you are looking for the p-value for the hypothesis that the first coefficient is zero.
RSS <- deviance(uw.4pl); RSS
## [1] 0.001514624
coef(uw.4pl) # coefficients/parameters
## small.x.asymp inf.x.asymp inflec hill
## 0.01654996 3.00261439 1033.53324214 -1.22171740
R2 <- cor(ocon$od, fitted(uw.4pl))^2; R2
## [1] 0.9995529
n <- nobs(uw.4pl)
p <- length(coef(uw.4pl))
adjR2 <- 1 - (1-R2) * (n - 1) / (n - p - 1); adjR2
## [1] 0.9977645
pvalue <- coef(summary(uw.4pl))[1, 4]; pvalue
## [1] 0.5486584
Background:
Below I have generated some random beta data using R and manipulate the shape of the data a bit to arrive at what I call "Final" in my code. And I histogram "Final" in my code.
Question:
I'm wondering why when trying to fit a "beta" distribution to "Final" data using MASS packages' "fitdistr" function, I get the following error (Any suggestion how to avoid this error)?
Error in stats::optim(x = c(0.461379379270288, 0.0694261016478062, 0.76934266883081, :
initial value in 'vmmin' is not finite
Here is my R code:
require(MASS)
## Generate some data and manipulate it
set.seed(47)
Initial = rbeta(1e5, 2, 3)
d <- density(Initial)
b.5 <- dbeta(seq(0, 1, length.out = length(d$y)), 50, 50)
b.5 <- b.5 / (max(b.5) / max(d$y)) # Scale down to max of original density
b.6 <- dbeta(seq(0, 1, length.out = length(d$y)), 60, 40)
b.6 <- b.6 / (max(b.6) / max(d$y))
# Collect maximum densities at each x to use as sample probability weights
p <- pmax(d$y, b.5, b.6)
Final <- sample(d$x, 1e4, replace = TRUE, prob = p) ## THIS IS MY FINAL DATA
hist(Final, freq = F, ylim = c(0, 2)) ## HERE IS A HISTOGRAM
m <- MASS::fitdistr(Final, "beta", ## RUN THIS TO SEE HOW THE ERROR COMES UP
start = list(shape1 = 1, shape2 = 1))
Here is the code.
It is the same with your code, I just removed the negative beta values.
library(MASS)
set.seed(47)
Initial = rbeta(1e5, 2, 3)
d <- density(Initial)
b.5 <- dbeta(seq(0, 1, length.out = length(d$y)), 50, 50)
b.5 <- b.5 / (max(b.5) / max(d$y)) # Scale down to max of original
density
b.6 <- dbeta(seq(0, 1, length.out = length(d$y)), 60, 40)
b.6 <- b.6 / (max(b.6) / max(d$y))
# Collect maximum densities at each x to use as sample probability weights
p <- pmax(d$y, b.5, b.6)
Final <- sample(d$x, 1e4, replace = TRUE, prob = p) ## THIS IS MY FINAL DATA
hist(Final, freq = F, ylim = c(0, 2)) ## HERE IS A HISTOGRAM
# replace negative beta values with smallest value > 0
Final[Final<= 0] <- min(Final[Final>0])
hist(Final, freq = F, ylim = c(0, 2))
m <- MASS::fitdistr(x = Final, densfun = "beta",
start = list(shape1 = 1, shape2 = 1))
Here are the shape parameters:
> m
shape1 shape2
1.99240852 2.90219720
(0.02649853) (0.04010168)
Take note that it gives some warnings.
I'm using the function gammamixEM from the package mixtools. How can I return the graphical output of density as in the function normalmixEM (i.e., the second plot in plot(...,which=2)) ?
Update:
Here is a reproducible example for the function gammamixEM:
x <- c(rgamma(200, shape = 0.2, scale = 14), rgamma(200,
shape = 32, scale = 10), rgamma(200, shape = 5, scale = 6))
out <- gammamixEM(x, lambda = c(1, 1, 1)/3, verb = TRUE)
Here is a reproducible example for the function normalmixEM:
data(faithful)
attach(faithful)
out <- normalmixEM(waiting, arbvar = FALSE, epsilon = 1e-03)
plot(out, which=2)
I would like to obtain this graphical output of density from the function gammamixEM.
Here you go.
out <- normalmixEM(waiting, arbvar = FALSE, epsilon = 1e-03)
x <- out
whichplots <- 2
density = 2 %in% whichplots
loglik = 1 %in% whichplots
def.par <- par(ask=(loglik + density > 1), "mar") # only ask and mar are changed
mix.object <- x
k <- ncol(mix.object$posterior)
x <- sort(mix.object$x)
a <- hist(x, plot = FALSE)
maxy <- max(max(a$density), .3989*mix.object$lambda/mix.object$sigma)
I just had to dig into the source code of plot.mixEM
So, now to do this with gammamixEM:
x <- c(rgamma(200, shape = 0.2, scale = 14), rgamma(200,
shape = 32, scale = 10), rgamma(200, shape = 5, scale = 6))
gammamixEM.out <- gammamixEM(x, lambda = c(1, 1, 1)/3, verb = TRUE)
mix.object <- gammamixEM.out
k <- ncol(mix.object$posterior)
x <- sort(mix.object$x)
a <- hist(x, plot = FALSE)
maxy <- max(max(a$density), .3989*mix.object$lambda/mix.object$sigma)
main2 <- "Density Curves"
xlab2 <- "Data"
col2 <- 2:(k+1)
hist(x, prob = TRUE, main = main2, xlab = xlab2,
ylim = c(0,maxy))
for (i in 1:k) {
lines(x, mix.object$lambda[i] *
dnorm(x,
sd = sd(x)))
}
I believe it should be pretty straight forward to continue this example a bit, if you want to add the labels, smooth lines, etc. Here's the source of the plot.mixEM function.