lasso/doodler tool for selecting points/drawing inside a plot - r

Sometimes I would like to use a mouse to draw a circle or squiggly shape around my plotted points to select these points specifically. Has anyone built functionality to do this yet? Perhaps something requiring Tcl/tk?

You could take advantage of locator(), then pass the coordinates to a drawing function such as draw.circle() from plotrix. Wrapped up in a function for ease of use (usage first, definition below):
plot(rnorm(100))
click.shape('circle', border = 'red', col = NA)
click.shape <- function(shape = c('circle', 'arrow', 'rect', 'cyl', 'line', 'poly'),
                        corners = 3L, ...) {
  shape <- match.arg(shape)
  coords <- if (shape %in% 'poly')
    locator(as.integer(corners)) else unlist(locator(2L))
  ARROW <- function(...) {
    arrows(coords[1L], coords[3L], coords[2L], coords[4L], ...)
  }
  CIRCLE <- function(...) {
    require(plotrix)
    rad <- sqrt(((coords[2L] - coords[1L]) ^ 2) + ((coords[4L] - coords[3L]) ^ 2))
    draw.circle(coords[1L], coords[3L], radius = rad, ...)
  }
  CYL <- function(...) {
    require(plotrix)
    cylindrect(coords[1L], coords[3L], coords[2L], coords[4L], ...)
  }
  LINE <- function(...) {
    segments(coords[1L], coords[3L], coords[2L], coords[4L], ...)
  }
  POLY <- function(...) {
    polygon(coords, ...)
  }
  RECT <- function(...) {
    rect(coords[1L], coords[3L], coords[2L], coords[4L], ...)
  }
  suppressWarnings(
    switch(shape, arrow = ARROW(...), circle = CIRCLE(...), cyl = CYL(...),
           line = LINE(...), poly = POLY(...), rect = RECT(...),
           stop('Invalid shape'))
  )
}
Another option, which I haven't had time to expand on lately:
set.seed(1618)
x <- runif(10)
y <- rnorm(10, mean = 5)
par(mfrow = c(1, 2))
plot(x, y, xlab = 'mean', ylab = 'sd')
zoomin(x, y)
## ESC to quit
Code for zoomin:
zoomin <- function(x, y, ...) {
  op <- par(no.readonly = TRUE)
  on.exit(par(op))
  ans <- identify(x, y, n = 1, plot = FALSE, ...)
  zoom <- function (x, y, xlim, ylim, xd, yd) {
    rxlim <- x + c(-1, 1) * (diff(range(xd)) / 20)
    rylim <- y + c(-1, 1) * (diff(range(yd)) / 20)
    par(mfrow = c(1, 2))
    plot(xd, yd, xlab = 'mean', ylab = 'sd')
    xext <- yext <- rxext <- ryext <- 0
    if (par('xaxs') == 'r') {
      xext <- diff(xlim) * 0.04
      rxext <- diff(rxlim) * 0.04
    }
    if (par('yaxs') == 'r') {
      yext <- diff(ylim) * 0.04
      ryext <- diff(rylim) * 0.04
    }
    rect(rxlim[1] - rxext, rylim[1] - ryext, rxlim[2] + rxext, rylim[2] + ryext)
    xylim <- par('usr')
    xypin <- par('pin')
    rxi0 <- xypin[1] * (xylim[2] - (rxlim[1] - rxext)) / diff(xylim[1:2])
    rxi1 <- xypin[1] * (xylim[2] - (rxlim[2] + rxext)) / diff(xylim[1:2])
    y01i <- xypin[2] * (xylim[4] - (rylim[2] + ryext)) / diff(xylim[3:4])
    y02i <- xypin[2] * ((rylim[1] - ryext) - xylim[3]) / diff(xylim[3:4])
    mu <- x
    curve(dnorm(x, mean = mu, sd = y), from = -4 * y + mu, to = 4 * y + mu,
          xlab = paste('mean:', round(mu, 2), ', sd: ', round(y, 2)), ylab = '')
    xypin <- par('pin')
    par(xpd = NA)
    xylim <- par('usr')
    xymai <- par('mai')
    x0 <- xylim[1] - diff(xylim[1:2]) * (xymai[2] + xymai[4] + rxi0)/xypin[1]
    x1 <- xylim[1] - diff(xylim[1:2]) * (xymai[2] + xymai[4] + rxi1)/xypin[1]
    y01 <- xylim[4] - diff(xylim[3:4]) * y01i/xypin[2]
    y02 <- xylim[3] + diff(xylim[3:4]) * y02i/xypin[2]
    par(xpd = TRUE)
    xend <- xylim[1] - diff(xylim[1:2]) * xymai[2] / (2 * xypin[1])
    xprop0 <- (xylim[1] - xend) / (xylim[1] - x0)
    xprop1 <- (xylim[2] - xend) / (xylim[2] - x1)
    par(xpd = NA)
    segments(c(x0, x0, x1, x1),
             c(y01, y02, y01, y02),
             c(xend, xend, xend, xend),
             c(xylim[4] - (xylim[4] - y01) * xprop0,
               xylim[3] + (y02 - xylim[3]) * xprop0,
               xylim[4] - (xylim[4] - y01) * xprop1,
               xylim[3] + (y02 - xylim[3]) * xprop1))
    par(mfg = c(1, 1))
    plot(xd, yd, xlab = 'mean', ylab = 'sd')
  }
  if (length(ans)) {
    zoom(x[ans], y[ans], range(x), range(y), x, y)
    points(x[ans], y[ans], pch = 19)
    zoomin(x, y)
  }
}
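For the original use case (actually selecting the points enclosed by a freehand lasso), here is a minimal sketch; it assumes the sp package for point.in.polygon(), and the lasso outline is clicked point by point with locator() and finished with Esc / right-click:
lasso.select <- function(x, y, ...) {
  plot(x, y, ...)
  outline <- locator(type = 'l')  # click the lasso outline, finish with Esc / right-click
  polygon(outline, border = 'red')  # close and show the lasso
  inside <- sp::point.in.polygon(x, y, outline$x, outline$y) > 0
  points(x[inside], y[inside], pch = 19, col = 'red')
  which(inside)  # indices of the selected points
}
# idx <- lasso.select(rnorm(100), rnorm(100))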

Related

Residual plots of a regression model in ggplot2

I'm trying to reproduce the two residual plots below in ggplot2.
To produce these graphs, it is first necessary to define some functions tied to the class of the fitted model, which I provide below.
The model is stored in fit, its data (Orthodont) come from the nlme package, and the graphs are drawn at the end of the code with the qqPlot2 function.
rm(list = ls()); cat('\014')
library(ggplot2)
library(dplyr)
library(plotly)
library(nlme)
library(lme4)
library(MASS)
library(tidyverse)
library(splines)
library(gamlss)
library(gridExtra)
library(hnp)
library(car)
extract.lmeDesign2 <- function(m){
start.level = 1
data <- getData(m)
grps <- nlme::getGroups(m)
n <- length(grps)
X <- list()
grp.dims <- m$dims$ncol
Zt <- model.matrix(m$modelStruct$reStruct, data)
cov <- as.matrix(m$modelStruct$reStruct)
i.col <- 1
n.levels <- length(m$groups)
Z <- matrix(0, n, 0)
if (start.level <= n.levels) {
for (i in 1:(n.levels - start.level + 1)) {
if (length(levels(m$groups[[n.levels - i + 1]])) != 1)
{
X[[1]] <- model.matrix(~m$groups[[n.levels - i +
1]] - 1,
contrasts.arg = c("contr.treatment",
"contr.treatment"))
}
else X[[1]] <- matrix(1, n, 1)
X[[2]] <- as.matrix(Zt[, i.col:(i.col + grp.dims[i] -
1)])
i.col <- i.col + grp.dims[i]
Z <- cbind(mgcv::tensor.prod.model.matrix(X),Z)
}
Vr <- matrix(0, ncol(Z), ncol(Z))
start <- 1
for (i in 1:(n.levels - start.level + 1)) {
k <- n.levels - i + 1
for (j in 1:m$dims$ngrps[i]) {
stop <- start + ncol(cov[[k]]) - 1
Vr[ncol(Z) + 1 - (stop:start),ncol(Z) + 1 - (stop:start)] <- cov[[k]]
start <- stop + 1
}
}
}
X <- if (class(m$call$fixed) == "name" && !is.null(m$data$X)) {
m$data$X
} else {
model.matrix(formula(eval(m$call$fixed)),data)
}
y <- as.vector(matrix(m$residuals, ncol = NCOL(m$residuals))[,NCOL(m$residuals)] +
matrix(m$fitted, ncol = NCOL(m$fitted))[,NCOL(m$fitted)])
return(list(
Vr = Vr,
X = X,
Z = Z,
sigmasq = m$sigma ^ 2,
lambda = unique(diag(Vr)),
y = y,
k = n.levels
)
)
}
fit = lme(distance ~ age, method="REML",data = Orthodont)
data.fit <- extract.lmeDesign2(fit)
data <- getData(fit)
y <- data.fit$y
X <- data.fit$X
N <- length(y)
id <- sort(as.numeric(getGroups(fit, level = 1)), index.return = TRUE)$x
n <- length(as.numeric(names(table(id))))
vecni <- (table(id))
p <- ncol(X)
n.levels <- length(fit$groups)
start.level <- 1
Cgrps <- nlme::getGroups(fit, level = start.level)
CCind <- levels((Cgrps))
sigma2 <- fit$sigma^2
obs <- numeric()
for (i in 1:n)
{
obs <- append(obs,1:vecni[i])
}
if (n.levels > 1) {
lZi <- list()
lgi <- list()
numrow <- numeric()
mgroups <- fit$groups
for (n in 1:length(CCind)) {
dgi <- data.frame(as.matrix(mgroups[mgroups == CCind[n], ]))
nrowzi <- dim(dgi)[1]
ncolzi <- 0
girep <- as.numeric(length(levels(dgi[,1])))
for (k in 2:n.levels) {
girep <- c(girep,as.numeric(length(levels(dgi[,k]))))
}
for (k in 1:n.levels) {
ncolzi <- ncolzi + as.numeric(length(levels(dgi[,k])))
}
auxi <- as.vector(table(dgi[,1]))
for (i in 2:n.levels) {
auxi <- c(auxi,as.vector(table(dgi[,i])))
}
l <- 1
Zi <- matrix(0,nrowzi,ncolzi)
for (j in 1:ncolzi) {
Zi[l:(l + auxi[j] - 1),j] <- rep(1,auxi[j])
l <- l + auxi[j]
if (l == (nrowzi + 1)) l <- 1
}
lZi[[n]] <- Zi
numrow[n] <- dim(Zi)[1]
comp.var <- as.matrix(fit$modelStruct$reStruct)
auxg <- rep(as.numeric(comp.var[1])*sigma2,girep[1])
for (i in 2:length(girep)) {
auxg <- c(auxg,rep(as.numeric(comp.var[i])*sigma2,girep[i]))
}
lgi[[n]] <- diag(auxg)
}
q <- dim(lgi[[1]])[1]
for (h in 2:length(CCind)) {
q <- c(q,dim(lgi[[h]])[1])
}
Z <- lZi[[1]]
for (k in 2:length(CCind)) {
Z <- bdiag(Z,(lZi[[k]]))
}
Z <- as.matrix(Z)
nrowZi <- lZi[[1]]
for (h in 2:length(CCind)) {
nrowZi <- c(nrowZi,dim(lZi[[h]])[1])
}
Gam <- lgi[[1]]
for (k in 2:length(CCind)) {
Gam <- bdiag(Gam,(lgi[[k]]))
}
Gam <- as.matrix(Gam)
}else{
mataux <- model.matrix(fit$modelStruct$reStruct,data)
mataux <- as.data.frame(cbind(mataux,id))
lZi <- list()
lgi <- list()
for (i in (as.numeric(unique(id)))) {
lZi[[i]] <- as.matrix((subset(split(mataux,id == i,
drop = T)$`TRUE`,select = -id)))
lgi[[i]] <- getVarCov(fit,type = "random.effects")
}
Z <- as.matrix(bdiag(lZi))
g <- getVarCov(fit,type = "random.effects")
q <- dim(g)[1]
Gam <- as.matrix(kronecker(diag(length(as.numeric(unique(id)))),g))
}
if (n.levels > 1) {
if (!inherits(fit, "lme"))
stop("object does not appear to be of class lme")
grps <- nlme::getGroups(fit)
n <- length(grps)
n.levels <- length(fit$groups)
if (is.null(fit$modelStruct$corStruct))
n.corlevels <- 0
else n.corlevels <- length(all.vars(nlme::getGroupsFormula(fit$modelStruct$corStruct)))
if (n.levels < n.corlevels) {
getGroupsFormula(fit$modelStruct$corStruct)
vnames <- all.vars(nlme::getGroupsFormula(fit$modelStruct$corStruct))
lab <- paste(eval(parse(text = vnames[1]), envir = fit$data))
if (length(vnames) > 1)
for (i in 2:length(vnames)) {
lab <- paste(lab, "/", eval(parse(text = vnames[i]),
envir = fit$data), sep = "")
}
grps <- factor(lab)
}
if (n.levels >= start.level || n.corlevels >= start.level) {
if (n.levels >= start.level)
Cgrps <- nlme::getGroups(fit, level = start.level)
else Cgrps <- grps
Cind <- sort(as.numeric(Cgrps), index.return = TRUE)$ix
rCind <- 1:n
rCind[Cind] <- 1:n
Clevel <- levels(Cgrps)
n.cg <- length(Clevel)
size.cg <- array(0, n.cg)
for (i in 1:n.cg) size.cg[i] <- sum(Cgrps == Clevel[i])
}
else {
n.cg <- 1
Cind <- 1:n
}
if (is.null(fit$modelStruct$varStruct))
w <- rep(fit$sigma, n)
else {
w <- 1/nlme::varWeights(fit$modelStruct$varStruct)
group.name <- names(fit$groups)
order.txt <- paste("ind<-order(data[[\"", group.name[1],
"\"]]", sep = "")
if (length(fit$groups) > 1)
for (i in 2:length(fit$groups)) order.txt <- paste(order.txt,
",data[[\"", group.name[i], "\"]]", sep = "")
order.txt <- paste(order.txt, ")")
eval(parse(text = order.txt))
w[ind] <- w
w <- w * fit$sigma
}
w <- w[Cind]
if (is.null(fit$modelStruct$corStruct))
lR <- array(1, n)
else {
c.m <- nlme::corMatrix(fit$modelStruct$corStruct)
if (!is.list(c.m)) {
lR <- c.m
lR <- lR[Cind, ]
lR <- lR[, Cind]
}
else {
lR <- list()
ind <- list()
for (i in 1:n.cg) {
lR[[i]] <- matrix(0, size.cg[i], size.cg[i])
ind[[i]] <- 1:size.cg[i]
}
Roff <- cumsum(c(1, size.cg))
gr.name <- names(c.m)
n.g <- length(c.m)
j0 <- rep(1, n.cg)
ii <- 1:n
for (i in 1:n.g) {
Clev <- unique(Cgrps[grps == gr.name[i]])
if (length(Clev) > 1)
stop("inner groupings not nested in outer!!")
k <- (1:n.cg)[Clevel == Clev]
j1 <- j0[k] + nrow(c.m[[i]]) - 1
lR[[k]][j0[k]:j1, j0[k]:j1] <- c.m[[i]]
ind1 <- ii[grps == gr.name[i]]
ind2 <- rCind[ind1]
ind[[k]][j0[k]:j1] <- ind2 - Roff[k] + 1
j0[k] <- j1 + 1
}
for (k in 1:n.cg) {
lR[[k]][ind[[k]], ] <- lR[[k]]
lR[[k]][, ind[[k]]] <- lR[[k]]
}
}
}
if (is.list(lR)) {
for (i in 1:n.cg) {
wi <- w[Roff[i]:(Roff[i] + size.cg[i] - 1)]
lR[[i]] <- as.vector(wi) * t(as.vector(wi) * lR[[i]])
}
}
else if (is.matrix(lR)) {
lR <- as.vector(w) * t(as.vector(w) * lR)
}
else {
lR <- w^2 * lR
}
if (is.list(lR)) {
R <- lR[[1]]
for (k in 2:n.cg) {
R <- bdiag(R,lR[[k]])
}
R <- as.matrix(R)
}
else{
R <- diag(lR)
}
}else{
R <- getVarCov(fit,type = "conditional",individual = 1)[[1]]
for (i in 2:length(as.numeric(unique(id)))) {
R <- as.matrix(bdiag(R,getVarCov(fit,
type = "conditional",individual = i)[[1]] ) )
}
}
sqrt.matrix <- function(mat) {
  mat <- as.matrix(mat)
  singular_dec <- svd(mat)
  U <- singular_dec$u
  V <- singular_dec$v
  D <- diag(singular_dec$d)
  U %*% sqrt(D) %*% t(V)
}
V <- (Z %*% Gam %*% t(Z)) + R
iV <- solve(V)
varbeta <- solve((t(X) %*% iV %*% X))
Q <- (iV - iV %*% X %*% (varbeta) %*% t(X) %*% iV )
zq <- t(Z) %*% Q
norm.frob.ZtQ <- sum(diag(zq %*% t(zq)))
eblue <- as.vector(fixef(fit))
eblup <- Gam %*% t(Z) %*% iV %*% (y - X %*% eblue)
predm <- X %*% eblue
predi <- X %*% eblue + Z %*% eblup
resm <- (y - predm)
resc <- (y - predi)
var.resm <- V - X %*% solve(t(X) %*% iV %*% X) %*% t(X)
var.resc <- R %*% Q %*% R
ident <- diag(N)
auxnum <- (R %*% Q %*% Z %*% Gam %*% t(Z) %*% Q %*% R)
auxden <- R %*% Q %*% R
CF <- diag(auxnum)/diag(auxden)
rescp <- resc/sqrt(diag(var.resc))
R.half <- sqrt.matrix(R)
auxqn <- eigen((R.half %*% Q %*% R.half), symmetric = T, only.values = FALSE)
lt <- sqrt(solve(diag((auxqn$values[1:(N-p)])))) %*% t(auxqn$vectors[1:N,1:(N-p)]) %*% solve(sqrt.matrix(R[1:N,1:N]))
var.resmcp <- lt %*% var.resc[1:N,1:N] %*% t(lt)
resmcp <- (lt %*% resc[1:N] )/sqrt(diag(var.resmcp))
if (n.levels > 1) {
aux <- Gam %*% t(Z) %*% Q %*% Z %*% Gam
qm <- q - 1
dm <- matrix(0,length(CCind),1)
gbi <- aux[1:(q[1]),(1:q[1])]
eblupi <- eblup[1:(q[1]),]
dmi <- t(eblupi) %*% ginv(gbi) %*% eblupi
dm[1] <- dmi
for (j in 2:length(CCind)) {
gbi <- aux[((j - 1)*q[(j - 1)] + 1 ):(q[j] + q[(j - 1)]),((j - 1)*q[(j - 1)] + 1 ):(q[j] + q[(j - 1)])]
eblupi <- eblup[((j - 1)*q[(j - 1)] + 1 ):(q[j] + q[(j - 1)]),]
dmi <- t(eblupi) %*% ginv(gbi) %*% eblupi
dm[j] <- dmi
}
}else{
aux <- Gam %*% t(Z) %*% Q %*% Z %*% Gam
qm <- q - 1
dm <- matrix(0,n,1)
for (j in 1:length(CCind))
{
if (q == 1)
{
gbi <- aux[j,j]
eblupi <- eblup[(q*j - qm):(q*j)]
dmi <- t(eblupi) %*% ginv(gbi) %*% eblupi
dm[j] <- dmi
}
else
{
gbi <- aux[(q*j - qm):(q*j),(q*j - qm):(q*j)]
cat(gbi,'\n','\t')
eblupi <- eblup[(q*j - qm):(q*j)]
dmi <- t(eblupi) %*% ginv(gbi) %*% eblupi
dm[j] <- dmi
}
}
}
qqPlot2 <- function(x, distribution="norm", ..., ylab=deparse(substitute(x)),
xlab=paste(distribution, "quantiles"), main = NULL,
las = par("las"),
envelope = .95,
col = palette()[1],
col.lines = palette()[2], lwd = 2, pch = 1, cex = par("cex"),
cex.lab = par("cex.lab"), cex.axis = par("cex.axis"),
line = c("quartiles", "robust", "none"),
labels = if (!is.null(names(x))) names(x) else seq(along = x),
id.method = "y",
id.n = if (id.method[1] == "identify") Inf else 0,
id.cex = 1, id.col=palette()[1], grid = TRUE)
{
line <- match.arg(line)
good <- !is.na(x)
ord <- order(x[good])
ord.x <- x[good][ord]
ord.lab <- labels[good][ord]
q.function <- eval(parse(text = paste("q", distribution, sep = "")))
d.function <- eval(parse(text = paste("d", distribution, sep = "")))
n <- length(ord.x)
P <- ppoints(n)
z <- q.function(P, ...)
plot(z, ord.x, type = "n", xlab = xlab,
ylab = ylab, main = main,
las = las,cex.lab = cex.lab, cex.axis = cex.axis)
if (grid) {
grid(lty = 1, equilogs = FALSE)
box()}
points(z, ord.x, col = col, pch = pch, cex = cex)
if (line == "quartiles" || line == "none") {
Q.x <- quantile(ord.x, c(.25,.75))
Q.z <- q.function(c(.25,.75), ...)
b <- (Q.x[2] - Q.x[1])/(Q.z[2] - Q.z[1])
a <- Q.x[1] - b*Q.z[1]
abline(a, b, col = col.lines, lwd = lwd)
}
if (line == "robust") {
coef <- coef(rlm(ord.x ~ z))
a <- coef[1]
b <- coef[2]
abline(a, b)
}
conf <- if (envelope == FALSE) .95 else envelope
zz <- qnorm(1 - (1 - conf)/2)
SE <- (b/d.function(z, ...))*sqrt(P*(1 - P)/n)
fit.value <- a + b*z
upper <- fit.value + zz*SE
lower <- fit.value - zz*SE
if (envelope != FALSE) {
lines(z, upper, lty = 2, lwd = lwd, col = col.lines)
lines(z, lower, lty = 2, lwd = lwd, col = col.lines)
}
}
x11()
qqPlot2(resmcp, ylab = "Resíduos",
xlab = "Quantil N(0,1)", pch = 20)
qqPlot2(dm, distribution = 'chisq', df = q, pch = 20,
ylab = expression(paste("Quantis de Mahalanobis")),
xlab = "Quantis da Qui-quadrado")
My attempt to reproduce them in ggplot2 was as follows:
P1 = qqPlot2(resmcp, ylab = "Resíduos",
xlab = "Quantil N(0,1)", pch = 20)
PP1 = ggplot(data = P1, aes(resmcp)) +
geom_point(aes(y = resmcp), show.legend = FALSE)
P2 = qqPlot2(dm, distribution = 'chisq', df = q, pch = 20,
ylab = expression(paste("Quantis de Mahalanobis")),
xlab = "Quantis da Qui-quadrado")
PP2 = ggplot(data = P2, aes(dm)) +
geom_point(aes(y = dm), show.legend = FALSE)
x11()
gridExtra::grid.arrange(PP1,PP2, ncol = 2)
However, something is off, as I got the following result:
See my attempt below for the Mahalanobis distance quantiles vs. chi-square quantiles graph:
gVals <- function(y, dist, conf){ # distribution; confidence interval
y <- sort(y) # make sure they're in order
p <- ppoints(length(y))
if(dist == "chisq") {
zi <- qchisq(p, df = length(p) - 1)
zd <- dchisq(zi, df = length(p) - 1)
qz <- qchisq(c(.25, .75), length(p) - 1)
} else {
zi <- qnorm(p)
zd <- dnorm(zi)
qz <- qnorm(c(.25, .75))
}
# if quartiles preferred
qx <- quantile(y, c(.25, .75))
b <- (qx[2] - qx[1]) / (qz[2] - qz[1])
a <- qx[1] - b * qz[1]
# if robust preferred
# coef <- coef(rlm(y~zi))
# a <- coef[1]
# b <- coef[2]
z <- qnorm(1 - (1 - conf)/2) # z = 1.96 for 95%...
se <- (b / zd) * sqrt(p * (1 - p)/length(p))
ft <- a + b * zi
uc <- ft + z * se
dc <- ft - z * se
dff = data.frame(z = zi, y = y, uc = uc, dc = dc)
list(a = a, b = b, dff = dff) # returns intercept, slope, and data frame
}
cdf <- gVals(dm, "chisq", .95) # dm is defined in the previous code above
ggplot(cdf$dff, aes(x = z, y = y)) +
geom_point() +
geom_abline(intercept = cdf$a[[1]], slope = cdf$b[[1]]) +
annotate("line", x = cdf$dff$z, y = cdf$dff$uc, color = "red", lty = 2) +
annotate("line", x = cdf$dff$z, y = cdf$dff$dc, color = "red", lty = 2)
Note that the x axis should go from 0 to 8, and the y axis should go from 0 to 14. Also, the shape of the simulation envelope does not match. I am not able to fix this problem.
Update
Instead of having the code for the option quartile commented out, I have commented out the code for the option robust in the function. Additionally, instead of returning a data frame, it returns a list. FYI, you only need the MASS package if you use the robust option (for the function rlm).
This function is based on the code used in qqPlot2 in your question. However, it doesn't return a plot; it returns data.
library(car)
library(MASS)
library(tidyverse)
gVals <- function(y, dist, conf){ # distribution; confidence interval
y <- sort(y) # make sure they're in order
p <- ppoints(length(y))
if(dist == "chisq") {
zi <- qchisq(p, df = length(p) - 1)
zd <- dchisq(zi, df = length(p) - 1)
qz <- qchisq(c(.25, .75), length(p) - 1)
} else {
zi <- qnorm(p)
zd <- dnorm(zi)
qz <- qnorm(c(.25, .75))
}
# if quartiles preferred
qx <- quantile(y, c(.25, .75))
b <- (qx[2] - qx[1]) / (qz[2] - qz[1])
a <- qx[1] - b * qz[1]
# if robust preferred
# coef <- coef(rlm(y~zi))
# a <- coef[1]
# b <- coef[2]
z <- qnorm(1 - (1 - conf)/2) # z = 1.96 for 95%...
se <- (b / zd) * sqrt(p * (1 - p)/length(p))
ft <- a + b * zi
uc <- ft + z * se
dc <- ft - z * se
dff = data.frame(z = zi, y = y, uc = uc, dc = dc)
list(a = a, b = b, dff = dff) # returns intercept, slope, and data frame
}
Here is a comparison with some arbitrary data.
data(mtcars)
qqPlot2(mtcars$mpg)
qqPlot2(mtcars$mpg, dist = "chisq", df = 31)
ndf <- gVals(mtcars$mpg, "norm", .95)
ggplot(ndf$dff, aes(x = z, y = y)) +
geom_point() +
geom_abline(intercept = ndf$a[[1]], slope = ndf$b[[1]]) +
annotate("line", x = ndf$dff$z, y = ndf$dff$uc, color = "red", lty = 2) +
annotate("line", x = ndf$dff$z, y = ndf$dff$dc, color = "red", lty = 2)
cdf <- gVals(mtcars$mpg, "chisq", .95)
ggplot(cdf$dff, aes(x = z, y = y)) +
geom_point() +
geom_abline(intercept = cdf$a[[1]], slope = cdf$b[[1]]) +
annotate("line", x = cdf$dff$z, y = cdf$dff$uc, color = "red", lty = 2) +
annotate("line", x = cdf$dff$z, y = cdf$dff$dc, color = "red", lty = 2)
I managed to solve it using the qqplotr package.
library(qqplotr)
dist <- "chisq"
dpar <- list(df = q)
QT <- data.frame(QUANTIS = dm); ggplot(QT, aes(sample = QUANTIS)) +
stat_qq_band(distribution = dist, dparams = dpar) +
stat_qq_point(distribution = dist, dparams = dpar) +
stat_qq_line(distribution = dist, dparams = dpar, color = "blue");
qqPlot2(dm, distribution = 'chisq', df = q, pch = 20,
ylab = expression(paste("Quantis de Mahalanobis")),
xlab = "Quantis da Qui-quadrado")

How to define a function of `f_n - chi-square` and use `uniroot` to find a confidence interval?

I want to get a 95% confidence interval for the following problem.
I have written the function f_n in my R code. I first draw a random sample of size 100, then define the function h for lambda, and from that I can get f_n. My question is how to define a function of f_n minus the chi-square quantile and use `uniroot` to find the confidence interval.
# I first get 100 samples
set.seed(201111)
x=rlnorm(100,0,2)
Based on the answer by @RuiBarradas, I tried the following code.
set.seed(2011111)
# I define function h, and use uniroot function to find lambda
h <- function(lam, n)
{
sum((x - theta)/(1 + lam*(x - theta)))
}
# sample size
n <- 100
# the parameter of interest must be a value in [1, 12],
#true_theta<-1
#true_sd<- exp(2)
#x <- rnorm(n, mean = true_theta, sd = true_sd)
x=rlnorm(100,0,2)
xmax <- max(x)
xmin <- min(x)
theta_seq = seq(from = 1, to = 12, by = 0.01)
f_n <- rep(NA, length(theta_seq))
for (i in seq_along(theta_seq))
{
theta <- theta_seq[i]
lambdamin <- (1/n-1)/(xmax - theta)
lambdamax <- (1/n-1)/(xmin - theta)
lambda = uniroot(h, interval = c(lambdamin, lambdamax), n = n)$root
f_n[i] = -sum(log(1 + lambda*(x - theta)))
}
j <- which.max(f_n)
max_fn <- f_n[j]
mle_theta <- theta_seq[j]
plot(theta_seq, f_n, type = "l",
main = expression(Estimated ~ theta),
xlab = expression(Theta),
ylab = expression(f[n]))
points(mle_theta, f_n[j], pch = 19, col = "red")
segments(
x0 = c(mle_theta, xmin),
y0 = c(min(f_n)*2, max_fn),
x1 = c(mle_theta, mle_theta),
y1 = c(max_fn, max_fn),
col = "red",
lty = "dashed"
)
I got the following plot of f_n.
For 95% CI, I try
LR <- function(theta, lambda)
{
2*sum(log(1 + lambda*(x - theta))) - qchisq(0.95, df = 1)
}
lambdamin <- (1/n-1)/(xmax - mle_theta)
lambdamax <- (1/n-1)/(xmin - mle_theta)
lambda <- uniroot(h, interval = c(lambdamin, lambdamax), n = n)$root
uniroot(LR, c(xmin, mle_theta), lambda = lambda)$root
The result is 0.07198144. Then the logarithm is log(0.07198144)=-2.631347.
But there is NA in the following code.
uniroot(LR, c(mle_theta, xmax), lambda = lambda)$root
So the 95% CI is theta >= -2.631347.
But the question is that the 95% CI should be a closed interval...
Here is a solution.
First of all, the data generation code is inconsistent: the parameter theta is supposed to be a value in [1, 12], yet the data are generated with rlnorm(100, 0, 2), which does not depend on theta at all. I change this to a normal sample with true_theta = 5.
set.seed(2011111)
# I define function h, and use uniroot function to find lambda
h <- function(lam, n)
{
sum((x - theta)/(1 + lam*(x - theta)))
}
# sample size
n <- 100
# the parameter of interest must be a value in [1, 12],
true_theta <- 5
true_sd <- 2
x <- rnorm(n, mean = true_theta, sd = true_sd)
xmax <- max(x)
xmin <- min(x)
theta_seq <- seq(from = xmin + .Machine$double.eps^0.5,
to = xmax - .Machine$double.eps^0.5, by = 0.01)
f_n <- rep(NA, length(theta_seq))
for (i in seq_along(theta_seq))
{
theta <- theta_seq[i]
lambdamin <- (1/n-1)/(xmax - theta)
lambdamax <- (1/n-1)/(xmin - theta)
lambda = uniroot(h, interval = c(lambdamin, lambdamax), n = n)$root
f_n[i] = -sum(log(1 + lambda*(x - theta)))
}
j <- which.max(f_n)
max_fn <- f_n[j]
mle_theta <- theta_seq[j]
plot(theta_seq, f_n, type = "l",
main = expression(Estimated ~ theta),
xlab = expression(Theta),
ylab = expression(f[n]))
points(mle_theta, f_n[j], pch = 19, col = "red")
segments(
x0 = c(mle_theta, xmin),
y0 = c(min(f_n)*2, max_fn),
x1 = c(mle_theta, mle_theta),
y1 = c(max_fn, max_fn),
col = "red",
lty = "dashed"
)
LR <- function(theta, lambda)
{
2*sum(log(1 + lambda*(x - theta))) - qchisq(0.95, df = 1)
}
lambdamin <- (1/n-1)/(xmax - mle_theta)
lambdamax <- (1/n-1)/(xmin - mle_theta)
lambda <- uniroot(h, interval = c(lambdamin, lambdamax), n = n)$root
uniroot(LR, c(xmin, mle_theta), lambda = lambda)$root
#> [1] 4.774609
Created on 2022-03-25 by the reprex package (v2.0.1)
The one-sided CI95 is theta >= 4.774609.
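To get a closed interval, one option (a sketch, not part of the answer above) is to re-solve lambda for each candidate theta inside the likelihood-ratio function, so that uniroot() can be applied on both sides of the MLE:
h2 <- function(lam, theta) sum((x - theta)/(1 + lam*(x - theta)))
LR2 <- function(theta)
{
  lambdamin <- (1/n - 1)/(xmax - theta)
  lambdamax <- (1/n - 1)/(xmin - theta)
  lam <- uniroot(h2, interval = c(lambdamin, lambdamax), theta = theta)$root
  2*sum(log(1 + lam*(x - theta))) - qchisq(0.95, df = 1)
}
eps <- 0.05  # stay a little away from the extremes, where lambda may not be solvable
c(lower = uniroot(LR2, c(xmin + eps, mle_theta))$root,
  upper = uniroot(LR2, c(mle_theta, xmax - eps))$root)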

Add a numeric axis to a mosaicplot

I am plotting data using a mosaic plot (with mosaicplot()) and am considering adding a numeric axis to one dimension to clarify the size of the different groups. But I do not understand how the plot cells are aligned to the axis, since it seems to range from approximately 0.2 to 0.98 (or something like that) on the graphics device. Here's a reproducible example:
mosaicplot(Titanic, main = "Survival on the Titanic", off = 0)
axis(1, seq(0, 1, by = 0.1))
Note how a 0-1 x-axis actually extends to the left and right of the plot. Is it possible to add a set of axis labels that is scaled correctly?
par(mfrow = c(2,1), mar = c(3,4,2,1))
mp(Titanic)
mp(Titanic, off = 0)
This one isn't difficult to fix, but there are a couple of things going on:
Obviously, the axis doesn't start at 0, nor does it end at something round, which is what you get from pretty (used to calculate, draw, and label the ticks). From these lines we can see that the polygons are drawn from 50 to 950 along the x-axis (depending on what is set for cex.axis):
x1 <- 30 + 20 * cex.axis/0.66
y1 <- 5
x2 <- 950
y2 <- 1000 - x1
Secondly, the coordinate system is reset when the function exits, which is why your axis ranges from 0 to 1 instead of pretty(c(50, 950)); and I don't see any way to pass something like new or add through mosaicplot, since
Warning message:
In mosaicplot.default(Titanic, new = TRUE) :
extra argument ‘new’ will be disregarded
So I don't think there is an easy fix without editing the source code (because it seems like you would have to backtrace how far over your last plot has shifted the origin and how that translates to a new window, which may not be the same for every plot, and so on).
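That said, for the default settings there is a rough workaround without touching the source: re-establish the internal 1..1000 user coordinates after the call. This is only a sketch, and it assumes the defaults cex.axis = 0.66 and las = 0, for which the cells span 50 to 950 horizontally:
mosaicplot(Titanic, main = "Survival on the Titanic", off = 0)
par(usr = c(1, 1000, 1, 1000))  # put mosaicplot's internal coordinates back
at <- seq(50, 950, length.out = 6)
axis(1, at = at, labels = round((at - 50) / 900, 2))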
The only thing I changed was adding the final three lines.
## graphics:::mosaicplot.default
mp <- function (x, main = deparse(substitute(x)), sub = NULL, xlab = NULL,
ylab = NULL, sort = NULL, off = NULL, dir = NULL, color = NULL,
shade = FALSE, margin = NULL, cex.axis = 0.66, las = par("las"),
border = NULL, type = c("pearson", "deviance", "FT"), ...) {
mosaic.cell <- function(X, x1, y1, x2, y2, srt.x, srt.y,
adj.x, adj.y, off, dir, color, lablevx, lablevy, maxdim,
currlev, label) {
p <- ncol(X) - 2
if (dir[1L] == "v") {
xdim <- maxdim[1L]
XP <- rep.int(0, xdim)
for (i in seq_len(xdim)) XP[i] <- sum(X[X[, 1L] ==
i, p])/sum(X[, p])
if (anyNA(XP))
stop("missing values in contingency table")
white <- off[1L] * (x2 - x1)/max(1, xdim - 1)
x.l <- x1
x.r <- x1 + (1 - off[1L]) * XP[1L] * (x2 - x1)
if (xdim > 1L)
for (i in 2:xdim) {
x.l <- c(x.l, x.r[i - 1L] + white)
x.r <- c(x.r, x.r[i - 1L] + white + (1 - off[1L]) *
XP[i] * (x2 - x1))
}
if (lablevx > 0L) {
this.lab <- if (is.null(label[[1L]][1L])) {
paste(rep.int(as.character(currlev), length(currlev)),
as.character(seq_len(xdim)), sep = ".")
}
else label[[1L]]
text(x = x.l + (x.r - x.l)/2, y = 1000 - 35 *
cex.axis/0.66 + 22 * cex.axis/0.65 * (lablevx -
1), srt = srt.x, adj = adj.x, cex = cex.axis,
this.lab, xpd = NA)
}
if (p > 2L) {
for (i in seq_len(xdim)) {
if (XP[i] > 0) {
Recall(X[X[, 1L] == i, 2L:(p + 2L), drop = FALSE],
x.l[i], y1, x.r[i], y2, srt.x, srt.y, adj.x,
adj.y, off[-1L], dir[-1L], color, lablevx -
1, (i == 1L) * lablevy, maxdim[-1L],
currlev + 1, label[2:p])
}
else {
segments(rep.int(x.l[i], 3L), y1 + (y2 -
y1) * c(0, 2, 4)/5, rep.int(x.l[i], 3L),
y1 + (y2 - y1) * c(1, 3, 5)/5)
}
}
}
else {
for (i in seq_len(xdim)) {
if (XP[i] > 0) {
polygon(c(x.l[i], x.r[i], x.r[i], x.l[i]),
c(y1, y1, y2, y2), lty = if (extended)
X[i, p + 1L]
else 1L, col = color[if (extended)
X[i, p + 2L]
else i], border = border)
}
else {
segments(rep.int(x.l[i], 3L), y1 + (y2 -
y1) * c(0, 2, 4)/5, rep.int(x.l[i], 3L),
y1 + (y2 - y1) * c(1, 3, 5)/5)
}
}
}
}
else {
ydim <- maxdim[1L]
YP <- rep.int(0, ydim)
for (j in seq_len(ydim)) {
YP[j] <- sum(X[X[, 1L] == j, p])/sum(X[, p])
}
white <- off[1L] * (y2 - y1)/(max(1, ydim - 1))
y.b <- y2 - (1 - off[1L]) * YP[1L] * (y2 - y1)
y.t <- y2
if (ydim > 1L) {
for (j in 2:ydim) {
y.b <- c(y.b, y.b[j - 1] - white - (1 - off[1L]) *
YP[j] * (y2 - y1))
y.t <- c(y.t, y.b[j - 1] - white)
}
}
if (lablevy > 0L) {
this.lab <- if (is.null(label[[1L]][1L])) {
paste(rep.int(as.character(currlev), length(currlev)),
as.character(seq_len(ydim)), sep = ".")
}
else label[[1L]]
text(x = 35 * cex.axis/0.66 - 20 * cex.axis/0.66 *
(lablevy - 1), y = y.b + (y.t - y.b)/2, srt = srt.y,
adj = adj.y, cex = cex.axis, this.lab, xpd = NA)
}
if (p > 2L) {
for (j in seq_len(ydim)) {
if (YP[j] > 0) {
Recall(X[X[, 1L] == j, 2:(p + 2), drop = FALSE],
x1, y.b[j], x2, y.t[j], srt.x, srt.y, adj.x,
adj.y, off[-1L], dir[-1L], color, (j ==
1L) * lablevx, lablevy - 1, maxdim[-1L],
currlev + 1, label[2:p])
}
else {
segments(x1 + (x2 - x1) * c(0, 2, 4)/5, rep.int(y.b[j],
3L), x1 + (x2 - x1) * c(1, 3, 5)/5, rep.int(y.b[j],
3L))
}
}
}
else {
for (j in seq_len(ydim)) {
if (YP[j] > 0) {
polygon(c(x1, x2, x2, x1), c(y.b[j], y.b[j],
y.t[j], y.t[j]), lty = if (extended)
X[j, p + 1]
else 1, col = color[if (extended)
X[j, p + 2]
else j], border = border)
}
else {
segments(x1 + (x2 - x1) * c(0, 2, 4)/5, rep.int(y.b[j],
3L), x1 + (x2 - x1) * c(1, 3, 5)/5, rep.int(y.b[j],
3L))
}
}
}
}
}
srt.x <- if (las > 1)
90
else 0
srt.y <- if (las == 0 || las == 3)
90
else 0
if (is.null(dim(x)))
x <- as.array(x)
else if (is.data.frame(x))
x <- data.matrix(x)
dimd <- length(dx <- dim(x))
if (dimd == 0L || any(dx == 0L))
stop("'x' must not have 0 dimensionality")
if (!missing(...))
warning(sprintf(ngettext(length(list(...)), "extra argument %s will be disregarded",
"extra arguments %s will be disregarded"), paste(sQuote(names(list(...))),
collapse = ", ")), domain = NA)
Ind <- 1L:dx[1L]
if (dimd > 1L) {
Ind <- rep.int(Ind, prod(dx[2:dimd]))
for (i in 2:dimd) {
Ind <- cbind(Ind, c(matrix(1L:dx[i], byrow = TRUE,
nrow = prod(dx[1L:(i - 1)]), ncol = prod(dx[i:dimd]))))
}
}
Ind <- cbind(Ind, c(x))
if (is.logical(shade) && !shade) {
extended <- FALSE
Ind <- cbind(Ind, NA, NA)
}
else {
if (is.logical(shade))
shade <- c(2, 4)
else if (any(shade <= 0) || length(shade) > 5)
stop("invalid 'shade' specification")
extended <- TRUE
shade <- sort(shade)
breaks <- c(-Inf, -rev(shade), 0, shade, Inf)
color <- c(hsv(0, s = seq.int(1, to = 0, length.out = length(shade) +
1)), hsv(4/6, s = seq.int(0, to = 1, length.out = length(shade) +
1)))
if (is.null(margin))
margin <- as.list(1L:dimd)
E <- stats::loglin(x, margin, fit = TRUE, print = FALSE)$fit
type <- match.arg(type)
residuals <- switch(type, pearson = (x - E)/sqrt(E),
deviance = {
tmp <- 2 * (x * log(ifelse(x == 0, 1, x/E)) -
(x - E))
tmp <- sqrt(pmax(tmp, 0))
ifelse(x > E, tmp, -tmp)
}, FT = sqrt(x) + sqrt(x + 1) - sqrt(4 * E + 1))
Ind <- cbind(Ind, c(1 + (residuals < 0)), as.numeric(cut(residuals,
breaks)))
}
label <- dimnames(x)
if (is.null(off))
off <- if (dimd == 2)
2 * (dx - 1)
else rep.int(10, dimd)
if (length(off) != dimd)
off <- rep_len(off, dimd)
if (any(off > 50))
off <- off * 50/max(off)
if (is.null(dir) || length(dir) != dimd) {
dir <- rep_len(c("v", "h"), dimd)
}
if (!is.null(sort)) {
if (length(sort) != dimd)
stop("length of 'sort' does not conform to 'dim(x)'")
Ind[, seq_len(dimd)] <- Ind[, sort]
off <- off[sort]
dir <- dir[sort]
label <- label[sort]
}
nam.dn <- names(label)
if (is.null(xlab) && any(dir == "v"))
xlab <- nam.dn[min(which(dir == "v"))]
if (is.null(ylab) && any(dir == "h"))
ylab <- nam.dn[min(which(dir == "h"))]
ncolors <- length(tabulate(Ind[, dimd]))
if (!extended && ((is.null(color) || length(color) != ncolors))) {
color <- if (is.logical(color))
if (color[1L])
gray.colors(ncolors)
else rep.int(0, ncolors)
else if (is.null(color))
rep.int("grey", ncolors)
else rep_len(color, ncolors)
}
dev.hold()
on.exit(dev.flush())
plot.new()
if (!extended) {
opar <- par(usr = c(1, 1000, 1, 1000), mgp = c(1, 1,
0))
on.exit(par(opar), add = TRUE)
}
else {
pin <- par("pin")
rtxt <- "Standardized\nResiduals:"
rtxtCex <- min(1, pin[1L]/(strheight(rtxt, units = "inches") *
12), pin[2L]/(strwidth(rtxt, units = "inches")/4))
rtxtWidth <- 0.1
opar <- par(usr = c(1, 1000 * (1.1 + rtxtWidth), 1, 1000),
mgp = c(1, 1, 0))
on.exit(par(opar), add = TRUE)
rtxtHeight <- strwidth(rtxt, units = "i", cex = rtxtCex)/pin[2L]
text(1000 * (1.05 + 0.5 * rtxtWidth), 0, labels = rtxt,
adj = c(0, 0.25), srt = 90, cex = rtxtCex)
len <- length(shade) + 1
bh <- 0.95 * (0.95 - rtxtHeight)/(2 * len)
x.l <- 1000 * 1.05
x.r <- 1000 * (1.05 + 0.7 * rtxtWidth)
y.t <- 1000 * rev(seq.int(from = 0.95, by = -bh, length.out = 2 *
len))
y.b <- y.t - 1000 * 0.8 * bh
ltype <- c(rep.int(2, len), rep.int(1, len))
for (i in 1:(2 * len)) {
polygon(c(x.l, x.r, x.r, x.l), c(y.b[i], y.b[i],
y.t[i], y.t[i]), col = color[i], lty = ltype[i],
border = border)
}
brks <- round(breaks, 2)
y.m <- y.b + 1000 * 0.4 * bh
text(1000 * (1.05 + rtxtWidth), y.m, c(paste0("<", brks[2L]),
paste(brks[2:(2 * len - 1)], brks[3:(2 * len)], sep = ":"),
paste0(">", brks[2 * len])), srt = 90, cex = cex.axis,
xpd = NA)
}
if (!is.null(main) || !is.null(xlab) || !is.null(ylab) ||
!is.null(sub))
title(main, sub = sub, xlab = xlab, ylab = ylab)
adj.x <- adj.y <- 0.5
x1 <- 30 + 20 * cex.axis/0.66
y1 <- 5
x2 <- 950
y2 <- 1000 - x1
maxlen.xlabel <- maxlen.ylabel <- 35 * cex.axis/0.66
if (srt.x == 90) {
maxlen.xlabel <- max(strwidth(label[[dimd + 1L - match("v",
rev(dir))]], cex = cex.axis))
adj.x <- 1
y2 <- y2 - maxlen.xlabel
}
if (srt.y == 0) {
maxlen.ylabel <- max(strwidth(label[[match("h", dir)]],
cex = cex.axis))
adj.y <- 0
x1 <- x1 + maxlen.ylabel
}
mosaic.cell(Ind, x1 = x1, y1 = y1, x2 = x2, y2 = y2, srt.x = srt.x,
srt.y = srt.y, adj.x = adj.x, adj.y = adj.y, off = off/100,
dir = dir, color = color, lablevx = 2, lablevy = 2,
maxdim = apply(as.matrix(Ind[, 1L:dimd]), 2L, max),
currlev = 1, label = label)
## new stuff
at <- seq(x1, x2, length.out = 6)
axis(1, at, (at - min(at)) / diff(range(at)))
invisible()
}

Removing default title from wind rose in 'openair' package

I have created a wind rose using the package 'openair', for water current and direction data.
However, a default title is applied to the plot "Frequency of counts by wind direction (%)" which is not applicable to water current data. I cannot remove the title - can anyone help?
windRose(Wind, ws = "ws", wd = "wd", ws2 = NA, wd2 =NA,
ws.int = 20, angle = 10, type = "default", cols ="increment",
grid.line = NULL, width = 0.5, seg = NULL,
auto.text = TRUE, breaks = 5, offset = 10, paddle =FALSE,
key.header = "Current Speed", key.footer = "(cm/s)",
key.position = "right", key = TRUE, dig.lab = 3,
statistic = "prop.count", pollutant = NULL, annotate =
TRUE, border = NA, na.action=NULL)
Thanks!
There is another way that does not involve copying the whole function.
If you inspect the windRose code you can see that the title is set according to the value of the statistic option. In the documentation you can see that the official options are "prop.count", "prop.mean", "abs.count" and "frequency"; but the code also checks whether the argument passed to the statistic option is a list, and if so sets the statistic options according to the list contents:
if (is.list(statistic)) {
stat.fun <- statistic$fun
stat.unit <- statistic$unit
stat.scale <- statistic$scale
stat.lab <- statistic$lab
stat.fun2 <- statistic$fun2
stat.lab2 <- statistic$lab2
stat.labcalm <- statistic$labcalm
}
The title that you want to change is defined by statistic$lab.
By passing a list to the statistic option you can set, among other things, the title. So an easy way to change the title is to pass a list with everything copied from one of the official options, changing only the title. For example, let's say I want to use "prop.count" with a custom title. Then I'd transform the options listed in the code:
stat.fun <- length
stat.unit <- "%"
stat.scale <- "all"
stat.lab <- "Frequency of counts by wind direction (%)"
stat.fun2 <- function(x) signif(mean(x, na.rm = TRUE),
3)
stat.lab2 <- "mean"
stat.labcalm <- function(x) round(x, 1)
into a named list with the title (lab) changed:
my.statistic <- list("fun"=length,"unit" = "%","scale" = "all", "lab" = "My title" , "fun2" = function(x) signif(mean(x, na.rm = TRUE), 3), "lab2" = "mean","labcalm" = function(x) round(x, 1))
and use it in the call to windRose:
windRose(mydata,statistic=my.statistic)
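Applied to the original call, that would look something like this (a sketch; Wind is the data frame from the question, and lab is set to an empty string so that no title is drawn):
no.title <- list("fun" = length, "unit" = "%", "scale" = "all",
                 "lab" = "",  # empty label instead of the default title
                 "fun2" = function(x) signif(mean(x, na.rm = TRUE), 3),
                 "lab2" = "mean",
                 "labcalm" = function(x) round(x, 1))
windRose(Wind, ws = "ws", wd = "wd", ws.int = 20, angle = 10,
         cols = "increment", key.header = "Current Speed", key.footer = "(cm/s)",
         breaks = 5, offset = 10, paddle = FALSE, statistic = no.title)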
The great thing about many R functions is that you can type their name to see the source. So here you could type windRose and edit the required label as below:
windRose.2 <- function (mydata, ws = "ws", wd = "wd", ws2 = NA, wd2 = NA, ws.int = 2,
angle = 30, type = "default", cols = "default", grid.line = NULL,
width = 1, seg = NULL, auto.text = TRUE, breaks = 4, offset = 10,
paddle = TRUE, key.header = NULL, key.footer = "(m/s)", key.position = "bottom",
key = TRUE, dig.lab = 5, statistic = "prop.count", pollutant = NULL,
annotate = TRUE, border = NA, ...)
{
if (is.null(seg))
seg <- 0.9
if (length(cols) == 1 && cols == "greyscale") {
trellis.par.set(list(strip.background = list(col = "white")))
calm.col <- "black"
}
else {
calm.col <- "forestgreen"
}
current.strip <- trellis.par.get("strip.background")
on.exit(trellis.par.set("strip.background", current.strip))
if (360/angle != round(360/angle)) {
warning("In windRose(...):\n angle will produce some spoke overlap",
"\n suggest one of: 5, 6, 8, 9, 10, 12, 15, 30, 45, etc.",
call. = FALSE)
}
if (angle < 3) {
warning("In windRose(...):\n angle too small", "\n enforcing 'angle = 3'",
call. = FALSE)
angle <- 3
}
extra.args <- list(...)
extra.args$xlab <- if ("xlab" %in% names(extra.args))
quickText(extra.args$xlab, auto.text)
else quickText("", auto.text)
extra.args$ylab <- if ("ylab" %in% names(extra.args))
quickText(extra.args$ylab, auto.text)
else quickText("", auto.text)
extra.args$main <- if ("main" %in% names(extra.args))
quickText(extra.args$main, auto.text)
else quickText("", auto.text)
if (is.character(statistic)) {
ok.stat <- c("prop.count", "prop.mean", "abs.count",
"frequency")
if (!is.character(statistic) || !statistic[1] %in% ok.stat) {
warning("In windRose(...):\n statistic unrecognised",
"\n enforcing statistic = 'prop.count'", call. = FALSE)
statistic <- "prop.count"
}
if (statistic == "prop.count") {
stat.fun <- length
stat.unit <- "%"
stat.scale <- "all"
stat.lab <- ""
stat.fun2 <- function(x) signif(mean(x, na.rm = TRUE),
3)
stat.lab2 <- "mean"
stat.labcalm <- function(x) round(x, 1)
}
if (statistic == "prop.mean") {
stat.fun <- function(x) sum(x, na.rm = TRUE)
stat.unit <- "%"
stat.scale <- "panel"
stat.lab <- "Proportion contribution to the mean (%)"
stat.fun2 <- function(x) signif(mean(x, na.rm = TRUE),
3)
stat.lab2 <- "mean"
stat.labcalm <- function(x) round(x, 1)
}
if (statistic == "abs.count" | statistic == "frequency") {
stat.fun <- length
stat.unit <- ""
stat.scale <- "none"
stat.lab <- "Count by wind direction"
stat.fun2 <- function(x) round(length(x), 0)
stat.lab2 <- "count"
stat.labcalm <- function(x) round(x, 0)
}
}
if (is.list(statistic)) {
stat.fun <- statistic$fun
stat.unit <- statistic$unit
stat.scale <- statistic$scale
stat.lab <- statistic$lab
stat.fun2 <- statistic$fun2
stat.lab2 <- statistic$lab2
stat.labcalm <- statistic$labcalm
}
vars <- c(wd, ws)
diff <- FALSE
rm.neg <- TRUE
if (!is.na(ws2) & !is.na(wd2)) {
vars <- c(vars, ws2, wd2)
diff <- TRUE
rm.neg <- FALSE
mydata$ws <- mydata[, ws2] - mydata[, ws]
mydata$wd <- mydata[, wd2] - mydata[, wd]
id <- which(mydata$wd < 0)
if (length(id) > 0)
mydata$wd[id] <- mydata$wd[id] + 360
pollutant <- "ws"
key.footer <- "ws"
wd <- "wd"
ws <- "ws"
vars <- c("ws", "wd")
if (missing(angle))
angle <- 10
if (missing(offset))
offset <- 20
if (is.na(breaks[1])) {
max.br <- max(ceiling(abs(c(min(mydata$ws, na.rm = TRUE),
max(mydata$ws, na.rm = TRUE)))))
breaks <- c(-1 * max.br, 0, max.br)
}
if (missing(cols))
cols <- c("lightskyblue", "tomato")
seg <- 1
}
if (any(type %in% openair:::dateTypes))
vars <- c(vars, "date")
if (!is.null(pollutant))
vars <- c(vars, pollutant)
mydata <- openair:::checkPrep(mydata, vars, type, remove.calm = FALSE,
remove.neg = rm.neg)
mydata <- na.omit(mydata)
if (is.null(pollutant))
pollutant <- ws
mydata$x <- mydata[, pollutant]
mydata[, wd] <- angle * ceiling(mydata[, wd]/angle - 0.5)
mydata[, wd][mydata[, wd] == 0] <- 360
mydata[, wd][mydata[, ws] == 0] <- -999
if (length(breaks) == 1)
breaks <- 0:(breaks - 1) * ws.int
if (max(breaks) < max(mydata$x, na.rm = TRUE))
breaks <- c(breaks, max(mydata$x, na.rm = TRUE))
if (min(breaks) > min(mydata$x, na.rm = TRUE))
warning("Some values are below minimum break.")
breaks <- unique(breaks)
mydata$x <- cut(mydata$x, breaks = breaks, include.lowest = FALSE,
dig.lab = dig.lab)
theLabels <- gsub("[(]|[)]|[[]|[]]", "", levels(mydata$x))
theLabels <- gsub("[,]", " to ", theLabels)
prepare.grid <- function(mydata) {
if (all(is.na(mydata$x)))
return()
levels(mydata$x) <- c(paste("x", 1:length(theLabels),
sep = ""))
all <- stat.fun(mydata[, wd])
calm <- mydata[mydata[, wd] == -999, ][, pollutant]
mydata <- mydata[mydata[, wd] != -999, ]
calm <- stat.fun(calm)
weights <- tapply(mydata[, pollutant], list(mydata[,
wd], mydata$x), stat.fun)
if (stat.scale == "all") {
calm <- calm/all
weights <- weights/all
}
if (stat.scale == "panel") {
temp <- stat.fun(stat.fun(weights)) + calm
calm <- calm/temp
weights <- weights/temp
}
weights[is.na(weights)] <- 0
weights <- t(apply(weights, 1, cumsum))
if (stat.scale == "all" | stat.scale == "panel") {
weights <- weights * 100
calm <- calm * 100
}
panel.fun <- stat.fun2(mydata[, pollutant])
u <- mean(sin(2 * pi * mydata[, wd]/360))
v <- mean(cos(2 * pi * mydata[, wd]/360))
mean.wd <- atan2(u, v) * 360/2/pi
if (all(is.na(mean.wd))) {
mean.wd <- NA
}
else {
if (mean.wd < 0)
mean.wd <- mean.wd + 360
if (mean.wd > 180)
mean.wd <- mean.wd - 360
}
weights <- cbind(data.frame(weights), wd = as.numeric(row.names(weights)),
calm = calm, panel.fun = panel.fun, mean.wd = mean.wd)
weights
}
if (paddle) {
poly <- function(wd, len1, len2, width, colour, x.off = 0,
y.off = 0) {
theta <- wd * pi/180
len1 <- len1 + off.set
len2 <- len2 + off.set
x1 <- len1 * sin(theta) - width * cos(theta) + x.off
x2 <- len1 * sin(theta) + width * cos(theta) + x.off
x3 <- len2 * sin(theta) - width * cos(theta) + x.off
x4 <- len2 * sin(theta) + width * cos(theta) + x.off
y1 <- len1 * cos(theta) + width * sin(theta) + y.off
y2 <- len1 * cos(theta) - width * sin(theta) + y.off
y3 <- len2 * cos(theta) + width * sin(theta) + y.off
y4 <- len2 * cos(theta) - width * sin(theta) + y.off
lpolygon(c(x1, x2, x4, x3), c(y1, y2, y4, y3), col = colour,
border = border)
}
}
else {
poly <- function(wd, len1, len2, width, colour, x.off = 0,
y.off = 0) {
len1 <- len1 + off.set
len2 <- len2 + off.set
theta <- seq((wd - seg * angle/2), (wd + seg * angle/2),
length.out = (angle - 2) * 10)
theta <- ifelse(theta < 1, 360 - theta, theta)
theta <- theta * pi/180
x1 <- len1 * sin(theta) + x.off
x2 <- rev(len2 * sin(theta) + x.off)
y1 <- len1 * cos(theta) + x.off
y2 <- rev(len2 * cos(theta) + x.off)
lpolygon(c(x1, x2), c(y1, y2), col = colour, border = border)
}
}
mydata <- cutData(mydata, type, ...)
results.grid <- ddply(mydata, type, prepare.grid)
results.grid$calm <- stat.labcalm(results.grid$calm)
results.grid$mean.wd <- stat.labcalm(results.grid$mean.wd)
strip.dat <- openair:::strip.fun(results.grid, type, auto.text)
strip <- strip.dat[[1]]
strip.left <- strip.dat[[2]]
pol.name <- strip.dat[[3]]
if (length(theLabels) < length(cols)) {
col <- cols[1:length(theLabels)]
}
else {
col <- openColours(cols, length(theLabels))
}
max.freq <- max(results.grid[, (length(type) + 1):(length(theLabels) +
length(type))], na.rm = TRUE)
off.set <- max.freq * (offset/100)
box.widths <- seq(0.002^0.25, 0.016^0.25, length.out = length(theLabels))^4
box.widths <- box.widths * max.freq * angle/5
legend <- list(col = col, space = key.position, auto.text = auto.text,
labels = theLabels, footer = key.footer, header = key.header,
height = 0.6, width = 1.5, fit = "scale", plot.style = if (paddle) "paddle" else "other")
legend <- openair:::makeOpenKeyLegend(key, legend, "windRose")
temp <- paste(type, collapse = "+")
myform <- formula(paste("x1 ~ wd | ", temp, sep = ""))
mymax <- 2 * max.freq
myby <- if (is.null(grid.line))
pretty(c(0, mymax), 10)[2]
else grid.line
if (myby/mymax > 0.9)
myby <- mymax * 0.9
xyplot.args <- list(x = myform, xlim = 1.03 * c(-max.freq -
off.set, max.freq + off.set), ylim = 1.03 * c(-max.freq -
off.set, max.freq + off.set), data = results.grid, type = "n",
sub = stat.lab, strip = strip, strip.left = strip.left,
as.table = TRUE, aspect = 1, par.strip.text = list(cex = 0.8),
scales = list(draw = FALSE), panel = function(x, y, subscripts,
...) {
panel.xyplot(x, y, ...)
angles <- seq(0, 2 * pi, length = 360)
sapply(seq(off.set, mymax, by = myby), function(x) llines(x *
sin(angles), x * cos(angles), col = "grey85",
lwd = 1))
subdata <- results.grid[subscripts, ]
upper <- max.freq + off.set
larrows(-upper, 0, upper, 0, code = 3, length = 0.1)
larrows(0, -upper, 0, upper, code = 3, length = 0.1)
ltext(upper * -1 * 0.95, 0.07 * upper, "W", cex = 0.7)
ltext(0.07 * upper, upper * -1 * 0.95, "S", cex = 0.7)
ltext(0.07 * upper, upper * 0.95, "N", cex = 0.7)
ltext(upper * 0.95, 0.07 * upper, "E", cex = 0.7)
if (nrow(subdata) > 0) {
for (i in 1:nrow(subdata)) {
with(subdata, {
for (j in 1:length(theLabels)) {
if (j == 1) {
temp <- "poly(wd[i], 0, x1[i], width * box.widths[1], col[1])"
} else {
temp <- paste("poly(wd[i], x", j - 1,
"[i], x", j, "[i], width * box.widths[",
j, "], col[", j, "])", sep = "")
}
eval(parse(text = temp))
}
})
}
}
ltext(seq((myby + off.set), mymax, myby) * sin(pi/4),
seq((myby + off.set), mymax, myby) * cos(pi/4),
paste(seq(myby, mymax, by = myby), stat.unit,
sep = ""), cex = 0.7)
if (annotate) if (statistic != "prop.mean") {
if (!diff) {
ltext(max.freq + off.set, -max.freq - off.set,
label = paste(stat.lab2, " = ", subdata$panel.fun[1],
"\ncalm = ", subdata$calm[1], stat.unit,
sep = ""), adj = c(1, 0), cex = 0.7, col = calm.col)
}
if (diff) {
ltext(max.freq + off.set, -max.freq - off.set,
label = paste("mean ws = ", round(subdata$panel.fun[1],
1), "\nmean wd = ", round(subdata$mean.wd[1],
1), sep = ""), adj = c(1, 0), cex = 0.7,
col = calm.col)
}
} else {
ltext(max.freq + off.set, -max.freq - off.set,
label = paste(stat.lab2, " = ", subdata$panel.fun[1],
stat.unit, sep = ""), adj = c(1, 0), cex = 0.7,
col = calm.col)
}
}, legend = legend)
xyplot.args <- openair:::listUpdate(xyplot.args, extra.args)
plt <- do.call(xyplot, xyplot.args)
if (length(type) == 1)
plot(plt)
else plot(useOuterStrips(plt, strip = strip, strip.left = strip.left))
newdata <- results.grid
output <- list(plot = plt, data = newdata, call = match.call())
class(output) <- "openair"
invisible(output)
}
Here I've copied the entire source and made a new function, windRose.2, with the only difference being that stat.lab <- "Frequency of counts by wind direction (%)" is now stat.lab <- "".

How to get the stars command to have segments of different angles? (in R)

I am playing with the "stars" ({graphics}) function to create a segment of flowers.
I wish to plot a flower of segments, for example in way the following command will produce:
stars1(mtcars[, 1:7],
draw.segments = T,
main = "Motor Trend Cars : stars(*, full = F)", full = T, col.radius = 1:8)
But I want the segments to not all have equal angles; some should have smaller angles (and there could be space between the segments).
The goal I am striving for is to be able to give each segment a "weight", so that more important aspects get a larger weight (and a wider angle) and less important ones a smaller angle.
I understand this can be changed in the following part of the stars command:
if (draw.segments) {
aangl <- c(angles, if (full) 2 * pi else pi)
for (i in 1L:n.loc) {
px <- py <- numeric()
for (j in 1L:n.seg) {
k <- seq.int(from = aangl[j], to = aangl[j +
1], by = 1 * deg)
px <- c(px, xloc[i], s.x[i, j], x[i, j] * cos(k) +
xloc[i], NA)
py <- c(py, yloc[i], s.y[i, j], x[i, j] * sin(k) +
yloc[i], NA)
}
polygon(px, py, col = col.segments, lwd = lwd, lty = lty)
}
But I am unsure how to manipulate it in order to achieve my task (weighted flowers, via different segment angles).
Do you have any perceptual justification for this change? If the weights are going to vary by star it's going to be very hard to interpret the plot.
(But it should be trivial to implement - instead of using equally distributed angles, use weights: angles <- weights / sum(weights) * 2 * pi)
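In the stars() source, angles holds the starting angle of each segment, so the weights have to be turned into cumulative start angles. A small sketch of that conversion (the weights vector here is hypothetical, one entry per plotted column):
weights <- c(.2, .2, 1, 1, .4, .4, .6, .9)  # one weight per column
seg.width <- weights / sum(weights) * 2 * pi  # angular width of each segment
angles <- cumsum(c(0, seg.width))[seq_along(weights)]  # start angle of each segment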
I found out how to do it.
For future reference, here is the code:
# functions we'll need...
add.num.before.and.after <- function(vec, num = NULL)
{
# this will add a number before and after every number in a vector.
# the default adds the number which is one more than the length of the vector
# assuming that later we will add a zero column to a data.frame and will use that column to add the zero columns...
if(is.null(num)) num <- rep(length(vec) +1, length(vec))
if(length(num)==1) num <- rep(num, length(vec))
#x <- as.list(vec)
list.num.x.num <- sapply(seq_along(vec) , function(i) c(num[i], vec[i], num[i]), simplify = F)
num.x.num <- unlist(list.num.x.num)
return(num.x.num)
}
add.0.columns.to.DF <- function(DF, zero.column.name = " ")
{
# this function gets a data frame
# and returns a data.frame with extra two columns (of zeros) before and after every column
zero.column <- rep(0, dim(DF)[1]) # the column of zeros
column.seq <- seq_len(dim(DF)[2]) # the column ID for the original data.frame
DF.new.order <- add.num.before.and.after(column.seq) # add the last column id before and after every element in the column id vector
DF.and.zero <- cbind(DF, zero.column) # making a new data.frame with a zero column at the end
new.DF <- DF.and.zero[,DF.new.order] # moving the zero column (and replicating it) before and after every column in the data.frame
# renaming the zero columns to be " "
columns.to.erase.names <- ! (colnames(new.DF) %in% colnames(DF))
colnames(new.DF)[columns.to.erase.names] <- zero.column.name
return(new.DF)
}
angles.by.weight <- function(angles, weights = NULL)
{
angles <- angles[-1] # remove the 0 from "angles"
angles <- c(angles, 2*pi) # add last slice angle
number.of.slices = length(angles)
if(is.null(weights)) weights <- rep(.6, number.of.slices) # Just for the example
slice.angle <- diff(angles)[1]
#new.angles <- rep(0, 3*length(angles))
new.angles <- numeric()
for(i in seq_along(angles))
{
weighted.slice.angle <- slice.angle*weights[i]
half.leftover.weighted.slice.angle <- slice.angle* ((1-weights[i])/2)
angle1 <- angles[i] - (weighted.slice.angle + half.leftover.weighted.slice.angle)
angle2 <- angles[i] - half.leftover.weighted.slice.angle
angle3 <- angles[i]
new.angles <- c(new.angles,
angle1,angle2,angle3)
}
new.angles.length <- length(new.angles)
new.angles <- c(0, new.angles[-new.angles.length])
return(new.angles)
}
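A quick, purely illustrative check of what the two helpers do:
add.num.before.and.after(1:3)
# [1] 4 1 4 4 2 4 4 3 4
head(add.0.columns.to.DF(mtcars[, 1:2]))
# a zero column named " " is inserted before and after mpg and cyl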
# The updated stars function
stars2 <-
function (x, full = TRUE, scale = TRUE, radius = TRUE, labels =
dimnames(x)[[1L]],
locations = NULL, nrow = NULL, ncol = NULL, len = 1, key.loc = NULL,
key.labels = dimnames(x)[[2L]], key.xpd = TRUE, xlim = NULL,
ylim = NULL, flip.labels = NULL, draw.segments = FALSE, col.segments = 1L:n.seg,
col.stars = NA, axes = FALSE, frame.plot = axes, main = NULL,
sub = NULL, xlab = "", ylab = "", cex = 0.8, lwd = 0.25,
lty = par("lty"), xpd = FALSE, mar = pmin(par("mar"), 1.1 +
c(2 * axes + (xlab != ""), 2 * axes + (ylab != ""), 1,
# 0)), add = FALSE, plot = TRUE, ...)
0)), add = FALSE, plot = TRUE, col.radius = NA, polygon = TRUE,
key.len = len,
segment.weights = NULL,
...)
{
if (is.data.frame(x))
x <- data.matrix(x)
else if (!is.matrix(x))
stop("'x' must be a matrix or a data frame")
if (!is.numeric(x))
stop("data in 'x' must be numeric")
# this code was moved here so that the angles will be proparly created...
n.seg <- ncol(x) # this will be changed to the ncol of the new x - in a few rows...
# creates the angles
angles <- if (full)
seq.int(0, 2 * pi, length.out = n.seg + 1)[-(n.seg + 1)]
else if (draw.segments)
seq.int(0, pi, length.out = n.seg + 1)[-(n.seg + 1)]
else seq.int(0, pi, length.out = n.seg)
if (length(angles) != n.seg)
stop("length of 'angles' must equal 'ncol(x)'")
# changing to allow weighted segments
angles <- angles.by.weight(angles, segment.weights)
#angles <- angles.by.weight.2(angles) # try2
# try3
# weights <- sample(c(.3,.9), length(angles)-1, replace = T)
# angles <- weights / sum(weights) * 2 * pi
# angles <- c(0,angles )
# changing to allow weighted segments
col.segments <- add.num.before.and.after(col.segments, "white") # for colors
x <- add.0.columns.to.DF(x)
n.loc <- nrow(x)
n.seg <- ncol(x)
if (is.null(locations)) {
if (is.null(nrow))
nrow <- ceiling(if (!is.numeric(ncol)) sqrt(n.loc) else n.loc/ncol)
if (is.null(ncol))
ncol <- ceiling(n.loc/nrow)
if (nrow * ncol < n.loc)
stop("nrow * ncol < number of observations")
ff <- if (!is.null(labels))
2.3
else 2.1
locations <- expand.grid(ff * 1L:ncol, ff * nrow:1)[1L:n.loc,
]
if (!is.null(labels) && (missing(flip.labels) ||
!is.logical(flip.labels)))
flip.labels <- ncol * mean(nchar(labels, type = "c")) >
30
}
else {
if (is.numeric(locations) && length(locations) == 2) {
locations <- cbind(rep.int(locations[1L], n.loc),
rep.int(locations[2L], n.loc))
if (!missing(labels) && n.loc > 1)
warning("labels do not make sense for a single location")
else labels <- NULL
}
else {
if (is.data.frame(locations))
locations <- data.matrix(locations)
if (!is.matrix(locations) || ncol(locations) != 2)
stop("'locations' must be a 2-column matrix.")
if (n.loc != nrow(locations))
stop("number of rows of 'locations' and 'x' must be equal.")
}
if (missing(flip.labels) || !is.logical(flip.labels))
flip.labels <- FALSE
}
xloc <- locations[, 1]
yloc <- locations[, 2]
# Here we created the angles, but I moved it to the beginning of the code
if (scale) {
x <- apply(x, 2L, function(x) (x - min(x, na.rm = TRUE))/diff(range(x,
na.rm = TRUE)))
}
x[is.na(x)] <- 0
mx <- max(x <- x * len)
if (is.null(xlim))
xlim <- range(xloc) + c(-mx, mx)
if (is.null(ylim))
ylim <- range(yloc) + c(-mx, mx)
deg <- pi/180
op <- par(mar = mar, xpd = xpd)
on.exit(par(op))
if (plot && !add)
plot(0, type = "n", ..., xlim = xlim, ylim = ylim, main = main,
sub = sub, xlab = xlab, ylab = ylab, asp = 1, axes = axes)
if (!plot)
return(locations)
s.x <- xloc + x * rep.int(cos(angles), rep.int(n.loc, n.seg))
s.y <- yloc + x * rep.int(sin(angles), rep.int(n.loc, n.seg))
if (draw.segments) {
aangl <- c(angles, if (full) 2 * pi else pi)
for (i in 1L:n.loc) {
px <- py <- numeric()
for (j in 1L:n.seg) {
k <- seq.int(from = aangl[j], to = aangl[j +
1], by = 1 * deg)
px <- c(px, xloc[i], s.x[i, j], x[i, j] * cos(k) +
xloc[i], NA)
py <- c(py, yloc[i], s.y[i, j], x[i, j] * sin(k) +
yloc[i], NA)
}
polygon3(px, py, col = col.segments, lwd = lwd, lty = lty)
}
}
else {
for (i in 1L:n.loc) {
# polygon3(s.x[i, ], s.y[i, ], lwd = lwd, lty = lty,
# col = col.stars[i])
if (polygon)
polygon3(s.x[i, ], s.y[i, ], lwd = lwd, lty = lty,
col = col.stars[i])
if (radius)
segments(rep.int(xloc[i], n.seg), rep.int(yloc[i],
# n.seg), s.x[i, ], s.y[i, ], lwd = lwd, lty = lty)
n.seg), s.x[i, ], s.y[i, ], lwd = lwd, lty = lty, col =
col.radius)
}
}
if (!is.null(labels)) {
y.off <- mx * (if (full)
1
else 0.1)
if (flip.labels)
y.off <- y.off + cex * par("cxy")[2L] * ((1L:n.loc)%%2 -
if (full)
0.4
else 0)
text(xloc, yloc - y.off, labels, cex = cex, adj = c(0.5,
1))
}
if (!is.null(key.loc)) {
par(xpd = key.xpd)
key.x <- key.len * cos(angles) + key.loc[1L]
key.y <- key.len * sin(angles) + key.loc[2L]
if (draw.segments) {
px <- py <- numeric()
for (j in 1L:n.seg) {
k <- seq.int(from = aangl[j], to = aangl[j +
1], by = 1 * deg)
px <- c(px, key.loc[1L], key.x[j], key.len * cos(k) +
key.loc[1L], NA)
py <- c(py, key.loc[2L], key.y[j], key.len * sin(k) +
key.loc[2L], NA)
}
polygon3(px, py, col = col.segments, lwd = lwd, lty = lty)
}
else {
# polygon3(key.x, key.y, lwd = lwd, lty = lty)
if (polygon)
polygon3(key.x, key.y, lwd = lwd, lty = lty)
if (radius)
segments(rep.int(key.loc[1L], n.seg), rep.int(key.loc[2L],
# n.seg), key.x, key.y, lwd = lwd, lty = lty)
n.seg), key.x, key.y, lwd = lwd, lty = lty, col = col.radius)
}
lab.angl <- angles + if (draw.segments)
(angles[2L] - angles[1L])/2
else 0
label.x <- 1.1 * key.len * cos(lab.angl) + key.loc[1L]
label.y <- 1.1 * key.len * sin(lab.angl) + key.loc[2L]
for (k in 1L:n.seg) {
text.adj <- c(if (lab.angl[k] < 90 * deg || lab.angl[k] >
270 * deg) 0 else if (lab.angl[k] > 90 * deg &&
lab.angl[k] < 270 * deg) 1 else 0.5, if (lab.angl[k] <=
90 * deg) (1 - lab.angl[k]/(90 * deg))/2 else if (lab.angl[k] <=
270 * deg) (lab.angl[k] - 90 * deg)/(180 * deg) else 1 -
(lab.angl[k] - 270 * deg)/(180 * deg))
text(label.x[k], label.y[k], labels = key.labels[k],
cex = cex, adj = text.adj)
}
}
if (frame.plot)
box(...)
invisible(locations)
}
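Note that stars2 calls polygon3(), which is not defined in the snippet above. Assuming it is simply meant to be the base polygon() (a guess on my part), you can alias it before running the example:
polygon3 <- graphics::polygon  # assumption: polygon3 is just base polygon()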
Here is an example of running this:
#require(debug)
# mtrace(stars2)
stars(mtcars[1:3, 1:8],
draw.segments = T,
main = "Motor Trend Cars : stars(*, full = F)", full = T, col.segments = 1:2)
stars2(mtcars[1:3, 1:8],
draw.segments = T,
main = "Motor Trend Cars : stars(*, full = F)", full = T, col.segments = 0:3,
segment.weights = c(.2,.2,1,1,.4,.4,.6,.9))
(I'll probably publish this with explanation on my blog sometime soon...)
