How to plot non-linear decision boundaries with a grid in R? - r

I find this particular graph in ISLR (Figure 2.13) or ESL very well done. I can't guess how the authors would have made this in R. I know how to get the orange and blue points very easily. The main confusion is the background dots and the purple line.
Any ideas?
Here is some sample code to get the yellow and orange points with a grey grid. How do I get an arbitrary non-linear curve in purple and then color the grid according to the curve?
set.seed(pi)
points = replicate(100, runif(2))
pointsColored = ifelse(apply(points, 2, sum) <= 1, "orange", "blue")
# Confound some
pointsColored[sample.int(length(pointsColored), 10)] = "orange"
plot(x=points[1, ], y=points[2, ])
grid(nx=100, ny=100)
# Plot points over the grid.
points(x=points[1, ], y=points[2, ], col=pointsColored)

As I indicated in my comment, a solution was provided by #chl here on stats.stackexchange.com. Here it is, applied to your data set.
library(class)
set.seed(pi)
X <- t(replicate(1000, runif(2)))
g <- ifelse(apply(X, 1, sum) <= 1, 0, 1)
xnew <- cbind(rep(seq(0, 1, length.out=50), 50),
rep(seq(0, 1, length.out=50), each=50))
m <- knn(X, xnew, g, k=15, prob=TRUE)
prob <- attr(m, "prob")
prob <- ifelse(m=="1", prob, 1-prob)
prob15 <- matrix(prob, 50)
par(mar=rep(3, 4))
contour(unique(xnew[, 1]), unique(xnew[, 2]), prob15, levels=0.5,
labels="", xlab='', ylab='', axes=FALSE, lwd=2.5, asp=1)
title(xlab=expression(italic('X')[1]), ylab=expression(italic('X')[2]),
line=1, family='serif', cex.lab=1.5)
points(X, bg=ifelse(g==1, "#CA002070", "#0571B070"), pch=21)
gd <- expand.grid(x=unique(xnew[, 1]), y=unique(xnew[, 2]))
points(gd, pch=20, cex=0.4, col=ifelse(prob15 > 0.5, "#CA0020", "#0571B0"))
box()
(UPDATE: I changed the colour palette because the blue/yellow/purple thing was pretty hideous.)

This was my silly attempt at approximation. Clearly the issues raised by #StephenKolassa are valid and not handled by this approximation.
myCurve1 = function (x)
abs(x[[1]] * sin(x[[1]]) + x[[2]] * sin(x[[2]]))
myCurve2 = function (x)
abs(x[[1]] * cos(x[[1]]) + x[[2]] * cos(x[[2]]))
myCurve3 = function (x)
abs(x[[1]] * tan(x[[1]]) + x[[2]] * tan(x[[2]]))
tmp = function (myCurve, seed=99) {
set.seed(seed)
points = replicate(100, runif(2))
colors = ifelse(apply(points, 2, myCurve) > 0.5, "orange", "blue")
# Confound some
swapInts = sample.int(length(colors), 6)
for (i in swapInts) {
if (colors[[i]] == "orange") {
colors[[i]] = "blue"
} else {
colors[[i]] = "orange"
}
}
gridPoints = seq(0, 1, 0.005)
gridPoints = as.matrix(expand.grid(gridPoints, gridPoints))
gridColors = vector("character", nrow(gridPoints))
gridPch = vector("character", nrow(gridPoints))
for (i in 1:nrow(gridPoints)) {
val = myCurve(gridPoints[i, ])
if (val > 0.505) {
gridColors[[i]] = "orange"
gridPch[[i]] = "."
} else if (val < 0.495) {
gridColors[[i]] = "blue"
gridPch[[i]] = "."
} else {
gridColors[[i]] = "purple"
gridPch[[i]] = "*"
}
}
plot(x=gridPoints[ , 1], y=gridPoints[ , 2], col=gridColors, pch=gridPch)
points(x=points[1, ], y=points[2, ], col=colors, lwd=2)
}
par(mfrow=c(1, 3))
tmp(myCurve1)
tmp(myCurve2)
tmp(myCurve3)

Related

R function to plot inequalities with shading

Suppose I have a set of inequalities:
-2x + y <= -3
1.25x + y <= 2.5
y >= -3
And I can summarize the information as follows:
mat <- matrix(c(-2, 1, 1.25, 1, 0, 1), nrow = 3, byrow = TRUE)
dir <- c("<=", "<=", ">=")
rhs <- c(-3, 2.5, -3)
I wrote the following function to plot the inequalities:
plot(0, 0, xlim = c(-1, 5), ylim = c(-4, 1))
plot_ineq <- function(mat, dir, rhs, xlow, xhigh){
line <- list()
for(i in 1:nrow(mat)){
if(mat[i, 2] > 0){
line[[i]] <- sapply(seq(xlow, xhigh, 0.1), function(x) (rhs[i] - mat[i, 1] * x)/mat[i, 2])
}else if(mat[i, 2] < 0){
line[[i]] <- sapply(seq(xlow, xhigh, 0.1), function(x) (rhs[i] - mat[i, 1] * x)/mat[i, 2])
if(dir[i] == ">="){
dir[i] = "<="
}else dir[i] = ">="
}
lines(seq(xlow, xhigh, 0.1), line[[i]])
}
}
plot_ineq(mat = mat, dir = dir, rhs = rhs, xlow = -1, xhigh = 5)
I have two questions: (1) how can I have a blank plot without having the (0, 0) point there? and (2) how can I shade the corresponding region according to dir? Should I try ggplot2?
I'm simply looking to shade the area that is described by the set of inequalities above. Not where (0, 0) lies.
1) Change the last inequality to be the same direction as the others and then use plotPolytope in gMOIP.
library(gMOIP)
mat <- matrix(c(-2, 1, 1.25, 1, 0, -1), nrow = 3, byrow = TRUE)
rhs <- c(-3, 2.5, 3)
argsFaces <- list(argsGeom_polygon = list(fill = "blue"))
plotPolytope(mat, rhs, argsFaces = argsFaces)
giving (continued after image)
2) The above uses ggplot2 graphics but if you prefer classic graphics then using mat and rhs from above:
library(gMOIP)
cp <- cornerPoints(mat, rhs)
cp <- cp[chull(cp), ] # chull gives indices of convex hull in order
plot(cp, type = "n")
polygon(cp, col = "blue")
# not shown but to add lines run this too
for(i in 1:nrow(cp)) {
ix <- if (i < nrow(cp)) i + 0:1 else c(i, 1)
b <- diff(cp[ix, 2]) / (d <- diff(cp[ix, 1]))
if (abs(d) < 1e-5) abline(v = a <- cp[i, 1])
else abline(a = a <- cp[i, 2] - b * cp[i, 1], b = b)
}
giving (continued after image)
3) Note that there is an archived package named intpoint on CRAN and it could be used to draw the boundary of the feasible region and lines. It does have the limitation that it is hard coded to show X and Y axes between -1 and 5 although it might not be hard to generalize it. It is used like this (output not shown) where mat, rhs and cp are from above.
library(intpoint)
intpoint:::show2d(mat, rhs, c = numeric(2))
polygon(cp, col = "blue")

How to plot two different user defined functions in the same plot in R

I need to plot 2 different user defined function in the same R plot.
I vectorize each of them:
Vectorize creates a function wrapper that vectorizes the action of its argument FUN. Vectorize(FUN, vectorize.args = arg.names, SIMPLIFY = TRUE,USE.NAMES = TRUE)
If I plot them separately I get the correct plot, however if I try to plot both functions in the same graph, it does not work.
Here is what I did:
1) first function:
payoff_call <- function(S, K){
if(S < 0 | K < 0){
return(print("The input S and K must be > 0"))
}else{
return(max(S-K,0))
}
}
2) second function:
myBlackScholes <- function(S, K, tau, r, sigma, type = c("call", "put")) {
if(S < 0 | K < 0 | tau < 0 | sigma < 0) {
return(print("The input S , K , tau and sigma must be > 0"))
} else
{
d1 <- (log(S/K) + (r + 0.5*sigma^2)*tau)/(sigma*sqrt(tau))
d2 <- d1 - sigma*sqrt(tau)
if(type == "call"){
output <- cbind(
V_BS_Call = S*pnorm(d1) - K*exp(-r*(tau))*pnorm(d2), #fair value call
delta_call = pnorm(d1), #delta call
vega_call = S*sqrt(tau)*dnorm(d1), #vega call
theta_call = -S*dnorm(d1)*sigma/(2*sqrt(tau)) - r*K*exp(-r*tau)*pnorm(d2), #theta call
rho_call = K*tau*exp(-r*tau)*pnorm(d2), #rho call
kappa_call = -exp(-r*tau)*(pnorm(-d2)-1), #kappa call
gamma_call = dnorm(d1)/(S*sigma*sqrt(tau)))#gamma call
return(output)
} else if(type == "put"){
output <- cbind(
V_BS_Put = K*exp(-r*(tau))*pnorm(-d2) - S*pnorm(-d1), #fair value put
delta_put = pnorm(d1)-1, #delta put
vega_put = S*sqrt(tau)*dnorm(d1), #vega put same as vega call
theta_put = -S*dnorm(d1)*sigma/(2*sqrt(tau)) + r*K*exp(-r*tau)*pnorm(-d2), #theta put
rho_put = -K*tau*exp(-r*tau)*pnorm(-d2), #rho put
kappa_put = exp(-r*tau)*pnorm(-d2), #kappa put
gamma_put = dnorm(d1)/(S*sigma*sqrt(tau))) #gamma put
return(output)
} else{
return(print("Wrong type in input"))
}
}
}
3) I vectorize each function:
vect_payoff_call <- Vectorize(payoff_call)
vect_myBlackScholes <- Vectorize(myBlackScholes)
4) I plot the 2 functions, for S starting at 0 to 100:
plot(x = 0:100, y = vect_payoff_call(0:100, 50),
type="l", col="blue", lty = 1, lwd = 1,
main = "Long Call Option Payoff function", xlab = "S", ylab = expression(f(S)))
plot(x = 0:100, y = vect_myBlackScholes(0:100,50, 1, 0.12, 0.3, "call")[1,], type="l", col="green", lty = 1, lwd = 1, add=TRUE)
The first plot is correct, but the second is not.
Any suggestion?
Here is how. Note that I use ggplot2 in my example:
library(ggplot2)
x <- seq(0,2, by=0.1)
my_square <- function(x) x^2
my_cube <- function(x) x^3
my_data <- data.frame(argx = x, my_square = my_square(x),
my_cube = my_cube(x))
ggplot(my_data) +
geom_point(aes(argx, my_square, color = 'x^2')) +
geom_line(aes(argx, my_square, color = 'x^2')) +
geom_point(aes(argx, my_cube, color = 'x^3')) +
geom_line(aes(argx, my_cube, color = 'x^3')) +
theme_bw() +
labs(x = 'x', y = 'y') +
scale_color_manual(values = c('x^2' = 'red', 'x^3' = 'green'), name = 'function')
Output

BRT: Add gradient colors to interaction plots using gbm.perspec

I would like to add a gradient of colours following the fitted values (e.g. higher fitted values darker colours, lower fitted values lighter colours) in my three-dimensional dependence plots.
I have used the example presented in dismo package:
library(dismo)
data(Anguilla_train)
angaus.tc5.lr01 <- gbm.step(data=Anguilla_train, gbm.x = 3:13, gbm.y = 2,
family = "bernoulli", tree.complexity = 5, learning.rate = 0.01,
bag.fraction = 0.5)
# Find interactions in the gbm model:
find.int <- gbm.interactions( angaus.tc5.lr01)
find.int$interactions
find.int$rank.list
I have only managed to add the same colour to the whole plot:
gbm.perspec( angaus.tc5.lr01, 7, 1,
x.label = "USRainDays",
y.label = "SegSumT",
z.label = "Fitted values",
z.range=c(0,0.435),
col="blue")
Or to add a gradient colour but not following the fitted values:
gbm.perspec( angaus.tc5.lr01, 7, 1,
x.label = "USRainDays",
y.label = "SegSumT",
z.label = "Fitted values",
col=heat.colors(50),
z.range=c(0,0.435))
I also checked the code of function gbm.perspec, and If I understood correctly the fitted values are call inside the formula as "prediction", and later on are part of the "pred.matrix" that is passed to the final plotting: persp(x = x.var, y = y.var, z = pred.matrix...), but I have no managed to access them from the gbm.perspec formula. I tried to modified the gbm.perpec function by adding "col=heat.colors(100)[round(pred.matrix*100, 0)]" into the persp() inside the function, but it does not do what I am looking for:
persp(x = x.var, y = y.var, z = pred.matrix, zlim = z.range,
xlab = x.label, ylab = y.label, zlab = z.label,
theta = theta, phi = phi, r = sqrt(10), d = 3,
ticktype = ticktype,
col=heat.colors(100)[round(pred.matrix*100, 0)],
mgp = c(4, 1, 0), ...)
I believe the solution might come from modifying the gbm.perpec function, do you know how?
Thank you for your time!
Modifying the gbm.perspec function is certainly an option, although if you use the predicted values from the gbm model and plot them onto a 3D scatterplot from another package you should be able to achieve it as well.
Here's an option using the plot3Drgl package, original code was provided by #Fabrice.
library(dismo); library(plot3Drgl); library(devEMF)
data(Anguilla_train)
angaus.tc5.lr01 <- gbm.step(data=Anguilla_train, gbm.x = 3:13, gbm.y = 2,
family = "bernoulli", tree.complexity = 5, learning.rate = 0.01,
bag.fraction = 0.5)
# Find interactions in the gbm model:
find.int <- gbm.interactions( angaus.tc5.lr01)
find.int$interactions
find.int$rank.list
d<-plot(angaus.tc5.lr01,c(1,7),return.grid=T)
x <- d$SegSumT
y <- d$USRainDays
z <- d$y
grid.lines = 30
elevation.site = loess(z ~ x*y, data=d, span=1, normalize = FALSE)
x.pred <- seq(min(x), max(x), length.out = grid.lines) # x grid
y.pred <- seq(min(y), max(y), length.out = grid.lines) # y grid
xy <- expand.grid( x = x.pred, y = y.pred) # final grid combined
z.site=matrix(predict(elevation.site, newdata = xy), nrow = grid.lines, ncol = grid.lines) # predicedt matrix
scatter3D(x, y, z, theta = 160, phi = 35, # x y z coords and angle of plot
clab = c(""), # Needs moving - label legend
colkey = list(side = 4, length = 0.65,
adj.clab = 0.15, dist = -0.15, cex.clab = 0.6, cex.axis = 0.6), # change the location and length of legend, change position of label and legend
clim = c(-4,0.1),
bty = "b", # type of box
col = ramp.col(col = c("grey", "blue"), 200),
pch = 19, cex = 0.55, # shape and size of points
xlab = "SegSumT",
xlim=c(10,20),ylim=c(0,3.5), zlim=c(-4,0.1), d= 2,
ylab = "USRaindays",
zlab= "Fitted values", #axes labels
cex.lab = 0.8, font.lab = 1, cex.axis = 0.6, font.axis= 1, # size and font of axes and ticks
ticktype = "detailed", nticks = 5, # ticks and numer of ticks
#type = "h", # vertical lines
surf = list(x = x.pred, y = y.pred, z = z.site,
facets = NA, CI=NULL))
enter image description here
By tweaking with grid.lines and reversing the x axis you should be able to produce exactly what you want.
By incorporating some of the code found here into the gbm.perspec() source code you can create the desired effect.
First run
# Color palette (100 colors)
col.pal<-colorRampPalette(c("blue", "red"))
colors<-col.pal(100)
Then, add z.facet.center to gbm.perspec() source code after else and change the z in the code to pred.matrixas follows,
# and finally plot the result
#
if (!perspective) {
image(x = x.var, y = y.var, z = pred.matrix, zlim = z.range)
} else {
z.facet.center <- (pred.matrix[-1, -1] + pred.matrix[-1, -ncol(pred.matrix)] +
pred.matrix[-nrow(pred.matrix), -1] + pred.matrix[-nrow(pred.matrix), -ncol(pred.matrix)])/4
# Range of the facet center on a 100-scale (number of colors)
z.facet.range<-cut(z.facet.center, 100)
persp(x=x.var, y=y.var, z=pred.matrix, zlim= z.range, # input vars
xlab = x.label, ylab = y.label, zlab = z.label, # labels
theta=theta, phi=phi, r = sqrt(10), d = 3,
col=colors[z.facet.range],# viewing pars
ticktype = ticktype, mgp = c(4,1,0), ...) #
which will give you a plot like this (please note, this is not plotted using the sample dataset which is why the interaction effect is different than the plot in the question).
Alternatively, you can create a new function. The following example modifies gbm.perspec() to give a white-to-red gradient. Simply run the code in R, then change gbm.perspec() to gbm.perspec2()
# interaction function
# Color palette (100 colors)
col.pal<-colorRampPalette(c("white", "pink", "red"))
colors<-col.pal(100)
gbm.perspec2 <- function(gbm.object,
x = 1, # the first variable to be plotted
y = 2, # the second variable to be plotted
pred.means = NULL, # allows specification of values for other variables
x.label = NULL, # allows manual specification of the x label
x.range = NULL, # manual range specification for the x variable
y.label = NULL, # and y la seminar committeebel
z.label = "fitted value", #default z label
y.range = NULL, # and the y
z.range = NULL, # allows control of the vertical axis
leg.coords = NULL, #can specify coords (x, y) for legend
ticktype = "detailed",# specifiy detailed types - otherwise "simple"
theta = 55, # rotation
phi=40, # and elevation
smooth = "none", # controls smoothing of the predicted surface
mask = FALSE, # controls masking using a sample intensity model
perspective = TRUE, # controls whether a contour or perspective plot is drawn
...) # allows the passing of additional arguments to plotting routine
# useful options include shade, ltheta, lphi for controlling illumination
# and cex for controlling text size - cex.axis and cex.lab have no effect
{
if (! requireNamespace('gbm') ) { stop('you need to install the gbm package to use this function') }
requireNamespace('splines')
#get the boosting model details
gbm.call <- gbm.object$gbm.call
gbm.x <- gbm.call$gbm.x
n.preds <- length(gbm.x)
gbm.y <- gbm.call$gbm.y
pred.names <- gbm.call$predictor.names
family = gbm.call$family
# and now set up range variables for the x and y preds
have.factor <- FALSE
x.name <- gbm.call$predictor.names[x]
if (is.null(x.label)) {
x.label <- gbm.call$predictor.names[x]
}
y.name <- gbm.call$predictor.names[y]
if (is.null(y.label)) {
y.label <- gbm.call$predictor.names[y]
}
data <- gbm.call$dataframe[ , gbm.x, drop=FALSE]
n.trees <- gbm.call$best.trees
# if marginal variable is a vector then create intervals along the range
if (is.vector(data[,x])) {
if (is.null(x.range)) {
x.var <- seq(min(data[,x],na.rm=T),max(data[,x],na.rm=T),length = 50)
} else {
x.var <- seq(x.range[1],x.range[2],length = 50)
}
} else {
x.var <- names(table(data[,x]))
have.factor <- TRUE
}
if (is.vector(data[,y])) {
if (is.null(y.range)) {
y.var <- seq(min(data[,y],na.rm=T),max(data[,y],na.rm=T),length = 50)
} else {y.var <- seq(y.range[1],y.range[2],length = 50)}
} else {
y.var <- names(table(data[,y]))
if (have.factor) { #check that we don't already have a factor
stop("at least one marginal predictor must be a vector!")
} else {have.factor <- TRUE}
}
pred.frame <- expand.grid(list(x.var,y.var))
names(pred.frame) <- c(x.name,y.name)
pred.rows <- nrow(pred.frame)
#make sure that the factor variable comes first
if (have.factor) {
if (is.factor(pred.frame[,2])) { # swap them about
pred.frame <- pred.frame[,c(2,1)]
x.var <- y.var
}
}
j <- 3
# cycle through the predictors
# if a non-target variable find the mean
for (i in 1:n.preds) {
if (i != x & i != y) {
if (is.vector(data[,i])) {
m <- match(pred.names[i],names(pred.means))
if (is.na(m)) {
pred.frame[,j] <- mean(data[,i],na.rm=T)
} else pred.frame[,j] <- pred.means[m]
}
if (is.factor(data[,i])) {
m <- match(pred.names[i],names(pred.means))
temp.table <- table(data[,i])
if (is.na(m)) {
pred.frame[,j] <- rep(names(temp.table)[2],pred.rows)
} else {
pred.frame[,j] <- pred.means[m]
}
pred.frame[,j] <- factor(pred.frame[,j],levels=names(temp.table))
}
names(pred.frame)[j] <- pred.names[i]
j <- j + 1
}
}
#
# form the prediction
#
#assign("pred.frame", pred.frame, pos=1)
prediction <- gbm::predict.gbm(gbm.object,pred.frame,n.trees = n.trees, type="response")
#assign("prediction", prediction, pos=1, immediate =T)
# model smooth if required
if (smooth == "model") {
pred.glm <- glm(prediction ~ ns(pred.frame[,1], df = 8) * ns(pred.frame[,2], df = 8), data=pred.frame,family=poisson)
prediction <- fitted(pred.glm)
}
# report the maximum value and set up realistic ranges for z
max.pred <- max(prediction)
message("maximum value = ",round(max.pred,2),"\n")
if (is.null(z.range)) {
if (family == "bernoulli") {
z.range <- c(0,1)
} else if (family == "poisson") {
z.range <- c(0,max.pred * 1.1)
} else {
z.min <- min(data[,y],na.rm=T)
z.max <- max(data[,y],na.rm=T)
z.delta <- z.max - z.min
z.range <- c(z.min - (1.1 * z.delta), z.max + (1.1 * z.delta))
}
}
# now process assuming both x and y are vectors
if (have.factor == FALSE) {
# form the matrix
pred.matrix <- matrix(prediction,ncol=50,nrow=50)
# kernel smooth if required
if (smooth == "average") { #apply a 3 x 3 smoothing average
pred.matrix.smooth <- pred.matrix
for (i in 2:49) {
for (j in 2:49) {
pred.matrix.smooth[i,j] <- mean(pred.matrix[c((i-1):(i+1)),c((j-1):(j+1))])
}
}
pred.matrix <- pred.matrix.smooth
}
# mask out values inside hyper-rectangle but outside of sample space
if (mask) {
mask.trees <- gbm.object$gbm.call$best.trees
point.prob <- gbm::predict.gbm(gbm.object[[1]],pred.frame, n.trees = mask.trees, type="response")
point.prob <- matrix(point.prob,ncol=50,nrow=50)
pred.matrix[point.prob < 0.5] <- 0.0
}
#
# and finally plot the result
#
if (!perspective) {
image(x = x.var, y = y.var, z = pred.matrix, zlim = z.range)
} else {
z.facet.center <- (pred.matrix[-1, -1] + pred.matrix[-1, -ncol(pred.matrix)] +
pred.matrix[-nrow(pred.matrix), -1] + pred.matrix[-nrow(pred.matrix), -ncol(pred.matrix)])/4
# Range of the facet center on a 100-scale (number of colors)
z.facet.range<-cut(z.facet.center, 100)
persp(x=x.var, y=y.var, z=pred.matrix, zlim= z.range, # input vars
xlab = x.label, ylab = y.label, zlab = z.label, # labels
theta=theta, phi=phi, r = sqrt(10), d = 3,
col=colors[z.facet.range],# viewing pars
ticktype = ticktype, mgp = c(4,1,0), ...) #
}
}
if (have.factor) {
# we need to plot values of y for each x
factor.list <- names(table(pred.frame[,1]))
n <- 1
#add this bit so z.range still works as expected:
if (is.null(z.range)) {
vert.limits <- c(0, max.pred * 1.1)
} else {
vert.limits <- z.range
}
plot(pred.frame[pred.frame[,1]==factor.list[1],2],
prediction[pred.frame[,1]==factor.list[1]],
type = 'l',
#ylim = c(0, max.pred * 1.1),
ylim = vert.limits,
xlab = y.label,
ylab = z.label, ...)
for (i in 2:length(factor.list)) {
#factor.level in factor.list) {
factor.level <- factor.list[i]
lines(pred.frame[pred.frame[,1]==factor.level,2],
prediction[pred.frame[,1]==factor.level], lty = i)
}
# now draw a legend
if(is.null(leg.coords)){
x.max <- max(pred.frame[,2])
x.min <- min(pred.frame[,2])
x.range <- x.max - x.min
x.pos <- c(x.min + (0.02 * x.range),x.min + (0.3 * x.range))
y.max <- max(prediction)
y.min <- min(prediction)
y.range <- y.max - y.min
y.pos <- c(y.min + (0.8 * y.range),y.min + (0.95 * y.range))
legend(x = x.pos, y = y.pos, factor.list, lty = c(1:length(factor.list)), bty = "n")
} else {
legend(x = leg.coords[1], y = leg.coords[2], factor.list, lty = c(1:length(factor.list)), bty = "n", ncol = 2)
}
}
}

Connecting two points with curved lines (s-ish curve) in R

Suppose I would like to produce a kind of tree structure like the one below:
plot(0, type="n",xlim=c(0, 5), ylim=c(-3, 8), axes=FALSE, xlab="", ylab="", main="")
points(1, 2.5)
points(3, 5)
points(3, 0)
lines(c(1, 3), c(2.5, 5))
lines(c(1, 3), c(2.5, 0))
text(1, 2.5, adj=1, label="Parent ")
text(3, 5, adj=0, label=" Child 1")
text(3, 0, adj=0, label=" Child 2")
I wonder if there is a way in R where we can produce curved lines that resemble varying degrees of a S-curve like the ones below. Crucially it would be great if it would be possible to create such lines without resorting to ggplot.
EDIT removed and made into an answer
Following #thelatemail's suggestion, I decided to make my edit into an answer. My solution is based on #thelatemail's answer.
I wrote a small function to draw curves, which makes use of the logistic function:
#Create the function
curveMaker <- function(x1, y1, x2, y2, ...){
curve( plogis( x, scale = 0.08, loc = (x1 + x2) /2 ) * (y2-y1) + y1,
x1, x2, add = TRUE, ...)
}
A working example is below. In this example, I want to create a plot for a taxonomy with 3 levels: parent --> 2 children -- > 20 grandchildren. One child has 12 grandchildren, and the other child has 8 children.
#Prepare data:
parent <- c(1, 16)
children <- cbind(2, c(8, 28))
grandchildren <- cbind(3, (1:20)*2-1)
labels <- c("Parent ", paste("Child ", 1:2), paste(" Grandchild", 1:20) )
#Make a blank plot canvas
plot(0, type="n", ann = FALSE, xlim = c( 0.5, 3.5 ), ylim = c( 0.5, 39.5 ), axes = FALSE )
#Plot curves
#Parent and children
invisible( mapply( curveMaker,
x1 = parent[ 1 ],
y1 = parent[ 2 ],
x2 = children[ , 1 ],
y2 = children[ , 2 ],
col = gray( 0.6, alpha = 0.6 ), lwd = 1.5 ) )
#Children and grandchildren
invisible( mapply( curveMaker,
x1 = children[ 1, 1 ],
y1 = children[ 1, 2 ],
x2 = grandchildren[ 1:8 , 1 ],
y2 = grandchildren[ 1:8, 2 ],
col = gray( 0.6, alpha = 0.6 ), lwd = 1.5 ) )
invisible( mapply( curveMaker,
x1 = children[ 2, 1 ],
y1 = children[ 2, 2 ],
x2 = grandchildren[ 9:20 , 1 ],
y2 = grandchildren[ 9:20, 2 ],
col = gray( 0.6, alpha = 0.6 ), lwd = 1.5 ) )
#Plot text
text( x = c(parent[1], children[,1], grandchildren[,1]),
y = c(parent[2], children[,2], grandchildren[,2]),
labels = labels,
pos = rep(c(2, 4), c(3, 20) ) )
#Plot points
points( x = c(parent[1], children[,1], grandchildren[,1]),
y = c(parent[2], children[,2], grandchildren[,2]),
pch = 21, bg = "white", col="#3182bd", lwd=2.5, cex=1)
Sounds like a sigmoid curve, e.g.:
f <- function(x,s) s/(1 + exp(-x))
curve(f(x,s=1),xlim=c(-4,4))
curve(f(x,s=0.9),xlim=c(-4,4),add=TRUE)
curve(f(x,s=0.8),xlim=c(-4,4),add=TRUE)
curve(f(x,s=0.7),xlim=c(-4,4),add=TRUE)
Result:
You can start to adapt this, e.g. here's a clunky bit of code:
plot(NA,type="n",ann=FALSE,axes=FALSE,xlim=c(-6,6),ylim=c(0,1))
curve(f(x,s=1),xlim=c(-4,4),add=TRUE)
curve(f(x,s=0.8),xlim=c(-4,4),add=TRUE)
curve(f(x,s=0.6),xlim=c(-4,4),add=TRUE)
text(
c(-4,rep(4,3)),
c(0,f(c(4),c(1,0.8,0.6))),
labels=c("Parent","Kid 1","Kid 2","Kid 3"),
pos=c(2,4,4,4)
)
Result:
I think Paul Murrell has a document illustrating similar diagrams in grid. Here's a basic example,
library(grid)
labelGrob <- function(x,y,label, ...){
t <- textGrob(x,y,label=label)
w <- convertWidth(1.5*grobWidth(t), "npc", valueOnly = TRUE)
h <- convertHeight(1.5*grobHeight(t), "npc", valueOnly = TRUE)
gTree(cl = "label", west = unit(x-0.5*w, "npc"),
east = unit(x+0.5*w, "npc"),
children=gList(t, roundrectGrob(x=x, y=y, gp=gpar(fill=NA),
width=w, height=h)))
}
xDetails.label <- function(x, theta){
if(theta == 180) return(x$west[1]) else
if(theta == 0) return(x$east[1]) else
xDetails(x$children[[1]], theta)
}
yDetails.label <- function(x, theta){
if(theta %in% c("west", "east")) return(x$y) else
yDetails(x$children[[1]], theta)
}
lab1 <- labelGrob(0.1, 0.5, "start")
lab2 <- labelGrob(0.6, 0.75, "end")
grid.newpage()
grid.draw(lab1)
grid.draw(lab2)
grid.curve(grobX(lab1, "east"), grobY(lab1, "east"),
grobX(lab2, "west"), grobY(lab2, "west"),
inflect = TRUE, curvature=0.1)

R: textplot() how change the colour of points?

require(wordcloud)
textplot(loc[,1],loc[,2],states)
Gives me this
http://blog.fellstat.com/wp-content/uploads/2012/09/blog_text2.png
I want to change the colour of the red dots.
textplot(loc[,1],loc[,2],states, col="blue")
This changes the colour of the text.
Somwehre in the texplot function is the code for points:
> textplot
function (x, y, words, cex = 1, new = TRUE, show.lines = TRUE,
....
points(x[i], y[i], pch = 16, col = "red", cex = 0.5)
....
<environment: namespace:wordcloud>
But does this mean it's fixed to red, pch 16 and cex 0.5? I changed the par to par(col="blue") and it still shows the red dots.
It is unfortunately hard coded into the function. You can just modify the function somewhere in your scripts to implement this.
library(wordcloud)
dat <- sweep(USArrests, 2, colMeans(USArrests))
dat <- sweep(dat, 2, sqrt(diag(var(dat))),"/")
loc <- cmdscale(dist(dat))
textplot2 <- function(x,
y,
words,
cex = 1,
pch = 16,
pointcolor = "red",
new = TRUE,
show.lines=TRUE,
...){
if(new)
plot(x,y,type="n",...)
lay <- wordlayout(x,y,words,cex,...)
if(show.lines){
for(i in 1:length(x)){
xl <- lay[i,1]
yl <- lay[i,2]
w <- lay[i,3]
h <- lay[i,4]
if(x[i]<xl || x[i]>xl+w ||
y[i]<yl || y[i]>yl+h){
points(x[i],y[i],pch= pch,col= pointcolor,cex= .5)
nx <- xl+.5*w
ny <- yl+.5*h
lines(c(x[i],nx),c(y[i],ny),col="grey")
}
}
}
text(lay[,1]+.5*lay[,3],lay[,2]+.5*lay[,4],words,cex = cex,...)
}
Then call the new textplot2
textplot2(loc[,1],loc[,2],rownames(loc),pch = 16, pointcolor = "blue")

Resources