I would like to create a graph in R looking like this:
It is a so-called triaxial ratio diagram to display ratios of plant nutrient contents. It needs a log-scale from 0.01 to 100, the axes cross at 1. I have found two scripts on this page, however, they had another purpose and don't really fit my needs. Here is one:
get.coords <- function(a, d, x0, y0) {
a <- ifelse(a <= 90,90 - a, 450 - a)
data.frame(x = x0 + d * cos(a / 180 * pi),
y = y0+ d * sin(a / 180 * pi))
}
rotatedAxis <- function(x0, y0, a, d, symmetrical=FALSE, tickdist,
ticklen,...) {
if(isTRUE(symmetrical)) {
axends <- get.coords(c(a, a + 180), d, x0, y0)
tick.d <- c(seq(0, d, tickdist), seq(-tickdist, -d, -tickdist))
} else {
axends <- rbind(get.coords(a, d, x0, y0), c(x0, y0))
tick.d <- seq(0, d, tickdist)
}
invisible(lapply(apply(get.coords(a, d=tick.d, x0, y0), 1, function(x) {
get.coords(a + 90, c(-ticklen, ticklen), x[1], x[2])
}), function(x) lines(x$x, x$y, ...)))
lines(axends$x, axends$y, ...)
}
plot.new()
plot.window(xlim=c(-70, 70), ylim=c(-70, 70), asp=1)
# Plot the rotated axes -original
rotatedAxis(0, 0, a=60, d=60,symmetrical=TRUE, tickdist=10, ticklen=1)
rotatedAxis(0, 0, a=120, d=60, symmetrical=TRUE, tickdist=10, ticklen=1)
rotatedAxis(0, 0, a=180, d=60, symmetrical=TRUE, tickdist=10, ticklen=1)
# Add text labels to circumference -original
text.coords <- get.coords(seq(0, 300, 60), 65, 0, 0)
text(text.coords$x, text.coords$y, c('I', 'A', 'S', 'E', 'C', 'R'))
points(0, 0, pch=21, bg=1, col=0, lwd=2, cex=2)
This code creates the rotated axis, but not the log-scales and plotting points to the diagram is only possible via xy-coordinates, not xyz.
It would be great if someone could help me, the original paper doesn't offer description of the method and I haven't found anything helpful on the internet.
Many Thanks!
This code should give you a plot close to what you desire. I used it with some example data to create the plot given below. The code uses base graphics, so it should be simple to modify. The functions create a plot with log axes from 0.01 to 100.
Because the plotted values are ratios, the only two of them are independent. The third ratio can be calculated from the other two, so only specifying two ratios is required.
I didn't find much on the underlying calculations for the plot either. This code assumes that the ratios for each point are read by tracing the point perpendicular to the axes. For example, the point at 10 on Ca/Mg will not be about 0.3 for each, not 1.
makeAxes <- function(a3, # angles of axes, assumed at 120-degree separation, value indicates positive side from 3-o'clock
d, # length of half-axis in log10-units, 2 => 0.01-100
axLab, # labels for axes
cen=c(0,0), # center of axis
ticks=T, # should tick marks be included
tickLocs=c((1:9)*10^-2,(1:9)*10^-1,(1:9),(1:9)*10^1,100), # location of tick marks
tickLen=0.05, # dimension of tick marks
endText=T # are the values at the end of axes plotted
){
# making an empty plot
plot(NA,NA
,ylim=c(-d,d)*1.5 + cen[1]
,xlim=c(-d,d)*1.5 + cen[2]
,bty='n'
,axes = F
,ylab=''
,xlab='')
# adding axis lines and notations
for(i in 1:3){
# coordinates of x and y ends of axes
# col1 = x values
matTemp <- matrix(0,2,2)
matTemp[,1] <- c(cen[1] + d*cos(a3[i]*pi/180),cen[1] - d*cos(a3[i]*pi/180))
matTemp[,2] <- c(cen[2] + d*sin(a3[i]*pi/180),cen[2] - d*sin(a3[i]*pi/180))
# plotting lines
lines(matTemp[,1],matTemp[,2])
# adding text for axes
text(x=matTemp[1,1]*1.4,y=matTemp[1,2]*1.4,
labels=axLab[i])
if(endText){
text(x=matTemp[,1]*1.1,
y=matTemp[,2]*1.1,
labels=c(10^d,10^-d),
cex=0.8
)
text(cen[1],cen[2],
lab=1,
pos=4,
cex=0.8
)
}
# adding tick marks
if(ticks){
# values are the point on the axis plus a correction to make it perpendicular
xVals1 <- cen[1] + log10(tickLocs)*cos(a3[i]*pi/180) - tickLen*sin(a3[i]*pi/180)
yVals1 <- cen[2] + log10(tickLocs)*sin(a3[i]*pi/180) - tickLen*cos(a3[i]*pi/180)
xVals2 <- cen[1] + log10(tickLocs)*cos(a3[i]*pi/180) + tickLen*sin(a3[i]*pi/180)
yVals2 <- cen[2] + log10(tickLocs)*sin(a3[i]*pi/180) + tickLen*cos(a3[i]*pi/180)
# plotting lines as tick marks
for(j in 1:length(xVals1)){
lines(c(xVals1[j],xVals2[j]),c(yVals2[j],yVals1[j]))
}
}
}
}
addPoints <- function(df, # data frame of values, must have axis columns
cen=c(0,0), # center of axis
a3 # angles of axes, assumed at 120-degree separation, value indicates positive side from 3-o'clock
){
# temporary data frame for calculations
dftemp <- df
# calculation of log values
dftemp$axis1log <- log10(dftemp$axis1)
dftemp$axis2log <- log10(dftemp$axis2)
dftemp$axis3log <- log10(dftemp$axis3)
# calculation of values along axis 1
dftemp$axis1log_xvals <- cen[1] + dftemp$axis1log*cos(a3[1]*pi/180)
dftemp$axis1log_yvals <- cen[2] + dftemp$axis1log*sin(a3[1]*pi/180)
# calculation of values along axis 2
dftemp$axis2log_xvals <- cen[1] + dftemp$axis2log*cos(a3[2]*pi/180)
dftemp$axis2log_yvals <- cen[2] + dftemp$axis2log*sin(a3[2]*pi/180)
# slope calcuation
dftemp$axis1_perp <- sin((a3[1]+90)*pi/180)/cos((a3[1]+90)*pi/180)
dftemp$axis2_perp <- sin((a3[2]+90)*pi/180)/cos((a3[2]+90)*pi/180)
# intersection points
# y1 = ax1 + c
# y2 = bx2 + d
a <- dftemp$axis1_perp
b <- dftemp$axis2_perp
c <- dftemp$axis1log_yvals - dftemp$axis1_perp*dftemp$axis1log_xvals
d <- dftemp$axis2log_yvals - dftemp$axis2_perp*dftemp$axis2log_xvals
dftemp$intersectx <- (d-c)/(a-b)
dftemp$intersecty <- a*(d-c)/(a-b)+c
points(dftemp$intersectx
,dftemp$intersecty
,pch=19 # plotting symbol
,col='gray48' # colors of points
,cex=1.2 # multiplier for plotting symbols
)
}
# example data
labs <- c("a","b","c","d","e","f","g")
k <- c(150, 100, 3000, 3500, 10, 10, 30)
ca <- c(500, 120, 350, 100, 10, 10, 30)
mg <- c(50, 450, 220, 350, 10, 100, 60)
# conversion into data frame
dataPlot <- as.data.frame(list('labs'=labs,'k'=k,'ca'=ca,'mg'=mg)
# calcuations with data frame
dataPlot$axis1 <- dataPlot$ca/dataPlot$mg
dataPlot$axis2 <- dataPlot$k/dataPlot$ca
dataPlot$axis3 <- dataPlot$mg/dataPlot$k
# making axes
makeAxes(a3=c(90,-30,210),
d=2,
axLab=c("Ca/Mg", "K/Ca", "Mg/K" ))
# adding points
addPoints(df=dataPlot,
a3=c(90,-30,210))
Related
I generate 4 parts of big data: cluster1(10000 points), cluster2(15000 points), cluster3(15000 points) and throws(500 points). Here is the code:
library('MASS')
library('fpc')
#library("dbscan")
library("factoextra")
library("clustertend")
library("boot")
library("stream")
set.seed(123)
mu1<-c(-5,-7)
mu1
sigma1<-matrix(c(4,-2,-2,2), nrow=2, ncol=2, byrow = TRUE)
sigma1
n<-10000
cluster1<-mvrnorm(n,mu1,sigma1)
cluster1
#cluster1<-as.data.frame(cluster1)
#cluster1
#c<-runif(10000,1,1000)
#c
phi <- runif(15000, max = 2*pi)
rho <- sqrt(runif(15000))
x <- sqrt(5)*rho*cos(phi) + 6
y <- sqrt(10/3)*rho*sin(phi) + 4
range(2*(x - 6)^2 + 3*(y - 4)^2)
#[1] 0.001536582 9.999425234
plot(x, y)
cluster2<-cbind(x,y)
cluster2
u <- runif(15000, max = 3)
v <- runif(15000, max = 2)
x <- u + v - 10
y <- v - u + 8
range(x + y)
#[1] -1.999774 1.999826
range(x - y + 15)
#[1] -2.999646 2.999692
plot(x, y)
cluster3<-cbind(x,y)
cluster3
#cluster3<-as.data.frame(cluster1)
#cluster3
x <- runif(500, -20, 20)
y <- runif(500, -20, 20)
#u <- runif(500, max = 20)
#v <- runif(500, max = 20)
#x <- u + v - 20
#y <- v - u
range(x)
range(y)
plot(x,y)
throws<-cbind(x,y)
throws
data<-rbind(cluster1,cluster2,cluster3,throws)
data<-as.data.frame(data)
data
plot(data)
Then I try by using the bootstrap method, construct a distribution of H statistics for some
fixed m, which is from 7% of the total number of generated points(m=2835). Here is th code where I do this:
B<-10#number of iterations
H<-NULL#value of Hopkins statistic
for(i in 1:B){
N<-dim(data)[1]
s<-sample(N,0.8*N)
stat<-hopkins(data[s,], n=2835, byrow = TRUE)$H
H[i]<-stat
#print(c(i, stat))
}
It takes very to generate. Then I should to compare this result with beta distribution - B(m,m). Here is the code:
hist(H)
#(density(H), col="red")
#hist(distB)
X<-seq(min(H), max(H), 0.001)
X
lines(X, dbeta(X,2835,2835), type="l", col="red")
The problem is that lined doesn't draw on hist. Can anybody say what is the problem? Here is the image, I see red line, but it's not exactly right.
Your y-axis values plotted by dbeta() are way too low to register on the supplied y-axis (<0.0000001). You need to overlay the second plot:
# sample data
H <- sample(seq(0.455,0.475,0.001), 1000, replace = TRUE)
#plot histogram
hist(H)
# prepare graphics to add second plot
par(new = TRUE)
# sample data for second plot
X <- seq(0.455,0.475, 0.001)
Y <- dbeta(X,2835,2835)
# plot second plot, remove axes
plot(X, dbeta(X,2835,2835), type="l", col="red", axes = FALSE)
axis(4, Y) # add axis on right side
f(x,y)= (1/25)*(20-x)/x 10<x<20, x/2 <y <x
0 o.t
I have to create this image through this expression.
but
x <- seq(10, 20, length=20)
y <- seq(10, 20, length=20)
f <- function(x,y){(1/25)*(20-x)/5}
z <- outer(x,y,f)
persp(x,y,z,theta=30,phi=30, expand=0.5,col=rainbow(19), border=NA)
what is wrong?
You should mask z based on the constraint. As a suggestion, you can use an amazing interactive rgl package in R.
#source: https://stackoverflow.com/questions/50079316/plot3d-how-to-change-z-axis-surface-color-to-heat-map-color
map2color <- function(x, pal, limits = range(x,na.rm=T)){
pal[findInterval(x, seq(limits[1], limits[2], length.out = length(pal) + 1),
all.inside=TRUE)]
}
x <- seq(10, 20, length=20)
y <- seq(10, 20, length=20)
mask <- sapply(x,function(m) sapply(y,function(n) if((n>m/2)&(n<m)){(1/25)*(20-m)/5}else{ NA }))
z <- outer(x,y,f)
z <- z * mask
#persp(x,y,z, col= map2color(z, rainbow(100)),border = NA)
library(rgl)
persp3d(x,y,z,col = map2color(z, rainbow(100)),theta=30,phi=30)
#SRhm's answer is probably the best choice, but if you want to live on the bleeding edge, you can get rid of the jagged diagonal edge using a development version of rgl (from R-forge), at least version 0.100.8.
This version supports triangulations with boundaries using the tripack package. So you set up a grid of values over the x-y range, then define the boundaries of the region using the equations, and you get smooth edges. For example:
library(tripack)
library(rgl)
g <- expand.grid(x=10:20, y=5:20)
keep <- with(g, 10 < x & x < 20 & x/2 < y & y < x)
g2 <- g[keep,]
tri <- tri.mesh(g2)
# Set up boundary constraints
cx <- c(10:20, 20: 10)
cy <- c(seq(5, 10, len=11), 20:10)
tri2 <- add.constraint(tri, cx, cy, reverse = TRUE)
# This isn't necessary, but shows where the formula will be evaluated
plot(tri2)
It might be better to fill in some of the left and right edges with more points to avoid those big triangles,
but skip that for now.
z <- with(tri2, (1/25)*(20-x)/x)
# Now plot it, using the map2color function #SRhm found:
#source: https://stackoverflow.com/questions/50079316/plot3d-how-to-change-z-axis-surface-color-to-heat-map-color
map2color <- function(x, pal, limits = range(x,na.rm=T)){
pal[findInterval(x, seq(limits[1], limits[2], length.out = length(pal) + 1),
all.inside=TRUE)]
}
persp3d(tri2, z, col = map2color(z, rainbow(100)))
After rotation, you get this view:
I have a bunch of datasets of x’s and y’s. For each dataset, I plot points (x, y) in R. And the resulting plots are generally similar to either type A or type B. Type B has an intersection while type A doesn’t have.
My question: Given a new dataset, is it possible to calculate (in R) the red shaded area under the curve as indicated in type A and type B plot without knowing the visualization?
The main challenges are:
1) How to determine whether the dataset will generate type A or type B in R?
2) How to compute the red shaded area in type B using the dataset with R?
Here is the code producing the dataset that generated type B curve.
set.seed(300)
predicted_value_A = c(rbeta(300, 9, 2), rbeta(700, 2, 4), rbeta(10000, 2, 4))
predicted_value_B = c(rbeta(1000, 4, 3), rbeta(10000, 2, 3))
real_value = c(rep(1, 1000), rep(0, 10000))
library(ROCR)
library(ggplot2)
predB <- prediction(predicted_value_B, real_value)
perfB <- performance(predB, measure = "mat", x.measure = "f")
yB <- attr(perfB, "y.values")[[1]]
yB <- (yB + 1)/2
xB <- attr(perfB, "x.values")[[1]]
# dataset that generates type B curve
dfB <- data.frame(X = xB, Y= yB)
ggplot(df, aes(x=X, y=Y, ymin=0, ymax=1, xmin=0, xmax=1 )) + geom_point(size = 0.2, shape = 21, fill="white")+
ggtitle("Type B curve") +
theme(plot.title=element_text(hjust=0.5))
Here is a bit of code to shade the plot from a set of (x,y) points using an approximation with small rectangles. This assumes evenly spaced x values, and enough that the rectangular approximation works well.
# sample dataset
x <- seq(0,2,length.out=1000)
y1 <- x
y2 <- sin(x*pi)+x
# plot
plot(x,y1,type='l',ylab='y')
lines(x,y2)
# shade the plot
## not efficient but works
dx <- x[2]-x[1]
area <- 0
# shade plot and calculate area
## uses a rectangular strip approximation
## assumes even spacing in x. Could also calculate the dx in each step if it changes
for (i in 1:(length(x))) {
if (y1[i] < y2[i]) {
cord.x <- c(x[i]-dx/2,x[i]-dx/2,x[i]+dx/2,x[i]+dx/2)
cord.y <- c(y1[i],y2[i],y2[i],y1[i])
} else {
cord.x <- c(x[i]-dx/2,x[i]-dx/2,x[i]+dx/2,x[i]+dx/2)
cord.y <- c(y2[i],y1[i],y1[i],y2[i])
}
# draw the polygons
polygon(cord.x, cord.y, col = 'pink', border = NA)
# sum to the area
area <- area + abs(y2[i]-y1[i])*dx
}
area
sample shaded plot by rectangular approximation
I would like to achieve that the points I add to the plot have their size adjusted to obtain a better 3D impression. I know that I somehow have to use the transformation matrix that is returned to compute the length of the vector orthogonal to the 2d plane to the respective point in 3d, but I don't know how to do that.
Here is an example:
x1 <- rnorm(100)
x2 <- 4 + rpois(100, 4)
y <- 0.1*x1 + 0.2*x2 + rnorm(100)
dat <- data.frame(x1, x2, y)
m1 <- lm(y ~ x1 + x2, data=dat)
x1r <- range(dat$x1)
x1seq <- seq(x1r[1], x1r[2], length=30)
x2r <- range(dat$x2)
x2seq <- seq(x2r[1], x2r[2], length=30)
z <- outer(x1seq, x2seq, function(a,b){
predict(m1, newdata=data.frame(x1=a, x2=b))
})
res <- persp(x1seq, x2seq, z)
mypoints <- trans3d(dat$x1, dat$x2, dat$y, pmat=res)
points(mypoints, pch=1, col="red")
You can use the function presented here to determine distance to the observer, then scale the pointsize (cex) to that distance:
# volcano data
z <- 2 * volcano # Exaggerate the relief
x <- 10 * (1:nrow(z)) # 10 meter spacing (S to N)
y <- 10 * (1:ncol(z)) # 10 meter spacing (E to W)
# draw volcano and store transformation matrix
pmat <- persp(x, y, z, theta = 35, phi = 40, col = 'green4', scale = FALSE,
ltheta = -120, shade = 0.75, border = NA, box = TRUE)
# take some xyz values from the matrix
s = sample(1:prod(dim(z)), size=500)
xx = x[row(z)[s] ]
yy = y[col(z)[s]]
zz = z[s] + 10
# depth calculation function (adapted from Duncan Murdoch at https://stat.ethz.ch/pipermail/r-help/2005-September/079241.html)
depth3d <- function(x,y,z, pmat, minsize=0.2, maxsize=2) {
# determine depth of each point from xyz and transformation matrix pmat
tr <- as.matrix(cbind(x, y, z, 1)) %*% pmat
tr <- tr[,3]/tr[,4]
# scale depth to point sizes between minsize and maxsize
psize <- ((tr-min(tr) ) * (maxsize-minsize)) / (max(tr)-min(tr)) + minsize
return(psize)
}
# determine distance to eye
psize = depth3d(xx,yy,zz,pmat,minsize=0.1, maxsize = 1)
# from 3D to 2D coordinates
mypoints <- trans3d(xx, yy, zz, pmat=pmat)
# plot in 2D space with pointsize related to distance
points(mypoints, pch=8, cex=psize, col=4)
How can I plot a fanchart as displayed on this Wikipedia page?
I have installed the nlme package with its MathAchieve dataset, but I cannot find the commands for plotting this graph.
The nlme pdf file is here.
I also checked this link but it is non-english.
With the fan.plot function from the plotrix package, I could only draw pie charts:
https://sites.google.com/site/distantyetneversoclose/excel-charts/the-pie-doughnut-combination-a-fan-plot
Thanks for your help.
Having thought about this a bit more since my previous answer, I've come up with a simpler way of producing multipanel (if appropriate) fanplots, overlaid on a levelplot, as shown in the Wikipedia Fan chart page. This approach works with a data.frame that has two independent variables and zero or more conditioning variables that separate data into panels.
First we define a new panel function, panel.fanplot.
panel.fanplot <- function(x, y, z, zmin, zmax, subscripts, groups,
nmax=max(tapply(z, list(x, y, groups),
function(x) sum(!is.na(x))), na.rm=T),
...) {
if(missing(zmin)) zmin <- min(z, na.rm=TRUE)
if(missing(zmin)) zmax <- max(z, na.rm=TRUE)
get.coords <- function(a, d, x0, y0) {
a <- ifelse(a <= 90, 90 - a, 450 - a)
data.frame(x = x0 + d * cos(a / 180 * pi),
y = y0 + d * sin(a / 180 * pi))
}
z.scld <- (z - zmin)/(zmax - zmin) * 360
fan <- aggregate(list(z=z.scld[subscripts]),
list(x=x[subscripts], y=y[subscripts]),
function(x)
c(n=sum(!is.na(x)),
quantile(x, c(0.25, 0.5, 0.75), na.rm=TRUE) - 90))
panel.levelplot(fan$x, fan$y,
(fan$z[, '50%'] + 90) / 360 * (zmax - zmin) + zmin,
subscripts=seq_along(fan$x), ...)
lapply(which(!is.na(fan$z[, '50%'])), function(i) {
with(fan[i, ], {
poly <- rbind(c(x, y),
get.coords(seq(z[, '25%'], z[, '75%'], length.out=200),
0.3, x, y))
lpolygon(poly$x, poly$y, col='gray10', border='gray10', lwd=3)
llines(get.coords(c(z[, '50%'], 180 + z[, '50%']), 0.3, x, y),
col='black', lwd=3, lend=1)
llines(get.coords(z[, '50%'], c(0.3, (1 - z[, 'n']/nmax) * 0.3), x, y),
col='white', lwd=3)
})
})
}
Now we create some dummy data and call levelplot:
d <- data.frame(z=runif(1000),
x=sample(5, 1000, replace=TRUE),
y=sample(5, 1000, replace=TRUE),
grp=sample(4, 1000, replace=TRUE))
colramp <- colorRampPalette(c('#fff495', '#bbffaa', '#70ffeb', '#72aaff',
'#bf80ff'))
levelplot(z ~ x*y|as.factor(grp), d, groups=grp, asp=1, col.regions=colramp,
panel=panel.fanplot, zmin=min(d$z, na.rm=TRUE),
zmax=max(d$z, na.rm=TRUE), at=seq(0, 1, 0.2))
It's important to pass the conditioning variable (that separates plots into panels) to levelplot via the argument group, as shown above with the variable grp, in order for sample sizes to be calculated (shown by white line length).
And here's how we would mimic the Wikipedia plot:
library(nlme)
data(MathAchieve)
MathAchieve$SESfac <- as.numeric(cut(MathAchieve$SES, seq(-2.5, 2, 0.5)))
MathAchieve$MEANSESfac <-
as.numeric(cut(MathAchieve$MEANSES, seq(-1.25, 1, 0.25)))
levels(MathAchieve$Minority) <- c('Non-minority', 'Minority')
MathAchieve$group <-
as.factor(paste0(MathAchieve$Sex, ', ', MathAchieve$Minority))
colramp <- colorRampPalette(c('#fff495', '#bbffaa', '#70ffeb', '#72aaff',
'#bf80ff'))
levelplot(MathAch ~ SESfac*MEANSESfac|group, MathAchieve,
groups=group, asp=1, col.regions=colramp,
panel=panel.fanplot, zmin=0, zmax=28, at=seq(0, 25, 5),
scales=list(alternating=1,
tck=c(1, 0),
x=list(at=seq(1, 11) - 0.5,
labels=seq(-2.5, 2, 0.5)),
y=list(at=seq(1, 11) - 0.5,
labels=seq(-1.25, 1, 0.25))),
between=list(x=1, y=1), strip=strip.custom(bg='gray'),
xlab='Socio-economic status of students',
ylab='Mean socio-economic status for school')
I can think of a couple of ways of going about this with lattice. You could either use xyplot and fill panels with panel.fill, or you can use levelplot. The polygons themselves have to be added with a custom panel and lpolygon. Here's how I've done it with levelplot. I'm really a lattice novice, though, and there may very well be some shortcuts that I don't know about.
Because I'm using levelplot, we first create a matrix containing median MathAch scores for each combination of MEANSES and SES. These will be used to plot the cell colours.
library(lattice)
library(nlme)
data(MathAchieve)
Below, I convert SES and MEANSES into factors using cut, with breakpoints as in the Wikipedia example.
MathAchieve$SESfac <- as.numeric(cut(MathAchieve$SES, seq(-2.5, 2, 0.5)))
MathAchieve$MEANSESfac <- as.numeric(cut(MathAchieve$MEANSES,
seq(-1.25, 1, 0.25)))
I'm not sure how to plot the four panels as on the Wikipedia page, so I'll just subset to non-minority females:
d <- subset(MathAchieve, Sex=='Female' & Minority=='No')
To convert this dataframe to a matrix, I split it to a list and then coerce back to a matrix with the appropriate dimensions. Each cell of the matrix contains the median MathAch for a particular combination of SESfac and MEANSESfac.
l <- split(d$MathAch, list(d$SESfac, d$MEANSESfac))
m.median <- matrix(sapply(l, median), ncol=9)
When we use levelplot we will have access to x and y, being the coordinates of the "current" cell. In order to pass the vector of MathAch to levelplot, so that a polygon can be plotted for each cell, I create a matrix (same dimensions as m.median) of lists, where each cell is a list containing a MathAch vector.
m <- matrix(l, ncol=9)
Below we create a color ramp as used by Wolfram Fischer in the example on Wikipedia.
colramp <- colorRampPalette(c('#fff495', '#bbffaa', '#70ffeb', '#72aaff',
'#bf80ff'))
Now we define the custom panel function. I've commented throughout to explain:
fanplot <- function(x, y, z, subscripts, fans, ymin, ymax,
nmax=max(sapply(fans, length)), ...) {
# nmax is the maximum sample size across all combinations of conditioning
# variables. For generality, ymin and ymax are limits of the circle around
# around which fancharts are plotted.
# fans is our matrix of lists, which are used to plot polygons.
get.coords <- function(a, d, x0, y0) {
a <- ifelse(a <= 90, 90 - a, 450 - a)
data.frame(x = x0 + d * cos(a / 180 * pi),
y = y0 + d * sin(a / 180 * pi))
}
# getcoords returns coordinates of one or more points, given angle(s),
# (i.e., a), distances (i.e., d), and an origin (x0 and y0).
panel.levelplot(x, y, z, subscripts, ...)
# Below, we scale the raw vectors of data such that ymin thru ymax map to
# 0 thru 360. We then calculate the relevant quantiles (i.e. 25%, 50% and 75%).
smry <- lapply(fans, function(y) {
y.scld <- (y - ymin)/(ymax - ymin) * 360
quantile(y.scld, c(0.25, 0.5, 0.75)) - 90
})
# Now we use get.coords to determine relevant coordinates for plotting
# polygons and lines. We plot a white line inwards from the circle's edge,
# with length according to the ratio of the sample size to nmax.
mapply(function(x, y, smry, n) {
if(!any(is.na(smry))) {
lpolygon(rbind(c(x, y),
get.coords(seq(smry['25%'], smry['75%'], length.out=200),
0.3, x, y)), col='gray10', lwd=2)
llines(get.coords(c(smry['50%'], 180 + smry['50%']), 0.3,
x, y), col=1, lwd=3)
llines(get.coords(smry['50%'], c(0.3, (1 - n/nmax) * 0.3),
x, y), col='white', lwd=3)
}
}, x=x, y=y, smry=smry, n=sapply(fans, length))
}
And finally use this custom panel function within levelplot:
levelplot(m.median, fans=m, ymin=0, ymax=28,
col.regions=colramp, at=seq(0, 25, 5), panel=fanplot,
scales=list(tck=c(1, 0),
x=list(at=seq_len(ncol(m.median) + 1) - 0.5,
labels=seq(-2.5, 2, 0.5)),
y=list(at=seq_len(nrow(m.median) + 1) - 0.5,
labels=seq(-1.25, 1, 0.25))),
xlab='Socio-economic status of students',
ylab='Mean socio-economic status for the school')
I haven't coloured cells grey if they have sample size < 7, as was done for the equivalent plot on the Wikipedia page, but this could be done with lrect if needed.