PCA Scaling with ggbiplot - r

I'm trying to plot a principal component analysis using prcomp and ggbiplot. I'm getting data values outside of the unit circle, and haven't been able to rescale the data prior to calling prcomp in such a way that I can constrain the data to the unit circle.
data(wine)
require(ggbiplot)
wine.pca=prcomp(wine[,1:3],scale.=TRUE)
ggbiplot(wine.pca,obs.scale = 1,
var.scale=1,groups=wine.class,ellipse=TRUE,circle=TRUE)
I tried scaling by subtracting mean and dividing by standard deviation before calling prcomp:
wine2=wine[,1:3]
mean=apply(wine2,2,mean)
sd=apply(wine2,2,mean)
for(i in 1:ncol(wine2)){
wine2[,i]=(wine2[,i]-mean[i])/sd[i]
}
wine2.pca=prcomp(wine2,scale.=TRUE)
ggbiplot(wine2.pca,obs.scale=1,
var.scale=1,groups=wine.class,ellipse=TRUE,circle=TRUE)
ggbiplot package installed as follows:
require(devtools)
install_github('ggbiplot','vqv')
Output of either code chunk:
Per #Brian Hanson's comment below, I'm adding an additional image reflecting the output I'm trying to get.

I edited the code for the plot function and was able to get the functionality I wanted.
ggbiplot2=function(pcobj, choices = 1:2, scale = 1, pc.biplot = TRUE,
obs.scale = 1 - scale, var.scale = scale,
groups = NULL, ellipse = FALSE, ellipse.prob = 0.68,
labels = NULL, labels.size = 3, alpha = 1,
var.axes = TRUE,
circle = FALSE, circle.prob = 0.69,
varname.size = 3, varname.adjust = 1.5,
varname.abbrev = FALSE, ...)
{
library(ggplot2)
library(plyr)
library(scales)
library(grid)
stopifnot(length(choices) == 2)
# Recover the SVD
if(inherits(pcobj, 'prcomp')){
nobs.factor <- sqrt(nrow(pcobj$x) - 1)
d <- pcobj$sdev
u <- sweep(pcobj$x, 2, 1 / (d * nobs.factor), FUN = '*')
v <- pcobj$rotation
} else if(inherits(pcobj, 'princomp')) {
nobs.factor <- sqrt(pcobj$n.obs)
d <- pcobj$sdev
u <- sweep(pcobj$scores, 2, 1 / (d * nobs.factor), FUN = '*')
v <- pcobj$loadings
} else if(inherits(pcobj, 'PCA')) {
nobs.factor <- sqrt(nrow(pcobj$call$X))
d <- unlist(sqrt(pcobj$eig)[1])
u <- sweep(pcobj$ind$coord, 2, 1 / (d * nobs.factor), FUN = '*')
v <- sweep(pcobj$var$coord,2,sqrt(pcobj$eig[1:ncol(pcobj$var$coord),1]),FUN="/")
} else {
stop('Expected a object of class prcomp, princomp or PCA')
}
# Scores
df.u <- as.data.frame(sweep(u[,choices], 2, d[choices]^obs.scale, FUN='*'))
# Directions
v <- sweep(v, 2, d^var.scale, FUN='*')
df.v <- as.data.frame(v[, choices])
names(df.u) <- c('xvar', 'yvar')
names(df.v) <- names(df.u)
if(pc.biplot) {
df.u <- df.u * nobs.factor
}
# Scale the radius of the correlation circle so that it corresponds to
# a data ellipse for the standardized PC scores
r <- 1
# Scale directions
v.scale <- rowSums(v^2)
df.v <- df.v / sqrt(max(v.scale))
## Scale Scores
r.scale=sqrt(max(df.u[,1]^2+df.u[,2]^2))
df.u=.99*df.u/r.scale
# Change the labels for the axes
if(obs.scale == 0) {
u.axis.labs <- paste('standardized PC', choices, sep='')
} else {
u.axis.labs <- paste('PC', choices, sep='')
}
# Append the proportion of explained variance to the axis labels
u.axis.labs <- paste(u.axis.labs,
sprintf('(%0.1f%% explained var.)',
100 * pcobj$sdev[choices]^2/sum(pcobj$sdev^2)))
# Score Labels
if(!is.null(labels)) {
df.u$labels <- labels
}
# Grouping variable
if(!is.null(groups)) {
df.u$groups <- groups
}
# Variable Names
if(varname.abbrev) {
df.v$varname <- abbreviate(rownames(v))
} else {
df.v$varname <- rownames(v)
}
# Variables for text label placement
df.v$angle <- with(df.v, (180/pi) * atan(yvar / xvar))
df.v$hjust = with(df.v, (1 - varname.adjust * sign(xvar)) / 2)
# Base plot
g <- ggplot(data = df.u, aes(x = xvar, y = yvar)) +
xlab(u.axis.labs[1]) + ylab(u.axis.labs[2]) + coord_equal()
if(var.axes) {
# Draw circle
if(circle)
{
theta <- c(seq(-pi, pi, length = 50), seq(pi, -pi, length = 50))
circle <- data.frame(xvar = r * cos(theta), yvar = r * sin(theta))
g <- g + geom_path(data = circle, color = muted('white'),
size = 1/2, alpha = 1/3)
}
# Draw directions
g <- g +
geom_segment(data = df.v,
aes(x = 0, y = 0, xend = xvar, yend = yvar),
arrow = arrow(length = unit(1/2, 'picas')),
color = muted('red'))
}
# Draw either labels or points
if(!is.null(df.u$labels)) {
if(!is.null(df.u$groups)) {
g <- g + geom_text(aes(label = labels, color = groups),
size = labels.size)
} else {
g <- g + geom_text(aes(label = labels), size = labels.size)
}
} else {
if(!is.null(df.u$groups)) {
g <- g + geom_point(aes(color = groups), alpha = alpha)
} else {
g <- g + geom_point(alpha = alpha)
}
}
# Overlay a concentration ellipse if there are groups
if(!is.null(df.u$groups) && ellipse) {
theta <- c(seq(-pi, pi, length = 50), seq(pi, -pi, length = 50))
circle <- cbind(cos(theta), sin(theta))
ell <- ddply(df.u, 'groups', function(x) {
if(nrow(x) < 2) {
return(NULL)
} else if(nrow(x) == 2) {
sigma <- var(cbind(x$xvar, x$yvar))
} else {
sigma <- diag(c(var(x$xvar), var(x$yvar)))
}
mu <- c(mean(x$xvar), mean(x$yvar))
ed <- sqrt(qchisq(ellipse.prob, df = 2))
data.frame(sweep(circle %*% chol(sigma) * ed, 2, mu, FUN = '+'),
groups = x$groups[1])
})
names(ell)[1:2] <- c('xvar', 'yvar')
g <- g + geom_path(data = ell, aes(color = groups, group = groups))
}
# Label the variable axes
if(var.axes) {
g <- g +
geom_text(data = df.v,
aes(label = varname, x = xvar, y = yvar,
angle = angle, hjust = hjust),
color = 'darkred', size = varname.size)
}
# Change the name of the legend for groups
# if(!is.null(groups)) {
# g <- g + scale_color_brewer(name = deparse(substitute(groups)),
# palette = 'Dark2')
# }
# TODO: Add a second set of axes
return(g)
}

Related

Plot a discontinuous function in R without connecting a "jump"

I'd like to plot a discontinuous function without connecting a jump. For example, in the following plot, I'd like to delete the line connecting (0.5, 0.5) and (0.5, 1.5).
f <- function(x){
(x < .5) * (x) + (x >= .5) * (x + 1)
}
ggplot()+
geom_function(fun = f)
Edit: I'm looking for a solution that works even if the discountinuous point is not a round number, say pi/10.
You could write a little wrapper function which finds discontinuities in the given function and plots them as separate groups:
plot_fun <- function(fun, from = 0, to = 1, by = 0.001) {
x <- seq(from, to, by)
groups <- cut(x, c(-Inf, x[which(abs(diff(fun(x))) > 0.1)], Inf))
df <- data.frame(x, groups, y = fun(x))
ggplot(df, aes(x, y, group = groups)) +
geom_line()
}
This allows
plot_fun(f)
plot_fun(floor, 0, 10)
This answer is based on Allan Cameron's answer, but depicts the jump using open and closed circles. Whether the function is right or left continuous is controlled by an argument.
library("ggplot2")
plot_fun <- function(fun, from = 0, to = 1, by = 0.001, right_continuous = TRUE) {
x <- seq(from, to, by)
tol_vertical <- 0.1
y <- fun(x)
idx_break <- which(abs(diff(y)) > tol_vertical)
x_break <- x[idx_break]
y_break_l <- y[idx_break]
y_break_r <- y[idx_break + 1]
groups <- cut(x, c(-Inf, x_break, Inf))
df <- data.frame(x, groups, y = fun(x))
plot_ <- ggplot(df, aes(x, y, group = groups)) +
geom_line()
# add open and closed points showing jump
dataf_l <- data.frame(x = x_break, y = y_break_l)
dataf_r <- data.frame(x = x_break, y = y_break_r)
shape_open_circle <- 1
# this is the default of shape, but might as well specify.
shape_closed_circle <- 19
shape_size <- 4
if (right_continuous) {
shape_l <- shape_open_circle
shape_r <- shape_closed_circle
} else {
shape_l <- shape_closed_circle
shape_r <- shape_open_circle
}
plot_ <- plot_ +
geom_point(data = dataf_l, aes(x = x, y = y), group = NA, shape = shape_l, size = shape_size) +
geom_point(data = dataf_r, aes(x = x, y = y), group = NA, shape = shape_r, size = shape_size)
return(plot_)
}
Here's the OP's original example:
f <- function(x){
(x < .5) * (x) + (x >= .5) * (x + 1)
}
plot_fun(f)
Here's Allan's additional example using floor, which shows multiple discontinuities:
plot_fun(floor, from = 0, to = 10)
And here's an example showing that the function does not need to be piecewise linear:
f_curved <- function(x) ifelse(x > 0, yes = 0.5*(2-exp(-x)), no = 0)
plot_fun(f_curved, from = -1, to = 5)
You can insert everything inside an ifelse:
f <- function(x){
ifelse(x==0.5,
NA,
(x < .5) * (x) + (x >= .5) * (x + 1))
}
ggplot()+
geom_function(fun = f)

How to position a common label for multiple plots using gtable in ggplot in R?

I have been attempting to solve this issue for a considerable amount of time with no success. I am creating multiple partial dependence plots (PDPs) and utilising a package called zenplots to lay them out. However, the issue I am having is I cannot figure out a way to have a common legend for the multiple plots. I have tried placing them on a grid and plotting and tried changing the positioning of the grobs... but I cant figure it out. For example:
In the above plot, all PDPs are on the same scale and I would like a single legend. Currently, when I produce the image, it plots a legend for each individual plot. Whereas, what I want is something like the image below (which I made in photoshop):
The code Im providing to produce the plots is somewhat long ( which I hope won't deter people)... but essentially it's only the ggplot part of the code that I need to manipulate. That is, Im creating the actual ggplot on lines 103-105 and more generally between lines 103-125, where I use ggtable to build the plots. For example, changing the color argument on line 115 to: guides(fill = FALSE, color = "colour bar") will create the legend for each plot... setting color = FALSE will remove the legends.
below is the code used to make the plots and it's application on the air quality data:
library(randomForest)
library(ggplot2)
library(dplyr)
pdpLayout <- function(data,
fit,
response,
pal = rev(RColorBrewer::brewer.pal(11, "RdYlBu")),
gridSize = 10,
nmax = 500,
class = 1,
rug = TRUE,
...) {
data <- na.omit(data)
# if (is.numeric(nmax) && nmax < nrow(data)) {
# data <- data[sample(nrow(data), nmax), , drop = FALSE]
# }
gridSize <- min(gridSize, nmax)
predData <- predict(fit, data)
vars <- names(data)
vars <- vars[-match(response, vars)]
datap <- data[,vars]
zpath <- 1:length(vars)
zdata <- datap
zpairs <- t(sapply(1:(length(zpath)-1), function(i){
z <- zpath[i:(i+1)]
if (i %% 2 == 0) rev(z) else z
}))
zpairs <- cbind(vars[zpairs[, 1]], vars[zpairs[, 2]])
# loop through vars and create a list of pdps for each pair
pdplist <- vector("list", nrow(zpairs))
for (i in 1:nrow(zpairs)) {
ind <- zpairs[i, ]
if (!is.na(ind[1])) {
px <- pdp_data(data, ind, gridsize = gridSize)
px$.pid <- i
pdplist[[i]] <- px
} else {
pdplist[[i]] <- NULL
}
}
pdplist <- bind_rows(pdplist)
pdplist$fit <- predict(fit, pdplist)
pdplist <- split(pdplist, pdplist$.pid)
pdplist0 <- vector("list", nrow(zpairs))
j <- 1
for (i in 1:nrow(zpairs)) {
ind <- zpairs[i, ]
if (!is.na(ind[1])) {
pdplist0[[i]] <- pdplist[[j]] %>%
group_by(.data[[ind[1]]], .data[[ind[2]]]) %>%
summarise(fit = mean(fit))
j <- j + 1
} else {
pdplist0[[i]] <- NULL
}
}
pdplist <- pdplist0
pdplist0 <- NULL
names(pdplist) <- paste(zpairs[, 2], zpairs[, 1], sep = "pp")
message("Finished ice/pdp")
# Set limits for pairs
pdplist0 <- pdplist[!sapply(pdplist, is.null)]
r <- range(sapply(pdplist0, function(x) range(x$fit)))
limits <- range(labeling::rpretty(r[1], r[2]))
# Zenplot graphing function
data$pred <- predData
z2index <- 0
pdpnn <- function(zargs) {
z2index <<- z2index + 1
vars <- zpairs[z2index, ]
pdp <- pdplist[[z2index]]
if (!is.null(pdp)) {
if (is.factor(pdp[[vars[1]]]) + is.factor(pdp[[vars[2]]]) == 1) {
if (is.factor(pdp[[vars[1]]])) vars <- rev(vars)
p <- ggplot(data = pdp, aes(x = .data[[vars[1]]], y = fit, color = .data[[vars[2]]])) +
geom_line() +
geom_rug(data = data, sides = "b", aes(y = .data[["pred"]]))
} else {
if (is.factor(pdp[[vars[1]]])) posx <- "jitter" else posx <- "identity"
if (is.factor(pdp[[vars[2]]])) posy <- "jitter" else posy <- "identity"
p <- ggplot(data = pdp, aes(x = .data[[vars[1]]], y = .data[[vars[2]]])) +
geom_tile(aes(fill = fit)) +
scale_fill_gradientn(name = "y-hat", colors = pal, limits = limits, oob = scales::squish)
if (rug) {
p <- p +
geom_rug(data = data, sides = "b", position = posx, aes(color = .data[["pred"]])) +
geom_rug(data = data, sides = "l", position = posy, aes(color = .data[["pred"]])) +
scale_color_gradientn(name = "y-hat", colors = pal, limits = limits, oob = scales::squish)
}
}
p <- p +
guides(fill = FALSE, color = FALSE) +
theme_bw() +
theme(
axis.line = element_blank(),
axis.ticks = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
panel.border = element_rect(colour = "gray", fill = NA, size = 1.5)
)
} else {
p <- ggplot() +
theme(panel.background = element_blank())
}
ggplot_gtable(ggplot_build(p))
}
suppressMessages({
zenplots::zenplot(zdata,
pkg = "grid", labs = list(group = NULL),
plot2d = pdpnn, ...
)
})
}
pdp_data <- function(d, var, gridsize = 30) {
if (length(var) == 1) {
pdpvar <- d[[var]]
if (is.factor(pdpvar)) {
gridvals <- levels(pdpvar)
} else {
gridvals <- seq(min(pdpvar, na.rm = T), max(pdpvar, na.rm = T), length.out = gridsize)
}
dnew <- do.call(rbind, lapply(gridvals, function(i) {
d1 <- d
d1[[var]] <- i
d1
}))
if (is.factor(pdpvar)) dnew[[var]] <- factor(dnew[[var]], levels = levels(pdpvar), ordered = is.ordered(pdpvar))
}
else {
pdpvar1 <- d[[var[1]]]
pdpvar2 <- d[[var[2]]]
if (is.factor(pdpvar1)) {
gridvals1 <- levels(pdpvar1)
} else {
gridvals1 <- seq(min(pdpvar1, na.rm = T), max(pdpvar1, na.rm = T), length.out = gridsize)
}
if (is.factor(pdpvar2)) {
gridvals2 <- levels(pdpvar2)
} else {
gridvals2 <- seq(min(pdpvar2, na.rm = T), max(pdpvar2, na.rm = T), length.out = gridsize)
}
gridvals <- expand.grid(gridvals1, gridvals2)
dnew <- do.call(rbind, lapply(1:nrow(gridvals), function(i) {
d1 <- d
d1[[var[1]]] <- gridvals[i, 1]
d1[[var[2]]] <- gridvals[i, 2]
d1
}))
if (is.factor(pdpvar1)) dnew[[var[1]]] <- factor(dnew[[var[1]]], levels = levels(pdpvar1), ordered = is.ordered(pdpvar1))
if (is.factor(pdpvar2)) dnew[[var[2]]] <- factor(dnew[[var[2]]], levels = levels(pdpvar2), ordered = is.ordered(pdpvar2))
}
dnew$.id <- 1:nrow(d)
rownames(dnew) <- NULL
dnew
}
Now use some data to create the plots:
aq <- na.omit(airquality)
rf <- randomForest(Ozone~., data = aq)
pdpLayout(aq, rf, "Ozone")
Any help or suggestions is greatly appreciated.

Trying to recreate plot with tresholds in ggplot

So basically I'm trying to recreate this plot in ggplot, to match my theme:
and I've come pretty close:
but I can't recreate the treshold in my plot. How can I possibly add this to my ggplot? Here is the source code of the original plotting function:
function (data, option = c("alpha", "xi", "quantile"), start = 15,end = NA,
reverse = FALSE, p = NA, ci = 0.95, auto.scale = TRUE, labels = TRUE, ...)
{
if (is.timeSeries(data))
data <- as.vector(series(data))
data <- as.numeric(data)
ordered <- rev(sort(data))
ordered <- ordered[ordered > 0]
n <- length(ordered)
option <- match.arg(option)
if ((option == "quantile") && (is.na(p)))
stop("\nInput a value for the probability p.\n")
if ((option == "quantile") && (p < 1 - start/n)) {
cat("Graph may look strange !! \n\n")
cat(paste("Suggestion 1: Increase `p' above", format(signif(1 -
start/n, 5)), "\n"))
cat(paste("Suggestion 2: Increase `start' above ", ceiling(length(data) *
(1 - p)), "\n"))
}
k <- 1:n
loggs <- logb(ordered)
avesumlog <- cumsum(loggs)/(1:n)
xihat <- c(NA, (avesumlog - loggs)[2:n])
alphahat <- 1/xihat
y <- switch(option, alpha = alphahat, xi = xihat, quantile = ordered *
((n * (1 - p))/k)^(-1/alphahat))
ses <- y/sqrt(k)
if (is.na(end))
end <- n
x <- trunc(seq(from = min(end, length(data)), to = start))
y <- y[x]
ylabel <- option
yrange <- range(y)
if (ci && (option != "quantile")) {
qq <- qnorm(1 - (1 - ci)/2)
u <- y + ses[x] * qq
l <- y - ses[x] * qq
ylabel <- paste(ylabel, " (CI, p =", ci, ")", sep = "")
yrange <- range(u, l)
}
if (option == "quantile")
ylabel <- paste("Quantile, p =", p)
index <- x
if (reverse)
index <- -x
if (auto.scale) {
plot(index, y, ylim = yrange, type = "l", xlab = "",
ylab = "", axes = FALSE, ...)
}
else {
plot(index, y, type = "l", xlab = "", ylab = "", axes = FALSE,
...)
}
axis(1, at = index, labels = paste(x), tick = FALSE)
axis(2)
threshold <- findthreshold(data, x)
axis(3, at = index, labels = paste(format(signif(threshold,
3))), tick = FALSE)
box()
if (ci && (option != "quantile")) {
lines(index, u, lty = 2, col = 2)
lines(index, l, lty = 2, col = 2)
}
if (labels) {
title(xlab = "Order Statistics", ylab = ylabel)
mtext("Threshold", side = 3, line = 3)
}
return(invisible(list(x = index, y = y)))
}
Thanks for your help!
I think you are looking for the sec.axis argument for scale_x_continuous(). To make sure everything lined up, I had to create a function to find every nth value. Hope this helps
set.seed(1234)
df <- tibble(
x = 1:50,
threshold = round(1+rnorm(1:50), 2),
base_y = c(0.1, runif(49, -1, 2)/5),
mid = cumsum(base_y),
upper = mid + 5,
lower = mid - 5
)
every_nth <- function(x, n) {
x[seq(0, length(x), n)]
}
ggplot(df, aes(x = x)) +
geom_ribbon(aes(ymin = lower, ymax = upper), alpha = 0.2) +
geom_line(aes(y = mid)) +
scale_x_continuous(
breaks = every_nth(df$x, 10),
sec.axis = dup_axis(
labels = every_nth(df$threshold, 10),
name = "Threshold"
)
) +
theme_minimal()

How to plot a CDF functon from PDF in R

I have the following function:
fx <- function(x) {
if(x >= 0 && x < 3) {
res <- 0.2;
} else if(x >=3 && x < 5) {
res <- 0.05;
} else if(x >= 5 && x < 6) {
res <- 0.15;
} else if(x >= 7 && x < 10) {
res <- 0.05;
} else {
res <- 0;
}
return(res);
}
How can I plot it's CDF function on the interval [0,10]?
Try
fx <- Vectorize(fx)
grid <- 0:10
p <- fx(grid)
cdf <- cumsum(p)
plot(grid, cdf, type = 'p', ylim = c(0, 1), col = 'steelblue',
xlab = 'x', ylab = expression(F(x)), pch = 19, las = 1)
segments(x0 = grid, x1 = grid + 1, y0 = cdf)
segments(x0 = grid + 1, y0 = c(cdf[-1], 1), y1 = cdf, lty = 2)
To add a bit accuracy to #Martin Schmelzer's answer. A cummulative distribution function(CDF)
evaluated at x, is the probability that X will take a value less than
or equal to x
So to get CDF from Probability Density Function(PDF), you need to integrate on PDF:
fx <- Vectorize(fx)
dx <- 0.01
x <- seq(0, 10, by = dx)
plot(x, cumsum(fx(x) * dx), type = "l", ylab = "cummulative probability", main = "My CDF")
Just adding up on the previous answers and using ggplot
# cdf
Fx <- function(x, dx) {
cumsum(fx(x)*dx)
}
fx <- Vectorize(fx)
dx <- 0.01
x <- seq(0, 10, dx)
df <- rbind(data.frame(x, value=fx(x), func='pdf'),
data.frame(x, value=Fx(x, dx), func='cdf'))
library(ggplot2)
ggplot(df, aes(x, value, col=func)) +
geom_point() + geom_line() + ylim(0, 1)

Fix interpolated polar contour plot function to works with current R and (possibly) use ggplot

The question R interpolated polar contour plot shows an excellent way to produce interpolated polar plots in R. I include the very slightly modified version I'm using:
PolarImageInterpolate <- function(
### Plotting data (in cartesian) - will be converted to polar space.
x, y, z,
### Plot component flags
contours=TRUE, # Add contours to the plotted surface
legend=TRUE, # Plot a surface data legend?
axes=TRUE, # Plot axes?
points=TRUE, # Plot individual data points
extrapolate=FALSE, # Should we extrapolate outside data points?
### Data splitting params for color scale and contours
col_breaks_source = 1, # Where to calculate the color brakes from (1=data,2=surface)
# If you know the levels, input directly (i.e. c(0,1))
col_levels = 10, # Number of color levels to use - must match length(col) if
#col specified separately
col = rev(heat.colors(col_levels)), # Colors to plot
# col = rev(heat.colors(col_levels)), # Colors to plot
contour_breaks_source = 1, # 1=z data, 2=calculated surface data
# If you know the levels, input directly (i.e. c(0,1))
contour_levels = col_levels+1, # One more contour break than col_levels (must be
# specified correctly if done manually
### Plotting params
outer.radius = ceiling(max(sqrt(x^2+y^2))),
circle.rads = pretty(c(0,outer.radius)), #Radius lines
spatial_res=1000, #Resolution of fitted surface
single_point_overlay=0, #Overlay "key" data point with square
#(0 = No, Other = number of pt)
### Fitting parameters
interp.type = 1, #1 = linear, 2 = Thin plate spline
lambda=0){ #Used only when interp.type = 2
minitics <- seq(-outer.radius, outer.radius, length.out = spatial_res)
# interpolate the data
if (interp.type ==1 ){
Interp <- akima:::interp(x = x, y = y, z = z,
extrap = extrapolate,
xo = minitics,
yo = minitics,
linear = FALSE)
Mat <- Interp[[3]]
}
else if (interp.type == 2){
library(fields)
grid.list = list(x=minitics,y=minitics)
t = Tps(cbind(x,y),z,lambda=lambda)
tmp = predict.surface(t,grid.list,extrap=extrapolate)
Mat = tmp$z
}
else {stop("interp.type value not valid")}
# mark cells outside circle as NA
markNA <- matrix(minitics, ncol = spatial_res, nrow = spatial_res)
Mat[!sqrt(markNA ^ 2 + t(markNA) ^ 2) < outer.radius] <- NA
### Set contour_breaks based on requested source
if ((length(contour_breaks_source == 1)) & (contour_breaks_source[1] == 1)){
contour_breaks = seq(min(z,na.rm=TRUE),max(z,na.rm=TRUE),
by=(max(z,na.rm=TRUE)-min(z,na.rm=TRUE))/(contour_levels-1))
}
else if ((length(contour_breaks_source == 1)) & (contour_breaks_source[1] == 2)){
contour_breaks = seq(min(Mat,na.rm=TRUE),max(Mat,na.rm=TRUE),
by=(max(Mat,na.rm=TRUE)-min(Mat,na.rm=TRUE))/(contour_levels-1))
}
else if ((length(contour_breaks_source) == 2) & (is.numeric(contour_breaks_source))){
contour_breaks = pretty(contour_breaks_source,n=contour_levels)
contour_breaks = seq(contour_breaks_source[1],contour_breaks_source[2],
by=(contour_breaks_source[2]-contour_breaks_source[1])/(contour_levels-1))
}
else {stop("Invalid selection for \"contour_breaks_source\"")}
### Set color breaks based on requested source
if ((length(col_breaks_source) == 1) & (col_breaks_source[1] == 1))
{zlim=c(min(z,na.rm=TRUE),max(z,na.rm=TRUE))}
else if ((length(col_breaks_source) == 1) & (col_breaks_source[1] == 2))
{zlim=c(min(Mat,na.rm=TRUE),max(Mat,na.rm=TRUE))}
else if ((length(col_breaks_source) == 2) & (is.numeric(col_breaks_source)))
{zlim=col_breaks_source}
else {stop("Invalid selection for \"col_breaks_source\"")}
# begin plot
Mat_plot = Mat
Mat_plot[which(Mat_plot<zlim[1])]=zlim[1]
Mat_plot[which(Mat_plot>zlim[2])]=zlim[2]
image(x = minitics, y = minitics, Mat_plot , useRaster = TRUE, asp = 1, axes = FALSE, xlab = "", ylab = "", zlim = zlim, col = col)
# add contours if desired
if (contours){
CL <- contourLines(x = minitics, y = minitics, Mat, levels = contour_breaks)
A <- lapply(CL, function(xy){
lines(xy$x, xy$y, col = gray(.2), lwd = .5)
})
}
# add interpolated point if desired
if (points){
points(x, y, pch = 21, bg ="blue")
}
# add overlay point (used for trained image marking) if desired
if (single_point_overlay!=0){
points(x[single_point_overlay],y[single_point_overlay],pch=0)
}
# add radial axes if desired
if (axes){
# internals for axis markup
RMat <- function(radians){
matrix(c(cos(radians), sin(radians), -sin(radians), cos(radians)), ncol = 2)
}
circle <- function(x, y, rad = 1, nvert = 500){
rads <- seq(0,2*pi,length.out = nvert)
xcoords <- cos(rads) * rad + x
ycoords <- sin(rads) * rad + y
cbind(xcoords, ycoords)
}
# draw circles
if (missing(circle.rads)){
circle.rads <- pretty(c(0,outer.radius))
}
for (i in circle.rads){
lines(circle(0, 0, i), col = "#66666650")
}
# put on radial spoke axes:
axis.rads <- c(0, pi / 6, pi / 3, pi / 2, 2 * pi / 3, 5 * pi / 6)
r.labs <- c(90, 60, 30, 0, 330, 300)
l.labs <- c(270, 240, 210, 180, 150, 120)
for (i in 1:length(axis.rads)){
endpoints <- zapsmall(c(RMat(axis.rads[i]) %*% matrix(c(1, 0, -1, 0) * outer.radius,ncol = 2)))
segments(endpoints[1], endpoints[2], endpoints[3], endpoints[4], col = "#66666650")
endpoints <- c(RMat(axis.rads[i]) %*% matrix(c(1.1, 0, -1.1, 0) * outer.radius, ncol = 2))
lab1 <- bquote(.(r.labs[i]) * degree)
lab2 <- bquote(.(l.labs[i]) * degree)
text(endpoints[1], endpoints[2], lab1, xpd = TRUE)
text(endpoints[3], endpoints[4], lab2, xpd = TRUE)
}
axis(2, pos = -1.25 * outer.radius, at = sort(union(circle.rads,-circle.rads)), labels = NA)
text( -1.26 * outer.radius, sort(union(circle.rads, -circle.rads)),sort(union(circle.rads, -circle.rads)), xpd = TRUE, pos = 2)
}
# add legend if desired
# this could be sloppy if there are lots of breaks, and that's why it's optional.
# another option would be to use fields:::image.plot(), using only the legend.
# There's an example for how to do so in its documentation
if (legend){
library(fields)
image.plot(legend.only=TRUE, smallplot=c(.78,.82,.1,.8), col=col, zlim=zlim)
# ylevs <- seq(-outer.radius, outer.radius, length = contour_levels+ 1)
# #ylevs <- seq(-outer.radius, outer.radius, length = length(contour_breaks))
# rect(1.2 * outer.radius, ylevs[1:(length(ylevs) - 1)], 1.3 * outer.radius, ylevs[2:length(ylevs)], col = col, border = NA, xpd = TRUE)
# rect(1.2 * outer.radius, min(ylevs), 1.3 * outer.radius, max(ylevs), border = "#66666650", xpd = TRUE)
# text(1.3 * outer.radius, ylevs[seq(1,length(ylevs),length.out=length(contour_breaks))],round(contour_breaks, 1), pos = 4, xpd = TRUE)
}
}
Unfortunately, this function has a few bugs:
a) Even with a purely radial pattern, the produced plot has a distortion whose origin I don't understand:
#example
r <- rep(seq(0.1, 0.9, len = 8), each = 8)
theta <- rep(seq(0, 7/4*pi, by = pi/4), times = 8)
x <- r*sin(theta)
y <- r*cos(theta)
z <- z <- rep(seq(0, 1, len = 8), each = 8)
PolarImageInterpolate(x, y, z)
why the wiggles between 300° and 360°? The z function is constant in theta, so there's no reason why there should be wiggles.
b) After 4 years, some of the packages loaded have been modified and at least one functionality of the function is broken. Setting interp.type = 2 should use thin plate splines for interpolation instead than a basic linear interpolation, but it doesn't work:
> PolarImageInterpolate(x, y, z, interp.type = 2)
Warning:
Grid searches over lambda (nugget and sill variances) with minima at the endpoints:
(GCV) Generalized Cross-Validation
minimum at right endpoint lambda = 9.493563e-06 (eff. df= 60.80002 )
predict.surface is now the function predictSurface
Error in image.default(x = minitics, y = minitics, Mat_plot, useRaster = TRUE, :
'z' must be a matrix
the first message is a warning and doesn't worry me, but the second one is actually an error and prevents me from using thin plate splines. Can you help me solve these two problems?
Also, I'd like to "upgrade" to using ggplot2, so if you can give an answer which does that, it would be great. Otherwise, after the bugs are fixed, I'll try asking a specific question which only asks to modify the function so that it uses ggplot2.
For the ggplot2 solution, here is a start. geom_raster allows interpolation, but does not work for polar coordinates. Instead, you can use geom_tile, though then you may need to do the interpolation yourself before passing the values to ggplot.
Of important note: the example data you gave gives an error when working with geom_raster that I believe is caused by the spacing of the values. Here is an example set that works (note, used this blog as a guide, though it is now outdated):
dat_grid <-
expand.grid(x = seq(0,350,10), y = 0:10)
dat_grid$density <- runif(nrow(dat_grid))
ggplot(dat_grid
, aes(x = x, y = y, fill = density)) +
geom_tile() +
coord_polar() +
scale_x_continuous(breaks = seq(0,360,90)) +
scale_fill_gradient2(low = "white"
, mid = "yellow"
, high = "red3"
, midpoint = 0.5)
If you are working from raw data, you might be able to get ggplot to do the work for you. Here is an example working from raw data. There are a lot of manual tinkering things to do, but it is at least an optional starting point:
polarData <-
data.frame(
theta = runif(10000, 0, 2*pi)
, r = log(abs(rnorm(10000, 0, 10)))
)
toCart <-
data.frame(
x = polarData$r * cos(polarData$theta)
, y = polarData$r * sin(polarData$theta)
)
axisLines <-
data.frame(
x = 0
, y = 0
, xend = max(polarData$r)*cos(seq(0, 2*pi, pi/4))
, yend = max(polarData$r)*sin(seq(0, 2*pi, pi/4))
, angle = paste(seq(0, 2, 1/4), "pi") )
ticks <-
data.frame(
label = pretty(c(0, max(polarData$r)) )[-1]
)
ggplot(toCart) +
# geom_point(aes(x = x, y = y)) +
stat_density_2d(aes(x = x, y = y
, fill = ..level..)
, geom = "polygon") +
scale_fill_gradient(low = "white"
, high = "red3") +
theme(axis.text = element_blank()
, axis.title = element_blank()
, axis.line = element_blank()
, axis.ticks = element_blank()) +
geom_segment(data = axisLines
, aes(x = x, y = y
, xend = xend
, yend = yend)) +
geom_label(data = axisLines
, aes(x = xend, y = yend, label = angle)) +
geom_label(data = ticks
, aes(x = 0, y = label, label = label))
From an another post, I came to know that the fucnction predict.surface from package fields is deprecated whic is used for interp.type = 2 in PolarImageInterpolate. Instead, a new predictSurface function is introduced, which can be used here.
Example:
r <- rep(seq(0.1, 0.9, len = 8), each = 8)
theta <- rep(seq(0, 7/4*pi, by = pi/4), times = 8)
x <- r*sin(theta)
y <- r*cos(theta)
z <- z <- rep(seq(0, 1, len = 8), each = 8)
PolarImageInterpolate(x, y, z, interp.type = 2)

Resources