SPI drought index [closed] - r

Closed. This question needs details or clarity. It is not currently accepting answers.
Want to improve this question? Add details and clarify the problem by editing this post.
Closed 9 years ago.
Improve this question
I have the following data:
pcp # precipitation monthly data from 2001-01,2020-12
I have used the following to compute the SPI drought index
library(SPEI)
spi1 <- spi(pcp,1,kernel = list(type = "rectangular", shift = 0),distribution = "Gamma", fit = "ub-pwm", na.rm = FALSE,ref.start=NULL, ref.end=NULL, x=FALSE, params=NULL)
First question:
when I plot(spi1) I get SPEI on the Y axis which I don't want , what I want is SPI,
second:
how to plot each month separately for example when you call spi1 it will give you the index value for each month, and I want to plot it for each month

For the first answer, you can rewrite the function plot.spei
plot.spei <-
function (x, ...)
{
## label <- ifelse(as.character(x$call)[1] == "spei", "SPEI",
## "SPI")
ser <- ts(as.matrix(x$fitted[-c(1:x$scale), ]), end = end(x$fitted),
frequency = frequency(x$fitted))
ser[is.nan(ser - ser)] <- 0
se <- ifelse(ser == 0, ser, NA)
tit <- dimnames(x$coefficients)[2][[1]]
if (start(ser)[2] == 1) {
ns <- c(start(ser)[1] - 1, 12)
}
else {
ns <- c(start(ser)[1], start(ser)[2] - 1)
}
if (end(ser)[2] == 12) {
ne <- c(end(ser)[1] + 1, 1)
}
else {
ne <- c(end(ser)[1], end(ser)[2] + 1)
}
n <- ncol(ser)
if (is.null(n))
n <- 1
par(mar = c(4, 4, 2, 1) + 0.1)
if (n > 1 & n < 5)
par(mfrow = c(n, 1))
if (n > 1 & n >= 5)
par(mfrow = c({
n + 1
}%/%2, 2))
for (i in 1:n) {
datt <- ts(c(0, ser[, i], 0), frequency = frequency(ser),
start = ns, end = ne)
datt.pos <- ifelse(datt > 0, datt, 0)
datt.neg <- ifelse(datt <= 0, datt, 0)
plot(datt, type = "n", xlab = "", main = tit[i], ...)
if (!is.null(x$ref.period)) {
k <- ts(5, start = x$ref.period[1, ], end = x$ref.period[2,
], frequency = 12)
k[1] <- k[length(k)] <- -5
polygon(k, col = "light grey", border = NA, density = 20)
abline(v = x$ref.period[1, 1] + (x$ref.period[1,
2] - 1)/12, col = "grey")
abline(v = x$ref.period[2, 1] + (x$ref.period[2,
2] - 1)/12, col = "grey")
}
grid(col = "black")
polygon(datt.pos, col = "blue", border = NA)
polygon(datt.neg, col = "red", border = NA)
lines(datt, col = "dark grey")
abline(h = 0)
points(se, pch = 21, col = "white", bg = "black")
}
}
And then use the ylab parameters
plot(spi1, ylab = "SPI")
If you want to plot it separately, you can extract the fitted value of class ts and apply basic plotting for time series object in R.
par(mfrow = c(3, 4))
listofmonths <- split(fitted(spi1), cycle(fitted(spi1)))
names(listofmonths) <- month.abb
require(plyr)
l_ply(seq_along(listofmonths), function(x) {
plot(x = seq_along(listofmonths[[x]]), y = listofmonths[[x]],
type = "l", xlab = "", ylab = "SPI")
title(names(listofmonths)[x])
})
You can also try these types of plot
monthplot(fitted(spi1), labels = month.abb, cex.axis = 0.8)
boxplot(fitted(spi1) ~ cycle(fitted(spi1)), names = month.abb, cex.axis = 0.8)

Related

Dot Plot include vertical line and dots of different colors

I needed to include in the code below, a vertical line,
for example, in position x = 5 and that all points smaller than 5 have another color,
for example blue.
The values of a variable can be read from the x-axis, and the y-axis shows the order of the observations in the variable (from bottom to top). Isolated points as the far ends, and on either side in a plot, suggest potentional outliers
Thanks
library(dplyr)
library(lattice)
n = 1000
df <- tibble(
xx1 = runif(n, min = 3, max = 10),
xx2 = runif(n, min = 3, max = 10),
xx3 = runif(n, min = 3, max = 10)
)
MyVar <- c("xx1","xx2","xx3")
MydotplotBR <- function(DataSelected){
P <- dotplot(as.matrix(as.matrix(DataSelected)),
groups=FALSE,
strip = strip.custom(bg = 'white',
par.strip.text = list(cex = 1.2)),
scales = list(x = list(relation = "same",tck = 1,
draw = TRUE, at=seq(0,10,1)),x=list(at=seq),
y = list(relation = "free", draw = FALSE),
auto.key = list(x =1)),
col=10,
axes = FALSE,
cex = 0.4, pch = 5,
xlim=c(0,10),
xlab = list(label = "Variable Value", cex = 1.5),
ylab = list(label = "Order of data in the file", cex = 1.5))
print(P)
}
(tempoi <- Sys.time())
Vertemp <- MydotplotBR(df[,MyVar])
(tempof <- Sys.time()-tempoi)
I find it weird that you want a color dependent only on the x-axis when values are also used on the y-axis of other plots.
Nevertheless, here's a homemade pairs_cutoff() function doing what you want.
pairs_cutoff <- function(data, cutoff, cols = c("red", "blue"),
only.lower = F, ...){
data <- as.data.frame(data)
cns <- colnames(data)
nc <- ncol(data)
layout(matrix(seq_len(nc^2), ncol = nc))
invisible(
sapply(seq_len(nc), function(i){
sapply(seq_len(nc), function(j){
if(i == j){
plot.new()
legend("center", bty = "n", title = cns[i], cex = 1.5, text.font = 2, legend = "")
} else {
if(j < i & only.lower)
plot.new()
else{
if(is.null(cutoff))
cols <- cols[1]
plot(data[,i], data[,j], col = cols[(data[,i] < cutoff) + 1],
xlab = cns[i], ylab = cns[j], ...)
}
}
})
})
)
}
Using your suggested data :
n = 1000
dat <- tibble(
xx1 = runif(n, min = 3, max = 10),
xx2 = runif(n, min = 3, max = 10),
xx3 = runif(n, min = 3, max = 10)
)
pairs_cutoff(dat, cutoff = 5, only.lower = T)
outputs the following plot :
You can specify extra parameters to the plot function (eg. pch) directly to pairs_cutoff.
Also, if you want the full symmetric grid of plots, set only.lower = F.

How to plot two different user defined functions in the same plot in R

I need to plot 2 different user defined function in the same R plot.
I vectorize each of them:
Vectorize creates a function wrapper that vectorizes the action of its argument FUN. Vectorize(FUN, vectorize.args = arg.names, SIMPLIFY = TRUE,USE.NAMES = TRUE)
If I plot them separately I get the correct plot, however if I try to plot both functions in the same graph, it does not work.
Here is what I did:
1) first function:
payoff_call <- function(S, K){
if(S < 0 | K < 0){
return(print("The input S and K must be > 0"))
}else{
return(max(S-K,0))
}
}
2) second function:
myBlackScholes <- function(S, K, tau, r, sigma, type = c("call", "put")) {
if(S < 0 | K < 0 | tau < 0 | sigma < 0) {
return(print("The input S , K , tau and sigma must be > 0"))
} else
{
d1 <- (log(S/K) + (r + 0.5*sigma^2)*tau)/(sigma*sqrt(tau))
d2 <- d1 - sigma*sqrt(tau)
if(type == "call"){
output <- cbind(
V_BS_Call = S*pnorm(d1) - K*exp(-r*(tau))*pnorm(d2), #fair value call
delta_call = pnorm(d1), #delta call
vega_call = S*sqrt(tau)*dnorm(d1), #vega call
theta_call = -S*dnorm(d1)*sigma/(2*sqrt(tau)) - r*K*exp(-r*tau)*pnorm(d2), #theta call
rho_call = K*tau*exp(-r*tau)*pnorm(d2), #rho call
kappa_call = -exp(-r*tau)*(pnorm(-d2)-1), #kappa call
gamma_call = dnorm(d1)/(S*sigma*sqrt(tau)))#gamma call
return(output)
} else if(type == "put"){
output <- cbind(
V_BS_Put = K*exp(-r*(tau))*pnorm(-d2) - S*pnorm(-d1), #fair value put
delta_put = pnorm(d1)-1, #delta put
vega_put = S*sqrt(tau)*dnorm(d1), #vega put same as vega call
theta_put = -S*dnorm(d1)*sigma/(2*sqrt(tau)) + r*K*exp(-r*tau)*pnorm(-d2), #theta put
rho_put = -K*tau*exp(-r*tau)*pnorm(-d2), #rho put
kappa_put = exp(-r*tau)*pnorm(-d2), #kappa put
gamma_put = dnorm(d1)/(S*sigma*sqrt(tau))) #gamma put
return(output)
} else{
return(print("Wrong type in input"))
}
}
}
3) I vectorize each function:
vect_payoff_call <- Vectorize(payoff_call)
vect_myBlackScholes <- Vectorize(myBlackScholes)
4) I plot the 2 functions, for S starting at 0 to 100:
plot(x = 0:100, y = vect_payoff_call(0:100, 50),
type="l", col="blue", lty = 1, lwd = 1,
main = "Long Call Option Payoff function", xlab = "S", ylab = expression(f(S)))
plot(x = 0:100, y = vect_myBlackScholes(0:100,50, 1, 0.12, 0.3, "call")[1,], type="l", col="green", lty = 1, lwd = 1, add=TRUE)
The first plot is correct, but the second is not.
Any suggestion?
Here is how. Note that I use ggplot2 in my example:
library(ggplot2)
x <- seq(0,2, by=0.1)
my_square <- function(x) x^2
my_cube <- function(x) x^3
my_data <- data.frame(argx = x, my_square = my_square(x),
my_cube = my_cube(x))
ggplot(my_data) +
geom_point(aes(argx, my_square, color = 'x^2')) +
geom_line(aes(argx, my_square, color = 'x^2')) +
geom_point(aes(argx, my_cube, color = 'x^3')) +
geom_line(aes(argx, my_cube, color = 'x^3')) +
theme_bw() +
labs(x = 'x', y = 'y') +
scale_color_manual(values = c('x^2' = 'red', 'x^3' = 'green'), name = 'function')
Output

Changing the colour of a calibration plot

I've been generating calibration plots for my cph models of survival data. However, the default setting puts the "ideal" line in grey, which makes it difficult to discriminate. I've tried to specify the colour parameters in plot(), but this obviously only changes the line for "observed". What can I pass in plot() to change the line of the "ideal" line in a calibration plot generated in rms?
Here is one option:
Let's say you have code to create a cph model of survival data and use calibrate from the rms package:
library(rms)
set.seed(1)
n <- 200
d.time <- rexp(n)
x1 <- runif(n)
x2 <- factor(sample(c('a', 'b', 'c'), n, TRUE))
f <- cph(Surv(d.time) ~ pol(x1,2) * x2, x=TRUE, y=TRUE, surv=TRUE,time.inc=1.5)
cal <- calibrate(f, u=1.5, cmethod='KM', m=50, B=20)
This will generate a calibrate object:
R> class(cal)
[1] "calibrate"
If you are using plot on this object, you can discover the function being called in rms:
R> getAnywhere("plot.calibrate.default")
A single object matching ‘plot.calibrate.default’ was found
It was found in the following places
registered S3 method for plot from namespace rms
namespace:rms
with value
function (x, xlab, ylab, xlim, ylim, legend = TRUE, subtitles = TRUE,
cex.subtitles = 0.75, riskdist = TRUE, scat1d.opts = list(nhistSpike = 200),
...)
You can create your own function based on this function, and alter the color of the ideal line. In this case, we make the ideal line green (and revise the text labels to match):
myplot <- function (x, xlab, ylab, subtitles = TRUE, conf.int = TRUE, cex.subtitles = 0.75,
riskdist = TRUE, add = FALSE, scat1d.opts = list(nhistSpike = 200),
par.corrected = NULL, ...)
{
at <- attributes(x)
u <- at$u
units <- at$units
if (length(par.corrected) && !is.list(par.corrected))
stop("par.corrected must be a list")
z <- list(col = "blue", lty = 1, lwd = 1, pch = 4)
if (!length(par.corrected))
par.corrected <- z
else for (n in setdiff(names(z), names(par.corrected))) par.corrected[[n]] <- z[[n]]
predicted <- at$predicted
if ("KM" %in% colnames(x)) {
type <- "stratified"
pred <- x[, "mean.predicted"]
cal <- x[, "KM"]
cal.corrected <- x[, "KM.corrected"]
se <- x[, "std.err"]
}
else {
type <- "smooth"
pred <- x[, "pred"]
cal <- x[, "calibrated"]
cal.corrected <- x[, "calibrated.corrected"]
se <- NULL
}
un <- if (u == 1)
paste(units, "s", sep = "")
else units
if (missing(xlab))
xlab <- paste("Predicted ", format(u), units, "Survival")
if (missing(ylab))
ylab <- paste("Fraction Surviving ", format(u), " ",
un, sep = "")
if (length(se) && conf.int) {
ciupper <- function(surv, d) ifelse(surv == 0, 0, pmin(1,
surv * exp(d)))
cilower <- function(surv, d) ifelse(surv == 0, 0, surv *
exp(-d))
errbar(pred, cal, cilower(cal, 1.959964 * se), ciupper(cal,
1.959964 * se), xlab = xlab, ylab = ylab, type = "b",
add = add, ...)
}
else if (add)
lines(pred, cal, type = if (type == "smooth")
"l"
else "b")
else plot(pred, cal, xlab = xlab, ylab = ylab, type = if (type ==
"smooth")
"l"
else "b", ...)
err <- NULL
if (riskdist && length(predicted)) {
do.call("scat1d", c(list(x = predicted), scat1d.opts))
if (type == "smooth") {
s <- !is.na(pred + cal.corrected)
err <- predicted - approxExtrap(pred[s], cal.corrected[s],
xout = predicted, ties = mean)$y
}
}
if (subtitles && !add) {
if (type == "smooth") {
Col <- par.corrected$col
substring(Col, 1, 1) <- toupper(substring(Col, 1,
1))
title(sub = sprintf("Black: observed Green: ideal\n%s : optimism corrected",
Col), adj = 0, cex.sub = cex.subtitles)
w <- if (length(err))
paste("B=", at$B, " based on ", at$what, "\nMean |error|=",
round(mean(abs(err)), 3), " 0.9 Quantile=",
round(quantile(abs(err), 0.9, na.rm = TRUE),
3), sep = "")
else paste("B=", at$B, "\nBased on ", at$what, sep = "")
title(sub = w, adj = 1, cex.sub = cex.subtitles)
}
else {
title(sub = paste("n=", at$n, " d=", at$d, " p=",
at$p, ", ", at$m, " subjects per group\nGreen: ideal",
sep = ""), adj = 0, cex.sub = cex.subtitles)
title(sub = paste("X - resampling optimism added, B=",
at$B, "\nBased on ", at$what, sep = ""), adj = 1,
cex.sub = cex.subtitles)
}
}
abline(0, 1, col = "green")
if (type == "stratified")
points(pred, cal.corrected, pch = par.corrected$pch,
col = par.corrected$col)
else lines(pred, cal.corrected, col = par.corrected$col,
lty = par.corrected$lty, lwd = par.corrected$lwd)
invisible()
}
Then you can use your custom function with your calibrate object:
myplot(cal)

Legend not displaying in plot function: R

I have tried to plot a simple graph using the following code;
plot(y = Et, x = t, type = "l", col = 1,
xlab = "Time", ylab= "Equity ($)",
main = "Figure 1–3: Randomly Generated Equity Curves")
grid()
abline(h = 10000)
lines(y = Et2, x = t, col = 2)
lines(y = Eb, x = t, col = 8)
Then I try to add a legend to the plot
legend(x = "topleft", col = c(1,2,8), lwd = 2, legend = c("Curve 1",
"Curve 2",
"SPY"))
And the result I obtain is the following;
Where am I going wrong?
EDIT 1: I restarted R studio and re-ran the plots and got the following result.
EDIT 2: Reproducible code:
library(quantmod)
options("getSymbols.warning4.0" = FALSE,
"getSymbols.auto.assign" = FALSE)
SPY <- getSymbols(c("SPY"), from = "2016-09-01")
SPY <- as.numeric(SPY$SPY.Close)
set.seed(123)
#create a time index
t <- 1:(length(SPY)-1)
#tradable capital vector
Vt <- c(rep(10000, length(t)))
#Benchmark return series
Rb <- rep(NA, length(t))
for(i in 2:length(t)) {
Rb[i] <- (SPY[i] / SPY[i - 1]) - 1
}
#Benchmark equity curve
Eb <- rep(NA, length(t))
Eb[1] <- Vt[1]
for(i in 2:length(t)) {
Eb[i] <- Eb[i - 1] * (1 + Rb[i])
}
#Randomy simulated return series 1
Rt <- rep(NA, length(t))
for(i in 2:length(t)) {
Rt[i] <- Rb[i] + rnorm(n = 1,
mean = 0.24/length(t),
sd = 2.5 * sd(Rb, na.rm = TRUE))
}
#Randomly simulated return series 2
Rt2 <- rep(NA, length(t))
for(i in 2:length(t)) {
Rt2[i] <- Rb[i] + rnorm(n = 1,
mean = 0.02/length(t),
sd = 0.75 * sd(Rb, na.rm = TRUE))
}
# Randomly Simulated Equity Curve 1
Et <- rep(NA, length(t))
Et <- Vt[1]
for(i in 2:length(t)) {
Et[i] <- Et[i-1] * (1 + Rt[i])
}
# Randomly Simulated Equity Curve 2
Et2 <- rep(NA, length(t))
Et2 <- Vt[1]
for(i in 2:length(t)) {
Et2[i] <- Et2[i-1] * (1 + Rt2[i])
}
#Plot of Et1 against the SPY Portfolio
plot(y = Et, x = t, type = "l", col = 1,
xlab = "Time", ylab= "Equity ($)",
main = "Figure 1-3: Randomly Generated Equity Curves")
grid()
abline(h = 10000)
lines(y = Et2, x = t, col = 2)
lines(y = Eb, x = t, col = 8)
legend(x = "topleft", col = c(1,2,8), lwd = 2, legend = c("Curve 1",
"Curve 2",
"SPY"))
The above code is what I have ran in order to produce the graphs above. If you are able to run it and get the same error let me know.

BRT: Add gradient colors to interaction plots using gbm.perspec

I would like to add a gradient of colours following the fitted values (e.g. higher fitted values darker colours, lower fitted values lighter colours) in my three-dimensional dependence plots.
I have used the example presented in dismo package:
library(dismo)
data(Anguilla_train)
angaus.tc5.lr01 <- gbm.step(data=Anguilla_train, gbm.x = 3:13, gbm.y = 2,
family = "bernoulli", tree.complexity = 5, learning.rate = 0.01,
bag.fraction = 0.5)
# Find interactions in the gbm model:
find.int <- gbm.interactions( angaus.tc5.lr01)
find.int$interactions
find.int$rank.list
I have only managed to add the same colour to the whole plot:
gbm.perspec( angaus.tc5.lr01, 7, 1,
x.label = "USRainDays",
y.label = "SegSumT",
z.label = "Fitted values",
z.range=c(0,0.435),
col="blue")
Or to add a gradient colour but not following the fitted values:
gbm.perspec( angaus.tc5.lr01, 7, 1,
x.label = "USRainDays",
y.label = "SegSumT",
z.label = "Fitted values",
col=heat.colors(50),
z.range=c(0,0.435))
I also checked the code of function gbm.perspec, and If I understood correctly the fitted values are call inside the formula as "prediction", and later on are part of the "pred.matrix" that is passed to the final plotting: persp(x = x.var, y = y.var, z = pred.matrix...), but I have no managed to access them from the gbm.perspec formula. I tried to modified the gbm.perpec function by adding "col=heat.colors(100)[round(pred.matrix*100, 0)]" into the persp() inside the function, but it does not do what I am looking for:
persp(x = x.var, y = y.var, z = pred.matrix, zlim = z.range,
xlab = x.label, ylab = y.label, zlab = z.label,
theta = theta, phi = phi, r = sqrt(10), d = 3,
ticktype = ticktype,
col=heat.colors(100)[round(pred.matrix*100, 0)],
mgp = c(4, 1, 0), ...)
I believe the solution might come from modifying the gbm.perpec function, do you know how?
Thank you for your time!
Modifying the gbm.perspec function is certainly an option, although if you use the predicted values from the gbm model and plot them onto a 3D scatterplot from another package you should be able to achieve it as well.
Here's an option using the plot3Drgl package, original code was provided by #Fabrice.
library(dismo); library(plot3Drgl); library(devEMF)
data(Anguilla_train)
angaus.tc5.lr01 <- gbm.step(data=Anguilla_train, gbm.x = 3:13, gbm.y = 2,
family = "bernoulli", tree.complexity = 5, learning.rate = 0.01,
bag.fraction = 0.5)
# Find interactions in the gbm model:
find.int <- gbm.interactions( angaus.tc5.lr01)
find.int$interactions
find.int$rank.list
d<-plot(angaus.tc5.lr01,c(1,7),return.grid=T)
x <- d$SegSumT
y <- d$USRainDays
z <- d$y
grid.lines = 30
elevation.site = loess(z ~ x*y, data=d, span=1, normalize = FALSE)
x.pred <- seq(min(x), max(x), length.out = grid.lines) # x grid
y.pred <- seq(min(y), max(y), length.out = grid.lines) # y grid
xy <- expand.grid( x = x.pred, y = y.pred) # final grid combined
z.site=matrix(predict(elevation.site, newdata = xy), nrow = grid.lines, ncol = grid.lines) # predicedt matrix
scatter3D(x, y, z, theta = 160, phi = 35, # x y z coords and angle of plot
clab = c(""), # Needs moving - label legend
colkey = list(side = 4, length = 0.65,
adj.clab = 0.15, dist = -0.15, cex.clab = 0.6, cex.axis = 0.6), # change the location and length of legend, change position of label and legend
clim = c(-4,0.1),
bty = "b", # type of box
col = ramp.col(col = c("grey", "blue"), 200),
pch = 19, cex = 0.55, # shape and size of points
xlab = "SegSumT",
xlim=c(10,20),ylim=c(0,3.5), zlim=c(-4,0.1), d= 2,
ylab = "USRaindays",
zlab= "Fitted values", #axes labels
cex.lab = 0.8, font.lab = 1, cex.axis = 0.6, font.axis= 1, # size and font of axes and ticks
ticktype = "detailed", nticks = 5, # ticks and numer of ticks
#type = "h", # vertical lines
surf = list(x = x.pred, y = y.pred, z = z.site,
facets = NA, CI=NULL))
enter image description here
By tweaking with grid.lines and reversing the x axis you should be able to produce exactly what you want.
By incorporating some of the code found here into the gbm.perspec() source code you can create the desired effect.
First run
# Color palette (100 colors)
col.pal<-colorRampPalette(c("blue", "red"))
colors<-col.pal(100)
Then, add z.facet.center to gbm.perspec() source code after else and change the z in the code to pred.matrixas follows,
# and finally plot the result
#
if (!perspective) {
image(x = x.var, y = y.var, z = pred.matrix, zlim = z.range)
} else {
z.facet.center <- (pred.matrix[-1, -1] + pred.matrix[-1, -ncol(pred.matrix)] +
pred.matrix[-nrow(pred.matrix), -1] + pred.matrix[-nrow(pred.matrix), -ncol(pred.matrix)])/4
# Range of the facet center on a 100-scale (number of colors)
z.facet.range<-cut(z.facet.center, 100)
persp(x=x.var, y=y.var, z=pred.matrix, zlim= z.range, # input vars
xlab = x.label, ylab = y.label, zlab = z.label, # labels
theta=theta, phi=phi, r = sqrt(10), d = 3,
col=colors[z.facet.range],# viewing pars
ticktype = ticktype, mgp = c(4,1,0), ...) #
which will give you a plot like this (please note, this is not plotted using the sample dataset which is why the interaction effect is different than the plot in the question).
Alternatively, you can create a new function. The following example modifies gbm.perspec() to give a white-to-red gradient. Simply run the code in R, then change gbm.perspec() to gbm.perspec2()
# interaction function
# Color palette (100 colors)
col.pal<-colorRampPalette(c("white", "pink", "red"))
colors<-col.pal(100)
gbm.perspec2 <- function(gbm.object,
x = 1, # the first variable to be plotted
y = 2, # the second variable to be plotted
pred.means = NULL, # allows specification of values for other variables
x.label = NULL, # allows manual specification of the x label
x.range = NULL, # manual range specification for the x variable
y.label = NULL, # and y la seminar committeebel
z.label = "fitted value", #default z label
y.range = NULL, # and the y
z.range = NULL, # allows control of the vertical axis
leg.coords = NULL, #can specify coords (x, y) for legend
ticktype = "detailed",# specifiy detailed types - otherwise "simple"
theta = 55, # rotation
phi=40, # and elevation
smooth = "none", # controls smoothing of the predicted surface
mask = FALSE, # controls masking using a sample intensity model
perspective = TRUE, # controls whether a contour or perspective plot is drawn
...) # allows the passing of additional arguments to plotting routine
# useful options include shade, ltheta, lphi for controlling illumination
# and cex for controlling text size - cex.axis and cex.lab have no effect
{
if (! requireNamespace('gbm') ) { stop('you need to install the gbm package to use this function') }
requireNamespace('splines')
#get the boosting model details
gbm.call <- gbm.object$gbm.call
gbm.x <- gbm.call$gbm.x
n.preds <- length(gbm.x)
gbm.y <- gbm.call$gbm.y
pred.names <- gbm.call$predictor.names
family = gbm.call$family
# and now set up range variables for the x and y preds
have.factor <- FALSE
x.name <- gbm.call$predictor.names[x]
if (is.null(x.label)) {
x.label <- gbm.call$predictor.names[x]
}
y.name <- gbm.call$predictor.names[y]
if (is.null(y.label)) {
y.label <- gbm.call$predictor.names[y]
}
data <- gbm.call$dataframe[ , gbm.x, drop=FALSE]
n.trees <- gbm.call$best.trees
# if marginal variable is a vector then create intervals along the range
if (is.vector(data[,x])) {
if (is.null(x.range)) {
x.var <- seq(min(data[,x],na.rm=T),max(data[,x],na.rm=T),length = 50)
} else {
x.var <- seq(x.range[1],x.range[2],length = 50)
}
} else {
x.var <- names(table(data[,x]))
have.factor <- TRUE
}
if (is.vector(data[,y])) {
if (is.null(y.range)) {
y.var <- seq(min(data[,y],na.rm=T),max(data[,y],na.rm=T),length = 50)
} else {y.var <- seq(y.range[1],y.range[2],length = 50)}
} else {
y.var <- names(table(data[,y]))
if (have.factor) { #check that we don't already have a factor
stop("at least one marginal predictor must be a vector!")
} else {have.factor <- TRUE}
}
pred.frame <- expand.grid(list(x.var,y.var))
names(pred.frame) <- c(x.name,y.name)
pred.rows <- nrow(pred.frame)
#make sure that the factor variable comes first
if (have.factor) {
if (is.factor(pred.frame[,2])) { # swap them about
pred.frame <- pred.frame[,c(2,1)]
x.var <- y.var
}
}
j <- 3
# cycle through the predictors
# if a non-target variable find the mean
for (i in 1:n.preds) {
if (i != x & i != y) {
if (is.vector(data[,i])) {
m <- match(pred.names[i],names(pred.means))
if (is.na(m)) {
pred.frame[,j] <- mean(data[,i],na.rm=T)
} else pred.frame[,j] <- pred.means[m]
}
if (is.factor(data[,i])) {
m <- match(pred.names[i],names(pred.means))
temp.table <- table(data[,i])
if (is.na(m)) {
pred.frame[,j] <- rep(names(temp.table)[2],pred.rows)
} else {
pred.frame[,j] <- pred.means[m]
}
pred.frame[,j] <- factor(pred.frame[,j],levels=names(temp.table))
}
names(pred.frame)[j] <- pred.names[i]
j <- j + 1
}
}
#
# form the prediction
#
#assign("pred.frame", pred.frame, pos=1)
prediction <- gbm::predict.gbm(gbm.object,pred.frame,n.trees = n.trees, type="response")
#assign("prediction", prediction, pos=1, immediate =T)
# model smooth if required
if (smooth == "model") {
pred.glm <- glm(prediction ~ ns(pred.frame[,1], df = 8) * ns(pred.frame[,2], df = 8), data=pred.frame,family=poisson)
prediction <- fitted(pred.glm)
}
# report the maximum value and set up realistic ranges for z
max.pred <- max(prediction)
message("maximum value = ",round(max.pred,2),"\n")
if (is.null(z.range)) {
if (family == "bernoulli") {
z.range <- c(0,1)
} else if (family == "poisson") {
z.range <- c(0,max.pred * 1.1)
} else {
z.min <- min(data[,y],na.rm=T)
z.max <- max(data[,y],na.rm=T)
z.delta <- z.max - z.min
z.range <- c(z.min - (1.1 * z.delta), z.max + (1.1 * z.delta))
}
}
# now process assuming both x and y are vectors
if (have.factor == FALSE) {
# form the matrix
pred.matrix <- matrix(prediction,ncol=50,nrow=50)
# kernel smooth if required
if (smooth == "average") { #apply a 3 x 3 smoothing average
pred.matrix.smooth <- pred.matrix
for (i in 2:49) {
for (j in 2:49) {
pred.matrix.smooth[i,j] <- mean(pred.matrix[c((i-1):(i+1)),c((j-1):(j+1))])
}
}
pred.matrix <- pred.matrix.smooth
}
# mask out values inside hyper-rectangle but outside of sample space
if (mask) {
mask.trees <- gbm.object$gbm.call$best.trees
point.prob <- gbm::predict.gbm(gbm.object[[1]],pred.frame, n.trees = mask.trees, type="response")
point.prob <- matrix(point.prob,ncol=50,nrow=50)
pred.matrix[point.prob < 0.5] <- 0.0
}
#
# and finally plot the result
#
if (!perspective) {
image(x = x.var, y = y.var, z = pred.matrix, zlim = z.range)
} else {
z.facet.center <- (pred.matrix[-1, -1] + pred.matrix[-1, -ncol(pred.matrix)] +
pred.matrix[-nrow(pred.matrix), -1] + pred.matrix[-nrow(pred.matrix), -ncol(pred.matrix)])/4
# Range of the facet center on a 100-scale (number of colors)
z.facet.range<-cut(z.facet.center, 100)
persp(x=x.var, y=y.var, z=pred.matrix, zlim= z.range, # input vars
xlab = x.label, ylab = y.label, zlab = z.label, # labels
theta=theta, phi=phi, r = sqrt(10), d = 3,
col=colors[z.facet.range],# viewing pars
ticktype = ticktype, mgp = c(4,1,0), ...) #
}
}
if (have.factor) {
# we need to plot values of y for each x
factor.list <- names(table(pred.frame[,1]))
n <- 1
#add this bit so z.range still works as expected:
if (is.null(z.range)) {
vert.limits <- c(0, max.pred * 1.1)
} else {
vert.limits <- z.range
}
plot(pred.frame[pred.frame[,1]==factor.list[1],2],
prediction[pred.frame[,1]==factor.list[1]],
type = 'l',
#ylim = c(0, max.pred * 1.1),
ylim = vert.limits,
xlab = y.label,
ylab = z.label, ...)
for (i in 2:length(factor.list)) {
#factor.level in factor.list) {
factor.level <- factor.list[i]
lines(pred.frame[pred.frame[,1]==factor.level,2],
prediction[pred.frame[,1]==factor.level], lty = i)
}
# now draw a legend
if(is.null(leg.coords)){
x.max <- max(pred.frame[,2])
x.min <- min(pred.frame[,2])
x.range <- x.max - x.min
x.pos <- c(x.min + (0.02 * x.range),x.min + (0.3 * x.range))
y.max <- max(prediction)
y.min <- min(prediction)
y.range <- y.max - y.min
y.pos <- c(y.min + (0.8 * y.range),y.min + (0.95 * y.range))
legend(x = x.pos, y = y.pos, factor.list, lty = c(1:length(factor.list)), bty = "n")
} else {
legend(x = leg.coords[1], y = leg.coords[2], factor.list, lty = c(1:length(factor.list)), bty = "n", ncol = 2)
}
}
}

Resources