Suppose I would like to produce a kind of tree structure like the one below:
plot(0, type="n",xlim=c(0, 5), ylim=c(-3, 8), axes=FALSE, xlab="", ylab="", main="")
points(1, 2.5)
points(3, 5)
points(3, 0)
lines(c(1, 3), c(2.5, 5))
lines(c(1, 3), c(2.5, 0))
text(1, 2.5, adj=1, label="Parent ")
text(3, 5, adj=0, label=" Child 1")
text(3, 0, adj=0, label=" Child 2")
I wonder if there is a way in R where we can produce curved lines that resemble varying degrees of a S-curve like the ones below. Crucially it would be great if it would be possible to create such lines without resorting to ggplot.
EDIT removed and made into an answer
Following #thelatemail's suggestion, I decided to make my edit into an answer. My solution is based on #thelatemail's answer.
I wrote a small function to draw curves, which makes use of the logistic function:
#Create the function
curveMaker <- function(x1, y1, x2, y2, ...){
curve( plogis( x, scale = 0.08, loc = (x1 + x2) /2 ) * (y2-y1) + y1,
x1, x2, add = TRUE, ...)
}
A working example is below. In this example, I want to create a plot for a taxonomy with 3 levels: parent --> 2 children -- > 20 grandchildren. One child has 12 grandchildren, and the other child has 8 children.
#Prepare data:
parent <- c(1, 16)
children <- cbind(2, c(8, 28))
grandchildren <- cbind(3, (1:20)*2-1)
labels <- c("Parent ", paste("Child ", 1:2), paste(" Grandchild", 1:20) )
#Make a blank plot canvas
plot(0, type="n", ann = FALSE, xlim = c( 0.5, 3.5 ), ylim = c( 0.5, 39.5 ), axes = FALSE )
#Plot curves
#Parent and children
invisible( mapply( curveMaker,
x1 = parent[ 1 ],
y1 = parent[ 2 ],
x2 = children[ , 1 ],
y2 = children[ , 2 ],
col = gray( 0.6, alpha = 0.6 ), lwd = 1.5 ) )
#Children and grandchildren
invisible( mapply( curveMaker,
x1 = children[ 1, 1 ],
y1 = children[ 1, 2 ],
x2 = grandchildren[ 1:8 , 1 ],
y2 = grandchildren[ 1:8, 2 ],
col = gray( 0.6, alpha = 0.6 ), lwd = 1.5 ) )
invisible( mapply( curveMaker,
x1 = children[ 2, 1 ],
y1 = children[ 2, 2 ],
x2 = grandchildren[ 9:20 , 1 ],
y2 = grandchildren[ 9:20, 2 ],
col = gray( 0.6, alpha = 0.6 ), lwd = 1.5 ) )
#Plot text
text( x = c(parent[1], children[,1], grandchildren[,1]),
y = c(parent[2], children[,2], grandchildren[,2]),
labels = labels,
pos = rep(c(2, 4), c(3, 20) ) )
#Plot points
points( x = c(parent[1], children[,1], grandchildren[,1]),
y = c(parent[2], children[,2], grandchildren[,2]),
pch = 21, bg = "white", col="#3182bd", lwd=2.5, cex=1)
Sounds like a sigmoid curve, e.g.:
f <- function(x,s) s/(1 + exp(-x))
curve(f(x,s=1),xlim=c(-4,4))
curve(f(x,s=0.9),xlim=c(-4,4),add=TRUE)
curve(f(x,s=0.8),xlim=c(-4,4),add=TRUE)
curve(f(x,s=0.7),xlim=c(-4,4),add=TRUE)
Result:
You can start to adapt this, e.g. here's a clunky bit of code:
plot(NA,type="n",ann=FALSE,axes=FALSE,xlim=c(-6,6),ylim=c(0,1))
curve(f(x,s=1),xlim=c(-4,4),add=TRUE)
curve(f(x,s=0.8),xlim=c(-4,4),add=TRUE)
curve(f(x,s=0.6),xlim=c(-4,4),add=TRUE)
text(
c(-4,rep(4,3)),
c(0,f(c(4),c(1,0.8,0.6))),
labels=c("Parent","Kid 1","Kid 2","Kid 3"),
pos=c(2,4,4,4)
)
Result:
I think Paul Murrell has a document illustrating similar diagrams in grid. Here's a basic example,
library(grid)
labelGrob <- function(x,y,label, ...){
t <- textGrob(x,y,label=label)
w <- convertWidth(1.5*grobWidth(t), "npc", valueOnly = TRUE)
h <- convertHeight(1.5*grobHeight(t), "npc", valueOnly = TRUE)
gTree(cl = "label", west = unit(x-0.5*w, "npc"),
east = unit(x+0.5*w, "npc"),
children=gList(t, roundrectGrob(x=x, y=y, gp=gpar(fill=NA),
width=w, height=h)))
}
xDetails.label <- function(x, theta){
if(theta == 180) return(x$west[1]) else
if(theta == 0) return(x$east[1]) else
xDetails(x$children[[1]], theta)
}
yDetails.label <- function(x, theta){
if(theta %in% c("west", "east")) return(x$y) else
yDetails(x$children[[1]], theta)
}
lab1 <- labelGrob(0.1, 0.5, "start")
lab2 <- labelGrob(0.6, 0.75, "end")
grid.newpage()
grid.draw(lab1)
grid.draw(lab2)
grid.curve(grobX(lab1, "east"), grobY(lab1, "east"),
grobX(lab2, "west"), grobY(lab2, "west"),
inflect = TRUE, curvature=0.1)
Related
I needed to include in the code below, a vertical line,
for example, in position x = 5 and that all points smaller than 5 have another color,
for example blue.
The values of a variable can be read from the x-axis, and the y-axis shows the order of the observations in the variable (from bottom to top). Isolated points as the far ends, and on either side in a plot, suggest potentional outliers
Thanks
library(dplyr)
library(lattice)
n = 1000
df <- tibble(
xx1 = runif(n, min = 3, max = 10),
xx2 = runif(n, min = 3, max = 10),
xx3 = runif(n, min = 3, max = 10)
)
MyVar <- c("xx1","xx2","xx3")
MydotplotBR <- function(DataSelected){
P <- dotplot(as.matrix(as.matrix(DataSelected)),
groups=FALSE,
strip = strip.custom(bg = 'white',
par.strip.text = list(cex = 1.2)),
scales = list(x = list(relation = "same",tck = 1,
draw = TRUE, at=seq(0,10,1)),x=list(at=seq),
y = list(relation = "free", draw = FALSE),
auto.key = list(x =1)),
col=10,
axes = FALSE,
cex = 0.4, pch = 5,
xlim=c(0,10),
xlab = list(label = "Variable Value", cex = 1.5),
ylab = list(label = "Order of data in the file", cex = 1.5))
print(P)
}
(tempoi <- Sys.time())
Vertemp <- MydotplotBR(df[,MyVar])
(tempof <- Sys.time()-tempoi)
I find it weird that you want a color dependent only on the x-axis when values are also used on the y-axis of other plots.
Nevertheless, here's a homemade pairs_cutoff() function doing what you want.
pairs_cutoff <- function(data, cutoff, cols = c("red", "blue"),
only.lower = F, ...){
data <- as.data.frame(data)
cns <- colnames(data)
nc <- ncol(data)
layout(matrix(seq_len(nc^2), ncol = nc))
invisible(
sapply(seq_len(nc), function(i){
sapply(seq_len(nc), function(j){
if(i == j){
plot.new()
legend("center", bty = "n", title = cns[i], cex = 1.5, text.font = 2, legend = "")
} else {
if(j < i & only.lower)
plot.new()
else{
if(is.null(cutoff))
cols <- cols[1]
plot(data[,i], data[,j], col = cols[(data[,i] < cutoff) + 1],
xlab = cns[i], ylab = cns[j], ...)
}
}
})
})
)
}
Using your suggested data :
n = 1000
dat <- tibble(
xx1 = runif(n, min = 3, max = 10),
xx2 = runif(n, min = 3, max = 10),
xx3 = runif(n, min = 3, max = 10)
)
pairs_cutoff(dat, cutoff = 5, only.lower = T)
outputs the following plot :
You can specify extra parameters to the plot function (eg. pch) directly to pairs_cutoff.
Also, if you want the full symmetric grid of plots, set only.lower = F.
I would like to add a gradient of colours following the fitted values (e.g. higher fitted values darker colours, lower fitted values lighter colours) in my three-dimensional dependence plots.
I have used the example presented in dismo package:
library(dismo)
data(Anguilla_train)
angaus.tc5.lr01 <- gbm.step(data=Anguilla_train, gbm.x = 3:13, gbm.y = 2,
family = "bernoulli", tree.complexity = 5, learning.rate = 0.01,
bag.fraction = 0.5)
# Find interactions in the gbm model:
find.int <- gbm.interactions( angaus.tc5.lr01)
find.int$interactions
find.int$rank.list
I have only managed to add the same colour to the whole plot:
gbm.perspec( angaus.tc5.lr01, 7, 1,
x.label = "USRainDays",
y.label = "SegSumT",
z.label = "Fitted values",
z.range=c(0,0.435),
col="blue")
Or to add a gradient colour but not following the fitted values:
gbm.perspec( angaus.tc5.lr01, 7, 1,
x.label = "USRainDays",
y.label = "SegSumT",
z.label = "Fitted values",
col=heat.colors(50),
z.range=c(0,0.435))
I also checked the code of function gbm.perspec, and If I understood correctly the fitted values are call inside the formula as "prediction", and later on are part of the "pred.matrix" that is passed to the final plotting: persp(x = x.var, y = y.var, z = pred.matrix...), but I have no managed to access them from the gbm.perspec formula. I tried to modified the gbm.perpec function by adding "col=heat.colors(100)[round(pred.matrix*100, 0)]" into the persp() inside the function, but it does not do what I am looking for:
persp(x = x.var, y = y.var, z = pred.matrix, zlim = z.range,
xlab = x.label, ylab = y.label, zlab = z.label,
theta = theta, phi = phi, r = sqrt(10), d = 3,
ticktype = ticktype,
col=heat.colors(100)[round(pred.matrix*100, 0)],
mgp = c(4, 1, 0), ...)
I believe the solution might come from modifying the gbm.perpec function, do you know how?
Thank you for your time!
Modifying the gbm.perspec function is certainly an option, although if you use the predicted values from the gbm model and plot them onto a 3D scatterplot from another package you should be able to achieve it as well.
Here's an option using the plot3Drgl package, original code was provided by #Fabrice.
library(dismo); library(plot3Drgl); library(devEMF)
data(Anguilla_train)
angaus.tc5.lr01 <- gbm.step(data=Anguilla_train, gbm.x = 3:13, gbm.y = 2,
family = "bernoulli", tree.complexity = 5, learning.rate = 0.01,
bag.fraction = 0.5)
# Find interactions in the gbm model:
find.int <- gbm.interactions( angaus.tc5.lr01)
find.int$interactions
find.int$rank.list
d<-plot(angaus.tc5.lr01,c(1,7),return.grid=T)
x <- d$SegSumT
y <- d$USRainDays
z <- d$y
grid.lines = 30
elevation.site = loess(z ~ x*y, data=d, span=1, normalize = FALSE)
x.pred <- seq(min(x), max(x), length.out = grid.lines) # x grid
y.pred <- seq(min(y), max(y), length.out = grid.lines) # y grid
xy <- expand.grid( x = x.pred, y = y.pred) # final grid combined
z.site=matrix(predict(elevation.site, newdata = xy), nrow = grid.lines, ncol = grid.lines) # predicedt matrix
scatter3D(x, y, z, theta = 160, phi = 35, # x y z coords and angle of plot
clab = c(""), # Needs moving - label legend
colkey = list(side = 4, length = 0.65,
adj.clab = 0.15, dist = -0.15, cex.clab = 0.6, cex.axis = 0.6), # change the location and length of legend, change position of label and legend
clim = c(-4,0.1),
bty = "b", # type of box
col = ramp.col(col = c("grey", "blue"), 200),
pch = 19, cex = 0.55, # shape and size of points
xlab = "SegSumT",
xlim=c(10,20),ylim=c(0,3.5), zlim=c(-4,0.1), d= 2,
ylab = "USRaindays",
zlab= "Fitted values", #axes labels
cex.lab = 0.8, font.lab = 1, cex.axis = 0.6, font.axis= 1, # size and font of axes and ticks
ticktype = "detailed", nticks = 5, # ticks and numer of ticks
#type = "h", # vertical lines
surf = list(x = x.pred, y = y.pred, z = z.site,
facets = NA, CI=NULL))
enter image description here
By tweaking with grid.lines and reversing the x axis you should be able to produce exactly what you want.
By incorporating some of the code found here into the gbm.perspec() source code you can create the desired effect.
First run
# Color palette (100 colors)
col.pal<-colorRampPalette(c("blue", "red"))
colors<-col.pal(100)
Then, add z.facet.center to gbm.perspec() source code after else and change the z in the code to pred.matrixas follows,
# and finally plot the result
#
if (!perspective) {
image(x = x.var, y = y.var, z = pred.matrix, zlim = z.range)
} else {
z.facet.center <- (pred.matrix[-1, -1] + pred.matrix[-1, -ncol(pred.matrix)] +
pred.matrix[-nrow(pred.matrix), -1] + pred.matrix[-nrow(pred.matrix), -ncol(pred.matrix)])/4
# Range of the facet center on a 100-scale (number of colors)
z.facet.range<-cut(z.facet.center, 100)
persp(x=x.var, y=y.var, z=pred.matrix, zlim= z.range, # input vars
xlab = x.label, ylab = y.label, zlab = z.label, # labels
theta=theta, phi=phi, r = sqrt(10), d = 3,
col=colors[z.facet.range],# viewing pars
ticktype = ticktype, mgp = c(4,1,0), ...) #
which will give you a plot like this (please note, this is not plotted using the sample dataset which is why the interaction effect is different than the plot in the question).
Alternatively, you can create a new function. The following example modifies gbm.perspec() to give a white-to-red gradient. Simply run the code in R, then change gbm.perspec() to gbm.perspec2()
# interaction function
# Color palette (100 colors)
col.pal<-colorRampPalette(c("white", "pink", "red"))
colors<-col.pal(100)
gbm.perspec2 <- function(gbm.object,
x = 1, # the first variable to be plotted
y = 2, # the second variable to be plotted
pred.means = NULL, # allows specification of values for other variables
x.label = NULL, # allows manual specification of the x label
x.range = NULL, # manual range specification for the x variable
y.label = NULL, # and y la seminar committeebel
z.label = "fitted value", #default z label
y.range = NULL, # and the y
z.range = NULL, # allows control of the vertical axis
leg.coords = NULL, #can specify coords (x, y) for legend
ticktype = "detailed",# specifiy detailed types - otherwise "simple"
theta = 55, # rotation
phi=40, # and elevation
smooth = "none", # controls smoothing of the predicted surface
mask = FALSE, # controls masking using a sample intensity model
perspective = TRUE, # controls whether a contour or perspective plot is drawn
...) # allows the passing of additional arguments to plotting routine
# useful options include shade, ltheta, lphi for controlling illumination
# and cex for controlling text size - cex.axis and cex.lab have no effect
{
if (! requireNamespace('gbm') ) { stop('you need to install the gbm package to use this function') }
requireNamespace('splines')
#get the boosting model details
gbm.call <- gbm.object$gbm.call
gbm.x <- gbm.call$gbm.x
n.preds <- length(gbm.x)
gbm.y <- gbm.call$gbm.y
pred.names <- gbm.call$predictor.names
family = gbm.call$family
# and now set up range variables for the x and y preds
have.factor <- FALSE
x.name <- gbm.call$predictor.names[x]
if (is.null(x.label)) {
x.label <- gbm.call$predictor.names[x]
}
y.name <- gbm.call$predictor.names[y]
if (is.null(y.label)) {
y.label <- gbm.call$predictor.names[y]
}
data <- gbm.call$dataframe[ , gbm.x, drop=FALSE]
n.trees <- gbm.call$best.trees
# if marginal variable is a vector then create intervals along the range
if (is.vector(data[,x])) {
if (is.null(x.range)) {
x.var <- seq(min(data[,x],na.rm=T),max(data[,x],na.rm=T),length = 50)
} else {
x.var <- seq(x.range[1],x.range[2],length = 50)
}
} else {
x.var <- names(table(data[,x]))
have.factor <- TRUE
}
if (is.vector(data[,y])) {
if (is.null(y.range)) {
y.var <- seq(min(data[,y],na.rm=T),max(data[,y],na.rm=T),length = 50)
} else {y.var <- seq(y.range[1],y.range[2],length = 50)}
} else {
y.var <- names(table(data[,y]))
if (have.factor) { #check that we don't already have a factor
stop("at least one marginal predictor must be a vector!")
} else {have.factor <- TRUE}
}
pred.frame <- expand.grid(list(x.var,y.var))
names(pred.frame) <- c(x.name,y.name)
pred.rows <- nrow(pred.frame)
#make sure that the factor variable comes first
if (have.factor) {
if (is.factor(pred.frame[,2])) { # swap them about
pred.frame <- pred.frame[,c(2,1)]
x.var <- y.var
}
}
j <- 3
# cycle through the predictors
# if a non-target variable find the mean
for (i in 1:n.preds) {
if (i != x & i != y) {
if (is.vector(data[,i])) {
m <- match(pred.names[i],names(pred.means))
if (is.na(m)) {
pred.frame[,j] <- mean(data[,i],na.rm=T)
} else pred.frame[,j] <- pred.means[m]
}
if (is.factor(data[,i])) {
m <- match(pred.names[i],names(pred.means))
temp.table <- table(data[,i])
if (is.na(m)) {
pred.frame[,j] <- rep(names(temp.table)[2],pred.rows)
} else {
pred.frame[,j] <- pred.means[m]
}
pred.frame[,j] <- factor(pred.frame[,j],levels=names(temp.table))
}
names(pred.frame)[j] <- pred.names[i]
j <- j + 1
}
}
#
# form the prediction
#
#assign("pred.frame", pred.frame, pos=1)
prediction <- gbm::predict.gbm(gbm.object,pred.frame,n.trees = n.trees, type="response")
#assign("prediction", prediction, pos=1, immediate =T)
# model smooth if required
if (smooth == "model") {
pred.glm <- glm(prediction ~ ns(pred.frame[,1], df = 8) * ns(pred.frame[,2], df = 8), data=pred.frame,family=poisson)
prediction <- fitted(pred.glm)
}
# report the maximum value and set up realistic ranges for z
max.pred <- max(prediction)
message("maximum value = ",round(max.pred,2),"\n")
if (is.null(z.range)) {
if (family == "bernoulli") {
z.range <- c(0,1)
} else if (family == "poisson") {
z.range <- c(0,max.pred * 1.1)
} else {
z.min <- min(data[,y],na.rm=T)
z.max <- max(data[,y],na.rm=T)
z.delta <- z.max - z.min
z.range <- c(z.min - (1.1 * z.delta), z.max + (1.1 * z.delta))
}
}
# now process assuming both x and y are vectors
if (have.factor == FALSE) {
# form the matrix
pred.matrix <- matrix(prediction,ncol=50,nrow=50)
# kernel smooth if required
if (smooth == "average") { #apply a 3 x 3 smoothing average
pred.matrix.smooth <- pred.matrix
for (i in 2:49) {
for (j in 2:49) {
pred.matrix.smooth[i,j] <- mean(pred.matrix[c((i-1):(i+1)),c((j-1):(j+1))])
}
}
pred.matrix <- pred.matrix.smooth
}
# mask out values inside hyper-rectangle but outside of sample space
if (mask) {
mask.trees <- gbm.object$gbm.call$best.trees
point.prob <- gbm::predict.gbm(gbm.object[[1]],pred.frame, n.trees = mask.trees, type="response")
point.prob <- matrix(point.prob,ncol=50,nrow=50)
pred.matrix[point.prob < 0.5] <- 0.0
}
#
# and finally plot the result
#
if (!perspective) {
image(x = x.var, y = y.var, z = pred.matrix, zlim = z.range)
} else {
z.facet.center <- (pred.matrix[-1, -1] + pred.matrix[-1, -ncol(pred.matrix)] +
pred.matrix[-nrow(pred.matrix), -1] + pred.matrix[-nrow(pred.matrix), -ncol(pred.matrix)])/4
# Range of the facet center on a 100-scale (number of colors)
z.facet.range<-cut(z.facet.center, 100)
persp(x=x.var, y=y.var, z=pred.matrix, zlim= z.range, # input vars
xlab = x.label, ylab = y.label, zlab = z.label, # labels
theta=theta, phi=phi, r = sqrt(10), d = 3,
col=colors[z.facet.range],# viewing pars
ticktype = ticktype, mgp = c(4,1,0), ...) #
}
}
if (have.factor) {
# we need to plot values of y for each x
factor.list <- names(table(pred.frame[,1]))
n <- 1
#add this bit so z.range still works as expected:
if (is.null(z.range)) {
vert.limits <- c(0, max.pred * 1.1)
} else {
vert.limits <- z.range
}
plot(pred.frame[pred.frame[,1]==factor.list[1],2],
prediction[pred.frame[,1]==factor.list[1]],
type = 'l',
#ylim = c(0, max.pred * 1.1),
ylim = vert.limits,
xlab = y.label,
ylab = z.label, ...)
for (i in 2:length(factor.list)) {
#factor.level in factor.list) {
factor.level <- factor.list[i]
lines(pred.frame[pred.frame[,1]==factor.level,2],
prediction[pred.frame[,1]==factor.level], lty = i)
}
# now draw a legend
if(is.null(leg.coords)){
x.max <- max(pred.frame[,2])
x.min <- min(pred.frame[,2])
x.range <- x.max - x.min
x.pos <- c(x.min + (0.02 * x.range),x.min + (0.3 * x.range))
y.max <- max(prediction)
y.min <- min(prediction)
y.range <- y.max - y.min
y.pos <- c(y.min + (0.8 * y.range),y.min + (0.95 * y.range))
legend(x = x.pos, y = y.pos, factor.list, lty = c(1:length(factor.list)), bty = "n")
} else {
legend(x = leg.coords[1], y = leg.coords[2], factor.list, lty = c(1:length(factor.list)), bty = "n", ncol = 2)
}
}
}
I have written a code to plot nearly 100000 lines in a graph with different colors depending on different conditions. The code is as following.
for(i in c(160000:260000)){
if(data[i,]$Char1 == 'A' & data[i,]$Char2 == 'S'){
if(data[i,]$Q1 < data[i,]$Q2){
lines(c(i,i),c(data[i,]$P + 2,data[i,]$P + 22),col="green")
}else{
lines(c(i,i),c(data[i,]$P - 2,data[i,]$P - 22),col="green")
}
}
if(data[i,]$Char1 == "B" & data[i,]$Char2 == 'S'){
lines(c(i,i),c(data[i,]$P + 2,data[i,]$P + 22),col='blue')
}
}
I have Plotted a normal graph before that. Which is
plot(data$P,type="l")
I ran the code and for more than 2-3 hours it kept on running till the point I stopped it. Is there any way to do this task easily and with less amount of time?
You may be able to save some computation time by not actually displaying the plot. Running
library(scales)
n <- 100000
m <- 20
system.time({
plot(0, 0, type = 'n', xlim = c(0, 10), ylim = c(0, 10), xlab = '', ylab = '')
for (i in 1:n) lines(sort(runif(m, max = 10)), sort(runif(m, max = 10)),
col = ifelse(i %% 10 == 0, 'red', alpha('lightblue', 0.1)),
lwd = 0.2)
})
vs.
system.time({
png('plot.png')
plot(0, 0, type = 'n', xlim = c(0, 10), ylim = c(0, 10), xlab = '', ylab = '')
for (i in 1:n) lines(sort(runif(m, max = 10)), sort(runif(m, max = 10)),
col = ifelse(i %% 10 == 0, 'red', alpha('lightblue', 0.1)),
lwd = 0.2)
dev.off()
})
gives
user system elapsed
44.415 0.704 45.435
vs.
user system elapsed
23.115 0.294 23.585
on my machine.
Update
Using CathG's answer brings down the computation time drastically when plotting lines:
n <- 100000
data <- data.frame(x0 = runif(n), y0 = runif(n), x1 = runif(n),
y1 = runif(n), col = 1:10)
system.time({
png('plot.png', 640, 640)
plot(0, 0, type = 'n', xlab = '', ylab = '', xlim = c(0, 1), ylim = c(0, 1))
for (i in 1:n) lines(data[i, c(1, 3)], data[i, c(2, 4)], col = data$col,
lwd = 0.1)
dev.off()
})
system.time({
png('plot.png', 640, 640)
plot(0, 0, type = 'n', xlab = '', ylab = '', xlim = c(0, 1), ylim = c(0, 1))
segments(data$x0, data$y0, data$x1, data$y1, col = data$col, lwd = 0.1)
dev.off()
})
gives
user system elapsed
119.682 0.822 121.525
vs.
user system elapsed
2.267 0.020 2.303
I think you should compute the different x and y (and color) first and then plot them all in one call with segments and I also think you should directly plot them using png for example and not in the window device:
data2 <- data[160000:260000, ]
data2$x0 <- data2$x1 <- 160000:260000
cond1 <- (data2$Char1=="A") & (data2$Char2 == "S") & (data2$Q1 < data2$Q2)
cond2 <- (data2$Char1=="A") & (data2$Char2 == "S") & (data2$Q1 >= data2$Q2)
cond3 <- (data2$Char1=="B") & (data2$Char2 == "S")
data2$y0[cond1] <- data2$P[cond1] + 2
data2$y0[cond2] <- data2$P[cond2] - 2
data2$y0[cond3] <- data2$P[cond3] + 2
data2$y1[cond1] <- data2$P[cond1] + 22
data2$y1[cond2] <- data2$P[cond2] - 22
data2$y1[cond3] <- data2$P[cond3] + 22
data2$color[cond1] <- "green"
data2$color[cond2] <- "green"
data2$color[cond3] <- "blue"
png("nameofyourfile.png")
plot(data$P,type="l")
segments(data2$x0, data2$y0, data2$x1, data2$y1, col=data2$color)
dev.off()
I find this particular graph in ISLR (Figure 2.13) or ESL very well done. I can't guess how the authors would have made this in R. I know how to get the orange and blue points very easily. The main confusion is the background dots and the purple line.
Any ideas?
Here is some sample code to get the yellow and orange points with a grey grid. How do I get an arbitrary non-linear curve in purple and then color the grid according to the curve?
set.seed(pi)
points = replicate(100, runif(2))
pointsColored = ifelse(apply(points, 2, sum) <= 1, "orange", "blue")
# Confound some
pointsColored[sample.int(length(pointsColored), 10)] = "orange"
plot(x=points[1, ], y=points[2, ])
grid(nx=100, ny=100)
# Plot points over the grid.
points(x=points[1, ], y=points[2, ], col=pointsColored)
As I indicated in my comment, a solution was provided by #chl here on stats.stackexchange.com. Here it is, applied to your data set.
library(class)
set.seed(pi)
X <- t(replicate(1000, runif(2)))
g <- ifelse(apply(X, 1, sum) <= 1, 0, 1)
xnew <- cbind(rep(seq(0, 1, length.out=50), 50),
rep(seq(0, 1, length.out=50), each=50))
m <- knn(X, xnew, g, k=15, prob=TRUE)
prob <- attr(m, "prob")
prob <- ifelse(m=="1", prob, 1-prob)
prob15 <- matrix(prob, 50)
par(mar=rep(3, 4))
contour(unique(xnew[, 1]), unique(xnew[, 2]), prob15, levels=0.5,
labels="", xlab='', ylab='', axes=FALSE, lwd=2.5, asp=1)
title(xlab=expression(italic('X')[1]), ylab=expression(italic('X')[2]),
line=1, family='serif', cex.lab=1.5)
points(X, bg=ifelse(g==1, "#CA002070", "#0571B070"), pch=21)
gd <- expand.grid(x=unique(xnew[, 1]), y=unique(xnew[, 2]))
points(gd, pch=20, cex=0.4, col=ifelse(prob15 > 0.5, "#CA0020", "#0571B0"))
box()
(UPDATE: I changed the colour palette because the blue/yellow/purple thing was pretty hideous.)
This was my silly attempt at approximation. Clearly the issues raised by #StephenKolassa are valid and not handled by this approximation.
myCurve1 = function (x)
abs(x[[1]] * sin(x[[1]]) + x[[2]] * sin(x[[2]]))
myCurve2 = function (x)
abs(x[[1]] * cos(x[[1]]) + x[[2]] * cos(x[[2]]))
myCurve3 = function (x)
abs(x[[1]] * tan(x[[1]]) + x[[2]] * tan(x[[2]]))
tmp = function (myCurve, seed=99) {
set.seed(seed)
points = replicate(100, runif(2))
colors = ifelse(apply(points, 2, myCurve) > 0.5, "orange", "blue")
# Confound some
swapInts = sample.int(length(colors), 6)
for (i in swapInts) {
if (colors[[i]] == "orange") {
colors[[i]] = "blue"
} else {
colors[[i]] = "orange"
}
}
gridPoints = seq(0, 1, 0.005)
gridPoints = as.matrix(expand.grid(gridPoints, gridPoints))
gridColors = vector("character", nrow(gridPoints))
gridPch = vector("character", nrow(gridPoints))
for (i in 1:nrow(gridPoints)) {
val = myCurve(gridPoints[i, ])
if (val > 0.505) {
gridColors[[i]] = "orange"
gridPch[[i]] = "."
} else if (val < 0.495) {
gridColors[[i]] = "blue"
gridPch[[i]] = "."
} else {
gridColors[[i]] = "purple"
gridPch[[i]] = "*"
}
}
plot(x=gridPoints[ , 1], y=gridPoints[ , 2], col=gridColors, pch=gridPch)
points(x=points[1, ], y=points[2, ], col=colors, lwd=2)
}
par(mfrow=c(1, 3))
tmp(myCurve1)
tmp(myCurve2)
tmp(myCurve3)
Closed. This question needs details or clarity. It is not currently accepting answers.
Want to improve this question? Add details and clarify the problem by editing this post.
Closed 9 years ago.
Improve this question
I have the following data:
pcp # precipitation monthly data from 2001-01,2020-12
I have used the following to compute the SPI drought index
library(SPEI)
spi1 <- spi(pcp,1,kernel = list(type = "rectangular", shift = 0),distribution = "Gamma", fit = "ub-pwm", na.rm = FALSE,ref.start=NULL, ref.end=NULL, x=FALSE, params=NULL)
First question:
when I plot(spi1) I get SPEI on the Y axis which I don't want , what I want is SPI,
second:
how to plot each month separately for example when you call spi1 it will give you the index value for each month, and I want to plot it for each month
For the first answer, you can rewrite the function plot.spei
plot.spei <-
function (x, ...)
{
## label <- ifelse(as.character(x$call)[1] == "spei", "SPEI",
## "SPI")
ser <- ts(as.matrix(x$fitted[-c(1:x$scale), ]), end = end(x$fitted),
frequency = frequency(x$fitted))
ser[is.nan(ser - ser)] <- 0
se <- ifelse(ser == 0, ser, NA)
tit <- dimnames(x$coefficients)[2][[1]]
if (start(ser)[2] == 1) {
ns <- c(start(ser)[1] - 1, 12)
}
else {
ns <- c(start(ser)[1], start(ser)[2] - 1)
}
if (end(ser)[2] == 12) {
ne <- c(end(ser)[1] + 1, 1)
}
else {
ne <- c(end(ser)[1], end(ser)[2] + 1)
}
n <- ncol(ser)
if (is.null(n))
n <- 1
par(mar = c(4, 4, 2, 1) + 0.1)
if (n > 1 & n < 5)
par(mfrow = c(n, 1))
if (n > 1 & n >= 5)
par(mfrow = c({
n + 1
}%/%2, 2))
for (i in 1:n) {
datt <- ts(c(0, ser[, i], 0), frequency = frequency(ser),
start = ns, end = ne)
datt.pos <- ifelse(datt > 0, datt, 0)
datt.neg <- ifelse(datt <= 0, datt, 0)
plot(datt, type = "n", xlab = "", main = tit[i], ...)
if (!is.null(x$ref.period)) {
k <- ts(5, start = x$ref.period[1, ], end = x$ref.period[2,
], frequency = 12)
k[1] <- k[length(k)] <- -5
polygon(k, col = "light grey", border = NA, density = 20)
abline(v = x$ref.period[1, 1] + (x$ref.period[1,
2] - 1)/12, col = "grey")
abline(v = x$ref.period[2, 1] + (x$ref.period[2,
2] - 1)/12, col = "grey")
}
grid(col = "black")
polygon(datt.pos, col = "blue", border = NA)
polygon(datt.neg, col = "red", border = NA)
lines(datt, col = "dark grey")
abline(h = 0)
points(se, pch = 21, col = "white", bg = "black")
}
}
And then use the ylab parameters
plot(spi1, ylab = "SPI")
If you want to plot it separately, you can extract the fitted value of class ts and apply basic plotting for time series object in R.
par(mfrow = c(3, 4))
listofmonths <- split(fitted(spi1), cycle(fitted(spi1)))
names(listofmonths) <- month.abb
require(plyr)
l_ply(seq_along(listofmonths), function(x) {
plot(x = seq_along(listofmonths[[x]]), y = listofmonths[[x]],
type = "l", xlab = "", ylab = "SPI")
title(names(listofmonths)[x])
})
You can also try these types of plot
monthplot(fitted(spi1), labels = month.abb, cex.axis = 0.8)
boxplot(fitted(spi1) ~ cycle(fitted(spi1)), names = month.abb, cex.axis = 0.8)