R Programming other alternatives for plot - r

I wonder how you can simplify these two :
plot (payroll,wins)
id = identify(payroll, wins,labels = code, n = 5)
plot (payroll,wins)
with(data, text(payroll, wins, labels = code, pos = 1, cex=0.5))
using other alternatives - pch() dan as.numeric()?

Not sure it's easier but you change pch during identification as below (taken from the R-help). Every time you click empty point change to filled-in dot.
# data simulation
data <- data.frame(payroll = rnorm(10), wins = rnorm(10), code = letters[1:10])
identifyPch <- function(x, y = NULL, n = length(x), plot = FALSE, pch = 19, ...)
{
xy <- xy.coords(x, y)
x <- xy$x
y <- xy$y
sel <- rep(FALSE, length(x))
while (sum(sel) < n) {
ans <- identify(x[!sel], y[!sel], labels = which(!sel), n = 1, plot = plot, ...)
if(!length(ans)) {
break
}
ans <- which(!sel)[ans]
points(x[ans], y[ans], pch = pch)
sel[ans] <- TRUE
}
## return indices of selected points
which(sel)
}
if(dev.interactive()) { ## use it
with(data, plot(payroll,wins))
id = with(data, identifyPch(payroll, wins))
}

Related

Draw a vector field from matrix multiplication r

I'm trying to print a vector field based on a matrix multiplication. The problem is that the function that will print values to make the matrix multiplication can only take a single number. When a range of number is put into the all.p function, the output is not usable to do the matrix multiplication. Is there a way to change all.p so that with multiple inputs, the matrix multiplication can still be valid, and the vector field can be computed? The code fails at the vectorfield function as this function with put the values into the range 0 to 1, but the all.p can't take multiple inputs.
geno.fit = matrix(c(0.791,1.000,0.834,
0.670,1.006,0.901,
0.657,0.657,1.067),
nrow = 3,
ncol = 3,
byrow = T)
all.p <- function(p) {
if (length(p)>1) {
stop("More numbers in input than expected")
}
P = p^2
PQ = 2*p*(1-p)
Q = (1-p)^2
return(list=c(P=P,PQ=PQ,Q=Q))
}
library(pracma)
f <- function(x, y) all.p(x) %*% geno.fit %*% all.p(y)
xx <- c(0, 1); yy <- c(0, 1)
vectorfield(fun = f, xlim = xx, ylim = yy, scale = 0.1)
for (xs in seq(0, 1, by = 0.25)) {
sol <- rk4(f, 0, 1, xs, 100)
lines(sol$x, sol$y, col="darkgreen")
}
grid()
I also tried to use a for loop.
f <- function(x, y, n = 16) {
space3 = matrix(NA,nrow = n,ncol = n)
for (i in 1:(length(x))) {
for (j in 1:(length(y))) {
# Calculate mean fitness
space3[i,j] = all.p(x[i]) %*% geno.fit %*% all.p(y[j])
}
}
return(space3)
}
xx <- c(0, 1); yy <- c(0, 1)
f(seq(0,1,length.out = 16), seq(0,1,length.out = 16))
vectorfield(fun = f, xlim = xx, ylim = yy, scale = 0.1)
Below is the code to make the gradient ascend (without the vectors).
library(fields) # for image.plot
res = 0.01
seq.x = seq(0,1,by = res)
space = outer(seq.x,seq.x,"*")
pace2 = space
for (i in 1:length(seq.x)) {
for (j in 1:length(seq.x)) {
space[i,j] = all.p(1-seq.x[i]) %*% geno.fit %*% all.p(1-seq.x[j])
}
}
round(t(space),3)
new.space = t(space)
image.plot(new.space)
by.text = 8
for (i in seq(1,length(seq.x),by = by.text)) {
for (j in seq(1,length(seq.x),by = by.text)) {
text(seq.x[i],seq.x[j],
labels = round(new.space[i,j],4),
cex = new.space[i,j]/2,
col = "black")
}
}
contour(new.space,ylim=c(1,0),add = T, nlevels = 50)
I was able to make the vector field function work, but it's not showing what I was expecting from the previous gradient ascend vector field:
How can the 2 be reconciled? (i.e., plotting the vectors on the gradient ascend image which would show the proper direction of the vectors in the steepest ascend)
Here is my solution:
library(fields) # for image.plot
library(plotly)
library(raster)
# Genotype fitness matrix -------------------------------------------------
geno.fit = matrix(c(0.791,1.000,0.834,
0.670,1.006,0.901,
0.657,0.657,1.067),
nrow = 3,
ncol = 3,
byrow = T)
# Resolution
res = 0.01
# Sequence of X
seq.x = seq(0,1,by = res)
# Make a matrix
space = outer(seq.x,seq.x,"*")
# Function to calculate the AVERAGE fitness for a given frequency of an allele to get the expected frequency of genotypes in a population
all.p <- function(p) { # Takes frequency of an allele in the population
if (length(p)>1) { # Has to be only 1 number
stop("More numbers in input than expected")
}
P = p^2 # Gets the AA
PQ = 2*p*(1-p) # gets the Aa
Q = (1-p)^2 # Gets the aa
return(list=c(P=P, # Return the values
PQ=PQ,
Q=Q))
}
# Examples
all.p(0)
all.p(1)
# Plot the matrix of all combinations of genotype frequencies
image.plot(space,
ylim=c(1.05,-0.05),
ylab= "Percentage of Chromosome EF of TD form",
xlab= "Percentage of Chromosome CD of BL form")
# Backup the data
space2 = space
# calculate the average fitness for EVERY combination of frequency of 2 genotypes
for (i in 1:length(seq.x)) {
for (j in 1:length(seq.x)) {
# Calculate mean fitness
space[i,j] = all.p(1-seq.x[i]) %*% geno.fit %*% all.p(1-seq.x[j])
}
}
# Show the result
round(t(space),3)
# Transform the space
new.space = t(space)
image.plot(new.space,
# ylim=c( 1.01,-0.01),
ylab= "Percentage of Chromosome EF of TD (Tidbinbilla) form",
xlab= "Percentage of Chromosome CD of BL (Blundell) form")
# Add the numbers to get a better sense of the average fitness values at each point
by.text = 8
for (i in seq(1,length(seq.x),by = by.text)) {
for (j in seq(1,length(seq.x),by = by.text)) {
text(seq.x[i],seq.x[j],
labels = round(new.space[i,j],4),
cex = new.space[i,j]/2,
col = "black") # col = "gray70"
}
}
# Add contour lines
contour(new.space,ylim=c(1,0),add = T, nlevels = 50)
# Plotly 3D graph --------------------------------------------------------
# To get the 3D plane in an INTERACTIVE graph
xyz=cbind(expand.grid(seq.x,
seq.x),
as.vector(new.space))
plot_ly(x = xyz[,1],y = xyz[,2],z = xyz[,3],
color = xyz[,3])
# Vector field on the Adaptive landscape ----------------------------------
library(tidyverse)
library(ggquiver)
raster2quiver <- function(rast, aggregate = 50, colours = terrain.colors(6), contour.breaks = 200)
{
names(rast) <- "z"
quiv <- aggregate(rast, aggregate)
terr <- terrain(quiv, opt = c('slope', 'aspect'))
quiv$u <- -terr$slope[] * sin(terr$aspect[])
quiv$v <- -terr$slope[] * cos(terr$aspect[])
quiv_df <- as.data.frame(quiv, xy = TRUE)
rast_df <- as.data.frame(rast, xy = TRUE)
print(ggplot(mapping = aes(x = x, y = y, fill = z)) +
geom_raster(data = rast_df, na.rm = TRUE) +
geom_contour(data = rast_df,
aes(z=z, color=..level..),
breaks = seq(0,3, length.out = contour.breaks),
size = 1.4)+
scale_color_gradient(low="blue", high="red")+
geom_quiver(data = quiv_df, aes(u = u, v = v), vecsize = 1.5) +
scale_fill_gradientn(colours = colours, na.value = "transparent") +
theme_bw())
return(quiv_df)
}
r <-raster(
space,
xmn=range(seq.x)[1], xmx=range(seq.x)[2],
ymn=range(seq.x)[1], ymx=range(seq.x)[2],
crs=CRS("+proj=utm +zone=11 +datum=NAD83")
)
# Draw the adaptive landscape
raster2quiver(rast = r, aggregate = 2, colours = tim.colors(100))
Not exactly what I wanted, but it does what I was looking for!

BRT: Add gradient colors to interaction plots using gbm.perspec

I would like to add a gradient of colours following the fitted values (e.g. higher fitted values darker colours, lower fitted values lighter colours) in my three-dimensional dependence plots.
I have used the example presented in dismo package:
library(dismo)
data(Anguilla_train)
angaus.tc5.lr01 <- gbm.step(data=Anguilla_train, gbm.x = 3:13, gbm.y = 2,
family = "bernoulli", tree.complexity = 5, learning.rate = 0.01,
bag.fraction = 0.5)
# Find interactions in the gbm model:
find.int <- gbm.interactions( angaus.tc5.lr01)
find.int$interactions
find.int$rank.list
I have only managed to add the same colour to the whole plot:
gbm.perspec( angaus.tc5.lr01, 7, 1,
x.label = "USRainDays",
y.label = "SegSumT",
z.label = "Fitted values",
z.range=c(0,0.435),
col="blue")
Or to add a gradient colour but not following the fitted values:
gbm.perspec( angaus.tc5.lr01, 7, 1,
x.label = "USRainDays",
y.label = "SegSumT",
z.label = "Fitted values",
col=heat.colors(50),
z.range=c(0,0.435))
I also checked the code of function gbm.perspec, and If I understood correctly the fitted values are call inside the formula as "prediction", and later on are part of the "pred.matrix" that is passed to the final plotting: persp(x = x.var, y = y.var, z = pred.matrix...), but I have no managed to access them from the gbm.perspec formula. I tried to modified the gbm.perpec function by adding "col=heat.colors(100)[round(pred.matrix*100, 0)]" into the persp() inside the function, but it does not do what I am looking for:
persp(x = x.var, y = y.var, z = pred.matrix, zlim = z.range,
xlab = x.label, ylab = y.label, zlab = z.label,
theta = theta, phi = phi, r = sqrt(10), d = 3,
ticktype = ticktype,
col=heat.colors(100)[round(pred.matrix*100, 0)],
mgp = c(4, 1, 0), ...)
I believe the solution might come from modifying the gbm.perpec function, do you know how?
Thank you for your time!
Modifying the gbm.perspec function is certainly an option, although if you use the predicted values from the gbm model and plot them onto a 3D scatterplot from another package you should be able to achieve it as well.
Here's an option using the plot3Drgl package, original code was provided by #Fabrice.
library(dismo); library(plot3Drgl); library(devEMF)
data(Anguilla_train)
angaus.tc5.lr01 <- gbm.step(data=Anguilla_train, gbm.x = 3:13, gbm.y = 2,
family = "bernoulli", tree.complexity = 5, learning.rate = 0.01,
bag.fraction = 0.5)
# Find interactions in the gbm model:
find.int <- gbm.interactions( angaus.tc5.lr01)
find.int$interactions
find.int$rank.list
d<-plot(angaus.tc5.lr01,c(1,7),return.grid=T)
x <- d$SegSumT
y <- d$USRainDays
z <- d$y
grid.lines = 30
elevation.site = loess(z ~ x*y, data=d, span=1, normalize = FALSE)
x.pred <- seq(min(x), max(x), length.out = grid.lines) # x grid
y.pred <- seq(min(y), max(y), length.out = grid.lines) # y grid
xy <- expand.grid( x = x.pred, y = y.pred) # final grid combined
z.site=matrix(predict(elevation.site, newdata = xy), nrow = grid.lines, ncol = grid.lines) # predicedt matrix
scatter3D(x, y, z, theta = 160, phi = 35, # x y z coords and angle of plot
clab = c(""), # Needs moving - label legend
colkey = list(side = 4, length = 0.65,
adj.clab = 0.15, dist = -0.15, cex.clab = 0.6, cex.axis = 0.6), # change the location and length of legend, change position of label and legend
clim = c(-4,0.1),
bty = "b", # type of box
col = ramp.col(col = c("grey", "blue"), 200),
pch = 19, cex = 0.55, # shape and size of points
xlab = "SegSumT",
xlim=c(10,20),ylim=c(0,3.5), zlim=c(-4,0.1), d= 2,
ylab = "USRaindays",
zlab= "Fitted values", #axes labels
cex.lab = 0.8, font.lab = 1, cex.axis = 0.6, font.axis= 1, # size and font of axes and ticks
ticktype = "detailed", nticks = 5, # ticks and numer of ticks
#type = "h", # vertical lines
surf = list(x = x.pred, y = y.pred, z = z.site,
facets = NA, CI=NULL))
enter image description here
By tweaking with grid.lines and reversing the x axis you should be able to produce exactly what you want.
By incorporating some of the code found here into the gbm.perspec() source code you can create the desired effect.
First run
# Color palette (100 colors)
col.pal<-colorRampPalette(c("blue", "red"))
colors<-col.pal(100)
Then, add z.facet.center to gbm.perspec() source code after else and change the z in the code to pred.matrixas follows,
# and finally plot the result
#
if (!perspective) {
image(x = x.var, y = y.var, z = pred.matrix, zlim = z.range)
} else {
z.facet.center <- (pred.matrix[-1, -1] + pred.matrix[-1, -ncol(pred.matrix)] +
pred.matrix[-nrow(pred.matrix), -1] + pred.matrix[-nrow(pred.matrix), -ncol(pred.matrix)])/4
# Range of the facet center on a 100-scale (number of colors)
z.facet.range<-cut(z.facet.center, 100)
persp(x=x.var, y=y.var, z=pred.matrix, zlim= z.range, # input vars
xlab = x.label, ylab = y.label, zlab = z.label, # labels
theta=theta, phi=phi, r = sqrt(10), d = 3,
col=colors[z.facet.range],# viewing pars
ticktype = ticktype, mgp = c(4,1,0), ...) #
which will give you a plot like this (please note, this is not plotted using the sample dataset which is why the interaction effect is different than the plot in the question).
Alternatively, you can create a new function. The following example modifies gbm.perspec() to give a white-to-red gradient. Simply run the code in R, then change gbm.perspec() to gbm.perspec2()
# interaction function
# Color palette (100 colors)
col.pal<-colorRampPalette(c("white", "pink", "red"))
colors<-col.pal(100)
gbm.perspec2 <- function(gbm.object,
x = 1, # the first variable to be plotted
y = 2, # the second variable to be plotted
pred.means = NULL, # allows specification of values for other variables
x.label = NULL, # allows manual specification of the x label
x.range = NULL, # manual range specification for the x variable
y.label = NULL, # and y la seminar committeebel
z.label = "fitted value", #default z label
y.range = NULL, # and the y
z.range = NULL, # allows control of the vertical axis
leg.coords = NULL, #can specify coords (x, y) for legend
ticktype = "detailed",# specifiy detailed types - otherwise "simple"
theta = 55, # rotation
phi=40, # and elevation
smooth = "none", # controls smoothing of the predicted surface
mask = FALSE, # controls masking using a sample intensity model
perspective = TRUE, # controls whether a contour or perspective plot is drawn
...) # allows the passing of additional arguments to plotting routine
# useful options include shade, ltheta, lphi for controlling illumination
# and cex for controlling text size - cex.axis and cex.lab have no effect
{
if (! requireNamespace('gbm') ) { stop('you need to install the gbm package to use this function') }
requireNamespace('splines')
#get the boosting model details
gbm.call <- gbm.object$gbm.call
gbm.x <- gbm.call$gbm.x
n.preds <- length(gbm.x)
gbm.y <- gbm.call$gbm.y
pred.names <- gbm.call$predictor.names
family = gbm.call$family
# and now set up range variables for the x and y preds
have.factor <- FALSE
x.name <- gbm.call$predictor.names[x]
if (is.null(x.label)) {
x.label <- gbm.call$predictor.names[x]
}
y.name <- gbm.call$predictor.names[y]
if (is.null(y.label)) {
y.label <- gbm.call$predictor.names[y]
}
data <- gbm.call$dataframe[ , gbm.x, drop=FALSE]
n.trees <- gbm.call$best.trees
# if marginal variable is a vector then create intervals along the range
if (is.vector(data[,x])) {
if (is.null(x.range)) {
x.var <- seq(min(data[,x],na.rm=T),max(data[,x],na.rm=T),length = 50)
} else {
x.var <- seq(x.range[1],x.range[2],length = 50)
}
} else {
x.var <- names(table(data[,x]))
have.factor <- TRUE
}
if (is.vector(data[,y])) {
if (is.null(y.range)) {
y.var <- seq(min(data[,y],na.rm=T),max(data[,y],na.rm=T),length = 50)
} else {y.var <- seq(y.range[1],y.range[2],length = 50)}
} else {
y.var <- names(table(data[,y]))
if (have.factor) { #check that we don't already have a factor
stop("at least one marginal predictor must be a vector!")
} else {have.factor <- TRUE}
}
pred.frame <- expand.grid(list(x.var,y.var))
names(pred.frame) <- c(x.name,y.name)
pred.rows <- nrow(pred.frame)
#make sure that the factor variable comes first
if (have.factor) {
if (is.factor(pred.frame[,2])) { # swap them about
pred.frame <- pred.frame[,c(2,1)]
x.var <- y.var
}
}
j <- 3
# cycle through the predictors
# if a non-target variable find the mean
for (i in 1:n.preds) {
if (i != x & i != y) {
if (is.vector(data[,i])) {
m <- match(pred.names[i],names(pred.means))
if (is.na(m)) {
pred.frame[,j] <- mean(data[,i],na.rm=T)
} else pred.frame[,j] <- pred.means[m]
}
if (is.factor(data[,i])) {
m <- match(pred.names[i],names(pred.means))
temp.table <- table(data[,i])
if (is.na(m)) {
pred.frame[,j] <- rep(names(temp.table)[2],pred.rows)
} else {
pred.frame[,j] <- pred.means[m]
}
pred.frame[,j] <- factor(pred.frame[,j],levels=names(temp.table))
}
names(pred.frame)[j] <- pred.names[i]
j <- j + 1
}
}
#
# form the prediction
#
#assign("pred.frame", pred.frame, pos=1)
prediction <- gbm::predict.gbm(gbm.object,pred.frame,n.trees = n.trees, type="response")
#assign("prediction", prediction, pos=1, immediate =T)
# model smooth if required
if (smooth == "model") {
pred.glm <- glm(prediction ~ ns(pred.frame[,1], df = 8) * ns(pred.frame[,2], df = 8), data=pred.frame,family=poisson)
prediction <- fitted(pred.glm)
}
# report the maximum value and set up realistic ranges for z
max.pred <- max(prediction)
message("maximum value = ",round(max.pred,2),"\n")
if (is.null(z.range)) {
if (family == "bernoulli") {
z.range <- c(0,1)
} else if (family == "poisson") {
z.range <- c(0,max.pred * 1.1)
} else {
z.min <- min(data[,y],na.rm=T)
z.max <- max(data[,y],na.rm=T)
z.delta <- z.max - z.min
z.range <- c(z.min - (1.1 * z.delta), z.max + (1.1 * z.delta))
}
}
# now process assuming both x and y are vectors
if (have.factor == FALSE) {
# form the matrix
pred.matrix <- matrix(prediction,ncol=50,nrow=50)
# kernel smooth if required
if (smooth == "average") { #apply a 3 x 3 smoothing average
pred.matrix.smooth <- pred.matrix
for (i in 2:49) {
for (j in 2:49) {
pred.matrix.smooth[i,j] <- mean(pred.matrix[c((i-1):(i+1)),c((j-1):(j+1))])
}
}
pred.matrix <- pred.matrix.smooth
}
# mask out values inside hyper-rectangle but outside of sample space
if (mask) {
mask.trees <- gbm.object$gbm.call$best.trees
point.prob <- gbm::predict.gbm(gbm.object[[1]],pred.frame, n.trees = mask.trees, type="response")
point.prob <- matrix(point.prob,ncol=50,nrow=50)
pred.matrix[point.prob < 0.5] <- 0.0
}
#
# and finally plot the result
#
if (!perspective) {
image(x = x.var, y = y.var, z = pred.matrix, zlim = z.range)
} else {
z.facet.center <- (pred.matrix[-1, -1] + pred.matrix[-1, -ncol(pred.matrix)] +
pred.matrix[-nrow(pred.matrix), -1] + pred.matrix[-nrow(pred.matrix), -ncol(pred.matrix)])/4
# Range of the facet center on a 100-scale (number of colors)
z.facet.range<-cut(z.facet.center, 100)
persp(x=x.var, y=y.var, z=pred.matrix, zlim= z.range, # input vars
xlab = x.label, ylab = y.label, zlab = z.label, # labels
theta=theta, phi=phi, r = sqrt(10), d = 3,
col=colors[z.facet.range],# viewing pars
ticktype = ticktype, mgp = c(4,1,0), ...) #
}
}
if (have.factor) {
# we need to plot values of y for each x
factor.list <- names(table(pred.frame[,1]))
n <- 1
#add this bit so z.range still works as expected:
if (is.null(z.range)) {
vert.limits <- c(0, max.pred * 1.1)
} else {
vert.limits <- z.range
}
plot(pred.frame[pred.frame[,1]==factor.list[1],2],
prediction[pred.frame[,1]==factor.list[1]],
type = 'l',
#ylim = c(0, max.pred * 1.1),
ylim = vert.limits,
xlab = y.label,
ylab = z.label, ...)
for (i in 2:length(factor.list)) {
#factor.level in factor.list) {
factor.level <- factor.list[i]
lines(pred.frame[pred.frame[,1]==factor.level,2],
prediction[pred.frame[,1]==factor.level], lty = i)
}
# now draw a legend
if(is.null(leg.coords)){
x.max <- max(pred.frame[,2])
x.min <- min(pred.frame[,2])
x.range <- x.max - x.min
x.pos <- c(x.min + (0.02 * x.range),x.min + (0.3 * x.range))
y.max <- max(prediction)
y.min <- min(prediction)
y.range <- y.max - y.min
y.pos <- c(y.min + (0.8 * y.range),y.min + (0.95 * y.range))
legend(x = x.pos, y = y.pos, factor.list, lty = c(1:length(factor.list)), bty = "n")
} else {
legend(x = leg.coords[1], y = leg.coords[2], factor.list, lty = c(1:length(factor.list)), bty = "n", ncol = 2)
}
}
}

Generating a sequence of equidistant points on polygon boundary

I am looking for a procedure that allows me to generate a sequence of equidistant points (coordinates) along the sides of an arbitrary polygon.
Imaging a polygon defined by the coordinates of its vertexes:
poly.mat <- matrix(c(0,0,
0,1,
0.5,1.5,
0.5,0,
0,0 # last row included to close the polygon
), byrow = T, ncol = 2)
colnames(poly.mat) <- c("x", "y")
plot(poly.mat, type = "l")
If the length of the sequence I want to generate is n (adjustable), how I can produce a sequence, starting at (0,0), of equidistant coordinates.
I got as far as calculating the perimeter of the shape with the geosphere package (which I believe I need)
library(geosphere)
n <- 50 # sequence of length set to be 50
perim <- perimeter(poly.mat)
perim/n # looks like every section needs to be 8210.768 something in length
You will have to write the code yourself. Sorry, there isn't a library function for every last detail of every last assignment. Assuming that each pair of points defines a line segment, you could just generate N points along each segment, as in
begin = [xbegin, ybegin ];
end = [xend, yend ];
xdist = ( xend - xbegin ) / nintervals;
ydist = ( yend - ybegin ) / nintervals;
then your points are given by [ xbegin + i * xdist, ybegin + i * ydist ]
Here is the solution I came up with.
pointDistance <- function(p1, p2){
sqrt((p2[,1]-p1[,1])^2) + sqrt((p2[,2]-p1[,2])^2)
}
getPos <- function(shp.mat, ll){
greaterLL <- shp.mat$cumdis > ll
if(all(greaterLL == FALSE)) return(poly.mat[nrow(poly.mat), c("x", "y")])
smallRow <- min(which(greaterLL)) # the smallest coordinate that has greater length
p.start <- shp.mat[smallRow-1, c("x","y")]
p.end <- shp.mat[smallRow, c("x","y")]
cumVal <- shp.mat$cumdis[smallRow]
prop <- (ll-shp.mat$cumdis[smallRow-1])/(shp.mat$cumdis[smallRow]-shp.mat$cumdis[smallRow-1])
p.start + (prop)* (p.end-p.start)
}
# shp1
poly.mat <- matrix(c(0,0,
0,1,
0.5,1.5,
0.5,0,
0,0
),byrow = T, ncol = 2)
colnames(poly.mat) <- c("x", "y")
poly.mat <- as.data.frame(poly.mat)
# Main fun
pointsOnPath <- function(shp.mat, n){
dist <- vector(mode = "numeric", length = nrow(shp.mat)-1)
for(i in 2:nrow(shp.mat)){
dist[i] <- pointDistance(p1 = shp.mat[i,], p2 = shp.mat[i-1,])
}
shp.mat$dist <- dist
shp.mat$cumdis <- cumsum(shp.mat$dist)
dis <- matrix(seq(from = 0, to = max(shp.mat$cumdis), length.out = n+1), ncol = 1)
out <- lapply(dis, function(x) getPos(shp.mat = shp.mat, ll = x))
out <- do.call("rbind", out)
out$dis <- dis
out[-nrow(out),]
}
df <- pointsOnPath(shp.mat = poly.mat, 5)
# Plot
plot(poly.mat$x, poly.mat$y, type = "l", xlim = c(0,1.5), ylim = c(0,1.5))
points(df$x, df$y, col = "red", lwd = 2)
There is room for improving the code, but it should return the correct result

Graphical output of density for the function gammamixEM (package mixtools)

I'm using the function gammamixEM from the package mixtools. How can I return the graphical output of density as in the function normalmixEM (i.e., the second plot in plot(...,which=2)) ?
Update:
Here is a reproducible example for the function gammamixEM:
x <- c(rgamma(200, shape = 0.2, scale = 14), rgamma(200,
shape = 32, scale = 10), rgamma(200, shape = 5, scale = 6))
out <- gammamixEM(x, lambda = c(1, 1, 1)/3, verb = TRUE)
Here is a reproducible example for the function normalmixEM:
data(faithful)
attach(faithful)
out <- normalmixEM(waiting, arbvar = FALSE, epsilon = 1e-03)
plot(out, which=2)
I would like to obtain this graphical output of density from the function gammamixEM.
Here you go.
out <- normalmixEM(waiting, arbvar = FALSE, epsilon = 1e-03)
x <- out
whichplots <- 2
density = 2 %in% whichplots
loglik = 1 %in% whichplots
def.par <- par(ask=(loglik + density > 1), "mar") # only ask and mar are changed
mix.object <- x
k <- ncol(mix.object$posterior)
x <- sort(mix.object$x)
a <- hist(x, plot = FALSE)
maxy <- max(max(a$density), .3989*mix.object$lambda/mix.object$sigma)
I just had to dig into the source code of plot.mixEM
So, now to do this with gammamixEM:
x <- c(rgamma(200, shape = 0.2, scale = 14), rgamma(200,
shape = 32, scale = 10), rgamma(200, shape = 5, scale = 6))
gammamixEM.out <- gammamixEM(x, lambda = c(1, 1, 1)/3, verb = TRUE)
mix.object <- gammamixEM.out
k <- ncol(mix.object$posterior)
x <- sort(mix.object$x)
a <- hist(x, plot = FALSE)
maxy <- max(max(a$density), .3989*mix.object$lambda/mix.object$sigma)
main2 <- "Density Curves"
xlab2 <- "Data"
col2 <- 2:(k+1)
hist(x, prob = TRUE, main = main2, xlab = xlab2,
ylim = c(0,maxy))
for (i in 1:k) {
lines(x, mix.object$lambda[i] *
dnorm(x,
sd = sd(x)))
}
I believe it should be pretty straight forward to continue this example a bit, if you want to add the labels, smooth lines, etc. Here's the source of the plot.mixEM function.

define breaks for hist2d in R

is there a simple way to define breaks instead of nbins for a 2d histogram (hist2d) in R?
I want to define the range for the x- and yaxis for a 2D histogram and the number of bins for each dimension.
My example:
# example data
x <- sample(-1:100, 2000, replace=T)
y <- sample(0:89, 2000, replace=T)
# create 2d histogram
h2 <- hist2d(x,y,nbins=c(23,19),xlim=c(-1,110), ylim=c(0,95),xlab='x',ylab='y',main='hist2d')
This results in this 2D histogram output 1
----------------------------
2-D Histogram Object
----------------------------
Call: hist2d(x = x, y = y, nbins = c(23, 19), xlab = "x", ylab = "y",
xlim = c(-1, 110), ylim = c(0, 95), main = "hist2d")
Number of data points: 2000
Number of grid bins: 23 x 19
X range: ( -1 , 100 )
Y range: ( 0 , 89 )
I need
X range: ( -1 , 110 )
Y range: ( 0 , 95 )
instead.
My attempt to define the xlim and ylim only extends the plot but does not define the axis range for the histogram. I know that there would be no data in the additional bins.
Is there a way to define
xbreaks = seq(-1,110,5)
ybreaks = seq(0,95,5)
instead of using nbins which divides the range from minimum to maximum into the given number of bins?
Thank you for your help
I changed the code a little bit and this version should work the with explicitly defining the breaks for both axes. First you have to load the function. Then you can give the x.breaks and y.breaks options with x.breaks=seq(0,10,0.1).
If same.scale is true, you only need x.breaks
The return value addionaly contains the number of bins and the relative counts.
Also, you can include a legend if wanted, by setting legend=TRUE. For that you need to have the package Fields
hist2d_breaks = function (x, y = NULL, nbins = 200,same.scale = FALSE, na.rm = TRUE,
show = TRUE, col = c("black", heat.colors(12)), FUN = base::length,
xlab, ylab,x.breaks,y.breaks, ...)
{
if (is.null(y)) {
if (ncol(x) != 2)
stop("If y is ommitted, x must be a 2 column matirx")
y <- x[, 2]
x <- x[, 1]
}
if (length(nbins) == 1)
nbins <- rep(nbins, 2)
nas <- is.na(x) | is.na(y)
if (na.rm) {
x <- x[!nas]
y <- y[!nas]
}
else stop("missinig values not permitted if na.rm=FALSE")
if(same.scale){
x.cuts = x.breaks;
y.cuts = x.breaks;
}else{
x.cuts <- x.breaks
y.cuts <- y.breaks
}
index.x <- cut(x, x.cuts, include.lowest = TRUE)
index.y <- cut(y, y.cuts, include.lowest = TRUE)
m <- tapply(x, list(index.x, index.y), FUN)
if (identical(FUN, base::length))
m[is.na(m)] <- 0
if (missing(xlab))
xlab <- deparse(substitute(xlab))
if (missing(ylab))
ylab <- deparse(substitute(ylab))
if (show){
if(legend){
image.plot(x.cuts, y.cuts, m, col = col, xlab = xlab, ylab = ylab,
...)
}else{
image(x.cuts, y.cuts, m, col = col, xlab = xlab, ylab = ylab,
...)
}
}
midpoints <- function(x) (x[-1] + x[-length(x)])/2
retval <- list()
retval$counts <- m
retval$counts_rel <- m/max(m)
retval$x.breaks = x.cuts
retval$y.breaks = y.cuts
retval$x = midpoints(x.cuts)
retval$y = midpoints(y.cuts)
retval$nobs = length(x)
retval$bins = c(length(x.cuts),length(y.cuts))
retval$call <- match.call()
class(retval) <- "hist2d"
retval
}
The call of (my data) then brings the following:
hist2d_breaks(df,x.breaks=seq(0,10,1),y.breaks=seq(-10,10,1),legend=TRUE)
brings up the following plot
2D Histogram with breaks
Revise the "hist2d" as follows
hist2d_range<-function (x, y = NULL, nbins = 200, same.scale = TRUE, na.rm = TRUE,
show = TRUE, col = c("black", heat.colors(12)), FUN = base::length,
xlab, ylab,range=NULL, ...)
{
if (is.null(y)) {
if (ncol(x) != 2)
stop("If y is ommitted, x must be a 2 column matirx")
y <- x[, 2]
x <- x[, 1]
}
if (length(nbins) == 1)
nbins <- rep(nbins, 2)
nas <- is.na(x) | is.na(y)
if (na.rm) {
x <- x[!nas]
y <- y[!nas]
}
else stop("missinig values not permitted if na.rm=FALSE")
if (same.scale) {
if(is.null(range))
{
x.cuts <- seq(from = min(x, y), to = max(x, y), length = nbins[1] +
1)
y.cuts <- seq(from = min(x, y), to = max(x, y), length = nbins[2] +
1)
}else{
x.cuts <- seq(from = range[1], to = range[2], length = nbins[1] + 1)
y.cuts <- seq(from = range[1], to = range[2], length = nbins[1] + 1)
}
}
else {
x.cuts <- seq(from = min(x), to = max(x), length = nbins[1] +
1)
y.cuts <- seq(from = min(y), to = max(y), length = nbins[2] +
1)
}
index.x <- cut(x, x.cuts, include.lowest = TRUE)
index.y <- cut(y, y.cuts, include.lowest = TRUE)
m <- tapply(x, list(index.x, index.y), FUN)
if (identical(FUN, base::length))
m[is.na(m)] <- 0
if (missing(xlab))
xlab <- deparse(substitute(xlab))
if (missing(ylab))
ylab <- deparse(substitute(ylab))
if (show)
image(x.cuts, y.cuts, m, col = col, xlab = xlab, ylab = ylab,
...)
midpoints <- function(x) (x[-1] + x[-length(x)])/2
retval <- list()
retval$counts <- m
retval$x.breaks = x.cuts
retval$y.breaks = y.cuts
retval$x = midpoints(x.cuts)
retval$y = midpoints(y.cuts)
retval$nobs = length(x)
retval$call <- match.call()
class(retval) <- "hist2d"
retval
}
This function has an additional argument "range".
The revised point is as follows.
if(is.null(range))
{
x.cuts <- seq(from = min(x, y), to = max(x, y), length = nbins[1] +
1)
y.cuts <- seq(from = min(x, y), to = max(x, y), length = nbins[2] +
1)
}else{
x.cuts <- seq(from = range[1], to = range[2], length = nbins[1] + 1)
y.cuts <- seq(from = range[1], to = range[2], length = nbins[1] + 1)
}

Resources