Contour - Plot - Ascending order - r

I have a matrix containing position (X,Y,elevation). I add a column to the matrix I call "index". I create a X and a Y vector from the matrix. They both include the index column. I then sort in ascending value the X and Y vector I just made. I then construct a Z matrix containing the elevation and I relate it to the position using the index. I then try to use the command contour (I want to plot a contour graph) and I get the error saying that X and Y should be ascending order... which I just made !!! What did I do wrong?
noeud<-read.table("position.out")
Matrice_Noeud<-matrix(ncol = ncol(noeud), nrow=nrow(noeud))
for (i in 1:nrow(noeud)) {
for (j in 1:ncol(noeud)) {
Matrice_Noeud[i,j]<-noeud[i,j]
}
}
Matrice_Noeud <- cbind(Matrice_Noeud, c(seq(1,nrow(noeud),1)))
x<-data.frame(x=Matrice_Noeud[,1],Index=Matrice_Noeud[,4])
y<-data.frame(y=Matrice_Noeud[,2],Index=Matrice_Noeud[,4])
X<-x[order(x$x),]
Y<-y[order(y$y),]
Z<-matrix(NA, ncol=nrow(noeud),nrow=nrow(noeud))
for (x_i in 1:nrow(noeud)) {
for (y_i in 1:nrow(noeud)) {
if (Y$Index[y_i]==X$Index[x_i]) {
niveau<-which(Matrice_Noeud[,4]==Y$Index[y_i])
Z[x_i,y_i]<-Matrice_Noeud[niveau,3]
}
}
}
Xx<-array(X[,1])
Yy<-array(Y[,1])
Zz<-data.frame(Z)
contour(Xx,Yy,Zz)

OK, since I'd started doing it, I've done it.
#### making example data
## assumptions: length(unique(x))=19, length(unique(y))=12, nrow(data)=121
## (They mean the number of grid points is 19 * 12 = 228, but z.value is only 121.)
xyz.f <- function(m, n) - m + (n - 7)^2 + 16 # make z from x and y (it means nothing special)
xyz <- cbind( xyz <- expand.grid(x = round(seq(11,15,,19), 2), y = round(seq(6,10,,12), 2)),
z = apply(xyz, 1, function(k) xyz.f(k[1], k[2])) )
set.seed(1); ind <- sample(19*12, 121) # decide to use the 121 z of 19*12
noeud <- as.matrix(xyz[ind,]) # example data maked out
#### making contour()'s arguments
Xx <- sort(unique(noeud[,1]))
Yy <- sort(unique(noeud[,2])) # nrow(noeud); length(Xx); length(Yy) # OK (121, 19, 12)
Zz <- matrix(NA, ncol=length(Yy), nrow=length(Xx)) # make 19 x 12 Z matrix (empty)
# In each row, calculate x (y) value is what number in Xx (Yy) (= the position in Z matrix)
X0 <- as.numeric( factor( noeud[,1] ) ) # (edit) using Mr.Tufte's code in R help mailing.
Y0 <- as.numeric( factor( noeud[,2] ) )
apply(cbind(X0, Y0, noeud[,3]), 1, function (a) Zz[ a[1], a[2] ] <<- a[3])
## contour()'s arguments ( Xx, Yy, Zz ) maked out
contour(Xx, Yy, Zz, xlab="including NAs") # length(Zz); length(Zz[!is.na(Zz)]) # OK (228,121)
#### interpolating
## I know few packages having interpolation functions.
library(akima) # use cubic spline interpolation methods of H. Akima
NOEUD <- interp(noeud[,1], noeud[,2], noeud[,3])
#### results
par.old <- par(no.readonly=T); par(mfrow=c(1,3), mar=c(4,0,1,0))
contour(Xx, Yy, Zz, xlab="including NAs", yaxt="n") # the including NAs data
contour(NOEUD, xlab="Akima interpolation", yaxt="n") # the Akima interpolation data
contour(Xx, Yy, matrix(xyz[,3], nrow=19), xlab="origin", yaxt="n") # the origin data
# (edit) I noticed some interp()'s arguments make a difference (default: linear=T, extrap=F).
contour(interp(noeud[,1], noeud[,2], noeud[,3], linear=T, extrap=F), xlab="Akima interp() default")
contour(interp(noeud[,1], noeud[,2], noeud[,3], linear=F, extrap=F), xlab="interp(linear=F)")
contour(interp(noeud[,1], noeud[,2], noeud[,3], linear=F, extrap=T), xlab="interp(linear=F, extrap=T)")
par(par.old)
### supplement (using the same data, output is about the same)
noeud2 <- data.frame(x=noeud[,1], y=noeud[,2], z=noeud[,3]) # equal to the including NAs data
NOEUD2 <- cbind(expand.grid(x=NOEUD$x, y=NOEUD$y), z=c(NOEUD$z)) # equal to the Akima interpolation data
ggplot2::ggplot( noeud2, aes( x, y, z = z )) + geom_contour()
lattice::contourplot( z ~ x * y, NOEUD2 )

Related

How to find outer limits of contour lines for three-dimensional data in r

I would like to extend the example given here
How to plot a contour line showing where 95% of values fall within, in R and in ggplot2
to data with three dimensions (x, y and z), and instead of plotting the contour line I'd like to get the limits of the x, y and z values.
This is the example from the previous post.
library(ggplot2)
set.seed(1001)
d <- data.frame(x=rnorm(1000),y=rnorm(1000))
kd <- ks::kde(d, compute.cont=TRUE)
contour_95 <- with(kd, contourLines(x=eval.points[[1]], y=eval.points[[2]],
z=estimate, levels=cont["5%"])[[1]])
contour_95 <- data.frame(contour_95)
ggplot(data=d, aes(x, y)) +
geom_point() +
geom_path(aes(x, y), data=contour_95) +
theme_bw()
and then, it's possible to get the limits of the contour like this:
range(contour_95$x)
range(contour_95$y)
I would love to know how to get the x, y and z ranges of 3-D contours at specified percentiles.
ks:kde can deal with higher dimensions, but contourLines() cant.
This is what I've tried...
set.seed(1001)
d <- data.frame(x=rnorm(1000),y=rnorm(1000), y=rnorm(1000))
kd <- ks::kde(d, compute.cont=TRUE)
#what kd$estimates are > 95th percentile?
#make function that can extract from 3d array
multi.which <- function(A){
if ( is.vector(A) ) return(which(A))
d <- dim(A)
T <- which(A) - 1
nd <- length(d)
t( sapply(T, function(t){
I <- integer(nd)
I[1] <- t %% d[1]
sapply(2:nd, function(j){
I[j] <<- (t %/% prod(d[1:(j-1)])) %% d[j]
})
I
}) + 1 )
}
#extract those estimates that have >density than 95th percentile
ests <- multi.which(kd$estimate > kd$cont["5%"])
#make into a long dataframe with column number in the second column and row number in first column
col1=rep(1, nrow(ests))
col2=rep(2, nrow(ests))
col3=rep(3, nrow(ests))
rows=c(ests[,1], ests[,2], ests[,3])
cols=c(col1,col2,col3)
index=cbind(rows,cols)#this is the index so we can extract the coordinates in multi-D space
car::some(index)
#get coordinates with this function
fExtract <- function(dat, indexDat){
dat[as.matrix(indexDat)]
}
#pull three coordinates (x,y,z) from eval.points into 3 columns
eval.pts <- cbind(kd$eval.points[[1]], kd$eval.points[[2]], kd$eval.points[[3]])
v <- fExtract(eval.pts, index) #one long vector
#re-create the three columns of x, y and z coordinates of points at higher density than 95th percentile
x1 <- v[1:nrow(ests)]
y1 <- v[(nrow(ests)+1):(2*nrow(ests))]
z1 <- v[(2*nrow(ests)+1):(3*nrow(ests))]
#the three coordinates.
fin <- cbind(x1,y1,z1)
#get range of each dimension
range(x1)
range(y1)
range(z1)
But I'm not confident it's right.

Plotting 2 out of 3 columns of a data frame

I have a data frame consisting of 400 rows of x, y and z values, and I would like to plot the x column against the y column.
This is what I have got so far:
sample3d = function(n)
{
df = data.frame()
while(n>0)
{
X = runif(1,-1,1)
Y = runif(1,-1,1)
Z = runif(1,-1,1)
a = X^2 + Y^2 + Z^2
if( a < 1 )
{
b = (X^2+Y^2+Z^2)^(0.5)
vector = data.frame(X = X/b, Y = Y/b, Z = Z/b)
df = rbind(vector,df)
n = n- 1
}
}
df
}
sample3d(400)
If you need to draw random points on the surface of a sphere, you need just to extract the polar coordinates. Something like that:
sample3d_v2 <- function(n) {
phi<-runif(n,0,2*pi)
cost<-runif(n,-1,1)
sint<-sqrt(1-cost^2)
data.frame(X=sint*cos(phi),Y=sint*sin(phi),Z=cost)
}
Just some tests:
system.time(old<-sample3d(4000))
# user system elapsed
# 3.895 0.000 3.879
system.time(new<-sample3d_v2(4000))
# user system elapsed
# 0.000 0.000 0.002
As you can see, a gain of thousands of times. Test the results are correct:
require(rgl)
plot3d(old)
open3d();plot3d(new)
Regarding your question: just name the object resulting for your function and plot the X and Y components.
data<-sample3d_v2(400)
plot(data$X,data$Y)
I named the dataframe your function creates data
(data <- sample3d(400)).
Then, if you need a scatterplot (points at x and y coordinates) of the X and Y column, you can use base R's function plot(). e.g. plot(x = data$X, y = data$Y). If you are like me and don't want to type data$ before every column you can make the columns available to functions as vectors by wrapping everything inside with(data = data, {...}).
If you want to create a scatterplot of all three coordinates, you might want to check out the packages in the following two examples:
library(scatterplot3d)
with(data = data, {
scatterplot3d(x = X, y = Y, z = Z)
})
library(rgl)
with(data, {
plot3d(X, Y, Z)
})

fitting a 5th order Bézier Curve to a data set

This is a slightly specific problem, so a bit of knowledge of R and of Bézier curves is required to be of help... (thanks if you do!!)
So I need some help with my R code: I have a series of discretely sampled observations and I am trying to fit a Bézier Curve of the 5th order through these points with simple LSS regression. I have some limitations on the position of the 6 control points:
A & B have the same Y-axis coordinate
B & C have the same X-axis coordinate
C & D have the same Y-axis coordinate
D & E have the same X-axis coordinate
E & F have the same Y-axis coordinate
A is located on the observation 2 turning points ago from the last
observation
The X-axis coordinate of the last observation is
somewhere between the X-axis coordinates of E and F
Like this image:
Say I have these data:
-0.01105
-0.01118
-0.01271
-0.01479
-0.01729
-0.01996
-0.02250
-0.02473
-0.02554
-0.02478
-0.02207
-0.01788
-0.01319
-0.00956
They have a "curvy" shape so a Bézier curve would fit: the result of my code is this image: the data are in red, the 5th order Bézier and its control points with their restrictions in blue:
Like this image:
So you see that I have some kind of solution, but this is the problem:
The X-axis location of right-most control point is always to the right of the last input data point, and to get an appropriate fit, I had to require a value of t (t goes from 0 to 1 in a Bézier) where t is at if the input data end (the "limit" variable in my code). How do I rewrite it so I don't have to do that anymore, and the horizontal spread of the t-values remains constant, also outside of the input data?
(given the restrictions on the control points, and maximizing the fit of the part of the curve that overlaps with the input data)
If you can help, please take a look at this R code, any help is .. much much appreciated and happy holidays!!
ps: what I call exampledata.csv in my code is just the data above.
getT <- function(x){
# Calculates length from origin of each point in the path.
# args:
# x : a one dimensional vector
# Returns:
# out : a vector of distances from the origin, as a percent of end point - start point distance
out <- cumsum(abs(diff(x)))
out <- c(0, out/ out[length(out)])
return(out)
}
cost_f <- function(X,Y,K){
pred <-K%*%X
c <- Y- pred
out <- list(loss= as.vector(t(c)%*%c), pred = pred)
return(out)
}
df <- read.csv('exampledata.csv')
T <- nrow(df)
df['d'] = 1:T
# # identify all turning points:
# turn_point <- c(1)
# for(i in 2:(T-1)){
# if( ( (df[i,'x'] < df[i-1,'x']) & (df[i,'x'] < df[i+1,'x'])) | ( (df[i,'x'] > df[i-1,'x']) & (df[i,'x'] > df[i+1,'x'])) ){
# turn_point <- c(turn_point, i)
# }
# }
fit_last_piece <- function(df){
limit <- .79
turn_point <- c(1)
for(i in 2:(T-1)){
if( ( (df[i,'x'] < df[i-1,'x']) & (df[i,'x'] < df[i+1,'x'])) | ( (df[i,'x'] > df[i-1,'x']) & (df[i,'x'] > df[i+1,'x'])) ){
turn_point <- c(turn_point, i)
}
}
nk <- length(turn_point) # number of turning points
data <- df[turn_point[nk-1]:nrow(df),]
end_x <- data$d[1]
end_y <- data$x[1]
constr_x <- matrix(c(1,0,0,0,0,0, # remember data is input column to column
0,1,1,0,0,0,
0,0,0,1,1,0,
0,0,0,0,0,1),nrow = 6, ncol = 4)
constr_y <- matrix(c(1,1,0,0,0,0,
0,0,1,1,0,0,
0,0,0,0,1,1),nrow = 6, ncol = 3)
M = matrix(c(-1,5,-10,10,-5,1,
5,-20,30,-20,5,0,
-10,30,-30,10,0,0,
10,-20,10,0,0,0,
-5,5,0,0,0,0,
1,0,0,0,0,0),nrow = 6, ncol = 6)
t_x = getT(data$d)*limit
T_x = cbind(t_x^5, t_x^4 ,t_x^3, t_x^2, t_x,rep(1,length(t_x)))
in_par <- ( tail(data$d,1)-data$d[1])*c(2/5,4/5,6/5) + data$d[1] # initial values of the intermediate x levels are at 1/3 and 2/3 midpoints
res_x <- optim(par = in_par, fn = function(par){cost_f(c(data$d[1], par[1],par[2], par[3]), data$d, T_x%*%M%*%constr_x)$loss})
#res_x <- optimize(f = function(par){cost_f(c(df$d[1],par,df$d[nrow(df)]), df$d, T_x%*%M%*%constr_x)$loss}, interval = c(df$d[1],df$d[nrow(df)]),tol = .Machine$double.eps^0.25)
optim_x <- c(data$d[1],res_x$par)
pred_x <- cost_f(optim_x, data$d, T_x%*%M%*%constr_x)$pred
t_y = getT(data$x)*limit
T_y = cbind(t_y^5, t_y^4,t_y^3, t_y^2, t_y,rep(1,length(t_y)))
in_par <- c()
res_y <- optim(par = c(data$x[floor(nrow(data)/2)],tail(data$x,1)), fn = function(par){cost_f(c(data$x[1],par[1],par[2]), data$x, T_y%*%M%*%constr_y)$loss})
optim_y <- c(data$x[1],res_y$par[1],res_y$par[2])
#pred_y <- cost_f(res_y$par, df$x, T_y%*%M%*%constr_y)$pred
pred_y <- cost_f(optim_y, data$x, T_y%*%M%*%constr_y)$pred
t_x_p <- c(t_x,seq(tail(t_x,1),1,length.out = 10))
T_x_p <- cbind(t_x_p^5, t_x_p^4 ,t_x_p^3, t_x_p^2, t_x_p,rep(1,length(t_x_p)))
t_y_p <- c(t_y,seq(tail(t_y,1),1,length.out = 10))
T_y_p <- cbind(t_y_p^5, t_y_p^4 ,t_y_p^3, t_y_p^2, t_y_p,rep(1,length(t_y_p)))
pred_x <- T_x_p%*%M%*%constr_x%*%optim_x
pred_y <- T_y_p%*%M%*%constr_y%*%optim_y
# this part is new:
plot(pred_x,pred_y, ylim = c(min(c(data$x, pred_y,res_y$par)), max(c(data$x, pred_y,res_y$par))),col="blue",type="b")
points(data$d,data$x,col = 'red',type="b")
points(pred_x[1],pred_y[1],pch=20,col='blue')
points(res_x$par[1],pred_y[1],pch=20,col='blue')
points(res_x$par[1],res_y$par[1],pch=20,col='blue')
points(res_x$par[2],res_y$par[1],pch=20,col='blue')
points(res_x$par[2],res_y$par[2],pch=20,col='blue')
points(res_x$par[3],res_y$par[2],pch=20,col='blue')
segments(pred_x[1],pred_y[1],res_x$par[1],pred_y[1],lty=3,col='blue')
segments(res_x$par[1],pred_y[1],res_x$par[1],res_y$par[1],lty=3,col='blue')
segments(res_x$par[1],res_y$par[1],res_x$par[2],res_y$par[1],lty=3,col='blue')
segments(res_x$par[2],res_y$par[1],res_x$par[2],res_y$par[2],lty=3,col='blue')
segments(res_x$par[2],res_y$par[2],res_x$par[3],res_y$par[2],lty=3,col='blue')
}
fit_last_piece(df)

R: Draw a polygon with conditional colour

I want to colour the area under a curve. The area with y > 0 should be red, the area with y < 0 should be green.
x <- c(1:4)
y <- c(0,1,-1,2,rep(0,4))
plot(y[1:4],type="l")
abline(h=0)
Using ifelse() does not work:
polygon(c(x,rev(x)),y,col=ifelse(y>0,"red","green"))
What I achieved so far is the following:
polygon(c(x,rev(x)),y,col="green")
polygon(c(x,rev(x)),ifelse(y>0,y,0),col="red")
But then the red area is too large. Do you have any ideas how to get the desired result?
If you want two different colors, you need two different polygons. You can either call polygon multiple times, or you can add NA values in your x and y vectors to indicate a new polygon. R will not automatically calculate the intersection for you. You must do that yourself. Here's how you could draw that with different colors.
x <- c(1,2,2.5,NA,2.5,3,4)
y <- c(0,1,0,NA,0,-1,0)
#calculate color based on most extreme y value
g <- cumsum(is.na(x))
gc <- ifelse(tapply(y, g,
function(x) x[which.max(abs(x))])>0,
"red","green")
plot(c(1, 4),c(-1,1), type = "n")
polygon(x, y, col = gc)
abline(h=0)
In the more general case, it might not be as easy to split a polygon into different regions. There seems to be some support for this type of operation in GIS packages, where this type of thing is more common. However, I've put together a somewhat general case that may work for simple polygons.
First, I define a closure that will define a cutting line. The function will take a slope and y-intercept for a line and will return the functions we need to cut a polygon.
getSplitLine <- function(m=1, b=0) {
force(m); force(b)
classify <- function(x,y) {
y >= m*x + b
}
intercepts <- function(x,y, class=classify(x,y)) {
w <- which(diff(class)!=0)
m2 <- (y[w+1]-y[w])/(x[w+1]-x[w])
b2 <- y[w] - m2*x[w]
ix <- (b2-b)/(m-m2)
iy <- ix*m + b
data.frame(x=ix,y=iy,idx=w+.5, dir=((rank(ix, ties="first")+1) %/% 2) %% 2 +1)
}
plot <- function(...) {
abline(b,m,...)
}
list(
intercepts=intercepts,
classify=classify,
plot=plot
)
}
Now we will define a function to actually split a polygon using the splitter we've just defined.
splitPolygon <- function(x, y, splitter) {
addnullrow <- function(x) if (!all(is.na(x[nrow(x),]))) rbind(x, NA) else x
rollup <- function(x,i=1) rbind(x[(i+1):nrow(x),], x[1:i,])
idx <- cumsum(is.na(x) | is.na(y))
polys <- split(data.frame(x=x,y=y)[!is.na(x),], idx[!is.na(x)])
r <- lapply(polys, function(P) {
x <- P$x; y<-P$y
side <- splitter$classify(x, y)
if(side[1] != side[length(side)]) {
ints <- splitter$intercepts(c(x,x[1]), c(y, y[1]), c(side, side[1]))
} else {
ints <- splitter$intercepts(x, y, side)
}
sideps <- lapply(unique(side), function(ss) {
pts <- data.frame(x=x[side==ss], y=y[side==ss],
idx=seq_along(x)[side==ss], dir=0)
mm <- rbind(pts, ints)
mm <- mm[order(mm$idx), ]
br <- cumsum(mm$dir!=0 & c(0,head(mm$dir,-1))!=0 &
c(0,diff(mm$idx))>1)
if (length(unique(br))>1) {
mm<-rollup(mm, sum(br==br[1]))
}
br <- cumsum(c(FALSE,abs(diff(mm$dir*mm$dir))==3))
do.call(rbind, lapply(split(mm, br), addnullrow))
})
pss<-rep(unique(side), sapply(sideps, nrow))
ps<-do.call(rbind, lapply(sideps, addnullrow))[,c("x","y")]
attr(ps, "side")<-pss
ps
})
pss<-unname(unlist(lapply(r, attr, "side")))
src <- rep(seq_along(r), sapply(r, nrow))
r <- do.call(rbind, r)
attr(r, "source")<-src
attr(r, "side")<-pss
r
}
The input is just the values of x and y as you would pass to polygon along with the cutter. It will return a data.frame with x and y values that can be used with polygon.
For example
x <- c(1,2,2.5,NA,2.5,3,4)
y <- c(1,-2,2,NA,-1,2,-2)
sl<-getSplitLine(0,0)
plot(range(x, na.rm=T),range(y, na.rm=T), type = "n")
p <- splitPolygon(x,y,sl)
g <- cumsum(c(F, is.na(head(p$y,-1))))
gc <- ifelse(attr(p,"side")[is.na(p$y)],
"red","green")
polygon(p, col=gc)
sl$plot(lty=2, col="grey")
This should work for simple concave polygons as well with sloped lines. Here's another example
x <- c(1,2,3,4,5,4,3,2)
y <- c(-2,2,1,2,-2,.5,-.5,.5)
sl<-getSplitLine(.5,-1.25)
plot(range(x, na.rm=T),range(y, na.rm=T), type = "n")
p <- splitPolygon(x,y,sl)
g <- cumsum(c(F, is.na(head(p$y,-1))))
gc <- ifelse(attr(p,"side")[is.na(p$y)],
"red","green")
polygon(p, col=gc)
sl$plot(lty=2, col="grey")
Right now things can get a bit messy when the the vertex of the polygon falls directly on the splitting line. I may try to correct that in the future.
A faster, but not very accurate solution is to split data frame to list according to grouping variable (e.g. above=red and below=blue). This is a pretty nice workaround for rather big (I would say > 100 elements) datasets. For smaller chunks some discontinuity may be visible:
x <- 1:100
y1 <- sin(1:100/10)*0.8
y2 <- sin(1:100/10)*1.2
plot(x, y2, type='l')
lines(x, y1, col='red')
df <- data.frame(x=x, y1=y1, y2=y2)
df$pos_neg <- ifelse(df$y2-df$y1>0,1,-1) # above (1) or below (-1) average
# create the number for chunks to be split into lists:
df$chunk <- c(1,cumsum(abs(diff(df$pos_neg)))/2+1) # first element needs to be added`
df$colors <- ifelse(df$pos_neg>0, "red","blue") # colors to be used for filling the polygons
# create lists to be plotted:
l <- split(df, df$chunk) # we should get 4 sub-lists
lapply(l, function(x) polygon(c(x$x,rev(x$x)),c(x$y2,rev(x$y1)),col=x$colors))
As I said, for smaller dataset some discontinuity may be visible if sharp changes occur between positive and negative areas, but if horizontal line distinguishes between those two, or more elements are plotted then this effect is neglected:

hierarchy in voronoi tessellations

I am working with voronoi tessellations. I have different polygons representing regions in the tessellations.
The points below are used to draw the tessellation in the figure.
tessdata
[,1] [,2]
1 -0.4960583 -0.3529047
2 -2.4986929 0.8897895
3 3.6514561 -1.3533369
4 -1.7263101 -5.5341202
5 2.2140143 0.3883696
6 -2.5208933 -1.4881461
7 -3.2556913 4.4535629
8 0.6423109 -2.8350062
9 -0.4160715 1.2676151
10 4.4059361 4.5641771
Using tessdata as input to draw the tessellation as below:
library(deldir)
dd<-deldir(tessdata[,1], tessdata[,2])
plot(dd,wlines="tess")
Sammon coordinates are below.
[,1] [,2]
1 3.14162704 -1.45728604
2 2.35422623 2.46437927
3 -0.85051049 2.71503294
4 1.94310458 -0.45936958
5 0.08737757 3.74324701
6 1.23007799 1.34443842
7 0.01571924 2.19322032
8 1.43320754 2.64818631
9 -0.05463431 0.66980876
10 1.51344967 5.03351176
I want to construct the tessellations for which the sammon coordinate points are input. The tessellation using these points should be within one of the regions in the figure shown and for that, the above points should be scaled or we can restrict the plot of the tessellation within one of the regions in the above figure.
Hope i have covered all the necessary data.
P.S:
sammon's projection comes in "MASS" package.
voronoi tessellations from "deldir" package.
dirsgs argument of the deldir function output will give the coordinates of the points forming the lines in the tessellations.
segments function of package graphics can be used to join the 2 points whose coordinates are extracted from dirsgs.
If you want to restrict the second set of points
to one of the tiles of the tessellation,
you can use tile.list to have a description of each tile,
and then check which points are in this tile
(there are many functions to do so:
in the following example, I use secr::pointsInPolygon).
# Sample data
x <- matrix( rnorm(20), nc = 2 )
y <- matrix( rnorm(1000), nc=2 )
# Tessellation
library(deldir)
d <- deldir(x[,1], x[,2])
plot(d, wlines="tess")
# Pick a cell at random
cell <- sample( tile.list(d), 1 )[[1]]
points( cell$pt[1], cell$pt[2], pch=16 )
polygon( cell$x, cell$y, lwd=3 )
# Select the points inside that cell
library(secr)
i <- pointsInPolygon(
y,
cbind(
c(cell$x,cell$x[1]),
c(cell$y,cell$y[1])
)
)
points(y[!i,], pch=".")
points(y[i,], pch="+")
# Compute a tessellation of those points
dd <- deldir(y[i,1], y[i,2])
plot(dd, wlines="tess", add=TRUE)
If, instead, you want to translate and rescale the points
to fit them into the tile, that is trickier.
We need to somehow estimate how far away from the tile the points are:
to this end, let us define a few auxilliary functions to compute,
first the distance from a point to a segment,
then the distance from a point to a polygon.
distance_to_segment <- function(M, A, B) {
norm <- function(u) sqrt(sum(u^2))
lambda <- sum( (B-A) * (M-A) ) / norm(B-A)^2
if( lambda <= 0 ) {
norm(M-A)
} else if( lambda >= 1 ) {
norm(M-B)
} else {
N <- A + lambda * (B-A)
norm(M-N)
}
}
A <- c(-.5,0)
B <- c(.5,.5)
x <- seq(-1,1,length=100)
y <- seq(-1,1,length=100)
z <- apply(
expand.grid(x,y),
1,
function(u) distance_to_segment( u, A, B )
)
par(las=1)
image(x, y, matrix(z,nr=length(x)))
box()
segments(A[1],A[2],B[1],B[2],lwd=3)
library(secr)
distance_to_polygon <- function(x, poly) {
closed_polygon <- rbind(poly, poly[1,])
if( pointsInPolygon( t(x), closed_polygon ) )
return(0)
d <- rep(Inf, nrow(poly))
for(i in 1:nrow(poly)) {
A <- closed_polygon[i,]
B <- closed_polygon[i+1,]
d[i] <- distance_to_segment(x,A,B)
}
min(d)
}
x <- matrix(rnorm(20),nc=2)
poly <- x[chull(x),]
x <- seq(-5,5,length=100)
y <- seq(-5,5,length=100)
z <- apply(
expand.grid(x,y),
1,
function(u) distance_to_polygon( u, poly )
)
par(las=1)
image(x, y, matrix(z,nr=length(x)))
box()
polygon(poly, lwd=3)
We can now look for a transformation of the form
x --> lambda * x + a
y --> lambda * y + b
that minimizes the (sum of the squared) distances to the polygon.
That is actually not sufficient: we are likely to end up with scaling factor
lambda equal to (or close to) zero.
To avoid this, we can add a penalty if lambda is small.
# Sample data
x <- matrix(rnorm(20),nc=2)
x <- x[chull(x),]
y <- matrix( c(1,2) + 5*rnorm(20), nc=2 )
plot(y, axes=FALSE, xlab="", ylab="")
polygon(x)
# Function to minimize:
# either the sum of the squares of the distances to the polygon,
# if at least one point is outside,
# or minus the square of the scaling factor.
# It is not continuous, but (surprisingly) that does not seem to be a problem.
f <- function( p ) {
lambda <- log( 1 + exp(p[1]) )
a <- p[2:3]
y0 <- colMeans(y)
transformed_points <- t( lambda * (t(y)-y0) + a )
distances <- apply(
transformed_points,
1,
function(u) distance_to_polygon(u, x)
)
if( all(distances == 0) ) - lambda^2
else sum( distances^2 )
}
# Minimize this function
p <- optim(c(1,0,0), f)$par
# Compute the optimal parameters
lambda <- log( 1 + exp(p[1]) )
a <- p[2:3]
y0 <- colMeans(y)
# Compute the new coordinates
transformed_points <- t( lambda * (t(y)-y0) + a )
# Plot them
segments( y[,1], y[,2], transformed_points[,1], transformed_points[,2], lty=3 )
points( transformed_points, pch=3 )
library(deldir)
plot(
deldir( transformed_points[,1], transformed_points[,2] ),
wlines="tess", add=TRUE
)

Resources