Graph to compare two matrices in R - r

I have two matrices (of approximately 300 x 100) and I would like to plot a graph to see the parts of the first one that are higher than those of the second.
I can do, for instance:
# m1 and m2 are assumed to already hold the two matrices to compare.
# Their values lie between -1 and 1.
par(mfrow = c(1, 3))
# First two panels: each matrix on the same colour scale.
for (mat in list(m1, m2)) {
  image(mat, zlim = c(-1, 1))
}
# Third panel: the difference, restricted to the range 0..1 so that only
# cells where m1 is at least as large as m2 are coloured.
image(m1 - m2, zlim = c(0, 1))
This will plot only the desired regions in the 3rd plot but I would like to do something a bit different, like putting a line around those areas over the first plot in order to highlight them directly there.
Any idea how I can do that?
Thank you
nico

How about:
# Three panels side by side; each one gets the contour lines of m1 - m2
# drawn on top of the image.
par(mfrow = c(1, 3))
delta <- m1 - m2
panels <- list(
  list(z = m1, zlim = c(-1, 1)),
  list(z = m2, zlim = c(-1, 1)),
  list(z = delta, zlim = c(0, 1))
)
for (panel in panels) {
  image(panel$z, zlim = panel$zlim)
  contour(delta, add = TRUE)
}
This adds a contour map around the regions. Sort of puts rings around the areas of the 3rd plot (might want to fiddle with the (n)levels of the contours to get fewer 'circles').

Another way of doing your third image might be:
# m1 > m2 is a logical matrix, which image() draws as a two-colour map
image(m1 > m2)
this produces a matrix of TRUE/FALSE values which gets imaged as 0/1, so you have a two-colour image. Still not sure about your 'putting a line around' thing though...

Here's some code I wrote to do something similar. I wanted to highlight contiguous regions above a 0.95 threshold by drawing a box round them, so I got all the grid squares above 0.95 and did a clustering on them. Then do a bit of fiddling with the clustering output to get the rectangle coordinates of the regions:
computeHotspots <- function(xyz, thresh, minsize = 1, margin = 1, h = 10) {
  ### Find bounding boxes around (approximately) contiguous areas of a grid
  ### whose values exceed a threshold.
  ###
  ### Arguments:
  ###   xyz     - list(x, y, z): z is a matrix, x and y hold the coordinates
  ###             of its rows and columns.
  ###   thresh  - cells with z > thresh count as "hot".
  ###   minsize - minimum width/height of a returned box; smaller boxes are
  ###             widened symmetrically around their midpoint.
  ###   margin  - amount added on every side of each box.
  ###   h       - height at which the single-linkage cluster tree is cut.
  ###             Distances are measured between (row, column) indices of
  ###             the hot cells, not between their x/y coordinates.
  ###
  ### Returns NULL when no cell exceeds thresh, otherwise a matrix with one
  ### row per hotspot and columns (xmin, xmax, ymin, ymax).
  overs <- which(xyz$z > thresh, arr.ind = TRUE)
  if (length(overs) == 0) {
    ## found no hotspots
    return(NULL)
  }
  hot_x <- xyz$x[overs[, 1]]
  hot_y <- xyz$y[overs[, 2]]
  if (nrow(overs) == 1) {
    ## found a single hot cell: clustering needs at least two points,
    ## so build the degenerate box directly
    xRange <- cbind(hot_x, hot_x, deparse.level = 0)
    yRange <- cbind(hot_y, hot_y, deparse.level = 0)
  } else {
    ## group the hot cells with single-linkage clustering, then take the
    ## coordinate range of every group
    oTree <- hclust(dist(overs), method = "single")
    oCut <- cutree(oTree, h = h)
    xRange <- do.call("rbind", tapply(hot_x, oCut, range))
    yRange <- do.call("rbind", tapply(hot_y, oCut, range))
  }

  ## Add the user margin to a two-column (min, max) matrix and make sure
  ## every interval is at least `minsize` wide.
  padRange <- function(rng) {
    rng[, 1] <- rng[, 1] - margin
    rng[, 2] <- rng[, 2] + margin
    width <- apply(rng, 1, diff)
    middle <- apply(rng, 1, mean)
    tooSmall <- width < minsize
    rng[tooSmall, 1] <- middle[tooSmall] - (minsize / 2)
    rng[tooSmall, 2] <- middle[tooSmall] + (minsize / 2)
    rng
  }

  ## put it all together
  cbind(padRange(xRange), padRange(yRange))
}
Test code:
# Test surface: z = sin(x / 3) * cos(y / 3) evaluated on a 23 x 28 grid
x <- 1:23
y <- 7:34
m1 <- list(
  x = x,
  y = y,
  z = outer(x, y, function(x, y) {
    sin(x / 3) * cos(y / 3)
  })
)
image(m1)
# Bounding boxes of the regions where z exceeds 0.95
hs <- computeHotspots(m1, 0.95)
That should give you a matrix of rectangle coordinates:
> hs
[,1] [,2] [,3] [,4]
1 13 15 8 11
2 3 6 17 20
3 22 24 18 20
4 13 16 27 30
Now you can draw them over the image with rect:
image(m1)
# hs columns are (xmin, xmax, ymin, ymax), one row per box
rect(xleft = hs[, 1], ybottom = hs[, 3], xright = hs[, 2], ytop = hs[, 4])
and to show they are where they should be:
# Same boxes, drawn over the thresholded (TRUE/FALSE) version of the surface
above_threshold <- list(x = m1$x, y = m1$y, z = m1$z > 0.95)
image(above_threshold)
rect(xleft = hs[, 1], ybottom = hs[, 3], xright = hs[, 2], ytop = hs[, 4])
You could of course adapt this to draw circles, but more complex shapes would be tricky. It works best when the regions of interest are fairly compact.
Barry

Related

R: Sample a matrix for cells close to a specified position

I'm trying to find sites to collect snails by using a semi-random selection method. I have set a 10 km² grid around the region I want to collect snails from, which is broken into 10,000 cells of 10 m². I want to randomly sample this grid in R to select 200 field sites.
Randomly sampling a matrix in R is easy enough;
# A 100-row matrix whose cells are numbered 1..10000, filled column by column
dat <- matrix(seq_len(10000), nrow = 100)
# Draw 200 cell numbers uniformly at random, without replacement
sample(dat, size = 200)
However, I want to bias the sampling to pick cells closer to a single position (representing sites closer to the research station). It's easier to explain this with an image;
The yellow cell with a cross represents the position I want to sample around. The grey shading is the probability of picking a cell in the sample function, with darker cells being more likely to be sampled.
I know I can specify sampling probabilities using the prob argument in sample, but I don't know how to create a 2D probability matrix. Any help would be appreciated, I don't want to do this by hand.
I'm going to do this for a 9 x 6 grid (54 cells), just so it's easier to see what's going on, and sample only 5 of these 54 cells. You can modify this to a 100 x 100 grid where you sample 200 from 10,000 cells.
# Grid dimensions (adjust as needed)
nx <- 9 # rows
ny <- 6 # columns
# One (x, y) pair per grid cell; y cycles fastest. Each object is printed
# after it is created.
x <- rep(seq_len(nx), each = ny); x
y <- rep(seq_len(ny), times = nx); y
xy <- cbind(x, y); xy
# Location of the research station, as a one-row matrix (change as required)
Station <- rbind(c(x = 3, y = 2))
# Distance from every grid cell to the station
library(SpatialTools)
D <- dist2(xy, Station)
From the help page of dist2
dist2 takes the matrices of coordinates coords1 and coords2 and
returns the inter-Euclidean distances between coordinates.
We can visualize this using the image function.
# Reshape the distances into an nx-row matrix, filling row by row
XY <- matrix(D, nrow = nx, byrow = TRUE)
image(XY) # axes are scaled to 0-1
# Rescale x so that its minimum maps to 0. With m = 0 the maximum maps to 1;
# a positive m widens the denominator so the maximum stays below 1.
scale_prop <- function(x, m = 0) {
  lowest <- min(x)
  span <- m + max(x) - lowest
  (x - lowest) / span
}
# Write each cell's "x,y" coordinates onto the image. The image axes run
# from 0 to 1, so the coordinates are rescaled to that range first.
cell_labels <- paste(xy[, 1], xy[, 2], sep = ",")
text(x = scale_prop(xy[, 1]), y = scale_prop(xy[, 2]), labels = cell_labels)
Lighter tones indicate grids closer to the station at (3,2).
# Turn distances into sampling weights: the cell nearest the station gets
# weight 1 and the weight falls off linearly with distance. Using m = 1 keeps
# the scaled distance strictly below 1, so even the farthest cell keeps a
# non-zero weight.
prob <- 1 - scale_prop(D, m = 1); range(prob)
# Draw cells from the grid using these weights
sam <- sample(seq_len(nrow(xy)), size = 5, prob = prob) # Change size as required.
xy[sam, ] # The 5 sampled cells (the result differs from run to run)
x y
[1,] 4 4
[2,] 7 1
[3,] 3 2
[4,] 5 1
[5,] 5 3
To confirm the sample probabilities are correct, you can simulate many samples and see which coordinates were sampled the most.
# Draw `nsamples` grid cells (without replacement) using the sampling weights
# `prob`, and return them as "x,y" strings.
#
# Reads two objects from the calling environment:
#   xy   - two-column matrix of grid coordinates, one row per cell
#   prob - sampling weight for each row of xy
#
# Returns a character vector of length `nsamples`.
snail.sam <- function(nsamples) {
  sam <- sample(seq_len(nrow(xy)), size = nsamples, prob = prob)
  # drop = FALSE keeps a matrix even when a single cell is drawn
  picked <- xy[sam, , drop = FALSE]
  paste(picked[, 1], picked[, 2], sep = ",")
}
# Repeat the 5-cell draw 10000 times and count how often each cell appears
SAMPLES <- replicate(10000, snail.sam(5))
tab <- table(SAMPLES)
# One shade per possible count, from light (rare) to dark (frequent)
cols <- colorRampPalette(c("lightblue", "darkblue"))(max(tab))
barplot(
  tab,
  horiz = TRUE,
  las = 1,
  cex.names = 0.5,
  col = cols[tab]
)
If using a 100 x 100 grid and the station is located at coordinates (60,70), then the image would look like this, with the sampled grids shown as black dots:
There is a tendency for the points to be located close to the station, although the sampling variability may make this difficult to see. If you want to give even more weight to grids near the station, then you can rescale the probabilities, which I think is ok to do, to save costs on travelling, but these weights need to be incorporated into the analysis when estimating the number of snails in the whole region. Here I've cubed the probabilities just so you can see what happens.
# Cubing the weights concentrates the sample more strongly near the station
sam <- sample(seq_len(nrow(xy)), size = 200, prob = prob^3)
The tendency for the points to be located near the station is now more obvious.
There may be a better way than this but a quick way to do it is to randomly sample on both x and y axis using a distribution (I used the normal - bell shaped distribution, but you can really use any). The trick is to make the mean of the distribution the position of the research station. You can change the bias towards the research station by changing the standard deviation of the distribution.
Then use the randomly selected positions as your x and y coordinates to select the positions.
dat <- matrix(1:10000, nrow = 100)
# Arbitrarily chosen position (row, column) of the research station
rs <- c(80, 30)
# Number of field sites wanted
n_sites <- 200
# Draw candidate rows (x) and columns (y) from normal distributions centred
# on the station. Change the sd to change the bias towards the station.
x <- round(rnorm(400, mean = rs[1], sd = 10))
y <- round(rnorm(400, mean = rs[2], sd = 10))
# Some candidates fall outside the grid, which is why twice as many as needed
# are drawn. Keep only those inside the matrix, edges (1 and 100) included.
inside <- x >= 1 & x <= nrow(dat) & y >= 1 & y <= ncol(dat)
if (sum(inside) < n_sites) {
  stop("Only ", sum(inside), " of the candidate positions fall inside the ",
       "grid; draw more candidates.", call. = FALSE)
}
# Indices of the first n_sites candidates that are inside the grid
keep <- which(inside)[seq_len(n_sites)]
# Cell numbers of the selected positions (the same cell may occur twice)
position <- dat[cbind(x[keep], y[keep])]
plot the results:
# All candidate positions as filled dots
plot(x = x, y = y, pch = 19)
# Position of the station, in red
points(x = 80, y = 30, pch = 19, col = "red")

How to find out x/y shift of two raster layers?

regarding two raster layers which do not match exactly because of defective data, i would like to know, how to find out about the x/y shift between these two layers to align them properly using raster::shift()
i have already tried to investigate on the x/y-shift using qgis, but i just found the georeferencing tool, providing to relocate raster layers but not something interactive. i am looking for a possibility to move my defective raster on a basemap and getting information about the x/y shift.
i am NOT looking for a solution where i have to set specific georeferencing points to align the two raster layers since i am working on a highly dynamic landscape where it is difficult to find matching points, but where it is possible to align the raster layers by textural information provided by the datasets.
a code example should look like the solution provided by user @dTanMan (https://gis.stackexchange.com/users/77712/dtanman) in this post: https://gis.stackexchange.com/a/201750
# Create a raster object by calling raster() with no arguments
raster <- raster()
# Move the raster by 5 in x and -15 in y.
# NOTE(review): the argument names accepted by shift() may depend on the
# installed version of the raster package -- confirm against its help page.
raster <- shift(raster, x=5, y=-15)
thanks a lot in advance, cheers, ExploreR
Perhaps you can use something like this
Example data
library(raster)
# Reference raster: 20 x 20 cells covering 0..20 in both directions,
# with cell values 1..400
a <- raster(ncol = 20, nrow = 20, xmn = 0, xmx = 20, ymn = 0, ymx = 20)
values(a) <- seq_len(400)
# Second raster: the reference plus uniform random noise (seeded, so the
# example is reproducible)
set.seed(3)
b <- a + runif(400)
Function to compare similarity of cell values
# Root mean squared error between observed and predicted values.
# Pairs where either value is NA are ignored.
rmse <- function(obs, prd) {
  squared_error <- (obs - prd)^2
  sqrt(mean(squared_error, na.rm = TRUE))
}
Values from reference raster. May need to take a sample if raster is very large
# Upper limit on the number of cells sampled from the reference raster
nsamples <- 10000
# Regular sample; with cells = TRUE the first column holds the cell numbers
s <- sampleRegular(a, size = nsamples, cells = TRUE)
# The second column holds the sampled values of the reference raster
sample_a <- s[, 2]
Locations to be compared
# Coordinates of the sampled cells, looked up from their cell numbers
xy <- xyFromCell(a, cell = s[, 1])
Test range for cell shifts
# Candidate shifts: -5 to 5 cells in each direction, in map units
cell_steps <- -5:5
xrange <- cell_steps * xres(a)
yrange <- cell_steps * yres(a)
Matrix to store the results in
# One row per (dx, dy) combination, with dy cycling fastest; the rmse
# column is filled in later
result <- cbind(
  dx = rep(xrange, each = length(yrange)),
  dy = rep(yrange, times = length(xrange)),
  rmse = NA
)
Loop over cellshift combinations
# Try every shift listed in `result` (rows are ordered with dx outermost and
# dy innermost) and record how well the shifted raster matches the reference
# values at the sampled locations.
for (i in seq_len(nrow(result))) {
  shifted <- shift(b, result[i, 1], result[i, 2])
  sample_b <- extract(shifted, xy)
  result[i, 3] <- rmse(sample_a, sample_b)
}
Results suggest that dx=0 and dy=0 is the best in this case.
# Sort the candidate shifts from best (lowest rmse) to worst
best_first <- order(result[, 3])
r <- result[best_first, ]
head(r)
# dx dy rmse
#[1,] 0 0 0.5734866
#[2,] 1 0 0.5800670
#[3,] -1 0 1.5252878
#[4,] 2 0 1.5302921
#[5,] -2 0 2.5153573
#[6,] 3 0 2.5157728
Test
# Apply the best-scoring shift (first row of r) and compare all cell values
best_dx <- r[1, 1]
best_dy <- r[1, 2]
bb <- shift(b, dx = best_dx, dy = best_dy)
rmse(values(a), values(bb))
#[1] 0.5734866

Create bubble chart with biggest bubble at the center

I'm trying to create a bubble chart using a set of data as follows:
X --> 10
Y --> 20
Z --> 5
Q --> 10
I simply need to have the biggest bubble (based on its number) to be at the centre (give or take) and the rest of the bubbles be around it without overlapping.
All of the R examples I have seen require a two-dimensional dataset, and since the data I have are only one-dimensional, I would like to know whether it's at all possible to create such graphs in R.
It would be great if someone could suggest me some useful hints or so. By the way for this task I need to use a SA tools so something like d3js is out of options. However, I am open to using a tool other than R.
I wasn't quite sure if this question should be asked in On Stack Overflow or Cross Validated, so if moderators believe it doesn't belong here, I'll remove it.
This should do, the main idea being that you sort by the value of the radius, so the first is the biggest, then shift the values around it (odd on one side, even on the other) so that the values are decreasing both ways.
Further explanations in the code.
library(plotrix)
library(RColorBrewer)
# Set the random seed, to get reproducible results
set.seed(54321)
# Generate some random values for the radius
num.circles <- 11
rd <- runif(num.circles, 1, 20)
df <- data.frame(labels = paste("Lbl", seq_len(num.circles)), radius = rd)
# Sort by descending radius. The biggest circle is always row 1
df <- df[rev(order(df$radius)), ]
# Now we want to put the biggest circle in the middle and the others on
# either side. To do so we reorder the data frame, taking the even rows first
# (reversed), then the odd rows, so the radii decrease away from the middle.
ranks <- seq_len(num.circles)
even.ranks <- ranks[ranks %% 2 == 0]
odd.ranks <- ranks[ranks %% 2 == 1]
df <- df[c(rev(even.ranks), odd.ranks), ]
# Row of df that now holds the biggest circle: it comes right after the evens
centre <- length(even.ranks) + 1
# Space between the circles. 0.2 * average radius seems OK
space.between <- 0.2 * mean(df$radius)
# Create an empty plot, wide enough for all diameters plus the gaps
plot(0, 0, type = "n", axes = FALSE, bty = "n", xlab = "", ylab = "",
     xlim = c(0, sum(df$radius) * 2 + space.between * num.circles),
     ylim = c(0, 2.5 * max(df$radius)))
# Draw the circles at half the height of the biggest circle (plus some padding)
xx <- 0
mid.y <- max(df$radius) * 1.25
# Some nice degrading tones of blue: one shade per step away from the centre.
# The index is a whole number >= 1, so every circle gets a colour.
steps.from.centre <- abs(seq_len(num.circles) - centre)
colors <- colorRampPalette(brewer.pal(8, "Blues"))(max(steps.from.centre) + 1)
for (i in seq_len(nrow(df))) {
  row <- df[i, ]
  x <- xx + row$radius + i * space.between
  y <- mid.y
  # Draw the circle
  draw.circle(x, y, row$radius, col = colors[steps.from.centre[i] + 1])
  # Add the label
  text(x, y, row$labels, cex = 0.6)
  # Update current x position
  xx <- xx + row$radius * 2
}
The result:
Live version on RFiddle.

R Surface Plot from List of X,Y,Z points

I am trying to make a surface plot for data that is in a very long list of x,y,z points. To do this, I am dividing the data into a grid of 10k squares and finding the max value of z within each square. From my understanding, each z value should be stored in a matrix where each element of the matrix corresponds to a square on the grid. Is there an easier way to do this than the code below? That last line is already pretty long and it is only one square.
# Grid break points: 101 evenly spaced values from 0 up to each column's maximum
x <- (0:100) * max(eff$CFaR) / 100
y <- (0:100) * max(eff$EaR) / 100
# One matrix element per grid square
effmap <- matrix(ncol = length(x) - 1, nrow = length(y) - 1)
# Rows of eff that fall inside the chosen CFaR and EaR intervals. Both
# conditions are evaluated on the full columns so that the resulting
# positions index eff$Cost correctly.
# NOTE(review): the upper EaR bound is y[91]; for a single grid square it
# would presumably be y[21] -- confirm.
in.cell <- which(eff$CFaR >= x[50] & eff$CFaR < x[51] &
                   eff$EaR >= y[20] & eff$EaR < y[91])
someMatrix <- max(eff$Cost[in.cell])
So this is my interpretation of what you are trying to accomplish...
df <- read.csv("effSample.csv") # downloaded from your link
df <- df[c("CFaR", "EaR", "Cost")] # remove unnecessary columns
df$x <- cut(df$CFaR, breaks = 100, labels = FALSE) # establish bins: CFaR
df$y <- cut(df$EaR, breaks = 100, labels = FALSE) # establish bins: EaR
df.max <- expand.grid(x = 1:100, y = 1:100) # template; 10,000 grid cells
# maximum cost in each grid cell - NOTE: most of the cells are *empty*
df.max <- merge(df.max, aggregate(Cost ~ x + y, df, max), all.x = TRUE)
# Cost vector -> matrix, placing each value at z[x, y] explicitly.
# merge() re-sorts its rows, so filling the matrix by position would not
# reliably put the CFaR bin on the rows.
z <- matrix(NA_real_, nrow = 100, ncol = 100)
z[cbind(df.max$x, df.max$y)] <- df.max$Cost
# colors based on z-value
palette <- rev(rainbow(20)) # palette of 20 colors
zlim <- range(z, na.rm = TRUE)
# map z linearly onto palette positions 1..20 (fractions are truncated)
colors <- palette[19 * (z - zlim[1]) / diff(zlim) + 1]
# create the plot
library(rgl)
open3d(scale = c(1, 1, 10)) # CFaR and EaR range ~ 10 X Cost range
# lower edge of each of the 100 bins, in data units
x.values <- min(df$CFaR) + (0:99) * diff(range(df$CFaR)) / 100
y.values <- min(df$EaR) + (0:99) * diff(range(df$EaR)) / 100
surface3d(x.values, y.values, z, col = colors)
axes3d()
title3d(xlab = "CFaR", ylab = "EaR", zlab = "Cost")
The code above generates a rotatable 3D plot, so the image is just a screen shot. Notice how there are lots of "holes". This is (partially) because you provided only part of your data. However, it is important to realize that just because you imagine 10,000 grid cells (e.g., a 100 X 100 grid), does not mean that there will be data in every cell.

Overlay a map on top of a 3d surface map in r

I have created a 3d map using rgl.surface(), mainly following Shane's answer in this post. Using my own data, I get this map
On top of this surface map, I would like to add a map of vegetation density such that I obtain something like this (obtained with the software Surfer):
Is it possible to do this with rgl, or for that matter any other package in r or is the only solution to have two maps like in Shane's answer?
Thank you.
Edit:
Following @gsk3's request, here is the code for this map:
library(rgl)
# Read the z (i.e. elevation) dimension from file
z1 <- matrix(scan("myfile.txt"), nrow = 256, ncol = 256, byrow = TRUE)
# The rest of the script works on `z`, so it has to be defined
z <- z1
# create / open x y (i.e. easting and northing coordinates) dimensions
y <- 8 * seq_len(ncol(z)) # Each point is 8 m^2
x <- 8 * seq_len(nrow(z))
# See https://stackoverflow.com/questions/1896419/plotting-a-3d-surface-plot-with-contour-map-overlay-using-r for details of code below
zlim <- range(z)
zlen <- zlim[2] - zlim[1] + 1
colorlut <- terrain.colors(zlen, alpha = 0) # height color lookup table
col <- colorlut[z - zlim[1] + 1] # assign colors to heights for each point
open3d()
# NOTE: `col` is computed above but is not passed to rgl.surface() here
rgl.surface(x, y, z)
I can't post the elevation code because there are 65536 (i.e. x*y=256*256) points but it is a matrix which looks like this
[,1] [,2] [,3] [,4] [,5]
[1,] 1513.708 1513.971 1514.067 1513.971 1513.875
[2,] 1513.622 1513.524 1513.578 1513.577 1513.481
and so on.
Same for the vegetation density map, which is exactly the same format and for which I have a single value for each x*y point. I hope this makes things a bit clearer...?
Edit 2, final version
This is the map I have produced with R. I haven't got the legend on it yet but this is something I'll do at a later stage.
The final code for this is
library(rgl)
# Read the elevation matrix from file
z1 <- matrix(scan("myfile.txt"), nrow = 256, ncol = 256, byrow = TRUE)
# Multiply z by 2 to accentuate the relief otherwise it looks a little bit flat.
z <- z1 * 2
# create / open x y dimensions
y <- 8 * seq_len(ncol(z))
x <- 8 * seq_len(nrow(z))
# Read the values used for colouring.
# NOTE(review): this reads the same file name as the elevation above;
# presumably a separate vegetation/terrain file was intended -- confirm.
trn <- matrix(scan("myfile.txt"), nrow = 256, ncol = 256, byrow = TRUE)
fv <- trn * 100
trnlim <- range(fv)
fv.colors <- colorRampPalette(c("white", "tan4", "darkseagreen1", "chartreuse4")) ## define the color ramp
# Build the lookup table by picking positions out of a 100-colour ramp
# (fractional positions are truncated when used as an index)
colorlut <- fv.colors(100)[c(1,
                             seq(35, 35, length.out = 9),
                             seq(35, 75, length.out = 30),
                             seq(75, 100, length.out = 61))]
# Assign colors to fv for each point
col <- colorlut[fv - trnlim[1] + 1]
open3d()
rgl.surface(x, y, z, color = col)
Thank you very much to @gsk3 and @nullglob in this post for their help. Hope this post will help many others!
Modified above code to give an answer. Note that terrain should be a matrix in the same format as the elevation matrix. And I added a ,color argument to your function call so it actually uses the color matrix you created.
library(rgl)
# Read the z (i.e. elevation) dimension from file
z1 <- matrix(scan("myfile.txt"), nrow = 256, ncol = 256, byrow = TRUE)
# The rest of the script works on `z`, so it has to be defined
z <- z1
# create / open x y (i.e. easting and northing coordinates) dimensions
y <- 8 * seq_len(ncol(z)) # Each point is 8 m^2
x <- 8 * seq_len(nrow(z))
# Read the terrain types from a file
trn <- matrix(scan("terrain.txt"), nrow = 256, ncol = 256, byrow = TRUE)
# See http://stackoverflow.com/questions/1896419/plotting-a-3d-surface-plot-with-contour-map-overlay-using-r for details of code below
trnlim <- range(trn)
trnlen <- trnlim[2] - trnlim[1] + 1
colorlut <- terrain.colors(trnlen, alpha = 0) # terrain color lookup table
col <- colorlut[trn - trnlim[1] + 1] # assign a color to each point from its terrain value
open3d()
# The surface height comes from z; its colors come from the terrain matrix
rgl.surface(x, y, z, color = col)

Resources