Plotting netcdf in R with correct grid - r

My goal is to plot nitrate (no3) data on a world map, using the correct longitude and latitude for these data.
There are two netcdf files:
1. with the data
2. with the grid information
Summary info on the data:
no3 is an array of length x*y*sigma
no3_df is 'x*y obs. of 3 variables'
x = integer [180]
y = integer [193]
sigma = array[53]
I want to look at sigma ('depth') 20. I therefore did the following:
# Packages: ncdf reads the NetCDF files, akima provides interp()
library(ncdf)
library(akima)

# Pick the data file first, then the grid file, in the file dialogs
file1 <- open.ncdf(file.choose())
grid <- open.ncdf(file.choose())

# Variables stored in the data file
x <- get.var.ncdf(file1, varid = "x")
y <- get.var.ncdf(file1, varid = "y")
sigma <- get.var.ncdf(file1, varid = "sigma")
no3 <- get.var.ncdf(file1, varid = "no3")

# Slice of no3 at index 20 of its third (sigma) dimension
sigma_plot <- no3[, , 20]

# Longitude / latitude arrays stored in the grid file
plon <- get.var.ncdf(grid, varid = "plon")
plat <- get.var.ncdf(grid, varid = "plat")

# Each cell of sigma_plot corresponds to one cell of plon and plat, so stack
# the three 180 x 193 layers into a single 180 x 193 x 3 array ...
A <- array(c(plon, plat, sigma_plot), dim = c(180, 193, 3))
# ... and flatten every layer into one column: each row of B is then
# (longitude, latitude, value).
B <- apply(A, 3, cbind)

# The points are not on a regular grid, so interpolate them onto a regular
# lon/lat grid with akima::interp(). Change the `by` steps to tweak the
# resolution; duplicate = "mean" averages the values of duplicated points.
C <- interp(
  x = B[, 1], y = B[, 2], z = B[, 3],
  xo = seq(-180, 180, by = 1),
  yo = seq(-90, 90, by = 1),
  duplicate = "mean"
)

# Plotting ----
# NOTE(review): rich.colors() is not provided by ncdf or akima; presumably it
# comes from the gplots package -- confirm and load it before plotting.

# Works, but the axes are the x/y indices rather than longitude and latitude
filled.contour(x, y, sigma_plot, col = rich.colors(18))
# Interpolated field on longitude/latitude axes
filled.contour(C, col = rich.colors(30))
Since the filled.contour plot doesn't have correct longitude and latitude, I would like to use ggplot. However, I don't know how to do this...
# And the plotting with ggplot
ggplot(aes(x=plon_datafrm,y=plat_datafrm),data=no3_df) +
geom_raster() +
coord_equal() +
scale_fill_gradient()
This doesn't seem to work. I am new to ggplot, so that might be the reason; I would truly appreciate any help.

# Read the nitrate slice and the lon/lat grid from the two NetCDF files.
# NOTE(review): file1 and file2 are not defined in this snippet; presumably
# they hold the paths of the data file and the grid file -- confirm.
library(ncdf)
data <- open.ncdf(file1)
no3 <- get.var.ncdf(data,varid="no3")
# Keep index 20 of the third dimension of no3
sigma_plot <- no3[,,20]
grid <- open.ncdf(file2)
plon <- get.var.ncdf(grid,varid="plon")
plat <- get.var.ncdf(grid,varid="plat")
Contrary to what I previously understood, each cell of sigma_plot corresponds to one cell of plon and plat.
# Stack longitude, latitude and the selected no3 slice (sigma_plot) into a
# 180 x 193 x 3 array: one layer per quantity.
A <- array(c(plon, plat, sigma_plot), dim = c(180, 193, 3))
# Flatten each layer into one column, giving (longitude, latitude, value) rows.
B <- apply(A, 3, cbind)
Now B is an array containing for each row: (longitude, latitude, value). But it is not a regular grid, so you need to interpolate a regular grid. Easiest way would be using interp from package akima:
# Interpolate the scattered (lon, lat, value) rows of B onto a regular
# lon/lat grid, draw the result and overlay a world map.
library(akima)
C <- interp(B[,1],B[,2],B[,3],
xo=seq(-180,180,1),yo=seq(-90,90,by=1), # output grid: tweak the step to change the resolution
duplicate='mean') # some entries are duplicated; 'mean' averages them -- choose another policy if needed
image(C) # or filled.contour(C) if you prefer
library(maptools)
data(wrld_simpl)
plot(wrld_simpl, add=TRUE, col="white") # add a simple world map on top

Related

Extraction of data from raster file in R results in misordered vector?

I'm relatively new to geospatial analysis in R, and am attempting to extract the values in a raster file (specifically, USDA's CropScape data layers) to a set of latitudes and longitudes.
The raster package's extract function seems perfect for this, but I'm having an issue where the function returns a vector that appears to be in a different order than the original matrix.
In other words, while the function returns the same number of entries in each raster value group, the vector that raster::extract() returns does not align with the original matrix. If I iterate through each lat/long point using a for loop, I get the same number of entries by group, but the vector has a different order. The order of the input coordinate data does not differ in the final datasets. A manual review of the values the iterated approach returns suggests the for loop represents the "correct" ordering. (i.e. developed areas correctly labeled as 'developed')
This is the general approach for how I've been matching the lat/long data I have to the CropScape raster.
library(raster)
library(sp)

# Import the raster file
raster <- raster(x = "CropScape2017Path")

# Simulate 100 longitude/latitude points
set.seed(1988)
latitude <- rnorm(100, 42, sd = 0.25)
longitude <- rnorm(100, -91, sd = 0.25)
df <- data.frame(longitude, latitude)

# Build a SpatialPoints object in WGS84 longitude/latitude
sp <- sp::SpatialPoints(
  df[, c("longitude", "latitude")],
  proj4string = CRS("+proj=longlat +datum=WGS84")
)

# Reproject the points to the raster's CRS. Slots are accessed with `@`;
# a `#` here would start a comment and truncate the call.
sp <- spTransform(sp, raster@crs)

# Extract the raster value at every point in a single call, returned as a
# data frame (df = TRUE)
rasterOutput1 <- raster::extract(
  raster,
  sp,
  FUN = max,
  df = TRUE
)

# Attach the reprojected coordinates of each point
rasterOutput1 <- cbind(rasterOutput1, sp@coords)
# Extract the raster value one point at a time, as a cross-check of the
# single-call extraction. Each iteration returns a one-row data frame; the
# rows are bound together once at the end instead of growing an object
# inside the loop with `<<-`.
rasterRows <- lapply(seq_len(nrow(df)), function(i) {
  # Spatial point for row i, in WGS84 longitude/latitude
  spRow <- sp::SpatialPoints(
    df[i, c("longitude", "latitude")],
    proj4string = CRS("+proj=longlat +datum=WGS84")
  )
  # Reproject to the raster's CRS (`@` is the slot accessor)
  spRow <- spTransform(spRow, raster@crs)
  # Extract the single raster element
  rasterRow <- raster::extract(
    raster,
    spRow,
    FUN = max,
    df = TRUE
  )
  cbind(rasterRow, spRow@coords)
})
rasterOutput2 <- do.call(rbind, rasterRows)

# Mark the columns so they can sit next to the single-call output
names(rasterOutput2) <- paste0(names(rasterOutput2), "_iterated")
# Put the single-call and the iterated results side by side
rasterOutput <- cbind(rasterOutput1, rasterOutput2)

# Number of rows whose coordinates differ between the approaches (zero)
sum(rasterOutput$longitude != rasterOutput$longitude_iterated, na.rm = TRUE)

# Both approaches return the same number of observations in each group
length(unique(rasterOutput$Class_Names))
sum(table(rasterOutput$Class_Names) == table(rasterOutput$Class_Names_iterated))

# Collapse a vector of class names into the coarse groups A-D; names outside
# the listed classes become NA.
# NOTE(review): %>%, mutate() and case_when() need dplyr, which this snippet
# does not load -- confirm it is attached elsewhere.
to_category <- function(class_name) {
  case_when(
    class_name %in% c("Corn", "Soybeans") ~ "A",
    class_name %in% c("Grassland/Pasture") ~ "B",
    class_name %in% c("Developed/Low Intensity", "Developed/Open Space") ~ "C",
    class_name %in% c("Deciduous Forest", "Woody Wetlands") ~ "D"
  )
}

# Results differ by approach
rasterOutput <- rasterOutput %>%
  mutate(
    cn_cat = to_category(Class_Names),
    cn_cat_iterated = to_category(Class_Names_iterated)
  )
table(rasterOutput$cn_cat, rasterOutput$cn_cat_iterated)
The final table shows the following, where 10 observations are assigned to group A using the first method, but D using the iterated method. I've confirmed the raster cells are in the same order. The coordinate data are in the same order. I'm not seeing any difference between the final datasets other than the order of the raster value vector. Is there any reason why raster::extract() would result in a vector of a different order than an iterated approach, or am I going wrong somewhere else?
Group A
Group B
Group C
Group D
Group A
40
10
3
8
Group B
8
4
1
5
Group C
3
2
1
0
Group D
10
2
1
2
Edit: This result does not persist when using the sample raster below:
set.seed(1988)
raster <- raster()
raster <- raster(ncol=36, nrow=18, xmn=-92, xmx=-90, ymn=41, ymx=43)
res(raster) <- .1
projection(raster) <- "+proj=longlat +datum=WGS84"
values(raster) <- round(rnorm(ncol(raster)*nrow(raster),mean=5.3))
Edit 2: Think I found the solution. The CropScape layer is very large, relating to the entire US at 30m pixels. Since the smaller raster file worked, I made the following adjustment. Using raster_crop instead of the entire raster file made my approaches align.
sp_extent <- extent(sp)
raster_crop <- crop(x = raster, y = sp_extent)

Grid points within polygon: previous solution does not work for me

For some reason I can't get the solution provided by @RichPauloo to work, and I would appreciate some help.
I have a SpatialPolygonsDataFrame called "spdf" (in the dropbox link below)
https://www.dropbox.com/s/ibhp5mqbgfmmntz/spdf.Rda?dl=0
I used the code from below post to get the grid data within the boundary.
Create Grid in R for kriging in gstat
library(sp)
library(splancs)

# Regular grid of roughly 10000 points covering spdf
grd <- makegrid(spdf, n = 10000)
colnames(grd) <- c("x", "y")

# Boundary vertices, read from the S4 slots with `@`.
# NOTE(review): this takes only the first ring of the first polygon; if spdf
# holds several polygons or rings, grid points inside the others are dropped.
outline <- spdf@polygons[[1]]@Polygons[[1]]@coords

# Keep the grid points that fall inside the outline
new_grd <- grd[inout(grd, outline), ]
Here is what I get:
Black dots are "grd" from makegrid
Blue dots are "outline" as boundary
Red dots are "new_grd", the grid points within the boundary
As you can see it does not capture all the data within the boundary? What am I doing wrong?
Try this:
# Packages ----
library(sp)

# Grid ----
# Build the grid from spdf and give its two columns readable names
grd <- makegrid(spdf, n = 100)
colnames(grd) <- c("x", "y")
class(grd) # inspect the class of the raw grid

# Promote the grid to SpatialPoints (no CRS attached)
grd_pts <- SpatialPoints(
  coords = grd,
  proj4string = CRS(as.character(NA))
)
class(grd_pts) # inspect the class again

# Before subsetting: some grid points fall outside the polygon
plot(spdf)
points(grd_pts)

# Spatial subset: keep only the points that lie within the polygon
grd_pts_in <- grd_pts[spdf, ]

# After subsetting
plot(spdf)
points(grd_pts_in)

# Back to plain coordinates, as a matrix and as a data frame
gm <- coordinates(grd_pts_in)
gdf <- as.data.frame(gm)

Calculating the distance between points in R

I looked through previously asked questions dealing with coordinates but couldn't find anything that helps with my problem.
I have dataset that contain ID, Speed, Time , List of Latitude & Longitude. ( dataset can be found in the link)
https://drive.google.com/file/d/1MJUvM5WEhua7Rt0lufCyugBdGSKaHMGZ/view?usp=sharing
I want to measure the distance between each point of Latitude & Longitude.
For example;
Latitude has: x1 ,x2 ,x3 ,...x1000
Longitude has: y1 ,y2 ,y3 ,..., y100
I want to measure the distance between (x1,y1) to all the points , and (x2,y2) to all the points, and so on.
The reason I'm doing this to know which point close to which and assign index to each location based on the distance.
if (x1, y1) is close to (x4,y4) so (x1, y1) will get the index A for example and (x4,y4) will get labeled as B. sort the points in order based on distance.
I tried gDistance function but showed error message: "package ‘gDistance’ is not available (for R version 3.4.3)"
and if I change the version to 3.3 library(rgeos) won't work !!
Any suggestions?
here's what I tried,
# Required packages
library(sp)    # vector data
library(rgeos) # geometry ops

# Read the data and promote a copy of it to a spatial object. The copy must
# carry the same name that coordinates<- is applied to below.
d <- read.csv("ReadyData.csv")
sp.ReadyData <- d
coordinates(sp.ReadyData) <- ~Longitude + Latitude

# Pairwise distances between all points (byid = TRUE gives one row/column
# per point). Note that this overwrites the raw data held in `d`.
d <- gDistance(sp.ReadyData, byid = TRUE)
here's update my solution, I created spatial object and made spatial data frame as follow:
# Build a SpatialPointsDataFrame from the longitude/latitude columns.
# NOTE(review): `spatial` is not defined in this snippet; presumably it is the
# data frame read from the CSV -- confirm.
# Two-column coordinate matrix: longitude first, then latitude
lonlat <- cbind(spatial$Longitude, spatial$Latitude)
# Create a SpatialPoints object:
library(sp)
# First version without a CRS; it is replaced two lines below
pts <- SpatialPoints(lonlat)
crdref <- CRS('+proj=longlat +datum=WGS84')
# Same points, now tagged as WGS84 longitude/latitude
pts <- SpatialPoints(lonlat, proj4string=crdref)
# Make the spatial data frame by attaching the original columns
ptsdf <- SpatialPointsDataFrame(pts, data=spatial)
Now I'm trying to measure the Distance for longitude/latitude coordinates. I tried dist method but seems not working for me and tried pointDistance method:
gdis <- pointDistance(pts, lonlat=TRUE)
still not clear for me how this function can measure the distance, I need to figure out the distance so I can locate the point in the middle and assign numbers for each point based on its location from the middle point..
You can use raster::pointDistance or geosphere::distm among others functions.
Part of your example data (please avoid files in your questions):
d <- read.table(sep=",", text='
"OBU ID","Time Received","Speed","Latitude","Longitude"
"1",20,1479171686325,0,38.929596,-77.2478813
"2",20,1479171686341,0,38.929596,-77.2478813
"3",20,1479171698485,1.5,38.9295887,-77.2478945
"4",20,1479171704373,1,38.9295048,-77.247922
"5",20,1479171710373,0,38.9294865,-77.2479055
"6",20,1479171710373,0,38.9294865,-77.2479055
"7",20,1479171710373,0,38.9294865,-77.2479055
"8",20,1479171716373,2,38.9294773,-77.2478712
"9",20,1479171716374,2,38.9294773,-77.2478712
"10",20,1479171722373,1.32,38.9294773,-77.2477417')
Solution:
library(raster)
m <- pointDistance(d[, c("Longitude", "Latitude")], lonlat=TRUE)
To get the nearest point to each point, you can do
mm <- as.matrix(as.dist(m))
diag(mm) <- NA
i <- apply(mm, 1, which.min)
The point pairs
p <- cbind(1:nrow(mm), i)
To get the distances, you can do:
mm[p]
Or do this:
apply(mm, 1, min, na.rm=TRUE)
Note that rgeos::gDistance is for planar data, not for longitude/latitude data.
Here is a similar question/answer with some illustration.
Your data set is too large to build a single distance matrix. You can process your data in chunks to deal with that. Here I show this with a rather small chunk size of 4 rows; make this number much bigger to speed up processing.
library(geosphere)

chunk <- 4 # rows per chunk; use a much larger value for real data

# First and last row of every chunk. Chunks must not overlap, so each one
# ends on the row just before the next one starts.
start <- seq(1, nrow(d), chunk)
end <- c(start[-1] - 1, nrow(d))

x <- d[, c("Longitude", "Latitude")]

# One result vector per chunk, preallocated
r <- vector("list", length(start))
for (i in seq_along(start)) {
  rows <- start[i]:end[i]
  y <- x[rows, , drop = FALSE]
  # Distances from the points of this chunk (rows) to all points (columns)
  m <- distm(y, x)
  # Blank out each point's distance to itself so it cannot be selected
  m[cbind(seq_along(rows), rows)] <- NA
  # Column index (= row of d) of the nearest other point
  r[[i]] <- apply(m, 1, which.min)
}
r <- unlist(r)
r
# One index per row of d; for the 10 example rows:
# [1] 2 1 1 5 6 5 5 9 8 8
# [1] 2 1 1 5 6 6 5 5 9 8 8 8
So for your data:
d <- read.csv("ReadyData.csv")
chunk <- 100 # rows
# etc
This will take a long time.
An alternative approach:
library(spdep)
x <- as.matrix(d[, c("Longitude", "Latitude")])
k <- as.vector(knearneigh(x, k=1, longlat=TRUE)$nn)
Assuming you have p1 as spatialpoints of x and p2 as spatialpoints of y, to get the index of the nearest other point:
ReadyData$cloDist <- apply(gDistance(p1, p2, byid=TRUE), 1, which.min)
If the same coordinate appears more than once in the list, you will get the index of the point itself, since the closest place to a point is itself. An easy trick to avoid that is to use the second-nearest distance as the reference, with a quick helper function:
f_which.min <- function(vec, idx) sort(vec, index.return = TRUE)$ix[idx]
ReadyData$cloDist2 <- apply(gDistance(p1, p2, byid=TRUE), 1, f_which.min,
idx = 2)

Taking random point from list of points per grid square

Below I have a set of points with locations and attributes.
I have one problem here:
The Attr is not passed into final point_grid_loc
Secondly, what I want do do next is take 1 random point from each grid and return it as a data.frame or SpatialPointDataFrame of points.
Struggling with how to approach it:
# Load libraries
library(sp)
library(gstat)

# Set seed for reproducible results. set.seed() must be called; assigning
# `set.seed = 34` only creates a variable and leaves the RNG unseeded.
set.seed(34)

# Point locations and their attribute values
x <- c(5.9,6.5,7.1,3.1,5.6,8.1,6.3,5.8,2.1,8.8,5.3,6.8,9.9,2.5,5.8,9.1,2.4,2.5,9.2)
y <- c(3.6,6.5,5.4,5.2,1.1,5.1,2.7,3.8,6.07,4.4,7.3,1.8,9.2,8.5,6.8,9.3,2.5,9.2,2.5)
attr <- c(23,56,2,34,7,89,45,34,2,34,5,67,8,99,6,65,3,32,12)
initialdata <- data.frame(x, y, attr)
colnames(initialdata) <- c("x", "y", "attr")

# Creating the SpatialPointsDataFrame:
# coordinates first (no CRS attached), then the coordinates together with
# the attribute column as the attached data
coords <- data.frame(initialdata$x, initialdata$y)
coords <- SpatialPoints(coords, proj4string = CRS(as.character(NA)), bbox = NULL)
initialdata_DF <- data.frame(coords, initialdata$attr)
initialdata_SPDF <- SpatialPointsDataFrame(coords, initialdata_DF)
#==============#
# Side length of one (square) grid cell, in coordinate units
cellsize <- 3
#==============#
# Creating a grid which will constitute a mesh for stratified sampling
# Info how to include CSR p. 50 yellow book
# Bounding box of the points
bb<- bbox(coords)
cs <- c(cellsize,cellsize)
# Centre of the first cell: bounding-box minimum plus half a cell
cc <- bb[,1] + (cs/2)
# Number of cells needed per dimension to span the bounding box
cd <- ceiling(diff(t(bb))/cs)
initialdata_grd <- GridTopology(cellcentre.offset = cc, cellsize = cs,
cells.dim = cd)
initialdata_SG <- SpatialGrid(initialdata_grd) # Final grid created here
# Plot the results:
plot(initialdata_SG)
plot(initialdata_SPDF, add=T,col="blue", pch="+")
# Create one polygon per grid cell:
poly <- as.SpatialPolygons.GridTopology(initialdata_grd)
# Identifies which point is in which grid/polygon location.
# NOTE(review): this pairs the grid object (initialdata_SG) with the
# per-point result of over(); the point data and its attr column are not part
# of the result, which is the problem this thread asks about.
point_grid_loc <- data.frame(initialdata_SG,grid=over(initialdata_SPDF,poly))
I think you're running into trouble at the last step because you're calling the wrong object. If you want to add grid location to your spatial data, try:
initialdata_SPDF$grid <- over(initialdata_SPDF, poly)
To do the sampling part, you can use a split/apply/combine approach, like this:
# Split the spatial data into a list of per-cell objects by grid location
gridlist <- split(initialdata_SPDF, initialdata_SPDF$grid)
# Sample one row from each grid cell. seq_len() is used instead of
# 1:nrow(x) so that an empty cell can never yield the bogus indices c(1, 0).
samples <- lapply(
  gridlist,
  function(x) x[sample(seq_len(nrow(x)), 1, FALSE), ]
)
# Bind the sampled rows back together into a single object
sampledgrid <- do.call(rbind, samples)

How to combine estUD's into single layer

I have some data for 10 animals from which I have generated some random points. Each data set I have replicated 100 times. Below I have separated out of the data 1 animal and generated kernelUD's for each rep. I would now like someway to combine the UD's to be able to produce a summed estimated density map which I can then go on and measure 50% and 90% home ranges along with other metrics.
bat.master <- read.csv("C:/Users/Sim/Dropbox/Wallington GIS/bat.master")
names(bat.master)

# Subset the data frame to the 1st bat only.
# bat1 is a logical mask over the rows of bat.master (`==` compares; a single
# `=` after `$id` is a syntax error).
bat1 <- bat.master$id == "Y2889a"
xybat1 <- subset(bat.master, bat.master$id == "Y2889a", select = x:loopno)

# Change to spatial points
xy <- xybat1[1:2] # first two columns are used as coordinates
# Attach the subset as the data of the points.
# NOTE(review): kernelUD() expects the attached data to be a single id column;
# presumably only loopno (the replicate) should be kept here -- confirm.
SPDF <- SpatialPointsDataFrame(coords = xy, data = xybat1)
ud1 <- kernelUD(SPDF, h = "href", same4all = TRUE, kern = "bivnorm")
Not sure if I understood your question right, but you could try something like this:
library(adehabitatHR)

## generate some dummy data: ids 1 to 10 with 100 relocations each
SPDF <- SpatialPointsDataFrame(
  coords = cbind(rnorm(1000), rnorm(1000)),
  data = data.frame(id = rep(1:10, each = 100))
)
udHR <- kernelUD(SPDF, h = "href", same4all = TRUE, kern = "bivnorm")

## I would proceed using the raster package
library(raster)
## one raster layer per estimated UD
ud1 <- stack(lapply(udHR, raster))

## You can now check the first one
plot(ud1[[1]])
## or all of them
plot(ud1)
## take the mean over all layers
plot(udm <- mean(ud1))

## Now you can either proceed in raster and calculate your isopleths, or
## convert it back to an estUD. This is a bit of a hack and not the nicest
## way to do it: reuse the first estUD and replace its grid slot (`@grid`).
udHR <- udHR[[1]]
udHR@grid <- as(udm, "GridTopology")

## now you can work with udHR as if it were a HR estimate
plot(getverticeshr(udHR, percent = 95))
plot(getverticeshr(udHR, percent = 50), add = TRUE)

Resources