Predict to raster multinomial gbm - r

Currently, it looks as though it is not possible to predict a multinomial gbm model out to a rasterbrick. Note, however, that there is an easy way around this for relatively small raster grids, which is explained below. But the process here is very slow and not without its challenges when you are dealing with large rasters, many classes (in my case vegetation communities) and many predictor variables. I am hoping the information below might be of use to anyone who comes up against the same challenges.
Below I try to predict the probability of occurrence for 36 vegetation communities using a multinomial gbm model and 20 predictor variables. My study area is a 30x30m raster grid with 213,000,000 pixels - however the code below relates to a small grid of 1221 cells that I used to develop/test the process.
> require (gbm)
> require (raster)
> require (rgdal)
> load("gbmmodel_p20.Rda")
> print(gbmmodel)
gbm(formula = as.formula(Nclustal_1 ~ tcd_coast_disa_f + tce_raddq_f +
tce_radwq_f + tct_temp_minwin_f + tct_tempdq_f + tcw_clim_etaaann_f +
tcw_precipseas_f + tcw_precipwq_f + tcw_rain1mm_f + tdd_strmdstge6_i +
tlf_logre10_f + tlf_rough0500_f + trs_land_pfc_2008 + trs88_sspr_g_50p +
trs88_ssum_b_50p + trs88_ssum_d_50p + tsp_bd200_f + tsp_cly200a_f +
tsp_ph200_f + tsp_tn060a_f), distribution = "multinomial",
data = gbmdata, n.trees = 2500, interaction.depth = 2, n.minobsinnode = 3,
shrinkage = 0.003, bag.fraction = 0.75, train.fraction = 1,
cv.folds = 8, keep.data = TRUE, verbose = TRUE, class.stratify.cv = TRUE,
n.cores = 8)
A gradient boosted model with multinomial loss function.2500 iterations were performed.
The best cross-validation iteration was 2500.
There were 20 predictors of which 20 had non-zero influence.
I stacked the predictor variables into a raster stack as follows:
> img.files <- list.files("/mnt/scratch/mcilwea/R/TSG/inmodel20_test",
pattern='\\.img$', full.names=TRUE)
> rasStack <- stack(img.files)
> NAvalue(rasStack) <- -9999
> projection(rasStack)
"+proj=longlat +ellps=GRS80 +towgs84=-16.237,3.51,9.939,1.4157e-006,2.1477e-006,1.3429e-006,1.91e-007 +no_defs"
It's important to check that the names in rasStack are identical to those in the model above
> names(rasStack)
[1] "tcd_coast_disa_f" "tce_raddq_f" "tce_radwq_f"
[4] "tct_tempdq_f" "tct_temp_minwin_f" "tcw_clim_etaaann_f"
[7] "tcw_precipseas_f" "tcw_precipwq_f" "tcw_rain1mm_f"
[10] "tdd_strmdstge6_i" "tlf_logre10_f" "tlf_rough0500_f"
[13] "trs88_sspr_g_50p" "trs88_ssum_b_50p" "trs88_ssum_d_50p"
[16] "trs_land_pfc_2008" "tsp_bd200_f" "tsp_cly200a_f"
[19] "tsp_ph200_f" "tsp_tn060a_f"
Before running predict.gbm I call up the best iteration model
> best.iter <- gbm.perf(gbmmodel, method = "cv", plot.it = TRUE)
I can create a set of raster output images for my test area which has 1221 cells by converting the grid cells to a set of spatial points (as shown below).
# Use the first predictor image as the template grid for the test area.
points <- raster(img.files[1])

# Turn the grid cells into a table of points and promote it to a spatial
# object keyed on its x/y columns.
points.df <- as.data.frame(
  rasterToPoints(points)
)
coordinates(points.df) <- ~ x + y
plot(points.df)

# Pull the coordinates back out and sample the whole predictor stack at
# those locations.
coords <- coordinates(points.df)
rasterOut <- extract(rasStack, coords)

# One row per point: x, y, followed by the predictor values.
outTable <- as.data.frame(
  cbind(coords, rasterOut)
)
outTable[1:1,1:22]
x y tcd_coast_disa_f tce_raddq_f tce_radwq_f tct_temp_minwin_f tct_tempdq_f
149.1269 -35.6457 1.052329 10.82778 23.63533 -0.9852222 5.928154
tcw_clim_etaaann_f tcw_precipseas_f tcw_precipwq_f tcw_rain1mm_f tdd_strmdstge6_i tlf_logre10_f
600 13.93321 179.9841 80.2064 491 1.945529
tlf_rough0500_f trs_land_pfc_2008 trs88_sspr_g_50p trs88_ssum_b_50p trs88_ssum_d_50p tsp_bd200_f
15.6701 0 0.38 0.09000003 0.55 1.590021
tsp_cly200a_f tsp_ph200_f tsp_tn060a_f
33.33834 5.648166 0.03193555
To run the predict.gbm model
# Predict from the gbm model for every row of outTable, using the number of
# trees stored in best.iter, and keep the result as a data frame.
predtable <- as.data.frame(
  predict.gbm(gbmmodel, outTable, n.trees = best.iter, type = "response")
)

# Put the coordinates in front of the predictions so every row can be
# mapped back to its location; show the first row.
predout <- cbind(coords, predtable)
predout[1:1, 1:38]
x y e24.2500 e26.2500 e59.2500 g152.2500 g157.2500 g94.2500 m31.2500
149.1269 -35.6457 0.001286283 0.0006473167 0.002043077 0.4973372 8.686316e-05 0.0006710651 0.01067058
m36.2500 m68.2500 MU11.2500 MU45.2500 OTHER.2500 p14.2500 p15.2500 p17.2500
0.004314056 0.007128109 0.0005012718 0.0006254022 0.1727706 0.1411112 0.0009099294 0.0002520156
p19.2500 p20.2500 p22.2500 p220.2500 p23.2500 p24.2500 p27.2500 p338.2500
0.003205936 0.002534798 0.0001474091 0.001214219 0.008455798 0.01701965 0.001879607 0.002238932
p420.2500 p520.2500 p54.2500 p9.2500 u118.2500 u179.2500 u21.2500 u22.2500
0.001456685 0.00108458 0.0003695966 0.02501649 0.0005977814 0.01711885 0.0558054 0.002357498
u23.2500 u27.2500 u28.2500 u78.2500 Unit5.2500
0.00040357 0.001422519 0.0002764237 0.01699094 4.835942e-05
write.csv(predout, "Predout.csv", row.names=TRUE)
I can write the probability of occurrence values from predtable to a set of 36 new raster images via:
# Write one single-band HFA raster per column of predtable: each column is
# re-gridded on the point coordinates and saved under the column's name.
# The vector is called layer.names (not `names`) so base::names() is not
# shadowed, and seq_along() keeps the loop from running when predtable has
# no columns.
layer.names <- names(predtable)
for (i in seq_along(layer.names)) {
  # predtable[i] (single bracket) keeps a one-column data frame, which is
  # passed as the data part of the SpatialPointsDataFrame.
  SpatialPointspredTable <- SpatialPointsDataFrame(coords = coords, data = predtable[i])
  gridded(SpatialPointspredTable) <- TRUE
  rasValues <- raster(SpatialPointspredTable)
  projection(rasValues) <- "+proj=longlat +ellps=GRS80 +towgs84=-16.237,3.51,9.939,1.4157e-006,2.1477e-006,1.3429e-006,1.91e-007 +no_defs"
  plot(rasValues)
  writeRaster(rasValues, filename = layer.names[i], format = "HFA", overwrite = TRUE)
}
This gives me the outputs I want. HOWEVER, instead of having to predict out to a data frame, the process would be a lot faster and more efficient if it were possible to predict directly to a rasterbrick.
If I run
# Predict directly from the predictor stack to an IDRISI raster file.
# No index argument is supplied in this call; the printed result that
# follows is a single RasterLayer.
# NOTE(review): n.trees, type and cores are presumably forwarded to the
# model's predict method -- confirm against the raster::predict help page.
predict(rasStack,
gbmmodel,
n.trees=best.iter,
filename="multiclass_BRT_20p_test_idrisi",
format="IDRISI",
na.rm=FALSE,
type="response",
overwrite=TRUE,
progress="text",
cores=8)
The output is raster grid that represents the first vegetation community that I want to predict:
|=========================================================| 100%
class : RasterLayer
dimensions : 33, 37, 1221 (nrow, ncol, ncell)
resolution : 0.0002777778, 0.0002777778 (x, y)
extent : 149.1268, 149.1371, -35.65473, -35.64556 (xmin, xmax, ymin, ymax)
coord. ref. : NA
data source : /mnt/scratch/mcilwea/R/TSG/multiclass_BRT_20p_test_idrisi.rdc
names : layer
values : 3.762369e-06, 0.9337785 (min, max)
The IDRISI file format does not support multi-band images so I can't add index=1:36 into the mix to produce a multiband rasterbrick as the output. If I try to do this - setting format="GTiff" or "HFA" (or any other format that requires rgdal) I get the error message: "Error in rgdal::putRasterData(x#file#transient, v, band = 1, offset = off) : Failure during raster IO"
I can, however, get a rasterbrick output if I set format="raster", but this won't let me read/write any data other than that in the IDRISI image (the first output column from the predict.gbm model)
"Warning message:
In .rasterFromRasterFile(grdfile, band = band, objecttype, ...) :
size of values file does not match the number of cells (given the data type)"
# Same prediction, but asking for five output columns (index = 1:5) and
# writing them with format = "raster" under the name "multi_test".
# NOTE(review): the printed summary of predrast shows identical min/max
# values for all five layers, which suggests the per-class columns are not
# being separated. Presumably predict.gbm returns something other than a
# plain matrix for a multinomial model -- confirm, and consider passing a
# wrapper as `fun` that returns an n x classes matrix.
predrast <- predict(object=rasStack,
model=gbmmodel,
n.trees=best.iter,
filename="multi_test",
fun=predict.gbm,
format="raster",
index=1:5,
bandorder="BIL",
ext=extent(rasStack[[1:20]]),
na.rm=FALSE,
type="response",
datatype="FLT4S",
overwrite=TRUE,
progress="text",
cores=8)
|=====================================================================100%
predrast
class : RasterBrick
dimensions : 33, 37, 1221, 5 (nrow, ncol, ncell, nlayers)
resolution : 0.0002777778, 0.0002777778 (x, y)
extent : 149.1268, 149.1371, -35.65473, -35.64556 (xmin, xmax, ymin, ymax)
coord. ref. : +proj=longlat +ellps=GRS80 +towgs84=-16.237,3.51,9.939,1.4157e-006,2.1477e-006,1.3429e-006,1.91e-007 +no_defs
data source : C:\Data\FINAL_TSG\test\multi_test.grd
names : layer.1, layer.2, layer.3, layer.4, layer.5
min values : 3.762369e-06, 3.762369e-06, 3.762369e-06, 3.762369e-06, 3.762369e-06
max values : 0.9337785, 0.9337785, 0.9337785, 0.9337785, 0.9337785
If I try to convert the rasterbrick above to a set of individual raster images
writeRaster(predrast, filename="multi_test.img", format="HFA", bylayer=TRUE, suffix="numbers", overwrite=TRUE)
none of the images make any sense.
It is also a little puzzling that, if I try to write a multiband CDF image, I get a different set of messages from the rgdal error:
| 0%
Error in ncdf::put.var.ncdf(nc, x#title, v, start = c(1, start, lstart), :
put.var.ncdf: error: you asked to write 11988 values, but the passed data array only has 11840 entries!
|======== | 25%
Error in ncdf::put.var.ncdf(nc, x#title, v, start = c(1, start, lstart), :
put.var.ncdf: error: you asked to write 11988 values, but the passed data array only has 11840 entries!
|================== | 50%
Error in ncdf::put.var.ncdf(nc, x#title, v, start = c(1, start, lstart), :
put.var.ncdf: error: you asked to write 11988 values, but the passed data array only has 11840 entries!
|=============================================== | 75%
Error in ncdf::put.var.ncdf(nc, x#title, v, start = c(1, start, lstart), :
put.var.ncdf: error: you asked to write 7992 values, but the passed data array only has 7955 entries!
|=============================================================| 100%
Here, I am not sure what's going on??
It would be great if someone with the know-how could work with the authors of the gbm package to make it possible to predict directly out to a rasterbrick, without running into any of the problems above.
If anyone wants to know the code I have used on the full raster grid, leave a comment below and I am happy to supply.
cheers
Allen
sessionInfo()
R version 3.1.2 (2014-10-31)
Platform: x86_64-w64-mingw32/x64 (64-bit)
locale:
[1] LC_COLLATE=English_Australia.1252 LC_CTYPE=English_Australia.1252 LC_MONETARY=English_Australia.1252
[4] LC_NUMERIC=C LC_TIME=English_Australia.1252
attached base packages:
[1] parallel splines stats graphics grDevices utils datasets methods base
other attached packages:
[1] ncdf_1.6.8 rgdal_0.9-1 gbm_2.1 lattice_0.20-30 survival_2.37-7 raster_2.3-24 sp_1.0-17
loaded via a namespace (and not attached):
[1] grid_3.1.2 tools_3.1.2
# Traceback error for
Error in rgdal::putRasterData(x#file#transient, v, band = 1, offset = off) :
Failure during raster IO
> traceback()
7: .Call("RGDAL_PutRasterData", raster, rasterData, as.integer(offset),
PACKAGE = "rgdal")
6: rgdal::putRasterData(x#file#transient, v, band = 1, offset = off)
5: writeValues(predrast, predv, tr$row[i])
4: writeValues(predrast, predv, tr$row[i])
3: .local(object, ...)
2: predict(object = rasStack, model = gbmmodel, n.trees = best.iter,
filename = "multi_img", format = "HFA", na.rm = FALSE, type = "response",
datatype = "FLT4S", overwrite = TRUE, progress = "text")
1: predict(object = rasStack, model = gbmmodel, n.trees = best.iter,
filename = "multi_img", format = "HFA", na.rm = FALSE, type = "response",
datatype = "FLT4S", overwrite = TRUE, progress = "text")

Related

Write RasterStack and preserve metadata in R

I would like to write a RasterStack object and preserve names and metadata of the individual layers. How to preserve names is explained here. Is there a way to preserve metadata of individual layers when writing a RasterStack object?
Here is replicable code:
# load library
library(raster)
# create example rasters: three 180 x 360 global grids, each filled with a
# single constant value (the empty `resolution=` argument was dropped; it
# passed nothing and only relied on the argument being treated as missing)
ras_1 <- raster(nrows = 180, ncols = 360, xmn = -180, xmx = 180, ymn = -90, ymx = 90, vals = 1)
ras_2 <- raster(nrows = 180, ncols = 360, xmn = -180, xmx = 180, ymn = -90, ymx = 90, vals = 2)
ras_3 <- raster(nrows = 180, ncols = 360, xmn = -180, xmx = 180, ymn = -90, ymx = 90, vals = 3)
# assign names
names(ras_1) <- "raster_A"
names(ras_2) <- "raster_B"
names(ras_3) <- "raster_C"
# assign metadata (one list per layer)
metadata(ras_1) <- list("metadata_raster_A")
metadata(ras_2) <- list("metadata_raster_B")
metadata(ras_3) <- list("metadata_raster_C")
# check
ras_1
ras_2
ras_3
metadata(ras_1)
metadata(ras_2)
metadata(ras_3)
# create and check stack
raster_stack <- stack(ras_1, ras_2, ras_3)
raster_stack
raster_stack[[1]]
metadata(raster_stack[[1]])
# write raster stack to disk (relative file names resolve against the home
# directory from here on)
setwd("~")
# load library
library(terra)
# create rast object
raster_stack_terr <- rast(raster_stack)
# write raster stack
terra::writeRaster(raster_stack_terr, "raster_stack_terr_test.tif")
# load and check raster stack: names and metadata of the first layer after
# the round trip through the GeoTIFF file
raster_stack_check <- stack("raster_stack_terr_test.tif")
raster_stack_check
raster_stack_check[[1]]
names(raster_stack_check[[1]])
metadata(raster_stack_check[[1]])
Use terra to preserve names, according to the 3rd answer from here.
When opening the RasterStack from disk, the metadata is not preserved. See console output:
> metadata(raster_stack_check[[1]])
list()
How to preserve metadata of individual layers when writing and re-loading a RasterStack object? Thanks!
It does not seem like {terra} offers an equivalent to raster::metadata(). However, from my perspective, the use cases would be limited here, because you would only be able to store structured information in corresponding format-specific tags (at least, this is my understanding) when writing to disk.
TIFF files (c.f. here) seem to offer the following tags:
TIFFTAG_DOCUMENTNAME
TIFFTAG_IMAGEDESCRIPTION
TIFFTAG_SOFTWARE
TIFFTAG_DATETIME
TIFFTAG_ARTIST
TIFFTAG_HOSTCOMPUTER
TIFFTAG_COPYRIGHT
TIFFTAG_XRESOLUTION
TIFFTAG_YRESOLUTION
TIFFTAG_RESOLUTIONUNIT
ESRI-Grids, on the other hand, do not offer any possibilities to store metadata except for the known header and maybe the filename as far as I know.
If you only wanted to store certain metadata with your raster object, you might as well make use of attr(r, "meta") <- "foobar". However, I don't see how this (random) information can be stored in specific formats and restored afterwards.
You already noticed names() when using {terra}, but there is also time() to be mentioned. Maybe this already suits your needs, since you did not specify what exactly you intend to store.
# set up a single-layer SpatRaster carrying a name, a time stamp and a
# custom attribute
library(terra)
#> terra 1.6.17
# create a 10 x 10 raster and fill it with 100 random normal values
r <- rast(nrows = 10, ncols = 10)
values(r) <- rnorm(100)
# set metadata: layer name, time stamp, and a free-form R attribute
names(r) <- "foo"
time(r) <- as.Date("2000-01-01")
attr(r, "meta") <- "bar"
# inspect
r
#> class : SpatRaster
#> dimensions : 10, 10, 1 (nrow, ncol, nlyr)
#> resolution : 36, 18 (x, y)
#> extent : -180, 180, -90, 90 (xmin, xmax, ymin, ymax)
#> coord. ref. : lon/lat WGS 84
#> source : memory
#> name : foo
#> min value : -2.503790
#> max value : 1.998731
#> time (days) : 2000-01-01
# write to disk
writeRaster(r, "sample.tif", overwrite = TRUE)
# read from disk
r2 <- rast("sample.tif")
r2
#> class : SpatRaster
#> dimensions : 10, 10, 1 (nrow, ncol, nlyr)
#> resolution : 36, 18 (x, y)
#> extent : -180, 180, -90, 90 (xmin, xmax, ymin, ymax)
#> coord. ref. : lon/lat WGS 84 (EPSG:4326)
#> source : sample.tif
#> name : foo
#> min value : -2.503790
#> max value : 1.998731
#> time (days) : 2000-01-01
# try to access attributes
attr(r2, "meta")
#> NULL
As expected, data stored as attribute has been lost whereas information provided via names() and time() was sustained.

stack and brick function error despite all of the rasters have been

Good day everyone..
I have 13 bioclimatic variables (in .tiff format) that I will use to perform SDM with the dismo package.
I followed the tutorial written by Robert J. Hijmans and Jane Elith.
However, when I tried to stack all of the variables, I got the following error
Error in .local(.Object, ...) :
Error in .rasterObjectFromFile(x, band = band, objecttype = "RasterLayer", :
Cannot create a RasterLayer object from this file.
All of my file's coordinate system, extent, and cell size have been adjusted so they are all the same..
when I tried to used the alternative brick function, I got the following error :
Error in .rasterObjectFromFile(x, objecttype = "RasterBrick", ...) :
Cannot create a RasterLayer object from this file.
In addition: There were 12 warnings (use warnings() to see them)
I used the warning() message but it was empty..
do any of you have any hints regarding what may be the cause of such errors?
i've tried to google it, but unfortunately, no answer can solve it.
Thank you in advance..
Here presented is the fraction of the transcript
# set the working directory; the relative paths below resolve against it
setwd("D:/Riset/MaxentSelaginella/newpaperproject_part2/MakalahVI/Workspace_R")
# load libraries
library("sp")
library("raster")
library("maptools")
library("rgdal")
library("dismo")
library("rJava")
# read the observation records from the csv file
obs.data <- read.csv(file = "data3/Selaginella_plana.csv", sep = ",")
# list the environmental raster files (full paths)
# NOTE(review): `pattern` is a regular expression, so ".tif" matches any
# character followed by "tif" anywhere in the name -- presumably "\\.tif$"
# was intended; confirm the directory holds no other matching files.
files <- list.files(path = "data3/tif/", pattern = ".tif", full.names=TRUE)
# build the multi-layer predictor object from the vector of file paths
# NOTE(review): this is the brick() call the reported error refers to; it
# is given several file paths at once.
predictors <- brick(files)
I guess you need to use stack instead of brick. As per brick help, in fact:
A RasterBrick is a multi-layer raster object. They are typically created from
a multi-layer (band) file; but they can also exist entirely in memory.
They are similar to a RasterStack (that can be created with stack), but processing
time should be shorter when using a RasterBrick. Yet they are less flexible as they can only point to a single file.
So, if we try to “stack” multiple files:
library(raster)
# Build a 100 x 100 raster holding the values 1..10000 and save two copies
# of it as separate temporary .tif files.
r <- raster(nrows = 100, ncols = 100, vals = seq_len(10000))
rfile1 <- tempfile(fileext = ".tif")
rfile2 <- tempfile(fileext = ".tif")
writeRaster(r, filename = rfile1)
writeRaster(r, filename = rfile2)
# The two single-layer files that the following calls try to combine.
files_to_stack <- c(rfile1, rfile2)
This fails:
brick(files_to_stack)
#> Warning in if (x == "" | x == ".") {: the condition has length > 1 and only
#> the first element will be used
#> Warning in if (!start %in% c("htt", "ftp")) {: the condition has length > 1
#> and only the first element will be used
#> Warning in if (fileext %in% c(".GRD", ".GRI")) {: the condition has length
#> > 1 and only the first element will be used
#> Warning in if (!file.exists(x)) {: the condition has length > 1 and only
#> the first element will be used
.....
#> Error in .rasterObjectFromFile(x, objecttype = "RasterBrick", ...): Cannot create a RasterLayer object from this file.
While this works:
stack(files_to_stack)
#> class : RasterStack
#> dimensions : 100, 100, 10000, 2 (nrow, ncol, ncell, nlayers)
#> resolution : 3.6, 1.8 (x, y)
#> extent : -180, 180, -90, 90 (xmin, xmax, ymin, ymax)
#> coord. ref. : +proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0
#> names : file46e41bcd78e3, file46e43ea75bad
#> min values : 1, 1
#> max values : 10000, 10000
If you want to have a brick to get some gain in “efficiency” in further
processing, you can save the different "layers" as a multiband tiff, and then open using brick:
rfile_multi <- tempfile(fileext = ".tif")
writeRaster(stack(files_to_stack), filename = rfile_multi)
brick(rfile_multi)
#> class : RasterBrick
#> dimensions : 100, 100, 10000, 2 (nrow, ncol, ncell, nlayers)
#> resolution : 3.6, 1.8 (x, y)
#> extent : -180, 180, -90, 90 (xmin, xmax, ymin, ymax)
#> coord. ref. : +proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0
#> data source : D:\RTemp\RtmpacXztJ\file4808784f268c.tif
#> names : file4808784f268c.1, file4808784f268c.2
#> min values : 1, 1
#> max values : 10000, 10000
Created on 2018-11-10 by the reprex package (v0.2.1)

how to add average rasters within for-loop that creates the rasters? R

I have several directories with 700+ binary-encoded rasters, and I take the average of the output rasters per directory. However, I currently create the rasters one by one in a for loop, then load the newly created rasters back into R to take the sum to obtain the monthly rainfall total.
However, since I dont need the individual rasters, only the average raster, I have a hunch that I could do this all w/in 1 loop and not save the rasters but just the output average raster, but I am coming up short in how to program this in R.
# Convert every compressed CMORPH binary file of one month into a clipped
# GeoTIFF, then stack the GeoTIFFs and write their sum.
# `Levant` (the clipping geometry) must already exist in the session.
setwd("~/Desktop/CMORPH/Levant-Clip/200001")
dir.output <- "~/Desktop/CMORPH/Levant-Clip/200001" ### change as needed to give output location
# list.files() takes a regular expression, not a glob
path <- list.files("~/Desktop/CMORPH/MonthlyCMORPH/200001", pattern = "\\.bz2$", full.names = TRUE, recursive = TRUE)
# The extent is the same for every file, so build it once.
e <- extent(-180, 180, -90, 83.6236) ### choose the extent based on the netcdf file info
for (i in seq_along(path)) {
  # Open, read and close the connection explicitly so that connections do
  # not pile up over hundreds of files.
  con <- bzfile(path[i], "rb")
  data <- readBin(con, what = "double", endian = "little", n = 4948 * 1649, size = 4) # 4-byte little-endian values
  close(con)
  data[data == -999] <- NA # convert missing data from -999 (CMORPH notation) to NAs
  y <- matrix(data, ncol = 1649, nrow = 4948)
  r <- raster(y)
  tr <- t(r) # transpose
  re <- setExtent(tr, e) ### set the extent to the raster
  ry <- flip(re, direction = "y")
  projection(ry) <- "+proj=longlat +datum=WGS84 +ellps=WGS84"
  C_Lev <- crop(ry, Levant) ### Clip to Levant
  M_C_Lev <- mask(C_Lev, Levant)
  # file.path() inserts the separator that paste(..., sep = '') left out,
  # so the file lands inside dir.output, where it is listed again below.
  writeRaster(M_C_Lev, file.path(dir.output, basename(path[i])), format = "GTiff", overwrite = TRUE) ### the basename allows the file to be named the same as the original
}
#
raspath <- list.files("~/Desktop/CMORPH/Levant-Clip/200001", pattern = "\\.tif$", full.names = TRUE, recursive = TRUE)
rasstk <- stack(raspath)
sum200001 <- sum(rasstk)
# Write the object that was actually computed (the original referred to an
# undefined `avg200001`). `i` still holds the last loop index here, so the
# output is named after the last input file.
writeRaster(sum200001, file.path(dir.output, basename(path[i])), format = "GTiff", overwrite = TRUE)
currently, this code takes about 75 mins to execute, and I have about 120 more directories to go, and am looking for faster solutions.
thank you for all and any comments and input. best, evan
Elaborating on my previous comment, you could try:
# Build the clipped rasters in memory (no intermediate files), stack them
# and write only their cell-wise mean.
# `Levant` (the clipping geometry) must already exist in the session.
setwd("~/Desktop/CMORPH/Levant-Clip/200001")
dir.output <- "~/Desktop/CMORPH/Levant-Clip/200001" ### change as needed to give output location
# list.files() takes a regular expression, not a glob
path <- list.files("~/Desktop/CMORPH/MonthlyCMORPH/200001", pattern = "\\.bz2$", full.names = TRUE, recursive = TRUE)
# The extent is the same for every file, so build it once.
e <- extent(-180, 180, -90, 83.6236) ### choose the extent based on the netcdf file info
# Preallocate the result list instead of growing it inside the loop.
raster_list <- vector("list", length(path))
for (i in seq_along(path)) {
  # Open, read and close the connection explicitly so that connections do
  # not pile up over hundreds of files.
  con <- bzfile(path[i], "rb")
  data <- readBin(con, what = "double", endian = "little", n = 4948 * 1649, size = 4) # 4-byte little-endian values
  close(con)
  data[data == -999] <- NA # convert missing data from -999 (CMORPH notation) to NAs
  y <- matrix(data, ncol = 1649, nrow = 4948)
  r <- raster(y)
  tr <- t(r) # transpose
  re <- setExtent(tr, e) ### set the extent to the raster
  ry <- flip(re, direction = "y")
  projection(ry) <- "+proj=longlat +datum=WGS84 +ellps=WGS84"
  C_Lev <- crop(ry, Levant) ### Clip to Levant
  M_C_Lev <- mask(C_Lev, Levant)
  raster_list[[i]] <- M_C_Lev
}
#
rasstk <- stack(raster_list, quick = TRUE) # OR rasstk <- brick(raster_list, quick = TRUE)
avg200001 <- mean(rasstk)
# `i` still holds the last loop index here, so the output is named after
# the last input file; file.path() supplies the separator that
# paste(..., sep = '') left out.
writeRaster(avg200001, file.path(dir.output, basename(path[i])), format = "GTiff", overwrite = TRUE)
Using the "quick" options in stack should definitely speed-up things, in particular if you have many rasters.
Another possibility is to first compute the average, and then perform the "spatial processing". For example:
# Average the raw value vectors first, then do the raster work only once.
# `path`, `dir.output` and `Levant` must already exist in the session.
for (i in seq_along(path)) {
  # Open, read and close the connection explicitly so that connections do
  # not pile up over hundreds of files.
  con <- bzfile(path[i], "rb")
  data <- readBin(con, what = "double", endian = "little", n = 4948 * 1649, size = 4) # 4-byte little-endian values
  close(con)
  data[data == -999] <- NA # convert missing data from -999 (CMORPH notation) to NAs
  if (i == 1) {
    totdata <- data
    num_nonNA <- as.numeric(!is.na(data))
  } else {
    totdata <- rowSums(cbind(totdata, data), na.rm = TRUE)
    # We have to count the number of "valid" entries so that the average is correct !
    num_nonNA <- rowSums(cbind(num_nonNA, as.numeric(!is.na(data))), na.rm = TRUE)
  }
}
avg_data <- totdata / num_nonNA # Compute the average
# Now do the "spatial" processing
y <- matrix(avg_data, ncol = 1649, nrow = 4948)
r <- raster(y)
e <- extent(-180, 180, -90, 83.6236) ### choose the extent based on the netcdf file info
tr <- t(r) # transpose
re <- setExtent(tr, e) ### set the extent to the raster
ry <- flip(re, direction = "y")
projection(ry) <- "+proj=longlat +datum=WGS84 +ellps=WGS84"
# Crop the georeferenced raster `ry`; the original passed the plain numeric
# vector `avg_data`, which carries no extent to crop.
C_Lev <- crop(ry, Levant) ### Clip to Levant
M_C_Lev <- mask(C_Lev, Levant)
# `i` still holds the last loop index here, so the output is named after
# the last input file; file.path() supplies the separator that
# paste(..., sep = '') left out.
writeRaster(M_C_Lev, file.path(dir.output, basename(path[i])), format = "GTiff", overwrite = TRUE)
This could be faster or slower, depending on "how much" you are cropping the original data.
HTH,
Lorenzo
I'm adding another answer to clarify and simplify things a bit, also in relation with comments in chat. The code below should do what you ask: that is, cycle over files, read the "data", compute the sum over all files and convert it to a raster with specified dimensions.
Note that for testing purposes here I substituted your cycle on file names with a simple 1 to 720 cycle, and file reading with the creation of arrays of the same length as yours filled with values from 1 to 4 and some NA !
# Demonstration with simulated data: accumulate a running sum over 720
# same-sized arrays, then turn the summed vector into a raster.
totdata <- array(dim = 4948 * 1649) # Define dummy array (starts out all NA)
for (i in 1:720) {
  message("Working on file: ", i)
  data <- array(rep(c(1, 2, 3, 4), 4948 * 1649 / 4), dim = 4948 * 1649) # Create a "fake" 4948*1649 array each time to simulate data reading
  data[1:1000] <- -999 # Set some values to NA
  data[data == -999] <- NA # convert missing data from -999
  # Sum the current array with the cumulative sum so far; na.rm = TRUE
  # makes NA entries count as 0 in the sum.
  totdata <- rowSums(cbind(totdata, data), na.rm = TRUE)
}
# Now reshape to matrix and convert to raster, etc.
y <- matrix(totdata, ncol = 1649, nrow = 4948)
r <- raster(y)
e <- extent(-180, 180, -90, 83.6236) ### choose the extent based on the netcdf file info
tr <- t(r) # transpose
re <- setExtent(tr, e) ### set the extent to the raster
ry <- flip(re, direction = "y")
projection(ry) <- "+proj=longlat +datum=WGS84 +ellps=WGS84"
This generates a "proper" raster:
> ry
class : RasterLayer
dimensions : 1649, 4948, 8159252 (nrow, ncol, ncell)
resolution : 0.07275667, 0.1052902 (x, y)
extent : -180, 180, -90, 83.6236 (xmin, xmax, ymin, ymax)
coord. ref. : +proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0
data source : in memory
names : layer
values : 0, 2880 (min, max)
containing the sum of the different arrays. You can see that the max value is 720 * 4 = 2880. (Only caveat: if you have cells which are always NA, you will get 0 instead of NA.)
On my laptop, this runs in about 5 minutes !
In practice:
to avoid memory problems, I am not reading in memory all the data.
Each of your arrays is more or less 64MB, so I cannot load them all
and then do the sum (unless I have 50 GB of RAM to throw away - and even in
that case it would be slow). I instead make use of the associative
property of summation by computing a "cumulative" sum at each
cycle. In this way you are only working with two 8-millions arrays at
a time: the one you read from file "i", and the one that contains
the current sum.
to avoid unnecessary computations here I am summing directly the
1-dimensional arrays I get from reading the binary. You don't need
to reshape to matrix the arrays in the cycle because you can do that
on the final "summed" array which you can then convert to matrix form
I hope this will work for you and that I am not missing something obvious !
As far as I can understand, if using this approach is still slow you are having problems elsewhere (for example in data reading: on 720 files, 3 seconds spent on reading for each file means roughly 35 minutes of processing).
HTH,
Lorenzo

How to plot a spatially explicit hdf5 file? - r - raster

I have an hdf5 data file that has lat/long and a dozen of values, for example temperature. I would like to plot so I can visualize the data in a map style so with lat/long in the axis. Is there a direct way to do it? I only found a very complicated way on a blog. With netcdf data I can just use plot(ncvariable) and it will work, not with a hdf5 file.
I am using the rhdf5. If I read the h5 file and a specific value I only get a vector
> library(rhdf5)
> ncep<-h5read("CB_OL1_1979OCT.h5", "sh")
> head(ncep)
[1] 1.03953242 0.79024571 2.29503083 0.43957919 0.36909071 -0.04498866
I am attaching the file. I am not sure you would call it a 3d, it's a simple raster with x,y,value.
h5 file
It's been some time since you asked, but in case you have not found a solution yet, I'd like to provide two approaches on how to handle this.
Since your hdf5 file is not available anymore, I just picked a random file provided here for illustration.
On the one hand, you could simply use plot(grid = TRUE) from {terra} after having read your hdf5 file using rast():
library(terra)
#> terra 1.6.17
# read the HDF file
r <- rast("AMSR_E_L3_DailyOcean_V04_20020619.hdf")
# get layer names
names(r)
#> [1] "Very_low_res_sst" "Low_res_sst" "Low_res_wind" "Med_res_wind"
#> [5] "Med_res_vapor" "High_res_cloud" "RFI_angle"
# subset dataset by layer name ([[ ]] with a name picks that single layer)
rs <- r[["Med_res_vapor"]]
rs
#> class : SpatRaster
#> dimensions : 720, 1440, 1 (nrow, ncol, nlyr)
#> resolution : 0.25, 0.25 (x, y)
#> extent : -180, 180, -90, 90 (xmin, xmax, ymin, ymax)
#> coord. ref. : lon/lat Unknown datum based upon the Clarke 1866 ellipsoid
#> source : AMSR_E_L3_DailyOcean_V04_20020619.hdf:GlobalGrid:Med_res_vapor
#> varname : AMSR_E_L3_DailyOcean_V04_20020619
#> name : Med_res_vapor
plot(rs, grid = TRUE)
Or you could go for the little bit more complex solution, generating graticules manually using sf::st_graticules() first, followed by making use of {tidyterra} to be able to use {ggplot2} with SpatRaster and SpatVector objects from {terra}:
library(sf)
#> Linking to GEOS 3.9.1, GDAL 3.4.3, PROJ 7.2.1; sf_use_s2() is TRUE
library(ggplot2)
library(tidyterra)
# Build graticule lines every 30 degrees in longitude and latitude,
# transform them to EPSG:4326 and convert the result with terra's vect()
# so it can be drawn with geom_spatvector().
grat <- st_graticule(lon = seq(-180, 180, 30),
lat = seq(-90, 90, 30),
ndiscr = 100) |>
st_transform("epsg:4326") |>
vect()
# Draw the raster layer `rs`, overlay the graticule in semi-transparent
# grey, and put axis breaks every 30 degrees to line up with it.
ggplot() +
geom_spatraster(data = rs) +
geom_spatvector(data = grat, color = alpha("grey60", 0.5)) +
coord_sf(expand = FALSE) +
scale_x_continuous(breaks = seq(-180, 180, 30)) +
scale_y_continuous(breaks = seq(-90, 90, 30))
#> SpatRaster resampled to ncells = 5e+05

R: Raster mosaic from list of rasters?

I am working from the post here: How can I create raster mosaic using list of rasters? to create a raster mosaic using a list of rasters. The example in the answer given by fmark works perfectly but I get an error when I follow the steps using my own data. Not sure where I am going wrong, any help would be very much appreciated!
R version 2.15.3 (2013-03-01)
Platform: x86_64-unknown-linux-gnu (64-bit)
locale:
[1] C
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] raster_2.2-12 rgdal_0.8-10 sp_1.0-14
loaded via a namespace (and not attached):
[1] grid_2.15.3 lattice_0.20-15 tools_2.15.3
I used the function from How to iterate over a list preserving the format of the results? to generate my raster list.
# Open every file named in `list_names` with raster() and return the
# resulting objects as an unnamed list, in the same order.
#
# list_names: character vector of raster file names/paths.
# Returns:    a list with one element per entry of `list_names`
#             (an empty list for empty input).
#
# The original appended to the list only after the loop had finished, so
# only the last file survived, and it returned the value invisibly through
# a trailing assignment. For a single file name the result is unchanged.
ListRasters <- function(list_names) {
  raster_list <- vector("list", length(list_names)) # preallocate
  for (i in seq_along(list_names)) {
    raster_list[[i]] <- raster(list_names[i])
  }
  raster_list
}
Then I generate my list names and create my raster list from them.
# Collect the full paths of all files in mod.dir whose names match the
# glob "*_wgs84.tif" (glob2rx() turns the glob into a regular expression).
wgs84.tif.list <- list.files(path=mod.dir, pattern=glob2rx("*_wgs84.tif"), full.names=T,recursive=F)
# Copy the paths one by one into list_names.
list_names <- NULL
for (i in 1:length(wgs84.tif.list)) {
list_names <- c(list_names, wgs84.tif.list[i])
}
# Call ListRasters once per path. sapply() over a character vector names
# the result after the input strings, so every element of raster.list is
# named by its file path.
raster.list <-sapply(list_names, FUN = ListRasters)
# Add the combining function as an extra named element, then hand the whole
# list to mosaic(); do.call() uses the list names as argument names.
raster.list$fun <- mean
mos <- do.call(mosaic, raster.list)
This is the error I get:
Error in function (classes, fdef, mtable) : unable to find an
inherited method for function 'mosaic' for signature '"missing",
"missing"'
My raster.list starts off like this (it contains 11 rasters):
$`/import/c/w/kbennett/MODSCAG/snow-dav.jpl.nasa.gov/modscag-historic/2002/091/MOD09GA.A2002091.h08v03.005.2007124035032snow_fraction_wgs84.tif`
class : RasterLayer
dimensions : 2400, 2400, 5760000 (nrow, ncol, ncell)
resolution : 463.3127, 463.3127 (x, y)
extent : -11119737, -10007786, 5559984, 6671935 (xmin, xmax, ymin, ymax)
coord. ref. : +proj=sinu +lon_0=0 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0
data source : /import/c/w/kbennett/MODSCAG/snow-dav.jpl.nasa.gov/modscag-historic/2002/091/MOD09GA.A2002091.h08v03.005.2007124035032snow_fraction_wgs84.tif
names : MOD09GA.A2002091.h08v03.005.2007124035032snow_fraction_wgs84
values : 0, 255 (min, max)
My rasters were not named correctly. To rectify this, I ran the following before setting fun on the list:
# Drop the file-path names so do.call() passes the rasters by position.
# (The list is called raster.list everywhere else; `rasters.list` was a typo.)
names(raster.list) <- NULL
Then:
raster.list$fun <- mean
mos <- do.call(mosaic, raster.list)
To expand a bit on foo's answer. You can use sapply to create a list of RasterLayer objects.
# Open each file with raster(); sapply() needs the function to apply, which
# the original call left out.
rlist <- sapply(list_names, raster)
Then add the names of the other arguments. The first ones are 'x' and 'y' (see ?mosaic). However it will also work if they are NULL (as their position will be used).
names(rlist)[1:2] <- c('x', 'y')
rlist$fun <- mean
rlist$na.rm <- TRUE
And now call do.call
x <- do.call(mosaic, rlist)
how about that? Im noob in R.
lista = list of rasters
# Mosaic all rasters in `lista` into one, pairwise from left to right,
# combining overlapping cells with max().
#
# lista:   non-empty list of rasters.
# Returns: the combined raster; a one-element list returns that element
#          unchanged.
#
# Reduce() replaces the manual `for (i in 2:length(lista))` loop, which ran
# with i = 2 and then i = 1 on a one-element list and failed. It also avoids
# a local variable named `raster`, which shadowed the package function.
mosaicar <- function(lista) {
  if (length(lista) == 0) {
    stop("`lista` must contain at least one raster", call. = FALSE)
  }
  Reduce(function(acumulado, siguiente) mosaic(acumulado, siguiente, fun = max), lista)
}
As mentioned by #Bappa Das above, the provided solution does not work with terra. #moho wu did not mention the na.rm issue. It remains unclear how to pass na.rm to terra::mosaic. If anyone has a working answer...

Resources