Mosaic fails when reading rasters from disk but not from memory - R

I ran into a weird issue when trying to make a mosaic from several hundred rasters. The satellite images I'm using are not perfectly aligned and don't all share exactly the same resolution, so I followed the steps found here to resample my rasters and then mosaic them.
I started off testing on a subset of only four images and had no problem doing this (I had to calculate the full extent manually, since unionExtent and the newer union only accept two extent arguments):
# Reading raster files
rst <- lapply(list.files(), FUN = stack)
# Extracting individual extents
rst_ext <- lapply(rst, FUN = extent)
# Calculating full extent
xmin_rst <- c(); xmax_rst <- c(); ymin_rst <- c(); ymax_rst <- c()
for (i in 1:length(rst_ext)) {
  xmin_rst <- c(xmin_rst, rst_ext[[i]]@xmin)
  ymin_rst <- c(ymin_rst, rst_ext[[i]]@ymin)
  xmax_rst <- c(xmax_rst, rst_ext[[i]]@xmax)
  ymax_rst <- c(ymax_rst, rst_ext[[i]]@ymax)
}
full_extent <- extent(min(xmin_rst), max(xmax_rst),
                      min(ymin_rst), max(ymax_rst))
# Creating a raster from the full extent and the first raster's CRS and resolution
bounding_rst <- raster(full_extent,
                       crs = crs(rst[[1]]),
                       res = res(rst[[1]]))
# Resampling rasters to match attributes of the bounding raster
rst_resampled <- lapply(X = rst, FUN = function(x) {
  target_rst <- crop(bounding_rst, x)
  resample(x, target_rst, method = "bilinear")
})
# Creating mosaic
rst_mosaic <- do.call("mosaic", c(rst_resampled, fun = mean))
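As an aside on the manual extent loop above: since union() takes exactly two extents, a hedged alternative (my addition, assuming raster's union method for two Extent objects) is to fold it over the extent list:
# Fold the two-argument union() over all extents to get the full extent
full_extent <- Reduce(raster::union, rst_ext)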
That worked out OK, but of course I didn't want to keep all those rasters in memory, since I'd run out of it. I decided to write them to a new folder, re-read them as stacks, and then make the mosaic.
# Function to crop, resample and write to a new GeoTIFF
resample_write <- function(x) {
  target_rst <- crop(bounding_rst, x)
  x <- resample(x, target_rst, method = "bilinear")
  save_name <- gsub("\\.1",
                    "_resampled.tif",
                    names(x)[1]) # Modifying the name of the 1st band
  writeRaster(x,
              filename = paste("../testing_resampling/",
                               save_name, sep = ""),
              format = "GTiff")
}
# Running the function
lapply(rst, FUN = resample_write)
# Reading resampled images
setwd("../testing_resampling/")
rst_resampled2 <- lapply(list.files(), FUN = stack)
## Making the mosaic
rst_mosaic2 <- do.call("mosaic", c(rst_resampled2, fun = mean))
This gives the following error:
> rst_mosaic2 <- do.call("mosaic", c(rst_resampled2, fun = mean))
Error in compareRaster(x, extent = FALSE, rowcol = FALSE, orig = TRUE, :
different origin
I was able to get around it by increasing the tolerance argument of mosaic to 0.4, but I still don't understand why rst_resampled and rst_resampled2 yield different mosaic results.
Comparing them both with compareRaster and cellStats tells me that they're exactly the same.
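A hedged diagnostic sketch (my addition): raster's origin() exposes the origin that compareRaster() is complaining about, so the in-memory and re-read rasters can be inspected directly:
# Compare the origins of the in-memory and the re-read rasters
t(sapply(rst_resampled, origin))
t(sapply(rst_resampled2, origin))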


Error in do.ply(i) : task 1 failed - "could not find function "%>%"" in R parallel programming

Every time I run the script it gives me this error: Error in { : task 1 failed - "could not find function "%>%""
I have already checked every post on this forum and tried to apply the suggestions, but none of them works.
Please advise any solution.
Please note: I have only 2 cores on my PC.
My code is as follows:
library(dplyr) # For basic data manipulation
library(ncdf4) # For creating NetCDF files
library(tidync) # For easily dealing with NetCDF data
library(ggplot2) # For visualising data
library(doParallel) # For parallel processing
MHW_res_grid <- readRDS("C:/Users/SUDHANSHU KUMAR/Desktop/MTech Project/R/MHW_result.Rds")
# Function for creating arrays from data.frames
df_acast <- function(df, lon_lat){
  # Force grid
  res <- df %>%
    right_join(lon_lat, by = c("lon", "lat")) %>%
    arrange(lon, lat)
  # Convert date values to integers if they are present
  if(lubridate::is.Date(res[1,4])) res[,4] <- as.integer(res[,4])
  # Create array
  res_array <- base::array(res[,4], dim = c(length(unique(lon_lat$lon)), length(unique(lon_lat$lat))))
  dimnames(res_array) <- list(lon = unique(lon_lat$lon),
                              lat = unique(lon_lat$lat))
  return(res_array)
}
# Wrapper function for last step before data are entered into NetCDF files
df_proc <- function(df, col_choice){
  # Determine the correct array dimensions
  lon_step <- mean(diff(sort(unique(df$lon))))
  lat_step <- mean(diff(sort(unique(df$lat))))
  lon <- seq(min(df$lon), max(df$lon), by = lon_step)
  lat <- seq(min(df$lat), max(df$lat), by = lat_step)
  # Create full lon/lat grid
  lon_lat <- expand.grid(lon = lon, lat = lat) %>%
    data.frame()
  # Acast only the desired column
  dfa <- plyr::daply(df[c("lon", "lat", "event_no", col_choice)],
                     c("event_no"), df_acast, .parallel = T, lon_lat = lon_lat)
  return(dfa)
}
# We must now run this function on each column of data we want to add to the NetCDF file
doParallel::registerDoParallel(cores = 2)
prep_dur <- df_proc(MHW_res_grid, "duration")
prep_max_int <- df_proc(MHW_res_grid, "intensity_max")
prep_cum_int <- df_proc(MHW_res_grid, "intensity_cumulative")
prep_peak <- df_proc(MHW_res_grid, "date_peak")
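A hedged note (my addition, not from the original post): plyr's .parallel = TRUE hands df_acast to foreach workers, and those workers do not inherit the packages attached in the main session, so %>% is undefined there. A minimal sketch of a likely fix, using plyr's documented .paropts pass-through to foreach:
# Pass the packages the workers need via .paropts (sketch; adjust the list as needed)
dfa <- plyr::daply(df[c("lon", "lat", "event_no", col_choice)],
                   c("event_no"), df_acast, .parallel = T,
                   .paropts = list(.packages = c("dplyr", "lubridate")),
                   lon_lat = lon_lat)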

R: nel2igraph and PN.amalgamation - igraph not correctly produced

I am encountering a problem with the package shp2graph. I want to use the function PN.amalgamation, which works fine (see below). Afterwards, I would like to create an igraph object, but here the code fails to do that.
I can create igraph objects just fine with every non-amalgamated shp2graph object.
Here is my sample code, which is largely copy-pasted from the description of the package shp2graph:
library(igraph)
library(shp2graph)
data(ORN)
rtNEL <- readshpnw(ORN.nt, ELComputed = TRUE)
res.sl <- SL.extraction(rtNEL[[2]], rtNEL[[3]])
res.me <- ME.simplification(res.sl[[1]], res.sl[[2]], DegreeL = res.sl[[4]])
res.pn <- PN.amalgamation(res.me[[1]], res.me[[2]], DegreeL = res.me[[4]])
ptcoords <- Nodes.coordinates(res.pn[[1]])
plot(ORN.nt)
points(ptcoords, col = "green")
plot(ORN.nt)
points(Nodes.coordinates(rtNEL[[2]]), col = "red")
# igraph created from amalgamation is wrong
test <- nel2igraph(nodelist = res.pn[[1]], edgelist = res.pn[[2]], Directed = TRUE)
plot(test, vertex.size = 1, edge.width = 1, edge.arrow.size = 0, vertex.label = NA)
# res.me is one step before amalgamation
test <- nel2igraph(nodelist = res.me[[1]], edgelist = res.me[[2]], Directed = TRUE)
plot(test, vertex.size = 1, edge.width = 1, edge.arrow.size = 0, vertex.label = NA)
Any help is appreciated.
I have found that the bug lies in the interaction with the igraph package. The issue is that the node labels created by PN.amalgamation are no longer continuous; some are missing, since we deleted them. However, igraph still tries to create the missing nodes and then warns about it.
For anyone having the same trouble, here is a work-around, which re-indexes the labels.
Create your own nel2igraph function:
nel2igraph_corr <- function (nodelist, edgelist, weight = NULL, eadf = NULL, Directed = FALSE)
{
  nodes <- nodelist[, 1]
  Ne <- length(edgelist[, 1])
  Nn <- length(nodes)
  # Re-index the node labels so that they are continuous again
  for (i in 1:Nn) {
    kk <- nodelist[i, ][[1]]
    edgelist[which(edgelist[, c(2)] == kk), 2] <- i
    edgelist[which(edgelist[, c(3)] == kk), 3] <- i
    nodelist[i, ][[1]] <- i
  }
  if (!is.null(weight)) {
    if (length(weight) != Ne && is.numeric(weight))
      stop("Please give a right edge weight, which must be numeric and the same length as the edges element")
  }
  if (!is.null(eadf)) {
    if (length(eadf[, 1]) != Ne)
      stop("The eadf must be numeric and the same length as the edges element")
  }
  gr <- graph.edgelist(unique(edgelist[, c(2, 3)]), directed = T)
  gr <- set.vertex.attribute(gr, "x", V(gr), Nodes.coordinates(nodelist)[, 1])
  gr <- set.vertex.attribute(gr, "y", V(gr), Nodes.coordinates(nodelist)[, 2])
  gr.es <- E(gr)
  if (!is.null(weight))
    gr <- set.edge.attribute(gr, "weight", gr.es, weight)
  if (!is.null(eadf)) {
    eanms <- colnames(eadf)
    n <- length(eanms)
    for (i in 1:n) gr <- set.edge.attribute(gr, eanms[i], gr.es, eadf[, i])
  }
  gr
}
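A short usage note (my addition): the corrected function is a drop-in replacement for the failing nel2igraph call from the question:
# Re-run the amalgamated network through the corrected function
test <- nel2igraph_corr(nodelist = res.pn[[1]], edgelist = res.pn[[2]], Directed = TRUE)
plot(test, vertex.size = 1, edge.width = 1, edge.arrow.size = 0, vertex.label = NA)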

Calculate the length of shared boundaries between multiple polygons

I have a shapefile and I want to know for each polygon what other polygons touch it. To that end I have this code:
require("rgdal")
require("rgeos")
download.file("https://www.dropbox.com/s/vbxx9dic34qwz63/Polygons.zip?dl=1", "Polygons.zip")
unzip("Polygons.zip")
Shapefile <- readOGR(".","Polygons")
Touching_List <- gTouches(Shapefile, byid = TRUE, returnDense=FALSE)
Touching_DF <- setNames(stack(lapply(Touching_List, as.character)), c("TOUCHING", "ORIGIN"))
I now want to go further and understand the extent to which each polygon touches the other polygons. What I am after, for each row in Touching_DF, is the total perimeter of each ORIGIN polygon and the total length along which each TOUCHING polygon touches the origin polygon. This will then allow the percentage of the shared boundary to be calculated. I can imagine the output being 3 new columns in Touching_DF (e.g. for the first row it could be something like: origin perimeter 1000 m, touching length 500 m, shared boundary 50%). Thanks.
EDIT 1
I have applied @StatnMap's answer to my real dataset. It appears that gTouches returns results when polygons share just a point as well as when they share an edge. These point intersections cause issues because they have no length. I have modified @StatnMap's portion of code to deal with it, but when it comes to creating the data frame at the end there is a mismatch between how many shared edges/vertices gTouches returns and how many edges have lengths.
Here is some code to demonstrate the problem using a sample of my actual dataset:
library(rgdal)
library(rgeos)
library(sp)
library(raster)
download.file("https://www.dropbox.com/s/hsnrdfthut6klqn/Sample.zip?dl=1", "Sample.zip")
unzip("Sample.zip")
Shapefile <- readOGR(".","Sample")
Touching_List <- gTouches(Shapefile, byid = TRUE, returnDense=FALSE)
# ---- Calculate perimeters of all polygons ----
perimeters <- sp::SpatialLinesLengths(as(Shapefile, "SpatialLines"))
# ---- All in a lapply loop ----
all.length.list <- lapply(1:length(Touching_List), function(from) {
  lines <- rgeos::gIntersection(Shapefile[from,], Shapefile[Touching_List[[from]],], byid = TRUE)
  if(class(lines) != "SpatialLines"){lines <- lines@lineobj}
  l_lines <- sp::SpatialLinesLengths(lines, longlat = FALSE)
  results <- data.frame(origin = from,
                        perimeter = perimeters[from],
                        touching = Touching_List[[from]],
                        t.length = l_lines,
                        t.pc = 100*l_lines/perimeters[from])
  results
})
This specifically shows the issue for one of the polygons:
from <- 4
lines <- rgeos::gIntersection(Shapefile[from,], Shapefile[Touching_List[[from]],], byid = TRUE)
if(class(lines) != "SpatialLines"){lines <- lines@lineobj}
l_lines <- sp::SpatialLinesLengths(lines, longlat=FALSE)
plot(Shapefile[c(from, Touching_List[[from]]),])
plot(lines, add = TRUE, col = 1 + 1:length(Touching_List[[from]]), lwd = 2)
The two possible solutions I see are: 1. getting gTouches to return only shared edges with a length greater than zero, or 2. returning a length of zero (rather than an error) when a point rather than an edge is encountered. So far I can't find anything that will do either of these things.
EDIT 2
@StatnMap's revised solution works great. However, if a polygon does not share a snapped border with its neighbouring polygon (i.e. the boundary goes to a point and then creates an island sliver polygon), then the following error comes up after lines <- rgeos::gIntersection(Shapefile[from,], Shapefile[Touching_List[[from]],], byid = TRUE):
Error in RGEOSBinTopoFunc(spgeom1, spgeom2, byid, id, drop_lower_td, unaryUnion_if_byid_false, :
Geometry collections may not contain other geometry collections
I have been looking for a solution that identifies polygons with badly drawn borders, skips the calculations for them, and returns NA in res (so they can still be identified later). However, I have been unable to find a command that distinguishes these problematic polygons from 'normal' polygons; a sketch of such a guard follows the example below.
Running @StatnMap's revised solution with these 8 polygons demonstrates the issue:
download.file("https://www.dropbox.com/s/ttg2mi2nq1gbbrq/Bad_Polygon.zip?dl=1", "Bad_Polygon.zip")
unzip("Bad_Polygon.zip")
Shapefile <- readOGR(".","Bad_Polygon")
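A hedged sketch of such a guard (my addition, not part of @StatnMap's answer below): wrapping the intersection in tryCatch lets the failing polygons fall through to NA rows instead of stopping the loop:
all.length.list <- lapply(1:length(Touching_List), function(from) {
  lines <- tryCatch(
    rgeos::gIntersection(Shapefile[from,], Shapefile[Touching_List[[from]],], byid = TRUE),
    error = function(e) NULL) # geometry-collection errors land here
  if (is.null(lines)) { # return NA rows so these polygons can be identified later
    return(data.frame(origin = from, perimeter = perimeters[from],
                      touching = Touching_List[[from]], t.length = NA, t.pc = NA))
  }
  # ... otherwise proceed as in the revised loop below ...
})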
The intersection of two polygons that only touch each other is a line, and calculating a line's length is easy with the functions of R's spatial libraries.
As you started your example with library sp, you'll find a solution with this library first. However, I also give you a solution with the new library sf.
Calculate polygons' shared boundary lengths with library sp
require("rgdal")
require("rgeos")
library(sp)
library(raster)
download.file("https://www.dropbox.com/s/vbxx9dic34qwz63/Polygons.zip?dl=1", "Polygons.zip")
unzip("Polygons.zip")
Shapefile <- readOGR(".","Polygons")
Touching_List <- gTouches(Shapefile, byid = TRUE, returnDense=FALSE)
# Touching_DF <- setNames(utils::stack(lapply(Touching_List, as.character)), c("TOUCHING", "ORIGIN"))
# ---- Calculate perimeters of all polygons ----
perimeters <- sp::SpatialLinesLengths(as(Shapefile, "SpatialLines"))
# ---- Example with the first object of the list and first neighbor ----
from <- 1
to <- 1
line <- rgeos::gIntersection(Shapefile[from,], Shapefile[Touching_List[[from]][to],])
l_line <- sp::SpatialLinesLengths(line)
plot(Shapefile[c(from, Touching_List[[from]][to]),])
plot(line, add = TRUE, col = "red", lwd = 2)
# ---- Example with the first object of the list and all neighbors ----
from <- 1
lines <- rgeos::gIntersection(Shapefile[from,], Shapefile[Touching_List[[from]],], byid = TRUE)
l_lines <- sp::SpatialLinesLengths(lines)
plot(Shapefile[c(from, Touching_List[[from]]),])
plot(lines, add = TRUE, col = 1 + 1:length(Touching_List[[from]]), lwd = 2)
# ---- All in a lapply loop ----
all.length.list <- lapply(1:length(Touching_List), function(from) {
  lines <- rgeos::gIntersection(Shapefile[from,], Shapefile[Touching_List[[from]],], byid = TRUE)
  l_lines <- sp::SpatialLinesLengths(lines)
  res <- data.frame(origin = from,
                    perimeter = perimeters[from],
                    touching = Touching_List[[from]],
                    t.length = l_lines,
                    t.pc = 100*l_lines/perimeters[from])
  res
})
# ---- Retrieve as a dataframe ----
all.length.df <- do.call("rbind", all.length.list)
In the resulting data frame (all.length.df above), t.length is the touching length and t.pc is the touching percentage with regard to the perimeter of the polygon of origin.
Edit: Some shared boundaries are points (with sp)
As commented, some shared boundaries may be a single point instead of a line. To account for this case, I suggest doubling the coordinates of the point to create a line of length 0. This requires calculating the intersections with the other polygons one by one whenever this case appears.
For a single polygon, we can test this:
# Example with the fourth object of the list and all neighbours
from <- 4
lines <- rgeos::gIntersection(Shapefile[from,], Shapefile[Touching_List[[from]],], byid = TRUE)
# If lines and points, need to do it one by one to find the point
if (class(lines) == "SpatialCollections") {
  list.Lines <- lapply(1:length(Touching_List[[from]]), function(to) {
    line.single <- rgeos::gIntersection(Shapefile[from,], Shapefile[Touching_List[[from]][to],])
    if (class(line.single) == "SpatialPoints") {
      # Double the point to create a line
      L1 <- rbind(line.single@coords, line.single@coords)
      rownames(L1) <- letters[1:2]
      Sl1 <- Line(L1)
      Lines.single <- Lines(list(Sl1), ID = as.character(to))
    } else if (class(line.single) == "SpatialLines") {
      Lines.single <- line.single@lines[[1]]
      Lines.single@ID <- as.character(to)
    }
    Lines.single
  })
  lines <- SpatialLines(list.Lines)
}
l_lines <- sp::SpatialLinesLengths(lines)
plot(Shapefile[c(from, Touching_List[[from]]),])
plot(lines, add = TRUE, col = 1 + 1:length(Touching_List[[from]]), lwd = 2)
For all in a lapply loop:
# Corrected for point outputs: All in a lapply loop
all.length.list <- lapply(1:length(Touching_List), function(from) {
  lines <- rgeos::gIntersection(Shapefile[from,], Shapefile[Touching_List[[from]],], byid = TRUE)
  if (class(lines) == "SpatialCollections") {
    list.Lines <- lapply(1:length(Touching_List[[from]]), function(to) {
      line.single <- rgeos::gIntersection(Shapefile[from,], Shapefile[Touching_List[[from]][to],])
      if (class(line.single) == "SpatialPoints") {
        # Double the point to create a line
        L1 <- rbind(line.single@coords, line.single@coords)
        rownames(L1) <- letters[1:2]
        Sl1 <- Line(L1)
        Lines.single <- Lines(list(Sl1), ID = as.character(to))
      } else if (class(line.single) == "SpatialLines") {
        Lines.single <- line.single@lines[[1]]
        Lines.single@ID <- as.character(to)
      }
      Lines.single
    })
    lines <- SpatialLines(list.Lines)
  }
  l_lines <- sp::SpatialLinesLengths(lines)
  res <- data.frame(origin = from,
                    perimeter = perimeters[from],
                    touching = Touching_List[[from]],
                    t.length = l_lines,
                    t.pc = 100*l_lines/perimeters[from])
  res
})
all.length.df <- do.call("rbind", all.length.list)
This may also be applied with library sf, but as you apparently chose to work with sp, I won't update the code for this part. Maybe later...
---- End of Edit ----
Calculate polygons' shared boundary lengths with library sf
Figures and outputs are the same.
library(sf)
Shapefile.sf <- st_read(".","Polygons")
# ---- Touching list ----
Touching_List <- st_touches(Shapefile.sf)
# ---- Polygons perimeters ----
perimeters <- st_length(Shapefile.sf)
# ---- Example with the first object of the list and first neighbour ----
from <- 1
to <- 1
line <- st_intersection(Shapefile.sf[from,], Shapefile.sf[Touching_List[[from]][to],])
l_line <- st_length(line)
plot(Shapefile.sf[c(from, Touching_List[[from]][to]),])
plot(line, add = TRUE, col = "red", lwd = 2)
# ---- Example with the first object of the list and all neighbours ----
from <- 1
lines <- st_intersection(Shapefile.sf[from,], Shapefile.sf[Touching_List[[from]],])
lines <- st_cast(lines) # In case of multiple geometries (ex. from=71)
l_lines <- st_length(lines)
plot(Shapefile.sf[c(from, Touching_List[[from]]),])
plot(lines, add = TRUE, col = 1:length(Touching_List[[from]]), lwd = 2)
# ---- All in a lapply loop ----
all.length.list <- lapply(1:length(Touching_List), function(from) {
  lines <- st_intersection(Shapefile.sf[from,], Shapefile.sf[Touching_List[[from]],])
  lines <- st_cast(lines) # In case of multiple geometries
  l_lines <- st_length(lines)
  res <- data.frame(origin = from,
                    perimeter = as.vector(perimeters[from]),
                    touching = Touching_List[[from]],
                    t.length = as.vector(l_lines),
                    t.pc = as.vector(100*l_lines/perimeters[from]))
  res
})
# ---- Retrieve as dataframe ----
all.length.df <- do.call("rbind", all.length.list)
Just to add to Sébastien Rochette's answer: I think the function st_length from the sf package does not work with polygons (see this post). Instead I suggest using the function st_perimeter from the lwgeom package.
(I wanted to comment on the answer but I don't have enough reputation.)
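A minimal sketch of that suggestion (my addition, assuming the lwgeom package is installed):
# install.packages("lwgeom")
library(lwgeom)
perimeters <- st_perimeter(Shapefile.sf) # true polygon perimeters, returned with units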

How to average rasters within the for-loop that creates them? R

I have several directories, each with 700+ binary-encoded rasters, and I take the average of the output rasters per directory. However, I currently create the rasters one by one in a for loop, then load the newly created rasters back into R to take the sum and obtain the monthly rainfall total.
Since I don't need the individual rasters, only the average raster, I have a hunch that I could do this all within one loop and not save the rasters, just the output average raster, but I am coming up short on how to program this in R.
setwd("~/Desktop/CMORPH/Levant-Clip/200001")
dir.output <- '~/Desktop/CMORPH/Levant-Clip/200001' ### change as needed to give output location
path <- list.files("~/Desktop/CMORPH/MonthlyCMORPH/200001",pattern="*.bz2", full.names=T, recursive=T)
for (i in 1:length(path)) {
  files = bzfile(path[i], "rb")
  data <- readBin(files, what = "double", endian = "little", n = 4948*1649, size = 4) # Mode of the vector to be read
  data[data == -999] <- NA # convert missing data from -999 (CMORPH notation) to NAs
  y <- matrix((data = data), ncol = 1649, nrow = 4948)
  r <- raster(y)
  e <- extent(-180, 180, -90, 83.6236) ### choose the extent based on the netcdf file info
  tr <- t(r) # transpose
  re <- setExtent(tr, extent(e)) ### set the extent to the raster
  ry <- flip(re, direction = 'y')
  projection(ry) <- "+proj=longlat +datum=WGS84 +ellps=WGS84"
  C_Lev <- crop(ry, Levant) ### Clip to Levant
  M_C_Lev <- mask(C_Lev, Levant)
  writeRaster(M_C_Lev, paste(dir.output, basename(path[i]), sep = '/'), format = 'GTiff', overwrite = T) ### the basename allows the file to be named the same as the original
}
#
raspath <- list.files('~/Desktop/CMORPH/Levant-Clip/200001', pattern = "*.tif", full.names = T, recursive = T)
rasstk <- stack(raspath)
sum200001 <- sum(rasstk)
writeRaster(sum200001, paste(dir.output, basename(path[i]), sep = '/'), format = 'GTiff', overwrite = T) ### the basename allows the file to be named the same as the original
Currently, this code takes about 75 minutes to execute, and I have about 120 more directories to go, so I am looking for faster solutions.
Thank you for any and all comments and input. Best, Evan
Elaborating on my previous comment, you could try:
setwd("~/Desktop/CMORPH/Levant-Clip/200001")
dir.output <- '~/Desktop/CMORPH/Levant-Clip/200001' ### change as needed to give output location
path <- list.files("~/Desktop/CMORPH/MonthlyCMORPH/200001", pattern = "*.bz2", full.names = T, recursive = T)
raster_list = list()
for (i in 1:length(path)) {
  files = bzfile(path[i], "rb")
  data <- readBin(files, what = "double", endian = "little", n = 4948*1649, size = 4) # Mode of the vector to be read
  data[data == -999] <- NA # convert missing data from -999 (CMORPH notation) to NAs
  y <- matrix((data = data), ncol = 1649, nrow = 4948)
  r <- raster(y)
  if (i == 1) {
    e <- extent(-180, 180, -90, 83.6236) ### choose the extent based on the netcdf file info
  }
  tr <- t(r) # transpose
  re <- setExtent(tr, extent(e)) ### set the extent to the raster
  ry <- flip(re, direction = 'y')
  projection(ry) <- "+proj=longlat +datum=WGS84 +ellps=WGS84"
  C_Lev <- crop(ry, Levant) ### Clip to Levant
  M_C_Lev <- mask(C_Lev, Levant)
  raster_list[[i]] = M_C_Lev
}
#
rasstk <- stack(raster_list, quick = TRUE) # OR rasstk <- brick(raster_list, quick = TRUE)
avg200001 <- mean(rasstk)
writeRaster(avg200001, paste(dir.output, basename(path[i]), sep = '/'), format = 'GTiff', overwrite = T) ### the basename allows the file to be named the same as the original
Using the "quick" option in stack should definitely speed things up, in particular if you have many rasters.
Another possibility is to first compute the average, and then perform the "spatial" processing. For example:
for (i in 1:length(path)) {
  files = bzfile(path[i], "rb")
  data <- readBin(files, what = "double", endian = "little", n = 4948*1649, size = 4) # Mode of the vector to be read
  data[data == -999] <- NA # convert missing data from -999 (CMORPH notation) to NAs
  if (i == 1) {
    totdata <- data
    num_nonNA <- as.numeric(!is.na(data))
  } else {
    totdata = rowSums(cbind(totdata, data), na.rm = TRUE)
    # We have to count the number of "valid" entries so that the average is correct !
    num_nonNA = rowSums(cbind(num_nonNA, as.numeric(!is.na(data))), na.rm = TRUE)
  }
}
avg_data = totdata/num_nonNA # Compute the average
# Now do the "spatial" processing
y <- matrix(avg_data, ncol = 1649, nrow = 4948)
r <- raster(y)
e <- extent(-180, 180, -90, 83.6236) ### choose the extent based on the netcdf file info
tr <- t(r) # transpose
re <- setExtent(tr, extent(e)) ### set the extent to the raster
ry <- flip(re, direction = 'y')
projection(ry) <- "+proj=longlat +datum=WGS84 +ellps=WGS84"
C_Lev <- crop(ry, Levant) ### Clip to Levant
M_C_Lev <- mask(C_Lev, Levant)
writeRaster(M_C_Lev, paste(dir.output, basename(path[i]), sep = '/'), format = 'GTiff', overwrite = T) ### the basename allows the file to be named the same as the original
This could be faster or slower, depending from "how much" you are cropping the original data.
HTH,
Lorenzo
I'm adding another answer to clarify and simplify things a bit, also in relation to the comments in chat. The code below should do what you ask: that is, cycle over the files, read the "data", compute the sum over all files and convert the result to a raster with the specified dimensions.
Note that for testing purposes I substituted your cycle over file names with a simple 1-to-720 cycle, and the file reading with the creation of arrays of the same length as yours, filled with values from 1 to 4 plus some NAs!
totdata <- array(dim = 4948*1649) # Define dummy array
for (i in 1:720) {
  message("Working on file: ", i)
  data <- array(rep(c(1,2,3,4), 4948*1649/4), dim = 4948*1649) # Create a "fake" 4948*1649 array each time to simulate data reading
  data[1:1000] <- -999 # Set some values to NA
  data[data == -999] <- NA # convert missing data from -999
  totdata <- rowSums(cbind(totdata, data), na.rm = T) # Sum the current array with the cumulative sum so far
}
# Now reshape to matrix and convert to raster, etc.
y <- matrix(totdata, ncol = 1649, nrow = 4948)
r <- raster(y)
e <- extent(-180, 180, -90, 83.6236) ### choose the extent based on the netcdf file info
tr <- t(r) # transpose
re <- setExtent(tr, e) ### set the extent to the raster
ry <- flip(re, direction = 'y')
projection(ry) <- "+proj=longlat +datum=WGS84 +ellps=WGS84"
This generates a "proper" raster:
> ry
class : RasterLayer
dimensions : 1649, 4948, 8159252 (nrow, ncol, ncell)
resolution : 0.07275667, 0.1052902 (x, y)
extent : -180, 180, -90, 83.6236 (xmin, xmax, ymin, ymax)
coord. ref. : +proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0
data source : in memory
names : layer
values : 0, 2880 (min, max)
containing the sum of the different arrays. You can notice that the max value is 720 * 4 = 2880. (Only caveat: if you have cells which are always NA, you will get 0 instead of NA.)
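A hedged one-liner for that caveat (my addition, assuming the num_nonNA counter from the previous answer is also tracked here):
totdata[num_nonNA == 0] <- NA # cells that never saw a valid value stay NA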
On my laptop, this runs in about 5 minutes!
In practice:

- To avoid memory problems, I am not reading all the data into memory. Each of your arrays is more or less 64 MB, so I cannot load them all and then do the sum (unless I had 50 GB of RAM to throw away, and even in that case it would be slow). Instead, I make use of the associative property of summation by computing a "cumulative" sum at each cycle. In this way you are only working with two 8-million-element arrays at a time: the one you read from file "i", and the one that contains the current sum.
- To avoid unnecessary computations, I am summing the 1-dimensional arrays I get from reading the binary files directly. You don't need to reshape the arrays to matrix form in the cycle, because you can do that once on the final "summed" array and then convert it to a raster.

I hope this will work for you and that I am not missing something obvious!
As far as I can tell, if this approach is still slow then you have problems elsewhere (for example in data reading: over 720 files, 3 seconds spent reading each file means roughly 36 minutes of processing).
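If in doubt, a quick hedged check (my addition, reusing the readBin call from the question) is to time a single file read and see whether I/O dominates:
system.time({
  con <- bzfile(path[1], "rb")
  data <- readBin(con, what = "double", endian = "little", n = 4948*1649, size = 4)
  close(con)
})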
HTH,
Lorenzo

How to use a script on multiple Excel worksheets in RStudio

I have a rather long script, at least for me, but I'm new to R.
It looks like this:
# DATA IS LOADED
library(xlsx)
df = read.xlsx2("/Users/emiliemariafalkkallenbach/Documents/Norsk_Institut_for_vandforskning/Chase_DK.xlsx", sheetIndex = 1)
df[df == ""] <- NA
df = data.matrix(df)
df = data.frame(df)
attach(df)
Phyto = df[, 1:6]
Submerged = df[, 7:12]
Benthic = df[, 13:18]
# PHYTOPLANKTON
# making of boundaries
a <- (ifelse(P_Resp <= 0, (1-P_Acdev), 1/(1+P_Acdev)))
b = (0.95+a)/2
c = 2*a-b
d = 2*c-a
# Weighted boundaries
e = 0.95*P_weight
e = sum(e)
f = b*P_weight
f = sum(f)
g = a*P_weight
g = sum(g)
h = c*P_weight
h = sum(h)
i = d*P_weight
i = sum(i)
# EQR weighted
j = P_EQR*P_weight
j = sum(j)
# complete
l <- ifelse(is.na(rowSums(Phyto)), 1, 0)
m = sum(l)
# SUBMERGED VEGETATION
# Overall assessment
y = ifelse(m <= 0, j, "")
z = ifelse(y != "NA", 0, (ifelse(y > f, 1, (ifelse(y > g, 2, (ifelse(y > h, 3, (ifelse(y > i, 4, 5)))))))))
# making of boundaries
a.sub <- (ifelse(S_Resp <= 0, (1-S_Acdev), 1/(1+S_Acdev)))
a.sub = sum(a.sub)
b.sub = (0.95+a.sub)/2
b.sub = sum(b.sub)
c.sub = 2*a.sub-b.sub
c.sub = sum(c.sub)
d.sub = 2*c.sub-a.sub
d.sub = sum(d.sub)
# Weighted boundaries
e.sub = 0.95*S_weight
e.sub = sum(e.sub)
f.sub = b.sub*S_weight
f.sub = sum(f.sub)
g.sub = a.sub*S_weight
g.sub = sum(g.sub)
h.sub = c.sub*S_weight
h.sub = sum(h.sub)
i.sub = d.sub*S_weight
i.sub = sum(i.sub)
# EQR.sub weighted
j.sub = S_EQR*S_weight
j.sub = sum(j.sub)
# complete.sub
l.sub <- ifelse(is.na(rowSums(Submerged)), 1, 0)
m.sub = sum(l.sub)
# Overall assessment
q.sub = m.sub*0.75
y.sub = ifelse(m.sub <= 0, j.sub, "")
z.sub = ifelse(y.sub != "NA", (ifelse(y.sub > f.sub, 1, (ifelse(y.sub > g.sub, 2, (ifelse(y.sub > h.sub, 3, (ifelse(y.sub > i.sub, 4, 5)))))))), 0)
# BENTHIC INVERTEBRATES
# making of boundaries
a.ben <- (ifelse(B_Resp <= 0, (1-B_Acdev), 1/(1+B_Acdev)))
b.ben = (0.95+a.ben)/2
c.ben = 2*a.ben-b.ben
d.ben = 2*c.ben-a.ben
# Weighted boundaries
e.ben = 0.95*B_weight
e.ben = sum(e.ben)
f.ben = b.ben*B_weight
f.ben = sum(f.ben)
g.ben = a.ben*B_weight
g.ben = sum(g.ben)
h.ben = c.ben*B_weight
h.ben = sum(h.ben)
i.ben = d.ben*B_weight
i.ben = sum(i.ben)
# EQR weighted
j.ben = B_EQR*B_weight
j.ben = sum(j.ben)
# Complete
l.ben <- ifelse(is.na(rowSums(Benthic)), 1, 0)
m.ben = sum(l.ben)
# ChkAccDev
n.ben = ifelse(B_Resp > 0, 0.53, 1.1)
o.ben = ifelse(B_Acdev < 0.15, -1, 0)
p.ben = ifelse(B_Acdev > n.ben, 1, o.ben)
# Overall assessment
q.ben = m.ben*0.75
y.ben = ifelse(m.ben < 1, j.ben, "")
z.ben = ifelse(y.ben != "NA", ifelse(y.ben > f.ben, 1, (ifelse(y.ben > g.ben, 2, (ifelse(y.ben > h.ben, 3, (ifelse(y.ben > i.ben, 4, 5))))))), 0)
# Final assessment
Z = max(na.omit(c(z, z.sub, z.ben)))
# Overall assessment
SCORE = (ifelse(Z <= 1 && Z < 2, "HIGH", (ifelse(Z >= 2 && Z < 3, "GOOD", (ifelse(Z >= 3 && Z < 4, "MODERATE", (ifelse(Z >= 4 && Z < 5, "BAD", (ifelse(Z >= 5, "POOR", "NA"))))))))))
SCORE
# Pie Chart
library('plotrix')
Phyto = 33.3
Submerged = 33.3
Benthic = 33.3
total = 100
iniR = 0.2 # initial radius
Slices1 = c = (1)
pie(1, radius = iniR, init.angle = 90, col = c('white'), border = NA, labels = 'Overall')
colors = c(ifelse(z <= 1, "blue", ifelse(z <= 2, "green", ifelse(z <= 3, "yellow", ifelse(z <= 4, "orange", "red")))),
           ifelse(z.sub <= 1, "blue", ifelse(z.sub <= 2, "green", ifelse(z.sub <= 3, "yellow", ifelse(z.sub <= 4, "orange", "red")))),
           ifelse(z.ben <= 1, "blue", ifelse(z.ben <= 2, "green", ifelse(z.ben <= 3, "yellow", ifelse(z.ben <= 4, "orange", "red")))))
floating.pie(0, 0, c(Phyto, Submerged, Benthic), radius = 5*iniR, startpos = pi/2, col = colors, border = NA)
colorst = c(ifelse(Z <= 1, "blue", ifelse(Z <= 2, "green", ifelse(Z <= 3, "yellow", ifelse(Z <= 4, "orange", "red")))))
floating.pie(0, 0, c(total), radius = 3*iniR, startpos = pi/2, col = colorst, border = NA)
angles = as.numeric(c(-10, 75, 80))
pie.labels(0, 0, angles, c("Phyto", "Submerged", "Benthic"), radius = 1, bg = "white")
pie.labels(0, 0, 0, c("Overall Assessment"), radius = -0.3)
I guess it does not matter what is in my script.
At the moment it only runs on the first sheet of the Excel file, but I have several sheets and would like to run the script on all of them.
The outcome should be a table looking like this
z z.sub z.ben Z Pie-chart (only if possible)
Sheet 1 0 NA NA High
Sheet 2 ... ... ... ...
Sheet 3 ... ... ... ...
I'm sorry if this is a basic question!
Hope someone is able to help.
Thanks!
A better way to read xls files is the readxl library.
# remove "#" if you don't have these libraries installed already
# install.packages("readxl") # faster excel reader
# install.packages("data.table") # faster everything, in this case rbindlist
library(readxl)
library(data.table)
sheets = 1:5 # index numbers or names of the sheets you want to read
readmysheets = function(sheet) {
  df = read_excel("myexcel.xls", sheet = sheet)
}
myfiles = lapply(sheets, readmysheets) # apply the indices/names to the readmysheets function
# you now have a list:
str(myfiles)
# bind the separate sheets together
together = rbindlist(myfiles, fill = T)
Wrap the body of your script in a "for" loop. You have several ways of doing this; here are two.
# pre-allocate an object to write to: one row per value (z, z.sub, z.ben, Z),
# one column per sheet
out <- matrix(NA, nrow = 4, ncol = nsheets)
for (i in 1:nsheets) {
  df = read.xlsx2(".../Chase_DK.xlsx", sheetIndex = i)
  #... do calculations
  out[, i] <- c(z, z.sub, z.ben, Z)
}
# second way, no need to pre-allocate anything
sapply(1:nsheets, FUN = function(x) {
  df = read.xlsx2(".../Chase_DK.xlsx", sheetIndex = x)
  #... do calculations
  out <- c(z, z.sub, z.ben, Z) # specify what you wish the function to return
  return(out) # sapply will try to simplify the combined result on its own
})
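A hedged sketch tying this back to the requested table (my addition; it assumes the omitted calculations define z, z.sub, z.ben and Z as in your script):
res <- sapply(1:nsheets, FUN = function(x) {
  df = read.xlsx2(".../Chase_DK.xlsx", sheetIndex = x)
  #... do calculations
  c(z = z, z.sub = z.sub, z.ben = z.ben, Z = Z)
})
out <- as.data.frame(t(res)) # one row per sheet, as in the desired output
rownames(out) <- paste("Sheet", 1:nsheets)
out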
