I have a list of raster files in GeoTIFF files, and I've like to remove each file and if one has more than 50% of NA values.
I wanna delete it from the list my new list (f2). In my code:
library(raster)
# example data
r <- raster(ncol=10, nrow=10)
set.seed(0)
# 10 layers
s <- stack(lapply(1:10, function(i) setValues(r, runif(ncell(r)))))
# set about half the values to NA
s[s < .5] <- NA
#Create GeoTIFF for each layer
sl<-1:10
for (i in 1:length(sl)){
writeRaster(s[[i]],filename=paste(sl[i],sep=""),
format="GTiff",datatype="FLT4S",overwrite=TRUE)
}
#Take images in batch
f <- list.files(getwd(), pattern = ".tif")
ras <- lapply(f,raster)
#Remove from my list when I have more than 50% of cells that are NA
class <- vector()
for (j in 1:length(ras)){
i <- cellStats(is.na(ras[[j]]), sum) # count the NA values in each layer
i <- i/ncell(ras[[j]]) # fraction that is NA
ss <- ras[[j]][[which(i>.5)]] # Select the layers that more than half the cells with values
class<-c(class,ss)
}
Here, I have my problem, the output class has all my images and not the layers that more than 50% of the cells with values
I've like to apply this conditon for:
#Remove target images
f2 <- list.files(getwd(), pattern = ".tif")
f2<- f[f!=class]
ras2 <- lapply(f2,raster)
Your example data
library(raster)
r <- raster(ncol=10, nrow=10)
set.seed(0)
s <- stack(lapply(1:10, function(i) setValues(r, runif(ncell(r)))))
s[s < .5] <- NA
# I skip the file writing bit. But from these files I would make a RasterStack again
#f <- list.files(getwd(), pattern = ".tif")
#ras <- stack(f)
Count the number of cells with NA and divide by the number of cells
f <- freq(s, value=NA) / ncell(s)
# equivalent to cellStats(is.na(s), "mean")
i <- which(f <= 0.5)
i
#layer.2 layer.6 layer.7
# 2 6 7
use these indices to subset the RasterStack
ss <- s[[i]]
If you cannot make a RasterStack from your real data (perhaps the rasters do not align), you can use a list and a loop like this
ras <- as.list(s)
result <- rep(NA, length(ras))
for (i in 1:length(ras)){
result[i] <- freq(ras[[i]], value=NA) / ncell(ras[[i]])
}
# equivalent to
# result <- sapply(1:length(ras), function(i) freq(ras[[i]], value=NA) / ncell(ras[[i]]))
j <- result < 0.5
sras <- ras[j]
Related
Given:
library(terra)
r <- rast(nrows=512, ncols=640,nlyrs=1, res=1,crs="local",extent=c(0,640,0,512))
values(r) <- rnorm(ncell(r),100,10)
x <- c(r, r*2, r*3, r*0.5)
plotRGB(x, stretch="lin")
and
weirds <- data.frame(bands=c(1,3), mincol=c(5,100), maxcol=c(7,103))
weirds
> weirds
bands mincol maxcol
1 5 7
3 100 103
I have to set as NA those cells within mincol and maxcol in weirds for the given bands.
I achieve in this way:
y <- x
for (i in 1:nrow(weirds)) {
y[[weirds$bands[i]]][,weirds$mincol[i]:weirds$maxcol[i]] <- NA
}
plotRGB(y, stretch="lin")
But I am sure there are more efficient ways. Can anyone make suggestions?
With terra 1.6-24 (currently the development version) you can use indexing for layers.
Example data
library(terra)
#terra 1.6.24
r <- rast(nrows=30, ncols=30, nlyrs=1,extent=c(0,640,0,512))
values(r) <- rnorm(ncell(r), 100, 10)
x <- c(r, r*2, r*3)
w <- data.frame(bands=c(1,3), mincol=c(5,20), maxcol=c(7,23))
First approach. This is very similar to what you had, but perhaps a little cleaner.
y <- x
for (i in 1:nrow(w)) {
y[ , w$mincol[i]:w$maxcol[i], w$bands[i]] <- NA
}
plot(y)
Second approach. Using a list for one or more of the indices i (row), j (col), or k (lyr) you can do multiple replacements at once.
z <- x
cols <- lapply(1:nrow(w), function(i) seq(w[i, 2], w[i, 3]))
z[ , cols, w$bands] <- NA
plot(z, nc=3)
I would assume that this is the better approach and that you can probably directly create the cols list, instead of deriving it from w
You can install the development version of "terra" with install.packages('terra', repos='https://rspatial.r-universe.dev')
I have raster files that have the same resolution and extent but differ in the number of NA. I want to unify the number of NA between all of them. Is it possible to do it by considering a cell as non-NA if it's not NA in all the raster files?
Here an example :
library(raster)
library(terra)
f <- system.file("external/test.grd", package="raster")
r1 <- raster(f)
r2 <- calc(r1, fun=function(x){ x[x < 500] <- NA; return(x)} )
r1 <- calc(r1, fun=function(x){ x[x > 1200] <- NA; return(x)} )
raste <- rast(r1)
rNA <- terra:: global(!(is.na(raste)), sum, na.rm=TRUE)
print(paste0("Non-NA of r1", rNA))
raste <- rast(r2)
rNA <- terra:: global(!(is.na(raste)), sum, na.rm=TRUE)
print(paste0("Non-NA of r2", rNA))
I want both r1 and r2 to have the same number of non-NA cells. I have more than two rasters, so I wonder if I can do it for a large number of files.
It can be a bit confusing to use raster and terra together, so I will just use terra (but you can do the same with raster, using stack in stead of c and cellStats in stead of global.
Your example data
library(terra)
f <- system.file("external/test.grd", package="raster")
r <- rast(f)
r1 <- clamp(r, upper=1200, values=FALSE)
r2 <- clamp(r, lower=500, values=FALSE)
global(!(is.na(r1)), sum)
# sum
#lyr.1 3145
global(!(is.na(r2)), sum)
# sum
#lyr.1 802
Solution:
r <- c(r1, r2)
names(r) <- c("r1", "r2")
m <- any(is.na(r))
x <- mask(r, m, maskvalue=1)
global(!(is.na(x)), sum, na.rm=TRUE)
# sum
#r1 769
#r2 769
I like the use of any(is.na()) because it makes clear what the intent is.
But you could combine the layers in one of many other ways. As long as you do not use na.rm=TRUE the cells with an NA in one of the layers will be NA in the output. For example with sum, diff, prod, mean or app.
m <- sum(r)
x <- mask(r, m)
global(!(is.na(x)), sum, na.rm=TRUE)
Suppose I have a raster stack that each layer has data gap in. I want to use the next two layers in the stack to fill the gaps of each layer:
library(raster)
r1 <- raster(ncol=20,nrow=20, xmn=0, xmx=20, ymn=0,ymx=20)
r1[] <- 1:20
r2 <- r3 <- r4 <- r5 <- r1
set.seed(0)
r1[sample(1:ncell(r1), size = 20)] <- NA
r2[sample(1:ncell(r2), size = 30)] <- NA
r3[sample(1:ncell(r3), size = 10)] <- NA
r4[sample(1:ncell(r4), size = 18)] <- NA
r5[sample(1:ncell(r5), size = 18)] <- NA
s <- stack(r1, r2, r3, r4, r5)
In this case r2 and r3 will be used to fill the gaps in r1 and so forth.
MikeJewski's solution might work, but the cover function is designed for this and more direct. It is not clear how you want to use the next two layers. The mean:
for(i in 1:(nlayers(s) - 2) ){
s[[i]] <- cover( s[[i]], mean( s[[(i+1):(i+2)]], na.rm=TRUE))
}
Or first the closest (as MikeJewski assumed):
for(i in 1:(nlayers(s) - 2) ){
s[[i]] <- cover( s[[i]], s[[(i+1)]])
s[[i]] <- cover( s[[i]], s[[(i+2)]])
}
This would be another, but probably inefficient, approach:
f <- function(x) {
for(i in 1:((ncol(x)-2)) ){
x[is.na(x[,i]),i] <- x[is.na(x[,i]),i+1]
x[is.na(x[,i]),i] <- x[is.na(x[,i]),i+2]
}
x
}
ss <- calc(s, f)
Not sure if this is what you want, but it will give you a start. I'm still new to R, so there is probably another way to do it.
library(raster)
r1 <- raster(ncol=20,nrow=20, xmn=0, xmx=20, ymn=0,ymx=20)
r1[] <- 1:20
r2 <- r3 <- r4 <- r5 <- r1
set.seed(0)
r1[sample(1:ncell(r1), size = 20)] <- NA
r2[sample(1:ncell(r2), size = 30)] <- NA
r3[sample(1:ncell(r3), size = 10)] <- NA
r4[sample(1:ncell(r4), size = 18)] <- NA
r5[sample(1:ncell(r5), size = 18)] <- NA
s <- stack(r1, r2, r3, r4, r5)
for(i in 1:(nlayers(s) - 2) ){
s[[i]] <- merge( s[[i]], mask( s[[(i+1)]], s[[i]], inverse = TRUE))
s[[i]] <- merge( s[[i]], mask( s[[(i+2)]], s[[i]], inverse = TRUE))
}
Here is an adaptation for those who want to fill gaps with a moving window. This is using the average of the pixel values before and after the gap. The code also makes sure to fill in the gaps in the beginning and at the end of the RasterStack:
fill_gaps <- function(IMG, WINDOW=2){
# Arguments:
# IMG: RasterStack or RasterBrick containing the datacube with gaps. The empty pixels must be NA value, not 0.
# WINDOW: (integer) the number of images before and after the gap used to fill the gap. By default WINDOW=2.
# Note that the NA values will be replaced by the mean value of the images in ''window''.
# If your empty pixels are stored as 0 use: img <- img[img < 0] <- NA before applying the function.
if (!require("raster")) install.packages("raster")
library(raster)
s <- raster::stack(IMG)
for(i in 1:nlayers(s)){
if(i <= WINDOW) {
s[[i]] <- cover(s[[i]], mean(s[[WINDOW+1]]:s[[WINDOW*2]]), na.rm=TRUE)}
if(i >= (nlayers(s)-WINDOW)) {
s[[i]] <- cover(s[[i]], mean(s[[nlayers(s)-(WINDOW*2+1)]]:s[[nlayers(s)-WINDOW]]), na.rm=TRUE)}
else {
s[[i]] <- cover( s[[i]], mean(s[[(i-WINDOW):(i+WINDOW)]], na.rm=TRUE))}
}
return(s)
}
I have a raster map with 202 rows and 201 columns
there are some grids in this map which pixel values is 0
I want to write a function that return all the pixel values 0 grids' coordinate
how can I do it
I was trying to use if loop and while loop
but it always says TRUE/FALSE need
here is my sample code
library(raster)
library(rgdal)
library(maptools)
library(sp)
setwd("E:/Landsat-data-NASA atm-corrected/sample_day1")
restdir2 <- ("E:/Landsat-data-NASA atm-corrected/sample_day1")
n3 <- list.files(restdir2, pattern="*band4_clip_1.tif", full.names=TRUE)
n4 <- list.files(restdir2, pattern="*cloud_qa_clip_1.tif", full.names=TRUE)
n5 <- list.files(restdir2, pattern="*cloud.tif", full.names=TRUE)
create<- function(x,y)
{
layer <- raster(n4)
layer2 <- raster(n3)
for(c in 1:x)
{
for(r in 1:y)
{
nl<- layer2
if(layer[c,r]==0)
return layer[c,r]
}
}
}
create (10,10)
Here are two (very similar) approaches
library(raster)
# set up example data
r <- raster(nrow=18, ncol=36)
set.seed(0)
r[] <- round(runif(ncell(r)) * 10 - 5)
# approach 1, for a single layer
p <- rasterToPoints(r, fun=function(x){x == 0})
# approach 2, also works for multiple layers
# first remove all non zero cells
z <- subs(r, data.frame(0, 1))
p <- rasterToPoints(z)
# results
plot(r)
points(p[,1:2])
if you have multiple layers with the same spatial parameters (extent and resolution)
# create example data
x1 <- setValues(r, round(runif(ncell(r)) * 10 - 5))
x2 <- setValues(r, round(runif(ncell(r)) * 10 - 5))
x3 <- setValues(r, round(runif(ncell(r)) * 10 - 5))
# combine layers
s <- stack(x1, x2, x3)
z <- subs(r, data.frame(0, 1))
p <- rasterToPoints(z)
I have two raster layers that I wish to combine into one. Let's call them mask (with values 1 and NA), and vrs.
library(raster)
mask <- raster(ncol=10, nrow=10)
mask[] <- c(rep(0, 50), rep(1, 50))
mask[mask < 0.5] <- NA
vrs <-raster(ncol=10, nrow=10)
vrs[] <- rpois(100, 2)
vrs[vrs >= 4] <- NA
I wish to combine two big layers, but for the sake of understanding these small examples are ok. What I wish to do is to set the pixel values of my output layer to zero for those pixels where mask layer is 1 and vrs layer is NA. All other pixels should remain with the values of original vrs.
This is my only thought as to how:
zero.for.NA <- function(x, y, filename){
out <- raster(y)
if(canProcessInMemory(out, n = 4)) { #wild guess..
val <- getValues(y) #values
NA.pos <- which(is.na(val)) #positiones for all NA-values in values-layer
NA.t.noll.pos<-which(x[NA.pos]==1) #Positions where mask is 1 within the
#vector of positions of NA values in vrs
val[NA.pos[NA.t.noll.pos]] <- 0 #set values layer to 0 where condition met
out <- setValues(out, val)
return(out)
} else { #for large rasters the same thing by chunks
bs <- blockSize(out)
out <- writeStart(out, filename, overwrite=TRUE)
for (i in 1:bs$n) {
v <- getValues(y, row=bs$row[i], nrows=bs$nrows[i])
xv <- getValues(x, row=bs$row[i], nrows=bs$nrows[i])
NA.pos <- which(is.na(v))
NA.t.noll.pos <- which(xv[NA.pos]==1)
v[NA.pos[NA.t.noll.pos]] <- 0
out <- writeValues(out, v, bs$row[i])
}
out <- writeStop(out)
return(out)
}
}
This function did work on the small example and seems to work on the bigger ones. Is there a faster/better way of doing this? Some way that is better for larger files? I will have to use this on many sets of layers and I would appreciate any help in making the process safer and or quicker!
I'd use cover():
r <- cover(vrs, mask-1)
plot(r)
You can do this with overlay, as well:
r <- overlay(mask, vrs, fun=function(x, y) ifelse(x==1 & is.na(y), 0, y))