R apply raster function to a list of characters - r

I started recently to work with R so this question has probably a simple solution.
I have some .tif satellite images from different scenes. I can create a test raster brick with it but the process needs to be automatised because of the huge amount of files. Therefore I have been trying to create a function to read the list of .tif files and to output a list of rasters.
You can find here below the code I have been using:
# Description: Prepare a raster brick with ordered acquisitions
# from all the scenes of the study area
library(raster)
library(rgdal)
library(sp)
library(rtiff)
rm(list = ls())
setwd=getwd()
# If you want to download the .tif files of the 2 scenes from dropbox:

# Download one file shared on Dropbox into the current working directory.
#
# x:   file name as stored on Dropbox; also used as the local file name.
# key: Dropbox share key that precedes the file name in the download URL.
# Called for its side effect (writes file `x`, emits a message).
dl_from_dropbox <- function(x, key) {
  # Fail loudly when RCurl is missing; require() would only warn and the
  # function would then die on the first RCurl call.
  if (!requireNamespace("RCurl", quietly = TRUE)) {
    stop("Package 'RCurl' is required to download from Dropbox.", call. = FALSE)
  }
  # NOTE(review): ssl.verifypeer = FALSE switches off certificate checking.
  # Kept from the original; consider removing it.
  bin <- RCurl::getBinaryURL(
    paste0("https://dl.dropboxusercontent.com/s/", key, "/", x),
    ssl.verifypeer = FALSE
  )
  con <- file(x, open = "wb")
  # Close the connection even if writeBin() fails.
  on.exit(close(con), add = TRUE)
  writeBin(bin, con)
  message(noquote(paste(x, "read into", getwd())))
}
dl_from_dropbox("lndsr.LT52210611985245CUB00-vi.NDVI.tif", "qb1bap9rghwivwy")
dl_from_dropbox("lndsr.LT52210611985309CUB00-vi.NDVI.tif", "sbhcffotirwnnc6")
dl_from_dropbox("lndsr.LT52210611987283CUB00-vi.NDVI.tif", "2zrkoo00ngigfzm")
dl_from_dropbox("lndsr.LT42240631992198XXX02-vi.NDVI.tif", "gx0ctxn2mca3u5v")
dl_from_dropbox("lndsr.LT42240631992214XXX02-vi.NDVI.tif", "pqnjw2dpz9beeo5")
dl_from_dropbox("lndsr.LT52240631986157CUB02-vi.NDVI.tif", "rrka10yaktv8la8")
# 1- Create a list of .tif files with names ordered chronologically (for time series analysis later on)
pathdir= # change: placeholder, put the folder that holds the .tif files here
# List all the images from any scene in that folder and
# make a data frame with a column for the date
a <- list.files(path=pathdir,pattern="lndsr.LT", all.files=FALSE,full.names=FALSE)
a1 <- as.data.frame(a, row.names=NULL, optional=FALSE, stringsAsFactors=FALSE) # class(a1$a) is character
# Create a date column from characters 16-22 of the file name (year + Julian
# day, e.g. "1985245" in "lndsr.LT52210611985245CUB00-vi.NDVI.tif") and sort
# the rows by it in ascending order
a1$date <- substr(a1$a, 16, 22) # class(a1$date) is character
a1 <- a1[order(a1$date),]
# Keep only the column with the name of the scene
a1 <- subset(a1, select=1) # class(a1$a) is character
# Retrieve an ordered list from the data frame
# NOTE(review): going through as.data.frame() may turn the names into factors
# in R versions where stringsAsFactors defaults to TRUE - confirm.
ord_dates <- as.list(as.data.frame(t(a1$a))) # length(ord_dates): 4 (correct)
# class(ord_dates) is list
# 2- Create rasters from elements of a list
for (i in 1:(length(ord_dates))){
# Point to each individual .tif file
# NOTE(review): `[i]` returns a one-element list; `[[i]]` would extract the
# element itself.
tif_file <- ord_dates[i] # Problem: accesses only the first item of ord_dates
# Make a raster out of it
r <- raster(tif_file) # a list cannot be used as input here. Gives error:
# Error in .local(x, ...) : list has no "x"
# Give it a standardised name (r1,r2,r3, etc)
# NOTE(review): this builds all names r1..rN on every iteration instead of
# the single name for index i.
name <- paste("r", 1:length(ord_dates),sep = "")
# Write the raster to file
writeRaster (r , filename = name,format = "GTiff", overwrite =T )
}
I have also tried to use lapply() without much success.
# Apply raster() to every element of ord_dates and collect the results
r <- lapply(X = ord_dates, FUN = raster)
Can you give me some advice on which approach to follow? I am guessing I should be using matrices, but I don't really understand what their advantages are here or at which step they are required.
Any help is really appreciated!
Thanks in advance

Assuming ord_dates is a list of file names (that have full path or are in your getwd()), you can apply a (any) function to this list using lapply. I haven't tested this, unfortunately.
# Read one .tif file as a raster and write it back out as a GeoTIFF.
#
# tif_file: path to a single .tif file (character, or a factor holding it).
# name:     output file name; when NULL it defaults to "r_" followed by the
#           input's base name, so every input gets its own output file.
# Returns the output name invisibly.
convertAllToRaster <- function(tif_file, name = NULL) {
  # Elements that went through a data frame may be factors; raster() needs
  # a character path.
  tif_file <- as.character(tif_file)
  if (is.null(name)) {
    name <- paste0("r_", tools::file_path_sans_ext(basename(tif_file)))
  }
  r <- raster(tif_file)
  # Write the raster to file
  writeRaster(r, filename = name, format = "GTiff", overwrite = TRUE)
  message("Eeee, maybe it was written successfully.")
  invisible(name)
}
# Give each file a standardised name (r1, r2, r3, etc.) in the order of
# ord_dates: one name per file rather than the whole vector of names.
invisible(Map(convertAllToRaster, ord_dates, paste0("r", seq_along(ord_dates))))

After solving the issues with factors and with the name, this is the code that worked for me. I also added a for loop inside the function you proposed, Roman. Thank you very much for your kind help!
# Convert every .tif file named in `ord_dates` to the raster package's
# native .grd format, next to the original and under the original name.
#
# ord_dates: character vector or list of .tif file paths (factor elements
#            are converted to character).
# Called for its side effect; returns NULL invisibly.
convertAllToRaster <- function(ord_dates) {
  # seq_along() is safe for empty input, unlike 1:length(x)
  for (i in seq_along(ord_dates)) {
    # `[[` extracts the element itself; `[` on a list would return a list
    tif_file <- as.character(ord_dates[[i]])
    r <- raster(tif_file)
    # Keep the original name
    name <- paste0(tif_file, ".grd")
    # Write the raster to file in .grd format
    writeRaster(r, filename = name, format = "raster", overwrite = TRUE)
  }
  invisible(NULL)
}
# The function already iterates over its input, so call it once on the whole
# list instead of wrapping it in lapply().
convertAllToRaster(ord_dates)

Related

How to Loop this following script in R instead of Repeating the same exact steps all the time for each file

I am new to R. I am working with CMIP6 models and have more than 200 .nc files. I can run the script on individual files and get the output I need, but I am having a hard time looping it over all of them. Can you help me out? It would save me a lot of time. Thank you in advance.
# Extract point values from one NetCDF file at the station coordinates and
# save them as a CSV file. Both input files are picked interactively.
setwd("D:\\data")
library(raster)
library(sp)
library(ncdf4)

# Station table; columns 2 and 3 are used as the point coordinates
station.data <- read.csv(file.choose(), sep = ",", header = TRUE)
lon.lat <- SpatialPoints(station.data[, c(2, 3)])
lon.lat

# Raster brick of the "pr" variable from the chosen NetCDF file
robject <- brick(file.choose(), varname = "pr")
dim(robject) # check the dimensions of the brick

# Extract the values at the points and transpose them before writing
vall <- t(extract(robject, lon.lat, method = "simple"))
write.csv(vall, file = "earthvegbil.csv", fileEncoding = "macroman")
Put your code in an lapply and loop over the indices of your files. Looping over indices is useful for getting nice numerical suffixes in write.csv.
setwd("D:\\data")
# list.files() takes regular expressions: escape the dot and anchor the
# extension. The rasters are NetCDF (.nc) files.
files <- list.files(pattern = "\\.csv$")
bricks <- list.files(pattern = "\\.nc$")
stopifnot(length(files) == length(bricks)) ## check for equal length of both vectors
# write.csv() does not create missing directories
dir.create("out", showWarnings = FALSE)
# Pair the x-th station file with the x-th NetCDF file. Each iteration is run
# only for the CSV file it writes, so the list of results is discarded.
invisible(lapply(seq_along(files), \(x) {
  ## import station data file (read.csv needs a single-character separator)
  station.data <- read.csv(files[[x]], sep = ",", header = TRUE)
  ## columns 2 and 3 hold the coordinates of the stations
  lon.lat <- SpatialPoints(station.data[, c(2, 3)])
  ## import the raster NetCDF file from which point values are extracted
  robject <- brick(bricks[[x]], varname = "pr")
  ## extract values
  vall <- t(extract(robject, lon.lat, method = "simple"))
  ## save output csv file with a zero-padded numeric suffix
  write.csv(vall, file = sprintf('./out/earthvegbil_%03d.csv', x),
            fileEncoding = "macroman")
}))

Cannot combine files in list of files when opening multiple .dta files [duplicate]

I have a folder with more than 500 .dta files. I would like to load some of these files into a single R object.
My .dta files have a generic name composed of four parts : 'two letters/four digits/y/.dta'. For instance, a name can be 'de2015y.dta' or 'fr2008y.dta'. Only the parts corresponding to the two letters and the four digits change across the .dta file.
I have written a code that works, but I am not satisfied with it. I would like to avoid using a loop and to shorten it.
My code is:
# Select the .dta files I want to load
#.....................................
folder <- "E:/Folder"
# Names of the .dta files in the folder that follow the
# <two letters><four digits>y.dta convention
name <- list.files(path = folder, pattern = "^[[:alpha:]]{2}[0-9]{4}y\\.dta$")
stopifnot(length(name) > 0)
# Keep the files of the last year available. The year is taken from the file
# names instead of being hard-coded as "2015".
year <- substr(name, 3, 6)
name <- name[year == max(year)]
# Load the selected files into one list rather than into separate objects in
# the global environment
tables <- lapply(file.path(folder, name), read.dta13)
names(tables) <- sub("\\.dta$", "", name)
# Merge the files into a single object. Binding the list guarantees that
# every file appears exactly once (the hand-written rbind() call listed
# ee2015y and pl2015y twice).
df2015 <- do.call(rbind, unname(tables))
Does anyone know how I can avoid using a loop and shortening my code ?
You can also use purrr for your task.
First create a named vector of all files you want to load (as I understand your question, you simply need all files from 2015). The setNames() part is only necessary in case you'd like an ID variable in your data frame and it is not already included in the .dta files.
After that, simply use map_df() to read all files and return a data frame. Specifying .id is optional and results in an ID column the values of which are based on the names of in_files.
library(purrr)
library(haven)
# Full paths of every 2015 file, named by themselves so that map_df() can
# use the names as the id column
in_files <- list.files(path = "E:/Folder", pattern = "2015y", full.names = TRUE)
names(in_files) <- in_files
# Read all files and row-bind them into one data frame
df2015 <- map_df(.x = in_files, .f = read_dta, .id = "id")
The following steps should give you what you want:
Load the foreign package:
library(foreign) # or alternatively: library(haven)
create a list of file names
# `pattern` is a regular expression, not a glob, and the files are .dta
file.list <- list.files(path = "E:/Folder", pattern = "\\.dta$", full.names = TRUE)
determine which files to read (NOTE: check that these are the correct positions for substr; they are an estimate on my side)
# Year digits taken from characters 13-16 of each full path; keep only the
# files of the latest year
vec <- as.integer(substr(file.list, start = 13, stop = 16))
file.list2 <- file.list[vec == max(vec)]
read the files
# Read every file into a list whose names are the file paths
df.list <- lapply(setNames(file.list2, file.list2), read.dta)
remove the path from the listnames
# basename() drops the whole directory part, including the separator that
# gsub("E:/Folder", "", ...) left behind
names(df.list) <- basename(names(df.list))
bind the data frames together into one data.frame/data.table and create an id column as well
# Row-bind the list; the list names end up in the "id" column
library(data.table)
df <- rbindlist(l = df.list, idcol = "id")
# or with 'dplyr'
library(dplyr)
df <- bind_rows(df.list, .id = "id")
Now you have a data.frame with an id-column that identifies the different original files.
I would change the working directory for this task...
Then does this do what you are asking for?
setwd("C:/.../yourfiles")
# get file names where year equals "2015"
# (list.files() takes a regular expression, not a glob)
name <- list.files(pattern = "\\.dta$")
name <- name[substr(name, 3, 6) == "2015"]
# read in the files in a list
files <- lapply(name, foreign::read.dta)
# remove ".dta" from file names and
# give the file contents in the list their name
names(files) <- sub("\\.dta$", "", name)
# optional: put all file contents into one data-frame
# (data-frames (vectors) need to have the same row counts (lengths) for this last step to work)
mydatafrm <- data.frame(files)

Merge netCDF files in R

I have 2 netCDF files (each .nc file has 4 variables: Susceptible, Infected, Recovered and Inhabitable. The dimension of each variable is 64 x 88). I would like to merge these 2 files into a single netCDF file such that the merged file will stack separately Susceptible from the 2 files, Infected from the 2 files, Recovered from the 2 files and Inhabitable from the 2 files.
Here are the 2 files(first and second)
Could anyone help me with this please?
Thanks in advance,
Ashok
The ncdf4 package will do what you want to do. Have a look at the code below, example for one variable only.
#install.packages('ncdf4')
library(ncdf4)
file1 <- nc_open('England_aggr_GPW4_2000_0001.nc')
file2 <- nc_open('England_aggr_GPW4_2000_0002.nc')
# Just for one variable for now: put the two grids side by side
dat_new <- cbind(
  ncvar_get(file1, 'Susceptible'),
  ncvar_get(file2, 'Susceptible'))
dim(dat_new)
# Definition of the source variable; reused for its units
var <- file1$var[['Susceptible']]
# ncvar_def() needs dimension objects, not plain lengths. These are simple
# index dimensions without coordinate variables.
dims <- list(
  ncdim_def('x', units = '', vals = seq_len(nrow(dat_new)),
            create_dimvar = FALSE),
  ncdim_def('y', units = '', vals = seq_len(ncol(dat_new)),
            create_dimvar = FALSE))
# Create a new file; the handle name must match the one used below
file_new <- nc_create(
  filename = 'England_aggr_GPW4_2000_new.nc',
  # We need to define the variables here
  vars = ncvar_def(
    name = 'Susceptible',
    units = var$units,
    dim = dims))
# And write to it
ncvar_put(
  nc = file_new,
  varid = 'Susceptible',
  vals = dat_new)
# Finally, close the new file and the two inputs
nc_close(file_new)
nc_close(file1)
nc_close(file2)
Update:
An alternative approach is using the raster package as shown below. I didn't figure out how to make 4D raster stacks, so I am splitting your data into one NCDF file per variable. Would that work for you?
#install.packages('ncdf4')
library(ncdf4)
library(raster)
var_names <- c('Susceptible', 'Infected', 'Recovered', 'Inhabitable')
# One output NetCDF file per variable, each holding that variable from both
# input files as a two-layer stack
for (var_name in var_names) {
  first <- raster('England_aggr_GPW4_2000_0001.nc', varname = var_name)
  second <- raster('England_aggr_GPW4_2000_0002.nc', varname = var_name)
  x <- stack(first, second)
  # Name each layer after its source file
  names(x) <- c('01', '02')
  out_file <- paste0(var_name, '_out.nc')
  writeRaster(x = x, filename = out_file, overwrite = TRUE, format = 'CDF')
}

save files into a specific subfolder in a loop in R

I feel I am very close to the solution but at the moment i cant figure out how to get there.
I´ve got the following problem.
In my folder "Test" I´ve got stacked datafiles with the names M1_1; M1_2, M1_3 and so on: /Test/M1_1.dat for example.
Now I want to separate the files, so that I get: M1_1[1].dat, M1_1[2].dat, M1_1[3].dat and so on. I'd like to save these files in specific subfolders: Test/M1/M1_1[1]; Test/M1/M1_1[2] and so on, and Test/M2/M1_2[1], Test/M2/M1_2[2] and so on.
Now I already created the subfolders. And I got the following command to split up the files so that i get M1_1.dat[1] and so on:
# Split every stacked file in Test/ by its .imp column and write each part to
# Test/M<k>/<base>[<imp>].dat, where <k> is the number after the underscore
# in the input name (M1_2.dat -> Test/M2/M1_2[1].dat, ...).
# Only the top level of Test/ is scanned, so files written to the subfolders
# are not read again on a rerun.
for (e in dir(path = "Test", pattern = "\\.dat$", full.names = TRUE)) {
  data <- read.table(e, header = TRUE)
  df <- data[-c(2)]
  out <- split(df, f = df$.imp)
  # File name without directory and extension, e.g. "M1_2"
  base <- tools::file_path_sans_ext(basename(e))
  # NOTE(review): the subfolder is derived from the suffix after "_", following
  # the layout described in the question - confirm it matches your folders.
  sub_dir <- file.path(dirname(e), paste0("M", sub("^.*_", "", base)))
  dir.create(sub_dir, showWarnings = FALSE)
  for (z in names(out)) {
    write.table(out[[z]], file.path(sub_dir, paste0(base, "[", z, "].dat")),
                sep = "\t", row.names = FALSE, col.names = FALSE)
  }
}
Now the paste0 command gets me my desired split up data (although its M1_1.dat[1] instead of M1_1[1].dat), but i cant figure out how to get this data into my subfolders.
Maybe you´ve got an idea?
Thanks in advance.
I don't have any idea what your data looks like so I am going to attempt to recreate the scenario with the gender datasets available at baby names
Assuming all the files from the zip folder are stored to "inst/data"
store all file paths to all_fi variable
# Full paths of every .txt file below inst/data
all_fi <- list.files(path = "inst/data", pattern = "\\.txt$",
                     recursive = TRUE, full.names = TRUE)
> head(all_fi, 3)
[1] "inst/data/yob1880.txt" "inst/data/yob1881.txt"
Preset function that will apply to each file in the directory
# Split one input file by gender and write each part into a folder named
# after the input file.
#
# f_in: path to one input file; its first three columns are used as
#       name, gender and freq.
# Returns a data frame with one row per written file (file_name, file_size).
f.it <- function(f_in = NULL) {
  # Create the new folder based on the existing name of the input file.
  # tools:: is spelled out because the tools package is not attached by
  # default; showWarnings = FALSE keeps reruns quiet.
  new_folder <- tools::file_path_sans_ext(f_in)
  dir.create(new_folder, showWarnings = FALSE)
  x <- data.table::fread(f_in) %>%
    select(name = 1, gender = 2, freq = 3) %>%
    mutate(gender = ifelse(grepl("F", gender), "female", "male"))
  # Dataset contains names for males and females
  # so that's what I'm using to mimic your split
  out <- split(x, x$gender)
  written <- lapply(names(out), function(i) {
    # New filename for each iteration of the split dataframes
    ###### THIS IS WHERE YOU NEED TO TWEAK FOR YOUR NEEDS
    new_dest_file <- sprintf("%s/%s.txt", new_folder, i)
    # Write the sub-data-frame to the new file
    data.table::fwrite(out[[i]], new_dest_file)
    # For our purposes return a dataframe with file info on the new file
    data.frame(
      file_name = new_dest_file,
      file_size = file.size(new_dest_file),
      stringsAsFactors = FALSE)
  })
  # Base rbind replaces the deprecated jsonlite::rbind.pages()
  do.call(rbind, written)
}
Now we can just loop through:
NOTE: for my purposes I'm not going to spend time looping through each file, for your purposes this would apply to each of your initial files, or in my case all_fi rather than all_fi[2:5].
> rbind.pages(lapply(all_fi[2:5], f.it))
============================ =========
file_name file_size
============================ =========
inst/data/yob1881/female.txt 16476
inst/data/yob1881/male.txt 15306
inst/data/yob1882/female.txt 18109
inst/data/yob1882/male.txt 16923
inst/data/yob1883/female.txt 18537
inst/data/yob1883/male.txt 15861
inst/data/yob1884/female.txt 20641
inst/data/yob1884/male.txt 17300
============================ =========

Creating a list of raster bricks from a multivariate netCDF file

I've been working with the RCP (Representative Concentration Pathway) spatial data. It's a nice gridded dataset in netCDF format. How can I get a list of bricks where each element represents one variable from a multivariate netCDF file (by variable I don't mean lat, lon, time, depth, etc.)? This is what I've tried to do. I can't post an example of the data, but I've set up the script below to be reproducible if you want to look into it. Questions are welcome, of course; I might not have described the code very smoothly. Cheers.
A: Package requirements
library(sp)
library(maptools)
library(raster)
library(ncdf)
library(rgdal)
library(rasterVis)
library(latticeExtra)
B: Gather data and look at the netCDF file structure
# Download the zipped netCDF file into a temporary file and unpack it
td <- tempdir()
tf <- tempfile(pattern = "fileZ")
download.file(url = "http://tntcat.iiasa.ac.at:8787/RcpDb/download/R85_NOX.zip",
              destfile = tf, mode = "wb")
nc <- unzip(zipfile = tf, exdir = td)
list.files(td)
## Take a look at the netCDF file structure, beyond this I don't use the ncdf package directly
ncFile <- open.ncdf(nc)
print(ncFile)
# First 12 variable names; used later to try to make a list of bricks
vars <- names(ncFile$var)[1:12]
C: Create a raster brick for one variable. Levels correspond to years
# Brick for the single variable "emiss_ene"; zeros are treated as NA
r85NOXene <- brick(nc, varname = "emiss_ene", lvar = 3)
NAvalue(r85NOXene) <- 0
dim(r85NOXene) # [1] 360 720 12
D: Names to faces
data(wrld_simpl) # in maptools
# Slots are accessed with `@`; a `#` here would comment out the rest of the
# line and leave the call unclosed.
worldPolys <- SpatialPolygons(wrld_simpl@polygons)
cTheme <- rasterTheme(region = rev(heat.colors(20)))
# Plot layer 4 on a log10 colour scale with country outlines on top
levelplot(r85NOXene, layers = 4, zscaleLog = 10,
          main = "2020 NOx Emissions From Power Plants",
          margin = FALSE, par.settings = cTheme) +
  layer(sp.polygons(worldPolys))
E: Summarize all grid cells for each year one variable "emis_ene", I want to do this for each variable of the netCDF file I'm working with.
# Cell values of the brick: one column per layer
gVals <- getValues(r85NOXene)
dim(gVals)
# Total over all grid cells for each of the 12 layers.
# Other conversions are needed, but not for the question.
r85NOXeneA <- sapply(seq_len(12), function(layer) {
  grid <- matrix(gVals[, layer], nrow = 360)
  sum(grid, na.rm = TRUE)
})
F: Another meet and greet. Check out how E looks
library(ggplot2) # loaded here because of masking issues with latticeExtra
# Years matching the 12 layers: 2000, 2005, then every 10 years to 2100
years <- c(2000, 2005, seq(from = 2010, to = 2100, by = 10))
usNOxDat <- data.frame(years = years, NOx = r85NOXeneA)
ggplot(data = usNOxDat, mapping = aes(x = years, y = (NOx))) +
  geom_line() # names to faces again
detach(package:ggplot2, unload = TRUE)
G: Attempt to create a list of bricks. A list of objects created in part C
# Attempt to build one brick per variable name in `vars`, with 0 as NA value.
# A list of bricks seemed a good structure to repeat step (E) for each
# netCDF variable.
# This doesn't break, but it returns all variables in each element of the
# list, whereas one variable per element is wanted.
# With brick() you can ask for one variable from a netCDF file, as in (C),
# so why can't the variable names be looped over to return one variable for
# each list element?
brickLst <- lapply(seq_len(12), function(idx) {
  one_var <- brick(nc, lvar = 3, varname = vars[idx])
  NAvalue(one_var) <- 0
  one_var
})
H: Get rid of the junk you might have downloaded... Sorry
# Close the netCDF handle first: removing a file that is still open can fail
close(ncFile)
file.remove(dir(td, pattern = "^fileZ", full.names = TRUE))
file.remove(dir(td, pattern = "^R85", full.names = TRUE))
Your (E) step can be simplified using cellStats.
# Sum of all cells for each layer of the brick read for variable `x`.
# x: name of one netCDF variable; zeros are treated as NA.
foo <- function(x) {
  var_brick <- brick(nc, lvar = 3, varname = x)
  NAvalue(var_brick) <- 0
  cellStats(var_brick, stat = 'sum')
}
# Apply it to every variable name
sumLayers <- sapply(vars, foo)
sumLayers is the result you are looking for, if I understood your question correctly.
Moreover, you may use the zoo package because you are dealing with time series.
library(zoo)
# Index the per-layer sums by the brick's z values and plot them as a
# time series
tt <- getZ(r85NOXene)
z <- zoo(x = sumLayers, order.by = tt)
xyplot(z)

Resources