I am so close to getting my code to work, but cannot seem to figure out how to get a dynamic file name. Here is what I've got:
require(ncdf)
require(raster)
require(rgdal)
## Reads the "precipitation" variable of every input file and writes it out
## with format 'GTiff'.
## For multiple files, use a for loop
## Input directory
dir.nc <- 'inputdirectoy'
## Full paths of all files below dir.nc, subdirectories included
files.nc <- list.files(dir.nc, full.names = T, recursive = T)
## Output directory
dir.output <- 'outputdirectory'
## For simplicity, I use "i" as the file name, but would like to have a dynamic one
for (i in 1:length(files.nc)) {
## Load the "precipitation" variable of the i-th file
r.nc <- raster(files.nc[i], varname = "precipitation")
## The output name is dir.output, the loop index and '.tiff' pasted together
## with an empty separator, so it carries no trace of the input file name
writeRaster(r.nc, paste(dir.output, i, '.tiff', sep = ''), format = 'GTiff', prj = T, overwrite = T)
}
## END
I appreciate any help. So close!!
You can do this in different ways, but I think it is generally easiest to first create all the output filenames (and check if they are correct) and then use these in the loop.
So something like this:
library(raster)
# Folder that will receive the converted files.
outpath <- 'outputpath'
# Full paths of everything found in the input folder.
infiles <- list.files(path = 'inputpath', full.names = TRUE)
# Input file names without their directory, extension replaced by '.tif'.
ff <- extension(basename(infiles), value = '.tif')
# One output path per input file, in the same order as 'infiles'.
outfiles <- file.path(outpath, ff)
To assure that you are writing to an existing folder, you can create it first.
dir.create(outpath, showWarnings=FALSE, recursive=TRUE)
And then loop over the files
# Convert each input raster to its pre-computed output file.
# seq_along() gives an empty sequence when no input files were found, whereas
# 1:length(infiles) would iterate over c(1, 0).
for (i in seq_along(infiles)) {
  r <- raster(infiles[i])
  # outfiles[i] is already a complete path, so it is passed as-is.
  writeRaster(r, outfiles[i], overwrite = TRUE)
}
You might also use something along these lines
outfiles <- gsub('in', 'out', infiles)
Here is the code that finally worked:
# Imports
library(raster)

# Source files: full paths of everything in the input folder.
infiles <- list.files("infilepath", full.names = TRUE)

# Dynamic file names: same base name as each input, '.tif' extension, placed
# in the output folder. Print 'outfiles' to check the list before running.
ff <- extension(basename(infiles), ".tif")
outpath <- "outfilepath"
outfiles <- file.path(outpath, ff)

# Make sure the output folder exists before anything is written to it.
dir.create(outpath, showWarnings = FALSE, recursive = TRUE)

# Convert every input file; seq_along() is safe when 'infiles' is empty.
for (i in seq_along(infiles)) {
  r <- raster(infiles[i])
  writeRaster(r, outfiles[i], format = "GTiff", overwrite = TRUE)
}
## END
Related
I have written the following function to combine 300 .csv files. My directory name is "specdata". I have done the following steps for execution,
# Step 1: define the function
# Reads every file under <working directory>/<directory> whose name matches
# '\\.csv' and row-binds the resulting data frames.
x <- function(directory) {
dir <- directory
# Path of the data folder, built from the current working directory
data_dir <- paste(getwd(),dir,sep = "/")
files <- list.files(data_dir,pattern = '\\.csv')
tables <- lapply(paste(data_dir,files,sep = "/"), read.csv, header = TRUE)
# pollutantmean is created inside the function, so it exists only while the
# function runs; its value is also what the function returns (invisibly)
pollutantmean <- do.call(rbind , tables)
}
# Step 2: call the function
# The return value is not assigned to anything here, so it is discarded
x("specdata")
# Step 3: inspect results
# No object named pollutantmean has been created at this level
head(pollutantmean)
Error in head(pollutantmean) : object 'pollutantmean' not found
What is my mistake? Can anyone please explain?
There's a lot of unnecessary code in your function. You can simplify it to:
#' Read all CSV files in a folder into one data frame
#'
#' @param path Folder to scan (not recursive).
#' @return The row-bound contents of every file whose name ends in ".csv",
#'   in the order returned by dir(); NULL when no file matches.
load_data <- function(path) {
  # The pattern is anchored so that names such as "a.csv.bak" are not read.
  files <- dir(path, pattern = "\\.csv$", full.names = TRUE)
  tables <- lapply(files, read.csv)
  do.call(rbind, tables)
}
pollutantmean <- load_data("specdata")
Be aware that do.call + rbind is relatively slow. You might find dplyr::bind_rows or data.table::rbindlist to be substantially faster.
To update Prof. Wickham's answer above with code from the more recent purrr library which he coauthored with Lionel Henry:
# Read every CSV file in the working directory and row-bind the results.
# list.files() takes a regular expression, not a shell glob, so the pattern
# is written as "name ends in .csv".
Tbl <-
  list.files(pattern = "\\.csv$") %>%
  map_df(~read_csv(.))
If the typecasting is being cheeky, you can force all the columns to be read as character with this.
# Same as before, but every column is read as character ("c") so that files
# with differently guessed column types can still be bound together.
# The pattern is a regular expression: "name ends in .csv".
Tbl <-
  list.files(pattern = "\\.csv$") %>%
  map_df(~read_csv(., col_types = cols(.default = "c")))
If you are wanting to dip into subdirectories to construct your list of files to eventually bind, then be sure to include the path name, as well as register the files with their full names in your list. This will allow the binding work to go on outside of the current directory. (Thinking of the full pathnames as operating like passports to allow movement back across directory 'borders'.)
# Read the CSV files of a subdirectory. full.names = TRUE keeps the directory
# in each returned path, so the files can be opened from the current working
# directory. The pattern is a regular expression: "name ends in .csv".
Tbl <-
  list.files(path = "./subdirectory/",
             pattern = "\\.csv$",
             full.names = TRUE) %>%
  map_df(~read_csv(., col_types = cols(.default = "c")))
As Prof. Wickham describes here (about halfway down):
map_df(x, f) is effectively the same as do.call("rbind", lapply(x, f)) but under the hood is much more efficient.
and a thank you to Jake Kaupp for introducing me to map_df() here.
This can be done very succinctly with dplyr and purrr from the tidyverse. Where x is a list of the names of your csv files you can simply use:
bind_rows(map(x, read.csv))
Mapping read.csv to x produces a list of dfs that bind_rows then neatly combines!
```{r echo = FALSE, warning = FALSE, message = FALSE}
## Folder where the individual CSV files are located.
path <- "~/Data/R/BacklogReporting/data/PastDue/global/"
## Full paths are requested so that no setwd() call is needed; the anchored
## pattern keeps out names that merely contain "csv".
file.names <- dir(path, pattern = "\\.csv$", full.names = TRUE)
## Read every file, then stack them in a single step. Starting from "" and
## calling rbind() inside a loop put a spurious first row into the result and
## re-copied the accumulated data on every iteration.
tables <- lapply(file.names, read.csv, header = TRUE, stringsAsFactors = FALSE)
out.file <- do.call(rbind, tables)
write.csv(out.file, file = "~/Data/R/BacklogReporting/data/PastDue/global/global_stacked/past_due_global_stacked.csv", row.names = FALSE) ## directory to write stacked file to
past_due_global_stacked <- read.csv("C:/Users/E550143/Documents/Data/R/BacklogReporting/data/PastDue/global/global_stacked/past_due_global_stacked.csv", stringsAsFactors = FALSE)
## Comma-separated list of the CSV file names found in 'path'.
files <- paste(list.files(path, pattern = "\\.csv$"), collapse = ", ")
```
If your csv files are in another directory, you could use something like this:
#' Read every matching file of a directory and stack the results
#'
#' @param directory Folder to scan, with or without a trailing separator.
#' @param pattern Regular expression passed to list.files() to select files.
#' @return Whatever rbindlist() returns for the list of tables read by
#'   fread() (presumably data.table's functions - confirm which package
#'   provides them in the calling script).
readFilesInDirectory <- function(directory, pattern){
  # list.files() builds the full paths itself, so the result no longer
  # depends on 'directory' ending in a separator.
  files <- list.files(path = directory, pattern = pattern, full.names = TRUE)
  # Every file is read exactly once; the former loop re-read the complete
  # file set once per file and left the result undefined for an empty folder.
  tables <- lapply(files, fread, sep = ",")
  rbindlist(tables)
}
In your current function pollutantmean is available only in the scope of the function x. Modify your function to this
#' Combine the CSV files of a folder into a global variable
#'
#' @param directory Folder name, relative to the current working directory.
#' @return Invisibly, the combined data frame. As a side effect the same
#'   data frame is stored as `pollutantmean` in the global environment.
x <- function(directory) {
  data_dir <- file.path(getwd(), directory)
  files <- list.files(data_dir, pattern = '\\.csv', full.names = TRUE)
  tables <- lapply(files, read.csv, header = TRUE)
  # Without 'envir', assign() would create the variable in this function's own
  # environment, where it disappears as soon as the function returns.
  assign('pollutantmean', do.call(rbind, tables), envir = .GlobalEnv)
}
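assign puts the result of do.call(rbind, tables) into a variable called pollutantmean. By default that variable is created in the environment assign is called from (here, inside the function x), so envir = .GlobalEnv has to be passed for it to end up in the global environment.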
I am trying to read a list of JSON files in my local folder, but the code below does not work. Can anyone help me solve this?
# Names of the files in the working directory that match the pattern
json_files <- list.files(pattern = "*.json")
# length(json_files) is a single number, so the body runs once, with i equal
# to the number of files, rather than once per file
for(i in length(json_files))
{
# json_data3 is not created anywhere in this snippet before being indexed;
# readLines[[i]] subsets the function readLines itself instead of calling it
# on a file
json_data3[[i]] <- fromJSON(paste(readLines[[i]],collapse = ""))
}
I'm not sure which JSON library you are using, but it looks like jsonlite:
library(jsonlite)
# One list element per matching file in the working directory, parsed with
# read_json().
json_data <- lapply(X = list.files(pattern = "*.json"), FUN = read_json)
# The same files, parsed with fromJSON() instead.
json_data2 <- lapply(X = list.files(pattern = "*.json"), FUN = fromJSON)
To preserve the names of your files as list entries you can do:
# Option 1: label the entries of the existing list with the file names.
names(json_data) <- list.files(pattern = "*.json")
# OR
# Option 2: let sapply() keep the file names as list names while reading;
# simplify = FALSE makes it return a plain list.
json_data3 <- sapply(
  X = list.files(pattern = "*.json"),
  FUN = read_json,
  USE.NAMES = TRUE,
  simplify = FALSE
)
I would like to transform 96 .txt files into matrices in R with data.matrix.
Here is part of input data in one files
Domain Phylum Class Order
OTU10001 Fungi Ascomycota Dothideomycetes Capnodiales
OTU10004 Fungi Ascomycota Dothideomycetes Pleosporales
And the code for single files:
BC76_OTU <- data.matrix(BC76.frequencytable)
I am trying to process all the files with data.matrix and write out each file to the environment with the following code:
# The argument x is overwritten immediately: the function converts the
# variable 'files' (looked up outside the function) and ignores its input
Feature_to_matrix <- function(x) {
x <- as.matrix (files)
return(x)
}
# Full paths of the matching files directly inside the given directory
files <- list.files(path="path to directory", pattern="*.txt", full.names=TRUE, recursive=FALSE)
# Here 'Feature_to_matrix' is only the parameter name of the anonymous
# function, so inside it the name holds the current file path; the function
# defined above is never called. The list returned by lapply() is not
# assigned to any variable.
lapply(files, function(Feature_to_matrix) {
t <- read.table(Feature_to_matrix, header=TRUE, row.names=1, sep="")
out <- t
})
But this code doesn't generate output files to the R environment.
Any suggestions?
Thanks!
I also try to write a loop for it
# NOTE(review): the list.files() call on the next line is missing its closing
# parenthesis.
temp = list.files(pattern="*.txt"
for (i in 1:length(temp)) {
# 'sample' is never created in this snippet, so the name refers to an existing
# function, which matches the "object of type 'closure' is not subsettable"
# error quoted below. The read.csv() and write.matrix() calls also share one
# line without a separator between them.
sample[i] <- read.csv(temp[i], header = TRUE,row.names=1,sep = "") write.matrix(sample[i]) }
but I get the following error:
Error in sample[i] <- read.csv(temp[i], header = TRUE, row.names = 1, : object of type 'closure' is not subsettable
Could anyone give me some suggestions on how to modify the code?
I have a list of approximately 500 csv files each with a filename that consists of a six-digit number followed by a year (ex. 123456_2015.csv). I would like to append all files together that have the same six-digit number. I tried to implement the code suggested in this question:
Import and rbind multiple csv files with common name in R but I want the appended data to be saved as new csv files in the same directory as the original files are currently saved. I have also tried to implement the below code however the csv files produced from this contain no data.
# Removes every object from the workspace before starting
rm(list=ls())
# Bare file names (no directory part), because full.names is not set
filenames <- list.files(path = "C:/Users/smithma/Desktop/PM25_test")
# NOTE(review): '\\(' escapes the opening parenthesis, so it looks like it is
# matched literally instead of opening the group that '\\1' refers to - confirm
NAPS_ID <- gsub('.+?\\([0-9]{5,6}?)\\_.+?$', '\\1', filenames)
Unique_NAPS_ID <- unique(NAPS_ID)
n <- length(Unique_NAPS_ID)
for(j in 1:n){
curr_NAPS_ID <- as.character(Unique_NAPS_ID[j])
# This pattern asks for at least one character and an underscore in front of
# the ID, whereas a name such as 123456_2015.csv starts with the ID itself
NAPS_ID_pattern <- paste(".+?\\_(", curr_NAPS_ID,"+?)\\_.+?$", sep = "" )
NAPS_filenames <- list.files(path = "C:/Users/smithma/Desktop/PM25_test", pattern = NAPS_ID_pattern)
# NAPS_filenames holds names without a directory, so read.csv() resolves them
# against the working directory. With sep = "" the output path is built as
# ".../PM25_test/MERGEDMERGED_<ID>.csv".
write.csv(do.call("rbind", lapply(NAPS_filenames, read.csv, header = TRUE)),file = paste("C:/Users/smithma/Desktop/PM25_test/MERGED", "MERGED_", Unique_NAPS_ID[j], ".csv", sep = ""), row.names=FALSE)
}
Any help would be greatly appreciated.
Because you're not doing any data manipulation, you don't need to treat the files like tabular data. You only need to copy the file contents.
# Folder holding the yearly files; the merged files are written here as well,
# so input and output can no longer point at different users' folders.
data_dir <- "C:/Users/smithma/Desktop/PM25_test"
filenames <- list.files(data_dir, full.names = TRUE)
# Leave out the results of an earlier run so they are not merged again.
filenames <- filenames[!startsWith(basename(filenames), "MERGED_")]
NAPS_ID <- substr(basename(filenames), 1, 6)
Unique_NAPS_ID <- unique(NAPS_ID)
for (curr_NAPS_ID in Unique_NAPS_ID) {
  NAPS_filenames <- filenames[startsWith(basename(filenames), curr_NAPS_ID)]
  output_file <- file.path(data_dir, paste0("MERGED_", curr_NAPS_ID, ".csv"))
  for (fname in NAPS_filenames) {
    line_text <- readLines(fname)
    # Write the header from the first file; this call is made without
    # 'append', so it also replaces any older output file
    if (fname == NAPS_filenames[1]) {
      cat(line_text[1], '\n', sep = '', file = output_file)
    }
    # Append every line in the file except the header
    line_text <- line_text[-1]
    # Each line carries its own terminator, so the last line of one file and
    # the first data line of the next can never end up on the same row
    if (length(line_text) > 0) {
      cat(paste0(line_text, '\n'), file = output_file, sep = '', append = TRUE)
    }
  }
}
My changes:
list.files(..., full.names = TRUE) is usually the best way to go.
Because the digits appear at the start of the filenames, I suggest substr. It's easier to get an idea of what's going on when skimming the code.
Instead of looping over the indices of a vector, loop over the values. It's more succinct and less likely to cause problems if the vector's empty.
startsWith and endsWith are relatively new functions, and they're great.
You only care about copying lines, so just use readLines to get them in and cat to get them out.
You might consider something like this:
## Groups the files by the first 6 characters of their name and writes one
## combined csv per group. 'filenames' is expected to hold bare file names
## (no directory part), since 'path' is pasted in front of them below.
six.digit.filenames <- substr(filenames, 1, 6)
path <- "C:/Users/smithma/Desktop/PM25_test/"
unique.numbers <- unique(six.digit.filenames)
for (j in unique.numbers) {
  ## all files whose name starts with the current six characters
  group.files <- filenames[six.digit.filenames == j]
  ## now do your stuff with these files including read them in;
  ## reading into a list and binding once avoids re-copying the growing
  ## result on every iteration
  tables <- lapply(paste0(path, group.files), read.csv)
  data.for.output <- do.call(rbind, tables)
  write.csv(data.for.output, paste0(path, j, '.csv'), row.names = FALSE)
}
I want to write many raster files using a for loop.
# Common root folder of all input rasters
path <- "D:/FolderA/FolderB/FolderC/FolderD/"
# Relative paths of the first raster of each pair
files1 <- c("FolderE1/raster.tif",
"FolderE2/raster.tif",
"FolderE3/raster.tif")
# Relative paths of the second raster of each pair; element i is combined
# with element i of files1 in the loop below
files2 <- c("FolderF1/raster.tif",
"FolderF2/raster.tif",
"FolderF3/raster.tif")
for (i in 1:length(files1)) {
raster1 <- raster(paste(path, files1[i], sep = ""), band = 1)
# The result of this is.na() call is neither stored nor used
is.na(raster1[[0]])
raster2 <- raster(paste(path, files2[i], sep = ""), band = 1)
# Same here: the result is discarded
is.na(raster2[[0]])
# Combine both rasters, passing mean as the function given to mosaic()
mosaicraster <- mosaic(raster1, raster2, fun = mean)
NAvalue(mosaicraster) <- 0
# Output path = path + "mosaics/" + the relative path taken from files1,
# e.g. ".../mosaics/FolderE1/raster.tif" for the first pair
outputfile <- paste(path, "mosaics/", files1[i], sep = "")
# NOTE(review): 'mosaikraster' is never assigned in this snippet; the object
# created above is called 'mosaicraster'. The other writeRaster() calls in
# this document pass format = 'GTiff' rather than type = "GeoTIFF" - confirm
# which argument is intended.
writeRaster(mosaikraster, outputfile , type = "GeoTIFF", datatype = "INT1U", overwrite = TRUE)
# Progress output
print(c(i, "of", length(files1)))
}
How do I create for each file a new folder within "D:/FolderA/FolderB/FolderC/FolderD/mosaics/" which includes FolderE1/, E2/... etc. plus the filename, e.g. mosaic.tif ?
outputfile <- paste(path, "mosaics/", files1[i], sep = "")
Does not give a satisfying result.
Just to demonstrate one method of making folders within a loop: If you have the directories in an object just looping over the elements of that object.
folders1 <- c("FolderE1",
              "FolderE2",
              "FolderE3")
for (i in folders1) {
  # creates a dir named after the current element of folders1
  dir.create(i, showWarnings = FALSE)
  # The target file is addressed through its path instead of changing into
  # the folder with setwd(), so a failure inside the loop cannot leave the
  # session in the wrong working directory.
  tiff(file.path(i, 'raster.tif'))  # plots your picture
  plot(rnorm(10, rnorm(10)))
  dev.off()
}
Just a warning: this is all a bit dangerous because mistakes can make a big mess.