Reading multiple files in R as string in a for.loop - r

I have code that reads two different csv files from a folder at execution time. I need to use a for loop to run it multiple times and write each output to a separate csv file of the form "bsc_.csv". The two input csv files are named "base_.csv" and "fut_.csv". The files are incrementally numbered, and that is the pattern I need to iterate over. The sample code is attached below.
# Run CDFt on one base/future pair of CSV files and write the rounded,
# non-negative DS component to "bsc1.csv".
library('CDFt')

d1 <- read.csv("base1.csv", header = TRUE)
d2 <- read.csv("fut1.csv", header = TRUE)

# Columns 2 and 3 of the base file and column 2 of the future file are the
# three series handed to CDFt(), in this order.
A1 <- d1[, 2]
A2 <- d1[, 3]
A3 <- d2[, 2]

CT <- CDFt(A1, A2, A3)

# Components of the CDFt result; only DS is used for the output below.
x <- CT$x
FGp <- CT$FGp
FGf <- CT$FGf
FRp <- CT$FRp
FRf <- CT$FRf
ds <- CT$DS

# Round to three decimals and replace negative values with zero.
d <- round(ds, 3)
dat <- replace(d, d < 0, 0)

# append = TRUE adds to an existing "bsc1.csv" instead of overwriting it.
write.table(dat, "bsc1.csv", row.names = FALSE, na = "NA", append = TRUE,
            quote = FALSE, sep = ",", col.names = FALSE)

Try this (untested):
# Pair every "base<N>.csv" with the "fut<N>.csv" that has the same suffix and
# write one "bsc<N>.csv" per pair.

# Anchor the patterns at the start so names such as "database1.csv" are not
# picked up by accident.
bases <- list.files(pattern = "^base[0-9]*\\.csv$")
futs <- list.files(pattern = "^fut[0-9]*\\.csv$")

# Drop (with a warning) any base file that has no matching fut file ...
mismatches <- setdiff(sub("^base", "", bases), sub("^fut", "", futs))
if (length(mismatches) > 0) {
  warning("'bases' files not in 'futs': ",
          paste(sQuote(mismatches), collapse = ", "))
  bases <- setdiff(bases, paste0("base", mismatches))
}
# and the reverse
mismatches <- setdiff(sub("^fut", "", futs), sub("^base", "", bases))
if (length(mismatches) > 0) {
  warning("'futs' files not in 'bases': ",
          paste(sQuote(mismatches), collapse = ", "))
  futs <- setdiff(futs, paste0("fut", mismatches))
}

# Map() pairs the two vectors by position, so make sure they line up.
stopifnot(identical(sub("^base", "", bases), sub("^fut", "", futs)))

ign <- Map(function(fb, ff) {
  bdat <- read.csv(fb, header = TRUE)
  fdat <- read.csv(ff, header = TRUE)

  # Same computation as the single-file script in the question: columns 2 and
  # 3 of the base file and column 2 of the fut file go to CDFt(); the DS
  # component is rounded and negative values are set to zero.
  ct <- CDFt::CDFt(bdat[, 2], bdat[, 3], fdat[, 2])
  d <- round(ct$DS, 3)
  dat <- replace(d, d < 0, 0)

  # "base<N>.csv" -> "bsc<N>.csv"
  newfn <- sub("^base", "bsc", fb)
  # Arguments copied from the question; append = TRUE adds to an existing
  # output file instead of overwriting it.
  write.table(dat, newfn, row.names = FALSE, na = "NA", append = TRUE,
              quote = FALSE, sep = ",", col.names = FALSE)
  invisible(newfn)
}, bases, futs)

Related

How to convert .txt to .csv file in R

I found this code here, and it worked to convert the '.txt' to '.csv' but the file is not broken into columns, pretty sure there's an easy fix or line to add here, but I'm not finding it. Still new to r and working through, so any help or direction is appreciated.
EDIT: The file contains the following, a list of invasive plants:
Header: Noxious Weed List.
'(a) Abrus precatorius – rosary pea '
'(b) Aeginetia spp. – aeginetia'
'(c) Ageratina adenophora – crofton weed '
'(d) Alectra spp. – alectra '
And so I would like to get all the parts, i.e., genus, species, and common name, in a separate column. and if possible, delete the letters '(a)' and the ' - ' separating hyphen.
# Convert every ".txt" file in the working directory to a ".csv" file with
# the same base name.
# The pattern is a regular expression: escape the dot and anchor at the end so
# only names that really end in ".txt" are matched.
filelist <- list.files(pattern = "\\.txt$")

# Iterating over the vector itself does nothing when no file matches
# (1:length(filelist) would run with i = 1 and i = 0).
for (input in filelist) {
  output <- sub("\\.txt$", ".csv", input)
  print(paste("Processing the file:", input))
  data <- read.delim(input, header = TRUE)
  write.table(data, file = output, sep = ",", col.names = TRUE,
              row.names = FALSE)
}
You'll need to adjust if you have common names with three or more words, but this is the general idea:
# Split the space-separated weed list into Genus / Species / CommonName
# columns and save the result as a CSV next to the input file.
path <- "C:\\Your File Path Here\\"
file <- paste0(path, "WeedList.txt")

# Every space starts a new column.
DT <- read.delim(file, header = FALSE, sep = " ")

# Drop the first row and columns 1, 4 and 7 (for the sample lines these are
# presumably the header, the "(a)" marker, the dash and the closing quote).
DT <- DT[-1, -c(1, 4, 7)]
colnames(DT) <- c("Genus", "Species", "CommonName", "CommonName2")

# Remove stray apostrophes from both halves of the common name and join them
# with a single space; the second half is then no longer needed.
DT$CommonName <- paste(
  gsub("'", "", DT$CommonName),
  gsub("'", "", DT$CommonName2)
)
DT <- DT[, -4]

write.csv(DT, paste0(path, "WeedList.csv"), row.names = FALSE)

For loop using grepl

Example Data
I'm writing a script with the intent to copy input files, each to multiple locations. Below is an example of functional code to achieve this:
# Each section copies one input workbook to three destinations; the third
# destination gets today's date (YYYYMMDD) appended to the file name.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.

##### File 1 #####
output_paths_1 <- list(c(paste0(path_1, "file_1", ".xlsx"),
                         paste0(path_2, "file_1", ".xlsx"),
                         paste0(path_3, "file_1", " ", gsub("-", "", Sys.Date()), ".xlsx")))
lapply(output_paths_1, function(x) file.copy(paste0(input_path, "input_1.xlsx"), x, overwrite = TRUE))

##### File 2 #####
output_paths_2 <- list(c(paste0(path_1, "file_2", ".xlsx"),
                         paste0(path_2, "file_2", ".xlsx"),
                         paste0(path_3, "file_2", " ", gsub("-", "", Sys.Date()), ".xlsx")))
lapply(output_paths_2, function(x) file.copy(paste0(input_path, "input_2.xlsx"), x, overwrite = TRUE))

##### File 3 #####
output_paths_3 <- list(c(paste0(path_1, "file_3", ".xlsx"),
                         paste0(path_2, "file_3", ".xlsx"),
                         paste0(path_3, "file_3", " ", gsub("-", "", Sys.Date()), ".xlsx")))
lapply(output_paths_3, function(x) file.copy(paste0(input_path, "input_3.xlsx"), x, overwrite = TRUE))
Reprex
But I suspect there are more efficient methods. In my latest attempt, which does not work, I used a nested 'for' loop. I create data frames containing each input and file name. Then (in theory), for each i in inputs, I write an output paths data frame for each i in files. I filter this data frame for only one file at a time using grepl. See code below:
# Nested-loop attempt from the question (reported as not working); kept
# unchanged so the warning quoted below still corresponds to this code.
# NOTE(review): `files` and `inputs` are one-column data frames, so
# seq_along() on them counts columns (1), not the three rows.
files <- data.frame(data = c("file_1", "file_2", "file_3"))
inputs <- data.frame(data = c("input_1.xlsx", "input_2.xlsx", "input_3.xlsx"))
# NOTE(review): both loops use the same index `i`, so the inner loop
# overwrites the outer loop's value.
for (i in seq_along(inputs)) {
for (i in seq_along(files)) {
# files[[i]] is the whole `data` column (all three names), not a single name;
# passing it to grepl() as the pattern is what triggers the
# "argument 'pattern' has length > 1" warning.
output_paths <- data.frame(data = c(paste0(path_1, files[[i]], ".xlsx"),
paste0(path_2, files[[i]], ".xlsx"),
paste0(path_3, files[[i]], " ", gsub("-", "", Sys.Date()), ".xlsx"))) %>%
filter(grepl(files[[i]], `data`))
# lapply() over a data frame iterates over its columns, so `x` is the full
# vector of remaining output paths; inputs[[i]] is likewise the full column.
lapply(output_paths, function (x) file.copy(paste0(input_path, inputs[[i]]), x, overwrite = T))
}
}
I expected this to copy the first file to three locations, then the next file to those same locations, etc. Instead, the following Warning appears, and only the first file is copied to the desired locations:
Warning message:
In grepl(files[[i]], data) :
argument 'pattern' has length > 1 and only the first element will be used
Running the code without including the grepl function does nothing at all - no files are copied to the desired locations.
Questions:
How might I tweak the code above to iterate for all elements, instead of the first element only?
Is there a more elegant approach entirely? (just looking for pointers, not reprex necessarily)
I don't understand what you are trying to accomplish with your "Reprex" approach. But if you want to do what your first bit of code does while writing less code, then you could do something like
# Build every (file, output path) combination once, then copy them all with a
# single vectorised file.copy() call.
files <- c("file1", "file2", "file3") # file names
opaths <- c("path1", "path2", "path3") # output paths

# One row per file/destination pair.
df <- expand.grid(file = files, path = opaths, stringsAsFactors = FALSE)
df$from <- file.path(input_path, df$file)
df$to <- file.path(df$path, df$file)

# overwrite = TRUE matches the code in the question; with the default
# (FALSE) existing destination files would be skipped.
file.copy(from = df$from, to = df$to, overwrite = TRUE)
If you want the timestamp in the file name for path3, you could then do something like
# Prefix the file name with today's date (YYYYMMDD_) for every row whose
# destination is "path3"; the row mask is computed once and reused.
is_path3 <- df$path == "path3"
stamped_name <- paste0(format(Sys.Date(), "%Y%m%d_"), df$file[is_path3])
df$to[is_path3] <- file.path(df$path[is_path3], stamped_name)

How do I repeat this search function in R?

I have several rtf files and would like to search if certain words appear in the files. If it appears, I would like to extract the entire sentence with that word in it.
This is the part of the code that I would like to repeat. I have tried this on the document 'a1' but I have documents a1 to a10 in the same folder that I would like to search.
library(striprtf)

# `pattern` is a regular expression, not a glob: match names ending in ".rtf".
files <- list.files(path = "/Users/Jane/R/Test", pattern = "\\.rtf$",
                    full.names = TRUE, recursive = FALSE)

# Read one document and split its text at every full stop.
rtf <- read_rtf("a1.rtf", verbose = FALSE, row_start = "*| ", row_end = "",
                cell_end = " | ", ignore_tables = FALSE, check_file = TRUE)
text <- unlist(strsplit(rtf, "\\."))
The final output should be one data frame with a column for the file name (i.e. a1) and a column for the matching sentence.
You can use lapply() and define a function to do what you want. results will be a list with all your matches data frames
library(striprtf)

# `pattern` is a regular expression, not a glob: match names ending in ".rtf".
files <- list.files(path = "/Users/Jane/R/Test", pattern = "\\.rtf$",
                    full.names = TRUE, recursive = FALSE)
# files <- paste0("a", 1:10, ".rtf")

# Search terms, combined once into a single alternation ("passive|fund act").
toMatch <- c("passive", "fund act")
search_pattern <- paste(toMatch, collapse = "|")

# One data frame per file: the unique matching sentences plus the name of the
# file (without directory or extension) they came from.
# do.call(rbind, results) stacks them into a single data frame.
results <- lapply(files, function(x) {
  rtf <- read_rtf(x, verbose = FALSE, row_start = "*| ", row_end = "",
                  cell_end = " | ", ignore_tables = FALSE, check_file = TRUE)
  # Split the text at every full stop.
  text <- unlist(strsplit(rtf, "\\."))
  matches <- unique(grep(search_pattern, text, value = TRUE))
  # rep() keeps the two columns the same length when nothing matched.
  data.frame(
    matches = matches,
    file = rep(tools::file_path_sans_ext(basename(x)), length(matches))
  )
})

How to load a txt file one by one in R rather than read all at once and combine into a single matrix

I have 100 text files in a folder. I can use the function below to read all the files and store them in myfile.
# Read every file in the Test folder (skipping the first 6 lines of each) and
# stack the resulting tables into one data frame.
# Argument names are spelled out in full rather than relying on partial
# matching (full=, head=), and TRUE/FALSE replace T/F.
file_list <- list.files("C:/Users/User/Desktop/code/Test/", full.names = TRUE)
file_con <- lapply(file_list, function(x) {
  read.table(x, header = FALSE, quote = "\"", skip = 6, sep = ",")
})
myfile <- do.call(rbind, file_con)
My question is how I can read the first file in the Test folder before I read the second file. The text file names are all different, and I cannot rename them to, for example, the numbers 1 to 100. I was thinking I could maybe add an integer in front of each text file name and then use a for loop to match and read each file, but is this possible?
I need to read the first file, do some calculation, and export the result to result.txt before reading the second file. Right now I am doing this manually, and I have almost 800 files, so sitting and waiting for each one to compute is a big problem for me. The code below is what I currently use.
# Read one comma-separated file without a header row.
myfile <- read.table(
  "C:/Users/User/Desktop/code/Test/20081209014205.txt",
  header = FALSE,
  quote = "\"",
  skip = 0,
  sep = ","
)
The following setup will read one file at the time, perform an analysis,
and save it back with a slightly modified name.
# Map each input path to its output path: the same name with "-e" inserted
# before the ".txt" extension. The names of the vector are the input paths,
# so the output path can be looked up by input file.
save_file_list <- setNames(
  sub(pattern = "\\.txt$", replacement = "-e.txt", x = file_list),
  file_list
)

your_function <- function(.file_content) {
  ## The analysis you want to do on the content of each file.
}

# Read, analyse and write one file at a time.
for (.file in file_list) {
  .file_content <- read.table(
    file = .file,
    header = FALSE,
    quote = "\"",
    skip = 6,
    sep = ","
  )
  .result <- your_function(.file_content)
  # [[ ]] extracts the plain output path (no names attribute) for this input.
  write.table(
    x = .result,
    file = save_file_list[[.file]]
  )
}
Now I can read a file and do calculation using
# Read each file in turn, run the calculation, and write the result to the
# Result folder as "-<index>.txt".
# seq_along() visits exactly the files that exist instead of assuming 100.
for (e in seq_along(file_list)) {
  myfile <- read.table(file_list[e], header = FALSE, quote = "\"", skip = 0,
                       sep = ",")
  # `condition` and `Calculation` are placeholders for the actual analysis.
  while (condition) {
    Calculation
  }
  myresult <- file.path("C:/Users/User/Desktop/code/Result/",
                        paste0("-", e, ".txt"))
  write.table(x, file = myresult, row.names = FALSE, col.names = FALSE,
              sep = ",")
}
Now my problem is: how can I give each output file the same name as the original file, but with "-e" added at the end?

create a loop: convert .txt to .csv in R

I am trying to convert all my .txt files to .csv, but I haven't managed to write the loop.
The actual line for one file (which works perfectly) would be the following:
# Read one tab-delimited file, skipping its first 11 lines, and write it back
# out as comma-separated text without row names.
tab <- read.delim("name_file", header = TRUE, skip = 11)
write.table(
  tab,
  file = "name_file.csv",
  sep = ",",
  col.names = TRUE,
  row.names = FALSE
)
And I would like to do that for all the .txt file I have in wd.
I tried the following loop, based on some research on the web, but I am not sure it's the right one:
# Asker's attempt at the conversion loop, kept unchanged because the answers
# below refer to its problems.
# NOTE(review): ".txt" is a regular expression here, so the dot matches any
# character.
FILES = list.files(pattern = ".txt")
for (i in 1:length(FILES)) {
# NOTE(review): this overwrites FILES (the vector of file names) with the data
# frame that was just read, so FILES[i] no longer refers to a file name.
FILES = read.csv(file = FILES[i], header = TRUE, skip = 11, fill = TRUE)
# NOTE(review): sub() takes (pattern, replacement, x); as written the pattern
# is "folder_name", the replacement is ".txt", x is "" and FILES[i] lands in
# the fourth positional argument.
write.csv(FILES, file = paste0(sub("folder_name", ".txt","", FILES[i]), ".csv"))
}
I'm on Windows system.
I would appreciate some help... Thanks!
Hi, I had the same problem as you before, and I have now got it working. Try this:
# Convert every ".txt" file in `directory` to a ".csv" file of the same base
# name in `ndirectory`.
directory <- "put_your_txt_directory_here"
ndirectory <- "put_your_csv_directory_here"

# The patterns are regular expressions: escape the dot and anchor at the end
# so only a trailing ".txt" is matched and replaced.
file_name <- list.files(directory, pattern = "\\.txt$")

# file.path() returns an empty vector when there are no files, whereas
# paste(directory, character(0), sep = "/") would yield the bogus path
# "<directory>/".
files.to.read <- file.path(directory, file_name)
files.to.write <- file.path(ndirectory, sub("\\.txt$", ".csv", file_name))

# seq_along() makes the loop a no-op for an empty file list.
for (i in seq_along(files.to.read)) {
  temp <- read.csv(files.to.read[i], header = TRUE, skip = 11, fill = TRUE)
  write.csv(temp, file = files.to.write[i])
}
You need to index the output inside the loop as well. Try this:
# Read every ".txt" file in the working directory, keep each table in
# OUTFILES, and write it out as a ".csv".
INFILES <- list.files(pattern = "\\.txt$")

# A list holds one complete data frame per file. Assigning a data frame into
# a single element of a character vector would keep only its first column.
OUTFILES <- vector(mode = "list", length = length(INFILES))

# seq_along() makes the loop a no-op when no file matches.
for (i in seq_along(INFILES)) {
  OUTFILES[[i]] <- read.csv(file = INFILES[i], header = TRUE, skip = 11,
                            fill = TRUE)
  # "folder_name" is a placeholder prefix pasted directly in front of the file
  # name (no separator is added); the ".txt" extension is replaced by ".csv".
  write.csv(OUTFILES[[i]],
            file = paste0("folder_name", sub("\\.txt$", "", INFILES[i]), ".csv"))
}
Assuming that your input files always have at least 11 rows (since you skip the first 11 rows!) this should work:
# Convert every ".txt" file in the working directory to "<name>.txt.csv",
# skipping the first 11 lines of each input file.
# The pattern is a regular expression: escape the dot and anchor at the end.
filelist <- list.files(pattern = "\\.txt$")

# seq_along() makes the loop a no-op when no file matches
# (1:length(filelist) would run with i = 1 and i = 0).
for (i in seq_along(filelist)) {
  cur.input.file <- filelist[i]
  cur.output.file <- paste0(cur.input.file, ".csv")
  print(paste("Processing the file:", cur.input.file))

  # If the input file has fewer than 11 rows you will receive the error
  # message: "Error in read.table: no lines available in input"
  data <- read.delim(cur.input.file, header = TRUE, skip = 11)
  write.table(data, file = cur.output.file, sep = ",", col.names = TRUE,
              row.names = FALSE)
}
If you receive any error during file conversion, it is caused by the content (e.g. unequal number of rows per column, unequal number of columns etc.).
PS: Using a for loop is OK here since it does not limit the performance (there is no "vectorized" logic to read and write files).

Resources