Multiple files process with different output names in R - r

I have a number data set (say 50 files) of csv files: crasha, crashabd, crashd, …
I wrote a function to do some changes and analysis for a single data. I want to have a dynamic name for output. For example, I want to have newcrasha, newcrashabd, newcrashd, and … as output csv files. Indeed, I want to get names of imported files and use these as output filenames?
For example:
filenames <- list.files(path = "D:/health/car crash/", pattern = "csv",full.names = TRUE)
analyze <- function(filename) {
# Input is character string of a csv file.
crash <- read.csv(file = filename, header = TRUE)
#merg and summation (crashcounter and NUMBER_INJURED)
newcrash<-crash %>% group_by(COLLISION_DATE) %>% summarise(crashcounter = sum(crashcounter), NUMBER_INJURED = sum(NUMBER_INJURED))
write.csv( newcrash, "D://health//car crash// newcrash.csv", row.names = FALSE)
}
filenames <- filenames[1:50]
for (f in filenames) {
analyze(f)
}
Thank you for any help

try this, following the suggestion of #mhovd:
filename <- list.files(path = "D:/health/car crash/", pattern = "csv",full.names = TRUE)
analyze <- function(filename) {
# Input is character string of a csv file.
crash <- read.csv(file = filename, header = TRUE)
#merg and summation (crashcounter and NUMBER_INJURED)
newcrash<-crash %>% group_by(COLLISION_DATE) %>% summarise(crashcounter = sum(crashcounter), NUMBER_INJURED = sum(NUMBER_INJURED))
new.name <- paste0("D:/health/car crash/new",basename(tools::file_path_sans_ext(filename)),".csv")
write.csv( newcrash, file=new.name, row.names = FALSE)
}
lapply(filename[1:50], analyze)

Related

Error when looping through files and outputting separate files in R?

I have a list of 50 text files all beginning with NEW.
I want to loop through each textfile/dataframe and run some function and then output the results via the write.table function. Therefore for each file, a function is applied and then an output should be created containing the original name with output at the end.
Here is my code.
fileNames <- Sys.glob("*NEW.*")
for (fileName in fileNames) {
df <- read.table(fileName, header = TRUE)
FUNCTION (not shown as this works)
...
result <-print(chr1$results) #for each file a result would be printed.
write.table(result, file = paste0(fileName,"_output.txt"), quote = F, sep = "\t", row.names = F, col.names = T)
#for each file a new separate file is created with the original output name retained.
}
However, I only get one output rather than 50 output files. It seems like its only looping through one file. What am I doing wrong?
readme <- function(folder_name = "my_texts"){
file_list <- list.files(path = folder_name, pattern = "*.txt",
recursive = TRUE, full.names = TRUE).
#list files with .txt ending
textdata <- lapply(file_list, function(x) {.
paste(readLines(x), collapse=" ").
}).
#apply readlines over the file list.
data.table::setattr(textdata, "names", file_list) .
#add names attribute to textdata from file_list.
lapply(names(file_list), function(x){.
lapply(names(file_list[[x]]), function(y) setattr(DT[[x]], y,
file_list[[x]][[y]])).
}).
#set names attribute over the list.
df1 <- data.frame(doc_id = rep(names(textdata), lengths(textdata)),
doc_text = unlist(textdata), row.names = NULL).
#convert to dataframe where names attribute is doc_id and textdata is text.
return(df1).
}

Get the number of rows and columns of a multiple CSV file

Is there any way to get information about the number of rows and columns of a multiple CSV file in R and save it in a CSV file? Here is my R code:
#Library
if (!require("tidyverse")) install.packages("tidyverse")
if (!require("fs")) install.packages("fs")
#Mentioning Files Location
file_paths <- fs::dir_ls("C:\\Users\\Desktop\\FileCount\\Test")
file_paths[[2]]
#Reading Multiple CSV Files
file_paths %>%
map(function(path)
{
read_csv(path,col_names = FALSE)
})
#Counting Number of Rows
lapply(X = file_paths, FUN = function(x) {
length(count.fields(x))
})
#Counting Number of Columns
lapply(X = file_paths, FUN = function(x) {
length(ncol(x))
})
#Saving CSV File
write.csv(file_paths,"C:\\Users\\Desktop\\FileCount\\Test\\FileName.csv", row.names = FALSE)
Couple of things are not working:
Number of Columns of a multiple CSV file
When I am saving the file, I want to save Filename, number of rows and number of columns. See attached image.
How the output looks like:
Attached some CSV Files for testing: Here
Any help appreciated.
Welcome on SO! Using the tidyverse and data.table, here's a way to do it:
Note: All the .csv files are in my TestStack directory, but you can change it with your own directory (C:/Users/Desktop/FileCount/Test).
Code:
library(tidyverse)
csv.file <- list.files("TestStack") # Directory with your .csv files
data.frame.output <- data.frame(number_of_cols = NA,
number_of_rows = NA,
name_of_csv = NA) #The df to be written
MyF <- function(x){
csv.read.file <- data.table::fread(
paste("TestStack", x, sep = "/")
)
number.of.cols <- ncol(csv.read.file)
number.of.rows <- nrow(csv.read.file)
data.frame.output <<- add_row(data.frame.output,
number_of_cols = number.of.cols,
number_of_rows = number.of.rows,
name_of_csv = str_remove_all(x,".csv")) %>%
filter(!is.na(name_of_csv))
}
map(csv.file, MyF)
Output:
number_of_cols number_of_rows name_of_csv
1 3 2150 CH_com
2 2 34968 epci_com_20
3 3 732 g1g4
4 7 161905 RP
I have this output because my TestStack had 4 files named CH_com.csv, epci_com_20.csv,...
You can then write the object data.frame.output to a .csv as you wanted: data.table::fwrite(data.frame.output, file = "Output.csv")
files_map <- "test"
files <- list.files(files_map)
library(data.table)
output <- data.table::rbindlist(
lapply(files, function(file) {
dt <- data.table::fread(paste(files_map, file, sep = "/"))
list("number_of_cols" = ncol(dt), "number_of_rows" = nrow(dt), "name_of_csv" = file)
})
)
data.table::fwrite(output, file = "Filename.csv")
Or with map and a seperate function to do the tasks, but without using an empty table first and update it with a global assignment. I see this happen a lot on apply functions, while it is not needed at all.
myF <- function(file) {
dt <- data.table::fread(paste(files_map, file, sep = "/"))
data.frame("number_of_cols" = ncol(dt), "number_of_rows" = nrow(dt), "name_of_csv" = file)
}
output <- do.call(rbind, map(files, myF))

Extracting specific rows from each text files and store in one txt file

ListOfFileNames= list.files(path = "D:/in/",
pattern = '*.txt',recursive = T)
options(stringsAsFactors = F)
setwd("D:/in/")
outFile <- file("output.txt", "w")
for (i in ListOfFileNames){
x = read.delim(ListOfFileNames[i], skip = 29, nrows= 1)
x = as.character(x)
writeLines(x, paste('D:/out/out.csv',sep = ","))
}
enter link description hereThis the txt files that I have.
I would like to extract row number 30 and 63 from each txt file and save it into one txt file. How can I solve this in R ? This is the codes that I try to extract row number 30 and store it in one csv file. But it doesn't work. Could you please help ?
Thanks
You can try :
ListOfFileNames= list.files(path = "D:/in/",
pattern = '*.txt',recursive = TRUE, full.names = TRUE)
result <- do.call(rbind, lapply(ListOfFileNames, function(x)
read.csv(x)[c(30, 63), ]))
write.csv(result, 'D:/out/out.csv', row.names = FALSE)

R write.csv is creating an empty file

Some background for my question: This is an R script that a previous research assistant wrote, but he did not provide any guidance to me on using it for myself. After working through an R textbook, I attempted to use the code on my data files.
What this code is supposed to do is load multiple .csv files, delete certain items/columns from them, and then write the new cleaned .csv files to a specified directory.
Currently, the files are being created in the right directory with the right file name, but the .csv files that are being created are empty.
I am currently getting the following error message:
Warning in
fread(input = paste0("data/", str_match(pattern = "CAFAS|PECFAS",: Starting data input on line 2 and discarding line 1 because it has too few or too many items to be column names or data: (variable names).
This is my code:
library(data.table)
library(magrittr)
library(stringr)
# create a function to delete unnecessary variables from a CAFAS or PECFAS
data set and save the reduced copy
del.items <- function(file){
data <- fread(input = paste0("data/", str_match(pattern = "CAFAS|PECFAS",
string = file) %>% tolower, "/raw/", file), sep = ",", header = TRUE,
na.strings = "", stringsAsFactors = FALSE, skip = 0, colClasses =
"character", data.table = FALSE)
data <- data[-grep(pattern = "^(CA|PEC)FAS_E[0-9]+(TR?(Initial|[0-
9]+|Exit)|SP[a-z])_(G|S|Item)[0-9]+$", x = names(data))]
write.csv(data, file = paste0("data/", str_match(pattern = "CAFAS|PECFAS",
string = file) %>% tolower, "/items-del/", sub(pattern = "ExportData_", x =
file, replacement = "")) %>% tolower, row.names = FALSE)
}
# delete items from all cafas data sets
cafas.files <- list.files("data/cafas/raw", pattern = ".csv")
for (file in cafas.files){
del.items(file)
}
# delete items from all pecfas data sets
pecfas.files <- list.files("data/pecfas/raw", pattern = ".csv")
for (file in pecfas.files){
del.items(file)
}

create a loop: convert .txt to .csv in R

I try to convert all my .txt files in .csv, but I didn't manage to create the loop.
The actual line for one file (which works perfectly) would be the following:
tab = read.delim("name_file", header = TRUE, skip = 11)
write.table(tab, file="name_file.csv",sep=",",col.names=TRUE,row.names=FALSE)
And I would like to do that for all the .txt file I have in wd.
I tried the loop with, based on some reasearch on the web, but I am not sure it's the right one:
FILES = list.files(pattern = ".txt")
for (i in 1:length(FILES)) {
FILES = read.csv(file = FILES[i], header = TRUE, skip = 11, fill = TRUE)
write.csv(FILES, file = paste0(sub("folder_name", ".txt","", FILES[i]), ".csv"))
}
I'm on Windows system.
I would appreciate some help... Thanks!
Hi I have the same problem before just like you, and now I made it works. Try this:
directory <- "put_your_txt_directory_here"
ndirectory <- "put_your_csv_directory_here"
file_name <- list.files(directory, pattern = ".txt")
files.to.read <- paste(directory, file_name, sep="/")
files.to.write <- paste(ndirectory, paste0(sub(".txt","", file_name),".csv"), sep="/")
for (i in 1:length(files.to.read)) {
temp <- (read.csv(files.to.read[i], header = TRUE, skip = 11, fill = TRUE))
write.csv(temp, file = files.to.write[i])
}
You need to index the output inside the loop as well. Try this:
INFILES = list.files(pattern = ".txt")
OUTFILES = vector(mode = "character", length = length(INFILES))
for (i in 1:length(INFILES)) {
OUTFILES[i] = read.csv(file = INFILES[i], header = TRUE, skip = 11,
fill = TRUE)
write.csv(OUTFILES[i], file = paste0("folder_name", sub(".txt","", INFILES[i]), ".csv"))
}
Assuming that your input files always have at least 11 rows (since you skip the first 11 rows!) this should work:
filelist = list.files(pattern = ".txt")
for (i in 1:length(filelist)) {
cur.input.file <- filelist[i]
cur.output.file <- paste0(cur.input.file, ".csv")
print(paste("Processing the file:", cur.input.file))
# If the input file has less than 11 rows you will reveive the error message:
# "Error in read.table: no lines available in input")
data = read.delim(cur.input.file, header = TRUE, skip = 11)
write.table(data, file=cur.output.file, sep=",", col.names=TRUE, row.names=FALSE)
}
If you reveive any error during file conversion it is caused by the content (e. g. unequal number of rows per column, unequal number of columns etc.).
PS: Using a for loop is OK here since it does not limit the performance (there is no "vectorized" logic to read and write files).

Resources