setwd("C:\\Users\\...\\Documents\\Main\\eml orders")
files <- list.files(pattern="*.eml")
newfiles <- gsub(".eml$", ".txt", files)
file.rename(files, newfiles)
eml_files <- list.files(pattern = "txt$")
I have this code to convert .eml into .txt files now I want to rename the same files into a string that i make with a function.
Example of function
fetch_date <- function(x) {
date <- paste0(as.character(Sys.time()), ".txt")
file.rename(x, date)
}
Now I try map(eml_files, fetch_date)
And get this error:
cannot rename file '24 New order placed.txt' to '2020-11-14', reason 'The network path was not found'
No clue what's happening any help would be appreciated.
Be aware that your fetch_date function only outputs one string (Sys.date()). It tries to name several .txt objects with the same name. On my mac, this results in the last files getting kept while the others are overwritten. Perhaps you use Windows and there is another default behavior when files are overwritten?
eml_files <- list.files(pattern = "txt$")
fetch_date <- function(x) {
date <- paste0(as.character(Sys.time()), ".txt")
file.rename(x, date)
}
map(eml_files, fetch_date)
Related
I want to use a defined function morphon an entire folder /folder of .txt files and afterwards save all the new files to another folder /folder2.
How can I use lapply to cycle through every file and return another file that can be saved to the new folder? If possible retaining the old name as well!
Currently a list of multiple data.frames is created, none being named.
Code is something I came up with, but doesn't work as intended.
old.files <- list.files(path="/Users/F/folder/", pattern="*.txt", full.names=TRUE, recursive=FALSE)
new.files <- paste0("/Users/F/folder2/New_Profile_",1:length(old.files),".txt")
morph <- function (x){
tx0 <- read.table(x,row.names = NULL,col.names= c("Time","Stage"), skip=7, stringsAsFactors = F)
tx1<- as.data.frame(ifelse(tx0 == "Wach",0,
ifelse(tx0=="N1",2,
ifelse(tx0=="N2",1,
ifelse(tx0=="N3",3,
ifelse(tx0=="N4",4,
ifelse(tx0=="Rem",5,
ifelse(tx0=="Bewegung",0,
ifelse(tx0=="A",0,
tx0[tx0==""]<-tx0$Time)))))))))
mutate(tx1, Epoch=1:n())
}
files.list <- as.list(lapply (old.files, morph))
file.copy(from = files.list,to=new.files)
If we need to save the files
out <- lapply(old.files, morph)
lapply(seq_along(out), function(i) write.table(out[[i]], file = new.files[i]))
I need to shape the data stored in Excel files and save it as new .csv files. I figured out what specific actions should be done, but can't understand how to use lapply.
All Excell files have the same structure. Each of the .csv files should have the name of original files.
## the original actions successfully performed on a single file
library(readxl)
library("reshape2")
DataSource <- read_excel("File1.xlsx", sheet = "Sheet10")
DataShaped <- melt(subset(DataSource [-(1),], select = - c(ng)), id.vars = c ("itemname","week"))
write.csv2(DataShaped, "C:/Users/Ol/Desktop/Meta/File1.csv")
## my attempt to apply to the rest of the files in the directory
lapply(Files, function (i){write.csv2((melt(subset(read_excel(i,sheet = "Sheet10")[-(1),], select = - c(ng)), id.vars = c ("itemname","week"))))})
R returns the result to the console but doesn't create any files. The result resembles .csv structure.
Could anybody explain what I am doing wrong? I'm new to R, I would be really grateful for the help
Answer
Thanks to the prompt answer from #Parfait the code is working! So glad. Here it is:
library(readxl)
library(reshape2)
Files <- list.files(full.names = TRUE)
lapply(Files, function(i) {
write.csv2(
melt(subset(read_excel(i, sheet = "Decomp_Val")[-(1),],
select = -c(ng)),id.vars = c("itemname","week")),
file = paste0(sub(".xlsx", ".csv",i)))
})
It reads an Excel file in the directory, drops first row (but headers) and the column named "ng", melts the data by labels "itemname" and "week", writes the result as a .csv to the working directory attributing the name of the original file. And then - rinse and repeat.
Simply pass an actual file path to write.csv2. Otherwise, as denoted in docs ?write.csv, the default value for file argument is empty string "" :
file: either a character string naming a file or a connection open for writing. "" indicates output to the console.
Below concatenates the Excel file stem to the specified path directory with .csv extension:
path <- "C:/Users/Ol/Desktop/Meta/"
lapply(Files, function (i){
write.csv2(
melt(subset(read_excel(i, sheet = "Sheet10")[-(1),],
select = -c(ng)),
id.vars = c("itemname","week")),
file = paste0(path, sub(".xlsx", ".csv", i))
)
})
I'm trying to rename all files in a folder (about 7,000 files) files with just a portion of their original name.
The initial fip code is a 4 or 5 digit code that identifies counties, and is different for every file in the folder. The rest of the name in the original files is the state_county_lat_lon of every file.
For example:
Original name:
"5081_Illinois_Jefferson_-88.9255_38.3024_-88.75_38.25.wth"
"7083_Illinois_Jersey_-90.3424_39.0953_-90.25_39.25.wth"
"11085_Illinois_Jo_Daviess_-90.196_42.3686_-90.25_42.25.wth"
"13087_Illinois_Johnson_-88.8788_37.4559_-88.75_37.25.wth"
"17089_Illinois_Kane_-88.4342_41.9418_-88.25_41.75.wth"
And I need it to rename with just the initial code (fips):
"5081.wth"
"7083.wth"
"11085.wth"
"13087.wth"
"17089.wth"
I've tried by using the list.files and file.rename functions, but I do not know how to identify the code name out of he full name. Some kind of a "wildcard" could work, but don't know how to apply those properly because they all have the same pattern but differ in content.
This is what I've tried this far:
setwd("C:/Users/xxx")
Files <- list.files(path = "C:/Users/xxx", pattern = "fips_*.wth" all.files = TRUE)
newName <- paste("fips",".wth", sep = "")
for (x in length(Files)) {
file.rename(nFiles,newName)}
I've also tried with the "sub" function as follows:
setwd("C:/Users/xxxx")
Files <- list.files(path = "C:/Users/xxxx", all.files = TRUE)
for (x in length(Files)) {
sub("_*", ".wth", Files)}
but get Error in as.character(x) :
cannot coerce type 'closure' to vector of type 'character'
OR
setwd("C:/Users/xxxx")
Files <- list.files(path = "C:/Users/xxxx", all.files = TRUE)
for (x in length(Files)) {
sub("^(\\d+)_.*", "\\1.wth", file)}
Which runs without errors but does nothing to the names in the file.
I could use any help.
Thanks
Here is my example.
Preparation for data to use;
dir.create("test_dir")
data_sets <- c("5081_Illinois_Jefferson_-88.9255_38.3024_-88.75_38.25.wth",
"7083_Illinois_Jersey_-90.3424_39.0953_-90.25_39.25.wth",
"11085_Illinois_Jo_Daviess_-90.196_42.3686_-90.25_42.25.wth",
"13087_Illinois_Johnson_-88.8788_37.4559_-88.75_37.25.wth",
"17089_Illinois_Kane_-88.4342_41.9418_-88.25_41.75.wth")
setwd("test_dir")
file.create(data_sets)
Rename the files;
Files <- list.files(all.files = TRUE, pattern = ".wth")
newName <- sub("^(\\d+)_.*", "\\1.wth", Files)
file.rename(Files, newName)
I would like to make a script that reads data from the correct folder. I have several lines in my code refering to the foldername, therefore I would like to make this dynamic. Is it possible to make the reference to a folder name dynamic? See below what I would like to do
# Clarifies the name of the folder, afterwards "Foldername" will be used as reference
FolderA <- Foldername
# Read csv to import the data from the selected location
data1 <- read.csv(file="c:/R/Foldername/datafile1.csv", header=TRUE, sep=",")
data2 <- read.csv(file="c:/R/Foldername/datafile2.csv", header=TRUE, sep=",")
I am trying to get the same result as what I would get with this code:
data1 <- read.csv(file="c:/R/FolderA/datafile1.csv", header=TRUE, sep=",")
data2 <- read.csv(file="c:/R/FolderA/datafile2.csv", header=TRUE, sep=",")
Can somebody please clarify how it would be possible to make this dynamic?
You could use paste0 for this:
FolderA <- "Foldername"
paste0("c:/R/", FolderA, "/datafile1.csv")
#[1] "c:/R/Foldername/datafile1.csv"
So in your case:
data1 <- read.csv(file=paste0("c:/R/", FolderA, "/datafile1.csv"), header=TRUE, sep=",")
A slight generalization of #LyzandeR's answer,
make_files <- function(directory, filenames) {
sprintf("C:/R/%s/%s", directory, filenames)
}
##
Files <- sprintf("file%i.csv", 1:3)
##
make_files("FolderA", Files)
#[1] "C:/R/FolderA/file1.csv" "C:/R/FolderA/file2.csv" "C:/R/FolderA/file3.csv"
you could also try the following method. The loop will create a list with output file, but if your files all have the same column names you could just rbind them together (method 2). This method will allow you to specify your folder, then use the list.files function to extract all files with extension ".csv". This way if you have many csv files in a folder you won't have to write them all out individually.
# Specify working directory or location of files:
FolderA = "c:/R/Foldername"
# identify all files with specific extension:
files = list.files(FolderA,pattern="*.csv")
Method 1 - Separate by lists
data = NULL
for(i in 1:length(files)){
data[[i]] = read.csv(files[i],header=F,stringsAsFactors=F)
}
Method 2 - single dataframe
data = NULL
for(i in 1:length(files)){
df = read.csv(files[i],header=F,stringsAsFactors=F)
data = rbind(data,df)
}
I need to automate R to read a csv datafile that's into a zip file.
For example, I would type:
read.zip(file = "myfile.zip")
And internally, what would be done is:
Unzip myfile.zip to a temporary folder
Read the only file contained on it using read.csv
If there is more than one file into the zip file, an error is thrown.
My problem is to get the name of the file contained into the zip file, in orded to provide it do the read.csv command. Does anyone know how to do it?
UPDATE
Here's the function I wrote based on #Paul answer:
read.zip <- function(zipfile, row.names=NULL, dec=".") {
# Create a name for the dir where we'll unzip
zipdir <- tempfile()
# Create the dir using that name
dir.create(zipdir)
# Unzip the file into the dir
unzip(zipfile, exdir=zipdir)
# Get the files into the dir
files <- list.files(zipdir)
# Throw an error if there's more than one
if(length(files)>1) stop("More than one data file inside zip")
# Get the full name of the file
file <- paste(zipdir, files[1], sep="/")
# Read the file
read.csv(file, row.names, dec)
}
Since I'll be working with more files inside the tempdir(), I created a new dir inside it, so I don't get confused with the files. I hope it may be useful!
Another solution using unz:
read.zip <- function(file, ...) {
zipFileInfo <- unzip(file, list=TRUE)
if(nrow(zipFileInfo) > 1)
stop("More than one data file inside zip")
else
read.csv(unz(file, as.character(zipFileInfo$Name)), ...)
}
You can use unzip to unzip the file. I just mention this as it is not clear from your question whether you knew that. In regard to reading the file. Once your extracted the file to a temporary dir (?tempdir), just use list.files to find the files that where dumped into the temporary directory. In your case this is just one file, the file you need. Reading it using read.csv is then quite straightforward:
l = list.files(temp_path)
read.csv(l[1])
assuming your tempdir location is stored in temp_path.
I found this thread as I was trying to automate reading multiple csv files from a zip. I adapted the solution to the broader case. I haven't tested it for weird filenames or the like, but this is what worked for me so I thought I'd share:
read.csv.zip <- function(zipfile, ...) {
# Create a name for the dir where we'll unzip
zipdir <- tempfile()
# Create the dir using that name
dir.create(zipdir)
# Unzip the file into the dir
unzip(zipfile, exdir=zipdir)
# Get a list of csv files in the dir
files <- list.files(zipdir)
files <- files[grep("\\.csv$", files)]
# Create a list of the imported csv files
csv.data <- sapply(files, function(f) {
fp <- file.path(zipdir, f)
return(read.csv(fp, ...))
})
return(csv.data)}
If you have zcat installed on your system (which is the case for linux, macos, and cygwin) you could also use:
zipfile<-"test.zip"
myData <- read.delim(pipe(paste("zcat", zipfile)))
This solution also has the advantage that no temporary files are created.
Here is an approach I am using that is based heavily on #Corned Beef Hash Map 's answer. Here are some of the changes I made:
My approach makes use of the data.table package's fread(), which
can be fast (generally, if it's zipped, sizes might be large, so you
stand to gain a lot of speed here!).
I also adjusted the output format so that it is a named list, where
each element of the list is named after the file. For me, this was a
very useful addition.
Instead of using regular expressions to sift through the files
grabbed by list.files, I make use of list.file()'s pattern
argument.
Finally, I by relying on fread() and by making pattern an
argument to which you could supply something like "" or NULL or
".", you can use this to read in many types of data files; in fact,
you can read in multiple types of at once (if your .zip contains
.csv, .txt in you want both, e.g.). If there are only some types of
files you want, you can specify the pattern to only use those, too.
Here is the actual function:
read.csv.zip <- function(zipfile, pattern="\\.csv$", ...){
# Create a name for the dir where we'll unzip
zipdir <- tempfile()
# Create the dir using that name
dir.create(zipdir)
# Unzip the file into the dir
unzip(zipfile, exdir=zipdir)
# Get a list of csv files in the dir
files <- list.files(zipdir, rec=TRUE, pattern=pattern)
# Create a list of the imported csv files
csv.data <- sapply(files,
function(f){
fp <- file.path(zipdir, f)
dat <- fread(fp, ...)
return(dat)
}
)
# Use csv names to name list elements
names(csv.data) <- basename(files)
# Return data
return(csv.data)
}
The following refines the above answers. FUN could be read.csv, cat, or anything you like, providing the first argument will accept a file path. E.g.
head(read.zip.url("http://www.cms.gov/Medicare/Coding/ICD9ProviderDiagnosticCodes/Downloads/ICD-9-CM-v32-master-descriptions.zip", filename = "CMS32_DESC_LONG_DX.txt"))
read.zip.url <- function(url, filename = NULL, FUN = readLines, ...) {
zipfile <- tempfile()
download.file(url = url, destfile = zipfile, quiet = TRUE)
zipdir <- tempfile()
dir.create(zipdir)
unzip(zipfile, exdir = zipdir) # files="" so extract all
files <- list.files(zipdir)
if (is.null(filename)) {
if (length(files) == 1) {
filename <- files
} else {
stop("multiple files in zip, but no filename specified: ", paste(files, collapse = ", "))
}
} else { # filename specified
stopifnot(length(filename) ==1)
stopifnot(filename %in% files)
}
file <- paste(zipdir, files[1], sep="/")
do.call(FUN, args = c(list(file.path(zipdir, filename)), list(...)))
}
Another approach that uses fread from the data.table package
fread.zip <- function(zipfile, ...) {
# Function reads data from a zipped csv file
# Uses fread from the data.table package
## Create the temporary directory or flush CSVs if it exists already
if (!file.exists(tempdir())) {dir.create(tempdir())
} else {file.remove(list.files(tempdir(), full = T, pattern = "*.csv"))
}
## Unzip the file into the dir
unzip(zipfile, exdir=tempdir())
## Get path to file
file <- list.files(tempdir(), pattern = "*.csv", full.names = T)
## Throw an error if there's more than one
if(length(file)>1) stop("More than one data file inside zip")
## Read the file
fread(file,
na.strings = c(""), # read empty strings as NA
...
)
}
Based on the answer/update by #joão-daniel
unzipped file location
outDir<-"~/Documents/unzipFolder"
get all the zip files
zipF <- list.files(path = "~/Documents/", pattern = "*.zip", full.names = TRUE)
unzip all your files
purrr::map(.x = zipF, .f = unzip, exdir = outDir)
I just wrote a function based on top read.zip that may help...
read.zip <- function(zipfile, internalfile=NA, read.function=read.delim, verbose=TRUE, ...) {
# function based on http://stackoverflow.com/questions/8986818/automate-zip-file-reading-in-r
# check the files within zip
unzfiles <- unzip(zipfile, list=TRUE)
if (is.na(internalfile) || is.numeric(internalfile)) {
internalfile <- unzfiles$Name[ifelse(is.na(internalfile),1,internalfile[1])]
}
# Create a name for the dir where we'll unzip
zipdir <- tempfile()
# Create the dir using that name
if (verbose) catf("Directory created:",zipdir,"\n")
dir.create(zipdir)
# Unzip the file into the dir
if (verbose) catf("Unzipping file:",internalfile,"...")
unzip(zipfile, file=internalfile, exdir=zipdir)
if (verbose) catf("Done!\n")
# Get the full name of the file
file <- paste(zipdir, internalfile, sep="/")
if (verbose)
on.exit({
catf("Done!\nRemoving temporal files:",file,".\n")
file.remove(file)
file.remove(zipdir)
})
else
on.exit({file.remove(file); file.remove(zipdir);})
# Read the file
if (verbose) catf("Reading File...")
read.function(file, ...)
}