I am trying to get the size and other details of the files in my directory. However, it returns NA values for some files while returning the details for others. Below is the code I used. Are there any administrator settings on the files that must be changed to fetch these details?
library(tidyr)
library(dplyr)
wd <- "F:\\working\\others"
setwd(wd)
# Get the file list - your list of files would be different.
# head() returns at most 240 names. `list.files()[1:240]` pads the vector with
# NA when the directory holds fewer than 240 entries, and every NA name becomes
# a row of NA values in the result.
fileList <- head(list.files(), 240)
class(fileList)
# Result: one row per file with modification date, modification time, size and
# file name.
# format() is used instead of splitting as.character(mtime) on a space so the
# time part is always present, and the columns are built directly instead of
# going through row names and the deprecated add_rownames().
modified <- file.mtime(fileList)
tibble(
  DateModified = format(modified, "%Y-%m-%d"),
  TimeModified = format(modified, "%H:%M:%S"),
  Size = file.size(fileList),
  FileName = fileList
)
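Try this.
The trick is `full.names = TRUE`: it makes `dir()` return full paths, so `file.info()` can locate every file regardless of the current working directory.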
# Resolve the directory to an absolute path first.
ldir <- normalizePath("<type here the path of directory>")
# List every entry with its full path so file.info() can locate it.
entries <- dir(path = ldir, full.names = TRUE)
finf <- file.info(entries, extra_cols = FALSE)
# Inspect the result in the data viewer.
View(finf)
Related
I have discovered R a couple of years ago and it has been very handy to clean up dataframes, prepare some data and to handle other basic tasks.
Now I would like to try using R to apply basic treatments but on many different files stored in different folders at once.
Here is the script I would like to improve into one function that would loop through my folder "dataset_2006" and "dataset_2007" to do all the work.
library(dplyr)
library(readr)
library(sf)
library(purrr)
# 2006 dataset: combine the shapefiles, attach the CSV attributes by ID, keep
# the labels of interest and write the result into the dataset folder.
setwd("C:/Users/Downloads/global_data/dataset_2006")
shp2006 <- list.files(pattern = 'data_2006.*\\.shp$', full.names = TRUE)
listOfShp <- lapply(shp2006, st_read)
# rbind() dispatches to the sf method, so reaching into the package internals
# with sf:::rbind.sf is not needed.
combinedShp <- do.call(what = rbind, args = listOfShp)
# import and merge CSV files into one data frame
folderfiles <- list.files(pattern = 'csv_2006_.*\\.csv$', full.names = TRUE)
csv_data <- folderfiles %>%
  set_names() %>%
  map_dfr(.f = read_delim,
          delim = ";",
          .id = "file_name")
new_shp_2006 <- merge(combinedShp, csv_data, by = "ID") %>%
  filter(label %in% c("AR45T", "GK879"))
# st_write() has no `overwrite` argument for file outputs; append = FALSE
# replaces an existing layer, so the script can be re-run without an error.
st_write(new_shp_2006, "new_shp_2006.shp", append = FALSE)
# 2007 dataset: combine the shapefiles, attach the CSV attributes by ID, keep
# the labels of interest and write the result into the dataset folder.
setwd("C:/Users/Downloads/global_data/dataset_2007")
shp2007 <- list.files(pattern = 'data_2007.*\\.shp$', full.names = TRUE)
listOfShp <- lapply(shp2007, st_read)
# rbind() dispatches to the sf method, so reaching into the package internals
# with sf:::rbind.sf is not needed.
combinedShp <- do.call(what = rbind, args = listOfShp)
# import and merge CSV files into one data frame
folderfiles <- list.files(pattern = 'csv_2007_.*\\.csv$', full.names = TRUE)
csv_data <- folderfiles %>%
  set_names() %>%
  map_dfr(.f = read_delim,
          delim = ";",
          .id = "file_name")
new_shp_2007 <- merge(combinedShp, csv_data, by = "ID") %>%
  filter(label %in% c("AR45T", "GK879"))
# st_write() has no `overwrite` argument for file outputs; append = FALSE
# replaces an existing layer, so the script can be re-run without an error.
st_write(new_shp_2007, "new_shp_2007.shp", append = FALSE)
This is easy to achieve with a for-loop to loop over multiple items. To allow us to use wildcards, we can also add the function Sys.glob():
# Expand `directories` (literal paths and/or wildcard patterns) with
# Sys.glob() and visit each resulting path in turn.
myfunction <- function(directories) {
  matched_dirs <- Sys.glob(directories)
  for (current_dir in matched_dirs) {
    # do something with a single directory
    print(current_dir)
  }
}
# Option 1: name each directory explicitly.
manual_dirs <- c(
  "C:/Users/Downloads/global_data/dataset_2006",
  "C:/Users/Downloads/global_data/dataset_2007"
)
myfunction(manual_dirs)
# Option 2: let a wildcard pick up every file/directory matching the pattern.
wildcard_dirs <- "C:/Users/Downloads/global_data/dataset_200*"
myfunction(wildcard_dirs)
Using R, I want to process the files in a folder one at a time: read a file, perform some operations on it, plot it, and save the result to a CSV file (csv1).
Then read the next file, perform the same operations, plot it, and append the modified data frame to csv1 with rbind. Note that I want a single plot covering all the files read in the for loop, and I want that plot saved as a PDF.
Right now I am using the following code, but my system crashes because it runs out of RAM.
# Reads every file under /work/newplots, then on each loop pass reshapes the
# combined contents, writes /work/con1_10.csv and draws two scatter plots.

# Full paths of the directory entries whose names match `pattern`.
# NOTE(review): list.files() treats `pattern` as a regular expression, so
# "*.*" is presumably meant as a match-everything glob -- confirm.
all_paths <-
list.files(path = "/work/newplots",
pattern = "*.*",
full.names = TRUE)
# Base names of those paths as a list; below only its length is used.
all_filenames <- all_paths %>%
basename() %>%
as.list()
# Parse every file with read.table(), skipping the first 60 lines of each.
# All parsed tables are held in this one list at the same time.
all_content <-
all_paths %>%
lapply(read.table,
header = TRUE,
skip=60,
sep=',',
encoding = "UTF-8")
# Starts as an empty data frame; it is overwritten inside the loop.
file <- data.frame()
# One pass per file name.
# NOTE(review): each pass works on all of all_content, not only on file i --
# this repeated full-size processing looks like the source of the memory
# pressure; confirm the intent.
for (i in 1:length(all_filenames)) {
# Append the loop index i to each parsed table via c().
all_lists <- mapply(c, all_content, i, SIMPLIFY = FALSE)
# Stack the pieces into a single table, filling missing columns.
# rbindlist is not defined in this snippet -- presumably data.table::rbindlist.
data <- rbindlist(all_lists, fill = T)
# Rename the first column so it can be split below.
names(data)[1] <- "File.Path"
# Distinct values of column V1.
x1 <- data %>% select(V1) %>% unique()
# Split File.Path on spaces into 23 pieces, drop the original column and keep
# the rows whose first piece is 'Interactions'.
# Assumes the split pieces end up named X1, X2, ... -- TODO confirm.
data <- data %>% data.frame(str_split_fixed(data$File.Path, " ", 23))%>% select(-c(File.Path))%>% filter(X1=='Interactions')
# Put the V1 values alongside the filtered rows.
# NOTE(review): verify that x1 and data have compatible row counts.
data<- cbind(x1,data)
# Drop the second column, then keep only V1 and X2.
data <- data %>% select(-c(2)) %>%select(V1,X2)
# X2 comes from a string split, so convert it to numeric.
data$X2 <-as.numeric(data$X2)
# Write the table to disk; every pass writes to the same path.
# NOTE(review): the value returned by write.table() is stored in `file` --
# presumably NULL; confirm.
file <- write.table(data,"/work/con1_10.csv",row.names = FALSE)
# `file` is then replaced by append(file, data).
file <- append(file,data)
# Scatter plot of X2.
# NOTE(review): the result of plot() is stored and printed; base plot()
# presumably returns NULL, so print(p) would add nothing -- confirm.
p<-plot(data$X2, xlab="Cycle number",ylab="Interactions",type = "p")
print(p)
# Scale X2 by 2 / 20006 (the meaning of the constant is not stated here) and
# plot the result.
Z<- (2*data$X2)/20006
px<-plot(Z, xlab="Cycle number", ylab="Z")
print(px)
}
Is there any way to read the 2 most recent xlsx files in a directory (on a Windows system) in R and merge them into a single file?
The code I am currently using pulls only the last file, but I want the last 2 files.
I have tried:
library(magrittr)
# Keep only the single most recently modified entry of directory "x".
candidates <- list.files(path = "x", full.names = TRUE)
newest_index <- which.max(file.mtime(candidates))
List <- candidates[newest_index]
We can sort and slice the last 2
library(dplyr)
# Order the .xlsx paths from oldest to newest modification time and keep the
# last two, i.e. the two most recently modified files.
xlsx_paths <- list.files(path = "x", full.names = TRUE, pattern = "\\.xlsx$")
by_mtime <- xlsx_paths[order(file.mtime(xlsx_paths))]
files <- tail(by_mtime, n = 2)
If we need to join those files
library(purrr)
library(readxl)
# Read the first sheet of each file and row-bind the results.
read_first_sheet <- function(xlsx_path) {
  read_excel(path = xlsx_path, sheet = 1)
}
out <- map_dfr(files, read_first_sheet)
In R, I wish to find the latest xlsx file in a folder and then import the data from that file. All files have the same format. I just keep getting a blank result. Please advise on the correct code.
# Attempt to load the most recently modified workbook and export it as CSV.
# File metadata (including mtime) for the entries under `path` whose names
# match `pattern`; the full paths from list.files() are passed to file.info().
CompanyFileNames <- file.info(list.files
(path = "Y:/...Data",
pattern = "*port.xlsx",
full.names = T))
# Keep only the row(s) whose modification time equals the maximum.
CompanyFilelatest <- subset(CompanyFileNames, mtime == max(mtime))
# NOTE(review): indexing with [0] selects no columns, so no file path is
# carried forward from here -- likely why the result comes back blank.
CompanyFilelatest <- CompanyFilelatest[0]
# Apply read_excel to each element and keep the results as a list.
Companymonthly <- sapply(CompanyFilelatest,
read_excel, simplify=FALSE)
# NOTE(review): this line begins with %>%, so it is not attached to the
# sapply() call above; R presumably rejects it at parse time -- confirm.
%>% bind_rows(.id = "id")
# Write the combined data to Companymonthly.csv in the working directory.
write.csv(Companymonthly, "Companymonthly.csv")
What you need is the filepath of the latest file, which is stored as the rowname of CompanyFilelatest. Extract the file path with rownames() and then this should work.
# Metadata for every .xlsx file in the working directory. file.info() uses the
# file paths as row names. list.files() takes a regular expression (not a
# glob), so the extension is matched with an escaped dot and an end anchor.
CompanyFileNames <- file.info(list.files(
  path = getwd(),
  pattern = "\\.xlsx$",
  full.names = TRUE
))
# Keep the row(s) with the newest modification time.
CompanyFilelatest <- subset(CompanyFileNames, mtime == max(mtime))
CompanyFilelatest <- rownames(CompanyFilelatest) # use rownames not subseting with 0
# Read each selected workbook; naming the list by path makes bind_rows() record
# the source file in the "id" column.
Companymonthly <- lapply(setNames(nm = CompanyFilelatest), read_excel) %>%
  bind_rows(.id = "id")
write.csv(Companymonthly, "Companymonthly.csv")
I have multiple folders containing files with numeric names (e.g. 12345.in). I am trying to write a function that returns the nearest file when the requested file is not in the folder.
soili <- 371039 # this is the file name (without the ".in" extension)

# Find the file "<soili>.in" anywhere below the working directory. When it
# does not exist, look for "<soili + 1>.in" instead.
#
# soili:   numeric file name without extension, e.g. 371039.
# returns: character vector of matching relative paths (empty when neither
#          file exists).
Getmapunit <- function(soili) {
  # list.files() matches `pattern` as a regular expression, so the name is
  # anchored and the dot escaped; otherwise "371039.in" would also match
  # "1371039.in" or "371039.inp". "%.0f" avoids scientific notation:
  # paste0(100000, ".in") would give "1e+05.in".
  find_exact <- function(id) {
    list.files(pattern = sprintf("^%.0f\\.in$", id), recursive = TRUE)
  }
  id <- as.numeric(soili)
  soilfile <- find_exact(id)
  if (length(soilfile) == 0) {
    soilfile <- find_exact(id + 1)
  }
  soilfile
}
soilfile <- Getmapunit(soili)
# I want to extract the file name closest to 371039; so far I was only able to
# write a function that gets the file with the next number.
I would try to extract the number of each file and check for the nearest value:
library(magrittr)
library(stringr)
soili <- 371039
# get all files in the specific folder
files <- list.files(path = "file folder", full.names = FALSE)
# extract the number of each "<digits>.in" file name and turn it into an
# integer; names that do not have this form become NA and are skipped by
# which.min() below
numbers <- str_extract(files, "^\\d+(?=\\.in$)") %>% as.integer()
# get the number of the nearest file
nearest_file <- numbers[which.min(abs(soili - numbers))]
# turn it into a filename
paste0(as.character(nearest_file), ".in")
You can also put everything into one pipe:
soili <- 371039
# Each step receives the previous result as its first argument, so the vector
# of file names is not passed again explicitly.
nearest_file <- list.files(path = "file folder", full.names = FALSE) %>%
  # keep the digits of every "<digits>.in" name (NA for anything else)
  str_extract("^\\d+(?=\\.in$)") %>%
  as.integer() %>%
  # pick the number closest to soili
  .[which.min(abs(soili - .))] %>%
  # turn it back into a file name
  paste0(".in")
Of course, you can also translate this approach into a function.
Edit:
If you have all the files in different folders, you can use this approach:
soili <- 371039
# recursive = TRUE is required to reach the files inside the sub-folders;
# full.names = TRUE keeps the folder part so the match can be located later.
files <- list.files(
  path = "highest_file_folder",
  pattern = "\\.in$",
  recursive = TRUE,
  full.names = TRUE
)
nearest_file <- files %>%
  # drop the folder part
  basename() %>%
  # keep the digits of every "<digits>.in" name (NA for anything else)
  str_extract("^\\d+(?=\\.in$)") %>%
  as.integer() %>%
  # pick the number closest to soili
  .[which.min(abs(soili - .))] %>%
  paste0(".in")
# getting filepath with nearest_file out of the files vector; comparing the
# base name exactly avoids partial matches such as "1371039.in"
files[basename(files) == nearest_file]
# Small worked example: strip the folder part, then the ".in" extension.
example_paths <- c("./folder1/12345.in", "./folder2/56789.in")
example_names <- str_extract(example_paths, "[^/]*$")
files <- str_extract(example_names, ".*(?=.in)")