combine results from a loop in one file - r

I create a loop like this one:
# For each row of `outcomes`, find the rows of `regulationtable` that agree
# with it in every column and write their row names to a per-row text file.
# seq_len() makes the loop a no-op when `outcomes` has zero rows
# (1:nrow() would iterate over c(1, 0)).
for (p in seq_len(nrow(outcomes))) {
  # TRUE for every regulationtable row with no mismatching column
  id <- apply(regulationtable, 1, function(i) {
    sum(i[1:length(regulationtable)] != outcomes[p, ]) == 0
  })
  idd <- as.matrix(id)
  test2 <- subset(idd, idd[, 1] == TRUE)
  result <- as.data.frame(rownames(test2))
  # paste0() avoids the spaces paste() inserts ("file 1 .txt")
  filename <- paste0("file", p, ".txt")
  write.table(result, filename)
}
Each iteration of the loop saves its result to a separate file. I want to combine these results and create one file containing all of them.
Can anyone help me with this?

With the append argument in write.table you can add lines to an existing file rather than overwriting them:
# The first pass creates file.txt with its header; every later pass
# appends its rows without repeating the column names.
first_pass <- p == 1
write.table(result, "file.txt", append = !first_pass, col.names = first_pass)
Is this what you mean?
EDIT: You might want the first run to initialize it and not append, then each other run to not print the column names ( I do assume these are the same for each table).

Related

Dynamic files name load and processing with for loop

I'm processing some .xlsx files that are named like time1_drug1, time1_drug2, and so on up to time6_drug5 (30 files in total). I want to load these files into R and give each dataset a name such as t1d1, t2d2.
I tried to use sprintf, but I cannot figure out how to make it work.
# Asker's attempt, quoted as posted: for each time i (1-6) and drug j (1-5) it
# tries to use sprintf() both as the target of an assignment and inside the
# file path string.
# NOTE(review): this is not valid R as written (the quotes and braces do not
# balance), so it cannot be run.
for(i in 1:6) {
for(j in 1:5) {
sprintf("time%i","drug%j,i,j)=read.xlsx("/Users/pathway/dataset/time_sprintf(%i,i)_drug(%j,j).xlsx", 1)}
names(sprintf("t%i","d%j,i,j))=c("result", "testF","TestN")
sprintf("t%i","d%j,i,j)$Discription[which(sprintf("t%i","d%j,i,j)$testF>=1&sprintf("t%i","d%j,i,j)$TestN>=2)]="High+High"
}
}
I expect to get 30 data like t1d1 till t6d5.
You should (almost) never use assign. When reading multiple files into R you should (almost) always put them in a named list.
A rough outline of a much better approach is this:
# Collect the full path of every file stored in the dataset directory
f <- list.files("/Users/pathway/dataset/", full.names = TRUE)
# Load each workbook; the result is a list with one element per file
drug_time <- lapply(f, read.xlsx)
# Label each element with its file name, minus the ".xlsx" text
names(drug_time) <- gsub(".xlsx", "", basename(f), fixed = TRUE)
You can use the for loop as you are, but you should also use the assign function:
# Read the 30 workbooks time<i>_drug<j>.xlsx and bind each one to a variable
# named t<i>d<j> (t1d1 ... t6d5) in the current environment.
for (i in 1:6) {
  for (j in 1:5) {
    # The files are named like time1_drug1.xlsx: no underscore after "time"
    workbook_path <- paste0(
      "/Users/pathway/dataset/time", i, "_drug", j, ".xlsx"
    )
    # Variable names follow the requested t1d1 form (no underscore)
    assign(paste0("t", i, "d", j), read.xlsx(workbook_path, 1))
  }
}

Looping and extracting file numbers from .csv file names and inserting into a column

I have managed to combine the data from several files and am currently trying to extract the file number from each file name and insert these numbers into a column.
# List every file in the trials directory (full paths) and show the list
fnames <- dir("../data/temperature_trials", full.names = TRUE)
print(fnames)
# Read each file once, then stack all rows with a single rbind() call.
# lapply() also copes with an empty directory, where 1:length(fnames)
# would iterate over c(1, 0); `res` is NULL in that case.
trial_frames <- lapply(fnames, read.csv)
res <- do.call(rbind, trial_frames)
```
Imported 12 .csv files and used rbind to combine all data. Files named:
Trial1.csv
Trial2.csv
.
.
.
Trial12.csv
```
# Asker's attempt, quoted as posted: for each file name it locates the
# "Trial<digits>" match, converts the digits after "Trial" to a number, prints
# it, and cbind()s that single number onto the whole combined `res`.
# NOTE(review): res1 is rebuilt on every pass from one number and all of
# `res`, so after the loop every row carries the last file's number. The
# loop's closing brace is missing in the post.
for (i in 1: length(fnames)) {
loc = regexpr(pattern = "Trial[0-9]*", text = fnames[i])
trialNumber = as.numeric(substr(fnames[i], start = loc[[1]][1]+5,
stop = loc[[1]][1] + attr(loc, 'match.length')-1))
print(trialNumber)
res1 = cbind(trialNumber, res)
```
I am trying to extract the trial numbers from each .csv file name and place them into a column named TrialNumber. When I do so it will only place a 12 into this column for every data point. Since it is using a loop I am assuming this is why, but can not figure out how to fix this or another way to do so. I need to assign the trial number to each data point corresponding with each .csv file.
Maybe you can simply add Trial number during each iteration of the loop-
# Read every trial file, tag its rows with the trial number taken from the
# file name, and stack the tagged pieces into `res`.
trial_frames <- vector("list", length(fnames))
for (i in seq_along(fnames)) {
  # open each file in turn
  temp <- read.csv(fnames[i])
  # The number is parsed from the name (Trial<N>.csv) rather than taken from
  # the loop index: the listing is alphabetical, so Trial10 precedes Trial2.
  trial_number <- as.numeric(
    sub("^Trial([0-9]+).*$", "\\1", basename(fnames[i]))
  )
  # Tag only this file's rows. Assigning to the combined `res` instead would
  # overwrite the column for every row on each pass.
  temp$trial_number <- rep(trial_number, nrow(temp))
  trial_frames[[i]] <- temp
}
res <- do.call(rbind, trial_frames)
This way you will have a trial number column which will correspond to the file which had been imported.
You can also try extracting the numeric part of the file name as pointed out in this answer-
Extract numeric part of strings of mixed numbers and characters in R
I'd create a list of data frames from the CSV files, using the file name as the basis for each list element name:
# List only the .csv files. `pattern` is a regular expression, not a glob,
# so the extension is matched with an escaped dot and an end-of-name anchor.
fnames <- list.files("full/path/to/data/temperature_trials",
                     pattern = "\\.csv$", full.names = TRUE)
# Read each file into its own data frame
temp <- lapply(fnames, read.csv)
# Name each list element after its file, without directory or extension
names(temp) <- tools::file_path_sans_ext(basename(fnames))
Then dplyr::bind_rows() will create a dataframe from the list with the treatment label in the .id column:
# dplyr provides bind_rows()
library(dplyr)
# Stack the list of data frames into one; the list element names (taken from
# the file names) are stored in the TrialNumber column
temp_df <- bind_rows(temp, .id = "TrialNumber")

How to store results of a function?

I am trying to store the results of the function below,
I can only print the results, but cannot save them in CSV format. How can I save these results to a CSV file?
Thanks in advance!!!
# Running (cumulative) sum of the discounted amount loan * R.
#
# Args:
#   loan: numeric amount; multiplied by R to get the amount added per step.
#   R: numeric rate, used for the amount and as the discount rate.
#   final: not used in the calculation (kept so existing calls still work).
#   startdate, first_enddate: dates, or strings as.Date() accepts; the days
#     between them divided by 365 give the fractional first period.
#   enddate: numeric year; the steps run over 0:(enddate - 2017).
#
# Returns (invisibly) a numeric vector holding the running total after each
# step, so the values can be stored or written out. Each running total is
# also printed, as before.
calEAD <- function(loan, R, final, startdate, first_enddate, enddate) {
  I <- loan * R
  period <- as.numeric(as.Date(first_enddate) - as.Date(startdate)) / 365
  steps <- 0:(enddate - 2017)
  # Preallocate one slot per step instead of discarding the values
  running <- numeric(length(steps))
  EADabc <- 0
  for (k in seq_along(steps)) {
    EADabc <- I / ((1 + R)^(steps[k] + period)) + EADabc
    print(EADabc)
    running[k] <- EADabc
  }
  invisible(running)
}
calEAD1 <- calEAD(6690012.88, 0.0588, 6690012.88, "2016-12-31", "2017-08-29", 2022)
calEAD2 <- calEAD(385000.12, 0.0588, 385000.12, "2016-12-31", "2017-09-11", 2023)
It seems that you want to keep every intermediate result of EADabc inside the for loop. In that case you could concatenate them to a vector like this:
# Running (cumulative) sum of the discounted amount loan * R.
#
# Args:
#   loan: numeric amount; multiplied by R to get the amount added per step.
#   R: numeric rate, used for the amount and as the discount rate.
#   final: not used in the calculation (kept so existing calls still work).
#   startdate, first_enddate: dates, or strings as.Date() accepts; the days
#     between them divided by 365 give the fractional first period.
#   enddate: numeric year; the steps run over 0:(enddate - 2017).
#
# Returns a numeric vector with the running total after each step.
calEAD <- function(loan, R, final, startdate, first_enddate, enddate) {
  I <- loan * R
  period <- as.numeric(as.Date(first_enddate) - as.Date(startdate)) / 365
  steps <- 0:(enddate - 2017)
  # Discount every step at once; cumsum() yields the intermediate totals
  # without growing a vector inside a loop.
  cumsum(I / (1 + R)^(steps + period))
}
Result:
calEAD1=calEAD(6690012.88,0.0588,6690012.88, '2016-12-31','2017-08-29',2022)
calEAD2=calEAD(385000.12,0.0588,385000.12, '2016-12-31','2017-09-11',2023)
> calEAD1
[1] 378809 736581 1074484 1393622 1695037 1979713
> calEAD2
[1] 21755.57 42302.95 61709.24 80037.81 97348.51 113697.87 129139.28
If you want to save them to a csv file, do:
# Save the vector of running totals to test.csv
write.csv(calEAD1, "test.csv")
You can use write.csv or the write.table functions.
# Accumulate the discounted amounts and write each running total to the CSV
# as soon as it is computed.
for (i in 0:b) {
  EADabc <- I / ((1 + R)^(i + period)) + EADabc
  # The first pass creates the file and writes the header; later passes
  # append a value only. Appending with col.names = TRUE on every pass would
  # repeat the header before each value and keep content from earlier runs.
  first_pass <- i == 0
  # row.names = FALSE: otherwise every line carries the row label "1"
  write.table(EADabc, file = "file_name.csv", sep = ",", row.names = FALSE,
              col.names = first_pass, append = !first_pass)
}
Or
# Write the current value of EADabc to file_name.csv, without row names
write.csv(EADabc, file = "file_name.csv", row.names = FALSE)

Combine csv files with common file identifier

I have a list of approximately 500 csv files each with a filename that consists of a six-digit number followed by a year (ex. 123456_2015.csv). I would like to append all files together that have the same six-digit number. I tried to implement the code suggested in this question:
Import and rbind multiple csv files with common name in R but I want the appended data to be saved as new csv files in the same directory as the original files are currently saved. I have also tried to implement the below code however the csv files produced from this contain no data.
# Merge the yearly CSV files of each station ID (<ID>_<year>.csv) into one
# MERGED_<ID>.csv written to the same directory.
# NOTE(review): rm(list = ls()) wipes the caller's workspace; kept only
# because the original script did it.
rm(list = ls())
data_dir <- "C:/Users/smithma/Desktop/PM25_test"
# Only the original <digits>_<something>.csv files; files that start with
# "MERGED_" from an earlier run do not match and are not merged again.
filenames <- list.files(path = data_dir, pattern = "^[0-9]{5,6}_.*\\.csv$")
# The station ID is everything before the first underscore
NAPS_ID <- sub("_.*$", "", filenames)
Unique_NAPS_ID <- unique(NAPS_ID)
for (curr_NAPS_ID in Unique_NAPS_ID) {
  # Select by exact ID and prepend the directory so read.csv() finds the
  # files whatever the working directory is
  NAPS_filenames <- file.path(data_dir, filenames[NAPS_ID == curr_NAPS_ID])
  merged <- do.call("rbind", lapply(NAPS_filenames, read.csv, header = TRUE))
  write.csv(merged,
            file = file.path(data_dir, paste0("MERGED_", curr_NAPS_ID, ".csv")),
            row.names = FALSE)
}
Any help would be greatly appreciated.
Because you're not doing any data manipulation, you don't need to treat the files like tabular data. You only need to copy the file contents.
# Merge files that share a six-digit prefix by copying their lines: the
# header comes from the first file of a group, data lines from all of them.
data_dir <- "C:/Users/smithma/Desktop/PM25_test"
# Only files that start with six digits and an underscore, so MERGED_ files
# from an earlier run are not read back in
filenames <- list.files(data_dir, pattern = "^[0-9]{6}_", full.names = TRUE)
NAPS_ID <- substr(basename(filenames), 1, 6)
Unique_NAPS_ID <- unique(NAPS_ID)
for (curr_NAPS_ID in Unique_NAPS_ID) {
  NAPS_filenames <- filenames[startsWith(basename(filenames), curr_NAPS_ID)]
  # Write next to the inputs (same directory as the files that were read)
  output_file <- file.path(data_dir, paste0("MERGED_", curr_NAPS_ID, ".csv"))
  for (k in seq_along(NAPS_filenames)) {
    line_text <- readLines(NAPS_filenames[k])
    # Write the header from the first file; this also (re)creates the output
    if (k == 1) {
      cat(line_text[1], "\n", sep = "", file = output_file)
    }
    # Append every line in the file except the header
    line_text <- line_text[-1]
    cat(line_text, file = output_file, sep = "\n", append = TRUE)
  }
}
My changes:
list.files(..., full.names = TRUE) is usually the best way to go.
Because the digits appear at the start of the filenames, I suggest substr. It's easier to get an idea of what's going on when skimming the code.
Instead of looping over the indices of a vector, loop over the values. It's more succinct and less likely to cause problems if the vector's empty.
startsWith and endsWith are relatively new functions, and they're great.
You only care about copying lines, so just use readLines to get them in and cat to get them out.
You might consider something like this:
## Group the files by the first 6 characters of their name and write one
## combined CSV per group.
## NOTE(review): assumes `filenames` holds bare file names (no directory part),
## as returned by list.files() without full.names - confirm.
six.digit.filenames <- substr(filenames, 1, 6)
path <- "C:/Users/smithma/Desktop/PM25_test/"
unique.numbers <- unique(six.digit.filenames)
for (j in unique.numbers) {
  group_files <- filenames[six.digit.filenames == j]
  ## read every file of the group (do any per-file processing here), then
  ## stack them with one rbind() call instead of growing a data frame
  group_data <- lapply(paste0(path, group_files), read.csv)
  data.for.output <- do.call(rbind, group_data)
  write.csv(data.for.output, paste0(path, j, ".csv"), row.names = FALSE)
}

try to create new variable using loop in R,but failed

I am a new user of R. I have already imported the data from all my txt files using the code below, but I want to create a new variable called case while importing the data. The value of case should be 1 for the first row and 0 for all the other rows.
When I run the code, the console does not report anything wrong and the data is imported, but the new variable is not created. I don't know why.
# Read each tab-separated file, add a `case` column (1 on the first row, 0 on
# the others) and store the table under a variable named after the file.
for (i in Filenames) {
  # Variable name: the file name up to its first ".", with spaces removed
  object_name <- gsub(" ", "", sub("\\..*$", "", i))
  data <- read.table(paste0(filepath, i), fill = TRUE, header = TRUE,
                     quote = "", row.names = NULL, sep = "\t")
  # Flag the first row by position; this also works for a zero-row table
  data$case <- as.numeric(seq_len(nrow(data)) == 1)
  # Store the table only after `case` exists, so the stored object has it
  assign(object_name, data)
}
Thanks guys! I used @joosts's code and made some adjustments. The code below works just fine.
# Read every file into a list, add `case` (1 on the first row, 0 elsewhere)
# and `ID` (position of the file in Filenames), then stack all tables.
fn <- paste0(filepath, Filenames)
mylist <- lapply(fn, read.table, fill = TRUE, header = TRUE, quote = "",
                 row.names = NULL, sep = "\t", stringsAsFactors = FALSE)
for (i in seq_along(mylist)) {
  tbl <- mylist[[i]]
  # Columns are built to the table's own row count, so a file with no data
  # rows gets empty columns instead of failing
  tbl$case <- as.numeric(seq_len(nrow(tbl)) == 1)
  tbl$ID <- rep(i, nrow(tbl))
  mylist[[i]] <- tbl
}
do.call(rbind, mylist)
I am assuming you want to read in multiple text files, with each file containing the same columns (in the same order). In order to combine multiple data frames (the things that result from calling read.table()), you should call the function rbind().
And I assume your code to get a filename without the extension is slightly overcomplex...
# Read each tab-separated file, add `name` (file name up to the first ".",
# spaces removed) and `case` (1 on the first row, 0 elsewhere), and stack all
# tables into `data`.
file_frames <- lapply(filenames, function(file) {
  sanitized_filename <- gsub(" ", "", strsplit(file, "\\.")[[1]][1])
  file.frame <- read.table(paste0(filepath, file), fill = TRUE, header = TRUE,
                           quote = "", row.names = NULL, sep = "\t")
  file.frame$name <- rep(sanitized_filename, nrow(file.frame))
  file.frame$case <- as.numeric(seq_len(nrow(file.frame)) == 1)
  file.frame
})
# One rbind() over the list. ifelse() cannot choose between whole data
# frames (its result takes the shape of the length-1 test), and
# exists("data") is always TRUE because `data` is also a base R function.
data <- do.call(rbind, file_frames)
}

Resources