Please accept my apologies as I am new to R. The following code is meant to process multiple files in one go and place the output in a separate folder.
require(highfrequency)
require(quantmod)
require(readxl)
input_files1=list("file_path1.xlsx","file_path2.xlsx","file_path3.xlsx") #making list of file paths
for(i in length(input_files1))
{
  bid_df <- read_excel(input_files1[i], sheet = 1, col_names = TRUE, col_types = NULL, na = "", skip = 0)
  #read_excel takes file path as first argument
  ask_df <- read_excel(input_files1[i], sheet = 2, col_names = TRUE, col_types = NULL, na = "", skip = 0)
  trade_df <- read_excel(input_files1[i], sheet = 3, col_names = TRUE, col_types = NULL, na = "", skip = 0)
  qdata_df <- merge(ask_df, bid_df, by = "TIMESTAMP")
  qdata_xts_raw <- xts(qdata_df[,-1], order.by = qdata_df[,1])
  qdata_xts_m <- mergeQuotesSameTimestamp(qdata_xts_raw, selection = "median")
  trade_xts_raw <- xts(trade_df[,-1], order.by = trade_df[,1])
  trade_xts_m <- mergeTradesSameTimestamp(trade_xts_raw, selection = "median")
  tqdata <- matchTradesQuotes(trade_xts_m, qdata_xts_m)
  quoted_spread <- tqLiquidity(tqdata, trade_xts_m, qdata_xts_m, type = "qs")
  qs_30 <- aggregatets(quoted_spread, FUN = "mean", on = "minutes", k = 30)
  indexTZ(qs_30) <- "UTC"
  write.csv(qs_30, file = file.path("output_file_path", paste0("CAN_out", i)))
}
When the code is run, it gives the following error:
Error in file.exists(path) : invalid 'file' argument
Please help me remove the error so that the code runs.
Access elements of the list in the path as input_files1[[i]]. Single brackets (input_files1[i]) return a one-element list rather than the character path itself, which is why read_excel's internal file.exists() check receives an invalid 'file' argument. Note also that for(i in length(input_files1)) loops over the single value length(input_files1); use for(i in seq_along(input_files1)) to visit every file.
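A minimal sketch of the corrected loop (the paths are the placeholders from the question, and the processing body is abbreviated):

require(readxl)
for (i in seq_along(input_files1)) {                  # iterate over every index, not just the last one
  bid_df <- read_excel(input_files1[[i]], sheet = 1)  # [[i]] extracts the path string itself
  # ... remaining processing steps from the question ...
  write.csv(qs_30, file = file.path("output_file_path", paste0("CAN_out", i, ".csv")))
}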
I'm working on functions that take the character string argument GSE_expt. I have written 4 separate functions, each of which takes the argument GSE_expt and produces output that I can save as a variable in the R environment.
The code block below has 2 of those functions. I use the paste0 function with the variable GSE_expt to create a file name that the here and rio packages can use to import the file.
# Extracting metadata from 2 different sources and combining them into a single file
extract_metadata <- function(GSE_expt){
  GSE_expt <- deparse(substitute(GSE_expt)) # make sure it is a character string
  metadata_1 <- rnaseq_metadata_allsamples %>% # subset a larger metadata file
    as_tibble %>%
    dplyr::filter(GSE == GSE_expt)
  # metadata from ENA imported using rio and here packages
  metadata_2 <- import(here("metadata", "rnaseq", paste0(GSE_expt, ".txt"))) %>%
    as_tibble %>%
    select("run_accession", "library_layout", "library_strategy", "library_source", "read_count", "base_count", "sample_alias", "fastq_md5")
  metadata <- full_join(metadata_1, metadata_2, by = c("Run" = "run_accession"))
  return(metadata)
}
# Extracting coverage stats obtained from samtools
clean_extract_coverage <- function(GSE_expt){
  coverage <- read_tsv(file = here("results", "rnaseq", "2022-01-11", "coverage", paste0("coverage_stats_", deparse(substitute(GSE_expt)), "_percent.txt")), col_names = FALSE)
  coverage <- data.frame("Run" = coverage$X1[c(TRUE, FALSE)],
                         "stats" = coverage$X1[c(FALSE, TRUE)])
  coverage <- separate(coverage, stats, into = c("num_reads", "covered_bases", "coverage_percent"), convert = TRUE)
  return(coverage)
}
The functions work fine individually when I use GSE118008 as the argument GSE_expt.
I am trying to create a nested/combined function so that I can run GSE118008 through both (or more) functions at the same time and save the output as a list.
When I run the nested/combined function,
extract_coverage_metadata <- function(GSE_expt){
  coverage <- clean_extract_coverage(GSE_expt)
  metadata <- extract_metadata(GSE_expt)
  return(metadata)
}
extract_coverage_metadata(GSE118008)
This is the error message I got.
Error: 'results/rnaseq/2022-01-11/coverage/coverage_stats_GSE_expt_percent.txt' does not exist.
Rather than creating the filename coverage_stats_GSE118008_percent.txt (which it does fine in the individual function), the combined function instead constructs the filename coverage_stats_GSE_expt_percent.txt.
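To illustrate, a minimal example outside my actual functions reproduces the behaviour: deparse(substitute(x)) captures the expression supplied in the immediate call, so once the bare name has passed through a wrapper it deparses to the wrapper's parameter name rather than to the original symbol.

f_inner <- function(x) deparse(substitute(x))
f_outer <- function(x) f_inner(x)
f_inner(GSE118008)  # "GSE118008"
f_outer(GSE118008)  # "x" -- the inner substitute() sees the wrapper's promise, not GSE118008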
Traceback
8. stop("'", path, "' does not exist", if (!is_absolute_path(path)) { paste0(" in current working directory ('", getwd(), "')") }, ".", call. = FALSE)
7. check_path(path)
6. (function (path, write = FALSE) { if (is.raw(path)) { return(rawConnection(path, "rb")) ...
5. vroom_(file, delim = delim %||% col_types$delim, col_names = col_names, col_types = col_types, id = id, skip = skip, col_select = col_select, name_repair = .name_repair, na = na, quote = quote, trim_ws = trim_ws, escape_double = escape_double, escape_backslash = escape_backslash, ...
4. vroom::vroom(file, delim = "\t", col_names = col_names, col_types = col_types, col_select = { { col_select ...
3. read_tsv(file = here("results", "rnaseq", "2022-01-11", "coverage", paste0("coverage_stats_", deparse(substitute(GSE_expt)), "_percent.txt")), col_names = FALSE) at rnaseq_functions.R#30
2. clean_extract_coverage(GSE_expt)
1. extract_coverage_metadata(GSE118008)
I would appreciate any recommendations on how to solve this.
Thanks in advance!
Husain
So I have a wrapper function that calls a lot of sub-functions. Rather than write out parameters in the wrapper for every potential argument of each sub-function, I want to use ... (dots) to allow callers to pass through any number of arguments and change the behaviour of the sub-functions if necessary.
The problem is that the sub-functions accept different arguments from ..., and I keep getting unused argument errors.
So I've tried to use do.call() and to update the output of formals() with matching entries from ...; see the code below.
elipRead <- function(type, path, ...){
  if(type == "csv"){
    ar <- list(...)
    args <- formals(readr::read_csv)
    args$file <- path
    args[which(names(args) %in% names(ar))] <- ar[na.omit(match(names(args), names(ar)))]
    out <- do.call(readr::read_csv, args = args)
  } else {
    ar <- list(...)
    args <- formals(readxl::read_xlsx)
    args$path <- path
    args[which(names(args) %in% names(ar))] <- ar[na.omit(match(names(args), names(ar)))]
    out <- do.call(readxl::read_xlsx, args)
  }
  return(out)
}
However, despite checking that the args list updates correctly, I still get errors:
csv <-"csv_Filename.csv"
test1 <- elipRead("csv", paste0(getwd(),csv), sheet = "Sheet1" , col_names = FALSE)
# Error in default_locale() : could not find function "default_locale"
xlsx <-"xlsx_Filename.xlsx"
test2 <- elipRead("xlsx", paste0(getwd(),xlsx), sheet = "Sheet1", col_names = TRUE)
# Error: `guess_max` must be a positive integer
For the xlsx attempt, the error comes from the guess_max default, which cannot find the n_max object. I assume this has to do with the do.call() environment and n_max not being in scope when the default is evaluated. For the csv attempt, it is likewise an issue of the default_locale() function not being found.
Error in check_non_negative_integer(guess_max, "guess_max") :
  object 'n_max' not found
6. check_non_negative_integer(guess_max, "guess_max")
5. check_guess_max(guess_max)
4. read_excel_(path = path, sheet = sheet, range = range, col_names = col_names,
     col_types = col_types, na = na, trim_ws = trim_ws, skip = skip,
     n_max = n_max, guess_max = guess_max, progress = progress,
     .name_repair = .name_repair, format = "xlsx")
3. (function (path, sheet = NULL, range = NULL, col_names = TRUE,
     col_types = NULL, na = "", trim_ws = TRUE, skip = 0, n_max = Inf,
     guess_max = min(1000, n_max), progress = readxl_progress(),
     .name_repair = "unique") ...
2. do.call(readxl::read_xlsx, args)
1. elipRead("xlsx", paste0(add, xlsx), sheet = "Sheet1", col_names = TRUE)
In the end, there are three potential answers I'm hoping for:
1. Recommendations of changes to my current code to ensure the do.call() approach works.
2. An alternative method for using ... that passes only the relevant arguments from the dots list to each function.
3. A completely different approach for passing arguments from a wrapper to internal functions.
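One sketch of option 2, assuming it is acceptable for unspecified arguments to fall back to each function's own defaults: forward only the entries of ... whose names match the target function's formals, and never copy the defaults out of the function at all (the helper name here is made up):

# pass_matching_dots: call `fun` with the fixed arguments plus only those
# ... entries whose names appear among fun's formal arguments
pass_matching_dots <- function(fun, fixed, dots) {
  keep <- intersect(names(dots), names(formals(fun)))
  do.call(fun, c(fixed, dots[keep]))
}

elipRead2 <- function(type, path, ...) {
  dots <- list(...)
  if (type == "csv") {
    pass_matching_dots(readr::read_csv, list(file = path), dots)
  } else {
    pass_matching_dots(readxl::read_xlsx, list(path = path), dots)
  }
}

Because the unmatched defaults are left for the function to evaluate in its own frame, expressions like guess_max = min(1000, n_max) and default_locale() no longer fail; a call such as elipRead2("xlsx", path, sheet = "Sheet1", col_names = TRUE) forwards sheet and col_names and drops everything else.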
I'm unable to write tweets from search_tweets() in the 'rtweet' package to CSV. It throws the error shown below.
Here's a link to the question I previously asked, which has details on the type of object search_tweets() creates: Class and type of object is different in R. How should I make it consistent?
How should I write this file as CSV?
library(rtweet)
comments <- search_tweets(
  queryString, include_rts = FALSE,
  n = 18000, type = "recent",
  retryonratelimit = FALSE)
write_csv(comments, "comments.csv", append = TRUE)
Error: Error in stream_delim_(df, path, ..., bom = bom, quote_escape = quote_escape) :
Don't know how to handle vector of type list.
class(comments)
"tbl_df" "tbl" "data.frame"
The rtweet package has a function to export to CSV called write_as_csv, but for some reason it does not expose an append = option. You can take the code of that function and change it to add an append argument. For example:
write_as_csv2 <- function(x, file_name,
                          prepend_ids = TRUE,
                          na = "",
                          fileEncoding = "UTF-8", append = FALSE) {
  ## to minimize rounding
  op <- options()
  on.exit(options(op))
  options(scipen = 14, digits = 22)
  ## validate inputs
  stopifnot(is.data.frame(x), is.character(file_name), length(file_name) == 1L)
  if (!grepl("\\.csv$", file_name)) {
    file_name <- paste0(file_name, ".csv")
  }
  ## flatten data
  x <- flatten(x)
  if (prepend_ids) {
    x <- prepend_ids(x)
  }
  utils::write.table(x, file_name, row.names = FALSE, na = na,
                     fileEncoding = fileEncoding, append = append,
                     sep = ",", dec = ".", qmethod = "double")
  # or
  # readr::write_csv(x, file_name, append = append)
}
environment(write_as_csv2) <- asNamespace("rtweet")
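(The environment(write_as_csv2) <- asNamespace("rtweet") line is what lets the copied function resolve rtweet's helpers flatten() and prepend_ids(), which it would otherwise not find if they are not exported.)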
Then you can call it like this:
write_as_csv2(comments, "comments.csv", append = TRUE)
Tested with a simple data frame as input; I think it might be the Excel file itself.
library(openxlsx)
workbook <- loadWorkbook(filepath, xlsxFile = NULL, isUnzipped = FALSE)
writeData(workbook, "Test1", al, startCol = 2, startRow = 5, colNames = FALSE)
saveWorkbook(workbook, filepath, overwrite = TRUE)
But I get this:
Error in comment_list[[i]]$style[[j]] : subscript out of bounds
Apparently, openxlsx cannot handle comments. Deleting all comments in my Excel file resolved the problem.
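If deleting the comments by hand were not an option, one possible workaround (untested against the original file, and it rebuilds the sheet rather than editing it in place) would be to read the cell values with readxl, which ignores comments, and write them into a fresh workbook:

library(readxl)
library(openxlsx)
dat <- read_excel(filepath, sheet = "Test1")   # readxl reads values only, skipping comments
wb <- createWorkbook()                         # fresh workbook, so loadWorkbook() never parses the comments
addWorksheet(wb, "Test1")
writeData(wb, "Test1", dat, startCol = 2, startRow = 5, colNames = FALSE)
saveWorkbook(wb, "fresh_copy.xlsx", overwrite = TRUE)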
When I run the following code line by line, everything is fine until the cursor reaches do.call.
require(highfrequency)
require(quantmod)
require(readxl)
require(xlsx)
setwd("file_path")
input_files=list(list.files(path="file_path", recursive=T, pattern='.xlsx'))
processLIQ <- function(input_files)
{
  #reading bid data and making df object of it
  bid_df <- read_excel(input_files, sheet = 1, col_names = TRUE, col_types = NULL, na = "", skip = 0)
  #bid_df$TIMESTAMP<-as.POSIXct(bid_df$TIMESTAMP, format="%H:%M:%S")
  #reading ask data and making df object of it
  ask_df <- read_excel(input_files, sheet = 2, col_names = TRUE, col_types = NULL, na = "", skip = 0)
  #merging df objects of bid and ask directly and making xts object of qdata
  qdata_df <- merge(ask_df, bid_df, by = "TIMESTAMP")
  str(qdata_df)
  qdata_xts_raw <- xts(qdata_df[,-1], order.by = qdata_df[,1])
  str(qdata_xts_raw)
  #Merge multiple quote entries with multiple timestamp
  qdata_xts_m <- mergeQuotesSameTimestamp(qdata_xts_raw, selection = "median")
  str(qdata_xts_m)
  #reading trade data and making xts object of it
  trade_df <- read_excel(input_files, sheet = 3, col_names = TRUE, col_types = NULL, na = "", skip = 0)
  str(trade_df)
  trade_xts_raw <- xts(trade_df[,-1], order.by = trade_df[,1])
  #Merge multiple trade entries with multiple timestamp
  trade_xts_m <- mergeTradesSameTimestamp(trade_xts_raw, selection = "median")
  str(trade_xts_m)
  #Matching Trade and Quotes
  tqdata <- matchTradesQuotes(trade_xts_m, qdata_xts_m)
  #liquidity computation
  #Quoted Spread(1)
  quoted_spread <- tqLiquidity(tqdata, trade_xts_m, qdata_xts_m, type = "qs")
  qs_30 <- aggregatets(quoted_spread, FUN = "mean", on = "minutes", k = 30)
  indexTZ(qs_30) <- "UTC"
  Canara_out_xts <- merge(qs_30, pqs_30, log_qs_30, es_30, depth_xts_30, Rupee_depth_xts_30, log_returns_30, volume_30)
  indexTZ(Canara_out_xts) <- "UTC"
  write.xlsx(Canara_out_xts, file = file.path("output_file_path", paste0("CAN_test6", i, ".xlsx")))
}
do.call(processLIQ, input_files)
The error is
Error in switch(ext, xls = "xls", xlsx = "xlsx", xlsm = "xlsx", stop("Unknown format .", :
EXPR must be a length 1 vector
In addition: Warning message:
In if (!file.exists(path)) { :
the condition has length > 1 and only the first element will be used
A Browse prompt opens in the console, along with the source viewer showing this code:
function (path, sheet = 1, col_names = TRUE, col_types = NULL,
          na = "", skip = 0)
{
  path <- check_file(path)
  ext <- tolower(tools::file_ext(path))
  switch(excel_format(path),
         xls = read_xls(path, sheet, col_names, col_types, na, skip),
         xlsx = read_xlsx(path, sheet, col_names, col_types, na, skip))
}
Kindly help in resolving this issue.
The problem is that read_excel takes a single path to an Excel spreadsheet, but you've passed a character vector with more than one path: list(list.files(...)) wraps the whole vector in a one-element list, so do.call(processLIQ, input_files) supplies that entire vector as the single input_files argument.
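A minimal sketch of one fix: keep the list.files() output as a plain character vector (full.names = TRUE avoids relying on the working directory) and call the function once per file, for example with lapply():

input_files <- list.files(path = "file_path", recursive = TRUE,
                          pattern = "\\.xlsx$", full.names = TRUE)
results <- lapply(input_files, processLIQ)   # one call per path, results collected in a list

Note that the body of processLIQ also references a loop index i that is never defined inside the function; the write.xlsx() file name would need to be derived from the input path instead, e.g. via basename().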