Error in R do.call function [closed]

Closed. This question needs details or clarity. It is not currently accepting answers.
Closed 7 years ago.
When I run the following code line by line, everything works fine until the cursor reaches the do.call line.
require(highfrequency)
require(quantmod)
require(readxl)
require(xlsx)
setwd("file_path")
input_files=list(list.files(path="file_path", recursive=T, pattern='.xlsx'))
processLIQ <- function(input_files)
{
  # reading bid data and making a df object of it
  bid_df <- read_excel(input_files, sheet = 1, col_names = TRUE, col_types = NULL, na = "", skip = 0)
  # bid_df$TIMESTAMP <- as.POSIXct(bid_df$TIMESTAMP, format="%H:%M:%S")
  # reading ask data and making a df object of it
  ask_df <- read_excel(input_files, sheet = 2, col_names = TRUE, col_types = NULL, na = "", skip = 0)
  # merging the bid and ask df objects and making an xts object of the quote data
  qdata_df <- merge(ask_df, bid_df, by = "TIMESTAMP")
  str(qdata_df)
  qdata_xts_raw <- xts(qdata_df[,-1], order.by = qdata_df[,1])
  str(qdata_xts_raw)
  # merge multiple quote entries with the same timestamp
  qdata_xts_m <- mergeQuotesSameTimestamp(qdata_xts_raw, selection = "median")
  str(qdata_xts_m)
  # reading trade data and making an xts object of it
  trade_df <- read_excel(input_files, sheet = 3, col_names = TRUE, col_types = NULL, na = "", skip = 0)
  str(trade_df)
  trade_xts_raw <- xts(trade_df[,-1], order.by = trade_df[,1])
  # merge multiple trade entries with the same timestamp
  trade_xts_m <- mergeTradesSameTimestamp(trade_xts_raw, selection = "median")
  str(trade_xts_m)
  # matching trades and quotes
  tqdata <- matchTradesQuotes(trade_xts_m, qdata_xts_m)
  # liquidity computation
  # Quoted Spread (1)
  quoted_spread <- tqLiquidity(tqdata, trade_xts_m, qdata_xts_m, type = "qs")
  qs_30 <- aggregatets(quoted_spread, FUN = "mean", on = "minutes", k = 30)
  indexTZ(qs_30) <- "UTC"
  Canara_out_xts <- merge(qs_30, pqs_30, log_qs_30, es_30, depth_xts_30, Rupee_depth_xts_30, log_returns_30, volume_30)
  indexTZ(Canara_out_xts) <- "UTC"
  write.xlsx(Canara_out_xts, file = file.path("output_file_path", paste0("CAN_test6", i, ".xlsx")))
}
do.call(processLIQ, input_files)
The error is
Error in switch(ext, xls = "xls", xlsx = "xlsx", xlsm = "xlsx", stop("Unknown format .", :
EXPR must be a length 1 vector
In addition: Warning message:
In if (!file.exists(path)) { :
the condition has length > 1 and only the first element will be used
A Browse prompt opens in the console, along with a source viewer showing this code:
function (path, sheet = 1, col_names = TRUE, col_types = NULL,
na = "", skip = 0)
{
path <- check_file(path)
ext <- tolower(tools::file_ext(path))
switch(excel_format(path), xls = read_xls(path, sheet, col_names,
col_types, na, skip), xlsx = read_xlsx(path, sheet, col_names,
col_types, na, skip))
}
Kindly help in resolving this issue.

The problem is that read_excel takes a single path to an Excel spreadsheet, but you've passed a character vector with more than one path.
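Because input_files is built as list(list.files(...)), it is a list with a single element: the entire character vector of file names. do.call(processLIQ, input_files) therefore passes that whole vector as the one input_files argument, and read_excel stops because its path must have length 1. A minimal sketch of one fix, keeping the question's names (full.names = TRUE and the anchored pattern are added for robustness):

# build a plain character vector of paths, not a one-element list
input_files <- list.files(path = "file_path", recursive = TRUE,
                          pattern = "\\.xlsx$", full.names = TRUE)
# call the function once per file, so read_excel always sees a single path
for (f in input_files) {
  processLIQ(f)
}
# or equivalently: lapply(input_files, processLIQ)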

Related

Write tweets from rtweets package to csv

I'm unable to write tweets from search_tweets() in the 'rtweet' package to CSV. It throws the error below.
Here's a link to a question I previously asked that has details on the type of object search_tweets() creates: Class and type of object is different in R. How should I make it consistent?
How should I write these files as CSV?
library(rtweet)
comments <- search_tweets(
queryString, include_rts = FALSE,
n = 18000, type = "recent",
retryonratelimit = FALSE)
write_csv(comments, "comments.csv", append = TRUE)
Error: Error in stream_delim_(df, path, ..., bom = bom, quote_escape = quote_escape) :
Don't know how to handle vector of type list.
class(comments)
"tbl_df" "tbl" "data.frame"
[screen grab of comments]
The rtweet package has a function to export to CSV, write_as_csv, but it does not expose an append= option. You can take the code of that function and modify it to add one. For example:
write_as_csv2 <- function(x, file_name,
                          prepend_ids = TRUE,
                          na = "",
                          fileEncoding = "UTF-8", append = FALSE) {
  ## to minimize rounding
  op <- options()
  on.exit(options(op))
  options(scipen = 14, digits = 22)
  ## validate inputs
  stopifnot(is.data.frame(x), is.character(file_name), length(file_name) == 1L)
  if (!grepl("\\.csv$", file_name)) {
    file_name <- paste0(file_name, ".csv")
  }
  ## flatten data
  x <- flatten(x)
  if (prepend_ids) {
    x <- prepend_ids(x)
  }
  utils::write.table(x, file_name, row.names = FALSE, na = na,
                     fileEncoding = fileEncoding, append = append,
                     sep = ",", dec = ".", qmethod = "double")
  # or
  # readr::write_csv(x, file_name, append = append)
}
environment(write_as_csv2) <- asNamespace("rtweet")
Then you can call it like
write_as_csv2(comments, "comments.csv", append = TRUE)
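Alternatively, the error itself comes from readr being handed list columns, so flattening the tweets data frame before writing also works. A minimal sketch, assuming your rtweet version exports flatten() (the 0.6.x releases did):

library(rtweet)
library(readr)
comments_flat <- flatten(comments)   # collapse list columns to character
write_csv(comments_flat, "comments.csv", append = TRUE)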

Issue downloading and opening xlsx-file from within R

I would like to download and open the following Excel-file with monthly and annual consumer price indices directly from within R.
https://www.bfs.admin.ch/bfsstatic/dam/assets/7066959/master
(the link can be found on this site: https://www.bfs.admin.ch/bfs/de/home/statistiken/preise/landesindex-konsumentenpreise/lik-resultate.assetdetail.7066959.html)
I used to download this file manually using the browser, save it locally on my computer, then open the xlsx-file with R and work with the data without any problems.
I have now tried to read the file directly from within R, but without luck so far. As you can see from the URL above, there is no .xlsx extension or the like, so I figured the file is zipped somehow. Here is what I've tried so far and where I am stuck.
library(foreign)
library(xlsx)
# in a browser, this link opens or downloads an xlsx file
likurl <- "https://www.bfs.admin.ch/bfsstatic/dam/assets/7066959/master"
temp <- tempfile()
download.file(likurl, temp)
list.files <- unzip(temp,list=TRUE)
data <- read.xlsx(unz(temp,
+ list.files$Name[8]), sheetIndex=2)
The result from the last step is
Error in +list.files$Name[8] : invalid argument to unary operator
I do not really understand the unz function, but reading its help file I can see that this call is somehow wrong (I found this suggested solution somewhere online).
I also tried the following, different approach:
library(XLConnect)
likurl <- "https://www.bfs.admin.ch/bfsstatic/dam/assets/7066959/master"
tmp = tempfile(fileext = ".xlsx")
download.file(likurl, tmp)
readWorksheetFromFile(tmp, sheet = 2, startRow = 4,
colNames = TRUE, rowNames = FALSE)
with the last line returning as result:
Error: ZipException (Java): invalid entry size (expected 1644 but got 1668 bytes)
I would greatly appreciate any help on how I can open this data and work with it as usual when reading in data from excel into R.
Thanks a lot in advance!
Here's my solution, thanks to the hint by @Johnny. Reading the data from Excel worked better with read.xlsx from the xlsx package (instead of read_excel as suggested in the link above).
Some ugly details remain in how the columns are named (colNames is not passed on correctly, except for the first and 11th column) and in the strange new columns created from the options passed to read.xlsx (e.g., a column named colNames with all entries == TRUE; for details, see the output structure from str(LIK.m)). However, those would be for another question; for the moment they can be fixed in a quick and dirty way :-).
library(httr)
library(foreign)
library(xlsx)
# in a browser, this link opens or downloads an xlsx file
likurl<-'https://www.bfs.admin.ch/bfsstatic/dam/assets/7066959/master'
p1f <- tempfile()
download.file(likurl, p1f, mode="wb")
GET(likurl, write_disk(tf <- tempfile(fileext = ".xlsx")))
# annual CPI
LIK.y <- read.xlsx(tf,
sheetIndex = 2, startRow = 4,
colNames = TRUE, rowNames = FALSE, stringsAsFactors = FALSE,
detectDates = FALSE, skipEmptyRows = TRUE, skipEmptyCols = TRUE ,
na.strings = "NA", check.names = TRUE, fillMergedCells = FALSE)
LIK.y$X. <- as.numeric(LIK.y$X.)
str(LIK.y)
# monthly CPI
LIK.m <- read.xlsx(tf,
sheetIndex = 1, startRow = 4,
colNames = TRUE, rowNames = FALSE, stringsAsFactors = FALSE,
detectDates = FALSE, skipEmptyRows = TRUE, skipEmptyCols = TRUE ,
na.strings = "NA", check.names = TRUE, fillMergedCells = FALSE)
LIK.m$X. <- as.numeric(LIK.m$X.)
str(LIK.m)
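A note on those strange columns: arguments such as colNames, detectDates, skipEmptyRows and fillMergedCells belong to openxlsx::read.xlsx, not to xlsx::read.xlsx, whose ... appears to get recycled into new columns (hence a colNames column full of TRUE). A sketch of the same read using openxlsx instead, assuming that package is installed (untested against this particular file):

library(openxlsx)
LIK.m2 <- openxlsx::read.xlsx(tf, sheet = 1, startRow = 4,
                              colNames = TRUE, detectDates = FALSE,
                              skipEmptyRows = TRUE, skipEmptyCols = TRUE,
                              na.strings = "NA", check.names = TRUE)
str(LIK.m2)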

R write.csv is creating an empty file

Some background for my question: This is an R script that a previous research assistant wrote, but he did not provide any guidance to me on using it for myself. After working through an R textbook, I attempted to use the code on my data files.
What this code is supposed to do is load multiple .csv files, delete certain items/columns from them, and then write the new cleaned .csv files to a specified directory.
Currently, the files are being created in the right directory with the right file name, but the .csv files that are being created are empty.
I am currently getting the following error message:
Warning in fread(input = paste0("data/", str_match(pattern = "CAFAS|PECFAS",: Starting data input on line 2 and discarding line 1 because it has too few or too many items to be column names or data: (variable names).
This is my code:
library(data.table)
library(magrittr)
library(stringr)
# create a function to delete unnecessary variables from a CAFAS or PECFAS
# data set and save the reduced copy
del.items <- function(file){
  data <- fread(input = paste0("data/", str_match(pattern = "CAFAS|PECFAS",
                                                  string = file) %>% tolower,
                               "/raw/", file),
                sep = ",", header = TRUE, na.strings = "",
                stringsAsFactors = FALSE, skip = 0,
                colClasses = "character", data.table = FALSE)
  data <- data[-grep(pattern = "^(CA|PEC)FAS_E[0-9]+(TR?(Initial|[0-9]+|Exit)|SP[a-z])_(G|S|Item)[0-9]+$",
                     x = names(data))]
  write.csv(data, file = paste0("data/", str_match(pattern = "CAFAS|PECFAS",
                                                   string = file) %>% tolower,
                                "/items-del/",
                                sub(pattern = "ExportData_", x = file,
                                    replacement = "")) %>% tolower,
            row.names = FALSE)
}
# delete items from all cafas data sets
cafas.files <- list.files("data/cafas/raw", pattern = ".csv")
for (file in cafas.files){
del.items(file)
}
# delete items from all pecfas data sets
pecfas.files <- list.files("data/pecfas/raw", pattern = ".csv")
for (file in pecfas.files){
del.items(file)
}
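The warning is a strong hint: fread is discarding line 1 instead of using it as column names, so the parsed columns may not be what the deletion regex expects. Note also that if the grep() pattern matches nothing, data[-integer(0)] selects zero columns, which would produce exactly these empty files. A quick debugging sketch (an added suggestion, not from the original thread; paths assumed to match the question) to inspect what one file actually parses to:

library(data.table)
f <- list.files("data/cafas/raw", pattern = "\\.csv$")[1]
d <- fread(paste0("data/cafas/raw/", f), sep = ",", header = TRUE,
           na.strings = "", colClasses = "character", data.table = FALSE)
dim(d)    # zero rows, or one giant column, would explain the empty output
names(d)  # compare these against the pattern passed to grep()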

R find maxima of multiple variables from multiple .CSV files

I have multiple csv's, each containing multiple observations for one participant on several variables. Let's say each csv file looks something like the below, and the name of the file indicates the participant's ID:
data.frame(
happy = sample(1:20, 10),
sad = sample(1:20, 10),
angry = sample(1:20, 10)
)
I found some code in an excellent stackoverflow answer that allows me to access all files saved into a specific folder, calculate the sums of these emotions, and output them into a file:
# access all csv files in the working directory
fileNames <- Sys.glob("*.csv")
for (fileName in fileNames) {
  # read original data:
  sample <- read.csv(fileName, header = TRUE, sep = ",")
  # create new data based on contents of original file:
  data.summary <- data.frame(
    File = fileName,
    happy.sum = sum(sample$happy),
    sad.sum = sum(sample$sad),
    angry.sum = sum(sample$angry))
  # write new data to separate file:
  write.table(data.summary, "sample-allSamples.csv",
              append = TRUE, sep = ",",
              row.names = FALSE, col.names = FALSE)
}
However, I can ONLY get "sum" to work in this function. I would like to not only find the sums of each emotion for each participant, but also the maximum value of each.
When I try to modify the above:
for (fileName in fileNames) {
  # read original data:
  sample <- read.csv(fileName, header = TRUE, sep = ",")
  # create new data based on contents of original file:
  data.summary <- data.frame(
    File = fileName,
    happy.sum = sum(sample$happy),
    happy.max = max(sample$happy),
    sad.sum = sum(sample$sad),
    angry.sum = sum(sample$angry))
  # write new data to separate file:
  write.table(data.summary, "sample-allSamples.csv",
              append = TRUE, sep = ",",
              row.names = FALSE, col.names = FALSE)
}
I get the following warning message:
In max(sample$happy) : no non-missing arguments to max; returning -Inf
Would sincerely appreciate any advice anyone can give me!
Using your test data, the max() statement works fine for me. Could it be a discrepancy between the sample code you posted and your actual csv file structure?
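That warning is what max() emits when given a zero-length argument, which here usually means sample$happy is NULL because the column name in the real files differs (note that sum(NULL) silently returns 0, which is why the sums seemed to work). A quick check along the lines of the comment above (an added sketch, not from the original thread):

for (fileName in Sys.glob("*.csv")) {
  cols <- names(read.csv(fileName, header = TRUE, sep = ",", nrows = 1))
  print(fileName)
  print(cols)   # look for files without a "happy" column
}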

Error in file.exists(path) : invalid 'file' argument in R

Please accept my apologies as I am new to R. The following code is used to process multiple files in one go and placing the output in a separate folder.
require(highfrequency)
require(quantmod)
require(readxl)
input_files1=list("file_path1.xlsx","file_path2.xlsx","file_path3.xlsx") #making list of file paths
for(i in length(input_files1))
{
  bid_df <- read_excel(input_files1[i], sheet = 1, col_names = TRUE, col_types = NULL, na = "", skip = 0)
  # read_excel takes a file path as its first argument
  ask_df <- read_excel(input_files1[i], sheet = 2, col_names = TRUE, col_types = NULL, na = "", skip = 0)
  trade_df <- read_excel(input_files1[i], sheet = 3, col_names = TRUE, col_types = NULL, na = "", skip = 0)
  qdata_df <- merge(ask_df, bid_df, by = "TIMESTAMP")
  qdata_xts_raw <- xts(qdata_df[,-1], order.by = qdata_df[,1])
  qdata_xts_m <- mergeQuotesSameTimestamp(qdata_xts_raw, selection = "median")
  trade_xts_raw <- xts(trade_df[,-1], order.by = trade_df[,1])
  trade_xts_m <- mergeTradesSameTimestamp(trade_xts_raw, selection = "median")
  tqdata <- matchTradesQuotes(trade_xts_m, qdata_xts_m)
  quoted_spread <- tqLiquidity(tqdata, trade_xts_m, qdata_xts_m, type = "qs")
  qs_30 <- aggregatets(quoted_spread, FUN = "mean", on = "minutes", k = 30)
  indexTZ(qs_30) <- "UTC"
  write.csv(qs_30, file = file.path("output_file_path", paste0("CAN_out", i)))
}
When the code is run, it gives the following error
Error in file.exists(path) : invalid 'file' argument
Please help in removing the error and running the code.
Access elements of the list in the path as input_files1[[i]]
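In other words, input_files1[i] (single brackets) returns a one-element list, which is not a valid 'file' argument, while input_files1[[i]] extracts the path string itself. A minimal sketch of the corrected loop skeleton (the seq_along fix is an extra observation, not part of the original answer: for(i in length(input_files1)) visits only the last index):

for (i in seq_along(input_files1)) {                   # iterates 1, 2, 3
  bid_df <- read_excel(input_files1[[i]], sheet = 1)   # [[i]] yields one path
  # ... remaining processing as in the question ...
}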
