Write tweets from rtweets package to csv - r

I'm unable to write tweets from search_tweet() in 'rtweet' package to csv. It throws the following error:
Here's a link to the question I previously asked, that has details on the type of search_tweet() object creates: Class and type of object is different in R. How should I make it consistent?
How should I write this files as csv?
library(rtweet)
comments <- search_tweets(
queryString, include_rts = FALSE,
n = 18000, type = "recent",
retryonratelimit = FALSE)
write_csv(comments, "comments.csv", append = TRUE)
Error: Error in stream_delim_(df, path, ..., bom = bom, quote_escape = quote_escape) :
Don't know how to handle vector of type list.
class(comments)
"tbl_df" "tbl" "data.frame"
screen grab of comments

The rtweet package has a function to export to CSV called write_as_csv but for some reason does not expose the append= option. You can take the code of that function and change it to add an append option. For example
write_as_csv2 <- function(x, file_name,
prepend_ids = TRUE,
na = "",
fileEncoding = "UTF-8", append=FALSE) {
## to minimize rounding
op <- options()
on.exit(options(op))
options(scipen = 14, digits = 22)
## validate inputs
stopifnot(is.data.frame(x), is.character(file_name), length(file_name) == 1L)
if (!grepl("\\.csv$", file_name)) {
file_name <- paste0(file_name, ".csv")
}
## flatten data
x <- flatten(x)
if (prepend_ids) {
x <- prepend_ids(x)
}
utils::write.table(x, file_name, row.names = FALSE, na = na,
fileEncoding = fileEncoding, append=append, sep=",", dec=".", qmethod="double")
# or
# readr::write_csv(x, file_name, append = append)
}
environment(write_as_csv2) <- asNamespace("rtweet")
Then you can call it like
write_as_csv2(comments, "comments.csv", append = TRUE)

Related

R: Importing file using rio and here packages in a nested function

I'm working on functions that can take the chracter string argument GSE_expt. I have written 4 separate functions which take the argument GSE_expt and produce the output that I am able to save as a variable in the R environment.
The code block below has 2 of those functions. I use paste0 function with the variable GSE_expt to create a file name that the here and rio packages can use to import the file.
# Extracting metadata from 2 different sources and combining them into a single file
extract_metadata <- function(GSE_expt){
GSE_expt <- deparse(substitute(GSE_expt)) # make sure it is a character string
metadata_1 <- rnaseq_metadata_allsamples %>% # subset a larger metadata file
as_tibble %>%
dplyr::filter(GSE == GSE_expt)
# metadata from ENA imported using rio and here packages
metadata_2 <- import(here("metadata", "rnaseq", paste0(GSE_expt, ".txt"))) %>%
as_tibble %>%
select("run_accession","library_layout", "library_strategy","library_source","read_count", "base_count", "sample_alias", "fastq_md5")
metadata <- full_join(metadata_1, metadata_2, by = c("Run"="run_accession"))
return(metadata)
}
# Extracting coverage stats obtained from samtools
clean_extract_coverage <- function(GSE_expt){
coverage <- read_tsv(file = here("results","rnaseq","2022-01-11", "coverage", paste0("coverage_stats_", deparse(substitute(GSE_expt)), "_percent.txt")), col_names = FALSE)
coverage <- data.frame("Run" = coverage$X1[c(TRUE, FALSE)],
"stats" = coverage$X1[c(FALSE, TRUE)])
coverage <- separate(coverage, stats, into = c("num_reads", "covered_bases", "coverage_percent"), convert = TRUE)
return(coverage)
}
The functions work fine on their own individually when I use GSE118008 as the variable for the argument GSE_expt.
I am trying to create a nested/combined function so that I can run GSE118008 on both (or more) functions at the same time and save the output as a list.
When I ran a nested/combined function,
extract_coverage_metadata <- function(GSE_expt){
coverage <- clean_extract_coverage(GSE_expt)
metadata <- extract_metadata(GSE_expt)
return(metadata)
}
extract_coverage_metadata(GSE118008)
This is the error message I got.
Error: 'results/rnaseq/2022-01-11/coverage/coverage_stats_GSE_expt_percent.txt' does not exist.
Rather than creating a filename
coverage_stats_GSE118008_percent.txt
(which it does fine with the individual function), it is unable to do so in this combined function, and instead returns the filename coverage_stats_GSE_expt_percent.txt
Traceback
8. stop("'", path, "' does not exist", if (!is_absolute_path(path)) { paste0(" in current working directory ('", getwd(), "')") }, ".", call. = FALSE)
7. check_path(path)
6. (function (path, write = FALSE) { if (is.raw(path)) { return(rawConnection(path, "rb")) ...
5. vroom_(file, delim = delim %||% col_types$delim, col_names = col_names, col_types = col_types, id = id, skip = skip, col_select = col_select, name_repair = .name_repair, na = na, quote = quote, trim_ws = trim_ws, escape_double = escape_double, escape_backslash = escape_backslash, ...
4. vroom::vroom(file, delim = "\t", col_names = col_names, col_types = col_types, col_select = { { col_select ...
3. read_tsv(file = here("results", "rnaseq", "2022-01-11", "coverage", paste0("coverage_stats_", deparse(substitute(GSE_expt)), "_percent.txt")), col_names = FALSE) at rnaseq_functions.R#30
2. clean_extract_coverage(GSE_expt)
1. extract_coverage_metadata(GSE118008)
I would appreciate any recommendations on how to solve this.
Thanks in advance!
Husain

My R code isn't throwing any errors, but it's not doing what it's supposed to

Some background for my question: This is an R script that a previous research assistant wrote, but he did not provide any guidance to me on using it for myself. After working through an R textbook, I attempted to use the code on my data files.
What this code is supposed to do is load multiple .csv files, delete certain items/columns from them, and then write the new cleaned .csv files to a specified directory.
When I run my code, I don't get any errors, but the code isn't going anything. I originally thought that this was a problem with file permissions, but I'm still having the problem after changing them. Not sure what to try next.
Here's the code:
library(data.table)
library(magrittr)
library(stringr)
# create a function to delete unnecessary variables from a CAFAS or PECFAS
data set and save the reduced copy
del.items <- function(file)
{
data <- read.csv(input = paste0("../data/pecfas|cafas/raw",
str_match(pattern = "cafas|pecfas", string = file) %>% tolower, "/raw/",
file), sep = ",", header = TRUE, na.strings = "", stringsAsFactors = FALSE,
skip = 0, colClasses = "character", data.table = FALSE)
data <- data[-grep(pattern = "^(CA|PEC)FAS_E[0-9]+(T(Initial|[0-
9]+|Exit)|SP[a-z])_(G|S|Item)[0-9]+$", x = names(data))]
write.csv(data, file = paste0("../data/pecfas|cafas/items-del",
str_match(pattern = "cafas|pecfas", string = file) %>% tolower, "/items-
del/", sub(pattern = "ExportData_", x = file, replacement = "")) %>%
tolower, sep = ",", row.names = FALSE, col.names = TRUE)
}
# delete items from all cafas data sets
cafas.files <- list.files("../data/cafas/raw/", pattern = ".csv")
for (file in cafas.files){
del.items(file)
}
# delete items from all pecfas data sets
pecfas.files <- list.files("../data/pecfas/raw/", pattern = ".csv")
for (file in pecfas.files){
del.items(file)
}

R write.csv is creating an empty file

Some background for my question: This is an R script that a previous research assistant wrote, but he did not provide any guidance to me on using it for myself. After working through an R textbook, I attempted to use the code on my data files.
What this code is supposed to do is load multiple .csv files, delete certain items/columns from them, and then write the new cleaned .csv files to a specified directory.
Currently, the files are being created in the right directory with the right file name, but the .csv files that are being created are empty.
I am currently getting the following error message:
Warning in
fread(input = paste0("data/", str_match(pattern = "CAFAS|PECFAS",: Starting data input on line 2 and discarding line 1 because it has too few or too many items to be column names or data: (variable names).
This is my code:
library(data.table)
library(magrittr)
library(stringr)
# create a function to delete unnecessary variables from a CAFAS or PECFAS
data set and save the reduced copy
del.items <- function(file){
data <- fread(input = paste0("data/", str_match(pattern = "CAFAS|PECFAS",
string = file) %>% tolower, "/raw/", file), sep = ",", header = TRUE,
na.strings = "", stringsAsFactors = FALSE, skip = 0, colClasses =
"character", data.table = FALSE)
data <- data[-grep(pattern = "^(CA|PEC)FAS_E[0-9]+(TR?(Initial|[0-
9]+|Exit)|SP[a-z])_(G|S|Item)[0-9]+$", x = names(data))]
write.csv(data, file = paste0("data/", str_match(pattern = "CAFAS|PECFAS",
string = file) %>% tolower, "/items-del/", sub(pattern = "ExportData_", x =
file, replacement = "")) %>% tolower, row.names = FALSE)
}
# delete items from all cafas data sets
cafas.files <- list.files("data/cafas/raw", pattern = ".csv")
for (file in cafas.files){
del.items(file)
}
# delete items from all pecfas data sets
pecfas.files <- list.files("data/pecfas/raw", pattern = ".csv")
for (file in pecfas.files){
del.items(file)
}

write results sequentially in a loop in r

I have a bunt of single files which need to apply a test. I need to find the way to write automatically results of each file into a file. Here is what I do:
library(ape)
stud_files <- list.files("path/dir/data",full.names = T)
for (f in stud_files) {
df <- read.table(f, header=TRUE, sep=";")
df_xts <- as.xts(df$cola, order.by = as.Date(df$colb,"%m/%d/%Y"))
pet <- testa(df_xts)
res <- data.frame(estimate = pet$estimate,
p.value=pet$p.value,
logi = pet$alternative)
write.dna(res,file = "res_testa.xls",format = "sequential")
}
This loop works well, except the last command which aim to write the results of each file consecutively, it saved only the last performance. And the results save as string, not a table as I define above (data.frame). Any idea in this case? Thanks in advance
Check help(write.dna).
write.dna(x, file, format = "interleaved", append = FALSE,
nbcol = 6, colsep = " ", colw = 10, indent = NULL,
blocksep = 1)
append a logical, if TRUE the data are appended to the file without
erasing the data possibly existing in the file, otherwise the file (if
it exists) is overwritten (FALSE the default).
Set append = TRUE and you should be all set.
As some of the comments point out, however, you are probably better off generating your table, and then writing it all at once to a file. Unless you have billions of files, you likely won't run out of memory.
Here is how I would approach this.
library(ape)
library(data.table)
stud_files <- list.files("path/dir/data",full.names = T)
sumfunc <- function(f) {
df <- read.table(f, header=TRUE, sep=";")
df_xts <- as.xts(df$cola, order.by = as.Date(df$colb,"%m/%d/%Y"))
pet <- testa(df_xts)
res <- data.table(estimate = pet$estimate,
p.value=pet$p.value,
logi = pet$alternative)
return(res)
}
lres <- lapply(stud_files, sumfunc)
dat <- rbindlist(lres)
write.table(dat,
file = "res_testa.csv",
sep = ",",
quote = FALSE,
row.names = FALSE)

Error in file.exists(path) : invalid 'file' argument in R

Please accept my apologies as I am new to R. The following code is used to process multiple files in one go and placing the output in a separate folder.
require(highfrequency)
require(quantmod)
require(readxl)
input_files1=list("file_path1.xlsx","file_path2.xlsx","file_path3.xlsx") #making list of file paths
for(i in length(input_files1))
{
bid_df<-read_excel(input_files1[i], sheet = 1, col_names = TRUE, col_types = NULL, na = "", skip = 0)
#read_excel takes file path as first argument
ask_df<-read_excel(input_files1[i], sheet = 2, col_names = TRUE, col_types = NULL, na = "", skip = 0)
trade_df<-read_excel(input_files1[i], sheet = 3, col_names = TRUE, col_types = NULL, na = "", skip = 0)
qdata_df <- merge(ask_df, bid_df, by = "TIMESTAMP")
qdata_xts_raw<-xts(qdata_df[,-1], order.by=qdata_df[,1])
qdata_xts_m<-mergeQuotesSameTimestamp(qdata_xts_raw, selection = "median")
trade_xts_raw <- xts(trade_df[,-1], order.by=trade_df[,1])
trade_xts_m<-mergeTradesSameTimestamp(trade_xts_raw, selection = "median")
tqdata=matchTradesQuotes(trade_xts_m,qdata_xts_m)
quoted_spread<-tqLiquidity(tqdata,trade_xts_m,qdata_xts_m,type="qs")
qs_30<-aggregatets(quoted_spread,FUN="mean",on="minutes",k=30)
indexTZ(qs_30) <- "UTC"
write.csv(qs_30, file = file.path("output_file_path", paste0("CAN_out", i)))
}
When the code is run, it gives the following error
Error in file.exists(path) : invalid 'file' argument
Please help in removing the error and running the code.
Access elements of the list in the path as input_files1[[i]]

Resources