str_subset with curly curly in R - r

I have a small function that reads in files whose names contain a certain string, using str_subset. It works if I pass the argument in quotes, but I want to be able to pass it without quotes. I thought I could do this with curly-curly ({{ }}), but it isn't working.
Working example passing with quotes:
# Write the built-in mtcars data to disk so there is a file to read back.
library(tidyverse)
mtcars %>%
  write_csv("C:\\Users\\testSTACK.csv")
# Read the file(s) whose name matches the pattern `x` and publish the result
# in the global environment as df_new_<argument as typed by the caller>.
read_in_fun <- function(x) {
  # Switch to the data folder; the working directory stays changed afterwards.
  setwd("C:\\Users")
  # Keep only the file names in that folder that match `x`.
  matched <- str_subset(list.files(), pattern = x)
  # Build the target name from the text of the caller's argument expression.
  target <- paste("df_new", deparse(substitute(x)), sep = "_")
  # Read the matching file(s) and assign the data frame globally.
  assign(target, read_csv(matched), envir = .GlobalEnv)
}
read_in_fun("STACK")
#this works, returns df called:
df_new_"STACK"
Now, if I try to pass the argument without quotes using the curly-curly approach:
# Second attempt: the pattern is wrapped in curly-curly so that the caller can
# write read_in_fun(STACK) without quotes. The call below is reported to fail.
read_in_fun <- function(x) {
# Changes the session's working directory; it is not restored afterwards.
setwd("C:\\Users")
d <- list.files() #lists all files in folder
# NOTE(review): str_subset() appears to evaluate `pattern` as an ordinary
# argument, so {{x}} ends up looking for an object named STACK -- confirm
# against the rlang documentation for the embrace operator.
file <- d %>%
str_subset(pattern = {{x}})
#read in
df <- read_csv(file)
# Text of the caller's argument expression, used to build the output name.
arg_name <- deparse(substitute(x))
var_name <- paste("df_new", arg_name, sep = "_")
# Store the data frame in the global environment under var_name.
assign(var_name, df, env = .GlobalEnv)
}
read_in_fun(STACK)
#Error in type(pattern) : object 'STACK' not found
I also tried using enquo:
# Third attempt: capture the argument with enquo() and unquote it with `!!`
# inside the str_subset() call. The call below is reported to fail.
read_in_fun <- function(x) {
# Capture the caller's argument expression as a quosure.
x_quo <- enquo(x)
# Changes the session's working directory; it is not restored afterwards.
setwd("C:\\Users")
d <- list.files() #lists all files in folder
# NOTE(review): the reported error says `!!` is only valid in a quasiquotation
# context; presumably str_subset() does not provide one -- confirm.
file <- d %>%
str_subset(pattern = !! as_label(x_quo)) #OR !!(x_quo)
#read in
df <- read_csv(file)
# Text of the caller's argument expression, used to build the output name.
arg_name <- deparse(substitute(x))
var_name <- paste("df_new", arg_name, sep = "_")
# Store the data frame in the global environment under var_name.
assign(var_name, df, env = .GlobalEnv)
}
read_in_fun(STACK)
# Error during wrapup: Quosures can only be unquoted within a quasiquotation context.
My desired output is a data frame called df_new_STACK. Can curly-curly be used in this way? Thanks.

Using ensym should work.
#' Read the file(s) whose name matches `x` into a global data frame.
#'
#' @param x A bare name (e.g. `STACK`) or a string (e.g. `"STACK"`). Its text
#'   is used as the pattern matched against the file names in the working
#'   directory and as the suffix of the output variable name.
#' @return The value of `assign()`: the data frame read, returned invisibly.
#'   As a side effect the data frame is stored in the global environment as
#'   `df_new_<x>`.
read_in_fun <- function(x) {
  # ensym() accepts both a bare name and a string, and as_label() turns either
  # into plain text. Using the same text for the pattern and for the variable
  # name keeps the result `df_new_STACK` even when the caller quotes the name.
  arg_name <- as_label(ensym(x))
  file <- str_subset(list.files(), pattern = arg_name)
  # Fail with a clear message instead of handing an empty vector to read_csv().
  if (length(file) == 0L) {
    stop(
      "No file in '", getwd(), "' matches '", arg_name, "'.",
      call. = FALSE
    )
  }
  #read in
  df <- read_csv(file)
  var_name <- paste("df_new", arg_name, sep = "_")
  # The asker explicitly wants the result published in the global environment.
  assign(var_name, df, envir = .GlobalEnv)
}
read_in_fun(STACK)
df_new_STACK

Related

Write a function to manipulate and then write a dataframe

I would like to read in multiple .csv files (dataframes) from a folder and apply a function that I create to all the files. And finally this function will write the new .csv files.
I want the function to do the following 3 things
# Recode the Class labels: "null" becomes "OTHER", then ": " becomes ",".
df$Class <- gsub(': ', ',', gsub("null", "OTHER", df$Class))
# Put Image first, keep every other column, and drop Name.
df <- select(df, c(Image, everything(df), -Name))
I don't really know how to put these things into a function, but I've tried:
`
# First attempt at a function that reads every CSV, tidies the Class column,
# reorders the columns and writes the result back out.
# All file names in the working directory that end in ".csv".
file_names <- list.files(pattern = "\\.csv$")
tidy_up_fxn <- function(file_names) {
# Read every file with data.table::fread and stack the rows into one table.
df <- do.call(bind_rows,lapply(file_names,data.table::fread))
df$Class <- gsub("null", "OTHER", df$Class)
df$Class <- gsub(': ', ',', df$Class)
# Image first, then every other column, with Name dropped.
df <- df %>% select(c(Image, everything(.), -Name))
# `out` is bound to a function whose body is the fwrite() call on the next
# line. That function is never called, so nothing is written to disk.
out <- function(df)
fwrite(out, file = file_names, sep = ",")
}
tidy_up_fxn(file_names)
`
When I run it, R is busy for a few seconds and then nothing happens. Please help me correct my function!
The following works the way I intended:
# All CSV files in the working directory, excluding the output of a previous
# run so that it is not read back in as input.
file_names <- setdiff(list.files(pattern = "\\.csv$"), "new.csv")

#' Combine CSV files, tidy the `Class` column and write one cleaned CSV.
#'
#' @param file_names Character vector of CSV paths to read and stack.
#' @param out_file Path of the CSV to write. Defaults to `"new.csv"`.
#' @return The value of `data.table::fwrite()`; called for its side effect of
#'   writing `out_file`.
tidy_up_fxn <- function(file_names, out_file = "new.csv") {
  if (length(file_names) == 0L) {
    stop("No input files supplied.", call. = FALSE)
  }
  df <- bind_rows(lapply(file_names, data.table::fread))
  df$Class <- gsub("null", "OTHER", df$Class)
  df$Class <- gsub(": ", ",", df$Class)
  # Image first, then every other column, with Name dropped.
  df <- df %>% select(Image, everything(), -Name)
  # Qualified like fread() above, so data.table does not need to be attached.
  data.table::fwrite(df, file = out_file, sep = ",")
}
tidy_up_fxn(file_names)
Thank you all!!

How can i set a "key" argument in a gather function using a variable?

How can i set a "key" argument in a gather function using a variable ?
as.character() and get() don't work either.
library(tidyr)
library(dplyr)
# Read every CSV in `path` and reshape it with gather(), trying to use the
# file name as the name of the key column. Reported to fail in ensym2().
path = "C:/Users/lukas/Documents/Projekt/Data/"
files <- list.files(path = path, pattern = "*.csv")
# Accumulator for the reshaped tables, one list element per file.
dane <- list()
for (file in files){
# Keep the file name, because `file` is overwritten with the data below.
temp_name <- file
# NOTE(review): `key = temp_name` passes the variable itself rather than the
# string it holds; presumably this is what triggers the reported
# "Must supply a symbol or a string as argument" error -- confirm.
file <- tibble(read.csv(paste(path,file,sep = ""))) %>%
gather("year", key = temp_name , -country)
dane <- append(dane,list(file))
}
Error in `ensym2()`:
! Must supply a symbol or a string as argument
We can unquote the variable with !! so that gather() receives the string stored in temp_name:
# Read every CSV listed in `files` (located under `path`) and reshape it to
# long format, using the file name as the name of the key column.
# lapply() builds the result list in one pass instead of growing it with
# append(), and the file name is no longer overwritten by the data read.
dane <- lapply(files, function(csv_name) {
  # `!!` unquotes csv_name, so gather() receives the string it holds as the
  # key column name rather than the bare variable.
  tibble(read.csv(paste0(path, csv_name))) %>%
    gather("year", key = !!csv_name, -country)
})

How to operate on variables in R functions

I am trying to do following variable operations on data frame variables:
# Load the subject listing.
ptinr <- read.csv('ptinr.CSV')
# Strip the study suffix from the project code.
ptinr$project <- gsub("_19T228z1xx", "", ptinr$project)
# Numeric part of the subject id, with the "CTMS-" prefix removed.
subject_num <- as.integer(gsub("CTMS-", '', ptinr$Subject))
# Rebuild the id as <project>-<zero-padded three-digit number>.
ptinr$Subject <- paste0(ptinr$project, '-', sprintf("%03d", subject_num))
I want to convert this to a function and pass the file name. Any suggestions?
Do you mean this kind of function?
# Read the CSV at `fname`, strip the study suffix from `project`, and rebuild
# `Subject` as "<project>-<zero-padded three-digit subject number>".
# Returns the modified data frame.
f <- function(fname) {
  tbl <- read.csv(fname)
  # Project code without the "_19T228z1xx" suffix.
  site <- gsub("_19T228z1xx", "", tbl$project)
  # Numeric part of the subject id, with the "CTMS-" prefix removed.
  subject_no <- as.integer(gsub("CTMS-", "", tbl$Subject))
  tbl$project <- site
  tbl$Subject <- paste0(site, "-", sprintf("%03d", subject_no))
  tbl
}
An option with tidyverse
library(readr)
library(stringr)
library(dplyr)
#' Tidyverse version: read `fname`, clean `project` and rebuild `Subject`.
#'
#' @param fname Path of the CSV file to read.
#' @return A tibble in which `project` has the "_19T228z1xx" suffix removed
#'   and `Subject` is "<project>-<zero-padded three-digit subject number>".
f1 <- function(fname) {
  read_csv(fname) %>%
    mutate(
      project = str_remove(project, "_19T228z1xx"),
      # Strip the "CTMS-" prefix explicitly rather than parsing a number out
      # of the whole string, so the hyphen cannot be read as a minus sign.
      # The "-" separator matches the base R version of this transformation,
      # and paste0() yields a plain character column.
      Subject = paste0(
        project, "-",
        sprintf("%03d", as.integer(str_remove(Subject, "CTMS-")))
      )
    )
}

Writing a csv file in R with parameter in the file name

I am doing a small log processing project in R. I am trying to write a function that gets a dataframe, and writes it in a csv file with some parameters (dataframe name, today's date.. etc)
I have made some progress but didn't manage to write the csv. I hope the code is reproducible and good.
library(dplyr)
# First attempt: write `df` to ./logs/<name of df>_<today's date>.csv.
# The asker reports that this version does not manage to write the file.
wrt_csv <- function(df) {
# Text of the expression the caller passed as `df` (e.g. "mtcars").
dfname <- deparse(substitute(df))
# The pasted path is wrapped in literal double-quote characters.
dfpath <- paste0('"',"./logs/",dfname, "_", Sys.Date(),'.csv"')
# The path string is then turned into a data frame before being used as the
# file argument below.
dfpath <- as.data.frame(dfpath)
df %>% write_excel_csv(dfpath)
}
wrt_csv(mtcars)
EDIT- this is a final version that works well. Thanks to Ronak Shah.
# Remember the working directory at the time this script is run.
wd <- getwd()
# Write `df` to <wd>/logs/<name of df as typed by the caller>_<date>.csv.
wrt_csv <- function(df) {
  out_file <- paste0(
    wd, '/logs/', deparse(substitute(df)), '_', Sys.Date(), '.csv'
  )
  write_excel_csv(df, out_file)
}
However, I now have a bunch of data frames that I want to run the function on. Should I put them in a list? This didn't quite work:
# Attempt to write several data frames by mapping wrt_csv() over a list.
# NOTE(review): wrt_csv() builds the file name from deparse(substitute(df));
# when it is called through lapply() that expression is presumably lapply's
# internal one rather than df1/df2 -- verify before relying on the names.
l <- list(df1,df2)
lapply(l , wrt_csv)
Any thoughts?
Thanks!
Keep dfpath as a string. Try:
#' Write a data frame to `<dir>/<name>_<today's date>.csv`.
#'
#' @param df Data frame to write.
#' @param name Label used in the file name. Defaults to the text of the
#'   expression passed as `df`; supply it explicitly when calling from
#'   `lapply()`/`Map()`, where that expression is not a useful name.
#' @param dir Output directory, created if it does not exist. Defaults to
#'   `"./logs"`, relative to the working directory as in the question; a
#'   leading "/" would point at the file-system root instead.
#' @return The value of `write.csv()`; called for its side effect.
wrt_csv <- function(df, name = deparse(substitute(df)), dir = "./logs") {
  # Resolve the default label before anything else touches `df`.
  force(name)
  dir.create(dir, showWarnings = FALSE, recursive = TRUE)
  # Keep the path as a plain string.
  dfpath <- file.path(dir, paste0(name, "_", Sys.Date(), ".csv"))
  write.csv(df, dfpath, row.names = FALSE)
  #Or same as OP
  #df %>% write_excel_csv(dfpath)
}
wrt_csv(mtcars)
We can also do
#' Write a data frame to `<dir>/<name>_<today's date>.csv` (sprintf variant).
#'
#' @param df Data frame to write.
#' @param name Label used in the file name. Defaults to the text of the
#'   expression passed as `df`.
#' @param dir Output directory, created if it does not exist. Defaults to
#'   `"./logs"`, relative to the working directory; a leading "/" would point
#'   at the file-system root instead.
#' @return The value of `write.csv()`; called for its side effect.
wrt_csv <- function(df, name = deparse(substitute(df)), dir = "./logs") {
  # Resolve the default label before anything else touches `df`.
  force(name)
  dir.create(dir, showWarnings = FALSE, recursive = TRUE)
  # format() makes the Date-to-text conversion explicit for sprintf().
  dfpath <- sprintf("%s/%s_%s.csv", dir, name, format(Sys.Date()))
  write.csv(df, dfpath, row.names = FALSE)
}
wrt_csv(mtcars)

R efficiently bind_rows over many dataframes stored on harddrive

I have roughly 50000 .rda files. Each contains a dataframe named results with exactly one row. I would like to append them all into one dataframe.
I tried the following, which works, but is slow:
# Folder holding the .rda files, and the full path of every file in it.
root_dir <- paste0(path, "models/")
files <- paste0(root_dir, list.files(root_dir))
# Seed the accumulator with the `results` data frame from the first file.
load(files[1])
results_table <- results
rm(results)
# Append the `results` data frame of each remaining file, one file at a time.
for (i in 2:length(files)) {
  print(paste0("We are at step ", i))
  load(files[i])
  results_table <- bind_rows(results_table, results)
  rm(results)
}
Is there a more efficient way to do this?
Using .rds is a little bit easier. But if we are limited to .rda the following might be useful. I'm not certain if this is faster than what you have done:
library(purrr)
library(dplyr)
library(tidyr)
## create ten one-row sample tibbles and save each one to its own .rda file
x <- 1:10
fake_files <- function(x) {
  # The object is saved under the variable name `df`.
  df <- tibble(x = x)
  target <- here::here(paste0(as.character(x), ".rda"))
  save(df, file = target)
  NULL
}
purrr::map(x, fake_files)
## map and load the .rda files into a single tibble
#' Load an .rda file and return the object stored in it.
#'
#' @param file Path to the .rda file.
#' @return The object named `df` if the file contains one (the name used by
#'   the sample data); otherwise the single object stored in the file.
#' @details Stops with an error when the file holds no `df` and does not hold
#'   exactly one object, because the object to return would be ambiguous.
load_rda <- function(file) {
  # Load into a private environment so the lookup below can only find objects
  # that really came from the file, never an unrelated `df` such as stats::df.
  env <- new.env(parent = emptyenv())
  loaded <- load(file = file, envir = env) # names of the restored objects
  if ("df" %in% loaded) {
    return(env[["df"]])
  }
  if (length(loaded) != 1L) {
    stop(
      "Expected exactly one object in '", file, "', found ", length(loaded),
      ".",
      call. = FALSE
    )
  }
  env[[loaded]]
}
# List every .rda file in the project root, load each one with load_rda() and
# unnest the loaded data frames into a single tibble.
rda_files <- tibble(
  files = list.files(
    path = here::here(""),
    # `pattern` is a regular expression, not a glob: match the ".rda" suffix.
    pattern = "\\.rda$",
    full.names = TRUE
  )
) %>%
  # One load_rda() call per file path; map() is enough for a single column.
  mutate(data = map(files, load_rda)) %>%
  unnest(data)
This is untested code but should be pretty efficient:
# Folder holding the .rda files, and the full path of every file in it.
root_dir <- paste0(path, "models/")
files <- paste0(root_dir, list.files(root_dir))
# Load every file listed in `files` and collect the stored objects in a list.
data_list <- lapply(files, function(f) {
  message("loading file: ", f)
  # Load into a private environment so objects from different files cannot
  # collide or leak into the caller.
  env <- new.env(parent = emptyenv())
  name <- load(f, envir = env) # names of the objects restored from the file
  if (length(name) == 0L) {
    return(NULL)
  }
  # Look the object up by name instead of evaluating parsed text; if a file
  # holds several objects, the last one is returned.
  env[[name[length(name)]]]
})
# Bind all loaded data frames in a single call.
results_table <- data.table::rbindlist(data_list)
data.table::rbindlist is very similar to dplyr::bind_rows but a little faster.

Resources