Read and save multiple sheets - R

I have found this code and I would like to save the different sheets in R from Excel, how can I change the code?
library(readxl)
multiplesheets <- function(fname) {
  # Names of all sheets in the workbook, in workbook order
  sheet_names <- readxl::excel_sheets(fname)
  # Read every sheet (readxl returns tibbles) ...
  sheet_tibbles <- lapply(sheet_names, function(nm) readxl::read_excel(fname, sheet = nm))
  # ... convert each one to a plain data frame, and label it with its sheet name
  all_sheets <- stats::setNames(lapply(sheet_tibbles, as.data.frame), sheet_names)
  # print() shows the list and returns it invisibly, so the named list of
  # data frames is also the value of this function
  print(all_sheets)
}
# specifying the path name (an absolute path to one particular workbook)
path <- "/Users/mallikagupta/Desktop/Gfg.xlsx"
# read every sheet of that workbook; multiplesheets() prints the resulting list
multiplesheets(path)

You can use purrr::walk2()
Since the output of multiplesheets is a list of sheets, you can use walk2() to walk across the list of sheets and a list of their names, saving each sheet by its name:
# multiplesheets() ends with print(), which hands its argument back invisibly,
# so the named list of data frames can be captured here (it is also printed)
sheets <- multiplesheets(path)
# one output file name per sheet: "<sheet name>.csv"
filenames <- paste0(names(sheets), ".csv")
## install.packages("purrr") #(if you haven't already)
# walk2() calls write.csv(sheet, filename) once per pair, purely for the side effect
purrr::walk2(sheets, filenames, write.csv) # or try readr::write_csv() for nicer output

Related

Reading xlsx with multiple sheets in R for duplication removal

I have an Excel file which has multiple sheets in it. My main goal is to remove all rows that appear multiple times within a single sheet, and I have to do this for every sheet.
I have written the code below, but it only reads the first sheet and also gives ' ...' in the first row and column. Can someone help me find where I might be going wrong? Thank you in advance.
**config_file_name <- '/RBIAPI3tables.xlsx'
# Build the workbook path; currentPath is expected to be defined before this snippet
config_xl <- paste(currentPath,config_file_name,sep="")
config_xl_sheets_name <- excel_sheets(path = config_xl) # A character vector of sheet names is created. To access one name use config_xl_sheets_name[1]
count_of_xl_sheets <- length(config_xl_sheets_name)
# Read all sheets in the file as separate list elements
list_all_sheets <- lapply(config_xl_sheets_name, function(x) read_excel(path = config_xl, sheet = x))
names (list_all_sheets) <- config_xl_sheets_name # Name each list element after its Excel sheet
count_of_list_all_sheets <- length(list_all_sheets) # to get the data of one sheet use list_all_sheets[[i]] or list_all_sheets[["<sheet name>"]]
# Create a data frame for each sheet and assign it to a variable named after the sheet
for (i in 1:count_of_list_all_sheets)
{
# creates a variable named after the whitespace-trimmed sheet name, holding that sheet as a data.frame
assign(x= trimws(config_xl_sheets_name[i]), value = data.frame(list_all_sheets[[i]]))
# NOTE(review): updateddata is overwritten on every pass, so once the loop ends it
# holds only the de-duplicated rows of the last sheet, not of all sheets
updateddata = unique(list_all_sheets[[i]])
}
write.xlsx(updateddata,"Unique3tables.xlsx",showNA = FALSE)**
this is my approach
library(readxl)
library(data.table)
library(openxlsx)
file.to.read <- "./testdata.xlsx"
sheets.to.read <- readxl::excel_sheets(file.to.read)
# Read every sheet into a list of data.tables, dropping duplicated rows
# (the first occurrence of each row is kept)
L <- lapply(sheets.to.read, function(sheet_name) {
  sheet_dt <- setDT(readxl::read_excel(file.to.read, sheet = sheet_name))
  is_repeat <- duplicated(sheet_dt)
  sheet_dt[!is_repeat, ]
})
# Start an empty workbook
wb <- createWorkbook()
# Add one worksheet per input sheet (same name, same order) and fill it
# with the de-duplicated data
for (i in seq_along(L)) {
  addWorksheet(wb, sheets.to.read[i])
  writeData(wb, i, L[[i]])
}
# Write the workbook to disk
saveWorkbook(wb, "testdata_new.xlsx")

Append multiple sheets from multiple Excel files in R

I am trying to append multiple sheets from multiple Excel files. For instance, each Excel file has 10 sheets (different format), but the 10 sheets of an Excel file have the same names and format as the associated 10 sheets of another Excel file. Essentially, each Excel file holds the different types of information of a different country, but the types of information collected are the same for each country (population, pollution index, GDP, etc.). And I have many countries so I'm thinking of using a loop.
I use "report_1h" as the master Excel file, and append sheets of other Excel files into the master file's sheets.
library(rio)
# Read the master workbook; x1_data is indexed by sheet position below
x1_data <- import_list("report_1h.xlsx")
# NOTE(review): list.files() returns bare file names unless full.names = TRUE, so
# import_list(file) below presumably resolves them against the working directory — confirm
report_list <- list.files(path = 'E:/Report_folder', pattern = '*.xlsx')
# sheet_ starts out as a data frame with 0 rows and 0 columns
sheet_ <- data.frame()
for (file in report_list){
book <- import_list(file)
# NOTE(review): 31 is hard-coded; this assumes every workbook has at least 31 sheets — confirm
for (i in 1:31){
# sheet_[i] <- ... assigns a column of the 0-row data frame sheet_; the rbind()
# result has more rows than that, which is what raises
# "replacement has 2 rows, data has 0"
sheet_[i] <- rbind(x1_data[[i]][,],book[[i]][,])
x1_data[[i]][,] <- sheet_[i]
}
}
The loop is intended to append sheets from each Excel file to sheets of the master file "report_1h". But it gives error:
Error in `[<-.data.frame`(`*tmp*`, i, value = c("Data Source(s):", "Data Source(s):", :
replacement has 2 rows, data has 0
Can someone tell me why?
Here's a way to do this -
library(tidyverse)
# Get the full path of every .xlsx workbook in the report folder.
# `pattern` is a regular expression (not a glob), so the extension is matched
# with '\\.xlsx$'. full.names = TRUE returns paths that include the folder, so
# the files can be opened regardless of the current working directory.
report_list <- list.files(
  path = 'E:/Report_folder',
  pattern = '\\.xlsx$',
  full.names = TRUE
)
# Read every sheet of every workbook: one inner list of data frames per file.
# readxl is not attached by library(tidyverse), so its functions are called
# with an explicit namespace.
result <- map(report_list, function(x) {
  sheets <- readxl::excel_sheets(x)
  map(sheets, function(y) readxl::read_excel(x, sheet = y))
}) %>%
  # Regroup by sheet position (sheet 1 of every file, sheet 2 of every file, ...)
  transpose() %>%
  # Stack the matching sheets row-wise
  map(bind_rows)
# Assign sheet names
names(result) <- paste0('Sheet', seq_along(result))
# Write master Excel file: one sheet per list element
writexl::write_xlsx(result, 'master_excel.xlsx')

Export List of Lists as CSV into separate Files or Excel Sheets

I have a list of lists mylists with data.table objects
# toy columns: a constant character vector and the integers 1 to 5
x <- rep("example",5)
y <- 1:5
# two inner lists, each holding two named data.table objects
list1 <- list('a'= data.table(x,y),'b' = data.table(x,y))
list2 <- list('c'= data.table(x,y), 'd' = data.table(x,y))
# outer list: Output1 and Output2 each contain one of the inner lists
mylists <- list('Output1'= list1,'Output2' =list2)
# print the nested structure
mylists
I want to export every object of every list as a separate CSV File (preferably using fwrite from data.table) named like the object i.e. Output1_a.csv
I cannot rbind to one data.frame/table as the data needs to be kept separate.
I've tried using
lapply(mylists,fwrite)
but have trouble producing separate files with different names.
Additionally how could I produce an xlsx File where all objects of mylists are stored into separate sheets named as described above.
I'd like to know both ways as this might be useful for the future.
for the xlsx version you could do something like this
# Example data: two outer entries, each a named list of two data.tables
x <- rep("example", 5)
y <- 1:5
list1 <- list("a" = data.table(x, y), "b" = data.table(x, y))
list2 <- list("c" = data.table(x, y), "d" = data.table(x, y))
mylists <- list("Output1" = list1, "Output2" = list2)
# One workbook per outer entry; write_xlsx() turns each element of the
# inner named list into a sheet carrying that element's name
for (outer_name in names(mylists)) {
  writexl::write_xlsx(mylists[[outer_name]], path = paste0(outer_name, ".xlsx"))
}
this will produce xlsx files with the names of the outer list and sheets with the name of the inner lists
for the CSV version I would do something like this, first flattening the list
# Flatten one level: every data.table becomes a top-level element named
# "<outer>.<inner>", e.g. "Output1.a"
mylists_flat <- unlist(mylists, recursive = FALSE)
# walk() is called with its namespace because purrr is never attached in this
# example; a bare walk() would fail with "could not find function"
purrr::walk(names(mylists_flat),
  function(x) {
    # one CSV file per flattened element, named after the element
    write.csv(mylists_flat[[x]],
      file = paste0(x, ".csv"))
  })
this should produce csv files with the name <outerlist_name>.<innerlist_name>.csv

Import Multiple Sheets into Multiple Data Frames in R

I have an Excel file with a lot of sheets and I need a code to import each sheet in a separate data frame which will be named in the same convention as the sheet name in Excel.
Example, tabs A, B, C will be imported as data frame A, B, and C respectively.
From other threads, I saw codes like:
length(excel_sheets(filename)) to get the number of sheets in the file
Then create a list that would contain each tab:
read_excel_allsheets <- function(filename) {
  # Sheet names double as the names of the returned list
  sheet_names <- readxl::excel_sheets(filename)
  # Reader for a single sheet of this workbook
  read_sheet <- function(nm) readxl::read_excel(filename, sheet = nm)
  # Named list: one element per sheet, in workbook order
  stats::setNames(lapply(sheet_names, read_sheet), sheet_names)
}
But I do not know how the tabs get imported into R as separate data frames from there.
Would greatly appreciate the help.
Thanks in advance!
Here's one way to do it:
# Create a two-sheet test workbook in a temporary file;
# write_xlsx() returns the path it wrote to
tf <- writexl::write_xlsx(
  list("the mtcars" = mtcars, "iris data" = iris),
  tempfile(fileext = ".xlsx")
)
# Read every sheet into a list named after the sheets
sheets <- readxl::excel_sheets(tf)
lst <- setNames(
  lapply(sheets, function(nm) readxl::read_excel(tf, sheet = nm)),
  sheets
)
# Copy each list element into the global environment under its sheet name
list2env(lst, envir = .GlobalEnv)
Your function reads in all the tabs and saves them as elements of a single list (because of lapply()). You can take the elements out of the list with list2env:
# read all sheets of the workbook into a named list (one element per sheet)
your_excel_list <- read_excel_allsheets("test.xlsx")
# copy each list element into the global environment under its sheet name
list2env(your_excel_list, .GlobalEnv)
You'll see that the named elements of your list are now data frames (or actually tbl_df) in your global environment
Alternatively, you could read the same sheet from every file in one line.
This requires the readxl, magrittr and dplyr packages to be loaded.
# Read the sheet "(sheetname)" from every .xlsx file in the working directory
# and stack the results row-wise. `pattern` is a regular expression, so the
# extension is matched with "\\.xlsx$" rather than the glob "*.xlsx".
data <- lapply(
  list.files(pattern = "\\.xlsx$"),
  function(x) read_excel(x, sheet = "(sheetname)")
) %>%
  bind_rows()

Dataframes are created but column names are not changing when reading from excel workbook

I am trying to read an excel workbook in R and for each sheet will create a dataframe.
In the next step, I want to take each created dataframe and prefix every one of its column names with the sheet name followed by an underscore.
Here is what I am doing:
library(readxl)
# Store Sheet Names in a vector
sheet <- excel_sheets("D:/OTC/JULY DATA.XLSX")
# Trim any of the Trailing White Spaces
sheet_trim_trailing <- function (x) sub("\\s+$", "", x)
sheet <- sheet_trim_trailing(sheet)
# Read each of the sheets in the workbook and create a
# dataframe using respective names of the sheets
for(i in 1:length(sheet)){
# this reads sheet i and binds the result to a variable named after the sheet
# NOTE(review): this reads "DATA.XLSX", not the "D:/OTC/JULY DATA.XLSX" used for
# the sheet names above — confirm both refer to the same workbook
assign(sheet[i], read.xlsx("DATA.XLSX", sheetIndex = i))
# sname holds the sheet name as a character string, not the data frame itself
sname <- sheet[i]
# NOTE(review): colnames() is applied to the string sname here, so the data frame
# created by assign() above is never touched — its column names stay unchanged
colnames(sname) <- gsub("^", paste0(sname,"_"), colnames(sname))
}
Dataframes are created but column names are not changing?
I dont know where I am wrong?
What you need to do is something like
colnames(get(sheet[i])) <- gsub("^", paste0(sname,"_"), colnames(get(sheet[i])))
But this will give an error
target of assignment expands to non-language object
A workaround is to use a temporary variable to change column names
Reproducible example
# small example data frame standing in for one sheet
temp <- mtcars[1:5,]
# fetch the object by its name (given as a string) into a temporary variable
d <- get("temp")
# change the column names on the temporary copy (the first "y" in each name becomes a space)
colnames(d) <- sub("y", " ", colnames(d))
# store the modified copy back under the original name
assign("temp", d)
Try this
# For every sheet: read it, prefix each column name with "<sheet name>_", and
# store the result in a variable named after the sheet.
# seq_along() yields no iterations when `sheet` is empty (1:length(sheet) would
# run with i = 1 and i = 0).
for (i in seq_along(sheet)) {
  # Work on a temporary variable: colnames(get(name)) <- ... is not valid R.
  # It is not called `t`, which would mask base::t().
  sheet_df <- read.xlsx("DATA.XLSX", sheetIndex = i)
  colnames(sheet_df) <- gsub("^", paste0(sheet[i], "_"), colnames(sheet_df))
  # Bind the renamed data frame to the sheet's name in a single step
  assign(sheet[i], sheet_df)
}
I think I was looking for something like this one, which does the same as above.
Try This Alternative:
library(readxl)
# Read every sheet of an Excel workbook into a named list, prefixing each
# column name with "<sheet name>_"
read_all_sheets <- function(xlsfile) {
  sheet_names <- excel_sheets(xlsfile)
  # Read one sheet and put its own name in front of every column name
  read_prefixed <- function(sheet_name) {
    sheet_tbl <- read_excel(xlsfile, sheet = sheet_name)
    names(sheet_tbl) <- paste(sheet_name, names(sheet_tbl), sep = "_")
    sheet_tbl
  }
  prefixed <- lapply(sheet_names, read_prefixed)
  # List elements carry the sheet names
  names(prefixed) <- sheet_names
  prefixed
}
## create dataframes from sheets
# first read all the sheets as a named list
List_of_All_Sheets <- read_all_sheets("Location/of/the/file.xlsx")
# then create one data frame per sheet in the global environment.
# list2env() does this in a single call; invisible() keeps the call from
# printing anything, whereas a top-level lapply(..., assign) echoes every
# data frame to the console.
invisible(list2env(List_of_All_Sheets, envir = .GlobalEnv))

Resources