I have two Excel files with multiple sheets. The sheet names and their corresponding column names are the same in both files; only the values in the sheets differ. Using R, I want to find which values are different and mark those cells.
For the fun of it, here's a quick&dirty example on which you can build up for your specific needs:
wbsCreate <- function(v) {
  # Write the first 25 elements of `v` into a 5 x 5 grid on "Sheet1" of a new
  # workbook, filling row by row, and save it to a temporary .xlsx file.
  #
  # v: vector whose elements 1..25 become the cell values.
  # Returns the path of the temporary file that was written.
  grid_size <- 5
  book <- createWorkbook()
  sheet <- createSheet(book, "Sheet1")
  grid <- createCell(
    createRow(sheet, rowIndex = seq_len(grid_size)),
    colIndex = seq_len(grid_size)
  )
  # Walk the grid in row-major order so that position `pos` of `v` lands in
  # row ((pos - 1) %/% 5) + 1, column ((pos - 1) %% 5) + 1.
  for (pos in seq_len(grid_size^2)) {
    row_idx <- (pos - 1) %/% grid_size + 1
    col_idx <- (pos - 1) %% grid_size + 1
    setCellValue(grid[[row_idx, col_idx]], value = v[pos])
  }
  out_path <- tempfile(fileext = ".xlsx")
  saveWorkbook(book, out_path)
  out_path
}
wbsMarkDiff <- function(fn1, fn2) {
  # Highlight, in both workbooks, every cell whose value differs between them.
  #
  # fn1, fn2: paths of the two Excel files to compare. Both files are
  #           overwritten in place with the highlighted version.
  # Only sheets whose name occurs in both workbooks are compared. Cells are
  # matched by their key (the names getCells() assigns), not by position, so
  # a cell that exists in only one workbook cannot shift the comparison; such
  # a cell is highlighted in the workbook that contains it.
  # Returns the two file names invisibly.
  fns <- c(fn1, fn2)
  wb <- lapply(fns, loadWorkbook)
  # A cell style belongs to one workbook, so build one per file.
  cs <- lapply(wb, function(x) {
    CellStyle(x) +
      Fill(backgroundColor = "red",
           foregroundColor = "red",
           pattern = "SOLID_FOREGROUND")
  })
  sheets <- lapply(wb, getSheets)
  sheetnames <- do.call(intersect, lapply(sheets, names))
  for (sheetname in sheetnames) {
    sheet <- lapply(sheets, "[[", sheetname)
    rows <- lapply(sheet, getRows)
    cells <- lapply(rows, getCells)
    values <- lapply(cells, function(cell) lapply(cell, getCellValue))
    keys <- lapply(values, names)
    common <- intersect(keys[[1]], keys[[2]])
    differs <- vapply(
      common,
      function(k) !identical(values[[1]][[k]], values[[2]][[k]]),
      logical(1)
    )
    changed <- common[differs]
    for (s in seq_along(fns)) {
      only_here <- setdiff(keys[[s]], common)
      for (i in c(changed, only_here)) {
        setCellStyle(cells[[s]][[i]], cs[[s]])
      }
    }
  }
  # Save once, after every sheet has been processed, instead of rewriting
  # both files on each sheet iteration.
  for (s in seq_along(fns)) {
    saveWorkbook(wb[[s]], fns[s])
  }
  invisible(fns)
}
library(xlsx)
# create two excel workbooks (same dimensions per sheet)
v <- LETTERS[1:25]
tf1 <- wbsCreate(v)
# change three of the 25 values to lower case so the files differ there
v[c(3,6,9)] <- letters[c(3,6,9)]
tf2 <- wbsCreate(v)
# mark differences (both temporary files are overwritten in place)
wbsMarkDiff(tf1, tf2)
shell.exec(tf1) # open file1 on windows
shell.exec(tf2) # open file2 on windows
You get help on each command by using ?, for example ?createWorkbook gives you the help files on that function.
Related
I want to write to multiple sheets of an Excel file using a loop. The code is shown below.
# Example data: two small data frames which, per the trailing comments,
# correspond to sheets A and B of df.xlsx.
first_column <- c("value_1", "value_2")
second_column <- c("ve_1", "ve_2")
fir_column <- c("1", "2")
se_column <- c("a1", "va2")
df <- data.frame(first_column, second_column) #sheet A of df.xlsx
df1 <- data.frame(fir_column, se_column) #sheet B of df.xlsx
# Read every sheet of ./Out/df.xlsx and write it to ./Out/df1.xlsx, using the
# loop index as the sheet name. Note that `df` is reassigned on each iteration.
# NOTE(review): write.xlsx() exists in more than one package and no library()
# call is shown here, so which implementation runs (and whether it honours
# `append`) cannot be told from this snippet.
sheets<-openxlsx::getSheetNames('./Out/df.xlsx')
for(i in 1:length(sheets)){
df<-read_excel('./Out/df.xlsx',sheet = sheets[i])
write.xlsx(df, './Out/df1.xlsx', sheetName = i, append=TRUE)
}
Only last sheet is getting generated.
Using your code (note pathnames are edited slightly):
library(xlsx)
library(readxl)
# Copy every sheet of ./df.xlsx into ./df1.xlsx, naming the new sheets
# "1", "2", ... in order.
sheets <- openxlsx::getSheetNames('./df.xlsx')
# seq_along() yields an empty loop if no sheet names come back, whereas
# 1:length(sheets) would iterate over c(1, 0).
for (i in seq_along(sheets)) {
  df <- readxl::read_excel('./df.xlsx', sheet = sheets[i])
  # The first write creates (or replaces) the file; later writes append to it.
  appendSheet <- i > 1
  # read_excel() returns a tibble; convert it to a plain data.frame, which is
  # what xlsx::write.xlsx is documented to take.
  xlsx::write.xlsx(as.data.frame(df), './df1.xlsx',
                   sheetName = as.character(i), append = appendSheet)
}
Or with openxlsx:
library(readxl)
library(openxlsx)
# Copy every sheet of ./df.xlsx into a new workbook whose sheets are named
# "1", "2", ..., then save it as df1.xlsx.
sheets <- openxlsx::getSheetNames('./df.xlsx')
wb <- createWorkbook()
# seq_along() is safe for an empty vector; 1:length(sheets) is not.
for (i in seq_along(sheets)) {
  df <- readxl::read_excel('./df.xlsx', sheet = sheets[i])
  # Sheet names are character; the sheet is then addressed by its position i.
  addWorksheet(wb, as.character(i))
  writeData(wb, i, df)
}
# Spell out TRUE: `T` is an ordinary variable that can be reassigned.
saveWorkbook(wb, "df1.xlsx", overwrite = TRUE)
Alternatively, you can work with the sheets as list elements, allowing you to easily work over each element with the apply family of functions:
library(rio)
library(writexl)
# Import df.xlsx into a list and write that list back out as df1.xlsx.
# NOTE(review): presumably one list element per sheet, each written to its
# own sheet — confirm against the rio / writexl documentation.
list_of_dfs <- import_list("df.xlsx") # from rio
write_xlsx(list_of_dfs, "df1.xlsx") # from writexl
# For each value in unique_seg (defined outside this snippet), build a 3 x 3
# data frame filled with that value and write it to bf.xlsx.
# NOTE(review): `True` is not R's logical constant (that is `TRUE`), and
# `sheetname` differs in case from the `sheetName` argument used by the other
# write.xlsx() calls in this document — both are worth checking.
for(x in unique_seg){
bf <- data.frame(matrix(x,nrow =3,ncol =3))
write.xlsx(bf,'bf.xlsx', sheetname =x, append=True)
}
But I see only last loop is returned. How to get all sheets in 1 excel ?
If we want to return the output, create a list and assign the output to the list
# Collect one 3 x 3 data frame per segment in a named list and also write
# each one to its own sheet of bf.xlsx.
lst1 <- vector('list', length(unique_seg))
names(lst1) <- unique_seg
for (x in unique_seg) {
  # Use the character form of the segment both as sheet name and as list key,
  # so a numeric segment is never treated as a positional index into lst1.
  key <- as.character(x)
  bf <- data.frame(matrix(x, nrow = 3, ncol = 3))
  # The argument is `sheetName` (capital N); `sheetname` is not an argument
  # of write.xlsx.
  write.xlsx(bf, 'bf.xlsx', sheetName = key, append = TRUE)
  lst1[[key]] <- bf
}
I'm trying to use the library(xlsx) to write some data from R into excel in a readable format.
My dataset is formatted as:
# Named list of two data frames: the first rows of mtcars and of iris.
tbl <- list("some_name"=head(mtcars),"some_name2"=head(iris))
I would like to write this table to excel, with each item in the list being identified and the data being next to the item. E.g. the excel file should look like
"some_name" in cell A1
paste the dataframe head(mtcars) in cell A3
"some_name2" in cell A11
paste the dataframe head(iris) in cell A13
or something similar, e.g. pasting each item into a new worksheet.
Using
# Pass the whole list to write.xlsx() in a single call.
write.xlsx(tbl,"output.xlsx")
will output it correctly however it is not formatted in a readable way.
Any help would be great
The following code creates an xlsx file with multiple sheets. Each sheet is named after a list element and holds that name as a title, with the corresponding data frame below the title. You can modify it as you like.
require(xlsx)
ls2xlsx <- function(x, wb) {
  # Add one sheet per element of a named list of data frames to a workbook.
  #
  # x:  named list of data frames; each name becomes both the sheet name and
  #     the title written in cell A1 of that sheet.
  # wb: workbook (from createWorkbook()) that receives the sheets.
  # Each data frame is written starting at row 2, column 1.
  # Returns `wb` invisibly; the caller still has to saveWorkbook() it.
  for (i in seq_along(x)) {
    sheet_title <- names(x)[i]
    sh <- createSheet(wb, sheet_title)
    cl_title <- createCell(createRow(sh, 1), 1)
    setCellValue(cl_title[[1, 1]], sheet_title)
    # x[[i]] is the data frame itself. x[i] would be a one-element list, and
    # converting that to a data frame prefixes every column name with the
    # element name.
    addDataFrame(x[[i]], sh, startRow = 2, startColumn = 1)
  }
  invisible(wb)
}
# Example: build the list, fill a fresh workbook via ls2xlsx(), save it.
tbl <- list("some_name" = head(mtcars),"some_name2"=head(iris))
wb <- createWorkbook()
ls2xlsx(tbl, wb)
saveWorkbook(wb, 'test.xlsx')
The following function writes a list of dataframes to an .xlsx file.
It has two modes, given by argument beside.
beside = TRUE is the default. It writes just one sheet, with the dataframe name on the first row, then an empty cell, then the dataframe. And repeats this for all dataframes, written side by side.
beside = FALSE writes one dataframe per sheet. The sheets' names are the dataframes names. If the list members do not have a name, the name is given by argument sheetNamePrefix.
The .xlsx file is written to the path given by argument file.
writeList_xlsx <- function(x, file, beside = TRUE, sheetNamePrefix = "Sheet"){
  # Write a list of data frames to an Excel file.
  #
  # x:               list of data frames, optionally named.
  # file:            output path. ".xlsx" is appended unless the path already
  #                  ends in ".xls" or ".xlsx".
  # beside:          TRUE  - a single sheet; for each data frame its name goes
  #                          in row 1, then one empty column, then the data
  #                          frame, all laid out side by side.
  #                  FALSE - one sheet per data frame, named after it.
  # sheetNamePrefix: prefix for generated names ("Sheet1", "Sheet2", ...),
  #                  used for unnamed list elements and for the single sheet
  #                  in beside mode.
  # Returns the path that was written, invisibly.
  xnames <- names(x)
  shNames <- paste0(sheetNamePrefix, seq_along(x))
  if (is.null(xnames)) xnames <- shNames
  unnamed <- xnames == ""
  xnames[unnamed] <- shNames[unnamed]

  # Anchor the extension test to the end of the path: an unanchored "\\.xls"
  # also matches names such as "my.xlsdata".
  if (!grepl("\\.xlsx?$", file, ignore.case = TRUE)) {
    file <- paste0(file, ".xlsx")
  }
  # Match the workbook format to the extension so that a ".xls" target does
  # not receive xlsx-formatted content.
  wb_type <- if (grepl("\\.xls$", file, ignore.case = TRUE)) "xls" else "xlsx"
  wb <- createWorkbook(type = wb_type)

  if (beside) {
    # Same name as shNames[1], but also defined when `x` is empty.
    sheet <- createSheet(wb, sheetName = paste0(sheetNamePrefix, 1))
    row <- createRow(sheet, rowIndex = 1)
    col <- 0
    for (i in seq_along(x)) {
      # Name cell, one blank column, then the data frame.
      col <- col + 1
      cell <- createCell(row, colIndex = col)
      setCellValue(cell[[1, 1]], xnames[i])
      col <- col + 2
      addDataFrame(x[[i]], sheet,
                   startRow = 1, startColumn = col,
                   row.names = FALSE)
      col <- col + ncol(x[[i]])
    }
  } else {
    for (i in seq_along(x)) {
      sheet <- createSheet(wb, sheetName = xnames[i])
      addDataFrame(x[[i]], sheet, row.names = FALSE)
    }
  }
  saveWorkbook(wb, file = file)
  invisible(file)
}
# All data frames side by side on one sheet (default beside = TRUE).
writeList_xlsx(tbl, file = "test.xlsx")
# One sheet per data frame.
writeList_xlsx(tbl, file = "test2.xlsx", beside = FALSE)
I'm using the following code to merge several excel files with multiple sheets. I get an error when it runs across a sheet that has the same header as the other files but is not populated with data. This is the error:
Error in data.frame(sub.id, condition, s.frame, ss) :
arguments imply differing number of rows: 0, 2
How can I avoid the error? Here is the code I am using below.
# Merge the first 12 sheets of every matching Excel file into one data frame.
file.names <- list.files(pattern='*.xls')
# Sheet names are taken from one reference file and reused for all files.
sheet.names <- getSheets(loadWorkbook('File.xls'))
sheet.names <-sheet.names[1:12]
# Column names v1 .. v16 for the merged result.
e.names <- paste0(rep('v', 16), c(1:16))
# Seed the result with a single all-NA row so rbind() has matching columns;
# that placeholder row remains the first row of data.1.
data.1 <- data.frame(matrix(rep(NA,length(e.names)),
ncol = length(e.names)))
names(data.1) <- e.names
for (i in 1:length(file.names)) {
wb <- loadWorkbook(file.names[i])
for (j in 1:length(sheet.names)) {
ss <- readWorksheet(wb, sheet.names[j], startCol = 2, header = TRUE)
# Tag every row with its sheet (condition) and source file (sub.id).
condition <- rep(sheet.names[j], nrow(ss))
sub.id <- rep(file.names[i], nrow(ss))
# For an empty sheet nrow(ss) is 0, so 1:nrow(ss) is c(1, 0) and seq() of
# that has length 2, while the rep() calls above have length 0. This is the
# "0, 2" in the reported data.frame() error.
s.frame <- seq(1:nrow(ss))
df.1 <- data.frame(sub.id, condition, s.frame, ss)
names(df.1) <- e.names
data.1 <- rbind(data.1, df.1)
rm(ss, condition, s.frame, sub.id, df.1)
}
rm(wb)
}
I suppose this solution will work for you. It loads all .xlsx files in a specified folder into a list of lists. Sheet-names and -headers shouldn't be an issue.
library(openxlsx)
# Define folder where your files are
path_folder <- "C:/path_to_files/"
# Collect the names of all files in that folder ending in ".xlsx"
f <- list.files(path_folder, pattern = "\\.xlsx$")
# Load the data into a list of lists: one element per file, each holding one
# data frame per sheet, named after the sheet.
# lapply() is used on purpose: sapply() may simplify a set of data frames
# with equal column counts into a matrix instead of returning a list.
data_list <- lapply(f, function(file_name) {
  full_path <- file.path(path_folder, file_name)
  sheet_names <- getSheetNames(full_path)
  sheet_data <- lapply(sheet_names, function(sheet_name) {
    read.xlsx(full_path, sheet = sheet_name)
  })
  names(sheet_data) <- sheet_names
  sheet_data
})
# Name the list elements after the files, dropping the extension. The dot is
# escaped and the pattern anchored so only a trailing ".xlsx" is removed.
names(data_list) <- sub("\\.xlsx$", "", f)
You end up with a list (containing each file) of lists (containing the sheets of each file).
From here you can remove empty sheets, merge and edit them as you like.
I added an if-statement that checks whether the sheet actually contains data before building the data frame; sheets that fail the check are skipped, which resolved the error.
# Append the rows of every non-empty sheet of every file to data.1, tagging
# each row with its source file (sub.id), sheet (condition) and row number.
for (i in seq_along(file.names)) {
  wb <- loadWorkbook(file.names[i])
  for (j in seq_along(sheet.names)) {
    ss <- readWorksheet(wb, sheet.names[j], startCol = 2, header = TRUE)
    # Skip only sheets with no data rows. A test of nrow(ss) > 1 would also
    # silently drop sheets that contain exactly one row.
    if (nrow(ss) > 0) {
      condition <- rep(sheet.names[j], nrow(ss))
      sub.id <- rep(file.names[i], nrow(ss))
      # seq_len() gives 1..n directly, without the seq(1:n) detour.
      s.frame <- seq_len(nrow(ss))
      df.1 <- data.frame(sub.id, condition, s.frame, ss)
      names(df.1) <- e.names
      data.1 <- rbind(data.1, df.1)
      rm(ss, condition, s.frame, sub.id, df.1)
    }
  }
  rm(wb)
}
I want to export data frames to Excel and highlight cells according to certain rules. I don't think this answer to a similar question is correct. I think it is possible, and I think I get close using the CellStyle functions of the xlsx package.
Below I outline what I've tried. Most of the ideas come from the package help files. I get all the way to the end and get an error when I try to apply the style I created to the cells that meet the criteria. I get the error: Error in .jcall(cell, "V", "setCellStyle", cellStyle$ref) : RcallMethod: invalid object parameter.
library(xlsx)
# create data: a label column plus a random number (1-5) of numeric columns
cols <- sample(c(1:5), 1) # number of columns to vary to mimic this unknown
label <- rep(paste0("label ", seq(from=1, to=10)))
mydata <- data.frame(label)
for (i in 1:cols) {
mydata[,i+1] <- sample(c(1:10), 10)
}
# exporting data.frame to excel is easy with xlsx package
sheetname <- "mysheet"
write.xlsx(mydata, "mydata.xlsx", sheetName=sheetname)
file <- "mydata.xlsx"
# but we want to highlight cells if value greater than or equal to 5
wb <- loadWorkbook(file) # load workbook
fo <- Fill(backgroundColor="yellow") # create fill object
cs <- CellStyle(wb, fill=fo) # create cell style
sheets <- getSheets(wb) # get all sheets
sheet <- sheets[[sheetname]] # get specific sheet
rows <- getRows(sheet) # get rows
cells <- getCells(rows) # get cells
values <- lapply(cells, getCellValue) # extract the values
# find cells meeting conditional criteria
# "test" is only a placeholder first element; it is dropped again below
highlight <- "test"
for (i in names(values)) {
# values[i] is a one-element list; as.numeric() turns it into a number
x <- as.numeric(values[i])
if (x>=5 & !is.na(x)) {
highlight <- c(highlight, i)
}
}
highlight <- highlight[-1]
# apply style to cells that meet criteria
if (length(highlight)>0) { # proceed if any cells meet criteria
# cells[highlight] is a list of cells, not a single cell object
setCellStyle(cells[highlight], cs) # DOES NOT WORK
}
# save
saveWorkbook(wb, file)
Update:
I've also tried:
# Second attempt: style the selected cells one at a time.
if (length(highlight)>0) { # proceed if any cells meet criteria
for (h in 1:length(highlight)) {
# single-bracket indexing still yields a one-element list, not the cell
setCellStyle(cells[highlight[h]], cs) # DOES NOT WORK
}
}
But I get the error: Error in .jcall(cell, "V", "setCellStyle", cellStyle$ref) : RcallMethod: cannot determine object class
Try this out. I changed a few things, including a slight change to the call to Fill and limiting the cells considered to those with numeric data. I used lapply to apply the conditional formatting.
# Create example data: a label column plus a random number (1-5) of numeric
# columns, each a permutation of 1..10.
cols <- sample(c(1:5), 1) # number of columns to vary to mimic this unknown
label <- paste0("label ", seq(from = 1, to = 10))
mydata <- data.frame(label)
for (i in 1:cols) {
  mydata[, i + 1] <- sample(c(1:10), 10)
}
# exporting data.frame to excel is easy with xlsx package
sheetname <- "mysheet"
file <- "mydata.xlsx"
write.xlsx(mydata, file, sheetName = sheetname)
# but we want to highlight cells if value greater than or equal to 5
wb <- loadWorkbook(file)               # load workbook
fo <- Fill(foregroundColor = "yellow") # create fill object
cs <- CellStyle(wb, fill = fo)         # create cell style
sheets <- getSheets(wb)                # get all sheets
sheet <- sheets[[sheetname]]           # get specific sheet
# Row 1 holds the headers, so the data occupy rows 2 .. nrow(mydata) + 1.
# (The closing parenthesis of this call was missing, which is a syntax error.)
rows <- getRows(sheet, rowIndex = 2:(nrow(mydata) + 1))
# In the written file column 1 holds the row names and column 2 the labels,
# so the numeric data sit in columns 3 .. cols + 2.
cells <- getCells(rows, colIndex = 3:(cols + 2))
values <- lapply(cells, getCellValue)  # extract the values
# Find the cells meeting the criterion. Testing is.na() first with the scalar
# operator && keeps the condition a single TRUE/FALSE per cell.
meets_rule <- vapply(
  values,
  function(v) {
    x <- as.numeric(v)
    !is.na(x) && x >= 5
  },
  logical(1)
)
highlight <- names(values)[meets_rule]
# Apply the style cell by cell; cells[[key]] extracts the cell object itself.
for (key in highlight) {
  setCellStyle(cells[[key]], cs)
}
saveWorkbook(wb, file)
Old question, but for people that still research this topic:
In the package openxlsx, there is a function that makes this much easier- conditionalFormatting()
Below is an example:
# Load needed packages
# requireNamespace() only checks that pacman is installed; it does not attach
# it, which is enough because pacman is called with `pacman::` below.
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}
pacman::p_load(
  # add list of libraries here
  openxlsx
)
## Create workbook and write in sample data
wb <- createWorkbook()
addWorksheet(wb, "Moving Row")
writeData(wb, "Moving Row", -5:5)
writeData(wb, "Moving Row", LETTERS[1:11], startCol = 2)
## Define how you want the cells to be formatted
negStyle <- createStyle(fontColour = "#9C0006", bgFill = "#FFC7CE")
posStyle <- createStyle(fontColour = "#006100", bgFill = "#C6EFCE")
## Highlight each row depending on the first cell in that row:
## "$A1" fixes the column but lets the row move with the cell being formatted.
conditionalFormatting(wb, "Moving Row",
  cols = 1:2,
  rows = 1:11, rule = "$A1<0", style = negStyle
)
conditionalFormatting(wb, "Moving Row",
  cols = 1:2,
  rows = 1:11, rule = "$A1>0", style = posStyle
)
## Save workbook in default location, replacing any existing file
saveWorkbook(wb, "conditionalFormattingExample.xlsx", overwrite = TRUE)
you can read about it here and see many other types of conditional highlighting it can do:
https://www.rdocumentation.org/packages/openxlsx/versions/4.2.5/topics/conditionalFormatting
It has been a while since I used this feature. Yes it should be possible to save conditional formatting. My (old) code is given below. Hope it helps you.
# Write a loadings table and a correlation table to two sheets of one Excel
# workbook, fill selected cells with a solid sky-blue style, save, and open
# the file.
# NOTE(review): `state`, `df`, `f.data` and `execute()` are not defined in
# this snippet; they must come from the surrounding program.
# NOTE(review): the function names used here (writeWorksheet, createCellStyle,
# XLC$...) look like the XLConnect API — confirm.
file.name <- paste('loadings.',state$data,'.xls', sep = "")
wb <- loadWorkbook(file.name, create = TRUE)
# --- Sheet "loadings" ---
createSheet(wb, name = 'loadings')
clearSheet(wb, sheet = 'loadings')
Variables <- rownames(df)
df.loadings <- cbind(Variables,df)
df.loadings[,'Communality'] <- NULL
# The Variables column is dropped again here and passed via `rownames` instead.
writeWorksheet(wb,df.loadings[,-1], sheet = 'loadings', rownames = 'Variables', startRow = 1, startCol = 1)
max.loading <- createCellStyle(wb)
setFillPattern(max.loading, fill = XLC$"FILL.SOLID_FOREGROUND")
setFillForegroundColor(max.loading, color = XLC$"COLOR.SKY_BLUE")
# Per row, the largest absolute value, then the (row, col) positions of the
# entries whose absolute value equals that row maximum.
maxVal <- apply(abs(df.loadings[,-1]),1,max)
maxValIndex <- which(abs(df.loadings[,-1]) == maxVal, arr.ind = TRUE)
# Both indices are shifted by one; presumably this skips the header row and
# the leading Variables column in the sheet — TODO confirm.
setCellStyle(wb, sheet = "loadings", row = maxValIndex[,'row']+1, col = maxValIndex[,'col']+1, cellstyle = max.loading)
# --- Sheet "correlations" ---
df.corr <- data.frame(cor(f.data))
df.corr <- cbind(Variables,df.corr)
createSheet(wb, name = 'correlations')
clearSheet(wb, sheet = 'correlations')
writeWorksheet(wb, df.corr, sheet = 'correlations', startRow = 1, startCol = 1)
corr <- createCellStyle(wb)
setFillPattern(corr, fill = XLC$"FILL.SOLID_FOREGROUND")
setFillForegroundColor(corr, color = XLC$"COLOR.SKY_BLUE")
# Positions of correlations with absolute value above 0.3 that are not
# exactly 1.
corrIndex <- which(abs(df.corr[,-1]) > .3 & df.corr[,-1] != 1 , arr.ind = TRUE)
setCellStyle(wb, sheet = "correlations", row = corrIndex[,'row']+1, col = corrIndex[,'col']+1, cellstyle = corr)
saveWorkbook(wb)
# Build a browseURL(...) call as a string and hand it to execute(); the unix
# branch names '/usr/bin/open' as the browser, the other branch passes NULL.
if(.Platform$OS.type == "unix") {
execute(paste("browseURL(\"",getwd(),'/',file.name,"\", browser = '/usr/bin/open')",sep=''))
} else {
execute(paste("browseURL(\"",getwd(),'/',file.name,"\", browser = NULL)",sep=''))
}