I am having problems automatically generating borders while exporting an Excel file with R. Below is my code, the output I currently generate, and how I would like it to look.
I have tried to adapt the solution here, but could not make it work on my example.
Here is some code to reproduce the problem:
library(xlsx)

# Some data frames to export as Excel files
Agent1 <- data.frame("QUEUE" = c("call PA", "call", "Call", "call CB"), "NR" = c(6,15,3,7), "Client" = c("xyz company", "some other company", "Company abs", "BNM"), stringsAsFactors = FALSE)
Agent2 <- data.frame("QUEUE" = c("call PA", "call", "Call", "call CB"), "NR" = c(7,13,5,3), "Client" = c("xyz company", "some other company", "Company abs", "BNM"), stringsAsFactors = FALSE)
Agent3 <- data.frame("QUEUE" = c("call PA", "call", "Call", "call CB"), "NR" = c(4,4,3,7), "Client" = c("xyz company", "some other company", "Company abs", "BNM"), stringsAsFactors = FALSE)
nr_of_agents <- 3
# Variable creation for counting cases per agent
for (a in 1 : nr_of_agents) {
agent_s <- paste0("Agent",a,"sum")
assign(agent_s, 0)
}
for (a in 1:nr_of_agents){ #Counting cases per agent
agent <- paste0("Agent",a)
tempv <- eval(as.name(agent))
agent_s <- paste0("Agent",a,"sum")
tempv1 <- eval(as.name(agent_s))
tempv1 <- sum(tempv$NR)
assign(agent_s, paste("Total cases: ", tempv1))
}
## EXCEL OUTPUT
wb<-createWorkbook(type="xlsx")
TITLE_STYLE <- CellStyle(wb)+ Font(wb, heightInPoints=16, color=NULL, isBold=TRUE) +
Alignment(h="ALIGN_CENTER")
TEXT_STYLE <- CellStyle(wb)+ Font(wb, heightInPoints=12, color=NULL, isBold=FALSE) +
Alignment(h="ALIGN_RIGHT")+
Border(color="black", position=c("TOP"),
pen=c("BORDER_THIN"))
# Styles for the data table row/column names
TABLE_ROWNAMES_STYLE <- CellStyle(wb) + Font(wb, isBold=TRUE)
TABLE_COLNAMES_STYLE <- CellStyle(wb) + Font(wb,color="#FFFAFA", heightInPoints=12, name="Calibri Light", isBold=TRUE) +
Fill(foregroundColor="#9e2b11",pattern="SOLID_FOREGROUND")+#, backgroundColor="lightblue")
Alignment(wrapText=TRUE, horizontal="ALIGN_CENTER")+
Border(color="black", position=c("TOP", "BOTTOM", "LEFT", "RIGHT"),
pen=c("BORDER_THIN"))
#Code to add title
xlsx.addTitle<-function(sheet, rowIndex, title, titleStyle){
rows <-createRow(sheet,rowIndex=rowIndex)
sheetTitle <-createCell(rows, colIndex=3)
setCellValue(sheetTitle[[1,1]], title)
setCellStyle(sheetTitle[[1,1]], titleStyle)
}
#Code to add sums of cases per agent
xlsx.addsums<-function(sheet, rowIndex, title, titleStyle){
rows <-createRow(sheet,rowIndex=rowIndex)
sheetTitle <-createCell(rows, colIndex=3)
setCellValue(sheetTitle[[1,1]], title)
setCellStyle(sheetTitle[[1,1]], titleStyle)
}
names <- c("Mark", "Neli", "Sara") # Agents names
for (a in 1 : nr_of_agents) {
agent <- paste0("Agent",a)
tempv <- eval(as.name(agent))
agent_S <- paste0("Agent",a,"sum")
tempv1 <- eval(as.name(agent_S))
sheet<-createSheet(wb, sheetName = names[a]) #sheet creation
xlsx.addTitle(sheet, rowIndex=1, title=names[a], #Adding title to each sheet
titleStyle = TITLE_STYLE)
addDataFrame(tempv, sheet, startRow=3, startColumn=1, #Adding the dataframes
colnamesStyle = TABLE_COLNAMES_STYLE,
rownamesStyle = TABLE_ROWNAMES_STYLE
)
xlsx.addsums(sheet, rowIndex=(3+ nrow(tempv)+1), title= tempv1, #Adding total sum for every agent
titleStyle = TEXT_STYLE)
autoSizeColumn(sheet, colIndex=c(1:ncol(tempv))) #Auto size columns
}
saveWorkbook(wb, paste0(Sys.Date()," Test_file",".xlsx"))
Picture of current and desired output
As seen in the picture, the automatic column width is also not working correctly: its size depends on the length of the column header rather than on the longest entry in the column. Any idea how to solve this?
Thanks for the help!
You can do this with the openxlsx package.
library(openxlsx)
# Data
Agent1 <- data.frame("QUEUE" = c("call PA", "call", "Call", "call CB"), "NR" = c(6,15,3,7), "Client" = c("xyz company", "some other company", "Company abs", "BNM"), stringsAsFactors = FALSE)
Agent2 <- data.frame("QUEUE" = c("call PA", "call", "Call", "call CB"), "NR" = c(7,13,5,3), "Client" = c("xyz company", "some other company", "Company abs", "BNM"), stringsAsFactors = FALSE)
Agent3 <- data.frame("QUEUE" = c("call PA", "call", "Call", "call CB"), "NR" = c(4,4,3,7), "Client" = c("xyz company", "some other company", "Company abs", "BNM"), stringsAsFactors = FALSE)
agents <- c("Mark", "Neli", "Sara")
wb <- createWorkbook()
for (i in 1:length(agents)) {
agent <- paste0("Agent", i)
agent_nam <- agents[i]
agent_df <- eval(as.name(agent))
# Add sheet
addWorksheet(wb, agent_nam)
# Save Header (agent name)
writeData(wb, sheet = agent_nam, x = agent_nam, startRow = 1, startCol = 3)
# Write Dataframe
writeData(wb, sheet = agent_nam, x = agent_df, startRow = 3, rowNames = TRUE)
# Total cases
writeData(wb, sheet = agent_nam, x = paste0("Total cases: ", sum(agent_df$NR)), startRow = 8, startCol = 3)
# style 1: Agent names in bold
s1 <- createStyle(fontSize = 16, textDecoration = c("BOLD"), halign = "center")
# style 2: Bold white font with red background fill for table header
s2 <- createStyle(fontName = "Calibri Light", fontColour = "#FFFFFF",
fgFill = "#9e2b11", textDecoration = c("BOLD"), halign = "center",
border = "TopBottomLeftRight")
# style 3: border around the data
s3 <- createStyle(border = "TopBottomLeftRight")
# style 4: Text in the center for Total cases
s4 <- createStyle(halign = "center")
# Apply styles to the workbook
addStyle(wb, sheet = agent_nam, style = s1, rows = 1, cols = 3, gridExpand = TRUE)
addStyle(wb, sheet = agent_nam, style = s2, rows = 3, cols = 2:4, gridExpand = TRUE)
addStyle(wb, sheet = agent_nam, style = s3, rows = 4:7, cols = 2:4, gridExpand = TRUE)
addStyle(wb, sheet = agent_nam, style = s4, rows = 8, cols = 3, gridExpand = TRUE)
# Column widths
setColWidths(wb, sheet = agent_nam, cols = 1:4, widths = "auto")
}
saveWorkbook(wb, paste0(Sys.Date()," Test_file (openxlsx)",".xlsx"))
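One caveat: the rows and cols passed to addStyle above are hard-coded for this 4-row example. As a hedged sketch (assuming the same layout, i.e. writeData(..., startRow = 3, rowNames = TRUE)), you could derive the ranges from the data frame instead:
# Hypothetical generalization: compute the styled ranges from the data frame itself
header_row <- 3
data_rows  <- (header_row + 1):(header_row + nrow(agent_df))  # body rows sit directly under the header row
data_cols  <- 2:(1 + ncol(agent_df))                          # rowNames = TRUE shifts the data one column right
addStyle(wb, sheet = agent_nam, style = s2, rows = header_row, cols = data_cols, gridExpand = TRUE)
addStyle(wb, sheet = agent_nam, style = s3, rows = data_rows,  cols = data_cols, gridExpand = TRUE)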
I want to plot a heatmap with annotations using getMoHeatmap, which uses ComplexHeatmap under the hood.
My annotations annCol don't show up. Why?
library(MOVICS)
# Color Palette 3
cn.col <- c("grey90" , "black")
mut.col <- c("#a05195", "#caa0c2", "#f1f1f1", "#778478", "#0f2612")
exp.col <- c("#d45087", "#e8a4ba", "#f1f1f1", "#707e71", "#001c00")
mirna.col <- c("#ffedae", "#ffabab", "#f1f1f1","#768c86","#003329")
meth.col <- c("#ffc588","#fbdbbc","#f1f1f1","#86a8ab","#00636b")
protein.col <- c("#ffa600", "#ffcb8a", "#f1f1f1", "#8093a3","#003f5c")
col.list <- list(cn.col, mut.col, mirna.col, exp.col, meth.col, protein.col)
# Clinical info
annCol <- clin.info[,c("race", "gender", "age", "pathologic_stage"), drop = FALSE]
annCol <- na.omit(annCol)
annColors <- list(race = c("white"="#FDEFE5",
"black or african american"="#3D0C02",
"american indian or alaska native"="#CC9A8B",
"asian"="#E0Ac69"),
gender = c("male"="#375b58",
"female"="#f1bfb6"),
age = circlize::colorRamp2(breaks = c(min(annCol$age),
median(annCol$age),
max(annCol$age)),
colors = c("#bfe8ff", "#7896aa", "#374C5B")),
pathologic_stage = c("stage i"="#f1bfb6",
"stage ii"="#b893a5",
"stage iii"="#746f87",
"stage iv"="#374c5b"))
# Cluster colors
clust.col <- c("#41436A", "#984063", "#F64668", "#FE9677", "#0A4958", "#008b76", "#f08080", "#FFB480", "#FDFD96")
getMoHeatmap(data = plotdata,
row.title = c("Mutation", "CNA", "miRNA", "mRNA", "Methylation","Protein"),
is.binary = c(T,F,F,F,F,F),
legend.name = c("Mutated", "Copy Number", "miRNA.expression", "mRNA.norm.count","M value","Protein.RPPA"),
clust.res = cmoic.kirp$clust.res$clust, # consensusMOIC results
clust.dend = NULL, # no dendrogram
show.rownames = c(F,F,F,F,F,F), # specify for each omics data
show.colnames = FALSE, # show no sample names
annRow = annRow, # mark selected features
color = col.list,
annCol = annCol, # annotation for samples
annColors = annColors, # annotation color
width = 10, # width of each subheatmap
height = 5, # height of each subheatmap
fig.name = "Plots/comprehensive_heatmap_consensus_MOIC",
clust.col = clust.col)
dev.off()
I have a text file with words from historical accounts, and I want to visualise the species and the frequency of the words associated with them.
So far I have tried the following code with a txt file of all the historical documents in one document, but I want to ask whether there is a specific CSV format that I could then read into R for a bipartite network graph:
"""library(ggraph)
library(ggplot2)
library(dplyr)
library(pdftools)
library(tm)
library(readtext)
library(tidytext)
library(igraph)
library(tidyr)
library(FactoMineR)
library(factoextra)
library(flextable)
library(GGally)
library(ggdendro)
library(network)
library(Matrix)
library(quanteda)
library(stringr)
library(quanteda.textstats)
options(stringsAsFactors = F)
options(scipen = 999)
options(max.print=1000)
# Read in text--------
wordbase <- readtext("mq_bird_stories.txt")
# List of extra words to remove---------
extrawords <- c("the", "can", "get", "Ccchants", "make", "making", "house", "torn", "tree", "man", "however", "upon", "instructs", "wife", "coming","without", "mother", "versions","variant", "version", "thus", "got","throws", "are", "has", "already", "asks", "sacra", "can", "brings", "one", "look", "sees", "tonaheiee", "wants", "later",
"dont", "even", "may", "but", "will", "turn", "sing", "swallows", "alba", "gives", "find", "other","tonaheieee", "away","day","comes","another",
"much", "first", "but", "see", "new", "back","goes", "go","songs", "returns", "take","takes","come",
"many", "less", "now", "well", "taught", "like", "puts", "slits", "sends", "tell","tells","open","mentions",
"often", "every", "said", "two", "and", "handsome", "husband", "bring", "lives","gets", "von", "den", "steinen", "handy")
# Clean the data-------
darwin <- wordbase %>%
paste0(collapse = " ") %>%
stringr::str_squish() %>%
stringr::str_remove_all("\\(") %>%
stringr::str_remove_all("\\)") %>%
stringr::str_remove_all("!") %>%
stringr::str_remove_all(",") %>%
stringr::str_remove_all(";") %>%
stringr::str_remove_all("\\?") %>%
stringr::str_split(fixed(".")) %>%
unlist() %>%
tm::removeWords(extrawords) %>%
paste0(collapse = " ")
# One method for calculating frequencies of bigrams------
# Process into a table of words
darwin_split <- darwin %>%
as_tibble() %>%
tidytext::unnest_tokens(words, value)
# Create data frame of bigrams-------
darwin_words <- darwin_split %>%
dplyr::rename(word1 = words) %>%
dplyr::mutate(word2 = c(word1[2:length(word1)], NA)) %>%
na.omit()
# Calculate frequency of bigrams-----
darwin2grams <- darwin_words %>%
dplyr::mutate(bigram = paste(word1, word2, sep = " ")) %>%
dplyr::group_by(bigram) %>%
dplyr::summarise(frequency = n()) %>%
dplyr::arrange(-frequency)
# Define stopwords
stps <- paste0(tm::stopwords(kind = "en"), collapse = "\\b|\\b")
# Remove stopwords from bigram table
darwin2grams_clean <- darwin2grams %>%
dplyr::filter(!str_detect(bigram, stps))
# Another method for calculating frequencies of bigrams
# Clean corpus
darwin_clean <- darwin %>%
stringr::str_to_title()
# Tokenize corpus----
darwin_tokzd <- quanteda::tokens(darwin_clean)
# Extract bigrams------
BiGrams <- darwin_tokzd %>%
quanteda::tokens_remove(stopwords("en")) %>%
quanteda::tokens_select(pattern = "^[A-Z]",
valuetype = "regex",
case_insensitive = FALSE,
padding = TRUE) %>%
quanteda.textstats::textstat_collocations(min_count = 1, tolower = FALSE)
# read in and process text
darwinsentences <- darwin %>%
stringr::str_squish() %>%
tokenizers::tokenize_sentences(.) %>%
unlist() %>%
stringr::str_remove_all("- ") %>%
stringr::str_replace_all("\\W", " ") %>%
stringr::str_squish()
# inspect data
head(darwinsentences)
darwincorpus <- Corpus(VectorSource(darwinsentences))
# clean corpus-----
darwincorpusclean <- darwincorpus %>%
tm::tm_map(removeNumbers) %>%
tm::tm_map(tolower) %>%
tm::tm_map(removeWords, stopwords()) %>%
tm::tm_map(removeWords, extrawords)
# create document term matrix
darwindtm <- DocumentTermMatrix(darwincorpusclean, control=list(bounds = list(global=c(1, Inf)), weighting = weightBin))
# convert dtm into sparse matrix
darwinsdtm <- Matrix::sparseMatrix(i = darwindtm$i, j = darwindtm$j,
x = darwindtm$v,
dims = c(darwindtm$nrow, darwindtm$ncol),
dimnames = dimnames(darwindtm))
# calculate co-occurrence counts
coocurrences <- t(darwinsdtm) %*% darwinsdtm
# convert into matrix
collocates <- as.matrix(coocurrences)
# inspect size of matrix
ncol(collocates)
#provide some summary stats
summary(rowSums(collocates))
#visualising collocations
# load function for co-occurrence calculation
source("https://slcladal.github.io/rscripts/calculateCoocStatistics.R")
# define term
coocTerm <- "pigeon"
# calculate co-occurrence statistics
coocs <- calculateCoocStatistics(coocTerm, darwinsdtm, measure="LOGLIK")
# inspect results
coocs[1:50]
coocdf <- coocs %>%
as.data.frame() %>%
dplyr::mutate(CollStrength = coocs,
Term = names(coocs)) %>%
dplyr::filter(CollStrength > 0)
###Make graph - visualize association strengths------
ggplot(coocdf, aes(x = reorder(Term, CollStrength, mean), y = CollStrength)) +
geom_point() +
coord_flip() +
theme_bw() +
labs(y = "")
##network
net = network::network(collocates_redux,
directed = FALSE,
ignore.eval = FALSE,
names.eval = "weights")
# vertex names
network.vertex.names(net) = rownames(collocates_redux)
# inspect object
net
ggnet2(net,label = TRUE,
label.size = 4,
alpha = 0.2,
size.cut = 3,
edge.alpha = 0.3) +
guides(color = FALSE, size = FALSE)
I'd suggest taking a look at the netCoin package. If you can transform your data into nodes and links data frames, then you can easily get a high quality network visualization:
#Example of links data frame
links <-
data.frame(
matrix(
c(
"Person A","Account 1", "not link",
"Person A","Account 2", "link",
"Person B","Account 2", "link",
"Person B","Account 3", "not link",
"Person B","Account 4", "link",
"Person C","Account 4", "link"
),
nrow = 6,
ncol = 3,
byrow = TRUE,
dimnames = list(NULL,
c("Source", "Target", "other_links_column"))
),
stringsAsFactors = FALSE
)
#Example of nodes data frame
nodes <-
data.frame(
matrix(
c(
"Person A","person",
"Person B","person",
"Person C","person",
"Account 1", "account",
"Account 2", "account",
"Account 3", "account",
"Account 4", "account"
),
nrow = 7,
ncol = 2,
byrow = TRUE,
dimnames = list(NULL,
c("name", "other_nodes_column"))
),
stringsAsFactors = FALSE
)
install.packages("netCoin") #may need to install the netCoin package
library(netCoin)
?netCoin #displays netCoin Help to see all the function options
graph_df <- netCoin(nodes = nodes, #Data frame of unique nodes and their attributes #Must contain name column
links = links, #Data frame of links and their attributes #Must contain Source and Target columns
cex = 1.25, #Font size
color = "other_nodes_column", #Column in node data frame to determine node color
shape = "other_nodes_column", #Column in node data frame to determine node shape
main = "This is the title of my visualization", #Visualization title
controls = 1:5, #Controls that will be shown in the visualization (maximum of 5)
dir = "folder-with-viz-output") #Output folder for the visualization #Entire folder should be exported as a zip file
plot(graph_df) #Command to display the visualization
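To connect this back to your bigram table, here is a hedged sketch (assuming darwin2grams_clean still has the bigram and frequency columns from your code above) of reshaping it into the links/nodes format netCoin expects:
# Split each bigram into a Source and Target word; frequency is kept in case you
# want to use it in the visualization later (tidyr is already loaded in your script)
links <- darwin2grams_clean %>%
  tidyr::separate(bigram, into = c("Source", "Target"), sep = " ")
# One node per unique word appearing as either Source or Target
nodes <- data.frame(name = unique(c(links$Source, links$Target)),
                    stringsAsFactors = FALSE)
word_net <- netCoin(nodes = nodes, links = links, dir = "bird-story-network")  # folder name is just an example
plot(word_net)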
I would like to use ComplexHeatmap to plot multiple files, each as an individual data frame/heatmap.
So far I have been able to do this for a small subset of files.
Reading the files as a list:
list_of_files <- list.files('Model_hmap/',pattern = '\\.txt$', full.names = TRUE)
#Further arguments to read.csv can be passed in ...
#all_csv <- lapply(list_of_files,read_delim,delim = "\t", escape_double = FALSE,trim_ws = TRUE)
all_csv <- lapply(list_of_files,read.table,strip.white = FALSE,check.names = FALSE,header=TRUE,row.names=1)
#my_names = c("gene","baseMean","log2FoldChange","lfcSE","stat","pvalue","padj","UP_DOWN")
my_names = c("Symbol","baseMean","log2FoldChange","lfcSE","stat","pvalue","padj","UP_DOWN")
#my_names = c['X2']
#my_names = c("Peak","annotation","ENSEMBL","log2FoldChange","padj","UP_DOWN")
result_abd = lapply(all_csv, FUN = function(x) subset(x, select=-c(1:7,155)))
names(result_abd) <- gsub(".txt","",
list.files("Model_hmap/",full.names = FALSE),
fixed = TRUE)
Then scaling the data:
fun <- function(result_abd) {
p <- t(scale(t(result_abd[,1:ncol(result_abd)])))
}
p2 <- mapply(fun, result_abd, SIMPLIFY = FALSE)
The next step was to use the metadata with which I would like to annotate my heatmap.
My metadata is as follows:
dput(head(metadata))
structure(list(patient = c("TCGA-AB-2856", "TCGA-AB-2849", "TCGA-AB-2971",
"TCGA-AB-2930", "TCGA-AB-2891", "TCGA-AB-2872"), prior_malignancy = c("no",
"no", "no", "no", "no", "no"), FAB = c("M4", "M0", "M4", "M2",
"M1", "M3"), Risk_Cyto = c("Intermediate", "Poor", "Intermediate",
"Intermediate", "Poor", "Good")), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"))
To read the above metadata I am doing the following; I'm not sure if it is the right approach.
list_of_files1 <- list.files('Model_hmap_meta/',pattern = '\\.txt$', full.names = TRUE)
#Further arguments to read.csv can be passed in ...
meta1 <- lapply(list_of_files1,read.table, row.names = 1,sep = "\t",header = TRUE)
Now I'm stuck at the above step: I'm not sure how to pass the metadata as a list, the way I did for the gene-expression data frames for which I calculated the z-scores (that object is a list). I think the metadata needs to be of the same class if I am to use it that way.
For a single file, this is how I add the annotation to my final plot:
metadata = read_delim("Model_hmap_meta/FAB_table.txt",delim = "\t", escape_double = FALSE,
trim_ws = TRUE)
head(metadata)
dim(metadata)
ann <- data.frame(metadata$FAB, metadata$Risk_Cyto)
colnames(ann) <- c('FAB', 'Risk_Cyto')
colours <- list('FAB' = c('M0' = 'red2', 'M1' = 'royalblue', 'M2'='gold','M3'='forestgreen','M4'='chocolate','M5'='Purple'),
'Risk_Cyto' = c('Good' = 'limegreen', 'Intermediate' = 'navy' , 'N.D.' ='magenta','Poor'='black'))
colAnn <- HeatmapAnnotation(df = ann,
which = 'col',
col = colours,
annotation_width = unit(c(1, 4), 'cm'),
gap = unit(1, 'mm'))
Now, if I understand correctly, this is what I need to pass for each element of the list, which I am not able to do.
My plotting function: this is the code I use to plot.
hm1 <- Heatmap(heat,
col= colorRamp2(c(-2.6,-1,0,1,2.6),c("blue","skyblue","white","lightcoral","red")),
#heatmap_legend_param=list(at=c(-2.6,-1,0,1,2.6),color_bar="continuous",
# legend_direction="vertical", legend_width=unit(5,"cm"),
# title_position="topcenter", title_gp=gpar(fontsize=10, fontface="bold")),
name = "Z-score",
#Row annotation configurations
cluster_rows=T,
show_row_dend=FALSE,
row_title_side="right",
row_title_gp=gpar(fontsize=8),
show_row_names=FALSE,
row_names_side="left",
#Column annotation configuratiions
cluster_columns=T,
show_column_dend=T,
column_title="DE genes",
column_title_side="top",
column_title_gp=gpar(fontsize=15, fontface="bold"),
show_column_names = FALSE,
column_names_gp = gpar(fontsize = 12, fontface="bold"),
#Dendrogram configurations: columns
clustering_distance_columns="euclidean",
clustering_method_columns="complete",
column_dend_height=unit(10,"mm"),
#Dendrogram configurations: rows
clustering_distance_rows="euclidean",
clustering_method_rows="complete",
row_dend_width=unit(4,"cm"),
row_dend_side = "left",
row_dend_reorder = TRUE,
#Splits
border=T,
row_km = 1,
column_km = 1,
#plot params
#width = unit(5, "inch"),
#height = unit(4, "inch"),
#height = unit(0.4, "cm")*nrow(mat),
#Annotations
top_annotation = colAnn)
# plot heatmap
draw(hm1, annotation_legend_side = "right", heatmap_legend_side="right")
Objective
How do I wrap all of the above into a small function that takes multiple files as input and plots them?
UPDATE
Data files
My data files and my metadata file
Using the code you provided, I made the following function (make_heatmap). Some of the read-in statements are altered to match what I was working with on my machine. I also only used 2 of your files, but it should work with all 4 that you're using.
This function lets you pass in the counts matrix (which you normalize and set up before passing it to the function). The assumption is that you're using the same metadata/annotation for each file you pass. If you have different annotation files, you could set up the heatmap annotation before the function and then pass that to the function as well (see the sketch below); this is a bit more tedious, though.
Usually the way I set up my heatmap analyses is that I have a script containing all of my functions (one for each type of heatmap I have to make), and then every time I need to make a new heatmap I have another script where I read in/prepare (i.e. median-center) my counts matrix and then call the heatmap function I need.
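For that different-annotation case, here is a minimal sketch (make_heatmap2 and the meta1 list of metadata data frames are assumptions, not tested against your files); the full function I actually ran follows afterwards:
library(ComplexHeatmap)
library(circlize)
# Hypothetical variant that takes a pre-built column annotation as a second argument
make_heatmap2 <- function(counts_matrix, col_ann) {
  hm <- Heatmap(counts_matrix,
                col = colorRamp2(c(-2.6, -1, 0, 1, 2.6),
                                 c("blue", "skyblue", "white", "lightcoral", "red")),
                name = "Z-score",
                top_annotation = col_ann)
  draw(hm, annotation_legend_side = "right", heatmap_legend_side = "right")
}
# One HeatmapAnnotation per metadata data frame, then pair each with its matrix, e.g.:
# ann_list <- lapply(meta1, function(m) HeatmapAnnotation(df = m[, c("FAB", "Risk_Cyto")]))
# make_heatmap2(as.matrix(p2[[1]]), ann_list[[1]])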
list_of_files <- dir(pattern = 'MAP', full.names = TRUE)
#Further arguments to read.csv can be passed in ...
#all_csv <- lapply(list_of_files,read_delim,delim = "\t", escape_double = FALSE,trim_ws = TRUE)
all_csv <- lapply(list_of_files,read.table,strip.white = FALSE,check.names = FALSE,header=TRUE,row.names=1)
#my_names = c("gene","baseMean","log2FoldChange","lfcSE","stat","pvalue","padj","UP_DOWN")
my_names = c("Symbol","baseMean","log2FoldChange","lfcSE","stat","pvalue","padj","UP_DOWN")
#my_names = c['X2']
#my_names = c("Peak","annotation","ENSEMBL","log2FoldChange","padj","UP_DOWN")
result_abd = lapply(all_csv, FUN = function(x) subset(x, select=-c(1:7,155)))
names(result_abd) <- gsub(".txt","",
list.files("Model_hmap/",full.names = FALSE),
fixed = TRUE)
fun <- function(result_abd) {
p <- t(scale(t(result_abd[,1:ncol(result_abd)])))
}
p2 <- mapply(fun, result_abd, SIMPLIFY = FALSE)
# list_of_files1 <- list.files('Model_hmap_meta/',pattern = '\\.txt$', full.names = TRUE)
# #Further arguments to read.csv can be passed in ...
# meta1 <- lapply(list_of_files1,read.table, row.names = 1,sep = "\t",header = TRUE)
make_heatmap<-function(counts_matrix){
metadata = read.table("FAB_table.txt",sep = "\t", header=1)
head(metadata)
dim(metadata)
ann <- data.frame(metadata$FAB, metadata$Risk_Cyto)
colnames(ann) <- c('FAB', 'Risk_Cyto')
colours <- list('FAB' = c('M0' = 'red2', 'M1' = 'royalblue', 'M2'='gold','M3'='forestgreen','M4'='chocolate','M5'='Purple'),
'Risk_Cyto' = c('Good' = 'limegreen', 'Intermediate' = 'navy' , 'N.D.' ='magenta','Poor'='black'))
colAnn <- HeatmapAnnotation(df = ann,
which = 'col',
col = colours,
annotation_width = unit(c(1, 4), 'cm'),
gap = unit(1, 'mm'))
hm1 <- Heatmap(counts_matrix,
col= colorRamp2(c(-2.6,-1,0,1,2.6),c("blue","skyblue","white","lightcoral","red")),
#heatmap_legend_param=list(at=c(-2.6,-1,0,1,2.6),color_bar="continuous",
# legend_direction="vertical", legend_width=unit(5,"cm"),
# title_position="topcenter", title_gp=gpar(fontsize=10, fontface="bold")),
name = "Z-score",
#Row annotation configurations
cluster_rows=T,
show_row_dend=FALSE,
row_title_side="right",
row_title_gp=gpar(fontsize=8),
show_row_names=FALSE,
row_names_side="left",
#Column annotation configuratiions
cluster_columns=T,
show_column_dend=T,
column_title="DE genes",
column_title_side="top",
column_title_gp=gpar(fontsize=15, fontface="bold"),
show_column_names = FALSE,
column_names_gp = gpar(fontsize = 12, fontface="bold"),
#Dendrogram configurations: columns
clustering_distance_columns="euclidean",
clustering_method_columns="complete",
column_dend_height=unit(10,"mm"),
#Dendrogram configurations: rows
clustering_distance_rows="euclidean",
clustering_method_rows="complete",
row_dend_width=unit(4,"cm"),
row_dend_side = "left",
row_dend_reorder = TRUE,
#Splits
border=T,
row_km = 1,
column_km = 1,
#plot params
#width = unit(5, "inch"),
#height = unit(4, "inch"),
#height = unit(0.4, "cm")*nrow(mat),
#Annotations
top_annotation = colAnn)
# plot heatmap
draw(hm1, annotation_legend_side = "right", heatmap_legend_side="right")
}
make_heatmap(as.matrix(p2[[1]])) #just call the function with the counts matrix
make_heatmap(as.matrix(p2[[2]]))
If you need to output the heatmap to a pdf or something, you can do that before calling the function or you can put that command inside of the heatmap function (just make sure to call dev.off() inside the function too in that case).
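For example, a minimal sketch of writing one heatmap to a PDF (the file name is just a placeholder):
pdf("heatmap_MAP1.pdf", width = 10, height = 8)  # open the device before drawing
make_heatmap(as.matrix(p2[[1]]))
dev.off()                                        # close it so the file is written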
I am attempting to create and save multiple formatted Excel files, each of which is subsetted from a data frame by a factor.
This is an example of what I have tried so far:
library(openxlsx)

# Create data
df <- data.frame(category = rep(c("a","b","c","d"),times = 20),
values = rnorm(20,5,2))
# Create workbooks named after specific level of factor
l1 <- sapply(levels(df$category), assign, value = createWorkbook())
# Create styles
hs <- createStyle(fgFill = "#808080", border = "bottom", textDecoration = "bold")
lt8 <- createStyle(bgFill = "#ff0000")
gt30 <- createStyle(bgFill = "#00b0f0")
grn <- createStyle(bgFill = "#00b000")
# For loop
for (i in l1) {
addWorksheet(i, names(i))
writeData(i, names(i), df[df$category == names(i),], headerStyle = hs)
conditionalFormatting(i, names(i), cols = 1:2, rows = 2:nrow(df[df$category == names(i),]), rule = "$B2<2", type = "expression", style = lt8)
conditionalFormatting(i, names(i), cols = 1:2, rows = 2:nrow(df[df$category == names(i),]), rule = "$B2>=7", type = "expression", style = gt30)
conditionalFormatting(i, names(i), cols = 1:2, rows = 2:nrow(df[df$category == names(i),]), rule = "AND($B2>=4, $B2<5.5)", style = grn)
setColWidths(i, names(i), cols=1:2, widths = "auto")
saveWorkbook(paste(i, ".wb", sep = ""), file = paste(i, " Report ", ".xlsx", sep = ""))
}
Each time, I run into this error
Error in if (tolower(sheetName) %in% tolower(wb$sheet_names)) stop("A worksheet by that name already exists! Sheet names must be unique case-insensitive.")
This is the first time I've attempted to assign any sheets so I'm not exactly sure why I keep getting this error.
Ultimately, I would like to save the subsetted and formatted Excel workbooks through a repetitive process, because my real data would produce many more workbooks. The workbooks must be separate; placing these subsets in sheets of a single workbook won't work.
Any and all advice on how to achieve this would be greatly appreciated.
Your error is coming from this line:
addWorksheet(i, names(i))
because names(i) is empty:
> names(l1[['a']])
character(0)
You might be better off looping over the names of l1, so you have the categories you want, using that to pull the appropriate workbook from the list. Something like:
for (i in names(l1)) {
wb = l1[[i]]
addWorksheet(wb, i)
category_data <- df[df$category == i,]
writeData(wb, i, category_data, headerStyle = hs)
conditionalFormatting(wb, i, cols = 1:2, rows = 2:nrow(category_data), rule = "$B2<2", type = "expression", style = lt8)
conditionalFormatting(wb, i, cols = 1:2, rows = 2:nrow(category_data), rule = "$B2>=7", type = "expression", style = gt30)
conditionalFormatting(wb, i, cols = 1:2, rows = 2:nrow(category_data), rule = "AND($B2>=4, $B2<5.5)", style = grn)
setColWidths(wb, i, cols=1:2, widths = "auto")
saveWorkbook(wb, file = paste(i, " Report ", ".xlsx", sep = ""))
}
There's still one subtle error here:
l1 <- sapply(levels(df$category), assign, value = createWorkbook())
createWorkbook() is only being called once, so you have 4 copies of the same workbook. That means the final save will have all 4 tabs. Compare:
> identical(l1$a, l1$b)
[1] TRUE
with 2 separate calls to createWorkbook():
> identical(createWorkbook(), createWorkbook())
[1] FALSE
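If you want to keep the list-of-workbooks approach, a hedged fix is to wrap createWorkbook() in a function so it is evaluated once per category rather than once overall:
l1 <- sapply(levels(df$category), function(x) createWorkbook(), simplify = FALSE)
identical(l1$a, l1$b)   # now FALSE: each element is a distinct workbook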
Might be worth just looping over the distinct categories, and creating the workbook inside the loop. That is:
library(openxlsx)
# Create data
df <- data.frame(category = rep(c("a","b","c","d"),times = 20),
values = rnorm(20,5,2))
# Create styles
hs <- createStyle(fgFill = "#808080", border = "bottom", textDecoration = "bold")
lt8 <- createStyle(bgFill = "#ff0000")
gt30 <- createStyle(bgFill = "#00b0f0")
grn <- createStyle(bgFill = "#00b000")
# For loop
for (i in levels(df$category)) {
wb <- createWorkbook()
addWorksheet(wb, i)
category_data <- df[df$category == i,]
writeData(wb, i, category_data, headerStyle = hs)
conditionalFormatting(wb, i, cols = 1:2, rows = 2:nrow(category_data), rule = "$B2<2", type = "expression", style = lt8)
conditionalFormatting(wb, i, cols = 1:2, rows = 2:nrow(category_data), rule = "$B2>=7", type = "expression", style = gt30)
conditionalFormatting(wb, i, cols = 1:2, rows = 2:nrow(category_data), rule = "AND($B2>=4, $B2<5.5)", style = grn)
setColWidths(wb, i, cols=1:2, widths = "auto")
saveWorkbook(wb, file = paste(i, " Report ", ".xlsx", sep = ""))
}
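One assumption worth checking: with R 4.0 and later, data.frame() no longer converts strings to factors by default, so df$category is character, levels(df$category) is NULL, and the loop above would run zero times. Either make the column a factor up front or iterate over the distinct values:
df$category <- factor(df$category)   # ensures levels() behaves as the loop expects
# or simply loop over the distinct values instead:
# for (i in unique(df$category)) { ... }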
I am trying to make a Shiny app where the user selects a few options, and a network and a data table are displayed based on the inputs. I have a diet study database and would like users to be able to specify the predator species they are interested in, the diet metric (weight, volumetric, etc.), and the taxonomic level to which nodes are identified. The data table works fine (so I did not include that code) and updates based on the input, but the network does not change; it only shows all of the data. When I run the code for generating the plot outside of Shiny, it works fine. This is my first Shiny attempt, so any suggestions would be greatly appreciated.
library(shiny)
library(shinydashboard)
library(dplyr)
library(igraph)
library(networkD3)
Diet <-data.frame(
Predator_Scientific_Name = rep("Acanthocybium solanderi", 10),
Class_Predator = rep("Actinopterygii", 10),
Order_Predator = rep("Perciformes", 10),
Family_Predator = rep("Scombridae", 10),
Genus_Predator = rep("Acanthocybium", 10),
Species_Predator = rep("solandri", 10),
Class_Prey = rep("Actinopterygii", 10),
Order_Prey = c( "Clupeiformes" , NA , "Perciformes", "Perciformes", "Perciformes", "Perciformes", "Perciformes", "Perciformes", "Tetraodontiformes", "Tetraodontiformes"),
Family_Prey = c("Clupeidae", NA, "Coryphaenidae", "Carangidae", "Scombridae","Echeneidae","Carangidae", "Scombridae", "Balistidae","Diodontidae"),
Genus_Prey = c("Sardinella", NA, "Coryphaena", "Decapterus", "Euthynnus", NA, NA, NA, "Balistes", "Diodon"),
Species_Prey = c("aurita" , "", "hippurus", "punctatus","alletteratus", "", "", "","capriscus", "spp." ),
Lowest_Taxonomic_Identification_Prey = c("Sardinella aurita","Actinopterygii","Coryphaena hippurus","Decapterus punctatus","Euthynnus alletteratus", "Echeneidae", "Carangidae","Scombridae","Balistes capriscus","Diodon spp."),
Frequency_of_Occurrence = c(2.8, 59.1, 1.4, 7.0, 1.4, 1.4, 15.5, 21.1, 2.8, 4.2), stringsAsFactors = FALSE
)
pred.name <- unique(Diet$Predator_Scientific_Name)
prey.tax <- unique(Diet$Lowest_Taxonomic_Identification_Prey)
#Progress bar function
compute_data <- function(updateProgress = NULL) {
# Create 0-row data frame which will be used to store data
dat <- data.frame(x = numeric(0), y = numeric(0))
for (i in 1:10) {
Sys.sleep(0.25)
# Compute new row of data
new_row <- data.frame(x = rnorm(1), y = rnorm(1))
# If we were passed a progress update function, call it
if (is.function(updateProgress)) {
text <- paste0("x:", round(new_row$x, 2), " y:", round(new_row$y, 2))
updateProgress(detail = text)
}
# Add the new row of data
dat <- rbind(dat, new_row)
}
dat
}
####
# Define UI for the application
ui <- dashboardPage(
skin = "blue",
dashboardHeader(title = "Diet Database"),
dashboardSidebar(
sidebarMenu(
menuItem("Parameters",
tabName = "paramaters",
icon = shiny::icon("bar-chart")))
),
dashboardBody(
tabItems(
tabItem(
tabName = "paramaters",
fluidRow(
shiny::column(
width = 4,
shinydashboard::box(
title = "Predator",
status = "primary",
solidHeader = TRUE,
collapsible = TRUE,
width = NULL,
shiny::helpText("Select a predator to view its connections and prey items:"),
shiny::selectInput(
"pred",
shiny::h5("Predator Scientific Name:"),
c(NA,pred.name))),
shinydashboard::box(
title = "Prey",
status = "primary",
solidHeader = TRUE,
collapsible = TRUE,
width = NULL,
shiny::helpText("Select a prey taxa to view its connections and predators:"),
shiny::selectInput(
"prey",
shiny::h5("Prey Taxa:"),
c(NA,prey.tax))),
shinydashboard::box(
title = "Diet Metric",
status = "primary",
solidHeader = TRUE,
collapsible = TRUE,
width = NULL,
shiny::helpText("Select a diet metric to use:"),
shiny::selectInput(
"dietmetric",
shiny::h5("Diet Metric:"),
c("Frequency of Occurrence" = "Frequency_of_Occurrence",
"Wet Weight" = "Weight",
"Dry Weight" = "Dry_Weight",
"Volume" = "Volume",
"Index of Relative Importance" = "IRI",
"Index of Caloric Importance" = "ICI",
"Number" = "Number"))),
shinydashboard::box(
title = "Taxonomic Level",
status = "primary",
solidHeader = TRUE,
collapsible = TRUE,
width = NULL,
shiny::helpText("Select a taxonomic level of nodes:"),
shiny::selectInput(
"nodetax",
shiny::h5("Taxonomic Level:"),
c("Order" = "Order",
"Family" = "Family",
"Genus" = "Genus",
"Species" = "Species"))),
shinydashboard::box(
title = "Generate Network",
status = "primary",
solidHeader = T,
collapsible = T,
width = NULL,
actionButton("makenet", "Generate")
)
),
#Area for network to be displayed
shiny::column(
width = 8,
shinydashboard::box(
title = "Trophic Network",
status = "primary",
solidHeader = TRUE,
collapsible = FALSE,
width = NULL,
forceNetworkOutput("netplot")
)
)
))
)))
server <- function(input, output, session) {
network.data <- eventReactive(input$makenet, {
edgelist <- Diet %>% filter(Predator_Scientific_Name == input$pred|Lowest_Taxonomic_Identification_Prey == input$prey
) %>% select(
paste(input$nodetax, "Predator", sep = "_"),
Class_Predator,
paste(input$nodetax, "Prey", sep = "_"),
Class_Prey,
input$dietmetric
)
colnames(edgelist) <- c("SourceName",
"SourceClass",
"TargetName",
"TargetClass",
"Weight")
edgelist <- edgelist[complete.cases(edgelist),]
})
output$netplot <- renderForceNetwork( {
network.data()
ig <-igraph::simplify(igraph::graph_from_data_frame(edgelist[,c(1,3,5)], directed = TRUE))
SourceID <- TargetID <- c()
for (i in 1:nrow(edgelist)) {
SourceID[i] <- which(edgelist[i,1] == V(ig)$name)-1
TargetID[i] <- which(edgelist[i,3] == V(ig)$name)-1
}
#Create edgelist that contains source and target nodes and edge weights
edgeList <- cbind(edgelist, SourceID, TargetID)
nodeList <- data.frame(ID = c(0:(igraph::vcount(ig) - 1)),
nName = igraph::V(ig)$name)
#Determine and assign groups based on class
preddf <-
data.frame(SciName = edgelist[, 1], class = edgelist[, 2])
preydf <-
data.frame(SciName = edgelist[, 3], class = edgelist[, 4])
groupsdf <- rbind(preddf, preydf)
groupsdf <- groupsdf %>% mutate(SciName = as.character(SciName),
class = as.character(class))
nodeGroup <- c()
for (i in 1:nrow(nodeList)) {
index <- which(groupsdf[, 1] == nodeList$nName[i])
nodeGroup[i] <- groupsdf[index[1], 2]
}
nodeList <-
cbind(nodeList,
nodeGroup)
progress <- shiny::Progress$new()
progress$set(message = "Generating your network...", value = 0)
# Close the progress when this reactive exits (even if there's an error)
on.exit(progress$close())
# Create a callback function to update progress.
# Each time this is called:
# - If `value` is NULL, it will move the progress bar 1/5 of the remaining
# distance. If non-NULL, it will set the progress to that value.
# - It also accepts optional detail text.
updateProgress <- function(value = NULL, detail = NULL) {
if (is.null(value)) {
value <- progress$getValue()
value <- value + (progress$getMax() - value) / 5
}
progress$set(value = value, detail = detail)
}
# Compute the new data, and pass in the updateProgress function so
# that it can update the progress indicator.
compute_data(updateProgress)
networkD3::forceNetwork(
Links = edgeList,
# data frame that contains info about edges
Nodes = nodeList,
# data frame that contains info about nodes
Source = "SourceID",
# ID of source node
Target = "TargetID",
# ID of target node
Value = "Weight",
# value from the edge list (data frame) that will be used to value/weight relationship amongst nodes
NodeID = "nName",
# value from the node list (data frame) that contains node
Group = "nodeGroup",
# value from the node list (data frame) that contains value we want to use for node color
fontSize = 25,
opacity = 0.85,
zoom = TRUE,
# ability to zoom when click on the node
opacityNoHover = 0.4 # opacity of labels when static
)
})
}
# Run the application
shinyApp(ui = ui, server = server)
I am sharing my fixed code in case it helps someone in the future. I basically just changed the top of the server code.
network.data <- eventReactive(input$makenet, {
Diet %>% filter(Predator_Scientific_Name == input$pred|Lowest_Taxonomic_Identification_Prey == input$prey
) %>% select(
paste(input$nodetax, "Predator", sep = "_"),
Class_Predator,
paste(input$nodetax, "Prey", sep = "_"),
Class_Prey,
input$dietmetric
) %>% rename("SourceName" = paste(input$nodetax, "Predator", sep = "_"),
"SourceClass" = Class_Predator,
"TargetName" = paste(input$nodetax, "Prey", sep = "_"),
"TargetClass" = Class_Prey,
"Weight" = input$dietmetric) %>% na.omit()
})
output$netplot <- renderForceNetwork( {
edgelist <- network.data()