I have a function called "my function" which creates a ggplot graph and saves it within the function.
#define a function "add_data" to transpose peak values into summary table
my_function <- function(exp, cond) {
#Read in appropriate experiment number
#Remove first 2 columns
#Rename first column to "mintime" and convert to minutes
#Normalize raw fluorescence values
flowdata <- read_csv(paste0(exp, ".csv"))
title <- cond
flowdata <- flowdata[, -c(1:2)] %>%
rename(mintime = 1) %>%
transform(mintime = mintime / 60)
flowdata[,-1] <- data.frame(lapply(flowdata[,-1], function(X) X/X[1]))
#Exclude values up to 5 minutes
#Determine number of peaks per cell
#Add number of peaks per cell to summary table
flowdata_cut <- flowdata[which(flowdata$mintime>=5),]
peak_info <- lapply(flowdata_cut[,-1], findpeaks, threshold=2)
numberpeak <- unlist(lapply(peak_info, nrow))
summarypeaks <- add_peaks(summarypeaks, numberpeak, title)
#Prepare data for line graph
melted <- melt(flowdata, id.vars="mintime")
#####CREATE GRAPH#####
#Plot graph
ggplot(data=melted, aes(x=mintime, y=value, group=variable)) +
geom_line(show.legend = FALSE) +
scale_x_continuous(limits = c(3, 12), breaks = seq(3, 12, by = 3)) +
labs(y="Fluo-4 fluorescence (F/F0)", x = "Time (min)") +
ggtitle(title) +
theme_bw() +
# remove elements we don't need
theme(panel.grid = element_blank(),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(colour = "black"),
panel.background = element_blank())
#####SAVE GRAPH#####
#Save line graph as .png file
ggsave(filename = paste0(exp, "_Line_Graph.jpg"), width = 8, height = 4)
# Return
return(summarypeaks)
}
When I knit the document, the ggplot graph doesn't show in my HTML output despite these parameters
{r fig.cap="Experiment DATE", results = TRUE, eval = TRUE, echo= FALSE, warning=FALSE, message=FALSE, error=FALSE}
#Add experiment and condition
summarypeaks <- my_function(1284, "CONDITION")
Is there a way for the R Markdown HTML document to display the outputs of plots created within a function?
There's a way of getting the plot to show even without returning it, by using side effects. Specifically, you need to save your ggplot object into some variable, say p, and then tell R to print it using plot(). You don't need to change anything about how your function returns its results, it will just happen as a side effect. See below an updated version of your function:
#define a function "add_data" to transpose peak values into summary table
my_function <- function(exp, cond) {
#Read in appropriate experiment number
#Remove first 2 columns
#Rename first column to "mintime" and convert to minutes
#Normalize raw fluorescence values
flowdata <- read_csv(paste0(exp, ".csv"))
title <- cond
flowdata <- flowdata[, -c(1:2)] %>%
rename(mintime = 1) %>%
transform(mintime = mintime / 60)
flowdata[,-1] <- data.frame(lapply(flowdata[,-1], function(X) X/X[1]))
#Exclude values up to 5 minutes
#Determine number of peaks per cell
#Add number of peaks per cell to summary table
flowdata_cut <- flowdata[which(flowdata$mintime>=5),]
peak_info <- lapply(flowdata_cut[,-1], findpeaks, threshold=2)
numberpeak <- unlist(lapply(peak_info, nrow))
summarypeaks <- add_peaks(summarypeaks, numberpeak, title)
#Prepare data for line graph
melted <- melt(flowdata, id.vars="mintime")
#####CREATE GRAPH#####
#Plot graph
p <- ggplot(data=melted, aes(x=mintime, y=value, group=variable)) +
geom_line(show.legend = FALSE) +
scale_x_continuous(limits = c(3, 12), breaks = seq(3, 12, by = 3)) +
labs(y="Fluo-4 fluorescence (F/F0)", x = "Time (min)") +
ggtitle(title) +
theme_bw() +
# remove elements we don't need
theme(panel.grid = element_blank(),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(colour = "black"),
panel.background = element_blank())
plot(p)
#####SAVE GRAPH#####
#Save line graph as .png file
ggsave(filename = paste0(exp, "_Line_Graph.jpg"), width = 8, height = 4)
# Return
return(summarypeaks)
}
Related
I have a dataframe df with 4 unique UID - 1001,1002,1003,1004.
I want to write a user-defined function in R that does the following:
Plots Turbidity against Time for each unique UID. Turbidity values are the ones in the Time_1, Time_2 and Time_3 columns. For example, UID = 1001 will have 4 plots in one graph
Add a legend to each graph such as M-L, F-L, M-R, and F-R (from columns Gen and Type)
Add a title to each graph. For example- UID:1001
Export the graphs as pdf or jpeg or tiff pdf files - 4 graphs per page
# dataset
Gen <- c('M','M','M','M','F','F','F','F','M','M','M','M','F','F','F','F')
Site <- rep('FRX',length(gen))
Type <- c('L','L','L','L','L','L','L','L','R','R','R','R','R','R','R','R')
UID <- c(1001,1002,1003,1004,1001,1002,1003,1004,1001,1002,1003,1004,1001,1002,1003,1004)
Time_1 <- c(100.78,112.34,108.52,139.19,149.02,177.77,79.18,89.10,106.78,102.34,128.52,119.19,129.02,147.77,169.18,170.11)
Time_2 <- c(150.78,162.34,188.53,197.69,208.07,217.76,229.48,139.51,146.87,182.54,189.57,199.97,229.28,247.73,269.91,249.19)
Time_3 <- c(250.78,262.34,288.53,297.69,308.07,317.7,329.81,339.15,346.87,382.54,369.59,399.97,329.28,347.73,369.91,349.19)
df <- data.frame(Gen,Site,Type,UID,Time_1,Time_2,Time_3)
df
My attempt
library(ggplot2)
library(tidyr)
# See below for my thoughts/attempt- I am open to other R libraries and approaches
graphplotter <-function(x){
# 1. Convert from wide to long
data_long <- gather(df, time, turbidity, Time_1:Time_3, factor_key=TRUE)
data_long
#2. plot for each unique UID- 1001 to 1004 and add legend
basic <- ggplot(datalong, aes(time, turbidity, shape=Tree)) + geom_point() + geom_line()
basic + theme(
legend.position = c(.95, .95),
legend.justification = c("right", "top"),
legend.box.just = "right",
legend.margin = margin(6, 6, 6, 6))
#3. add title
print(basic+ labs( title= "UID: 1001, Tubidity against time", y="turbidity", x = "Time in hours"))
#4. export as pdf
pdf("turbdity-time.pdf")
par(mfrow = c(2, 2)) ## set the layout to be 2 by 2
sapply(1:4, function(i) plot(basic[,i]))
dev.off()
}
I want all four graphs to look something like this (ignore the circumference and age, should be turbidity and time).
Thanks
I use facet_wrap
graphplotter <-function(x){
x %>%
gather(., time, turbidity, Time_1:Time_3, factor_key=TRUE) %>%
mutate(label = (paste0(Gen, "-", Type))) %>%
#group_by(UID) %>%
ggplot(aes(color = label)) + geom_point(aes(time, turbidity, shape = label, group = label)) +
geom_line(aes(time, turbidity, group = label)) + facet_wrap(~UID) + theme(
legend.position = c(1, 1),
legend.justification = c("right", "top"),
legend.box.just = "right",
legend.margin = margin(1, 1, 1, 1),
legend.text = element_text(size = 7))
}
graphplotter(df)
Is there a way to create a function for my code below? I have multiple csv files that I'm running this exact code on, but it has been tiring using this same code over and over to do the same thing (and has made my script very long). Here is my code-
#define a function "add_peaks" to transpose peak values into summary table
add_peaks <- function(df, vec, colname) {
if(colname %in% names(df)) vec <- c(df[[colname]], vec)
new_row <- max(nrow(df), length(vec))
new_df <- df[1:new_row, ,drop = FALSE]
new_df[colname] <- c(vec, rep(NA, new_row - length(vec)))
new_df[is.na(new_df)] <- 0
rownames(new_df) <- NULL
new_df
}
#####INPUT#####
#Read in appropriate experiment number
#Remove first 2 columns
#Rename first column to "mintime" and convert to minutes
#Normalize raw fluorescence values
flowdata <- read_csv("####.csv") ####CHANGE EXP NUMBER
title <- "CONDITION HERE" ####CHANGE CONDITION"
flowdata <- flowdata[, -c(1:2)] %>%
rename(mintime = 1) %>%
transform(mintime = mintime / 60)
flowdata[,-1] <- data.frame(lapply(flowdata[,-1], function(X) X/X[1]))
#Exclude values up to 5 minutes
#Determine number of peaks per cell
#Add number of peaks per cell to summary table
flowdata_cut <- flowdata[which(flowdata$mintime>=5),]
peak_info <- lapply(flowdata_cut[,-1], findpeaks, threshold=2)
numberpeak <- unlist(lapply(peak_info, nrow))
summarypeaks <- add_peaks(summarypeaks, numberpeak, title)
#Prepare data for line graph
melted <- melt(flowdata, id.vars="mintime")
#####CREATE GRAPH#####
#Plot graph
ggplot(data=melted, aes(x=mintime, y=value, group=variable)) +
geom_line(show.legend = FALSE) +
scale_x_continuous(limits = c(3, 12), breaks = seq(3, 12, by = 3)) +
labs(y="Fluo-4 fluorescence (F/F0)", x = "Time (min)") +
ggtitle(title) +
theme_bw() +
# remove elements we don't need
theme(panel.grid = element_blank(),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(colour = "black"),
panel.background = element_blank())
#####SAVE GRAPH#####
#Save line graph as .png file
ggsave(filename = "####_Line_Graph.png", ####CHANGE EXP NUMBER
width = 8, height = 4)
Please let me know if it is possible (code is appreciated), even if it is part of it only. Obviously I am new to coding. Thank you!
Below is your code, extracted into a function that takes 2 arguments - the experiment number (exp) and the condition (cond).
my_function <- function(exp, cond) {
#Read in appropriate experiment number
#Remove first 2 columns
#Rename first column to "mintime" and convert to minutes
#Normalize raw fluorescence values
flowdata <- read_csv(paste0(exp, ".csv"))
title <- cond
flowdata <- flowdata[, -c(1:2)] %>%
rename(mintime = 1) %>%
transform(mintime = mintime / 60)
flowdata[,-1] <- data.frame(lapply(flowdata[,-1], function(X) X/X[1]))
#Exclude values up to 5 minutes
#Determine number of peaks per cell
#Add number of peaks per cell to summary table
flowdata_cut <- flowdata[which(flowdata$mintime>=5),]
peak_info <- lapply(flowdata_cut[,-1], findpeaks, threshold=2)
numberpeak <- unlist(lapply(peak_info, nrow))
summarypeaks <- add_peaks(summarypeaks, numberpeak, title)
#Prepare data for line graph
melted <- melt(flowdata, id.vars="mintime")
#####CREATE GRAPH#####
#Plot graph
ggplot(data=melted, aes(x=mintime, y=value, group=variable)) +
geom_line(show.legend = FALSE) +
scale_x_continuous(limits = c(3, 12), breaks = seq(3, 12, by = 3)) +
labs(y="Fluo-4 fluorescence (F/F0)", x = "Time (min)") +
ggtitle(title) +
theme_bw() +
# remove elements we don't need
theme(panel.grid = element_blank(),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(colour = "black"),
panel.background = element_blank())
#####SAVE GRAPH#####
#Save line graph as .png file
ggsave(filename = paste0(exp, "_Line_Graph.png"), width = 8, height = 4)
# Return
return(summarypeaks)
}
If you have experiment number 005 and condition "test", call the function like so, assigning the result to replace the old value of summarypeaks:
summarypeaks <- my_function(005, "test")
I have R code to create 100 individual line plots (2 lines per plot, and 100 such plots, for 100 patients) using the package ggplot2 and function ggplot. I am not sure how to atomize the code so that each plot will be created with filename = patient number, title on each plot=patient number and these 100 plots can be saved directly to a folder.
I have tried using function to create a loop to make the graphs, and then use ggsave to save them. But on calling my function, nothing seems to happen. Not sure how to troubleshoot. I tried it with only 10 patients.
My data: It is the response given by 100 patients to 22 questions(on a questionnaire). There are 100 columns - one per patient, and 44 rows. The first 22 rows are for the actual response given by patient (Grp=A) and the next 22 rows are for for what the right response should have been (Grp=B).
#Column names = patient no.
Patno <- c("P_1", "P_2", "P_3", "P_4", "P_5" ,"P_6" ,"P_7", "P_8", "P_9", "P_10")
#The text to be printed on each plot
Plno <- c("0001", "0002", "0003", "0004", "0005", "0006", "0007", "0008", "0009", "00010")
# create graphing function
pdf.graph <- function(df){
for (i in Patno){
p <- ggplot(data=df, aes_string(x="QN", y="Patno[i]", group="Grp")) +
geom_line(aes_string(color="Grp", linetype="Grp", size="Grp")) +
geom_point(aes_string(color="Grp"))
p1 <- p +
scale_linetype_manual(values = c("solid", "dotted")) +
scale_color_manual(values = c('#E69F00','#999999')) +
scale_size_manual(values = c(1.25, 1)) +
theme_bw() +
theme(panel.border = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(colour = "black"),
legend.position="none") +
scale_x_continuous(name = " ", limits = c(1, 22),
breaks=seq(1,22), expand = c(0, 0) ) +
scale_y_discrete(name=" ",
labels = c("TRUE" = "T", "FALSE" = "F", "Don't Know" = "DK")) +
annotate(geom = 'text', label = " Plno[i]",
x = -Inf, y = Inf, hjust = 0, vjust = 1)
# save plots as .pdf
ggsave(plot,
file = paste(resullts, 'projection_graphs/county_graphs/',
Plno[i], ".pdf", sep=''),
scale = 2)
print(p1)
dev.off()
}
}
pdf.graph(df)
When I call the function with the dataset df, there is no error, but no o/p is produced. I would really appreciate a reply, as to how I can get around this. It is really important for me. I've tried a number of ways.
The idea is to combine R packages ClustOfVar and ggdendro to give a visual summary of variable clustering.
When there are few columns in the data, the result is pretty good except that there are areas not covered(as circled in the chart below). Using mtcars for example:
library(plyr)
library(ggplot2)
library(gtable)
library(grid)
library(gridExtra)
library(ClustOfVar)
library(ggdendro)
fit = hclustvar(X.quanti = mtcars)
labels = cutree(fit,k = 5)
labelx = data.frame(Names=names(labels),group = paste("Group",as.vector(labels)),num=as.vector(labels))
p1 = ggdendrogram(as.dendrogram(fit), rotate=TRUE)
df2<-data.frame(cluster=cutree(fit, k =5), states=factor(fit$labels,levels=fit$labels[fit$order]))
df3<-ddply(df2,.(cluster),summarise,pos=mean(as.numeric(states)))
p2 = ggplot(df2,aes(states,y=1,fill=factor(cluster)))+geom_tile()+
scale_y_continuous(expand=c(0,0))+
theme(axis.title=element_blank(),
axis.ticks=element_blank(),
axis.text=element_blank(),
legend.position="none")+coord_flip()+
geom_text(data=df3,aes(x=pos,label=cluster))
gp1<-ggplotGrob(p1)
gp2<-ggplotGrob(p2)
maxHeight = grid::unit.pmax(gp1$heights[2:5], gp2$heights[2:5])
gp1$heights[2:5] <- as.list(maxHeight)
gp2$heights[2:5] <- as.list(maxHeight)
grid.arrange(gp2, gp1, ncol=2,widths=c(1/6,5/6))
When there are a large number of columns, another issue occurs. That is, the height of the color tiles part does not match the height the dendrogram.
library(ClustOfVar)
library(ggdendro)
X = data.frame(mtcars,mtcars,mtcars,mtcars,mtcars,mtcars)
fit = hclustvar(X.quanti = X)
labels = cutree(fit,k = 5)
labelx = data.frame(Names=names(labels),group = paste("Group",as.vector(labels)),num=as.vector(labels))
p1 = ggdendrogram(as.dendrogram(fit), rotate=TRUE)
df2<-data.frame(cluster=cutree(fit, k =5), states=factor(fit$labels,levels=fit$labels[fit$order]))
df3<-ddply(df2,.(cluster),summarise,pos=mean(as.numeric(states)))
p2 = ggplot(df2,aes(states,y=1,fill=factor(cluster)))+geom_tile()+
scale_y_continuous(expand=c(0,0))+
theme(axis.title=element_blank(),
axis.ticks=element_blank(),
axis.text=element_blank(),
legend.position="none")+coord_flip()+
geom_text(data=df3,aes(x=pos,label=cluster))
gp1<-ggplotGrob(p1)
gp2<-ggplotGrob(p2)
maxHeight = grid::unit.pmax(gp1$heights[2:5], gp2$heights[2:5])
gp1$heights[2:5] <- as.list(maxHeight)
gp2$heights[2:5] <- as.list(maxHeight)
grid.arrange(gp2, gp1, ncol=2,widths=c(1/6,5/6))
#Sandy Muspratt has actually provided an excellent solution to this IF we have the R upgraded to version 3.3.1.
R: ggplot slight adjustment for clustering summary
But since I cannot change the version of the R deployed in the corporate server, I wonder if there is any other workaround that can align these two parts.
As far as I can tell, your code is not far wrong. The problem is that you are trying to match a continuous scale to a discrete scale when you merge the two plots. Also, it appears that ggdendrogram() adds additional space to the y-axis.
library(plyr)
library(ggplot2)
library(gtable)
library(grid)
library(gridExtra)
library(ClustOfVar)
library(ggdendro)
# Data
X = data.frame(mtcars,mtcars,mtcars,mtcars,mtcars,mtcars)
# Cluster analysis
fit = hclustvar(X.quanti = X)
# Labels data frames
df2 <- data.frame(cluster = cutree(fit, k =5),
states = factor(fit$labels, levels = fit$labels[fit$order]))
df3 <- ddply(df2, .(cluster), summarise, pos = mean(as.numeric(states)))
# Dendrogram
# scale_x_continuous() for p1 should match scale_x_discrete() from p2
# scale_x_continuous strips off the labels. I grab them from df2
# scale _y_continuous() puts a little space between the labels and the dendrogram
p1 <- ggdendrogram(as.dendrogram(fit), rotate = TRUE) +
scale_x_continuous(expand = c(0, 0.5), labels = levels(df2$states), breaks = 1:length(df2$states)) +
scale_y_continuous(expand = c(0.02, 0))
# Tiles and labels
p2 <- ggplot(df2,aes(states, y = 1, fill = factor(cluster))) +
geom_tile() +
scale_y_continuous(expand = c(0, 0)) +
scale_x_discrete(expand = c(0, 0)) +
geom_text(data = df3, aes(x = pos, label = cluster)) +
coord_flip() +
theme(axis.title = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
legend.position = "none")
# Get the ggplot grobs
gp1 <- ggplotGrob(p1)
gp2 <- ggplotGrob(p2)
# Make sure the heights match
maxHeight <- unit.pmax(gp1$heights, gp2$heights)
gp1$heights <- as.list(maxHeight)
gp2$heights <- as.list(maxHeight)
# Combine the two plots
grid.arrange(gp2, gp1, ncol = 2,widths = c(1/6, 5/6))
There is a need to plot the result of a unsupervised rectangular SOM model. Additional requirements: 1) draw each node as a pie chart with corresponding observed classes; size of a chart should reflect the number of samples in the node. Default plot.kohonen doesn't suit such a case.
Here is a possible solution. The first function som.prep.df is called from the second 'som.draw', which has only two parameters SOM model and observed classes of training set.
som.prep.df <- function(som.model, obs.classes, scaled) {
require(reshape2)
lev <- factor(wine.classes)
df <- data.frame(cbind(unit=som.model$unit.classif, class=as.integer(lev)))
# create table
df2 <- data.frame(table(df))
df2 <- dcast(df2, unit ~ class, value.var="Freq")
df2$unit <- as.integer(df2$unit)
# calc sum
df2$sum <- rowSums(df2[,-1])
# calc fraction borders of classes in each node
tmp <- data.frame(cbind(X0=rep(0,nrow(df2)),
t(apply(df2[,-1], 1, function(x) {
cumsum(x[1:(length(x)-1)]) / x[length(x)]
}))))
df2 <- cbind(df2, tmp)
df2 <- melt(df2, id.vars=which(!grepl("^\\d$", colnames(df2))))
df2 <- df2[,-ncol(df2)]
# define border for each classs in each node
tmp <- t(apply(df2, 1, function(x) {
c(x[paste0("X", as.character(as.integer(x["variable"])-1))],
x[paste0("X", as.character(x["variable"]))])
}))
tmp <- data.frame(tmp, stringsAsFactors=FALSE)
tmp <- sapply(tmp, as.numeric)
colnames(tmp) <- c("ymin", "ymax")
df2 <- cbind(df2, tmp)
# scale size of pie charts
if (is.logical(scaled)) {
if (scaled) {
df2$xmax <- log2(df2$sum)
} else {
df2$xmax <- df2$sum
}
}
df2 <- df2[,c("unit", "variable", "ymin", "ymax", "xmax")]
colnames(df2) <- c("unit", "class", "ymin", "ymax", "xmax")
# replace classes with original levels names
df2$class <- levels(lev)[df2$class]
return(df2)
}
som.draw <- function(som.model, obs.classes, scaled=FALSE) {
# scaled - make or not a logarithmic scaling of the size of each node
require(ggplot2)
require(grid)
g <- som.model$grid
df <- som.prep.df(som.model, obs.classes, scaled)
df <- cbind(g$pts, df[,-1])
df$class <- factor(df$class)
g <- ggplot(df, aes(fill=class, ymax=ymax, ymin=ymin, xmax=xmax, xmin=0)) +
geom_rect() +
coord_polar(theta="y") +
facet_wrap(x~y, ncol=g$xdim, nrow=g$ydim) +
theme(axis.ticks = element_blank(),
axis.text.y = element_blank(),
axis.text.x = element_blank(),
panel.margin = unit(0, "cm"),
strip.background = element_blank(),
strip.text = element_blank(),
plot.margin = unit(c(0,0,0,0), "cm"),
panel.background = element_blank(),
panel.grid = element_blank())
return(g)
}
Usage example.
require(kohonen)
data(wines)
som.wines <- som(scale(wines), grid = somgrid(5, 5, "rectangular"))
# Non-scaled map
som.draw(som.wines, wine.classes)
# Scaled map
som.draw(som.wines, wine.classes, TRUE)
This function can also be used for the visualization of supervised models as well. But it suits only for rectangular maps. Hope this will help someone.
There are several possible improvements:
Choose a better scaling function than logarithm. Because now nodes with single sample become invisible after scaling.
Add legend to the whole plot which will reflect the size of nodes.
Or add information about nodes population on each chart.
PS. The code isn't very elegant, so any suggestions and improvements are welcome.