How to set the distance between discrete values in ggplot2? - r

I am attempting to use grid.arrange to plot several graphs in one column, as the x axis is the same for all graphs. However the different graphs have different number of discrete values, resulting in Samples in the top graph more distanced than the graph below. Is there a way to set the distance between discrete values on an axis so the distance between Sample1 and Sample2 lines is the same for both graphs? Thanks!
Here is an example:
library(reshape2)
library(tidyverse)
library(gridExtra)
#Data frame 1
a <- c(1,2,3,4,5)
b <- c(10,20,30,40,50)
Species <- factor(c("Species1","Species2","Species3","Species4","Species5"))
bubba <- data.frame(Sample1=a,Sample2=b,Species=Species)
bubba$Species=factor(bubba$Species, levels=bubba$Species)
xm=melt(bubba,id.vars = "Species", variable.name="Samples", value.name = "Size")
#Data frame 2
c <- c(1,2,3,4,5)
d <- c(10,20,30,40,50)
e <- c(1,2,3,4,5)
f <- c(10,20,30,40,50)
bubban <- data.frame(Sample1=c,Sample2=d,Sample3=e,Sample4=f,Species=Species)
xn=melt(bubban,id.vars = "Species", variable.name="Samples", value.name = "Size")
#Not related, but part of my original script i am using
shrink_10s_trans = trans_new("shrink_10s",
transform = function(y){
yt = ifelse(y >= 10, y*0.1, y)
return(yt)
},
inverse = function(yt){
return(yt) # Not 1-to-1 function, picking one possibility
}
)
#Make plot 1
p1=ggplot(xm,aes(x= Species,y= fct_rev(Samples), fill = Size < 10))+
geom_point(aes(size=Size), shape = 21)+
scale_size_area(trans = shrink_10s_trans, max_size = 10,
breaks = c(1,3,5,10,20,30,40,50),
labels = c(1,3,5,10,20,30,40,50)) +
scale_fill_manual(values = c(rgb(136,93,100, maxColorValue = 255),
rgb(236,160,172, maxColorValue = 255))) +
theme_bw()+theme(axis.text.x = element_text(angle = -45, hjust = 1))+scale_x_discrete(position = "top")
#Make plot 2
p2=ggplot(xn,aes(x= Species,y= fct_rev(Samples), fill = Size < 10))+
geom_point(aes(size=Size), shape = 21)+
scale_size_area(trans = shrink_10s_trans, max_size = 10,
breaks = c(1,3,5,10,20,30,40,50),
labels = c(1,3,5,10,20,30,40,50)) +
scale_fill_manual(values = c(rgb(136,93,100, maxColorValue = 255),
rgb(236,160,172, maxColorValue = 255))) +
theme_bw()+theme(axis.text.x = element_blank())
#arrange the plots
grid.arrange(p1,p2,nrow=2)

Instead of using grid.extra use ggpubr::ggarrange function. It lets you specify heights of each plot and set shared legend.
# Using plots generated with OPs code
ggpubr::ggarrange(p1, p2, nrow = 2, heights = c(1.3, 2),
common.legend = TRUE, legend = "right")
With argument heights you can set relative heights of each provided plot.

Related

How do I represent an enormous bar plot with very long axis labels in R and ggplot?

I have a ggplot of bar graphs for each cluster.
The axis labels ("Path") for the bar plot are all unique and long, but they are grouped by "PathTypes" and "Cluster" - info which I want to represent on the bar graphs. I use a texture (stripes, dots, etc) from the ggpattern package to represent the "PathType" and I use colors to represent the "Cluster".
The resulting graph I produce is illegible bc it's just too large. I've been butting heads with facet_grid and facet_wrap. I am fine with using 2-3 pages to represent all the clusters, but I'm unsure about how to split the data smartly to accomplish that.
Example code follows:
library(data.table)
library(ggpattern)
library(gridExtra)
library(ggpubr)
library(truncnorm)
library(ggplot2)
library(stringi)
# generating sample data for data table called all.cluster.dt
PathType <- sample(x = c("Type1", "Type2", "Type3", "Type4", "Type5"), # create the PathType column
size = 400,
replace = T)
Score <- rtruncnorm(n = 400, a = 15, b = 90, mean = 55, sd = 15) # create the Score Column
Path <- NA # initialize the Path column
Path.generator <- function() { # function to write unique Paths
a <- do.call(paste0, replicate(10, sample(LETTERS, 15, TRUE), FALSE))
single.Name <- paste(a, collapse = ' ')
return(single.Name)
}
cluster <- sample(x = c(1:14), # create the Cluster column
size = 400,
replace = T)
all.cluster.dt <- data.table( # create the data table with desired columns
PathType,
Score,
Path,
cluster
)
for(i in 1:length(all.cluster.dt$Path)){ # loop down Path column calling function to generate unique Path name for each row
all.cluster.dt$Path[i] <- Path.generator()
}
wrap.it <- function(x, len) # function to try and wrap long Path label text
{
sapply(x, function(y) paste(strwrap(y, len),
collapse = "\n"),
USE.NAMES = FALSE)
}
# Call this function with a list or vector
wrap.labels <- function(x, len)
{
if (is.list(x))
{
lapply(x, wrap.it, len)
} else {
wrap.it(x, len)
}
}
wr.lap <- wrap.labels(all.cluster.dt$Path, 40) # wrap Path labels to 40 characters long
all.cluster.dt$Path <- wr.lap
all.cluster.dt$Path <- factor(all.cluster.dt$Path, # group and factorize the data by PathType and Score
levels = unique(all.cluster.dt$Path[order(all.cluster.dt$PathType, all.cluster.dt$Score)]))
cluster.color.df <- data.frame("cluster" = c(1:14), # add custom colors to represent which Cluster the Path belongs to
"color" = c("#F5F2D4", "#CAD8F2", "#8FB6FF", "#FFFDD7", "#DADADA", "#DAEB9B", "#EED1F2", "#C9E2D0", "#FFDFA2", "#DFFFD6", "#F6DFDE", "#E2DEF5", "#F0B8BC", "#CAF3EF"))
setDT(all.cluster.dt)[cluster.color.df, color := i.color, on = .(cluster)] # match color to cluster in all.cluster.dt
bar.plots <- ggplot(all.cluster.dt, aes(x=Score, y=Path)) +
ggpattern::geom_col_pattern( # adds texture/patterns to the bars based on the PathType column
aes(pattern = `PathType`),
fill = all.cluster.dt$color,
colour = "black",
pattern_density = 0.2, # how dense the pattern should be
pattern_fill = "black",
pattern_spacing = 0.1) +
scale_x_continuous(expand = c(0, 0), limits = c(0, 90)) +
theme_bw() +
theme(axis.title.y = element_blank()) +
theme(legend.position = "none",
text = element_text(size = 8))
bar.plots + facet_grid(rows = vars(cluster), scales = "fixed") # draw the bar graph
I now attempt to use grid.arrange and ggsave to arrange the plots by Cluster on a page, but get an error: "replacement has 17 rows, data has 400"...
pdf("bar_graphs.pdf", wi=8.1,hei=10.6)
do.call(grid.arrange, bar.plots)
ggsave("bar_graphs.pdf", marrangeGrob(bar.plots, nrow=4, ncol=2))
dev.off()
Any answers that provide the overall solution to my goal (getting my bar graphs into a legible figure) or elucidate why I get an error arranging grobs is much appreciated.

How to override an aes color (controlled by a variable) based on a condition?

I'm trying to graph multiple nonlinear least squares regression in r in different colors based on the value of a variable.
However, I also display the equation of the last one, and I would like the color in the nonlinear regression corresponding to the equation to be black as well.
What I've tried is shown in the geom_smooth() layer - I tried to include an ifelse() statement, but this doesn't work because of reasons described here: Different between colour argument and aes colour in ggplot2?
test <- function() {
require(ggplot2)
set.seed(1);
master <- data.frame(matrix(NA_real_, nrow = 0, ncol = 3))
for( i in 1:5 ) {
df <- data.frame(matrix(NA_real_, nrow = 50, ncol = 3))
colnames(df) <- c("xdata", "ydata", "test")
df$xdata = as.numeric(sample(1:100, size = nrow(df), replace = FALSE))
df$ydata = as.numeric(sample(1:3, size = nrow(df), prob=c(.60, .25, .15), replace = TRUE))
# browser()
df$test = i
master <- rbind(master, df)
}
df <- master
last <- 5
# based on https://stackoverflow.com/questions/18305852/power-regression-in-r-similar-to-excel
power_eqn = function(df, start = list(a=300,b=1)) {
m = nls(as.numeric(reorder(xdata,-ydata)) ~ a*ydata^b, start = start, data = df)
# View(summary(m))
# browser()
# eq <- substitute(italic(hat(y)) == a ~italic(x)^b*","~~italic(r)^2~"="~r2*","~~p~"="~italic(pvalue),
eq <- substitute(italic(y) == a ~italic(x)^b*","~~italic('se')~"="~se*","~~italic(p)~"="~pvalue,
list(a = format(coef(m)[1], digits = 6), # a
b = format(coef(m)[2], digits = 6), # b
# r2 = format(summary(m)$r.squared, digits = 3),
se = format(summary(m)$parameters[2,'Std. Error'], digits = 6), # standard error
pvalue = format(summary(m)$coefficients[2,'Pr(>|t|)'], digits=6) )) # p value (based on t statistic)
as.character(as.expression(eq))
}
plot1 <- ggplot(df, aes(x = as.numeric(reorder(xdata,-ydata)), y = ydata ) ) +
geom_point(color="black", shape=1 ) +
# PROBLEM LINE
stat_smooth(aes(color=ifelse(test==5, "black", test)), method = 'nls', formula = 'y~a*x^b', method.args = list(start= c(a =1,b=1)),se=FALSE, fullrange=TRUE) +
geom_text(x = quantile(df$xdata)[4], y = max(df$ydata), label = power_eqn(df), parse = TRUE, size=4, color="black") + # make bigger? add border around?
theme(legend.position = "none", axis.ticks.x = element_blank() ) + #, axis.title.x = "family number", axis.title.y = "number of languages" ) # axis.text.x = element_blank(),
labs( x = "xdata", y = "ydata", title="test" )
plot1
}
test()
This is the graph I got.
I would like the line corresponding to the points and equation to be black as well. Does anyone know how to do this?
I do not want to use a scale_fill_manual, etc., because my real data would have many, many more lines - unless the scale_fill_manual/etc. can be randomly generated.
You could use scale_color_manual using a custom created palette where your level of interest (in your example where test equals 5) is set to black. Below I use palettes from RColorBrewer, extend them if necessary to the number of levels needed and sets the last color to black.
library(RColorBrewer) # provides several great palettes
createPalette <- function(n, colors = 'Greens') {
max_colors <- brewer.pal.info[colors, ]$maxcolors # Get maximum colors in palette
palette <- brewer.pal(min(max_colors, n), colors) # Get RColorBrewer palette
if (n > max_colors) {
palette <- colorRampPalette(palette)(n) # make it longer i n > max_colros
}
# assume that n-th color should be black
palette[n] <- "#000000"
# return palette
palette[1:n]
}
# create a palette with 5 levels using the Spectral palette
# change from 5 to the needed number of levels in your real data.
mypalette <- createPalette(5, 'Spectral') # palettes from RColorBrewer
We can then use mypalette with scale_color_manual(values=mypalette) to color points and lines according to the test variable.
Please note that I have updated geom_point and stat_smooth to so that they use aes(color=as.factor(test)). I have also changed the call to power_eqn to only use data points where df$test==5. The black points, lines and equation should now be based on the same data.
plot1 <- ggplot(df, aes(x = as.numeric(reorder(xdata,-ydata)), y = ydata )) +
geom_point(aes(color=as.factor(test)), shape=1) +
stat_smooth(aes(color=as.factor(test)), method = 'nls', formula = 'y~a*x^b', method.args = list(start= c(a =1,b=1)),se=FALSE, fullrange=TRUE) +
geom_text(x = quantile(df$xdata)[4], y = max(df$ydata), label = power_eqn(df[df$test == 5,]), parse = TRUE, size=4, color="black") +
theme(legend.position = "none", axis.ticks.x = element_blank() ) +
labs( x = "xdata", y = "ydata", title="test" ) +
scale_color_manual(values = mypalette)
plot1
See resulting figure here (not reputation enough to include them)
I hope you find my answer useful.

How to add two different magnitudes of point size in a ggplot bubbles chart?

I just encountered such graph attached where two colors of geom_point are used (I believe it is made by ggplot2). Similarly, I would like to have dots of one color to range from size 1 to 5, and have another color for a series of dots for the range 10 to 50. I have however no clue on how to add two different ranges of point in one graph.
At the basic step I have:
a <- c(1,2,3,4,5)
b <- c(10,20,30,40,50)
Species <- factor(c("Species1","Species2","Species3","Species4","Species5"))
bubba <- data.frame(Sample1=a,Sample2=b,Species=Species)
bubba$Species=factor(bubba$Species, levels=bubba$Species)
xm=melt(bubba,id.vars = "Species", variable.name="Samples", value.name = "Size")
str(xm)
ggplot(xm,aes(x= Samples,y= fct_rev(Species)))+geom_point(aes(size=Size))+scale_size(range = range(xm$Size))+theme_bw()
Any would have clues where I should look into ? Thanks!
I've got an approach that gets 90% of the way there, but I'm not sure how to finish the deed. To get a single legend for size, I used a transformation to convert input size to display size. That makes the legend appearance conform to the display. What I don't have figured out yet is how to apply a similar transformation to the fill so that both can be integrated into the same legend.
Here's the transformation, which in this case shrinks everything 10 or more:
library(scales)
shrink_10s_trans = trans_new("shrink_10s",
transform = function(y){
yt = if_else(y >= 10, y*0.1, y)
return(yt)
},
inverse = function(yt){
return(yt) # Not 1-to-1 function, picking one possibility
}
)
Then we can use this transformation on the size to selectively shink only the dots that are 10 or larger. This works out nicely for the legend, aside from integrating the fill encoding with the size encoding.
ggplot(xm,aes(x= Samples,y= fct_rev(Species), fill = Size < 10))+
geom_point(aes(size=Size), shape = 21)+
scale_size_area(trans = shrink_10s_trans, max_size = 10,
breaks = c(1,2,3,10,20,30,40),
labels = c(1,2,3,10,20,30,40)) +
scale_fill_manual(values = c(rgb(136,93,100, maxColorValue = 255),
rgb(236,160,172, maxColorValue = 255))) +
theme_bw()
a <- c(1, 2, 3, 4, 5)
b <- c(10, 20, 30, 40, 50)
Species <- factor(c("Species1", "Species2", "Species3", "Species4", "Species5"))
bubba <- data.frame(Sample1 = a, Sample2 = b, Species = Species)
bubba$Species <- factor(bubba$Species, levels = bubba$Species)
xm <- reshape2::melt(bubba, id.vars = "Species", variable.name = "Samples", value.name = "Size")
ggplot(xm, aes(x = Samples, y = fct_rev(Species))) +
geom_point(aes(size = Size, color = Size)) +
scale_color_continuous(breaks = c(1,2,3,10,20,30), guide = guide_legend()) +
scale_size(range = range(xm$Size), breaks = c(1,2,3,10,20,30)) +
theme_bw()
Here's a cludge. I haven't got time to figure out the legend at the moment. Note that 1 and 10 are the same size, but a different colour, as are 3 and 40.
# Create data frame
a <- c(1, 2, 3, 4, 5)
b <- c(10, 20, 30, 40, 50)
Species <- factor(c("Species1", "Species2", "Species3", "Species4", "Species5"))
bubba <- data.frame(Sample1 = a, Sample2 = b, Species = Species)
# Restructure data
xm <- reshape2::melt(bubba, id.vars = "Species", variable.name = "Samples", value.name = "Size")
# Calculate bubble size
bubble_size <- function(val){
ifelse(val > 3, (1/15) * val + (1/3), val)
}
# Calculate bubble colour
bubble_colour <- function(val){
ifelse(val > 3, "A", "B")
}
# Calculate bubble size and colour
xm %<>%
mutate(bub_size = bubble_size(Size),
bub_col = bubble_colour(Size))
# Plot data
ggplot(xm, aes(x = Samples, y = fct_rev(Species))) +
geom_point(aes(size = bub_size, fill = bub_col), shape = 21, colour = "black") +
theme(panel.grid.major = element_line(colour = alpha("gray", 0.5), linetype = "dashed"),
text = element_text(family = "serif"),
legend.position = "none") +
scale_size(range = c(1, 20)) +
scale_fill_manual(values = c("brown", "pink")) +
ylab("Species")
I think you are looking for bubble plots in R
https://www.r-graph-gallery.com/bubble-chart/
That said, you probably want to build the right and left the side of the graphic separately and then combine.

Ticktext value does not fix ggplot2 facet_grid() breaking down when combined with ggplotly()

I have a dataframe:
gene_symbol<-c("DADA","SDAASD","SADDSD","SDADD","ASDAD","XCVXCVX","EQWESDA","DASDADS","SDASDASD","DADADASD","sdaadfd","DFSD","SADADDAD","SADDADADA","DADSADSASDWQ","SDADASDAD","ASD","DSADD")
panel<-c("growth","growth","growth","growth","big","big","big","small","small","dfgh","DF","DF","DF","DF","DF","gh","DF","DF")
ASDDA<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDb<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf1<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf2<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf3<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf4<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf5<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDA1<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDb1<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf1<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf11<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf21<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf31<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf41<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf51<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
Gene_states22 <- data.frame(gene_symbol, panel, ASDDA, ASDDb, ASDDAf, ASDDAf1, ASDDAf2,
ASDDAf3, ASDDAf4, ASDDAf5, ASDDA1, ASDDb1, ASDDAf1, ASDDAf11,
ASDDAf21, ASDDAf31, ASDDAf41, ASDDAf51)
And I create a heatmap with:
library(ggplot2); library(reshape2)
HG3 <- split(Gene_states22[,1:15], Gene_states22$panel)
HG4 <- melt(HG3, id.vars= c("gene_symbol","panel"))
HG4 <- HG4[,-5]
pp <- ggplot(HG4, aes(gene_symbol,variable)) +
geom_tile(aes(fill = value),
colour = "grey50") +
facet_grid(~panel, scales = "free" ,space = "free") +
scale_fill_manual(values = c("white", "red", "blue", "black", "yellow", "green", "brown"))
As you can see I use facet_grid to separate my heatmap into groups based on panel value. The problem is that when I use ggplotly(pp) the column width differs from group to group and my plot seems ugly.
In order to fix the issue I used adapted answer of Plotly and ggplot with facet_grid in R: How to to get yaxis labels to use ticktext value instead of range value?
:
library(plotly)
library(ggplot2)
library(data.table)
library(datasets)
#add fake model for use in facet
dt<-data.table(HG4[1:50,])
dt[,variable:=rownames(HG4)]
dt[,panel:=substr(variable,1,regexpr(" ",variable)-1)][panel=="",panel:=variable]
ggplot.test<-ggplot(dt,aes(gene_symbol,variable))+facet_grid(panel~.,scales="free_y",space="free",drop=TRUE)+
geom_tile(aes(fill = value),
colour = "grey50") +
scale_fill_manual(values = c("white", "red", "blue", "black", "yellow", "green", "brown")) +
labs(title = "Heatmap", x = "gene_symbol", y = "sample", fill = "value") +
guides(fill = FALSE)+
theme(panel.background = element_rect(fill = NA),
panel.spacing = unit(0.5, "lines"), ## It was here where you had a 0 for distance between facets. I replaced it by 0.5 .
strip.placement = "outside")
p <- ggplotly(ggplot.test)
len <- length(unique(HG4$panel))
total <- 1
for (i in 2:len) {
total <- total + length(p[['x']][['layout']][[paste('yaxis', i, sep='')]][['ticktext']])
}
spacer <- 0.01 #space between the horizontal plots
total_length = total + len * spacer
end <- 1
start <- 1
for (i in c('', seq(2, len))) {
tick_l <- length(p[['x']][['layout']][[paste('yaxis', i, sep='')]][['ticktext']]) + 1
#fix the y-axis
p[['x']][['layout']][[paste('yaxis', i, sep='')]][['tickvals']] <- seq(1, tick_l)
p[['x']][['layout']][[paste('yaxis', i, sep='')]][['ticktext']][[tick_l]] <- ''
end <- start - spacer
start <- start - (tick_l - 1) / total_length
v <- c(start, end)
#fix the size
p[['x']][['layout']][[paste('yaxis', i, sep='')]]$domain <- v
}
p[['x']][['layout']][['annotations']][[3]][['y']] <- (p[['x']][['layout']][['yaxis']]$domain[2] + p[['x']][['layout']][['yaxis']]$domain[1]) /2
p[['x']][['layout']][['shapes']][[2]][['y0']] <- p[['x']][['layout']][['yaxis']]$domain[1]
p[['x']][['layout']][['shapes']][[2]][['y1']] <- p[['x']][['layout']][['yaxis']]$domain[2]
#fix the annotations
for (i in 3:len + 1) {
#fix the y position
p[['x']][['layout']][['annotations']][[i]][['y']] <- (p[['x']][['layout']][[paste('yaxis', i - 2, sep='')]]$domain[1] + p[['x']][['layout']][[paste('yaxis', i - 2, sep='')]]$domain[2]) /2
#trim the text
p[['x']][['layout']][['annotations']][[i]][['text']] <- substr(p[['x']][['layout']][['annotations']][[i]][['text']], 1, length(p[['x']][['layout']][[paste('yaxis', i - 2, sep='')]][['ticktext']]) * 3 - 3)
}
#fix the rectangle shapes in the background
for (i in seq(0,(len - 2) * 2, 2)) {
p[['x']][['layout']][['shapes']][[i+4]][['y0']] <- p[['x']][['layout']][[paste('yaxis', i /2 + 2, sep='')]]$domain[1]
p[['x']][['layout']][['shapes']][[i+4]][['y1']] <- p[['x']][['layout']][[paste('yaxis', i /2 + 2, sep='')]]$domain[2]
}
p
But the heatmap is still not correct:
So first things first:
In your case I am not even sure whether a plotly heatmap is what you need. In addition you should never convert a complicated ggplot to plotly. It will fail! In 90% of cases. Try recreating your plot in plotly or whereever you want it to end up. Anything else ends up in coding hell.
I started by doing some research:
Here is a good description how to create heatmaps with different colors in plotly
This explains how you can create titles in subplots.
From post 1 I know that I have to create a matrix for each level in your data. So I wrote a function for that:
mymat<-as.matrix(Gene_states22[,-1:-2])
### Creates a 1-NA dummy matrix for each level. The output is stored in a list
dummy_mat<-function(mat,levels,names_col){
mat_list<-lapply(levels,function(x){
mat[mat!=x]=NA
mat[mat==x]=1
mymat=t(apply(mat,2,as.numeric))
colnames(mymat)=names_col
return(mymat)
})
names(mat_list)=levels
return(mat_list)
}
my_mat_list<-dummy_mat(mymat,c('DF','low','normal','over'),Gene_states22$gene_symbol)
### Optional: The heatmap type is peculiar - I created a text-NA matrix for each category as well
text_mat<-function(mat,levels,names_col){
mat_list<-lapply(levels,function(x){
mat[mat!=x]=NA
mat=t(mat)
colnames(mat)=names_col
return(mat)
})
names(mat_list)=levels
return(mat_list)
}
my_mat_list_t<-text_mat(mymat,c('DF','low','normal','over'),as.character(Gene_states22$gene_symbol))
In addition I needed colors for each level. These colors are created using some dataframe. You may write a similar (lapply-)loop here as well:
DF_Color <- data.frame(x = c(0,1), y = c("#DEDEDE", "#DEDEDE"))
colnames(DF_Color) <- NULL
lowColor <- data.frame(x = c(0,1), y = c("#00CCFF", "#00CCFF"))
colnames(lowColor) <- NULL
normColor <- data.frame(x = c(0,1), y = c("#DEDE00", "#DEDE00"))
colnames(normColor) <- NULL
overColor <- data.frame(x = c(0,1), y = c("#DE3333", "#DE3333"))
colnames(overColor) <- NULL
In addition we need the columns in the matrix for each panel-category:
mycols<-lapply(levels(Gene_states22$panel),function(x) grep(x,Gene_states22$panel))
I stored this in a list as well.
Next I use lapply-loop to plot. I store the values in a list and use subplot to put everything together:
library(plotly)
p_list<-lapply(1:length(mycols),function(j){
columns<-mycols[[j]]
p<-plot_ly(
type = "heatmap"
) %>% add_trace(
y=rownames(my_mat_list$DF),x=colnames(my_mat_list$DF)[columns],
z = my_mat_list$DF[,columns],
xgap=3,ygap=3, text=my_mat_list_t$DF[,columns],hoverinfo="x+y+text",
colorscale = DF_Color,
colorbar = list(
len = 0.3,
y = 0.3,
yanchor = 'top',
title = 'DF series',
tickvals = ''
)
) %>% add_trace(
y=rownames(my_mat_list$low),x=colnames(my_mat_list$low)[columns],
z = my_mat_list$low[,columns],
xgap=3,ygap=3,text=my_mat_list_t$low[,columns],hoverinfo="x+y+text",
colorscale = lowColor,
colorbar = list(
len = 0.3,
y = 0.3,
yanchor = 'top',
title = 'low series',
tickvals = ''
)
) %>% add_trace(
y=rownames(my_mat_list$normal),x=colnames(my_mat_list$normal)[columns],
z = my_mat_list$normal[,columns],
xgap=3,ygap=3,text=my_mat_list_t$normal[,columns],hoverinfo="x+y+text",
colorscale = normColor,
colorbar = list(
len = 0.3,
y = 1,
yanchor = 'top',
title = 'normal series',
tickvals = ''
)
) %>% add_trace(
y=rownames(my_mat_list$over),x=colnames(my_mat_list$over)[columns],
z = my_mat_list$over[,columns],
xgap=3,ygap=3,text=my_mat_list_t$over[,columns],hoverinfo="x+y+text",
colorscale = overColor,
colorbar = list(
len = 0.3,
y = 1,
yanchor = 'top',
title = 'over series',
tickvals = ''
)
)
return(p)
})
subplot(p_list[[1]],p_list[[2]],shareY = TRUE) %>%
layout(annotations = list(
list(x = 0.2 , y = 1.05, text = levels(Gene_states22$panel)[1], showarrow = F, xref='paper', yref='paper'),
list(x = 0.8 , y = 1.05, text = levels(Gene_states22$panel)[2], showarrow = F, xref='paper', yref='paper'))
)
POSSIBLE ISSUES:
You have to become create around categories like dfgh which occur only once. If only one column is selected in R, the output is automatically transformed into a (numeric or character) vector-type. Thus maybe add an as.matrix() to all z and text arguments
hover-text doesn't really work. But plotly has a good documentation there. You should be able to figure that out.
You also have to specify the width in the subplot-function. That will be fiddly if you have more than 10 categories.
Interactivity doesn't really work. You can't remove traces. Why? No idea. Do some research if you need it. I guess it is connected with the plot type.
I recommend specifying the extend of the plot(s) in px. That might make the tiles more similar.
Finally you will need some reference for the (subplot) titles and you will need to adjust the margins of your plot. Such that the titles are visible.

Draw vegan graph on ggplot

I am fairly new to vegan and ggplot, I have drawn a species diversity plot in vegan. Ggplot has better graph so I was wondering if these codes could be modified to ggplot code.
Any help would be greatly appreciated. I am using bray in vegan.
library(vegan)
library(mass)
data <- read.table("data.txt", header = T)
attach(data)
rownames(data) <- c("TCI1", "TCI2", "TCI3", "TCII1", "TCII2", "TCII3", "TCIII1", "TCIII2", "TCIII3", "TCIV1", "TCIV2", "TCIV3",
"NCI1", "NCI2", "NCI3", "NCII1", "NCII2", "NCII3", "NCIII1", "NCIII2", "NCIII3", "NCIV1", "NCIV2", "NCIV3","TFI1", "TFI2", "TFI3", "TFII1", "TFII2", "TFII3", "TFIII1", "TFIII2", "TFIII3", "TFIV1", "TFIV2", "TFIV3",
"NFI1", "NFI2", "NFI3", "NFII1", "NFII2", "NFII3", "NFIII1", "NFIII2", "NFIII3", "NFIV1", "NFIV2", "NFIV3")
bcdist <- vegdist(data, "bray")
bcmds <- isoMDS(bcdist, k = 2)
plot(bcmds$points, type = "n", xlab = "", ylab = "")
text(bcmds$points, dimnames(data)[[1]])
You can indeed create a plot that looks like the imgur image. First I created some made-up data for your weeds. Then I called ggplot2 and put the weed names at the points, but made the points transparent.
x <- seq(from = -1, to = 1, .025)
df <- data.frame(valuesX = sample(x, size = 48, replace = TRUE),
valuesY = sample(x, size = 48, replace = TRUE),
seeds = c("TCI1", "TCI2", "TCI3", "TCII1", "TCII2", "TCII3", "TCIII1", "TCIII2", "TCIII3", "TCIV1", "TCIV2", "TCIV3",
"NCI1", "NCI2", "NCI3", "NCII1", "NCII2", "NCII3", "NCIII1", "NCIII2", "NCIII3", "NCIV1", "NCIV2", "NCIV3","TFI1", "TFI2", "TFI3", "TFII1", "TFII2", "TFII3", "TFIII1", "TFIII2", "TFIII3", "TFIV1", "TFIV2", "TFIV3",
"NFI1", "NFI2", "NFI3", "NFII1", "NFII2", "NFII3", "NFIII1", "NFIII2", "NFIII3", "NFIV1", "NFIV2", "NFIV3")
)
ggplot(df, aes(x = valuesX, y = valuesY)) +
geom_point(colour = "transparent") +
geom_text(data = df, aes(label = seeds), hjust = 1.5) +
theme_bw() +
labs(x = "Your axis label", y = "", title = "Weed Distribution") +
theme(axis.ticks= element_blank()) +
theme(plot.title = element_text(face = "bold", size = 12))
You can adjust all the elements of the plot as you see fit.

Resources