I have data similar to the following example:
# Example data: one row per (group, subgroup) pair with a point size (`value`),
# a y-axis label (`pp`) and the cluster that label belongs to.
dat1 <- data.frame(
  group = rep(c("a", "b", "c"), times = c(6, 9, 6)),
  subgroup = paste0("R", c(1:6, 1:9, 1:6)),
  value = c(15, 16, 12, 12, 14, 5, 14, 27, 20, 23, 14, 10, 20, 22, 14, 15, 18, 14, 23, 30, 32),
  pp = c("AT", "BT", "CT", "AA", "CC", "SE", "DN", "AS", "MM", "XT", "QQ",
         "HH", "MK", "HT", "dd", "US", "AG", "TT", "ZZ", "XK", "RU"),
  clusters = rep(c("cluster1", "cluster2", "cluster3"), times = c(6, 9, 6))
)

# One colour per cluster. Looking the colour up by each row's cluster yields a
# 21-element vector whose names are the cluster of every row; assigning three
# names directly to a 21-element vector would leave the other 18 names NA.
cluster_colors <- c(cluster1 = "#74c1e8", cluster2 = "#808000", cluster3 = "#FF69B4")
colors <- cluster_colors[as.character(dat1$clusters)]
my code is :
# Bubble plot of `value` per (subgroup, pp), one facet per group.
# Each `+` has to end a line: a line that *starts* with `+` is parsed as a new
# statement (a unary plus applied to the layer), so the layers would never be
# added to the plot.
pl <- ggplot(dat1, aes(y = pp, x = subgroup)) +
  geom_point(aes(size = value)) +
  facet_grid(~group, scales = "free_x", space = "free") +
  ylab("names") +
  xlab(" ") +
  theme(axis.text.y = element_text(color = colors))
pl
What I want is to add some space on y_axis after each cluster. For example, after cluster 3 (red ones) I want to add some space like space between panels, etc. in the following plot.
Is there a way to do that?
My solution converts the y axis to a factor and adds a geom_hline between each pair of adjacent clusters.
library(tidyverse)
# Example data: one row per (group, subgroup) pair with a point size (`value`),
# a y-axis label (`pp`) and the cluster that label belongs to.
dat1 <- data.frame(
  group = rep(c("a", "b", "c"), times = c(6, 9, 6)),
  subgroup = paste0("R", c(1:6, 1:9, 1:6)),
  value = c(15, 16, 12, 12, 14, 5, 14, 27, 20, 23, 14, 10, 20, 22, 14, 15, 18, 14, 23, 30, 32),
  pp = c("AT", "BT", "CT", "AA", "CC", "SE", "DN", "AS", "MM", "XT", "QQ",
         "HH", "MK", "HT", "dd", "US", "AG", "TT", "ZZ", "XK", "RU"),
  clusters = rep(c("cluster1", "cluster2", "cluster3"), times = c(6, 9, 6))
)

# One colour per row, looked up from the row's cluster (rows are stored in
# cluster order: 6 x cluster1, 9 x cluster2, 6 x cluster3).
cluster_colors <- c(cluster1 = "#74c1e8", cluster2 = "#808000", cluster3 = "#FF69B4")
colors <- cluster_colors[as.character(dat1$clusters)]

# The y labels are drawn in factor-level order. `factor(pp)` alone sorts the
# levels alphabetically, which scatters the clusters along the axis; pinning
# the levels to the data order keeps each cluster contiguous, so the positional
# colour vector and the separators below line up with the clusters.
ggplot(dat1, aes(y = factor(pp, levels = unique(pp)), x = subgroup)) +
  geom_point(aes(size = value)) +
  facet_grid(~group, scales = "free_x", space = "free") +
  ylab("names") +
  xlab(" ") +
  theme(axis.text.y = element_text(color = colors)) +
  # White bands between positions 6|7 and 15|16, i.e. between the clusters.
  geom_hline(yintercept = 6.5, color = "white", size = 2) +
  geom_hline(yintercept = 15.5, color = "white", size = 2)
Related
I am doing a ring plot with ggplot and I would like to add borders to the categories but they overlap. Is there a way to make the borders internal to the rectangle?
Reproducible example.
Data:
# Ring-plot input: one row per group with its share of the total, sorted by
# increasing share, plus the cumulative [ymin, ymax] interval each group spans.
plot.df <- data.frame(number = c(3455, 3714, 2345), group = c("A", "B", "C"))
plot.df$fraction <- with(plot.df, number / sum(number))
plot.df <- plot.df[order(plot.df$fraction), ]
plot.df$ymax <- cumsum(plot.df$fraction)
# Each segment starts where the previous one ended; the first starts at 0.
plot.df$ymin <- c(0, plot.df$ymax[-nrow(plot.df)])
Plot:
# Ring chart: rectangles spanning [ymin, ymax] at a fixed radial band
# (x from 2.5 to 4), bent into a ring by the polar transform on y.
ggplot(
  plot.df,
  aes(
    color = group, fill = group,
    ymax = ymax, ymin = ymin,
    xmax = 4, xmin = 2.5
  )
) +
  geom_rect(alpha = 0.6, size = 4) +
  coord_polar(theta = "y") +
  xlim(0, 4) +
  labs(title = "My Ring Plot", x = "", y = "", fill = "", color = "") +
  theme_bw() +
  # Strip grid, axis text and ticks; centre the title.
  theme(
    panel.grid = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    plot.title = element_text(hjust = 0.5)
  )
I get the following plot, that is correct except for the borders.
For example, between B and C only the B (green) border is visible, and I would like to see a thick green line next to a thick blue line. Did I explain myself?
Thanks for your help!
EDIT:
I found a dirty solution, it is not perfect or elegant but it kind of does the job.
First we need to modify the ymin column
# Shift every segment start by 0.0125 to open a gap before each segment.
plot.df$ymin <- 0.0125 + c(0, head(plot.df$ymax, n = -1))
and then add a new row for a "ghost" category
# Prepend the "ghost" category D covering [0, 0.0125].
# The row is built as a typed one-row data frame: binding a bare c(...) that
# mixes numbers and "D" would turn every column into character, requiring
# columns to be converted back to numeric afterwards (and leaving `number`
# and `fraction` as text).
plot.df <- rbind(
  data.frame(number = 234, group = "D", fraction = 0.0125, ymax = 0.0125, ymin = 0),
  plot.df
)
now we can make the plot hiding the "ghost" category
# Ring chart with the ghost category: D is coloured white and is left out of
# the legend because it is not listed in `breaks`.
ggplot(
  plot.df,
  aes(
    color = group, fill = group,
    ymax = ymax, ymin = ymin,
    xmax = 4, xmin = 3
  )
) +
  geom_rect(alpha = 0.6, size = 4) +
  coord_polar(theta = "y") +
  xlim(0, 4) +
  scale_fill_manual(
    breaks = c("A", "B", "C"),
    values = c("red", "green", "blue", "white"),
    aesthetics = c("colour", "fill")
  ) +
  labs(title = "My Ring Plot", x = "", y = "", fill = "", color = "") +
  theme_bw() +
  # Strip grid, axis text and ticks; centre the title.
  theme(
    panel.grid = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    plot.title = element_text(hjust = 0.5)
  )
That looks like what I was looking for, but the way I made it is not ideal.
Any other solution to achieve this? Thanks!
I want to add a legend to my graph. All solutions I found online use scale_color_manual - but it's not working for me. Where is the legend?
Here is my code:
library(ggplot2)
# Two density curves with fixed colours.
# The `+` after ggtitle() is required: without it the plot expression ends
# there and the theme/scale lines form a separate expression that is never
# added to the plot.
# Note: `color` is set outside aes() here, so it is a constant rather than a
# mapped aesthetic for scale_color_manual() to describe.
ggplot() +
  geom_density(aes(x = rnorm(100)), color = 'red') +
  geom_density(aes(x = rnorm(100)), color = 'blue') +
  xlab("Age") + ylab("Density") + ggtitle('Age Densities') +
  theme(legend.position = 'right') +
  scale_color_manual(labels = c('first', 'second'), values = c('red', 'blue'))
If for some reason you absolutely need the two geoms to take on different data sources, move the color = XXX portion inside aes() for each, then define the colors manually using a named vector:
# Map colour to a series name inside aes() so that a legend is created, then
# translate the series names to actual colours with a named vector.
ggplot() +
  geom_density(aes(x = rnorm(100), color = 'first')) +
  geom_density(aes(x = rnorm(100), color = 'second')) +
  labs(x = "Age", y = "Density", title = 'Age Densities') +
  scale_color_manual(values = c('first' = 'red', 'second' = 'blue')) +
  theme(legend.position = 'right')
Your data are not formatted correctly and you are basically creating two separate plots on a common "canvas", please see the code below (creation of the df is the crucial part):
library(ggplot2)
# Long-format data: one column of values and one column naming the series each
# value belongs to. Tagging rows by series name (not by a colour word) avoids
# the trap where rows labelled 'blue' end up drawn in red because unnamed
# `values` are matched to the sorted levels.
df <- data.frame(
  x = c(rnorm(100), runif(100)),
  col = rep(c("first", "second"), each = 100)
)
ggplot(df) +
  geom_density(aes(x = x, color = col)) +
  xlab("Age") + ylab("Density") + ggtitle("Age Densities") +
  theme(legend.position = "right") +
  # Named values tie each series to its colour explicitly.
  scale_color_manual(values = c(first = "red", second = "blue"))
Dataframe:
# Four x positions with two observations each; `legend` names the series that
# every observation belongs to.
df <- data.frame(
  X = rep(c("a", "b", "c", "d"), each = 2),
  legend = c(rep(c("a", "b"), times = 2), rep(c("c", "d"), times = 2)),
  Y = c(100, 50, 50, 100, 150, 100, 150, 100)
)
Graph:
# Line + point plot with one series per value of `legend` (a, b, c, d).
ggplot(data=df, aes(x=X, y=Y, group=legend, colour=legend)) +
geom_line(size = 1) +
geom_point(size = 3) +
# Four colour values are supplied but only two labels, so two of the four
# legend entries have no label to show.
scale_color_manual(values=c("blue", "red","blue", "red"), labels = c('blue','red')) +
# Relabel the four x positions as two groups.
scale_x_discrete(labels = c('Group 1','Group 1','Group 2','Group 2'))
Output:
http://imgur.com/a/U62PU
When I do this the legend reads 'blue', 'red', 'NA', 'NA'. How do I remove the NAs from the legend so it only says 'blue, red'?
You could always set the breaks inside your scale_color_manual. Something like this should work:
# Line + point plot with one series per value of `legend` (a, b, c, d), but
# only two legend entries.
ggplot(data = df, aes(x = X, y = Y, group = legend, colour = legend)) +
  geom_line(size = 1) +
  geom_point(size = 3) +
  scale_color_manual(
    # Name every value: when `breaks` is given together with unnamed values,
    # the values may be paired with the breaks only, which would leave series
    # c and d without a colour.
    values = c(a = "blue", b = "red", c = "blue", d = "red"),
    # Set the breaks here: only a and b get a legend entry.
    breaks = c("a", "b"),
    labels = c("blue", "red")
  ) +
  scale_x_discrete(labels = c("Group 1", "Group 1", "Group 2", "Group 2"))
We keep only the first two groups since we want to relabel to only have 2 values in the legend red and blue
This question already has answers here:
Reverse stacked bar order
(2 answers)
Closed 5 years ago.
I'm a biology graduate student learning R. I was hoping someone could help me have the bars go horizontally in the opposite direction (the blue portion should start at 0 and the red at the 100 end of the scale).
Graph with bars in the wrong direction
Here is the data
# Four BUSCO categories (S, D, F, M) per species, three species.
my_species <- rep(
  c("apomict_2-17-17_compreh", "parthenogen_2-17-17_compreh", "sexual_2-9-17"),
  each = 4
)
# Reverse of the default (sorted) level order; change the order of the species
# here by changing the `levels` vector.
my_species <- factor(my_species, levels = rev(sort(unique(my_species))))
my_percentage <- c(36.3, 56.3, 2.6, 4.8, 42.2, 50.6, 2.4, 4.8, 56.0, 19.9, 6.7, 17.4)
my_values <- c(522, 811, 38, 69, 608, 729, 35, 68, 806, 286, 96, 252)
# Category levels in S, D, F, M order; the four values are recycled across the
# twelve rows when the data frame is built.
category <- factor(c("S", "D", "F", "M"), levels = c("S", "D", "F", "M"))
df <- data.frame(my_species, my_percentage, my_values, category)
Here is the code:
# Load the required libraries
library(ggplot2)
library("grid")
# !!! CONFIGURE YOUR PLOT HERE !!!
# Output
#my_output <- paste("/home/loki/","busco_figure.png",sep="/")
my_width <- 20
my_height <- 15
my_unit <- "cm"
# Colors
my_colors <- c("#56B4E9", "#3492C7", "#F0E442", "#F04442")
# Bar height ratio
my_bar_height <- 0.75
# Legend
my_title <- "BUSCO Assessment Results"
# Font
my_family <- "sans"
my_size_ratio <- 1

# Code to produce the graph
# Use a smaller annotation font when there are more than 10 species.
labsize <- if (nlevels(my_species) > 10) 0.66 else 1

print("Plotting the figure ...")
# Horizontal stacked bars: one bar per species, one segment per category.
figure <- ggplot() +
  geom_bar(
    aes(y = my_percentage, x = my_species, fill = category),
    data = df, stat = "identity", width = my_bar_height
  ) +
  coord_flip() +
  theme_gray(base_size = 8) +
  #scale_y_continuous(labels = c("100","80","60","40","20","0"), breaks = c(100,80,60,40,20,0)) +
  scale_y_continuous(labels = c("100","80","60","40","20","0"), breaks = c(100,80,60,40,20,0)) +
  #scale_y_continuous(labels = c("100","80","60","40","20","0"), breaks = c(0,20,40,60,80,100)) +
  scale_fill_manual(
    values = my_colors,
    labels = c(" Complete (C) and single-copy (S) ",
               " Complete (C) and duplicated (D)",
               " Fragmented (F) ",
               " Missing (M)")
  ) +
  ggtitle(my_title) +
  xlab("") +
  ylab("\n%BUSCOs") +
  theme(
    plot.title = element_text(family = my_family, colour = "black", size = rel(2.2) * my_size_ratio, face = "bold"),
    legend.position = "top",
    legend.title = element_blank(),
    legend.text = element_text(family = my_family, size = rel(1.2) * my_size_ratio),
    panel.background = element_rect(color = "#FFFFFF", fill = "white"),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    axis.text.y = element_text(family = my_family, colour = "black", size = rel(1.66) * my_size_ratio),
    axis.text.x = element_text(family = my_family, colour = "black", size = rel(1.66) * my_size_ratio),
    axis.line = element_line(size = 1 * my_size_ratio, colour = "black"),
    axis.ticks.y = element_line(colour = "white", size = 0),
    axis.ticks.x = element_line(colour = "#222222"),
    # Only the last of the two tick lengths previously set (0.85 cm, then
    # 0.4 cm) took effect, so just that one is kept.
    axis.ticks.length = unit(0.4, "cm"),
    axis.title.x = element_text(family = my_family, size = rel(1.2) * my_size_ratio)
  ) +
  # A single fill guide: a second guides(fill = ...) call replaces the first,
  # so only the effective two-row layout is specified.
  guides(fill = guide_legend(nrow = 2, byrow = TRUE))

# Add one text annotation per species summarising its counts. The values of a
# species are expected in S, D, F, M order.
for (i in rev(seq_along(levels(my_species)))) {
  detailed_values <- my_values[my_species == levels(my_species)[i]]
  total_buscos <- sum(detailed_values)
  figure <- figure +
    annotate(
      "text",
      label = paste0(
        "C:", detailed_values[1] + detailed_values[2],
        " [S:", detailed_values[1],
        ", D:", detailed_values[2],
        "], F:", detailed_values[3],
        ", M:", detailed_values[4],
        ", n:", total_buscos
      ),
      y = 3, x = i, size = labsize * 4 * my_size_ratio,
      colour = "black", hjust = 0, family = my_family
    )
}

my_output <- "~/temp.png"
# Full argument names: `file` and `unit` only worked through partial matching.
ggsave(filename = my_output, plot = figure, width = my_width, height = my_height, units = my_unit)
print("Done")
see ?position_stack:
position_fill() and position_stack() automatically stack values in
reverse order of the group aesthetic, which for bar charts is usually
defined by the fill aesthetic (the default group aesthetic is formed
by the combination of all discrete aesthetics except for x and y).
This default ensures that bar colours align with the default legend.
In order to change the stacking direction, you simply need to add position = position_stack(reverse = TRUE) to geom_bar:
figure <- ggplot() +
geom_bar(
aes(y = my_percentage, x = my_species, fill = category),
data = df, stat="identity", width=my_bar_height,
position = position_stack(reverse = TRUE)) +
coord_flip() +
...
If you don't want to use position_stack, you have to change the factor levels, and you also have to set the fill colour breaks to maintain the same legend order.
You need to reorder the factor levels in order for ggplot2 to know what to do. Here is an example of that (note I had to reorder the labels and colors as well):
...
# Colors
my_colors <- c( "#F04442", "#F0E442", "#3492C7", "#56B4E9")
...
df$category = ordered(df$category, levels = c("M", "F", "D", "S"))
figure <- ggplot(data = df[order(df$category, decreasing = F),]) +
geom_bar(aes(y = my_percentage, x = my_species, fill = category), stat="identity", width=my_bar_height) +
coord_flip() +
theme_gray(base_size = 8) +
scale_y_continuous(labels = c("100","80","60","40","20","0"), breaks = c(100,80,60,40,20,0)) +
scale_fill_manual(values = my_colors,labels =c(" Missing (M)",
" Fragmented (F) ",
" Complete (C) and duplicated (D)",
" Complete (C) and single-copy (S) ")) +
...
I have a plot of multiple geom_point and a single stat_function in ggplot2. Is there a way to show a single legend?
# Wide table with two measured series (a, b) over x, reshaped to long format
# so that each series can be mapped to colour and shape.
df <- data.frame(x = 1:5, a = c(1, 2, 3, 3, 3), b = c(1, 1.1, 1.3, 1.5, 1.5))
df <- melt(df, id.vars = "x")
# Points for the two series plus the log2 curve, which is given its own
# colour entry named "log2(x)".
p <- ggplot(df, aes(x = x, y = value)) +
  geom_point(aes(colour = variable, shape = variable)) +
  stat_function(aes(colour = "log2(x)"), fun = log2)
I want to have a single legend with the blue line and the two colored shapes. I tried
# Attempt to merge the colour and shape legends by giving both scales the same
# title; the shape scale only lists the two point series.
scale_colour_discrete(name="legend", breaks=c("a", "b", "log2(x)")) +
scale_shape_discrete(name="legend", breaks=c("a", "b"))
but this does not work. Is there a way to do this automatically or by hand?
Thanks in advance.
Probably an easier alternative is to use override.aes as follows:
# Single combined legend: hide the shape legend and redraw the colour legend's
# keys by hand (points for a and b, a solid line for the curve).
ggplot(df, aes(x = x, y = value)) +
  geom_point(aes(colour = variable, shape = variable), size = 3) +
  stat_function(aes(colour = "log2(x)"), fun = log2, size = 1.5) +
  guides(
    # "none" is the supported way to drop a guide; `FALSE` is deprecated.
    shape = "none",
    colour = guide_legend(
      override.aes = list(
        shape = c(16, 17, NA),
        linetype = c("blank", "blank", "solid")
      )
    )
  )
which results in:
Specify a . as the shape symbol for your curve and a blank line for your points:
# Give every series (a, b and the curve) a colour, a shape AND a linetype, so
# that all three scales share the same set of keys.
p <- ggplot(df, aes(x=x, y=value)) +
geom_point(aes(colour=variable, shape=variable, linetype = variable), size = 3) +
stat_function(aes(colour="log2(x)", shape = "log2(x)", linetype = "log2(x)"), fun=log2) +
# Shapes 16 and 17 for the point series; 46 is the "." symbol used for the curve.
scale_shape_manual(values = setNames(c(16, 17, 46), c("a", "b", "log2(x)"))) +
# Linetype 0 (blank) for the point series, 1 (solid) for the curve.
scale_linetype_manual(values = setNames(c(0, 0, 1), c("a", "b", "log2(x)")))
print(p)