I would like to get the same graph given by this code:
library(ggplot2)
name <- c("A","A","A","A","B","B","B","B")
size <- c("small","small","tall","tall","small","small","tall","tall")
flag <- c(0,1,0,1,0,1,0,1)
quantity <- c(26,13,12,4,19,14,13,5)
df <- data.frame(name,size,flag,quantity)
ggplot(data = df, mapping = aes(x = name, y = quantity)) +
geom_bar(aes(fill = size), position = "dodge", stat = "identity")
Except that I would like to have the bars split according to the variable flag. the ideal would be to have a different shade of color for the portion of the bars that corresponds to flag = 0.
I also need to have a legend for the flag variable.
Using different alpha-values for the flag:
This adds different transparency values for the flag. However, all bars are dodged.
ggplot(data = df, mapping = aes(x = name, y = quantity)) +
geom_bar(aes(fill = size, alpha = factor(flag)), position = "dodge", stat = "identity") +
scale_alpha_manual("flag", values = c(0.3, 1))
A combination of facet_wrap with an interaction on the x-axis (name, size):
Keeps the stacking of the bars for the same size, workaround is needed to have a nice x-axis.
ggplot(data = df, mapping = aes(x = interaction(name, size), y = quantity)) +
geom_bar(aes(fill = size, alpha = factor(flag)),
position = "stack", stat = "identity") +
scale_alpha_manual("flag", values = c(0.3, 1)) +
facet_wrap(~ name, strip.position = "bottom", scales = "free_x") +
theme(axis.ticks.x = element_blank(),
axis.text.x = element_blank(),
axis.title.x = element_blank(),
strip.background = element_blank())
Interaction with size:
With specifying a scale_fill_manual you can assign different colors to the different combinations of size and flag.
ggplot(data = df, mapping = aes(x = name, y = quantity)) +
geom_bar(aes(fill = interaction(size, flag)), position = "dodge", stat = "identity")
library(ggplot2)
df1 <- data.frame(name = c("A","A","A","A","B","B","B","B"),
size = c("small","small","tall","tall","small","small","tall","tall"),
flag = c(0,1,0,1,0,1,0,1),
quantity = c(26,13,12,4,19,14,13,5))
ggplot(data = df1, mapping = aes(x = name, y = quantity)) +
geom_bar(aes(fill = size), alpha = ifelse(flag==0, 0.6, 1),
position = "dodge", stat = "identity")
Adding on top of #kath answer:
library(grid)
library(ggplot2)
gg_color_hue <- function(n) {
hues = seq(15, 375, length = n + 1)
hcl(h = hues, l = 65, c = 100)[1:n]
}
mycols <- gg_color_hue(length(unique(interaction(df$size, df$flag)))/2)
ggplot(data = df, mapping = aes(x = interaction(name, size), y = quantity)) +
geom_bar(aes(fill = interaction(factor(size), factor(flag))),
position = "stack", stat = "identity") +
scale_fill_manual(name = "Size and Flag",
values = c("small.0" = alpha(mycols[1], 3/5),
"tall.0" = alpha(mycols[2], 3/5),
"small.1" = alpha(mycols[1], 1),
"tall.1" = alpha(mycols[2], 1)),
labels = c("Size: small and Flag: 0",
"Size: tall and Flag: 0",
"Size: small and Flag: 1",
"Size: tall and Flag: 1")) +
facet_wrap(~ name, strip.position = "bottom", scales = "free_x") +
theme(axis.ticks.x = element_blank(),
axis.text.x = element_blank(),
strip.background = element_blank(),
panel.spacing = unit(-1.25, "lines")) +
xlab("name")
Related
I'm building a barplot with RNA reads % in ggplot, I did this:
ggplot(tipos_exo,aes(x = reorder(sample, -value),y = value,fill = variable)) +
geom_bar( stat = "identity")
I need to replace the x axis labels with colored bars, each sample belongs to a specific batch and I looking for this effect:
Any thoughts?
One option to achieve your desired result would be to create your axis colorbar as a second plot and glue it to the main plot via the patchwork package.
For the colorbar I use geom_tile and remove all non-data ink using theme_void. As a first step I reorder your sample column by value and get rid of the duplicated sample categories using dplyr::distinct.
Using some fake random example data:
set.seed(123)
tipos_exo <- data.frame(
sample = rep(letters, each = 2),
variable = c("tablaq_readsPerc", "tablaq_shortReadsPerc"),
value = runif(52, 0, 100),
batch = rep(LETTERS, each = 2)
)
library(ggplot2)
library(patchwork)
library(dplyr, warn = FALSE)
p1 <- ggplot(tipos_exo,aes(x = reorder(sample, -value),y = value,fill = variable)) +
geom_bar( stat = "identity") +
scale_y_continuous(expand = c(0, 0)) +
labs(x = NULL) +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.ticks.length.x = unit(0, "pt"))
tipos_exo1 <- tipos_exo |>
mutate(sample = reorder(sample, -value)) |>
distinct(sample, batch)
p_axis <- ggplot(tipos_exo1, aes(x = sample, y = factor(1), fill = batch)) +
geom_tile(width = .9) +
geom_text(aes(label = sample)) +
theme_void() +
theme(axis.title.x = element_text()) +
labs(x = "Batch Annotation") +
guides(fill = "none")
p1 / p_axis + plot_layout(heights = c(8, 1))
UPDATE Adapting my answer on this post Reorder Bars of a Stacked Barchart in R you could reorder your sample column by a helper value "column", e.g. if you want to reorder by "tablaq_readsPerc" you set the values for the other categories to zero and use FUN=sum. Note that I also reversed the order of the stack so that the "tablaq_readsPerc" bars are placed at the bottom.
tipos_exo <- tipos_exo |>
mutate(sample1 = reorder(sample, -ifelse(!variable %in% "tablaq_readsPerc", 0, value), FUN = sum))
p1 <- ggplot(tipos_exo,aes(x = sample1, y = value, fill = variable)) +
geom_bar( stat = "identity", position = position_stack(reverse = TRUE)) +
scale_y_continuous(expand = c(0, 0)) +
labs(x = NULL) +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.ticks.length.x = unit(0, "pt"))
tipos_exo1 <- tipos_exo |>
distinct(sample, sample1, batch)
p_axis <- ggplot(tipos_exo1, aes(x = sample1, y = factor(1), fill = batch)) +
geom_tile(width = .9) +
geom_text(aes(label = sample)) +
theme_void() +
theme(axis.title.x = element_text()) +
labs(x = "Batch Annotation") +
guides(fill = "none")
p1 / p_axis + plot_layout(heights = c(8, 1))
I am trying to create a plot like the one below. I'd like the order the points in each category in such a way that they form an s-shape. Is it possible to do this in ggplot?
Similar data available here
What I have so far:
somatic.variants <- read.delim("data/Lawrence.S2.txt", stringsAsFactors=T)
cancer_rates <- tapply(somatic.variants$logn_coding_mutations, somatic.variants$tumor_type, median)
cancer_rates <- cancer_rates[order(cancer_rates, decreasing=F)]
somatic.variants$tumor_type <- factor(somatic.variants$tumor_type, levels = names(cancer_rates))
library(ggplot2)
library(GGally)
ggplot(data = somatic.variants,
mapping = aes(x = tumor_type,
y = log10(n_coding_mutations))) +
geom_point(position = position_dodge2()) +
scale_x_discrete(position = "top") +
scale_y_continuous(labels = c(0,10,100,1000,10000), expand = c(0,0)) +
geom_stripped_cols() +
theme_bw() +
theme(axis.title.x = element_blank(),
axis.text.x = element_text(angle = 315, hjust = 1, size = 12),
panel.grid = element_blank()) +
labs(y = "Coding mutations count") +
stat_summary(fun = median,
geom="crossbar",
size = 0.25,
width = 0.9,
group = 1,
show.legend = FALSE,
color = "#FF0000")
This could be achieved by
grouping the data by x-axis categories
arranging by the y-axis value
which ensures that the points are plotted in ascending order of the values for each category.
somatic.variants <- read.delim("https://gist.githubusercontent.com/wudustan/57deecdaefa035c1ecabf930afde295a/raw/1594d51a1e3b52f674ff746caace3231fd31910a/Lawrence.S2.txt", stringsAsFactors=T)
cancer_rates <- tapply(somatic.variants$logn_coding_mutations, somatic.variants$tumor_type, median)
cancer_rates <- cancer_rates[order(cancer_rates, decreasing=F)]
somatic.variants$tumor_type <- factor(somatic.variants$tumor_type, levels = names(cancer_rates))
library(ggplot2)
library(GGally)
library(dplyr)
somatic.variants <- somatic.variants %>%
group_by(tumor_type) %>%
arrange(n_coding_mutations)
ggplot(data = somatic.variants,
mapping = aes(x = tumor_type,
y = log10(n_coding_mutations))) +
geom_point(position = position_dodge2(.9), size = .25) +
scale_x_discrete(position = "top") +
scale_y_continuous(labels = c(0,10,100,1000,10000), expand = c(0,0)) +
geom_stripped_cols() +
theme_bw() +
theme(axis.title.x = element_blank(),
axis.text.x = element_text(angle = 315, hjust = 1, size = 12),
panel.grid = element_blank()) +
labs(y = "Coding mutations count") +
stat_summary(fun = median,
geom="crossbar",
size = 0.25,
width = 0.9,
group = 1,
show.legend = FALSE,
color = "#FF0000")
#> Warning: Removed 29 rows containing non-finite values (stat_summary).
I have made a barplot similar to the one below using ggplot2.
I cannot get the percentages on top of the bars to be centered and not overlapping of other bars and numbers. Sample code is below.
library(tidyverse)
cat1=c("cat1","cat1","cat1","cat1","cat1","cat1","cat1","cat1","cat1","cat1","cat1","cat1",
"cat2","cat2","cat2","cat2","cat2","cat2","cat2","cat2","cat2","cat2","cat2","cat2",
"cat3","cat3","cat3","cat3","cat3","cat3","cat3","cat3","cat3","cat3","cat3","cat3",
"cat4","cat4","cat4","cat4","cat4","cat4","cat4","cat4","cat4","cat4","cat4","cat4")
cat2=c("c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12",
"c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12",
"c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12",
"c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12")
count1=round(rnorm(48,10))
fakeperc=rnorm(48,9)
df1=cbind(count1,fakeperc)
df2=cbind(cat1,cat2)
finaldf=as.data.frame(cbind(df1,df2))
finaldf$cat1=as.factor(finaldf$cat1)
finaldf$fakeperc=as.numeric(finaldf$fakeperc)
#finaldf$cat1=factor(finaldf$cat1,levels = c("cat1","cat2","cat3","cat4"))
finaldf$cat2 = factor(finaldf$cat2,
levels = c("c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12"))
a=ggplot(data=finaldf,aes(x=cat1, y=count1,
fill=cat2,group=cat2)) +
geom_bar(stat='identity',color='black',width=.65,position=position_dodge(width=.9))+
scale_y_discrete(limits=0:50,breaks=c(0,10,20,30,40,50))+
scale_fill_brewer(palette="Set3") +
theme_classic() +
geom_text(data = finaldf,
aes(x=cat1,y=count1,group=cat2,
label=format(paste(round(fakeperc),"%",sep = ""))),inherit.aes = F,
color='black',position=position_dodge(.9),vjust=-.5,size=3)
a
When trying to add either nudge_y or nudge_x to the geom_text call, nothing happens. I suspect this is because there is already a position_dodge call. I am open any and all solutions to make these percentages non-overlapping and legible.
What do you think of this?
# I think you meant count1 to be numeric
finaldf$count1 <- as.numeric(finaldf$count1)
ggplot(data = finaldf,
aes(x = cat1,
y = count1,
fill = cat2,
group = cat2)) +
geom_col(color = 'black',
width = 0.65,
position = position_dodge(width = 0.9)) +
geom_text(data = finaldf,
aes(x = cat1,
y = count1,
group = cat2,
label = scales::percent(fakeperc/100, accuracy = 0.01)),
inherit.aes = FALSE,
color = 'black',
position = position_dodge(0.9),
hjust = -0.1,
size = 3) +
scale_y_continuous(limits = c(0,50), breaks = c(0,10,20,30,40,50)) +
scale_fill_brewer(palette = "Set3") +
theme_classic() +
coord_flip()
I cleaned up a bit the code (according to my taste)
I changed scale_y_numeric to scale_y_continuous (since count1 should be numeric)
I used coord_flip() to make it more readable
I used scales::percent to write percentage numbers
(don't know why you set up limits from 0 to 50 but I left them as I suppposed they were intended)
If you don't want to use coor_flip:
finaldf$count1 <- as.numeric(finaldf$count1)
ggplot(data = finaldf,
aes(x = cat1,
y = count1,
fill = cat2,
group = cat2)) +
geom_col(color = 'black',
width = 0.65,
position = position_dodge(width = 0.9)) +
geom_text(data = finaldf,
aes(x = cat1,
y = count1,
group = cat2,
label = scales::percent(fakeperc/100, accuracy = 0.01)),
inherit.aes = FALSE,
color = 'black',
position = position_dodge(0.9),
hjust = -0.1,
angle = 90,
size = 3) +
scale_y_continuous(limits = c(0,50), breaks = c(0,10,20,30,40,50)) +
scale_fill_brewer(palette = "Set3") +
theme_classic()
Is this what you are looking for:
library(ggplot2)
#Code
ggplot(data=finaldf,aes(x=cat2, y=count1,
fill=cat2,group=cat2)) +
geom_bar(stat='identity',color='black',
position=position_dodge(width=1))+
scale_fill_brewer(palette="Set3") +
theme_bw() +
geom_text(aes(x=cat2,y=count1,group=cat2,
label=format(paste(round(fakeperc),"%",sep = ""))),inherit.aes = F,
color='black',position=position_dodge(1),
size=3,vjust=-0.5)+
facet_wrap(.~cat1,scales = 'free_x',nrow = 1,strip.position = 'bottom')+
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
legend.position = 'top',
strip.background = element_blank(),
panel.spacing = unit(2, "lines"),
panel.grid = element_blank())+
guides(fill = guide_legend(nrow = 1))
Output:
So, I have the two dataframes that produces two ggplots with the same facet that I want to combine
The first dataframe produces the following ggplot
Dataframe1
library(ggh4x)
library(ggnomics)
library(ggplot2)
library(data.table)
#dataframe
drug <- c("DrugA","DrugB1","DrugB2","DrugB3","DrugC1","DrugC2","DrugC3","DrugC4")
PR <- c(18,430,156,0,60,66,113,250)
GR <- c(16,425,154,0,56,64,111,248)
PS <- c(28,530,256,3,70,76,213,350)
GS <- c(26,525,254,5,66,74,211,348)
group<-c("n=88","n=1910","n=820","n=8","n=252","n=280","n=648","n=1186")
class<-c("Class A","Class B","Class B","Class B","Class C","Class C","Class C","Class C")
df <-data.frame(drug,group, class,PR,GR,PS,GS)
#make wide to long df
df.long <- melt(setDT(df), id.vars = c("drug","group","class"), variable.name = "type")
#Order of variables
df.long$type <- factor(df.long$type, levels=c("PR","GR","PS","GS"))
df.long$class <- factor(df.long$class, levels= c("Class B", "Class A", "Class C"))
df.long$group <- factor(df.long$group, levels= c("n=1910","n=820","n=8","n=88","n=252","n=280","n=648","n=1186"))
df.long$drug <- factor(df.long$drug, levels= c("DrugB1","DrugB2","DrugB3","DrugA","DrugC1","DrugC2","DrugC3","DrugC4"))
Ggplot for dataframe 1
ggplot(df.long, aes(fill = type, x = drug, y = value)) +
geom_bar(aes(fill = type), stat = "identity", position = "dodge", colour="white") +
geom_text(aes(label = value), position = position_dodge(width = 1.2), vjust = -0.5)+
scale_fill_manual(values = c("#fa9fb5","#dd1c77","#bcbddc","756bb1")) +
scale_y_continuous(expand = c(0, 0), limits = c(0, 600)) +
theme(title = element_text(size = 18),
legend.text = element_text(size = 12),
axis.text.x = element_text(size = 9),
axis.text.y =element_text(size = 15),
plot.title = element_text(hjust = 0.5)) +
ggh4x::facet_nested(.~class + group, scales = "free_x", space= "free_x")
This is the 2nd dataframe
#dataframe 2
drug <- c("DrugA","DrugB1","DrugB2","DrugB3","DrugC1","DrugC2","DrugC3","DrugC4")
Sens <- c(0.99,0.97,NA,0.88,0.92,0.97,0.98,0.99)
Spec <- c(1,0.99,1,0.99,0.99,0.99,0.99,1)
class<-c("Class A","Class B","Class B","Class B","Class C","Class C","Class C","Class C")
df2 <-data.frame(drug,class,Sens,Spec)
#wide to long df2
df2.long <- melt(setDT(df2), id.vars = c("drug","class"), variable.name = "type")
#additional variables
df2.long$UpperCI <- c(0.99,0.99,NA,0.98,0.98,0.99,0.99,0.99,1,1,1,1,1,1,1,1)
df2.long$LowerCI <- c(0.97,0.98,NA,0.61,0.83,0.88,0.93,0.97,0.99,0.99,0.99,0.99,0.98,0.99,0.99,0.99)
#order of variables
df2.long$class <- factor(df2.long$class, levels= c("Class B", "Class A", "Class C"))
Ggplot for dataframe 2
ggplot(df2.long, aes(x=drug, y=value, group=type, color=type)) +
geom_line() +
geom_point()+
geom_errorbar(aes(ymin=LowerCI, ymax=UpperCI), width=.2,
position=position_dodge(0.05)) +
scale_y_continuous(labels=scales::percent)+
labs(x="drug", y = "Percentage")+
theme_classic() +
scale_color_manual(values=c('#999999','#E69F00')) +
theme(legend.text=element_text(size=12),
axis.text.x=element_text(size=9),
axis.text.y =element_text(size=15),
panel.background = element_rect(fill = "whitesmoke"))+
facet_wrap(facets = vars(class),scales = "free_x")
So I am trying to combine the two plots under the one facet (the one from dataframe 1), and so far I have done the following
ggplot(df.long)+
aes(x=drug, y=value,fill = type)+
geom_bar(, stat = "identity", position = "dodge", colour="white") +
geom_text(aes(label=value), position=position_dodge(width=0.9), vjust=-0.5, size=2) +
scale_fill_manual(breaks=c("PR","GR","PS","GS"),
values=c("#dd1c77","#756bb1","#fa9fb5","#e7e1ef","black","black")) +
scale_y_continuous(expand = c(0, 0), limits = c(0, 1100),sec.axis=sec_axis(~./10, labels = function(b) { paste0(b, "%")},name="Percentage")) + #remove space between x axis labels and bottom of chart
theme(legend.text=element_text(size=12),
legend.position = 'bottom',
axis.text.x=element_text(size=9),
axis.text.y =element_text(size=15),
panel.background = element_rect(fill = "whitesmoke"), #color of plot background
panel.border = element_blank(), #remove border panels of each facet
strip.background = element_rect(colour = NA)) + #remove border of strip
labs(y = "Number of isolates", fill = "")+
geom_errorbar(data=df2.long,aes(x=drug, y=value*1000,ymin=LowerCI*1000, ymax=UpperCI*1000,color=type), width=.2,
position=position_dodge(0.05))+
geom_point(data=df2.long,aes(x=drug,y=value*1000,color=type),show.legend = F)+
geom_line(data=df2.long, aes(x=drug, y=value*1000, group=type, color=type)) +
scale_color_manual(values=c('#999999','#E69F00'))
but I'm stuck on adding the facet from the plot1. I hope anyone can help :)
For this specific case, I don't think the nested facets are the appropriate solution as the n = ... seems metadata of the x-axis group instead of a subcategory of the classes.
Here is how you could plot the data with facet_grid() instead:
ggplot(df.long, aes(drug, value, fill = type)) +
geom_col(position = "dodge") +
geom_text(aes(label = value),
position = position_dodge(0.9),
vjust = -0.5, size = 2) +
geom_errorbar(data = df2.long,
aes(y = value * 1000, color = type,
ymin = LowerCI * 1000, ymax = UpperCI * 1000),
position = position_dodge(0.05), width = 0.2) +
geom_point(data = df2.long,
aes(y = value * 1000, color = type),
show.legend = FALSE) +
geom_line(data = df2.long,
aes(y = value * 1000, group = type, color = type)) +
scale_fill_manual(breaks = c("PR", "GR", "PS", "GS"),
values=c("#dd1c77","#756bb1","#fa9fb5","#e7e1ef","black","black")) +
scale_color_manual(values=c('#999999','#E69F00')) +
scale_y_continuous(expand = c(0, 0), limits = c(0, 1100),
sec.axis = sec_axis(~ ./10,
labels = function(b) {
paste0(b, "%")
}, name = "Percentage")) +
scale_x_discrete(
labels = levels(interaction(df.long$drug, df.long$group, sep = "\n"))
) +
facet_grid(~ class, scales = "free_x", space = "free_x") +
theme(legend.text=element_text(size=12),
legend.position = 'bottom',
axis.text.x=element_text(size=9),
axis.text.y =element_text(size=15),
panel.background = element_rect(fill = "whitesmoke"), #color of plot background
panel.border = element_blank(), #remove border panels of each facet
strip.background = element_rect(colour = NA))
If you insist on including the n = ... labels, perhaps a better way is to add these as text somehwere, i.e. adding the following:
stat_summary(fun = sum,
aes(group = drug, y = stage(value, after_stat = -50),
label = after_stat(paste0("n = ", y))),
geom = "text") +
And setting the y-axis limits to c(-100, 1000) for example.
my code looks like this:
p <- ggplot(data = df, aes(x = Name, y = prop, fill = Name)) +
geom_bar(stat = "identity") +
labs(x = "", y = "EQTL / gene") +
scale_fill_brewer(palette="Greens",name = "Number of cis EQTL") +
theme_classic()+
theme(panel.grid.major.x = element_line(size = 0.1, color = "grey"),
panel.grid.major.y = element_blank(),
panel.grid.minor = element_blank()
)
p+geom_path(x=c(1,1,2,2),y=c(0.85,0.86,0.86,0.85))+
annotate("text",x=1.5,y=1.2,label="p = 2e-16")
what I am trying to do is just to put a horizontal line between the middle points of those two bars above which would be written: p = 2e-16
But when I run the code I get this error:
Error in annotate("text", x = 1.5, y = 1.2, label = "p = 2e-16") :
unused arguments (x = 1.5, y = 1.2, label = "p = 2e-16")
df <- readr::read_table(' prop Name
1 0.85 All_Genes
2 1.00 Glucose_Response_Genes103')
SOLUTION:
p <- ggplot(data = df, aes(x = Name, y = prop, fill = Name)) +
geom_bar(stat = "identity") +
labs(x = "", y = "Proportion of eGenes") +
scale_fill_brewer(palette="Greens",name = "Number of cis EQTL", labels = c("3124345", "26846")) +
theme_minimal() +
theme(legend.position = "right",
panel.grid.major.y = element_blank(),
panel.grid.minor.y = element_blank(),
axis.line = element_line(),
axis.ticks = element_line())
p + ggplot2::annotate("text", x = 1.5, y = 1.2, label = "p < 2e-16", size = 3.5) +
ggplot2::annotate("rect", xmin = 1, xmax = 2, ymin = 1.1, ymax =1.1, alpha=0.3,colour = "black")
As #Axeman pointed out the issues was with: ggplot2::annotate
Still I should like to know how to edit this code and get smaller and more elegant bars?
The problem is not the annotate call. It's your geom_path call which requires a data frame. I would probably annotate using annotate(geom = 'segment') rather than with geom_path, but here we go:
library(ggplot2)
ggplot(data = iris, aes(x = Species, y = Sepal.Length)) +
geom_bar(stat = "identity") +
geom_path(data = data.frame(x=c(1,1,2,2), y=c(0.85,0.86,0.86,0.85)), aes(x,y))+
annotate("text",x=1.5,y=1.2,label="p = 2e-16")
Created on 2020-01-16 by the reprex package (v0.3.0)