Creating ggplot geom_point() with position dodge 's-shape'

Creating ggplot geom_point() with position dodge 's-shape' - r

I am trying to create a plot like the one below. I'd like the order the points in each category in such a way that they form an s-shape. Is it possible to do this in ggplot?
Similar data available here
What I have so far:
somatic.variants <- read.delim("data/Lawrence.S2.txt", stringsAsFactors=T)
cancer_rates <- tapply(somatic.variants$logn_coding_mutations, somatic.variants$tumor_type, median)
cancer_rates <- cancer_rates[order(cancer_rates, decreasing=F)]
somatic.variants$tumor_type <- factor(somatic.variants$tumor_type, levels = names(cancer_rates))
library(ggplot2)
library(GGally)
ggplot(data = somatic.variants,
mapping = aes(x = tumor_type,
y = log10(n_coding_mutations))) +
geom_point(position = position_dodge2()) +
scale_x_discrete(position = "top") +
scale_y_continuous(labels = c(0,10,100,1000,10000), expand = c(0,0)) +
geom_stripped_cols() +
theme_bw() +
theme(axis.title.x = element_blank(),
axis.text.x = element_text(angle = 315, hjust = 1, size = 12),
panel.grid = element_blank()) +
labs(y = "Coding mutations count") +
stat_summary(fun = median,
geom="crossbar",
size = 0.25,
width = 0.9,
group = 1,
show.legend = FALSE,
color = "#FF0000")

This could be achieved by
grouping the data by x-axis categories
arranging by the y-axis value
which ensures that the points are plotted in ascending order of the values for each category.
somatic.variants <- read.delim("https://gist.githubusercontent.com/wudustan/57deecdaefa035c1ecabf930afde295a/raw/1594d51a1e3b52f674ff746caace3231fd31910a/Lawrence.S2.txt", stringsAsFactors=T)
cancer_rates <- tapply(somatic.variants$logn_coding_mutations, somatic.variants$tumor_type, median)
cancer_rates <- cancer_rates[order(cancer_rates, decreasing=F)]
somatic.variants$tumor_type <- factor(somatic.variants$tumor_type, levels = names(cancer_rates))
library(ggplot2)
library(GGally)
library(dplyr)
somatic.variants <- somatic.variants %>%
group_by(tumor_type) %>%
arrange(n_coding_mutations)
ggplot(data = somatic.variants,
mapping = aes(x = tumor_type,
y = log10(n_coding_mutations))) +
geom_point(position = position_dodge2(.9), size = .25) +
scale_x_discrete(position = "top") +
scale_y_continuous(labels = c(0,10,100,1000,10000), expand = c(0,0)) +
geom_stripped_cols() +
theme_bw() +
theme(axis.title.x = element_blank(),
axis.text.x = element_text(angle = 315, hjust = 1, size = 12),
panel.grid = element_blank()) +
labs(y = "Coding mutations count") +
stat_summary(fun = median,
geom="crossbar",
size = 0.25,
width = 0.9,
group = 1,
show.legend = FALSE,
color = "#FF0000")
#> Warning: Removed 29 rows containing non-finite values (stat_summary).

Related

Adding a black outline for each x category in heat map in R

Here is my data which produces a heat map. What I am hoping to do is produce multiple difference heatmaps with an outline around each of x categories.
data <- data.frame(id=c("john","john","john","kate","kate","kate","chris","chris","chris"),
group=c("geo","his","math","geo","his","math","geo","his","math"),
grade=c(65,76,87,67,89,98,99,97,96),
class=c("A","A","A","A","A","A","B","B","B"))
data
mine.heatmap <- ggplot(data = data, mapping = aes(x = id, y = group, fill = grade)) +
geom_tile() +
xlab(label = "id") +
ylab(label="group") +
labs(fill="grade")+
scale_fill_gradient2(low = "#800080",
high = "#FF8C00",mid = "white")
x <- mine.heatmap + facet_grid(
cols = vars(class), scales = "free", space = "free"
)
x + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1, size = 18, margin = margin(b=2)))+
theme(axis.text.y= element_text(angle = 0, vjust = 0.5, hjust=1, size = 18)) +
theme(legend.text = element_text(size=14))+
theme(legend.title = element_text(size=14))+
theme(strip.text = element_text(size=14))+
theme(axis.title.x = element_text(size=18)) +theme(axis.title.y = element_text(size=18))
Original Heat map:
What I am hoping to get are the following heatmaps:

One option to achieve your desired result would be to
put your plotting code in a function which takes as one argument the id for which you want to draw a outline.
Use some data wrangling to convert the categories to be plotted on the x and y aes to numerics per facet variable.
Add a geom_rect to your plotting code to draw the outline which uses the numerics computed in step 2.
library(ggplot2)
library(dplyr)
mine_heatmap <- function(x) {
p <- ggplot(data = data, mapping = aes(x = id, y = group, fill = grade)) +
geom_tile() +
# Add outline via a geom_rect
geom_rect(
data = subset(data, id == x),
aes(
xmin = id_num - .5, xmax = id_num + .5,
ymin = min(group_num) - .5, ymax = max(group_num) + .5
), fill = NA, color = "black", size = 1
) +
labs(x = "id", y = "group", fill = "grade") +
scale_fill_gradient2(
low = "#800080",
high = "#FF8C00", mid = "white"
)
p <- p + facet_grid(
cols = vars(class), scales = "free", space = "free"
)
p + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size = 18, margin = margin(b = 2))) +
theme(axis.text.y = element_text(angle = 0, vjust = 0.5, hjust = 1, size = 18)) +
theme(legend.text = element_text(size = 14)) +
theme(legend.title = element_text(size = 14)) +
theme(strip.text = element_text(size = 14)) +
theme(axis.title.x = element_text(size = 18)) + theme(axis.title.y = element_text(size = 18))
}
# Convert id and group to numerics per facet variable
data <- data |>
group_by(class) |>
mutate(
id_num = as.numeric(factor(id)),
group_num = as.numeric(factor(group))
) |>
ungroup()
mine_heatmap("john")
mine_heatmap("kate")
mine_heatmap("chris")

Replace x axis labels with colored bars to get a gradient effect in R

I'm building a barplot with RNA reads % in ggplot, I did this:
ggplot(tipos_exo,aes(x = reorder(sample, -value),y = value,fill = variable)) +
geom_bar( stat = "identity")
I need to replace the x axis labels with colored bars, each sample belongs to a specific batch and I looking for this effect:
Any thoughts?

One option to achieve your desired result would be to create your axis colorbar as a second plot and glue it to the main plot via the patchwork package.
For the colorbar I use geom_tile and remove all non-data ink using theme_void. As a first step I reorder your sample column by value and get rid of the duplicated sample categories using dplyr::distinct.
Using some fake random example data:
set.seed(123)
tipos_exo <- data.frame(
sample = rep(letters, each = 2),
variable = c("tablaq_readsPerc", "tablaq_shortReadsPerc"),
value = runif(52, 0, 100),
batch = rep(LETTERS, each = 2)
)
library(ggplot2)
library(patchwork)
library(dplyr, warn = FALSE)
p1 <- ggplot(tipos_exo,aes(x = reorder(sample, -value),y = value,fill = variable)) +
geom_bar( stat = "identity") +
scale_y_continuous(expand = c(0, 0)) +
labs(x = NULL) +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.ticks.length.x = unit(0, "pt"))
tipos_exo1 <- tipos_exo |>
mutate(sample = reorder(sample, -value)) |>
distinct(sample, batch)
p_axis <- ggplot(tipos_exo1, aes(x = sample, y = factor(1), fill = batch)) +
geom_tile(width = .9) +
geom_text(aes(label = sample)) +
theme_void() +
theme(axis.title.x = element_text()) +
labs(x = "Batch Annotation") +
guides(fill = "none")
p1 / p_axis + plot_layout(heights = c(8, 1))
UPDATE Adapting my answer on this post Reorder Bars of a Stacked Barchart in R you could reorder your sample column by a helper value "column", e.g. if you want to reorder by "tablaq_readsPerc" you set the values for the other categories to zero and use FUN=sum. Note that I also reversed the order of the stack so that the "tablaq_readsPerc" bars are placed at the bottom.
tipos_exo <- tipos_exo |>
mutate(sample1 = reorder(sample, -ifelse(!variable %in% "tablaq_readsPerc", 0, value), FUN = sum))
p1 <- ggplot(tipos_exo,aes(x = sample1, y = value, fill = variable)) +
geom_bar( stat = "identity", position = position_stack(reverse = TRUE)) +
scale_y_continuous(expand = c(0, 0)) +
labs(x = NULL) +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.ticks.length.x = unit(0, "pt"))
tipos_exo1 <- tipos_exo |>
distinct(sample, sample1, batch)
p_axis <- ggplot(tipos_exo1, aes(x = sample1, y = factor(1), fill = batch)) +
geom_tile(width = .9) +
geom_text(aes(label = sample)) +
theme_void() +
theme(axis.title.x = element_text()) +
labs(x = "Batch Annotation") +
guides(fill = "none")
p1 / p_axis + plot_layout(heights = c(8, 1))

Add an additional legend according to the colors of x axis labels

I have modified the colors of my x axis labels according to their group.
For that, I have used the following pseudocode:
library(ggsci)
library(ggplot2)
x_cols = pal_jco()(length(unique(melted_df$Group)))
names(x_cols) = unique(melted_df$Group)
ggplot(melted_df, ... + theme(axis.text.x = element_text(colour = x_cols))
I would like to add a legend to the plot (if possible, outside the plot), that explains the colouring of the x axis labels.
melted_df dataframe looks like this:
Here the full code:
#Generate color mapping
x_cols = pal_jco()(length(unique(melted_df$Group)))
names(x_cols) = unique(melted_df$Group)
melted_df$mycolors = sapply(as.character(melted_df$Group), function(x) x_cols[x])
#Plot
ggplot(melted_df, aes(fill=variable, y=value, x=fct_inorder(id))) +
geom_bar(position="stack", stat = "identity") + ggtitle("Barplot") +
theme_bw() +
xlab("samples") + ylab("Counts") +
theme(axis.title.y=element_text(size=10), axis.title.x=element_text(size=10),
plot.title = element_text(face = "bold", size = (15), hjust = 0.5),
axis.text.x = element_text(distinct(samples_melt[c("id", "mycolors")])$mycolors)) +
guides(fill=guide_legend(title="Columns"))

In the absence of a reproducible example, here is how you might do it with the built-in iris data set:
library(ggplot2)
ggplot(iris, aes(Species, Sepal.Length)) +
stat_summary(fun = mean, geom = "col", aes(fill = Species)) +
geom_point(aes(color = Species), alpha = 0, key_glyph = draw_key_text) +
theme_bw(base_size = 20) +
labs(color = "") +
guides(color = guide_legend(override.aes = list(alpha = 1, size = 8))) +
theme(axis.text.x = element_text(color = scales::hue_pal()(3), face = 2))

I addressed the issue using Legend() constructor, provided by ComplexHeatmap library.
I first used the code provided above under the EDIT section, and then I added the following code in order to draw an additional legend explaining the x-axis colouring.
lgd = Legend(labels = names(x_cols), title = "Group", labels_gp = gpar(fontsize = 8), nrow = 1, legend_gp = gpar(fill = x_cols))
draw(lgd, x = unit(1.8, "cm"), y = unit(0.3, "cm"), just = c("left", "bottom"))

Overlapping text on top of geom_bar in ggplot2

I have made a barplot similar to the one below using ggplot2.
I cannot get the percentages on top of the bars to be centered and not overlapping of other bars and numbers. Sample code is below.
library(tidyverse)
cat1=c("cat1","cat1","cat1","cat1","cat1","cat1","cat1","cat1","cat1","cat1","cat1","cat1",
"cat2","cat2","cat2","cat2","cat2","cat2","cat2","cat2","cat2","cat2","cat2","cat2",
"cat3","cat3","cat3","cat3","cat3","cat3","cat3","cat3","cat3","cat3","cat3","cat3",
"cat4","cat4","cat4","cat4","cat4","cat4","cat4","cat4","cat4","cat4","cat4","cat4")
cat2=c("c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12",
"c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12",
"c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12",
"c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12")
count1=round(rnorm(48,10))
fakeperc=rnorm(48,9)
df1=cbind(count1,fakeperc)
df2=cbind(cat1,cat2)
finaldf=as.data.frame(cbind(df1,df2))
finaldf$cat1=as.factor(finaldf$cat1)
finaldf$fakeperc=as.numeric(finaldf$fakeperc)
#finaldf$cat1=factor(finaldf$cat1,levels = c("cat1","cat2","cat3","cat4"))
finaldf$cat2 = factor(finaldf$cat2,
levels = c("c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12"))
a=ggplot(data=finaldf,aes(x=cat1, y=count1,
fill=cat2,group=cat2)) +
geom_bar(stat='identity',color='black',width=.65,position=position_dodge(width=.9))+
scale_y_discrete(limits=0:50,breaks=c(0,10,20,30,40,50))+
scale_fill_brewer(palette="Set3") +
theme_classic() +
geom_text(data = finaldf,
aes(x=cat1,y=count1,group=cat2,
label=format(paste(round(fakeperc),"%",sep = ""))),inherit.aes = F,
color='black',position=position_dodge(.9),vjust=-.5,size=3)
a
When trying to add either nudge_y or nudge_x to the geom_text call, nothing happens. I suspect this is because there is already a position_dodge call. I am open any and all solutions to make these percentages non-overlapping and legible.

What do you think of this?
# I think you meant count1 to be numeric
finaldf$count1 <- as.numeric(finaldf$count1)
ggplot(data = finaldf,
aes(x = cat1,
y = count1,
fill = cat2,
group = cat2)) +
geom_col(color = 'black',
width = 0.65,
position = position_dodge(width = 0.9)) +
geom_text(data = finaldf,
aes(x = cat1,
y = count1,
group = cat2,
label = scales::percent(fakeperc/100, accuracy = 0.01)),
inherit.aes = FALSE,
color = 'black',
position = position_dodge(0.9),
hjust = -0.1,
size = 3) +
scale_y_continuous(limits = c(0,50), breaks = c(0,10,20,30,40,50)) +
scale_fill_brewer(palette = "Set3") +
theme_classic() +
coord_flip()
I cleaned up a bit the code (according to my taste)
I changed scale_y_numeric to scale_y_continuous (since count1 should be numeric)
I used coord_flip() to make it more readable
I used scales::percent to write percentage numbers
(don't know why you set up limits from 0 to 50 but I left them as I suppposed they were intended)
If you don't want to use coor_flip:
finaldf$count1 <- as.numeric(finaldf$count1)
ggplot(data = finaldf,
aes(x = cat1,
y = count1,
fill = cat2,
group = cat2)) +
geom_col(color = 'black',
width = 0.65,
position = position_dodge(width = 0.9)) +
geom_text(data = finaldf,
aes(x = cat1,
y = count1,
group = cat2,
label = scales::percent(fakeperc/100, accuracy = 0.01)),
inherit.aes = FALSE,
color = 'black',
position = position_dodge(0.9),
hjust = -0.1,
angle = 90,
size = 3) +
scale_y_continuous(limits = c(0,50), breaks = c(0,10,20,30,40,50)) +
scale_fill_brewer(palette = "Set3") +
theme_classic()

Is this what you are looking for:
library(ggplot2)
#Code
ggplot(data=finaldf,aes(x=cat2, y=count1,
fill=cat2,group=cat2)) +
geom_bar(stat='identity',color='black',
position=position_dodge(width=1))+
scale_fill_brewer(palette="Set3") +
theme_bw() +
geom_text(aes(x=cat2,y=count1,group=cat2,
label=format(paste(round(fakeperc),"%",sep = ""))),inherit.aes = F,
color='black',position=position_dodge(1),
size=3,vjust=-0.5)+
facet_wrap(.~cat1,scales = 'free_x',nrow = 1,strip.position = 'bottom')+
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
legend.position = 'top',
strip.background = element_blank(),
panel.spacing = unit(2, "lines"),
panel.grid = element_blank())+
guides(fill = guide_legend(nrow = 1))
Output:

How can I combine multiple ggplot graphs with different dataframes under the same facet?

So, I have the two dataframes that produces two ggplots with the same facet that I want to combine
The first dataframe produces the following ggplot
Dataframe1
library(ggh4x)
library(ggnomics)
library(ggplot2)
library(data.table)
#dataframe
drug <- c("DrugA","DrugB1","DrugB2","DrugB3","DrugC1","DrugC2","DrugC3","DrugC4")
PR <- c(18,430,156,0,60,66,113,250)
GR <- c(16,425,154,0,56,64,111,248)
PS <- c(28,530,256,3,70,76,213,350)
GS <- c(26,525,254,5,66,74,211,348)
group<-c("n=88","n=1910","n=820","n=8","n=252","n=280","n=648","n=1186")
class<-c("Class A","Class B","Class B","Class B","Class C","Class C","Class C","Class C")
df <-data.frame(drug,group, class,PR,GR,PS,GS)
#make wide to long df
df.long <- melt(setDT(df), id.vars = c("drug","group","class"), variable.name = "type")
#Order of variables
df.long$type <- factor(df.long$type, levels=c("PR","GR","PS","GS"))
df.long$class <- factor(df.long$class, levels= c("Class B", "Class A", "Class C"))
df.long$group <- factor(df.long$group, levels= c("n=1910","n=820","n=8","n=88","n=252","n=280","n=648","n=1186"))
df.long$drug <- factor(df.long$drug, levels= c("DrugB1","DrugB2","DrugB3","DrugA","DrugC1","DrugC2","DrugC3","DrugC4"))
Ggplot for dataframe 1
ggplot(df.long, aes(fill = type, x = drug, y = value)) +
geom_bar(aes(fill = type), stat = "identity", position = "dodge", colour="white") +
geom_text(aes(label = value), position = position_dodge(width = 1.2), vjust = -0.5)+
scale_fill_manual(values = c("#fa9fb5","#dd1c77","#bcbddc","756bb1")) +
scale_y_continuous(expand = c(0, 0), limits = c(0, 600)) +
theme(title = element_text(size = 18),
legend.text = element_text(size = 12),
axis.text.x = element_text(size = 9),
axis.text.y =element_text(size = 15),
plot.title = element_text(hjust = 0.5)) +
ggh4x::facet_nested(.~class + group, scales = "free_x", space= "free_x")
This is the 2nd dataframe
#dataframe 2
drug <- c("DrugA","DrugB1","DrugB2","DrugB3","DrugC1","DrugC2","DrugC3","DrugC4")
Sens <- c(0.99,0.97,NA,0.88,0.92,0.97,0.98,0.99)
Spec <- c(1,0.99,1,0.99,0.99,0.99,0.99,1)
class<-c("Class A","Class B","Class B","Class B","Class C","Class C","Class C","Class C")
df2 <-data.frame(drug,class,Sens,Spec)
#wide to long df2
df2.long <- melt(setDT(df2), id.vars = c("drug","class"), variable.name = "type")
#additional variables
df2.long$UpperCI <- c(0.99,0.99,NA,0.98,0.98,0.99,0.99,0.99,1,1,1,1,1,1,1,1)
df2.long$LowerCI <- c(0.97,0.98,NA,0.61,0.83,0.88,0.93,0.97,0.99,0.99,0.99,0.99,0.98,0.99,0.99,0.99)
#order of variables
df2.long$class <- factor(df2.long$class, levels= c("Class B", "Class A", "Class C"))
Ggplot for dataframe 2
ggplot(df2.long, aes(x=drug, y=value, group=type, color=type)) +
geom_line() +
geom_point()+
geom_errorbar(aes(ymin=LowerCI, ymax=UpperCI), width=.2,
position=position_dodge(0.05)) +
scale_y_continuous(labels=scales::percent)+
labs(x="drug", y = "Percentage")+
theme_classic() +
scale_color_manual(values=c('#999999','#E69F00')) +
theme(legend.text=element_text(size=12),
axis.text.x=element_text(size=9),
axis.text.y =element_text(size=15),
panel.background = element_rect(fill = "whitesmoke"))+
facet_wrap(facets = vars(class),scales = "free_x")
So I am trying to combine the two plots under the one facet (the one from dataframe 1), and so far I have done the following
ggplot(df.long)+
aes(x=drug, y=value,fill = type)+
geom_bar(, stat = "identity", position = "dodge", colour="white") +
geom_text(aes(label=value), position=position_dodge(width=0.9), vjust=-0.5, size=2) +
scale_fill_manual(breaks=c("PR","GR","PS","GS"),
values=c("#dd1c77","#756bb1","#fa9fb5","#e7e1ef","black","black")) +
scale_y_continuous(expand = c(0, 0), limits = c(0, 1100),sec.axis=sec_axis(~./10, labels = function(b) { paste0(b, "%")},name="Percentage")) + #remove space between x axis labels and bottom of chart
theme(legend.text=element_text(size=12),
legend.position = 'bottom',
axis.text.x=element_text(size=9),
axis.text.y =element_text(size=15),
panel.background = element_rect(fill = "whitesmoke"), #color of plot background
panel.border = element_blank(), #remove border panels of each facet
strip.background = element_rect(colour = NA)) + #remove border of strip
labs(y = "Number of isolates", fill = "")+
geom_errorbar(data=df2.long,aes(x=drug, y=value*1000,ymin=LowerCI*1000, ymax=UpperCI*1000,color=type), width=.2,
position=position_dodge(0.05))+
geom_point(data=df2.long,aes(x=drug,y=value*1000,color=type),show.legend = F)+
geom_line(data=df2.long, aes(x=drug, y=value*1000, group=type, color=type)) +
scale_color_manual(values=c('#999999','#E69F00'))
but I'm stuck on adding the facet from the plot1. I hope anyone can help :)

For this specific case, I don't think the nested facets are the appropriate solution as the n = ... seems metadata of the x-axis group instead of a subcategory of the classes.
Here is how you could plot the data with facet_grid() instead:
ggplot(df.long, aes(drug, value, fill = type)) +
geom_col(position = "dodge") +
geom_text(aes(label = value),
position = position_dodge(0.9),
vjust = -0.5, size = 2) +
geom_errorbar(data = df2.long,
aes(y = value * 1000, color = type,
ymin = LowerCI * 1000, ymax = UpperCI * 1000),
position = position_dodge(0.05), width = 0.2) +
geom_point(data = df2.long,
aes(y = value * 1000, color = type),
show.legend = FALSE) +
geom_line(data = df2.long,
aes(y = value * 1000, group = type, color = type)) +
scale_fill_manual(breaks = c("PR", "GR", "PS", "GS"),
values=c("#dd1c77","#756bb1","#fa9fb5","#e7e1ef","black","black")) +
scale_color_manual(values=c('#999999','#E69F00')) +
scale_y_continuous(expand = c(0, 0), limits = c(0, 1100),
sec.axis = sec_axis(~ ./10,
labels = function(b) {
paste0(b, "%")
}, name = "Percentage")) +
scale_x_discrete(
labels = levels(interaction(df.long$drug, df.long$group, sep = "\n"))
) +
facet_grid(~ class, scales = "free_x", space = "free_x") +
theme(legend.text=element_text(size=12),
legend.position = 'bottom',
axis.text.x=element_text(size=9),
axis.text.y =element_text(size=15),
panel.background = element_rect(fill = "whitesmoke"), #color of plot background
panel.border = element_blank(), #remove border panels of each facet
strip.background = element_rect(colour = NA))
If you insist on including the n = ... labels, perhaps a better way is to add these as text somehwere, i.e. adding the following:
stat_summary(fun = sum,
aes(group = drug, y = stage(value, after_stat = -50),
label = after_stat(paste0("n = ", y))),
geom = "text") +
And setting the y-axis limits to c(-100, 1000) for example.

Develop Reference

r css asp.net wordpress firebase qt symfony nginx http apache-flex

Creating ggplot geom_point() with position dodge 's-shape' - r

Related

Adding a black outline for each x category in heat map in R

Replace x axis labels with colored bars to get a gradient effect in R

Add an additional legend according to the colors of x axis labels

Overlapping text on top of geom_bar in ggplot2

How can I combine multiple ggplot graphs with different dataframes under the same facet?

Categories

Resources