Split parts of a stacked barplot into individual series - r

I want to create in R 3.2.2 a barplot with stacked bars but with each part of each bar splitted into an individual series.
Example data frame:
num_var_x = 14
num_var_y = 17
x = runif(num_var_x, 0.0, 1.0)
norm = x/sum(x)
data = data.frame(replicate(num_var_y,sample(norm)))
EDIT:
Thanks to Floo0 I have come up with this continuation of the code:
## preparing dataset for ggplot
require(ggplot2)
require(reshape2)
data$no <- seq_len(nrow(data))
data_molten <- melt(data, id.vars = "no")
data_molten_sort = data_molten[with(data_molten,order(no)),]
## removing elements from variable 'no' whose max. value is e.g. < 0.025
sequence = seq(from=1, to=(num_var_y*num_var_x-num_var_x)+1, by=num_var_x)
for(i in 1:length(sequence))
{
if(isTRUE((max(data_molten_sort$value[(sequence[i]):((num_var_x+sequence[i])-(1))])) < 0.025))
{
data_molten_sort$value[(sequence[i]):((num_var_x+sequence[i])-(1))] = NA
}
}
View(data_molten)
## preparing posterior exporting
#install.packages("Cairo"); "cairo" type in png() has a better quality
library("Cairo")
#preparing exporting
png(file="ggplot.png",type="cairo", width = 4, height = 5, units = 'in',pointsize=8,res=600)
## plotting
ggplot(data_molten[!is.na(data_molten$value),], aes(x = variable, y = value, fill = factor(no))) +
geom_bar(stat = "identity") +
scale_fill_hue(l=40) + facet_grid(no~., as.table=FALSE, scale="free_y", space = "free_y") + theme_minimal() +
geom_vline(xintercept=max(as.numeric(data_molten$variable)) + 0.586, size=0.3) +
theme(legend.position="none",
axis.text.x = element_text(angle = 90, colour="black", vjust = 0.4, hjust=1, size=8),
axis.title.x = element_blank(), axis.title.y = element_blank(),
axis.line.y=element_blank(), axis.text.y=element_blank(), axis.ticks.y=element_blank(),
strip.text.y=element_text(size = 8, colour="black", family="", angle=00,hjust = 0.1),
panel.grid=element_blank(),
axis.line=element_line(size = 0.3, colour = "black", linetype = "solid"),
axis.ticks.x=element_line(size = 0.3, colour = "black", linetype = "solid"),
panel.background=element_blank(), panel.margin = unit(0, "lines"))
## exporting barplot "ggplot.png" to directory
dev.off()
which produces the desired barplot:
http://i.imgur.com/C6h5fPg.png?1

You can use ggplot2 to do that as follows:
require(ggplot2)
require(reshape2)
data$no <- seq_len(nrow(data))
data_molten <- melt(data, id.vars = "no")
If you want the rows to have different hights, have a look at: Different y-Axis Labels facet_grid and sizes
I am not 100% sure in which direction you want the plot to be turned:
Version 1
ggplot(data_molten, aes(x = no, y = value, fill = variable)) + geom_bar(stat = "identity") +
facet_grid(variable~.) + theme(legend.position="none")
Version 2
Thx bergant fot the comment
ggplot(data_molten, aes(x = variable, y = value, fill = factor(no))) + geom_bar(stat = "identity") +
facet_grid(no~.) + theme(legend.position="none")
Original
ggplot(data_molten, aes(x = no, y = value, fill = variable)) + geom_bar(stat = "identity")

Related

Replace x axis labels with colored bars to get a gradient effect in R

I'm building a barplot with RNA reads % in ggplot, I did this:
ggplot(tipos_exo,aes(x = reorder(sample, -value),y = value,fill = variable)) +
geom_bar( stat = "identity")
I need to replace the x axis labels with colored bars, each sample belongs to a specific batch and I looking for this effect:
Any thoughts?
One option to achieve your desired result would be to create your axis colorbar as a second plot and glue it to the main plot via the patchwork package.
For the colorbar I use geom_tile and remove all non-data ink using theme_void. As a first step I reorder your sample column by value and get rid of the duplicated sample categories using dplyr::distinct.
Using some fake random example data:
set.seed(123)
tipos_exo <- data.frame(
sample = rep(letters, each = 2),
variable = c("tablaq_readsPerc", "tablaq_shortReadsPerc"),
value = runif(52, 0, 100),
batch = rep(LETTERS, each = 2)
)
library(ggplot2)
library(patchwork)
library(dplyr, warn = FALSE)
p1 <- ggplot(tipos_exo,aes(x = reorder(sample, -value),y = value,fill = variable)) +
geom_bar( stat = "identity") +
scale_y_continuous(expand = c(0, 0)) +
labs(x = NULL) +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.ticks.length.x = unit(0, "pt"))
tipos_exo1 <- tipos_exo |>
mutate(sample = reorder(sample, -value)) |>
distinct(sample, batch)
p_axis <- ggplot(tipos_exo1, aes(x = sample, y = factor(1), fill = batch)) +
geom_tile(width = .9) +
geom_text(aes(label = sample)) +
theme_void() +
theme(axis.title.x = element_text()) +
labs(x = "Batch Annotation") +
guides(fill = "none")
p1 / p_axis + plot_layout(heights = c(8, 1))
UPDATE Adapting my answer on this post Reorder Bars of a Stacked Barchart in R you could reorder your sample column by a helper value "column", e.g. if you want to reorder by "tablaq_readsPerc" you set the values for the other categories to zero and use FUN=sum. Note that I also reversed the order of the stack so that the "tablaq_readsPerc" bars are placed at the bottom.
tipos_exo <- tipos_exo |>
mutate(sample1 = reorder(sample, -ifelse(!variable %in% "tablaq_readsPerc", 0, value), FUN = sum))
p1 <- ggplot(tipos_exo,aes(x = sample1, y = value, fill = variable)) +
geom_bar( stat = "identity", position = position_stack(reverse = TRUE)) +
scale_y_continuous(expand = c(0, 0)) +
labs(x = NULL) +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.ticks.length.x = unit(0, "pt"))
tipos_exo1 <- tipos_exo |>
distinct(sample, sample1, batch)
p_axis <- ggplot(tipos_exo1, aes(x = sample1, y = factor(1), fill = batch)) +
geom_tile(width = .9) +
geom_text(aes(label = sample)) +
theme_void() +
theme(axis.title.x = element_text()) +
labs(x = "Batch Annotation") +
guides(fill = "none")
p1 / p_axis + plot_layout(heights = c(8, 1))

R ggplot2 package: plotting Line (geom_line)over on Bar Graph (geom_bar) from separate data frames

I have a bar graph (created using ggplot2 package) coming from one set of yearly data (with multiple y values)and I want to overlay on it data from another set of yearly data in the form of a line. Here is my code:
library (zoo)
require(ggplot2)
library(reshape)
library(Cairo)
library(reshape2)
x<-c(2000,2001,2002,2003,2004)
y1<-c(41,62,71,316,172)
y2<-c(3018,2632,2643,2848,2738)
y3<-c(3065,2709,2721,3192,2925)
dat1 <- data.frame(Year=x, y1, y2)
dat.m1 <- melt(dat1, id.vars='Year')
a<-ggplot(dat.m1, aes(Year, value)) +
geom_bar(width=0.6,aes(fill = variable),stat = "identity")+
xlab("Year") + ylab("Water Depth (mm)")+
theme(legend.position="top")+
theme(panel.background = element_rect(fill = 'white', colour = 'black'))+
theme(axis.text=element_text(size=13),axis.title=element_text(size=14))+
theme(legend.text=element_text(size=14))+
theme(plot.margin=unit(c(0.2,0.7,0.5,0.2),"cm"))+
guides(fill = guide_legend(title="", title.position="top", direction="horizontal"))
a
At this stage, bar plot is running nicely but when I tried to add line plot from different data frame as follow:
dat2 <- data.frame(Year=x, y3)
dat.m2 <- melt(dat2, id.vars='Year')
b<-ggplot(dat.m1, aes(Year, value)) +
geom_bar(width=0.6,aes(fill = variable),stat = "identity")+
geom_line(dat.m2, aes(x = x, y = y3), size = 1.5, color="red") +
xlab("Year") + ylab("Water Depth (mm)")+
theme(legend.position="top")+
theme(panel.background = element_rect(fill = 'white', colour = 'black'))+
theme(axis.text=element_text(size=13),axis.title=element_text(size=14))+
theme(legend.text=element_text(size=14))+
theme(plot.margin=unit(c(0.2,0.7,0.5,0.2),"cm"))+
guides(fill = guide_legend(title="", title.position="top", direction="horizontal"))
b
It did not work and I received this error message:
"Error in validate_mapping():
! mapping must be created by aes()
Run rlang::last_error() to see where the error occurred."
Anyone can help me to fix this issue? Also, any suggestion to add a line plot with each bar in the first data frame?
You need to add argument name data in geom_line(). Otherwise dat.m2 is received as mapping to the geom_line function.
dat2 <- data.frame(Year = x, y3)
dat.m2 <- melt(dat2, id.vars = 'Year')
b <- ggplot(dat.m1, aes(Year, value)) +
geom_bar(width = 0.6, aes(fill = variable), stat = "identity") +
geom_line(data = dat.m2, aes(x = x, y = y3), size = 1.5, color = "red") + # adding data argument name
xlab("Year") + ylab("Water Depth (mm)") +
theme(legend.position = "top") +
theme(panel.background = element_rect(fill = 'white', colour = 'black')) +
theme(axis.text = element_text(size = 13),
axis.title = element_text(size = 14)) +
theme(legend.text = element_text(size = 14)) +
theme(plot.margin = unit(c(0.2, 0.7, 0.5, 0.2), "cm")) +
guides(fill = guide_legend(
title = "",
title.position = "top",
direction = "horizontal"
))
b

Add an additional legend according to the colors of x axis labels

I have modified the colors of my x axis labels according to their group.
For that, I have used the following pseudocode:
library(ggsci)
library(ggplot2)
x_cols = pal_jco()(length(unique(melted_df$Group)))
names(x_cols) = unique(melted_df$Group)
ggplot(melted_df, ... + theme(axis.text.x = element_text(colour = x_cols))
I would like to add a legend to the plot (if possible, outside the plot), that explains the colouring of the x axis labels.
melted_df dataframe looks like this:
Here the full code:
#Generate color mapping
x_cols = pal_jco()(length(unique(melted_df$Group)))
names(x_cols) = unique(melted_df$Group)
melted_df$mycolors = sapply(as.character(melted_df$Group), function(x) x_cols[x])
#Plot
ggplot(melted_df, aes(fill=variable, y=value, x=fct_inorder(id))) +
geom_bar(position="stack", stat = "identity") + ggtitle("Barplot") +
theme_bw() +
xlab("samples") + ylab("Counts") +
theme(axis.title.y=element_text(size=10), axis.title.x=element_text(size=10),
plot.title = element_text(face = "bold", size = (15), hjust = 0.5),
axis.text.x = element_text(distinct(samples_melt[c("id", "mycolors")])$mycolors)) +
guides(fill=guide_legend(title="Columns"))
In the absence of a reproducible example, here is how you might do it with the built-in iris data set:
library(ggplot2)
ggplot(iris, aes(Species, Sepal.Length)) +
stat_summary(fun = mean, geom = "col", aes(fill = Species)) +
geom_point(aes(color = Species), alpha = 0, key_glyph = draw_key_text) +
theme_bw(base_size = 20) +
labs(color = "") +
guides(color = guide_legend(override.aes = list(alpha = 1, size = 8))) +
theme(axis.text.x = element_text(color = scales::hue_pal()(3), face = 2))
I addressed the issue using Legend() constructor, provided by ComplexHeatmap library.
I first used the code provided above under the EDIT section, and then I added the following code in order to draw an additional legend explaining the x-axis colouring.
lgd = Legend(labels = names(x_cols), title = "Group", labels_gp = gpar(fontsize = 8), nrow = 1, legend_gp = gpar(fill = x_cols))
draw(lgd, x = unit(1.8, "cm"), y = unit(0.3, "cm"), just = c("left", "bottom"))

Creating ggplot geom_point() with position dodge 's-shape'

I am trying to create a plot like the one below. I'd like the order the points in each category in such a way that they form an s-shape. Is it possible to do this in ggplot?
Similar data available here
What I have so far:
somatic.variants <- read.delim("data/Lawrence.S2.txt", stringsAsFactors=T)
cancer_rates <- tapply(somatic.variants$logn_coding_mutations, somatic.variants$tumor_type, median)
cancer_rates <- cancer_rates[order(cancer_rates, decreasing=F)]
somatic.variants$tumor_type <- factor(somatic.variants$tumor_type, levels = names(cancer_rates))
library(ggplot2)
library(GGally)
ggplot(data = somatic.variants,
mapping = aes(x = tumor_type,
y = log10(n_coding_mutations))) +
geom_point(position = position_dodge2()) +
scale_x_discrete(position = "top") +
scale_y_continuous(labels = c(0,10,100,1000,10000), expand = c(0,0)) +
geom_stripped_cols() +
theme_bw() +
theme(axis.title.x = element_blank(),
axis.text.x = element_text(angle = 315, hjust = 1, size = 12),
panel.grid = element_blank()) +
labs(y = "Coding mutations count") +
stat_summary(fun = median,
geom="crossbar",
size = 0.25,
width = 0.9,
group = 1,
show.legend = FALSE,
color = "#FF0000")
This could be achieved by
grouping the data by x-axis categories
arranging by the y-axis value
which ensures that the points are plotted in ascending order of the values for each category.
somatic.variants <- read.delim("https://gist.githubusercontent.com/wudustan/57deecdaefa035c1ecabf930afde295a/raw/1594d51a1e3b52f674ff746caace3231fd31910a/Lawrence.S2.txt", stringsAsFactors=T)
cancer_rates <- tapply(somatic.variants$logn_coding_mutations, somatic.variants$tumor_type, median)
cancer_rates <- cancer_rates[order(cancer_rates, decreasing=F)]
somatic.variants$tumor_type <- factor(somatic.variants$tumor_type, levels = names(cancer_rates))
library(ggplot2)
library(GGally)
library(dplyr)
somatic.variants <- somatic.variants %>%
group_by(tumor_type) %>%
arrange(n_coding_mutations)
ggplot(data = somatic.variants,
mapping = aes(x = tumor_type,
y = log10(n_coding_mutations))) +
geom_point(position = position_dodge2(.9), size = .25) +
scale_x_discrete(position = "top") +
scale_y_continuous(labels = c(0,10,100,1000,10000), expand = c(0,0)) +
geom_stripped_cols() +
theme_bw() +
theme(axis.title.x = element_blank(),
axis.text.x = element_text(angle = 315, hjust = 1, size = 12),
panel.grid = element_blank()) +
labs(y = "Coding mutations count") +
stat_summary(fun = median,
geom="crossbar",
size = 0.25,
width = 0.9,
group = 1,
show.legend = FALSE,
color = "#FF0000")
#> Warning: Removed 29 rows containing non-finite values (stat_summary).

Bar graph: Combine one stacked bar with one dodged bar

I'm trying to recreate a bar graph found on page 4 of the following report:
The figure has three bars with the first two stacked and the third dodged next to it. I've seen iterations of this question but none that recreate the figure in this exact way.
Here is the data:
a <- rep(c('RHB', 'FERS', 'CSRS'), 3)
b <- c(rep('Assets', 3), rep('Amount Past Due', 3),
rep('Actuarial Liability', 3))
c <- c(45.0, 122.5, 152.3, 47.2, 3.4, 4.8, 114.4, 143.4, 181.3)
df <- data.frame(a,b,c)
names(df) <- c('Fund', 'Condition', 'Value')
And what I've managed so far:
p <- ggplot(subset_data, aes(fill=Condition, y=Value, x=Fund)) +
geom_bar(position="stack", stat="identity") +
coord_flip()
I'm not partial to ggplot so if there's another tool that works better I'm ok using another package.
Taking some ideas from the link #aosmith posted.
You can call geom_bar twice, once with Assets and Amounts Past Due stacked, and again with just Actuarial Liability.
You can use width to make the bars thinner, then nudge one set of bars so the two geom_bar calls are not overlapping. I chose to make the width 0.3 and nudge by 0.3 so the edges just line up. If you nudge by more you will see a gap between the two bars.
Edit: add some more formatting and numeric labels
library(tidyverse)
library(scales)
df_al <- filter(df, Condition == 'Actuarial Liability')
df_xal <- filter(df, Condition != 'Actuarial Liability')
bar_width <- 0.3
hjust_lab <- 1.1
hjust_lab_small <- -0.2 # hjust for labels on small bars
ggplot() +
theme_classic() +
geom_bar(data = df_al,
aes(fill=Condition, y=Value, x=Fund),
position = position_nudge(x = -bar_width),
width = bar_width,
stat="identity") +
geom_bar(data = df_xal,
aes(fill=Condition, y=Value, x=Fund),
position="stack",
stat="identity",
width = bar_width) +
geom_text(data = df_al,
aes(label= dollar(Value, drop0trailing = TRUE), y=Value, x=Fund),
position = position_nudge(x = -bar_width),
hjust = hjust_lab) +
geom_text(data = df_xal,
aes(label= dollar(Value, drop0trailing = TRUE), y=Value, x=Fund),
position="stack",
hjust = ifelse(df_xal$Value < 5, hjust_lab_small, hjust_lab)) +
scale_fill_manual(values = c('firebrick3', 'lightsalmon', 'dodgerblue')) +
scale_y_continuous(breaks = seq(0,180, by = 20), labels = dollar) +
coord_flip() +
labs(x = NULL, y = NULL, fill = NULL) +
theme(legend.position = "bottom")
I think I would use the "sneaky facet" method, after adding a dummy variable to dodge the columns and making Fund a factor with the correct order:
df$not_liability <-df$Condition != "Actuarial Liability"
df$Fund <- factor(df$Fund, levels = c('RHB', 'FERS', 'CSRS'))
Most of the plotting code is then an attempt to copy the look of the supplied plot:
ggplot(df, aes(fill=Condition, y=Value, x=not_liability)) +
geom_bar(position = "stack", stat = "identity") +
scale_x_discrete(expand = c(0.5, 0.5)) +
scale_y_continuous(breaks = 0:10 * 20, labels = scales::dollar) +
coord_flip() +
facet_grid(Fund~., switch = "y") +
scale_fill_manual(values = c("#c00000", "#f7c290", "#0071bf"), name = "") +
theme_classic() +
theme(panel.spacing = unit(0, "points"),
strip.background = element_blank(),
axis.text.y = element_blank(),
axis.ticks.length.y = unit(0, "points"),
axis.title = element_blank(),
strip.placement = "outside",
strip.text = element_text(),
legend.position = "bottom",
panel.grid.major.x = element_line())

Resources