bar z-index for ggplot2 geom_bar - r

I have created the following graphic ggplot
with the following code;
p <- ggplot(df, aes(x=100*prop_select, y=(1500-s_meanResponse), fill=product)) +
geom_bar(stat='identity', width=2, alpha=.6, color='#333333') +
coord_cartesian(xlim = c(0, 5)) +
coord_flip()
print(p)
I am attempting to intentionally overlap the bars. I would like to know how to change the z-index (depth) of each of the bars. I attempted to do this by simply reordering the levels of the factor column that determines my bars
# Order by mean response to make sure that bars are layered correctly (does not work!)
# Switching this minus makes no difference to the bar z-index but does reverse legend entries
df <- df[sort.list(-df$s_meanResponse),]
df$product <- factor(df$product, levels=df$product)
Anybody know if this is possible with ggplot2?
EDIT:
dataframe is structured similar to below
df <- data.frame( product=c('a','b','c'), s_meanResponse=c(1120,1421,1320), prop_select=c(.3,.2,.5))

I'm using the following df with actual overlapping:
df <- data.frame(product=c('a','b','c'),
s_meanResponse=c(1120,1421,1320),
prop_select=c(.311,.32,.329))
It seems that the plotting order remains the same regardless of the factor levels ordering, instead it just plots bars from lowest to highest y-value. To achieve custom ordering, we'll have to do the following, plotting layers explicitly one by one in the desired order:
geom_bars_with_order <- function(vals)
{
l <- list()
for (i in vals)
{
l <- c(l, geom_bar(data = df[df$product == i, ],
stat='identity', width=2, alpha=.6, color='#333333'))
}
l
}
# default order
ggplot(NULL, aes(x=100*prop_select, y=(1500-s_meanResponse), fill=product)) +
geom_bars_with_order(c("a", "b", "c")) +
coord_cartesian(xlim = c(0, 5)) +
coord_flip()
# custom order, "a" on top
ggplot(NULL, aes(x=100*prop_select, y=(1500-s_meanResponse), fill=product)) +
geom_bars_with_order(c("b", "c", "a")) +
coord_cartesian(xlim = c(0, 5)) +
coord_flip()

Related

R ggplot2: place value of column on top of stacked bars

There might be a duplicate, but I do not find an answer that applies to my particular case...
I just have a very simple data frame like the one below, with counts in two columns (Number_NonHit_Cells, Number_Hit_Cells) that I want to show in stacked bars, having the value of another column (Freq) placed on top of the stacked bars.
The MWE below is the best I have been able to get so far, but I only need the value of Freq once, and at the very top of the bars combined...
It would even be better if Freq could be calculated inside the ggplot2 call.
This is my MWE:
clono_df_long <- data.frame(Clonotype=LETTERS[1:5], Number_Hit_Cells=c(234,56,568,34,46),
Number_NonHit_Cells=c(c(52,12,234,21,31)))
clono_df_long$Clonotype_Size <- clono_df_long$Number_Hit_Cells+clono_df_long$Number_NonHit_Cells
clono_df_long$Freq <- round(clono_df_long$Number_Hit_Cells/clono_df_long$Clonotype_Size,4)*100
clono_df_long <- as.data.frame(tidyr::pivot_longer(clono_df_long,
-c(Clonotype,Clonotype_Size,Freq),
names_to = "Cells", values_to = "Value"))
clono_df_long$Clonotype <- factor(clono_df_long$Clonotype, levels=unique(clono_df_long$Clonotype))
clono_df_long$Cells <- factor(clono_df_long$Cells, levels=c('Number_NonHit_Cells','Number_Hit_Cells'))
P <- ggplot2::ggplot(clono_df_long, ggplot2::aes(x=Clonotype, y=Value, fill=Cells)) +
ggplot2::geom_bar(stat="identity") +
ggplot2::scale_fill_manual(values=c('gray70', 'gray40')) +
ggplot2::geom_text(ggplot2::aes(label=paste0(Freq,'%')), vjust=-1) +
ggplot2::theme_light()
grDevices::pdf(file='test.pdf', height=6, width=6)
print(P)
grDevices::dev.off()
Which produces this:
You may try
clono_df_long$Freq <- ifelse(clono_df_long$Cells == "Number_NonHit_Cells", clono_df_long$Freq, NA)
ggplot2::ggplot(clono_df_long, ggplot2::aes(x=Clonotype, y=Value, fill=Cells)) +
ggplot2::geom_bar(stat="identity") +
ggplot2::scale_fill_manual(values=c('gray70', 'gray40')) +
#ggplot2::geom_text(ggplot2::aes(label=paste0(Freq,'%')), vjust=-1) +
ggplot2::theme_light() +
ggplot2::geom_text(aes(label = scales::percent(Freq/100) ),position = "stack")

Add labels above top axis in ggplot2 graph while keeping original x axis on bottom

I'm trying to add some labels to a ggplot2 boxplot to indicate the number of observations, and I'd like that annotation to appear above the top axis of the graph. I can add them inside the graph pretty easily, and I suspect there's an application of ggplot_gtable that might do this, but I don't understand how to use that (a point in the direction of a good tutorial would be much appreciated). Here's some example data with labels:
Count <- sample(100:500, 3)
MyData <- data.frame(Category = c(rep("A", Count[1]), rep("B", Count[2]),
rep("C", Count[3])),
Value = c(rnorm(Count[1], 10),
rnorm(Count[2], 20),
rnorm(Count[3], 30)))
MyCounts <- data.frame(Category = c("A", "B", "C"),
Count = Count)
MyCounts$Label <- paste("n =", MyCounts$Count)
ggplot(MyData, aes(x = Category, y = Value)) +
geom_boxplot() +
annotate("text", x = MyCounts$Category, y = 35,
label = MyCounts$Label)
What I'd love is for the "n = 441" and other labels to appear above the graph rather than just inside the upper boundary. Any suggestions?
Rather than separately calculating the counts, you can add the counts with geom_text and the original data frame (MyData). The key is that we need to add stat="count" inside geom_text so that counts will be calculated and can be used as the text labels.
theme_set(theme_classic())
ggplot(MyData, aes(x = Category, y = Value)) +
geom_boxplot() +
geom_text(stat="count", aes(label=paste0("n=",..count..)), y=1.05*max(MyData$Value)) +
expand_limits(y=1.05*max(MyData$Value))
To put the labels above the plot, add some space above the plot area for the text labels and then use the code in the answer linked by #aosmith to override clipping:
library(grid)
theme_set(theme_bw())
p = ggplot(MyData, aes(x = Category, y = Value)) +
geom_boxplot() +
geom_text(stat="count", aes(label=paste0("n=",..count..)),
y=1.06*max(MyData$Value), size=5) +
theme(plot.margin=margin(t=20))
# Override clipping
gt <- ggplot_gtable(ggplot_build(p))
gt$layout$clip[gt$layout$name == "panel"] <- "off"
grid.draw(gt)

Violin plots with additional points

Suppose I make a violin plot, with say 10 violins, using the following code:
library(ggplot2)
library(reshape2)
df <- melt(data.frame(matrix(rnorm(500),ncol=10)))
p <- ggplot(df, aes(x = variable, y = value)) +
geom_violin()
p
I can add a dot representing the mean of each variable as follows:
p + stat_summary(fun.y=mean, geom="point", size=2, color="red")
How can I do something similar but for arbitrary points?
For example, if I generate 10 new points, one drawn from each distribution, how could I plot those as dots on the violins?
You can give any function to stat_summary provided it just returns a single value. So one can use the function sample. Put extra arguments such as size, in the fun.args
p + stat_summary(fun.y = "sample", geom = "point", fun.args = list(size = 1))
Assuming your points are qualified using the same group names (i.e., variable), you should be able to define them manually with:
newdf <- group_by(df, variable) %>% sample_n(10)
p + geom_point(data=newdf)
The points can be anything, including static numbers:
newdf <- data.frame(variable = unique(df$variable), value = seq(-2, 2, len=10))
p + geom_point(data=newdf)
I had a similar problem. Code below exemplifies the toy problem - How does one add arbitrary points to a violin plot? - and solution.
## Visualize data set that comes in base R
head(ToothGrowth)
## Make a violin plot with dose variable on x-axis, len variable on y-axis
# Convert dose variable to factor - Important!
ToothGrowth$dose <- as.factor(ToothGrowth$dose)
# Plot
p <- ggplot(ToothGrowth, aes(x=dose, y=len)) +
geom_violin(trim = FALSE) +
geom_boxplot(width=0.1)
# Suppose you want to add 3 blue points
# [0.5, 10], [1,20], [2, 30] to the plot.
# Make a new data frame with these points
# and add them to the plot with geom_point().
TrueVals <- ToothGrowth[1:3,]
TrueVals$len <- c(10,20,30)
# Make dose variable a factor - Important for positioning points correctly!
TrueVals$dose <- as.factor(c(0.5, 1, 2))
# Plot with 3 added blue points
p <- ggplot(ToothGrowth, aes(x=dose, y=len)) +
geom_violin(trim = FALSE) +
geom_boxplot(width=0.1) +
geom_point(data = TrueVals, color = "blue")

Stacked bubble chart, "bottom aligned"

New to programming and first time post.
I'm trying to create a stacked bubble chart to display how a population breaks down into it's proportions. My aim is to write this as a function so that I can use it repeatedly easily, but I need to get the meat of the code sorted before turning it to a function.
This is the type of plot I would like:
This is the code I've tried so far:
library(ggplot2)
# some data
observations = c(850, 500, 200, 50)
plot_data = data.frame(
"x" = rep.int(1,length(observations))
,"y" = rep.int(1,length(observations))
, "size" = rep.int(1,length(observations))
,"colour" = c(1:length(observations))
)
# convert to percentage for relative sizes
for (i in 1:length(observations))
{
plot_data$size[i] = (observations[i]/max(observations))*100
}
ggplot(plot_data,aes(x = x, y = y)) +
geom_point(aes(size = size, color = colour)) +
scale_size_identity() +
scale_y_continuous (limits = c(0.5, 1.5)) +
theme(legend.position = "none")
This produces a bullseye type image.
My approach has been to try and work out how the circle radii are calculated, and then update the y value in the for loop for each entry such that all the circles touch at the base - this is where I have been failing.
So my question:
How can I work out what the y coordinates for each circle needs to be?
Thank you for any help and hints.
I think this simplifies the answer that Henrick found:
circle <- function(center, radius, group) {
th <- seq(0, 2*pi, len=200)
data.frame(group=group,
x=center[1] + radius*cos(th),
y=center[2] + radius*sin(th))
}
# Create a named vector for your values
obs <- c(Org1=500, Org2=850, Org3=50, Org4=200)
# this reverse sorts them (so the stacked layered circles work)
# and makes it a list
obs <- as.list(rev(sort(obs)))
# need the radii
rads <- lapply(obs, "/", 2)
# need the max
x <- max(sapply(rads, "["))
# build a data frame of created circles
do.call(rbind.data.frame, lapply(1:length(rads), function(i) {
circle(c(x, rads[[i]]), rads[[i]], names(rads[i]))
})) -> dat
# make the plot
gg <- ggplot(dat)
gg <- gg + geom_polygon(aes(x=x, y=y, group=group, fill=group),
color="black")
gg <- gg + coord_equal()
gg <- gg + ggthemes::theme_map()
gg <- gg + theme(legend.position="right")
gg
You can tweak the guides/colors with standard ggplot functions.

Align multiple ggplot graphs with and without legends [duplicate]

This question already has answers here:
Align multiple plots in ggplot2 when some have legends and others don't
(6 answers)
Closed 5 years ago.
I'm trying to use ggplot to draw a graph comparing the absolute values of two variables, and also show the ratio between them. Since the ratio is unitless and the values are not, I can't show them on the same y-axis, so I'd like to stack vertically as two separate graphs with aligned x-axes.
Here's what I've got so far:
library(ggplot2)
library(dplyr)
library(gridExtra)
# Prepare some sample data.
results <- data.frame(index=(1:20))
results$control <- 50 * results$index
results$value <- results$index * 50 + 2.5*results$index^2 - results$index^3 / 8
results$ratio <- results$value / results$control
# Plot absolute values
plot_values <- ggplot(results, aes(x=index)) +
geom_point(aes(y=value, color="value")) +
geom_point(aes(y=control, color="control"))
# Plot ratios between values
plot_ratios <- ggplot(results, aes(x=index, y=ratio)) +
geom_point()
# Arrange the two plots above each other
grid.arrange(plot_values, plot_ratios, ncol=1, nrow=2)
The big problem is that the legend on the right of the first plot makes it a different size. A minor problem is that I'd rather not show the x-axis name and tick marks on the top plot, to avoid clutter and make it clear that they share the same axis.
I've looked at this question and its answers:
Align plot areas in ggplot
Unfortunately, neither answer there works well for me. Faceting doesn't seem a good fit, since I want to have completely different y scales for my two graphs. Manipulating the dimensions returned by ggplot_gtable seems more promising, but I don't know how to get around the fact that the two graphs have a different number of cells. Naively copying that code doesn't seem to change the resulting graph dimensions for my case.
Here's another similar question:
The perils of aligning plots in ggplot
The question itself seems to suggest a good option, but rbind.gtable complains if the tables have different numbers of columns, which is the case here due to the legend. Perhaps there's a way to slot in an extra empty column in the second table? Or a way to suppress the legend in the first graph and then re-add it to the combined graph?
Here's a solution that doesn't require explicit use of grid graphics. It uses facets, and hides the legend entry for "ratio" (using a technique from https://stackoverflow.com/a/21802022).
library(reshape2)
results_long <- melt(results, id.vars="index")
results_long$facet <- ifelse(results_long$variable=="ratio", "ratio", "values")
results_long$facet <- factor(results_long$facet, levels=c("values", "ratio"))
ggplot(results_long, aes(x=index, y=value, colour=variable)) +
geom_point() +
facet_grid(facet ~ ., scales="free_y") +
scale_colour_manual(breaks=c("control","value"),
values=c("#1B9E77", "#D95F02", "#7570B3")) +
theme(legend.justification=c(0,1), legend.position=c(0,1)) +
guides(colour=guide_legend(title=NULL)) +
theme(axis.title.y = element_blank())
Try this:
library(ggplot2)
library(gtable)
library(gridExtra)
AlignPlots <- function(...) {
LegendWidth <- function(x) x$grobs[[8]]$grobs[[1]]$widths[[4]]
plots.grobs <- lapply(list(...), ggplotGrob)
max.widths <- do.call(unit.pmax, lapply(plots.grobs, "[[", "widths"))
plots.grobs.eq.widths <- lapply(plots.grobs, function(x) {
x$widths <- max.widths
x
})
legends.widths <- lapply(plots.grobs, LegendWidth)
max.legends.width <- do.call(max, legends.widths)
plots.grobs.eq.widths.aligned <- lapply(plots.grobs.eq.widths, function(x) {
if (is.gtable(x$grobs[[8]])) {
x$grobs[[8]] <- gtable_add_cols(x$grobs[[8]],
unit(abs(diff(c(LegendWidth(x),
max.legends.width))),
"mm"))
}
x
})
plots.grobs.eq.widths.aligned
}
df <- data.frame(x = c(1:5, 1:5),
y = c(1:5, seq.int(5,1)),
type = factor(c(rep_len("t1", 5), rep_len("t2", 5))))
p1.1 <- ggplot(diamonds, aes(clarity, fill = cut)) + geom_bar()
p1.2 <- ggplot(df, aes(x = x, y = y, colour = type)) + geom_line()
plots1 <- AlignPlots(p1.1, p1.2)
do.call(grid.arrange, plots1)
p2.1 <- ggplot(diamonds, aes(clarity, fill = cut)) + geom_bar()
p2.2 <- ggplot(df, aes(x = x, y = y)) + geom_line()
plots2 <- AlignPlots(p2.1, p2.2)
do.call(grid.arrange, plots2)
Produces this:
// Based on multiple baptiste's answers
Encouraged by baptiste's comment, here's what I did in the end:
library(ggplot2)
library(dplyr)
library(gridExtra)
# Prepare some sample data.
results <- data.frame(index=(1:20))
results$control <- 50 * results$index
results$value <- results$index * 50 + 2.5*results$index^2 - results$index^3 / 8
results$ratio <- results$value / results$control
# Plot ratios between values
plot_ratios <- ggplot(results, aes(x=index, y=ratio)) +
geom_point()
# Plot absolute values
remove_x_axis =
theme(
axis.ticks.x = element_blank(),
axis.text.x = element_blank(),
axis.title.x = element_blank())
plot_values <- ggplot(results, aes(x=index)) +
geom_point(aes(y=value, color="value")) +
geom_point(aes(y=control, color="control")) +
remove_x_axis
# Arrange the two plots above each other
grob_ratios <- ggplotGrob(plot_ratios)
grob_values <- ggplotGrob(plot_values)
legend_column <- 5
legend_width <- grob_values$widths[legend_column]
grob_ratios <- gtable_add_cols(grob_ratios, legend_width, legend_column-1)
grob_combined <- gtable:::rbind_gtable(grob_values, grob_ratios, "first")
grob_combined <- gtable_add_rows(
grob_combined,unit(-1.2,"cm"), pos=nrow(grob_values))
grid.draw(grob_combined)
(I later realised I didn't even need to extract the legend width, since the size="first" argument to rbind tells it just to have that one override the other.)
It feels a bit messy, but it is exactly the layout I was hoping for.
An alternative & quite easy solution is as follows:
# loading needed packages
library(ggplot2)
library(dplyr)
library(tidyr)
# Prepare some sample data
results <- data.frame(index=(1:20))
results$control <- 50 * results$index
results$value <- results$index * 50 + 2.5*results$index^2 - results$index^3 / 8
results$ratio <- results$value / results$control
# reshape into long format
long <- results %>%
gather(variable, value, -index) %>%
mutate(facet = ifelse(variable=="ratio", "ratio", "values"))
long$facet <- factor(long$facet, levels=c("values", "ratio"))
# create the plot & remove facet labels with theme() elements
ggplot(long, aes(x=index, y=value, colour=variable)) +
geom_point() +
facet_grid(facet ~ ., scales="free_y") +
scale_colour_manual(breaks=c("control","value"), values=c("green", "red", "blue")) +
theme(axis.title.y=element_blank(), strip.text=element_blank(), strip.background=element_blank())
which gives:

Resources