Adjusting y axis origin for stacked geom_bar in ggplot2 - r

I want to plot stacked bar plot with ggplot2 and am having difficulty getting the colour mapping and stack order right. The following example has been developed from this SO answer to achieve a non-zero y-axis origin, but as you can see it creates other problems. The colours don't map properly and the plot order is wrong. Grateful for any pointers on the best way to handle this. The desired output should scale colours as per the factor levels for rating, with colours in the order specified.
require(ggplot2)
d = data.frame(grp = rep(c('A','B'), each = 4),
rating = rep(c('bad','poor','ok','good'), 2),
value = c(15,45,35,5,5,15,55,30), stringsAsFactors = F)
if(require(reshape2)) reshape2::dcast(d, grp ~ rating) # show structure
d$rating = ordered(d$rating, levels=c('bad','poor','ok','good'))
d$grp = ordered(d$grp, levels=c('B','A'))
# split datsets so we can plot 'negative' bars
d1 = subset(d, rating %in% c('ok','good'))
d2 = subset(d, rating %in% c('poor','bad'))
ggplot() +
geom_bar(data = d1, aes(x=grp, y=value, fill=rating), stat='identity', position='stack') +
geom_bar(data = d2, aes(x=grp, y=-value, fill=rating), stat='identity', position='stack') +
scale_fill_manual(values=c('red','pink','lightgreen','green')) +
geom_line(data=d1, aes(x=c(.5,2.5), y=c(0,0)), size=2, linetype='dashed') +
coord_flip()

Perhaps a bit of reordering and using limits() will help:
d2 <- d2[order(d2$rating, decreasing =T),]
ggplot() +
geom_bar(data = d1, aes(x=grp, y=value, fill=rating), stat='identity',
position='stack') +
geom_bar(data = d2, aes(x=grp, y=-value, fill=rating), stat='identity',
position='stack') +
scale_fill_manual(values=c('red','pink','lightgreen','green'),
limits=c("bad","poor","ok","good"))+
geom_line(data=d1, aes(x=c(.5,2.5), y=c(0,0)), size=2, linetype='dashed') +
coord_flip()
For anyone who wishes to learn ggplot2, I strongly recommend getting the Winston Chang's R Graphics Cookbook.

Related

How to highlight a column in ggplot2

I have the following graph and I want to highlight the columns (both) for watermelons as it has the highest juice_content and weight. I know how to change the color of the columns but I would like to WHOLE columns to be highlighted. Any idea on how to achieve this? There doesn't seems to be any similar online.
fruits <- c("apple","orange","watermelons")
juice_content <- c(10,1,1000)
weight <- c(5,2,2000)
df <- data.frame(fruits,juice_content,weight)
df <- gather(df,compare,measure,juice_content:weight, factor_key=TRUE)
plot <- ggplot(df, aes(fruits,measure, fill=compare)) + geom_bar(stat="identity", position=position_dodge()) + scale_y_log10()
An option is to use gghighlight
library(gghighlight)
ggplot(df, aes(fruits,measure, fill = compare)) +
geom_col(position = position_dodge()) +
scale_y_log10() +
gghighlight(fruits == "watermelons")
In response to your comment, how about working with different alpha values
ggplot(df, aes(fruits,measure)) +
geom_col(data = . %>% filter(fruits == "watermelons"),
mapping = aes(fill = compare),
position = position_dodge()) +
geom_col(data = . %>% filter(fruits != "watermelons"),
mapping = aes(fill = compare),
alpha = 0.2,
position = position_dodge()) +
scale_y_log10()
Or you can achieve the same with one geom_col and a conditional alpha (thanks #Tjebo)
ggplot(df, aes(fruits, measure)) +
geom_col(
mapping = aes(fill = compare, alpha = fruits == 'watermelons'),
position = position_dodge()) +
scale_alpha_manual(values = c(0.2, 1)) +
scale_y_log10()
You could use geom_area to highlight behind the bars. You have to force the x scale to discrete first which is why I've used geom_blank (see this answer geom_ribbon overlay when x-axis is discrete) noting that geom_ribbon and geom_area are effectively the same except geom_area always has 0 as ymin
#minor edit so that the level isn't hard coded
watermelon_level <- which(levels(df$fruits) == "watermelons")
AreaDF <- data.frame(fruits = c(watermelon_level-0.5,watermelon_level+0.5))
plot <- ggplot(df, aes(fruits)) +
geom_blank(aes(y=measure, fill=compare))+
geom_area(data = AreaDF, aes( y = max(df$measure)), fill= "yellow")+
geom_bar(aes(y=measure, fill=compare),stat="identity", position=position_dodge()) + scale_y_log10()
Edit to address comment
If you want to highlight multiple fruits then you could do something like this. You need a data.frame with where you want the geom_area x and y, including dropping it to 0 between. I'm sure there's slightly tidier methods of getting the data.frame but this one works
highlight_level <- which(levels(df$fruits) %in% c("apple", "watermelons"))
AreaDF <- data.frame(fruits = unlist(lapply(highlight_level, function(x) c(x -0.51,x -0.5,x+0.5,x+0.51))),
yval = rep(c(1,max(df$measure),max(df$measure),1), length(highlight_level)))
AreaDF <- AreaDF %>% mutate(
yval = ifelse(floor(fruits) %in% highlight_level & ceiling(fruits) %in% highlight_level, max(df$measure), yval)) %>%
arrange(fruits) %>% distinct()
plot <- ggplot(df, aes(fruits)) +
geom_blank(aes(y=measure, fill=compare))+
geom_area(data = AreaDF, aes(y = yval ), fill= "yellow")+
geom_bar(aes(y=measure, fill=compare),stat="identity", position=position_dodge()) + scale_y_log10()
plot

ggplot reorder factors in plot without affecting legend order [duplicate]

I have produced a stacked percent barplot from the following data, which is in a csv file,
,ONE,TWO,THREE
1,2432,420,18
2,276,405,56
3,119,189,110
4,90,163,140
5,206,280,200
6,1389,1080,1075
7,3983,3258,4878
8,7123,15828,28111
9,8608,48721,52576
10,9639,44725,55951
11,8323,45695,32166
12,2496,18254,26600
13,1524,8591,18583
14,7861,1857,1680
15,10269,5165,4618
16,13560,64636,63262
using the following code
library(ggplot2)
library(reshape2)
library(scales)
data <- read.csv(file="file.csv",sep=",",header=TRUE)
data <- data[,2:ncol(data)]
datam <- melt(cbind(data,ind = sort(rownames(data))),is.var = c('ind'))
datam$ind <- as.numeric(datam$ind)
ggplot(datam,aes(x = variable, y = value,fill = factor(as.numeric(ind)))) +
geom_bar(position = "fill") + scale_y_continuous(labels =percent_format()) +
scale_fill_discrete("Barcode\nMatch") +xlab("Barcode")+ylab("Reads")
The result is
The problem is that the items in the legend are not in the same order as the stacks they represent. The colours and the numbers are right but the order is not. In other words, is there a way to invert the order of the items in the legend? Thanks
you can use a new option reverse = TRUE:
ggplot(datam,aes(x = variable, y = value,fill = factor(as.numeric(ind)))) +
geom_bar(position = "fill") + scale_y_continuous(labels =percent_format()) +
scale_fill_discrete("Barcode\nMatch") + xlab("Barcode")+ylab("Reads") +
guides(fill = guide_legend(reverse = TRUE))
Add + scale_fill_hue(breaks=c("new order 1","new order 2","new order...")) as in:
library(ggplot2)
ggplot(data=PlantGrowth, aes(x=group, fill=group)) + geom_bar() +
geom_bar(colour="black", legend=FALSE) +
scale_fill_hue(breaks=c("trt1","ctrl","trt2"))
I'd also check out http://wiki.stdout.org/rcookbook/Graphs/Legends%20(ggplot2)/ for more.
This may have changed and become easier with he new ggplot but I'm not sure.

Keeping unit of measure in facet_wrap while scales="free_y"? [duplicate]

This question already has an answer here:
Setting individual y axis limits with facet wrap NOT with scales free_y
(1 answer)
Closed 4 years ago.
I'm trying to create a facet_wrap() where the unit of measure remains identical across the different plots, while allowing to slide across the y axis.
To clearify with I mean, I have created a dataset df:
library(tidyverse)
df <- tibble(
Year = c(2010,2011,2012,2010,2011,2012),
Category=c("A","A","A","B","B","B"),
Value=c(1.50, 1.70, 1.60, 4.50, 4.60, 4.55)
)
with df, we can create the following plot using facet_wrap:
ggplot(data = df, aes(x=Year, y=Value)) + geom_line() + facet_wrap(.~ Category)
Plot 1
To clarify the differences between both plots, one can use scale = "free_y":
ggplot(data = df, aes(x=Year, y=Value)) + geom_line()
+ facet_wrap(.~ Category, scale="free_y")
Plot 2
Although it's more clear, the scale on the y-axis in plot A isequal to 0.025, while being 0.0125 in B. This could be misleading to someone who's comparing A & B next to each other.
So my question right now is to know whether there exist an elegant way of plotting something like the graph below (with y-scale = 0.025) without having to plot two seperate plots into a grid?
Thanks
Desired result:
Code for the grid:
# Grid
## Plot A
df_A <- df %>%
filter(Category == "A")
plot_A <- ggplot(data = df_A, aes(x=Year, y=Value)) + geom_line() + coord_cartesian(ylim = c(1.5,1.7)) + ggtitle("A")
## Plot B
df_B <- df %>%
filter(Category == "B")
plot_B <- ggplot(data = df_B, aes(x=Year, y=Value)) + geom_line() + coord_cartesian(ylim = c(4.4,4.6)) + ggtitle("B")
grid.arrange(plot_A, plot_B, nrow=1)
Based on the info at Setting individual y axis limits with facet wrap NOT with scales free_y you can you use geom_blank() and manually specified y-limits by Category:
# df from above code
df2 <- tibble(
Category = c("A", "B"),
y_min = c(1.5, 4.4),
y_max = c(1.7, 4.6)
)
df <- full_join(df, df2, by = "Category")
ggplot(data = df, aes(x=Year, y=Value)) + geom_line() +
facet_wrap(.~ Category, scales = "free_y") +
geom_blank(aes(y = y_min)) +
geom_blank(aes(y = y_max))

How to add different lines for facets

I have data where I look at the difference in growth between a monoculture and a mixed culture for two different species. Additionally, I made a graph to make my data clear.
I want a barplot with error bars, the whole dataset is of course bigger, but for this graph this is the data.frame with the means for the barplot.
plant species means
Mixed culture Elytrigia 0.886625
Monoculture Elytrigia 1.022667
Monoculture Festuca 0.314375
Mixed culture Festuca 0.078125
With this data I made a graph in ggplot2, where plant is on the x-axis and means on the y-axis, and I used a facet to divide the species.
This is my code:
limits <- aes(ymax = meansS$means + eS$se, ymin=meansS$means - eS$se)
dodge <- position_dodge(width=0.9)
myplot <- ggplot(data=meansS, aes(x=plant, y=means, fill=plant)) + facet_grid(. ~ species)
myplot <- myplot + geom_bar(position=dodge) + geom_errorbar(limits, position=dodge, width=0.25)
myplot <- myplot + scale_fill_manual(values=c("#6495ED","#FF7F50"))
myplot <- myplot + labs(x = "Plant treatment", y = "Shoot biomass (gr)")
myplot <- myplot + opts(title="Plant competition")
myplot <- myplot + opts(legend.position = "none")
myplot <- myplot + opts(panel.grid.minor=theme_blank(), panel.grid.major=theme_blank())
So far it is fine. However, I want to add two different horizontal lines in the two facets. For that, I used this code:
hline.data <- data.frame(z = c(0.511,0.157), species = c("Elytrigia","Festuca"))
myplot <- myplot + geom_hline(aes(yintercept = z), hline.data)
However if I do that, I get a plot were there are two extra facets, where the two horizontal lines are plotted. Instead, I want the horizontal lines to be plotted in the facets with the bars, not to make two new facets. Anyone a idea how to solve this.
I think it makes it clearer if I put the graph I create now:
Make sure that the variable species is identical in both datasets. If it a factor in one on them, then it must be a factor in the other too
library(ggplot2)
dummy1 <- expand.grid(X = factor(c("A", "B")), Y = rnorm(10))
dummy1$D <- rnorm(nrow(dummy1))
dummy2 <- data.frame(X = c("A", "B"), Z = c(1, 0))
ggplot(dummy1, aes(x = D, y = Y)) + geom_point() + facet_grid(~X) +
geom_hline(data = dummy2, aes(yintercept = Z))
dummy2$X <- factor(dummy2$X)
ggplot(dummy1, aes(x = D, y = Y)) + geom_point() + facet_grid(~X) +
geom_hline(data = dummy2, aes(yintercept = Z))

Sort legend in ggplot2

I have produced a stacked percent barplot from the following data, which is in a csv file,
,ONE,TWO,THREE
1,2432,420,18
2,276,405,56
3,119,189,110
4,90,163,140
5,206,280,200
6,1389,1080,1075
7,3983,3258,4878
8,7123,15828,28111
9,8608,48721,52576
10,9639,44725,55951
11,8323,45695,32166
12,2496,18254,26600
13,1524,8591,18583
14,7861,1857,1680
15,10269,5165,4618
16,13560,64636,63262
using the following code
library(ggplot2)
library(reshape2)
library(scales)
data <- read.csv(file="file.csv",sep=",",header=TRUE)
data <- data[,2:ncol(data)]
datam <- melt(cbind(data,ind = sort(rownames(data))),is.var = c('ind'))
datam$ind <- as.numeric(datam$ind)
ggplot(datam,aes(x = variable, y = value,fill = factor(as.numeric(ind)))) +
geom_bar(position = "fill") + scale_y_continuous(labels =percent_format()) +
scale_fill_discrete("Barcode\nMatch") +xlab("Barcode")+ylab("Reads")
The result is
The problem is that the items in the legend are not in the same order as the stacks they represent. The colours and the numbers are right but the order is not. In other words, is there a way to invert the order of the items in the legend? Thanks
you can use a new option reverse = TRUE:
ggplot(datam,aes(x = variable, y = value,fill = factor(as.numeric(ind)))) +
geom_bar(position = "fill") + scale_y_continuous(labels =percent_format()) +
scale_fill_discrete("Barcode\nMatch") + xlab("Barcode")+ylab("Reads") +
guides(fill = guide_legend(reverse = TRUE))
Add + scale_fill_hue(breaks=c("new order 1","new order 2","new order...")) as in:
library(ggplot2)
ggplot(data=PlantGrowth, aes(x=group, fill=group)) + geom_bar() +
geom_bar(colour="black", legend=FALSE) +
scale_fill_hue(breaks=c("trt1","ctrl","trt2"))
I'd also check out http://wiki.stdout.org/rcookbook/Graphs/Legends%20(ggplot2)/ for more.
This may have changed and become easier with he new ggplot but I'm not sure.

Resources