ggplot bar plot - split fill legend following x-axis factor - r

Is it possible to split the fill legend of a ggplot barplot following the values on the x-axis of the plot?
For example using this data:
library(ggplot2)
data <- data.frame(val=c(2,4,5,6,7,8,9),var1=c("A","A","A","B","B","C","C"),
var2=sample(LETTERS[1:7]))
ggplot(data,aes(x=factor(var1),y=val,fill=var2))+geom_bar(stat="identity")
I get the following plot:
I would like to have something like this to make it easier to find what each fill color corresponds to:

An alternative to the solutions in the links in the comments. The solution assumes that the data is available in an aggregated form, and that each category of var2 appear in one and only one category of var1. That is, the number of keys (and their order) in the legend is correct. All that need happen is for space to be inserted between appropriate keys and text dropped into those spaces. It gets the information it needs to construct the plot from the initial plot or its build data.
library(ggplot2)
library(gtable)
library(grid)
set.seed(1234)
data <- data.frame(val = c(2,4,5,6,7,8,9),
var1 = c("A","A","A","B","B","C","C"),
var2 = sample(LETTERS[1:7]))
# Sort levels of var2
data$var2 = factor(data$var2, labels = data$var2, levels = data$var2)
p = ggplot(data, aes(x = factor(var1), y = val, fill = var2)) +
geom_bar(stat = "identity")
# Get the ggplot grob
g = ggplotGrob(p)
# Get the legend
leg = g$grobs[[which(g$layout$name == "guide-box")]]$grobs[[1]]
# Get the labels from the ggplot build data
gt = ggplot_build(p)
labels = rev(gt$layout$panel_params[[1]]$x.labels)
## Positions of the labels
# Get the number of keys within each label from the ggplot build data
gt$data[[1]]$x
N = as.vector(table(gt$data[[1]]$x))
N = N[-length(N)]
# Get the positions of the labels in the legend gtable
pos = rev(cumsum(N)) + 3
pos = c(pos, 3)
# Add rows to the legend gtable, and add the labels to the new rows
for(i in seq_along(pos)){
leg = gtable_add_rows(leg, unit(1.5, "lines"), pos = pos[i])
leg = gtable_add_grob(leg, textGrob(labels[i], y = 0.1, just = "bottom"),
t = pos[i] + 1, l = 2)
}
# Put the legend back into the plot
g$grobs[[which(g$layout$name == "guide-box")]]$grobs[[1]] = leg
# Draw it
grid.newpage()
grid.draw(g)

Related

How to add legend in ggplot for each facet?

I have the following data.frame. I want to draw a ggplot having three legend tables, the common legend for size, and two separate legends for var = "A" and var = "B" taking the values from 'value' column, these two legends should be placed next to each related graph. So far I have tried this which is creating a single legend for 'var' column.
df <- data.frame(var = c("A","A","B","B"),
value = c("u","v","x","y"),
point = 1:4,
size = 1:4)
ggplot() +geom_point(data = df,
aes(x = point,y = NA,
color = value,size = size)) +
facet_grid(rows = vars(var))
Thanks.
Edit:
I have attached the expected output as suggested by #Tung
What about something like this:
library(gridExtra)
library(ggplot2)
# split data for each "facet"
df <- split(df,f = df$var)
# plot for the first "facet"
p1 <- ggplot(df$A,aes(x = point,y = NA,colour = value, size = size)) +
geom_point() +
facet_wrap(~var, ncol=1) +
# here you set the axis
scale_x_continuous(limits = c(0.5, 4.5))
# do it for each "facet"
p2 <- p1 %+% df$B
# here the plot
grid.arrange(p1,p2)

Coloring a geom_histogram by gradient

I'm trying to plot a geom_histogram where the bars are colored by a gradient.
This is what I'm trying to do:
library(ggplot2)
set.seed(1)
df <- data.frame(id=paste("ID",1:1000,sep="."),val=rnorm(1000),stringsAsFactors=F)
ggplot(df,aes_string(x="val",y="..count..+1",fill="val"))+geom_histogram(binwidth=1,pad=TRUE)+scale_y_log10()+scale_fill_gradient2("val",low="darkblue",high="darkred")
But getting:
Any idea how to get it colored by the defined gradient?
Not sure you can fill by val because each bar of the histogram represents a collection of points.
You can, however, fill by categorical bins using cut. For example:
ggplot(df, aes(val, fill = cut(val, 100))) +
geom_histogram(show.legend = FALSE)
Just for completeness.
If the colors I'd like to have the gradient on to be manually selected here's what I suggest:
data:
library(ggplot2)
set.seed(1)
df <- data.frame(id=paste("ID",1:1000,sep="."),val=rnorm(1000),stringsAsFactors=F)
colors:
bins <- 10
cols <- c("darkblue","darkred")
colGradient <- colorRampPalette(cols)
cut.cols <- colGradient(bins)
cuts <- cut(df$val,bins)
names(cuts) <- sapply(cuts,function(t) cut.cols[which(as.character(t) == levels(cuts))])
plot:
ggplot(df,aes(val,fill=cut(val,bins))) +
geom_histogram(show.legend=FALSE) +
scale_color_manual(values=cut.cols,labels=levels(cuts)) +
scale_fill_manual(values=cut.cols,labels=levels(cuts))
Instead of binning manually another option would be to make use of the bins computed by stat_bin by mapping ..x.. (or factor(..x..) in case of a discrete scale) or after_stat(x) on the fill aesthetic.
An issue with computing the bins manually is that we end up with multiple groups per bin for which the count has to be computed (even if the count is zero most of the time) and which get stacked on top of each other in the histogram. Especially, this gets problematic if one would add labels of counts to the histogram as can be seen in this post, because in that case one ends up with multiple labels per bin.
library(ggplot2)
set.seed(1)
df <- data.frame(id = paste("ID", 1:1000, sep = "."), val = rnorm(1000), stringsAsFactors = F)
ggplot(df, aes(x = val, y = ..count.. + 1, fill = ..x..)) +
geom_histogram(binwidth = .1, pad = TRUE) +
scale_y_log10() +
scale_fill_gradient2(name = "val", low = "darkblue", high = "darkred")
#> Warning: Duplicated aesthetics after name standardisation: pad

Fill specific regions in geom_violin plot

How can I fill a geom_violin plot in ggplot2 with different colors based on a fixed cutoff?
For instance, given the setup:
library(ggplot2)
set.seed(123)
dat <- data.frame(x = rep(1:3,each = 100),
y = c(rnorm(100,-1),rnorm(100,0),rnorm(100,1)))
dat$f <- with(dat,ifelse(y >= 0,'Above','Below'))
I'd like to take this basic plot:
ggplot() +
geom_violin(data = dat,aes(x = factor(x),y = y))
and simply have each violin colored differently above and below zero. The naive thing to try, mapping the fill aesthetic, splits and dodges the violin plots:
ggplot() +
geom_violin(data = dat,aes(x = factor(x),y = y, fill = f))
which is not what I want. I'd like a single violin plot at each x value, but with the interior filled with different colors above and below zero.
Here's one way to do this.
library(ggplot2)
library(plyr)
#Data setup
set.seed(123)
dat <- data.frame(x = rep(1:3,each = 100),
y = c(rnorm(100,-1),rnorm(100,0),rnorm(100,1)))
First we'll use ggplot::ggplot_build to capture all the calculated variables that go into plotting the violin plot:
p <- ggplot() +
geom_violin(data = dat,aes(x = factor(x),y = y))
p_build <- ggplot2::ggplot_build(p)$data[[1]]
Next, if we take a look at the source code for geom_violin we see that it does some specific transformations of this computed data frame before handing it off to geom_polygon to draw the actual outlines of the violin regions.
So we'll mimic that process and simply draw the filled polygons manually:
#This comes directly from the source of geom_violin
p_build <- transform(p_build,
xminv = x - violinwidth * (x - xmin),
xmaxv = x + violinwidth * (xmax - x))
p_build <- rbind(plyr::arrange(transform(p_build, x = xminv), y),
plyr::arrange(transform(p_build, x = xmaxv), -y))
I'm omitting a small detail from the source code about duplicating the first row in order to ensure that the polygon is closed.
Now we do two final modifications:
#Add our fill variable
p_build$fill_group <- ifelse(p_build$y >= 0,'Above','Below')
#This is necessary to ensure that instead of trying to draw
# 3 polygons, we're telling ggplot to draw six polygons
p_build$group1 <- with(p_build,interaction(factor(group),factor(fill_group)))
And finally plot:
#Note the use of the group aesthetic here with our computed version,
# group1
p_fill <- ggplot() +
geom_polygon(data = p_build,
aes(x = x,y = y,group = group1,fill = fill_group))
p_fill
Note that in general, this will clobber nice handling of any categorical x axis labels. So you will often need to do the plot using a continuous x axis and then if you need categorical labels, add them manually.

ggplot faceting with different specifications of ylims

I want to create a ggplot figure with six panels in R. The first five facets should represent five different subsets of data in bar charts, and the final facet should represent the whole data. I further want to have a fixed y-axis scale across the first five facets, but a different scale in the final facet. I am aware that it is currently not possible to specify individual ylims for each facet within the ggplot functionality (https://github.com/hadley/ggplot2/issues/187), but am wondering if I can do something similar using grid and possibly gtable packages, neither of which I'm very familiar with at the moment.
The following is my attempt. I replace the final facet with a facet in another figure.
library("ggplot2")
library("dplyr")
library("grid")
# create data
set.seed(1)
d1 <- data_frame(
value = rnorm(3 * 5, mean = 30, sd = 10),
f = rep(LETTERS[1:3], 5),
p = rep(paste("Panel", 1:5), each = 3)
)
d2 <- d1 %>%
mutate(p = "Total") %>%
rbind(d1)
# make initial figures
plot1 <- ggplot(d2, aes(f, value)) +
geom_bar(stat = "identity") +
facet_wrap(~ p) +
coord_cartesian(ylim = c(0, 50))
plot2 <- ggplot(d2, aes(f, value)) +
geom_bar(stat = "identity") +
facet_wrap(~ p, scales = "free_y")
# extract their grobs
g1 <- ggplotGrob(plot1)
g2 <- ggplotGrob(plot2)
# replace the final facet of plot1 with the final facet of plot2
g1[["grobs"]][[7]] <- g2[["grobs"]][[7]]
g1[["grobs"]][[19]] <- g2[["grobs"]][[19]]
g1[["grobs"]][[25]] <- g2[["grobs"]][[25]]
# draw the figure
grid.newpage()
grid.draw(g1)
And here's what I get.
As can be seen, however, the y-axis label of the final facet overlaps with the preceding facet. Does anyone know a way to avoid the overlap e.g., by making the final facet smaller?
One approach is to extract the "Total" plot from "g2", then insert it into "g1", but first remove the "Total" plot from "g1". But you will notice that the x-axis tick mark labels do not align across the facets.
# Load packages
library(ggplot2)
library(dplyr)
library(gtable)
library(grid)
# create data
set.seed(1)
d1 <- data.frame(
value = rnorm(3 * 5, mean = 30, sd = 10),
f = rep(LETTERS[1:3], 5),
p = rep(paste("Panel", 1:5), each = 3)
)
d2 <- d1 %>%
mutate(p = "Total") %>%
rbind(d1)
# make initial figures
plot1 <- ggplot(d2, aes(f, value)) +
geom_bar(stat = "identity") +
facet_wrap(~ p) +
coord_cartesian(ylim = c(0, 50))
plot2 <- ggplot(d2, aes(f, value)) +
geom_bar(stat = "identity") +
facet_wrap(~ p, scales = "free_y")
# Get the ggplot grobs
g1 <- ggplotGrob(plot1)
g2 <- ggplotGrob(plot2)
# Extract "Total" plot from g2
keep = g2$layout$name %in% c("panel-3-2", "axis-b-3-2", "axis-l-2-3", "strip-t-3-2")
pos = subset(g2$layout, keep, c(t,l,b,r))
g2 = g2[c(min(pos$t):max(pos$b)), c(min(pos$l):max(pos$r))]
# Remove "Total" plot from g1
keep = !g1$layout$name %in% c("panel-3-2", "axis-b-3-2", "strip-t-3-2")
pos = subset(g1$layout, !keep, c(t,l,b,r))
g1$grobs <- g1$grobs[keep]
g1$layout <- g1$layout[keep, ]
# Insert g2 into g1
g1 = gtable_add_grob(g1, g2, t=min(pos$t), b=max(pos$b), l=min(pos$l), r=max(pos$r))
# Draw it
grid.newpage()
grid.draw(g1)
Another approach is to extract the "Total" plot from "g2" as before, but to move its y-axis to the right side of the plot (using code borrowed from here. (I tweaked your "plot2" so that the tick mark labels are better aligned in the final plot.) In this way, the "Total" panel takes as much space as the other panels, and thus the x-axis tick mark labels align, but the y-axis for the "Total" panel sticks out to the right.
# Make initial figures
plot1 <- ggplot(d2, aes(f, value)) +
geom_bar(stat = "identity") +
facet_wrap(~ p) +
coord_cartesian(ylim = c(0, 50))
plot2 <- ggplot(d2, aes(f, value)) +
geom_bar(stat = "identity") +
facet_wrap(~ p, scales = "free_y") +
theme(axis.text.y = element_text(hjust = 0)) ## For better formatting of labels
# extract their grobs
g1 <- ggplotGrob(plot1)
g2 <- ggplotGrob(plot2)
# Extract "Total" plot from g2
keep = g2$layout$name %in% c("panel-3-2", "axis-b-3-2", "axis-l-2-3", "strip-t-3-2")
pos = subset(g2$layout, keep, c(t,l,b,r))
g2 = g2[c(min(pos$t):max(pos$b)), c(min(pos$l):max(pos$r))]
# Get the position of the panel in the layout
panel <- c(subset(g2$layout, grepl("panel", g2$layout$name), se = t:r))
# Get the row number of the y-axis in the layout
rn <- which(grepl("axis-l", g2$layout$name))
# Extract the axis (tick marks and axis text from the gtable)
axis.grob <- g2$grobs[[rn]]
axisl <- axis.grob$children[[2]] # Two children - get the second
axisl # Note: two grobs - tick marks and text
# Reverse the grobs and the widths
axisl$widths <- rev(axisl$widths)
axisl$grobs <- rev(axisl$grobs)
axisl$grobs[[1]]$x <- axisl$grobs[[1]]$x - unit(1, "npc") + unit(2.75, "pt")
axisl$grobs[[2]]$children[[1]]$x = unit(.15, "npc")
# Remove the column containing the left axis
g2 <- g2[, -(panel$r-1)]
## remove empty panels
keep = !g1$layout$name %in% c("panel-3-2", "axis-b-3-2", "strip-t-3-2")
pos = subset(g1$layout, !keep, c(t,l,b,r))
g1$grobs <- g1$grobs[keep]
g1$layout <- g1$layout[keep, ]
# Insert g2 into g1
g1 = gtable_add_grob(g1, g2, t = min(pos$t), b = max(pos$b), l = min(pos$l), r = max(pos$r))
# Add a new column to g1, and add the revised axisl grob to the new column.
pos = subset(g1$layout, grepl("panel", g1$layout$name), c(t,l,b,r)) # position of bottom right panel
g1 <- gtable_add_cols(g1, axisl$widths, max(pos$r))
g1 <- gtable_add_grob(g1, axisl, t = max(pos$b), l = max(pos$r)+1, r = max(pos$r)+2)
# Draw it
grid.newpage()
grid.draw(g1)

Correcting the legend when plotting data from two data frames, some of the lines have symbols

The following code produces three plots. The first plot uses data from df_fault, and plots lines with symbols from df_maint, and that plot is fine also. The problem is with the 3rd plot, that combines the lines with symbols from df_fault with the lines from df_maint. The legend is incorrect, and there are two legends, one for lines and one for symbols. How to get one correct legend with four entries.
Create some sample data
library(zoo)
library(ggplot2)
rDates <- function(N, st="2012/01/01", et="2012/12/31") {
st <- as.POSIXct(as.Date(st))
et <- as.POSIXct(as.Date(et))
dt <- as.numeric(difftime(et,st,unit="sec"))
ev <- sort(runif(N, 0, dt))
rt <- st + ev
}
first_maint <- as.POSIXct(strptime("2014/01/01", "%Y/%m/%d"))
last_maint <- as.POSIXct(strptime("2014/12/31", "%Y/%m/%d"))
first_fault <- as.POSIXct(strptime("2014/05/01", "%Y/%m/%d"))
last_fault <- as.POSIXct(strptime("2014/07/31", "%Y/%m/%d"))
set.seed(31)
nMDates=40
nFDates=10
rMaintDates <- rDates(nMDates,first_maint,last_maint)
rFaultDates <- rDates(nFDates,first_fault,last_fault)
df_fault <- data.frame(date = rFaultDates,
type = "Non-Op",
ci = runif(nFDates,.7,1.8),stringsAsFactors=FALSE)
df_fault$type[sample(1:nFDates,3)] = "Advisory"
z_hr <- zoo(c(0,0,9.9,9.9),c(first_maint,first_fault,last_fault,last_maint))
z_maint <- zoo(,rMaintDates[c(-1,-nMDates)])
z_hr_maint_a <- merge(z_hr,z_maint)
z_hr_maint <- na.approx(z_hr_maint_a)
z_repair <- zoo(c(0,3000,5000,8000),c(first_maint,first_fault,last_fault,last_maint))
z_repair_maint_a <- merge(z_repair,z_maint)
z_repair_maint <- na.approx(z_repair_maint_a)
df_maint <- data.frame(date=index(z_hr_maint),
hrs=coredata(z_hr_maint)/9.8,
repairs=coredata(z_repair_maint)/8000)
Plot the sample data, these examples work
rpr_title = "repairs/8000"
flt_title = "hrs/9.8"
(gp2 <- ggplot(data=df_fault,aes(x=date, y=ci, color=type)) +
labs(x="Date (2014)", y="CI Amplitude",title="Sample, this plot is fine, df_fault") +
geom_line(aes(group=type,shape=type))+
geom_point(aes(group=type,shape=type),size=4)+
theme(plot.title=element_text( size=12),
axis.title=element_text( size=8)) )
(gp2a <- ggplot() + geom_line(data=df_maint,aes(x=date,y=repairs,color=rpr_title))+
geom_line(data=df_maint,aes(x=date,y=hrs,color=flt_title))+
labs(x="Date (2014)", y="CI Amplitude",title="Sample, this plot is fine, df_maint ")
)
This plot shows the fault data
This plot shows the maintenance and usage data
I would like to combine the above two plots into one plot, with four legend entries. Here is my current attempt, but the legend isn't correct
(gp2b <- gp2 + geom_line(data=df_maint,aes(x=date,y=repairs,color=rpr_title))+
geom_line(data=df_maint,aes(x=date,y=hrs,color=flt_title))+
labs(x="Date (2014)", y="CI Amplitude",title="Sample, this plot the legend is wrong")
)
This plot, there are two legends, and neither one is correct. The first "type" legend has the wrong symbols on the line, showing a circle symbol for all the lines. The second "type" legend shows two black symbols, so the colors are incorrect. I would like the 2nd legend removed, and the 1st legend correctly showing lines and colors. Also, it would be nice if the lines without symbols could be wider. The legend line/symbol for "Advisory" is correct. The legend entry for "Non-op" should have a triangle instead of a circle. The legend entries for "hrs/9.8" and "repairs/8000" should only have a line, no symbol.
Brandon suggestions for using meld helps, but the plot below still doesn't have the legend correct...
names(df_fault)[2:3] <- c("variable","value") # for rbind
dat <- melt(df_maint, c("date")) # melted
dat <- rbind(dat, df_fault)
p1 <- ggplot(dat, aes(date,value, group = variable, color = variable)) + geom_line()
p1 + geom_point(data =
dat[dat$variable %in% c("Advisory","Non-Op"),],
aes(date,value, group = variable, color = variable, shape=variable)) +
scale_colour_discrete(name ="Fleet",
breaks=c("hrs", "repairs","Advisory","Non-Op"),
labels=c("usage hrs", "maint repairs","Advisory Faults","Non-Op Faults")) +
scale_shape_discrete(name ="Fleet",
breaks=c("hrs", "repairs","Advisory","Non-Op"),
labels=c("usage hrs", "maint repairs","Advisory Faults","Non-Op Faults"),
guide = "none")
Post script: I want to mention that it took some effort to apply the above procedure to an actual data set. Here's an summary of the process.
1) Identify the x axis variables, and grouping variables.
2) In the two data frames, rename the x axis variable and group variables to the same names
3) Use melt twice (example only used it once) to generate a melted data frame. Use the x axis and group variables as is.vars. Specify the variable that you want to plot as measure.vars.
3b) Do head on the melted data frames. You need to see the X axis variable names and the grouping variable names, followed by the field variable and values. The field variable has text values corresponding to the different y axis names.
4) Use rbind to combine the two melted dataframes
5) Do head on both steps 3 and 4 so you understand the storage of the data
6) Plot the lines for all the data. Include the modification of the legend title in this step, using + guides(color=guide_legend(title="Fleet")). I don't see this command in the example.
7) Create a subset from the melted data frame of the data that will have symbols. Add the symbols, but don't add the 2nd legend from symbols +scale_shape_discrete(name ="Fleet", guide = "none") in the example.
8) Adjust the legend line symbols using + guides(colour = guide_legend(override.aes = list(shape = c(32,32,16,17))))
9) Once you can see a nominal plot of lines with some symbols and the correct legend, you may need to repeat the above process after sorting the combined melted data frame in order to get the correct lines / symbols in front. You may want to sort on variable, and the x axis fields.
By adding guides, and specifying the shape as no shape (32), and matching the other symbols (16, 17), the plot comes out correct
p1 <- ggplot(dat, aes(date,value, group = variable, color = variable)) + geom_line(size=1)
p1 + geom_point(data =
dat[dat$variable %in% c("Advisory","Non-Op"),],
aes(date,value, group = variable, color = variable, shape=variable),size=3) +
scale_colour_discrete(name ="Fleet",
breaks=c("hrs", "repairs","Advisory","Non-Op"),
labels=c("usage hrs", "maint repairs","Advisory Faults","Non-Op Faults")) +
scale_shape_discrete(name ="Fleet",
guide = "none") +
guides(colour = guide_legend(override.aes = list(shape = c(32,32,16,17))))
When in doubt, melt. See the example below:
library(reshape2)
library(ggplot2)
names(df_fault)[2:3] <- c("variable","value") # for rbind
dat <- melt(df_maint, c("date")) # melted
dat <- rbind(dat, df_fault)
p1 <- ggplot(dat, aes(date,value, group = variable, color = variable)) + geom_line()
p1 + geom_point(data =
dat[dat$variable %in% c("Advisory","Non-Op"),],
aes(date,value, group = variable, color = variable, shape=variable)) +
scale_shape(guide = "none")
Notice that I specified "data" in my geom_point() call. Each scale_ has a method for removing the guide by setting it to "none".

Resources