Adding hatches or patterns to ggplot bars [duplicate] - r

This question already has an answer here:
How can I add hatches, stripes or another pattern or texture to a barplot in ggplot?
(1 answer)
Closed 1 year ago.
Suppose I want to show the gene expression results (logFC) from RNA-seq and q-PCR analysis in a barplot. My dataset looks like this:
# Reproducible toy data: one row for every comp x exp x geneID combination,
# plus a random log fold-change and a random standard error for each row.
set.seed(42)
f1 <- expand.grid(
  comp = LETTERS[1:3],
  exp = c("qPCR", "RNA-seq"),
  geneID = paste("Gene", 1:4)
)
f1[["logfc"]] <- rnorm(nrow(f1))
f1[["SE"]] <- runif(nrow(f1), min = 0, max = 1.5)
My R command line
# Dodged bar chart of logfc per gene: fill encodes `comp`, outline colour
# encodes `exp`. (The stray Markdown fence that followed the last line has been
# removed so the snippet parses as R.)
p <- ggplot(f1, aes(x = geneID, y = logfc, fill = comp, color = exp)) +
  geom_bar(stat = "identity", position = position_dodge2(preserve = "single")) +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust = 1))
I have this output:
I want to get any mark patterns or hatches on the bars corresponding to one of the variables (exp or comp), and to add the upper error bars, as shown in the plot below:
Any help please?

Following the linked answer, it seems quite natural how to extend it to your case. In the example below, I'm using some dummy data structured like the head() data you gave, since the csv link gave me a 404.
library(ggplot2)
library(ggpattern)
#>
#> Attaching package: 'ggpattern'
#> The following objects are masked from 'package:ggplot2':
#>
#> flip_data, flipped_names, gg_dep, has_flipped_aes, remove_missing,
#> should_stop, waiver
# Setting up some dummy data: one row per comp x exp x geneID combination
set.seed(42)
f1 <- expand.grid(
  comp = LETTERS[1:3],
  exp = c("qPCR", "RNA-seq"),
  geneID = paste("Gene", 1:4)
)
f1[["logfc"]] <- rnorm(nrow(f1))

# Fill colour encodes `comp`; the hatch pattern encodes `exp`.
ggplot(f1, aes(x = geneID, y = logfc, fill = comp)) +
  geom_col_pattern(
    mapping = aes(pattern = exp),
    position = position_dodge2(preserve = "single"),
    colour = "black",
    pattern_fill = "black",
    pattern_angle = 45,
    pattern_density = 0.1,
    pattern_spacing = 0.01
  ) +
  scale_pattern_manual(
    # Lighter key fill in the pattern legend
    guide = guide_legend(override.aes = list(fill = "grey70")),
    values = c("none", "stripe")
  ) +
  scale_fill_discrete(
    # Hide the pattern in the fill legend keys
    guide = guide_legend(override.aes = list(pattern = "none"))
  )
Created on 2021-04-19 by the reprex package (v1.0.0)
EDIT: if you want to repeat the hatching in the fill legend, you can make an interaction() and then customise a manual fill scale.
# Same chart, but the fill is an interaction of `exp` and `comp` so the fill
# legend keys can repeat the hatching.
ggplot(f1, aes(x = geneID, y = logfc)) +
  geom_col_pattern(
    mapping = aes(
      pattern = exp,
      fill = interaction(exp, comp) # <- make this an interaction
    ),
    position = position_dodge2(preserve = "single"),
    colour = "black",
    pattern_fill = "black",
    pattern_angle = 45,
    pattern_density = 0.1,
    pattern_spacing = 0.01
  ) +
  scale_pattern_manual(
    guide = guide_legend(override.aes = list(fill = "grey70")), # <- make lighter
    values = c("none", "stripe")
  ) +
  scale_fill_manual(
    # Have 3 colours and repeat each twice
    values = rep(scales::hue_pal()(3), each = 2),
    # Keep only the part after the '.' that `interaction()` inserted
    labels = function(x) {
      pieces <- strsplit(x, "\\.")
      vapply(pieces, function(parts) parts[2], character(1))
    },
    # Repeat the pattern over the guide
    guide = guide_legend(
      override.aes = list(pattern = rep(c("none", "stripe"), times = 3))
    )
  )
Created on 2021-04-19 by the reprex package (v1.0.0)
EDIT2: Now with errorbars:
library(ggplot2)
library(ggpattern)
# Dummy data, now with a standard error column for the error bars
set.seed(42)
f1 <- expand.grid(
  comp = LETTERS[1:3],
  exp = c("qPCR", "RNA-seq"),
  geneID = paste("Gene", 1:4)
)
f1[["logfc"]] <- rnorm(nrow(f1))
f1[["SE"]] <- runif(nrow(f1), min = 0, max = 1.5)

ggplot(f1, aes(x = geneID, y = logfc)) +
  geom_col_pattern(
    mapping = aes(
      pattern = exp,
      fill = interaction(exp, comp) # <- make this an interaction
    ),
    position = position_dodge2(preserve = "single"),
    colour = "black",
    pattern_fill = "black",
    pattern_angle = 45,
    pattern_density = 0.1,
    pattern_spacing = 0.01
  ) +
  # One-sided error bar: starts at the bar end and extends away from zero
  geom_errorbar(
    mapping = aes(
      ymin = logfc,
      ymax = logfc + sign(logfc) * SE,
      group = interaction(geneID, comp, exp)
    ),
    position = "dodge"
  ) +
  scale_pattern_manual(
    guide = guide_legend(override.aes = list(fill = "grey70")), # <- make lighter
    values = c("none", "stripe")
  ) +
  scale_fill_manual(
    # Have 3 colours and repeat each twice
    values = rep(scales::hue_pal()(3), each = 2),
    # Keep only the part after the '.' that `interaction()` inserted
    labels = function(x) {
      pieces <- strsplit(x, "\\.")
      vapply(pieces, function(parts) parts[2], character(1))
    },
    # Repeat the pattern over the guide
    guide = guide_legend(
      override.aes = list(pattern = rep(c("none", "stripe"), times = 3))
    )
  )
Created on 2021-04-22 by the reprex package (v1.0.0)

Related

SHAP Summary Plot for XGBoost model in R without displaying Mean Absolute SHAP value on the plot

I don't want to display the Mean Absolute Values on my SHAP Summary Plot in R. I want an output similar to the one produced in python. What line of code will help remove the mean absolute values from the summary plot in R?
I'm currently using this line of code:
# Call currently used to draw the summary plot; `xgb_model` and `x` are the
# asker's own objects and are not defined in this snippet.
shap.plot.summary.wrap1(xgb_model, X = x, top_n = 10)
You can do this by slightly modifying the source code of shap.plot.summary() as below:
#' SHAP summary (sina) plot without the mean |SHAP| labels
#'
#' Edited copy of `shap.plot.summary()`: the `geom_text()` layer that printed
#' the per-feature mean value and the "|SHAP|" bar annotation are not drawn.
#'
#' @param data_long Long-format SHAP data. The code reads the columns
#'   `variable` (its `levels()` give the feature order), `value` and
#'   `stdfvalue`. Note that `setDT()` converts the object to a data.table by
#'   reference.
#' @param x_bound Symmetric limit of the SHAP-value axis. When `NULL`, 1.1
#'   times the largest absolute `value` is used.
#' @param dilute `FALSE`, `0` or `NULL` keeps every row. Any other value
#'   triggers random subsampling of the rows (at most half of them are kept).
#' @param scientific,my_format Retained so existing calls keep working. They
#'   only controlled the number format of the removed labels and are ignored.
#' @return A ggplot object.
shap.plot.summary.edited <- function(data_long,
                                     x_bound = NULL,
                                     dilute = FALSE,
                                     scientific = FALSE,
                                     my_format = NULL) {
  # Number of distinct features
  N_features <- setDT(data_long)[, uniqueN(variable)]
  if (is.null(dilute)) dilute <- FALSE
  nrow_X <- nrow(data_long) / N_features # n per feature

  if (dilute != 0) {
    # Cap the dilution factor at nrow_X / 10 (rounded up).
    dilute <- ceiling(min(nrow_X / 10, abs(as.numeric(dilute))))

    # A fixed seed keeps the subsample reproducible. Save the caller's RNG
    # state first and restore it on exit so the session's random stream is
    # not reset as a side effect of plotting.
    rng_env <- globalenv()
    had_seed <- exists(".Random.seed", envir = rng_env, inherits = FALSE)
    old_seed <- if (had_seed) get(".Random.seed", envir = rng_env) else NULL
    on.exit(
      if (had_seed) {
        assign(".Random.seed", old_seed, envir = rng_env)
      } else {
        rm(".Random.seed", envir = rng_env)
      },
      add = TRUE
    )
    set.seed(1234)

    # Keep n / dilute rows, but never more than half of the rows
    data_long <- data_long[sample(
      nrow(data_long),
      min(nrow(data_long) / dilute, nrow(data_long) / 2)
    )]
  }

  x_bound <- if (is.null(x_bound)) {
    max(abs(data_long$value)) * 1.1
  } else {
    as.numeric(abs(x_bound))
  }

  plot1 <- ggplot(data = data_long) +
    coord_flip(ylim = c(-x_bound, x_bound)) +
    geom_hline(yintercept = 0) + # the y-axis beneath
    # sina plot:
    ggforce::geom_sina(
      aes(x = variable, y = value, color = stdfvalue),
      method = "counts", maxwidth = 0.7, alpha = 0.7
    ) +
    # The geom_text() layer with the mean values and the annotate() layer
    # with the "|SHAP|" bar notation are intentionally left out here.
    scale_color_gradient(
      low = "#FFCC33", high = "#6600CC",
      breaks = c(0, 1), labels = c(" Low", "High "),
      guide = guide_colorbar(barwidth = 12, barheight = 0.3)
    ) +
    theme_bw() +
    theme(
      axis.line.y = element_blank(),
      axis.ticks.y = element_blank(), # remove axis line
      legend.position = "bottom",
      legend.title = element_text(size = 10),
      legend.text = element_text(size = 8),
      axis.title.x = element_text(size = 10)
    ) +
    # reverse the order of features, from high to low
    # (features are not relabelled in this edited version)
    scale_x_discrete(limits = rev(levels(data_long$variable))) +
    labs(
      y = "SHAP value (impact on model output)",
      x = "",
      color = "Feature value "
    )

  plot1
}

How can I add an annotation to a faceted ggplot (with a log scale) outside the plot area

I'm looking to add some annotations (ideally a text and an arrow) to a faceted ggplot outside the plot area.
What's that, you say? Hasn't someone asked something similar here, here and here? Well yes. But none of them were trying to do this below an x-axis with a log scale.
With the exception of this amazing answer by #Z.Lin — but that involved a specific package and I'm looking for a more generic solution.
At first glance this would appear to be a very niche question, but for those of you familiar with forest plots this may pique some interest.
Firstly, some context... I'm interested in presenting the results of a coxph model using a forest plot in a publication. My goal here is to take the results of a model (literally a standalone coxph object) and use it to produce output that is customisable (gotta match the style guide) and helps translate the findings for an audience that might not be au fait with the technical details of hazard ratios. Hence the annotations and directional arrows.
Before you start dropping links to r packages/functions that could help do this... here are those that I've tried so far:
ggforestplot — this package produces lovely customisable forest plots (if you are using odds ratios), but it hard codes a geom_vline at zero which doesn't help for HR's
ggforest — this package is a nerd paradise of detail, but good luck a) editing the variable names and b) trying to theme it (I mentioned earlier that I'm working with a coxph object, what I didn't mention was that the varnames are ugly — they need to be changed for a punter to understand what we're trying to communicate)
finalfit offers a great workflow and its hr_plot kicks out some informative output, but it doesn't play nice if you've already got a coxph object and you just want to plot it
So... backstory out of the way. I've created my own framework for a forest plot below to which I'd love to add — in the space below the x-axis labels and the x-axis title — two annotations that help interpret the result. My current code struggles with:
repeating the code under each facet (this is something I'm trying to avoid)
mirroring the annotations of either side of the geom_vline with a log scale
Any advice anyone might have would be much appreciated... I've added a reproducible example below.
## LOAD REQUIRED PACKAGES
library(tidyverse)
library(survival)
library(broom)
library(ggforce)
library(ggplot2)
## PREP DATA
# Dichotomise the covariates into codes 1 / 2. case_when() falls through to
# the TRUE branch for anything that does not satisfy the condition (including
# NA), so those rows get code 1.
model_data <- lung %>%
  mutate(
    inst_cat = case_when(inst %% 2 == 0 ~ 2, TRUE ~ 1),
    pat.karno_cat = case_when(pat.karno < 75 ~ 2, TRUE ~ 1),
    ph.karno_cat = case_when(ph.karno < 75 ~ 2, TRUE ~ 1),
    wt.loss_cat = case_when(wt.loss > 15 ~ 2, TRUE ~ 1),
    meal.cal_cat = case_when(meal.cal > 900 ~ 2, TRUE ~ 1)
  )

# Cox proportional hazards model on sex and the dichotomised covariates
coxph_model <- coxph(
  Surv(time, status) ~ sex + inst_cat + wt.loss_cat + meal.cal_cat +
    pat.karno_cat + ph.karno_cat,
  data = model_data
)
## PREP DATA
# Tidy the model into one row per term (exponentiated estimates with 95%
# confidence limits), then add a significance label and a facet group.
plot_data <- coxph_model %>%
  broom::tidy(exponentiate = TRUE, conf.int = TRUE, conf.level = 0.95) %>%
  mutate(
    stat_sig = case_when(
      p.value < 0.05 ~ "p < 0.05",
      TRUE ~ "N.S."
    ),
    group = case_when(
      term == "sex" ~ "gender",
      term == "inst_cat" ~ "site",
      term %in% c(
        "pat.karno_cat", "ph.karno_cat", "meal.cal_cat", "wt.loss_cat"
      ) ~ "outcomes"
    )
  )
## PLOT FOREST PLOT
# Forest plot of the hazard ratios, one facet per `group`.
forest_plot <- plot_data %>%
  ggplot() +
  aes(x = estimate, y = term, colour = stat_sig) +
  # Reference line at a hazard ratio of 1
  geom_vline(aes(xintercept = 1), linetype = 2) +
  geom_point(shape = 15, size = 4) +
  # Map the interval bounds as aesthetics so each row's limits travel with its
  # own data, instead of passing full-length vectors taken from the global
  # `plot_data`, which only line up while the row order is untouched.
  geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
  scale_colour_manual(values = c("N.S." = "black", "p < 0.05" = "red")) +
  # Text annotations left and right of the reference line
  annotate("text", x = 0.45, y = -0.2, col = "red", label = "indicates y") +
  annotate("text", x = 1.5, y = -0.2, col = "red", label = "indicates y") +
  labs(y = "", x = "Hazard ratio") +
  coord_trans(x = "log10") +
  scale_x_continuous(
    breaks = scales::log_breaks(n = 7),
    limits = c(0.1, 10)
  ) +
  ggforce::facet_col(facets = ~group, scales = "free_y", space = "free") +
  theme(
    legend.position = "bottom",
    legend.title = element_blank(),
    strip.text = element_text(hjust = 0),
    # extra room above the x-axis title
    axis.title.x = element_text(margin = margin(t = 25, r = 0, b = 0, l = 0))
  )
Created on 2022-05-10 by the reprex package (v2.0.1)
I think I would use annotation_custom here. This requires standard coord_cartesian with clip = 'off', but it should be easy to re-jig your x axis to use scale_x_log10
# Forest plot with text and arrow annotations drawn below every panel.
# annotation_custom() grobs are positioned in panel-relative units, so they
# do not depend on the log x scale.
plot_data %>%
  ggplot() +
  aes(x = estimate, y = term, colour = stat_sig) +
  geom_vline(aes(xintercept = 1), linetype = 2) +
  geom_point(shape = 15, size = 4) +
  # Map the bounds as aesthetics: scale_x_log10() then transforms them itself,
  # so no hand-written log10() has to be kept in sync with the scale.
  geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
  scale_colour_manual(values = c("N.S." = "black", "p < 0.05" = "red")) +
  # hjust / vjust are arguments of textGrob(), not graphical parameters,
  # so they are passed there rather than inside gpar().
  annotation_custom(
    grid::textGrob(
      label = "indicates yada",
      x = unit(0.4, "npc"),
      y = unit(-7.5, "mm"),
      hjust = 0.5,
      vjust = 0.5,
      gp = grid::gpar(col = "red")
    )
  ) +
  annotation_custom(
    grid::textGrob(
      label = "indicates bada",
      x = unit(0.6, "npc"),
      y = unit(-7.5, "mm"),
      hjust = 0.5,
      vjust = 0.5,
      gp = grid::gpar(col = "blue")
    )
  ) +
  # Arrows pointing away from the centre of the panel
  annotation_custom(
    grid::linesGrob(
      x = unit(c(0.49, 0.25), "npc"),
      y = unit(c(-10, -10), "mm"),
      arrow = arrow(length = unit(3, "mm")),
      gp = grid::gpar(col = "red")
    )
  ) +
  annotation_custom(
    grid::linesGrob(
      x = unit(c(0.51, 0.75), "npc"),
      y = unit(c(-10, -10), "mm"),
      arrow = arrow(length = unit(3, "mm")),
      gp = grid::gpar(col = "blue")
    )
  ) +
  labs(y = "", x = "Hazard ratio") +
  scale_x_log10(
    breaks = c(0.1, 0.3, 1, 3, 10),
    limits = c(0.1, 10)
  ) +
  ggforce::facet_col(facets = ~group, scales = "free_y", space = "free") +
  # clip = "off" lets the grobs be drawn outside the panel area
  coord_cartesian(clip = "off") +
  theme(
    legend.position = "bottom",
    legend.title = element_blank(),
    strip.text = element_text(hjust = 0),
    axis.title.x = element_text(margin = margin(t = 25, r = 0, b = 0, l = 0)),
    # vertical gap between panels leaves room for the annotations
    panel.spacing.y = unit(15, "mm")
  )

How to plot 'outside' of plotting area using ggplot in R?

I recently asked this question. However, I am asking a separate question now as the scope of my new question falls outside the range of the last question.
I am trying to create a heatmap in ggplot... however, outside of the axis I am trying to plot geom_tile. The issue is I cannot find a consistent way to get it to work. For example, the code I am using to plot is:
library(colorspace)
library(ggplot2)
library(ggnewscale)
library(tidyverse)
# All 9 x 9 combinations of the two categorical axes. Naming the columns
# avoids having to back-tick the auto-generated `paste0(...)` column names.
asd <- expand_grid(a = paste0("a", 1:9), b = paste0("b", 1:9))
df <- data.frame(
  a = asd$a,
  b = asd$b,
  c = sample(20, 81, replace = TRUE) # TRUE, not T: `T` can be reassigned
)
# From discrete to continuous
df$a <- match(df$a, sort(unique(df$a)))
df$b <- match(df$b, sort(unique(df$b)))
# Values for the edge tiles: first 9 for the bottom strip, last 9 for the left
z <- sample(10, 18, replace = TRUE)
# set color palettes
pal <- rev(diverging_hcl(palette = "Blue-Red", n = 11))
palEdge <- rev(sequential_hcl(palette = "Plasma", n = 11))
# plot
ggplot(df, aes(a, b)) +
  geom_tile(aes(fill = c)) +
  scale_fill_gradientn(
    colors = pal,
    guide = guide_colorbar(
      frame.colour = "black",
      ticks.colour = "black"
    ),
    name = "C"
  ) +
  theme_classic() +
  labs(x = "A axis", y = "B axis") +
  # second, independent fill scale for the edge tiles
  new_scale_fill() +
  geom_tile(
    data = tibble(a = 1:9, z = z[1:9]),
    aes(x = a, y = 0, fill = z, height = 0.3)
  ) +
  geom_tile(
    data = tibble(b = 1:9, z = z[10:18]),
    aes(x = 0, y = b, fill = z, width = 0.3)
  ) +
  scale_fill_gradientn(
    colors = palEdge,
    guide = guide_colorbar(
      frame.colour = "black",
      ticks.colour = "black"
    ),
    name = "Z"
  ) +
  # clip = "off" lets the tiles at x = 0 / y = 0 be drawn outside the panel
  coord_cartesian(clip = "off", xlim = c(0.5, NA), ylim = c(0.5, NA)) +
  theme(
    aspect.ratio = 1,
    plot.margin = margin(10, 15.5, 25, 25, "pt")
  )
This produces something like this:
However, I am trying to find a consistent way to plot something more like this (which I quickly made in photoshop):
The main issue I'm having is being able to manipulate the coordinates of the new scale 'outside' of the plotting area. Is there a way to move the tiles that are outside so I can position them in an area that makes sense?
There are always the two classic options when plotting outside the plot area:
annotate/ plot with coord_...(clip = "off")
make different plots and combine them.
The latter option usually gives much more flexibility and way less headaches, in my humble opinion.
library(colorspace)
library(tidyverse)
library(patchwork)
# All 9 x 9 combinations of the two categorical axes. Naming the columns
# avoids having to back-tick the auto-generated `paste0(...)` column names.
asd <- expand_grid(a = paste0("a", 1:9), b = paste0("b", 1:9))
df <- data.frame(
  a = asd$a,
  b = asd$b,
  c = sample(20, 81, replace = TRUE) # TRUE, not T: `T` can be reassigned
)
# From discrete to continuous
df$a <- match(df$a, sort(unique(df$a)))
df$b <- match(df$b, sort(unique(df$b)))
# Values for the edge strips: first 9 for the bottom, last 9 for the left
z <- sample(10, 18, replace = TRUE)
# set color palettes
pal <- rev(diverging_hcl(palette = "Blue-Red", n = 11))
palEdge <- rev(sequential_hcl(palette = "Plasma", n = 11))

# Main heatmap
p_main <- ggplot(df, aes(a, b)) +
  geom_tile(aes(fill = c)) +
  scale_fill_gradientn(
    "C",
    colors = pal,
    guide = guide_colorbar(frame.colour = "black", ticks.colour = "black")
  ) +
  theme_classic() +
  labs(x = "A axis", y = "B axis")

# Strip of tiles shown underneath the heatmap
p_bottom <- ggplot() +
  geom_tile(
    data = tibble(a = 1:9, z = z[1:9]),
    aes(x = a, y = 0, fill = z, height = 0.3)
  ) +
  theme_void() +
  scale_fill_gradientn(
    "Z",
    limits = c(0, 10),
    colors = palEdge,
    guide = guide_colorbar(frame.colour = "black", ticks.colour = "black")
  )

# Strip of tiles shown to the left of the heatmap; it uses the same "Z"
# scale settings as the bottom strip
p_left <- ggplot() +
  theme_void() +
  geom_tile(
    data = tibble(b = 1:9, z = z[10:18]),
    aes(x = 0, y = b, fill = z, width = 0.3)
  ) +
  scale_fill_gradientn(
    "Z",
    limits = c(0, 10),
    colors = palEdge,
    guide = guide_colorbar(frame.colour = "black", ticks.colour = "black")
  )

# Four-cell layout: left strip, heatmap, empty spacer, bottom strip
p_left + p_main + plot_spacer() + p_bottom +
  plot_layout(
    guides = "collect",
    heights = c(1, .1),
    widths = c(.1, 1)
  )
Created on 2021-02-21 by the reprex package (v1.0.0)

R - How can I add a bivariate legend to my ggplot2 chart?

I'm trying to add a bivariate legend to my ggplot2 chart but I don't know whether (a) this is possible through some guides options and (b) how to achieve it.
The only way I've managed to produce something close to the desired outcome was by specifically creating a new chart which resembles a legend (named p.legend below) and inserting it, via the cowplot package, somewhere in the original chart (named p.chart below). But surely there must be a better way than this, given that this approach requires creating the legend in the first place and fiddling with its size/location to fit it in the original chart.
Here's code for a dummy example of my approach:
library(tidyverse)
# Create Dummy Data #
set.seed(876)
n <- 2
df <- expand.grid(
  Area = LETTERS[1:n],
  Period = c("Summer", "Winter"),
  stringsAsFactors = FALSE
) %>%
  mutate(
    Objective = runif(2 * n, min = 0, max = 2),
    Performance = runif(2 * n) * Objective
  ) %>%
  gather(Type, Value, Objective:Performance)

# Shared styling so the hand-made legend matches the chart exactly
period_cols <- c("Summer" = "#ff7f00", "Winter" = "#1f78b4")
objective_alpha <- 0.6

# Original chart without legend #
p.chart <- df %>%
  ggplot(., aes(x = Area)) +
  # "Objective" bars are drawn semi-transparent, with the opaque
  # "Performance" bars drawn on top of them
  geom_col(
    data = . %>% filter(Type == "Objective"),
    aes(y = Value, fill = Period),
    position = "dodge", width = 0.7, alpha = objective_alpha
  ) +
  geom_col(
    data = . %>% filter(Type == "Performance"),
    aes(y = Value, fill = Period),
    position = "dodge", width = 0.7
  ) +
  # guide = "none" replaces the deprecated guide = FALSE
  scale_fill_manual(values = period_cols, guide = "none") +
  theme_minimal() +
  theme(
    panel.grid.major.x = element_blank(),
    panel.grid.minor.y = element_blank()
  )

# Create a chart resembling a legend #
p.legend <- expand.grid(
  Period = c("Summer", "Winter"),
  Type = c("Objective", "Performance"),
  stringsAsFactors = FALSE
) %>%
  ggplot(., aes(
    x = Period,
    y = factor(Type, levels = c("Performance", "Objective")),
    fill = Period, alpha = Type
  )) +
  geom_tile() +
  scale_fill_manual(values = period_cols, guide = "none") +
  # The "Objective" alpha now matches the chart (the original used 0.7 here
  # but 0.6 in the chart)
  scale_alpha_manual(
    values = c("Objective" = objective_alpha, "Performance" = 1),
    guide = "none"
  ) +
  ggtitle("Legend") +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),
    rect = element_rect(fill = "transparent"),
    axis.title = element_blank(),
    panel.grid.major = element_blank()
  )

# Add legend to original chart #
p.final <- cowplot::ggdraw() +
  cowplot::draw_plot(plot = p.chart) +
  cowplot::draw_plot(
    plot = p.legend,
    x = 0.5, y = 0.65, width = 0.4, height = 0.28, scale = 0.7
  )

# Save chart #
# ggplot2::ggsave() is used because current cowplot releases no longer export
# their own ggsave()
ggplot2::ggsave("Bivariate Legend.png", p.final, width = 8, height = 6, dpi = 500)
... and the resulting chart:
Is there an easier way of doing this?
This might work at some point, but right now the colorbox seems to ignore all breaks, names and labels (#ClausWilke?). Probably because the multiscales package is in really early stages.
Posting since it might work when future readers are here.
library(multiscales)
# Encode both variables as numbers so they can be zipped into one bivariate
# fill aesthetic.
df %>%
  mutate(
    period = as.numeric(factor(Period)),
    type = as.numeric(factor(Type))
  ) %>%
  ggplot(., aes(
    x = Area, y = Value,
    fill = zip(period, type),
    group = interaction(Area, Period)
  )) +
  geom_col(width = 0.7, position = "dodge") +
  bivariate_scale(
    "fill",
    pal_hue_sat(c(0.07, 0.6), c(0.4, 0.8)),
    guide = guide_colorbox(
      nbin = 2,
      name = c("Period", "Type"), # ignored
      breaks = list(1:2, 1:2), # ignored
      # Period / Type are character columns, so convert with factor() before
      # taking levels(); levels() of a character vector is NULL
      labels = list(levels(factor(.$Period)), levels(factor(.$Type))) # ignored
    )
  ) # closes bivariate_scale(), which was left open in the original snippet

Complex Chart in R/ggplot with Proper Legend Display

This is my first question to StackExchange, and I've searched for answers that have been helpful, but haven't really gotten me to where I'd like to be.
This is a stacked bar chart, combined with a point chart, combined with a line.
Here's my code:
theme_set(theme_light())
library(lubridate)

# Forecast month, the x-axis window around it, and a tick every second day
FM <- as.Date('2018-02-01')
x.range <- c(
  FM - months(1) - days(1) - days(day(FM) - 1),
  FM - days(day(FM) - 1) + months(1)
)
x.ticks <- seq(x.range[1] + days(1), x.range[2], by = 2)

# populate example data: one row per day strictly inside x.range
preds <- data.frame(
  FM = FM,
  DATE = seq(x.range[1] + days(1), x.range[2] - days(1), by = 1)
)
# S_O rises from 1 towards 1,000,000 in equal steps
preds$S_O <- round(seq(1, 1000000, by = 1000000 / nrow(preds)))
# S is a day-of-month share of S_O inside the forecast month, 0 otherwise
preds$S <- round(
  ifelse(month(preds$FM) == month(preds$DATE), day(preds$DATE) / 30.4, 0) *
    preds$S_O
)
# O is whatever remains of S_O
preds$O <- preds$S_O - preds$S
preds$pred_sales <- round(1000000 + rnorm(nrow(preds), 0, 10000))
# 5-point one-sided moving average of pred_sales
preds$ma <- with(preds, stats::filter(pred_sales, rep(1 / 5, 5), sides = 1))

# Upper y limit: max prediction rounded up to a multiple of 5000, plus 15000
y.max <- ceiling(max(preds$pred_sales) / 5000) * 5000 + 15000

line.cols <- c(
  O = "palegreen4", S = "steelblue4",
  P = "maroon", MA = "blue"
)
fill.cols <- c(
  O = "palegreen3", S = "steelblue3",
  P = "red"
)
# Stacked bars (S and O), square points for the predictions, a moving-average
# line and rotated value labels, assembled in one expression.
p <- ggplot(data = preds, mapping = aes(DATE, pred_sales)) +
  geom_col(
    data = reshape2::melt(preds[, c("DATE", "S", "O")], id.var = "DATE"),
    mapping = aes(DATE, value, group = 1, fill = variable, color = variable),
    width = 1,
    alpha = 0.5
  ) +
  geom_point(
    mapping = aes(DATE, pred_sales, group = 2, fill = "P", color = "P"),
    shape = 22, # square
    alpha = 0.5,
    size = 2.5
  ) +
  geom_line(
    # rows without a moving-average value are dropped
    data = preds[!is.na(preds$ma), ],
    mapping = aes(DATE, ma, group = 3, color = "MA"),
    alpha = 0.8,
    size = 1
  ) +
  geom_text(
    mapping = aes(
      DATE, pred_sales,
      label = formatC(pred_sales / 1000, format = "d", big.mark = ",")
    ),
    angle = 90,
    size = 2.75,
    hjust = 1.25,
    vjust = 0.4
  ) +
  labs(
    title = sprintf("%s Sales Predictions - %s", "Overall", format(FM, "%b %Y")),
    x = "Date",
    y = "Volume in MMlbs"
  ) +
  theme(
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1, size = 8),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    legend.title = element_blank(),
    legend.position = "bottom",
    legend.text = element_text(size = 8),
    legend.margin = margin(t = 0.25, unit = "cm")
  ) +
  scale_x_date(
    breaks = x.ticks,
    date_labels = "%b %e",
    limits = x.range
  ) +
  scale_y_continuous(
    limits = c(0, y.max),
    # axis labels in thousands
    labels = function(x) formatC(x / 1000, format = "d", big.mark = ",")
  ) +
  scale_color_manual(
    values = line.cols,
    breaks = c("MA"),
    labels = c(MA = "Mvg Avg (5)")
  ) +
  scale_fill_manual(
    values = fill.cols,
    breaks = c("P", "O", "S"),
    labels = c(O = "Open Orders", S = "Sales", P = "Predictions")
  )
p
The chart it generates is this:
As you can see, the legend does a couple of funky things. It's close, but not quite there. I only want boxes with exterior borders for Predictions, Open Orders, and Sales, and only a blue line for the Mvg Avg (5).
Any advice would be appreciated.
Thanks!
Rather late, but if you are still interested to understand this problem, the following should work. Explanations are included as comments within the code:
library(dplyr)
preds %>%
  # scale the values for ALL numeric columns in the dataset, before
  # passing the dataset to ggplot()
  mutate_if(is.numeric, ~ . / 1000) %>%
  # since x / y mappings are stated in the top level ggplot(), there's
  # no need to repeat them in the subsequent layers UNLESS you want to
  # override them
  ggplot(mapping = aes(x = DATE, y = pred_sales)) +
  # 1. use data = . to inherit the top level data frame, & modify it on
  #    the fly for this layer; this is neater as you are essentially
  #    using a single data source for the ggplot object.
  # 2. geom_col() is a more succinct way to say geom_bar(stat = "identity")
  # (I'm using tidyr rather than reshape package, since ggplot2 is a
  # part of the tidyverse packages, & the two play together nicely)
  geom_col(
    data = . %>%
      select(S, O, DATE) %>%
      tidyr::gather(variable, value, -DATE),
    aes(y = value, fill = variable, color = variable),
    width = 1, alpha = 0.5
  ) +
  # don't show legend for this layer (o/w the fill / color legend would
  # include a square shape in the centre of each legend key)
  geom_point(
    aes(fill = "P", color = "P"),
    shape = 22, alpha = 0.5, size = 2.5, show.legend = FALSE
  ) +
  # use data = . %>% ... as above.
  # since the fill / color aesthetic mappings from the geom_col layer would
  # result in a border around all fill / color legends, avoid it all together
  # here by hard coding the line color to "blue", & map its linetype instead
  # to create a separate linetype-based legend later.
  geom_line(
    data = . %>% na.omit(),
    aes(y = ma, linetype = "MA"),
    color = "blue", alpha = 0.8, size = 1
  ) +
  # scales::comma is a more succinct alternative to formatC for this use case
  geom_text(
    aes(label = scales::comma(pred_sales)),
    angle = 90, size = 2.75, hjust = 1.25, vjust = 0.4
  ) +
  labs(
    title = sprintf("%s Sales Predictions - %s", "Overall", format(FM, "%b %Y")),
    x = "Date",
    y = "Volume in MMlbs"
  ) +
  theme(
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1, size = 8),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    legend.title = element_blank(),
    legend.position = "bottom",
    legend.text = element_text(size = 8),
    legend.margin = margin(t = 0.25, unit = "cm")
  ) +
  scale_x_date(
    breaks = x.ticks,
    date_labels = "%b %e",
    limits = x.range
  ) +
  # as above, scales::comma is more succinct
  scale_y_continuous(
    limits = c(0, y.max / 1000),
    labels = scales::comma
  ) +
  # specify the same breaks & labels for the manual fill / color scales, so that
  # a single legend is created for both
  scale_color_manual(
    values = line.cols,
    breaks = c("P", "O", "S"),
    labels = c(O = "Open Orders", S = "Sales", P = "Predictions")
  ) +
  scale_fill_manual(
    values = fill.cols,
    breaks = c("P", "O", "S"),
    labels = c(O = "Open Orders", S = "Sales", P = "Predictions")
  ) +
  # create a separate line-only legend using the linetype mapping, with
  # value = 1 (i.e. unbroken line) & specified alpha / color to match the
  # geom_line layer. The argument is spelled out as `labels` rather than
  # relying on partial matching of `label`.
  scale_linetype_manual(
    values = 1,
    labels = "Mvg Avg (5)",
    guide = guide_legend(override.aes = list(
      alpha = 1,
      color = "blue"
    ))
  )

Resources