Legend mixing shapes with geom_points R - r

My legend is not showing correctly when I draw my graph in R using ggplot2. One column of my dataset is represented by a geom_bar and the two others are represented by geom_point layers (one shape each). In the legend, both the circle and the diamond are shown for 2000 as well as for 2008, with the circle drawn inside the diamond for both years. However, the plot itself works totally fine...
Here is a screenshot:
I have created a simplified version of my dataset:
# Simplified example data: one row per industry, one column per survey year.
# `order_var` is the value the plots use to order the industries on the x axis.
order_var <- c(1, 4, 3, 5, 6, 2)
# Labels carry no surrounding whitespace, so nothing stray leaks into the axis text.
alt_name <- c("Agriculture", "Mining", "Food products", "Manufacture", "Chemicals", "Machinery")
y2000 <- c(20, 40, 50, 80, 30, 70)
y2008 <- c(40, 50, 80, 70, 30, 60)
y2018 <- c(10, 30, 80, 50, 40, 50)
datatest <- data.frame(
  order_var = order_var,
  alt_name = alt_name,
  y2000 = y2000,
  y2008 = y2008,
  y2018 = y2018
)
And the code for my graph:
# Question's original plot. Each layer maps a constant string to an aesthetic
# so that the layer gets its own legend entry.
datatest %>% ggplot(aes(x = reorder(alt_name, order_var))) +
# Bars: 2018 values; the constant "2018" is mapped to fill.
geom_bar(stat = "identity", aes(y = `y2018`, fill = "2018"), width = 0.7, col = "black") +
# Points: 2008 values drawn with shape 23; the constant "2008" is mapped to colour.
geom_point(aes(y = `y2008`, col = "2008"), shape = 23, fill = "white", size = 5) +
# Points: 2000 values drawn with shape 19; also mapped to colour, so both point
# layers feed the same colour scale. The question reports that each key of
# that legend then shows both shapes.
geom_point(aes(y = `y2000`, col = "2000"), shape = 19, fill = "black", size = 3) +
xlab("Industry") +
ylab("Percentage") +
theme(legend.position = "top") +
# name = '' leaves both legend titles empty.
scale_fill_manual(name = '', values = c("2018" = "#4F81BD"), breaks = c("2018")) +
scale_colour_manual(name = '', values = c("2008" = "black", "2000" = "orange"))
If you know how to correct this problem, I would be very grateful!!
Thank you :)

That's a very tricky plot you are trying to make because you are in essence mapping the same aesthetics to different geoms.
The first thing you should do is to reshape your data to the long format. I also divided your dataset between 2018 (the bar), and 2000, 2008 (the points).
# Reshape to long format: the former year columns end up in `name` and their
# values in `value` (the pivot_longer() defaults).
df2 <- pivot_longer(datatest, cols = -c(order_var, alt_name))
# Flag the rows that are drawn as bars (2018); all others are drawn as points.
df2 <- mutate(df2, bar = if_else(name == "y2018", 1, 0))
data_bar <- filter(df2, bar == 1)
data_point <- filter(df2, bar != 1)
I also find it useful to add a dodge to your points to avoid overlapping one inside the other as in the case of chemicals with position = position_dodge(width = 0.6).
The first solution gives what you want, but it is a bit of a hack, and I wouldn't recommend it as a general strategy. You basically add an aesthetic that you are not otherwise going to use to the bars (in this case, linetype), and then override its legend key, as suggested in this answer.
# Solution 1: the bars get a dummy linetype mapping so that they receive their
# own legend entry; the key of that entry is painted blue via override.aes.
ggplot(data_bar, aes(x = reorder(alt_name, order_var))) +
  geom_col(
    aes(y = value, linetype = name),
    fill = "#4F81BD",
    colour = "black"
  ) +
  # Points are dodged so the two years do not sit on top of each other.
  geom_point(
    aes(y = value, colour = name, fill = name, shape = name, size = name),
    data = data_point,
    position = position_dodge(width = 0.6)
  ) +
  # The four point scales share the same labels.
  scale_shape_manual(values = c(19, 23), labels = c("2000", "2008")) +
  scale_size_manual(values = c(3, 5), labels = c("2000", "2008")) +
  scale_colour_manual(values = c("orange", "black"), labels = c("2000", "2008")) +
  scale_fill_manual(values = c("orange", "white"), labels = c("2000", "2008")) +
  scale_linetype_manual(
    values = 1,
    labels = c("2018"),
    guide = guide_legend(override.aes = list(fill = c("#4F81BD")))
  ) +
  labs(x = "Industry", y = "Percentage") +
  theme(legend.position = "top", legend.title = element_blank())
Another solution, more general, is to avoid using the fill aesthetics for the geom_point and changing the shape to a solid one instead:
# Solution 2: keep fill for the bars only and draw the points with solid
# shapes (19 and 18), which are coloured through `colour` alone.
ggplot(data_bar, aes(x = reorder(alt_name, order_var))) +
  geom_col(aes(y = value, fill = name), colour = "black") +
  geom_point(
    aes(y = value, colour = name, shape = name, size = name),
    data = data_point,
    position = position_dodge(width = 0.6)
  ) +
  # The three point scales share the same labels.
  scale_shape_manual(values = c(19, 18), labels = c("2000", "2008")) +
  scale_size_manual(values = c(4, 6), labels = c("2000", "2008")) +
  scale_colour_manual(values = c("orange", "white"), labels = c("2000", "2008")) +
  scale_fill_manual(values = c("#4F81BD"), labels = c("2018")) +
  labs(x = "Industry", y = "Percentage") +
  theme(legend.position = "top", legend.title = element_blank())

Related

How to present the results of a dataframe in a serial scale using ggplot as in the example attached?

I have this data frame :
# Raw scores 0-8 and the severity value attached to each of them.
Raw.Score <- c(0, 1, 2, 3, 4, 5, 6, 7, 8)
Severity <- c(
  -3.56553994, -2.70296933, -1.63969850, -0.81321707, -0.04629182,
  0.73721320, 1.61278518, 2.76647043, 3.94804472
)
# Column names are taken from the variable names.
x <- data.frame(Raw.Score, Severity)
Raw.score are raw numbers from 0 to 8 (let's consider them as the labels of the severity numbers)
Severity are relative numbers that represent the locations of the scores in the diagram
I want to graphically present the results as in the following example using ggplot (the example includes different numbers but I want something similar)
As a fun exercise in ggplot-ing here is one approach to achieve or come close to your desired result.
# Example data: raw scores 0-8 and the severity value at which each score is drawn.
Raw.Score = c(0,1,2,3,4,5,6,7,8)
Severity = c(-3.56553994,-2.70296933,-1.63969850,-0.81321707,-0.04629182,
0.73721320,1.61278518,2.76647043,3.94804472)
dat <- data.frame(Raw.Score, Severity)
library(ggplot2)
# Dense grid of x positions; one thin tile is drawn per value to form the colorbar.
dat_tile <- data.frame(
Severity = seq(-4.1, 4.1, .05)
)
# Positions of the axis ticks and their labels.
dat_axis <- data.frame(
Severity = seq(-4, 4, 2)
)
# Height of the colorbar tiles; the axis line is drawn at their lower edge.
tile_height = .15
# The top labels are placed at y = ymax and the bottom labels at y = -ymax.
ymax <- .5
# Layers are listed bottom-to-top: later layers are drawn over earlier ones.
ggplot(dat, aes(y = 0, x = Severity, fill = Severity)) +
# Axis line
geom_hline(yintercept = -tile_height / 2) +
# Colorbar
geom_tile(data = dat_tile, aes(color = Severity), height = tile_height) +
# Segments connecting top and bottom labels
geom_segment(aes(xend = Severity, yend = -ymax, y = ymax), color = "orange") +
# Axis ticks aka dots
geom_point(data = dat_axis,
y = -tile_height / 2, shape = 21, stroke = 1, fill = "white") +
# ... and labels
geom_text(data = dat_axis, aes(label = Severity),
y = -tile_height / 2 - .1, vjust = 1, fontface = "bold") +
# Bottom labels
geom_label(aes(y = -ymax, label = scales::number(Severity, accuracy = .01))) +
# Top labels
geom_point(aes(y = ymax, color = Severity), size = 8) +
geom_text(aes(y = ymax, label = Raw.Score), fontface = "bold") +
# Colorbar annotations
annotate(geom = "text", fontface = "bold", label = "MILD", color = "black", x = -3.75, y = 0) +
annotate(geom = "text", fontface = "bold", label = "SEVERE", color = "white", x = 3.75, y = 0) +
# Fixing the scales
scale_x_continuous(expand = c(0, 0)) +
scale_y_continuous(limits = c(-ymax, ymax)) +
# Color gradient
scale_fill_gradient(low = "orange", high = "red", guide = "none") +
scale_color_gradient(low = "orange", high = "red", guide = "none") +
# Get rid of all non-data ink
theme_void() +
# Add some plot margin
theme(plot.margin = rep(unit(10, "pt"), 4)) +
# clip = "off" lets elements that extend beyond the panel remain visible
coord_cartesian(clip = "off")

How to use specific filling colors when using scale_fill_binned()?

I would like to use my own fill colors (e.g. c("red", "blue", "grey50", "black")) when using the function scale_fill_binned() within ggplot code. How can I do this?
Here is a minimal reproducible example:
library(tidyverse)
# Summarise mtcars per cylinder count: number of cars and mean horsepower.
dat <- mtcars %>%
  group_by(cyl) %>%
  summarise(
    n = n(),
    mean_hp = mean(hp)
  ) %>%
  ungroup()

# Bubble plot in which both size and fill are binned on the number of cars.
# TRUE is spelled out because T is an ordinary, reassignable variable.
ggplot(data = dat, aes(x = cyl, y = mean_hp, size = n, fill = n)) +
  geom_point(shape = 21) +
  scale_size_binned(breaks = c(8, 10, 12), guide = guide_bins(show.limits = TRUE)) +
  scale_fill_binned(breaks = c(8, 10, 12), guide = guide_bins(show.limits = TRUE), type = "viridis") +
  labs(x = "Cylinder", y = "Mean hp", fill = "Nb of cars", size = "Nb of cars") +
  theme_minimal()
Here is what the output looks like:
To use this family of functions you need to provide a function that returns an object with class "ScaleContinuous" "Scale" "ggproto" "gg" (i.e. the equivalent of the output of scale_fill_viridis_c)!
# Continuous fill scale built on a custom colour gradient, with the same
# signature shape as scale_fill_viridis_c() so that it can be handed to
# scale_fill_binned(type = ...).
#
# Arguments:
#   ...         forwarded to continuous_scale() (e.g. breaks, limits, name).
#   alpha, begin, end, direction, option
#               accepted for signature compatibility only; they are not used.
#   values      optional positions of the colours, passed to gradient_n_pal().
#   space       colour space for the interpolation, passed to gradient_n_pal().
#   na.value    colour used for missing values.
#   guide       guide passed on to continuous_scale().
#   aesthetics  aesthetic(s) the scale applies to.
#   colours     colours the gradient interpolates between; the default
#               reproduces the previously hard-coded palette.
# Returns whatever continuous_scale() returns (a ggplot2 scale object).
scale_fill_custom <- function(..., alpha = 1, begin = 0, end = 1, direction = 1,
                              option = "D", values = NULL, space = "Lab",
                              na.value = "grey50", guide = "colourbar",
                              aesthetics = "fill",
                              colours = c("red", "blue", "grey50", "black")) {
  # gradient_n_pal() is exported from scales, so `::` is sufficient.
  custom_palette <- scales::gradient_n_pal(colours, values, space)
  # NOTE(review): scale_name appears to be soft-deprecated in recent ggplot2
  # releases but required by older ones - confirm against the target version.
  continuous_scale(
    aesthetics,
    scale_name = "custom",
    palette = custom_palette,
    na.value = na.value,
    guide = guide,
    ...
  )
}
# Same bubble plot, but the binned fill scale is built from scale_fill_custom.
# TRUE is spelled out because T is an ordinary, reassignable variable.
ggplot(data = dat, aes(x = cyl, y = mean_hp, size = n, fill = n)) +
  geom_point(shape = 21) +
  scale_size_binned(breaks = c(8, 10, 12), guide = guide_bins(show.limits = TRUE)) +
  scale_fill_binned(
    breaks = c(8, 10, 12),
    guide = guide_bins(show.limits = TRUE),
    type = scale_fill_custom
  ) +
  labs(x = "Cylinder", y = "Mean hp", fill = "Nb of cars", size = "Nb of cars") +
  theme_minimal()
Note that you are using colour as a scale to be translated by the eye into numerically meaningful difference. The colours are interpolated between the manually applied points, so will not actually be your exact colours. If you wish to band your averages by colour it would be preferable to create a factor, then manually apply your theme.
# Band the counts into a factor and colour the bands manually.
# drop = FALSE keeps bins that contain no data in the scales. Without it, the
# empty bin is removed and the unnamed colours are assigned to the remaining
# bins in order, so a bin would no longer get the colour listed at its position.
# The same setting is used on the size scale so both scales keep identical levels.
ggplot(
  data = mutate(dat, n = cut(n, breaks = c(0, 8, 10, 12, 20))),
  aes(x = cyl, y = mean_hp, size = n, fill = n)
) +
  geom_point(shape = 21) +
  scale_size_discrete(drop = FALSE) +
  scale_fill_manual(values = c("red", "blue", "grey50", "black"), drop = FALSE) +
  labs(x = "Cylinder", y = "Mean hp", fill = "Nb of cars", size = "Nb of cars") +
  theme_minimal()
With the comment of @teunbrand, I was able to come up with something.
# Exact colours per bin: build the binned fill scale by hand from a manual palette.
cols <- c("red", "blue", "grey50", "black")
ggplot(data = dat, aes(x = cyl, y = mean_hp, size = n, fill = n)) +
  geom_point(shape = 21) +
  scale_size_binned(breaks = c(8, 10, 12), guide = guide_bins(show.limits = TRUE)) +
  labs(x = "Cylinder", y = "Mean hp", fill = "Nb of cars", size = "Nb of cars") +
  theme_minimal() +
  # NOTE(review): binned_pal is reached through `:::`, i.e. it is an internal
  # ggplot2 helper and may change between releases - confirm it exists in the
  # installed version.
  binned_scale(
    aesthetics = "fill",
    scale_name = "custom",
    palette = ggplot2:::binned_pal(scales::manual_pal(values = cols)),
    guide = "bins",
    breaks = c(8, 10, 12),
    # range() yields c(min, max) in a single call
    limits = range(dat$n),
    show.limits = TRUE
  )
Here is what the output looks like:

How to add the legend to combo plot?

Here is the data set:
# Example data: one row per price segment.
d <- tribble(
  ~priceseg, ~price_n, ~zet_n, ~zet_n2,
  "(0,1]",         16,      2,      24,
  "(1,3]",         33,      3,      38,
  "(3,5]",         33,      2,      25,
  "(5,6]",         17,      1,      13
)
And here is the visualisation, thanks to @d.b:
# Bars for price_n, a line for zet_n2, and the price_n value as a label on each bar.
# The colours are fixed constants rather than mappings, so no legend is produced.
ggplot(d, aes(x = priceseg)) +
  geom_col(aes(y = price_n), fill = "#F1948A", colour = "black", size = 0.6) +
  geom_line(aes(y = zet_n2, group = 1), colour = "#154360", size = 1) +
  geom_label(aes(y = price_n, label = price_n), nudge_y = -0.6)
Now, I want to add the legend for bar plot and line in the visualisation something like this: Combined line & bar geoms: How to generate proper legend?
Also, I would like to add % in geom_label.
But somehow, I could not manage to implement it. Any help?
Here is an option
# Calculate percentage and add as column to `d`
# Share of each segment in the total of price_n, formatted as "xx.x%".
d <- transform(
  d,
  perc = sprintf("%2.1f%%", price_n / sum(price_n) * 100)
)

# Mapping constant strings to fill / colour is what creates the legend entries;
# the manual scales then assign the actual colours.
ggplot(d, aes(x = priceseg)) +
  geom_col(aes(y = price_n, fill = "bar_data"), colour = "black", size = 0.6) +
  geom_line(aes(y = zet_n2, group = 1, colour = "line_data"), size = 1) +
  geom_label(aes(y = price_n, label = perc), nudge_y = -0.6) +
  scale_colour_manual("", values = "#154360") +
  scale_fill_manual("", values = "#F1948A") +
  theme(
    legend.box = "horizontal",
    legend.title = element_blank(),
    legend.key = element_blank()
  )
You can adjust the fill and colour "labels" by changing the strings "bar_data" and "line_data".

Complex Chart in R/ggplot with Proper Legend Display

This is my first question to StackExchange, and I've searched for answers that have been helpful, but haven't really gotten me to where I'd like to be.
This is a stacked bar chart, combined with a point chart, combined with a line.
Here's my code:
theme_set(theme_light())
library(lubridate)

# Month being forecast, and the date window shown on the x axis.
FM <- as.Date('2018-02-01')
x.range <- c(
  FM - months(1) - days(1) - days(day(FM) - 1),
  FM - days(day(FM) - 1) + months(1)
)
# A tick every second day inside the window.
x.ticks <- seq(x.range[1] + days(1), x.range[2], by = 2)

# populate example data: one row per day strictly inside the window
preds <- data.frame(
  FM = FM,
  DATE = seq(x.range[1] + days(1), x.range[2] - days(1), by = 1)
)
# S_O grows linearly over the rows; S is a share of S_O that is non-zero only
# for dates in the same month as FM; O is the remainder.
preds$S_O <- round(seq(1, 1000000, by = 1000000/nrow(preds)))
preds$S <- round(
  ifelse(month(preds$FM) == month(preds$DATE), day(preds$DATE) / 30.4, 0) * preds$S_O
)
preds$O <- preds$S_O - preds$S
preds$pred_sales <- round(1000000 + rnorm(nrow(preds), 0, 10000))
# Trailing 5-point moving average of the predictions
preds$ma <- stats::filter(preds$pred_sales, rep(1/5, 5), sides = 1)

# Upper y limit, derived from the largest prediction
y.max <- ceiling(max(preds$pred_sales) / 5000) * 5000 + 15000

# Outline and fill colours keyed by series code (O, S, P, MA)
line.cols <- c(
  O = 'palegreen4',
  S = 'steelblue4',
  P = 'maroon',
  MA = 'blue'
)
fill.cols <- c(
  O = 'palegreen3',
  S = 'steelblue3',
  P = 'red'
)
# Question's original chart: stacked bars (S and O), square points for the
# predictions (P), a moving-average line (MA) and a text label per prediction.
p <- ggplot(data = preds,
mapping = aes(DATE, pred_sales))
p <- p +
# Stacked bars: S and O are melted to long format; `variable` drives both
# fill and outline colour.
geom_bar(data = reshape2::melt(preds[,c('DATE', 'S', 'O')], id.var = 'DATE'),
mapping = aes(DATE, value, group = 1, fill = variable, color = variable),
width = 1,
stat = 'identity',
alpha = 0.5) +
# Predictions: the constant 'P' is mapped to fill and color, so this layer
# feeds the same two scales as the bars.
geom_point(mapping = aes(DATE, pred_sales, group = 2, fill = 'P', color = 'P'),
shape = 22, #square
alpha = 0.5,
size = 2.5) +
# Moving average: rows without a value are removed; the constant 'MA' is
# mapped to color only.
geom_line(data = preds[!is.na(preds$ma),],
mapping = aes(DATE, ma, group = 3, color = 'MA'),
alpha = 0.8,
size = 1) +
# Rotated value labels, shown in thousands.
geom_text(mapping = aes(DATE, pred_sales, label = formatC(pred_sales / 1000, format = 'd', big.mark = ',')),
angle = 90,
size = 2.75,
hjust = 1.25,
vjust = 0.4) +
labs(title = sprintf('%s Sales Predictions - %s', 'Overall', format(FM, '%b %Y')),
x = 'Date',
y = 'Volume in MMlbs') +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1, size = 8),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank(),
legend.title = element_blank(),
legend.position = 'bottom',
legend.text = element_text(size = 8),
legend.margin = margin(t = 0.25, unit = 'cm')) +
scale_x_date(breaks = x.ticks,
date_labels = '%b %e',
limits = x.range) +
# y axis labels are shown in thousands as well.
scale_y_continuous(limits = c(0, y.max),
labels = function(x) { formatC(x / 1000, format='d', big.mark=',') }) +
# The color scale lists only 'MA' as a break, while the fill scale lists
# 'P', 'O' and 'S': the two scales use different breaks and labels.
scale_color_manual(values = line.cols,
breaks = c('MA'),
labels = c(MA = 'Mvg Avg (5)')) +
scale_fill_manual(values = fill.cols,
breaks = c('P', 'O', 'S'),
labels = c(O = 'Open Orders', S = 'Sales', P = 'Predictions'))
p
The chart it generates is this:
As you can see, the legend does a couple of funky things. It's close, but not quite there. I only want boxes with exterior borders for Predictions, Open Orders, and Sales, and only a blue line for the Mvg Avg (5).
Any advice would be appreciated.
Thanks!
Rather late, but if you are still interested to understand this problem, the following should work. Explanations are included as comments within the code:
library(dplyr)
preds %>%
  # scale the values for ALL numeric columns in the dataset, before
  # passing the dataset to ggplot()
  mutate_if(is.numeric, ~ . / 1000) %>%
  # since x / y mappings are stated in the top level ggplot(), there's
  # no need to repeat them in the subsequent layers UNLESS you want to
  # override them
  ggplot(mapping = aes(x = DATE, y = pred_sales)) +
  # 1. use data = . to inherit the top level data frame, & modify it on
  #    the fly for this layer; this is neater as you are essentially
  #    using a single data source for the ggplot object.
  # 2. geom_col() is a more succinct way to say geom_bar(stat = "identity")
  #    (I'm using tidyr rather than reshape package, since ggplot2 is a
  #    part of the tidyverse packages, & the two play together nicely)
  # 3. pivot_longer() is used for the reshaping; the output columns are named
  #    explicitly so that the aes() below can refer to them.
  geom_col(
    data = . %>%
      select(S, O, DATE) %>%
      tidyr::pivot_longer(-DATE, names_to = "variable", values_to = "value"),
    aes(y = value, fill = variable, color = variable),
    width = 1, alpha = 0.5
  ) +
  # don't show legend for this layer (o/w the fill / color legend would
  # include a square shape in the centre of each legend key)
  geom_point(
    aes(fill = 'P', color = 'P'),
    shape = 22, alpha = 0.5, size = 2.5, show.legend = FALSE
  ) +
  # use data = . %>% ... as above.
  # since the fill / color aesthetic mappings from the geom_col layer would
  # result in a border around all fill / color legends, avoid it all together
  # here by hard coding the line color to "blue", & map its linetype instead
  # to create a separate linetype-based legend later.
  geom_line(
    data = . %>% na.omit(),
    aes(y = ma, linetype = 'MA'),
    color = "blue", alpha = 0.8, size = 1
  ) +
  # scales::comma is a more succinct alternative to formatC for this use case
  geom_text(
    aes(label = scales::comma(pred_sales)),
    angle = 90, size = 2.75, hjust = 1.25, vjust = 0.4
  ) +
  labs(
    title = sprintf('%s Sales Predictions - %s', 'Overall', format(FM, '%b %Y')),
    x = 'Date',
    y = 'Volume in MMlbs'
  ) +
  theme(
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1, size = 8),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    legend.title = element_blank(),
    legend.position = 'bottom',
    legend.text = element_text(size = 8),
    legend.margin = margin(t = 0.25, unit = 'cm')
  ) +
  scale_x_date(
    breaks = x.ticks,
    date_labels = '%b %e',
    limits = x.range
  ) +
  # as above, scales::comma is more succinct
  scale_y_continuous(
    limits = c(0, y.max / 1000),
    labels = scales::comma
  ) +
  # specify the same breaks & labels for the manual fill / color scales, so that
  # a single legend is created for both
  scale_color_manual(
    values = line.cols,
    breaks = c('P', 'O', 'S'),
    labels = c(O = 'Open Orders', S = 'Sales', P = 'Predictions')
  ) +
  scale_fill_manual(
    values = fill.cols,
    breaks = c('P', 'O', 'S'),
    labels = c(O = 'Open Orders', S = 'Sales', P = 'Predictions')
  ) +
  # create a separate line-only legend using the linetype mapping, with
  # value = 1 (i.e. unbroken line) & specified alpha / color to match the
  # geom_line layer. The argument is spelled `labels` in full rather than
  # relying on partial argument matching.
  scale_linetype_manual(
    values = 1,
    labels = 'Mvg Avg (5)',
    guide = guide_legend(override.aes = list(alpha = 1, color = "blue"))
  )

Legend in ggplot2, remove level

My data:
# Example data: eight species (A-H), their treatment group and two measurements.
df <- data.frame(
  sp = LETTERS[1:8],
  tr = c("NS", "LS", "NS", "LS", "LS", "HS", "HS", "HS"),
  bv = c(14, 5, 11, 5.6, 21, 5.4, 2, 4.8),
  av = c(0.0, 14, 21, 48.4, 15, 55.6, 37, 66.2)
)
I do the bar plot
library(reshape2)
# Long format: one row per species and measurement (bv / av).
df1 <- melt(df, id.vars = c("sp", "tr"))

# Stacked bars filled by measurement; the outline linetype encodes the treatment.
ggplot(df1, aes(x = sp, y = value, fill = variable)) +
  theme_classic() +
  geom_col(aes(linetype = tr), lwd = 1.2, colour = "black", width = .8) +
  theme(legend.position = "bottom") +
  scale_linetype_discrete(name = "ja")
Output
What I do not like is the legend. I'd like to have just the line types from the second part, "ja", and remove the "variable" part. I'd also like to have a white background in the legend boxes, not the grey one.
You can remove the variable legend by switching off the fill guide inside guides(), and you can change the background colour of the legend keys with override.aes in guide_legend (also inside guides), as follows:
# Keep only the linetype legend and give its keys a white background.
ggplot(df1, aes(sp, value, fill = variable)) +
  geom_bar(aes(lty = tr), lwd = 1.2, stat = "identity", colour = "black", width = .8) +
  scale_linetype_discrete(name = "ja") +
  guides(
    # "none" switches the fill legend off; passing FALSE here is deprecated.
    fill = "none",
    # Only the key background is overridden. The linetypes are left to the
    # scale, so the legend shows the same linetypes that are drawn on the bars.
    lty = guide_legend(override.aes = list(fill = "white"))
  ) +
  theme_classic() +
  theme(legend.position = "bottom")
this results in the following plot:

Resources