Modify the size of each legend icon in ggplot2 - r

I am using ggplot/usmap libararies to plot highly skewed data onto a map.
Because the data is so skewed, I created uneven interval brackets. See below;
My Code:
library(dplyr)
library(tidyverse)
library(usmap)
library(ggplot2)
library(readxl)
library(rgdal)
plot_usmap(regions = "states",
# fill = 'orange',
labels = TRUE) +
geom_point(data = grant_sh,
size = 5,
aes(x = x,
y = y,
color = funding_cat)) +
theme(
legend.title = element_text(size = 16),
#change legend title font size
legend.text = element_text(size = 14),
#change legend text font size
legend.position = 'left',
plot.title = element_text(size = 22),
plot.subtitle = element_text(size = 16)
) + #+
scale_color_manual(
values = c('#D4148C', # pink muesaum
'#049CFC', #library,blue
'#1C8474',
'#7703fC'),
name = "Map Key",
labels = c(
'$1,500 - $4,000 (n = 7)',
'$4,001 - $6,000 (n = 12)',
'$6,001 - $20,000 (n = 6)',
'$20,001 - $40,000 (n = 25)'
)
) +
guides(colour = guide_legend(override.aes = list(size = 3)))
Current Output
Desired Output
I would like to adjust the legend key to reflect the size of each interval. So, for example 1500-400 would be the smallest icon, and 20,001-40,000 would be the largest.
I want to do this so that the viewer immediately knows that the intervals are not even. Any solution to achieve this outcome is greatly appreciated!
See how the sign/oval next to each interval represents the range of the interval in my example below.

One option to create this kind of legend would be to make it as a second plot and glue it to the main plot using e.g. patchwork.
Note: Especially with a map as the main plot and the export size if any, this approach requires some fiddling to position the legend, e.g. in my code below a added a helper row to the patchwork design to shift the legend upwards.
UPDATE: Update the code to include the counts in the labels. Added a second approach to make the legend using geom_col and a separate dataframe.
library(dplyr, warn = FALSE)
library(usmap)
library(ggplot2)
library(patchwork)
# Make example data
set.seed(123)
cat1 <- c(1500, 4001, 6001, 20001)
cat2 <- c(4000, 6000, 2000, 40000)
n = c(7, 12, 6, 25)
funding_cat <- paste0("$", cat1, " - $", cat2, " (n=", n, ")")
funding_cat <- factor(funding_cat, levels = rev(funding_cat))
grant_sh <- utils::read.csv(system.file("extdata", "us_states_centroids.csv", package = "usmapdata"))
grant_sh$funding_cat = sample(funding_cat, 51, replace = TRUE, prob = n / sum(n))
# Make legend plot
grant_sh_legend <- data.frame(
funding_cat = funding_cat,
n = c(7, 12, 6, 25)
)
legend <- ggplot(grant_sh, aes(y = funding_cat, fill = funding_cat)) +
geom_bar(width = .6) +
scale_y_discrete(position = "right") +
scale_fill_manual(
values = c('#D4148C',
'#049CFC',
'#1C8474',
'#7703fC')
) +
theme_void() +
theme(axis.text.y = element_text(hjust = 0),
plot.title = element_text(size = rel(1))) +
guides(fill = "none") +
labs(title = "Map Key")
map <- plot_usmap(regions = "states",
labels = TRUE) +
geom_point(data = grant_sh,
size = 5,
aes(x = x,
y = y,
color = funding_cat)) +
theme(
legend.position = 'none',
plot.title = element_text(size = 22),
plot.subtitle = element_text(size = 16)
) + #+
scale_color_manual(
values = c('#D4148C', # pink muesaum
'#049CFC', #library,blue
'#1C8474',
'#7703fC'),
name = "Map Key",
labels = c(
'$1,500 - $4,000 (n = 7)',
'$4,001 - $6,000 (n = 12)',
'$6,001 - $20,000 (n = 6)',
'$20,001 - $40,000 (n = 25)'
)
) +
guides(colour = guide_legend(override.aes = list(size = 3)))
# Glue together
design <- "
#B
AB
#B
"
legend + map + plot_layout(design = design, heights = c(5, 1, 1), widths = c(1, 10))
Using geom_bar the counts are computed from your dataset grant_sh. A second option would be to compute the counts manually or use a manually created dataframe and then use geom_col for the legend plot:
grant_sh_legend <- data.frame(
funding_cat = funding_cat,
n = c(7, 12, 6, 25)
)
legend <- ggplot(grant_sh, aes(y = funding_cat, n = n, fill = funding_cat)) +
geom_col(width = .6) +
scale_y_discrete(position = "right") +
scale_fill_manual(
values = c('#D4148C',
'#049CFC',
'#1C8474',
'#7703fC')
) +
theme_void() +
theme(axis.text.y = element_text(hjust = 0),
plot.title = element_text(size = rel(1))) +
guides(fill = "none") +
labs(title = "Map Key")

Related

R ggplot vs barplot 2

I am trying to replicate the stacked bar chart built via ggplot() into a chart that uses barplot(). My code is below. My problem is that barplot() produces not a stacked chart. What do I do in a wrong way here? Thanks!
Data is accessible at the following link.
# link to the data: https://drive.google.com/file/d/16FQ7APc0r1IYS_geMdeBRoMiUMaF01t3/view?usp=sharing
df <- read.csv("US Sectoral Balances 3.csv")
# ggplot()
df %>%
# Plotting the data via ggplot2()
ggplot(aes(x = TimePeriod, y = DataValue_per_GDP, color = Sectors, fill = Sectors)) +
geom_col(aes(fill = Sectors), position = "stack", width = 1) +
scale_y_continuous(labels = percent,
sec.axis = sec_axis(~., name = "", labels = percent)) +
scale_x_discrete(name=NULL, breaks = brks) +
scale_fill_manual(values=c("#FF3399", "#3399FF", "#66CC99","#0022CC","#11AA00")) +
scale_color_manual(values=c("#FF3399", "#3399FF", "#66CC99","#0022CC","#11AA00")) +
guides(fill = guide_legend(title.theme =
element_text(size = 9, face = "bold",
colour = "black", angle = 0))) +
labs(x ="", y = "(% of GDP)",
subtitle = "Three-sector breakdown",
title = paste0("U.S. Sectoral Balances",
" through ", year(max(df$date))," ",quarters(max(df$date))),
caption = paste0("\nNote: data is from the BEA's Table 5.1. Saving and Investment by Sector.",
"\n\nSource: BEA.")) +
theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 10),
plot.caption = element_text(hjust = 0, size = 8))
# barplot()
l <- min(
length(df[substr(df$Sectors,1,1)=="A",]$DataValue_per_GDP),
length(df[substr(df$Sectors,1,1)=="B",]$DataValue_per_GDP),
length(df[substr(df$Sectors,1,1)=="C",]$DataValue_per_GDP)
)
bar_data <- matrix(data = rbind(df[substr(df$Sectors,1,1)=="A",]$DataValue_per_GDP*100,
df[substr(df$Sectors,1,1)=="B",]$DataValue_per_GDP*100,
df[substr(df$Sectors,1,1)=="C",]$DataValue_per_GDP*100),
nrow = 3, ncol = l)
colnames(bar_data) <- df[substr(df$Sectors,1,1)=="A",]$TimePeriod[1:l]
rownames(bar_data) <- c("A. General government","B. Private","C. External")
barplot(bar_data, col=c("red","blue","green"), beside = FALSE)
barplot(bar_data, col=c("red","blue","green"), beside = TRUE)

ggplot: how to assign both color and shape for one factor, and also shape for another factor?

I must code factor age with both color and shape. I know how to do that (see the plot and data/code below).
In addition, I also have to code factor day with shape.
Is it somehow possible to assign specified different shapes to two different factors?
Below is the legend I would love to achieve (I made an example in power point).
The plot is not quite right, as only factor age is coded with color and shape.
df = data.frame(test = c(1,2,3, 1,2,3, 1,2,3, 1,2,3, 1,2,3, 1,2,3),
age = c(1,1,1, 2,2,2, 3,3,3, 1,1,1, 2,2,2, 3,3,3),
day = c(1,1,1,1,1,1,1,1,1, 2,2,2,2,2,2,2,2,2),
result = c(1,2,2,1,1,2,2,1,0, 2,2,0,1,2,1,2,1,0))
df$test <- factor((df$test))
df$age <- factor((df$age))
df$day <- factor((df$day))
windows(width=4, height=3 )
df %>%
ggplot( aes(x=test, y=result)) +
geom_point(aes(color=age, shape=age, group=age),
position=position_jitterdodge(dodge.width=0.8,jitter.height=0.2, jitter.width=0),
size=2, stroke=0.8) +
scale_shape_manual(values=c(16, 15, 17), name="", labels=c("young","older","the oldest")) +
scale_color_manual(name="", labels=c("young","older","the oldest"), values=c('#009E73','#56B4E9','#D55E00')) +
theme_bw()+
theme(panel.border = element_blank(), axis.ticks = element_blank(),
legend.position=c(), legend.text=element_text(size=10, face="bold"), legend.title=element_text(size=10),
panel.grid.major.x = element_blank() ,
panel.grid.major.y = element_blank() ,
plot.title = element_text(size=10, face = "bold"), axis.title=element_text(size=11),
axis.text.y = element_text(size=9, angle = 45),
axis.text.x = element_text(size=9, angle = 90),
plot.margin = unit(c(0.5,0.2,0,0), "cm")) +
labs(title= "", x = "",y = "Test result") +
scale_y_continuous(breaks=c(0,1,2), labels=c('good','better','the best')) +
geom_vline(xintercept=c(0.5,1.5,2.5),color="grey90")+
geom_hline(yintercept=-0.5, color="grey90")+
expand_limits(x=3.9, y=c(0,2.35)) +
scale_x_discrete(limits = c("1", "2", "3"),labels = c("test a", "test b", "test c")) +
coord_cartesian(clip = "off")
You can use shapes on an interaction between age and day, and use color only one age. Then remove the color legend and color the shape legend manually with override.aes.
This comes close to what you want - labels can be changes, I've defined them when creating the factors.
how to make fancy legends
However, you want a quite fancy legend, so the easiest would be to build the legend yourself as a separate plot and combine to the main panel. ("Fake legend"). This requires some semi-hardcoding, but you're not shy to do this anyways given the manual definition of your shapes. See Part Two how to do this.
Part one
library(ggplot2)
df = data.frame(test = c(1,2,3, 1,2,3, 1,2,3, 1,2,3, 1,2,3, 1,2,3),
age = c(1,1,1, 2,2,2, 3,3,3, 1,1,1, 2,2,2, 3,3,3),
day = c(1,1,1,1,1,1,1,1,1, 2,2,2,2,2,2,2,2,2),
result = c(1,2,2,1,1,2,2,1,0, 2,2,0,1,2,1,2,1,0))
df$test <- factor(df$test)
## note I'm changing this here already!! If you udnergo the effor tof changing to
## factor, define levels and labels here
df$age <- factor(df$age, labels = c("young", "older", "the oldest"))
df$day <- factor(df$day, labels = paste("Day", 1:2))
ggplot(df, aes(x=test, y=result)) +
geom_jitter(aes(color=age, shape=interaction(day, age)),
width = .1, height = .1) +
## you won't get around manually defining the shapes
scale_shape_manual(values = c(0, 15, 1, 16, 2, 17)) +
scale_color_manual(values = c('#009E73','#56B4E9','#D55E00')) +
guides(color = "none",
shape = guide_legend(
override.aes = list(color = rep(c('#009E73','#56B4E9','#D55E00'), each = 2)),
ncol = 3))
Part two - the fake legend
library(ggplot2)
library(dplyr)
library(patchwork)
## df and factor creation as above !!!
p_panel <-
ggplot(df, aes(x=test, y=result)) +
geom_jitter(aes(color=age, shape=interaction(day, age)),
width = .1, height = .1) +
## you won't get around manually defining the shapes
scale_shape_manual(values = c(0, 15, 1, 16, 2, 17)) +
scale_color_manual(values = c('#009E73','#56B4E9','#D55E00')) +
## for this solution, I'm removing the legend entirely
theme(legend.position = "none")
## make the data frame for the fake legend
## the y coordinates should be defined relative to the y values in your panel
y_coord <- c(.9, 1.1)
df_legend <- df %>% distinct(day, age) %>%
mutate(x = rep(1:3,2), y = rep(y_coord,each = 3))
## The legend plot is basically the same as the main plot, but without legend -
## because it IS the legend ... ;)
lab_size = 10*5/14
p_leg <-
ggplot(df_legend, aes(x=x, y=y)) +
geom_point(aes(color=age, shape=interaction(day, age))) +
## I'm annotating in separate layers because it keeps it clearer (for me)
annotate(geom = "text", x = unique(df_legend$x), y = max(y_coord)+.1,
size = lab_size, angle = 45, hjust = 0,
label = c("young", "older", "the oldest")) +
annotate(geom = "text", x = max(df_legend$x)+.2, y = y_coord,
label = paste("Day", 1:2), size = lab_size, hjust = 0) +
scale_shape_manual(values = c(0, 15, 1, 16, 2, 17)) +
scale_color_manual(values = c('#009E73','#56B4E9','#D55E00')) +
theme_void() +
theme(legend.position = "none",
plot.margin = margin(r = .3,unit = "in")) +
## you need to turn clipping off and define the same y limits as your panel
coord_cartesian(clip = "off", ylim = range(df$result))
## now combine them
p_panel + p_leg +
plot_layout(widths = c(1,.2))
So again following: Filled and hollow shapes where the fill color = the line color the following code provides the goods without giving you the legend.
df %>%
ggplot( aes(x=test, y=result)) +
geom_point(aes(color=age,
shape=age,
group=age,
fill=factor(ifelse(day==1, NA, age))), # STEP 1
position=position_jitterdodge(dodge.width=0.8,jitter.height=0.2, jitter.width=0),
size=2, stroke=0.8) +
scale_shape_manual(values=c(22,21,24), name="", labels=c("young","older","the oldest")) +
scale_color_manual(name="", labels=c("young","older","the oldest"), values=c('#009E73','#56B4E9','#D55E00')) +
scale_fill_manual(name="",
labels=c("young","older","the oldest"),
values=c('#009E73','#56B4E9','#D55E00'),
na.value=NA, guide="none") # STEP 2
I was misleading in my comment, rather than "hallow" shapes, we want shapes 21 through 26. These apparently accept distinct fill and color.

Produce an inset in each facet of an R ggplot while preserving colours of the original facet content

I would like to produce a graphic combining four facets of a graph with insets in each facet showing a detail of the respective plot. This is one of the things I tried:
#create data frame
n_replicates <- c(rep(1:10,15),rep(seq(10,100,10),15),rep(seq(100,1000,100),15),rep(seq(1000,10000,1000),15))
sim_years <- rep(sort(rep((1:15),10)),4)
sd_data <- rep (NA,600)
for (i in 1:600) {
sd_data[i]<-rnorm(1,mean=exp(0.1 * sim_years[i]), sd= 1/n_replicates[i])
}
max_rep <- sort(rep(c(10,100,1000,10000),150))
data_frame <- cbind.data.frame(n_replicates,sim_years,sd_data,max_rep)
#do first basic plot
library(ggplot2)
plot1<-ggplot(data=data_frame, aes(x=sim_years,y=sd_data,group =n_replicates, col=n_replicates)) +
geom_line() + theme_bw() +
labs(title ="", x = "year", y = "sd")
plot1
#make four facets
my_breaks = c(2, 10, 100, 1000, 10000)
facet_names <- c(
`10` = "2, 3, ..., 10 replicates",
`100` = "10, 20, ..., 100 replicates",
`1000` = "100, 200, ..., 1000 replicates",
`10000` = "1000, 2000, ..., 10000 replicates"
)
plot2 <- plot1 +
facet_wrap( ~ max_rep, ncol=2, labeller = as_labeller(facet_names)) +
scale_colour_gradientn(name = "number of replicates", trans = "log",
breaks = my_breaks, labels = my_breaks, colours = rainbow(20))
plot2
#extract inlays (this is where it goes wrong I think)
library(ggpmisc)
library(tibble)
library(dplyr)
inset <- tibble(x = 0.01, y = 10.01,
plot = list(plot2 +
facet_wrap( ~ max_rep, ncol=2, labeller = as_labeller(facet_names)) +
coord_cartesian(xlim = c(13, 15),
ylim = c(3, 5)) +
labs(x = NULL, y = NULL, color = NULL) +
scale_colour_gradient(guide = FALSE) +
theme_bw(10)))
plot3 <- plot2 +
expand_limits(x = 0, y = 0) +
geom_plot_npc(data = inset, aes(npcx = x, npcy = y, label = plot)) +
annotate(geom = "rect",
xmin = 13, xmax = 15, ymin = 3, ymax = 5,
linetype = "dotted", fill = NA, colour = "black")
plot3
That leads to the following graphic:
As you can see, the colours in the insets are wrong, and all four of them appear in each of the facets even though I only want the corresponding inset of course. I read through a lot of questions here (to even get me this far) and also some examples in the ggpmisc user guide but unfortunately I am still a bit lost on how to achieve what I want. Except maybe to do it by hand extracting four insets and then combining them with plot2. But I hope there will be a better way to do this. Thank you for your help!
Edit: better graphic now thanks to this answer, but problem remains partially unsolved:
The following code does good insets, but unfortunately the colours are not preserved. As in the above version each inset does its own rainbow colours anew instead of inheriting the partial rainbow scale from the facet it belongs to. Does anyone know why and how I could change this? In comments I put another (bad) attempt at solving this, it preserves the colors but has the problem of putting all four insets in each facet.
library(ggpmisc)
library(tibble)
library(dplyr)
# #extract inlays: good colours, but produces four insets.
# fourinsets <- tibble(#x = 0.01, y = 10.01,
# x = c(rep(0.01, 4)),
# y = c(rep(10.01, 4)),
# plot = list(plot2 +
# facet_wrap( ~ max_rep, ncol=2) +
# coord_cartesian(xlim = c(13, 15),
# ylim = c(3, 5)) +
# labs(x = NULL, y = NULL, color = NULL) +
# scale_colour_gradientn(name = "number of replicates", trans = "log", guide = FALSE,
# colours = rainbow(20)) +
# theme(
# strip.background = element_blank(),
# strip.text.x = element_blank()
# )
# ))
# fourinsets$plot
library(purrr)
pp <- map(unique(data_frame$max_rep), function(x) {
plot2$data <- plot2$data %>% filter(max_rep == x)
plot2 +
coord_cartesian(xlim = c(12, 14),
ylim = c(3, 4)) +
labs(x = NULL, y = NULL) +
theme(
strip.background = element_blank(),
strip.text.x = element_blank(),
legend.position = "none",
axis.text=element_blank(),
axis.ticks=element_blank()
)
})
#pp[[2]]
inset_new <- tibble(x = c(rep(0.01, 4)),
y = c(rep(10.01, 4)),
plot = pp,
max_rep = unique(data_frame$max_rep))
final_plot <- plot2 +
geom_plot_npc(data = inset_new, aes(npcx = x, npcy = y, label = plot, vp.width = 0.3, vp.height =0.6)) +
annotate(geom = "rect",
xmin = 12, xmax = 14, ymin = 3, ymax = 4,
linetype = "dotted", fill = NA, colour = "black")
#final_plot
final_plot then looks like this:
I hope this clarifies the problem a bit. Any ideas are very welcome :)
Modifying off #user63230's excellent answer:
pp <- map(unique(data_frame$max_rep), function(x) {
plot2 +
aes(alpha = ifelse(max_rep == x, 1, 0)) +
coord_cartesian(xlim = c(12, 14),
ylim = c(3, 4)) +
labs(x = NULL, y = NULL) +
scale_alpha_identity() +
facet_null() +
theme(
strip.background = element_blank(),
strip.text.x = element_blank(),
legend.position = "none",
axis.text=element_blank(),
axis.ticks=element_blank()
)
})
Explanation:
Instead of filtering the data passed into plot2 (which affects the mapping of colours), we impose a new aesthetic alpha, where lines belonging to the other replicate numbers are assigned 0 for transparency;
Use scale_alpha_identity() to tell ggplot that the alpha mapping is to be used as-is: i.e. 1 for 100%, 0 for 0%.
Add facet_null() to override plot2's existing facet_wrap, which removes the facet for the inset.
Everything else is unchanged from the code in the question.
I think this will get you started although its tricky to get the size of the inset plot right (when you include a legend).
#set up data
library(ggpmisc)
library(tibble)
library(dplyr)
library(ggplot2)
# create data frame
n_replicates <- c(rep(1:10, 15), rep(seq(10, 100, 10), 15), rep(seq(100,
1000, 100), 15), rep(seq(1000, 10000, 1000), 15))
sim_years <- rep(sort(rep((1:15), 10)), 4)
sd_data <- rep(NA, 600)
for (i in 1:600) {
sd_data[i] <- rnorm(1, mean = exp(0.1 * sim_years[i]), sd = 1/n_replicates[i])
}
max_rep <- sort(rep(c(10, 100, 1000, 10000), 150))
data_frame <- cbind.data.frame(n_replicates, sim_years, sd_data, max_rep)
# make four facets
my_breaks = c(2, 10, 100, 1000, 10000)
facet_names <- c(`10` = "2, 3, ..., 10 replicates", `100` = "10, 20, ..., 100 replicates",
`1000` = "100, 200, ..., 1000 replicates", `10000` = "1000, 2000, ..., 10000 replicates")
Get overall plot:
# overall facet plot
overall_plot <- ggplot(data = data_frame, aes(x = sim_years, y = sd_data, group = n_replicates, col = n_replicates)) +
geom_line() +
theme_bw() +
labs(title = "", x = "year", y = "sd") +
facet_wrap(~max_rep, ncol = 2, labeller = as_labeller(facet_names)) +
scale_colour_gradientn(name = "number of replicates", trans = "log", breaks = my_breaks, labels = my_breaks, colours = rainbow(20))
#plot
overall_plot
which gives:
Then from the overall plot you want to extract each plot, see here. We can map over the list to extract one at a time:
pp <- map(unique(data_frame$max_rep), function(x) {
overall_plot$data <- overall_plot$data %>% filter(max_rep == x)
overall_plot + # coord_cartesian(xlim = c(13, 15), ylim = c(3, 5)) +
labs(x = NULL, y = NULL) +
theme_bw(10) +
theme(legend.position = "none")
})
If we look at one of these (I've removed the legend) e.g.
pp[[1]]
#pp[[2]]
#pp[[3]]
#pp[[4]]
Gives:
Then we want to add these inset plots into a dataframe so that each plot has its own row:
inset <- tibble(x = c(rep(0.01, 4)),
y = c(rep(10.01, 4)),
plot = pp,
max_rep = unique(data_frame$max_rep))
Then merge this into the overall plot:
overall_plot +
expand_limits(x = 0, y = 0) +
geom_plot_npc(data = inset, aes(npcx = x, npcy = y, label = plot, vp.width = 0.8, vp.height = 0.8))
Gives:
Here is a solution based on Z. Lin's answer, but using ggforce::facet_wrap_paginate() to do the filtering and keeping colourscales consistent.
First, we can make the 'root' plot containing all the data with no facetting.
library(ggpmisc)
library(tibble)
library(dplyr)
n_replicates <- c(rep(1:10,15),rep(seq(10,100,10),15),rep(seq(100,1000,100),15),rep(seq(1000,10000,1000),15))
sim_years <- rep(sort(rep((1:15),10)),4)
sd_data <- rep (NA,600)
for (i in 1:600) {
sd_data[i]<-rnorm(1,mean=exp(0.1 * sim_years[i]), sd= 1/n_replicates[i])
}
max_rep <- sort(rep(c(10,100,1000,10000),150))
data_frame <- cbind.data.frame(n_replicates,sim_years,sd_data,max_rep)
my_breaks = c(2, 10, 100, 1000, 10000)
facet_names <- c(
`10` = "2, 3, ..., 10 replicates",
`100` = "10, 20, ..., 100 replicates",
`1000` = "100, 200, ..., 1000 replicates",
`10000` = "1000, 2000, ..., 10000 replicates"
)
base <- ggplot(data=data_frame,
aes(x=sim_years,y=sd_data,group =n_replicates, col=n_replicates)) +
geom_line() +
theme_bw() +
scale_colour_gradientn(
name = "number of replicates",
trans = "log10", breaks = my_breaks,
labels = my_breaks, colours = rainbow(20)
) +
labs(title ="", x = "year", y = "sd")
Next, the main plot will be just the root plot with facet_wrap().
main <- base + facet_wrap(~ max_rep, ncol = 2, labeller = as_labeller(facet_names))
Then the new part is to use facet_wrap_paginate with nrow = 1 and ncol = 1 for every max_rep, which we'll use as insets. The nice thing is that this does the filtering and it keeps colour scales consistent with the root plot.
nmax_rep <- length(unique(data_frame$max_rep))
insets <- lapply(seq_len(nmax_rep), function(i) {
base + ggforce::facet_wrap_paginate(~ max_rep, nrow = 1, ncol = 1, page = i) +
coord_cartesian(xlim = c(12, 14), ylim = c(3, 4)) +
guides(colour = "none", x = "none", y = "none") +
theme(strip.background = element_blank(),
strip.text = element_blank(),
axis.title = element_blank(),
plot.background = element_blank())
})
insets <- tibble(x = rep(0.01, nmax_rep),
y = rep(10.01, nmax_rep),
plot = insets,
max_rep = unique(data_frame$max_rep))
main +
geom_plot_npc(data = insets,
aes(npcx = x, npcy = y, label = plot,
vp.width = 0.3, vp.height = 0.6)) +
annotate(geom = "rect",
xmin = 12, xmax = 14, ymin = 3, ymax = 4,
linetype = "dotted", fill = NA, colour = "black")
Created on 2020-12-15 by the reprex package (v0.3.0)

Letters appearing outside plots when using multiple facet plotting with ggplot and ggplotly

I want to create a facet plot using both ggplot and plotly (ggplotly to be precise). Almost everything works fine. The following code :
library(dplyr)
library(plotly)
library(ggplot2)
Year <- c(2000:2008)
Name <- c('A', 'B')
Size <- rep(c('Small', 'Medium', 'Large'), each=6)
City <- c('NY', 'PARIS', 'BERLIN')
Frequency <- sample(x = c(100:1000), size = 144)
Rel_Freq <- sample(x = c(1:100), size = 144, replace = TRUE)
StackData <- data.frame(Year, Name, Size, City, Frequency, Rel_Freq)
StackData$Size <- factor(StackData$Size, levels = c("Small", "Medium", "Large"))
ggplotly(ggplot(StackData, aes(x= Year, y= Frequency, shape = Name, col = Name)) +
geom_point(size = 3)+
scale_shape_manual(values= c(17, 6))+
scale_color_manual(values = c("#37D9E1", "#3D3D3F")) +
facet_grid(City ~ Size, scales="free_y")+
theme_bw()+
theme(legend.position = "bottom",
panel.background = element_rect(fill = "transparent"),
axis.text.x = element_text(angle = 30, hjust = 1),
strip.text.x = element_text( size = 12, face = "bold" ),
strip.text.y = element_text( size = 12, face = "bold" ))+
scale_fill_manual(values = c("#D3D3D3", "#A9A9A9", "#696969"), guide=FALSE)+
scale_y_continuous(trans = "log10",
labels = scales::unit_format(
unit = "",
scale = 1))+
labs(y= "",
x= ""),
tooltip = c("x","y","colour"),
autosize = T, width = 680, height = 530) %>%
layout(showlegend = FALSE,
margin = list(l = 0, r = 25, t = 50, b = 130),
annotations = list(x = .5, y = -0.25, #position of text adjust as needed
text = "Super cool Plot",
showarrow = F,
xref='paper',
yref='paper',
xanchor='auto',
yanchor='bottom',
xshift=0,
yshift=0,
font=list(size=9, color="black")))
Results in this
Like shown in the image, there is a letter showing up in the upper right corner. After some changes, I realized it is the first letter of the variable to which I redirect the color and the shapes in ggplot (in this case 'name').
How can I do get the same plot without this letter appearing there? And perhaps more interesting, why is this occuring?
Thanks in advance,
That weird "N" is coming from the legend portion of your theme in ggplot:
theme(legend.position = "bottom")
In fact, this is quite a thorny problem. ggplotly actually does not transfer everything form ggplot correctly. There is a github issue on this topic, but I beleive that the problem persists.
See:
(legend.position always 'right' in ggplotly, except when legend.position = 'none'
) https://github.com/ropensci/plotly/issues/1049
In your case, the legend.position = "bottom" argument is being ignored by ggplotly.
Option 1:
It looks like you may not actually want the legend in the chart. In that case, you might be better off synchronizing the legend calls across ggplot and ggplotly:
# ggplot portion
theme(legend.position = "none")
# plotly portion:
layout(showlegend = FALSE)
Option 2:
Format the legend only in plotly. From the github issue link above, this was one of the suggested ideas:
ggplotly(
ggplot(df, aes(year, freq, color = clas)) +
geom_line() +
theme(legend.position = 'top')
) %>%
layout(legend = list(
orientation = "h"
)
)
I modified your code using option 1 and came up with the below. The weird "N" is now gone!
library(dplyr)
library(plotly)
library(ggplot2)
Year <- c(2000:2008)
Name <- c('A', 'B')
Size <- rep(c('Small', 'Medium', 'Large'), each=6)
City <- c('NY', 'PARIS', 'BERLIN')
Frequency <- sample(x = c(100:1000), size = 144)
Rel_Freq <- sample(x = c(1:100), size = 144, replace = TRUE)
StackData <- data.frame(Year, Name, Size, City, Frequency, Rel_Freq)
StackData$Size <- factor(StackData$Size, levels = c("Small", "Medium", "Large"))
StackData
ggplotly(ggplot(StackData, aes(x= Year, y= Frequency, shape = Name, col = Name)) +
geom_point(size = 3)+
scale_shape_manual(values= c(17, 6))+
scale_color_manual(values = c("#37D9E1", "#3D3D3F")) +
facet_grid(City ~ Size, scales="free_y")+
theme_bw()+
theme(legend.position = "none", ## this is the only change to your code
panel.background = element_rect(fill = "transparent"),
axis.text.x = element_text(angle = 30, hjust = 1),
strip.text.x = element_text( size = 12, face = "bold" ),
strip.text.y = element_text( size = 12, face = "bold" ))+
scale_fill_manual(values = c("#D3D3D3", "#A9A9A9", "#696969"), guide=FALSE)+
scale_y_continuous(trans = "log10",
labels = scales::unit_format(
unit = "",
scale = 1))+
labs(y= "",
x= ""),
tooltip = c("x","y","colour"),
autosize = T, width = 680, height = 530) %>%
layout(showlegend = FALSE,
margin = list(l = 0, r = 25, t = 50, b = 130),
annotations = list(x = .5, y = -0.25, #position of text adjust as needed
text = "Super cool Plot",
showarrow = F,
xref='paper',
yref='paper',
xanchor='auto',
yanchor='bottom',
xshift=0,
yshift=0,
font=list(size=9, color="black")))

Complex Chart in R/ggplot with Proper Legend Display

This is my first question to StackExchange, and I've searched for answers that have been helpful, but haven't really gotten me to where I'd like to be.
This is a stacked bar chart, combined with a point chart, combined with a line.
Here's my code:
theme_set(theme_light())
library(lubridate)
FM <- as.Date('2018-02-01')
x.range <- c(FM - months(1) - days(1) - days(day(FM) - 1), FM - days(day(FM) - 1) + months(1))
x.ticks <- seq(x.range[1] + days(1), x.range[2], by = 2)
#populate example data
preds <- data.frame(FM = FM, DATE = seq(x.range[1] + days(1), x.range[2] - days(1), by = 1))
preds <- data.frame(preds, S_O = round(seq(1, 1000000, by = 1000000/nrow(preds))))
preds <- data.frame(preds, S = round(ifelse(month(preds$FM) == month(preds$DATE), day(preds$DATE) / 30.4, 0) * preds$S_O))
preds <- data.frame(preds, O = preds$S_O - preds$S)
preds <- data.frame(preds, pred_sales = round(1000000 + rnorm(nrow(preds), 0, 10000)))
preds$ma <- with(preds, stats::filter(pred_sales, rep(1/5, 5), sides = 1))
y.max <- ceiling(max(preds$pred_sales) / 5000) * 5000 + 15000
line.cols <- c(O = 'palegreen4', S = 'steelblue4',
P = 'maroon', MA = 'blue')
fill.cols <- c(O = 'palegreen3', S = 'steelblue3',
P = 'red')
p <- ggplot(data = preds,
mapping = aes(DATE, pred_sales))
p <- p +
geom_bar(data = reshape2::melt(preds[,c('DATE', 'S', 'O')], id.var = 'DATE'),
mapping = aes(DATE, value, group = 1, fill = variable, color = variable),
width = 1,
stat = 'identity',
alpha = 0.5) +
geom_point(mapping = aes(DATE, pred_sales, group = 2, fill = 'P', color = 'P'),
shape = 22, #square
alpha = 0.5,
size = 2.5) +
geom_line(data = preds[!is.na(preds$ma),],
mapping = aes(DATE, ma, group = 3, color = 'MA'),
alpha = 0.8,
size = 1) +
geom_text(mapping = aes(DATE, pred_sales, label = formatC(pred_sales / 1000, format = 'd', big.mark = ',')),
angle = 90,
size = 2.75,
hjust = 1.25,
vjust = 0.4) +
labs(title = sprintf('%s Sales Predictions - %s', 'Overall', format(FM, '%b %Y')),
x = 'Date',
y = 'Volume in MMlbs') +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1, size = 8),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank(),
legend.title = element_blank(),
legend.position = 'bottom',
legend.text = element_text(size = 8),
legend.margin = margin(t = 0.25, unit = 'cm')) +
scale_x_date(breaks = x.ticks,
date_labels = '%b %e',
limits = x.range) +
scale_y_continuous(limits = c(0, y.max),
labels = function(x) { formatC(x / 1000, format='d', big.mark=',') }) +
scale_color_manual(values = line.cols,
breaks = c('MA'),
labels = c(MA = 'Mvg Avg (5)')) +
scale_fill_manual(values = fill.cols,
breaks = c('P', 'O', 'S'),
labels = c(O = 'Open Orders', S = 'Sales', P = 'Predictions'))
p
The chart it generates is this:
As you can see, the legend does a couple of funky things. It's close, but not quite there. I only want boxes with exterior borders for Predictions, Open Orders, and Sales, and only a blue line for the Mvg Avg (5).
Any advice would be appreciated.
Thanks!
Rather late, but if you are still interested to understand this problem, the following should work. Explanations are included as comments within the code:
library(dplyr)
preds %>%
# scale the values for ALL numeric columns in the dataset, before
# passing the dataset to ggplot()
mutate_if(is.numeric, ~./1000) %>%
# since x / y mappings are stated in the top level ggplot(), there's
# no need to repeat them in the subsequent layers UNLESS you want to
# override them
ggplot(mapping = aes(x = DATE, y = pred_sales)) +
# 1. use data = . to inherit the top level data frame, & modify it on
# the fly for this layer; this is neater as you are essentially
# using a single data source for the ggplot object.
# 2. geom_col() is a more succinct way to say geom_bar(stat = "identity")
# (I'm using tidyr rather than reshape package, since ggplot2 is a
# part of the tidyverse packages, & the two play together nicely)
geom_col(data = . %>%
select(S, O, DATE) %>%
tidyr::gather(variable, value, -DATE),
aes(y = value, fill = variable, color = variable),
width = 1, alpha = 0.5) +
# don't show legend for this layer (o/w the fill / color legend would
# include a square shape in the centre of each legend key)
geom_point(aes(fill = 'P', color = 'P'),
shape = 22, alpha = 0.5, size = 2.5, show.legend = FALSE) +
# use data = . %>% ... as above.
# since the fill / color aesthetic mappings from the geom_col layer would
# result in a border around all fill / color legends, avoid it all together
# here by hard coding the line color to "blue", & map its linetype instead
# to create a separate linetype-based legend later.
geom_line(data = . %>% na.omit(),
aes(y = ma, linetype = 'MA'),
color = "blue", alpha = 0.8, size = 1) +
# scales::comma is a more succinct alternative to formatC for this use case
geom_text(aes(label = scales::comma(pred_sales)),
angle = 90, size = 2.75, hjust = 1.25, vjust = 0.4) +
labs(title = sprintf('%s Sales Predictions - %s', 'Overall', format(FM, '%b %Y')),
x = 'Date',
y = 'Volume in MMlbs') +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1, size = 8),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank(),
legend.title = element_blank(),
legend.position = 'bottom',
legend.text = element_text(size = 8),
legend.margin = margin(t = 0.25, unit = 'cm')) +
scale_x_date(breaks = x.ticks,
date_labels = '%b %e',
limits = x.range) +
# as above, scales::comma is more succinct
scale_y_continuous(limits = c(0, y.max / 1000),
labels = scales::comma) +
# specify the same breaks & labels for the manual fill / color scales, so that
# a single legend is created for both
scale_color_manual(values = line.cols,
breaks = c('P', 'O', 'S'),
labels = c(O = 'Open Orders', S = 'Sales', P = 'Predictions')) +
scale_fill_manual(values = fill.cols,
breaks = c('P', 'O', 'S'),
labels = c(O = 'Open Orders', S = 'Sales', P = 'Predictions')) +
# create a separate line-only legend using the linetype mapping, with
# value = 1 (i.e. unbroken line) & specified alpha / color to match the
# geom_line layer
scale_linetype_manual(values = 1,
label = 'Mvg Avg (5)',
guide = guide_legend(override.aes = list(alpha = 1,
color = "blue")))

Resources