label end of lines outside of plot area - r

I am trying to replicate this figure from the Financial Times.
Here is a gist with the data. I'm struggling to label the end of the lines because I run out of room in the plot. I found a few ways to expand the limits of the plot area, but this is not ideal because the gridlines extend as well.
library(tidyverse)
library(ggrepel)
covid %>%
ggplot(aes(x = date, y = deaths_roll7_100k, color = Province_State)) +
geom_line() +
scale_y_continuous(breaks = seq(0, 2.4, .2)) +
scale_x_date(breaks = seq.Date(from=as.Date('2020-09-01'),
to=as.Date('2021-07-12'),
by="month"),
labels = function(x) if_else(month(x) == 9 | month(x) == 1,
paste(month(x, label = TRUE),
"\n", year(x)),
paste(month(x, label = TRUE))),
limits = as.Date(c("2020-09-01", "2021-11-01"))) +
geom_text_repel(aes(label = label),
segment.alpha = 0,
hjust = 0,
direction="y",
na.rm = TRUE,
xlim = as.Date(c("2021-08-01", "2021-11-01")))

An alternative to ggrepel is to use geom_text and turn "clipping" off (similar to this question/answer), e.g.
covid %>%
ggplot(aes(x = date, y = deaths_roll7_100k, color = Province_State)) +
geom_line() +
scale_y_continuous(breaks = seq(0, 2.4, .2)) +
scale_x_date(breaks = seq.Date(from=as.Date('2020-09-01'),
to=as.Date('2021-07-12'),
by="month"),
date_labels = "%b\n%Y",
limits = as.Date(c("2020-09-01", "2021-07-01"))) +
geom_text(data = . %>% filter(date == max(date)),
aes(color = Province_State, x = as.Date(Inf),
y = deaths_roll7_100k),
hjust = 0, size = 4, vjust = 0.7,
label = c("Arizona\n", "North Carolina")) +
coord_cartesian(expand = FALSE, clip = "off")
--
With some more tweaks and the Financial-Times/ftplottools R theme you can get the plot looking pretty similar to the Financial Times figure, e.g.
library(tidyverse)
#remotes::install_github("Financial-Times/ftplottools")
library(ftplottools)
library(extrafont)
#font_import()
#fonts()
covid %>%
ggplot() +
geom_line(aes(x = date, y = deaths_roll7_100k,
group = Province_State, color = Province_State)) +
geom_text(data = . %>% filter(date == max(date)),
aes(color = Province_State, x = as.Date(Inf),
y = deaths_roll7_100k),
hjust = 0, size = 4, vjust = 0.7,
label = c("Arizona\n", "North Carolina")) +
coord_cartesian(expand = FALSE, clip = "off") +
ft_theme(base_family = "Arimo for Powerline") +
theme(plot.margin = unit(c(1,6,1,1), "lines"),
legend.position = "none",
plot.background = element_rect(fill = "#FFF1E6"),
axis.title = element_blank(),
panel.grid.major.x = element_line(colour = "gray75"),
plot.caption = element_text(size = 8, color = "gray50")) +
scale_color_manual(values = c("#E85D8C", "#0D5696")) +
scale_x_date(breaks = seq.Date(from = as.Date('2020-09-01'),
to = as.Date('2021-07-01'),
by = "1 month"),
limits = as.Date(c("2020-09-01", "2021-07-01")),
date_labels = "%b\n%Y") +
scale_y_continuous(breaks = seq(from = 0, to = 2.4, by = 0.2)) +
labs(title = "New deaths attributed to Covid-19 in North Carolina and Arizona",
subtitle = "Seven-day rolling average of new deaths (per 100k)\n",
caption = "Source: Analysis of data from John Hopkins SSE\nUpdated: 12th July 2021 | CCBY4.0")

Related

Cannot create a BarChartRace with ggplot

I have a dataframe that has daily data about Covid19 (such as: total_cases,total_deaths) in European countries (there are 49 countries in total). You can see a preview here and you can have the whole dataframe here. I want to create a Bar Chart Race for the variable total_cases for all the European countries with ggplot. So, I followed the steps from this link or (this video) and I wrote the below code:
library(ggplot2)
g1 = ggplot(data = data.europe,
aes(x = as.Date(date),y = total_cases,group = location,
color = location)) + geom_line(size = 0.5) +
labs(y = "Total Cases", x = "Date") +
theme(legend.position = "bottom",legend.box = "vertical",
legend.title = element_blank(),
legend.text = element_text(size = 10))
Then I wrote the below code in order to create the dynamic plot
g1_star = ggplot(data = data.europe,
aes(x = as.Date(date),y = total_cases,group = location,
color = location)) + geom_line(aes(group = as.Date(date)),linetype=1) +
labs(y= "Total Cases", x = "Date") +
theme(legend.position = "bottom",legend.box = "vertical",
legend.title = element_blank(),
legend.text = element_text(size = 10)) +
transition_reveal(as.Date(date))
#We wil create the an animation
library(gifski)
library(gganimate)
animate(g1_star,height= 538,width = 866)
data_star = data.europe %>% group_by(as.Date(date))
However when I wrote these lines:
g1_star_anim = ggplot(data_star,aes(x = as.Date(date),
y = total_cases,
group = location,
fill = location,
color = location)) +
geom_tile(aes(height = total_cases,width = 0.9), alpha = 0.8,color = NA) +
geom_text(aes(y = 0, label = paste(location, " ")), vjust = 0.2, hjust = 1) +
scale_y_continuous(labels = scales::comma) + theme(axis.line=element_blank())
anim1 = g1_star_anim + transition_states(as.Date(date), transition_length = 4,
state_length = 1) +
view_follow(fixed_x = TRUE) +
labs(title = 'Total_cases per year')
The result is:
which isn't expected.
What should I change? Or which code should I write? Can anyone help me because I have been searching for a very long time?
Thanks in advance!
I found that this code shows the top 10 countries based on their total_cases
library(gganimate)
library(hrbrthemes)
library(tidyverse)
data.europe.not.na.star = data.europe.not.na %>%
group_by(as.Date(date)) %>%
arrange(-total_cases) %>%
mutate(rank = row_number()) %>%
filter(rank<=10)
col = c("cadetblue1","aquamarine","chocolate1","gray13","blue3","darkgoldenrod2",
"darkolivegreen1","darkorchid1","lightcoral","deeppink","greenyellow","mediumvioletred",
"midnightblue","olivedrab1","mediumaquamarine","red","seagreen1")
p = data.europe.not.na.star %>%
ggplot(aes(x = -rank,y = total_cases, group = location)) +
geom_tile(aes(y = total_cases / 2, height = total_cases,fill = location),width = 0.9) +
geom_text(aes(label = location), hjust = "right", colour = "gold",fontface = "bold",
nudge_y = -100000) +
geom_text(aes(label = scales::comma(total_cases)), hjust = "left",nudge_y = 100000,
colour = "grey30") +
coord_flip(clip="off") +
scale_fill_manual(name = 'location', values = col) +
scale_x_discrete("") +
scale_y_continuous("",labels=scales::comma) +
hrbrthemes::theme_ipsum(plot_title_size = 32, subtitle_size = 24,caption_size = 20,
base_size = 20) +
theme(panel.grid.major.y=element_blank(),
panel.grid.minor.x=element_blank(),
plot.margin = margin(1,1,1,2,"cm"),
axis.text.y=element_blank()) +
# gganimate code to transition by year:
transition_time(as.Date(date)) +
ease_aes('cubic-in-out') +
labs(title='Bar Char Race of Total Cases in Europe(Top 10)',
subtitle='Total Cases in {round(frame_time,0)}')
animate(p, nframes = 750, fps = 25, end_pause = 50, width = 1200, height = 900)
The result is here

R ggplot vs barplot 2

I am trying to replicate the stacked bar chart built via ggplot() into a chart that uses barplot(). My code is below. My problem is that barplot() produces not a stacked chart. What do I do in a wrong way here? Thanks!
Data is accessible at the following link.
# link to the data: https://drive.google.com/file/d/16FQ7APc0r1IYS_geMdeBRoMiUMaF01t3/view?usp=sharing
df <- read.csv("US Sectoral Balances 3.csv")
# ggplot()
df %>%
# Plotting the data via ggplot2()
ggplot(aes(x = TimePeriod, y = DataValue_per_GDP, color = Sectors, fill = Sectors)) +
geom_col(aes(fill = Sectors), position = "stack", width = 1) +
scale_y_continuous(labels = percent,
sec.axis = sec_axis(~., name = "", labels = percent)) +
scale_x_discrete(name=NULL, breaks = brks) +
scale_fill_manual(values=c("#FF3399", "#3399FF", "#66CC99","#0022CC","#11AA00")) +
scale_color_manual(values=c("#FF3399", "#3399FF", "#66CC99","#0022CC","#11AA00")) +
guides(fill = guide_legend(title.theme =
element_text(size = 9, face = "bold",
colour = "black", angle = 0))) +
labs(x ="", y = "(% of GDP)",
subtitle = "Three-sector breakdown",
title = paste0("U.S. Sectoral Balances",
" through ", year(max(df$date))," ",quarters(max(df$date))),
caption = paste0("\nNote: data is from the BEA's Table 5.1. Saving and Investment by Sector.",
"\n\nSource: BEA.")) +
theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 10),
plot.caption = element_text(hjust = 0, size = 8))
# barplot()
l <- min(
length(df[substr(df$Sectors,1,1)=="A",]$DataValue_per_GDP),
length(df[substr(df$Sectors,1,1)=="B",]$DataValue_per_GDP),
length(df[substr(df$Sectors,1,1)=="C",]$DataValue_per_GDP)
)
bar_data <- matrix(data = rbind(df[substr(df$Sectors,1,1)=="A",]$DataValue_per_GDP*100,
df[substr(df$Sectors,1,1)=="B",]$DataValue_per_GDP*100,
df[substr(df$Sectors,1,1)=="C",]$DataValue_per_GDP*100),
nrow = 3, ncol = l)
colnames(bar_data) <- df[substr(df$Sectors,1,1)=="A",]$TimePeriod[1:l]
rownames(bar_data) <- c("A. General government","B. Private","C. External")
barplot(bar_data, col=c("red","blue","green"), beside = FALSE)
barplot(bar_data, col=c("red","blue","green"), beside = TRUE)

How to combine different layers in r ggplot with 2 different datasets - one for map & other for geom_area?

I have created a plot with geom_area(), geom_line() in it. Now I would like to add a country map background in the plot and for same I am trying to use: map_data() & geom_ploygon() but it's giving error, probably because one's xaxis is on date scale & other's is longitude.
Error:
Error: Invalid input: date_trans works with objects of class Date only
Here is my code & plot without map:
library(tidyverse)
library(glue)
library(scales)
library(tidytext)
data:
file_url <- "https://raw.githubusercontent.com/johnsnow09/covid19-df_stack-code/main/ts_all_long3.csv"
ts_all_long <- read.csv(url(file_url))
Step 1:
confirm_col = "#32a4ba"
death_col = "#f08080"
Country_selected = c("India")
scaleFactor = max(ts_all_long %>%
filter(Country.Region %in% Country_selected) %>%
pull(Confirmed_daily)) /
max(ts_all_long %>%
filter(Country.Region %in% Country_selected) %>%
pull(Death_daily))
ts_all_long %>%
filter(Country.Region %in% c("India") ) %>%
ggplot(aes(x = date)) +
geom_area(aes(y = Confirmed_daily), fill = confirm_col, alpha = .7) +
geom_line(aes(y = Death_daily * scaleFactor), col = death_col,
size = 0.8, alpha = 0.8) +
scale_y_continuous(name = "Daily Cases", sec.axis = sec_axis(~./scaleFactor, name = "Daily Deaths"),
labels = scales::comma_format()) +
scale_x_date(date_breaks = "1 month", date_labels = "%b") +
geom_hline(yintercept = c(25000,95000), lty = 2, col = "grey50") +
geom_vline(xintercept = c(ymd("2020-07-08"),ymd("2020-09-10"),
ymd("2021-03-15"),ymd("2021-04-03")),
lty = 2, col = "grey50") +
annotate("text", x = ymd("2020-08-10"), y = 96000, col = death_col, size = 3,
label = "25k to 95k in \n~2months+") +
annotate("text", x = ymd("2021-03-18"), y = 96000, col = death_col, size = 3,
label = "25k to 95k in \n~2weeks+") +
geom_text(
label=glue("1st wave"), angle = 90, x=ymd("2020-07-30"), y= 75000,
size = 4, color = confirm_col
) +
geom_text(
label=glue("2nd wave"), angle = 90, x=ymd("2021-03-18"), y= 75000,
size = 4, color = confirm_col
) +
theme_excel_new() +
theme(
axis.title.y.left=element_text(color=confirm_col),
axis.text.y.left=element_text(color=confirm_col),
axis.title.y.right=element_text(color=death_col),
axis.text.y.right=element_text(color=death_col),
plot.title = element_markdown(face = "plain", family = "serif", size = 14),
panel.grid.major = element_blank()
) +
labs(title = glue("<i>{Country_selected}</i>: Daily Cases to jump to 1 lac in 1st & 2nd wave, as of: {max(ts_all_long$date)}"),
# subtitle = "Cases to jump from 25,000 to 95,000 in first & second wave",
caption = "Data source: covid19.analytics
Created by: ViSa")
Step 2: Code & image for map:
ggplot() +
geom_polygon(data = map_data("world", region = "India"),
aes(x = long, y = lat, group = group),
fill="lightgray", colour = "white")
Step 3: When I try to combine code for above 2 steps I get an error:
confirm_col = "#32a4ba"
death_col = "#f08080"
Country_selected = c("India")
scaleFactor = max(ts_all_long %>%
filter(Country.Region %in% Country_selected) %>%
pull(Confirmed_daily)) /
max(ts_all_long %>%
filter(Country.Region %in% Country_selected) %>%
pull(Death_daily))
ts_all_long %>%
filter(Country.Region %in% c("India") ) %>%
ggplot() +
# added country map here from step2
geom_polygon(data = map_data("world", region = "India"),
aes(x = long, y = lat, group = group),
fill="lightgray", colour = "white") +
# usual plot of step1
geom_area(aes(x = date, y = Confirmed_daily), fill = confirm_col, alpha = .7) +
geom_line(aes(x = date, y = Death_daily * scaleFactor), col = death_col,
size = 0.8, alpha = 0.8) +
scale_y_continuous(name = "Daily Cases", sec.axis = sec_axis(~./scaleFactor, name = "Daily Deaths"),
labels = scales::comma_format()) +
scale_x_date(date_breaks = "1 month", date_labels = "%b") +
geom_hline(yintercept = c(25000,95000), lty = 2, col = "grey50") +
geom_vline(xintercept = c(ymd("2020-07-08"),ymd("2020-09-10"),
ymd("2021-03-15"),ymd("2021-04-03")),
lty = 2, col = "grey50") +
annotate("text", x = ymd("2020-08-10"), y = 96000, col = death_col, size = 3,
label = "25k to 95k in \n~2months+") +
annotate("text", x = ymd("2021-03-18"), y = 96000, col = death_col, size = 3,
label = "25k to 95k in \n~2weeks+") +
geom_text(
label=glue("1st wave"), angle = 90, x=ymd("2020-07-30"), y= 75000,
size = 4, color = confirm_col
) +
geom_text(
label=glue("2nd wave"), angle = 90, x=ymd("2021-03-18"), y= 75000,
size = 4, color = confirm_col
) +
theme_excel_new() +
theme(
axis.title.y.left=element_text(color=confirm_col),
axis.text.y.left=element_text(color=confirm_col),
axis.title.y.right=element_text(color=death_col),
axis.text.y.right=element_text(color=death_col),
plot.title = element_markdown(face = "plain", family = "serif", size = 14),
panel.grid.major = element_blank()
) +
labs(title = glue("<i>{Country_selected}</i>: Daily Cases to jump to 1 lac in 1st & 2nd wave, as of: {max(ts_all_long$date)}"),
# subtitle = "Cases to jump from 25,000 to 95,000 in first & second wave",
caption = "Data source: covid19.analytics
Created by: ViSa")
I would suggest to add the map as a background image to your plot which could be done via e.g. the ggimage package like so:
library(ggimage)
map <- ggplot() +
geom_polygon(data = map_data("world", region = "India"),
aes(x = long, y = lat, group = group),
fill="lightgray", colour = "white") +
theme_void()
ggsave("map.png")
#> Saving 7 x 5 in image
ggbackground(p, "map.png")
p:
d <- ts_all_long %>%
filter(Country.Region %in% c("India")) %>%
mutate(date = as.Date(date))
p <- ggplot(d, aes(x = date)) +
geom_area(aes(y = Confirmed_daily), fill = confirm_col, alpha = .7) +
geom_line(aes(y = Death_daily * scaleFactor), col = death_col,
size = 0.8, alpha = 0.8) +
scale_y_continuous(name = "Daily Cases", sec.axis = sec_axis(~./scaleFactor, name = "Daily Deaths"),
labels = scales::comma_format()) +
scale_x_date(date_breaks = "1 month", date_labels = "%b") +
geom_hline(yintercept = c(25000,95000), lty = 2, col = "grey50") +
geom_vline(xintercept = c(ymd("2020-07-08"),ymd("2020-09-10"),
ymd("2021-03-15"),ymd("2021-04-03")),
lty = 2, col = "grey50") +
annotate("text", x = ymd("2020-08-10"), y = 96000, col = death_col, size = 3,
label = "25k to 95k in \n~2months+") +
annotate("text", x = ymd("2021-03-18"), y = 96000, col = death_col, size = 3,
label = "25k to 95k in \n~2weeks+") +
geom_text(
label=glue("1st wave"), angle = 90, x=ymd("2020-07-30"), y= 75000,
size = 4, color = confirm_col
) +
geom_text(
label=glue("2nd wave"), angle = 90, x=ymd("2021-03-18"), y= 75000,
size = 4, color = confirm_col
) +
theme_excel_new() +
theme(
axis.title.y.left=element_text(color=confirm_col),
axis.text.y.left=element_text(color=confirm_col),
axis.title.y.right=element_text(color=death_col),
axis.text.y.right=element_text(color=death_col),
plot.title = element_markdown(face = "plain", family = "serif", size = 14),
panel.grid.major = element_blank()
) +
labs(title = glue("<i>{Country_selected}</i>: Daily Cases to jump to 1 lac in 1st & 2nd wave, as of: {max(ts_all_long$date)}"),
# subtitle = "Cases to jump from 25,000 to 95,000 in first & second wave",
caption = "Data source: covid19.analytics
Created by: ViSa")

Formatting dates for {closest_state} output in r (bar chart race)

In this bar chart race, with a Y-m-d date format ("2010-11-30… etc"), the gif (below) runs through the dates fine...
...but when I convert those same dates to a %Y %b format ("2010 Nov… etc"), months are missing throughout the animation, as shown in this second gif, below
Furthermore, I’d ideally like the date to be displayed as %b %Y format (Nov 2010… etc). I've spent weeks trying to sort this out but to no avail. Any help would be appreciated.
Here's the code
df <- read.csv(file="Data/Carmakers market caps monthly.csv")
# Renames headings
df<-rename(df, c(General.Motors = "General Motors", Toyota.Motor = "Toyota Motor"))
meltdf <- melt(df,id="Date")
names(meltdf) <- c("Date", "Company", "Value")
meltdf$Date <- as.Date(meltdf$Date, "%d/%m/%Y")
meltdf$Value <- as.numeric(as.character(meltdf$Value))
meltdf = meltdf %>%
group_by(Date)%>%
mutate(rank = rank(-Value),
Value_rel = Value/Value[rank==1],
Value_lbl = paste0(" ",Value/1000000000)) %>%
group_by(Company)
meltdf$Value_lbl <- as.numeric(as.character(meltdf$Value_lbl))
meltdf$Value <- as.numeric(as.character(meltdf$Value/1000000000))
meltdf$Value_lbl <- sprintf(meltdf$Value_lbl, fmt = '%#.1f')
strftime(meltdf$Date, format = "%Y %b") ->
meltdf$Date
#plotting graph
anim <-ggplot(meltdf,aes(rank,
group=Company,
fill=as.factor(Company),
color=as.factor(Company))) +
geom_tile(aes(y = Value/2,
height = Value,
width = 0.9), alpha = 0.8, color = NA) +
geom_text(aes(y = 0, label = paste(Company, " ")), vjust = 0.2, hjust = 1)+
geom_text(aes(y=Value,label = Value_lbl, hjust=0)) +
coord_flip(clip = "off", expand = TRUE) +
scale_y_continuous(labels = scales::comma) +
scale_x_reverse() +
guides(color = FALSE, fill = FALSE) +
theme_minimal() +
theme(
plot.title=element_text(size=23, hjust=0.5, face="bold", colour="grey", vjust=-1),
plot.subtitle=element_text(size=18, hjust=0.5, face="italic", color="grey",
margin = margin(t = 15, r = 0, b = 0, l = 0)),
plot.caption =element_text(size=8, hjust=0.5, face="italic", color="grey"),
axis.ticks.y = element_blank(),
axis.text.y = element_blank(),
plot.margin = margin(1,1,1,2, "cm")) +
transition_states(states = Date, transition_length = 12, state_length = 1, wrap = FALSE) +
ease_aes('cubic-in-out') +
#view_follow(fixed_x = TRUE) +
labs(title = 'Largest car companies in the world {closest_state}',
subtitle = "Market capitalization",
caption = "Data source: Refinitiv",
x="", y="$ billion")
#Create gif
animate(anim, nframes = 400,fps = 8.1, width = 550, height = 350,
renderer = gifski_renderer("car_companies_2.gif"), end_pause = 15, start_pause = 25)
And here's a sample of the data that created this chart.
Date Tesla Toyota Motor General Motors Daimler
1 30/11/2010 3295253866 132694537161 51300000000 52944591823
2 31/12/2010 2483798768 136160803584 55290000000 53967411400
3 31/1/2011 2247823894 142843809831 54735000000 56926590672
4 28/2/2011 2277562013 161097730179 52331714768 54401346072
5 31/3/2011 2645556545 138915112426 48429857121 53122249064
6 30/4/2011 2639462630 136650698149 50084659687 55615851126
118 31/8/2020 464338876502 215889700906 42403389651 45497228943
119 30/9/2020 399755220356 214500698099 42346145790 49249966873
120 31/10/2020 367823400433 212138493292 49423051428 47500782647
121 30/11/2020 538028456051 219024834051 62748525184 60564258296
122 31/12/2020 668905110256 250711385128 59599648464 61825906062
123 31/1/2021 668905110256 250711385128 59599648464 61825906062
I have performed all the manipulation in the same pipe using tidyverse functions. You can try :
library(tidyverse)
library(gganimate)
library(lubridate)
df %>%
pivot_longer(cols = -Date, names_to = 'Company', values_to = 'Value') %>%
mutate(Date = dmy(Date)) %>%
group_by(Date)%>%
mutate(rank = rank(-Value),
Value_rel = Value/Value[rank==1],
Value_lbl = paste0(" ",round(Value/1000000000, 2)),
date_format = format(Date, '%b-%Y')) %>%
arrange(Date) %>%
mutate(date_format = factor(date_format, unique(date_format))) %>%
group_by(Company) %>%
ggplot(aes(rank,
group=Company,
fill=as.factor(Company),
color=as.factor(Company))) +
geom_tile(aes(y = Value/2,
height = Value,
width = 0.9), alpha = 0.8, color = NA) +
geom_text(aes(y = 0, label = paste(Company, " ")), vjust = 0.2, hjust = 1)+
geom_text(aes(y=Value,label = Value_lbl, hjust=0)) +
coord_flip(clip = "off", expand = TRUE) +
scale_y_continuous(labels = scales::comma) +
scale_x_reverse() +
guides(color = FALSE, fill = FALSE) +
theme_minimal() +
theme(
plot.title=element_text(size=23, hjust=0.5, face="bold", colour="grey", vjust=-1),
plot.subtitle=element_text(size=18, hjust=0.5, face="italic", color="grey",
margin = margin(t = 15, r = 0, b = 0, l = 0)),
plot.caption =element_text(size=8, hjust=0.5, face="italic", color="grey"),
axis.ticks.y = element_blank(),
axis.text.y = element_blank(),
plot.margin = margin(1,1,1,2, "cm")) +
transition_states(states = date_format, transition_length = 12, state_length = 1, wrap = FALSE) +
ease_aes('cubic-in-out') +
#view_follow(fixed_x = TRUE) +
labs(title = 'Largest car companies in the world {closest_state}',
subtitle = "Market capitalization",
caption = "Data source: Refinitiv",
x="", y="$ billion") -> anim
animate(anim, nframes = 400,fps = 8.1, width = 550, height = 350,
renderer = gifski_renderer("car_companies_2.gif"), end_pause = 15, start_pause = 25)
From the limited data that I could copy from what you have shared the animation looks like this. The important part is that the dates are in order.
data
df <- structure(list(Date = c("30/11/2010", "31/12/2010", "31/1/2011",
"28/2/2011", "31/3/2011", "30/4/2011", "31/8/2020", "30/9/2020",
"31/10/2020", "30/11/2020", "31/12/2020", "31/1/2021"), Tesla = c(3295253866,
2483798768, 2247823894, 2277562013, 2645556545, 2639462630, 464338876502,
399755220356, 367823400433, 538028456051, 668905110256, 668905110256
), Toyota_Motor = c(132694537161, 136160803584, 142843809831,
161097730179, 138915112426, 136650698149, 215889700906, 214500698099,
212138493292, 219024834051, 250711385128, 250711385128), General_Motors = c(5.13e+10,
5.529e+10, 5.4735e+10, 52331714768, 48429857121, 50084659687,
42403389651, 42346145790, 49423051428, 62748525184, 59599648464,
59599648464), Daimler = c(52944591823, 53967411400, 56926590672,
54401346072, 53122249064, 55615851126, 45497228943, 49249966873,
47500782647, 60564258296, 61825906062, 61825906062)),
class = "data.frame", row.names = c(NA, -12L))

Complex Chart in R/ggplot with Proper Legend Display

This is my first question to StackExchange, and I've searched for answers that have been helpful, but haven't really gotten me to where I'd like to be.
This is a stacked bar chart, combined with a point chart, combined with a line.
Here's my code:
theme_set(theme_light())
library(lubridate)
FM <- as.Date('2018-02-01')
x.range <- c(FM - months(1) - days(1) - days(day(FM) - 1), FM - days(day(FM) - 1) + months(1))
x.ticks <- seq(x.range[1] + days(1), x.range[2], by = 2)
#populate example data
preds <- data.frame(FM = FM, DATE = seq(x.range[1] + days(1), x.range[2] - days(1), by = 1))
preds <- data.frame(preds, S_O = round(seq(1, 1000000, by = 1000000/nrow(preds))))
preds <- data.frame(preds, S = round(ifelse(month(preds$FM) == month(preds$DATE), day(preds$DATE) / 30.4, 0) * preds$S_O))
preds <- data.frame(preds, O = preds$S_O - preds$S)
preds <- data.frame(preds, pred_sales = round(1000000 + rnorm(nrow(preds), 0, 10000)))
preds$ma <- with(preds, stats::filter(pred_sales, rep(1/5, 5), sides = 1))
y.max <- ceiling(max(preds$pred_sales) / 5000) * 5000 + 15000
line.cols <- c(O = 'palegreen4', S = 'steelblue4',
P = 'maroon', MA = 'blue')
fill.cols <- c(O = 'palegreen3', S = 'steelblue3',
P = 'red')
p <- ggplot(data = preds,
mapping = aes(DATE, pred_sales))
p <- p +
geom_bar(data = reshape2::melt(preds[,c('DATE', 'S', 'O')], id.var = 'DATE'),
mapping = aes(DATE, value, group = 1, fill = variable, color = variable),
width = 1,
stat = 'identity',
alpha = 0.5) +
geom_point(mapping = aes(DATE, pred_sales, group = 2, fill = 'P', color = 'P'),
shape = 22, #square
alpha = 0.5,
size = 2.5) +
geom_line(data = preds[!is.na(preds$ma),],
mapping = aes(DATE, ma, group = 3, color = 'MA'),
alpha = 0.8,
size = 1) +
geom_text(mapping = aes(DATE, pred_sales, label = formatC(pred_sales / 1000, format = 'd', big.mark = ',')),
angle = 90,
size = 2.75,
hjust = 1.25,
vjust = 0.4) +
labs(title = sprintf('%s Sales Predictions - %s', 'Overall', format(FM, '%b %Y')),
x = 'Date',
y = 'Volume in MMlbs') +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1, size = 8),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank(),
legend.title = element_blank(),
legend.position = 'bottom',
legend.text = element_text(size = 8),
legend.margin = margin(t = 0.25, unit = 'cm')) +
scale_x_date(breaks = x.ticks,
date_labels = '%b %e',
limits = x.range) +
scale_y_continuous(limits = c(0, y.max),
labels = function(x) { formatC(x / 1000, format='d', big.mark=',') }) +
scale_color_manual(values = line.cols,
breaks = c('MA'),
labels = c(MA = 'Mvg Avg (5)')) +
scale_fill_manual(values = fill.cols,
breaks = c('P', 'O', 'S'),
labels = c(O = 'Open Orders', S = 'Sales', P = 'Predictions'))
p
The chart it generates is this:
As you can see, the legend does a couple of funky things. It's close, but not quite there. I only want boxes with exterior borders for Predictions, Open Orders, and Sales, and only a blue line for the Mvg Avg (5).
Any advice would be appreciated.
Thanks!
Rather late, but if you are still interested to understand this problem, the following should work. Explanations are included as comments within the code:
library(dplyr)
preds %>%
# scale the values for ALL numeric columns in the dataset, before
# passing the dataset to ggplot()
mutate_if(is.numeric, ~./1000) %>%
# since x / y mappings are stated in the top level ggplot(), there's
# no need to repeat them in the subsequent layers UNLESS you want to
# override them
ggplot(mapping = aes(x = DATE, y = pred_sales)) +
# 1. use data = . to inherit the top level data frame, & modify it on
# the fly for this layer; this is neater as you are essentially
# using a single data source for the ggplot object.
# 2. geom_col() is a more succinct way to say geom_bar(stat = "identity")
# (I'm using tidyr rather than reshape package, since ggplot2 is a
# part of the tidyverse packages, & the two play together nicely)
geom_col(data = . %>%
select(S, O, DATE) %>%
tidyr::gather(variable, value, -DATE),
aes(y = value, fill = variable, color = variable),
width = 1, alpha = 0.5) +
# don't show legend for this layer (o/w the fill / color legend would
# include a square shape in the centre of each legend key)
geom_point(aes(fill = 'P', color = 'P'),
shape = 22, alpha = 0.5, size = 2.5, show.legend = FALSE) +
# use data = . %>% ... as above.
# since the fill / color aesthetic mappings from the geom_col layer would
# result in a border around all fill / color legends, avoid it all together
# here by hard coding the line color to "blue", & map its linetype instead
# to create a separate linetype-based legend later.
geom_line(data = . %>% na.omit(),
aes(y = ma, linetype = 'MA'),
color = "blue", alpha = 0.8, size = 1) +
# scales::comma is a more succinct alternative to formatC for this use case
geom_text(aes(label = scales::comma(pred_sales)),
angle = 90, size = 2.75, hjust = 1.25, vjust = 0.4) +
labs(title = sprintf('%s Sales Predictions - %s', 'Overall', format(FM, '%b %Y')),
x = 'Date',
y = 'Volume in MMlbs') +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1, size = 8),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank(),
legend.title = element_blank(),
legend.position = 'bottom',
legend.text = element_text(size = 8),
legend.margin = margin(t = 0.25, unit = 'cm')) +
scale_x_date(breaks = x.ticks,
date_labels = '%b %e',
limits = x.range) +
# as above, scales::comma is more succinct
scale_y_continuous(limits = c(0, y.max / 1000),
labels = scales::comma) +
# specify the same breaks & labels for the manual fill / color scales, so that
# a single legend is created for both
scale_color_manual(values = line.cols,
breaks = c('P', 'O', 'S'),
labels = c(O = 'Open Orders', S = 'Sales', P = 'Predictions')) +
scale_fill_manual(values = fill.cols,
breaks = c('P', 'O', 'S'),
labels = c(O = 'Open Orders', S = 'Sales', P = 'Predictions')) +
# create a separate line-only legend using the linetype mapping, with
# value = 1 (i.e. unbroken line) & specified alpha / color to match the
# geom_line layer
scale_linetype_manual(values = 1,
label = 'Mvg Avg (5)',
guide = guide_legend(override.aes = list(alpha = 1,
color = "blue")))

Resources