Display the basic summary statistics next to the ggplot2 boxplot - r

Is it possible to display the summary statistics next to the boxplot like:
throu<-structure(list(case_id = c("WC4132791", "WC4130879", "WC4128064",
"WC4121569", "WC4121568", "WC4130112", "WC4131829", "WC4130841",
"WC4130306", "WC4130417", "WC4124741", "WC4130114", "WC4131990",
"WC4121986", "WC4128062", "WC4122478", "WC4130337", "WC4125822",
"WC4127231", "WC4124761", "WC4129398", "WC4131040", "WC4123072",
"WC4131822", "WC4120712", "WC4121978", "WC4130110", "WC4123522",
"WC4130307", "WC4122643", "WC4130383", "WC4122248", "WC4122299",
"WC4122727", "WC4126769", "WC4131186", "WC4125978", "WC4129089",
"WC4121339", "WC4126469", "WC4131800", "WC4125572", "WC4132378",
"WC4123345", "WC4130314", "WC4127722", "WC4129978", "WC4131838",
"WC4130812", "WC4126953"), throughput_time = c(134.283333333333,
93.0756944444445, 83.5340277777778, 67.7833333333333, 65.3069444444444,
63.5402777777778, 59.6861111111111, 56.9791666666667, 55.9048611111111,
54.3826388888889, 52.6958333333333, 52.5125, 51.1680555555556,
50.9520833333333, 50.5402777777778, 49.9291666666667, 49.8201388888889,
49.7375, 49.0916666666667, 46.3069444444444, 45.30625, 45.2451388888889,
44.9722222222222, 44.8215277777778, 44.8048611111111, 43.0701388888889,
42.6840277777778, 42.6576388888889, 42.55, 42.2868055555556,
42.2805555555556, 41.9027777777778, 41.7409722222222, 41.6506944444444,
41.3527777777778, 40.7305555555556, 40.2861111111111, 40.2159722222222,
40.0854166666667, 40.0486111111111, 39.7930555555556, 39.6576388888889,
39.4638888888889, 39.4527777777778, 39.3569444444444, 39.3513888888889,
39.1854166666667, 39.0791666666667, 39.0743055555556, 39.0055555555556
)), row.names = c(NA, 50L), class = "data.frame")
I also have already extracted those in a separate dataframe:
quarts<- structure(list(min = 0, q1 = 7.1515625, median = 11.4881944444444,
mean = 12.3112423835125, q3 = 14.8456597222222, max = 93.0756944444445,
st_dev = 6.72704434885421, iqr = 7.69409722222222), class = "data.frame", row.names = c(NA,
-1L))
# A really basic boxplot.
ggplot(throu, aes( y=throughput_time)) +
geom_boxplot(fill="slateblue", alpha=0.2,width=0.05) +
xlim(-0.1, 0.1) +
xlab("")+ylab("Case duration in days")+ theme_classic()+
theme(axis.ticks.x = element_blank(),
axis.text.x = element_blank())

You could do this using annotate
ggplot(throu, aes( y=throughput_time)) +
geom_boxplot(fill="slateblue", alpha=0.2, width=0.05) +
annotate(geom = 'text', x = 0.05, y = 60, hjust = 0, color = 'gray50',
label = paste(names(quarts), collapse = '\n')) +
annotate(geom = 'text', x = 0.07, y = 60, hjust = 0,
label = paste(round(unlist(quarts), 3), collapse = '\n')) +
xlim(-0.1, 0.1) +
xlab("")+
ylab("Case duration in days")+
theme_classic()+
theme(axis.ticks.x = element_blank(),
axis.text.x = element_blank())

Related

R ggplot mixed legend of two legends

Here is the graph that I have as a basis:
color_two_groups_type_2 <- c("dark red", "black")
pd <- position_dodge(0.4)
hedonic_price_indices %>% ggplot(aes(x=year, y=index, group=factor(sample_sizes), color=factor(sample_sizes))) +
geom_line(linetype = "dashed", position = pd) +
geom_point(size = 0.5, position = pd) +
geom_errorbar(aes(ymin = index_lower_ci, ymax = index_upper_ci), width = 0.15, size = 0.25, colour="black", position = pd) +scale_y_continuous(breaks = c(0.5,1.0,1.5,2.0,2.5), limits = c(0.125, 2.85)) +
theme(legend.position="bottom", legend.margin=margin(0,0,0,0), legend.box.margin=margin(-20,0,0,0)) +
scale_color_manual(labels = c("Alternative", "Normal"), values = color_two_groups_type_2, guide = guide_legend(reverse = TRUE)) +
labs(title = "", x = "", y = "Index value (2000 = 1)", color = "") +
scale_x_discrete(breaks = c(1985,1990,1995,2000,2005,2010,2015,2020))
Now I would like to have two different line types.
desired_linetype <- c("dotted", "solid")
color_two_groups_type_2 <- c("dark red", "black")
pd <- position_dodge(0.4)
hedonic_price_indices %>% ggplot(aes(x=year, y=index, group=factor(sample_sizes), color=factor(sample_sizes))) +
#geom_line(linetype = "dashed", position = pd) +
geom_line(aes(linetype = sample_sizes), position = pd) +
scale_linetype_manual(values = desired_linetype) +
geom_point(size = 0.5, position = pd) +
geom_errorbar(aes(ymin = index_lower_ci, ymax = index_upper_ci), width = 0.15, size = 0.25, colour="black", position = pd) +
scale_y_continuous(breaks = c(0.5,1.0,1.5,2.0,2.5), limits = c(0.125, 2.85)) + theme(legend.position="bottom", legend.margin=margin(0,0,0,0), legend.box.margin=margin(-20,0,0,0)) +
scale_color_manual(labels = c("Alternative", "Normal"), values = color_two_groups_type_2, guide = guide_legend(reverse = TRUE)) +
labs(title = "", x = "", y = "Index value (2000 = 1)", color = "") +
scale_x_discrete(breaks = c(1985,1990,1995,2000,2005,2010,2015,2020))
Unfortunately, I have got two legends by now. By adding + guides(col = "none") (e.g. at the bottom), the left part of the legend gets removed:
And alternatively, by changing scale_linetype_manual(values = desired_linetype) to scale_linetype_manual(values = desired_linetype, guide="none"), the right part of the legend is gets removed:
However, I would like to have mixed version of these two legends. I.e. a legend that shows both the line type and the color. How could I obtain this result? (and I would prefer not to have a legend title ("sample sizes"), as in my initial graph).
I would be thankful for any suggestion!
Here is some code to reproduce the graphs:
hedonic_price_indices <- structure(list(estimate = c(-0.575412358998748, -0.52549627191954, -0.48635414326085, -0.732792998304216, -0.562889873546058, -0.913572700671539, -1.13936126077503, -1.08231133221031, -1.3515171997382, -0.94983790292841 ), lower_ci = c(-0.626714841953077, -0.584959417015897, -0.542829387483941, -0.790953736050918, -0.620938372048851, -1.02481824744291, -1.26017870739697, -1.17246349249945, -1.41331442736626, -1.01254016013769), upper_ci = c(-0.524109876044418, -0.466033126823183, -0.429878899037759, -0.674632260557514, -0.504841375043265, -0.802327153900171, -1.01854381415308, -0.992159171921177, -1.28971997211013, -0.887135645719133), year = c("1984", "1985", "1986", "1987", "1988", "1984", "1985", "1986", "1987", "1988"), estimate_exp = c(-0.437527119774759, -0.408738135115574, -0.38513598119696, -0.519435103003286, -0.430439275221177, -0.598911308640654, -0.679976631974547, -0.661188486027214, -0.741152760388594, -0.613196281876959), lower_ci_exp = c(-0.465655673667104, -0.442871528710716, -0.41889823785973, -0.546587846514592, -0.462560117662101, -0.641138316492387, -0.71639666004378, -0.69039670436256, -0.756664572496545, -0.636705020910341 ), upper_ci_exp = c(-0.407917843611993, -0.372513502931199, -0.349412123229172, -0.490656308062782, -0.3963986859341, -0.551715477774212, -0.63887958407625, -0.629224741409214, -0.724652122619944, -0.588166297456909), index = c(0.562472880225241, 0.591261864884426, 0.61486401880304, 0.480564896996714, 0.569560724778823, 0.401088691359346, 0.320023368025453, 0.338811513972786, 0.258847239611406, 0.386803718123041), index_lower_ci = c(0.534344326332896, 0.557128471289284, 0.58110176214027, 0.453412153485408, 0.537439882337899, 0.358861683507613, 0.28360333995622, 0.30960329563744, 0.243335427503455, 0.363294979089659), index_upper_ci = c(0.592082156388007, 0.627486497068801, 0.650587876770828, 0.509343691937218, 0.6036013140659, 0.448284522225788, 0.36112041592375, 0.370775258590786, 0.275347877380056, 0.411833702543091), sample_sizes = c("Normal", "Normal", "Normal", "Normal", "Normal", "Alternative", "Alternative", "Alternative", "Alternative", "Alternative")), row.names = c("normal_sale_1984", "normal_sale_1985", "normal_sale_1986", "normal_sale_1987", "normal_sale_1988", "foreclosure_1984", "foreclosure_1985", "foreclosure_1986", "foreclosure_1987", "foreclosure_1988"), class = "data.frame")
To merge your legends use the same labels and guide in both scale_color and scale_linetype and the same name in labs:
library(ggplot2)
library(dplyr)
desired_linetype <- c("dotted", "solid")
color_two_groups_type_2 <- c("dark red", "black")
pd <- position_dodge(0.4)
hedonic_price_indices %>%
ggplot(aes(x = year, y = index, group = factor(sample_sizes), color = factor(sample_sizes))) +
geom_line(aes(linetype = sample_sizes), position = pd) +
geom_point(size = 0.5, position = pd) +
geom_errorbar(aes(ymin = index_lower_ci, ymax = index_upper_ci),
width = 0.15, size = 0.25, colour = "black", position = pd
) +
scale_x_discrete(breaks = c(1985, 1990, 1995, 2000, 2005, 2010, 2015, 2020)) +
scale_y_continuous(breaks = c(0.5, 1.0, 1.5, 2.0, 2.5), limits = c(0.125, 2.85)) +
scale_color_manual(
labels = c("Alternative", "Normal"),
values = color_two_groups_type_2,
guide = guide_legend(reverse = TRUE)
) +
scale_linetype_manual(
labels = c("Alternative", "Normal"),
values = desired_linetype,
guide = guide_legend(reverse = TRUE)
) +
labs(
title = "", x = "", y = "Index value (2000 = 1)",
color = "", linetype = ""
) +
theme(
legend.position = "bottom",
legend.margin = margin(0, 0, 0, 0),
legend.box.margin = margin(-20, 0, 0, 0)
)

How to apply slope plot R code to another data

I have dataframe which represents sales by model within 2 different years. 'change' column stands for absolute change by models from 2020 to 2021 while 'chng.percent' measures this change in percentages.
However, I am struggling to apply the given Code of slope plot to my data.
df <- data.frame (model = c("A", "A", "B","B"),
year = c(2020,2021,2020,2021),
sale =c(105,190,110,180),
chang = c(85,NA,70,NA),
chng.percent = c(80.9,NA, 63.6,NA))
Expected outcome (Like this)
Here's a way to do it all within ggplot using your existing data:
ggplot(df, aes(year, sale, color = model)) +
geom_line(arrow = arrow(type = "closed", angle = 20),
key_glyph = draw_key_point) +
geom_vline(aes(xintercept = year)) +
geom_text(aes(label = sale, hjust = ifelse(year == 2020, 1.3, -0.3)),
color = "black",
size = 6) +
geom_text(aes(x = min(df$year) + 0.25, y = 105,
label = paste0("+", chang[1], "; ", chng.percent[1], "%"),
color = "A"), size = 5) +
geom_text(aes(x = max(df$year) - 0.25, y = 150,
label = paste0("+", chang[3], "; ", chng.percent[3], "%"),
color = "B"), size = 5) +
theme_void(base_size = 16) +
coord_cartesian(clip = "off") +
scale_x_continuous(breaks = c(2020, 2021)) +
guides(color = guide_legend(override.aes = list(size = 5))) +
scale_color_brewer(palette = "Set1") +
theme(plot.margin = margin(30, 30, 30, 30),
aspect.ratio = 1.5,
axis.text.x = element_text(size = 20))
you can try something like this :
df <- data.frame(model = c("A", "B"),
sale_2020 =c(105,110),
sale_2021 =c(190,180),
chang = c(85,70),
chng.percent = c(80.9, 63.6))
df %>%
ggplot() +
geom_segment(aes(x = 1, xend = 2,
y = sale_2020,
yend = sale_2021,
group = model,
col = model),
size = 1.2) +
# set the colors
scale_color_manual(values = c("#468189", "#9DBEBB"), guide = "none") +
# remove all axis stuff
theme_classic() +
theme(axis.line = element_blank(),
axis.text = element_blank(),
axis.title = element_blank(),
axis.ticks = element_blank()) +
geom_text(aes(x = x, y = y, label = label),
data = data.frame(x = 1:2,
y = 10 + max(df$sale_2021),
label = c("2020", "2021")),
col = "grey30",
size = 6) +
# add vertical lines that act as axis for 2020
geom_segment(x = 1, xend = 1,
y = min(df$sale_2020) -10,
yend = max(df$sale_2020) + 81,
col = "grey70", size = 1.5) +
# add vertical lines that act as axis for 2021
geom_segment(x = 2, xend = 2,
y = min(df$sale_2021) - 80,
yend = max(df$sale_2021) + 1,
col = "grey70", size = 1.5) +
# add the success rate next to each point on 2021 axis
geom_text(aes(x = 2 + 0.08,
y = sale_2021,
label = paste0(round(sale_2021, 1))),
col = "grey30") +
# add the success rate next to each point on 2021 axis
geom_text(aes(x = 1 - 0.08,
y = sale_2020,
label = paste0(round(sale_2020, 1))),
col = "grey30") +
# add the success rate next to each point on 2020 axis
geom_text(aes(x = 2 - 0.5,
y = c(156, 135),
label = paste0(round(chng.percent, 1), "%")),
col = "grey30")

Adding a Legend in ggplot with multiple datasets

I am trying to combine multiple datasets into one figure in r. My code doesn't seem to be generating a legend for this data.
My two questions are:
Can I add some code expression into my lines to generate a legend element for each set of data? I thought this would be a command such as 'fill' or 'shape' - but I am wrong
Is there a reason why I am not g generating a legend full stop? I was expecting at least a legend with one set of data - but it appears with none - is one deconflicting with another?
If the above cannot be done, then is there a straightforward way to plot this figure using a 3 column data table using the groupings of each data?
The code is below. You wont be able to reproduce it as I would need to send you the CSV files ( the datasets).
I have included the output figure from the code copied below. Idealy I would like a legend that lists a label for each different line/point +/- a text label at each line.
Thank you for any help!
library(tidyverse)
#These are the datasets I am turning into three dataframes #
bowen.data <- bowen
df <- data.frame(bowen)
lit.data <- lit_data
df <- data.frame(lit_data)
shock.data <- shock_tube_tests
df <- data.frame(shock_tube_tests)
high.data <- high_shock_data
df <- data.frame(high_shock_data)
low.data <- low_shock_data
df <- data.frame(low_shock_data)
#setting limit for my axis#
options(scipen = 1000000)
library(ggplot2)
#Now I wish to plot alll this data on one figure. This data describes duration and peak overpressure of shock waves. Each line of geom_line is trying to plot each datasset to produce each line, or points. I then set the X & Y limiitss, the labelss and the colours. #
ggplot(bowen.data, aes(x=Duration)) +
geom_line(aes(y = survival99), color = "cornflowerblue", linetype="longdash") + geom_line(aes(y = survival90), color="dodgerblue1", linetype="dashed") +
geom_line(aes(y = survival50), color="steelblue", linetype="solid") +
geom_line(aes(y = survival10), color="dodgerblue2", linetype="dotdash") +
geom_line(aes(y = survival1), color="dodgerblue3", linetype="twodash") + geom_line(aes(y = lung), color="darkslategrey", linetype="solid") +
ylim(100,1100000) + xlim(0.2,20) + ylab("Peak Overpressure") + xlab("Duration") +
geom_point(data=high.data, aes(x=duration, y=high), color='seagreen4') +
geom_point(data=low.data, aes(x=duration, y=low), color='indianred') +
geom_point(data=low.data, aes(x=duration, y=low), color='indianred', shape = 13) +
geom_point(data=shock.data, aes(x=duration, y=tube), color='violetred4', shape = 17) +
geom_point(data=lit.data, aes(x=dura, y=lit), color='orange')
Using dput:
structure(list(Duration = c(0.2, 0.3, 0.4, 0.5, 0.6), survival99 = c(3509982.865,
2422907.195, 1883026.274, 1555445.788, 1348277.839), survival90 = c(4138911.806,
2846984, 2206434.933, 1822870.548, 1566705.278), survival50 = c(5104973.144,
3490782.825, 2693285.691, 2217270.161, 1900462.526), survival10 = c(6313217.275,
4294375.461, 3299414.021, 2705203.586, 2313630.72), survival1 = c(7513231.158,
5090961.551, 3899360.722, 3190981.429, 2711178.007), lung = c(1020994.629,
698156.565, 538657.1381, 443454.0321, 380092.5051), X8 = c(NA,
NA, NA, NA, NA)), row.names = c(NA, -5L), class = c("tbl_df",
"tbl", "data.frame"))
[
shock data
structure(list(duration = c(1.00244911, 0.947052916, 1.675566344,
1.6586253, 1.837305476), tube = c(24973.80469, 28125.45703, 169033.3438,
165488.6719, 285638.9375)), row.names = c(NA, -5L), class = c("tbl_df",
"tbl", "data.frame"))
lit data
structure(list(dura = c(2, 6, 1.3, 6.9, 2), lit = c(1000000,
1000000, 760000, 760000, 450000)), row.names = c(NA, -5L), class = c("tbl_df",
"tbl", "data.frame"))
[ `enter image description here][1]
I don't see your desired output. The idea is to assign a colour to each line by mapping the colour aesthetic to a constant string. The simplest option is to select the string that will appear in the legend. It is critical that this is done within the aes call; you are creating a mapping to this variable.
These strings can now be mapped to the appropriate colours by scale color manual.
library(ggplot2)
library(tidyverse)
library(ggthemes)
ggplot(bowen.data, aes(x=Duration)) +
geom_line(aes(y = survival99, color="survival99"), linetype="longdash", size=2) +
geom_line(aes(y = survival90, color="survival90"), linetype="dashed", size=2) +
geom_line(aes(y = survival50, color="survival50"), linetype="solid", size=2) +
geom_line(aes(y = survival10, color="survival10"), linetype="dotdash", size=2) +
geom_line(aes(y = survival1, color="survival1"), linetype="twodash", size=2) +
geom_line(aes(y = lung, color="Lung"), linetype="solid", size=2) +
ylim(100,1100000) + xlim(0.2,20) + ylab("Peak Overpressure") + xlab("Duration") +
#geom_point(data=high.data, aes(x=duration, y=high), color='seagreen4') +
#geom_point(data=low.data, aes(x=duration, y=low), color='indianred') +
#geom_point(data=low.data, aes(x=duration, y=low), color='indianred', shape = 13) +
geom_point(data=shock.data, aes(x=duration, y=tube, color='shock.data'), shape = 17, size=3) +
geom_point(data=lit.data, aes(x=dura, y=lit, color='lit.data'), size=3)+
theme_pander()+
theme(axis.text.x = element_text( hjust = 1, face="bold", size=12, color="black"),
axis.title.x = element_text(face="bold", size=16, color="black"),
axis.text.y = element_text(face="bold", size=12, color="black"),
axis.title.y = element_text(face="bold", size=16, color="black"),
legend.title=element_blank(),
legend.text = element_text(family="Times", color = "black", size = 16,face="bold"))+
scale_color_manual(values = c("survival1"="dodgerblue3","survival10"="dodgerblue2", "survival50"="steelblue", "survival90"="dodgerblue1", "survival99"="cornflowerblue", "lit.data"="orange","Lung"="darkslategrey","shock.data"="violetred4"))
Plot:
Also you need to adjust the range of the ylim and xlim. Below ylim(0,6600000) + xlim(0.2, 8)
If you want to add labels to lines (bowen.data), I recommend using the using the directlabels library.
Sample code:
library(dplyr)
library(ggplot2)
library(ggthemes)
library(directlabels)
bowen.data%>%
pivot_longer(cols = -1) %>%
ggplot(aes(x=Duration,y=value))+
geom_line(aes(linetype=name, color=name), size=2 )+
scale_color_manual(values = c("lung"="darkslategrey", "survival1"="dodgerblue3","survival10"="dodgerblue2", "survival50"="steelblue", "survival90"="dodgerblue1", "survival99"="cornflowerblue"))+
scale_linetype_manual(values=c("lung"="solid","survival1" = "twodash", "survival10" = "dotdash", "survival50" = "solid" , "survival90" = "dashed", "survival99" = "longdash"))+
geom_dl(aes(label = name), method = list(dl.trans(x = x + 0.2),"last.points", cex = 1.2, fontface='bold'))+
ylim(0,6600000) + xlim(0.2, 8) +
labs(x="Duration",y="Peak Overpressure", fill="Factors") +
theme_pander()+
theme(axis.text.x = element_text( hjust = 1, face="bold", size=12, color="black"),
axis.title.x = element_text(face="bold", size=16, color="black"),
axis.text.y = element_text(face="bold", size=12, color="black"),
axis.title.y = element_text(face="bold", size=16, color="black"),
legend.title=element_blank(),
legend.text = element_text(family="Times", color = "black", size = 16,face="bold"))
Plot:
Sample data:
bowen.data <-
structure(
list(
Duration = c(0.2, 0.3, 0.4, 0.5, 0.6),
survival99 = c(
3509982.865,
2422907.195,
1883026.274,
1555445.788,
1348277.839
),
survival90 = c(4138911.806,
2846984, 2206434.933, 1822870.548, 1566705.278),
survival50 = c(
5104973.144,
3490782.825,
2693285.691,
2217270.161,
1900462.526
),
survival10 = c(6313217.275,
4294375.461, 3299414.021, 2705203.586, 2313630.72),
survival1 = c(
7513231.158,
5090961.551,
3899360.722,
3190981.429,
2711178.007
),
lung = c(1020994.629,
698156.565, 538657.1381, 443454.0321, 380092.5051),
X8 = c(NA,
NA, NA, NA, NA)
),
row.names = c(NA,-5L),
class = c("tbl_df",
"tbl", "data.frame")
)
shock.data<-structure(
list(
duration = c(1.00244911, 0.947052916, 1.675566344,
1.6586253, 1.837305476),
tube = c(
24973.80469,
28125.45703,
169033.3438,
165488.6719,
285638.9375
)
),
row.names = c(NA,-5L),
class = c("tbl_df",
"tbl", "data.frame")
)
lit.data<-structure(
list(
dura = c(2, 6, 1.3, 6.9, 2),
lit = c(1000000,
1000000, 760000, 760000, 450000)
),
row.names = c(NA,-5L),
class = c("tbl_df",
"tbl", "data.frame")
)
#These are the datasets I am turning into three dataframes #
bowen.data <- bowen
df <- data.frame(bowen)
lit.data <- lit_data
df <- data.frame(lit_data)
shock.data <- shock_tube_tests
df <- data.frame(shock_tube_tests)
#high.data <- high_shock_data
#df <- data.frame(high_shock_data)
#low.data <- low_shock_data
#df <- data.frame(low_shock_data)
#setting limit for my axis#
#options(scipen = 1000000)

Formatting dates for {closest_state} output in r (bar chart race)

In this bar chart race, with a Y-m-d date format ("2010-11-30… etc"), the gif (below) runs through the dates fine...
...but when I convert those same dates to a %Y %b format ("2010 Nov… etc"), months are missing throughout the animation, as shown in this second gif, below
Furthermore, I’d ideally like the date to be displayed as %b %Y format (Nov 2010… etc). I've spent weeks trying to sort this out but to no avail. Any help would be appreciated.
Here's the code
df <- read.csv(file="Data/Carmakers market caps monthly.csv")
# Renames headings
df<-rename(df, c(General.Motors = "General Motors", Toyota.Motor = "Toyota Motor"))
meltdf <- melt(df,id="Date")
names(meltdf) <- c("Date", "Company", "Value")
meltdf$Date <- as.Date(meltdf$Date, "%d/%m/%Y")
meltdf$Value <- as.numeric(as.character(meltdf$Value))
meltdf = meltdf %>%
group_by(Date)%>%
mutate(rank = rank(-Value),
Value_rel = Value/Value[rank==1],
Value_lbl = paste0(" ",Value/1000000000)) %>%
group_by(Company)
meltdf$Value_lbl <- as.numeric(as.character(meltdf$Value_lbl))
meltdf$Value <- as.numeric(as.character(meltdf$Value/1000000000))
meltdf$Value_lbl <- sprintf(meltdf$Value_lbl, fmt = '%#.1f')
strftime(meltdf$Date, format = "%Y %b") ->
meltdf$Date
#plotting graph
anim <-ggplot(meltdf,aes(rank,
group=Company,
fill=as.factor(Company),
color=as.factor(Company))) +
geom_tile(aes(y = Value/2,
height = Value,
width = 0.9), alpha = 0.8, color = NA) +
geom_text(aes(y = 0, label = paste(Company, " ")), vjust = 0.2, hjust = 1)+
geom_text(aes(y=Value,label = Value_lbl, hjust=0)) +
coord_flip(clip = "off", expand = TRUE) +
scale_y_continuous(labels = scales::comma) +
scale_x_reverse() +
guides(color = FALSE, fill = FALSE) +
theme_minimal() +
theme(
plot.title=element_text(size=23, hjust=0.5, face="bold", colour="grey", vjust=-1),
plot.subtitle=element_text(size=18, hjust=0.5, face="italic", color="grey",
margin = margin(t = 15, r = 0, b = 0, l = 0)),
plot.caption =element_text(size=8, hjust=0.5, face="italic", color="grey"),
axis.ticks.y = element_blank(),
axis.text.y = element_blank(),
plot.margin = margin(1,1,1,2, "cm")) +
transition_states(states = Date, transition_length = 12, state_length = 1, wrap = FALSE) +
ease_aes('cubic-in-out') +
#view_follow(fixed_x = TRUE) +
labs(title = 'Largest car companies in the world {closest_state}',
subtitle = "Market capitalization",
caption = "Data source: Refinitiv",
x="", y="$ billion")
#Create gif
animate(anim, nframes = 400,fps = 8.1, width = 550, height = 350,
renderer = gifski_renderer("car_companies_2.gif"), end_pause = 15, start_pause = 25)
And here's a sample of the data that created this chart.
Date Tesla Toyota Motor General Motors Daimler
1 30/11/2010 3295253866 132694537161 51300000000 52944591823
2 31/12/2010 2483798768 136160803584 55290000000 53967411400
3 31/1/2011 2247823894 142843809831 54735000000 56926590672
4 28/2/2011 2277562013 161097730179 52331714768 54401346072
5 31/3/2011 2645556545 138915112426 48429857121 53122249064
6 30/4/2011 2639462630 136650698149 50084659687 55615851126
118 31/8/2020 464338876502 215889700906 42403389651 45497228943
119 30/9/2020 399755220356 214500698099 42346145790 49249966873
120 31/10/2020 367823400433 212138493292 49423051428 47500782647
121 30/11/2020 538028456051 219024834051 62748525184 60564258296
122 31/12/2020 668905110256 250711385128 59599648464 61825906062
123 31/1/2021 668905110256 250711385128 59599648464 61825906062
I have performed all the manipulation in the same pipe using tidyverse functions. You can try :
library(tidyverse)
library(gganimate)
library(lubridate)
df %>%
pivot_longer(cols = -Date, names_to = 'Company', values_to = 'Value') %>%
mutate(Date = dmy(Date)) %>%
group_by(Date)%>%
mutate(rank = rank(-Value),
Value_rel = Value/Value[rank==1],
Value_lbl = paste0(" ",round(Value/1000000000, 2)),
date_format = format(Date, '%b-%Y')) %>%
arrange(Date) %>%
mutate(date_format = factor(date_format, unique(date_format))) %>%
group_by(Company) %>%
ggplot(aes(rank,
group=Company,
fill=as.factor(Company),
color=as.factor(Company))) +
geom_tile(aes(y = Value/2,
height = Value,
width = 0.9), alpha = 0.8, color = NA) +
geom_text(aes(y = 0, label = paste(Company, " ")), vjust = 0.2, hjust = 1)+
geom_text(aes(y=Value,label = Value_lbl, hjust=0)) +
coord_flip(clip = "off", expand = TRUE) +
scale_y_continuous(labels = scales::comma) +
scale_x_reverse() +
guides(color = FALSE, fill = FALSE) +
theme_minimal() +
theme(
plot.title=element_text(size=23, hjust=0.5, face="bold", colour="grey", vjust=-1),
plot.subtitle=element_text(size=18, hjust=0.5, face="italic", color="grey",
margin = margin(t = 15, r = 0, b = 0, l = 0)),
plot.caption =element_text(size=8, hjust=0.5, face="italic", color="grey"),
axis.ticks.y = element_blank(),
axis.text.y = element_blank(),
plot.margin = margin(1,1,1,2, "cm")) +
transition_states(states = date_format, transition_length = 12, state_length = 1, wrap = FALSE) +
ease_aes('cubic-in-out') +
#view_follow(fixed_x = TRUE) +
labs(title = 'Largest car companies in the world {closest_state}',
subtitle = "Market capitalization",
caption = "Data source: Refinitiv",
x="", y="$ billion") -> anim
animate(anim, nframes = 400,fps = 8.1, width = 550, height = 350,
renderer = gifski_renderer("car_companies_2.gif"), end_pause = 15, start_pause = 25)
From the limited data that I could copy from what you have shared the animation looks like this. The important part is that the dates are in order.
data
df <- structure(list(Date = c("30/11/2010", "31/12/2010", "31/1/2011",
"28/2/2011", "31/3/2011", "30/4/2011", "31/8/2020", "30/9/2020",
"31/10/2020", "30/11/2020", "31/12/2020", "31/1/2021"), Tesla = c(3295253866,
2483798768, 2247823894, 2277562013, 2645556545, 2639462630, 464338876502,
399755220356, 367823400433, 538028456051, 668905110256, 668905110256
), Toyota_Motor = c(132694537161, 136160803584, 142843809831,
161097730179, 138915112426, 136650698149, 215889700906, 214500698099,
212138493292, 219024834051, 250711385128, 250711385128), General_Motors = c(5.13e+10,
5.529e+10, 5.4735e+10, 52331714768, 48429857121, 50084659687,
42403389651, 42346145790, 49423051428, 62748525184, 59599648464,
59599648464), Daimler = c(52944591823, 53967411400, 56926590672,
54401346072, 53122249064, 55615851126, 45497228943, 49249966873,
47500782647, 60564258296, 61825906062, 61825906062)),
class = "data.frame", row.names = c(NA, -12L))

Extend bars on a ggplot2 to show the data labels not squished

Here is a data frame:
library(tidyverse)
example_df <- structure(list(Funnel = c("Sessions", "AddToCart", "Registrations", "ShippingDetails", "Checkout", "Transactions"), Sum = c(1437574, 385281, 148181, 56989, 35613, 29671), End = c(NA, 1437574, 385281, 148181, 56989, 35613), xpos = c(0.5, 1.5, 2.5, 3.5, 4.5, 5.5), Diff = c(NA, 1052293, 237100, 91192, 21376, 5942), Percent = c("NA %", "73.2 %", "61.5 %", "61.5 %", "37.5 %", "16.7 %")), .Names = c("Funnel", "Sum", "End", "xpos", "Diff", "Percent"), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, -6L))
And here is a ggplot2:
ggplot(example_df, aes(x = reorder(Funnel, -Sum), y = Sum)) +
geom_col(alpha = 0.6, fill = "#008080") +
stat_summary(aes(label = scales::comma(..y..)), fun.y = 'sum',
geom = 'text', col = 'white', vjust = 1.5) +
geom_segment(aes(x=xpos, y = End, xend = xpos, yend = Sum)) +
geom_text(aes(x=xpos,y = End-Diff / 2, label=Percent), hjust = -0.2) +
theme(axis.title.x = element_blank(),
axis.title.y = element_blank()) +
scale_y_continuous(labels = function(l) {l = l / 1000; paste0(l, "K")}) +
Here's what it looks like:
The values on the plot from Shipping Details: Transactions are tricky to read because the bars are smaller.
I wondered if there was a good approach to dealing with this. I tried extending the range with:
+ expand_limits(y = -100000)
But that just lowers the y axis.
Is there a sensible solution to visualizing the data points in a way they are not squished? If I could somehow lower the green bars into the minus region without impacting the proportions?
Very dirty solution, but works. Add dummy geom_bar's bellow each segment (ie., extend original segment by adding negative bar) with the same color and alpha.
Bars to add:
geom_bar(data = data.frame(x = example_df$Funnel, y = -2e4),
aes(x, y),
stat = "identity", position = "dodge",
alpha = 0.6, fill = "#008080")
Final code:
# Using OPs data
library(ggplot2)
ggplot(example_df, aes(x = reorder(Funnel, -Sum), y = Sum)) +
geom_col(alpha = 0.6, fill = "#008080") +
geom_segment(aes(x=xpos, y = End, xend = xpos, yend = Sum)) +
geom_text(aes(x=xpos,y = End-Diff / 2, label=Percent), hjust = -0.2) +
theme(axis.title.x = element_blank(),
axis.title.y = element_blank()) +
scale_y_continuous(labels = function(l) {l = l / 1000; paste0(l, "K")}) +
geom_bar(data = data.frame(x = example_df$Funnel, y = -2e4),
aes(x, y),
stat = "identity", position = "dodge",
alpha = 0.6, fill = "#008080") +
stat_summary(aes(label = scales::comma(..y..)), fun.y = 'sum',
geom = 'text', col = 'white', vjust = 1.5) +
theme_classic()
Plot:
PS:
You have to add stat_summary after geom_bar

Resources