I can't have a really back-to-back bar plot [duplicate] - r

I want to draw a back-to-back bar plot in which each side has its own independent axis. I can plot the bars back to back by negating one set of values, but that leaves both sets on the same axis, and because the p-values are much smaller their bars are barely visible.
library(ggplot2)

# Example data: eight descriptions (a-h), each with one "size" value and one
# "p" value, stored in long format.
df <- data.frame(
  Description = rep(letters[1:8], times = 2),
  test = rep(c("size", "p"), each = 8),
  value = c(
    0.1, 0.1, 0.125, 0.1, 0.075, 0.1, 0.075, 0.125,
    0.000230705311441713, 0.000314488619269942, 0.00106639822095382,
    0.00108290238851994, 0.00114723539549198, 0.00160204850890075,
    0.0019276388745184, 0.00320371567547557
  ),
  stringsAsFactors = FALSE
)

# Flip the sign of the p-values so they are drawn on the opposite side of zero.
df$value <- ifelse(df$test == "p", -df$value, df$value)

# Horizontal bars, one colour per test; both tests share a single value axis.
ggplot(df, aes(x = Description, y = value, group = test, fill = test)) +
  geom_col() +
  coord_flip()
Ideally I would like each group on an independent axis so that the bars meet at zero (in the middle of the plot region) but are drawn on different scales. For this example the limits would be something like ylim(0, 0.13) for size and ylim(0, 0.0035) for the p-values.

You can do this by using facets, and tweaking to remove the spacing between facets:
# One facet per test, drawn as horizontal bars. The value scale has no
# expansion and the horizontal gap between panels is removed so that the
# two panels sit directly next to each other.
ggplot(df, aes(x = Description, y = value, fill = test)) +
  geom_col() +
  scale_y_continuous(expand = c(0, 0)) +
  coord_flip() +
  facet_wrap(~ test, scales = "free_x") +
  theme(panel.spacing.x = unit(0, "mm"))
It might create some issues with axis labels, and these would be a bit tricky to solve. In that case, it might be easier to keep some space between the facets, at the expense of not having the bars meet in the middle.
Output:
PS: you can also remove the negative axis labels with something like:
# Same value scale without expansion, but the tick labels show the absolute
# value rounded to three significant digits (hides the minus signs).
scale_y_continuous(
  labels = function(x) signif(abs(x), digits = 3),
  expand = c(0, 0)
)

I have adapted this elegant solution to my needs. Kudos to Lingyun Zhang.
library(dplyr)
library(ggplot2)

# Simulate 1000 people, each with a job type (A-J, drawn with unequal
# probabilities) and a gender.
set.seed(123)
ten_positive_rand_numbers <- abs(rnorm(10)) + 0.1
the_prob <- ten_positive_rand_numbers / sum(ten_positive_rand_numbers)
fk_data <- data.frame(
  job_type = sample(LETTERS[1:10], 1000, replace = TRUE, prob = the_prob),
  gender = sample(c("Male", "Female"), 1000, replace = TRUE)
)

# prepare data for plotting
# count() returns an ungrouped result, so no grouping leaks into later steps
# and no regrouping message is printed.
plotting_df <-
  fk_data %>%
  count(job_type, gender, name = "Freq") %>%
  # a trick! Male counts are made negative so they extend to the other side
  mutate(Freq = if_else(gender == "Male", -Freq, Freq))

## find the order: job types sorted by increasing Female count
the_order <-
  plotting_df %>%
  filter(gender == "Female") %>%
  arrange(Freq) %>%
  pull(job_type)

# Tick positions on the count axis; the labels show their absolute values.
count_breaks <- seq(-150, 150, 50)

# plot
p <-
  plotting_df %>%
  ggplot(aes(x = job_type, y = Freq, group = gender, fill = gender)) +
  geom_bar(stat = "identity", width = 0.75) +
  coord_flip() +
  scale_x_discrete(limits = the_order) +
  # another trick!
  scale_y_continuous(breaks = count_breaks, labels = abs(count_breaks)) +
  labs(x = "Job type", y = "Count", title = "Back-to-back bar chart") +
  theme(
    legend.position = "bottom",
    legend.title = element_blank(),
    plot.title = element_text(hjust = 0.5),
    panel.background = element_rect(fill = "grey90")
  ) +
  # reverse the order of items in legend
  # guides(fill = guide_legend(reverse = TRUE)) +
  # change the default colors of bars
  # The values are named so the colour of each gender does not depend on how
  # unnamed values are matched against limits or breaks.
  scale_fill_manual(
    values = c(Male = "red", Female = "blue"),
    name = "",
    breaks = c("Male", "Female"),
    labels = c("Male", "Female")
  )
print(p)
It can be improved with other minor details, including geom_hline(yintercept = 0, colour = "black").

Marius's solution is easier than this one, but this approach allows more control over each graph independently.
I had to remove the plot margins on the right of p1 and on the left of p2. For some reason there is padding on the left margin, so I needed -3.5pt to bring it flush; I am not sure whether this will be consistent across all plots. The other manual step is changing the breaks on one axis so that the two 0 labels aren't plotted on top of each other.
I also don't need to negate the p-values; I just use scale_y_reverse.
# Left-hand plot: the p-values on a reversed value axis, with no right margin.
# expansion() replaces the deprecated expand_scale().
p1 <- ggplot(df[df$test == "p", ], aes(x = Description, y = value)) +
  geom_col(fill = "red") +
  theme_minimal() +
  coord_flip() +
  scale_y_reverse(name = "axis1", expand = expansion(mult = c(0.05, 0))) +
  theme(panel.spacing.x = unit(0, "mm")) +
  theme(plot.margin = unit(c(5.5, 0, 5.5, 5.5), "pt"))

# Right-hand plot: all other rows. The breaks start at 0.025, so no 0 tick is
# drawn here, and the category axis decorations are removed.
p2 <- ggplot(df[df$test != "p", ], aes(x = Description, y = value)) +
  geom_col(fill = "blue") +
  scale_y_continuous(
    name = "axis2",
    breaks = seq(0.025, 0.125, 0.025),
    expand = expansion(mult = c(0, 0.05))
  ) +
  coord_flip() +
  theme(panel.spacing.x = unit(0, "mm")) +
  theme_minimal() +
  theme(
    axis.title.y = element_blank(), axis.text.y = element_blank(),
    axis.line.y = element_blank(), axis.ticks.y = element_blank(),
    # negative left margin pulls the panel flush against p1
    plot.margin = unit(c(5.5, 5.5, 5.5, -3.5), "pt")
  )

# Draw both plots side by side on a fresh page. The grid functions are
# namespace-qualified because grid is not attached by library(ggplot2).
grid::grid.newpage()
grid::grid.draw(cbind(ggplotGrob(p1), ggplotGrob(p2), size = "last"))
I have also used theme_minimal, but that was just my aesthetic preference.

Related

Issue with scale_color_identity() in ggplot -- labels reordered automatically by colour name

I want to control my font colour using explicitly named colours in the label_col column. However, it seems that the colours "black" and "white" end up being ordered alphabetically by the colour name, instead of following the same fill and position specified in ggplot(aes()) and geom_col().
Here is my minimal example with black and white:
# Minimal example: stacked bars per group with one text label per segment.
# The label colour is taken literally from the label_col column.
list_order <- c("a", "b", "c", "d", "e", "f")
test <- data.frame(
  indicator = rep(list_order, 2),
  mean = c(1.1, 0.3, 6.3, 4, 7, 2.1, 1.2, 0.7, 6.9, 3.3, 5.4, 1.8),
  group = c(rep("Group A", 6), rep("Group B", 6))
)
test %>%
  mutate(
    indicator = factor(indicator, levels = list_order),
    label = round(mean, 1),
    label_col = ifelse(indicator %in% c("a", "d", "e", "f"), "black", "white")
  ) %>%
  ggplot(aes(x = group, y = mean, fill = indicator)) +
  geom_col(width = 0.7, position = "stack") +
  geom_text(
    aes(label = label, color = label_col),
    size = 3, position = position_stack(vjust = 0.5)
  ) +
  scale_color_identity() +
  # TRUE instead of T: T is an ordinary variable that can be reassigned
  scale_fill_viridis(discrete = TRUE, option = "plasma")
Which gives me this:
If I change the colours in label_col to both "white", or both "black", I get them in the correct places like here:
Is there something I am not understanding about how ggplot positions labels and text? I know how to work around this by using scale_color_manual(), but would also like to understand why scale_color_identity() is not working here, and the solution for it.
The problem is that the variables are not grouped explicitly. I modified your code by adding group = group to the aesthetics specification and it seems to work.
I am not entirely sure where specifically lies the problem, but I assume that the problem is with some reordering done under the hood -- you can see in your example that all white labels are brought to the bottom and all black are brought to the top.
# Same plot as in the question, with an explicit group aesthetic shared by
# the bar layer and the text layer.
test %>%
  mutate(
    indicator = factor(indicator, levels = list_order),
    label = round(mean, 1),
    label_col = ifelse(indicator %in% c("a", "d", "e", "f"), "black", "white")
  ) %>%
  ggplot(aes(x = group, y = mean, fill = indicator, group = group)) +
  geom_col(width = 0.7, position = "stack") +
  geom_text(
    aes(label = label, color = label_col),
    size = 3, position = position_stack(vjust = 0.5)
  ) +
  scale_color_identity() +
  # TRUE instead of T: T is an ordinary variable that can be reassigned
  scale_fill_viridis(discrete = TRUE, option = "plasma")

ggplot: how to assign both color and shape for one factor, and also shape for another factor?

I must code factor age with both color and shape. I know how to do that (see the plot and data/code below).
In addition, I also have to code factor day with shape.
Is it somehow possible to assign specified different shapes to two different factors?
Below is the legend I would love to achieve (I made an example in power point).
The plot is not quite right, as only factor age is coded with color and shape.
# Example data: 3 tests x 3 ages x 2 days, one result per combination.
df <- data.frame(
  test = c(1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3),
  age = c(1, 1, 1, 2, 2, 2, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3),
  day = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2),
  result = c(1, 2, 2, 1, 1, 2, 2, 1, 0, 2, 2, 0, 1, 2, 1, 2, 1, 0)
)
df$test <- factor(df$test)
df$age <- factor(df$age)
df$day <- factor(df$day)

# Open a 4 x 3 inch graphics device. dev.new() picks the platform's default
# device, whereas windows() exists only on Windows.
dev.new(width = 4, height = 3)

# Points dodged and jittered by age; age is encoded by both colour and shape.
df %>%
  ggplot(aes(x = test, y = result)) +
  geom_point(
    aes(color = age, shape = age, group = age),
    position = position_jitterdodge(
      dodge.width = 0.8, jitter.height = 0.2, jitter.width = 0
    ),
    size = 2, stroke = 0.8
  ) +
  scale_shape_manual(
    values = c(16, 15, 17), name = "",
    labels = c("young", "older", "the oldest")
  ) +
  scale_color_manual(
    name = "", labels = c("young", "older", "the oldest"),
    values = c("#009E73", "#56B4E9", "#D55E00")
  ) +
  theme_bw() +
  theme(
    panel.border = element_blank(), axis.ticks = element_blank(),
    # NOTE(review): c() is NULL here - presumably a placeholder; confirm the
    # intended legend position.
    legend.position = c(),
    legend.text = element_text(size = 10, face = "bold"),
    legend.title = element_text(size = 10),
    panel.grid.major.x = element_blank(),
    panel.grid.major.y = element_blank(),
    plot.title = element_text(size = 10, face = "bold"),
    axis.title = element_text(size = 11),
    axis.text.y = element_text(size = 9, angle = 45),
    axis.text.x = element_text(size = 9, angle = 90),
    plot.margin = unit(c(0.5, 0.2, 0, 0), "cm")
  ) +
  labs(title = "", x = "", y = "Test result") +
  scale_y_continuous(
    breaks = c(0, 1, 2), labels = c("good", "better", "the best")
  ) +
  # light separator lines at x = 0.5, 1.5, 2.5 and at y = -0.5
  geom_vline(xintercept = c(0.5, 1.5, 2.5), color = "grey90") +
  geom_hline(yintercept = -0.5, color = "grey90") +
  expand_limits(x = 3.9, y = c(0, 2.35)) +
  scale_x_discrete(
    limits = c("1", "2", "3"), labels = c("test a", "test b", "test c")
  ) +
  coord_cartesian(clip = "off")
You can map shape to the interaction between age and day, and map color only to age. Then remove the color legend and color the shape legend manually with override.aes.
This comes close to what you want - the labels can be changed; I've defined them when creating the factors.
how to make fancy legends
However, you want a quite fancy legend, so the easiest would be to build the legend yourself as a separate plot and combine to the main panel. ("Fake legend"). This requires some semi-hardcoding, but you're not shy to do this anyways given the manual definition of your shapes. See Part Two how to do this.
Part one
library(ggplot2)

df <- data.frame(
  test = rep(c(1, 2, 3), times = 6),
  age = rep(rep(c(1, 2, 3), each = 3), times = 2),
  day = rep(c(1, 2), each = 9),
  result = c(1, 2, 2, 1, 1, 2, 2, 1, 0, 2, 2, 0, 1, 2, 1, 2, 1, 0)
)
df$test <- factor(df$test)
## Note: the labels are set here already. If you go to the effort of
## converting to a factor, define levels and labels at this point.
df$age <- factor(df$age, labels = c("young", "older", "the oldest"))
df$day <- factor(df$day, labels = paste("Day", 1:2))

## one colour per age, reused for the plot and for the legend keys
age_palette <- c("#009E73", "#56B4E9", "#D55E00")

ggplot(df, aes(x = test, y = result)) +
  geom_jitter(
    aes(color = age, shape = interaction(day, age)),
    width = .1, height = .1
  ) +
  ## you won't get around manually defining the shapes
  scale_shape_manual(values = c(0, 15, 1, 16, 2, 17)) +
  scale_color_manual(values = age_palette) +
  ## hide the colour legend and paint the shape legend keys instead,
  ## two consecutive keys per age colour
  guides(
    color = "none",
    shape = guide_legend(
      override.aes = list(color = rep(age_palette, each = 2)),
      ncol = 3
    )
  )
Part two - the fake legend
library(ggplot2)
library(dplyr)
library(patchwork)
## df and factor creation as above !!!

## main panel: same points as before, but without any legend
p_panel <-
  ggplot(df, aes(x = test, y = result)) +
  geom_jitter(
    aes(color = age, shape = interaction(day, age)),
    width = .1, height = .1
  ) +
  ## you won't get around manually defining the shapes
  scale_shape_manual(values = c(0, 15, 1, 16, 2, 17)) +
  scale_color_manual(values = c("#009E73", "#56B4E9", "#D55E00")) +
  ## for this solution, I'm removing the legend entirely
  theme(legend.position = "none")

## make the data frame for the fake legend
## the y coordinates should be defined relative to the y values in your panel
y_coord <- c(.9, 1.1)
## One row per day/age combination. The position comes from the factor codes
## (age -> column, day -> row), so it does not depend on the order in which
## distinct() happens to return the rows.
df_legend <- df %>%
  distinct(day, age) %>%
  mutate(x = as.integer(age), y = y_coord[as.integer(day)])

## The legend plot is basically the same as the main plot, but without legend -
## because it IS the legend ... ;)
lab_size <- 10 * 5 / 14
p_leg <-
  ggplot(df_legend, aes(x = x, y = y)) +
  geom_point(aes(color = age, shape = interaction(day, age))) +
  ## I'm annotating in separate layers because it keeps it clearer (for me)
  ## column headers: one age label above each column, taken from the levels
  annotate(
    geom = "text", x = seq_along(levels(df_legend$age)),
    y = max(y_coord) + .1,
    size = lab_size, angle = 45, hjust = 0,
    label = levels(df_legend$age)
  ) +
  ## row labels: one day label to the right of each row
  annotate(
    geom = "text", x = max(df_legend$x) + .2, y = y_coord,
    label = levels(df_legend$day), size = lab_size, hjust = 0
  ) +
  scale_shape_manual(values = c(0, 15, 1, 16, 2, 17)) +
  scale_color_manual(values = c("#009E73", "#56B4E9", "#D55E00")) +
  theme_void() +
  theme(
    legend.position = "none",
    plot.margin = margin(r = .3, unit = "in")
  ) +
  ## you need to turn clipping off and define the same y limits as your panel
  coord_cartesian(clip = "off", ylim = range(df$result))

## now combine them
p_panel + p_leg +
  plot_layout(widths = c(1, .2))
So again, following the question "Filled and hollow shapes where the fill color = the line color", the following code provides the goods, but without giving you the legend.
# Legend labels and colours used by the colour scale and the fill scale.
age_labels <- c("young", "older", "the oldest")
age_palette <- c("#009E73", "#56B4E9", "#D55E00")

df %>%
  ggplot(aes(x = test, y = result)) +
  geom_point(
    aes(
      color = age,
      shape = age,
      group = age,
      # STEP 1: fill is NA on day 1 and follows age otherwise
      fill = factor(ifelse(day == 1, NA, age))
    ),
    position = position_jitterdodge(
      dodge.width = 0.8, jitter.height = 0.2, jitter.width = 0
    ),
    size = 2, stroke = 0.8
  ) +
  scale_shape_manual(values = c(22, 21, 24), name = "", labels = age_labels) +
  scale_color_manual(name = "", labels = age_labels, values = age_palette) +
  # STEP 2: same colours for the fill; NA stays unfilled, no fill legend
  scale_fill_manual(
    name = "",
    labels = age_labels,
    values = age_palette,
    na.value = NA, guide = "none"
  )
I was misleading in my comment: rather than "hollow" shapes, we want shapes 21 through 26. These apparently accept distinct fill and color.

Alluvial plot with 2 different sources but a converging/shared variable [R]

I have experience with making alluvial plots using the ggalluvial package. However, I have run in to an issue where I am trying to create an alluvial plot with two different sources that converge onto 1 variable.
here is example data
library(dplyr)
library(ggplot2)
library(ggalluvial)

# Ten entries: the first five have only label_1, the last five only label_2,
# and every entry has a shared_label.
data <- data.frame(
  unique_alluvium_entires = 1:10,
  label_1 = c(LETTERS[1:5], rep(NA, 5)),
  label_2 = c(rep(NA, 5), LETTERS[6:10]),
  shared_label = c("a", "b", "c", "c", "c", "c", "c", "a", "a", "b")
)
here is the code I use to make the plot
# prep the data
# Attach to every row the number of rows sharing its shared_label, then drop
# the grouping so it does not leak into the reshaping step.
data <- data %>%
  group_by(shared_label) %>%
  mutate(freq = n()) %>%
  ungroup()
# Long format: one row per entry and label column (variable / value pairs).
data <- reshape2::melt(data, id.vars = c("unique_alluvium_entires", "freq"))
# Axis order: label_1, then the shared label in the middle, then label_2.
data$variable <- factor(
  data$variable,
  levels = c("label_1", "shared_label", "label_2")
)

# ggplot
ggplot(
  data,
  aes(
    x = variable, stratum = value, alluvium = unique_alluvium_entires,
    y = freq, fill = value, label = value
  )
) +
  scale_x_discrete(expand = c(.1, .1)) +
  geom_flow() +
  geom_stratum(color = "grey", width = 1 / 4, na.rm = TRUE) +
  geom_text(stat = "stratum", size = 4) +
  theme_void() +
  theme(
    axis.text.x = element_text(size = 12, face = "bold")
  )
(apparently I cannot embed images yet)
As you can see, I can remove the NA values, but the shared_label does not properly "stack". Each unique row should stack on top of each other in the shared_label column. This would also fix the sizing issue so that they are equal size along the y axis.
Any ideas how to fix this? I have tried ggsankey, but the same issue arises and I cannot remove the NA values. Any tips are greatly appreciated!
This plot is the expected result of the "flow" statistical transformation, which is the default for the "flow" graphical object. (That is, geom_flow() is equivalent to geom_flow(stat = "flow").) It looks like what you want is to specify the "alluvium" statistical transformation instead. Below I've used all your code, but only copied and edited the ggplot() call.
# ggplot: same call as in the question, except for the stat of the flow layer
ggplot(
  data,
  aes(
    x = variable, y = freq,
    stratum = value, alluvium = unique_alluvium_entires,
    fill = value, label = value
  )
) +
  geom_flow(stat = "alluvium") + # <-- specify alternate stat
  geom_stratum(color = "grey", width = 1 / 4, na.rm = TRUE) +
  geom_text(stat = "stratum", size = 4) +
  scale_x_discrete(expand = c(.1, .1)) +
  theme_void() +
  theme(axis.text.x = element_text(size = 12, face = "bold"))
#> Warning: Removed 2 rows containing missing values (geom_text).
Created on 2021-12-10 by the reprex package (v2.0.1)

changing legend of faceted boxplot in ggplot2 to have groups with similar names inside

This question builds off of enter link description here but is in the context of faceted boxplots.
So, I have the following code:
# Simulated data: two data frames of 180 rows each (n = 10 and n = 5) with a
# Method (A/B/C), a Pattern (X/Y/Z), a uniform random value X1, a Complexity
# (High/Low) and a simulation index nsim. The seed makes X1 reproducible; dd
# is generated before dd1, so the order of these lines matters.
set.seed(20210714)
dd <- data.frame(Method = rep(c("A", "B", "C"), each = 60), Pattern = rep(c("X", "Y", "Z"), times = 30), X1 = runif(180), Complexity = rep(c("High", "Low"), times = 90), nsim = rep(rep(1:10, times = 9), each = 2), n = 10)
dd1 <- data.frame(Method = rep(c("A", "B", "C"), each = 60), Pattern = rep(c("X", "Y", "Z"), times = 30), X1 = runif(180), Complexity = rep(c("High", "Low"), times = 90), nsim = rep(rep(1:10, times = 9), each = 2), n = 5)
# Stack both sample sizes into one data frame.
dd <- rbind(dd, dd1)
library(ggplot2)
# create dummy dataframe.
# Two extra rows are appended after the real data. They get the first two
# unique Pattern values, the placeholder Method "ZZZ" and one Complexity
# each; all other columns of these rows are left unset.
# NOTE(review): presumably these rows only create spacer positions on the
# x axis between the pattern groups - confirm against the rendered plot.
dummy.df <- dd
dummy.df[nrow(dd) + 1:2,"Pattern"] <- unique(dd$Pattern)[-3]
dummy.df[nrow(dd) + 1:2,"Method"] <- "ZZZ"
dummy.df[nrow(dd) + 1:2,"Complexity"] <- c("High","Low")
# x-axis variable: one level per Method/Pattern combination.
dummy.df$dummy <- interaction(dummy.df$Method,dummy.df$Pattern)
# Boxplots per Method/Pattern, faceted by Complexity, with thin grey lines
# and points drawn per Pattern/nsim group on top.
ggplot(dummy.df, aes(x = dummy, y = X1, fill = Method)) +
geom_boxplot(aes(fill = Method)) +
facet_grid(~Complexity) +
theme_light() +
theme(legend.position = 'bottom') +
guides(fill = guide_legend(nrow=1)) +
geom_line(aes(x = dummy,
group=interaction(Pattern,nsim)),
size = 0.35, alpha = 0.35, colour = I("#525252")) +
geom_point(aes(x = dummy,
group=interaction(Pattern,nsim)),
size = 0.35, alpha = 0.25, colour = I("#525252")) +
# Twelve axis labels; only positions 2, 6 and 10 carry text (X, Y, Z).
scale_x_discrete(labels = c("","X", "", "", "", "Y", "", "", "", "Z","","")) +
xlab("Pattern") +
# Only A, B and C appear in the legend; "ZZZ" is not listed in breaks.
scale_fill_brewer(breaks=c("A", "B", "C"), type="qual", palette="Paired")
# Second version: the fill now distinguishes Method and sample size n.
# Rebuild the padded data frame exactly as before (two extra "ZZZ" rows).
dummy.df <- dd
dummy.df[nrow(dd) + 1:2,"Pattern"] <- unique(dd$Pattern)[-3]
dummy.df[nrow(dd) + 1:2,"Method"] <- "ZZZ"
dummy.df[nrow(dd) + 1:2,"Complexity"] <- c("High","Low")
# This first definition of dummy is overwritten two lines below.
dummy.df$dummy <- interaction(dummy.df$Method,dummy.df$Pattern)
# fill: one level per Method/n combination.
dummy.df$fill <- interaction(dummy.df$Method, dummy.df$n)
# x-axis variable: one level per fill/Pattern combination.
dummy.df$dummy <- interaction(dummy.df$fill, dummy.df$Pattern)
# Remove four levels by position.
# NOTE(review): presumably these are the "ZZZ" combinations that are not
# wanted as spacers - verify with levels(dummy.df$dummy) before changing the
# data, since the indices depend on the level order.
dummy.df$dummy <- factor(dummy.df$dummy, levels = levels(dummy.df$dummy)[-c(4, 12, 20, 24)])
# Rows 361 and 362 are the two appended padding rows (dd has 360 rows).
dummy.df$dummy[361:362] <- "A.10.Z" ## dummy variables to get rid of NAs
# Global theme for all following plots.
theme_set(theme_bw(base_size = 14))
# Same layers as the first plot, but filled by Method/n and with lines and
# points grouped by Pattern, nsim and n.
ggplot(dummy.df, aes(x = dummy, y = X1, fill = fill)) +
geom_boxplot(aes(fill = fill),lwd=0.1,outlier.size = 0.01) +
facet_grid(~Complexity) +
theme(legend.position = 'bottom') +
guides(fill = guide_legend(nrow=1)) +
geom_line(aes(x = dummy,
group=interaction(Pattern,nsim,n)),
size = 0.35, alpha = 0.35, colour = I("#525252")) +
geom_point(aes(x = dummy,
group=interaction(Pattern,nsim,n)),
size = 0.35, alpha = 0.25, colour = I("#525252")) +
# Only the A.10.X / A.10.Y / A.10.Z positions get a tick, labelled X, Y, Z;
# drop = FALSE keeps levels that have no data.
scale_x_discrete(labels = c("X", "Y", "Z"), breaks = paste("A.10.", c("X", "Y", "Z"), sep = ""),drop=FALSE) +
xlab("Pattern") +
# Legend entries: all fill levels except the 4th and 8th.
# NOTE(review): presumably those are the two "ZZZ" levels - confirm.
scale_fill_brewer(breaks= levels(dummy.df$fill)[-c(4,8)], type="qual", palette="Paired")
This yields the following plot.
All is well, except with the legend. I would like the following: the dark colors to be in the First group titled "n=5" on the left, with "A", "B", "C" for the three dark colors, and the light colors to be to the right, in a Second group titled "n=10" on the right, with "A", "B", "C" for the three light colors. Sort of like in the link enter link description here above.
What I can not figure out is how to call the boxplot twice to mimic the solution there.
Is there a way to do this? Please feel free to let me know if the question is not clear.
Thanks again, in advance, for any help!
Adapting my answer on your former question this could be achieved like so:
library(ggplot2)
library(ggnewscale)

# Legend keys: every fill level except the 4th and 8th, sorted alphabetically.
fill_keys <- sort(levels(dummy.df$fill)[-c(4, 8)])
# Key -> legend label (the ".<digits>" part removed) and key -> Paired colour.
key_labels <- setNames(gsub("\\.\\d+", "", fill_keys), fill_keys)
key_colors <- setNames(
  scales::brewer_pal(type = "qual", palette = "Paired")(6),
  fill_keys
)
# Which keys belong to which sample size (key ends in "5" or in "10").
is_n5 <- grepl("5$", fill_keys)
is_n10 <- grepl("10$", fill_keys)

# The boxplot layer is drawn twice, each time with its own fill scale, so
# that each sample size gets a separate legend.
ggplot(dummy.df, aes(x = dummy, y = X1, fill = fill)) +
  geom_boxplot(aes(fill = fill), lwd = 0.1, outlier.size = 0.01) +
  scale_fill_manual(
    name = "n = 5",
    breaks = fill_keys[is_n5],
    labels = key_labels[is_n5],
    values = key_colors,
    guide = guide_legend(title.position = "left", order = 1)
  ) +
  new_scale_fill() +
  geom_boxplot(aes(fill = fill), lwd = 0.1, outlier.size = 0.01) +
  scale_fill_manual(
    name = "n = 10",
    breaks = fill_keys[is_n10],
    labels = key_labels[is_n10],
    values = key_colors,
    guide = guide_legend(title.position = "left", order = 2)
  ) +
  facet_grid(~Complexity) +
  theme(legend.position = "bottom") +
  guides(fill = guide_legend(nrow = 1)) +
  geom_line(
    aes(x = dummy, group = interaction(Pattern, nsim, n)),
    size = 0.35, alpha = 0.35, colour = I("#525252")
  ) +
  geom_point(
    aes(x = dummy, group = interaction(Pattern, nsim, n)),
    size = 0.35, alpha = 0.25, colour = I("#525252")
  ) +
  scale_x_discrete(
    labels = c("X", "Y", "Z"),
    breaks = paste("A.10.", c("X", "Y", "Z"), sep = ""),
    drop = FALSE
  ) +
  xlab("Pattern")
#> Warning: Removed 2 rows containing non-finite values (new_stat_boxplot).

How to adjust current scatter plot code to match target scatter plot in ggplot2 (R)?

There is a data set that looks like this showing winners of each election from 1860-today (snippet provided):
year winner win_party ec_pct popular_pct
1860 Abraham Lincoln Rep. 0.59 0.4
1864 Abraham Lincoln Rep. 0.9 0.55
Using the full data set, the image below has been produced (x axis = popular_pct, y axis = ec_pct):
I'm trying to reproduce this in ggplot2 in R. Using the following code, I have reached the second image. How can I edit this code to (a) include the two grey lines in image one, (b) enlarge Donald Trump and Joe Biden (and include the years next to their names) and ensure that they are the only names not faded, (c) adjust the axes so that they match? (e.g. the first image has 40%, 50%, 60% whereas the second image also includes 45% and 55%). And (d) reduce the size of the names of the winners as they are smaller/clearer in image 1.
# Scatter plot of popular-vote share against electoral-college share,
# coloured by winning party, with faded points and faded winner labels.
ggplot(data, aes(x = popular_pct, y = ec_pct, colour = win_party)) +
  geom_point(alpha = 0.3) + # Fades the dots
  expand_limits(y = c(0.35, 1)) +
  geom_text_repel(
    aes(label = winner),
    alpha = 0.3, # Fades the winners
    hjust = 0, vjust = 0
  ) +
  # both axes are labelled as percentages
  scale_x_continuous(labels = function(eldata) paste0(eldata * 100, "%")) +
  scale_y_continuous(labels = function(eldata) paste0(eldata * 100, "%")) +
  scale_color_manual(values = c("Rep." = "red", "Dem." = "blue")) +
  theme_bw() + # Removes grid background
  theme(legend.position = "none")
Using some random example data this could be achieved like so:
The grey lines can be added using geom_hline/geom_vline; I used the means for the intercepts.
The axis tick labels can be set via breaks.
To adjust the label size and alpha, and to add the year for specific points, you can make use of ifelse and scale_size_manual/scale_alpha_manual.
Additionally, I used the same idea to set the alpha and the size of the points.
library(ggplot2)
library(ggrepel)

# Random Example data
set.seed(42)
data <- data.frame(
  winner = LETTERS,
  year = seq(1920, 2020, 4),
  win_party = sample(c("Rep.", "Dem."), 26, replace = TRUE),
  popular_pct = runif(26, .35, .65),
  ec_pct = runif(26, .35, 1)
)

# Winners that are emphasised (larger, not faded, labelled with the year).
highlighted <- c("B", "T")
# Axis label formatter: proportion -> percentage string.
as_percent <- function(eldata) paste0(eldata * 100, "%")

ggplot(data, aes(x = popular_pct, y = ec_pct, colour = win_party)) +
  # Points: size and alpha keys differ for highlighted and other winners
  geom_point(
    aes(
      size = ifelse(winner %in% highlighted, "btp", "otherp"),
      alpha = ifelse(winner %in% highlighted, "bt", "other")
    )
  ) + # Fades the dots
  # grey reference lines at the mean of each variable
  geom_vline(xintercept = mean(data$popular_pct), color = "grey") +
  geom_hline(yintercept = mean(data$ec_pct), color = "grey") +
  scale_y_continuous(breaks = c(.4, .6, .8, 1), labels = as_percent) +
  scale_x_continuous(breaks = c(.4, .5, .6), labels = as_percent) +
  expand_limits(y = c(0.35, 1)) +
  scale_color_manual(values = c("Rep." = "red", "Dem." = "blue")) +
  # "bt"/"other" are the text sizes, "btp"/"otherp" the point sizes
  scale_size_manual(
    values = c("bt" = 12 / .pt, "other" = 8 / .pt, btp = 2, otherp = 1)
  ) +
  scale_alpha_manual(values = c("bt" = 1, "other" = .3)) +
  # Labels: highlighted winners also show the year
  geom_text_repel(
    aes(
      label = ifelse(winner %in% highlighted, paste(winner, year), winner),
      size = ifelse(winner %in% highlighted, "bt", "other"),
      alpha = ifelse(winner %in% highlighted, "bt", "other")
    ),
    hjust = 0, vjust = 0
  ) +
  theme_bw() + # Removes grid background
  theme(legend.position = "none")
Created on 2020-11-25 by the reprex package (v0.3.0)

Resources