march madness brackets with ggplot2 - r

I'm trying to make a march madness bracket with ggplot2 and am having issues annotating the plot. Specifically, I've found that while I have no issues placing team names, I can't get the text to keep an absolute size. Hence, depending on the size of the window from which users view their ggplot object some team names look too big.
To demonstrate what I'm talking about, look at the following two images:
small bracket and medium bracket
In both images the size of the text is unchanged, despite the length of the bars being different. I'd like to know if there's a way to have the size of the text scale with the image, as opposed to being fixed in size. (My code is below; I apologize in advance for how tortuous it may seem.)
### Helper functions
# Return the first `x` even numbers (2, 4, ..., 2 * x) as an evenly spaced
# sequence of length `x`.
first_evens <- function(x) {
  last_even <- 2 * x
  seq(from = 2, to = last_even, length.out = x)
}
# Return the first `x` odd numbers (1, 3, ..., 2 * x - 1) as an evenly spaced
# sequence of length `x`.
first_odds <- function(x) {
  last_odd <- 2 * x - 1
  seq(from = 1, to = last_odd, length.out = x)
}
### y-values of the horizontal bars, one vector per round.
### These describe the top-left corner of the bracket; negating the
### sequences gives the values for the bottom corners.
### Each later round sits at the midpoints of the previous round's pairs.
# height of a reference string (in inches) sets the vertical extent of round 1
r1.y.width <- 1.5 * strheight("Virginia Common", units = "inches")
# this affects the distance of the first bar from y = 0
r1.y.offset <- 0.125 * r1.y.width
r1.y <- seq(r1.y.offset, r1.y.offset + r1.y.width, length.out = 16)
r2.y <- seq(mean(head(r1.y, 2)), mean(tail(r1.y, 2)), length.out = 8)
r3.y <- seq(mean(head(r2.y, 2)), mean(tail(r2.y, 2)), length.out = 4)
r4.y <- seq(mean(head(r3.y, 2)), mean(tail(r3.y, 2)), length.out = 2)
# round 5 has a single bar per quadrant: the midpoint of the two round-4 bars
r5.y <- seq(mean(r4.y[1:2]), mean(r4.y[1:2]), length.out = 1)
# final-round bars sit just off the horizontal centre line
r6.y <- 1.5 * r1.y.offset
### calculate horizontal bar start and stop coordinates
### note that there are 6 total rounds -- 5 rounds per quadrant
# Bar length: 1.25 times the rendered width (in inches) of the longest team
# name. The reference string is spelled exactly like the label that is drawn
# later ("Virginia Commonwealth"), so the measured width matches it.
r1.x.width <- 1.25 * strwidth("Virginia Commonwealth", units = "inches")
# distance of the innermost break point from x = 0
r1.x.offset <- 1
# Break points between rounds on the left half (all negative); the right
# half is obtained by negating these values.
round.break.points <- -(seq(from = 0, to = 7 * r1.x.width, by = r1.x.width) + r1.x.offset)
# Each round spans two neighbouring break points, outermost round first.
r1.x <- round.break.points[7:6]
r2.x <- round.break.points[6:5]
r3.x <- round.break.points[5:4]
r4.x <- round.break.points[4:3]
r5.x <- round.break.points[3:2]
r6.x <- round.break.points[2:1]
### Vertical connector coordinates, derived from the per-round y-values.
### Within a round, each connector joins an odd-positioned bar (start) to the
### even-positioned bar that follows it (stop).
### Round 5 verticals connect the four subtrees: top-left <-> bottom-left and
### top-right <-> bottom-right.
r1.verticals.start <- r1.y[seq(1, 15, by = 2)]
r1.verticals.stop  <- r1.y[seq(2, 16, by = 2)]
r2.verticals.start <- r2.y[seq(1, 7, by = 2)]
r2.verticals.stop  <- r2.y[seq(2, 8, by = 2)]
r3.verticals.start <- r3.y[c(1, 3)]
r3.verticals.stop  <- r3.y[c(2, 4)]
r4.verticals.start <- r4.y[1]
r4.verticals.stop  <- r4.y[2]
# round 5 spans from the top half (+y) to the mirrored bottom half (-y)
r5.verticals.start <- r5.y[1]
r5.verticals.stop  <- -r5.y[1]
### Blank canvas for the bracket: black-and-white theme with axes, grid lines
### and panel border removed, and view limits padded slightly beyond the
### outermost first-round bars.
empty.bracket <- ggplot() +
  theme_bw() +
  theme(
    axis.line = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.border = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    plot.margin = unit(c(0, 0, -6, -6), "mm"),
    text = element_text(size = 12, hjust = 0, vjust = 0)
  ) +
  coord_cartesian(
    ylim = c(-1.05 * r1.y[16], 1.05 * r1.y[16]),
    xlim = c(1.025 * r1.x[1], -1.025 * r1.x[1])
  )
### Add the bracket lines round by round. For rounds 1-4 each statement adds
### eight layers, two per quadrant (horizontal bars, then vertical
### connectors), in this order:
###   ( x,  y) top-left, ( x, -y) bottom-left,
###   (-x,  y) top-right, (-x, -y) bottom-right.
### The r*.x values are negative, so un-negated x is the left half.
### add first round bars, and vertical connectors, make addition of each quadrant verbose
empty.bracket <- empty.bracket + geom_segment(aes(x=r1.x[1],y=r1.y,yend=r1.y,xend=r1.x[2])) + geom_segment(aes(x=r1.x[2],xend=r1.x[2],y=r1.verticals.start,yend=r1.verticals.stop)) + geom_segment(aes(x=r1.x[1],y=-r1.y,yend=-r1.y,xend=r1.x[2])) + geom_segment(aes(x=r1.x[2],xend=r1.x[2],y=-r1.verticals.start,yend=-r1.verticals.stop)) + geom_segment(aes(x=-r1.x[1],y=r1.y,yend=r1.y,xend=-r1.x[2])) + geom_segment(aes(x=-r1.x[2],xend=-r1.x[2],y=r1.verticals.start,yend=r1.verticals.stop)) + geom_segment(aes(x=-r1.x[1],y=-r1.y,yend=-r1.y,xend=-r1.x[2])) + geom_segment(aes(x=-r1.x[2],xend=-r1.x[2],y=-r1.verticals.start,yend=-r1.verticals.stop))
### add second round
empty.bracket <- empty.bracket + geom_segment(aes(x=r2.x[1],y=r2.y,yend=r2.y,xend=r2.x[2])) + geom_segment(aes(x=r2.x[2],xend=r2.x[2],y=r2.verticals.start,yend=r2.verticals.stop)) + geom_segment(aes(x=r2.x[1],y=-r2.y,yend=-r2.y,xend=r2.x[2])) + geom_segment(aes(x=r2.x[2],xend=r2.x[2],y=-r2.verticals.start,yend=-r2.verticals.stop)) + geom_segment(aes(x=-r2.x[1],y=r2.y,yend=r2.y,xend=-r2.x[2])) + geom_segment(aes(x=-r2.x[2],xend=-r2.x[2],y=r2.verticals.start,yend=r2.verticals.stop)) + geom_segment(aes(x=-r2.x[1],y=-r2.y,yend=-r2.y,xend=-r2.x[2])) + geom_segment(aes(x=-r2.x[2],xend=-r2.x[2],y=-r2.verticals.start,yend=-r2.verticals.stop))
### add third round
empty.bracket <- empty.bracket + geom_segment(aes(x=r3.x[1],y=r3.y,yend=r3.y,xend=r3.x[2])) + geom_segment(aes(x=r3.x[2],xend=r3.x[2],y=r3.verticals.start,yend=r3.verticals.stop)) + geom_segment(aes(x=r3.x[1],y=-r3.y,yend=-r3.y,xend=r3.x[2])) + geom_segment(aes(x=r3.x[2],xend=r3.x[2],y=-r3.verticals.start,yend=-r3.verticals.stop)) + geom_segment(aes(x=-r3.x[1],y=r3.y,yend=r3.y,xend=-r3.x[2])) + geom_segment(aes(x=-r3.x[2],xend=-r3.x[2],y=r3.verticals.start,yend=r3.verticals.stop)) + geom_segment(aes(x=-r3.x[1],y=-r3.y,yend=-r3.y,xend=-r3.x[2])) + geom_segment(aes(x=-r3.x[2],xend=-r3.x[2],y=-r3.verticals.start,yend=-r3.verticals.stop))
### add fourth round
empty.bracket <- empty.bracket + geom_segment(aes(x=r4.x[1],y=r4.y,yend=r4.y,xend=r4.x[2])) + geom_segment(aes(x=r4.x[2],xend=r4.x[2],y=r4.verticals.start,yend=r4.verticals.stop)) + geom_segment(aes(x=r4.x[1],y=-r4.y,yend=-r4.y,xend=r4.x[2])) + geom_segment(aes(x=r4.x[2],xend=r4.x[2],y=-r4.verticals.start,yend=-r4.verticals.stop)) + geom_segment(aes(x=-r4.x[1],y=r4.y,yend=r4.y,xend=-r4.x[2])) + geom_segment(aes(x=-r4.x[2],xend=-r4.x[2],y=r4.verticals.start,yend=r4.verticals.stop)) + geom_segment(aes(x=-r4.x[1],y=-r4.y,yend=-r4.y,xend=-r4.x[2])) + geom_segment(aes(x=-r4.x[2],xend=-r4.x[2],y=-r4.verticals.start,yend=-r4.verticals.stop))
### add fifth round: add necessary horizontal bars and then
### vertical bars. On each side there is one horizontal bar at +r5.y and one
### at -r5.y, joined by a single vertical at x = r5.x[2] (or -r5.x[2]) that
### runs from -r5.y to +r5.y, linking the top and bottom quadrants.
empty.bracket <- empty.bracket + geom_segment(aes(x=r5.x[1],y=r5.y,yend=r5.y,xend=r5.x[2])) + geom_segment(aes(x=r5.x[1],y=-r5.y,yend=-r5.y,xend=r5.x[2])) + geom_segment(aes(x=r5.x[2],y=-r5.y, yend=r5.y, xend=r5.x[2])) + geom_segment(aes(x=-r5.x[1],y=r5.y,yend=r5.y,xend=-r5.x[2])) + geom_segment(aes(x=-r5.x[1],y=-r5.y,yend=-r5.y,xend=-r5.x[2])) + geom_segment(aes(x=-r5.x[2],y=-r5.y,yend=r5.y,xend=-r5.x[2]))
### sixth (and final) round: one horizontal bar per side, drawn at y = r6.y
### for the un-negated x range and at y = -r6.y for the negated x range
empty.bracket <- empty.bracket + geom_segment(aes(x=r6.x[1],y=r6.y,xend=r6.x[2],yend=r6.y)) + geom_segment(aes(x=-r6.x[1],y=-r6.y,xend=-r6.x[2],yend=-r6.y))
### add winner location: a bar on y = 0 between the midpoints of the two
### final-round bars
empty.bracket <- empty.bracket + geom_segment(aes(x=mean(r6.x),xend=-mean(r6.x),y=0,yend=0))
### Put some test labels on randomly chosen first-round slots.
Labels <- c("Alabama", "Alaska", "Arizona", "Arkansas", "Virginia Commonwealth")
# every label starts at the outer end of the first-round bars (r1.x[1]);
# the slots are drawn at random from the 16 first-round y-values
TextFrame <- data.frame(X = r1.x[1], Y = sample(r1.y, 5), LAB = Labels)
# per-label box size: string extent in inches plus a fixed padding
TextFrame <- transform(
  TextFrame,
  w = strwidth(LAB, "inches") + 0.05,
  h = strheight(LAB, "inches") + 0.5
)
### Display results: fully transparent boxes (alpha = 0) plus the label text,
### both anchored at the bottom-left corner (X, Y).
empty.bracket +
  geom_rect(
    data = TextFrame,
    aes(xmin = X, xmax = X + w, ymin = Y, ymax = Y + h),
    alpha = 0
  ) +
  geom_text(
    data = TextFrame,
    aes(x = X, y = Y, label = LAB),
    size = rel(3),
    hjust = 0,
    vjust = 0
  )

I'll take your question in a few separate parts.
Window size:
Hence, depending on the size of the window from
which users view their ggplot object some team names
look too big.
When viewing an R graphic, the text size remains constant, whilst the graphic objects scale to the window. This means that what you see in an R graph window isn't necessarily the same as what you'll get when you print it out (unless you use the savePlot command).
Text scaling:
In both images the size of the text is unchanged, despite the
length of the bars being different. I'd like to know if there's a
way to have the size of the text scale with the image, as opposed to
being fixed in size.
I believe the short answer is no. You specify the text size and it's fixed. However, if outputting to a pdf device, you could scale the text size according to the width of the pdf.

Related

reduce space between elements in plot_summs

I need to "shrink" the white space in the plot - I need those two coefficients closer to each other. How can I do this?
I use this code:
library(jtools)
library(ggplot2)

# Weighted linear model on the built-in state.x77 data (random weights).
states <- as.data.frame(state.x77)
fit1 <- lm(
  Income ~ Frost + Illiteracy + Murder +
    Population + Area + `Life Exp` + `HS Grad`,
  data = states,
  weights = runif(50, 0.1, 3)
)

# Coefficient plot restricted to two (renamed) coefficients.
p <- plot_summs(
  fit1,
  coefs = c("Frost Days" = "Frost", "% Illiterate" = "Illiteracy"),
  scale = TRUE
)

# Label each estimate with its rounded value and drop the legend.
p +
  geom_text(aes(label = round(estimate)), vjust = -1) +
  theme(legend.position = "none")
There are three options to get rid of the excess space between the lines:
Reduce the vertical dimension of your plotting window
Keep the plotting window the same size but reduce the height of the plot within the plotting window
Keep the plotting window and plot the same size but reduce the space between the lines
Here's how to do each one:
1. Reduce the vertical dimension of your plotting window
If you simply drag your window to make it less tall, your plot becomes:
2. Reduce the height of the plot within the plotting window
If you want to do this programmatically without changing your plot output size, you can set aspect.ratio in theme:
# Same labelled plot, with the panel forced to a 1:4 height-to-width ratio.
p +
  geom_text(aes(label = round(estimate)), vjust = -1) +
  theme(
    legend.position = "none",
    aspect.ratio = 1 / 4
  )
Though now of course you just have more white space around the plot.
3. Reduce the space between the lines
If you want the lines closer together without reducing the dimensions of the axes etc., you can do:
# Same labelled plot, with extra expansion on the discrete y scale so the
# two coefficient rows end up closer to each other.
p +
  geom_text(aes(label = round(estimate)), vjust = -1) +
  theme(legend.position = "none") +
  scale_y_discrete(expand = c(2, 1))
Though of course, you now have lots of wasted space on either side of your lines.
In my opinion, the first option is by far the most aesthetic, professional and sensible.

facet_zoom() while setting axis limits

I would like to use facet_zoom() to zoom in on part of an axis that has limits explicitly set. However, using scale_*(limits = *) and coord_cartesian(xlim = *) overrides the zoomed facet's scales as well such that both have the same limits. Is there a way around this? Maybe I could add some data points near the limits and then set their alpha = 0... Any other ideas?
library(ggplot2)
library(ggforce)

# Case 1: works with no limits specified
ggplot(mpg, aes(x = hwy, y = cyl)) +
  geom_point() +
  facet_zoom(xlim = c(20, 25))

# Case 2: fails with limits specified on the x scale
ggplot(mpg, aes(x = hwy, y = cyl)) +
  scale_x_continuous(limits = c(0, 50)) +
  geom_point() +
  facet_zoom(xlim = c(20, 25))

# Case 3: fails with limits specified through coord_cartesian()
ggplot(mpg, aes(x = hwy, y = cyl)) +
  scale_x_continuous() +
  coord_cartesian(xlim = c(0, 50)) +
  geom_point() +
  facet_zoom(xlim = c(20, 25))
I don't have enough knowledge of the underlying intricacies in FacetZoom, but you can check if the following workarounds provide a reasonable starting point.
Plot for demonstration
One of the key differences between setting limits in scales_* vs. coord_* is the clipping effect (screenshot taken from the ggplot2 cheatsheet found here). Since this effect isn't really clear in a scatterplot, I added a geom_line layer and adjusted the specified limits so that the limits extend beyond the data range on one end of the x-axis, & clips the data on the other end.
# Base demonstration plot: points plus thick lines coloured by `fl`,
# zoomed on hwy in [20, 25].
p <- ggplot(mpg, aes(x = hwy, y = cyl)) +
  geom_point() +
  geom_line(aes(colour = fl), size = 2) +
  facet_zoom(xlim = c(20, 25)) +
  theme_bw()

# Three variants to compare:
p0 <- p                                           # normal zoomed plot
p1 <- p + scale_x_continuous(limits = c(0, 35))   # limits set in the scale
p2 <- p + coord_cartesian(xlim = c(0, 35))        # limits set in the coord
We can see that while p0 behaves as expected, both p1 & p2 show both the original facet (top) & the zoomed facet (bottom) with the same range of c(0, 35).
In p1's case, the shaded box also expanded to cover the entire top facet. In p2's case, the zoom box stayed in exactly the same position as p0, & as a result no longer covers the zoomed range of c(20, 25).
Workaround for limits set in scale_*
# Build both plots so their layout internals can be inspected and patched.
gp0 <- ggplot_build(p0)
gp1 <- ggplot_build(p1)

# Read-only copy of the layout table; rows named "x" belong to the zoomed facet.
layout_tbl <- gp1$layout$layout
is_zoom <- layout_tbl$name == "x"

# Re-set the zoomed facet's scale limits to the range that scale has recorded.
k <- layout_tbl$SCALE_X[is_zoom]
gp1$layout$panel_scales_x[[k]]$limits <- gp1$layout$panel_scales_x[[k]]$range$range

# Re-set the zoomed facet's panel parameters from the unrestricted plot p0.
k <- layout_tbl$PANEL[is_zoom]
gp1$layout$panel_params[[k]] <- gp0$layout$panel_params[[k]]

# Turn the patched build into a gtable of grobs and draw it.
gt1 <- ggplot_gtable(gp1)
grid::grid.draw(gt1)
The zoomed facet now shows the zoomed range c(20, 25) correctly, while the shaded box shrinks to cover the correct range in the original facet. Since this method removes unseen data points, all lines in the original facet stay within the confines of the facet.
Workaround for limits set in coord_*
# Workaround for limits set through coord_cartesian(): patch the built
# version of p2 (the plot carrying the coord limits), using the unrestricted
# plot p0 as the reference for the zoomed facet.
# convert ggplot objects to form suitable for rendering
gp0 <- ggplot_build(p0)
gp2 <- ggplot_build(p2)
# apply coord limits to original facet's scale limits
k <- gp2$layout$layout$SCALE_X[gp2$layout$layout$name == "orig"]
gp2$layout$panel_scales_x[[k]]$limits <- gp2$layout$coord$limits$x
# re-set zoomed facet's panel parameters based on original version without setting
# limits in scale; the panel index is looked up in gp2, the object being patched
k <- gp2$layout$layout$PANEL[gp2$layout$layout$name == "x"]
gp2$layout$panel_params[[k]] <- gp0$layout$panel_params[[k]]
# convert built ggplot object to gtable of grobs as usual,
# & print result
gt2 <- ggplot_gtable(gp2)
grid::grid.draw(gt2)
The zoomed facet now shows the zoomed range c(20, 25) correctly, while the shaded box shifts to cover the correct range in the original facet. Since this method includes unseen data points, some lines in the original facet extend beyond the facet's confines.
Note: These workarounds should work with zoom in y + limits set in y-axis as well, as long as all references to "x" / panel_scales_x / SCALE_X above are changed to "y" / panel_scales_y / SCALE_Y. I haven't tested this for other combinations such as zoom in both x & y, but the broad principle ought to be similar.

Fix Plot Size in ggplot2 relative to plot title

I'm using ggplot2 to create some figures with titles, but finding that when titles have a descender (e.g., lowercase p, q, g, y) the actual size of the plot shrinks slightly to accommodate the larger space needed by the title.
Are there ways within normal ggplot functionality to fix the plot size so that figures are in 100% consistent position regardless of title?
Here's some quick sample code that shows the issue; folks might need to run code locally to clearly see the differences in the images.
library(ggplot2)

# Title without any descender letters
ggplot(data = mtcars, aes(x = disp, y = mpg)) +
  geom_point() +
  ggtitle("Scatter Plot")

# Title containing a descender (lowercase 'p')
ggplot(data = mtcars, aes(x = disp, y = mpg)) +
  geom_point() +
  ggtitle("Scatter plot")
you can set the relevant height in the gtable,
library(ggplot2)

# Two empty plots whose titles need different amounts of vertical space.
p1 <- ggplot() + ggtitle("a")
p2 <- ggplot() + ggtitle("a\nb")

# Convert both plots to gtables.
gl <- lapply(list(p1, p2), ggplotGrob)

# Largest height found in row 3 of the gtables
# (presumably the title row -- confirm for your ggplot2 version).
title_heights <- lapply(gl, function(g) g$heights[3])
th <- do.call(grid::unit.pmax, title_heights)

# Give every gtable that common height, then draw them side by side.
gl <- lapply(gl, function(g) {
  g$heights[3] <- th
  g
})
gridExtra::grid.arrange(grobs = gl, nrow = 1)
Edit: here's how to edit one plot for simplicity
# Single-plot version: convert the plot to a gtable, pin row 3 to a fixed
# height of 3 lines, and draw it.
# NOTE(review): row 3 is assumed to be the title row -- confirm for your
# ggplot2 version.
g <- ggplotGrob(qplot(1, 1) + ggtitle("title"))
g$heights[3] <- grid::unit(3, "line")
# grid is not attached here (only ggplot2 is), so call grid.draw via its namespace
grid::grid.draw(g)

Plot features cropped by Margin

When I compile the following MWE I observe that the maximum point (3,5) is significantly cut/cropped by the margins.
The following example is drastically reduced for simplicity.
In my actual data the following are all impacted by limiting my coord_cartesian manually if the coresponding x-axis aesthetic is on the max x value.
Point symbol
Error bars
Statistical symbols inserted by text annotation
MWE
library(ggplot2)
library("grid")

print("Program started")

# Four sample points; the last one, (3, 5), sits at the upper limits
# passed to coord_cartesian() below.
n <- c(0.1, 2, 3, 5)
s <- c(0, 1, 2, 3)
df <- data.frame(n, s)

# Points joined by a line, with the view limited to the data range
# (plus a small negative margin on the x-axis).
gg <- ggplot(df, aes(x = s, y = n)) +
  geom_point(position = position_dodge(width = NULL), size = 1.5) +
  geom_line(position = position_dodge(width = NULL)) +
  coord_cartesian(ylim = c(0, 5), xlim = c((-0.05) * 3, 3))

print(gg)
print("Program complete - a graph should be visible.")
To show my data appropriately I would consider using any of the following that are possible (influenced by the observation that the x-axis labels themselves are never cut):
Make the margin transparent so the point isn't cut
unless the point is cut by the plot area and not the margin
Bring the panel with the plot area to the front
unless the point is cut by the plot area and not the margin so order is independent
Use xlim = c((-0.05)*3, (3*0.05)) to extend the axis range but implement some hack to not show the overhanging axis bar after the maximum point of 3?
this is how I had it originally but I was told to remove the overhang after the 3 as it was unacceptable.
Is this what you mean by option 1:
gg <- ggplot(df, aes(x = s, y = n)) +
  geom_point(position = position_dodge(width = NULL), size = 3) +
  geom_line(position = position_dodge(width = NULL)) +
  coord_cartesian(xlim = c(0, 3), ylim = c(0, 5))

# Turn off clipping, so that the point at (3,5) is not clipped by the panel grob
built <- ggplot_build(gg)
gg1 <- ggplot_gtable(built)
is_panel <- gg1$layout$name == "panel"
gg1$layout$clip[is_panel] <- "off"
grid.draw(gg1)

Add sequential arrows to a ggplot bubbleplot

I have a line plot that I have placed an overlay of bubbles onto. Before I overlaid the bubbles, I was able to connect each point with an arrow to show the sequential relationship using this advice
But now that I have overlaid my bubbles, I still want to connect each bubble by an arrow as previous.
Here is my data:
# Bubble centres (X, Y), bubble sizes, and sequence labels 1..8.
X <- c(-0.373, -0.256, -0.272, 0.048, 0.219, 0.313, 0.209, 0.112)
Y <- c(-0.055, -0.091, 0.100, 0.153, -0.139, -0.004, 0.040, -0.004)
Size <- c(37, 31, 25, 10, 5, 4, 6, 10)
Label <- c(1, 2, 3, 4, 5, 6, 7, 8)
DF <- data.frame(X = X, Y = Y, Size = Size, Label = Label)
Using the above advice, I can try and draw a plot with arrows connecting each bubble, but the size of the bubbles obscures the arrow heads.
# Bubble plot with an arrow from each point to the next one in sequence.
# The last point has no successor, so its segment end is NA.
ggplot(DF, aes(x = X, y = Y, size = Size, label = Label), legend = FALSE) +
  geom_segment(
    aes(xend = c(X[-1], NA), yend = c(Y[-1], NA)),
    size = 0.3,
    arrow = arrow(length = unit(0.3, "cm"))
  ) +
  geom_point(color = "darkblue", fill = "red", shape = 21) +
  geom_text(size = 2) +
  theme_bw() +
  scale_size(
    range = c(4, 30),
    name = "Size",
    breaks = c(10, 25, 50),
    limits = c(1, 100)
  )
I would basically like the above plot, but with the arrow heads visible. I know it is possible to write the arrows overtop the bubbles so I can see each arrow, but that is not what I am looking for. What I would like would be an arrow drawn from the outer edge of one bubble to the outer edge of the next bubble. So I need someway to shorten the head of each arrow by the radius of bubble it is pointing to.
And I have no idea why I get the warning at the end
Removed 1 rows containing missing values (geom_segment).
You can start with the following:
# Raw data: bubble centres, sizes and sequence labels.
X <- c(-0.373, -0.256, -0.272, 0.048, 0.219, 0.313, 0.209, 0.112)
Y <- c(-0.055, -0.091, 0.100, 0.153, -0.139, -0.004, 0.040, -0.004)
Size <- c(37, 31, 25, 10, 5, 4, 6, 10)
Label <- c(1, 2, 3, 4, 5, 6, 7, 8)

# Size / Size_penalty is used as a rough stand-in for the bubble radius.
Size_penalty <- 1000

# Successor of each point; the last point has none, hence the trailing NA.
X_next <- c(tail(X, -1), NA)
Y_next <- c(tail(Y, -1), NA)
Size_next <- c(tail(Size, -1), NA)

# Distance between each point and its successor.
Arrow_length <- sqrt((X - X_next)^2 + (Y - Y_next)^2)

# Move the arrow start away from the current centre, towards the successor ...
Offset <- Size / Size_penalty
X_begin <- X + Offset * (X_next - X) / Arrow_length
Y_begin <- Y + Offset * (Y_next - Y) / Arrow_length

# ... and the arrow end away from the successor's centre, back towards the
# current point.
Offset_next <- Size_next / Size_penalty
X_end <- X_next + Offset_next * (X - X_next) / Arrow_length
Y_end <- Y_next + Offset_next * (Y - Y_next) / Arrow_length

DF <- data.frame(X, Y, X_begin, Y_begin, X_end, Y_end, Size, Label)
# Bubbles first, then the shortened arrows between consecutive bubbles.
ggplot(DF, aes(x = X, y = Y, size = Size, label = Label), legend = FALSE) +
  geom_point(color = "darkblue", fill = "red", shape = 21) +
  # The last row has no successor, so its segment coordinates are NA by
  # construction; na.rm = TRUE drops that row silently instead of emitting
  # the "Removed 1 rows containing missing values" warning.
  geom_segment(
    aes(x = X_begin, y = Y_begin, xend = X_end, yend = Y_end),
    size = 0.3,
    arrow = arrow(length = unit(0.3, "cm")),
    na.rm = TRUE
  ) +
  geom_text(size = 4) +
  theme_bw() +
  scale_size(
    range = c(4, 30),
    name = "Size",
    breaks = c(10, 25, 50),
    limits = c(1, 60)
  )
Here I use Size / Size_penalty as a proxy to bubble radius, which is obviously quite far from being elegant. But this is the best I can do, since there's a scale_size, so that conversion from size to radius is implicit. All that is left is to find a conversion function like
# Placeholder for a conversion from a scaled ggplot size to a bubble radius.
# Not implemented yet: the body is empty, so calling it returns NULL.
rad <- function(ggplot_size_after_scaling) {
  NULL
}

Resources