How to stack (overlap) legend items in ggplot2 - r

I was wondering if it is possible to "stack" the items of the legend on a ggplot2 map (or any package that allows to produce the same result).
For spatial (sf, etc.) objects there is at least one package, mapsf, that produces the desired output, but I would like to produce this type of legend also for non-spatial objects (dataframes/tibble, etc.).
See here a reprex:
# A regular plot with ggplot2
library(ggplot2)
# A plot
ggplot(mtcars, aes(wt, mpg)) +
geom_point(aes(size = drat))
# A map with the circles of the legend "stacked" (overlapping points)
# Need to conver to sf
mtcars_sf <- sf::st_as_sf(mtcars, coords = c("wt", "mpg"))
library(mapsf)
# See the legend on the top-right corner (blue arrow)
mf_map(x = mtcars_sf)
mf_map(x = mtcars_sf, var = "drat", type = "prop",
inches = .2)

As Allan said, there is (to my knowledge) no general way to do this. But you can hack this together for your custom graph. Thomas Pedersen's awesome packages ggforce and patchwork are your friend.
I am essentially faking a legend. The challenge is to get the right dimensions between main plot and legend, and to stitch the legend in a reasonable way to the main plot. This is achieved by using carefully chosen radii on geom_ellipse, setting the coordinate ratio accordingly, and adjusting the coordinate limits from both plots. More comments in the code.
library(ggplot2)
library(ggforce)
library(patchwork)
## we cannot just use geom_point,
## because the size of the circles need to correspond to the legend later
## you will need to play around with this constant
r_constant <- 25
mtcars$r <- mtcars$drat / r_constant
## this is to make the points round - it will have an effect on the panel dimension
ellipse_fac <- .1
p <-
ggplot(mtcars) +
geom_ellipse(aes(x0 = wt, y0 = mpg, a = r, b = r / ellipse_fac, angle = 0), fill = "darkred") +
theme(legend.position = "none") +
coord_equal(ellipse_fac)
## for the legend, chose rounded values from the radius range
unique_r <- unique(plyr::round_any(mtcars$r, .1))
y_circles <- floor(min(mtcars$mpg))
## I'm sorting the radii decreasingly, so that the circles overlap correctly
circles <- data.frame(x = 0, r = sort(unique_r, decreasing = TRUE))
## the segment / label poistion is also arbitrary
x_lab <- max(circles$r) + .1
y_seg <- y_circles + 2 * circles$r / ellipse_fac
p_leg <-
ggplot(circles) +
geom_ellipse(aes(x0 = x, y0 = y_circles + r / ellipse_fac, a = r, b = r / ellipse_fac, angle = 0), fill = "darkred", alpha = .5) +
geom_segment(aes(x = x, xend = x_lab, y = y_seg, yend = y_seg)) +
geom_text(aes(x = x_lab, y = y_seg, label = r), hjust = 0) +
## you need to set the ylimits similar to the main plot for control of legend position
coord_equal(ratio = ellipse_fac, ylim = range(mtcars$mpg), clip = "off") +
theme_void() +
## also need to set a margin
theme(plot.margin = margin(r = .2, unit = "in"))
p + p_leg
I've added an alpha just for aesthetic reasons

Related

Is there any alternative to ggtern in R?

Its looks like that ggtern has not been synchronised with new version of ggplot2.
Therefore we can not use ggtern.
library(ggtern)
set.seed(1)
plot <- ggtern(data = data.frame(x = runif(100),
y = runif(100),
z = runif(100)),
aes(x, y, z))
plot + stat_density_tern(geom = 'polygon',
n = 200,
aes(fill = ..level..,
alpha = ..level..)) +
geom_point() +
theme_rgbw() +
labs(title = "Example Density/Contour Plot") +
scale_fill_gradient(low = "blue",high = "red") +
guides(color = "none", fill = "none", alpha = "none")
Error: geom_point requires the following missing aesthetics: x and y
Does anyone have in find other options for ternary diagrams apart from ggtern in R?
Manually, you could plot the points with a function: (I used the formulas at https://en.wikipedia.org/wiki/Ternary_plot)
I'm not familiar with the output of stat_density_tern so I'm not sure what is expected from that part.
library(tidyverse)
tern <- function(df) {
df %>% mutate(x_pos = 0.5 * (2*y + z) / (x+y+z),
y_pos = sqrt(3) / 2 * z / (x+y+z))
}
tern(plot) %>%
ggplot(aes(x_pos, y_pos)) +
geom_point() +
annotate("path", x = c(0, 0.5, 1, 0), y = c(0,sqrt(3)/2,0,0)) +
coord_equal()
this worked for me! Uninstall ggtern and ggplot2 then
install_version("ggplot2", version = "3.3.0", repos = "http://cran.us.r-project.org")
install.packages("ggtern")
library(ggtern)
I use the following script which also supports making diagrams with 4 or more corners. It also divides the points into colored clusters by cutting a hierarchical clustering at the height where it has 32 subtrees, and it draws a line from each point to its two nearest neighbors.
library(tidyverse)
library(ggforce)
library(colorspace)
t=as.matrix(read.csv("https://pastebin.com/raw/1EDJJtHU",row.names=1,check.names=F))/100
fst=as.matrix(read.csv("https://pastebin.com/raw/6JmN2hRY",row.names=1))
mult=t%*%cmdscale(fst,ncol(fst)-1)
# t=cbind(t[,2]+t[,1],t[,8]+t[,9],rowSums(t[,-c(1,2,8,9)]))
# colnames(t)=c("Baltic + North_Atlantic","Siberian + East_Asian","Other")
# t=cbind(t[,2],t[,1],t[,8]+t[,9],rowSums(t[,-c(1,2,8,9)]))
# colnames(t)=c("Baltic","North_Atlantic","Siberian + East_Asian","Other")
ncorn=ncol(t)
start=ifelse(ncorn==4,.25,0)
corners=sapply(c(sin,cos),\(x)x((start+seq(0,2,,ncorn+1)[-(ncorn+1)])*pi))
corners=corners*min(2/diff(apply(corners,2,range)))
corners[,2]=corners[,2]-mean(range(corners[,2]))
xy=t%*%corners
grid=if(ncorn==3)do.call(rbind.data.frame,apply(simplify=F,rbind(c(1,2,3,2),c(1,3,2,3),c(2,1,3,1)),1,\(x)cbind(
seq(corners[x[1],1],corners[x[2],1],,11),
seq(corners[x[1],2],corners[x[2],2],,11),
seq(corners[x[3],1],corners[x[4],1],,11),
seq(corners[x[3],2],corners[x[4],2],,11)
)))else if(ncorn==4)do.call(rbind.data.frame,apply(simplify=F,rbind(c(1,2,4,3),c(1,4,2,3)),1,\(x)cbind(
seq(corners[x[1],1],corners[x[2],1],,11),
seq(corners[x[1],2],corners[x[2],2],,11),
seq(corners[x[3],1],corners[x[4],1],,11),
seq(corners[x[3],2],corners[x[4],2],,11)
)))else rbind.data.frame(cbind(corners,rbind(corners[-1,],corners[1,])),cbind(corners,matrix(colMeans(corners),ncorn,2,T)))
seg=as.data.frame(cbind(xy[rep(1:nrow(xy),each=2),],xy[apply(as.matrix(dist(mult)),1,\(x)order(x)[2:3]),]))
k=as.factor(cutree(hclust(dist(mult)),32))
set.seed(0)
hue=seq(0,360,,nlevels(k)+1)%>%head(-1)%>%sample()
pal1=hex(colorspace::HSV(hue,.6,1))
pal2=hex(colorspace::HSV(hue,.3,1))
angle=head(seq(360,0,length.out=ncorn+1),-1)
angle=ifelse(angle>90&angle<=270,angle+180,angle)
ggplot(as.data.frame(xy),aes(x=V1,y=V2))+
geom_polygon(data=as.data.frame(corners),fill="gray25")+
(if(ncorn>=5)geom_text(data=as.data.frame(corners),aes(x=1.04*V1,y=1.04*V2),label=colnames(t),size=3.2,angle=angle,color="gray85") # use rotated labels
else geom_text(data=as.data.frame(corners),aes(x=V1,y=1.03*V2),vjust=(1-corners[,2])/2,hjust=(1+corners[,1])/2,label=colnames(t),size=3.2,color="gray85"))+ # don't rotate labels
geom_segment(data=grid,aes(x=V1,y=V2,xend=V3,yend=V4),color="gray30",size=.4)+
ggforce::geom_mark_hull(aes(group=!!k,color=!!k,fill=!!k),concavity=1000,radius=unit(.15,"cm"),expand=unit(.15,"cm"),alpha=.15,size=.1)+
geom_segment(data=seg,aes(x=V1,y=V2,xend=V3,yend=V4),color="gray10",size=.25)+
geom_point(aes(color=k),size=.5)+
geom_text(aes(label=rownames(xy),color=!!k),size=2.2,vjust=-.6)+
coord_fixed(xlim=c(-1,1),ylim=c(-1,1))+
scale_fill_manual(values=pal1)+
scale_color_manual(values=pal2)+
theme(
axis.text=element_blank(),
axis.ticks=element_blank(),
axis.title=element_blank(),
legend.position="none",
panel.background=element_rect(fill="gray20"),
panel.grid=element_blank(),
plot.background=element_rect(fill="gray20",color=NA,size=0),
plot.margin=margin(0,0,0,0)
)
ggsave("1.png",width=7,height=7)
Or here's another version that uses Voronoi tesselation to plot the points (https://ggforce.data-imaginist.com/reference/geom_delvor.html):
t=read.table("https://pastebin.com/raw/CeLAEiAq")
t=distinct(t[,-1]) # geom_voronoi_tile doesn't handle a large number of overlapping points
pop=t[,1]
t=as.matrix(t[,-1])
ncorn=ncol(t)
start=ifelse(ncorn==4,.25,0)
corners=sapply(c(sin,cos),\(x)x((start+seq(0,2,,ncorn+1)[-(ncorn+1)])*pi))
corners=corners*min(2/diff(apply(corners,2,range))) # resize so bigger one of width and height is 2
corners[,2]=corners[,2]-mean(range(corners[,2])) # center vertically
xy=as.data.frame(t%*%corners)
# # use a simple grid with line from each corner to center for a plot with more than 3 corners
# grid=rbind.data.frame(cbind(corners,rbind(corners[-1,],corners[1,])),cbind(corners,matrix(colMeans(corners),ncorn,2,T)))
# use a grid with 10 subdivisions per side for a triangle plot
grid=do.call(rbind.data.frame,apply(simplify=F,rbind(c(1,2,3,2),c(1,3,2,3),c(2,1,3,1)),1,\(x)cbind(
seq(corners[x[1],1],corners[x[2],1],,11),
seq(corners[x[1],2],corners[x[2],2],,11),
seq(corners[x[3],1],corners[x[4],1],,11),
seq(corners[x[3],2],corners[x[4],2],,11)
)))
centers=data.frame(aggregate(xy,list(pop),mean),row.names=1)
set.seed(0)
color=as.factor(sample(length(unique(pop))))
cl=rbind(c(60,80),c(25,95),c(30,70),c(70,50),c(60,100),c(20,50),c(15,40))
hues=max(ceiling(length(color)/nrow(cl)),8)
pal1=as.vector(apply(cl,1,\(x)hcl(seq(15,375,,hues+1)[-(hues+1)],x[1],x[2])))
pal2=as.vector(apply(cl,1,\(x)hcl(seq(15,375,,hues+1)[-(hues+1)],if(x[2]>=60).5*x[1]else .1*x[1],if(x[2]>=60).2*x[2]else 95)))
xy=xy+runif(nrow(xy)*2)/1e3 # add a small random factor to prevent errors because of overlapping points
ggplot(xy,aes(V1,V2))+
geom_segment(data=grid,aes(V1,V2,xend=V3,yend=V4),color="gray85",size=.3)+
ggforce::geom_voronoi_tile(aes(group=0,fill=color[as.factor(pop)],color=color[as.factor(pop)]),size=.07,max.radius=.055)+ # `group=0` is just an arbitrary constant
# ggrepel::geom_label_repel(data=centers,aes(V1,V2,color=color,fill=color),label=rownames(centers),max.overlaps=Inf,point.size=0,size=2.3,alpha=.8,label.r=unit(.1,"lines"),label.padding=unit(.1,"lines"),label.size=.1,box.padding=0,segment.size=.3)+
geom_label(data=centers,aes(V1,V2,color=color,fill=color),label=rownames(centers),size=2.3,alpha=.8,label.r=unit(.1,"lines"),label.padding=unit(.1,"lines"),label.size=.1)+
coord_fixed(xlim=c(-1.08,1.08),ylim=c(-1.08,1.08),expand=F)+
scale_fill_manual(values=pal1)+
scale_color_manual(values=pal2)+
theme(
axis.text=element_blank(),
axis.ticks=element_blank(),
axis.title=element_blank(),
legend.position="none",
panel.background=element_rect(fill="white")
)
ggsave("1.png",width=7,height=7)

Legend position based on coordinates in ggplot2

Using ggplot2's legend.position (and legend.justification), the two available parameters indicate the relative position of the legend, but what if I want to position the legend based on the coordinates of the plot?
I can't find a way to do it.
This is strange as annotate gives an x and y argument that allows such things.
Here is some toy data
library(ggplot2)
ggplot(data = mtcars, aes(x = mpg,y = disp,color = factor(cyl))) +
geom_point() +
theme(legend.position = c(0.01,0.01),
legend.justification = c(0,0))
Which gives:
What about if I want the bottom-left corner of the legend to be on coordinates (10,100)?
I don't think there is an easy way to do it. The only approach i could think of is to build the plot object to extract the ranges of the axes in order to convert (10, 100) into a relative coordinate that can be used with legend position. Admittedly, this is very hacky...
library(tidyverse)
p <- ggplot(data = mtcars, aes(x = mpg, y = disp, color = factor(cyl))) +
geom_point()
ranges <- ggplot_build(p) %>%
pluck("layout", "panel_params", 1) %>%
`[`(c("x.range", "y.range"))
x <- (10 - ranges$x.range[1]) / (ranges$x.range[2] - ranges$x.range[1])
y <- (100 - ranges$y.range[1]) / (ranges$y.range[2] - ranges$y.range[1])
p + theme(legend.position = c(x, y), legend.justification = c(0, 0))
Created on 2021-07-21 by the reprex package (v1.0.0)

Consistent hexagon sizes and legend for manually assignment of colors

This is a continuation of a question I recently asked (Manually assigning colors with scale_fill_manual only works for certain hexagon sizes).
I was unable to plot geom_hex() so that all hexagons were the same size. Someone solved the problem. However, their solution removed the legend key. Now, I am unable to keep all the hexagons the same size while also retaining the legend.
To be specific, I really want to keep the legend labels sensical. In the example below, the legend has values (0,2,4,6,8,20), rather than hexadecimal labels (#08306B, #08519C, etc).
Below is MWE illustrating the problem. At the end, as per the 3 comments, you can see that I am able to 1) Create a plot with consistent hexagon sizes but no legend, 2) Create a plot with legend, but inconsistent hexagon sizes, 3) Attempt to create a plot with consistent hexagon sizes and legend but fail:
library(ggplot2)
library(hexbin)
library(RColorBrewer)
library(reshape)
set.seed(1)
xbins <- 10
x <- abs(rnorm(10000))
y <- abs(rnorm(10000))
minVal <- min(x, y)
maxVal <- max(x, y)
maxRange <- c(minVal, maxVal)
buffer <- (maxRange[2] - maxRange[1]) / (xbins / 2)
bindata = data.frame(x=x,y=y,factor=as.factor(1))
h <- hexbin(bindata, xbins = xbins, IDs = TRUE, xbnds = maxRange, ybnds = maxRange)
counts <- hexTapply (h, bindata$factor, table)
counts <- t (simplify2array (counts))
counts <- melt (counts)
colnames (counts) <- c ("factor", "ID", "counts")
counts$factor =as.factor(counts$factor)
hexdf <- data.frame (hcell2xy (h), ID = h#cell)
hexdf <- merge (counts, hexdf)
my_breaks <- c(2, 4, 6, 8, 20, 1000)
clrs <- brewer.pal(length(my_breaks) + 3, "Blues")
clrs <- clrs[3:length(clrs)]
hexdf$countColor <- cut(hexdf$counts, breaks = c(0, my_breaks, Inf), labels = rev(clrs))
# Has consistent hexagon sizes, but no legend
ggplot(hexdf, aes(x=x, y=y, hexID=ID, counts=counts, fill=countColor)) + geom_hex(stat="identity", fill=hexdf$countColor) + scale_fill_manual(labels = as.character(c(0, my_breaks)), values = rev(clrs), name = "Count") + geom_abline(intercept = 0, color = "red", size = 0.25) + labs(x = "A", y = "C") + coord_fixed(xlim = c(-0.5, (maxRange[2]+buffer)), ylim = c(-0.5, (maxRange[2]+buffer))) + theme(aspect.ratio=1)
# Has legend, but inconsistent hexagon sizes
ggplot(hexdf, aes(x=x, y=y, hexID=ID, counts=counts, fill=countColor)) + geom_hex(data=hexdf, stat="identity", aes(fill=countColor)) + scale_fill_manual(labels = as.character(c(0, my_breaks)), values = rev(clrs), name = "Count") + geom_abline(intercept = 0, color = "red", size = 0.25) + labs(x = "A", y = "C") + coord_fixed(xlim = c(-0.5, (maxRange[2]+buffer)), ylim = c(-0.5, (maxRange[2]+buffer))) + theme(aspect.ratio=1)
# One attempt to create consistent hexagon sizes and retain legend
ggplot(hexdf, aes(x=x, y=y, hexID=ID, counts=counts, fill=countColor)) + geom_hex(data=hexdf, aes(fill=countColor)) + geom_hex(stat="identity", fill=hexdf$countColor) + scale_fill_manual(labels = as.character(c(0, my_breaks)), values = rev(clrs), name = "Count") + geom_abline(intercept = 0, color = "red", size = 0.25) + labs(x = "A", y = "C") + coord_fixed(xlim = c(-0.5, (maxRange[2]+buffer)), ylim = c(-0.5, (maxRange[2]+buffer))) + theme(aspect.ratio=1)
Any suggestions on how to keep the hexagon sizes consistent while retaining the legend would be very helpful!
Wow, this is an interesting one -- geom_hex seems to really dislike mapping color/fill onto categorical variables. I assume that's because it is designed to be a two-dimensional histogram and visualize continuous summary statistics, but if anyone has any insight into what's going on behind the scenes, I would love to know.
For your specific problem, that really throws a wrench in the works, because you're attempting to have categorical colorization that assigns non-linear groups to the individual hexagons. Conceptually, you might consider why you're doing that. There may be a good reason, but you're essentially taking a linear color gradient and mapping it non-linearly onto your data, which can end up being visually misleading.
However, if that is what you want to do, the best approach I could come up with was to create a new continuous variable that mapped linearly onto your chosen colors and then use those to create a color gradient. Let me try to walk you through my thought process.
You essentially have a continuous variable (counts) that you want to map onto colors. That's easy enough with a simple color gradient, which is the default in ggplot2 for continuous variables. Using your data:
ggplot(hexdf, aes(x=x, y=y)) +
geom_hex(stat="identity", aes(fill=counts))
yields something close.
However, the bins with really high counts wash out the gradient for points with much lower counts, so we need to change the way the gradient maps colors onto values. You've already declared the colors you want to use in the clrs variable; we just need to add a column to your data frame to use in conjunction with these colors to create a smooth gradient. I did that as follows:
all_breaks <- c(0, my_breaks)
breaks_n <- 1:length(all_breaks)
get_break_n <- function(n) {
break_idx <- max(which((all_breaks - n) < 0))
breaks_n[break_idx]
}
hexdf$bin <- sapply(hexdf$counts, get_break_n)
We create the bin variable as the index of the break that is nearest the count variable without exceeding it. Now, you'll notice that:
ggplot(hexdf, aes(x=x, y=y)) +
geom_hex(stat="identity", aes(fill=bin))
is getting much closer to the goal.
The next step is to change how the color gradient maps onto that bin variable, which we can do by adding a call to scale_fill_gradientn:
ggplot(hexdf, aes(x=x, y=y)) +
geom_hex(stat="identity", aes(fill=bin)) +
scale_fill_gradientn(colors=rev(clrs[-1])) # odd color reversal to
# match OP's color mapping
This takes a vector of colors between which you want to interpolate a gradient. The way we've set it up, the points along the interpolation will perfectly match up with the unique values of the bin variable, meaning each value will get one of the colors specified.
Now we're cooking with gas, and the only thing left to do is add the various bells and whistles from the original graph. Most importantly, we need to make the legend look the way we want. This requires three things: (1) changing it from the default color bar to a discretized legend, (2) specifying our own custom labels, and (3) giving it an informative title.
# create the custom labels for the legend
all_break_labs <- as.character(all_breaks[1:(length(allb)-1)])
ggplot(hexdf, aes(x=x, y=y)) +
geom_hex(stat="identity", aes(fill=bin)) +
scale_fill_gradientn(colors=rev(clrs[-1]),
guide="legend", # (1) make legend discrete
labels=all_break_labs, # (2) specify labels
name="Count") + # (3) legend title
# All the other prettification from the OP
geom_abline(intercept = 0, color = "red", size = 0.25) +
labs(x = "A", y = "C") +
coord_fixed(xlim = c(-0.5, (maxRange[2]+buffer)),
ylim = c(-0.5, (maxRange[2]+buffer))) +
theme(aspect.ratio=1)
All of this leaves us with the following graph:
Hopefully that helps you out. For completeness, here's the new code in full:
# ... the rest of your code before the plots
clrs <- clrs[3:length(clrs)]
hexdf$countColor <- cut(hexdf$counts,
breaks = c(0, my_breaks, Inf),
labels = rev(clrs))
### START OF NEW CODE ###
# create new bin variable
all_breaks <- c(0, my_breaks)
breaks_n <- 1:length(all_breaks)
get_break_n <- function(n) {
break_idx <- max(which((all_breaks - n) < 0))
breaks_n[break_idx]
}
hexdf$bin <- sapply(hexdf$counts, get_break_n)
# create legend labels
all_break_labs <- as.character(all_breaks[1:(length(all_breaks)-1)])
# create final plot
ggplot(hexdf, aes(x=x, y=y)) +
geom_hex(stat="identity", aes(fill=bin)) +
scale_fill_gradientn(colors=rev(clrs[-1]),
guide="legend",
labels=all_break_labs,
name="Count") +
geom_abline(intercept = 0, color = "red", size = 0.25) +
labs(x = "A", y = "C") +
coord_fixed(xlim = c(-0.5, (maxRange[2]+buffer)),
ylim = c(-0.5, (maxRange[2]+buffer))) +
theme(aspect.ratio=1)

R adding legend and directlabels to ggplot2 contour plot

I have a raster map that I want to plot using ggplot2 using a continuous scale and labeled isolines on top of that.
For that I'm using the directlabels package and am close to getting what I want but I can't get both the legend and the labeled isolines on the same map
The following code reproduces my problem:
install.packages(c('ggplot2', 'directlabels'))
library('ggplot2')
library('directlabels')
df <- expand.grid(x=1:100, y=1:100)
df$z <- df$x * df$y
# Plot 1: this plot is fine but without contours
p <- ggplot(aes(x=x, y=y, z=z), data = df) +
geom_raster(data=df, aes(fill=z)) +
scale_fill_gradient(limits=range(df$z), high = 'white', low = 'red')
p
# Plot 2: This plot adds the isolines but no labels and it also adds a second legend for level which I don't want
p <- p + geom_contour(aes(colour = ..level..), color='gray30', na.rm=T, show.legend=T)
p
# Plot 3: This plot has the labeled isolines but it removes the z legend that I want to show
direct.label(p, list("bottom.pieces", colour='black'))
Plot 1
Plot 2
Plot 3
I would like to have the coloured raster in the background, with it's color legend on the side and the labeled isolines on top. Is there a way to do this?
Also is there a way to get the labels placed in the middle of the isolines instead of the bottom or top?
Thanks in advance
Pablo
First, fixing the issue to do with the legends.
library(ggplot2)
library(directlabels)
df <- expand.grid(x=1:100, y=1:100)
df$z <- df$x * df$y
p <- ggplot(aes(x=x, y=y, z=z), data = df) +
geom_raster(data=df, aes(fill=z), show.legend = TRUE) +
scale_fill_gradient(limits=range(df$z), high = 'white', low = 'red') +
geom_contour(aes(colour = ..level..)) +
scale_colour_gradient(guide = 'none')
p1 = direct.label(p, list("bottom.pieces", colour='black'))
p1
There aren't too many options for positioning the labels. One possibility is angled.boxes, but the fill colour might not be too nice.
p2 = direct.label(p, list("angled.boxes"))
p2
To change the fill colour to transparent (using code from here.
p3 = direct.label(p, list("far.from.others.borders", "calc.boxes", "enlarge.box",
box.color = NA, fill = "transparent", "draw.rects"))
p3
And to move the labels off the contour lines:
p4 = direct.label(p, list("far.from.others.borders", "calc.boxes", "enlarge.box",
hjust = 1, vjust = 1, box.color = NA, fill = "transparent", "draw.rects"))
p4

How to smartly place text labels beside points of different sizes in ggplot2?

I am trying to make a labeled bubble plot with ggplot2 in R. Here is the simplified scenario:
I have a data frame with 4 variables: 3 quantitative variables, x, y, and z, and another variable that labels the points, lab.
I want to make a scatter plot, where the position is determined by x and y, and the size of the points is determined by z. I then want to place text labels beside the points (say, to the right of the point) without overlapping the text on top of the point.
If the points did not vary in size, I could try to simply modify the aesthetic of the geom_text layer by adding a scaling constant (e.g. aes(x=x+1, y=y+1)). However, even in this simple case, I am having a problem with positioning the text correctly because the points do not scale with the output dimensions of the plot. In other words, the size of the points remains constant in a 500x500 plot and a 1000x1000 plot - they do not scale up with the dimensions of the outputted plot.
Therefore, I think I have to scale the position of the label by the size (e.g. dimensions) of the output plot, or I have to get the radius of the points from ggplot somehow and shift my text labels. Is there a way to do this in ggplot2?
Here is some code:
# Stupid data
df <- data.frame(x=c(1,2,3),
y=c(1,2,3),
z=c(1,2,1),
lab=c("a","b","c"), stringsAsFactors=FALSE)
# Plot with bad label placement
ggplot(aes(x=x, y=y), data=df) +
geom_point(aes(size=z)) +
geom_text(aes(label=lab),
colour="red") +
scale_size_continuous(range=c(5, 50), guide="none")
EDIT: I should mention, I tried hjust and vjust inside of geom_text, but it does not produce the desired effect.
# Trying hjust and vjust, but it doesn't look nice
ggplot(aes(x=x, y=y), data=df) +
geom_point(aes(size=z)) +
geom_text(aes(label=lab), hjust=0, vjust=0.5,
colour="red") +
scale_size_continuous(range=c(5, 50), guide="none")
EDIT: I managed to get something that works for now, thanks to Henrik and shujaa. I will leave the question open just in case someone shares a more general solution.
Just a blurb of what I am using this for: I am plotting a map, and indicating the amount of precipitation at certain stations with a point that is sized proportionally to the amount of precipitation observed. I wanted to add a station label beside each point in an aesthetically pleasing manner. I will be making more of these plots for different regions, and my output plot may have a different resolution or scale (e.g. due to different projections) for each plot, so a general solution is desired. I might try my hand at creating a custom position_jitter, like baptiste suggested, if I have time during the weekend.
It appears that position_*** don't have access to the scales used by other layers, so it's a no go. You could make a clone of GeomText that shifts the labels according to the size mapped,
but it's a lot of effort for a very kludgy and fragile solution,
geom_shiftedtext <- function (mapping = NULL, data = NULL, stat = "identity",
position = "identity",
parse = FALSE, ...) {
GeomShiftedtext$new(mapping = mapping, data = data, stat = stat, position = position,
parse = parse, ...)
}
require(proto)
GeomShiftedtext <- proto(ggplot2:::GeomText, {
objname <- "shiftedtext"
draw <- function(., data, scales, coordinates, ..., parse = FALSE, na.rm = FALSE) {
data <- remove_missing(data, na.rm,
c("x", "y", "label"), name = "geom_shiftedtext")
lab <- data$label
if (parse) {
lab <- parse(text = lab)
}
with(coord_transform(coordinates, data, scales),
textGrob(lab, unit(x, "native") + unit(0.375* size, "mm"),
unit(y, "native"),
hjust=hjust, vjust=vjust, rot=angle,
gp = gpar(col = alpha(colour, alpha),
fontfamily = family, fontface = fontface, lineheight = lineheight))
)
}
})
df <- data.frame(x=c(1,2,3),
y=c(1,2,3),
z=c(1.2,2,1),
lab=c("a","b","c"), stringsAsFactors=FALSE)
ggplot(aes(x=x, y=y), data=df) +
geom_point(aes(size=z), shape=1) +
geom_shiftedtext(aes(label=lab, size=z),
hjust=0, colour="red") +
scale_size_continuous(range=c(5, 100), guide="none")
This isn't a very general solution, because you'll need to tweak it every time, but you should be able to add to the x value for the text some value that's linear depending on z.
I had luck with
ggplot(aes(x=x, y=y), data=df) +
geom_point(aes(size=z)) +
geom_text(aes(label=lab, x = x + .06 + .14 * (z - min(z))),
colour="red") +
scale_size_continuous(range=c(5, 50), guide="none")
but, as the font size depends on your window size, you would need to decide on your output size and tweak accordingly. I started with x = x + .05 + 0 * (z-min(z)) and calibrated the intercept based on the smallest point, then when I was happy with that I adjusted the linear term for the biggest point.
Another alternative. Looks OK with your test data, but you need to check how general it is.
dodge <- abs(scale(df$z))/4
ggplot(data = df, aes(x = x, y = y)) +
geom_point(aes(size = z)) +
geom_text(aes(x = x + dodge), label = df$lab, colour = "red") +
scale_size_continuous(range = c(5, 50), guide = "none")
Update
Just tried position_jitter, but the width argument only takes one value, so right now I am not sure how useful that function would be. But I would be happy to find that I am wrong. Example with another small data set:
df3 <- mtcars[1:10, ]
ggplot(data = df3, aes(x = wt, y = mpg)) +
geom_point(aes(size = qsec), alpha = 0.1) +
geom_text(label = df3$carb, position = position_jitter(width = 0.1, height = 0)) +
scale_size_continuous(range = c(5, 50), guide = "none")

Resources