ggplot2: Adjust the symbol size in legends - r

How should I change the size of symbols in legends? I checked the document of theme but found no answer.
Here is an example:
library(ggplot2);library(grid)
set.seed(1000)
x <- 1:6
mu <- sin(x)
observed <- mu + rnorm(length(x), 0, 0.5*sd(mu))
data <- data.frame(
t=rep(x, 2),
value=c(mu, observed) - min(mu, observed) + 0.5,
class = rep(c("mu", "observed"), each=length(x)))
mu <- data$value[1:length(x)]
observed <- data$value[1:length(x) + length(x)]
mu.min <- mu - 3 * 0.5 * sd(mu)
mu.max <- mu + 3 * 0.5 * sd(mu)
g <- ggplot(data=data)
g <- g + geom_point(aes(x=value, y=t, shape=class, size=24)) + scale_size(guide="none")
g <- g + scale_shape(name="", labels=expression(paste(S[u](t), ", the observation at time ", t), paste(mu[u](t), ", the mean of ", tilde(S)[u](t), " ")))
stat_function.color <- gray(0.5)
g <- g + geom_segment(aes(y=1:6, yend=1:6, x=mu.min, xend=mu.max, linetype="2", alpha = 1), color=stat_function.color) + scale_alpha(guide="none") + scale_linetype(name= "", labels=expression(paste("probability density function (pdf) of ", tilde(S)[u], " at time ", t)))
for(i in 1:length(x)) {
g <- g + stat_function(fun=function(x, i) {
ifelse( x <= mu.max[i] & x >= mu.min[i], dnorm(x, mu[i], sd(mu)) + i, NA)
}, color=stat_function.color, args=list(i=i))
}
background.color <- gray(0.75)
g <- g + theme(
axis.text=element_blank(),
title=element_text(size=rel(1.5)),
legend.text=element_text(size=rel(1.5)),
legend.position="top",
legend.direction="vertical",
# legend.key.size = unit(2, "cm"),
panel.background=element_rect(fill=background.color),
panel.grid.major=element_line(color=background.color),
panel.grid.minor=element_line(color=background.color)
) + coord_flip()
plot(g)

You should use:
theme(legend.key.size = unit(3,"line"))

You can make these kinds of changes manually using the override.aes argument to guide_legend():
g <- g + guides(shape = guide_legend(override.aes = list(size = 5)))
print(g)

Marius's answer did not work for me as of R version 3.2.2. You can still call guide_legend() with the same override.aes argument but you will need to specify color instead of shape in the wrapper function.
So if you're running a later version of R, try this instead:
g + guides(color = guide_legend(override.aes = list(size=5)))
EDIT
As pointed out by #Ibo in the comment, this may have been due to the color scale in the ggplot object. If the ggplot object contains a color scale, the mapping of size (size=5) has to be set on the color instead.

If you want to change the sizes of 2 components of a legend independently, it gets trickier, but it can be done by manually editing the individual components of the plot using the grid package.
Example based on this SO answer:
set.seed(1)
dat <- data.frame(x = runif(n = 100),
x2 = factor(rep(c('first', 'second'), each = 50)))
set.seed(1)
dat$y = 5 + 1.8 * as.numeric(dat$x2) + .3 * dat$x + rnorm(100)
# basic plot
g <- ggplot(data = dat,
aes(x = x, y = y, color = x2))+
theme_bw()+
geom_point()+
geom_smooth(method = 'lm')
# make the size of the points & lines in the legend larger
g + guides(color = guide_legend(override.aes = list(size = 2)))
# Make JUST the legend points larger without changing the size of the legend lines:
# To get a list of the names of all the grobs in the ggplot
g
grid::grid.ls(grid::grid.force())
# Set the size of the point in the legend to 2 mm
grid::grid.gedit("key-[-0-9]-1-1", size = unit(4, "mm"))
# save the modified plot to an object
g2 <- grid::grid.grab()
ggsave(g2, filename = 'g2.png')

As of 12/1/2022, in ggplot2 version 3.4.0, the argument:
guides(shape = guide_legend(override.aes = list(size = 5)))
no longer works....instead, replace "size = 5" with "linewidth = 5"

Related

When using a color transformation in ggplot2, change the legend gradient instead of the legend break positions

Suppose I have a raster plot where the fill color gradient isn't used very efficiently because the values are skewed, like this:
library(ggplot2)
set.seed(20)
d = expand.grid(x = seq(0, 10, len = 100), y = seq(0, 10, len = 100))
d = transform(d, z =
1e-4 * ((x - 2)^2 + (2*y - 4)^2 + 10*rnorm(nrow(d)))^2)
ggplot(d) +
geom_raster(aes(x, y, fill = z)) +
scale_fill_distiller(palette = "Spectral",
limits = c(0, 12), breaks = 0 : 12) +
theme(legend.key.height = unit(20, "mm"))
I can quantile-transform the color scale like this:
ggplot(d) +
geom_raster(aes(x, y, fill = z)) +
scale_fill_distiller(palette = "Spectral",
limits = c(0, 12), breaks = 0 : 12,
trans = scales::trans_new("q",
function(x) ecdf(d$z)(x),
function(x) unname(quantile(d$z, x)))) +
theme(legend.key.height = unit(20, "mm"))
I like what this does for the main part of the plot, but not the legend. The legend uses the same gradient as the original, while moving the breaks according to the transformation. I'd prefer to keep the breaks where they are, while transforming the gradient instead. Also, I'd like to avoid the floating-point noise that's been added to the break labels. How can I accomplish these changes?
I had a very similar idea to chemdork123, but wanted to stay a bit closer to the quantile idea. The idea is to set an exact palette of colours (i.e., one colour for every value) and space this out such that it follows the data.
library(ggplot2)
library(scales)
#> Warning: package 'scales' was built under R version 4.0.3
set.seed(20)
d = expand.grid(x = seq(0, 10, len = 100), y = seq(0, 10, len = 100))
d = transform(d, z =
1e-4 * ((x - 2)^2 + (2*y - 4)^2 + 10*rnorm(nrow(d)))^2)
# The 'distiller' palette outside of the scale,
# we need this to generate `length(d$z)` number of colours.
pal <- gradient_n_pal(brewer_pal(palette = "Spectral", direction = -1)(7))
ggplot(d) +
geom_raster(aes(x, y, fill = z)) +
scale_fill_gradientn(
colours = pal(c(0, rescale(seq_along(d$z)), 1)), # <- extra 0, 1 for out-of-bounds
limits = c(0, 12), breaks = 0:12,
values = c(0, rescale(sort(d$z), from = c(0, 12)), 1) # <- extra 0, 1 again
) +
theme(legend.key.height = unit(10, "mm"))
Created on 2021-03-31 by the reprex package (v1.0.0)
You can use the values argument for the scale_fill_distiller() function. The distiller scales extend brewer to continuous scales by interpolating 7 colors from any palette. By default, the scaling is linearly applied from 0 (lowest value on the scale) to 1 (highest value on the scale). You can recreate this mapping via: scales::rescale(1:7). If you supply a new vector to the values argument, you can remap each of the 7 colors to a new location. You do not have to supply 7 values - the rest are interpolated linearly - just as long as you specify the max at 1 (or you'll cut the scale).
Best way is to play around with it - I've tried mapping to specific functions before, but honestly it tends to work for me the best when I just mess with the numbers until I get something I like:
ggplot(d) +
geom_raster(aes(x, y, fill = z)) +
scale_fill_distiller(palette = "Spectral", values = c(0,0.05,0.1, 0.5,1)) +
theme(legend.key.height = unit(20, "mm"))

Add an additional X axis to the plot and some lines/annotations to show the percentage of data under it

I was trying to recreate this plot:
using the following code -
library(tidyverse)
set.seed(0); r <- rnorm(10000);
df <- as.data.frame(r)
avg <- round(mean(r),2)
SD <- round(sd(r),2)
x.scale <- seq(from = avg - 3*SD, to = avg + 3*SD, by = SD)
x.lab <- c("-3SD", "-2SD", "-1SD", "Mean", "1SD", "2SD", "3SD")
df %>% ggplot(aes(r)) +
geom_histogram(aes(y=..density..), bins = 20,
colour="black", fill="lightblue") +
geom_density(alpha=.2, fill="darkblue") +
scale_x_continuous(breaks = x.scale, labels = x.lab) +
labs(x = "")
Using the code I plotted this:
,
but this isn't near to the plot that I am trying to create. How do I make an additional axis with the X axis? How do I add the lines to automatically show the percentage of observations? Is there any way, that I can create the plot as nearly identical as possible using ggplot2?
Welcome to SO. Excellent first question!
It's actually quite tricky. You'd need to create a second plot (the second x axis) but it's not the most straight forward to align both perfectly.
I will be using Z.lin's amazing modification of the cowplot package.
I am not using the reprex package, because I think I'd need to define every single function (and I don't know how to use trace within reprex.)
library(tidyverse)
library(cowplot)
set.seed(0); r <- rnorm(10000);
foodf <- as.data.frame(r)
avg <- round(mean(r),2)
SD <- round(sd(r),2)
x.scale <- round(seq(from = avg - 3*SD, to = avg + 3*SD, by = SD), 1)
x.lab <- c("-3SD", "-2SD", "-1SD", "Mean", "1SD", "2SD", "3SD")
x2lab <- -3:3
# calculate the density manually
dens_r <- density(r)
# for each x value, calculate the closest x value in the density object and get the respective y values
y_dens <- dens_r$y[sapply(x.scale, function(x) which.min(abs(dens_r$x - x)))]
# added annotation for segments and labels.
# Arrow segments can be added in a similar way.
p1 <-
ggplot(foodf, aes(r)) +
geom_histogram(aes(y=..density..), bins = 20,
colour="black", fill="lightblue") +
geom_density(alpha=.2, fill="darkblue") +
scale_x_continuous(breaks = x.scale, labels = x.lab) +
labs(x = NULL) +# use NULL here
annotate(geom = "segment", x = x.scale, xend = x.scale,
yend = 1.1 * max(dens_r$y), y = y_dens, lty = 2 ) +
annotate(geom = "text", label = x.lab,
x = x.scale, y = 1.2 * max(dens_r$y))
p2 <-
ggplot(foodf, aes(r)) +
scale_x_continuous(breaks = x.scale, labels = x2lab) +
labs(x = NULL) +
theme_classic() +
theme(axis.line.y = element_blank())
# This is with the modified plot_grid() / align_plot() function!!!
plot_grid(p1, p2, ncol = 1, align = "v", rel_heights = c(1, 0.1))

How to override an aes color (controlled by a variable) based on a condition?

I'm trying to graph multiple nonlinear least squares regression in r in different colors based on the value of a variable.
However, I also display the equation of the last one, and I would like the color in the nonlinear regression corresponding to the equation to be black as well.
What I've tried is shown in the geom_smooth() layer - I tried to include an ifelse() statement, but this doesn't work because of reasons described here: Different between colour argument and aes colour in ggplot2?
test <- function() {
require(ggplot2)
set.seed(1);
master <- data.frame(matrix(NA_real_, nrow = 0, ncol = 3))
for( i in 1:5 ) {
df <- data.frame(matrix(NA_real_, nrow = 50, ncol = 3))
colnames(df) <- c("xdata", "ydata", "test")
df$xdata = as.numeric(sample(1:100, size = nrow(df), replace = FALSE))
df$ydata = as.numeric(sample(1:3, size = nrow(df), prob=c(.60, .25, .15), replace = TRUE))
# browser()
df$test = i
master <- rbind(master, df)
}
df <- master
last <- 5
# based on https://stackoverflow.com/questions/18305852/power-regression-in-r-similar-to-excel
power_eqn = function(df, start = list(a=300,b=1)) {
m = nls(as.numeric(reorder(xdata,-ydata)) ~ a*ydata^b, start = start, data = df)
# View(summary(m))
# browser()
# eq <- substitute(italic(hat(y)) == a ~italic(x)^b*","~~italic(r)^2~"="~r2*","~~p~"="~italic(pvalue),
eq <- substitute(italic(y) == a ~italic(x)^b*","~~italic('se')~"="~se*","~~italic(p)~"="~pvalue,
list(a = format(coef(m)[1], digits = 6), # a
b = format(coef(m)[2], digits = 6), # b
# r2 = format(summary(m)$r.squared, digits = 3),
se = format(summary(m)$parameters[2,'Std. Error'], digits = 6), # standard error
pvalue = format(summary(m)$coefficients[2,'Pr(>|t|)'], digits=6) )) # p value (based on t statistic)
as.character(as.expression(eq))
}
plot1 <- ggplot(df, aes(x = as.numeric(reorder(xdata,-ydata)), y = ydata ) ) +
geom_point(color="black", shape=1 ) +
# PROBLEM LINE
stat_smooth(aes(color=ifelse(test==5, "black", test)), method = 'nls', formula = 'y~a*x^b', method.args = list(start= c(a =1,b=1)),se=FALSE, fullrange=TRUE) +
geom_text(x = quantile(df$xdata)[4], y = max(df$ydata), label = power_eqn(df), parse = TRUE, size=4, color="black") + # make bigger? add border around?
theme(legend.position = "none", axis.ticks.x = element_blank() ) + #, axis.title.x = "family number", axis.title.y = "number of languages" ) # axis.text.x = element_blank(),
labs( x = "xdata", y = "ydata", title="test" )
plot1
}
test()
This is the graph I got.
I would like the line corresponding to the points and equation to be black as well. Does anyone know how to do this?
I do not want to use a scale_fill_manual, etc., because my real data would have many, many more lines - unless the scale_fill_manual/etc. can be randomly generated.
You could use scale_color_manual using a custom created palette where your level of interest (in your example where test equals 5) is set to black. Below I use palettes from RColorBrewer, extend them if necessary to the number of levels needed and sets the last color to black.
library(RColorBrewer) # provides several great palettes
createPalette <- function(n, colors = 'Greens') {
max_colors <- brewer.pal.info[colors, ]$maxcolors # Get maximum colors in palette
palette <- brewer.pal(min(max_colors, n), colors) # Get RColorBrewer palette
if (n > max_colors) {
palette <- colorRampPalette(palette)(n) # make it longer i n > max_colros
}
# assume that n-th color should be black
palette[n] <- "#000000"
# return palette
palette[1:n]
}
# create a palette with 5 levels using the Spectral palette
# change from 5 to the needed number of levels in your real data.
mypalette <- createPalette(5, 'Spectral') # palettes from RColorBrewer
We can then use mypalette with scale_color_manual(values=mypalette) to color points and lines according to the test variable.
Please note that I have updated geom_point and stat_smooth to so that they use aes(color=as.factor(test)). I have also changed the call to power_eqn to only use data points where df$test==5. The black points, lines and equation should now be based on the same data.
plot1 <- ggplot(df, aes(x = as.numeric(reorder(xdata,-ydata)), y = ydata )) +
geom_point(aes(color=as.factor(test)), shape=1) +
stat_smooth(aes(color=as.factor(test)), method = 'nls', formula = 'y~a*x^b', method.args = list(start= c(a =1,b=1)),se=FALSE, fullrange=TRUE) +
geom_text(x = quantile(df$xdata)[4], y = max(df$ydata), label = power_eqn(df[df$test == 5,]), parse = TRUE, size=4, color="black") +
theme(legend.position = "none", axis.ticks.x = element_blank() ) +
labs( x = "xdata", y = "ydata", title="test" ) +
scale_color_manual(values = mypalette)
plot1
See resulting figure here (not reputation enough to include them)
I hope you find my answer useful.

Parallel co-ordinates plot in R (ggparcoord)

I am facing a somewhat strange situation while plotting a parallel co-ordinates plot using ggparcoord. I am running the following code and it is running perfectly fine:
# Load required packages
require(GGally)
# Load datasets
data(state)
df <- data.frame(state.x77,
State = state.name,
Abbrev = state.abb,
Region = state.region,
Division = state.division
)
# Generate basic parallel coordinate plot
p <- ggparcoord(data = df,
# Which columns to use in the plot
columns = 1:4,
# Which column to use for coloring data
groupColumn = 11,
# Allows order of vertical bars to be modified
order = "anyClass",
# Do not show points
showPoints = FALSE,
# Turn on alpha blending for dense plots
alphaLines = 0.6,
# Turn off box shading range
shadeBox = NULL,
# Will normalize each column's values to [0, 1]
scale = "uniminmax" # try "std" also
)
# Start with a basic theme
p <- p + theme_minimal()
# Decrease amount of margin around x, y values
p <- p + scale_y_continuous(expand = c(0.02, 0.02))
p <- p + scale_x_discrete(expand = c(0.02, 0.02))
# Remove axis ticks and labels
p <- p + theme(axis.ticks = element_blank())
p <- p + theme(axis.title = element_blank())
p <- p + theme(axis.text.y = element_blank())
# Clear axis lines
p <- p + theme(panel.grid.minor = element_blank())
p <- p + theme(panel.grid.major.y = element_blank())
# Darken vertical lines
p <- p + theme(panel.grid.major.x = element_line(color = "#bbbbbb"))
# Move label to bottom
p <- p + theme(legend.position = "bottom")
# Figure out y-axis range after GGally scales the data
min_y <- min(p$data$value)
max_y <- max(p$data$value)
pad_y <- (max_y - min_y) * 0.1
# Calculate label positions for each veritcal bar
lab_x <- rep(1:4, times = 2) # 2 times, 1 for min 1 for max
lab_y <- rep(c(min_y - pad_y, max_y + pad_y), each = 4)
# Get min and max values from original dataset
lab_z <- c(sapply(df[, 1:4], min), sapply(df[, 1:4], max))
# Convert to character for use as labels
lab_z <- as.character(lab_z)
# Add labels to plot
p <- p + annotate("text", x = lab_x, y = lab_y, label = lab_z, size = 3)
# Display parallel coordinate plot
print(p)
I get the following output:
The moment I want to subset the data to display fewer region levels using the following statement:
df<-df[which(df$Region %in% c('South','West','Northeast')),]
I start receiving the following error:
Error in `contrasts<-`(`*tmp*`, value = contr.funs[1 + isOF[nn]]) :
contrasts can be applied only to factors with 2 or more levels
Why am I getting this error when the number of levels I want to display are clearly more than 2?
Any help on this would be much appreciated.
I figured what the problem was. I had to convert the column into factor.
df$Region <- factor(df$Region)
The above piece of code fixes the error.

ACF Plot with ggplot2: Setting width of geom_bar

With acf we can make ACF plot in base R graph.
x <- lh
acf(x)
The following code can be used to get the ACF plot in ggplot2.
conf.level <- 0.95
ciline <- qnorm((1 - conf.level)/2)/sqrt(length(x))
bacf <- acf(x, plot = FALSE)
bacfdf <- with(bacf, data.frame(lag, acf))
library(ggplot2)
q <- ggplot(data=bacfdf, mapping=aes(x=lag, y=acf)) +
geom_bar(stat = "identity", position = "identity")
q
Question
How to get lines rather than bars or how to set the width of bars so that they look like lines? Thanks
You're probably better off plotting with line segments via geom_segment()
library(ggplot2)
set.seed(123)
x <- arima.sim(n = 200, model = list(ar = 0.6))
bacf <- acf(x, plot = FALSE)
bacfdf <- with(bacf, data.frame(lag, acf))
q <- ggplot(data = bacfdf, mapping = aes(x = lag, y = acf)) +
geom_hline(aes(yintercept = 0)) +
geom_segment(mapping = aes(xend = lag, yend = 0))
q
How about using geom_errorbar with width=0?
ggplot(data=bacfdf, aes(x=lag, y=acf)) +
geom_errorbar(aes(x=lag, ymax=acf, ymin=0), width=0)
#konrad; try the following code:
library(ggfortify)
p1 <- autoplot(acf(AirPassengers, plot = FALSE), conf.int.fill = '#0000FF', conf.int.value = 0.8, conf.int.type = 'ma')
print(p1)
library(cowplot)
ggdraw(switch_axis_position(p1, axis = 'xy', keep = 'xy'))
From the forecast package comes a function ggtsdisplay that plots both ACF and PACF with ggplot. x is the residuals from the model fit (fit$residuals).
forecast::ggtsdisplay(x,lag.max=30)
From your answers, I synthesized a ggplot ACF / PACF plotting method :
require(zoo)
require(tseries)
require(ggplot2)
require(cowplot)
ts= zoo(data[[2]]) # data[[2]] because my time series data was the second column
# Plot ACP / ACF with IC
# How to compute IC for ACF and PACF :
# https://stats.stackexchange.com/questions/211628/how-is-the-confidence-interval-calculated-for-the-acf-function
ic_alpha= function(alpha, acf_res){
return(qnorm((1 + (1 - alpha))/2)/sqrt(acf_res$n.used))
}
ggplot_acf_pacf= function(res_, lag, label, alpha= 0.05){
df_= with(res_, data.frame(lag, acf))
# IC alpha
lim1= ic_alpha(alpha, res_)
lim0= -lim1
ggplot(data = df_, mapping = aes(x = lag, y = acf)) +
geom_hline(aes(yintercept = 0)) +
geom_segment(mapping = aes(xend = lag, yend = 0)) +
labs(y= label) +
geom_hline(aes(yintercept = lim1), linetype = 2, color = 'blue') +
geom_hline(aes(yintercept = lim0), linetype = 2, color = 'blue')
}
acf_ts= ggplot_acf_pacf(res_= acf(ts, plot= F)
, 20
, label= "ACF")
pacf_ts= ggplot_acf_pacf(res_= pacf(ts, plot= F)
, 20
, label= "PACF")
# Concat our plots
acf_pacf= plot_grid(acf_ts, pacf_ts, ncol = 2, nrow = 1)
acf_pacf
Results:
forecast::ggAcf() is another option:
library(ggplot2)
library(forecast)
ggAcf(wineind,lag.max=24)+
labs(title='wineind')

Resources