customize ggplot2 axis labels with different colors - r

I have a basic bar graph I've created from ggplot2. The y variable contains both positive and negative values and about half the vector of values are negative. I would like to customize the axis labels such that when the y value of that corresponding x factor is a negative, its label is red. Here's a reproducible example:
#Create data
# Example data: six categories with a mix of positive and negative values.
x <- c("a", "b", "c", "d", "e", "f")
# Store y as numbers directly; no string -> numeric round trip is needed.
y <- c(10, 9, -10, 11, -3, -15)
data <- data.frame(x, y)
# category is a factor: "0" marks negative y, "1" marks zero or positive y.
data$category <- factor(ifelse(data$y < 0, 0, 1))
#Graph
library(cowplot) #theme
library(ggplot2)
ggplot(data, aes(x=x, y=y)) +
geom_bar(stat = "identity", aes(fill=category)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
theme(axis.text.x = element_text(colour = "black"))
What I need is a way to change the label colors of "c", "e", and "f" to the color of my choosing. I tried toggling theme(aes(axis.text.x=element_text(colour=Air_pricier))) but that produced an error.

You can provide a vector of colors to the axis.text.x option of theme():
a <- ifelse(data$category == 0, "red", "blue")
ggplot(data, aes(x = x, y = y)) +
geom_bar(stat = "identity", aes(fill = category)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, colour = a))

I, too, get the warning message mentioned in @Mark Neal's comment; it makes me nervous. Here's an alternative approach with the ggtext package. You can wrap the categories for the x-axis in <span>s and specify the color you want, and then use element_markdown in the theme:
library(ggtext)
library(tidyverse)
# Wrap each x value in a coloured <span> and let element_markdown() render it.
data %>%
  mutate(
    # Only negative values are red; zero is treated as non-negative, which
    # matches how `category` is derived from y.
    x.label = paste0(
      "<span style = 'color: ",
      ifelse(y >= 0, "black", "red"),
      ";'>",
      x,
      "</span>"
    ),
    # Order the label levels by the underlying x value.
    x.label = fct_reorder(x.label, as.character(x))
  ) %>%
  ggplot(aes(x = x.label, y = y)) +
  geom_bar(stat = "identity", aes(fill = category)) +
  theme(axis.text.x = element_markdown(angle = 45, hjust = 1))

Building on a-s-k's answer I put this in a more flexible form using glue templates and a discrete scale. With this option you don't have to change your data but just define a labeler in the scale that does everything for you, this is handy if you want to color the x-axis in many similar plots with different data.
(In the case of the original question the color depends on more of the data, than just the x-values, but I guess this could still be handy for some users.)
library(ggtext)
library(tidyverse)
library(glue)
#Create data
# Example data: six categories with a mix of positive and negative values.
x <- c("a", "b", "c", "d", "e", "f")
# Store y as numbers directly; no string -> numeric round trip is needed.
y <- c(10, 9, -10, 11, -3, -15)
data <- data.frame(x, y)
# category is a factor: "0" marks negative y, "1" marks zero or positive y.
data$category <- factor(ifelse(data$y < 0, 0, 1))
# create the labels
# Build one HTML <span> per row of `data` and name each label by its x value,
# so the vector can be passed as `labels` to a discrete scale.
my_labels <- setNames(
  glue_data(
    data,
    "<span style='color: {if_else(category==0, 'red', 'blue')}'>{x}</span>"
  ),
  data$x
)
# plot as you normally would
# use element_markdown as axis.text.x
# and the labels defined before as labels in a discrete scale
ggplot(data, aes(x=x, y=y)) +
geom_bar(stat = "identity", aes(fill=category)) +
theme(
axis.text.x = element_markdown(angle = 45, hjust = 1)
) +
scale_x_discrete(labels=my_labels)

Related

How do I add p-values to a plotting function that I created to produce many boxplots from a data frame

I've created the plotting function below:
library(tidyverse)
#Data
airquality$Month <- as.factor(airquality$Month)
dat1 <- airquality%>%
filter(Month %in% c("5" , "6")) %>%
na.omit()
#Plotting function
# Draw a boxplot, split by Month, of the dat1 column whose name is given
# in `yvar` (a single string). Returns the ggplot object.
plotseries1 <- function(yvar) {
  # .data[[yvar]] looks the column up by name; it replaces the deprecated
  # aes_() / as.name() combination.
  ggplot(dat1, aes(x = Month, y = .data[[yvar]])) +
    geom_boxplot(aes(fill = Month)) +
    scale_fill_manual(values = c("red", "blue")) +
    geom_jitter(shape = 16, position = position_jitter(0.03)) +
    theme_bw() +
    scale_y_continuous(expand = expansion(mult = c(0, 0.15))) +
    # Label the axis with the column name itself.
    labs(y = yvar)
}
plots <- lapply(names ((dat1)[c(-5,-6)]), plotseries1)
plots
I would like to work out p-values using the wilcox.test and add them to the individual plots using geom_text. I'm aware that I could use stat_compare_means using the rstatix package, but the wilcoxon test in that package is not exact. I've tried the below code, but it's not working.
# NOTE(review): this is the asker's non-working attempt, kept as posted.
# The added comments below point out why it fails.
plotseries1<-function(yvar) {
#I would like to add wilcoxon test here
# `dat1$yvar` asks for a column literally named "yvar", not the column whose
# name is stored in the variable yvar; dat1[[yvar]] is the by-name lookup.
test <- wilcox.test(dat1$yvar~ dat1$Month) #I'm not sure how to get this to work
# `pvalue` is computed here but never used further down.
pvalue <- test$p.value
ggplot(dat1, aes_(x= ~Month, y= as.name(yvar))) +
geom_boxplot(aes (fill=Month)) +
scale_fill_manual(values=c("red", "blue")) +
geom_jitter(shape=16, position=position_jitter(0.03)) +
theme_bw() +
scale_y_continuous(expand = expansion(mult = c(0, 0.15)))
# The line above does not end with `+`, so the geom_text() call below is a
# separate expression and is not added to the plot built above.
#I would like to add p values to individual graphs using geom_text
# `test` is the object returned by wilcox.test(), not a data frame, so it is
# not suitable as the `data` argument of a layer.
geom_text(data = test,
aes(x = 1.5, y = Inf, label = format.pval(p.value, digits = 3)), #I'm not sure if this will work
inherit.aes = FALSE, hjust = "inward", vjust = "inward", size = 3.5)
}
How do I get the plotting function to work?
I was able to get the plot to show the p-value, but note that since you have ties, exact methods may not work. For the moment I am ignoring that statistical issue and focusing on the programming needed to get the value onto the plot. Also, ggplot expects its data in a data.frame, which your test result can't be coerced to as-is, so I tweaked that slightly. Aside from that, the code below should do what you want:
# Draw a boxplot, split by Month, of the dat1 column named by the string
# `yvar`, and print the Wilcoxon rank-sum p-value at the top of the panel.
# Returns the ggplot object.
plotseries1 <- function(yvar) {
  # Build `<yvar> ~ Month` from the column name and let wilcox.test() find
  # the columns in dat1, instead of pasting "dat1$..." strings together.
  fm <- reformulate("Month", response = as.name(yvar))
  test <- wilcox.test(fm, data = dat1, exact = FALSE) # no exact p-value with ties
  # geom_text() needs a data frame; give the column an explicit name so the
  # aes() below refers to a real column.
  pvalue <- data.frame(p.value = test$p.value)
  ggplot(dat1, aes(x = Month, y = .data[[yvar]])) +
    geom_boxplot(aes(fill = Month)) +
    scale_fill_manual(values = c("red", "blue")) +
    geom_jitter(shape = 16, position = position_jitter(0.03)) +
    theme_bw() +
    scale_y_continuous(expand = expansion(mult = c(0, 0.15))) +
    labs(y = yvar) +
    # x = 1.5 sits between the two Month boxes; y = Inf pins the label to the
    # top edge. format.pval() avoids showing very small p-values as 0.
    geom_text(
      data = pvalue,
      aes(x = 1.5, y = Inf, label = format.pval(p.value, digits = 3)),
      inherit.aes = FALSE, hjust = "inward", vjust = "inward", size = 3.5
    )
}
plot.pval <- lapply(names((dat1)[c(-5,-6)]), plotseries1)
plot.pval

Change colors of part of y axis ticks in heatmap

I have a heatmap:
heatmap <- ggplot(df, aes(ID, Name)) +
geom_raster(aes(fill = N))
I want to change the colors of the y-axis tick labels in the heatmap if they are equal to 300, 301, or 302. How could I do that? It's unclear to me how to do that for only some of the ticks. For all of them, I add
theme(axis.text.x = element_text(colour="black"), axis.text.y = element_text(colour="red"))
but how just for 300, 301, 302?
Instead of a single color, you have to use a vector of colors in axis.text.y
library(ggplot2)
# using mtcars data
# Example data: one row per mtcars car, with Name values starting at 300.
df <- data.frame(
  Name = seq(300L, length.out = nrow(mtcars)),
  ID = seq_len(nrow(mtcars)),
  N = mtcars$mpg
)
# One colour per Name value: red for 300, 301 and 302, black for the rest.
tick_colors <- ifelse(df$Name %in% c(300, 301, 302), "red", "black")
heatmap <- ggplot(df, aes(ID, Name)) +
  geom_raster(aes(fill = N))
# Put a break at every Name so there is one tick label per colour entry.
heatmap +
  scale_y_continuous(breaks = df$Name) +
  theme(
    axis.text.x = element_text(colour = "black"),
    axis.text.y = element_text(colour = tick_colors)
  )
Output:
Axis text color changed
You need a vector of colors in element_text with the same number of elements as you have ticks. I'll change the 3rd element of the x axis ("C") green as a demo:
x <- LETTERS[1:5]
y <- paste0("var", seq(1,5))
data <- expand.grid(X=x, Y=y)
data$Z <- runif(25, 0, 5)
ggplot(data, aes(X, Y, fill= Z)) +
geom_tile() +
scale_fill_gradient(low="white", high="blue") +
theme(axis.text.x = element_text(colour = c('black', 'black','green', 'black', 'black')))
Note: ggplot doesn't love vectorized input to element_text and will post a warning saying as such.

ggplot2 add data from additional data frame next to plot

I would like to be able to extend my boxplots with additional information. Here is a working example for ggplot2:
library(ggplot2)
ToothGrowth$dose <- as.factor(ToothGrowth$dose)
# Basic box plot
p <- ggplot(ToothGrowth, aes(x=dose, y=len)) +
geom_boxplot()
# Rotate the box plot
p + coord_flip()
I would like to add additional information from a separate data frame. For example:
extra <- data.frame(dose=factor(c(0.5,1,2)), label=c("Label1", "Label2", "Label3"), n=c("n=42","n=52","n=35"))
> extra
dose label n
1 0.5 Label1 n=42
2 1 Label2 n=52
3 2 Label3 n=35
I would like to create the following figure where the information to each dose (factor) is outside the plot and aligns with each of the dose levels (I made this in powerpoint as an example):
EDIT:
I would like to ask advice for an extension of the initial question.
What about this extension where I use fill to split up dose by the two groups?
ToothGrowth$dose <- as.factor(ToothGrowth$dose)
ToothGrowth$group <- head(rep(1:2, 100), dim(ToothGrowth)[1])
ToothGrowth$group <- factor(ToothGrowth$group)
p <- ggplot(ToothGrowth, aes(x=dose, y=len, fill=group)) +
geom_boxplot()
# Rotate the box plot
p + coord_flip()
extra <- data.frame(
dose=factor(rep(c(0.5,1,2), each=2)),
group=factor(rep(c(1:2), 3)),
label=c("Label1A", "Label1B", "Label2A", "Label2B", "Label3A", "Label3B"),
n=c("n=12","n=30","n=20", "n=32","n=15","n=20")
)
Is it possible to align data from the new data frame (extra, 6 rows) with each of the dose/group combinations?
We can use geom_text with clip = "off" inside coord_flip:
ggplot(ToothGrowth, aes(x=dose, y=len)) +
geom_boxplot() +
geom_text(
y = max(ToothGrowth$len) * 1.1,
data = extra,
aes(x = dose, label = sprintf("%s\n%s", label, n)),
hjust = 0) +
coord_flip(clip = "off") +
theme(plot.margin = unit(c(1, 5, 0.5, 0.5), "lines"))
Explanation: We place text outside of the plot area with geom_text and disable clipping with clip = "off" inside coord_flip. Lastly, we increase the plot margin to accommodate the additional labels. You can adjust the vertical y position in the margin (so the horizontal position in the plot because of the coordinate flip) by changing the factor in y = max(ToothGrowth$len) * 1.1.
In response to your edit, here is a possibility
extra <- data.frame(
dose=factor(rep(c(0.5,1,2), each=2)),
group=factor(rep(c(1:2), 3)),
label=c("Label1A", "Label1B", "Label2A", "Label2B", "Label3A", "Label3B"),
n=c("n=12","n=30","n=20", "n=32","n=15","n=20")
)
library(tidyverse)
# Boxplots of len by dose, dodged by group, with the matching label from
# `extra` drawn to the right of each box (outside the panel).
ToothGrowth %>%
  mutate(
    dose = as.factor(dose),
    # length.out keeps the alternating 1/2 pattern valid for any row count;
    # rep(1:2, nrow / 2) only fills the column when the row count is even.
    group = as.factor(rep(1:2, length.out = n()))
  ) %>%
  ggplot(aes(x = dose, y = len, fill = group)) +
  geom_boxplot(position = position_dodge(width = 1)) +
  geom_text(
    data = extra %>%
      mutate(
        dose = as.factor(dose),
        group = as.factor(group),
        # Label position: just past the largest observed len.
        ymax = max(ToothGrowth$len) * 1.1
      ),
    aes(x = dose, y = ymax, label = sprintf("%s\n%s", label, n)),
    # Same dodge width as the boxes so labels line up with them.
    position = position_dodge(width = 1),
    size = 3,
    hjust = 0
  ) +
  # clip = "off" lets the labels draw in the margin; ylim keeps the panel
  # range on the data rather than stretching to the label position.
  coord_flip(clip = "off", ylim = c(0, max(ToothGrowth$len))) +
  theme(
    plot.margin = unit(c(1, 5, 0.5, 0.5), "lines"),
    legend.position = "bottom"
  )
A few comments:
We ensure that labels match the dodged bars by using position_dodge(width = 1) inside geom_text and geom_boxplot.
It seems that position_dodge does not like a global y (outside of aes). So we include the y position for the labels in extra and use it inside aes. As a result, we need to explicitly limit the range of the y axis. We can do that inside coord_flip with ylim = c(0, max(ToothGrowth$len)).

R bubble plot using ggplot manually selecting the colour and axis names

I am using ggplot to create a bubble plot with this code:
ggplot(df, aes(x = order, y = mean, size = n, fill = name)) +
geom_point(shape = 21) +
theme_bw() +
theme() +
scale_size(range = c(1, 50)) +
ylim(0,100)
It is working perfectly apart from 2 things:
For each name (fill) I would like to manually specify the colour used (via a dataframe that maps name to colour) - this is to provide consistency across multiple figures.
I would like to substitute the numbers on the y for text labels (for several reasons I cannot use the text labels from the outset due to ordering issues)
I have tried several methods using scale_color_manual() and scale_y_continuous respectively and I am getting nowhere! Any help would be very gratefully received!
Thanks
Since you have not specified an example df, I created one of my own.
To manually specify the color, you have to use scale_fill_manual with a named vector as the argument of values.
Edit 2
This appears to do what you want. We use scale_y_continuous. The breaks argument specifies the vector of positions, while the labels argument specifies the labels which should appear at those positions. Since we already created the vectors when creating the data frame, we simply pass those vectors as arguments.
# Bubble plot with text labels on the y axis in place of the numeric values.
ggplot(df, aes(x = order, y = mean, size = n, fill = name)) +
  geom_point(shape = 21) +
  scale_fill_manual(values = gcolors) +
  scale_size(limits = range(df$n)) +
  # Take breaks and labels from df: a bare `mean` resolves to base::mean()
  # unless a vector of that name happens to exist in the workspace.
  scale_y_continuous(breaks = df$mean, labels = df$order_label)
Edit 1
From your comment, it appears that you want to label the circles. One option would be to use geom_text. Code below. You may need to experiment with values of nudge_y to get the position correct.
order <- c(1, 2)
mean <- c(0.75, 0.3)
n <- c(180, 200)
name <- c("a", "b")
order_label <- c("New York", "London")
df <- data.frame(order, mean, n, name, order_label, stringsAsFactors = FALSE)
color <- c("blue", "red")
name_color <- data.frame(name, color, stringsAsFactors = FALSE)
gcolors <- name_color[, 2]
names(gcolors) <- name_color[, 1]
ggplot(df, aes(x = order, y = mean, size = n, fill = name)) +
geom_point(shape = 21) +
geom_text(aes(label = order_label), size = 3, hjust = "inward",
nudge_y = 0.03) +
scale_fill_manual(values = gcolors) +
scale_size(limits = c(min(df$n), max(df$n))) +
theme(axis.text.y = element_blank(),
axis.ticks.y = element_blank()) +
ylab(NULL)
Original Answer
It is not clear what you mean by "substitute the numbers on the y for text labels". In the example below, I have formatted the y-axis as a percentage using the scales::percent_format() function. Is this similar to what you want?
order <- c(1, 2)
mean <- c(0.75, 0.3)
n <- c(180, 200)
name <- c("a", "b")
df <- data.frame(order, mean, n, name, stringsAsFactors = FALSE)
color <- c("blue", "red")
name_color <- data.frame(name, color, stringsAsFactors = FALSE)
gcolors <- name_color[, 2]
names(gcolors) <- name_color[, 1]
ggplot(df, aes(x = order, y = mean, size = n, fill = name)) +
geom_point(shape = 21) +
scale_fill_manual(values = gcolors) +
scale_size(limits = c(min(df$n), max(df$n))) +
scale_y_continuous(labels = scales::percent_format())
Thanks, for all your help, this worked perfectly:
# Bubble plot with text labels on the x axis in place of the numeric order.
ggplot(df, aes(x = order, y = mean, size = n, fill = name)) +
  geom_point(shape = 21) +
  scale_fill_manual(values = gcolors) +
  scale_size(limits = range(df$n)) +
  # Take breaks and labels from df: a bare `order` resolves to base::order()
  # unless a vector of that name happens to exist in the workspace.
  scale_x_continuous(breaks = df$order, labels = df$order_label)

Plot with multiple breaks of different sizes

I would like to create a plot with multiple breaks of different sized intervals on the y axis. The closest post I could find is this Show customised X-axis ticks in ggplot2 But it doesn't fully solve my problem.
# dummy data
# library() stops with an error if a package is missing; require() only
# returns FALSE and lets the script fail later.
library(ggplot2)
library(reshape2)
# Two series on very different scales (around 15 and around 1500).
a <- rnorm(mean = 15, sd = 1.5, n = 100)
b <- rnorm(mean = 1500, sd = 150, n = 100)
df <- data.frame(a = a, b = b)
df$x <- factor(seq_len(100), ordered = TRUE)
# Long format: one row per (x, variable) pair; x is the id column.
df.m <- melt(df, id.vars = "x")
ggplot(data = df.m, aes(x = x, y = value, colour = variable, group = variable)) +
  geom_line() +
  scale_y_continuous(
    breaks = c(
      seq(from = 0, to = 20, by = 1),
      # Upper limit comes from the plotted values; no `y` object is defined
      # in this snippet.
      seq(from = 1100, to = max(df.m$value), by = 100)
    )
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
The problem is how to get the first set of breaks to be proportional to the second (thus visible).
Any pointer would be very much appreciated, thanks!
You can try something like this:
# Rearrange the factors in the data.frame
df.m$variable <- factor(df.m$variable, levels = c("b", "a"))
ggplot(data = df.m, aes(x = x, y=value, colour=variable, group=variable)) +
geom_line() + facet_grid(variable~., scales = "free")
Hope this helps

Resources