I'm working on a larger project for which I am creating several plots in ggplot2. The plots are concerned with plotting several different outcomes across several different discrete categories (think: countries, species, types). I would like to completely fix the mapping of discrete types to colors such that Type=A is always displayed in red, Type=B is always displayed in blue, and so on across all plots irrespective of what other factors are present. I know about scale_fill_manual() where I can provide color values manually and then work with drop = FALSE which helps in dealing with unused factor levels. However, I find this extremely cumbersome since every plot will need some manual work to deal with sorting the factors in the right way, sorting color values to match factor sorting, dropping unused levels, etc.
What I am looking for is a way where I can map once and globally factor levels to specific colors (A=green, B=blue, C=red, ...) and then just go about plotting whatever I please and ggplot picking the right colors.
Here is some code to illustrate the point.
# Full set: all 4 categories are present
df1 <- data.frame(
  Value = c(40, 20, 10, 60),
  Type = c("A", "B", "C", "D")
)
ggplot(df1, aes(x = Type, y = Value, fill = Type)) +
  geom_bar(stat = "identity")

# Colors change completely because only 3 factor levels are present
df2 <- data.frame(
  Value = c(40, 20, 60),
  Type = c("A", "B", "D")
)
ggplot(df2, aes(x = Type, y = Value, fill = Type)) +
  geom_bar(stat = "identity")

# Colors change because the factor levels are sorted differently
df3 <- data.frame(
  Value = c(40, 20, 10, 60),
  Type = c("A", "B", "C", "D")
)
df3$Type <- ordered(df3$Type, levels = c("D", "C", "B", "A"))
ggplot(df3, aes(x = Type, y = Value, fill = Type)) +
  geom_bar(stat = "identity")
You could define your own custom scale, if you like. If you look at the source for scale_fill_manual,
scale_fill_manual
#> function (..., values)
#> {
#> manual_scale("fill", values, ...)
#> }
#> <environment: namespace:ggplot2>
it's actually quite simple:
library(ggplot2)
#' Fill scale with a fixed category-to-colour mapping
#'
#' Assigns the colours green, blue, red and orange to the categories
#' "A", "B", "C" and "D" through a named vector.
#'
#' @param ... Further arguments forwarded to `ggplot2::scale_fill_manual()`.
#' @return The scale object created by `ggplot2::scale_fill_manual()`.
scale_fill_chris <- function(...) {
  # Use the exported scale_fill_manual() rather than the unexported
  # ggplot2:::manual_scale(), which is internal and may change between
  # ggplot2 releases.
  ggplot2::scale_fill_manual(
    # Names are the category labels, so values are matched by name.
    values = setNames(c('green', 'blue', 'red', 'orange'), LETTERS[1:4]),
    ...
  )
}
df1 <- data.frame(Value = c(40, 20, 10, 60),
Type = c("A", "B", "C", "D"))
ggplot(df1, aes(x = Type, y = Value, fill = Type)) +
geom_col() +
scale_fill_chris()
df2 <- data.frame(Value = c(40, 20, 60),
Type = c("A", "B", "D"))
ggplot(df2, aes(x = Type, y = Value, fill = Type)) +
geom_col() +
scale_fill_chris()
df3 <- data.frame(Value = c(40, 20, 10, 60),
Type = c("A", "B", "C", "D"))
df3$Type <- factor(df3$Type, levels = c("D", "C", "B", "A"), ordered = TRUE)
ggplot(df3, aes(x = Type, y = Value, fill = Type)) +
geom_col() +
scale_fill_chris()
You could make a custom plot function (including scale_fill_manual and reasonable default colours) in order to avoid repeating code:
library(ggplot2)
#' Bar chart of `Value` by `Type` with a manual fill scale
#'
#' @param .data Data frame containing the columns `Type` and `Value`.
#' @param colours Named vector of fill colours, passed as `values` to
#'   `scale_fill_manual()`; the default names are "A" to "D".
#' @return The ggplot object built from the data, bars and fill scale.
custom_plot <- function(.data,
                        colours = c("A" = "green", "B" = "blue", "C" = "red", "D" = "grey")) {
  bar_mapping <- aes(x = Type, y = Value, fill = Type)
  bar_layer <- geom_bar(stat = "identity")
  fill_scale <- scale_fill_manual(values = colours)
  ggplot(.data, bar_mapping) + bar_layer + fill_scale
}
df1 <- data.frame(Value=c(40, 20, 10, 60), Type=c("A", "B", "C", "D"))
df2 <- data.frame(Value=c(40, 20, 60), Type=c("A", "B", "D"))
df3 <- data.frame(Value=c(40, 20, 10, 60), Type=c("A", "B", "C", "D"))
df3$Type <- factor(df3$Type, levels=c("D", "C", "B", "A"), ordered=TRUE)
custom_plot(df1)
custom_plot(df2)
custom_plot(df3)
Another option is to make drop = FALSE the default by defining the default colour scales as follows:
# Wrappers named after the default discrete colour/fill scales. Each one
# forwards its arguments to the matching manual scale and always sets
# drop = FALSE (spelled out, because the shorthand `F` can be reassigned).
# NOTE(review): scale_*_manual() normally expects a `values` argument;
# confirm that callers supply it through `...`.
scale_colour_discrete <- function(...) {
  scale_colour_manual(..., drop = FALSE)
}
scale_fill_discrete <- function(...) {
  scale_fill_manual(..., drop = FALSE)
}
That way colours are always consistent for different factors.
Make sure you convert that column into a factor first and then create a variable to store the color value for each factor level...
# Convert the grouping column to a factor with an explicit level order.
# factor() is needed here: as.factor() has no `levels` argument.
df$color <- factor(df$color, levels = c(1, 0))
# Named palette: names are the factor levels, values are the fill colours.
cbPallete <- c("1" = "green", "0" = "red")
# Map the columns inside aes() so that `fill` is a mapped aesthetic and
# scale_fill_manual() applies to it; stat = "identity" uses y as bar height.
ggplot(data = df, aes(x = x, y = y, fill = color)) +
  geom_bar(stat = "identity") +
  scale_fill_manual(values = cbPallete)
Related
I want to have a graph with a color scale, however the graph has certain deviations, where the first category is missing and then the color scale does not work any more. Is there a method to skip the first discrete color in a color scale?
I provide an example that does not work below. The first graph has category A in light blue, and the second B. But also in the second B should be dark blue.
I also found this question: How to change default color scheme in ggplot2?
# Install pacman if it cannot be loaded, then use it to load the tidyverse
if (!require("pacman")) install.packages("pacman")
pacman::p_load('tidyverse')
# NOTE(review): first_column has 3 values while second_column and freq have 8;
# 8 is not a multiple of 3, so data.frame() below cannot recycle it and fails
first_column <- c("value_1", "value_3", "value_2")
second_column <- c("A", "B", "C", "D", "A", "B", "C", "D")
freq <-c(23, 41, 32, 58, 11, 16, 19, 38)
df2 <- data.frame(first_column, second_column, freq)
# First plot: bars filled by second_column using the "Paired" brewer palette
ggplot(df2,
aes(x = first_column,
y = freq,
fill = second_column )) +
geom_bar(stat = "identity") +
scale_fill_brewer(palette="Paired")
# Copy of df2 in which every "A" in second_column is recoded to "C"
df3 <-df2
df3$second_column <- ifelse(df3$second_column == "A", "C", df3$second_column)
# Second plot: same code as the first plot, applied to df3
ggplot(df3,
aes(x = first_column,
y = freq,
fill = second_column )) +
geom_bar(stat = "identity") +
scale_fill_brewer(palette="Paired")
(As noted in the comments, your dataframe couldn't be created as given so I edited it.)
You can change second_column to a factor and add drop = FALSE to scale_fill_brewer(). This will retain factor level "A" in the legend even though there are no values, which will keep the colors consistent across plots.
library(ggplot2)
df2$second_column <- factor(df2$second_column)
# ... unchanged ggplot code for first plot ...
df3 <-df2
df3$second_column[df3$second_column == "A"] <- "C"
ggplot(df3, aes(x = first_column, y = freq, fill = second_column)) +
geom_bar(stat = "identity") +
scale_fill_brewer(palette = "Paired", drop = FALSE)
Edited example data:
first_column <- c(rep(c("value_1", "value_3"), each = 4))
second_column <- c("A", "B", "C", "D", "A", "B", "C", "D")
freq <- c(23, 41, 32, 58, 11, 16, 19, 38)
df2 <- data.frame(first_column, second_column, freq)
One option would be to switch to scale_fill_manual and use a named vector to assign colors to categories.
library(ggplot2)
pal_fill <- scales::brewer_pal(palette = "Paired")(4)
names(pal_fill) <- LETTERS[1:4]
pal_fill
#> A B C D
#> "#A6CEE3" "#1F78B4" "#B2DF8A" "#33A02C"
ggplot(
df3,
aes(
x = first_column,
y = freq,
fill = second_column
)
) +
geom_bar(stat = "identity") +
scale_fill_manual(values = pal_fill)
DATA
df3 <- structure(list(first_column = c("value_1", "value_3", "value_2",
"value_1", "value_3", "value_2", "value_1", "value_3"), second_column = c("C",
"B", "C", "D", "C", "B", "C", "D"), freq = c(23, 41, 32, 58,
11, 16, 19, 38)), row.names = c(NA, -8L), class = "data.frame")
I'm working on a larger project for which I am creating several plots in ggplot2. The plots are concerned with plotting several different outcomes across several different discrete categories (think: countries, species, types). I would like to completely fix the mapping of discrete types to colors such that Type=A is always displayed in red, Type=B is always displayed in blue, and so on across all plots irrespective of what other factors are present. I know about scale_fill_manual() where I can provide color values manually and then work with drop = FALSE which helps in dealing with unused factor levels. However, I find this extremely cumbersome since every plot will need some manual work to deal with sorting the factors in the right way, sorting color values to match factor sorting, dropping unused levels, etc.
What I am looking for is a way where I can map once and globally factor levels to specific colors (A=green, B=blue, C=red, ...) and then just go about plotting whatever I please and ggplot picking the right colors.
Here is some code to illustrate the point.
# Full set: all 4 categories are present
df1 <- data.frame(
  Value = c(40, 20, 10, 60),
  Type = c("A", "B", "C", "D")
)
ggplot(df1, aes(x = Type, y = Value, fill = Type)) +
  geom_bar(stat = "identity")

# Colors change completely because only 3 factor levels are present
df2 <- data.frame(
  Value = c(40, 20, 60),
  Type = c("A", "B", "D")
)
ggplot(df2, aes(x = Type, y = Value, fill = Type)) +
  geom_bar(stat = "identity")

# Colors change because the factor levels are sorted differently
df3 <- data.frame(
  Value = c(40, 20, 10, 60),
  Type = c("A", "B", "C", "D")
)
df3$Type <- ordered(df3$Type, levels = c("D", "C", "B", "A"))
ggplot(df3, aes(x = Type, y = Value, fill = Type)) +
  geom_bar(stat = "identity")
You could define your own custom scale, if you like. If you look at the source for scale_fill_manual,
scale_fill_manual
#> function (..., values)
#> {
#> manual_scale("fill", values, ...)
#> }
#> <environment: namespace:ggplot2>
it's actually quite simple:
library(ggplot2)
#' Fill scale with a fixed category-to-colour mapping
#'
#' Assigns the colours green, blue, red and orange to the categories
#' "A", "B", "C" and "D" through a named vector.
#'
#' @param ... Further arguments forwarded to `ggplot2::scale_fill_manual()`.
#' @return The scale object created by `ggplot2::scale_fill_manual()`.
scale_fill_chris <- function(...) {
  # Use the exported scale_fill_manual() rather than the unexported
  # ggplot2:::manual_scale(), which is internal and may change between
  # ggplot2 releases.
  ggplot2::scale_fill_manual(
    # Names are the category labels, so values are matched by name.
    values = setNames(c('green', 'blue', 'red', 'orange'), LETTERS[1:4]),
    ...
  )
}
df1 <- data.frame(Value = c(40, 20, 10, 60),
Type = c("A", "B", "C", "D"))
ggplot(df1, aes(x = Type, y = Value, fill = Type)) +
geom_col() +
scale_fill_chris()
df2 <- data.frame(Value = c(40, 20, 60),
Type = c("A", "B", "D"))
ggplot(df2, aes(x = Type, y = Value, fill = Type)) +
geom_col() +
scale_fill_chris()
df3 <- data.frame(Value = c(40, 20, 10, 60),
Type = c("A", "B", "C", "D"))
df3$Type <- factor(df3$Type, levels = c("D", "C", "B", "A"), ordered = TRUE)
ggplot(df3, aes(x = Type, y = Value, fill = Type)) +
geom_col() +
scale_fill_chris()
You could make a custom plot function (including scale_fill_manual and reasonable default colours) in order to avoid repeating code:
library(ggplot2)
#' Bar chart of `Value` by `Type` with a manual fill scale
#'
#' @param .data Data frame containing the columns `Type` and `Value`.
#' @param colours Named vector of fill colours, passed as `values` to
#'   `scale_fill_manual()`; the default names are "A" to "D".
#' @return The ggplot object built from the data, bars and fill scale.
custom_plot <- function(.data,
                        colours = c("A" = "green", "B" = "blue", "C" = "red", "D" = "grey")) {
  bar_mapping <- aes(x = Type, y = Value, fill = Type)
  bar_layer <- geom_bar(stat = "identity")
  fill_scale <- scale_fill_manual(values = colours)
  ggplot(.data, bar_mapping) + bar_layer + fill_scale
}
df1 <- data.frame(Value=c(40, 20, 10, 60), Type=c("A", "B", "C", "D"))
df2 <- data.frame(Value=c(40, 20, 60), Type=c("A", "B", "D"))
df3 <- data.frame(Value=c(40, 20, 10, 60), Type=c("A", "B", "C", "D"))
df3$Type <- factor(df3$Type, levels=c("D", "C", "B", "A"), ordered=TRUE)
custom_plot(df1)
custom_plot(df2)
custom_plot(df3)
Another option is to make drop = FALSE the default by defining the default colour scales as follows:
# Wrappers named after the default discrete colour/fill scales. Each one
# forwards its arguments to the matching manual scale and always sets
# drop = FALSE (spelled out, because the shorthand `F` can be reassigned).
# NOTE(review): scale_*_manual() normally expects a `values` argument;
# confirm that callers supply it through `...`.
scale_colour_discrete <- function(...) {
  scale_colour_manual(..., drop = FALSE)
}
scale_fill_discrete <- function(...) {
  scale_fill_manual(..., drop = FALSE)
}
That way colours are always consistent for different factors.
Make sure you convert that column into a factor first and then create a variable to store the color value for each factor level...
# Convert the grouping column to a factor with an explicit level order.
# factor() is needed here: as.factor() has no `levels` argument.
df$color <- factor(df$color, levels = c(1, 0))
# Named palette: names are the factor levels, values are the fill colours.
cbPallete <- c("1" = "green", "0" = "red")
# Map the columns inside aes() so that `fill` is a mapped aesthetic and
# scale_fill_manual() applies to it; stat = "identity" uses y as bar height.
ggplot(data = df, aes(x = x, y = y, fill = color)) +
  geom_bar(stat = "identity") +
  scale_fill_manual(values = cbPallete)
With ggplot2 I want to plot two vectors (vec1_num, vec2_num) in two dimensions and colour the points by a group variable (vec3_char). Some data points are overlapping.
library(ggplot2)
vec1_num = c(1,2,3,4,1,3,4,5,5,5)
vec2_num = c(1,2,3,4,1,3,4,5,5,5)
vec3_char = c("A", "B", "C", "A", "B", "C", "C", "A", "B", "C")
# plot 1
ggplot(data = NULL) +
geom_point(aes(x=vec1_num, y=vec2_num, colour=vec3_char), alpha=0.4, size=4) +
scale_colour_manual(values=c("A"="darkblue", "B"="darkred", "C"="orange")) +
theme(panel.grid = element_blank())
I know I can attenuate the overlap by reducing alpha or working with geom_jitter adding a bit of noise. Like this:
# plot 2
ggplot(data = NULL) +
geom_jitter(aes(x=vec1_num, y=vec2_num, colour=vec3_char), alpha=0.4, size=4, width = 0.1) +
scale_colour_manual(values=c("A"="darkblue", "B"="darkred", "C"="orange")) +
theme(panel.grid = element_blank())
However, is it possible to make use of plot 1 but colour the overlapping points differently? So that, for example, "A" = "darkblue", "AB" = "black", "ABC" = "grey", "B" = "darkred", "BC" = "pink", "C" = "orange"? And can I additionally add a small Venn diagram (legend) that visualises the colour choice for the point overlap?
Thanks!
My way of doing this would be to convert the letters into numbers, sum them and convert back into letters.
NB The one complication is that the letters need to be A, B, D, H, ... so there is only one way of making each number combination. Though there is probably a way to start with A, B, C, ... and encode for unique values
library(tidyverse)
vec1_num = c(1,2,3,4,1,3,4,5,5,5)
vec2_num = c(1,2,3,4,1,3,4,5,5,5)
vec3_char = c("A", "B", "D", "A", "B", "D", "D", "A", "B", "D")
# Remove consecutive duplicated characters from each string, e.g. "1122" -> "12".
# Vectorised: every element of `str` is processed, so the function can be
# applied to a whole column; returns a character vector the same length as `str`.
removeDup <- function(str) {
  vapply(
    strsplit(str, ""),
    # rle() collapses each run of identical characters to a single value
    function(chars) paste(rle(chars)$values, collapse = ""),
    character(1),
    USE.NAMES = FALSE
  )
}
data <- data.frame(x = vec1_num, y = vec2_num, col = match(vec3_char, LETTERS))
data <- data %>%
group_by(x) %>%
mutate(colour = glue::glue_collapse(col, sep = "")) %>%
select(-col) %>%
distinct(x, y, .keep_all = TRUE) %>%
mutate(colour = removeDup(colour)) %>%
mutate(colour = sapply(str_extract_all(colour, '\\d'), function(x) sum(as.integer(x)))) %>%
mutate(colour = case_when(
colour == 1 ~ "A",
colour == 2 ~ "B",
colour == 3 ~ "AB",
colour == 4 ~ "D",
colour == 5 ~ "AD",
colour == 6 ~ "BD",
colour == 7 ~ "ABD"
))
# plot 1
ggplot(data) +
geom_point(aes(x=x, y=y, colour = as_factor(colour)), alpha=0.4, size=4) +
geom_text(aes(x = x, y = y, label = colour), vjust = 2) +
scale_colour_manual(values=c("A"="darkblue", "B"="darkred", "AB"="orange", "D" = "green", "AD" = "black", "BD" = "orange", "ABD" = "purple"), name = "Colour") +
theme(panel.grid = element_blank())
.
I would firstly create a dataframe. Then I would extract for every x y combination (list(df$vec1_num, df$vec2_num)) what characters are present (...unique(xy_i$vec3_char)...). Like this:
# Combine the three vectors into one data frame
df <- data.frame(vec1_num, vec2_num, vec3_char)
# Split df by each (vec1_num, vec2_num) combination, annotate every group with
# the letters found there, and row-bind the groups back into one data frame
df_new <- do.call("rbind.data.frame", by(df, list(df$vec1_num, df$vec2_num), function(xy_i){
# Sorted unique letters of this group pasted into one label, e.g. "AB"
chars_i <- paste0(sort(unique(xy_i$vec3_char)),collapse= "")
# Store the label on every row of the group as a factor with a fixed level order
xy_i$chars_comb <- factor(chars_i, levels= c("A", "AB", "AC", "ABC", "B", "BC", "C"))
xy_i
}))
If you now make the plot it shows you what characters overlap at which point.
ggplot(data = df_new) +
geom_point(aes(x=vec1_num, y=vec2_num, colour=chars_comb), alpha=0.4, size=4) +
scale_colour_manual(values=c("AB" = "black", "ABC" = "grey", "B" = "darkred", "C"="orange", "AC"= "red")) +
theme(panel.grid = element_blank())
I've created a dodged bar chart in ggplot2 with geom_col(). The code looks like this:
cat <- c("A", "A", "A", "A","B", "B", "B", "B")
var <- c("X", "Y", "Z", "T", "X", "Y", "Z", "T")
val <- c(35, 25, 20, 20, 40, 10, 15, 35)
df <- data.frame(var, cat, val)
ggplot(data = df) +
geom_col(aes(x = var, y = val, fill = cat), position = "dodge")
This produces the following plot:
I would like each variable to have a different filling colour, for example T = Green, X = Blue etc. and still keep a colour separation between the categories, for example T-A = darkgreen, T-B = lightgreen, X-A = darkblue, X-B = lightblue etc.
Is there an easy way to add this feature?
Thanks!
I think the easiest way to do what you're asking is to use the alpha scale:
ggplot(data = df) +
geom_col(aes(x = var, y = val, fill = var, alpha = cat),
position = "dodge") +
scale_alpha_discrete(range = c(0.5, 1), guide = guide_none()) +
theme_classic()
If you really want to use a grid in the background and don't want to see lines through the pale bars, make sure you plot some white bars of the same dimension underneath:
ggplot(data = df) +
geom_col(aes(x = var, y = val, group = cat),
position = "dodge", fill = "white", alpha = 1) +
geom_col(aes(x = var, y = val, fill = var, alpha = cat),
position = "dodge") +
scale_alpha_discrete(range = c(0.5, 1), guide = guide_none())
Maybe this can be useful:
library(ggplot2)
#Data
cat <- c("A", "A", "A", "A","B", "B", "B", "B")
var <- c("X", "Y", "Z", "T", "X", "Y", "Z", "T")
val <- c(35, 25, 20, 20, 40, 10, 15, 35)
df <- data.frame(var, cat, val)
#Plot
ggplot(data = df) +
geom_col(aes(x = var, y = val, fill = interaction(var,cat)), position = "dodge")+
labs(fill='Var')
Output:
You can customize colors with scale_fill_*(). Here an example using a fill scale from ggsci package:
#Plot 2
ggplot(data = df) +
geom_col(aes(x = var, y = val, fill = interaction(var,cat)), position = "dodge")+
labs(fill='Var')+
ggsci::scale_fill_futurama()
Output:
I have a dataframe like the following:
df = data.frame(cat = rep(c("A", "B", "C", "D"), each = 20), val = runif(80))
And an annotation dataframe like the following:
ann = data.frame(cat = c("A", "B", "C", "D"), col = c(34, 84, 23, 85))
I want to make a boxplot for each of these cats along the x-axis, and the value in the data frame as the y-axis, but I also want to color each boxplot by the value in ann$col (continuous color mapping).
I can get the boxplot like the following:
ggplot(df, aes(x = variable, y = BACC)) +
geom_boxplot(width = 0.12)
But I am unsure how to color each boxplot by the category value.
How can this be done?
Thanks,
Jack
There are a few (minor) syntax errors in your data. Nevertheless, you are probably looking for this:
# Example data: 80 random values, 20 for each of the four categories
df <- data.frame(cat = rep(c("A", "B", "C", "D"), 20), val = runif(80))
# One annotation value per category; it drives the continuous fill colour
ann <- data.frame(cat = c("A", "B", "C", "D"),
                  col = c(34, 84, 23, 85))
library(dplyr) # inner_join comes with dplyr
# Join both into a three-column data frame (cat, val, col) by matching
# on the column 'cat'
df_ann <- inner_join(df, ann, by = 'cat')
# One boxplot per category, filled according to its annotation value
ggplot(df_ann, aes(x = cat, y = val, fill = col)) +
  geom_boxplot(width = 0.12) +
  scale_fill_gradientn(colours = rainbow(9))