Add custom ggplot legend - r

I'm trying to add a custom legend to my ggplot, similar to the examples in: http://docs.ggplot2.org/0.9.2.1/scale_gradientn.html
I want the bars in the plot to be colored according to the df$col column and for that reason I'm using scale_fill_manual with values = coloursv.
# Reproducible example data: 10 random -10*log10(p) values (fixed seed), one per
# letter a-j, with every row initially assigned the colour "#E0E0FF".
set.seed(1)
df <- data.frame(log10.p.value = -10*log10(runif(10,0,1)), y = letters[1:10], col = rep("#E0E0FF",10), stringsAsFactors = F)
# Specify colour by log10.p.value: the thresholds are applied in increasing
# order, so each row ends up with the colour of the highest threshold it exceeds.
df$col[which(df$log10.p.value > 2)] <- "#EBCCD6"
df$col[which(df$log10.p.value > 4)] <- "#E09898"
df$col[which(df$log10.p.value > 6)] <- "#C74747"
df$col[which(df$log10.p.value > 8)] <- "#B20000"
# Truncate bars: cap values at 10, the upper y-axis limit used in the plot below.
df$log10.p.value[which(df$log10.p.value > 10)] <- 10
# Named vector whose names and values are both the hex codes, passed to
# scale_fill_manual() below.
coloursv <- df$col
names(coloursv) <- df$col
# NOTE(review): fill is mapped to a discrete factor, yet both scale_fill_manual()
# and scale_fill_gradientn() are added for that same aesthetic, and the gradient
# breaks (-4..0) lie outside the 0..10 data range. This is the code the question
# asks about; it is intentionally left unchanged.
p <- ggplot(df, aes(y=log10.p.value,x=y,fill=as.factor(col)))+
geom_bar(stat="identity",width=0.2) +
scale_y_continuous(limits=c(0,10)) +
theme(axis.text=element_text(size=10)) +
scale_fill_manual(values = coloursv)+coord_flip()+
scale_fill_gradientn(colours=c("#EBCCD6","#E09898","#C74747","#B20000","#E0E0FF"),
breaks=c(-4,-3,-2,-1,0),guide="colorbar",labels=c(2,4,6,8,10))
And getting nothing:

You're not getting a legend, because you have scale_fill_manual(values = coloursv,guide=F) and guide=F prevents a fill legend from being shown.
On the other hand, scale_color_gradientn sets a color aesthetic, but you don't have a color aesthetic in your plot. You probably meant scale_fill_gradientn here (in which case you wouldn't want to also have scale_fill_manual). However, even if you switch this statement to scale_fill_gradientn, you've set the breaks to be at values that are outside the range of the values in your data (breaks range from -4 to 0, but data ranges from 0.4 to 10).
Adding df$col doesn't set the colors that get plotted. It just creates a categorical variable with different category values in different ranges of log10.p.value. You could have called the category values anything, and ggplot2 has a default color palette that's the same regardless of the category names and depends only on the number of categories. If you want categorical values, you can instead use the cut function as shown below.
Here are a few examples to illustrate various fill options and legends:
# Bin log10.p.value into the intervals (0,2], (2,4], (4,6], (6,8], (8,10]
df$log10.p.value.cat <- cut(df$log10.p.value, breaks = seq(0, 10, by = 2))

# Horizontal bar chart; each bar is filled according to its interval, using
# ggplot2's default discrete palette
p1 <- ggplot(df, aes(x = y, y = log10.p.value, fill = log10.p.value.cat)) +
  geom_bar(stat = "identity", width = 0.2) +
  scale_y_continuous(limits = c(0, 10)) +
  theme(axis.text = element_text(size = 10)) +
  coord_flip()
The plot below is the one in your question with the legend included. Note that the ordering of the colors in scale_fill_manual has to match the order of the corresponding values in log10.p.value.cat in order to get the desired color for each category.
# Same chart as p1, but with one hand-picked colour per interval.
# The colours are listed in the order of the levels of log10.p.value.cat.
p1a <- ggplot(df, aes(x = y, y = log10.p.value, fill = log10.p.value.cat)) +
  geom_bar(stat = "identity", width = 0.2) +
  scale_fill_manual(
    values = c("#E0E0FF", "#EBCCD6", "#E09898", "#C74747", "#B20000")
  ) +
  scale_y_continuous(limits = c(0, 10)) +
  theme(axis.text = element_text(size = 10)) +
  coord_flip()
# Map fill to the numeric log10.p.value itself, which gives a continuous
# colour gradient (default colours) instead of discrete categories
p2 <- ggplot(df, aes(x = y, y = log10.p.value, fill = log10.p.value)) +
  geom_bar(stat = "identity", width = 0.2) +
  scale_y_continuous(limits = c(0, 10)) +
  theme(axis.text = element_text(size = 10)) +
  coord_flip()
# Continuous fill as in p2, but interpolated through a custom list of colours,
# with colour-bar breaks at 0, 2, 4, 6, 8 and 10
p2a <- ggplot(df, aes(x = y, y = log10.p.value, fill = log10.p.value)) +
  geom_bar(stat = "identity", width = 0.2) +
  scale_fill_gradientn(
    colours = c("#EBCCD6", "#E09898", "#C74747", "#B20000", "#E0E0FF"),
    breaks = seq(0, 10, by = 2)
  ) +
  scale_y_continuous(limits = c(0, 10)) +
  theme(axis.text = element_text(size = 10)) +
  coord_flip()

Related

R: ggplot2 density plot shows wrong fill colors

I would like to plot densities of two variables ("red_variable", "green_variable") from two independent dataframes on one density plot, using red and green color for the two variables.
This is my attempt at coding:
library(ggplot2)
### Create dataframes
# Two independent one-column data frames, five observations each.
red_dataframe <- data.frame(red_variable = c(10,11,12,13,14))
green_dataframe <- data.frame(green_variable = c(6,7,8,9,10))
mean(red_dataframe$red_variable) # mean is 12
mean(green_dataframe$green_variable) # mean is 8
### Set colors
red_color= "#FF0000"
green_color= "#008000"
### Trying to plot densities with correct colors and correct legend entries
# NOTE(review): fill and alpha are given inside aes(), so they are treated as
# mapped data values rather than literal settings. This is the code the question
# asks about (the colours come out swapped); it is intentionally left unchanged.
ggplot() +
geom_density(aes(x=red_variable, fill = red_color, alpha=0.5), data=red_dataframe) +
geom_density(aes(x=green_variable, fill = green_color, alpha=0.5), data=green_dataframe) +
scale_fill_manual(labels = c("Density of red_variable", "Density of green_variable"), values = c(red_color, green_color)) +
xlab("X value") +
ylab("Density") +
labs(fill = "Legend") +
guides(alpha=FALSE)
Result: The legend shows correct colors, but the colors on the plot are wrong: The "red" variable is plotted with green color, the "green" variable with red color. The "green" density (mean=8) should appear left and the "red" density (mean=12) on the right on the x-axis. This behavior of the plot doesn't make any sense to me.
I can in fact get the desired result by switching red and green in the code:
### load ggplot2
library(ggplot2)
### Create dataframes
# Same data as in the first attempt.
red_dataframe <- data.frame(red_variable = c(10,11,12,13,14))
green_dataframe <- data.frame(green_variable = c(6,7,8,9,10))
mean(red_dataframe$red_variable) # mean is 12
mean(green_dataframe$green_variable) # mean is 8
### Set colors
red_color= "#FF0000"
green_color= "#008000"
### Trying to plot densities with correct colors and correct legend entries
# Only difference from the first attempt: the fill arguments of the two layers
# are swapped (red_variable gets green_color and vice versa). The asker reports
# that this produces the desired picture.
ggplot() +
geom_density(aes(x=red_variable, fill = green_color, alpha=0.5), data=red_dataframe) +
geom_density(aes(x=green_variable, fill = red_color, alpha=0.5), data=green_dataframe) +
scale_fill_manual(labels = c("Density of red_variable", "Density of green_variable"), values = c(red_color, green_color)) +
xlab("X value") +
ylab("Density") +
labs(fill = "Legend") +
guides(alpha=FALSE)
... While the plot makes sense now, the code doesn't. I cannot really trust code doing the opposite of what I would expect it to do. What's the problem here? Am I color blind?
In your code, in order to have each color at the right position, you need to specify fill = red_color or fill = green_color (as well as alpha, since it is a constant - as pointed out by @Gregor) outside of aes(), such as:
# Fragment: the leading and trailing "..." stand for the rest of the plot call.
# fill and alpha are passed as fixed layer arguments outside aes(), so they are
# constant settings rather than mapped values.
...+
geom_density(aes(x=red_variable), alpha=0.5, fill = red_color, data=red_dataframe) +
geom_density(aes(x=green_variable), alpha=0.5, fill = green_color, data=green_dataframe) + ...
Alternatively, you can bind your dataframes together, reshape them into a longer format (much more appropriate to ggplot) and then add color column that you can use with scale_fill_identity function (https://ggplot2.tidyverse.org/reference/scale_identity.html):
# Packages: tidyr for pivot_longer(), dplyr for mutate() and %>%, ggplot2 for the plot
library(tidyr)
library(ggplot2)
library(dplyr)

# Combine both data frames column-wise, reshape to long format (one row per
# observation, variable name in `var`, value in `val`), then attach the literal
# colour for each row based on whether the variable name contains "red".
df <- cbind(red_dataframe, green_dataframe) %>%
  pivot_longer(
    cols = c(red_variable, green_variable),
    names_to = "var",
    values_to = "val"
  ) %>%
  mutate(Color = ifelse(grepl("red", var), red_color, green_color))

# The identity scale uses the hex codes stored in Color as-is; a legend is
# requested explicitly and labelled with the sorted variable names.
ggplot(df, aes(val, fill = Color)) +
  geom_density(alpha = 0.5) +
  scale_fill_identity(
    guide = "legend",
    name = "Legend",
    labels = levels(factor(df$var))
  ) +
  labs(x = "X value", y = "Density")
Does it answer your question ?
You're trying to use ggplot as if it's base graphics... the mindset shift can take a little while to get used to. dc37's answer shows how you should do it. I'll try to explain what goes wrong in your attempt:
When you put fill = green_color inside aes(), because it's inside aes() ggplot essentially creates a new column of data filled with the green_color values in your green_dataframe, i.e., "#008000", "#008000", "#008000", .... Ditto for the red color values in the red data frame. We can see this if we modify your plot by simply deleting your scale:
# The question's (swapped) plot with scale_fill_manual() removed, to show what
# ggplot2 does by default when colour strings are placed inside aes(): the hex
# strings are treated as category values of a discrete fill variable.
# The fill/alpha mappings are deliberately kept inside aes() for the demonstration.
ggplot() +
  geom_density(
    aes(x = red_variable, fill = green_color, alpha = 0.5),
    data = red_dataframe
  ) +
  geom_density(
    aes(x = green_variable, fill = red_color, alpha = 0.5),
    data = green_dataframe
  ) +
  xlab("X value") +
  ylab("Density") +
  labs(fill = "Legend") +
  # "none" replaces the deprecated guides(alpha = FALSE) form
  guides(alpha = "none")
We can actually get what you want by putting the identity scale, which is designed for the (common in base, rare in ggplot2) case where you actually put color values in the data.
# Same layers as the question's swapped attempt, but with the identity fill
# scale, which takes the colour strings mapped to `fill` as the colours to draw.
# NOTE(review): the fills are still swapped as in the question's second attempt
# (red_variable is mapped to green_color) - confirm whether that is intended.
ggplot() +
  geom_density(
    aes(x = red_variable, fill = green_color, alpha = 0.5),
    data = red_dataframe
  ) +
  geom_density(
    aes(x = green_variable, fill = red_color, alpha = 0.5),
    data = green_dataframe
  ) +
  scale_fill_identity() +
  xlab("X value") +
  ylab("Density") +
  labs(fill = "Legend") +
  # "none" replaces the deprecated guides(alpha = FALSE) form
  guides(alpha = "none")
When you added your scale_fill_manual, ggplot was like "okay, cool, you want to specify colors and labels". But you were thinking in the order that you added the layers to the plot (much like base graphics), whereas ggplot was thinking of these newly created variables "#FF0000" and "#008000", which it ordered alphabetically by default (just as if they were factor or character columns in a data frame). And since you happened to add the layers in reverse alphabetical order, it was switched.
dc37's answer shows a couple better methods. With ggplot you should (a) work with a single, long-format data frame whenever possible (b) don't put constants inside aes() (constant color, constant alpha, etc.), (c) set colors in a scale_fill_* or scale_color_* function when they're not constant.

Visualizing crosstab tables with a plot in R - changing colours

I have the following code in R which is modified from here, which plots a crosstab table:
# Load ggplot2
library(ggplot2)

# Category labels for the two axes
xaxis <- c("A", "B")
yaxis <- c("A", "B")

# One row per x/y combination (columns Var1 and Var2; Var1 varies fastest),
# plus the count shown in each cell
df <- expand.grid(xaxis, yaxis)
df$value <- c(120, 5, 30, 200)

# Bubble plot of the crosstab: point size encodes the count, all points share
# one fixed colour, and axis titles are blanked.
# (Fixed: the original had a doubled assignment operator, `g <- <- ggplot(...)`,
# which is a syntax error.)
g <- ggplot(df, aes(Var1, Var2)) +
  geom_point(aes(size = value), colour = "lightblue") +
  theme_bw() +
  xlab("") +
  ylab("")

# Enlarge the bubbles and print each count on top of its bubble
g + scale_size_continuous(range = c(10, 30)) + geom_text(aes(label = value))
It produces the right figure, which is great, but I was hoping to custom colour the four dots, ideally so that the top left and bottom right are both one colour and the top right and bottom left are another.
I have tried to use:
# Attempted addition to the plot above: supplies four manual colours, but the
# plot sets a fixed colour outside aes() rather than mapping a colour aesthetic.
+ scale_color_manual(values=c("blue","red","blue","red"))
but that doesn't seem to work. Any ideas?
I would suggest that you colour by a vector in your data frame, as you don't have a column that gives you this, you can either create one, or make a rule based on existing columns (which I have done below):
# Colour is now mapped inside aes() to a logical rule: TRUE where the two
# categories differ (off-diagonal cells), FALSE where they match (diagonal).
g <- ggplot(df, aes(Var1, Var2)) +
  geom_point(aes(size = value, colour = (Var2 != Var1))) +
  theme_bw() +
  xlab("") +
  ylab("")

# Enlarge the bubbles and print each count on top of its bubble
g +
  scale_size_continuous(range = c(10, 30)) +
  geom_text(aes(label = value))
The important part is: colour = (Var2!=Var1), note that i put this inside the aesthetic (aes) for the geom_point
Edit: if you wish to remove the legend (you annotate the chart with totals, so I guess you don't really need it), you can add: g + theme(legend.position="none") to remove it

overlaying plots in ggplot2

How to overlay one plot on top of the other in ggplot2 as explained in the following sentences? I want to draw the grey time series on top of the red one using ggplot2 in R (now the red one is above the grey one and I want my graph to be the other way around). Here is my code (I generate some data in order to show you my problem, the real dataset is much more complex):
# NOTE(review): install.packages() runs every time this script is executed;
# installation is normally a one-time step done outside the script.
install.packages("ggplot2")
library(ggplot2)
# Two series of 100 time points each, stacked in one long data frame:
#   rows 1-100:   constant 0.5 with a 0.3-0.7 band   ("red should be under")
#   rows 101-200: alternating 0/1 with a 0-0 band    ("grey should be above")
time <- rep(1:100,2)
timeseries <- c(rep(0.5,100),rep(c(0,1),50))
upper <- c(rep(0.7,100),rep(0,100))
lower <- c(rep(0.3,100),rep(0,100))
legend <- c(rep("red should be under",100),rep("grey should be above",100))
dataset <- data.frame(timeseries,upper,lower,time,legend)
# Lines (colour and size by series) plus a translucent ribbon (fill by series);
# the manual scales assign grey/thin and red/thick, and the legend is placed
# horizontally at the top without a title.
# This is the code the question asks about (red ends up drawn above grey).
ggplot(dataset, aes(x=time, y=timeseries)) +
geom_line(aes(colour=legend, size=legend)) +
geom_ribbon(aes(ymax=upper, ymin=lower, fill=legend), alpha = 0.2) +
scale_colour_manual(limits=c("grey should be above","red should be under"),values = c("grey50","red")) +
scale_fill_manual(values = c(NA, "red")) +
scale_size_manual(values=c(0.5, 1.5)) +
theme(legend.position="top", legend.direction="horizontal",legend.title = element_blank())
Convert the data you are grouping on into a factor and explicitly set the order of the levels. ggplot draws the layers according to this order. Also, it is a good idea to group the scale_manual codes to the geom it is being applied to for readability.
# Turn the grouping variable into a factor with an explicit level order:
# "red should be under" first, "grey should be above" second. Per the
# accompanying explanation, ggplot draws the groups in this order.
legend <- factor(
  legend,
  levels = c("red should be under", "grey should be above")
)

# Rebuild the data frame so that it carries the factor version of `legend`.
# (Named `dataset_ordered` rather than `c`, to avoid masking base::c.)
dataset_ordered <- data.frame(timeseries, upper, lower, time, legend)

# Each manual scale sits next to the geom it applies to; the values in every
# scale are listed in factor-level order (red first, then grey).
ggplot(dataset_ordered, aes(x = time, y = timeseries)) +
  geom_ribbon(aes(ymax = upper, ymin = lower, fill = legend), alpha = 0.2) +
  scale_fill_manual(values = c("red", NA)) +
  geom_line(aes(colour = legend, size = legend)) +
  scale_colour_manual(values = c("red", "grey50")) +
  scale_size_manual(values = c(1.5, 0.5)) +
  theme(
    legend.position = "top",
    legend.direction = "horizontal",
    legend.title = element_blank()
  )
Note that the values in each scale_*_manual() call are now listed in the order of the factor levels: first "red should be under", then "grey should be above".

altering the color of one value in a ggplot histogram

I have a simplified dataframe
# Simplified example: one numeric column of win counts
library(ggplot2)

df <- data.frame(wins = c(1, 1, 3, 1, 1, 2, 1, 2, 1, 1, 1, 3))

# Histogram of wins with half-unit bins, all bars red
ggplot(df, aes(x = wins)) +
  geom_histogram(binwidth = 0.5, fill = "red")
I would like to get the final value in the sequence, 3, shown with either a different fill or alpha. One way to identify its value is
# Value of `wins` in the last row of df
df$wins[nrow(df)]
In addition, I would like to have the histogram bars shifted so that they are centered over the number. I tried unsuccessfully subtracting from the wins value.
You can do this with a single geom_histogram() by using aes(fill = cond).
To choose different colours, use one of the scale_fill_*() functions, e.g. scale_fill_manual(values = c("red", "blue")).
library(ggplot2)

df <- data.frame(wins = c(1, 1, 3, 1, 1, 2, 11, 2, 11, 15, 1, 1, 3))

# Flag every row whose wins value equals the last value in the column
df$cond <- df$wins == tail(df$wins, 1)

# Single histogram layer; bars are filled by the flag (FALSE -> red,
# TRUE -> blue), and tick marks are placed 0.25 to the right of each wins
# value while being labelled with the value itself
ggplot(df, aes(x = wins, fill = cond)) +
  geom_histogram() +
  scale_fill_manual(values = c("red", "blue")) +
  scale_x_continuous(breaks = df$wins + 0.25, labels = df$wins)
1) To draw bins in different colors you can use geom_histogram() for subsets.
2) To center bars along numbers on the x axis you can invoke scale_x_continuous(breaks=..., labels=...)
So, this code
library(ggplot2)

df <- data.frame(wins = c(1, 1, 3, 1, 1, 2, 11, 2, 11, 15, 1, 1, 3))

# TRUE for every row whose wins value equals the last value in the column
cond <- df$wins == tail(df$wins, 1)

# Two histogram layers on disjoint subsets: non-matching rows in red,
# matching rows in blue. Tick marks are placed 0.25 to the right of each
# wins value while being labelled with the value itself.
ggplot(df, aes(x = wins)) +
  geom_histogram(data = subset(df, !cond), binwidth = 0.5, fill = "red") +
  geom_histogram(data = subset(df, cond), binwidth = 0.5, fill = "blue") +
  scale_x_continuous(breaks = df$wins + 0.25, labels = df$wins)
produces the plot:

Vary the color gradient on a scatter plot created with ggplot2

Is it possible to vary a plot's color gradient by aesthetic? I'm generating a plot using code similar to the lines presented below and finding in some cases that it is not always easy to distinguish between the various groups. For example, on the chart below it would be easier to distinguish the results if I could have the group A points use a white-blue gradient and the group B points use a white-red gradient.
library(ggplot2)

# Example data: two groups (A and B) of six points each; both groups share the
# same x/y positions and the same six month-end dates (stored as text).
data <- data.frame(
  x = c(1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6),
  y = c(1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6),
  grp = c(rep("A", 6), rep("B", 6)),
  dt = c(
    "2010-06-30", "2010-05-31", "2010-04-30",
    "2010-03-31", "2010-02-26", "2010-01-29",
    "2010-06-30", "2010-05-31", "2010-04-30",
    "2010-03-31", "2010-02-26", "2010-01-29"
  )
)

# Jittered scatter plot: point shape encodes the group, point colour encodes
# the date (as an integer day count) on a single white-to-blue gradient.
# The date column is referenced by name inside aes() rather than as data$dt,
# so the mapping is evaluated against the plot's data.
p <- ggplot(data, aes(x, y, colour = as.integer(as.Date(dt)))) +
  geom_jitter(size = 4, alpha = 0.75, aes(shape = grp)) +
  scale_colour_gradient(
    limits = as.integer(as.Date(c("2010-01-29", "2010-06-30"))),
    low = "white",
    high = "blue"
  ) +
  scale_shape_discrete(name = "") +
  # theme() replaces opts(), which has been removed from ggplot2
  theme(legend.position = "none")
print(p)
You can do that by computing the colors yourself before calling ggplot2.
Here is an example:
# Continues from the question's example: expects the data frame `data` (with
# columns x, y, grp, dt) to exist and ggplot2 to be attached.

# Position of each date within the overall date range, rescaled to [0, 1]
data$sdt <- scales::rescale(as.numeric(as.Date(data$dt)))

# End colour of the white-to-colour gradient for each group, keyed by group
# name so the lookup works whether grp is a factor or a character column
cols <- c(A = "red", B = "blue")

# Compute the colour of every point: for each group, interpolate between white
# and that group's end colour at the point's rescaled date. Results are written
# back by row index, so they stay aligned with the rows of `data`.
data$col <- NA_character_
for (g in names(cols)) {
  rows <- which(data$grp == g)
  ramp <- colorRamp(c("white", cols[[g]]))
  # ramp() returns one row of R, G, B values (0-255) per input value
  data$col[rows] <- rgb(ramp(data$sdt[rows]), maxColorValue = 255)
}

# Plot with the identity colour scale so the precomputed hex codes are used
# directly as point colours
p <- ggplot(data, aes(x, y, shape = grp, colour = col)) +
  geom_jitter(size = 4, alpha = 0.75) +
  scale_colour_identity() + # use identity colour scale
  scale_shape_discrete(name = "") +
  # theme() replaces opts(), which has been removed from ggplot2
  theme(legend.position = "none")
print(p)

Resources