I first plot a histogram for a group of simulated data and fill the bars with one colour. Then I add a line for the density function from which the data was simulated and give the line a different colour. Now I want to use legends to show that one colour (the fill colour of the histogram) is for the samples whereas the other (the colour of the line) is for the theoretical density. How can I achieve this?
The code is as follows
# Load ggplot2 with library(): it stops with an error if the package is
# missing, whereas require() only returns FALSE and lets the script fail later.
library(ggplot2)

# Simulated sample: 10^4 draws from a standard normal distribution.
df <- data.frame(x = rnorm(10^4))

# Density-scaled histogram of the sample, drawn with a fixed fill colour.
p <- ggplot(df, aes(x = x)) +
  geom_histogram(aes(y = ..density..), fill = 'steelblue', colour = 'black',
                 alpha = 0.8, width = 0.2)

# Standard normal density evaluated on a grid over [-4, 4].
x <- seq(-4, 4, 0.01)
df <- data.frame(x = x, y = dnorm(x))

# Overlay the density curve in a different, fixed colour.
p <- p + geom_line(data = df, aes(x = x, y = y), colour = 'red', size = 1.5)
p
You can do this by adding a new column to each of your data frames to create fill and colour aesthetics to go into the legend. In each case, there's only one category, but putting them inside the aes() gives you the legends you want:
library(ggplot2)

# Sample data plus a constant label column; mapping that column to the fill
# aesthetic is what produces the histogram's legend entry.
df <- data.frame(x = rnorm(10^4), fill = rep("Sample", 10^4))
p <- ggplot(df, aes(x = x)) +
  geom_histogram(aes(y = ..density.., fill = fill),
                 colour = 'black', alpha = 0.8, width = 0.2) +
  scale_fill_manual(values = "steelblue") +
  labs(fill = "")

# Theoretical density plus a constant label column named `colour`.
x <- seq(-4, 4, 0.01)
df <- data.frame(x = x, y = dnorm(x),
                 colour = rep("Theoretical Density", length(x)))

# Map the `colour` column to the colour aesthetic. The data frame has no
# `line` column, so referring to `line` here would fail when the plot is built.
p <- p +
  geom_line(data = df, aes(x = x, y = y, colour = colour), size = 1.5) +
  scale_colour_manual(values = "red") +
  labs(colour = "")
Without changing your data at all, you can specify literal aes() values that you can define later via manual scales.
# Sample to be shown as a histogram.
df <- data.frame(x = rnorm(10^4))

# The literal "samples" inside aes() creates a one-level fill aesthetic;
# the manual scale then assigns its colour and an empty legend title.
p <- ggplot(df, aes(x = x)) +
  geom_histogram(
    aes(y = ..density.., fill = "samples"),
    alpha = 0.8, colour = "black", width = 0.2
  ) +
  scale_fill_manual("", breaks = "samples", values = "steelblue")

# Theoretical density on a grid over [-4, 4].
x <- seq(-4, 4, 0.01)
df <- data.frame(x = x, y = dnorm(x))

# Same trick for the line: a literal colour label resolved by a manual scale.
p <- p +
  geom_line(data = df, aes(x = x, y = y, colour = "theory"), size = 1.5) +
  scale_color_manual("", breaks = "theory", values = "red")
Related
I have the following dataset I would like to plot.
library(tidyverse)

# Wide table: one row per ID, one column per measurement.
df <- data.frame(
  first = c(40, 40, 40),
  second = c(40, 80, 160),
  third = c(40, 160, 640),
  ID = c("ID1", "ID2", "ID3")
) %>%
  # Reshape to long format: every column except ID is stacked into rows.
  pivot_longer(cols = -ID)
I am using:
# Plot the long-format data frame `df` created above; no `df2` is defined.
ggplot(df, aes(x = name, y = value, group = ID)) +
  geom_line() +
  geom_point(size = 4)
Currently, I have:
Is there a way to change the values shown on the y axis:
1:40
1:80
1:160
1:320
1:640
Basically, I am writing a string for the continuous numeric values on y
You can set the breaks= and labels= parameters to change your y-axis labels
ggplot(df, aes(x = name, y = value, group = ID)) +
  geom_line() +
  geom_point(size = 4) +
  # Place breaks at the dilution values and prefix each label with "1:".
  # `labels` is spelled out in full rather than relying on partial matching.
  scale_y_continuous(
    breaks = c(40, 80, 160, 320, 640),
    labels = function(x) paste0("1:", x)
  )
I'm trying to use custom colours defined in scale_colour_manual to fill a geom_ribbon in ggplot2. Here is an example I took from Custom ggplot2 shaded error areas on categorical line plot:
# Fix the RNG seed so the simulated values are reproducible.
set.seed(12345)
# Stack four categories (A-D) of 100 normal draws each, with means 0, 5, 10, 15.
# Each cbind() pairs a category label with its simulated value.
data <- cbind(rep("A", 100), rnorm(100, 0, 1))
data <- rbind(data, cbind(rep("B", 100), rnorm(100, 5, 1)))
data <- rbind(data, cbind(rep("C", 100), rnorm(100, 10, 1)))
data <- rbind(data, cbind(rep("D", 100), rnorm(100, 15, 1)))
# Prepend an index running 1..100 within each category.
data <- cbind(rep(1:100, 4), data)
data <- data.frame(data)
names(data) <- c("num", "category", "value")
# Convert the index and value columns to numeric.
data$num <- as.numeric(data$num)
data$value <- as.numeric(data$value)
# Band of +/- 10 around each value.
data$upper <- data$value+10
data$lower <- data$value-10
# Sort by category, then index, so the per-category results below line up
# with the rows they are assigned to.
data = data[order(data$category, data$num),]
# Loess-smoothed upper bound, fitted separately for each category A-D and
# concatenated in that order.
data$upperLoess = unlist(lapply(LETTERS[1:4],
function(x) predict(loess(data$upper[data$category==x] ~
data$num[data$category==x]))))
# Loess-smoothed lower bound, computed the same way.
data$lowerLoess = unlist(lapply(LETTERS[1:4],
function(x) predict(loess(data$lower[data$category==x] ~
data$num[data$category==x]))))
# Smoothed line per category with a ribbon between the loess bounds.
# Only a colour scale is supplied here, so the fill is left to its default.
ggplot(data, aes(x = num, y = value, colour = category, fill = category)) +
  scale_colour_manual(
    values = c(A = "black", B = "red", C = "magenta", D = "green")
  ) +
  geom_smooth(method = "loess", se = FALSE) +
  geom_ribbon(
    aes(x = num, ymax = upperLoess, ymin = lowerLoess, fill = category),
    alpha = 0.2
  )
Wrong color ribbons:
Obviously, the colours defined for the categorical variable "category" are not used. Instead, the default palette (scale_colour_hue?) is used. I can place the fill argument outside the aes:
# Same plot, but with the ribbon fill set to a constant outside aes().
ggplot(data, aes(x = num, y = value, colour = category, fill = category)) +
  scale_colour_manual(
    values = c(A = "black", B = "red", C = "magenta", D = "green")
  ) +
  geom_smooth(method = "loess", se = FALSE) +
  geom_ribbon(
    aes(x = num, ymax = upperLoess, ymin = lowerLoess),
    fill = "red",
    alpha = 0.2
  )
which results in red ribbons
Any ideas? Thanks a lot!
Try adding scale_fill_manual using the same colors defined in your scale_color_manual call.
# Supplying the same named colours to both the colour and the fill scale
# makes the ribbons match their lines.
ggplot(data, aes(x = num, y = value, colour = category, fill = category)) +
  scale_colour_manual(
    values = c(A = "black", B = "red", C = "magenta", D = "green")
  ) +
  geom_smooth(method = "loess", se = FALSE) +
  geom_ribbon(
    aes(x = num, ymax = upperLoess, ymin = lowerLoess, fill = category),
    alpha = 0.2
  ) +
  scale_fill_manual(
    values = c(A = "black", B = "red", C = "magenta", D = "green")
  )
matching colors
Assuming I have two data.frames with different data but in the same range of x-values
# Nine bar values: three z categories at each of x = 1, 2, 3.
a <- data.frame(
  x = rep(c(1, 2, 3), each = 3),
  y = c(0.3, 0.4, 0.3, 0.2, 0.5, 0.3, 0.4, 0.4, 0.2),
  z = rep(c("do", "re", "mi"), times = 3)
)
# One line value per x position.
b <- data.frame(x = c(1, 2, 3), y = c(10, 15, 8))
Both, a and b have the same range of X values (1,2,3) but while a is a data.frame with 9 rows, b is a data.frame with 3 rows.
I use geom_bar in order to plot the distribution of values of a, like this:
# Stacked bars of y per x, split by z; the y-axis title is blanked.
ggplot(a, aes(x = x, y = y, fill = z)) +
  geom_bar(stat = "identity", position = "stack") +
  labs(x = "x", y = "")
And I use geom_line to plot b data, like this:
# Line through the three b values, with the y axis fixed to 0..15.
ggplot(b, aes(x = x, y = y)) +
  geom_line(stat = "identity") +
  labs(x = "x", y = "") +
  ylim(0, 15)
Now I would like to overlay this geom_line plot to the previous geom_bar plot. My first try was to do the following:
# Non-working attempt (reported below as "With no success"): a second
# ggplot() call is chained onto the first plot with `+`.
ggplot(a, aes(x=x, y=y, fill=z)) +
geom_bar(position="stack",stat="identity") +
ylab("") + xlab("x") +
ggplot(b, aes(x=x, y=y)) +
geom_line(stat="identity") +
ylab("") + xlab("x") + ylim(0,15)
With no success.
How can I overlay a geom_line plot to a geom_bar plot?
Try this
# Start from an empty plot and give each layer its own data frame.
p <- ggplot() +
  geom_bar(
    data = a, aes(x = x, y = y, fill = z),
    position = "stack", stat = "identity"
  ) +
  # b's y is divided by its maximum so the line peaks at 1.
  geom_line(data = b, aes(x = x, y = y / max(y)), stat = "identity")
p
Update:
You can rescale one of the y variables to make the two comparable. As I don't know the relation between the two ys, I rescaled by using y/max(y). Does this solve your problem?
Try merging the datasets first, then plotting, like this:
# library() fails loudly if ggplot2 is not installed; require() would only
# return FALSE and let the ggplot() call below fail instead.
library(ggplot2)

# Join a and b on x: a's y becomes y.x (bar heights), b's y becomes y.y (line).
df <- merge(a, b, by = "x")
ggplot(df, aes(x = x, y = y.x, fill = z)) +
  geom_bar(position = "stack", stat = "identity") +
  geom_line(aes(x = x, y = y.y)) +
  ylab("") + xlab("x")
Output:
I edited the sample data to better illustrate the effects, because the y-axis scaling of the original data would not have matched well:
# Same bar data as in the question.
a <- data.frame(
  x = rep(c(1, 2, 3), each = 3),
  y = c(0.3, 0.4, 0.3, 0.2, 0.5, 0.3, 0.4, 0.4, 0.2),
  z = rep(c("do", "re", "mi"), times = 3)
)
# Line values changed to a range similar to the bar heights.
b <- data.frame(x = c(1, 2, 3), y = c(0.4, 1, 0.4))
How to plot based on the combination of two column levels(here: treatment, replicate)?
set.seed(0)
x <- rep(1:10, times = 4)
# Two noisy bands (1..10 and 6..15, each repeated twice), shuffled together.
y <- sample(c(
  rep(1:10, 2) + rnorm(20) / 5,
  rep(6:15, 2) + rnorm(20) / 5
))
# Eight-level factors (five observations per level), each independently shuffled.
treatment <- sample(gl(8, 5, 40, labels = letters[1:8]))
replicate <- sample(gl(8, 5, 40))
d <- data.frame(x, y, treatment, replicate)
plots: color based on single column levels
# Colour the points by treatment only.
ggplot(d, aes(x = x, y = y, colour = treatment)) +
  geom_point()
# Colour the points by replicate only.
ggplot(d, aes(x = x, y = y, colour = replicate)) +
  geom_point()
The combination of two column levels would be a-1, a-2, a-3, ... h-6, h-7, h-8.
64 colours will be uninterpretable. How about point labels instead:
# Draw each observation as a text label built from treatment and replicate,
# coloured by treatment, with no legend for the text layer.
ggplot(d, aes(x = x, y = y, colour = treatment)) +
  geom_text(
    aes(label = paste0(treatment, replicate)),
    size = 3,
    show.legend = FALSE
  ) +
  theme_classic()
Or, if you're trying to spot differences in patterns for different treatments, maybe faceting would help:
# Same text labels, one panel per treatment arranged in four columns.
ggplot(d, aes(x = x, y = y, colour = treatment)) +
  geom_text(
    aes(label = paste0(treatment, replicate)),
    size = 3,
    show.legend = FALSE
  ) +
  facet_wrap(~ treatment, ncol = 4) +
  scale_x_continuous(expand = c(0, 0.7)) +
  theme_bw() +
  theme(panel.grid = element_blank())
But, if you really want a whole bunch of colours...
# One colour per treatment-replicate combination, e.g. "a-1".
ggplot(
  d,
  aes(
    x = x, y = y,
    colour = interaction(treatment, replicate, sep = "-", lex.order = TRUE)
  )
) +
  geom_point() +
  labs(colour = "Treatment-Replicate") +
  theme_classic()
(If you want all potential treatment-replicate combinations to be listed in the legend, regardless of whether they're present in the data, then add + scale_colour_discrete(drop=FALSE) to the plot code.)
Here is an example:
library(ggplot2)
set.seed(112)
# 100 observations: group g (A/B), facet f (i/ii) and two normal variables.
# replace = TRUE is written out because `T` is an ordinary variable that can
# be reassigned, unlike the reserved word TRUE.
df <- data.frame(
  g = sample(c("A", "B"), 100, replace = TRUE),
  x = rnorm(100),
  y = rnorm(100, 2, 3),
  f = sample(c("i", "ii"), 100, replace = TRUE)
)
# Scatter plot with a linear fit per group g, one panel per level of f.
ggplot(df, aes(x = x, y = y, colour = factor(g))) +
  geom_point() +
  geom_smooth(method = "lm", fill = "NA") +
  facet_wrap(~f)
My question is how to add text like the second plot by group into the plot.
You can manually create another data.frame for your text and add the layer on the original plot.
# One label row per group/facet combination, all placed at x = -2.
df_text <- data.frame(
  g = rep(c("A", "B"), times = 2),
  x = -2,
  y = c(9, 8, 9, 8),
  f = rep(c("i", "ii"), each = 2),
  text = c("R=0.2", "R=-0.3", "R=-0.05", "R=0.2")
)
# Original plot plus a text layer that reads its labels from df_text.
ggplot(df, aes(x = x, y = y, colour = factor(g))) +
  geom_point() +
  geom_smooth(method = "lm", fill = "NA") +
  geom_text(
    data = df_text,
    mapping = aes(x = x, y = y, color = factor(g), label = text),
    fontface = "bold", hjust = 0, size = 5, show.legend = FALSE
  ) +
  facet_wrap(~f)
Another option is to calculate the correlations on the fly and use the underlying numeric values of the factor variable g to place the text so that the red and blue labels don't overlap. This reduces the amount of code needed and makes label placement a bit easier.
library(dplyr)
ggplot(df, aes(x = x, y = y, colour = g)) +
  geom_point() +
  geom_smooth(method = "lm", fill = NA) + # Guessing you meant fill=NA here
  # geom_smooth(method = "lm", se = FALSE) # Better way to remove confidence bands
  facet_wrap(~f) +
  geom_text(
    # Correlation of x and y for every group/facet combination.
    data = df %>% group_by(g, f) %>% summarise(corr = cor(x, y)),
    # Offset each group's label vertically by its group index so they do not
    # overlap. g is wrapped in factor() first: data.frame() keeps strings as
    # character since R 4.0, and as.numeric() on the raw strings would give
    # NA, so the labels would not be drawn.
    aes(
      label = paste0("R = ", round(corr, 2)),
      y = 10 - as.numeric(factor(g))
    ),
    x = -2, hjust = 0, fontface = "bold", size = 5, show.legend = FALSE
  )