Legend for summary statistics in ggplot2 - r

Here is the code for the plot
library(ggplot2)
df <- data.frame(gp = factor(rep(letters[1:3], each = 10)), y = rnorm(30))
library(plyr)
ds <- ddply(df, .(gp), summarise, mean = mean(y), sd = sd(y))
ggplot(df, aes(x = gp, y = y)) +
geom_point() +
geom_point(data = ds, aes(y = mean), colour = 'red', size = 3)
I want to have a legend for this plot that will identify the data values and mean values some thing like this
Black point = Data
Red point = Mean.
How can I achieve this?

Use a manual scale, i.e. in your case scale_colour_manual. Then map the colours to values in the scale using the aes() function of each geom:
ggplot(df, aes(x = gp, y = y)) +
geom_point(aes(colour="data")) +
geom_point(data = ds, aes(y = mean, colour = "mean"), size = 3) +
scale_colour_manual("Legend", values=c("mean"="red", "data"="black"))

You can combine the mean variable and data in the same data.frame and colour /size by column which is a factor, either data or mean
library(reshape2)
# in long format
dsl <- melt(ds, value.name = 'y')
# add variable column to df data.frame
df[['variable']] <- 'data'
# combine
all_data <- rbind(df,dsl)
# drop sd rows
data_w_mean <- subset(all_data,variable != 'sd',drop = T)
# create vectors for use with scale_..._manual
colour_scales <- setNames(c('black','red'),c('data','mean'))
size_scales <- setNames(c(1,3),c('data','mean') )
ggplot(data_w_mean, aes(x = gp, y = y)) +
geom_point(aes(colour = variable, size = variable)) +
scale_colour_manual(name = 'Type', values = colour_scales) +
scale_size_manual(name = 'Type', values = size_scales)
Or you could not combine, but include the column in both data sets
dsl_mean <- subset(dsl,variable != 'sd',drop = T)
ggplot(df, aes(x = gp, y = y, colour = variable, size = variable)) +
geom_point() +
geom_point(data = dsl_mean) +
scale_colour_manual(name = 'Type', values = colour_scales) +
scale_size_manual(name = 'Type', values = size_scales)
Which gives the same results

Related

How to correctly specify a column as the fill colour in geom_ribbon?

I can't seem to be able to set different fill colours for geom_ribbon(), using one of the columns as input to fill
library(ggplot2)
time <- as.factor(c('A','B','C','D'))
grouping <- as.factor(c('GROUP1','GROUP1','GROUP1','GROUP1',
'GROUP2','GROUP2','GROUP2','GROUP2'))
x <- c(1.00,1.03,1.03,1.06,0.5,0.43,0.2,0.1)
x.upper <- x+0.05
x.lower <- x-0.05
df <- data.frame(time, x, x.upper, x.lower,grouping)
ggplot(data = df,aes(as.numeric(time),x,group=grouping,color=grouping)) +
geom_ribbon(data = df, aes(x=as.numeric(time), ymax=x.upper, ymin=x.lower),
fill=grouping, alpha=.5) +
geom_point() + labs(title="My ribbon plot",x="Time",y="Value") +
scale_x_continuous(breaks = 1:4, labels = levels(df$time))
I get the error Error: Unknown colour name: grouping but fill=c("pink","blue") works fine. I don't want to specify the colours manually.
All other examples I can find simply list the column in the fill argument so I'm not sure what I'm doing incorrectly.
Move fill = grouping inside aes so that this column is mapped to the fill variable.
ggplot(data = df, aes(as.numeric(time), x, color = grouping)) +
geom_ribbon(data = df, aes(ymax = x.upper, ymin = x.lower,
fill = grouping), alpha = 0.5) +
geom_point() +
labs(title = "My ribbon plot", x = "Time", y = "Value") +
scale_x_continuous(breaks = 1:4, labels = levels(df$time))

R: How to combine grouping and colour aesteric in ggplot line plot

I am trying to create a line plot with 2 types of measurements, but my data is missing some x values. In Line break when no data in ggplot2 I have found how to create plot that will make a break when there is now data, but id does not allow to plot 2 lines (one for each Type).
1) When I try
ggplot(Data, aes(x = x, y = y, group = grp)) + geom_line()
it makes only one line, but with break when there is no data
2) When I try
ggplot(Data, aes(x = x, y = y, col = Type)) +
geom_line()
it makes 2 lines, but with break when there is no data
3) When I try
ggplot(Data, aes(x = x, y = y, col = Type, group = grp)) +
geom_line()
it makes unreadyble chart
4) of course I could combine the Type and grp to make new variable, but then the legend is not nice, and I get 4 groups (and colours) insted of 2.
5) also I could make something like that, but it dose not produce a legend, and in my real dataset i have way to many Types to do that
ggplot() +
geom_line(data = Data[Data$Type == "A",], aes(x = x, y = y, group = grp), col = "red") +
geom_line(data = Data[Data$Type == "B",], aes(x = x, y = y, group = grp), col = "blue")
Data sample:
Data <- data.frame(x = c(1:100, 201:300), y = rep(c(1, 2), 100), Type = rep(c("A", "B"), 100), grp = rep(c(1, 2), each = 100))
One way is to use interaction() to specify a grouping of multiple columns:
library(ggplot2)
Data <- data.frame(x = c(1:100, 201:300), y = rep(c(1, 2), 100), Type = rep(c("A", "B"), 100), grp = rep(c(1, 2), each = 100))
ggplot(Data, aes(x = x, y = y, col = Type, group = interaction(grp,Type))) +
geom_line()

R - geom_bar - 'stack' position without summing the values

I have this data frame
df <- data.frame(profile = rep(c(1,2), times = 1, each = 3), depth = c(100, 200, 300), value = 1:3)
This is my plot
ggplot() +
geom_bar(data = df, aes(x = profile, y = - depth, fill = value), stat = "identity")
My problem is the y labels which doesn't correspond to the depth values of the data frame
To help, my desired plot seems like this :
ggplot() +
geom_point(data = df, aes(x = profile, y = depth, colour = value), size = 20) +
xlim(c(0,3))
But with bar intead of points vertically aligned
nb : I don't want to correct it manually in changing ticks with scale_y_discrete(labels = (desired_labels))
Thanks for help
Considering you want a y-axis from 0 to -300, using facet_grid() seems to be a right option without summarising the data together.
ggplot() + geom_bar(data = df, aes(x = as.factor(profile), y = -depth, fill = value), stat = 'identity') + facet_grid(~ value)
I have it !
Thanks for your replies and to this post R, subtract value from previous row, group by
To resume; the data :
df <- data.frame(profile = rep(c(1,2), times = 1, each = 3), depth = c(100, 200, 300), value = 1:3)
Then we compute the depth step of each profile :
df$diff <- ave(df$depth, df$profile, FUN=function(z) c(z[1], diff(z)))
And finally the plot :
ggplot(df, aes(x = factor(profile), y = -diff, fill = value)) + geom_col()

R ggplot2 : continuous x + colors

I'm trying to create a boxplot using ggplot2 with :
X as a continuous variable
Colors for different groups
Here is an example :
x <- sample(c(1,2,5),300,replace = TRUE)
y <- sapply(x,function(mu) rnorm(1,mean = mu))
color <- sample(c("color 1","color 2"),300,replace = TRUE)
data <- data.frame(x, y, color)
I can either have colors and x as a factor :
ggplot(data = data) + geom_boxplot(aes(x = factor(x),y = y,col = color))
or x as a continuous variable and no colors :
ggplot(data = data) + geom_boxplot(aes(x = x,y = y,group = x))
But not both.
Does somebody know how to do this ?
Thanks
I think you need one more column for group, which is the combination of color and x. For example, how about simply paste()ing them?
set.seed(1)
x <- sample(c(1,2,5),300,replace = TRUE)
y <- sapply(x,function(mu) rnorm(1,mean = mu))
color <- sample(c("color 1","color 2"),300,replace = TRUE)
data <- data.frame(x, y, color)
library(ggplot2)
ggplot(data = data) +
geom_boxplot(aes(x = x, y = y, col = color, group = paste(color, x)))
You can use scales to change the x-axis scale.
library(ggplot2)
library(scales)
x <- sample(c(1,2,5),300,replace = TRUE)
y <- sapply(x,function(mu) rnorm(1,mean = mu))
color <- sample(c("color 1","color 2"),300,replace = TRUE)
data <- data.frame(x, y, color)
ggplot(data = data) + geom_boxplot(aes(x = factor(x),y = y,col = color)) + scale_x_discrete(limit = c('1','2','3','4','5'))
Hack for dynamic limits:
min = min(data$x)
max = max(data$x)
limits <- as.character(seq(min:max))
ggplot(data = data) + geom_boxplot(aes(x = factor(x),y = y,col = color)) + scale_x_discrete(limit = limits)
You could misuse the fill aesthetic:
ggplot(data = data) +
geom_boxplot(aes(x = x, y = y, col = color, fill = factor(x))) +
scale_fill_manual(values = rep(NA, 3), guide = "none")

ggplot2: Legend for NA in scale_fill_brewer

I wonder how I can get legend category for NA values in scale_fill_brewer. Here is my MWE.
set.seed(12345)
dat <-
data.frame(
Row = rep(x = LETTERS[1:5], times = 10)
, Col = rep(x = LETTERS[1:10], each = 5)
, Y = c(rnorm(n = 48, mean = 500, sd = 1), NA, NA)
)
dat$Y1 <- addNA(cut(log(dat$Y), 5))
levels(dat$Y1)
[1] "(6.21,6.212]" "(6.212,6.214]" "(6.214,6.216]" "(6.216,6.218]" "(6.218,6.22]" NA
library(ggplot2)
ggplot(data = dat, aes(x = Row, y = Col)) +
geom_tile(aes(fill = Y1), colour = "white") +
scale_fill_brewer(palette = "PRGn")
You could explicitly treat the missing values as another level of your Y1 factor to get it on your legend.
After cutting the variable as before, you will want to add NA to the levels of the factor. Here I add it as the last level.
dat$Y1 <- cut(log(dat$Y), 5)
levels(dat$Y1) <- c(levels(dat$Y1), "NA")
Then change all the missing values to the character string NA.
dat$Y1[is.na(dat$Y1)] <- "NA"
This makes NA part of the legend in your plot:
I've found a workaround without changing the original data frame, adding an extra legend based on this post:
ggplot(data = dat, aes(x = Row, y = Col)) +
geom_tile(aes(fill = Y1), colour = "white") +
scale_fill_brewer(palette = "PRGn")+
geom_point(data = dat, aes(size="NA"), shape =NA, colour = "grey95")+
guides(size=guide_legend("NA", override.aes=list(shape=15, size = 10)))
Colouring the NAs:
ggplot(data = dat, aes(x = Row, y = Col)) +
geom_tile(aes(fill = Y1), colour = "white") +
scale_fill_brewer(palette = "PRGn", na.value="red")+
geom_point(data = dat, aes(size="NA"), shape =NA, colour = "red")+
guides(size=guide_legend("NA", override.aes=list(shape=15, size = 10)))

Resources