I'm trying to create a boxplot using ggplot2 with :
X as a continuous variable
Colors for different groups
Here is an example :
# Simulated example data: 300 observations whose x is one of 1, 2 or 5.
x <- sample(c(1, 2, 5), 300, replace = TRUE)
# rnorm() is vectorised over `mean`, so one call draws a value per element
# of x, each centred on that element (no element-wise sapply() needed).
y <- rnorm(length(x), mean = x)
# Random two-level label, used later for the colour aesthetic.
color <- sample(c("color 1", "color 2"), 300, replace = TRUE)
data <- data.frame(x, y, color)
I can either have colors and x as a factor :
# Variant 1: x is converted to a factor (discrete axis) and the boxes are
# coloured by `color`.
ggplot(data, aes(x = factor(x), y = y, colour = color)) +
  geom_boxplot()
or x as a continuous variable and no colors :
# Variant 2: x stays numeric (continuous axis); `group = x` requests one box
# per distinct x value, with no colour mapping.
ggplot(data, aes(x = x, y = y, group = x)) +
  geom_boxplot()
But not both.
Does somebody know how to do this ?
Thanks
I think you need one more column for group, which is the combination of color and x. For example, how about simply paste()ing them?
# Fixed seed so the example is reproducible.
set.seed(1)
x <- sample(c(1, 2, 5), 300, replace = TRUE)
# rnorm() is vectorised over `mean`: one draw per element of x, centred on
# that element. This replaces the element-wise sapply() call.
y <- rnorm(length(x), mean = x)
color <- sample(c("color 1", "color 2"), 300, replace = TRUE)
data <- data.frame(x, y, color)
library(ggplot2)
# x stays continuous; the extra `group` built from color and x requests one
# box per (color, x) combination.
ggplot(data, aes(x = x, y = y, colour = color)) +
  geom_boxplot(aes(group = paste(color, x)))
You can use scales to change the x-axis scale.
library(ggplot2)
library(scales)
# Example data: 300 observations with x drawn from 1, 2 and 5.
x <- sample(c(1, 2, 5), 300, replace = TRUE)
# Vectorised draw: rnorm() recycles `mean` element by element, so y[i] is
# centred on x[i] without an explicit sapply() loop.
y <- rnorm(length(x), mean = x)
color <- sample(c("color 1", "color 2"), 300, replace = TRUE)
data <- data.frame(x, y, color)
# x is plotted as a factor, but the discrete scale lists every position from
# 1 to 5, including 3 and 4 which are not among the sampled x values.
# The argument is spelled out as `limits` rather than relying on partial
# matching of `limit`.
ggplot(data, aes(x = factor(x), y = y, colour = color)) +
  geom_boxplot() +
  scale_x_discrete(limits = c("1", "2", "3", "4", "5"))
Hack for dynamic limits:
# Derive the axis limits from the data instead of hard-coding them.
# The bounds get their own names so they do not mask base::min / base::max.
x_min <- min(data$x)
x_max <- max(data$x)
# seq(from, to) steps from the smallest to the largest x by 1.
# seq(min:max) would instead return 1, 2, ..., length(min:max), which only
# matches the data when the smallest x happens to be 1.
limits <- as.character(seq(x_min, x_max))
ggplot(data, aes(x = factor(x), y = y, colour = color)) +
  geom_boxplot() +
  scale_x_discrete(limits = limits)
You could misuse the fill aesthetic:
# fill = factor(x) is mapped alongside colour on a continuous x axis; every
# fill value is then set to NA and the fill legend is hidden.
ggplot(data, aes(x = x, y = y)) +
  geom_boxplot(aes(colour = color, fill = factor(x))) +
  scale_fill_manual(values = rep(NA, 3), guide = "none")
Related
I have data and a plot like this,
# Two quadratic series over x = 1..12.
x <- as.numeric(1:12)
y1 <- x^2 - 5
y2 <- -x^2 + 1
data <- data.frame(x, y1, y2)
# Reshape columns 2 and 3 to long format; the plot below reads the resulting
# `name` and `value` columns.
data1 <- data.frame(pivot_longer(data, 2:3))
# Points plus a straight-line fit per series, fitted over all x values.
ggplot(data1, aes(x = x, y = value, colour = name)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
Is there a way to have the trendline only applying to values for x greater than a certain number, like 3?
You can do this:
# All points are drawn, but the smoothing layer gets its own data: only the
# rows with x > 3, so the linear fit is restricted to that range.
ggplot(data1, aes(x = x, y = value, colour = name)) +
  geom_point() +
  geom_smooth(
    data = dplyr::filter(data1, x > 3),
    method = "lm",
    se = FALSE
  )
You can apply the current aes to geom_point only, and create a new column (i.e. x2 in my code) for mapping to geom_smooth.
library(tidyverse)
# Two quadratic series over x = 1..12, reshaped to long format.
x <- as.numeric(1:12)
y1 <- x^2 - 5
y2 <- -x^2 + 1
data <- data.frame(x, y1, y2)
data1 <- data.frame(pivot_longer(data, 2:3))
# x2 is a copy of x with every value <= 3 blanked out (NA). The points use x,
# the smoother uses x2, so the fit only sees rows with x > 3.
data1 %>%
  mutate(x2 = replace(x, x <= 3, NA)) %>%
  ggplot() +
  geom_point(aes(x = x, y = value, colour = name)) +
  geom_smooth(
    aes(x = x2, y = value, colour = name),
    method = "lm",
    se = FALSE
  )
Created on 2022-05-07 by the reprex package (v2.0.1)
Similar to both above just using subset:
# Same idea using base subset(): the smoother is fitted on rows with x > 3.
ggplot(data1, aes(x = x, y = value, colour = name)) +
  geom_point() +
  geom_smooth(
    data = subset(data1, x > 3),
    method = "lm",
    se = FALSE
  )
I am trying to create a line plot with 2 types of measurements, but my data is missing some x values. In "Line break when no data in ggplot2" I found how to create a plot that makes a break where there is no data, but it does not allow plotting 2 lines (one for each Type).
1) When I try
# Attempt 1: lines are grouped by grp only (no colour mapping).
ggplot(Data) +
  geom_line(aes(x = x, y = y, group = grp))
it makes only one line, but with break when there is no data
2) When I try
# Attempt 2: lines are coloured by Type only (no explicit group).
ggplot(Data) +
  geom_line(aes(x = x, y = y, colour = Type))
it makes 2 lines, but without a break where there is no data
3) When I try
# Attempt 3: colour by Type while grouping by grp alone.
ggplot(Data) +
  geom_line(aes(x = x, y = y, colour = Type, group = grp))
it makes an unreadable chart
4) Of course I could combine Type and grp into a new variable, but then the legend is not nice, and I get 4 groups (and colours) instead of 2.
5) I could also do something like the following, but it does not produce a legend, and in my real dataset I have way too many Types to do that:
# One layer per Type, each with a fixed colour set outside aes().
# Both layers share the same x / y / group mapping.
ggplot(mapping = aes(x = x, y = y, group = grp)) +
  geom_line(data = Data[Data$Type == "A", ], colour = "red") +
  geom_line(data = Data[Data$Type == "B", ], colour = "blue")
Data sample:
# Sample data: x covers 1..100 and 201..300 (nothing in between), y and Type
# alternate row by row, and grp labels the first and second block of 100 rows.
Data <- data.frame(
  x    = c(1:100, 201:300),
  y    = rep(c(1, 2), times = 100),
  Type = rep(c("A", "B"), times = 100),
  grp  = rep(c(1, 2), each = 100)
)
One way is to use interaction() to specify a grouping of multiple columns:
library(ggplot2)
# Sample data with a gap in x between 100 and 201; grp marks which side of
# the gap a row is on, Type alternates A / B.
Data <- data.frame(
  x = c(1:100, 201:300),
  y = rep(c(1, 2), times = 100),
  Type = rep(c("A", "B"), times = 100),
  grp = rep(c(1, 2), each = 100)
)
# Colour by Type, but group by the grp x Type combination so that each
# combination is drawn as its own line.
ggplot(Data) +
  geom_line(
    aes(x = x, y = y, colour = Type, group = interaction(grp, Type))
  )
I have a ggplot graph defined like this:
# Two curves plus two isolated points, stacked into one data frame.
x <- seq(0, 10, by = 0.1)
# y1 / y2 are defined so that each curve matches the `type` label it is
# given below (the labels were previously attached to the opposite function).
y1 <- sin(x)
y2 <- cos(x)
df1 <- data.frame(x = x, y = y1, type = "sin", id = 1)
df2 <- data.frame(x = x, y = y2, type = "cos", id = 2)
# Single-row frames: the "constant" entries are individual points.
df3 <- data.frame(x = 2, y = 0.5, type = "constant", id = 3)
df4 <- data.frame(x = 4, y = 0.2, type = "constant", id = 4)
combined <- rbind(df1, df2, df3, df4)
# Colour is computed inside aes() from the type and id columns, so each layer
# evaluates it against its own data. Points are drawn only for the
# "constant" rows.
ggplot(combined, aes(x = x, y = y, colour = interaction(type, id))) +
  geom_line() +
  geom_point(data = subset(combined, type == "constant"))
This works very well as illustrated below:
Now I would like to extract the interaction in a variable to reuse it later (e.g. customize the legend style or labels).
I did that in a very naïve way:
# The interaction is computed once, outside the plot, as a free-standing
# vector with one entry per row of `combined`.
my.interaction <- with(combined, interaction(type, id))
# NOTE: this is the failing variant. The point layer receives only the
# "constant" rows, but the inherited colour mapping still refers to the
# full-length my.interaction vector, hence the length error quoted below.
ggplot(combined, aes(x = x, y = y, colour = my.interaction)) +
  geom_line() +
  geom_point(data = subset(combined, type == "constant"))
But then I have an error:
Error: Aesthetics must be either length 1 or the same as the data (2):
x, y, colour
Edit:
Here is the kind of manipulation I could do: edit the linetype of the legend
# Levels that actually occur in my.interaction (factor() drops unused ones).
displayed <- levels(factor(my.interaction))
# Start with linetype 1 for every displayed level ...
line.style <- rep_len(1, length(displayed))
# ... and switch to linetype 0 for the levels whose label contains "constant".
is_constant <- grepl("constant", displayed)
line.style[is_constant] <- 0
That works:
# Working variant: the interaction is computed inside aes(), and the legend
# keys get the per-level linetypes prepared in line.style.
ggplot(combined, aes(x = x, y = y, colour = interaction(type, id))) +
  geom_line() +
  geom_point(data = subset(combined, type == "constant")) +
  guides(
    colour = guide_legend(override.aes = list(linetype = line.style))
  )
That does not:
# Non-working variant, colouring by the external my.interaction vector.
# The variable name is now spelled correctly and the ggplot() call is closed,
# so the snippet parses. It is still expected to fail with the "Aesthetics
# must be either length 1 or the same as the data" error, because the point
# layer's data is only the "constant" subset while my.interaction has one
# entry per row of `combined`.
ggplot(combined, aes(x, y, colour = my.interaction)) +
  geom_line() +
  geom_point(data = subset(combined, type == "constant")) +
  guides(colour = guide_legend(override.aes = list(linetype = line.style)))
In the end, I could also edit the shapes or the legend labels (e.g. "Id: 1 / Type: sin" or any other advanced transformation of the labels based on the interaction values).
This'll work. What's wrong with adding a column to your data frame?
# Store the type/id label as a real column so every layer can look it up in
# its own data.
combined %>%
  mutate(my.interaction = paste(type, id, sep = ".")) %>%
  ggplot(aes(x, y, colour = my.interaction)) +
  geom_line() +
  # The point layer filters the plot's own data (which carries the new
  # my.interaction column) instead of re-subsetting the original `combined`,
  # which has no such column.
  geom_point(data = function(d) d[d$type == "constant", ])
I created a plot with several geom_area according to the following code :
library(ggplot2)
library(RColorBrewer)

# Reproducible sample of 100 normal draws (mean 10, sd 2) in a one-column frame.
set.seed(1)
dat <- data.frame(matrix(rnorm(100, 10, 2), 100, 1))
# Kernel density estimate, kept as a data frame with columns x and y.
dat_density <- data.frame(density(dat[, 1])[c("x", "y")])
# Decile boundaries of the sample (0%, 10%, ..., 100%).
quant <- quantile(dat[, 1], probs = seq(0, 1, 0.10))
# One colour per interval between consecutive boundaries.
color_pal <- brewer.pal(length(quant) - 1, "RdYlBu")

# Build one filled area per interval. seq_along() is safe for any palette
# length, and a list of layers can be added to a plot in a single step.
decile_layers <- lapply(seq_along(color_pal), function(i) {
  in_band <- dat_density$x > quant[[i]] & dat_density$x < quant[[i + 1]]
  geom_area(data = dat_density[in_band, ], fill = color_pal[i])
})

dens <- ggplot(data = dat_density, aes(x = x, y = y)) +
  geom_line(size = 2) +
  decile_layers
dens
How can I add a common legend with each color of the color_pal vector (corresponding to all the 10% area of data) ?
The easiest way is to define the groups in your dataset
# Label every density grid point with the interval it falls into; -Inf / Inf
# catch the points lying outside the outermost boundaries in `quant`.
dat_density[["quant"]] <- cut(
  dat_density[["x"]],
  breaks = c(-Inf, quant, Inf)
)
# Mapping fill to that column lets the scale supply the colours.
ggplot(dat_density, aes(x = x, y = y, fill = quant)) +
  geom_line(size = 2) +
  geom_area() +
  scale_fill_brewer(palette = "RdYlBu")
Here is the code for the plot
library(ggplot2)
library(plyr)

# Three groups (a, b, c) of ten random normal values each.
df <- data.frame(
  gp = factor(rep(letters[1:3], each = 10)),
  y = rnorm(30)
)
# Per-group mean and standard deviation of y.
ds <- ddply(df, .(gp), summarise, mean = mean(y), sd = sd(y))

# Raw values as default points, group means overlaid as larger red points.
ggplot(df, aes(x = gp, y = y)) +
  geom_point() +
  geom_point(aes(y = mean), data = ds, colour = "red", size = 3)
I want to have a legend for this plot that identifies the data values and the mean values, something like this:
Black point = Data
Red point = Mean.
How can I achieve this?
Use a manual scale, i.e. in your case scale_colour_manual. Then map the colours to values in the scale using the aes() function of each geom:
# Each layer maps colour to a constant label inside aes(); the manual scale
# then assigns the actual colour to each label and titles the legend.
ggplot(df, aes(x = gp, y = y)) +
  geom_point(aes(colour = "data")) +
  geom_point(aes(y = mean, colour = "mean"), data = ds, size = 3) +
  scale_colour_manual(
    name = "Legend",
    values = c(mean = "red", data = "black")
  )
You can combine the mean values and the data in the same data.frame, and set colour/size by a column (a factor) whose value is either data or mean.
library(reshape2)
# Summary table in long format: one row per (gp, statistic), value in `y`.
# The id column is named explicitly instead of leaving melt() to guess it.
dsl <- melt(ds, id.vars = "gp", value.name = "y")
# Tag the raw observations so they can share a `variable` column with dsl.
df[["variable"]] <- "data"
# Stack raw observations and summary rows.
all_data <- rbind(df, dsl)
# Drop the sd rows, keeping the raw data and the means.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
data_w_mean <- subset(all_data, variable != "sd", drop = TRUE)
# Named vectors for use with scale_..._manual: names are the values of
# `variable`, entries are the colour / size to use.
colour_scales <- setNames(c("black", "red"), c("data", "mean"))
size_scales <- setNames(c(1, 3), c("data", "mean"))
ggplot(data_w_mean, aes(x = gp, y = y)) +
  geom_point(aes(colour = variable, size = variable)) +
  scale_colour_manual(name = "Type", values = colour_scales) +
  scale_size_manual(name = "Type", values = size_scales)
Or you could not combine, but include the column in both data sets
# Keep only the mean rows of the long-format summary.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
dsl_mean <- subset(dsl, variable != "sd", drop = TRUE)
# Both layers inherit the colour / size mapping on `variable`: the first
# draws the raw data from df, the second the means from dsl_mean.
ggplot(df, aes(x = gp, y = y, colour = variable, size = variable)) +
  geom_point() +
  geom_point(data = dsl_mean) +
  scale_colour_manual(name = "Type", values = colour_scales) +
  scale_size_manual(name = "Type", values = size_scales)
Which gives the same results