I have the following data:
# Example data: three groups, each with scores 1-5 and two timing measures.
# The timings are random draws and no seed is set, so values change per run.
groups <- rep(c(1, 2, 3), each = 5)
scores <- rep(seq(1, 5), times = 3)
times1 <- rnorm(15, mean = 3, sd = 2)
times2 <- rnorm(15, mean = 1, sd = 0.5)
df <- data.frame(groups, scores, times1, times2)
and I have the following plot
# Assemble the data frame and draw both timing series, one panel per group.
df <- data.frame(groups, scores, times1, times2)
plt <- ggplot(df, aes(x = scores, y = times1, color = factor(groups))) +
  # first series (times1): points plus a connecting line
  geom_point(cex = 4) +
  geom_line() +
  theme_bw() +
  # second series (times2): point shape 23 is set as a constant, not mapped
  geom_point(aes(x = scores, y = times2), pch = 23, cex = 4) +
  geom_line(aes(x = scores, y = times2)) +
  facet_wrap(~ groups, ncol = 4, scales = "free_x")
plt
which results in
How can I add a guide for the diamond points, and how can I change the title of each of the corresponding guides?
If you want a legend for something, it should be specified as an aesthetic. Perhaps something like
# Only mapped aesthetics get a legend, so shape is mapped to a constant label
# inside aes() for each series.
ggplot(df, aes(x = scores, color = factor(groups))) +
  geom_point(aes(y = times1, shape = "times1"), size = 4) +
  geom_line(aes(y = times1)) +
  geom_point(aes(y = times2, shape = "times2"), size = 4) +
  geom_line(aes(y = times2)) +
  facet_wrap(~ groups, ncol = 4, scales = "free_x") +
  theme_bw() +
  # name = sets the shape legend title; values = keeps the default point for
  # times1 and the diamond (shape 23) for times2
  scale_shape_manual(name = "Series", values = c(times1 = 19, times2 = 23)) +
  # title of the colour legend
  labs(color = "Group")
Rather than manually adding layers, it would be even better to reshape your data into the long format that ggplot prefers:
# Melt to long format (one row per groups/scores/variable combination) so a
# single point layer and a single line layer draw both series.
ggplot(
  data = reshape2::melt(df, id.vars = c("groups", "scores")),
  mapping = aes(
    x = scores, y = value,
    colour = factor(groups), shape = variable
  )
) +
  geom_point() +
  geom_line() +
  facet_wrap(~ groups)
Related
using the data set airquality I have written the following code:
library("tidyverse")
# Load the data set and drop rows containing missing values.
data(airquality)
airquality <- na.omit(airquality)
# Build a Date column from Month and Day, using 1973 as the year.
airquality$date <- with(
  airquality,
  as.Date(paste("1973", Month, Day, sep = "-"))
)
# Ozone over time, coloured by month.
p1 <- ggplot(airquality, aes(x = date, y = Ozone, colour = factor(Month))) +
  geom_point() +
  geom_line()
p1
Now I would like to plot the mean ozone for each month in the same graph. How can I do this?
You could add the mean as a dashed line. The easiest way to do this might be to simply pass the data you want to a geom_line layer:
# End points of a horizontal line per month: the month's mean Ozone at its
# first and last observation date.
# reframe() is used because the result has two rows per group; summarise()
# has warned about multi-row results since dplyr 1.1.0. The grouping column
# Month is carried into the result, so it is not recomputed here.
monthly_mean_ends <- airquality %>%
  group_by(Month) %>%
  reframe(Ozone = mean(Ozone),
          date = c(first(date), last(date)))

ggplot(airquality, aes(x = date, y = Ozone, col = factor(Month))) +
  geom_point() +
  geom_line(alpha = 0.5) +
  # dashed mean line; line width is given as `linewidth` because `size` for
  # lines is deprecated since ggplot2 3.4.0
  geom_line(data = monthly_mean_ends, linetype = 2, linewidth = 1) +
  scale_color_brewer(palette = "Set1") +
  theme_minimal(base_size = 16)
If you just want points showing the mean, you could simplify things with stat_mean from ggpubr
# Raw series plus one large filled point per month drawn by ggpubr::stat_mean().
ggplot(airquality, aes(x = date, y = Ozone, colour = factor(Month))) +
  geom_point() +
  geom_line(alpha = 0.5) +
  ggpubr::stat_mean(
    mapping = aes(fill = factor(Month)),
    shape = 21, size = 5, color = "black"
  ) +
  # matching palettes for the outline colour and the fill of the mean points
  scale_color_brewer(palette = "Set1") +
  scale_fill_brewer(palette = "Set1") +
  theme_minimal(base_size = 16)
To join these dots up, you could do:
# Raw series, a black dashed line through the per-month means, and the mean
# points themselves drawn on top.
ggplot(airquality, aes(x = date, y = Ozone, colour = factor(Month))) +
  geom_point() +
  geom_line(alpha = 0.5) +
  geom_line(
    # one row per month: mean Ozone at the mean date
    data = summarise(
      group_by(airquality, Month),
      Ozone = mean(Ozone),
      date = mean(date)
    ),
    linetype = 2, color = "black"
  ) +
  ggpubr::stat_mean(
    mapping = aes(fill = factor(Month)),
    shape = 21, size = 5, color = "black"
  ) +
  scale_color_brewer(palette = "Set1") +
  scale_fill_brewer(palette = "Set1") +
  theme_minimal(base_size = 16)
I am trying to make an overlapping histogram like this:
# Attempt to draw the three SD histograms in one plot.
# NOTE(review): `histogram` and `x` are not defined anywhere in this snippet, and
# `aes = (x)` is not a ggplot() argument -- confirm what was intended here.
ggplot(histogram, aes = (x), mapping = aes(x = value)) +
# One semi-transparent histogram layer per melted data set; each layer plots
# y = 100 * count / sum(count) over unit-wide bins from 1 to 10.
geom_histogram(data = melt(tpm_18_L_SD), breaks = seq(1,10,by = 1),
aes(y = 100*(..count../sum(..count..))), alpha=0.2) +
geom_histogram(data = melt(tpm_18_S_SD), breaks = seq(1,10,by = 1),
aes(y = 100*(..count../sum(..count..))), alpha=0.2) +
geom_histogram(data = melt(tpm_18_N_SD), breaks = seq(1,10,by = 1),
aes(y = 100*(..count../sum(..count..))), alpha=0.2) +
# facet_wrap() draws a separate panel for each level of `variable`.
facet_wrap(~variable, scales = 'free_x') +
ylim(0, 20) +
ylab("Percentage of Genes") +
xlab("Standard Deviation")
My code can only make them plot side by side and I would like to also make them overlap. Thank you! I based mine off of the original post where this came from but it did not work for me. It was originally 3 separate graphs which I combined with grid and ggarrange. It looks like this right now.
Here is the code of the three separate graphs.
# The three plots differ only in their input data, so one helper builds them.
# Each plot melts its input, draws y = 100 * count / sum(count) over unit-wide
# bins from 1 to 10, and shows one panel per level of `variable`.
plot_sd_histogram <- function(sd_values) {
  ggplot(data = melt(sd_values), mapping = aes(x = value)) +
    geom_histogram(
      mapping = aes(y = 100 * (..count.. / sum(..count..))),
      breaks = seq(1, 10, by = 1)
    ) +
    facet_wrap(~variable, scales = "free_x") +
    ylim(0, 20) +
    ylab("Percentage of Genes") +
    xlab("Standard Deviation")
}

SD_18_L <- plot_sd_histogram(tpm_18_L_SD)
SD_18_S <- plot_sd_histogram(tpm_18_S_SD)
SD_18_N <- plot_sd_histogram(tpm_18_N_SD)
What my graphs look like now:
ggplot expects dataframes in a long format. I'm not sure what your data looks like, but you shouldn't have to call geom_histogram for each category. Instead, get all your data into a single dataframe (you can use rbind for this) in long format (what you're doing already with melt) first, then feed it into ggplot and map fill to whatever your categorical variable is.
Your call to facet_wrap is what puts them in 3 different plots. If you want them all on the same plot, take that line out.
An example using the iris data:
# One histogram per species in a single panel: position = "identity" overlays
# the bars instead of stacking them, and alpha < 1 keeps overlaps visible.
ggplot(data = iris, mapping = aes(x = Sepal.Length, fill = Species)) +
  geom_histogram(position = "identity", alpha = 0.6)
I decreased alpha in geom_histogram so you can see where colors overlap, and added position = "identity" so observations aren't being stacked. Hope that helps!
I am using facet_grid() to plot multiple plot divided per groups of data. For each plot, I want to add in the corner the highest value of the Y axis. I've tried several hacks but it never gives me the expected results. This answer partially helps me but the value I want to add will constantly be changing, therefore I don't see how I can apply it.
Here is a minimal example, I'd like to add the red numbers on the graph below:
library(ggplot2)
# Two groups (A, B), four hourly values each.
data <- data.frame(
  group = rep(c("A", "B"), each = 4),
  hour = rep(c(1, 2, 3, 4), 2),
  value = c(5, 4, 2, 3, 6, 7, 4, 5)
)
# One panel per group, both sharing the same fixed axis limits.
ggplot(data, aes(x = hour, y = value)) +
  geom_line() +
  geom_point() +
  theme(aspect.ratio = 1) +
  scale_x_continuous(name = "hours", limits = c(1, 4)) +
  scale_y_continuous(limits = c(1, 10), breaks = seq(1, 10, by = 2)) +
  facet_grid(~ group)
Thanks for your help!
library(dplyr)
# Highest value per group: one row, and therefore one label, per facet.
data2 <- summarise(group_by(data, group), Max = max(value))
ggplot(data, aes(x = hour, y = value)) +
  geom_line() +
  geom_point() +
  # x = Inf and y = Inf place the label at the upper-right panel edge;
  # hjust and vjust then shift it back inside the panel
  geom_text(
    mapping = aes(label = Max), data = data2,
    x = Inf, y = Inf, hjust = 2, vjust = 2, col = "red"
  ) +
  theme(aspect.ratio = 1) +
  scale_x_continuous(name = "hours", limits = c(1, 4)) +
  scale_y_continuous(limits = c(1, 10), breaks = seq(1, 10, by = 2)) +
  facet_grid(~ group)
This does the trick. If you always have fixed ranges you can position the text manually.
library(ggplot2)
data <- data.frame(
  group = rep(c("A", "B"), each = 4),
  hour = rep(c(1, 2, 3, 4), 2),
  value = c(5, 4, 2, 3, 6, 7, 4, 5)
)

# Per-group maxima. The group labels come from the names of the tapply()
# result so that every maximum is paired with its own group: tapply() orders
# its output by factor level, whereas unique() follows order of appearance,
# so combining the two can mislabel panels when `data` is not sorted by group.
group_max <- tapply(data$value, data$group, max)
corner_labels <- data.frame(
  x = Inf,
  y = Inf,
  lab = as.vector(group_max),
  group = names(group_max)
)

ggplot(data, aes(x = hour, y = value)) +
  geom_line() +
  geom_point() +
  # "inward" justification keeps the text inside the panel at the Inf corner
  geom_text(
    aes(x, y, label = lab),
    data = corner_labels,
    vjust = "inward",
    hjust = "inward"
  ) +
  theme(aspect.ratio = 1) +
  scale_x_continuous(name = "hours", limits = c(1, 4)) +
  scale_y_continuous(limits = c(1, 10), breaks = seq(1, 10, by = 2)) +
  facet_grid(~ group)
I would like to colour the dashed lines, which are the average values of the two respective categories, with the same colour of the default palette used by ggplot to fill the distributions:
Click here to view the distribution
This is the code used:
library(ggplot2)
# Density of fuel consumption per Model, with one dashed vertical line at the
# mean of each of the two models ("500X" and "Renegade").
print(
  ggplot(dati, aes(x = ECU_fuel_consumption_L_100Km_CF, fill = Model)) +
    ggtitle("Fuel Consumption density histogram, by Model") +
    ylab("Density") +
    geom_density(alpha = .3) +
    scale_x_continuous(
      breaks = pretty(dati$ECU_fuel_consumption_L_100Km_CF, n = 10)
    ) +
    geom_vline(
      aes(xintercept = mean(ECU_fuel_consumption_L_100Km_CF[dati$Model == "500X"])),
      linetype = "dashed", size = 1
    ) +
    geom_vline(
      aes(xintercept = mean(ECU_fuel_consumption_L_100Km_CF[dati$Model == "Renegade"])),
      linetype = "dashed", size = 1
    )
)
Thank you all in advance!
No reproducible example, but you probably want to do something like this:
library(dplyr)
# make up some data: two variables from mtcars stacked into long format
d <- data.frame(x = c(mtcars$mpg, mtcars$hp),
                var = rep(c("mpg", "hp"), each = nrow(mtcars)))
# one mean per group
means <- d %>% group_by(var) %>% summarize(m = mean(x))
ggplot(d, aes(x, fill = var)) +
  geom_density(alpha = 0.3) +
  # Mapping colour to the same variable as fill gives each line its group's
  # colour. Line width is set with `linewidth`; `size` for lines is
  # deprecated since ggplot2 3.4.0.
  geom_vline(data = means, aes(xintercept = m, col = var),
             linetype = "dashed", linewidth = 1)
This approach is extendable to any number of groups.
An option that doesn't require pre-calculation, but is also a bit more hacky, is:
# One geom_vline() layer per group, each with its own one-row data frame
# holding that group's mean. The constant colour label inside aes() ties the
# line to the group name. `linewidth` replaces `size`, which is deprecated
# for lines since ggplot2 3.4.0.
ggplot(d, aes(x, fill = var)) +
  geom_density(alpha = 0.3) +
  geom_vline(aes(col = "hp", xintercept = x),
             linetype = "dashed", linewidth = 1,
             data = data.frame(x = mean(d$x[d$var == "hp"]))) +
  geom_vline(aes(col = "mpg", xintercept = x),
             linetype = "dashed", linewidth = 1,
             data = data.frame(x = mean(d$x[d$var == "mpg"])))
I am working on the Boston data set and trying to see where the 8 room data are on each graph. I have commented out the line which is giving me the error.
I have to add vertical lines at all the points corresponding to rm = 8, to see the spread of data, in every graph of the grid. I want to know:
1. what I have done wrong.
2. A better way to find/represent data points where rm = 8.
library(ggplot2)
library(reshape2)
library(MASS)
library(data.table)
data("Boston")
Boston <- as.data.table(Boston)
# Round rm to whole rooms and rescale nox and chas. `:=` updates Boston in
# place, and the updated table is also what gets assigned to molten_boston.
molten_boston <- Boston[
  ,
  c("rm", "nox", "chas") := list(round(rm), nox * 100, chas * 10)
]
# Long format: one row per (rm, variable, value).
molten_boston <- melt(data = molten_boston, id.vars = "rm")
# Distribution of every variable, one free-scaled panel per variable.
comments_bar <- ggplot(molten_boston) +
  # geom_histogram() is the layer that takes binwidth; geom_bar() counts
  # distinct x values and has no binwidth parameter.
  geom_histogram(aes(x = value), binwidth = 1,
                 color = "black", fill = "salmon") +
  # geom_vline(data = molten_boston[rm == 8, .SD, by = variable, .SDcols = "value"], aes(xintercept = value)) +
  facet_wrap(~ variable, scales = "free")
print(comments_bar)
One other visualization would be stacked bars, it looks ok when large:
# Flag the rm == 8 rows. TRUE is listed first so it becomes the first factor
# level, which is what as.factor() followed by relevel(..., 2) produced.
molten_boston$EightRooms <- factor(molten_boston$rm == 8,
                                   levels = c(TRUE, FALSE))
ggplot(molten_boston, aes(x = value, fill = EightRooms)) +
  # binned, stacked bars: binwidth belongs to geom_histogram(), not geom_bar()
  geom_histogram(binwidth = 1, color = "black") +
  facet_wrap(~ variable, scales = "free")
Using a density plot in the background would be nice, but is a bit tricky in this case because of the changing y-axis. You probably have to do some pre-calculation. Here's my best attempt:
ggplot(molten_boston, aes(x = value)) +
  # Density of the rm == 8 rows only, multiplied by a fixed factor of 300 so
  # it is visible next to the counts. after_stat() replaces the `..density..`
  # notation, which is deprecated since ggplot2 3.4.0.
  geom_density(data = subset(molten_boston, rm == 8),
               aes(y = after_stat(density) * 300),
               fill = "blue", alpha = 0.5) +
  # binwidth belongs to geom_histogram(), not geom_bar()
  geom_histogram(binwidth = 1, color = "black", fill = "salmon",
                 alpha = 0.5) +
  facet_wrap(~ variable, scales = "free")
Another way to do this would be using a rug-plot, with the rugs at the top. This is more or less the same as using geom_vline(...) but the lines don't extend all the way down, obscuring the bars. Also, I don't see why you want to use binwidth=1.
# Bars for all rows, plus a blue rug along the top edge of each panel for the
# values of the rm == 8 rows.
ggplot(molten_boston) +
  geom_bar(mapping = aes(x = value), fill = "salmon", color = "grey50") +
  geom_rug(
    mapping = aes(x = value),
    data = molten_boston[rm == 8, value, by = variable],
    sides = "t", color = "blue"
  ) +
  facet_wrap(~ variable, scales = "free")
I don't have package data.table, so I can't tell if the problem lies in the data.table part of the code or not. But you need a single value for each room size, so
# Round rm to whole rooms and melt to long format (rm, variable, value).
Boston$rm <- round(Boston$rm)
molten_boston <- melt(data = Boston, id.vars = "rm")
# Mean of each variable for every rounded room count. The formula interface
# of aggregate() is used instead of calling the S3 method directly; the
# summary column keeps the name `value`.
rm.means <- aggregate(value ~ rm + variable, data = molten_boston, FUN = mean)
comments_bar <- ggplot(molten_boston) +
  # binwidth belongs to geom_histogram(), not geom_bar()
  geom_histogram(aes(x = value), binwidth = 1,
                 color = "black", fill = "salmon") +
  # one vertical line per panel at the mean of the rm == 8 rows
  geom_vline(data = rm.means[rm.means$rm == 8, ], aes(xintercept = value)) +
  facet_wrap(~ variable, scales = "free")
print(comments_bar)
seems to work.