geom_density NOT displaying over geom_histogram - r

I am trying to draw a density curve over histogram using ggplot but to no avail. dlist is a vector with numeric values.
Here is my code:
ggplot() +
geom_histogram(aes(x=dlist), bins = 30, fill = "#B3E4F7") +
geom_density() +
geom_vline(aes(xintercept = mean(dlist)),
color="#D2091F", linetype="dashed",size=1)

You need to set y to ..density... For example:
ggplot(data.frame(dlist), aes(x=dlist, y = ..density..)) +
geom_histogram(bins = 30, fill = "#B3E4F7") +
geom_density() +
geom_vline(aes(xintercept = mean(dlist)),
color="#D2091F", linetype="dashed",size=1)
A reproducible example:
library(ggplot2)
ggplot(mtcars, aes(x = mpg, y = ..density..)) +
geom_histogram(bins = 30, fill = "#B3E4F7") +
geom_density()

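geom_density() has no data or aesthetics to work with, because ggplot() is called empty and the mapping is given only to geom_histogram(). Put the data and the aes() mapping in ggplot(), or repeat them in every layer.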
# Histogram of dlist with a density curve and a dashed line at the mean.
# The first positional argument of ggplot() is `data`, so the vector is wrapped
# in a data frame and the mapping is passed second; every layer then inherits
# x = dlist. (Passing aes() alone as the first argument is an error.)
ggplot(data.frame(dlist), aes(x = dlist)) +
  geom_histogram(bins = 30, fill = "#B3E4F7") +
  geom_density() +
  geom_vline(aes(xintercept = mean(dlist)),
             color = "#D2091F", linetype = "dashed", size = 1)
However, if you want to compare both, you may want to plot the histogram with a density stat:
# Same plot, but with the histogram bars drawn on the density scale
# (y = ..density..) so that bars and the density curve share one y axis.
# The data frame goes first and the mapping second, because the first
# positional argument of ggplot() is `data`, not `mapping`.
ggplot(data.frame(dlist), aes(x = dlist)) +
  geom_histogram(aes(y = ..density..), bins = 30, fill = "#B3E4F7") +
  geom_density() +
  geom_vline(aes(xintercept = mean(dlist)),
             color = "#D2091F", linetype = "dashed", size = 1)
If you have a numeric vector dlist, you can create a data.frame before ggplot as follows:
# Self-contained example: build a one-column tibble from a numeric vector and
# pipe it into ggplot(). library(tidyverse) attaches ggplot2, tibble and the
# %>% pipe, so the snippet runs in a fresh session.
library(tidyverse)
dlist <- rnorm(1000)
tibble(dlist = dlist) %>%
  ggplot(aes(x = dlist)) +
  # Bars on the density scale so they are comparable with geom_density().
  geom_histogram(aes(y = ..density..), bins = 30, fill = "#B3E4F7") +
  geom_density() +
  geom_vline(aes(xintercept = mean(dlist)),
             color = "#D2091F", linetype = "dashed", size = 1)

Related

plot data and their means in the same graph using ggplot

using the data set airquality I have written the following code:
library("tidyverse")
data(airquality)
airquality <- na.omit(airquality)
airquality$date <- as.Date(paste("1973", airquality$Month, airquality$Day,
sep="-"))
p1 <- ggplot(airquality, aes(x= date, y = Ozone, col=factor(Month))) +
geom_point() +
geom_line()
p1
Now I would like to plot in the same graph the mean of ozone for each months. How can I do this?
You could add the mean as a dashed line. The easiest way to do this might be to simply pass the data you want to a geom_line layer:
# Daily Ozone per month, with each month's mean overlaid as a dashed
# horizontal line in the month's colour.
# A grouped mutate() keeps every row and replaces Ozone with its monthly mean,
# so geom_line() draws a flat segment spanning that month's dates. This avoids
# returning two rows per group from summarise(), which dplyr 1.1.0 deprecated.
ggplot(airquality, aes(x = date, y = Ozone, col = factor(Month))) +
  geom_point() +
  geom_line(alpha = 0.5) +
  geom_line(
    data = airquality %>%
      group_by(Month) %>%
      mutate(Ozone = mean(Ozone)) %>%
      ungroup(),
    linetype = 2, size = 1
  ) +
  scale_color_brewer(palette = "Set1") +
  theme_minimal(base_size = 16)
If you just want points showing the mean, you could simplify things with stat_mean from ggpubr
ggplot(airquality, aes(x = date, y = Ozone, col = factor(Month))) +
geom_point() +
geom_line(alpha = 0.5) +
ggpubr::stat_mean(size = 5, shape = 21,
aes(fill = factor(Month)), color = "black") +
scale_color_brewer(palette = "Set1") +
scale_fill_brewer(palette = "Set1") +
theme_minimal(base_size = 16)
To join these dots up, you could do:
ggplot(airquality, aes(x = date, y = Ozone, col = factor(Month))) +
geom_point() +
geom_line(alpha = 0.5) +
geom_line(data = airquality %>%
group_by(Month) %>%
summarise(Ozone = mean(Ozone), date = mean(date)),
color = "black", linetype = 2) +
ggpubr::stat_mean(size = 5, shape = 21,
aes(fill = factor(Month)), color = "black") +
scale_color_brewer(palette = "Set1") +
scale_fill_brewer(palette = "Set1") +
theme_minimal(base_size = 16)

Change the color inside the plot

In this plot
library(ggplot2)
df <- data.frame(year = c(2011,2012,2013,2014,2015,2016,2017,2018),
value = c(337,423,551,661,846,1387,2222,3580))
ggplot(df, aes(year, value)) +
geom_point() +
geom_line() +
geom_text(aes(label = value, y = (value - 50)*0.9))
How is it possible to make the color of numbers of value red?
Like this?
library(ggplot2)
df <- data.frame(year = c(2011,2012,2013,2014,2015,2016,2017,2018),
value = c(337,423,551,661,846,1387,2222,3580))
ggplot(df, aes(year, value)) +
geom_point() +
geom_line() +
geom_text(aes(label = value, y = (value - 50)*0.9), color = "red")
Or like this?
library(ggplot2)
df <- data.frame(year = c(2011,2012,2013,2014,2015,2016,2017,2018),
value = c(337,423,551,661,846,1387,2222,3580))
ggplot(df, aes(year, value)) +
geom_point() +
geom_line() +
geom_text(aes(label = value, y = (value - 50)*0.9), color = "red") +
theme(axis.text.y = element_text(colour = "red"))

ggplot2 - how to limit panel and axis?

I want to know how to turn this plot:
Into this plot:
As you can see the panel and axis on the 2nd plot are limited to the data extent. I made the second graph using design software but want to know the code.
I've already tried limiting the x and y axes using
xlim and ylim, but it made no difference.
Please see my code below. Sorry it's so messy; this is my first time using RStudio. Thanks!
ggplot() +
geom_errorbar(data = U1483_Coiling_B_M_Removed_R, mapping = aes(x = `Age (Ma) Linear Age Model`, ymin = `Lower interval*100`, ymax = `Upper interval*100`), width = 0.025, colour = 'grey') +
geom_line(data = U1483_Coiling_B_M_Removed_R, aes(x = `Age (Ma) Linear Age Model`, y = `Percent Dextral`)) +
geom_point(data = U1483_Coiling_B_M_Removed_R, aes(x = `Age (Ma) Linear Age Model`, y = `Percent Dextral`), colour = 'red') +
geom_point(data = U1483_Coiling_B_M_Removed_R, aes(x = `Age (Ma) Linear Age Model`, y = `Lab?`)) +
theme(axis.text.x=element_text(angle=90, size=10, vjust=0.5)) +
theme(axis.text.y=element_text(angle=90, size=10, vjust=0.5)) +
theme_classic() +
theme(panel.background = element_rect(colour = 'black', size = 1)) +
xlim(0, 2.85) +
ylim(0, 100)
You can use expand when specifying axis scales, like so:
# Load library
library(ggplot2)
# Set RNG
set.seed(0)
# Create dummy data
df <- data.frame(x = seq(0, 3, by = 0.1))
df$y <- 100 - abs(rnorm(nrow(df), 0, 10))
# Plot results
# Original
ggplot(df, aes(x, y)) +
geom_line() +
geom_point(colour = "#FF3300", size = 5)
# With expand
ggplot(df, aes(x, y)) +
geom_line() +
geom_point(colour = "#FF3300", size = 5) +
scale_y_continuous(expand = c(0, 0))

Make overlapping histogram in with geom_histogram

I am trying to make an overlapping histogram like this:
ggplot(histogram, aes = (x), mapping = aes(x = value)) +
geom_histogram(data = melt(tpm_18_L_SD), breaks = seq(1,10,by = 1),
aes(y = 100*(..count../sum(..count..))), alpha=0.2) +
geom_histogram(data = melt(tpm_18_S_SD), breaks = seq(1,10,by = 1),
aes(y = 100*(..count../sum(..count..))), alpha=0.2) +
geom_histogram(data = melt(tpm_18_N_SD), breaks = seq(1,10,by = 1),
aes(y = 100*(..count../sum(..count..))), alpha=0.2) +
facet_wrap(~variable, scales = 'free_x') +
ylim(0, 20) +
ylab("Percentage of Genes") +
xlab("Standard Deviation")
My code can only make them plot side by side and I would like to also make them overlap. Thank you! I based mine off of the original post where this came from but it did not work for me. It was originally 3 separate graphs which I combined with grid and ggarrange. It looks like this right now.
Here is the code of the three separate graphs.
SD_18_L <- ggplot(data = melt(tpm_18_L_SD), mapping = aes(x = value)) +
geom_histogram(aes(y = 100*(..count../sum(..count..))), breaks = seq(1, 10, by = 1)) +
facet_wrap(~variable, scales = 'free_x') +
ylim(0, 20) +
ylab("Percentage of Genes") +
xlab("Standard Deviation")
SD_18_S <- ggplot(data = melt(tpm_18_S_SD), mapping = aes(x = value)) +
geom_histogram(aes(y = 100*(..count../sum(..count..))), breaks = seq(1, 10, by = 1)) +
facet_wrap(~variable, scales = 'free_x') +
ylim(0, 20) +
ylab("Percentage of Genes") +
xlab("Standard Deviation")
SD_18_N <- ggplot(data = melt(tpm_18_N_SD), mapping = aes(x = value)) +
geom_histogram(aes(y = 100*(..count../sum(..count..))), breaks = seq(1, 10, by = 1)) +
facet_wrap(~variable, scales = 'free_x') +
ylim(0, 20) +
ylab("Percentage of Genes") +
xlab("Standard Deviation")
What my graphs look like now:
ggplot expects dataframes in a long format. I'm not sure what your data looks like, but you shouldn't have to call geom_histogram for each category. Instead, get all your data into a single dataframe (you can use rbind for this) in long format (what you're doing already with melt) first, then feed it into ggplot and map fill to whatever your categorical variable is.
Your call to facet_wrap is what puts them in 3 different plots. If you want them all on the same plot, take that line out.
An example using the iris data:
ggplot(iris, aes(x = Sepal.Length, fill = Species)) +
geom_histogram(alpha = 0.6, position = "identity")
I decreased alpha in geom_histogram so you can see where colors overlap, and added position = "identity" so observations aren't being stacked. Hope that helps!

How to add curve which fits bar chart

How can I fit the bar chart with a curve, similar to the density plot for a histogram?
library(ggplot2)
library(plyr)
y<-hist(rnorm(1000),breaks=30)$count
df<-data.frame(x=1:length(y),y=y,key="A")
df2<-data.frame(x=1:length(y),y=y*0.4,key="B")
df<-rbind(df,df2)
p<-ggplot(df,aes(x=x))
p<-p + geom_bar(subset=.(key =="A"),aes(y = y),stat="identity",fill = "blue", alpha = 0.2)
p<-p + geom_bar(subset=.(key =="B"),aes(y = y),stat="identity",fill = "blue", alpha = 0.2)
#p<-p + geom_density(subset=.(key =="A"), aes(y=y),alpha=.2, fill="#0000FF")
p
Use ..density.. to do the transformation:
data = data.frame(x = rnorm(500))
ggplot(data) +
geom_histogram(aes(x = x, y = ..density..)) +
geom_density(aes(x), colour = I('red'))
For your data:
ggplot(df) +
geom_histogram(aes(y, ..density.., fill=key)) +
geom_density(aes(y, colour = key))
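Also, try to avoid names like 'df': df() is already an R function (the density of the F distribution in the stats package), so reusing the name can be confusing.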

Resources