Vertical line in histogram in r - r

I'm struggeling a bit with a peace of code in R. I am trying to create 6 different histograms in the same figure. It works fine, but I need to place 1 vertical line in each of the 6 histograms. The code I am working with could look something like this:
require(ggplot2)
require(reshape2)
require(gdata)
MC_beta=rbind(rnorm(1000,-2,0.1),rnorm(1000,-1,0.1),rnorm(1000,0,0.1),rnorm(1000,0.5,0.1),rnorm(1000,1,0.1),rnorm(1000,2,0.1))
df <- data.frame(MC_beta[1,], MC_beta[2,], MC_beta[3,], MC_beta[4,],MC_beta[5,],MC_beta[6,])
names(df)[1:6]<-c("1", "2", "3", "4","5","6")
df2 = melt(df)
z=c(-2,-1,0,0.5,1,2)
ggplot(df2, aes(x=value, fill = variable)) + geom_vline(xintercept = z, colour="black") +geom_histogram(binwidth=0.03,colour = "black") + scale_fill_manual(name = "",values = c('red','blue',"red","blue","red","blue")) +
facet_wrap(~variable,nrow=6, ncol=1) + scale_x_continuous(breaks=seq(-2.5,2.5,0.5)) + guides(fill=FALSE) +
theme_bw() + theme(strip.background = element_blank(),axis.text=element_text(size=14.5),strip.text.x = element_text(size = 14.5)) + stat_function(fun = dnorm)
The problem is with the statement geom_vline(xintercept = z, colour = "black"). Apparently instead of placing one vertical line in each histogram, it places all 6 lines in each histogram. So instead, I want the first element in z to make a vertical line in the first histogram, the second element in z to make a vertical line in the second histogram and so fourth.
Thanks

Your z needs to be a data.frame with the corresponding xintercept for every value of the variable that defines the facet. Try these changes:
z <- data.frame(variable=levels(df2$variable),
mean=c(-2,-1,0,0.5,1,2))
ggplot(df2, aes(x=value, fill = variable))+
geom_vline(data=z, aes(xintercept = mean), colour="black") +
geom_histogram(binwidth=0.03,colour = "black") +
scale_fill_manual(name = "",values = c('red','blue',"red","blue","red","blue")) +
facet_wrap(~variable,nrow=6, ncol=1) +
scale_x_continuous(breaks=seq(-2.5,2.5,0.5))+ guides(fill=FALSE) +
theme_bw() +
theme(strip.background = element_blank(), axis.text=element_text(size=14.5), strip.text.x = element_text(size = 14.5)) +
stat_function(fun = dnorm)
I hope that helps.

You have z outside the data, so you will draw a vertical line in each facet. Use
df2 <- (merge(df2, cbind.data.frame(variable=names(df), z)))
and then
geom_vline(aes(xintercept = z), colour="black")

Related

How to fix: when overlaying two scatter plots with using reorder of aes, the reorder gets lost

I have two scatter plots obtained from two sets of data that I would like to overlay, when using the ggplo2 for creating single plot i am using log scale and than ordering the numbers sothe scatter plot falls into kind if horizontal S shape. Byt when i want to overlay, the information about reordering gets lost, and the plot loses its shape.
this is how the df looks like (one has 1076 entries and the other 1448)
protein Light_Dark log10
AT1G01080 1.1744852 0.06984755
AT1G01090 1.0710359 0.02980403
AT1G01100 0.4716955 -0.32633823
AT1G01320 156.6594802 2.19495668
AT1G02500 0.6406005 -0.19341276
AT1G02560 1.3381804 0.12651467
AT1G03130 0.6361147 -0.19646458
AT1G03475 0.7529015 -0.12326181
AT1G03630 0.7646064 -0.11656207
AT1G03680 0.8340107 -0.07882836
this is for single plot:
p1 <- ggplot(ratio_log_ENR4, aes(x=reorder(protein, -log10), y=log10)) +
geom_point(size = 1) +
#coord_cartesian(xlim = c(0, 1000)) +
geom_hline(yintercept=0.1, col = "red") + #check gene
geom_hline(yintercept=-0.12, col = "red") +#check gene
labs(x = "Protein")+
theme_classic()+
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank())+
labs(y = "ratio Light_Dark log10")+
labs(x="Protein")
image=p1
ggsave(file="p1_ratio_data_ENR4_cys.svg", plot=image, width=10, height=8)
and for over lay:
p1_14a <- ggplot(ratio_log_ENR1, aes(x=reorder(protein, -log10), y=log10)) +
geom_point(size = 1) +
#coord_cartesian(xlim = c(0, 1000)) +
geom_hline(yintercept=0.1, col = "red") + #check gene
geom_hline(yintercept=-0.12, col = "red") +#check gene
labs(x = "Protein")+
theme_classic()+
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank())+
labs(y = "ratio Light_Dark log10")+
labs(x="Protein")+
geom_point()+
geom_point(data=ratio_log_ENR4, color="red")
p=ggplot(ratio_log_ENR1, aes(x=reorder(protein, -log10), y=log10)) +
geom_point(size = 1) +
#coord_cartesian(xlim = c(0, 1000)) +
geom_hline(yintercept=0.1, col = "red") + #check gene
geom_hline(yintercept=-0.12, col = "red") +#check gene
labs(x = "Protein")+
theme_classic()+
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank())+
labs(y = "ratio Light_Dark log10")+
labs(x="Protein")
p = p + geom_point(data=ratio_log_ENR4, aes(x=reorder(protein, -log10), y=log10), color ="red" )
p
I tried to change classes... but it cant be the problem since for single plot its working like it is
The easiest solution I see for you is just binding together your two dataframes before plotting.
a$color <- 'red'
b$color <- 'blue'
ab <- a %>%
rbind(b)
ggplot(ab, aes(x = fct_reorder(protein, -log10), y = log10, color = color)) +
geom_point() +
scale_color_identity()
You can find a nice cheat-sheet for working with factors here: https://stat545.com/block029_factors.html

Make overlapping histogram in with geom_histogram

I am trying to make an overlapping histogram like this:
ggplot(histogram, aes = (x), mapping = aes(x = value)) +
geom_histogram(data = melt(tpm_18_L_SD), breaks = seq(1,10,by = 1),
aes(y = 100*(..count../sum(..count..))), alpha=0.2) +
geom_histogram(data = melt(tpm_18_S_SD), breaks = seq(1,10,by = 1),
aes(y = 100*(..count../sum(..count..))), alpha=0.2) +
geom_histogram(data = melt(tpm_18_N_SD), breaks = seq(1,10,by = 1),
aes(y = 100*(..count../sum(..count..))), alpha=0.2) +
facet_wrap(~variable, scales = 'free_x') +
ylim(0, 20) +
ylab("Percentage of Genes") +
xlab("Standard Deviation")
My code can only make them plot side by side and I would like to also make them overlap. Thank you! I based mine off of the original post where this came from but it did not work for me. It was originally 3 separate graphs which I combined with grid and ggarrange. It looks like this right now.
Here is the code of the three separate graphs.
SD_18_L <- ggplot(data = melt(tpm_18_L_SD), mapping = aes(x = value)) +
geom_histogram(aes(y = 100*(..count../sum(..count..))), breaks = seq(1, 10, by = 1)) +
facet_wrap(~variable, scales = 'free_x') +
ylim(0, 20) +
ylab("Percentage of Genes") +
xlab("Standard Deviation")
SD_18_S <- ggplot(data = melt(tpm_18_S_SD), mapping = aes(x = value)) +
geom_histogram(aes(y = 100*(..count../sum(..count..))), breaks = seq(1, 10, by = 1)) +
facet_wrap(~variable, scales = 'free_x') +
ylim(0, 20) +
ylab("Percentage of Genes") +
xlab("Standard Deviation")
SD_18_N <- ggplot(data = melt(tpm_18_N_SD), mapping = aes(x = value)) +
geom_histogram(aes(y = 100*(..count../sum(..count..))), breaks = seq(1, 10, by = 1)) +
facet_wrap(~variable, scales = 'free_x') +
ylim(0, 20) +
ylab("Percentage of Genes") +
xlab("Standard Deviation")
What my graphs look like now:
ggplot expects dataframes in a long format. I'm not sure what your data looks like, but you shouldn't have to call geom_histogram for each category. Instead, get all your data into a single dataframe (you can use rbind for this) in long format (what you're doing already with melt) first, then feed it into ggplot and map fill to whatever your categorical variable is.
Your call to facet_wrap is what puts them in 3 different plots. If you want them all on the same plot, take that line out.
An example using the iris data:
ggplot(iris, aes(x = Sepal.Length, fill = Species)) +
geom_histogram(alpha = 0.6, position = "identity")
I decreased alpha in geom_histogram so you can see where colors overlap, and added position = "identity" so observations aren't being stacked. Hope that helps!

Add same gradient to each rectangle in ggplot

I am trying to display color gradient in below created ggplot2. So with using following data and code
vector <- c(9, 10, 6, 5, 5)
Names <- c("Leadership", "Management\n", "Problem Solving",
"Decision Making\n", "Social Skills")
# add \n
Names[seq(2, length(Names), 2)] <- paste0("\n" ,Names[seq(2, length(Names), 2)])
# data.frame, including a grouping vector
d <- data.frame(Names, vector, group=c(rep("Intra-capacity", 3), rep("Inter-capacity", 2)))
# correct order
d$Names <- factor(d$Names, levels= unique(d$Names))
d$group_f = factor(d$group, levels=c('Intra-capacity','Inter-capacity'))
# plot the bars
p <- ggplot(d, aes(x= Names, y= vector, group= group, fill=vector, order=vector)) +
geom_bar(stat= "identity") +
theme_bw()+
scale_fill_gradient(low="white",high="blue")
# use facet_grid for the groups
#p + facet_grid(.~group_f, scales= "free_x", space= "free_x")
p+ theme(text = element_text(size=23),plot.background = element_rect(fill = "white"),
strip.background = element_rect(fill="Dodger Blue")) +
facet_grid(.~group_f, scales= "free_x", space= "free_x") + xlab("") +ylab("") +
theme(strip.text.x = element_text(size = 18, colour = "white" )) +
geom_text(size=10, aes(label=vector))
My output is this:
But now I would like to insert color gradient so each rectangle would look like picture below (my desired output):
I've also looked at this:
R: gradient fill for geom_rect in ggplot2
create an arrow with gradient color
http://www.computerworld.com/article/2935394/business-intelligence/my-ggplot2-cheat-sheet-search-by-task.html
Color Gradients With ggplot
Label minimum and maximum of scale fill gradient legend with text: ggplot2
How can I apply a gradient fill to a geom_rect object in ggplot2?
And also tried using:
scale_fill_gradient(low="white",high="blue") or
scale_fill_gradientn(colours = c("blue","white","red"),
values = c(0,5,10),
guide = "colorbar", limits=c(0,10))
But I am clearly doing something wrong.
I'm with #RomanLustrik here. However, if you can't use Excel (= prly much easier), maybe just adding a white rectangle with an alpha-gradient is already enough:
ggplot(d, aes(x= Names, y= vector, group= group,order=vector)) +
geom_bar(stat= "identity", fill="blue") +
theme_bw() +
scale_fill_gradient(low="white",high="blue") +
annotation_custom(
grid::rasterGrob(paste0("#FFFFFF", as.hexmode(1:255)),
width=unit(1,"npc"),
height = unit(1,"npc"),
interpolate = TRUE),
xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=5
) +
geom_text(aes(label=vector), color="white", y=2, size=12)

R - ggplot2: geom_area loses its fill if limits are defined to max and min values from a data.frame

I am trying to reproduce a sparkline with ggplot2 like the one at the bottom of this image:
Using the following code I get the result displayed at the end of the code.
Note: My actual data.frame has only 2 rows. Therefore the result looks like a single line.
# Create sparkline for MM monthly
# sparkline(dailyMM2.aggregate.monthly$Count, type = 'line')
p <- ggplot(dailyMM2.aggregate.monthly, aes(x=seq(1:nrow(dailyMM2.aggregate.monthly)), y=Count)) +
geom_area(fill="#83CAF5") +
geom_line(color = "#2C85BB", size = 1.5) +
scale_x_continuous(expand=c(0,0)) +
scale_y_continuous(expand=c(0,0))
p + theme(axis.line=element_blank(),axis.text.x=element_blank(),
axis.text.y=element_blank(),axis.ticks=element_blank(),
axis.title.x=element_blank(),
axis.title.y=element_blank(),legend.position="none",
panel.background=element_blank(),panel.border=element_blank(),panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),plot.background=element_blank())
However, as I try to only show trends with the sparkline and, therefore, absolute values aren't relevant for me, I have to adapt the config of the ggplot to limit the visible area between the min and max of my axis.y. I do it using the limits option:
# Create sparkline for MM monthly
# sparkline(dailyMM2.aggregate.monthly$Count, type = 'line')
p <- ggplot(dailyMM2.aggregate.monthly, aes(x=seq(1:nrow(dailyMM2.aggregate.monthly)), y=Count)) +
geom_area(fill="#83CAF5") +
geom_line(color = "#2C85BB", size = 1.5) +
scale_x_continuous(expand=c(0,0)) +
scale_y_continuous(expand=c(0,0), limits = c(min(dailyMM2.aggregate.monthly$Count)-100, max(dailyMM2.aggregate.monthly$Count)+100))
p + theme(axis.line=element_blank(),axis.text.x=element_blank(),
axis.text.y=element_blank(),axis.ticks=element_blank(),
axis.title.x=element_blank(),
axis.title.y=element_blank(),legend.position="none",
panel.background=element_blank(),panel.border=element_blank(),panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),plot.background=element_blank())
However, the result is not like expected, as the whole geom_area's fill dissapears, as shown in the folllowing image:
Can anyone shed light why this behaviour is happening and maybe help me with a proper way to solve this problem?
If you check ?geom_area you will note that the minimum is fixed to 0. It might be easier to use geom_ribbon. It has a ymin aesthetic. Set the maximum y value using limits or coord_cartesian.
library(reshape2)
library(ggplot2)
# Some data
df=data.frame(year = rep(2010:2014, each = 4),
quarter=rep(c("Q1","Q2","Q3","Q4"),5),
da=c(46,47,51,50,56.3,53.6,55.8,58.9,61.0,63,58.8,62.5,59.5,61.7,60.6,63.9,68.4,62.2,62,70.4))
df.m <- melt(data = df, id.vars = c("year", "quarter"))
ymin <- min(df.m$value)
ymax <- max(df.m$value)
ggplot(data = df.m, aes(x = interaction(quarter,year), ymax = value, group = variable)) +
geom_ribbon(aes(ymin = ymin), fill = "#83CAF5") +
geom_line(aes(y = value), size = 1.5, colour = "#2C85BB") +
coord_cartesian(ylim = c(ymin, ymax)) +
scale_y_continuous(expand = c(0,0)) +
scale_x_discrete(expand = c(0,0)) +
theme_void()

How to add multiple geom_hlines with color equal to grouping variable

I've created a grouped boxplot and added three specific geom_hlines to the plot. However, I want to set the hline colors to fill=factor(Training.Location), rather than trying to match the colors manually with a color palette. Is there a way to do this?
ggplot(aes(x=factor(CumDes),y=Mn_Handle), data=NH_C) +
geom_boxplot( aes(fill=factor(Training.Location))) +
geom_point( aes(color=factor(Training.Location)),
position=position_dodge(width=0.75) ) +
theme(axis.ticks = element_blank(), axis.text.x = element_blank()) +
coord_cartesian(ylim = c(0, 2000)) +
geom_hline(yintercept=432, linetype="dashed", lwd=1.2) +
geom_hline(yintercept=583, linetype="dashed", lwd=1.2) +
geom_hline(yintercept=439, linetype="dashed", lwd=1.2)
This is the sort of thing that seems easiest with a new dataset. I'm not sure how you are calculating the values you are using for the horizontal lines, but often times I want to calculate these from the original dataset and use some sort of aggregation function/package for that.
Here is a modified example from the help page for geom_hline.
Make the dataset to give to geom_hline, including the values for the horizontal lines as well as the grouping variable.
mean_wt = data.frame(cyl = c(4, 6, 8), wt = c(2.28, 3.11, 4.00))
Then just plot with the new dataset for that layer, using whatever aesthetic you wish with the grouping variable.
ggplot(mtcars, aes(x = factor(vs), wt) ) +
geom_boxplot(aes(fill = factor(cyl))) +
geom_point(aes(color = factor(cyl)), position = position_dodge(.75)) +
geom_hline(data = mean_wt, aes(yintercept = wt, color = factor(cyl)) )
Here's a somewhat hackish solution (I had to improvise on the data, feel free to improve)
# install.packages("ggplot2", dependencies = TRUE)
library(ggplot2)
col <- c("#CC6666", "#9999CC", "#66CC99")
ggplot(mtcars, aes(x = factor(cyl), y=mpg)) +
geom_boxplot(aes(fill=gear)) +
geom_point( aes(color=factor(gear)),
position=position_dodge(width=0.75) ) +
scale_colour_manual(values= col) +
theme(axis.ticks = element_blank(), axis.text.x = element_blank()) + coord_cartesian(ylim = c(8, 35)) +
geom_hline(yintercept=12, linetype="dashed", lwd=1.2, color=col[1]) +
geom_hline(yintercept=18, linetype="dashed", lwd=1.2, color=col[2]) +
geom_hline(yintercept=28, linetype="dashed", lwd=1.2, color=col[3])

Resources