How to adjust value position separately for each bar in ggplot2? - r

Is there any way to position a label separately for each bar in ggplot2?
In descriptive pseudo-code that would be something like
geom_bar(bar 1) +
geom_text(#bar 1) +
geom_bar(bar 2) +
geom_text(#bar 2)

One possible solution ...
# Grouping 1 has levels G1_A, G1_B
# Grouping 2 has levels G2_A, G2_B
# make plot for G1_A and both {G2_A, G2_B}
# {label here} ============= | ================ {label here}
# {label here} ======= | =========== {label here}
# {label here} === | ====== {label here}
# Rows for panel G1_A, split by the second grouping so that each side of the
# chart gets its own bar layer and its own label layer.
panel_rows <- subset(testdf, grouping_1 == "G1_A")
g2a_rows <- subset(testdf, grouping_2 == "G2_A" & grouping_1 == "G1_A")
g2b_rows <- subset(testdf, grouping_2 == "G2_B" & grouping_1 == "G1_A")

# Axis ticks every 25000 units, labelled as unsigned thousands.
y_breaks <- seq(-270000, 80000, 25000)
y_labels <- as.character(c(seq(270, 0, -25), seq(5, 80, 25)))

g <- ggplot(data = panel_rows, aes(x = Layer, y = Number, fill = grouping_2)) +
  # One bar layer per level of grouping_2
  geom_bar(data = g2a_rows, stat = "identity") +
  geom_bar(data = g2b_rows, stat = "identity") +
  labs(title = "G1_A", x = " ", y = "Number (x1000)") +
  scale_y_continuous(breaks = y_breaks, labels = y_labels) +
  # Widen the value axis so the labels beyond the bar ends stay visible
  expand_limits(y = c(-310000, 120000)) +
  # Labels for G2_A: hjust > 1 moves the text to the left
  geom_text(
    data = g2a_rows, aes(label = Reporting),
    vjust = +0.5, hjust = 1.2, color = "black", size = 2.5
  ) +
  # Labels for G2_B: negative hjust moves the text to the right
  geom_text(
    data = g2b_rows, aes(label = Reporting),
    vjust = +0.5, hjust = -0.3, color = "black", size = 2.5
  ) +
  # Shared styling: palette, theme, tick labels rotated by 60 degrees, and
  # flipped coordinates
  scale_fill_brewer(palette = "Set1") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 60)) +
  coord_flip()

Related

How to make a sorted geom_bar() ggplot [duplicate]

This question already has answers here:
Order discrete x scale by frequency/value
(7 answers)
Closed 5 years ago.
My dataframe is called:
d3, with variable names: course_name, id, total_enrolled, total_capacity
I did:
# Dodged bar chart of two measures for the 15 courses with the largest
# total_capacity in d3.
library(ggplot2)

# Top 15 rows of d3, ordered by total_capacity (largest first).
d3a <- head(d3[order(d3$total_capacity, decreasing = TRUE), ], 15)
# Keep columns 1, 3 and 4 of d3.
d3.plottable <- d3a[, c(1, 3, 4)]
# Long format: one row per (course_name, variable) pair.
# NOTE(review): melt() is not base R — presumably reshape2 is attached; confirm.
d3.plottable <- melt(d3.plottable, id.vars = "course_name")

# Every layer is added in a single assignment. Previously the bars, the flip
# and the legend position were added to `g` without storing the result, so the
# final `g` contained only the axis labels and no geom.
g <- ggplot(d3.plottable, aes(x = course_name, y = value)) +
  geom_bar(aes(fill = variable), position = position_dodge(), stat = "identity") +
  coord_flip() +
  theme(legend.position = "top") +
  labs(x = "Course Name", y = "Number of Students")
g
And what I get is this:
No matter what I do I can't sort the orange bar in descending order.
Is there a way to do that? I would like to sort on the variable total_enrolled.
PS: I apologize for the badly formatted code; I am still figuring out Stack Overflow.
Here is an example of redefining the order of the factor levels.
Note, since you don't provide sample data I will simulate some data.
# Simulated data: six courses, each with a total_enrolled row and a
# total_capacity row.
set.seed(2017)
df <- data.frame(
  course_name = rep(LETTERS[1:6], each = 2),
  value = sample(300, 12),
  variable = rep(c("total_enrolled", "total_capacity"), length.out = 12)
)

# Order the course_name levels by the total_enrolled value (ascending), so
# that a discrete axis built on course_name follows that order.
enrolled_rows <- subset(df, variable == "total_enrolled")
df$course_name <- factor(
  df$course_name,
  levels = as.character(enrolled_rows$course_name[order(enrolled_rows$value)])
)
# Plot: dodged bars per course, flipped so the courses run along the vertical
# axis in the order of the course_name factor levels.
# library() stops with an error when ggplot2 is not installed; require() would
# only return FALSE and let the script fail later at the first ggplot() call.
library(ggplot2)
g <- ggplot(df, aes(x = course_name, y = value)) +
  geom_bar(aes(fill = variable), position = position_dodge(), stat = "identity") +
  coord_flip() +
  theme(legend.position = "top") +
  labs(x = "Course Name", y = "Number of Students")
g

Add a specific value of x-axis on ggplot

I am using the ggplot function to plot this kind of graph
image
I want to add the specific value of the x-axis as shown in the picture
this is my code :
# Histogram and kernel density of the values in `mat`, with the part of the
# density curve at or below the requested quantile filled in green.
# `mat` and `quant` are not defined in this snippet.

# The argument is spelled out as `probs`; `prob` only worked through partial
# argument matching.
quantiles <- quantile(mat, probs = quant)
x <- as.vector(mat)
d <- data.frame(x = x)

p <- ggplot(data = d, aes(x = x)) +
  theme_bw() +
  geom_histogram(
    aes(y = ..density..),
    binwidth = 0.001, color = "black", fill = "white"
  ) +
  geom_density(
    aes(x = x, y = ..density..),
    fill = "blue", alpha = 0.5, color = "black"
  )

# Evaluate the density on a grid so that a slice of the curve can be filled.
x.dens <- density(x)
df.dens <- data.frame(x = x.dens$x, y = x.dens$y)

# NOTE(review): `x <= quantiles` recycles `quantiles` element-wise when `quant`
# holds more than one probability — confirm that `quant` is a single value.
p <- p + geom_area(
  data = subset(df.dens, x <= quantiles), aes(x = x, y = y),
  fill = "green", alpha = 0.6
)
print(p)

Set legend according to line colour using ggplot

I have a plot as
Using ggplot, how can I set the legend to show only two labels, i.e., the red lines named, say, "prediction intervals" and the blue line named "fit line"?
I used the following R lines for this:
# Simulated fit with an upper and a lower band around it (50 points each).
dfs <- data.frame(
  "x" = 1:50,
  "fit" = rnorm(50, 30, 4),
  "upper" = rnorm(50, 30, 4) + 15,
  "lower" = rnorm(50, 30, 4) - 15
)
# Long format: one row per (x, variable) pair.
df_melt <- reshape2::melt(dfs, id = "x")

# One colour per series, given in the order fit, upper, lower.
series_colours <- c("blue", "red", "red")
g <- ggplot(df_melt, aes(x = x, y = value, colour = variable)) +
  geom_line(linetype = 5) +
  scale_colour_manual(values = series_colours) +
  theme_grey(base_size = 16) +
  theme(axis.text = element_text(colour = "black", size = 12))
g
You can simply add a new column with 2 values ("prediction intervals" or "fit line")
# Add a two-level legend group: the fitted series versus everything else.
df_melt$group <- ifelse(
  df_melt$variable == "fit", "fit line", "prediction intervals"
)

# Colours are named after the legend entries, so each label keeps its colour
# regardless of level order. Previously three unnamed colours were supplied
# for a scale that has only two levels.
legend_colours <- c("fit line" = "blue", "prediction intervals" = "red")

# `group = variable` is still needed: it keeps the upper and lower series as
# separate lines even though they share one colour group.
g <- ggplot(df_melt, aes(x = x, y = value, colour = group, group = variable)) +
  geom_line(linetype = 5) +
  scale_colour_manual(values = legend_colours) +
  theme_grey(base_size = 16) +
  theme(axis.text = element_text(colour = "black", size = 12))
g

Overlaying different vlines in R with ggplot facet_wrap

I am trying to produce a set of density plots showing the difference in expression level distributions for two sets of genes in four cell types. In addition to the density plots, I would like to have the median expression level for both groups overlaid onto each plot. Based on answers to a few similar questions, I've been able to get correct plots OR correct medians but not both at the same time. I'm out of ideas and hoping someone can set me right. Thanks!
Sample data is available here: https://github.com/adadiehl/sample_data/blob/master/sample.data
First Attempt. Produces correct plots, but same medians are plotted on all four:
dat <- read.table("sample.data")

# Row indices used below; which() drops rows where the condition is NA.
expressed_rows <- which(dat$FPKM > 0)
other_rows <- which(dat$FPKM > 0 & dat$class == "Other")
mckg_rows <- which(dat$FPKM > 0 & dat$class == "a_MCKG")

# Each median is taken over all rows of one class, without splitting by
# `source`, so the same two lines are drawn in every facet.
g <- ggplot(dat[expressed_rows, ], aes(x = FPKM)) +
  geom_density(
    aes(y = ..density.., group = class, color = class, fill = class),
    alpha = 0.2
  ) +
  geom_vline(
    data = dat, aes(xintercept = median(dat$FPKM[other_rows])),
    colour = "turquoise3", linetype = "longdash"
  ) +
  geom_vline(
    data = dat, aes(xintercept = median(dat$FPKM[mckg_rows])),
    colour = "tomato1", linetype = "longdash"
  ) +
  facet_wrap(~source, ncol = 2, scales = "free") +
  ggtitle("Distribution of FPKM, MCKG vs. Other") +
  xlab("FPKM > 0")
Second Attempt: Correct plots but places all medians on all plots:
dat <- read.table("sample.data")

# Rows with positive expression, split into the a_MCKG class and the rest.
is_expressed <- dat$FPKM > 0
rest_rows <- which(dat$class != "a_MCKG" & is_expressed)
mckg_rows <- which(dat$class == "a_MCKG" & is_expressed)

# Per-source medians for both groups. The source label is stored in column
# `z`, not in a column called `source`.
# NOTE(review): levels() returns NULL unless dat$source is a factor — confirm
# that read.table() yields a factor in the R version used.
vline.dat <- data.frame(
  z = levels(dat$source),
  vl = tapply(dat$FPKM[rest_rows], dat$source[rest_rows], median),
  vm = tapply(dat$FPKM[mckg_rows], dat$source[mckg_rows], median)
)

g <- ggplot(dat[which(dat$FPKM > 0), ], aes(x = FPKM)) +
  geom_density(
    aes(y = ..density.., group = class, color = class, fill = class),
    alpha = 0.2
  ) +
  facet_wrap(~source, ncol = 2, scales = "free") +
  geom_vline(
    data = vline.dat, aes(xintercept = vl),
    colour = "turquoise3", linetype = "longdash"
  ) +
  geom_vline(
    data = vline.dat, aes(xintercept = vm),
    colour = "tomato1", linetype = "longdash"
  ) +
  # Same facet specification added a second time, as in the original attempt
  facet_wrap(~source, ncol = 2, scales = "free") +
  ggtitle("Distribution of FPKM, MCKG vs. Other") +
  xlab("FPKM > 0")
Third Attempt: Plots are all the same but have correct medians.
dat <- read.table("sample.data")

# Positive-expression rows of the a_MCKG class and of every other class.
positive <- dat$FPKM > 0
idx_other <- which(dat$class != "a_MCKG" & positive)
idx_mckg <- which(dat$class == "a_MCKG" & positive)

# One row per source: `z` carries the source label, `vl` and `vm` the medians
# of the non-MCKG and MCKG rows respectively.
vline.dat <- data.frame(
  z = levels(dat$source),
  vl = tapply(dat$FPKM[idx_other], dat$source[idx_other], median),
  vm = tapply(dat$FPKM[idx_mckg], dat$source[idx_mckg], median)
)

g <- ggplot(dat[which(dat$FPKM > 0), ], aes(x = FPKM)) +
  geom_density(
    aes(y = ..density.., group = class, color = class, fill = class),
    alpha = 0.2
  ) +
  facet_wrap(~source, ncol = 2, scales = "free") +
  geom_vline(
    data = vline.dat, aes(xintercept = vl),
    colour = "turquoise3", linetype = "longdash"
  ) +
  geom_vline(
    data = vline.dat, aes(xintercept = vm),
    colour = "tomato1", linetype = "longdash"
  ) +
  # Second facet specification, this time on `z` (the column of vline.dat)
  facet_wrap(~z, ncol = 2, scales = "free") +
  ggtitle("Distribution of FPKM, MCKG vs. Other") +
  xlab("FPKM > 0")
Passing pre-summarized data is the way to go:
library(plyr)

# Give the columns explicit names and keep only rows with positive FPKM.
names(dat) <- c("FPKM", "class", "source")
dat2 <- subset(dat, FPKM > 0)

# One median per (source, class) pair. Because this summary carries a `source`
# column, facet_wrap() can place each line in its own panel.
class_medians <- ddply(dat2, .(source, class), summarize, mmed = median(FPKM))

ggplot(dat2, aes(x = FPKM)) +
  geom_density(
    aes(y = ..density.., group = class, color = class, fill = class),
    alpha = 0.2
  ) +
  geom_vline(data = class_medians, aes(xintercept = mmed, color = class)) +
  facet_wrap(~ source, ncol = 2, scales = "free") +
  ggtitle("Distribution of FPKM, MCKG vs. Other") +
  xlab("FPKM > 0")
Alternatively, you can achieve the same with base R:
# Median FPKM for every (source, class) combination, computed with base R.
dat3 <- aggregate(FPKM ~ source + class, data = dat2, FUN = median)

# Vertical line layer: the FPKM column of dat3 holds the medians.
median_lines <- geom_vline(data = dat3, aes(xintercept = FPKM, color = class))

ggplot(dat2, aes(x = FPKM)) +
  geom_density(
    aes(y = ..density.., group = class, color = class, fill = class),
    alpha = 0.2
  ) +
  median_lines +
  facet_wrap(~ source, ncol = 2, scales = "free") +
  ggtitle("Distribution of FPKM, MCKG vs. Other") +
  xlab("FPKM > 0")
N.B. You may want to avoid column names such as source and class as these conflict with built-in functions.

Population pyramid plot with ggplot2 and dplyr (instead of plyr)

I am trying to reproduce the simple population pyramid from the post Simpler population pyramid in ggplot2
using ggplot2 and dplyr (instead of plyr).
Here is the original example with plyr and a seed
# Reproducible sample: 1000 individuals with a value `v` drawn from 1..20 and
# a sex `g` that alternates M, F through recycling.
set.seed(321)
# TRUE is spelled out: `T` is an ordinary variable that can be reassigned.
test <- data.frame(v = sample(1:20, 1000, replace = TRUE), g = c("M", "F"))
# library() stops with an error when a package is missing; require() would only
# return FALSE and let the script fail later.
library(ggplot2)
library(plyr)
# Population pyramid: one bar layer counts the F rows, and the other counts
# the M rows with the counts negated so they extend in the opposite direction.
# NOTE(review): `subset = .(...)` on geom_bar() uses plyr's .() and presumably
# depends on an older ggplot2 release — confirm that the installed version
# accepts it.
ggplot(data = test, aes(x = as.factor(v), fill = g)) +
  geom_bar(subset = .(g == "F")) +
  geom_bar(subset = .(g == "M"), aes(y = ..count.. * (-1))) +
  # Tick labels show absolute values on both sides of zero
  scale_y_continuous(breaks = seq(-40, 40, 10), labels = abs(seq(-40, 40, 10))) +
  coord_flip()
Works fine.
But how can I generate this same plot with dplyr instead? The example uses plyr in the `subset = .(g == ...)` arguments.
I have tried the following with dplyr::filter but got an error:
# Attempt that replaces plyr's `subset = .()` with dplyr::filter().
# This snippet reproduces the error quoted right after it.
require(dplyr)
ggplot(data=test,aes(x=as.factor(v),fill=g)) +
# The filtered data frame is passed as the first positional argument of
# geom_bar() rather than as `data = ...`; the quoted error complains that the
# mapping is not a list created by aes().
geom_bar(dplyr::filter(test, g=="F")) +
# Same positional call for the M rows, with the counts negated.
geom_bar(dplyr::filter(test, g=="M"),aes(y=..count..*(-1))) +
scale_y_continuous(breaks=seq(-40,40,10),labels=abs(seq(-40,40,10))) +
coord_flip()
Error in get(x, envir = this, inherits = inh)(this, ...) :
Mapping should be a list of unevaluated mappings created by aes or aes_string
You avoid the error by specifying the argument data in geom_bar:
# Filter each sex once and pass the result explicitly as `data =`.
rows_f <- dplyr::filter(test, g == "F")
rows_m <- dplyr::filter(test, g == "M")
# Symmetric tick positions; the labels show their absolute values.
count_ticks <- seq(-40, 40, 10)

ggplot(data = test, aes(x = as.factor(v), fill = g)) +
  geom_bar(data = rows_f) +
  # Negated counts make the M bars extend in the opposite direction
  geom_bar(data = rows_m, aes(y = ..count.. * (-1))) +
  scale_y_continuous(breaks = count_ticks, labels = abs(count_ticks)) +
  coord_flip()
You can avoid both dplyr and plyr when making population pyramids with recent versions of ggplot2.
If you have counts of the sizes of age-sex groups then use the answer here
If your data is at the individual level (as yours is) then use the following:
# Individual-level sample: 1000 rows with a value `v` in 1..20 and a sex `g`
# that alternates M, F through recycling.
set.seed(321)
# TRUE is spelled out: `T` is an ordinary variable that can be reassigned.
test <- data.frame(v = sample(1:20, 1000, replace = TRUE), g = c("M", "F"))
head(test)
# v g
# 1 20 M
# 2 19 F
# 3 5 M
# 4 6 F
# 5 8 M
# 6 7 F
library("ggplot2")

# Split the individuals by sex with base subset(); no plyr or dplyr needed.
rows_f <- subset(test, g == "F")
rows_m <- subset(test, g == "M")

ggplot(data = test, aes(x = as.factor(v), fill = g)) +
  geom_bar(data = rows_f) +
  # Negated counts for the M rows, drawn without stacking
  geom_bar(
    data = rows_m,
    mapping = aes(y = - ..count.. ),
    position = "identity"
  ) +
  # abs() turns the negative tick values into positive labels
  scale_y_continuous(labels = abs) +
  coord_flip()
To build an Age Pyramid with individual data or microdata you can use:
# Individual-level sample: 1000 rows with a value `v` in 1..100 and a sex `g`
# that alternates M, F through recycling. No seed is set, so the values differ
# between runs.
# TRUE is spelled out: `T` is an ordinary variable that can be reassigned.
test <- data.frame(v = sample(1:100, 1000, replace = TRUE), g = c("M", "F"))
# Age pyramid with 5-unit bins: F counts on the positive side, M counts negated.
bin_width <- 5
rows_f <- subset(test, g == "F")
rows_m <- subset(test, g == "M")

ggplot(data = test, aes(x = v, fill = g)) +
  geom_histogram(
    data = rows_f,
    binwidth = bin_width, color = "white", position = "identity"
  ) +
  geom_histogram(
    data = rows_m, mapping = aes(y = - ..count.. ),
    binwidth = bin_width, color = "white", position = "identity"
  ) +
  # One tick per bin edge on the age axis
  scale_x_continuous("Age", breaks = seq(0, 100, by = 5)) +
  # abs() shows the negated counts as positive labels
  scale_y_continuous("Population", breaks = seq(-30, 30, 10), labels = abs) +
  scale_fill_discrete(name = "Sex") +
  coord_flip() +
  theme_bw()
Changing the binwidth in geom_histogram() can group your data in wider categories.
Changing binwidth to 10 and adjusting the axis breaks:
# Same pyramid with 10-unit bins and matching axis breaks.
histogram_for <- function(rows, ...) {
  # Settings shared by both sides; extra arguments (e.g. mapping) pass through.
  geom_histogram(
    data = rows, binwidth = 10, color = "white", position = "identity", ...
  )
}

ggplot(data = test, aes(x = v, fill = g)) +
  histogram_for(subset(test, g == "F")) +
  # Negated counts make the M bars extend in the opposite direction
  histogram_for(subset(test, g == "M"), mapping = aes(y = - ..count.. )) +
  scale_x_continuous("Age", breaks = seq(0, 100, by = 10)) +
  scale_y_continuous("Population", breaks = seq(-100, 100, 10), labels = abs) +
  scale_fill_discrete(name = "Sex") +
  coord_flip() +
  theme_bw()

Resources