How to make a sorted geom_bar() ggplot [duplicate] - r

This question already has answers here:
Order discrete x scale by frequency/value
(7 answers)
Closed 5 years ago.
My dataframe is called:
d3with variable names : course_name,id,total_enrolled,total_capacity
I did:
d3a <- head(d3[order(d3$total_capacity, decreasing = T),], 15)
d3.plottable <- d3a[, c(1,3,4)]
d3.plottable <- melt(d3.plottable, id.vars = "course_name")
library(ggplot2)
g <- ggplot(d3.plottable, aes(x = course_name, y = value))
g + geom_bar(aes(fill = variable), position = position_dodge(), stat = "identity") +
coord_flip() + theme(legend.position = "top")
g <- g + labs(x = "Course Name")
g <- g+ labs(y = "Number of Students")
g
And what I get is this:
No matter what I do I can't sort the orange bar in descending order.
Is there a way to do that? I would like to sort on the variable total_enrolled.
PS:I apologize for the badly formatted code,I am still figuring out stackoverflow.

Here is a an example redefining the order of the factor levels.
Note, since you don't provide sample data I will simulate some data.
# Sample data
set.seed(2017);
df <- cbind.data.frame(
course_name = rep(LETTERS[1:6], each = 2),
value = sample(300, 12),
variable = rep(c("total_enrolled", "total_capacity"), length.out = 12)
);
# Relevel factor levels, ordered by subset(df, variable == "total_enrolled")$value
df$course_name <- factor(
df$course_name,
levels = as.character(subset(df, variable == "total_enrolled")$course_name[order(subset(df, variable == "total_enrolled")$value)]));
# Plot
require(ggplot2);
g <- ggplot(df, aes(x = course_name, y = value))
g <- g + geom_bar(aes(fill = variable), position = position_dodge(), stat = "identity");
g <- g + coord_flip() + theme(legend.position = "top");
g <- g + labs(x = "Course Name")
g <- g + labs(y = "Number of Students")
g;

Related

Make geom_histogram display x-axis labels as integers instead of numerics

I have a data.frame that has counts for several groups:
set.seed(1)
df <- data.frame(group = sample(c("a","b"),200,replace = T),
n = round(runif(200,1,2)))
df$n <- as.integer(df$n)
And I'm trying to display a histogram of df$n, facetted by the group using ggplot2's geom_histogram:
library(ggplot2)
ggplot(data = df, aes(x = n)) + geom_histogram() + facet_grid(~group) + theme_minimal()
Any idea how to get ggplot2 to label the x-axis ticks with the integers the histogram is summarizing rather than the numeric values it is currently showing?
You could tweak this by the binwidth argument of geom_histogram:
library(ggplot2)
ggplot(data = df, aes(x = n)) +
geom_histogram(binwidth = 0.5) +
facet_grid(~group) +
theme_minimal()
Another example:
set.seed(1)
df <- data.frame(group = sample(c("a","b"),200,replace = T),
n = round(runif(200,1,5)))
library(ggplot2)
ggplot(data = df, aes(x = n)) +
geom_histogram(binwidth = 0.5) +
facet_grid(~group) +
theme_minimal()
You can manually specify the breaks with scale_x_continuous(breaks = seq(1, 2)). Alternatively, you can set the breaks and labels separately as well.

Change order of x label in facet plot using ggplot

I'm trying to plot the facet plot below in alphabetical order, I've tried multiple ways of doing it and even tried getting rid of the facet and just plotting Overpaid vs proportion, but the plot still had an alphabetical x axis.
Initially I tried assigning a frequency value to every element then ordering the list by that value but that didn't change the plot.
overpaid_each_country <- survey_df %>%
filter(!is.na(Overpaid)) %>%
filter(Country %in% Country_Sum$Var1)
overpaid_each_country <- overpaid_each_country[ , which(names(overpaid_each_country) %in% c("Overpaid","Country"))]
overpaid_each_country <- transform(overpaid_each_country, freq = ave(seq(nrow(overpaid_each_country)), Overpaid, FUN=length))
overpaid_each_country <- overpaid_each_country[order(overpaid_each_country$freq), ]
Then I tried setting the factor levels but although the factor levels changed, the plot didn't.
overpaid_each_country %>%
mutate(Overpaid2 = factor(Overpaid, levels = c("Greatly overpaid", "Somewhat overpaid", "Greatly underpaid", "Neither underpaid nor overpaid", "Somewhat underpaid"))) %>%
ggplot(aes(x = Overpaid2, y = ..prop.., group = 1)) +
geom_bar( color = "white", fill = "#42dff4") +
facet_wrap(~ Country, nrow = 4) +
aes(stringr::str_wrap(Overpaid, 10)) +
xlab("OverPaid orUnderpaid") +
ylab("Proportion of Respondents")+
labs(title = "Are you Overpaid or Underpaid?") +
theme(axis.title.x=element_blank(), axis.text.y=element_text(size=12), axis.text.x = element_text(size=12), plot.title = element_text(size = 17))
You could use forcats::fct_reorder() to re-order your factor-levels, or you change the levels from somethink like "a" to "e", and then pass a named vector (where names match factor levels) to scale_x_discrete() as labels-argument.
library(ggplot2)
d <- data.frame(
x = factor(sample(1:5, size = 100, replace = TRUE), labels = c("a", "b", "c", "d", "e"))
)
ggplot(d, aes(x = x, y = ..prop..)) + geom_bar()
labels <- c(a = "Greatly underpaid", b = "Somewhat underpaid", c = "Neither/nor", d = "somewhat overpaid", e = "greatly overpaid")
ggplot(d, aes(x = x, y = ..prop..)) +
geom_bar() +
scale_x_discrete(labels = labels)
In your 6th line
aes(stringr::str_wrap(Overpaid, 10)) +
change to overpaid2
aes(stringr::str_wrap(Overpaid2, 10)) +

ggplot axis custom order with duplicate labels

set.seed(357)
x <- data.frame(name = sample(letters, 10), val = runif(10), stringsAsFactors = F)
x[c(2,6),"name"] <- c("k","k")
ggplot(x, aes(x = name, y = val)) + theme_bw() + geom_bar(stat = "identity")
How can I plot the axis in the same order as x$name? (Yes, the k is duplicate, I want that to show up in the plot like this axis: c k g f o k s v t q)
In the past I used to do:
x$name <- factor(x$name, levels = x$name[order(x$val)], ordered = T)
wich doesn't work any more thanks to:
http://r.789695.n4.nabble.com/factors-with-non-unique-quot-duplicated-quot-levels-have-been-deprecated-since-2009-are-more-depreca-td4721481.html
This is no duplicate of: ggplot: order of factors with duplicate levels
His data structure is completely different.
Also, I have tried setting limits in x_scale_discrete. Doesn't work.
Try this...
x$name2 <- 1:nrow(x)
ggplot(x, aes(x = factor(name2), y = val)) + theme_bw() + geom_bar(stat = "identity") +
scale_x_discrete(labels=x$name)
Actually, simply add the following setting xlab(x$name)
ggplot(x, aes(x = name, y = val)) + theme_bw() + geom_bar(stat = "identity") + xlab(x$name)

Population pyramid plot with ggplot2 and dplyr (instead of plyr)

I am trying to reproduce the simple population pyramid from the post Simpler population pyramid in ggplot2
using ggplot2 and dplyr (instead of plyr).
Here is the original example with plyr and a seed
set.seed(321)
test <- data.frame(v=sample(1:20,1000,replace=T), g=c('M','F'))
require(ggplot2)
require(plyr)
ggplot(data=test,aes(x=as.factor(v),fill=g)) +
geom_bar(subset=.(g=="F")) +
geom_bar(subset=.(g=="M"),aes(y=..count..*(-1))) +
scale_y_continuous(breaks=seq(-40,40,10),labels=abs(seq(-40,40,10))) +
coord_flip()
Works fine.
But how can I generate this same plot with dplyr instead? The example uses plyr in the subset = .(g == statements.
I have tried the following with dplyr::filter but got an error:
require(dplyr)
ggplot(data=test,aes(x=as.factor(v),fill=g)) +
geom_bar(dplyr::filter(test, g=="F")) +
geom_bar(dplyr::filter(test, g=="M"),aes(y=..count..*(-1))) +
scale_y_continuous(breaks=seq(-40,40,10),labels=abs(seq(-40,40,10))) +
coord_flip()
Error in get(x, envir = this, inherits = inh)(this, ...) :
Mapping should be a list of unevaluated mappings created by aes or aes_string
You avoid the error by specifying the argument data in geom_bar:
ggplot(data = test, aes(x = as.factor(v), fill = g)) +
geom_bar(data = dplyr::filter(test, g == "F")) +
geom_bar(data = dplyr::filter(test, g == "M"), aes(y = ..count.. * (-1))) +
scale_y_continuous(breaks = seq(-40, 40, 10), labels = abs(seq(-40, 40, 10))) +
coord_flip()
You can avoid both dplyr and plyr when making population pyramids with recent versions of ggplot2.
If you have counts of the sizes of age-sex groups then use the answer here
If your data is at the individual level (as yours is) then use the following:
set.seed(321)
test <- data.frame(v=sample(1:20,1000,replace=T), g=c('M','F'))
head(test)
# v g
# 1 20 M
# 2 19 F
# 3 5 M
# 4 6 F
# 5 8 M
# 6 7 F
library("ggplot2")
ggplot(data = test, aes(x = as.factor(v), fill = g)) +
geom_bar(data = subset(test, g == "F")) +
geom_bar(data = subset(test, g == "M"),
mapping = aes(y = - ..count.. ),
position = "identity") +
scale_y_continuous(labels = abs) +
coord_flip()
To build an Age Pyramid with individual data or microdata you can use:
test <- data.frame(v=sample(1:100, 1000, replace=T), g=c('M','F'))
ggplot(data = test, aes(x = v, fill = g)) +
geom_histogram(data = subset(test, g == "F"), binwidth = 5, color="white", position = "identity") +
geom_histogram(data = subset(test, g == "M"), binwidth = 5, color="white", position = "identity",
mapping = aes(y = - ..count.. )) +
scale_x_continuous("Age", breaks = c(seq(0, 100, by=5))) +
scale_y_continuous("Population", breaks = seq(-30, 30, 10), labels = abs) +
scale_fill_discrete(name = "Sex") +
coord_flip() +
theme_bw()
Changing the binwidth in geom_histogram() can group your data in wider categories.
Changing binwidth to 10 and adjusting the axis breaks:
ggplot(data = test, aes(x = v, fill = g)) +
geom_histogram(data = subset(test, g == "F"), binwidth = 10, color="white", position = "identity") +
geom_histogram(data = subset(test, g == "M"), binwidth = 10, color="white", position = "identity",
mapping = aes(y = - ..count.. )) +
scale_x_continuous("Age", breaks = c(seq(0, 100, by = 10))) +
scale_y_continuous("Population", breaks = seq(-100, 100, 10), labels = abs) +
scale_fill_discrete(name = "Sex") +
coord_flip() +
theme_bw()

How to plot a barplot/barchart with continuous x axis

I would like to plot a barplot but I have dates on the x axis and I want those dates to be correctly spaced (as it is NON categorical)
set.seed(1)
m = matrix(abs(rnorm(6)),3,2)
rownames(m) = as.Date(c('2011-01-01','2011-01-03','2011-01-10'))
barplot(t(m),beside=T,col=c('red','blue'),las=2)
On this example I would like 14984 to be offset on the right.
I'd rather a graphics solution but ggplot2 is fine too
Would you mind to use ´ggplot´ instead?
library(ggplot2)
set.seed(1)
df <- data.frame(y=abs(rnorm(6)),
x=rep(as.Date(c('2011-01-01','2011-01-03','2011-01-10')),
times = 2),
g = factor(rep(c(1,2), each = 3)))
ggplot(aes(x=x, y=y, group = g, fill = g), data = df) +
geom_bar(stat = 'identity', position = 'dodge')
You can improve axis formatting with `scale_x_date´
library(scales)
ggplot(aes(x=x, y=y, group = g, fill = g), data = df) +
geom_bar(stat = 'identity', position = 'dodge') +
scale_x_date(breaks = '1 day') +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5))
And customize it to your purpose
ggplot(aes(x=x, y=y, group = g, fill = g), data = df) +
geom_bar(stat = 'identity', position = 'dodge') +
scale_x_date(breaks = '1 day') +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) +
scale_fill_manual('My\nclasses', values = c('1'='red', '2' = 'blue')) +
labs(list(title = 'Barplot\n', x = ('Date'), y = 'Values'))
With graphics, you probably have to prepare the data appropriately (with missing values for dates you don't consider) in order to do this. Then you can use barplot.
# matrix definition
set.seed(1)
m = matrix(abs(rnorm(6)),3,2)
rownames(m) = as.Date(c('2011-01-01','2011-01-03','2011-01-10'))
# get all dates in between
dts <- do.call(":", as.list(range(rownames(m))))
dts <- dts[!dts%in%rownames(m)]
mat <- matrix(NA, nrow=length(dts), ncol=2, dimnames=list(dts, NULL))
# combine with original matrix
m <- rbind(m, mat)
m <- m[order(rownames(m)), ]
which(!is.na(m[,1]))
# plot
barplot(t(m), beside=T, col=c('red','blue'),las=2, axes=FALSE, axisnames=FALSE)
axis(2)
axis(1, at=3*which(!is.na(m[,1]))-1, labels=rownames(m[!is.na(m[,1]),]))

Resources