This question already has answers here:
ggplot bar plot with facet-dependent order of categories
(4 answers)
Closed 5 years ago.
In the df below, I want to reorder bars from highest to lowest in each facet
I tried
# Reshape wide -> long: columns 2:4 are gathered into the key column `var`
# and the value column `value`.
df <- tidyr::gather(df, "var", "value", 2:4)

# reorder() is evaluated on the whole data frame, so one common ordering of
# `id` is shared by every facet.
ggplot(data = df, mapping = aes(x = reorder(id, -value), y = value, fill = id)) +
  geom_bar(stat = "identity") +
  facet_wrap(~var, ncol = 3)
It gave me
It didn't order the bars from highest to lowest in each facet.
I figured out another way to get what I want. I had to plot each variable at a time, then combine all plots using grid.arrange()
# I got this function from eipi10's answer:
# http://stackoverflow.com/questions/38637261/perfectly-align-several-plots/38640937#38640937
# Function to extract legend, see also
# https://github.com/hadley/ggplot2/wiki/Share-a-legend-between-two-ggplot2-graphs
#
# Extract the legend grob from a ggplot object.
#
# a.gplot: a ggplot object.
# Returns the first grob named "guide-box" in the plot's gtable.
# Stops with an explicit error when the gtable holds no such grob (for example
# when the legend has been switched off), instead of failing on an empty index.
g_legend <- function(a.gplot) {
  tmp <- ggplot_gtable(ggplot_build(a.gplot))
  # vapply() guarantees a character vector; grobs without a name become NA
  grob_names <- vapply(
    tmp$grobs,
    function(x) as.character(x$name)[1L],
    character(1)
  )
  leg <- which(grob_names == "guide-box")
  if (length(leg) == 0L) {
    stop("no \"guide-box\" grob found: the plot has no legend to extract",
         call. = FALSE)
  }
  tmp$grobs[[leg[1L]]]
}
# Draw the bars for a single level of `var`, ordered from highest to lowest.
# reorder() only sees the subset here, so every panel gets its own bar order.
plot_one_var <- function(v) {
  ggplot(df[df$var == v, ], aes(x = reorder(id, -value), y = value, fill = id)) +
    geom_bar(stat = "identity") +
    facet_wrap(~var, ncol = 3)
}

p1 <- plot_one_var("A")
# Grab the shared legend before it is suppressed on the panel itself
fin_legend <- g_legend(p1)
# guides(fill = "none") replaces the deprecated guides(fill = FALSE)
p1 <- p1 + guides(fill = "none")
p2 <- plot_one_var("B") + guides(fill = "none")
p3 <- plot_one_var("C") + guides(fill = "none")

# Three panels plus a narrow fourth column that holds the legend
grid.arrange(p1, p2, p3, fin_legend, ncol = 4, widths = c(1.5, 1.5, 1.5, 0.5))
The result is what I want
I wonder if there is a straightforward way to order the bars from highest to lowest in all facets without having to plot each variable separately and then combine the plots. Any suggestions will be much appreciated.
DATA
# Example data in wide format: one row per site, one column per variable.
# `header = TRUE` is spelled out because `T` is an ordinary variable that can
# be reassigned; the text is passed as a plain string (no c() wrapper needed).
df <- read.table(text = "
id A B C
site1 10 15 20
site2 20 10 30
site3 30 20 25
site4 40 35 40
site5 50 30 35", header = TRUE)
The approach below uses a specially prepared variable for the x-axis with facet_wrap() but uses the labels parameter to scale_x_discrete() to display the correct x-axis labels:
Prepare data
I'm more fluent in data.table, so it is used here. Feel free to use whatever package you prefer for data manipulation.
Edit: Removed second dummy variable, only ord is required
library(data.table)
# convert df to a data.table by reference, then reshape from wide to long
setDT(df)
molten <- melt(df, id.vars = "id")
# rank the rows by facet (`variable`), then by descending `value`;
# ties.method = "first" keeps the ranks unique
facet_rank <- frank(molten, variable, -value, ties.method = "first")
# zero-pad to two digits so that alphabetical order of `ord` equals rank order
molten[, ord := sprintf("%02i", facet_rank)]
molten
# id variable value ord
# 1: site1 A 10 05
# 2: site2 A 20 04
# 3: site3 A 30 03
# 4: site4 A 40 02
# 5: site5 A 50 01
# 6: site1 B 15 09
# 7: site2 B 10 10
# 8: site3 B 20 08
# 9: site4 B 35 06
#10: site5 B 30 07
#11: site1 C 20 15
#12: site2 C 30 13
#13: site3 C 25 14
#14: site4 C 40 11
#15: site5 C 35 12
Create plot
library(ggplot2)
# named character vector: names are the `ord` codes, values the matching ids;
# it is used to relabel the x-axis ticks
x_axis_labels <- setNames(as.character(molten$id), molten$ord)

# `ord` (not `id`) goes on the x-axis so that each facet has its own bar order
ggplot(data = molten, mapping = aes(x = ord, y = value, fill = id)) +
  # geom_col() is the replacement for geom_bar(stat = "identity")
  geom_col() +
  # free x scale per facet; drop = TRUE drops absent factor levels
  # (not the case here)
  facet_wrap(~variable, scales = "free_x", drop = TRUE) +
  # show the ids instead of the `ord` codes
  scale_x_discrete(labels = x_axis_labels) +
  # replace the x-axis title
  labs(x = "id")
Data
# Example data in wide format: one row per site, one column per variable.
# `header = TRUE` is spelled out because `T` can be reassigned.
df <- read.table(text = "
id A B C
site1 10 15 20
site2 20 10 30
site3 30 20 25
site4 40 35 40
site5 50 30 35", header = TRUE)
If you're willing to lose the X axis labels, you can do this by using the actual y values as the x aesthetic, then dropping unused factor levels in each facet:
# theme tweak that hides the x-axis text and ticks
hide_x_axis <- theme(
  axis.text.x = element_blank(),
  axis.ticks.x = element_blank()
)

# the negated value itself is the x position, so bars sort from highest to
# lowest; with a free x scale each facet keeps only its own levels
ggplot(data = df, mapping = aes(x = factor(-value), y = value, fill = id)) +
  geom_bar(stat = "identity", na.rm = TRUE) +
  facet_wrap(~var, ncol = 3, scales = "free_x", drop = TRUE) +
  hide_x_axis
Result:
The loss of x-axis labels is probably not too bad here as you still have the colours to go on (and the x-axis is confusing anyway since it's not consistent across facets).
Related
I have the following data:
unigrams Freq
1 the 236133
2 to 154296
3 and 128165
4 a 127434
5 i 124599
6 of 103380
7 in 81985
8 you 69504
9 is 65243
10 for 62425
11 it 60298
12 that 58605
13 on 45935
14 my 45424
15 with 38270
16 this 34799
17 was 33009
18 be 32725
19 have 31728
20 at 30255
and this set of data:
bigrams Freq
1 of the 20707
2 in the 19443
3 for the 11090
4 to the 10939
5 on the 10280
6 to be 9555
7 at the 7184
8 i have 6408
9 and the 6387
10 i was 6143
11 is a 6114
12 and i 5993
13 i am 5843
14 in a 5770
15 it was 5644
16 for a 5343
17 if you 5326
18 it is 5196
19 with the 5092
20 have a 4936
I would like to place two qplots together side-by-side, ncol = 2. I tried the gridExtra library, but it is generating errors that I can't seem to figure out how to correct. Any ideas on how to do this, please?
library(gridExtra)
# The 20 most frequent unigrams in the dataset
ugrams <- as.data.frame(unigrams)
graph.data <- ugrams[order(ugrams$Freq, decreasing = TRUE), ][1:20, ]
# both x and y are supplied together with the "histogram" geom
p1 <- qplot(unigrams, Freq, data = graph.data, fill = unigrams,
            geom = "histogram")

# The 20 most frequent bigrams in the dataset
bgrams <- as.data.frame(bigrams)
graph.data <- bgrams[order(bgrams$Freq, decreasing = TRUE), ][1:20, ]
p2 <- qplot(bigrams, Freq, data = graph.data, fill = bigrams,
            geom = "histogram")

# place both plots side by side
grid.arrange(p1, p2, ncol = 2)
This is the error that is generated:
<error/rlang_error>
stat_bin() can only have an x or y aesthetic.
Backtrace:
1. (function (x, ...) ...
2. ggplot2:::print.ggplot(x)
4. ggplot2:::ggplot_build.ggplot(x)
5. ggplot2:::by_layer(function(l, d) l$compute_statistic(d, layout))
6. ggplot2:::f(l = layers[[i]], d = data[[i]])
7. l$compute_statistic(d, layout)
8. ggplot2:::f(..., self = self)
9. self$stat$setup_params(data, self$stat_params)
10. ggplot2:::f(...)
I would like to have the graphs resemble this one:
Which was accomplished by the following code:
# The 20 most frequent quadgrams in the dataset
qgrams <- as.data.frame(quadgrams)
graph.data <- qgrams[order(qgrams$Freq, decreasing = TRUE), ][1:20, ]

# one bar per quadgram; the tick labels are rotated to stay readable
ggplot(graph.data, aes(x = quadgrams, y = Freq, fill = quadgrams)) +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(angle = 40, hjust = 1))
Is that possible?
Edited for your shift from histograms to bar plots. Assuming that graph.data is actually your ugrams dataset, the working single plot is
Putting them side-by-side can be done with facets:
# Stack the unigram and bigram frequency tables into one long data frame;
# `.id = "id"` records which table each row came from.
# `ugrams` and `bgrams` are the data frames built with as.data.frame() in the
# question, so both inputs are taken from the same kind of object.
dplyr::bind_rows(
  unigrams = select(ugrams, grams = unigrams, Freq),
  bigrams = select(bgrams, grams = bigrams, Freq),
  .id = "id"
) %>%
  # sort by descending frequency so the factor levels below follow that order
  arrange(-Freq) %>%
  mutate(
    # fix the facet order: unigrams on the left, bigrams on the right
    id = factor(id, levels = c("unigrams", "bigrams")),
    # unique() guards against duplicated levels, which factor() rejects
    grams = factor(grams, levels = unique(grams))
  ) %>%
  ggplot(aes(x = grams, y = Freq, fill = grams)) +
  facet_wrap(~id, ncol = 2, scales = "free_x") +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(angle = 40, hjust = 1))
(Obviously, these are "too small" to hold all of the legend, but that depends on where you are using it. I wonder if the legend shouldn't be included, since it is somewhat redundant with the x-axis labels.)
The y-axis on the left is harder to see because it is dwarfed by the unigrams on the right. While it does bias the plot (it might be natural to compare the vertical levels of the plot on the left with those on the right), you can alleviate that by freeing both the "x" (already free) and "y" axes with scales="free":
I have prepared a data frame and used ggplot on it, but the initial row order is not respected in the plot. How can I keep this order?
Patient Nb_peptides Type_affinite
1 22 563 a
2 22 1040 b
3 22 11139 c
4 24 489 a
5 24 1120 b
6 24 11779 c
7 13 467 a
8 13 1239 b
9 13 14600 c
# stacked bars per patient; the x-axis order is derived from `True_order`
patient_mapping <- aes(
  x = reorder(Patient, True_order),
  y = Nb_peptides,
  fill = Type_affinite
)
g_plot <- ggplot(nb_peptides_type, patient_mapping) +
  geom_bar(stat = "identity")
print(g_plot)
Please provide stand-alone code to make it easier.
I would set the factor levels outside of the plot call to reorder them. Is this what you're looking for?
## fake dataframe: 10 patients (21..30) with 3 rows each
## (`replace = TRUE` is spelled out because `T` can be reassigned)
df <- data.frame(
  patient = as.factor(rep(21:30, each = 3)),
  nb = rpois(30, 1000),
  type = sample(letters[1:3], replace = TRUE, size = 30)
)
## initial plot: stacked bars per patient, in the default level order
ggplot(df, aes(x = patient, y = nb, fill = type)) +
  geom_bar(stat = "identity")
## adjust factor levels
True_order <- sample(21:30, 10)
# Rebuild the factor with its levels in the desired order. Assigning to
# levels() would only relabel the existing levels in place (patient 21 would be
# renamed to True_order[1], and so on), attaching each patient's data to a
# different patient id instead of reordering the axis.
df$patient <- factor(df$patient, levels = True_order)
## re-plot: bars now follow True_order and every patient keeps its own data
ggplot(data = df,
       aes(x = patient,
           y = nb,
           fill = type)) +
  geom_bar(stat = "identity")
I am plotting a simple panel of data with ggplot2. Observations from the same individual (region) are from two different waves, and I want to plot my graph ordering individuals by the value of only one of the waves. However, ggplot by default orders by the mean value of both waves. Here's a basic sample of the data.
# Panel data: six regions, each observed in 2004 and in 2010
data <- read.table(
  header = TRUE,
  sep = "",
  text = "
ID Country time Theil0
1 AT1 2004 0.10358155
2 AT2 2004 0.08181044
3 AT3 2004 0.08238252
4 BE1 2004 0.14754138
5 BE2 2004 0.07205898
6 BE3 2004 0.09522730
7 AT1 2010 0.10901556
8 AT2 2010 0.09593889
9 AT3 2010 0.07579683
10 BE1 2010 0.16500438
11 BE2 2010 0.08313131
12 BE3 2010 0.10281853
"
)
And here's the code for the plot:
library(ggplot2)
# dodge the two waves sideways so their points do not overlap
pd <- position_dodge(width = 0.4)

# regions on the x-axis are ordered by reorder(Country, Theil0)
ggplot(data, aes(x = reorder(Country, Theil0), y = Theil0,
                 colour = as.factor(time))) +
  geom_point(size = 3, position = pd) +
  labs(x = "Region", y = "Index", title = "2004 and 2010") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
And the resulting plot:
As you can see, ordering by the values of 2010 only (and not the average of both years) would make the BE2 and AT3 observations switch order, which is what I would prefer in the graph. Thank you for any help on this.
I created a reproducible example that uses generic xs and ys. Basically, you need to use the ordered function on your factor:
# dummy data: one label vector and two y series
x <- c("a", "b", "c", "d")
y1 <- seq_len(4)
y2 <- c(2, 5, 3, 6)
library(ggplot2)
library(reshape2) # used to melt the dummy dataset
df <- data.frame(x, y1, y2)
# long format: one row per (x, Group) pair with the value in `y`
df2 <- melt(df, id.vars = "x", variable.name = "Group", value.name = "y")
df2$Group <- factor(df2$Group)
# x is plotted in its default (alphabetical) order
gg1 <- ggplot(df2, aes(x = x, y = y, color = Group)) +
  geom_point()
ggsave(filename = "eample1.jpg", plot = gg1, width = 3, height = 3)
Gives a plot similar to what you had:
However, x may be reordered:
# ordered factor whose levels follow the ranking of the y2 series only
levels_by_y2 <- x[order(y2)]
df2$x2 <- factor(df2$x, levels = levels_by_y2, ordered = TRUE)
gg2 <- ggplot(df2, aes(x = x2, y = y, color = Group)) +
  geom_point()
ggsave(filename = "eample2.jpg", plot = gg2, width = 3, height = 3)
which gives this figure:
Also, I get tripped up on this a lot. I find adjusting levels in ggplot2 to be tricky at times.
I have a data frame which I generated using the following piece of code,
# six derived series, all based on x = 1..10
x <- 1:10
y <- x^3
z <- y - 20
s <- z / 3
t <- s * 6
q <- s * y
# bind the series column-wise into a numeric matrix, then convert it to a
# data frame whose columns are named after the vectors
x1 <- as.data.frame(cbind(x, y, z, s, t, q))
The data frame x1 thus has the following data,
x y z s t q
1 1 1 -19 -6.333333 -38 -6.333333
2 2 8 -12 -4.000000 -24 -32.000000
3 3 27 7 2.333333 14 63.000000
4 4 64 44 14.666667 88 938.666667
5 5 125 105 35.000000 210 4375.000000
6 6 216 196 65.333333 392 14112.000000
7 7 343 323 107.666667 646 36929.666667
8 8 512 492 164.000000 984 83968.000000
9 9 729 709 236.333333 1418 172287.000000
10 10 1000 980 326.666667 1960 326666.666667
Now I want to plot columns x vs y, z vs s and t vs q in the same plot, so for this I use the following code,
# one geom_line() layer per column pair: (1, 2), (3, 4) and (5, 6)
p <- ggplot() +
  geom_line(data = x1, mapping = aes(x = x1[, 1], y = x1[, 2], color = "red")) +
  geom_line(data = x1, mapping = aes(x = x1[, 3], y = x1[, 4], color = "blue")) +
  geom_line(data = x1, mapping = aes(x = x1[, 5], y = x1[, 6], color = "green")) +
  labs(x = "x", y = "y")
While the above piece of code works fine for a data frame of just 6 columns, I would like to perform the same operation for a data frame with a large number of columns. For example, if there are 20 columns in a data frame, a single plot should be generated containing col 1 vs 2, col 3 vs 4, col 5 vs 6 and so on up to col 19 vs 20. To do this I use the following piece of code,
# base plot: first column pair only
p <- ggplot() +
  geom_line(data = x1, aes(x = x1[, 1], y = x1[, 2], color = "red")) +
  labs(x = "x", y = "y")
ctr <- 1
# walk over the remaining pairs: (3, 4), (5, 6), ...
for (iz in seq(3, ncol(x1), by = 2)) {
  # the result is stored in the element `ctr` of `p`, not in `p` itself
  p$ctr <- p +
    geom_line(data = x1, aes(x = x1[, iz], y = x1[, iz + 1], color = "green"))
  ctr <- ctr + 1
}
So the plots should be layered incrementally and the last object should contain the entire plot. With the above code the plot gets overwritten every time the loop runs. Could someone point out how to capture the full plot? I would also like to display a legend entry for each of the lines.
Thanks
You don't need a loop if you put your data into the right format. You can create a long data frame based on your original data frame.
# c(TRUE, FALSE) is recycled over the columns, picking columns 1, 3, 5, ...
x_values <- unlist(x1[c(TRUE, FALSE)])
# c(FALSE, TRUE) picks the partner columns 2, 4, 6, ...
y_values <- unlist(x1[c(FALSE, TRUE)])
# one factor level per column pair, each repeated once per original row
pair_id <- gl(ncol(x1) / 2, nrow(x1))
x1_long <- data.frame(x = x_values, y = y_values, ind = pair_id)
Now, a single geom_line command is sufficient:
library(ggplot2)
# one line per column pair, coloured by the pair index `ind`
ggplot(data = x1_long, mapping = aes(x = x, y = y, colour = ind)) +
  geom_line()
(Note. The red line is plotted too but its values are quite small.)
How about this?
# Build one geom_line() layer per column pair (1 vs 2, 3 vs 4, ...) and add the
# whole list of layers to the plot in a single step.
# Columns are looked up by name through the `.data` pronoun; this replaces the
# deprecated aes_string(), which expects strings rather than data frames.
# Colour is mapped inside aes() so that each line gets its own legend entry.
ggplot(x1) +
  lapply(seq(1, ncol(x1), by = 2), # every second column index
         function(i) {             # return one geom_line layer per pair
           col_x <- names(x1)[i]
           col_y <- names(x1)[i + 1]
           geom_line(aes(x = .data[[col_x]],
                         y = .data[[col_y]],
                         colour = paste(col_x, "vs", col_y)),
                     size = 2)
         }) +
  labs(x = "x", y = "y", colour = "columns")
I created a stacked bar plot which depicts the distribution of council seats (= y-axis) among parties within a municipality over several years (= x-axis). The used code and some data is below. Unfortunately, I don't have yet sufficient points to post the graph.
The different parties are also associated with a variable called "ideology" as a category for different political orientations ("progressive", "moderate", "conservative").
I would like to modify the colors in such a way that all e.g. conservative parties have different kinds of blues; all progressive parties different kinds of green; and all moderate parties e.g. different kinds of red;
The variable on the ideology is in the same dataframe (y).
Any hints on how to achieve this? I already tried color=factor(ideology) and group=ideology, but to no avail. I am also aware of the related entry "Using a pre-defined color palette in ggplot"; however, it doesn't pertain specifically to my problem.
Many thanks.
Used command:
# share of council seats per party and election year;
# position = "fill" scales every year's stack to the same height
municipality.plot <- ggplot(
  y,
  aes(x = as.factor(year), y = seats, fill = party, color = party)
) +
  geom_bar(stat = "identity", position = "fill", group = "party",
           bandwidth = 1) +
  labs(x = "year", y = "% of seats for municipality")
Sample data:
year district.id party seats ideology
1 2012 127 Stranka Pravde I Razvoja Bosne I Hercegovine 1 p
2 2012 127 Savez Za Bolju Buducnost (SBB) 3 p
3 2008 127 Stranka Demokratske Akcije (SDA) 13 p
4 2004 127 Stranka Demokratske Akcije (SDA) 14 p
5 2008 127 Hrvatska Demokratska Zajednica (HDZ) 1 c
6 2008 127 Stranka Pravde I Razvoja Bosne I Hercegovine 1 p
7 2012 127 Stranka Za Bosnu I Hercegovinu (SzBiH) 4 p
8 2000 127 Socijaldemokratska Partija (SDP) 8 m
9 2012 127 Narodna Stranka Radom Za Boljitak (NSRzB) 2 m
10 2012 127 Socijaldemokratska Unija Bih (SDU) 1 p
11 2000 127 Koalicija - SDA, SBiH 15 p
12 2008 127 Socijaldemokratska Partija (SDP) 5 m
13 2008 127 Narodna Stranka Radom Za Boljitak (NSRzB) 1 m
14 2008 127 Koalicija - LDS, SDU 2 m
15 2000 127 Lgk-liberalno-gradanska Koalicija Bih (liberali Bih, Gds Bih) 1 m
16 2000 127 Nova Hrvatska Inicijativa (NHI) 1 c
17 1997 127 Socijaldemokratska Partija (SDP) 3 m
18 2012 127 Socijaldemokratska Partija (SDP) 6 m
19 2004 127 Stranka Za Bosnu I Hercegovinu (SzBiH) 5 p
20 1997 127 Bosanskohercegovacka Patriotska Stranka (BPS) 9 p
21 2000 127 Bosanskohercegovacka Patriotska Stranka (BPS) 3 p
22 2008 127 Stranka Za Bosnu I Hercegovinu (SzBiH) 4 p
23 1997 127 Hrvatska Demokratska Zajednica (HDZ) 5 c
24 2000 127 Hrvatska Demokratska Zajednica (HDZ) 2 c
25 2012 127 Stranka Demokratske Akcije (SDA) 10 p
26 2004 127 Socijaldemokratska Partija (SDP) 6 m
27 1997 127 Koalicija - SDA, SBiH, Liberali, GDS 13 p
# load relevant packages
library(scales)
library(grid)
library(ggplot2)
library(plyr)
# assume your data is called df
# order data by year, ideology and party
df2 <- arrange(df, year, ideology, party)
########################################
# create one colour palette per ideology
# count number of parties per ideology (each party counted once)
tt <- with(df2[!duplicated(df2$party), ], table(ideology))
# conservative parties blues
# progressive parties green
# moderate parties red
# `palette =` is spelled out in full instead of relying on partial argument
# matching of `pal`; the counts are extracted by name with [[ ]]
blue <- brewer_pal(palette = "Blues")(tt[["c"]])
green <- brewer_pal(palette = "Greens")(tt[["p"]])
red <- brewer_pal(palette = "Reds")(tt[["m"]])
# one row per party together with its ideology
first_row_of_party <- !duplicated(df2$party)
party_df <- df2[first_row_of_party, c("party", "ideology")]
# fix the ideology order: c, p, m
party_df$ideology <- factor(party_df$ideology, levels = c("c", "p", "m"))
# sort by ideology, then by party
party_df <- arrange(party_df, ideology, party)
# attach the fill colours in the same c, p, m order
party_df$fill <- c(blue, green, red)
# the sorted party order defines the factor levels ...
party_order <- party_df$party
party_df$party <- factor(party_df$party, levels = party_order)
# ... and df2 gets the same levels
df2$party <- factor(df2$party, levels = party_order)
##################################
# Alternative 1. Plot with one legend
# the fill colours are taken from party_df$fill
seat_share_mapping <- aes(x = as.factor(year), y = seats, fill = party)
g1 <- ggplot(df2, seat_share_mapping) +
  geom_bar(stat = "identity", position = "fill") +
  scale_fill_manual(values = party_df$fill, name = "Parties") +
  coord_cartesian(ylim = c(0, 1)) +
  labs(x = "year", y = "% of seats for municipality") +
  theme_classic()
g1
#####################################
# alt 2. Plot with separate legends for each ideology

# Pull the legend (the grob named "guide-box") out of a ggplot object.
# Stops with an explicit error when the plot has no such grob.
extract_legend <- function(plot) {
  gt <- ggplot_gtable(ggplot_build(plot))
  # vapply() guarantees a character vector; grobs without a name become NA
  grob_names <- vapply(gt$grobs, function(g) as.character(g$name)[1L],
                       character(1))
  idx <- which(grob_names == "guide-box")
  if (length(idx) == 0L) {
    stop("no \"guide-box\" grob found: the plot has no legend to extract",
         call. = FALSE)
  }
  gt$grobs[[idx[1L]]]
}

# Stacked bar plot restricted to the parties of one ideology; it is only
# created to obtain a legend with that ideology's palette and title.
#   code:    ideology code in df2$ideology ("c", "p" or "m")
#   palette: fill colours for the parties of that ideology
#   title:   legend title
ideology_plot <- function(code, palette, title) {
  ggplot(data = df2[df2$ideology == code, ],
         aes(x = as.factor(year),
             y = seats,
             fill = party)) +
    geom_bar(stat = "identity", position = "fill") +
    scale_fill_manual(values = palette, name = title)
}

# conservative parties blue
cons <- ideology_plot("c", blue, "Conservative parties")
legend_cons <- extract_legend(cons)

# progressive parties green
prog <- ideology_plot("p", green, "Progressive parties")
legend_prog <- extract_legend(prog)

# moderate parties red
mod <- ideology_plot("m", red, "Moderate parties")
legend_mod <- extract_legend(mod)
#######################################
# arrange plot and legends
# plotting regions (viewports): one for the plot and one for each legend
vp_plot <- viewport(width = 0.5, height = 1, x = 0.25, y = 0.5)
vp_legend_cons <- viewport(width = 0.5, height = 0.15, x = 0.66, y = 0.87)
vp_legend_prog <- viewport(width = 0.5, height = 0.60, x = 0.7, y = 0.55)
vp_legend_mod <- viewport(width = 0.5, height = 0.30, x = 0.75, y = 0.2)
# start from an empty page
grid.newpage()
# draw the plot, without its own legend, into the plot viewport
print(g1 + theme(legend.position = "none"), vp = vp_plot)
# pair every legend viewport with the legend grob drawn inside it
legend_slots <- list(
  list(vp = vp_legend_cons, grob = legend_cons),
  list(vp = vp_legend_prog, grob = legend_prog),
  list(vp = vp_legend_mod, grob = legend_mod)
)
for (slot in legend_slots) {
  # go back to the top-level viewport before entering the next region
  upViewport(0)
  pushViewport(slot$vp)
  grid.draw(slot$grob)
}