I have the following workflow:
# Load the example data set. The workspace is intentionally not wiped with
# rm(list = ls()) and mtcars is not attach()ed: the code below always passes
# the data frame explicitly, so neither is needed and both have global side
# effects.
data(mtcars)
library(ggplot2)
library(plyr)
library(dplyr)
library(scales)
library(reshape2)
library(lazyeval)
#' Relative frequency of `y` within each level of `x` in `mtcars`.
#'
#' @param x Name of the outer grouping column, as a string (e.g. "gear").
#' @param y Name of the inner grouping column, as a string (e.g. "cyl").
#' @return An ungrouped tibble with the two grouping columns and `Freq`, the
#'   share of rows of each `y` value within its `x` group.
my_func <- function(x, y) {
  mtcars %>%
    # all_of() selects columns by string name; replaces deprecated group_by_().
    group_by(across(all_of(c(x, y)))) %>%
    # Keep the grouping by `x` so the shares below sum to 1 within each `x`.
    summarise(Freq = n(), .groups = "drop_last") %>%
    mutate(Freq = Freq / sum(Freq)) %>%
    ungroup()
}
# Assign at the call site instead of writing a global with `<<-` from inside
# the function; the plotting code reads `test`.
test <- my_func("gear", "cyl")
# Bar chart of the per-gear shares, filled by cyl, with the y axis shown as
# percentages between 0% and 100%.
ggplot(data = test, mapping = aes(x = gear, y = Freq)) +
  geom_col(
    mapping = aes(fill = cyl),
    position = position_dodge(width = 0.1)
  ) +
  scale_y_continuous(limits = c(0, 1), labels = percent_format())
However, the resulting plot does not show the bars next to one another, but rather some on top of one another. What gives and how do I fix this?
You need to convert the cyl column to a factor.
# Store cyl as a factor so the bars are placed side by side instead of on top
# of one another.
test[["cyl"]] <- factor(test[["cyl"]])
ggplot(data = test, mapping = aes(x = gear, y = Freq)) +
  geom_col(
    mapping = aes(fill = cyl),
    position = position_dodge(width = 1)
  ) +
  scale_y_continuous(limits = c(0, 1), labels = percent_format())
Related
How to set this plot in ascending order? many thanks in advance.
library(ggplot2)
library(reshape2)
# Reshape iris to long format: Species is kept as the id column, and every
# other column becomes a (variable, value) pair.
iris2 <- melt(data = iris, id.vars = "Species")
iris2
# Dodged bars of each measurement within each species.
ggplot(iris2, aes(x = Species, y = value, fill = variable)) +
  geom_col(position = "dodge")
You can use reorder to put the bars in ascending overall order:
# Re-level `variable` by the mean of `value` (reorder()'s default summary),
# which sets one common bar order for all species.
iris2$variable <- with(iris2, reorder(variable, value, FUN = mean))
ggplot(iris2, aes(x = Species, y = value, fill = variable)) +
  geom_col(position = "dodge")
Notice though that the ordering is the same for all 3 groups, which means that setosa has one bar "out of place".
It is possible, but a lot trickier, to get the bars in ascending order for every species.
library(tidyverse)
# Position every bar by hand so that, within each species, the bars run from
# the smallest to the largest value.
iris2 %>%
  group_by(variable, Species) %>%
  summarise(value = max(value)) %>%
  # Integer centre of each species on the x axis (its factor code).
  mutate(xval = as.numeric(as.factor(Species))) %>%
  group_by(Species) %>%
  # rank(), not order(): each bar's offset must be its own position in the
  # sorted sequence. order() returns the sorting permutation, which matches
  # the ranks only for some inputs. ties.method = "first" keeps positions
  # distinct when two values are equal.
  mutate(xval = 0.2 * rank(value, ties.method = "first") - 0.5 + xval) %>%
  ggplot(aes(x = xval, y = value, fill = variable)) +
  geom_col(position = "dodge", width = 0.2) +
  # Label the integer centres with the factor levels, which are in the same
  # order as the codes used for xval above.
  scale_x_continuous(
    breaks = seq_along(levels(iris2$Species)),
    labels = levels(iris2$Species),
    name = "Species"
  )
I want to arrange N ggplot (each one is facetted) on a grid with grid.arrange.
library(tidyverse)
library(ggplot2)
library(gridExtra)
# Intended: build one faceted scatter plot (hp vs mpg, faceted by carb) per
# distinct cyl value and arrange the plots on one grid.
plots <- lapply(unique(mtcars$cyl), function(cyl) {
# NOTE: as described in the surrounding discussion, `cyl == cyl` is TRUE for
# every row, so `data` is the full mtcars data set on each iteration.
data <- mtcars %>% filter(cyl == cyl)
ggplot(data, aes(x=mpg, y=hp))+
geom_point(color = "blue")+
facet_wrap(.~carb)}) %>%
# The list of plots is piped straight into grid.arrange, so `plots` receives
# the result of this do.call() rather than the list itself.
# NOTE(review): confirm that is intended, given the second call below.
do.call(grid.arrange, .)
do.call(grid.arrange, plots )
The problem is that all the plots are based on the entire dataset and they render the same plot, while they should be different, since I filter the data in the line
data <- mtcars %>% filter(cyl == cyl).
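filter() looks up names in the data frame first, so inside the call cyl refers to the mtcars column on both sides of ==, not to the function argument; cyl == cyl is therefore TRUE for the entire dataset. You can solve this by unquoting the argument with !!, or by using another name for the function argument, e.g. x.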
# Option 1: unquote the function argument with !! so that filter() compares
# the cyl column against the argument's value.
data <- mtcars %>% filter(cyl == !!cyl)
# Option 2: give the function argument a name that differs from the column
# (here x). The `...` lines stand for the parts of the original code that are
# left out of this excerpt.
... function(x) {
data <- mtcars %>% filter(cyl == x)
...
Here is a tidyverse approach
library(tidyverse)
# Split mtcars into one data frame per cyl value and draw a faceted scatter
# plot for each piece.
group_plots <- map(
  group_split(mtcars, cyl),
  function(cyl_data) {
    ggplot(cyl_data, aes(x = mpg, y = hp)) +
      geom_point(color = "blue") +
      facet_wrap(. ~ carb)
  }
)
# Lay all plots out on a single grid.
do.call(gridExtra::grid.arrange, group_plots)
Try using split() first:
library(tidyverse)
library(gridExtra)
# One data frame per cyl value, collected in a list.
l <- split(mtcars, f = mtcars$cyl)
# Draw the same faceted scatter plot for every element of the list.
plots <- lapply(
  l,
  function(cyl_data) {
    ggplot(cyl_data, aes(x = mpg, y = hp)) +
      geom_point(color = "blue") +
      facet_wrap(. ~ carb)
  }
)
# Arrange the resulting plots on one grid.
do.call(grid.arrange, plots)
I'm trying to loop through every column of the iris data set and plot a histogram in ggplot. So I'm expecting 5 different histograms to appear. However, my for loop below returns nothing. How can I fix this?
library(ggplot2)
# Attempt to draw one histogram per column of iris.
# NOTE: as reported in the question, running this loop displays nothing.
for (i in colnames(iris)){
# `i` holds a column name as a character string, so aes(x = i) receives that
# string rather than a bare column reference.
ggplot(iris, aes(x = i))+
geom_histogram()
}
Instead of using a for loop, the tidyverse/ggplot way would be to reshape the data from wide to long and then plot using facet_wrap
library(tidyverse)
# Reshape the measurement columns to long format (one row per measurement)
# and draw one histogram panel per original column.
iris %>%
  # pivot_longer() supersedes gather(); the key/val column names are kept.
  pivot_longer(-Species, names_to = "key", values_to = "val") %>%
  ggplot(aes(x = val)) +
  geom_histogram(bins = 30) +
  # Free x scales: each panel gets an x range fitted to its own values.
  facet_wrap(~key, scales = "free_x")
Using dplyr, tidyr and ggplot:
library(ggplot2)
library(dplyr)
library(tidyr)
# Histogram grid: one row of panels per species, one column per measurement.
iris %>%
  # pivot_longer() supersedes gather(); the column names are kept as before.
  pivot_longer(-Species, names_to = "Mesure", values_to = "Value") %>%
  ggplot(aes(x = Value)) +
  # bins = 30 is the value geom_histogram() falls back to; stating it avoids
  # the "pick better value" message.
  geom_histogram(bins = 30) +
  facet_grid(rows = vars(Species), cols = vars(Mesure))
Result:
How can I make the panels of separate ggplots align when the y-axis labels change in length across plots? Below I've saved two subsets of mtcars with longer and shorter model names. Although the overall plots are the same size, the panels are smaller in the mt_long plot because the y-axis labels take up more of the plot.
library(dplyr)
library(ggplot2)
# Move the car names out of the row names into a regular `model` column.
# rownames_to_column() comes from tibble, which the library() calls above do
# not attach, so it is called with an explicit namespace.
ds_mt <- mtcars %>% tibble::rownames_to_column("model")
# The four models with the shortest and the longest names, respectively.
mt_short <- ds_mt %>% arrange(nchar(model)) %>% slice(1:4)
mt_long <- ds_mt %>% arrange(desc(nchar(model))) %>% slice(1:4)
# Horizontal bar chart of mpg per model for the short-named subset.
plot_short <- ggplot(mt_short, aes(x = model, y = mpg)) +
  geom_col() +
  coord_flip()
# Same chart for the long-named subset.
plot_long <- ggplot(mt_long, aes(x = model, y = mpg)) +
  geom_col() +
  coord_flip()
# Display both plots, one after the other.
print(plot_short)
print(plot_long)
For this reprex, it is important that the plots be separate. Is there any way to set just the panel dimensions of the plot rather than the overall size of the plot?
We can use ggarrange from the egg package
library(egg)
# Stack the two plots in a single column.
ggarrange(plots = list(plot_short, plot_long), ncol = 1)
To save, use
# Keep the arranged layout in a variable, then write it to file.png.
gg <- ggarrange(plots = list(plot_short, plot_long), ncol = 1)
ggsave(filename = "file.png", plot = gg)
try egg::set_panel_size(plot_short)
I intend to plot every categorical column in the data frame, with the levels of each variable shown in descending order of frequency.
I have already found out how to plot every column and reorder the levels, but I cannot figure out how to combine them together. Could you please give me some suggestions?
Code for plot every column:
require(purrr)
library(tidyr)
library(ggplot2)
# Bar chart of level counts for every factor column of diamonds, one panel
# per column.
diamonds %>%
  keep(.p = is.factor) %>%
  # Long format: `key` holds the column name, `value` the level.
  gather(key = "key", value = "value") %>%
  ggplot(mapping = aes(x = value)) +
  geom_bar() +
  facet_wrap(~key, scales = "free")
Code for reorder the levels of one variable:
# Count how often each value of x occurs.
tb <- table(x)
# Positions of the counts from most to least frequent.
by_count <- order(tb, decreasing = TRUE)
# Rebuild x as a factor whose levels follow that order.
factor(x, levels = names(tb)[by_count])
BTW, if you feel there is a better way writing these codes, please let me know.
Thanks.
Alternative 1
No need to use gridExtra to emulate facet_wrap, just include the function reorder_size inside aes:
#' Turn `x` into a factor whose levels are ordered from the most to the least
#' frequent value.
#'
#' @param x A vector of values to tabulate.
#' @return A factor with the same values as `x`.
reorder_size <- function(x) {
  counts <- table(x)
  by_count <- order(counts, decreasing = TRUE)
  factor(x, levels = names(counts)[by_count])
}
# Bar charts of every factor column of diamonds, with the x values re-levelled
# by frequency through reorder_size().
diamonds %>%
  keep(.p = is.factor) %>%
  gather(key = "key", value = "value") %>%
  ggplot(mapping = aes(x = reorder_size(value))) +
  geom_bar() +
  facet_wrap(~key, scales = "free")
Alternative 2
Using dplyr to calculate the count, grouping by key and value. Then we reorder value in descending order of count inside aes.
library(dplyr)
# Pre-compute the count of every level of every factor column, then plot the
# counts with the levels ordered from most to least frequent.
diamonds %>%
  keep(is.factor) %>%
  gather() %>%
  # count() tallies each key/value pair into column `n` and returns an
  # ungrouped result, so no grouping is left behind.
  count(key, value) %>%
  # Negating n makes reorder() sort the levels in descending order of count.
  ggplot(aes(x = reorder(value, -n), y = n)) +
  facet_wrap(~key, scales = "free") +
  geom_col()
Output
The problem with your approach is that the long form of your data-frame will introduce a lot of factors that would be plotted as 0 for the geom_bar().
Instead of relying on facet_wrap and dealing with the long data-form, here's an alternative.
Reordering by size function:
#' Turn `x` into a factor whose levels are ordered from the most to the least
#' frequent value.
#'
#' @param x A vector of values to tabulate.
#' @return A factor with the same values as `x`.
reorder_size <- function(x) {
  # TRUE rather than T: T is an ordinary variable that can be reassigned.
  factor(x, levels = names(sort(table(x), decreasing = TRUE)))
}
Using gridExtra::grid.arrange function to deliver similar facet_wrap style figure:
library(gridExtra)
# One bar chart per factor column, each with its levels ordered by frequency.
a <- ggplot(data = diamonds, mapping = aes(x = reorder_size(cut))) +
  geom_bar()
b <- ggplot(data = diamonds, mapping = aes(x = reorder_size(color))) +
  geom_bar()
c <- ggplot(data = diamonds, mapping = aes(x = reorder_size(clarity))) +
  geom_bar()
# Place the three charts next to each other in a single row.
grid.arrange(grobs = list(a, b, c), nrow = 1)