Maintaining a scale of a histogram when using ..ncount - r

Using the code below, I'm generating a set of simple histogram:
data(mtcars); Vectorize(require)(package = c("ggplot2", "ggthemes", "dplyr"))
mtcars %>%
add_rownames(var = "model") %>%
gather(var, value, -model, -am) %>%
filter(var %in% c("hp")) %>%
# Define chart
ggplot(aes(value)) +
geom_histogram(aes(y = ..ncount..), colour = "black", fill = "gray58",
binwidth = 15) +
geom_density(aes(y = ..scaled..), colour = "red") +
facet_wrap( ~am, ncol = 3, scales = "free")
I would like to maintain scale that is produced when generating a histogram without the ..ncount.. special variable, as in the example:
mtcars %>%
add_rownames(var = "model") %>%
gather(var, value, -model, -am) %>%
filter(var %in% c("hp")) %>%
# Define chart
ggplot(aes(value)) +
geom_histogram(colour = "black", fill = "gray58",
binwidth = 15) +
geom_density(aes(y = ..scaled..), colour = "red") +
facet_wrap( ~am, ncol = 3, scales = "free")
But it makes the geom_density look poor.
Task
So what I want boils down to:
keep scale of y axis from the second one
keep graphics from the first one

Related

Placing data labels for stacked bar chart at top of bar

I have been attempting to add a label on top of each bar to represent the proportion that each ethnic group makes up in referrals.
For some reason I cannot get the labels to be placed at the top of each bar. How do I fix this?
My code below
freq <- df %>%
group_by(ethnicity) %>%
summarise(n = n()) %>%
mutate(f = round((n/sum(n)*100, 1))
df %>%
group_by(pathway) %>%
count(ethnicity) %>%
ggplot(aes(x = ethnicity, y = n , fill = pathway)) +
geom_bar(stat = "identity", position = "stack") +
geom_text(data = freq,
aes(x= ethnicity, y = f, label = f),
inherit.aes = FALSE) +
theme(legend.position = "bottom") +
scale_fill_manual(name = "",
values = c("light blue", "deepskyblue4"),
labels = "a", "b") +
xlab("") +
ylab("Number of Referrals") +
scale_y_continuous(breaks = seq(0, 2250, 250), expand = c(0,0)
Here is what it currently looks like
Since you are using the count as your y-axis position in geom_bar, you need to use the same thing in your geom_text to get the labels in the right place. Below is an example using mtcars dataset. Using vjust = -1 I put a little bit of space between the label and the bars to make it more legible and aesthetically pleasing.
library(tidyverse)
mtcars %>%
group_by(carb) %>%
summarise(n = n()) %>%
mutate(f = round(proportions(n) * 100, 1)) -> frq
mtcars %>%
group_by(gear) %>%
count(carb) -> df
df %>%
ggplot(aes(x = carb, y = n, fill = gear)) +
geom_bar(stat = "identity", position = "stack") +
geom_text(data = frq,
vjust = -1,
aes(x= carb, y = n, label = f),
inherit.aes = FALSE)
Created on 2022-10-31 by the reprex package (v2.0.1)

Order grouped scatterplot by mean

I am plotting a geom_point for several groups (Loc) and want in addition a line that indicates the mean of the points for each group. The groups should be ordered based on the mean of the Size for each group. I am trying to do this by reorder(Loc, Size.Mean) but it does not reorder.
ggplot(data,aes(Loc,Size,color=Loc)) +
geom_point() +
geom_point(data %>%
group_by(Loc) %>%
summarise(Size.Mean = mean(Size)),
mapping = aes(y = Size.Mean, x = reorder(Loc, Size.Mean)),
color = "black", shape = '-') +
theme_pubr(base_size=8) +
scale_y_continuous(trans="log10") +
theme(axis.text.x = element_text(angle = 90,hjust = 1)) +
theme(legend.position = "none")
ggplot orders discrete x ticks according to their level if the variable is a factor:
library(tidyverse)
iris_means <-
iris %>%
group_by(Species) %>%
summarise(mean = mean(Sepal.Length)) %>%
arrange(-mean)
iris %>%
mutate(Species = Species %>% factor(levels = iris_means$Species)) %>%
ggplot(aes(Species, Sepal.Length)) +
geom_point() +
geom_crossbar(data = iris_means, mapping = aes(y = mean, ymin = mean, ymax = mean), color = "red")
Created on 2021-09-10 by the reprex package (v2.0.1)

R, ggplot2, limit rows in a faceted barplot

Sup,
Consider the following lines:
data
df=data.frame(
prod=sample(1:30, 1000, replace=TRUE),
mat=sample(c('yes', 'no'), 1000, replace=TRUE),
fj=sample(c(1,2), 1000, replace = TRUE)
)
plot
df %>%
group_by(mat, prod, fj) %>%
summarise(n = n()) %>%
arrange(desc(n)) %>%
slice(1:5) %>%
ggplot(aes(x = reorder(prod, n), y = n)) +
geom_col(fill = RColorBrewer::brewer.pal(3, 'Dark2')[2], colour = "grey", alpha = 0.8) +
labs(x = "Prod", y = "Qnt") +
scale_y_continuous(labels = scales::comma) +
coord_flip() +
facet_wrap(fj ~ mat, scale="free") +
theme_minimal()
which gives me
Now, if i drop fj variable, as in
df %>%
group_by(mat, prod) %>%
summarise(n = n()) %>%
arrange(desc(n)) %>%
slice(1:5) %>%
ggplot(aes(x = reorder(prod, n), y = n)) +
geom_col(fill = RColorBrewer::brewer.pal(3, 'Dark2')[2], colour = "grey", alpha = 0.8) +
labs(x = "Prod", y = "Qnt") +
scale_y_continuous(labels = scales::comma) +
coord_flip() +
facet_wrap(~ mat, scale="free") +
theme_minimal()
slice(1:5) does it's job and i've got:
Question
why slice and reorder doesn't seems to work properly when there's 3+ variables and what should i do to limit the first plot to 5 lines each?
When you call summarize you loose one level of grouping. In this case, you lost fj, so when you slice it's not included in the group divisions.
If you first ungroup then group_by mat and fj, I think you'll end up with what you are looking for.
df %>%
group_by(mat, prod, fj) %>%
summarise(n = n()) %>%
ungroup()%>%
group_by(mat, fj) %>%
arrange(desc(n)) %>%
slice(1:5) %>%
ggplot(aes(x = reorder(prod, n), y = n)) +
geom_col(fill = RColorBrewer::brewer.pal(3, 'Dark2')[2], colour = "grey", alpha = 0.8) +
labs(x = "Prod", y = "Qnt") +
scale_y_continuous(labels = scales::comma) +
coord_flip() +
facet_wrap(fj ~ mat, scale="free") +
theme_minimal()
This leaves the problem of reordering the prod variable within each facet. It doesn't work in the example above because you are ordering by the entire data frame, and some of the values of Prod are repeated in several of the facets. As discussed in this blog post by #drsimonj you need to create an order variable and plot based on that. This follows/blatently copies the method outlined in the blog post.
df %>%
group_by(mat, prod, fj) %>%
summarise(n = n()) %>%
group_by(mat, fj) %>%
arrange(desc(n)) %>%
slice(1:5) %>%
ungroup() %>%
arrange(fj,mat, n) %>% # arrange the entire table by the facets first, then by the n value
mutate(row.order = row_number()) %>% # create dummy variable
ggplot(aes(x = row.order, y = n)) + # plot by the dummy variable
geom_col(fill = RColorBrewer::brewer.pal(3, 'Dark2')[2], colour = "grey", alpha = 0.8, position = "dodge") +
labs(x = "Prod", y = "Qnt") +
scale_y_continuous(labels = scales::comma) +
scale_x_continuous( # add back in the Prod values
breaks = df2$row.order,
labels = df2$prod
)+
coord_flip() +
facet_wrap(fj ~ mat, scales = "free") +
theme_minimal()

naming facets in facet_wrap

I am having issues trying to name a set of plots created with the facet_wrap feature. I am specifically trying to wrap the titles onto multiple lines. I have looked into this issue extensively within stack overflow and cannot find the error that I am generating. The code is below. a2$variable is a column of character strings (for grouping purposes), a2$ma_3 and a2$ma_12 are moving averages that I am trying to plot. The error that is generated is:
Error in as.integer(n) :
cannot coerce type 'closure' to vector of type 'integer'
p1=a2 %>%
ggplot(aes(x = date, color = variable)) +
geom_line(aes(y = ma_12), color = "aquamarine3", alpha = 0.5,size=.7) +
geom_line(aes(y = ma_3), color = "gray40", alpha = 0.5,size=.7) +
facet_wrap(~ variable, ncol = 3, scale = "free_y",label_wrap_gen(width=10))
Thanks in advance.
You're close. To modify the facet_wrap labels, we use the labeller argument:
library(tibble)
library(ggplot2)
mtcars %>%
rownames_to_column() %>%
head() %>%
ggplot(aes(x = mpg, color = cly)) +
geom_point(aes(y = wt), color = "aquamarine3", alpha = 0.5,size=5) +
geom_point(aes(y = qsec), color = "gray40", alpha = 0.5,size=5) +
facet_wrap(~ rowname, ncol = 3, scale = "free_y",
labeller = label_wrap_gen(width = 10))
Output:
I'd suggest formatting the variable before you send it to ggplot, like this:
library(tidyverse)
mtcars %>%
rownames_to_column() %>%
head() %>%
mutate(carname = stringr::str_wrap(rowname, 10)) %>%
ggplot(aes(x = mpg, color = cly)) +
geom_point(aes(y = wt), color = "aquamarine3", alpha = 0.5,size=5) +
geom_point(aes(y = qsec), color = "gray40", alpha = 0.5,size=5) +
facet_wrap(~ carname, ncol = 3, scale = "free_y")

R ggplot2: Adding another geom to coord_polar

I have a plot i wish to add another layer to
Th plot is below. I want to overlay another polar plot on it to see that the numbers "match up"
In the example below I have create the plot for one species of the iris dataset. I would like to overlay another plot of a different species
Thank you for your time
library(ggplot2)
library(dplyr)
mydf <- iris
plot.data <- tidyr::gather(mydf,key = attribute ,value = avg_score, Sepal.Length:Petal.Width)
plot.data <- plot.data %>%
filter(Species == 'setosa') %>%
group_by(attribute) %>%
summarise(attr_mean = mean(avg_score))
ggplot(plot.data, aes(x=attribute, y = attr_mean, col = attribute)) +
geom_bar(stat = "identity", fill = 'white') +
coord_polar(theta = "x") +
theme_bw()
This is quite the pedestrian way of doing things.
plot.setosa <- plot.data %>%
filter(Species == 'setosa') %>%
group_by(attribute) %>%
summarise(attr_mean = mean(avg_score))
plot.virginica <- plot.data %>%
filter(Species == 'virginica') %>%
group_by(attribute) %>%
summarise(attr_mean = mean(avg_score))
ggplot(plot.setosa, aes(x=attribute, y = attr_mean, col = attribute)) +
geom_bar(stat = "identity", fill = 'blue', alpha = 0.25) +
geom_bar(data = plot.virginica, stat = "identity", fill= "green", alpha = 0.25,
aes(x = attribute, y = attr_mean, col = attribute)) +
coord_polar(theta = "x") +
theme_bw()
And a slightly less pedestrian.
xy <- plot.data %>%
group_by(Species, attribute) %>%
summarise(attr_mean = mean(avg_score))
ggplot(xy, aes(x = attribute, y = attr_mean, color = attribute, fill = Species)) +
theme_bw() +
geom_bar(stat = "identity", alpha = 0.25) +
coord_polar(theta = "x")

Resources