Selecting using geom_bar - r

I am using ggplot2 and geom_bar to summarize the number of spices in a column. I want the number of bars to be restricted to a value, say count > 10.
My code is:
# Bar chart of how often each spice occurs in r_df, with the count printed
# just above each bar.
# colour and fill are fixed values, so they are set as layer parameters rather
# than inside aes(): inside aes() they are treated as data, which adds a
# spurious legend and draws default palette colours instead of black/orange.
ggplot(r_df, aes(x = spice)) +
  geom_bar(stat = "count", colour = "black", fill = "orange") +
  stat_count(
    aes(label = after_stat(count)),
    geom = "text",
    colour = "black",
    size = 3,
    # vjust slightly above 1 lifts the label past the top of its bar
    position = position_stack(vjust = 1.05)
  ) +
  # rotate the x-axis labels so they read vertically
  theme(axis.text.x = element_text(angle = 90, vjust = 0.6))
Since the code is counting the number of each item for me, I don't think I can subset prior to executing the plot. I am hoping there is a simple way to put the count>10 into the ggplot function itself.

Since I don't have the exact data in r_df, I'm taking a guess here. You should be able to use filter() prior to building your plot:
library(tidyverse)
# Keep only the rows whose spice occurs more than 10 times, then draw the
# counted bar chart with the count printed just above each bar.
r_df %>%
  group_by(spice) %>%
  filter(n() > 10) %>% # n() is the number of rows in the current spice group
  ungroup() %>%
  # colour and fill are fixed values, so they are set on the bar layer below
  # rather than inside aes(); inside aes() they would be treated as data,
  # adding a spurious legend and default palette colours.
  ggplot(aes(x = spice)) +
  geom_bar(stat = "count", colour = "black", fill = "orange") +
  stat_count(
    aes(label = after_stat(count)),
    geom = "text",
    colour = "black",
    size = 3,
    # vjust slightly above 1 lifts the label past the top of its bar
    position = position_stack(vjust = 1.05)
  ) +
  # rotate the x-axis labels so they read vertically
  theme(axis.text.x = element_text(angle = 90, vjust = 0.6))

Related

Show alpha/opacity in ggplot legend

I have two dodged bar charts which I have put on top of each other to create this plot.
# Example data: three answers ("Yes", "Maybe", "No"), each recorded for
# groups 1 and 2.
Groups <- rep(c(1, 2), times = 3)
variable <- rep(c("Yes", "Maybe", "No"), each = 2)
value <- seq(50, 100, by = 10)
df <- data.frame(Groups, variable, value)

# Second data set: same groups, starred labels, values one tenth the size.
variable <- paste0(variable, "*")
value <- value / 10
df2 <- data.frame(Groups, variable, value)
# Two dodged bar layers on the same axes: df drawn faintly (alpha = 0.2)
# underneath, df2 drawn fully opaque on top.
ggplot() +
  geom_col(
    aes(x = Groups, y = value, fill = variable),
    data = df,
    position = "dodge",
    alpha = 0.2
  ) +
  geom_col(
    aes(x = Groups, y = value, fill = variable),
    data = df2,
    position = "dodge"
  )
I would like for the opacity/alpha from the plot behind to show in the legend.
I have tried +guides(colour = guide_legend(override.aes = list(alpha = 0.2))) but this does not work.
Thanks.
You can use a little trick with after_scale to achieve this. Map fill to the variable in one geom layer, and map color to variable in the second layer, but set the second layer's fill to after_scale(color). This will give you two legends, one for the first data set with its transparency, and the other for the second set, fully opaque. You can call these whatever you like using labs
# Two separate legends: df uses the fill scale (drawn with alpha = 0.2),
# df2 uses the colour scale and copies the scaled colour into its fill.
ggplot(df, aes(x = Groups, y = value)) +
  geom_col(
    aes(fill = variable),
    alpha = 0.2,
    position = "dodge"
  ) +
  geom_col(
    aes(color = variable, fill = after_scale(color)),
    data = df2,
    position = "dodge"
  ) +
  # legend titles name the data set each legend belongs to
  labs(fill = "df1", color = "df2")
If you want everything in a single legend, you can do this in a couple of ways, for example, using a manual fill value with 6 colors, 3 of which have alpha set, or just merging the two legends in the method above using theme tweaks:
# Single-legend variant: the transparent df bars use the fill scale and the
# opaque df2 bars use the colour scale; the guide and theme tweaks below pull
# the two legends together so that they read as one key.
ggplot(df, aes(Groups, value)) +
geom_col(aes(fill = variable), position = 'dodge', alpha = 0.2) +
# size = 0 gives the df2 bars a zero-width outline; their fill is copied from
# the scaled colour via after_scale()
geom_col(data = df2, position = 'dodge', size = 0,
aes(color = variable, fill = after_scale(color))) +
# only the colour legend keeps a title ('key'); the fill legend gets none
labs(color = 'key', fill = NULL) +
# the colour legend is ordered first and the fill legend second; the colour
# legend keys are overridden to a thin white outline
guides(fill = guide_legend(order = 2),
color = guide_legend(order = 1,
override.aes = list(color = 'white', size = 0.3))) +
# negative vertical spacing moves the two legends closer together
theme(legend.spacing.y = unit(-2, 'mm'),
legend.title = element_text(vjust = 5))

How to avoid overlaying annotations?

I'm trying to plot some annotations on a geom_histogram() ggplot (see the image below). These annotations are the histogram counts for each bin and each group. However, I don't know how to keep the annotations apart when the counts are similar. I only know how to shift an annotation by a fixed amount with vjust or hjust, and I wonder whether there is a relative way to do it. I don't think a reproducible example is necessary; looking at my code will probably be enough for someone more experienced.
This is the code I have used:
# Stack both data sets, tagging every row with its source, count the rows per
# hash, and plot a histogram of those counts by genre with one panel per type.
bind_rows(
  RN_df %>% mutate(type = "RN"),
  RVNM_df %>% mutate(type = "RVM")
) %>%
  group_by(hash) %>%
  summarise(n_eps = n(), genre, type) %>%
  ggplot(aes(x = n_eps, fill = genre)) +
  geom_histogram(binwidth = 1) +
  # Text labels showing the count for each n_eps value and genre.
  # after_stat(count) replaces the deprecated ..count.. notation.
  stat_count(
    aes(y = after_stat(count), label = after_stat(count), colour = genre),
    geom = "text",
    vjust = -1,
    hjust = 0.5,
    size = 3
  ) +
  facet_wrap(~type)
This is my output image:
You can use geom_text_repel from ggrepel:
library(ggrepel)
# Histogram of n_eps by genre with the counts drawn as repelled text labels,
# so that labels for similar counts do not sit on top of each other.
ggplot(df, aes(x = n_eps, fill = genre)) +
  geom_histogram(binwidth = 1) +
  geom_text_repel(
    # after_stat(count) replaces the deprecated ..count.. notation
    aes(label = after_stat(count)),
    stat = "count",
    # start each label in the middle of its stacked segment ...
    position = position_stack(vjust = 0.5),
    # ... and let ggrepel move labels only along the y direction
    direction = "y"
  ) +
  facet_wrap(~type)

How to add a legend manually for line chart

I need a legend entry for the plan line.
How to add a legend manually for geom_line
# Bars show the "actual" values per month, filled by gender; the line and
# points on top show the "plan" values.
ggplot(
  data = impact_end_Current_yr_m_actual,
  aes(x = month, y = gender_value)
) +
  geom_col(aes(fill = gender)) +
  theme_classic() +
  # group = 1 joins all months into a single line
  geom_line(
    data = impact_end_Current_yr_m_plan,
    aes(x = month, y = gender_value, group = 1),
    color = "#288D55",
    size = 1.2
  ) +
  geom_point(
    data = impact_end_Current_yr_m_plan,
    aes(x = month, y = gender_value)
  ) +
  theme(
    axis.line.y = element_blank(),
    axis.ticks = element_blank(),
    legend.position = "bottom",
    axis.text.x = element_text(
      face = "bold", color = "black", size = 10, angle = 0, hjust = 1
    )
  ) +
  labs(x = "", y = "End Beneficiaries (in Num)", fill = "") +
  scale_fill_manual(values = c("#284a8d", "#00B5CE", "#0590eb", "#2746c2")) +
  # print axis labels in fixed rather than scientific notation; the closing
  # parenthesis of this call was missing, so the snippet did not parse
  scale_y_continuous(labels = function(x) format(x, scientific = FALSE))
The neatest way to do it, I think, is to add colour = "[label]" inside the aes() of geom_line() and then assign the actual colour manually in scale_colour_manual(). Here's an example using mtcars (apologies that it uses stat_summary instead of geom_line, but the same trick applies):
library(tidyverse)
# Mean mpg per gear: dodged bars split by cyl, plus one line for the overall
# mean that gets its own legend entry.
mtcars %>%
  ggplot(aes(x = gear, y = mpg, fill = factor(cyl))) +
  stat_summary(fun = mean, geom = "bar", position = "dodge") +
  stat_summary(
    # a constant colour inside aes() is what creates the legend entry;
    # group = 1 keeps all gears on a single line
    aes(colour = "Overall mean", group = 1),
    fun = mean,
    geom = "line",
    size = 3
  ) +
  # blank titles for both legends
  scale_fill_discrete(name = "") +
  # the line's real colour is assigned here
  scale_colour_manual(name = "", values = "black")
Created on 2020-12-08 by the reprex package (v0.3.0)
The limitation here is that the colour and fill legends are necessarily separate. Giving both scales a blank title (as in the two scale_ calls above) at least means they are not visibly split up by legend titles.
In your code you would probably want then:
...
# Bars come from the "actual" data; the line comes from the "plan" data.
ggplot(data = impact_end_Current_yr_m_actual, aes(x = month, y = gender_value)) +
geom_col(aes(fill = gender))+
# color = "Plan" sits inside aes() so that the line gets its own legend entry
geom_line(data = impact_end_Current_yr_m_plan,
aes(x=month, y= gender_value, group=1, color="Plan"),
size=1.2)+
# the line's actual colour is assigned here instead of in geom_line()
scale_color_manual(values = "#288D55") +
...
(but I can't test this on your data, so I'm not sure whether it works)

ggplot2 with side by side and proportional fill

I have data that looks like this:
My goal is to have a barplot grid as follows: Each plot will be specific to 1 race_ethnicity group. The x-axis in each plot will be the different age_bin groups. For each age_bin, there will be two bars: 1 for men, and 1 for women. For each bar, I want it to be filled with the proportion of Likely/(Unlikely + Likely). Preferably, each bar would have a height of 1 and a line cut through it so Likely% of that bar is one color with a label. This is what I currently have:
I am running into issues with 1) using a predefined proportion as the fill, and 2) having two different "fills" (one for biological sex, one for the predefined proportion).
Thanks to anyone who can help with this. My code is currently the following:
# One panel per race/ethnicity; within each age group, one full-height (y = 1)
# bar per gender, labelled with the likely-voter percentage at height prop.
ggplot(data = who_votes_data, aes(x = age_bin, y = 1, fill = gender)) +
  # fill = gender is inherited from ggplot(), so it is not repeated here
  geom_col(position = position_dodge2()) +
  facet_wrap(~race_ethnicity, nrow = 2, scales = "free") +
  geom_text(
    aes(label = paste0(sprintf("%1.1f", prop * 100), "%"), y = prop),
    colour = "white"
  ) +
  labs(
    x = expression("Age Group"),
    # axis title typo fixed ("Prortion" -> "Proportion")
    y = "Proportion of Likely Voters",
    title = "Proportion of Likely Voters Across Age Groups, Race/Ethnicity, and Sex",
    caption = "Figure 1"
  ) +
  theme(plot.caption = element_text(hjust = 0.5, vjust = -0.5, size = 18))
https://docs.google.com/spreadsheets/d/1a7433iwXNSwcuXDJOvqsxNDN6oaYULVlyw22E41JROU/edit?usp=sharing
Updated Code:
library(tidyverse)
library(ggplot2)
# Read the sample votes, total the Likely/Unlikely counts for every
# race/ethnicity x age group x gender cell, and plot the likely-voter share.
df <- read.csv("samplevotes.csv")
df %>%
  group_by(race_ethnicity, age_bin, gender) %>%
  summarise(
    Likely = sum(Likely),
    Unlikely = sum(Unlikely),
    # uses the group totals computed on the two lines above
    proportion = Likely / (Likely + Unlikely)
  ) %>%
  ungroup() %>%
  ggplot(aes(x = age_bin, y = proportion, fill = gender)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_wrap(~race_ethnicity, nrow = 2) +
  geom_text(
    aes(label = paste0(sprintf("%1.1f", proportion * 100), "%"), y = proportion),
    # 0.9 is the default bar width that position = "dodge" uses for the bars;
    # width = 1 left the labels slightly off-centre from their bars
    position = position_dodge(width = 0.9),
    colour = "Black",
    size = 2.2
  ) +
  labs(
    x = expression("Age Group"),
    y = "Proportion of Likely Voters",
    title = "Proportion of Likely Voters Across Age Groups, Race/Ethnicity, and Sex",
    caption = "Figure 1"
  ) +
  theme(plot.caption = element_text(hjust = 0.5, vjust = -0.5, size = 18))
Here is the code I would use. I did make some changes based on the way the data was combined.
# Total the Likely/Unlikely counts for every race/ethnicity x age group x
# gender cell, then plot the likely-voter share as dodged bars with labels.
df %>%
  group_by(race_ethnicity, age_bin, gender) %>%
  summarise(
    Likely = sum(Likely),
    Unlikely = sum(Unlikely),
    # uses the group totals computed on the two lines above
    proportion = Likely / (Likely + Unlikely)
  ) %>%
  ungroup() %>%
  ggplot(aes(x = age_bin, y = proportion, fill = gender)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_wrap(~race_ethnicity, nrow = 2) +
  geom_text(
    aes(label = paste0(sprintf("%1.1f", proportion * 100), "%"), y = proportion),
    # match the bars' dodge width (default bar width 0.9) so that each label
    # is centred on its bar; width = 1 shifted the labels sideways
    position = position_dodge(width = 0.9),
    colour = "Black",
    size = 2.2
  ) +
  labs(
    x = expression("Age Group"),
    y = "Proportion of Likely Voters",
    title = "Proportion of Likely Voters Across Age Groups, Race/Ethnicity, and Sex",
    caption = "Figure 1"
  ) +
  theme(plot.caption = element_text(hjust = 0.5, vjust = -0.5, size = 18))
Here is what it looks like

R geom_bar and facet_grid labels on top of bars

I am trying to make this graph look better, and I'm stuck with the labels (the numbers in this case). How can I make them to show on the top of their correspondent bar? Notice it is a facet_grid.
I have the following code and the output:
# Dodged bars of percentage per Country, filled by Gender, one panel per
# Propuesta, with the rounded percentage as a text label.
# Note: fill = Gender is mapped only in the bar layer here.
ggplot(
  articles_europaoccidental_sex_count_unique_group,
  aes(x = Country, y = percentage)
) +
  geom_col(aes(fill = Gender), position = "dodge") +
  facet_grid(~Propuesta) +
  geom_text(
    aes(label = round(percentage, 2)),
    vjust = -1,
    position = position_dodge(width = 0.9)
  )
Thanks!
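You are almost there; you just need to move fill = Gender into the aes() of the ggplot() call, so that geom_text() inherits the same grouping and its labels are dodged together with the bars.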
library(tidyverse)
# Reproducible data set: mean mpg for every cyl / am / gear combination
test_mtcars <- mtcars %>%
  group_by(cyl, am, gear) %>%
  summarise(mean = mean(mpg))

# fill is mapped in ggplot(), so both the bars and the text labels use it
ggplot(
  test_mtcars,
  aes(x = as.factor(cyl), y = mean, fill = as.factor(am))
) +
  geom_col(position = "dodge") +
  facet_grid(~gear) +
  geom_text(
    aes(label = round(mean, 2)),
    vjust = -1,
    position = position_dodge(width = 0.9)
  )

Resources