gghighlight in clustered (grouped) bar chart in R - r

I need to use gghighlight in a clustered bar chart in R in order to highlight only one single bar. My code and sample data looks like this:
library(tidyr)
library(ggplot2)
library(gghighlight) # provides gghighlight(); without it the last line fails

# Wide sample data: one row per country, one column per cause.
dat <- data.frame(
  country = c("USA", "Brazil", "Ghana", "England", "Australia"),
  Stabbing = c(15, 10, 9, 6, 7),
  Accidents = c(20, 25, 21, 28, 15),
  Suicide = c(3, 10, 7, 8, 6)
)

# melt() lives in reshape2, which is not attached here, so call it by
# namespace. dat.m is not used by the plot below.
dat.m <- reshape2::melt(dat, id.vars = "country")

# Long format used for plotting: columns country, type, value.
dat.g <- gather(dat, type, value, -country)

# Dodged bars (one per country within each type), then an attempt to
# highlight the single bar where both conditions hold.
ggplot(dat.g, aes(type, value)) +
  geom_bar(aes(fill = country), stat = "identity", position = "dodge") +
  gghighlight(type == "Accidents" & country == "Brazil")
But this gives me this awkward result:
How can I get gghighlight to highlight only one single bar of one group (so combining two conditions for two discrete variables)?

Here are two alternative options for highlighting a single column in this type of plot:
1) make a new variable (named highlight below) and fill by that (and, if you like, use the line colors to color by country)
2) manually annotate the one column you want to highlight with an arrow and/or text (or work out how to automate the positioning, but that would be more involved) - could be an option for one final figure
library(tidyr)
library(ggplot2)

# Wide sample data: one row per country, one column per cause.
dat <- data.frame(
  country = c("USA", "Brazil", "Ghana", "England", "Australia"),
  Stabbing = c(15, 10, 9, 6, 7),
  Accidents = c(20, 25, 21, 28, 15),
  Suicide = c(3, 10, 7, 8, 6)
)

# Long format: one row per country/type pair.
dat.g <- gather(dat, type, value, -country)

## set highlighted bar (the comparison already yields TRUE/FALSE)
dat.g$highlight <- dat.g$type == "Accidents" & dat.g$country == "Brazil"

## option 1: use fill to highlight, colour for country
## (an `alpha` argument given to ggplot() is ignored; pass alpha to geom_bar()
## if translucent bars are wanted)
ggplot(dat.g, aes(type, value, fill = highlight, colour = country)) +
  geom_bar(stat = "identity", position = "dodge2", size = 1) +
  ## name the values so the mapping does not depend on level order
  scale_fill_manual(values = c(`FALSE` = "grey20", `TRUE` = "red")) +
  ## "none" hides the fill legend; guides(fill = FALSE) is deprecated
  guides(fill = "none") +
  ## option 2: use annotate to manually label a specific column:
  annotate(
    geom = "curve", x = 1.15, y = 30, xend = 1.35, yend = 26,
    curvature = .2, arrow = arrow(length = unit(2, "mm"))
  ) +
  annotate(geom = "text", x = 1, y = 31, label = "Highlight", hjust = "left")
Created on 2020-03-10 by the reprex package (v0.3.0)

I think gghighlight is not built for this kind of plot - not yet! You could file a feature request ? It is a bit unclear though if this visualisation is very helpful. Gghighlight always draws everything - this makes the "weird" shadows when dodging.
If you want to keep using gghighlight, maybe try faceting, which they suggest in their vignette.
A suggestion - Use facets:
(using mtcars as example)
library(tidyverse)
library(gghighlight)

# Treat cyl and gear as discrete labels rather than numbers.
mtcars2 <- mutate(
  mtcars,
  cyl = as.character(cyl),
  gear = as.character(gear)
)

# Stacked columns (no dodge): highlight on one variable only and put the
# other variable into the facets.
ggplot(mtcars2, aes(cyl, disp, fill = gear)) +
  geom_col() +
  gghighlight(cyl == "4") +
  facet_grid(~ gear)
#> Warning: Tried to calculate with group_by(), but the calculation failed.
#> Falling back to ungrouped filter operation...
Created on 2020-03-09 by the reprex package (v0.3.0)
Or, here without gghighlight, in a more traditional subsetting approach.
You need to make a subset of data which contains rows for each group you want to dodge by, in this case "cyl" and "gear". I replace the irrelevant data with "NA", you could also use "0".
library(tidyverse)

# Mean displacement per cyl/gear combination. ungroup() afterwards so later
# verbs do not silently run per `cyl` group.
mtcars2 <- mtcars %>%
  mutate(cyl = as.character(cyl), gear = as.character(gear)) %>%
  group_by(cyl, gear) %>%
  summarise(disp = mean(disp)) %>%
  ungroup()

# Same rows as mtcars2, but only the bar to highlight keeps its value; the
# other rows become NA so they still take up a dodge slot.
subset_mt <- mtcars2 %>%
  mutate(highlight = if_else(cyl == "4" & gear == "3", disp, NA_real_))

# Grey background bars for everything, then the highlighted bar on top.
ggplot() +
  geom_col(
    data = mtcars2, aes(cyl, disp, group = gear),
    fill = "grey", alpha = 0.6, position = "dodge"
  ) +
  geom_col(data = subset_mt, aes(cyl, highlight, fill = gear), position = "dodge")
#> Warning: Removed 7 rows containing missing values (geom_col).
Created on 2020-03-10 by the reprex package (v0.3.0)

Related

Creating a Stacked Percentage Bar Chart in R with ggplot with labels

I have a dataset that has the variables "SEXO" (M or F) and "Class" (0 or 1). I want to create a bar plot using ggplot2 that shows, for each sex, the distribution of Class as a percentage. I was able to get the plot, but I can't seem to get the labels working on the bars itself. I don't want to change the labels on the axis, I just want to get the % shown on the plot for each SEXO.
This is the code I have been using:
# One bar per SEXO, split by Class and rescaled so each bar has the same height.
ggplot(df, aes(x = SEXO, fill = Class)) +
  geom_bar(position = "fill")
I also attach an image of the plot produced by the code:
This would be the ideal outcome:
Here an example using the mtcars dataset where you can calculate the percentage per group and use these to place in your bars using label with geom_text like this:
library(ggplot2)
library(dplyr)

# Count each vs/am pair, then turn the counts into shares *within each vs*.
# Grouping by vs first matters: after summarise() the data is still grouped by
# vs, so sum(cnt) is the total of one bar and the labels in a bar add up to 100%.
mtcars %>%
  group_by(vs, am) %>%
  summarise(cnt = n()) %>%
  mutate(perc = round(cnt / sum(cnt), 2)) %>%
  ggplot(aes(x = factor(vs), fill = factor(am), y = perc)) +
  geom_col(position = "fill") +
  # use the same position as the bars so every label sits mid-segment
  geom_text(
    aes(label = paste0(perc * 100, "%")),
    position = position_fill(vjust = 0.5),
    size = 3
  ) +
  labs(fill = "Class", x = "vs") +
  scale_y_continuous(limits = c(0, 1))
#> `summarise()` has grouped output by 'vs'. You can override using the `.groups`
#> argument.
Created on 2022-11-02 with reprex v2.0.2

how to connect the means with a line within a single category in ggplot

Here is a dummy code :
library(ggplot2)
library(dplyr)

# Both summary layers share the same dodge so points sit on their error bars.
mean_dodge <- position_dodge(0.7)

# Small subset of diamonds: three colours, two cuts, three clarity grades.
diamonds |>
  dplyr::filter(
    color %in% c("D", "E", "F"),
    cut %in% c("Ideal", "Fair"),
    clarity %in% c("SI2", "VS2", "IF")
  ) |>
  ggplot(aes(x = clarity, y = carat, color = color, shape = cut)) +
  stat_summary(
    fun.data = mean_cl_boot, geom = "errorbar",
    width = 0.05, position = mean_dodge
  ) +
  stat_summary(fun = mean, geom = "point", size = 2, position = mean_dodge)
I would like to connect the means with a line within each clarity category ( ie connect circle to the triangle: shown in red colour on the picture as an example):
If I use geom_stat or geom_line, it gives the error "geom_path: Each group consists of only one observation. Do you need to adjust the group aesthetic?", which makes sense since both of them are within a single clarity group. I tried to use group=interaction() but it did not work either; I was only able to do it for points within different clarity groups.
I think best to use a manual dodge
library(ggplot2)
library(dplyr)

df <- diamonds %>%
  dplyr::filter(
    color %in% c("D", "E", "F"),
    cut %in% c("Ideal", "Fair"),
    clarity %in% c("SI2", "VS2", "IF")
  )

## make a named vector for your manual dodge: one evenly spaced offset per
## cut/color combination, sized from the data instead of a hard-coded count
dodge_keys <- unique(with(df, paste(cut, color, sep = "_")))
dodge_vec <- seq(-.25, .25, length.out = length(dodge_keys))
names(dodge_vec) <- dodge_keys

## some data alterations - assign dodge by subsetting with named vector
df <- df %>%
  mutate(cut_col = unname(dodge_vec[paste(cut, color, sep = "_")]))

## summarise for your lines (drop the grouping, it is not needed for plotting)
df_line <- df %>%
  group_by(clarity, cut, color, cut_col) %>%
  summarise(mean_carat = mean(carat), .groups = "drop")

## axis labels come from the same factor coding that produces the integer x
## positions, so breaks and labels cannot get out of step
clarity_levels <- levels(factor(df$clarity))

## need to pass your original x as an integer and add your new dodging column
ggplot(df, aes(x = as.integer(factor(clarity)) + cut_col, y = carat, color = color, shape = cut)) +
  stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.05) +
  stat_summary(fun = mean, geom = "point", size = 2) +
  ## add lines with your new data, using an interaction variable
  geom_line(
    data = df_line,
    aes(y = mean_carat, group = interaction(as.integer(clarity), color))
  ) +
  scale_x_continuous(breaks = seq_along(clarity_levels), labels = clarity_levels)
#> Warning: Using shapes for an ordinal variable is not advised
Your question suggests that you're dealing with paired data, therefore my suggestion in the comment. I wanted to give an example, but the diamond data set doesn't have paired data, thus it would be a bit difficult to fake that.
Created on 2022-05-31 by the reprex package (v2.0.1)

Label percentage in faceted filled barplot in ggplot2

I got stuck when trying to add percentage labels to a faceted bar plot with bars filled by another variable, such as the example below:
# Filled bars of am by gear, one panel per cyl plus the margins = TRUE panel;
# each segment is labelled with its raw count.
ggplot(mtcars, aes(x = factor(gear) %>% droplevels(), fill = factor(am))) +
  geom_bar(position = "fill") +
  geom_text(
    aes(label = ..count.., y = ..count..),
    stat = "count",
    position = position_fill(vjust = .5)
  ) +
  facet_grid(
    cols = vars(cyl),
    scales = "free_x",
    space = "free_x",
    margins = TRUE
  )
Created on 2021-02-26 by the reprex package (v0.3.0)
In the example, the labels are counts instead of percentages of am by gear for each cyl. I therefore tried to replace the label = argument in the aes() of geom_text() as
label = scales::percent(..count.. / tapply(..count.., list(..PANEL.., ..x..), sum)[..PANEL.., ..x..], accuracy = 1)
but it didn't work.
This seems to be asked a lot, but after reviewing many similar questions including the following:
percentage on y lab in a faceted ggplot
barchart?
R: ggplot stacked bar chart with counts on y axis but percentage as
label
Ggplot filled barplot with percentage labels [duplicate]
I still didn't manage to correctly reference the tapply() sums for creating the percentage labels as illustrated in my code above, and I think the overall panel makes it more complicated if I have to pre-calculate the percentages before plotting (I may need to duplicate the whole dataset and mutate cyl into a new variable facet, and then use facet_wrap() on the new variable instead of facet_grid()), as illustrated in my attempt below:
# Duplicate the data with facet = "(all)" so the overall panel is an ordinary
# facet level, then compute the percentages before plotting.
mtcars %>%
  bind_rows(mutate(mtcars, facet = "(all)")) %>%
  mutate(
    # original rows have no facet yet (NA): they get their cyl value
    facet = if_else(is.na(facet), as.character(cyl), facet) %>%
      factor(levels = c("4", "6", "8", "(all)"))
  ) %>%
  group_by(facet, gear, am) %>%
  summarise(freq = n()) %>%
  # Still grouped by facet and gear here, so sum(freq) is the total of one bar.
  # mutate() keeps one row per am; a summarise() that returns several rows per
  # group is deprecated in newer dplyr.
  mutate(pct = freq / sum(freq)) %>%
  ungroup() %>%
  ggplot(aes(x = factor(gear) %>% droplevels(), y = pct, fill = factor(am))) +
  facet_grid(cols = vars(facet), scales = "free_x", space = "free_x") +
  geom_col(position = "stack") +
  geom_text(
    aes(label = scales::percent(pct, accuracy = 1L)),
    position = position_stack(vjust = .5)
  )
#> `summarise()` regrouping output by 'facet', 'gear' (override with `.groups` argument)
Created on 2021-03-02 by the reprex package (v0.3.0)
However, it looks more verbose than the first solution, although my duplication of the data for including the "(all)" panel may not be the best way.
Any help fixing my first solution (with a little explanation) and improving the second solution will be greatly appreciated!
I managed to do it, but it's not pretty.
I still think the best way is to pre-process the data before plotting.
# Filled bars of am by gear, faceted by cyl (margins = TRUE), with percentage
# labels computed inside the geom_text() label aesthetic.
mtcars %>%
ggplot(aes(x = factor(gear) %>% droplevels(), fill = factor(am))) +
facet_grid(
cols = vars(cyl), scales = "free_x", space = "free_x", margins = TRUE
) +
geom_bar(position = "fill") +
geom_text(
# tapply() splits the computed counts by x position and panel. Within each
# split, every count becomes its share of the split total (in percent, rounded
# to two decimals) with a "%" pasted on. unlist() flattens the result into a
# single vector of labels.
# NOTE(review): this assumes the flattened order lines up with the row order
# of the computed count data -- confirm on your own data.
# NOTE(review): the ..name.. notation is presumably superseded by after_stat()
# in newer ggplot2 releases -- confirm for your version.
aes(label = unlist(tapply(..count.., list(..x.., ..PANEL..),
function(a) paste(round(100*a/sum(a), 2), '%'))),
y = ..count.. ), stat = "count",
# labels use a fill position with vjust = .5, matching the filled bars
position = position_fill(vjust = .5)
)
The general idea is that you have to do the tapply on the counts based on ..x.. and ..PANEL.. (in that order), which generates vectors of counts for each bar. You then generate the labels per bar from that vector by getting the percentage, rounding or whatever you need.
Finally, you have to unlist the tapply results so that ggplot takes it like a given vector of labels.
This outputs the following plot :

How to get the plots side by side and that too sorted according to Fill in R Language [duplicate]

I am making a dodged barplot in ggplot2 and one grouping has a zero count that I want to display. I remembered seeing this on HERE a while back and figured the scale_x_discrete(drop=F) would work. It does not appear to work with dodged bars. How can I make the zero counts show?
For instance, (code below) in the plot below, type8~group4 has no examples. I would still like the plot to display the empty space for the zero count instead of eliminating the bar. How can I do this?
# Two-factor view of mtcars: cylinders as `type`, gears as `group`.
mtcars2 <- data.frame(
  type = factor(mtcars$cyl),
  group = factor(mtcars$gear)
)
m2 <- ggplot(mtcars2, aes(x = type, fill = group))
# Spell out FALSE: `F` is an ordinary variable that can be reassigned.
p2 <- m2 +
  geom_bar(colour = "black", position = "dodge") +
  scale_x_discrete(drop = FALSE)
p2
Here's how you can do it without making summary tables first.
It did not work in my CRAN version (2.2.1), but in the latest development version of ggplot2 (2.2.1.900) I had no issues.
# preserve = "single" asks the dodge to keep the width of a single bar
dodge_single <- position_dodge(preserve = "single")
ggplot(mtcars, aes(factor(cyl), fill = factor(vs))) +
  geom_bar(position = dodge_single)
http://ggplot2.tidyverse.org/reference/position_dodge.html
Updated geom_bar() needs stat = "identity"
For what it's worth: The table of counts, dat, above contains NA. Sometimes, it is useful to have an explicit 0 instead; for instance, if the next step is to put counts above the bars. The following code does just that, although it's probably no simpler than Joran's. It involves two steps: get a crosstabulation of counts using dcast, then melt the table using melt, followed by ggplot() as usual.
library(ggplot2)
library(reshape2)

mtcars2 <- data.frame(type = factor(mtcars$cyl), group = factor(mtcars$gear))

# Step 1: cross-tabulate type by group, counting rows with length()
dat <- dcast(mtcars2, type ~ group, fun.aggregate = length)

# Step 2: back to long format, one row per type/group pair
dat.melt <- melt(dat, id.vars = "type", measure.vars = c("3", "4", "5"))
dat.melt

# Bars and count labels share the same dodge width so they line up.
count_dodge <- position_dodge(width = .8)
ggplot(dat.melt, aes(x = type, y = value, fill = variable)) +
  geom_bar(
    stat = "identity", colour = "black",
    position = count_dodge, width = 0.7
  ) +
  ylim(0, 14) +
  geom_text(aes(label = value), position = count_dodge, vjust = -0.5)
The only way I know of is to pre-compute the counts and add a dummy row:
# Count rows per type/group pair ...
pair_counts <- ddply(mtcars2, .(type, group), summarise, count = length(group))
# ... and append a dummy row (type 8, group 4) with an NA count
dat <- rbind(pair_counts, c(8, 4, NA))
ggplot(dat, aes(x = type, y = count, fill = group)) +
  geom_bar(stat = "identity", position = "dodge", colour = "black")
I thought that using stat_bin(drop = FALSE,geom = "bar",...) instead would work, but apparently it does not.
I asked this same question, but I only wanted to use data.table, as it's a faster solution for much larger data sets. I included notes on the data so that those that are less experienced and want to understand why I did what I did can do so easily. Here is how I manipulated the mtcars data set:
library(data.table)
library(scales)
library(ggplot2)

mtcars <- data.table(mtcars)
# New factor columns; ggplot2 can then use the names "Cylinders" and "Gears"
# directly and treats them as discrete.
mtcars[, Cylinders := as.factor(cyl)]
mtcars[, Gears := as.factor(gear)]
# Key on both columns so the join below matches on Cylinders and Gears.
setkey(mtcars, Cylinders, Gears)
# CJ() builds every Cylinders x Gears combination. by = .EACHI evaluates .N
# once per row of that table, giving one count per combination in column "N".
# Without it a current data.table returns one overall row count.
mtcars <- mtcars[
  CJ(unique(Cylinders), unique(Gears)),
  .N,
  by = .EACHI,
  allow.cartesian = TRUE
]
And here is the call that produced the graph
# Dodged bars of the pre-computed counts (columns Cylinders, Gears, N)
ggplot(mtcars, aes(x = Cylinders, y = N, fill = Gears)) +
  geom_col(position = "dodge") +
  scale_x_discrete(drop = FALSE) +
  labs(y = "Count") +
  theme(legend.position = "top")
And it produces this graph:
Furthermore, if there is continuous data, like that in the diamonds data set (thanks to mnel):
library(data.table)
library(scales)
library(ggplot2)

# Work on a data.table copy of diamonds, modified to create gaps for
# illustrative purposes.
diamonds <- data.table(diamonds)
setkeyv(diamonds, c("color", "cut"))

# Mark selected color/cut pairs by setting their carat to 0 ...
diamonds[J("E", c("Fair", "Good")), carat := 0]
diamonds[J("G", c("Premium", "Good", "Fair")), carat := 0]
diamonds[J("J", c("Very Good", "Fair")), carat := 0]

# ... then keep only the rows that were not marked.
diamonds <- diamonds[carat != 0]
Then using CJ would work as well.
# Mean carat for every cut/color pair that exists in the data. On its own this
# does not contain all possible combinations.
data <- data.table(diamonds)[, .(mean_carat = mean(carat)), keyby = .(cut, color)]

# Join against the complete list of cut x color combinations, exactly as in
# the discrete example.
data <- data[CJ(unique(cut), unique(color))]

ggplot(data, aes(color, mean_carat, fill = cut)) +
  geom_col(position = "dodge") +
  labs(x = "Color", y = "Mean Carat")
Giving us this graph:
Use count and complete from dplyr to do this.
library(tidyverse)

# Count every cyl/gear pair, then add the pairs that never occur with n = 0
# so they still get a slot in the dodged plot.
mtcars %>%
  count(type = as.factor(cyl), group = as.factor(gear)) %>%
  complete(type, group, fill = list(n = 0)) %>%
  ggplot(aes(x = type, y = n, fill = group)) +
  geom_col(colour = "black", position = "dodge")
You can exploit the feature of the table() function, which computes the number of occurrences of a factor for all its levels
# plyr provides ddply()
library(plyr)

# For each group, tabulate `type`. table() reports every factor level, so
# levels that never occur within a group come back with a count of 0.
df <- ddply(mtcars2, .(group), function(piece) {
  type_counts <- table(piece$type)
  data.frame(
    types = as.numeric(names(type_counts)),
    counts = as.numeric(type_counts)
  )
})

# plot the results
ggplot(df, aes(x = types, y = counts, fill = group)) +
  geom_bar(stat = "identity", colour = "black", position = "dodge")

Don't drop zero count: dodged barplot

I am making a dodged barplot in ggplot2 and one grouping has a zero count that I want to display. I remembered seeing this on HERE a while back and figured the scale_x_discrete(drop=F) would work. It does not appear to work with dodged bars. How can I make the zero counts show?
For instance, (code below) in the plot below, type8~group4 has no examples. I would still like the plot to display the empty space for the zero count instead of eliminating the bar. How can I do this?
# Two-factor view of mtcars: cylinders as `type`, gears as `group`.
mtcars2 <- data.frame(
  type = factor(mtcars$cyl),
  group = factor(mtcars$gear)
)
m2 <- ggplot(mtcars2, aes(x = type, fill = group))
# Spell out FALSE: `F` is an ordinary variable that can be reassigned.
p2 <- m2 +
  geom_bar(colour = "black", position = "dodge") +
  scale_x_discrete(drop = FALSE)
p2
Here's how you can do it without making summary tables first.
It did not work in my CRAN version (2.2.1), but in the latest development version of ggplot2 (2.2.1.900) I had no issues.
# preserve = "single" asks the dodge to keep the width of a single bar
dodge_single <- position_dodge(preserve = "single")
ggplot(mtcars, aes(factor(cyl), fill = factor(vs))) +
  geom_bar(position = dodge_single)
http://ggplot2.tidyverse.org/reference/position_dodge.html
Updated geom_bar() needs stat = "identity"
For what it's worth: The table of counts, dat, above contains NA. Sometimes, it is useful to have an explicit 0 instead; for instance, if the next step is to put counts above the bars. The following code does just that, although it's probably no simpler than Joran's. It involves two steps: get a crosstabulation of counts using dcast, then melt the table using melt, followed by ggplot() as usual.
library(ggplot2)
library(reshape2)

mtcars2 <- data.frame(type = factor(mtcars$cyl), group = factor(mtcars$gear))

# Step 1: cross-tabulate type by group, counting rows with length()
dat <- dcast(mtcars2, type ~ group, fun.aggregate = length)

# Step 2: back to long format, one row per type/group pair
dat.melt <- melt(dat, id.vars = "type", measure.vars = c("3", "4", "5"))
dat.melt

# Bars and count labels share the same dodge width so they line up.
count_dodge <- position_dodge(width = .8)
ggplot(dat.melt, aes(x = type, y = value, fill = variable)) +
  geom_bar(
    stat = "identity", colour = "black",
    position = count_dodge, width = 0.7
  ) +
  ylim(0, 14) +
  geom_text(aes(label = value), position = count_dodge, vjust = -0.5)
The only way I know of is to pre-compute the counts and add a dummy row:
# Count rows per type/group pair ...
pair_counts <- ddply(mtcars2, .(type, group), summarise, count = length(group))
# ... and append a dummy row (type 8, group 4) with an NA count
dat <- rbind(pair_counts, c(8, 4, NA))
ggplot(dat, aes(x = type, y = count, fill = group)) +
  geom_bar(stat = "identity", position = "dodge", colour = "black")
I thought that using stat_bin(drop = FALSE,geom = "bar",...) instead would work, but apparently it does not.
I asked this same question, but I only wanted to use data.table, as it's a faster solution for much larger data sets. I included notes on the data so that those that are less experienced and want to understand why I did what I did can do so easily. Here is how I manipulated the mtcars data set:
library(data.table)
library(scales)
library(ggplot2)

mtcars <- data.table(mtcars)
# New factor columns; ggplot2 can then use the names "Cylinders" and "Gears"
# directly and treats them as discrete.
mtcars[, Cylinders := as.factor(cyl)]
mtcars[, Gears := as.factor(gear)]
# Key on both columns so the join below matches on Cylinders and Gears.
setkey(mtcars, Cylinders, Gears)
# CJ() builds every Cylinders x Gears combination. by = .EACHI evaluates .N
# once per row of that table, giving one count per combination in column "N".
# Without it a current data.table returns one overall row count.
mtcars <- mtcars[
  CJ(unique(Cylinders), unique(Gears)),
  .N,
  by = .EACHI,
  allow.cartesian = TRUE
]
And here is the call that produced the graph
# Dodged bars of the pre-computed counts (columns Cylinders, Gears, N)
ggplot(mtcars, aes(x = Cylinders, y = N, fill = Gears)) +
  geom_col(position = "dodge") +
  scale_x_discrete(drop = FALSE) +
  labs(y = "Count") +
  theme(legend.position = "top")
And it produces this graph:
Furthermore, if there is continuous data, like that in the diamonds data set (thanks to mnel):
library(data.table)
library(scales)
library(ggplot2)

# Work on a data.table copy of diamonds, modified to create gaps for
# illustrative purposes.
diamonds <- data.table(diamonds)
setkeyv(diamonds, c("color", "cut"))

# Mark selected color/cut pairs by setting their carat to 0 ...
diamonds[J("E", c("Fair", "Good")), carat := 0]
diamonds[J("G", c("Premium", "Good", "Fair")), carat := 0]
diamonds[J("J", c("Very Good", "Fair")), carat := 0]

# ... then keep only the rows that were not marked.
diamonds <- diamonds[carat != 0]
Then using CJ would work as well.
# Mean carat for every cut/color pair that exists in the data. On its own this
# does not contain all possible combinations.
data <- data.table(diamonds)[, .(mean_carat = mean(carat)), keyby = .(cut, color)]

# Join against the complete list of cut x color combinations, exactly as in
# the discrete example.
data <- data[CJ(unique(cut), unique(color))]

ggplot(data, aes(color, mean_carat, fill = cut)) +
  geom_col(position = "dodge") +
  labs(x = "Color", y = "Mean Carat")
Giving us this graph:
Use count and complete from dplyr to do this.
library(tidyverse)

# Count every cyl/gear pair, then add the pairs that never occur with n = 0
# so they still get a slot in the dodged plot.
mtcars %>%
  count(type = as.factor(cyl), group = as.factor(gear)) %>%
  complete(type, group, fill = list(n = 0)) %>%
  ggplot(aes(x = type, y = n, fill = group)) +
  geom_col(colour = "black", position = "dodge")
You can exploit the feature of the table() function, which computes the number of occurrences of a factor for all its levels
# plyr provides ddply()
library(plyr)

# For each group, tabulate `type`. table() reports every factor level, so
# levels that never occur within a group come back with a count of 0.
df <- ddply(mtcars2, .(group), function(piece) {
  type_counts <- table(piece$type)
  data.frame(
    types = as.numeric(names(type_counts)),
    counts = as.numeric(type_counts)
  )
})

# plot the results
ggplot(df, aes(x = types, y = counts, fill = group)) +
  geom_bar(stat = "identity", colour = "black", position = "dodge")

Resources