Standard column width in facetted and grouped ggplot bar plot - r

I've made a bar chart using ggplot with grouped data, and facetted with facet_grid. The column widths are inconsistent, so I want to make them all the same. I've read this can be done with preserve="single, but it seems to mess up the position dodging. Any idea how to prevent this happening??
Here is a small sample of the data:
data <- structure(list(grp2 = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L,
7L, 7L, 7L, 7L, 7L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 7L, 7L, 7L, 7L,
7L), .Label = c("CSF1", "CSF2", "PC", "NC", "GPC", "GNC", "standard"
), class = "factor"), label2 = structure(c(7L, 8L, 9L, 7L, 8L,
9L, 7L, 15L, 15L, 15L, 15L, 15L, 7L, 8L, 9L, 7L, 8L, 9L, 7L,
15L, 15L, 15L, 15L, 15L), .Label = c("CSF1_raw", "CSF1_supernatant",
"CSF1_pellet", "CSF2_raw", "CSF2_supernatant", "CSF2_pellet",
"PC_raw", "PC_supernatant", "PC_pellet", "NC_raw", "NC_supernatant",
"NC_pellet", "GPC", "GNC", "standard", "NC"), class = "factor"),
mda_label = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 1L, 1L, 1L, 1L, 1L
), .Label = c("none", "mda_20", "mda_200"), class = "factor"),
conc = c(`7` = 0, `8` = 0, `9` = 0.324886127298521, `55` = 4.14765656994934,
`56` = 1.16840050032707, `57` = 8.33529714053568, `76` = 10.6220645144775,
`77` = 48.9241552191721, `78` = 4.51513315624087, `79` = 1.03887911533275,
`80` = 0.0445944796011582, `81` = 0.00484116548901831, `89` = 0,
`90` = 0, `91` = 0.322922569348207, `137` = 6.38488684568018,
`138` = 1.68909814271646, `139` = 7.61828609738757, `158` = 15.3082130743032,
`159` = 41.3127531345335, `160` = 4.64193087683391, `161` = 0.411672491030815,
`162` = 0.0568193835425769, `163` = 0.00439419098560105)), row.names = c(NA,
-24L), class = c("tbl_df", "tbl", "data.frame"))
Here's the initial plot:
ggplot(data, aes(x=label2, y=conc, colour=mda_label, fill=mda_label)) +
facet_grid(. ~ grp2, scales="free_x", space="free") +
stat_summary(fun = mean, geom = "bar", position = position_dodge()) +
stat_summary(fun.data = mean_se, geom = "errorbar", colour="black", width=0.5,
position = position_dodge(width=0.9)) +
geom_point(position = position_dodge(width=0.9), pch=21, colour="black") +
scale_y_continuous(trans='pseudo_log',
labels = scales::number_format(accuracy=0.01)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
But when I try to standardise the column widths with preserve="single", it gets messed up:
ggplot(data, aes(x=label2, y=conc, colour=mda_label, fill=mda_label)) +
facet_grid(. ~ grp2, scales="free_x", space="free") +
stat_summary(fun = mean, geom = "bar", position = position_dodge(preserve="single")) +
stat_summary(fun.data = mean_se, geom = "errorbar", colour="black", width=0.5,
position = position_dodge(width=0.9, preserve="single")) +
geom_point(position = position_dodge(width=0.9, preserve="single"), pch=21, colour="black") +
scale_y_continuous(trans='pseudo_log',
labels = scales::number_format(accuracy=0.01)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))

Since you're using data that as 0 values, you could make the 0 values for the other 'mda_label' on grp2/label2 standard categories.
data <- rbind(data, data.frame(grp2 = c("standard", "standard"),
label2 = c("standard", "standard"),
mda_label = c("mda_20", "mda_200"),
conc = c(0, 0)))
Also you never actually make the bar plot
data %>%
ggplot(aes(label2, conc, fill = mda_label)) +
geom_col(position = position_dodge(width = 1)) +
facet_grid(. ~ grp2, scales = "free", space = "free")

Related

coord_polar: moving labels only for the outer ring to the outside

I can only find a solution for this in relation to single-level pie charts. I have the chart below and some of the labels on the outer ring don't fit well.
I'd like to keep the labels for the inner ring where they are but move the labels for the second ring to the outside (or at least the ones that don't fit).
Here is my code
ggplot(usage.may, aes(x = Level, y = Percent, fill = Subcategory, label = Label)) +
geom_bar(stat = "identity", color='white', show.legend = FALSE) +
geom_text(aes(label = paste0(Label, "\n", Value, " (", Per_label, ")")),
size = 2.5,
colour = "white",
check_overlap = TRUE,
position = position_stack(vjust = 0.5)) +
coord_polar('y') +
scale_fill_manual(values = c("C01" = "#404688FF",
"C011" = "#3B528BFF","C012" = "#3B528BFF","C013" = "#3B528BFF","C014" = "#3B528BFF",
"C02" = "#287C8EFF",
"C021" = "#287C8EFF",
"C03" = "#27AD81FF",
"C031" = "#35B779FF","C032" = "#35B779FF","C033" = "#35B779FF",
"C04" = "#8FD744FF",
"C041" = "#8FD744FF","C042" = "#8FD744FF")) +
labs(title = "Electricity Usage May 2022") + ylab("") + xlab("") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
plot.title = element_text(hjust = 0.5),
panel.border = element_blank(),
panel.background = element_blank(),
strip.background = element_blank(),
axis.text= element_blank(),
axis.ticks= element_blank())
And sample data
structure(list(Level = structure(c(2L, 3L, 3L, 3L, 3L, 2L, 3L,
2L, 3L, 3L, 3L, 2L, 3L, 3L, 1L), levels = c("0", "1", "2"), class = "factor"),
Category = structure(c(2L, 2L, 2L, 2L, 2L, 3L, 3L, 4L, 4L,
4L, 4L, 5L, 5L, 5L, 1L), levels = c("C00", "C01", "C02",
"C03", "C04"), class = "factor"), Subcategory = structure(c(2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
1L), levels = c("C00", "C01", "C011", "C012", "C013", "C014",
"C02", "C021", "C03", "C031", "C032", "C033", "C04", "C041",
"C042"), class = "factor"), Colour = structure(c(2L, 3L,
3L, 3L, 3L, 4L, 5L, 6L, 7L, 7L, 7L, 8L, 9L, 9L, 1L), levels = c("0",
"1", "2", "3", "4", "5", "6", "7", "8"), class = "factor"),
Label = c("Cafe (R1 & R2) ", "Non-checked ", "Spider Monkey ",
"Signing-in Cabin", "Solar (cafe)", "Vet Room", "Non-checked",
"Butchery", "Non-checked", "Solar (lynx)", "Solar (butchery)",
"Tiger Block", "Farm", "Non-checked", ""), Value = c(5323L,
921L, 2611L, 34L, 1791L, 534L, 534L, 8479L, 6689L, 1371L,
419L, 3596L, 87L, 3247L, 0L), Percent = c(30L, 5L, 15L, 0L,
10L, 3L, 3L, 47L, 37L, 8L, 2L, 20L, 2L, 18L, 0L), Per_label = c("30%",
"5%", "15%", "0%", "10%", "3%", "3%", "47%", "37%", "8%",
"2%", "20%", "2%", "18%", "0%")), row.names = c(NA, -15L), class = "data.frame")
Thanks in advance
One option would be to use an ifelse to shift the x position of the labels for the outer ring. Additionally I use an ifelse + scale_color_identity to conditionally set the font color of the labels:
library(ggplot2)
ggplot(usage.may, aes(x = Level, y = Percent, fill = Subcategory, label = Label)) +
geom_bar(stat = "identity", color='white', show.legend = FALSE) +
geom_text(aes(label = paste0(Label, "\n", Value, " (", Per_label, ")"),
x = as.numeric(Level) + ifelse(Level == 2, 1, 0),
color = ifelse(Level == 2, "black", "white")),
size = 2.5,
check_overlap = TRUE,
position = position_stack(vjust = 0.5)) +
coord_polar('y') +
scale_fill_manual(values = c("C01" = "#404688FF",
"C011" = "#3B528BFF","C012" = "#3B528BFF","C013" = "#3B528BFF","C014" = "#3B528BFF",
"C02" = "#287C8EFF",
"C021" = "#287C8EFF",
"C03" = "#27AD81FF",
"C031" = "#35B779FF","C032" = "#35B779FF","C033" = "#35B779FF",
"C04" = "#8FD744FF",
"C041" = "#8FD744FF","C042" = "#8FD744FF")) +
scale_color_identity() +
labs(title = "Electricity Usage May 2022") + ylab("") + xlab("") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
plot.title = element_text(hjust = 0.5),
panel.border = element_blank(),
panel.background = element_blank(),
strip.background = element_blank(),
axis.text= element_blank(),
axis.ticks= element_blank())

Adding lines connecting means to ggplot (Raincloud Plots)

I have a ggplot to which I want to add a line connecting the means. However, I keep getting an Error message:
"geom_path: Each group consists of only one
observation. Do you need to adjust the group
aesthetic?"
I tried solutions suggested on here, but these seemingly stopped working years ago. Hence, I opened a new post.
#some packages
if (!require("pacman")) install.packages("pacman")
pacman::p_load(here, readr, cowplot, tidyr, ggplot2, dplyr)
#some functions from https://github.com/RainCloudPlots/RainCloudPlots
source("R_rainclouds.R")
source("summarySE.R")
source("simulateData.R")
#some data
df3 <- structure(list(participant = c(1L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L,
24L, 25L, 26L, 1L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L,
14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L,
1L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L,
17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L), condition = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("RT_Cau", "RT_Moro",
"RT_Asi"), class = "factor"), RT = c(1.44248448543333, 2.73934002973517,
1.89927013090706, 1.32510448686595, 2.44101598069973, 2.84290772015973,
1.19419819108836, 2.12124958877175, 1.14707311860052, 2.72286767178203,
1.15914495363538, 1.5340050993702, 1.62616192435053, 1.32694796283192,
1.2720800304128, 0.99275928310549, 1.04329096409593, 1.43288644582691,
1.60302970699442, 1.3393626055176, 1.24088162033185, 2.42448868318791,
1.6398716779282, 1.53816275909702, 1.51033130413559, 3.226993255043,
2.1915727996463, 1.39240057519678, 3.0538809712989, 2.52658416881183,
1.16366335020089, 2.33377114484134, 1.39357978132538, 2.691606623485,
1.21999657945028, 1.72195011524003, 1.38834235226937, 1.44350802586345,
1.29563539425317, 0.909762618509679, 1.13583585924538, 1.58240957515452,
1.82142351906117, 1.3644415734435, 1.32141664778601, 2.23277562688125,
1.5773976029336, 1.43200172590417, 1.68991681725, 2.9617422858462,
1.60886625604519, 1.38647850513866, 3.46156610375971, 2.96950698342897,
1.17905107770577, 2.36256332626113, 1.31254065801458, 3.204902618708,
1.21067325368702, 1.80371515914087, 1.57816183853565, 1.40761655308155,
1.27304559913463, 1.07621914272144, 1.04203150853998, 1.58958820979388,
1.79859778873147, 1.19249820050996, 1.4116357628608, 2.15806795062162,
1.70597872926531, 1.66135756110131)), row.names = c(NA, -72L), class = "data.frame")
#make a summary of the data
df4 <- summarySE(df3, measurevar = "RT", groupvars = c("condition"))
#a working plot that shows dots, boxplot, distribution, and mean+SE
#I want to have lines connecting the mean dots.
ggplot(df3,aes(x=condition,y=RT,fill=condition,col=condition))+
geom_flat_violin(position = position_nudge(x = .2, y = 0), alpha = .6,adjust =4)+
geom_point(aes(x = as.numeric(condition)-.15, y = RT, colour = condition),position = position_jitter(width = .05), size = .25, shape = 20)+
geom_boxplot(aes(x = condition, y = RT, fill = condition),outlier.shape = NA, alpha = .5, width = .1, colour = "black") +
geom_point(data = df4, aes(x = as.numeric(condition)+.1, y = RT_mean, group = condition, colour = condition), shape = 18) +
geom_errorbar(data = df4, aes(x = as.numeric(condition)+.1, y = RT_mean, group = condition, colour = condition, ymin = RT_mean-se, ymax = RT_mean+se), width = .05) +
ylab('RT')+
scale_fill_brewer(palette = "Dark2")+scale_colour_brewer(palette = "Dark2")+
guides(fill = FALSE, col = FALSE) +
theme_bw() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_rect(fill = "transparent",colour = NA),
plot.background = element_rect(fill = "transparent",colour = NA)
) # +
# geom_line(data = df4, aes(x = as.numeric(condition)+.1, #y = RT_mean, group = condition, colour = condition), #linetype = 3)
#The commented outpart is my attempt to connect it with lines, which produces the described error.
#If you do not want to load the package from github, here is the raw code of the summariseSE function:
# summarySE function
summarySE <- function(data = NULL, measurevar, groupvars = NULL, na.rm = FALSE,
conf.interval = .95, .drop = TRUE) {
library(plyr)
# New version of length which can handle NA's: if na.rm==T, don't count them
length2 <- function(x, na.rm = FALSE) {
if (na.rm) {
sum(!is.na(x))
} else {
length(x)
}
}
# This does the summary. For each group's data frame, return a vector with
# N, mean, median, and sd
datac <- plyr::ddply(data, groupvars, .drop=.drop,
.fun = function(xx, col) {
c(N = length2(xx[[col]], na.rm=na.rm),
mean = mean(xx[[col]], na.rm=na.rm),
median = median(xx[[col]], na.rm=na.rm),
sd = sd(xx[[col]], na.rm=na.rm)
)
},
measurevar
)
# Rename the "mean" and "median" columns
datac <- plyr::rename(datac, c("mean" = paste(measurevar, "_mean", sep = "")))
datac <- plyr::rename(datac, c("median" = paste(measurevar, "_median", sep = "")))
datac$se <- datac$sd / sqrt(datac$N) # Calculate standard error of the mean
# Confidence interval multiplier for standard error
# Calculate t-statistic for confidence interval:
# e.g., if conf.interval is .95, use .975 (above/below), and use df=N-1
ciMult <- qt(conf.interval / 2 + .5, datac$N - 1)
datac$ci <- datac$se * ciMult
return(datac)
}
The final plot should have lines connecting the means, similar to Plot 11 of https://wellcomeopenresearch.org/articles/4-63/v2.
Thanks already for your help!
This can be useful:
#Code
ggplot(df3,aes(x=condition,y=RT,fill=condition,col=condition))+
geom_flat_violin(position = position_nudge(x = .2, y = 0),
alpha = .6,adjust =4)+
geom_point(aes(x = as.numeric(condition)-.15, y = RT,
colour = condition),
position = position_jitter(width = .05), size = .25, shape = 20)+
geom_boxplot(aes(x = condition, y = RT, fill = condition),
outlier.shape = NA, alpha = .5,
width = .1, colour = "black") +
geom_point(data = df4, aes(x = as.numeric(condition)+.1,
y = RT_mean,
group = condition, colour = condition), shape = 18) +
geom_errorbar(data = df4, aes(x = as.numeric(condition)+.1, y = RT_mean, group = condition, colour = condition, ymin = RT_mean-se, ymax = RT_mean+se), width = .05) +
ylab('RT')+
scale_fill_brewer(palette = "Dark2")+scale_colour_brewer(palette = "Dark2")+
guides(fill = FALSE, col = FALSE) +
theme_bw() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_rect(fill = "transparent",colour = NA),
plot.background = element_rect(fill = "transparent",colour = NA)
) +
geom_line(data = df4, aes(x = as.numeric(condition)+.1,
y = RT_mean, group = 1)
Output:

Change color for specific data in ggplot2

I have 15 measurement points and i defined "renkler" color palette for them. I want to change the color of 2 (red: DEF-2 and DEF-13 points in the ps_no column) in these 15.
My codes are
library(ggplot2)
library(reshape)
dat <- read.delim("a.txt")
dat$Date <- as.Date(dat$Date,"%d/%m/%Y")
# order
dat$parameter <- factor(dat$parameter, levels = c("DEF-2", "DEF-13"))
dat$ps_no <- factor(dat$ps_no, levels = c("DEF-2", "PS.584", "PS.585", "PS.586", "PS.603", "PS.630", "DEF-13", "PS.600", "PS.667", "PS.690", "PS.714", "PS.734", "PS.754", "PS.811", "PS.813"))
# create own color palette
library(RColorBrewer)
renkler = c(brewer.pal(name="Set2", n = 7), brewer.pal(name="Set2", n = 8))
# Setup plot without facets
p <- ggplot(data = dat, aes(x = Date, y = value)) +
geom_line(aes(color = ps_no)) +
geom_point(aes(color = ps_no)) +
scale_color_manual(values = renkler) + # oluşturduğumuz paleti yüklemek için
scale_x_date(date_breaks = "1 months",date_labels = "%Y-%m",
limits = as.Date.character(c("01/12/2017","31/12/2018"),
format = "%d/%m/%Y")) +
ylab("[mm/year]") +
xlab("") +
facet_grid(parameter ~ .) +
theme_bw()
p + theme(
axis.text.x = element_text(angle = 45, hjust = 1),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank(),
)
and the data output with dput(dat):
structure(list(parameter = structure(c(2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("DEF-13",
"DEF-2"), class = "factor"), ps_no = structure(c(3L, 3L, 3L,
3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 7L, 7L, 7L, 8L, 8L, 8L, 2L,
2L, 2L, 6L, 6L, 6L, 9L, 9L, 9L, 10L, 10L, 10L, 11L, 11L, 11L,
12L, 12L, 12L, 13L, 13L, 13L, 14L, 14L, 14L, 15L, 15L, 15L, 1L,
1L, 1L), .Label = c("DEF-13", "DEF-2", "PS.584", "PS.585", "PS.586",
"PS.600", "PS.603", "PS.630", "PS.667", "PS.690", "PS.714", "PS.734",
"PS.754", "PS.811", "PS.813"), class = "factor"), Date = structure(c(17534,
17546, 17870, 17882, 17534, 17546, 17870, 17882, 17534, 17546,
17870, 17882, 17534, 17546, 17882, 17534, 17546, 17882, 17536,
17557, 17879, 17534, 17546, 17882, 17534, 17546, 17882, 17534,
17546, 17882, 17534, 17546, 17882, 17534, 17546, 17882, 17534,
17546, 17882, 17534, 17546, 17882, 17534, 17546, 17882, 17536,
17549, 17886), class = "Date"), value = c(0, 1.23684, -12.15729097,
-11.4102363, 0, 2.45200798, 1.12950398, -2.76779102, 0, 0.924571,
-7.1917482, -6.2764626, 0, -4.0725265, 0.4847485, 0, 0.290382,
-6.098794, 0, 0.813289109, -0.426076522, 0, 1.7502, -5.139665,
0, -29.67012, -14.956098, 0, 12.8852143, 7.4377433, 0, 1.404183,
-12.426633, 0, -24.09551, -7.619493, 0, -4.194441, -16.258703,
0, -0.835691, -10.504454, 0, 1.311699, 6.30102, 0, -1.49366556,
-1.835284539)), row.names = c(NA, -48L), class = "data.frame")
And also I need to change legend tittle (ps_no) and the texts on the right side of plots (DEF-2 and DEF-13).
Thank you.
Edit:
I filter the data which I want to show different color with using filter command. After filter command, I add a command line for geom_line and another command line for geom_point. It is working in the plot. But this is not the answer literally because the colors in the legend do not change.
So this the the new version of codes:
library(ggplot2)
library(reshape)
dat <- read.delim("aroundDEF.txt")
dat$Date <- as.Date(dat$Date,"%d/%m/%Y")
# order
dat$parameter <- factor(dat$parameter, levels = c("DEF-2", "DEF-13"))
dat$ps_no <- factor(dat$ps_no, levels = c("DEF-2", "PS.584", "PS.585", "PS.586", "PS.603", "PS.630", "DEF-13", "PS.600", "PS.667", "PS.690", "PS.714", "PS.734", "PS.754", "PS.811", "PS.813"))
# create own color palette
library(RColorBrewer)
renkler = c(brewer.pal(name="Set2", n = 7), brewer.pal(name="Set2", n = 8))
geom_line(aes(color = ps_no)) +
geom_line(data=highlight_df, aes(color = ps_no), color='#da0018') +
geom_point(aes(color = ps_no)) +
geom_point(data=highlight_df, aes(color = ps_no), color='#da0018') +
# filter dataframe to get data to be highligheted
highlight_df <- dat %>%
filter(ps_no=="DEF-2" | ps_no=="DEF-13")
# Setup plot without facets
p <- ggplot(data = dat, aes(x = Date, y = value)) +
scale_color_manual(values = renkler) +
scale_x_date(date_breaks = "1 months",date_labels = "%Y-%m",
limits = as.Date.character(c("01/12/2017","31/12/2018"),
format = "%d/%m/%Y")) +
ylab("[mm/year]") +
xlab("") +
facet_grid(parameter ~ .
, labeller = as_labeller( c("DEF-2" = "DEF-2 and around", "DEF-13" = "DEF-13 and around"))) +
theme_bw()
p + theme(
axis.text.x = element_text(angle = 45, hjust = 1),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank(),
)
In short, still I need an answer...
After renkler variable:
renkler[1]= "#DA0018"
renkler[7]= "#DA0018"
For the legend title:
scale_color_manual(values = renkler, name="new name")

With both stacked and dodged bars, how can you remove dodge-bar elements from legend?

Thanks to combine stacked bars and dodged bars, I created the plot below using the data frame shown. But now, since the axis titles name the bars, how can I remove the legend elements other than for the one stacked bar? That is, can the legend show only the segments of the Big8 bar?
> dput(combo)
structure(list(firm = structure(c(12L, 1L, 11L, 13L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L), .Label = c("Avg.", "Co", "Firm1",
"Firm2", "Firm3", "Firm4", "Firm5", "Firm6", "Firm7", "Firm8",
"Median", "Q1", "Q3"), class = "factor"), metric = structure(c(5L,
1L, 4L, 6L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Avg.",
"Big8", "Co", "Median", "Q1", "Q3"), class = "factor"), value = c(0.0012,
0.0065, 0.002, 0.0036, 0.0065, 0.000847004466666667, 0.000658907411111111,
0.0002466389, 8.41422555555556e-05, 8.19149222222222e-05, 7.97185555555556e-05,
7.82742555555556e-05, 7.56679888888889e-05), grp = structure(c(1L,
2L, 3L, 6L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), .Label = c("Q1",
"Avg.", "Median", "Co", "Big8", "Q3"), class = "factor")), .Names = c("firm",
"metric", "value", "grp"), row.names = c(NA, -13L), class = "data.frame")
Here is the plotting code.
ggplot(combo, aes(x=grp, y=value, fill=firm)) +
geom_bar(stat="identity") +
labs(x = "", y = "") +
theme(legend.position = "bottom") +
guides(fill = guide_legend(nrow = 2))
The plot, which ideally would have a smaller set of elements in the legend.
You can manually set the breaks for scale_fill_discrete:
library(ggplot2)
ggplot(combo, aes(x=grp, y=value, fill=firm)) +
geom_bar(stat="identity") +
labs(x = "", y = "") +
theme(legend.position = "bottom") +
guides(fill = guide_legend(nrow = 2)) +
scale_fill_discrete(breaks = combo$firm[combo$metric=="Big8"])
I'm not 100% sure which labels you want to keep, but a manually entered vector, combo$firm and combo$metric will all work.

How to add marginal rugs above bars of a bar chart with ggplot2

Is it possible to add marginal rug lines above bars? Using the data set below, how can you add 4 rug lines above Brazil, 8 above Canada, etc.
ctryfees <- feesctry %>% group_by(country) %>% summarise(total = sum(fees))
library(ggplot2)
library(ggthemes)
ggplot(ctryfees, aes(x = country, y = total)) +
geom_bar(stat = "identity") + theme_tufte() +
ggtitle("Fees Paid Law Firms per Country\nNumber of Firms Paid\n") +
labs(x = "", y = "") +
scale_y_continuous(label = dollar.format) +
geom_rug(data = feesctry, mapping = aes(x = country, y = firms), sides = "top")
The code does not work after the scale_y_continuous line as it throws this error: Error: Discrete value supplied to continuous scale
> dput(feesctry)
structure(list(country = structure(c(1L, 1L, 1L, 1L, 2L, 3L,
4L, 4L, 5L, 5L, 6L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 6L, 6L, 3L, 3L,
3L, 3L), .Label = c("Brazil", "Canada", "China", "France", "Germany",
"UK"), class = "factor"), firms = structure(c(1L, 2L, 3L, 4L,
5L, 13L, 18L, 19L, 20L, 21L, 22L, 6L, 7L, 8L, 9L, 10L, 11L, 12L,
23L, 24L, 14L, 15L, 16L, 17L), .Label = c("brazil1", "brazil2",
"brazil3", "brazil4", "can1", "can2", "can3", "can4", "can5",
"can6", "can7", "can8", "china1", "china2", "china3", "china4",
"china5", "france1", "france2", "german1", "german2", "uk1",
"uk2", "uk3"), class = "factor"), fees = c(80000, 80000, 80000,
80000, 1e+05, 5e+05, 2e+05, 2e+05, 1e+05, 1e+05, 5e+05, 1e+05,
1e+05, 1e+05, 1e+05, 1e+05, 1e+05, 1e+05, 5e+05, 5e+05, 5e+05,
5e+05, 5e+05, 5e+05)), .Names = c("country", "firms", "fees"), row.names = c(NA,
-24L), class = "data.frame")
From
p <-
ggplot(ctryfees, aes(x = country, y = total)) +
geom_bar(stat = "identity") + theme_tufte() +
ggtitle("Fees Paid Law Firms per Country\nNumber of Firms Paid\n") +
labs(x = "", y = "") +
scale_y_continuous(label = dollar_format())
you could try
p + geom_rug(data = transform(feesctry, id = as.numeric(country)),
mapping = aes(x = ave(id, id, FUN = function(x)
x + scale(seq_along(x), scale = 50)),
y = 1),
sides = "top")
or just
p + geom_rug(data = feesctry,
mapping = aes(x = jitter(as.numeric(country)),
y = 1),
sides = "top")

Resources