Related
I'd like to display the sample size of each species on my boxplot, but the plot is fairly cluttered. Is there a way to display the sample size on the "outside" of the plot with "N" as the header (sample size should go in the red area I highlighted)?
# Toy survey data: 20 visits over two years with alternating month/type
# labels, a random salinity reading and a random catch count per visit.
# salinity is drawn before num, so a seeded run yields the same table.
df <- data.frame(
  year = rep(c(2019, 2020), each = 10),
  month = rep(c("March", "October"), each = 1),
  site = rep(c("1", "2", "3", "4", "5"), each = 2),
  type = rep(c("baitfish", "shark"), each = 1),
  salinity = sample(x = 20:35, size = 20, replace = TRUE),
  num = sample(x = 0:10, size = 20, replace = TRUE)
)

# One row per counted animal: repeat every visit `num` times and keep the
# first five columns (the count column itself is dropped).
count_by_row <- data.frame(df[rep(row.names(df), df$num), 1:5])

# Horizontal boxplot of salinity per type, types ordered by median salinity.
ggplot(
  data = subset(count_by_row, !is.na(salinity)),
  mapping = aes(x = reorder(type, -salinity, FUN = median), y = salinity)
) +
  geom_boxplot(outlier.shape = 1, outlier.size = 2) +
  coord_flip() +
  labs(x = "", y = "salinity (PSU)")
You could add a custom annotation after increasing the right margin:
# Boxplot of salinity by type with the per-type sample size written in the
# right-hand margin under an "N" header.

# Filter once and fix the axis order once, so that the boxes and the counts
# are derived from exactly the same rows and the same level order.
plot_data <- subset(count_by_row, !is.na(salinity))
plot_data$type <- reorder(plot_data$type, -plot_data$salinity, FUN = median)

# Counts in factor-level order, i.e. bottom-to-top on the y axis. Empty levels
# are removed because the discrete axis drops them as well.
n_by_type <- table(plot_data$type)
n_by_type <- n_by_type[n_by_type > 0]

# Vertical centre of each box in npc units. A discrete axis with k levels and
# the default expansion of 0.6 units per side spans 0.4 .. k + 0.6, so level i
# sits at (i - 0.4) / (k + 0.2). For k = 2 this gives 0.27 and 0.73.
n_levels <- length(n_by_type)
box_y <- (seq_len(n_levels) - 0.4) / (n_levels + 0.2)

ggplot(plot_data, aes(y = type, x = salinity)) +
  geom_boxplot(outlier.shape = 1, outlier.size = 2, orientation = "y") +
  # clip = "off" lets the annotation be drawn outside the panel area
  coord_cartesian(clip = "off") +
  annotation_custom(grid::textGrob(
    label = c("N", as.vector(n_by_type)),
    # x > 1 places the text to the right of the panel; the header height (0.9)
    # is hand-tuned and may need adjusting when there are many levels
    x = 1.1, y = c(0.9, box_y),
    gp = grid::gpar(cex = 1.5)
  )) +
  ylab("") +
  xlab("salinity (PSU)") +
  theme_bw(base_size = 20) +
  # wide right margin makes room for the N column
  theme(plot.margin = margin(20, 100, 20, 20))
This question builds on an earlier question (the link was lost; only the placeholder "enter link description here" remains), but is in the context of faceted boxplots.
So, I have the following code:
set.seed(20210714)

# Two simulated result sets that differ only in `n` (10 vs 5). X1 is drawn
# separately for each, first for dd and then for dd1.
dd <- data.frame(
  Method = rep(c("A", "B", "C"), each = 60),
  Pattern = rep(c("X", "Y", "Z"), times = 30),
  X1 = runif(180),
  Complexity = rep(c("High", "Low"), times = 90),
  nsim = rep(rep(1:10, times = 9), each = 2),
  n = 10
)
dd1 <- data.frame(
  Method = rep(c("A", "B", "C"), each = 60),
  Pattern = rep(c("X", "Y", "Z"), times = 30),
  X1 = runif(180),
  Complexity = rep(c("High", "Low"), times = 90),
  nsim = rep(rep(1:10, times = 9), each = 2),
  n = 5
)
dd <- rbind(dd, dd1)

library(ggplot2)

# create dummy dataframe: two extra rows with the placeholder method "ZZZ"
# (one per facet, X1 left NA) -- presumably spacers between the pattern groups
# on the x axis; see the blank entries in the axis labels below.
dummy.df <- dd
dummy.df[nrow(dd) + 1:2, "Pattern"] <- unique(dd$Pattern)[-3]
dummy.df[nrow(dd) + 1:2, "Method"] <- "ZZZ"
dummy.df[nrow(dd) + 1:2, "Complexity"] <- c("High", "Low")
dummy.df$dummy <- interaction(dummy.df$Method, dummy.df$Pattern)

ggplot(dummy.df, aes(x = dummy, y = X1, fill = Method)) +
  # layers, drawn in this order: boxes, then per-simulation lines and points
  geom_boxplot() +
  geom_line(
    aes(group = interaction(Pattern, nsim)),
    size = 0.35, alpha = 0.35, colour = I("#525252")
  ) +
  geom_point(
    aes(group = interaction(Pattern, nsim)),
    size = 0.35, alpha = 0.25, colour = I("#525252")
  ) +
  facet_grid(~Complexity) +
  # scales: one label per pattern group, legend limited to the real methods
  scale_x_discrete(labels = c("", "X", "", "", "", "Y", "", "", "", "Z", "", "")) +
  scale_fill_brewer(breaks = c("A", "B", "C"), type = "qual", palette = "Paired") +
  guides(fill = guide_legend(nrow = 1)) +
  labs(x = "Pattern") +
  theme_light() +
  theme(legend.position = "bottom")
# Rebuild the padded data, this time distinguishing the two sample sizes.
dummy.df <- dd
dummy.df[nrow(dd) + 1:2, "Pattern"] <- unique(dd$Pattern)[-3]
dummy.df[nrow(dd) + 1:2, "Method"] <- "ZZZ"
dummy.df[nrow(dd) + 1:2, "Complexity"] <- c("High", "Low")

# This first `dummy` is replaced a few lines below; only its column position
# survives.
dummy.df$dummy <- interaction(dummy.df$Method, dummy.df$Pattern)

# fill = Method.n; dummy = Method.n.Pattern defines the x position of each box
dummy.df$fill <- interaction(dummy.df$Method, dummy.df$n)
dummy.df$dummy <- interaction(dummy.df$fill, dummy.df$Pattern)
dummy.df$dummy <- factor(
  dummy.df$dummy,
  levels = levels(dummy.df$dummy)[-c(4, 12, 20, 24)]
)
dummy.df$dummy[361:362] <- "A.10.Z" ## dummy variables to get rid of NAs

theme_set(theme_bw(base_size = 14))

ggplot(dummy.df, aes(x = dummy, y = X1, fill = fill)) +
  # layers, drawn in this order: boxes, then per-simulation lines and points
  geom_boxplot(lwd = 0.1, outlier.size = 0.01) +
  geom_line(
    aes(group = interaction(Pattern, nsim, n)),
    size = 0.35, alpha = 0.35, colour = I("#525252")
  ) +
  geom_point(
    aes(group = interaction(Pattern, nsim, n)),
    size = 0.35, alpha = 0.25, colour = I("#525252")
  ) +
  facet_grid(~Complexity) +
  # a single axis label per pattern, attached to the "A.10.<pattern>" position
  scale_x_discrete(
    labels = c("X", "Y", "Z"),
    breaks = paste0("A.10.", c("X", "Y", "Z")),
    drop = FALSE
  ) +
  scale_fill_brewer(
    breaks = levels(dummy.df$fill)[-c(4, 8)],
    type = "qual", palette = "Paired"
  ) +
  guides(fill = guide_legend(nrow = 1)) +
  labs(x = "Pattern") +
  theme(legend.position = "bottom")
This yields the following plot.
All is well, except for the legend. I would like the following: the dark colours in a first group titled "n=5" on the left, with "A", "B", "C" as the labels for the three dark colours, and the light colours in a second group titled "n=10" on the right, with "A", "B", "C" as the labels for the three light colours — similar to the question linked above (the link itself was lost; only the placeholder "enter link description here" remains).
What I cannot figure out is how to call the boxplot twice to mimic the solution there.
Is there a way to do this? Please feel free to let me know if the question is not clear.
Thanks again, in advance, for any help!
Adapting my answer on your former question this could be achieved like so:
library(ggplot2)
library(ggnewscale)

# Legend keys: all levels of dummy.df$fill except positions 4 and 8, sorted.
fill <- sort(levels(dummy.df$fill)[-c(4, 8)])
# Key labels: the level name with the ".<digits>" part removed.
labels <- setNames(gsub("\\.\\d+", "", fill), fill)
# One "Paired" colour per key, looked up by level name.
colors <- setNames(
  scales::brewer_pal(type = "qual", palette = "Paired")(6),
  fill
)

# The boxplot layer is added twice, each time followed by its own fill scale,
# so that each scale contributes a separate legend. The order of the pieces
# around new_scale_fill() matters and is kept as is.
ggplot(dummy.df, aes(x = dummy, y = X1, fill = fill)) +
  geom_boxplot(aes(fill = fill), lwd = 0.1, outlier.size = 0.01) +
  scale_fill_manual(
    name = "n = 5",
    breaks = fill[grepl("5$", fill)],
    labels = labels[grepl("5$", fill)],
    values = colors,
    guide = guide_legend(title.position = "left", order = 1)
  ) +
  new_scale_fill() +
  geom_boxplot(aes(fill = fill), lwd = 0.1, outlier.size = 0.01) +
  scale_fill_manual(
    name = "n = 10",
    breaks = fill[grepl("10$", fill)],
    labels = labels[grepl("10$", fill)],
    values = colors,
    guide = guide_legend(title.position = "left", order = 2)
  ) +
  facet_grid(~Complexity) +
  theme(legend.position = "bottom") +
  guides(fill = guide_legend(nrow = 1)) +
  geom_line(
    aes(x = dummy, group = interaction(Pattern, nsim, n)),
    size = 0.35, alpha = 0.35, colour = I("#525252")
  ) +
  geom_point(
    aes(x = dummy, group = interaction(Pattern, nsim, n)),
    size = 0.35, alpha = 0.25, colour = I("#525252")
  ) +
  scale_x_discrete(
    labels = c("X", "Y", "Z"),
    breaks = paste0("A.10.", c("X", "Y", "Z")),
    drop = FALSE
  ) +
  xlab("Pattern")
#> Warning: Removed 2 rows containing non-finite values (new_stat_boxplot).
I would like to use my own fill colours (e.g. c("red", "blue", "grey50", "black")) when using the function scale_fill_binned() within ggplot code. How can I do this?
Here is a minimal reproducible example:
library(tidyverse)

# Per-cylinder summary of mtcars: number of cars and mean horsepower.
dat <- mtcars %>%
  group_by(cyl) %>%
  summarise(n = n(), mean_hp = mean(hp)) %>%
  ungroup()

# Point size and fill both encode the binned number of cars.
ggplot(dat, aes(x = cyl, y = mean_hp, size = n, fill = n)) +
  geom_point(shape = 21) +
  scale_size_binned(
    breaks = c(8, 10, 12),
    guide = guide_bins(show.limits = TRUE)
  ) +
  scale_fill_binned(
    breaks = c(8, 10, 12),
    guide = guide_bins(show.limits = TRUE),
    type = "viridis"
  ) +
  labs(
    x = "Cylinder", y = "Mean hp",
    fill = "Nb of cars", size = "Nb of cars"
  ) +
  theme_minimal()
Here is what the output looks like:
To use this family of functions you need to provide a function that returns an object with class "ScaleContinuous" "Scale" "ggproto" "gg" (i.e. the equivalent of the output of scale_fill_viridis_c)!
# Continuous fill scale whose palette is a gradient through red, blue, grey50
# and black, intended to be passed as `type =` to scale_fill_binned().
#
# Arguments:
#   ...         forwarded to continuous_scale() (e.g. breaks, limits).
#   alpha, begin, end, direction, option
#               accepted but not used by the body; they are neither applied
#               nor forwarded.
#   values      optional positions of the four colours along the gradient,
#               passed to scales::gradient_n_pal().
#   space       colour space passed to scales::gradient_n_pal().
#   na.value    colour used for missing values.
#   guide       legend type passed to continuous_scale().
#   aesthetics  aesthetic(s) the scale applies to.
#
# Returns the scale object created by continuous_scale().
scale_fill_custom <- function(..., alpha = 1, begin = 0, end = 1, direction = 1,
                              option = "D", values = NULL, space = "Lab",
                              na.value = "grey50", guide = "colourbar",
                              aesthetics = "fill") {
  continuous_scale(
    aesthetics,
    scale_name = "custom",
    # gradient_n_pal is exported by scales, so `::` is sufficient; `:::` would
    # bypass the package's public interface for no benefit.
    palette = scales::gradient_n_pal(
      c("red", "blue", "grey50", "black"),
      values, space
    ),
    na.value = na.value,
    guide = guide,
    ...
  )
}
# Same plot as before, with the binned fill scale built from the custom
# gradient scale function instead of viridis.
ggplot(dat, aes(x = cyl, y = mean_hp, size = n, fill = n)) +
  geom_point(shape = 21) +
  scale_size_binned(
    breaks = c(8, 10, 12),
    guide = guide_bins(show.limits = TRUE)
  ) +
  scale_fill_binned(
    breaks = c(8, 10, 12),
    guide = guide_bins(show.limits = TRUE),
    type = scale_fill_custom
  ) +
  labs(
    x = "Cylinder", y = "Mean hp",
    fill = "Nb of cars", size = "Nb of cars"
  ) +
  theme_minimal()
Note that you are using colour as a scale to be translated by the eye into numerically meaningful difference. The colours are interpolated between the manually applied points, so will not actually be your exact colours. If you wish to band your averages by colour it would be preferable to create a factor, then manually apply your theme.
# Band the counts into a factor and give every band a fixed colour.
bin_colours <- c("red", "blue", "grey50", "black")
dat_binned <- mutate(dat, n = cut(n, breaks = c(0, 8, 10, 12, 20)))

# Name each colour after its bin. Unnamed values are handed out in order to
# the bins that actually occur in the data, so an empty bin (here "(8,10]")
# would shift every later bin onto the wrong colour. Named values are matched
# by bin label and stay put.
names(bin_colours) <- levels(dat_binned$n)

ggplot(
  data = dat_binned,
  aes(x = cyl, y = mean_hp, size = n, fill = n)
) +
  geom_point(shape = 21) +
  scale_size_discrete() +
  scale_fill_manual(values = bin_colours) +
  labs(x = "Cylinder", y = "Mean hp", fill = "Nb of cars", size = "Nb of cars") +
  theme_minimal()
Thanks to the comment from @teunbrand, I was able to come up with something.
# Exact (non-interpolated) colours, one per bin of the fill scale.
cols <- c("red", "blue", "grey50", "black")

ggplot(dat, aes(x = cyl, y = mean_hp, size = n, fill = n)) +
  geom_point(shape = 21) +
  scale_size_binned(
    breaks = c(8, 10, 12),
    guide = guide_bins(show.limits = TRUE)
  ) +
  labs(
    x = "Cylinder", y = "Mean hp",
    fill = "Nb of cars", size = "Nb of cars"
  ) +
  theme_minimal() +
  # Hand-built binned fill scale. binned_pal is reached through `:::`, i.e. it
  # is not part of ggplot2's exported interface.
  binned_scale(
    aesthetics = "fill",
    scale_name = "custom",
    palette = ggplot2:::binned_pal(scales::manual_pal(values = cols)),
    guide = "bins",
    breaks = c(8, 10, 12),
    limits = range(dat$n),
    show.limits = TRUE
  )
Here is what the output looks like:
This is the output. I have a data set which contains the unit, the weight of each unit, and the compliance score for each unit in the year 2016.
I was not able to add the table but here is the screenshot for the data in csv
I have named the columns in the data unit, weight and year (the latter holds the compliance score).
I want to create a sunburst chart where the first ring will be the unit divided based on weight and the second ring will be the same but will have labels compliance score.
The colour for each ring will be different.
I was able to write some code with the help of an online blog, and the output is similar to what I want, but I am having difficulty with the positioning of the labels and with the colour coding of each ring.
# using ggplot
library(ggplot2) # Visualisation
library(dplyr) # data wrangling
library(scales) # formatting

# read file (semicolon-separated, no header row)
weight.eg <- read.csv("Dummy Data.csv", header = FALSE, sep = ";",
                      encoding = "UTF-8")

# change column names
colnames(weight.eg) <- c("unit", "weight", "year")

# weight and year may arrive as factor or character. Going through
# as.character() converts the printed values (not the factor codes) to
# numbers and works for both; entries that are not numbers become NA.
weight.eg$weight <- as.numeric(as.character(weight.eg$weight))
weight.eg$year <- as.numeric(as.character(weight.eg$year))

# NAs are introduced by the conversion above; remove those rows
weight.eg <- na.omit(weight.eg)

# Sum of the total weight
sum_total_weight <- sum(weight.eg$weight)

# First layer
firstLevel <- weight.eg %>% summarize(total_weight = sum(weight))
sunburst_0 <- ggplot(firstLevel) # Just a foundation

# this will generate a bar chart
sunburst_1 <- sunburst_0 +
  geom_bar(data = firstLevel, aes(x = 1, y = total_weight),
           fill = "darkgrey", stat = "identity") +
  geom_text(aes(x = 1, y = sum_total_weight / 2,
                label = paste("Total Weight", comma(total_weight))),
            color = "black")

# View
sunburst_1

# coord_polar(theta = "y") bends the y axis (the total weight) into a circle
sunburst_1 + coord_polar(theta = "y")

# Second ring: one segment per unit, sized and coloured by weight. Columns are
# referenced by bare name inside aes(); the layer's `data` supplies them.
# The text layer stacks at the segment boundaries, as in the original code.
sunburst_2 <- sunburst_1 +
  geom_bar(data = weight.eg,
           aes(x = 2, y = weight, fill = weight),
           color = "white", position = "stack", stat = "identity",
           size = 0.6) +
  geom_text(data = weight.eg,
            aes(label = paste(unit, weight), x = 2, y = weight),
            position = "stack")
sunburst_2 + coord_polar(theta = "y")

# Third ring: same segments, labelled with the year (compliance score) column.
sunburst_3 <- sunburst_2 +
  geom_bar(data = weight.eg,
           aes(x = 3, y = weight, fill = weight),
           color = "white", position = "stack", stat = "identity",
           size = 0.6) +
  geom_text(data = weight.eg,
            aes(label = paste(year), x = 3, y = weight),
            position = "stack")
sunburst_3 + coord_polar(theta = "y")

sunburst_3 +
  scale_y_continuous(labels = comma) +
  scale_fill_continuous(low = "white", high = "darkred") +
  coord_polar("y") +
  theme_minimal()
Output for dput(weight.eg)
# dput() representation of weight.eg: a six-row data frame with a factor
# column `unit` and numeric columns `weight` and `year`. The na.action
# attribute (class "omit") lists rows 1 and 8 as omitted.
structure(list(unit = structure(2:7, .Label = c("", "A", "B",
"C", "D", "E", "F", "Unit"), class = "factor"), weight = c(30,
25, 10, 17, 5, 13), year = c(70, 80, 50, 30, 60, 40)), .Names =
c("unit",
"weight", "year"), row.names = 2:7, class = "data.frame", na.action
= structure(c(1L,
8L), .Names = c("1", "8"), class = "omit"))
output for dput(firstLevel)
# dput() representation of firstLevel: a one-row data frame whose single
# column total_weight is 100. It carries an na.action attribute (class "omit")
# naming rows 1 and 8.
structure(list(total_weight = 100), .Names = "total_weight", row.names
= c(NA,
-1L), na.action = structure(c(1L, 8L), .Names = c("1", "8"), class =
"omit"), class = "data.frame")
So I think I might have some sort of solution for you. I wasn't sure what you wanted to color-code on the outer ring; from your code it seems you wanted it to be the weight again, but it was not obvious to me. For different colour scales per ring, you could use the ggnewscale package:
library(ggnewscale)
For the centering of the labels you could write a function:
# Midpoint of each element's slice when the elements of x are laid end to end:
# the mean of the running total after the element and the running total
# before it.
cs_fun <- function(x) {
  upper <- cumsum(x)
  lower <- c(0, head(upper, -1))
  (upper + lower) / 2
}
Now the plotting code could look something like this:
# Three concentric rings: total weight (x = 1), weight per unit (x = 2) and a
# second per-unit ring labelled with `year` (x = 3). The order of the pieces
# around new_scale_fill() is significant and is kept unchanged.
ggplot(weight.eg) +
  # Inner ring: a single bar for the total.
  # Note: geom_col is equivalent to geom_bar(stat = "identity")
  geom_col(
    data = firstLevel,
    mapping = aes(x = 1, y = total_weight)
  ) +
  geom_text(
    data = firstLevel,
    mapping = aes(
      x = 1, y = total_weight / 2,
      label = paste("Total Weight:", total_weight)
    ),
    colour = "black"
  ) +
  # Middle ring with its own white-to-darkred fill scale.
  geom_col(
    mapping = aes(x = 2, y = weight, fill = weight),
    colour = "white", size = 0.6
  ) +
  scale_fill_gradient(
    name = "Weight",
    low = "white", high = "darkred"
  ) +
  # Open up new fill scale for next ring
  new_scale_fill() +
  # Labels of the middle ring, placed at the slice midpoints from cs_fun().
  geom_text(
    mapping = aes(x = 2, y = cs_fun(weight), label = paste(unit, weight))
  ) +
  # Outer ring with the second fill scale.
  geom_col(
    mapping = aes(x = 3, y = weight, fill = weight),
    size = 0.6, colour = "white"
  ) +
  scale_fill_gradient(
    name = "Another Weight?",
    low = "forestgreen", high = "white"
  ) +
  geom_text(
    mapping = aes(label = paste0(year), x = 3, y = cs_fun(weight))
  ) +
  coord_polar(theta = "y")
Which looks like this:
I am trying to align three text labels (mean, median and current value) outside the crossbars. I would appreciate any help.
My Data
# dput() representation of the example data: 10 rows, one per `variable`
# level, with columns mx, mn, avg, sd, md, firstldiff, firstlsum, secldiff,
# seclsum, value and Criteria.
# NOTE(review): the plotting code refers to a data frame named df3 --
# presumably this object; it is not assigned to a name here.
structure(list(variable = structure(1:10, .Label = c("GrossNetEquity",
"GrossTotalEquityPerfAttr", "LongNetEquity", "LongTotalEquity",
"NetEquity", "NetEquityPerfAttr", "NetTotalEquity", "ShortNetEquity",
"ShortTotalEquity", "TotalNetEquity"), class = "factor"), mx = c(134.5,
8.1, 95.6, 106.4, 61, 6.8, 71.6, -21.4, -24.9, 148.7), mn = c(71.1,
-4.6, 49.7, 66.2, 27, -4.1, 36.4, -46.3, -47.4, 96), avg = c(112.173148148148,
1.14814814814815, 77.7388888888889, 84.5111111111111, 43.262037037037,
1.05092592592593, 48.0694444444444, -34.4194444444444, -36.4416666666667,
120.952777777778), sd = c(14.5968093202928, 2.39877232936504,
9.87368667081958, 8.7204382695887, 7.29159953981859, 2.24405738054356,
7.05196278547511, 6.04899711056417, 5.77265751334298, 13.0003483658092
), md = c(114.15, 1.4, 77.35, 82.65, 41.45, 1.25, 46.35, -34.1,
-35.55, 119.75), firstldiff = c(82.9795295075625, -3.64939651058193,
57.9915155472497, 67.0702345719337, 28.6788379573998, -3.4371888351612,
33.9655188734942, -46.5174386655728, -47.9869816933526, 94.9520810461593
), firstlsum = c(141.366766788734, 5.94569280687823, 97.4862622305281,
101.951987650289, 57.8452361166742, 5.53904068701305, 62.1733700153947,
-22.3214502233161, -24.8963516399807, 146.953474509396), secldiff = c(68.3827201872697,
-6.04816883994697, 48.1178288764302, 58.349796302345, 21.3872384175813,
-5.68124621570476, 26.9135560880191, -52.566435776137, -53.7596392066956,
81.9517326803501), seclsum = c(155.963576109027, 8.34446513624327,
107.359948901348, 110.672425919877, 65.1368356564928, 7.78309806755661,
69.2253328008698, -16.2724531127519, -19.1236941266377, 159.953822875205
), value = c(116.1, -1.2, 88, 92.3, 58.8, -1.2, 63, -28.1, -29.3,
121.6), Criteria = c(NA, NA, "", "", "orange", "", "orange",
"orange", "", "orange")), .Names = c("variable", "mx", "mn",
"avg", "sd", "md", "firstldiff", "firstlsum", "secldiff", "seclsum",
"value", "Criteria"), row.names = c(NA, -10L), class = "data.frame")
My Code
I am trying to show the mean, median and current value as bars on a geom_crossbar, but I am finding it hard to align their labels.
# Crossbar per variable spanning mn..mx, with "|" markers and text labels for
# the median (md), the mean (avg) and the current value (value).
# Every layer uses the plot-level data (df3) and x mapping (variable).
ggplot(df3, aes(x = variable, y = mn)) +
  geom_crossbar(
    aes(ymin = mn, ymax = mx, fill = Criteria),
    width = 0.5, alpha = 0.50, position = position_dodge()
  ) +
  # median marker and label
  geom_point(
    aes(y = md, group = 1),
    shape = "|", size = 10, color = "brown1"
  ) +
  geom_text(
    aes(y = md, label = paste("Median", md)),
    size = 3, vjust = 2, hjust = -1.0, color = "brown1",
    position = position_dodge(width = 0.9)
  ) +
  # mean marker and label
  # NOTE(review): the label text is built from mn although the marker is
  # drawn at avg.
  geom_point(
    aes(y = avg, group = 1),
    shape = "|", size = 10, color = "coral4"
  ) +
  geom_text(
    aes(y = avg, label = paste("Mean", mn)),
    size = 3, vjust = 2.5, hjust = -1.0, color = "coral4"
  ) +
  # current-value marker and label
  geom_point(
    aes(y = value, group = 1),
    shape = "|", size = 10, color = "brown1"
  ) +
  geom_text(
    aes(y = value, label = paste("Current Value", value)),
    size = 2, vjust = 3, hjust = -1.0, color = "brown1"
  ) +
  coord_flip()
If you wish to align your geom_text layers, you can assign them the same y value. I've included an example below. I also removed some repetitive parts from your code, where the different layers can inherit the data / aesthetic mappings from the top ggplot() level.
# All three text layers inherit y = mx from the plot-level mapping, so they
# start from the same position (the upper end of each crossbar).
ggplot(df3, aes(x = variable, y = mx)) +
  geom_crossbar(
    aes(ymin = mn, ymax = mx, fill = Criteria),
    width = 0.5, alpha = 0.50, position = position_dodge()
  ) +
  # vertical bars marking median, mean and current value
  geom_point(aes(y = md), shape = "|", size = 10, color = "brown1") +
  geom_point(aes(y = avg), shape = "|", size = 10, color = "coral4") +
  geom_point(aes(y = value), shape = "|", size = 10, color = "brown1") +
  # labels: vjust separates the three layers from one another, while nudge_y
  # shifts all of them by the same amount to the right.
  # The "Mean" label shows avg here. The question's code pasted mn instead,
  # which did not look right: the matching vertical bar uses avg, and mn
  # appears to stand for "min", not "mean".
  geom_text(
    aes(label = paste("Median", md)),
    size = 3, vjust = -1, nudge_y = 5, hjust = 0, color = "brown1"
  ) +
  geom_text(
    aes(label = paste("Mean", avg)),
    size = 3, vjust = 0, nudge_y = 5, hjust = 0, color = "coral4"
  ) +
  geom_text(
    aes(label = paste("Current Value", value)),
    size = 2, vjust = 1, nudge_y = 5, hjust = 0, color = "brown1"
  ) +
  coord_flip() +
  # expand rightwards to give more space for labels
  expand_limits(y = 200)
Note: The above follows the approach in your code, which repeats the same geom layers for different columns in the wide format data. In general, ggplot prefers to deal with data in long format. It looks cleaner, and would be easier to maintain as you only need to make changes (e.g. increase font size, change number of decimal places in the label) once, rather than repeat the change for every affected layer. A long format approach to this problem could look like this:
# create long format data frame for labels
# create long format data frame for labels: one row per variable and
# statistic (md, avg, value), keeping mx as the common label position
df3.labels <- df3 %>%
  select(variable, mx, md, avg, value) %>%
  tidyr::gather(type, value, -variable, -mx) %>%
  mutate(
    # label text: "<statistic name>: <value rounded to 2 decimals>"
    label = paste0(
      case_when(
        type == "md" ~ "Median",
        type == "avg" ~ "Mean",
        TRUE ~ "Current Value"
      ),
      ": ",
      round(value, 2)
    ),
    # per-statistic vertical justification keeps the three labels apart
    vjust = case_when(
      type == "md" ~ -1,
      type == "avg" ~ 0,
      TRUE ~ 1
    )
  )

# place df3.labels in the top level call, since there are two geom layers that
# use it as the data source, & only one that uses df3.
ggplot(
  df3.labels,
  aes(x = variable, y = value, color = type, label = label)
) +
  # the crossbar layer brings its own data and mappings (inherit.aes = FALSE)
  geom_crossbar(
    data = df3,
    aes(x = variable, y = mn, ymin = mn, ymax = mx, fill = Criteria),
    inherit.aes = FALSE,
    width = 0.5, alpha = 0.50
  ) +
  geom_point(shape = "|", size = 10) +
  geom_text(aes(y = mx, vjust = vjust), size = 3, nudge_y = 5, hjust = 0) +
  # change colour mappings here
  scale_color_manual(
    values = c("md" = "brown1", "avg" = "coral4", "value" = "brown1"),
    # "none" suppresses the colour legend; the logical form `guide = FALSE`
    # is deprecated in current ggplot2 and triggers a warning
    guide = "none"
  ) +
  coord_flip() +
  expand_limits(y = 200)