ggplot2: label n observations for alpha on stacked bars - r

I have a df as it follows:
fruit <- data.frame(Sample=1:100,
Fruit=c(rep("Apple", 10), rep("Strawberry", 25), rep("Grape", 20),
rep("Watermelon", 15), rep("Lime", 11), rep("Blueberry", 10),
rep("Plum", 9)),
Color=c(rep("Red", 30), rep("Green", 45),
rep("Blue", 25)),
Ripe=c(rep(c(T, F), 50)))+
fruit$Fruit <- factor(fruit$Fruit, unique(fruit$Fruit))+
fruit$Color <- factor(fruit$Color, unique(fruit$Color))
Then, I've plotted the bar graph:
foo <- aggregate(Sample ~ Color, data = fruit, FUN = length)
library(ggplot2)
ggplot(fruit, aes(Color, fill = Color, alpha = Ripe)) +
geom_bar(color = "black") +
geom_text(data = foo, aes(label = Sample, y = Sample), alpha = "1", vjust = -1)
scale_alpha_discrete(range = c(1, 0.6)) +
theme(axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank()) +
guides(fill = guide_legend(override.aes = list(colour = NA)))
With the command above I was able to create the following bar-graph:
So...I was able to put the total number of observations for each Color above each bar...but I don't this....rather, I'm wonder how can I put the total n of observation for TRUE in each color bar instead. In this case it would be one n observation for each bar, with one above the above the TRUE bar for the TRUE n observation for that particular Color...

You can use calculating power of stat in ggplot2
ggplot(fruit, aes(Color, fill = Color, alpha = Ripe)) +
geom_bar() +
geom_text(stat = "count", aes(y = ..count.., label = ..count..),
position = "stack", show.legend = FALSE) +
scale_alpha_discrete(range = c(1, 0.6)) +
theme(axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank()) +
guides(fill = guide_legend(override.aes = list(colour = NA)))

Related

How to display p-values above boxplots on exponential (log10) y-axis?

I have a data frame with three groups (group1, group2, group3). I would like to show the p-value of their mean comparisons in ggplot2 which I can do however, the values are stacked ontop of one another making it difficult to see what is being compared. When I try to adjust where the p-values are located using the y_position() function, the boxplots collapse (I think because the y-axis is log10) but the p-values are no longer stacked ontop of one another. How can I keep the boxplots from collapsing and keep the p-values displayed so that you can see what is being compared?
Example data
library(ggplot2)
library(dplyr)
library(ggsignif)
df <- data.frame(matrix(ncol = 2, nrow = 30))
colnames(df)[1:2] <- c("group", "value")
df$group <- rep(c("group1","group2","group3"), each = 10)
df[1:10,2] <- rexp(10, 1/10)
df[11:20,2] <- rexp(10, 1/100)
df[21:30,2] <- rexp(10, 1/900)
# Need to say what should be compared for p-value determination
my_comparisons <- list(c("group1", "group2"),
c("group1", "group3"),
c("group2", "group3"))
Boxplots showing the distribution of value for each group however the p-values are ontop of one another so you cannot compare among groups.
df %>%
mutate(group = factor(group, levels = c("group3","group2","group1"))) %>%
ggplot(aes(x = group, y = value)) +
geom_signif(comparisons = my_comparisons,
map_signif_level = function(x) paste("p =", scales::pvalue(x))) +
scale_y_log10() +
geom_boxplot(outlier.colour="white", outlier.fill = "white", outlier.shape = 1, outlier.size = 0) +
geom_jitter(shape=1, position=position_jitter(0.2), color = "black", fill = "white", size = 2) +
labs(x = "",
y = "value") +
theme_bw() +
theme(axis.text.x = element_text(size = 16, color = "black"),
axis.text.y = element_text(size = 16, color = "black"),
axis.title = element_text(size = 16, color = "black"),
axis.title.x = element_text(vjust = -0.5),
panel.grid = element_blank(),
panel.background = element_blank())
Adjusting the y_position() of where the p-values should display but this collapses the y-axis. I have tried several values within y_position.
df %>%
mutate(group = factor(group, levels = c("group3","group2","group1"))) %>%
ggplot(aes(x = group, y = value)) +
geom_signif(y_position = c(2000,1800,1600),
comparisons = my_comparisons,
map_signif_level = function(x) paste("p =", scales::pvalue(x))) +
scale_y_log10() +
geom_boxplot(outlier.colour="white", outlier.fill = "white", outlier.shape = 1, outlier.size = 0) +
geom_jitter(shape=1, position=position_jitter(0.2), color = "black", fill = "white", size = 2) +
labs(x = "",
y = "value") +
theme_bw() +
theme(axis.text.x = element_text(size = 16, color = "black"),
axis.text.y = element_text(size = 16, color = "black"),
axis.title = element_text(size = 16, color = "black"),
axis.title.x = element_text(vjust = -0.5),
panel.grid = element_blank(),
panel.background = element_blank())
For some reason this parameter ignores the axis transformation. You therefore need to use the log10 values of the desired positions:
df %>%
mutate(group = factor(group, levels = c("group3","group2","group1"))) %>%
ggplot(aes(x = group, y = value)) +
geom_signif(comparisons = my_comparisons,
y_position = log10(c(5000, 10000, 25000)),
map_signif_level = function(x) paste("p =", scales::pvalue(x))) +
scale_y_log10() +
geom_boxplot(outlier.colour="white", outlier.fill = "white",
-outlier.shape = 1, outlier.size = 0) +
geom_jitter(shape=1, position=position_jitter(0.2), color = "black",
fill = "white", size = 2) +
labs(x = "",
y = "value") +
theme_bw() +
theme(axis.text.x = element_text(size = 16, color = "black"),
axis.text.y = element_text(size = 16, color = "black"),
axis.title = element_text(size = 16, color = "black"),
axis.title.x = element_text(vjust = -0.5),
panel.grid = element_blank(),
panel.background = element_blank())

Issue with ggplot barchart with multiple subgroups

I am trying to adapt the approach from (ggplot2 multiple sub groups of a bar chart) but something is not as it should be.
The code is:
library(grid)
MethodA= rep(c("ARIMA"), 6)
MethodB=rep(c("LSTM"), 6)
MethodC = rep(c("ARIMA-LSTM"),6)
MethodD=rep(c("SSA"),6)
Method=c(MethodA, MethodB, MethodC, MethodD)
Measure = rep(c("RMSE", "RMSE", "MAE", "MAE", "MAPE", "MAPE"), 4)
trtest=rep(c("train", "test"), 12)
Value=sample(x = 4000:7000, size = 24, replace = TRUE)
df2 <- data.frame(Method, Measure, trtest, Value)
dodge <- position_dodge(width = 0.9)
g1 <- ggplot(data = df, aes(x = interaction(Variety, Trt), y = yield, fill = factor(geno))) +
geom_bar(stat = "identity", position = position_dodge()) +
#geom_errorbar(aes(ymax = yield + SE, ymin = yield - SE), position = dodge, width = 0.2) +
coord_cartesian(ylim = c(0, 7500)) +
annotate("text", x = 1:6, y = - 10,
label = rep(c("Variety 1", "Variety 2", "Variety 3"), 2)) +
annotate("text", c(1.5, 3.5), y = - 20, label = c("Irrigated", "Dry")) +
theme_classic() +
theme(plot.margin = unit(c(1, 1, 4, 1), "lines"),
axis.title.x = element_blank(),
axis.text.x = element_blank())
# remove clipping of x axis labels
g2 <- ggplot_gtable(ggplot_build(g1))
g2$layout$clip[g2$layout$name == "panel"] <- "off"
grid.draw(g2)
The problem is aslo in a sequence that interaction function generates - the sequences are not by the order - ARIMA - RMSE, MAE, MAPE, then LSTM - RMSE, MAE, MAPE ...
I would appreciate for any help.
Best,
Nikola
Instead of using interaction, it might be a lot clearer if you use facets.
Note that your example is not reproducible (your sample data has different variable names from the ones you use in your plotting code, so I had to guess which you meant to substitute):
ggplot(data = df2, aes(x = Measure, y = Value, fill = trtest)) +
geom_bar(stat = "identity", position = position_dodge()) +
coord_cartesian(ylim = c(0, 7500)) +
facet_grid(.~Method, switch = 'x') +
theme_classic() +
theme(strip.placement = 'outside',
strip.background = element_blank(),
strip.text = element_text(face = 'bold', size = 16),
panel.spacing.x = unit(0, 'mm'),
panel.border = element_rect(fill = NA, color = 'gray'))

Add legend to boxplot rstudio

I use the following code to create a boxplot:
plot <- ggplot(WL, aes(y = wavelength, x = factor(category, level = c("A", "B")))) +
theme_bw() +
geom_boxplot(outlier.colour = "gray30", outlier.shape = 8, outlier.size = 2, lwd=1, fill = c("#C6DBEF", "#FEE391")) +
ylab(expression(lambda[(km)])) +
theme(plot.margin = unit(c(2,2,2,2), "cm"),
axis.title.x = element_blank(),
axis.title.y=element_text(size=20),
plot.title = element_text(size = 22, vjust = 2))
How can I add a legend? I want to place it inside the plot (topright). It should show the two colors and a description.
I am trying to use legends() which does not work for me.
The issue is where you specified the fill - The following code will give you a legend, and changing the position in the last line will allow you to change it where you want:
# sample data
WL <- data.frame(wavelength = rnorm(100, 0,1),
category = sample(LETTERS[1:2], 100, replace = TRUE))
# plot
ggplot(WL, aes(y = wavelength, x = factor(category, level = c("A", "B")), fill = category)) +
theme_bw() +
geom_boxplot(outlier.colour = "gray30", outlier.shape = 8, outlier.size = 2, lwd = 1) +
ylab(expression(lambda[(km)])) +
theme(plot.margin = unit(c(2,2,2,2), "cm"),
axis.title.x = element_blank(),
axis.title.y=element_text(size=20),
plot.title = element_text(size = 22, vjust = 2)) +
scale_fill_manual(values = c("#C6DBEF", "#FEE391")) +
theme(legend.position = "bottom")

ggplot: How to set different alignments on the geom_text position based on type of variable?

I have a 100% stacked bar chart that displays 3 types of variable. I've set a example db so I could create a graph more easily.
I've already adjust the chart with the colors and information I need. But I'm not being able to independently position the labels. Here's the current code and output.
Code:
(empilhado<-ggplot(dfm, aes(y = Year, x = abs(value), fill = variable)) +
scale_x_continuous(sec.axis = sec_axis(trans = ~.*1, name="Trab."), expand=expansion(mult=c(0,0.05)))+
geom_col(data = rotulo, aes(y = Year, x=abs(trabalho), fill=NULL), width = .7, colour="black", lwd=0.1, position = "fill", orientation = "y") +
geom_label(data = rotulo, aes(y= Year, x = abs(trabalho), fill=NULL, label=paste(format(round(trabalho, digits=0), nsmall=0, decimal.mark=",", big.mark="."),
format(round(aprovado, digits=0), nsmall=0, decimal.mark=",", big.mark="."))
), color="black", size=4, position = position_fill(vjust=1.06)) +
geom_col(width = .7, colour="black", lwd=0.1, position = "fill", orientation = "y") +
geom_text(aes(label=format(round(value, digits=0), nsmall=0, decimal.mark=",", big.mark=".")),
size=4, color="white", position = position_fill(vjust=0.5)) +
theme(panel.grid.major = element_line(colour = "gray90",size=0.75), panel.grid.minor = element_line(colour = "gray90",size=0.75),
legend.position="top", axis.text.x = element_blank(), axis.ticks.x = element_blank(),
axis.title.x = element_blank(), panel.background = element_blank())+
scale_fill_manual(values = c("#000000","tomato","blue"))
Output:
How is it now? Position_fill(vjust=0.5), so all the labels are centered inside its respective bar.
What I want? To be able to set the position of the 'Marionete' label on the left(like a vjust=0 would do), keep the 'Pedido' label as is (in the center of the 'Pedido' stacked bar) and place the 'Fatura' label on the right (like a vjust=1 would do).
Thanks in advance!
One option to achieve your desired result would be to compute/set the positions for each label and the horizontal alignment manually instead of making use of position="fill":
Making use of some random mock data:
library(ggplot2)
library(dplyr)
dfm <- dfm %>%
group_by(Year) %>%
arrange(desc(variable)) %>%
mutate(
pct = value / sum(value),
x_label = case_when(
variable == "Marionete" ~ 0,
variable == "Pedido" ~ .5 * (cumsum(pct) + lag(cumsum(pct))),
TRUE ~ 1
),
hjust = case_when(
variable == "Marionete" ~ 0,
variable == "Pedido" ~ .5,
TRUE ~ 1
)
)
ggplot(dfm, aes(y = Year, x = abs(value), fill = variable)) +
scale_x_continuous(sec.axis = sec_axis(trans = ~ . * 1, name = "Trab."), expand = expansion(mult = c(0, 0.05))) +
geom_col(width = .7, colour = "black", lwd = 0.1, position = "fill", orientation = "y") +
geom_text(aes(x = x_label, label = format(round(value, digits = 0), nsmall = 0, decimal.mark = ",", big.mark = "."), hjust = hjust),
size = 4, color = "white"
) +
theme(
panel.grid.major = element_line(colour = "gray90", size = 0.75), panel.grid.minor = element_line(colour = "gray90", size = 0.75),
legend.position = "top", axis.text.x = element_blank(), axis.ticks.x = element_blank(),
axis.title.x = element_blank(), panel.background = element_blank()
) +
scale_fill_manual(values = c("#000000", "tomato", "blue"))
DATA
set.seed(123)
dfm <- data.frame(
Year = rep(c(2006:2016), each = 3),
value = sample(1:100, 3 * 11, replace = TRUE),
variable = c("Fatura", "Pedido", "Marionete")
)
dfm$variable <- factor(dfm$variable, levels = c("Fatura", "Pedido", "Marionete"))
dfm$Year <- factor(dfm$Year)

Center geom_text on ggplot while controlling bar width

I am trying to make a horizontal bar chart in ggplot2 where the bars are of equal width and with text labels centered on the bars. There are two groups on the y axis -- one with 2 bars, and one with three.
There are a lot of similar questions on SO that address both of these issues, but I haven't been able to fix one without breaking the other. Here's my data:
## data
df <- tibble(var1 = c("a", "b", "b", "c", "c"),
var2 = c("x", "y", "x", "y", "x"),
proportion = c(100, 33.3, 66.7, 66.7, 33.3)) %>%
mutate(var1 = factor(var1, levels = var1_order))
var1_order <- c("a", "c", "b")
Here's an example where the widths are good, but the labels of the y group are off:
## labels bad
df %>%
ggplot(aes(x = proportion, y = var2, fill = var1,
label = paste0(round(proportion, 1), "%"))) +
geom_col(position = position_dodge2(preserve = "single", padding = 0), width = .9) +
geom_text(size = 3, position = position_dodge2(width = 0.9), hjust = -.5,
color = "black", aes(group = var1)) +
scale_fill_manual(name = "", values = c("#093D6E","#5D8AA8", "#00918B",
"#F8AF54", "#CD9575")) +
labs(x = NULL) +
theme(axis.ticks = element_blank(),
axis.title.y = element_blank(),
axis.line=element_blank(),
axis.text.x = element_blank(),
panel.background = element_blank(),
strip.text = element_text(size = 7, face = "bold")) +
scale_x_continuous(expand = c(.2, .2)) +
guides(fill = guide_legend(reverse = TRUE))
And here's an example where the labels are good but the widths are now off:
## col widths bad
df %>%
ggplot(aes(x = proportion, y = var2, fill = var1,
label = paste0(round(proportion, 1), "%"))) +
geom_col(position = position_dodge(width = 0.9)) +
geom_text(size = 3, position = position_dodge(width = 0.9), hjust = -.5,
color = "black", aes(group = var1)) +
scale_fill_manual(name = "", values = c("#093D6E","#5D8AA8", "#00918B",
"#F8AF54", "#CD9575")) +
labs(x = NULL) +
theme(axis.ticks = element_blank(),
axis.title.y = element_blank(),
axis.line=element_blank(),
axis.text.x = element_blank(),
panel.background = element_blank(),
strip.text = element_text(size = 7, face = "bold")) +
scale_x_continuous(expand = c(.2, .2)) +
guides(fill = guide_legend(reverse = TRUE))
Note that this will be part of a parameterized report, so it needs to be capable of dealing with different numbers of var1 and var2 groups. Thanks!
Try this approach. You can use position_dodge2() to keep uniform bars. Here the code:
library(ggplot2)
#Code
df %>%
ggplot(aes(x = proportion, y = var2, fill = var1,
label = paste0(round(proportion, 1), "%"))) +
geom_col(position = position_dodge2(preserve = 'single',width = 0.9)) +
geom_text(size = 3, position = position_dodge2(preserve = 'single',width = 0.9), hjust = -.5,
color = "black", aes(group = var1)) +
scale_fill_manual(name = "", values = c("#093D6E","#5D8AA8", "#00918B",
"#F8AF54", "#CD9575")) +
labs(x = NULL) +
theme(axis.ticks = element_blank(),
axis.title.y = element_blank(),
axis.line=element_blank(),
axis.text.x = element_blank(),
panel.background = element_blank(),
strip.text = element_text(size = 7, face = "bold")) +
scale_x_continuous(expand = c(.2, .2)) +
guides(fill = guide_legend(reverse = TRUE))
Output:

Resources