Stacked geom_bar() with 2 bars per x value - r

Forgive me is this is asked and answered, but so far I have not found a solution that meets my use case. If you know of one, please point me in the right direction.
Here is a small working sample of my code:
library(tidyverse)
source <- c("D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P")
subject <- c("M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R")
grade <- c(1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 2, 2, 2, 1, 1, 2, 2, 2, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2)
domain <- c("Alg", "Alg", "Alg", "Alg", "Alg", "Alg", "Geo", "Geo", "Geo", "Geo", "Geo", "Geo", "Comp", "Comp", "Comp", "Comp", "Comp", "Comp", "Voc", "Voc", "Voc", "Voc", "Voc", "Voc", "Alg", "Alg", "Alg", "Alg", "Alg", "Geo", "Geo", "Geo", "Geo", "Geo", "Comp", "Comp", "Comp", "Comp", "Comp", "Voc", "Voc", "Voc", "Voc", "Voc", "Voc")
placement <- c("A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A")
qty <- c(425, 389, 96, 460, 293, 163, 518, 291, 101, 366, 349, 201, 889, 661, 150, 680, 617, 465, 445, 293, 112, 381, 292, 208, 223, 232, 131, 270, 72, 27, 45, 9, 99, 40, 79, 194, 72, 126, 133, 123, 456, 98, 234, 432, 65)
test <- data.frame(source, subject, grade, domain, placement, qty)
plot4 <- test %>%
ggplot(aes(x = grade, y = qty, fill = placement)) +
geom_bar(stat = "identity", position = "fill") +
stat_count(aes(label = paste(sprintf("%1.2f", ..count../sum(..count..)*100), "%\n", ..count..), y = 0.5*..count..),
geom = "text",
colour = "black",
size = 2.5,
position = position_fill(vjust = 0.5)) +
scale_x_discrete("Grade", limits = c(1, 2),
labels = c("1st", "2nd"),
guide = guide_axis(angle = 90)) +
scale_y_continuous("Prop place") +
scale_fill_manual("Placement",
values = rev(c("#de7e7e", "#ebeb4d", "#70e65e")),
labels = rev(c("C",
"B",
"A"))
) +
theme(axis.title.y = element_blank(),
axis.text.y = element_blank()) +
facet_wrap(vars(subject, domain), scales = "free_x")
plot4
which produces the following plot:
What I need for each grade level (in this case, 1st and 2nd) is to have two stacked bars with the data centered in each region of the stack. There should be one bar from each source ("D" and "P") associated with each grade level.
I do not know how to add second variable to the x-axis to create the bar from each source FOR EACH grade. I can either do grade, or source, but not both.
*BONUS POINTS for helping me figure out the stat_count line. I need the n() and % of each stack relative to the total for each source/grade/placement value. These numbers aren't even close, but this code has worked for me on less complicated plots.

This may be what you are after
my_labels <- test %>%
group_by(grade, source,domain, subject) %>%
summarise(n = qty,p = qty/sum(qty)) %>%
mutate(lab = paste(n,"\n",sprintf("%1.2f",p*100),"%"),
x = interaction (grade,source)) # get the labels calculated and add at the end
The interaction() function helps to get combinations of grade and source for plotting
test %>%
ggplot(aes(x = interaction (grade,source),
y = qty, fill = placement)) +
geom_bar(stat = "identity", position ="fill") +
scale_x_discrete("Grade",
labels = c("1st D","2nd D","1st P","2nd P"), # add new X labels
guide = guide_axis(angle = 90)) +
scale_fill_manual("Placement",
values = rev(c("#de7e7e", "#ebeb4d", "#70e65e")),
labels = rev(c("C","B","A"))) +
scale_y_continuous("Prop place") +
facet_wrap(vars(subject, domain), scales = "free_x") +
geom_text(data = my_labels, aes(label = lab,y = p),position = position_stack(vjust = 0.2)) # adjust position to get labels where you want.
Some comments here may help Adding percentage labels to ggplot when using stat_count

Thank you #e.matt. With your answer (and a little tweaking) I was able to get this exactly how I wanted it. I used mutate() to add a pcnt column, making the geom_text() layer a bit easier to work with. The interaction() function worked perfectly to give me two stacked and dodged bar charts per category (Grade in this case). Below is the modified solution code that I was able to make work out:
source <- c("D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P")
subject <- c("M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R")
grade <- c(1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 2, 2, 2, 1, 1, 2, 2, 2, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2)
domain <- c("Alg", "Alg", "Alg", "Alg", "Alg", "Alg", "Geo", "Geo", "Geo", "Geo", "Geo", "Geo", "Comp", "Comp", "Comp", "Comp", "Comp", "Comp", "Voc", "Voc", "Voc", "Voc", "Voc", "Voc", "Alg", "Alg", "Alg", "Alg", "Alg", "Geo", "Geo", "Geo", "Geo", "Geo", "Comp", "Comp", "Comp", "Comp", "Comp", "Voc", "Voc", "Voc", "Voc", "Voc", "Voc")
placement <- c("A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A")
qty <- c(425, 389, 96, 460, 293, 163, 518, 291, 101, 366, 349, 201, 889, 661, 150, 680, 617, 465, 445, 293, 112, 381, 292, 208, 223, 232, 131, 270, 72, 27, 45, 9, 99, 40, 79, 194, 72, 126, 133, 123, 456, 98, 234, 432, 65)
#pcnt was added to the dataframe using mutate() as shown below
pcnt <- pcnt <- c(46.7, 41.16, 11.31, 50.22, 30.08, 16.77, 56.92, 38.39, 12.38, 39.96, 32.92, 20.85, 56.04, 30.97, 16.74, 45.52, 32.07, 19.21, 52.75, 35.9, 16.38, 43.67, 19.5, 13.5, 52.29, 44.33, 10.37, 38.59, 41.58, 37.74, 47.29, 38.26, 10.65, 57.43, 22.92, 19.21, 53.41, 44.51, 9.94, 37.68, 33.44, 22.72, 48, 34.29, 7.8, 72.64, 29.01, 5.38, 52.35, 37.28, 14.27, 43.25, 40.39, 31.37, 38.05, 36.65, 27.7, 73.17, 50, 33.33, 29.41, 6.08, 60, 15.27, 10.7, 58.68, 7.22, 21.83, 16.94, 29.65, 51.99, 22.14, 50.34, 44.29, 22.9, 49.49, 21.75, 41.04, 39.82, 21.24, 78.06, 22.12, 12.5, 17.2, 15.56, 77.62, 10.13, 15.18, 37.73, 32.9, 17.36, 100)
test <- data.frame(source, subject, grade, domain, placement, qty)
test <- test %>%
group_by(subject, grade, domain, placement) %>%
mutate(pcnt = round(qty / sum(qty) * 100, 2)) %>%
arrange(domain, desc(qty))
plot <- test %>%
ggplot(aes(x = interaction(source, grade),
y = qty,
fill = placement)) +
geom_bar(stat = "identity", position = "fill") +
scale_x_discrete("Grade",
labels = c("1.D", "1.P","2.D", "2.P"),
guide = guide_axis(angle = 90)) +
scale_y_continuous("Prop place") +
scale_fill_manual("Relative Placement",
values = rev(c("#de7e7e", "#ebeb4d", "#70e65e")),
labels = rev(c("C", "B", "A"))
) +
facet_wrap(vars(domain), scales = "free_x") +
geom_text(size = 2.5, aes(label = paste(sprintf("%1.2f", pcnt), "%\n", qty), y = 0.5 * qty),
position = position_fill(vjust=0.5))
plot

Related

igraph in R: saving as .eps places black boxes around the spherical vertices

I have made a network in igraph and need to save it as a .eps file. However, when it is saved, there are black boxes in the background for each vertex. How can I remove the black boxes?
A reproducible example
stack.vtx <- data.frame(Org = c("A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "AA", "BB", "CC", "DD", "EE", "FF", "GG", "HH", "II", "JJ", "KK"),
Col = c("#8B4513", "#8B4513", "#CDC8B1", "#FF6347", "#FF6347", "#00CDCD", "#8A2BE2", "#8A2BE2", "#8A2BE2", "#8A2BE2", "#FFB90F", "#FFB90F", "#FF6347", "#8A2BE2", "#8B4513", "#00CDCD", "#8A2BE2", "#8B4513", "#8B4513", "#CDC8B1", "#FFB90F", "#FFB90F", "#FF6347", "#00CDCD", "#8A2BE2", "#8A2BE2", "#FF6347", "#8A2BE2", "#8B4513", "#00CDCD", "#8B4513", "#8B4513", "#CDC8B1", "#FF6347", "#FF6347", "#00CDCD", "#8A2BE2"),
Counts = c(1, 5, 3, 2, 1, 1, 3, 6, 4, 2, 1, 4, 5, 2, 5, 12, 2, 7, 12, 10, 4, 3, 2, 1, 11, 14, 8, 9, 7, 1, 15, 7, 3, 2, 12, 6, 8))
stack.edges <- data.frame(from = c("A", "K", "L", "D", "N", "O", "L", "H", "J", "J", "G", "P", "Q", "R", "S", "X", "Y", "Z", "AA", "BB", "CC", "DD", "EE", "FF", "GG", "HH", "II", "JJ", "KK"),
to = c("I", "B", "C", "M", "E", "F", "G", "C", "C", "J", "B", "T", "U", "V", "W", "H", "I", "J", "AA", "BB", "CC", "DD", "EE", "FF", "GG", "F", "G", "C", "C"),
Col = c("#7FC97F", "#386CB0", "#666666", "#BEAED4", "#666666", "#F0027F", "#666666", "#BEAED4", "#F0027F", "#666666", "#7FC97F", "#7FC97F", "#386CB0", "#666666", "#BEAED4", "#F0027F", "#666666", "#BEAED4", "#7FC97F", "#386CB0", "#666666", "#BEAED4", "#666666", "#F0027F", "#666666", "#BEAED4", "#F0027F", "#666666", "#7FC97F"))
Graphing the data with igraph
library(igraph)
stack_graph <- graph_from_data_frame(stack.edges, vertices = stack.vtx, directed = TRUE)
Saving the igraph as a .eps
setEPS()
postscript("Stack.eps")
plot(stack_graph,
layout = layout_in_circle(stack_graph),
vertex.color = V(stack_graph)$Col,
vertex.shape = "sphere",
vertex.label.color = "black",
vertex.size = V(stack_graph)$Counts * 2,
vertex.label = "",
edge.size = 0.3,
edge.color = E(stack_graph)$Col,
edge.curved = 0.2,
edge.arrow.size = 0.5)
dev.off()
This produces the following graph:
To note, if I were to pass vertex.shape = "circle" there are no black boxes around the vertices. Is there a way to remove these black boxes when vertex.shape = "sphere"?
In case it is needed:
sessioninfo()
R version 4.2.1 (2022-06-23)
Platform: x86_64-apple-darwin17.0 (64-bit)
Running under: macOS Monterey 12.6.2
Any help is appreciated!

Efficient way to use geom_boxplot with specified quantiles and long data

I have a dataset with calculated quantiles for each department and country. It looks like this:
df <- structure(list(quantile = c("p5", "p25", "p50", "p75", "p95",
"p5", "p25", "p50", "p75", "p95", "p5", "p25", "p50", "p75",
"p95", "p5", "p25", "p50", "p75", "p95"), value = c(6, 12, 20,
33, 61, 6, 14, 23, 38, 63, 7, 12, 17, 26, 50, 7, 12, 18, 26,
51), country = c("A", "A", "A", "A", "A", "B", "B", "B", "B",
"B", "A", "A", "A", "A", "A", "B", "B", "B", "B", "B"), dep = c("D",
"D", "D", "D", "D", "D", "D", "D", "D", "D", "I", "I", "I", "I",
"I", "I", "I", "I", "I", "I"), kpi = c("F", "F", "F", "F", "F",
"F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F",
"F", "F")), row.names = c(NA, -20L), class = c("tbl_df", "tbl",
"data.frame"))
Now, I would like to build a boxplot for each department comparing countries and using p5/p95 instead of min/max similar to this plot but without outliers (hence, Train_number would be countries):
The corresponding code to this plot is (from question ggplot2, geom_boxplot with custom quantiles and outliers):
ggplot(MyData, aes(factor(Stations), Arrival_Lateness,
fill = factor(Train_number))) +
stat_summary(fun.data = f, geom="boxplot",
position=position_dodge(1))+
stat_summary(aes(color=factor(Train_number)),fun.y = q, geom="point",
position=position_dodge(1))
I tried to derive a solution from the code above and the provided answers. Unfortunately I lack the knowledge how to provide the neccessary values from the variables quantile and value to ggplot(). Is there an argument in the stat_summary() function I missed and could use? Or just another simple solution?
Whatever data you have provided from that you can generate the following plot
library(ggplot2)
f <- function(x) {
r <- quantile(x, probs = c(0.05, 0.25, 0.5, 0.75, 0.95))
names(r) <- c("ymin", "lower", "middle", "upper", "ymax")
r
}
ggplot(df, aes(factor(dep), value)) +
stat_summary(fun.data = f, geom="boxplot",
position=position_dodge(1))+
facet_grid(.~country, scales="free")
I don't know whether it is correct or not.

R graph reorder a factor by levels for only a specified level

I am trying to create a graph where the x axis (a factor) is reordered by descending order of the y axis (numerical values), but only for one of two levels of another factor.
Originally, I tried using the code below:
reorder(factor1, desc(value1))
However, this code only reorganizes the graph (in a descending order) by the sum of the two values under each factor2 (I presume); while I am only interested in reorganizing the data for one level (i.e. "A") under factor2.
Here is some sample data to illustrate better.
sampledata <- data.frame(factor1 = c("A", "A", "B", "B", "C", "C", "D", "D", "E", "E",
"F", "F", "G", "G", "H", "H", "I", "I", "J", "J"),
factor2 = c("A", "H", "A", "H", "A", "H", "A", "H", "A", "H",
"A", "H", "A", "H", "A", "H", "A", "H", "A", "H"),
value1 = c(1, 5, 6, 2, 6, 8, 10, 21, 30, 5,
3, 5, 4, 50, 4, 7, 15, 48, 20, 21))
Here is what I used previously:
sampledata %>%
ggplot(aes(x=reorder(factor1, desc(value1)), y=value1, group=factor2, color=factor2)) +
geom_point()
The reason why I would like to reorder by a specific level (say factor2=="A") is that I can view any deviance of the values for factor2=="H" away from "A" points.
I would appreciate using tidyverse or dplyr as means to solve this problem.
library(ggplto2)
library(dplyr)
sampledata %>%
mutate(value2 = +(factor2=="A")*value1) %>%
ggplot(aes(x=reorder(factor1, desc(value2 + value1/max(value1))), y=value1,
group=factor2, color=factor2)) +
geom_point() +
xlab("factor1")

Error for graph saving loop - Must be length 1 (a summary value)

I'm trying to create and save graphs for individual organizations. I keep getting an error that says "Error in summarise_impl(.data, dots) :
Column Improved must be length 1 (a summary value), not 0"
The graphs work when I combine all the organizations together, so I'm not sure what is going on here!
Starting with this data:
library(ggpubr)
structure(list(Organization = c("A", "B", "C", "D", "E", "F",
"G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S"
), imp_imp20_Improved = c(55.6, 100, 50, 0, 57.1, 0, 0, 45, 50,
60, 100, 50, 66.7, 66.7, 33.3, 0, 50, 0, 50)), row.names = c(NA,
-19L), class = c("tbl_df", "tbl", "data.frame"))
org<- c("A", "B", "C", "D", "E", "F",
"G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S"
)
This is my code for the graph loop:
for(i in org) {
tiff(paste0("//graphs/",i,"_graph11.tiff"), units="in", width=3.5, height=3, res=300)
indicator_graph1<- indicators_ong %>%
filter(Organization==i) %>%
summarise(Improved = imp_imp20_Improved,
"Not Improved" = 100-imp_imp20_Improved)%>%
gather(key="group") %>%
arrange(desc(group))
labs <- paste0(indicator_graph1$group, "\n (", indicator_graph1$value,"%)")
z <- ggpie(indicator_graph1,"value",label=labs, fill= "group", color = "black", palette = c("darkgoldenrod1","azure3"), lab.pos = "in", lab.font = c(3,"black"),title="Improve 20")+
theme(legend.position ="none")+
font("title", size=10, hjust=0.5)
print(z)
dev.off()
}

Cross table graph similar to excel

I need to put a 3D kind of graph similar to attached image created in excel. I am not sure whether we can do this in ggplot?
structure(list(Name = c("A", "B", "C", "D"), P = c(15089, NA,
NA, 43083), Q = c(1589, NA, NA, 18120), R = c(93751, NA, 4709,
211649), S = c(34167, 1323, 1520, 82378), T = c(8831, NA, 4544,
15157)), .Names = c("Name", "P", "Q", "R", "S", "T"), row.names = c(NA,
4L), class = "data.frame")
I have worked with this following code.
ggplot(a, aes(x = a$A, y = a$Amount, fill = a$B)) +
geom_col(position = 'stack') +
geom_text(aes(label = a$Amount), position = position_stack(vjust = .5),
color='grey25', size=2) + coord_flip()
The problem is the labels which shows on top the the graph is overlapping
Updated:
Actually, I thought I need to reshape the data to achieve this kind of graph, not so sure though. So I reshaped the like below
structure(list(AA = c("A", "A", "A", "A", "A", "B", "B", "B",
"B", "B", "C", "C", "C", "C", "C", "D", "D", "D", "D", "D"),
BB = c("P", "Q", "R", "S", "T", "P", "Q", "R", "S", "T",
"P", "Q", "R", "S", "T", "P", "Q", "R", "S", "T"), Amount = c(15089,
1589, 93751, 34167, 8831, NA, NA, NA, 1323, NA, NA, NA, 4709,
1520, 4544, 43083, 18120, 211649, 82378, 15157)), .Names = c("AA",
"BB", "Amount"), row.names = c(NA, 20L), class = "data.frame")
I tried the following code to achieve this to which the labels are overlapping
ggplot(a, aes(x = AA, y = Amount, fill = BB)) +
geom_col(position = 'stack')+
geom_text(aes(label = Amount),
position = position_stack(vjust = 0.2),
color='grey25',
size=2) +
coord_flip()
Also, when I supply this to ggploty for shiny, the graph is not coming in dashboard

Resources