ggplot: geom_text does not center-align above geom_col() - r

Please find my data p below. I had to include 100 samples to reproduce the error.
Question: why is geom_text not printing consistently center-aligned above the geom_col bars - e.g. 21 and 28 in All in the right SSA-facet? I tried adjusting position_dodge2 and vjust, but that did not work.
This thread addressed the issue but did not solve my problem.
My script
# Build the plotting data inline: per-study counts of each response level,
# with pooled per-treatment counts appended under the study label "All".
ggplot(p %>%
mutate(nystudie=as.character(study),
best.resp =as.factor(response)) %>%
group_by(nystudie,best.resp) %>%
summarise(N=n(),Val=unique(treatment)) %>%
# Pooled rows: counts per response level and treatment, relabelled as "All".
bind_rows(p %>% filter(response %in% 1:4, treatment!="Control") %>% droplevels() %>%
mutate(nystudie=as.character(study),
best.resp =as.factor(response)) %>%
group_by(best.resp,treatment) %>% summarise(N=n()) %>%
mutate(nystudie="All") %>%
rename(Val=treatment)),
aes(nystudie, N, color = best.resp, fill= best.resp)) +
# Bars are dodged with preserve = "single" and padding = 0.1.
geom_col(position = position_dodge2(preserve = "single", padding = 0.1)) +
# One panel per treatment (Val), each with its own axis ranges.
facet_wrap(~Val,ncol = 2, scales="free") +
scale_fill_grey(name="") +
scale_color_grey(name="") +
scale_y_continuous(breaks = seq(0,120,20)) +
# NOTE(review): the labels use position_dodge2(.5), which is not the same
# position specification as the one passed to geom_col() above.
geom_text(aes(label=N),position = position_dodge2(.5), vjust=0, fontface=2, cex=4.5, show.legend = F) +
theme(strip.background = element_blank(),
strip.text = element_text(color = "black", size = 15),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
plot.margin = unit(c(1,3,1,1), "lines"))
Data
p <- structure(list(study = structure(c(8L, 12L, 12L, 12L, 4L, 4L,
1L, 11L, 11L, 13L, 1L, 13L, 14L, 9L, 9L, 10L, 12L, 11L, 4L, 11L,
11L, 12L, 8L, 11L, 13L, 11L, 6L, 15L, 6L, 4L, 7L, 13L, 11L, 4L,
1L, 6L, 1L, 11L, 16L, 1L, 10L, 15L, 1L, 11L, 1L, 6L, 1L, 11L,
12L, 11L, 13L, 16L, 1L, 8L, 11L, 10L, 4L, 4L, 12L, 10L, 6L, 15L,
12L, 14L, 12L, 1L, 1L, 16L, 12L, 12L, 8L, 7L, 1L, 1L, 13L, 13L,
14L, 9L, 14L, 2L, 11L, 4L, 1L, 16L, 15L, 11L, 9L, 4L, 13L, 12L,
6L, 16L, 4L, 1L, 15L, 6L, 4L, 1L, 9L, 2L), .Label = c("1", "2",
"3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14",
"15", "22"), class = "factor"), response = c("1", "3", "4", "4",
"3", "3", "3", "4", "4", "4", "4", "4", "3", "4", "4", "4", "3",
"4", "4", "4", "4", "3", "1", "4", "4", "4", "3", "4", "3", "3",
"4", "4", "4", "3", "4", "4", "4", "4", "4", "3", "4", "4", "3",
"4", "4", "3", "3", "4", "3", "4", "4", "4", "4", "3", "3", "4",
"4", "3", "3", "4", "3", "4", "4", "4", "3", "3", "4", "4", "4",
"4", "2", "4", "4", "4", "4", "4", "3", "4", "3", "3", "4", "4",
"4", "4", "4", "4", "3", "3", "4", "4", "3", "4", "4", "4", "4",
"3", "3", "4", "2", "3"), treatment = structure(c(2L, 2L, 2L,
2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L,
1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L,
1L), .Label = c("SSTR", "SSA"), class = "factor")), row.names = c(NA,
-100L), class = "data.frame")

When adding labels you have to take care to use the same positioning as for geom_col. To align the labels with the bars use position_dodge2(preserve = "single", width = .9, padding = 0.1):
library(ggplot2)
library(dplyr)
# Per-study counts of each response level. `Val` carries the treatment so it
# can be used as the facet variable.
# NOTE(review): `unique(treatment)` assumes one treatment per study/response
# group -- confirm against the full data.
d1 <- p %>%
  mutate(
    nystudie = as.character(study),
    best.resp = as.factor(response)
  ) %>%
  group_by(nystudie, best.resp) %>%
  # `.groups = "drop"` returns an ungrouped result and avoids the
  # "regrouping output" message.
  summarise(N = n(), Val = unique(treatment), .groups = "drop")

# Pooled counts per response level and treatment, shown as the study "All".
d2 <- p %>%
  filter(response %in% 1:4, treatment != "Control") %>%
  droplevels() %>%
  mutate(
    nystudie = as.character(study),
    best.resp = as.factor(response)
  ) %>%
  group_by(best.resp, treatment) %>%
  summarise(N = n(), .groups = "drop") %>%
  mutate(nystudie = "All") %>%
  rename(Val = treatment)

d <- bind_rows(d1, d2)

ggplot(d, aes(nystudie, N, color = best.resp, fill = best.resp)) +
  geom_col(position = position_dodge2(preserve = "single", padding = 0.1)) +
  facet_wrap(~Val, ncol = 2, scales = "free") +
  scale_fill_grey(name = "") +
  scale_color_grey(name = "") +
  scale_y_continuous(breaks = seq(0, 120, 20)) +
  # The labels must be dodged like the bars. Text has no width of its own, so
  # the dodge width is given explicitly alongside the same `preserve` and
  # `padding` settings used for geom_col().
  geom_text(
    aes(label = N),
    position = position_dodge2(preserve = "single", width = 0.9, padding = 0.1),
    vjust = 0, fontface = 2, size = 4.5, show.legend = FALSE
  ) +
  theme(
    strip.background = element_blank(),
    strip.text = element_text(color = "black", size = 15),
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    plot.margin = unit(c(1, 3, 1, 1), "lines")
  )

Related

How can I increase room for ggplot y-axis?

My plot is cutting off a portion of the y-axis when viewed and when saved as a jpeg. I am sure this is an easy fix, but I can't seem to figure it out. Any advice? Thanks
# Stacked bars of `freq` per Trial, filled by Treatment, with percent-formatted
# y-axis labels.
treat_freqplot <- ggplot(Treat_occur, aes(Trial, freq, fill = Treatment)) +
  geom_bar(stat = "identity", alpha = 1) +
  # facet_grid(~Trial, scales = "free") +
  # NOTE(review): expand = c(0, 0) removes all padding around the y range;
  # possibly related to the clipped axis reported for this plot -- confirm.
  scale_y_continuous(
    labels = scales::percent, breaks = seq(0, 100, 1 / 10), expand = c(0, 0)
  ) +
  labs(x = "Trial", y = "Proportional Deer Use") +
  # scale_x_discrete(guide = guide_axis(n.dodge = 2)) +
  scale_fill_manual(
    "Treatment",
    values = c("dark Green", "dark gray", "fire brick 4", "dark blue")
  ) +
  theme(
    axis.text = element_text(size = 30, colour = "black", vjust = 0.3),
    axis.title = element_text(size = 30, face = "bold")
  ) +
  theme(
    legend.position = "right",
    legend.background = element_rect(
      color = "black", linetype = "solid", fill = "gray100"
    ),
    legend.key.size = unit(1, "cm"),
    legend.text = element_text(size = 25),
    legend.title = element_text(size = 30)
  ) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_rect(fill = NA, color = "black")
  )
treat_freqplot
# Saving is disabled. Both lines of the call are commented out; previously only
# the first line was, leaving a dangling continuation that does not parse.
# ggsave('C:\\Projects\\CaptiveStudy\\Analysis\\Output2\\treatfreq_plot.jpeg',
#        width = 20, height = 12, units = "in", treat_freqplot)
structure(list(Trial = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 3L, 3L), .Label = c("1", "2", "3", "4", "5", "6", "7", "8",
"9", "10", "11", "12", "13", "14"), class = "factor"), Treatment = structure(c(1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L), .Label = c("Control", "30%Shade",
"60%Shade", "90%Shade"), class = "factor"), Use = c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L), n = c(2L, 7L, 5L, 30L, 1L, 2L, 6L,
45L, 9L, 3L), freq = c(0.037037037037037, 0.12962962962963, 0.0925925925925926,
0.555555555555556, 0.0185185185185185, 0.037037037037037, 0.111111111111111,
0.833333333333333, 0.166666666666667, 0.0555555555555556)), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -10L), groups = structure(list(
Trial = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L
), .Label = c("1", "2", "3", "4", "5", "6", "7", "8", "9",
"10", "11", "12", "13", "14"), class = "factor"), Treatment = structure(c(1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L), .Label = c("Control",
"30%Shade", "60%Shade", "90%Shade"), class = "factor"), .rows = structure(list(
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -10L), .drop = TRUE))

R ggplot2 show significance between subgroups

I am trying to show the significance levels within a group consisting of two factors, but I seem to always get the significance levels between groups which is not what I want.
df <- structure(list(Datum = structure(c(2L, 1L, 3L, 1L, 1L, 3L, 1L,
2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 3L, 1L, 2L), .Label = c("2021-04-08",
"2021-05-17", "2021-07-07"), class = "factor"), Soll = c("1202",
"172", "119", "1192", "119", "1189", "1189", "552", "1189", "1192",
"2484", "119", "1189", "1189", "172", "552", "1192", "172", "1189",
"172"), Plot = c("6", "5", "3", "4", "6", "5", "4", "5", "7",
"8", "3", "6", "6", "1", "8", "3", "1", "3", "8", "4"), Entfernung = structure(c(2L,
1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L,
1L, 2L, 2L), .Label = c("2", "5"), class = "factor"), Behandlung = structure(c(1L,
1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L,
2L, 1L, 1L), .Label = c("a", "b"), class = "factor"), DGUnkraut = c(3.5,
0, 2.8, 3, 0.3, 2, 1, 3, 0, 0.3, 10, 0, 1.7, 2.5, 0.2, 0.3, 9,
0.3, 2.5, 0.2)), class = "data.frame", row.names = c(NA, -20L
))
This is what I have tried so far:
library(tidyverse)
library(ggsignif)
# Boxplots of DGUnkraut by Entfernung, coloured by Datum, with a single
# significance bracket comparing Entfernung "2" against "5".
ggplot(
  group_by(df, Datum, Entfernung),
  aes(x = Entfernung, y = DGUnkraut, colour = Datum)
) +
  geom_boxplot() +
  geom_signif(
    map_signif_level = TRUE,
    comparisons = list(c("2", "5"))
  )
So I would like to see the significant differences between "2" and "5" for each of the three dates, so for example that the significance level of the red boxplot with the date "2021-04-08" and Entfernung = "2" is compared to the one where Entfernung = "5".
Facets don't seem to work with {ggsignif}, but you could fake them, by looping over your dates, and then patching the plots together.
Below is one way:
library(ggsignif)
library(patchwork)
# Build one boxplot per Datum level and combine them with patchwork instead of
# faceting, so each date gets its own "2" vs "5" significance bracket.
df %>%
  split(., .$Datum) %>%
  map(~ {
    ggplot(., aes(Entfernung, DGUnkraut, color = Datum)) +
      geom_boxplot() +
      geom_signif(
        comparisons = list(c("2", "5")),
        map_signif_level = TRUE
      ) +
      # Keep factor levels that are absent from a given date's subset so all
      # panels share the same x axis ...
      scale_x_discrete(drop = FALSE) +
      # ... and the same Datum -> colour mapping. With the default
      # drop = TRUE every per-date plot would use the first palette colour.
      scale_color_discrete(drop = FALSE)
  }) %>%
  wrap_plots() + plot_layout(guides = "collect")

Trying to normalize data, but got undefined columns selected error in R

I have a dataset that contains a variable nr.employed. It is numeric.
I am normalizing it using this code:
markting_train_dim_deleted =
"","custAge","profession","marital","schooling","default","contact","month","campaign","previous","poutcome","cons.price.idx","cons.conf.idx","euribor3m","nr.employed","pmonths","pastEmail","responded"
"1",0.486842105263158,"1","3","7","2","1","8",0,0,"2",0.389321901792677,0.368200836820084,0.806393108138744,5195.8,999,0,"1"
"2",0.342105263157895,"2","2","1","1","1","4",0,0,"2",0.669134840218243,0.338912133891213,0.980729993198821,5228.1,999,0,"1"
"3",0.315789473684211,"10","2","4","1","2","7",0,0,"2",0.698752922837102,0.602510460251046,0.95737927907504,5191,999,0,"1"
"4",0.486842105263158,"5","1","1","2","1","4",0.0256410256410256,0,"2",0.669134840218243,0.338912133891213,0.981183405123555,5228.1,999,0,"1"
"5",0.215870043275927,"1","1","7","1","1","7",0.102564102564103,0.166666666666667,"1",0.26968043647701,0.192468619246862,0.148945817274994,5099.1,999,1,"1"
"6",0.381578947368421,"2","2","1","1","2","7",0,0,"2",0.698752922837102,0.602510460251046,0.95737927907504,5191,999,0,"1"
# Names of the columns to min-max normalise.
# NOTE(review): the entry " nr.employed" below has a leading space inside the
# quotes, so it is not the same string as the column name nr.employed.
cnames=c("custAge","campaign","previous","cons.price.idx","cons.conf.idx",
"euribor3m"," nr.employed","pmonths","pastEmail")
# For each listed column: print its name and values, then rescale it in place.
for(i in cnames){
print(i)
print(markting_train_dim_deleted[,i])
# Rescaling: (x - min(x)) divided by max(x - min(x)).
markting_train_dim_deleted[,i]=
(markting_train_dim_deleted[,i]-min(markting_train_dim_deleted[,i]))/
(max(markting_train_dim_deleted[,i]-min(markting_train_dim_deleted[,i])))
}
After processing euribor3m, it prints nr.employed and then throws an exception:
Error in `[.data.frame`(markting_train_dim_deleted, , i) :
undefined columns selected
I have looked at the structure. It's a numeric datatype with no missing values.
output
dput(head(markting_train_dim_deleted))
structure(list(custAge = c(0.486842105263158, 0.342105263157895,
0.315789473684211, 0.486842105263158, 0.215870043275927, 0.381578947368421
), profession = structure(c(1L, 2L, 10L, 5L, 1L, 2L), .Label = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12"), class = "factor"),
marital = structure(c(3L, 2L, 2L, 1L, 1L, 2L), .Label = c("1",
"2", "3", "4"), class = "factor"), schooling = structure(c(7L,
1L, 4L, 1L, 7L, 1L), .Label = c("1", "2", "3", "4", "5",
"6", "7", "8"), class = "factor"), default = structure(c(2L,
1L, 1L, 2L, 1L, 1L), .Label = c("1", "2", "3"), class = "factor"),
contact = structure(c(1L, 1L, 2L, 1L, 1L, 2L), .Label = c("1",
"2"), class = "factor"), month = structure(c(8L, 4L, 7L,
4L, 7L, 7L), .Label = c("1", "2", "3", "4", "5", "6", "7",
"8", "9", "10"), class = "factor"), campaign = c(0, 0, 0,
0.0256410256410256, 0.102564102564103, 0), previous = c(0,
0, 0, 0, 0.166666666666667, 0), poutcome = structure(c(2L,
2L, 2L, 2L, 1L, 2L), .Label = c("1", "2", "3"), class = "factor"),
cons.price.idx = c(0.389321901792677, 0.669134840218243,
0.698752922837102, 0.669134840218243, 0.26968043647701, 0.698752922837102
), cons.conf.idx = c(0.368200836820084, 0.338912133891213,
0.602510460251046, 0.338912133891213, 0.192468619246862,
0.602510460251046), euribor3m = c(0.806393108138744, 0.980729993198821,
0.95737927907504, 0.981183405123555, 0.148945817274994, 0.95737927907504
), nr.employed = c(5195.8, 5228.1, 5191, 5228.1, 5099.1,
5191), pmonths = c(999, 999, 999, 999, 999, 999), pastEmail = c(0L,
0L, 0L, 0L, 1L, 0L), responded = structure(c(1L, 1L, 1L,
1L, 1L, 1L), .Label = c("1", "2"), class = "factor")), .Names = c("custAge",
"profession", "marital", "schooling", "default", "contact", "month",
"campaign", "previous", "poutcome", "cons.price.idx", "cons.conf.idx",
"euribor3m", "nr.employed", "pmonths", "pastEmail", "responded"
), row.names = c(NA, 6L), class = "data.frame")
The mistake is simply having " nr.employed" (with a space) rather than "nr.employed" in cnames.
Also, something like
# Min-max rescale every column named in `cnames` to [0, 1].
# lapply() always returns a list of columns, which assigns back into the data
# frame column by column; sapply() would simplify to a matrix whose shape
# depends on the input.
# NOTE(review): a constant column gives 0 / 0 = NaN -- confirm whether that
# needs special handling.
markting_train_dim_deleted[cnames] <- lapply(
  markting_train_dim_deleted[cnames],
  function(x) (x - min(x)) / (max(x) - min(x))
)
would make the normalization easier to read.

Visualization of Groups of Poisson random samples using ggridges

I have two sets of data, all in one data frame. The first set is related to data collected in Location 1 and the second set is collected in Location 2. Each location has different count data (column value) for 5 months.
# DataSet
-----------------
rp_data <- structure(list(Month = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), .Label = c("1",
"2", "3", "4", "5"), class = "factor"), location = c("1", "1",
"1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1",
"1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1",
"1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1",
"1", "1", "1", "1", "1", "1", "1", "1", "1", "2", "2", "2", "2",
"2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2",
"2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2",
"2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2",
"2", "2", "2", "2", "2", "2", "2"), value = c(0L, 1L, 1L, 1L,
2L, 1L, 0L, 0L, 1L, 1L, 3L, 2L, 1L, 4L, 1L, 3L, 1L, 1L, 1L, 1L,
2L, 2L, 1L, 0L, 2L, 4L, 3L, 5L, 5L, 0L, 4L, 3L, 3L, 4L, 2L, 5L,
2L, 3L, 10L, 6L, 5L, 6L, 4L, 6L, 4L, 5L, 6L, 5L, 3L, 7L, 1L,
1L, 1L, 1L, 0L, 0L, 2L, 1L, 2L, 0L, 2L, 3L, 4L, 1L, 2L, 1L, 2L,
0L, 2L, 2L, 4L, 4L, 5L, 1L, 4L, 5L, 4L, 5L, 1L, 4L, 3L, 7L, 7L,
4L, 2L, 5L, 4L, 1L, 5L, 3L, 7L, 3L, 4L, 8L, 5L, 7L, 1L, 1L, 6L,
3L)), .Names = c("Month", "location", "value"), row.names = c(NA,
-100L), class = "data.frame")
I used this example below, as illustrated on the ggridges examples webpage, to display the various count values across different months.
# Plot 1 , filtering data related to location = 1
#---------------
# Ridgeline histogram (bin width 1) of `value` for location "1", one ridge per
# Month; every non-empty bin is labelled with its count.
ggplot(
  rp_data[rp_data$location == "1", ],
  aes(x = value, y = Month, group = Month)
) +
  geom_density_ridges2(
    mapping = aes(fill = Month),
    stat = "binline", binwidth = 1, scale = 0.95
  ) +
  geom_text(
    mapping = aes(
      label = ifelse(..count.. > 0, ..count.., ""),
      # Place each label at its bar's top, scaled like the ridges (0.95).
      y = group + 0.95 * (..count.. / max(..count..))
    ),
    stat = "bin", binwidth = 1,
    colour = "white", size = 3, vjust = 1.4
  ) +
  scale_x_continuous(
    name = "random value",
    breaks = 0:12, limits = c(-0.5, 13), expand = c(0, 0)
  ) +
  scale_y_discrete(
    name = "Month",
    labels = c("5.0", "4.0", "3.0", "2.0", "1.0"),
    expand = c(0.01, 0)
  ) +
  scale_fill_cyclical(values = c("#0000B0", "#7070D0")) +
  ggtitle(
    "Poisson random samples location 1 different Month",
    subtitle = "sample size n=10"
  ) +
  guides(y = "none") +
  theme_ridges(grid = FALSE) +
  theme(
    axis.title.x = element_text(hjust = 0.5),
    axis.title.y = element_text(hjust = 0.5)
  )
# Plot 2 , filtering data related to location = 2
#---------------
# Same ridgeline histogram as for location 1, restricted to location "2":
# one binned ridge per Month with the count printed in each non-empty bin.
ggplot(
  rp_data[rp_data$location == "2", ],
  aes(x = value, y = Month, group = Month)
) +
  geom_density_ridges2(
    mapping = aes(fill = Month),
    stat = "binline", binwidth = 1, scale = 0.95
  ) +
  geom_text(
    mapping = aes(
      label = ifelse(..count.. > 0, ..count.., ""),
      y = group + 0.95 * (..count.. / max(..count..))
    ),
    stat = "bin", binwidth = 1,
    colour = "white", size = 3, vjust = 1.4
  ) +
  scale_x_continuous(
    name = "random value",
    breaks = 0:12, limits = c(-0.5, 13), expand = c(0, 0)
  ) +
  scale_y_discrete(
    name = "Month",
    labels = c("5.0", "4.0", "3.0", "2.0", "1.0"),
    expand = c(0.01, 0)
  ) +
  scale_fill_cyclical(values = c("#0000B0", "#7070D0")) +
  ggtitle(
    "Poisson random samples location 2 different Month",
    subtitle = "sample size n=10"
  ) +
  guides(y = "none") +
  theme_ridges(grid = FALSE) +
  theme(
    axis.title.x = element_text(hjust = 0.5),
    axis.title.y = element_text(hjust = 0.5)
  )
Result for plot 1:
My question is how can I combine these two plots, sort of like an overlay plot as shown in this example:
I don't want to plot them in two separate plots.
You need to create a grouping variable that contains both Month and location. You can do that by using paste0(Month, location). For now, I'm leaving out the text labels, though they may be possible with a little more thought as well. (But I think they'd make the figure too busy.)
# Overlay both locations in one ridgeline histogram: each Month/location pair
# is its own group and fill level, drawn semi-transparent so overlaps show.
ggplot(
  rp_data,
  aes(
    x = value, y = Month,
    group = paste0(Month, location),
    fill = paste0(Month, location)
  )
) +
  geom_density_ridges2(
    stat = "binline", binwidth = 1,
    scale = 0.95, alpha = 0.7
  ) +
  scale_x_continuous(
    breaks = 0:12, limits = c(-0.5, 13),
    expand = c(0, 0), name = "random value"
  ) +
  scale_y_discrete(
    expand = c(0.01, 0), name = "Month",
    labels = c("5.0", "4.0", "3.0", "2.0", "1.0")
  ) +
  # Four colours cycle over the Month/location levels: two blues for
  # location 1 and two reds for location 2 in alternating months.
  scale_fill_cyclical(values = c(
    "#0000B0", "#B00000",
    "#7070D0", "#FC5E5E"
  )) +
  # The plot shows both locations, so the title no longer says "location 1".
  labs(
    title = "Poisson random samples, locations 1 and 2, different Month",
    subtitle = "sample size n=10"
  ) +
  guides(y = "none") +
  # Argument spelled out in full instead of relying on partial matching of
  # `center`.
  theme_ridges(grid = FALSE, center_axis_labels = TRUE)
Edit: Now with text labels.
# Overlay both locations in one ridgeline histogram and print the bin counts,
# coloured by location.
ggplot(
  rp_data,
  aes(
    x = value, y = Month,
    group = paste0(Month, location),
    fill = paste0(Month, location)
  )
) +
  geom_density_ridges2(
    stat = "binline", binwidth = 1, scale = 0.95, alpha = 0.7
  ) +
  geom_text(
    stat = "bin",
    aes(
      # Groups come in Month/location pairs, so ceiling(group / 2) maps both
      # groups of a month back onto that month's baseline.
      y = ceiling(group / 2) + 0.95 * (..count.. / max(..count..)),
      label = ifelse(..count.. > 0, ..count.., ""),
      color = location
    ),
    vjust = 1.4, size = 3, binwidth = 1, fontface = "bold"
  ) +
  scale_x_continuous(
    breaks = 0:12, limits = c(-0.5, 13), expand = c(0, 0),
    name = "random value"
  ) +
  scale_y_discrete(
    expand = c(0.01, 0), name = "Month",
    labels = c("5.0", "4.0", "3.0", "2.0", "1.0")
  ) +
  scale_fill_cyclical(
    values = c("#0000B0", "#B00000", "#7070D0", "#FC5E5E")
  ) +
  scale_color_cyclical(values = c("white", "black")) +
  # The plot shows both locations, so the title no longer says "location 1".
  labs(
    title = "Poisson random samples, locations 1 and 2, different Month",
    subtitle = "sample size n=10"
  ) +
  guides(y = "none") +
  # Argument spelled out in full instead of relying on partial matching of
  # `center`.
  theme_ridges(grid = FALSE, center_axis_labels = TRUE)
Again, not sure it's a good idea, but there you go.

R statistics ggplot How to order x-axis of stacked bars from high to low based on a factor level of the stack

I am trying to arrange the order of stacked bars using the following data frame:
DF <- structure(list(Group = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L,
1L, 2L, 1L),
.Label = c("1", "2"), class = "factor"),
Response = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("1", "2", "3"
), class = "factor"), Count = c(531L, 472L, 374L, 326L, 207L,
168L, 76L, 60L, 51L, 43L, 37L, 18L, 6L, 0L, 247L, 149L, 86L,
48L, 45L, 36L, 29L, 14L, 10L, 4L, 3L, 3L, 0L, 0L, 531L, 230L,
173L, 93L, 87L, 76L, 30L, 29L, 29L, 22L, 13L, 3L, 2L, 0L),
Percent = c(85.23, 75.76, 60.03, 52.33, 33.23, 26.97, 12.2, 9.63, 8.19, 6.9, 5.94,
2.89, 0.96, 0, 39.65, 23.92, 13.8, 7.7, 7.22, 5.78, 4.65, 2.25,
1.61, 0.64, 0.48, 0.48, 0, 0, 85.23, 36.92, 27.77, 14.93, 13.96,
12.2, 4.82, 4.65, 4.65, 3.53, 2.09, 0.48, 0.32, 0),
Items = structure(c(4L, 2L, 3L, 5L, 7L, 1L, 4L, 5L, 2L, 3L, 7L, 1L, 6L, 6L, 7L, 1L, 3L,
5L, 2L, 7L, 1L, 4L, 6L, 3L, 2L, 5L, 4L, 6L, 6L, 1L, 5L, 7L, 3L,
6L, 2L, 1L, 3L, 2L, 5L, 7L, 4L, 4L),
.Label = c("A", "B", "C", "D", "E", "F", "G"), class = "factor")),
.Names = c("Group", "Response", "Count", "Percent", "Items"),
row.names = c("18",
"36", "2", "8", "24", "42", "17", "7", "35", "1", "23", "41",
"54", "53", "26", "44", "4", "10", "38", "25", "43", "20", "56",
"3", "37", "9", "19", "55", "58", "46", "12", "28", "6", "57",
"40", "45", "5", "39", "11", "27", "22", "21"), class = "data.frame")
library(ggplot2)
# Stacked bars of Percent per Item, one fill colour per Response level, with
# the levels relabelled Negative / Neutral / Positive in the legend.
cPalette <- c("#F8766D", "#619CFF", "#00BA38")
ggplot(
  DF,
  aes(x = Items, y = Percent, fill = Response, order = Response)
) +
  geom_col() +
  labs(title = "Acceptance", x = "Items", y = "Percent") +
  scale_fill_manual(
    values = cPalette,
    breaks = c("1", "2", "3"),
    labels = c("Negative", "Neutral", "Positive")
  ) +
  theme(
    text = element_text(size = 15, color = "blue", face = "bold"),
    axis.text.x = element_text(color = "black", face = "bold"),
    axis.text.y = element_text(color = "black", face = "bold")
  )
I would like to have the bars on the x-axis going from left to right:
D,B,C,E,G,A,F instead of alphabetically A,B,C,D,E,F,G
This should not be ordered manually because it depends on %age contribution
of the factor within the stack.
Extensive search of SO gave me this link which comes closest to what I wish to achieve.
How does one get the bars to display left to right in decreasing order of negative Responses? I have been stuck on this problem for the last couple of days.
You can order the levels of Items by the percentage, then plot
## Order the Items by their total Percent for Response "1"
# Response is a factor, so it is compared against its label "1" explicitly.
agg <- aggregate(Percent ~ Items, data = DF[DF$Response == "1", ], sum)
# Re-level Items from highest to lowest total so the x axis follows that order.
DF$Items <- factor(
  DF$Items,
  levels = agg[order(agg$Percent, decreasing = TRUE), "Items"]
)

## The plot as you had it
cPalette <- c("#F8766D", "#619CFF", "#00BA38")
# NOTE(review): `order` may not be a recognised aesthetic in current ggplot2
# releases -- confirm whether it can be dropped.
ggplot(DF, aes(x = Items, y = Percent, fill = Response, order = Response)) +
  geom_col() +
  labs(title = "Acceptance", x = "Items", y = "Percent") +
  scale_fill_manual(
    values = cPalette,
    breaks = c("1", "2", "3"),
    labels = c("Negative", "Neutral", "Positive")
  ) +
  theme(
    text = element_text(size = 15, color = "blue", face = "bold"),
    axis.text.x = element_text(color = "black", face = "bold"),
    axis.text.y = element_text(color = "black", face = "bold")
  )

Resources