Add count and labels to stacked bar plot with facet_wrap - r

I want to use ggplo2 to analyse likert scale variables. I would like to get this kind of graphic (below) but I don't know how to add labels on stacked bars and insert different counts and means for each grouping variable and facet variable (for facet_wrap).
I would be grateful for any help!
Data can be obtained from here
My code:
library(ggplot2)
library(scales)
library(RColorBrewer)
ggplot(example,aes(GroupungVar,fill=VarOfInterest)) + geom_bar(position='fill') +
scale_fill_manual(values = (brewer.pal(5, "Greens"))) +
facet_wrap(~FacetVar,ncol=1) + coord_flip() +
scale_y_continuous(labels=percent) + ylab('Percent')
What I get...
..and what I want to achieve (numbers ale not the same as in dataset). I want to have count (N) in labels of each group, percentage labels on bars and mean value on the right side (for each group of course). Percentages and mean values should be for all bars in the plot, I only add them to the first few, just to show what I mean.

I spend night with R and ggplot2 and I get what I wanted :)
library('ggplot2')
library('plyr')
library('RColorBrewer')
library(scales)
label_positions<- function(x) {
n<-length(x)
wynik<-numeric(n)
for (i in 1:n){
if (i==1) {
wynik[i]<-0+x[i]/2
}
else {
wynik[i]<-x[i]-(x[i]-x[i-1])/2
}
}
return(wynik)
}
exam1<-ddply(example,.(GroupingVar,FacetVar,VarOfInterest), 'nrow')
exam1.1<-ddply(example,.(GroupingVar,FacetVar),summarise, sr=mean(as.numeric(VarOfInterest),na.rm=T),
odch=sd(as.numeric(VarOfInterest,na.rm=T)))
exam1<-merge(exam1,exam1.1,by.x=c('GroupingVar','FacetVar'),by.y=c('GroupingVar','FacetVar'))
names(exam1)[4]<-'Count'
exam2<-mutate(exam1,cumul=ave(Count,list(GroupingVar,FacetVar),FUN=cumsum),
N=ave(cumul, list(GroupingVar,FacetVar),FUN=max),
CumSumPercent=cumul/N*100,
Freq=Count/N*100)
exam2<-mutate(exam2,cfrq = ave(CumSumPercent, list(GroupingVar,FacetVar), FUN = label_positions))
exam2$XLabel<-paste(exam2$GroupingVar,' (N=',exam2$N,')',sep='')
exam2$PosMean<-105
p<-ggplot(exam2, aes(x = Etykieta, y = Freq, fill = VarOfInterest)) +
geom_bar(stat = 'identity',colour="black") +
labs (x = "", y = "Percentage", fill=" ") +
scale_fill_brewer(name="Rating", palette="Greens", breaks = rev(levels(exam2$VarOfInterest))) +
geom_text(aes(y = cfrq, label=paste(sprintf("%.01f",Freq), "%", sep='')), size=5) +
geom_text(aes(y=PosMean,label=paste(sprintf("%.02f",sr),' (',sprintf("%.02f",odch),')',sep='')),size=5)+
facet_wrap(~FacetVar,ncol=1) +
coord_flip() + ylab('Procent odpowiedzi') +
guides(fill=guide_legend(title=NULL)) + theme_bw() +
theme(legend.position="bottom",strip.text.x=element_text(size=15,face='bold'),
axis.text.x =element_text(size=12,face='bold'), axis.text.y =element_text(size=12,face='bold'),
axis.title.x=element_text(size=15,face='bold'), axis.title.y=element_text(size=15,face='bold'),
strip.background=element_rect(colour='black'))
plot(p)
And result

For the sample sizes, I would probably just put them in the axis labels, rather than on the graph itself:
library(plyr)
example <- ddply(example,.(FacetVar,GroupungVar),
transform,
GroupingVar = paste(as.character(GroupungVar)," - (n=",length(GroupungVar),")",sep = ""))
ggplot(example,aes(GroupingVar,fill=VarOfInterest)) +
geom_bar(position='fill') +
scale_fill_manual(values = (brewer.pal(5, "Greens"))) +
facet_wrap(~FacetVar,ncol=1) +
coord_flip() +
scale_y_continuous(labels=percent) +
ylab('Percent')

Related

How to add percentages on top of an histogram when data is grouped

This is not my data (for confidentiality reasons), but I have tried to create a reproducible example using a dataset included in the ggplot2 library. I have an histogram summarizing the value of some variable by group (factor of 2 levels). First, I did not want the counts but proportions of the total, so I used that code:
library(ggplot2)
library(dplyr)
df_example <- diamonds %>% as.data.frame() %>% filter(cut=="Premium" | cut=="Ideal")
ggplot(df_example,aes(x=z,fill=cut)) +
geom_histogram(aes(y=after_stat(width*density)),binwidth=1,center=0.5,col="black") +
facet_wrap(~cut) +
scale_x_continuous(breaks=seq(0,9,by=1)) +
scale_y_continuous(labels=scales::percent_format(accuracy=2,suffix="")) +
scale_fill_manual(values=c("#CC79A7","#009E73")) +
labs(x="Depth (mm)",y="Count") +
theme_bw() + theme(legend.position="none")
It gave me this as a result.
enter image description here
The issue is that I would like to print the numeric percentages on top of the bins and haven't find a way to do so.
As I saw it done for printing counts elsewhere, I attempted to print them using stat_bin(), including the same y and label values as the y in geom_histogram, thinking it would print the right numbers:
ggplot(df_example,aes(x=z,fill=cut)) +
geom_histogram(aes(y=after_stat(width*density)),binwidth=1,center=0.5,col="black") +
stat_bin(aes(y=after_stat(width*density),label=after_stat(width*density*100)),geom="text",vjust=-.5) +
facet_wrap(~cut) +
scale_x_continuous(breaks=seq(0,9,by=1)) +
scale_y_continuous(labels=scales::percent_format(accuracy=2,suffix="")) +
scale_fill_manual(values=c("#CC79A7","#009E73")) +
labs(x="Depth (mm)",y="%") +
theme_bw() + theme(legend.position="none")
However, it does print way more values than there are bins, these values do not appear consistent with what is portrayed by the bar heights and they do not print in respect to vjust=-.5 which would make them appear slightly above the bars.
enter image description here
What am I missing here? I know that if there was no grouping variable/facet_wrap, I could use after_stat(count/sum(count)) instead of after_stat(width*density) and it seems that it would have fixed my issue. But I need the histograms for both groups to appear next to each other. Thanks in advance!
You have to use the same arguments in stat_bin as for the histogram when adding your labels to get same binning for both layers and to align the labels with the bars:
library(ggplot2)
library(dplyr)
df_example <- diamonds %>%
as.data.frame() %>%
filter(cut == "Premium" | cut == "Ideal")
ggplot(df_example, aes(x = z, fill = cut)) +
geom_histogram(aes(y = after_stat(width * density)),
binwidth = 1, center = 0.5, col = "black"
) +
stat_bin(
aes(
y = after_stat(width * density),
label = scales::number(after_stat(width * density), scale = 100, accuracy = 1)
),
geom = "text", binwidth = 1, center = 0.5, vjust = -.25
) +
facet_wrap(~cut) +
scale_x_continuous(breaks = seq(0, 9, by = 1)) +
scale_y_continuous(labels = scales::number_format(scale = 100)) +
scale_fill_manual(values = c("#CC79A7", "#009E73")) +
labs(x = "Depth (mm)", y = "%") +
theme_bw() +
theme(legend.position = "none")

Variable geom_text is overwritten when plots saved in list [duplicate]

This question already has an answer here:
List for Multiple Plots from Loop (ggplot2) - List elements being overwritten
(1 answer)
Closed 2 years ago.
I am trying to organize several dozens of plots using ggarrange, so I have setup a loop in which I save each plot in a list. Each plot differs from each other with different data, title, etc. Everything works perfectly until I try to use geom_text to place some text inside the plot. When the plots are saved in the list, each plot inherits the geom_text from the last plot in the list. I don't know how to avoid this.
my.list=vector("list", length = 2);
dt=data.table(x=c(1,100,100000),y=c(1,100,100000))
plotname=c('first','second')
for (i in 1:length(my.list)) {
my.list[[i]]=ggplot(data = dt, aes(x = x, y = y )) + geom_point(size=1.5,aes(color=c('red'))) + labs(x=NULL, y=NULL)
+ scale_color_manual(values='red')
+ theme_bw() + theme(panel.background = element_rect(fill='light grey', colour='black'),legend.position = "none")
+ geom_text(inherit.aes=FALSE,aes(x=500, y=100000, label=paste0('NRMSE:',i))) + ggtitle(paste0(plotname[i])) + coord_equal()
+ geom_abline(slope=1)
+ scale_y_log10(breaks = c(1,10,100,1000,10000,100000),limits=c(1,100000))
+ scale_x_log10(breaks = c(1,10,100,1000,10000,1000000),limits=c(1,100000))
+ labs(x=NULL, y=NULL)
+ theme_bw() + theme(panel.background = element_rect(fill='light grey', colour='black'),legend.position = "none")
}
after this I do
plotosave=ggarrange(plotlist=my.list)
Using lapply instead of forloop works fine:
my.list <- lapply(1:2, function(i) {
ggplot(data = dt, aes(x = x, y = y )) +
geom_point(size=1.5) +
labs(x=NULL, y=NULL) +
theme_bw() +
theme(panel.background = element_rect(fill='light grey', colour='black'),
legend.position = "none") +
geom_text(inherit.aes=FALSE,aes(x=50000, y=100000,
label=paste0('NRMSE:',i))) +
ggtitle(paste0(plotname[i]))
})
ggarrange(plotlist = my.list)
Note: the issue is not with ggarrange.
Roland:
The plot is build when you print the ggplot object. Anything that is not part of the data passed will be taken from the enclosing environment at exactly that time point. If you use the iterator of a for loop in the plot, it has its last value then (or any value you change it to later on). lapply avoids the issue because of the stuff explained in the Note in its documentation.
Related post:
the problem is that ggplot() waits until you print the plot to resolve the variables in the aes() command.
I don't exactly know why this occurs but if you remove aes from geom_text it works.
library(ggplot2)
my.list = vector("list", length = 2)
dt = data.table::data.table(x=c(1,100,100000),y=c(1,100,100000))
plotname = c('first','second')
for (i in 1:length(my.list)) {
my.list[[i]]= ggplot(data = dt, aes(x = x, y = y )) +
geom_point(size=1.5) +
labs(x=NULL, y=NULL) +
theme_bw() +
theme(panel.background = element_rect(fill='light grey', colour='black'),
legend.position = "none") +
geom_text(x=50000, y=100000, label=paste0('NRMSE:',i)) +
ggtitle(paste0(plotname[i]))
}
plotosave = ggpubr::ggarrange(plotlist=my.list)

ggplot reorder factors in plot without affecting legend order [duplicate]

I have produced a stacked percent barplot from the following data, which is in a csv file,
,ONE,TWO,THREE
1,2432,420,18
2,276,405,56
3,119,189,110
4,90,163,140
5,206,280,200
6,1389,1080,1075
7,3983,3258,4878
8,7123,15828,28111
9,8608,48721,52576
10,9639,44725,55951
11,8323,45695,32166
12,2496,18254,26600
13,1524,8591,18583
14,7861,1857,1680
15,10269,5165,4618
16,13560,64636,63262
using the following code
library(ggplot2)
library(reshape2)
library(scales)
data <- read.csv(file="file.csv",sep=",",header=TRUE)
data <- data[,2:ncol(data)]
datam <- melt(cbind(data,ind = sort(rownames(data))),is.var = c('ind'))
datam$ind <- as.numeric(datam$ind)
ggplot(datam,aes(x = variable, y = value,fill = factor(as.numeric(ind)))) +
geom_bar(position = "fill") + scale_y_continuous(labels =percent_format()) +
scale_fill_discrete("Barcode\nMatch") +xlab("Barcode")+ylab("Reads")
The result is
The problem is that the items in the legend are not in the same order as the stacks they represent. The colours and the numbers are right but the order is not. In other words, is there a way to invert the order of the items in the legend? Thanks
you can use a new option reverse = TRUE:
ggplot(datam,aes(x = variable, y = value,fill = factor(as.numeric(ind)))) +
geom_bar(position = "fill") + scale_y_continuous(labels =percent_format()) +
scale_fill_discrete("Barcode\nMatch") + xlab("Barcode")+ylab("Reads") +
guides(fill = guide_legend(reverse = TRUE))
Add + scale_fill_hue(breaks=c("new order 1","new order 2","new order...")) as in:
library(ggplot2)
ggplot(data=PlantGrowth, aes(x=group, fill=group)) + geom_bar() +
geom_bar(colour="black", legend=FALSE) +
scale_fill_hue(breaks=c("trt1","ctrl","trt2"))
I'd also check out http://wiki.stdout.org/rcookbook/Graphs/Legends%20(ggplot2)/ for more.
This may have changed and become easier with he new ggplot but I'm not sure.

Annotate x-axis with N in faceted plot

I'm trying to produce a boxplot of some numeric outcome broken down by treatment condition and visit number, with the number of observations in each box placed under the plot, and the visit numbers labeled as well. Here's some fake data that will serve to illustrate, and I give two examples of things I've tried that didn't quite work.
library(ggplot2)
library(plyr)
trt <- factor(rep(LETTERS[1:2],150),ordered=TRUE)
vis <- factor(c(rep(1,150),rep(2,100),rep(3,50)),ordered=TRUE)
val <- rnorm(300)
data <- data.frame(trt,vis,val)
data.sum <- ddply(data, .(vis, trt), summarise,
N=length(na.omit(val)))
mytheme <- theme_bw() + theme(panel.margin = unit(0, "lines"), strip.background = element_blank())
The below code produces a plot that has N labels where I want them. It does this by grabbing summary data from an auxiliary dataset I created. However, I couldn't figure out how to also label visit on the x-axis (ideally, below the individual box labels), or to delineate visits visually in other ways (e.g. lines separating them into panels).
plot1 <- ggplot(data) +
geom_boxplot(aes(x=vis:trt,y=val,group=vis:trt,colour=trt), show.legend=FALSE) +
scale_x_discrete(labels=paste(data.sum$trt,data.sum$N,sep="\n")) +
labs(x="Visit") + mytheme
The plot below is closer to what I want than the one above, in that it has a nice hierarchy of treatments and visits, and a pretty format delineating the visits. However, for each panel it grabs the Ns from the first row in the summary data that matches the treatment condition, because it doesn't "know" that each facet needs to use the row corresponding to that visit.
plot2 <- ggplot(data) + geom_boxplot(aes(x=trt,y=val,group=trt,colour=trt), show.legend=FALSE) +
facet_wrap(~ vis, drop=FALSE, switch="x", nrow=1) +
scale_x_discrete(labels=paste(data.sum$trt,data.sum$N,sep="\n")) +
labs(x="Visit") + mytheme
One workaround is to manipulate your dataset so your x variable is the interaction between trt and N.
Working off what you already have, you can add N to the original dataset via a merge.
test = merge(data, data.sum)
Then make a new variable that is the combination of trt and N.
test = transform(test, trt2 = paste(trt, N, sep = "\n"))
Now make the plot, using the new trt2 variable on the x axis and using scales = "free_x" in facet_wrap to allow for the different labels per facet.
ggplot(test) +
geom_boxplot(aes(x = trt2, y = val, group = trt, colour = trt), show.legend = FALSE) +
facet_wrap(~ vis, drop = FALSE, switch="x", nrow = 1, scales = "free_x") +
labs(x="Visit") +
mytheme
Since this functionality isn't built in a good work-around is grid.extra:
library(gridExtra)
p1 <- ggplot(data[data$vis==1,]) + geom_boxplot(aes(x=trt,y=val,group=trt,colour=trt), show.legend=FALSE) +
#facet_wrap(~ vis, drop=FALSE, switch="x", nrow=1) +
scale_x_discrete(labels=lb[1:2]) + #paste(data.sum$trt,data.sum$N,sep="\n")
labs(x="Visit") + mytheme
p2 <- ggplot(data[data$vis==2,]) + geom_boxplot(aes(x=trt,y=val,group=trt,colour=trt), show.legend=FALSE) +
#facet_wrap(~ vis, drop=FALSE, switch="x", nrow=1) +
scale_x_discrete(labels=lb[3:4]) + #paste(data.sum$trt,data.sum$N,sep="\n")
labs(x="Visit") + mytheme
p3 <- ggplot(data[data$vis==3,]) + geom_boxplot(aes(x=trt,y=val,group=trt,colour=trt), show.legend=FALSE) +
#facet_wrap(~ vis, drop=FALSE, switch="x", nrow=1) +
scale_x_discrete(labels=lb[5:6]) + #paste(data.sum$trt,data.sum$N,sep="\n")
labs(x="Visit") + mytheme
grid.arrange(p1,p2,p3,nrow=1,ncol=3) # fully customizable
Related:
Varying axis labels formatter per facet in ggplot/R
You can also make them vertical or do other transformations:

Time series and legend with ggplot2

Thi is my data:
x <- c("22-01-16","26-01-16","28-01-16","01-02-16","05-02-16","16-02-16","17-03-16","18-03-16","04-04-16","05-04-16","06-04-16","08-04-16")
y <- c(97.14,75,54.44,70.45,110.56,66.3,178.76,171.90,419.41,424,518.63,242.17)
z <- c("ADCP","ADCP","ADCP","ADCP","ADCP","ADCP","ADCP","ADCP","ADCP","ADCP","ADCP","ADCP")
So I make the dataframe
Datos <- data.frame(x)
Datos$Caudal <- y
Datos$Tipo <- z
Datos$Fecha <- as.Date(Datos$x, "%d-%m-%y")
and plot using ggplot2
Serie_Caudal <-
ggplot(Datos, aes(Fecha, Caudal)) +
geom_line(size=1, colour="red") +
geom_point(shape=23,size=1, colour="blue",fill = "blue") +
scale_x_date(date_breaks = "1 week",labels = date_format("%d/%b"))+
xlab("Fecha") + ylab(bquote('Caudal ('*m^3~s^-1*')')) +
ggtitle("Caudales Diarios (01-06/2016)")
Serie_Caudal
I try to plot a legend but i can´t the way, i try use Melt but my data change in a way i can´t plot. Also try scale_fill_manual but the legend don´t show up. I want to know if there is a way to put a legend manualy.
The legend must show a blue point and ADCP
This shows only a blue dot.
ggplot(aes(Fecha, Caudal, colour = "ADCP"), data = Datos) +
geom_point() +
geom_point(shape=23,size=1,color="blue",fill = "blue") +
scale_color_manual(values = c("ADCP"="blue"),name = "") +
geom_line(color="red", size=1) +
scale_x_date(date_breaks = "1 week",labels = date_format("%d/%b")) +
xlab("Fecha") + ylab(bquote('Caudal ('*m^3~s^-1*')')) +
ggtitle("Caudales Diarios (01-06/2016)")

Resources