Percents on top of stacked bar graph R - r

I am trying to create a stacked bar graph in ggplot2 with percent shares on top of them. After looking at many other posts, I couldn't figure out a solution, so here are some sample data:
# load some libraries
library(ggplot2)
# Sample data: one row per (type, year). Each count_* column is only filled
# for its own type; sum_year_AB is the combined A + B count for the year and
# is repeated on both rows of that year.
df <- data.frame(
  type = c("A", "A", "B", "B"),
  year = c(2001, 2002, 2001, 2002),
  count_A = c(3, 2, NA, NA),
  count_B = c(NA, NA, 8, 1),
  sum_year_AB = c(11, 3, 11, 3),
  total_count_with_irrelevant_types = c(13, 14, 19, 23)
)

# create single percentage variable for top of bar
df$percent_AB_year <-
  (df$sum_year_AB / df$total_count_with_irrelevant_types) * 100
# Use the full column name: `df$percent_AB` only resolved through `$` partial
# matching, which breaks silently once another column shares the prefix and
# does not work at all on tibbles.
df$percent_AB_year <- round(df$percent_AB_year, 1)
df$percent_final <- paste0(df$percent_AB_year, "%")

# Blank the values on the type "B" rows so that only the "A" row of each year
# carries a label.
df$percent_AB_year <- ifelse(df$type == "B", NA, df$percent_AB_year)
df$percent_final <- ifelse(df$type == "B", NA, df$percent_final)
Here is my code for the bar graph:
# Stacked bars of sum_year_AB per year, filled by type. The text layer uses
# the default position, so labels sit at the raw y value, not the stack top.
ggplot(df, aes(x = year, y = sum_year_AB, fill = type)) +
  geom_bar(stat = "identity", position = "stack", width = 0.9) +
  labs(
    title = "Count by Year",
    x = "Year",
    y = "Count",
    fill = "Type"
  ) +
  scale_x_continuous(breaks = seq(1999, 2003, by = 1)) +
  geom_text(aes(label = percent_final), size = 3) +
  scale_fill_grey(start = 0.4, end = 0.6)
And here is the output of the graph:
How do I put the percents on top?

Use position = "stack" inside geom_text too:
# Same chart, but the text layer is stacked like the bars so each label is
# positioned at the top of its segment; the negative vjust lifts it above.
ggplot(df, aes(x = year, y = sum_year_AB, fill = type)) +
  geom_col(position = "stack", width = 0.9) +
  labs(
    title = "Count by Year",
    x = "Year",
    y = "Count",
    fill = "Type"
  ) +
  scale_x_continuous(breaks = seq(1999, 2003, by = 1)) +
  geom_text(
    aes(label = percent_final),
    position = "stack",
    vjust = -0.2,
    size = 3
  ) +
  scale_fill_grey(start = 0.4, end = 0.6)

To avoid confusion, I would include it in the bar plot rather than placing it at the top (because the other percent_final values in your df are NA).
Sample code:
library(ggplot2)
library(ggthemes)
# Variant that places each label in the vertical middle of its own segment
# (position_stack(vjust = 0.5)) instead of above the bar.
ggplot(df, aes(x = year, y = sum_year_AB, fill = type)) +
  geom_bar(stat = "identity", position = "stack", width = 0.9) +
  labs(
    title = "Count by Year",
    x = "Year",
    y = "Count",
    fill = "Type"
  ) +
  scale_x_continuous(breaks = seq(1999, 2003, by = 1)) +
  geom_text(
    aes(label = percent_final),
    position = position_stack(vjust = 0.5),
    colour = "blue",
    size = 9
  ) +
  theme_economist()
plot:

Related

How to make log10 ONLY first y-axis (not secondary y-axis) in ggplot in R

I would like to plot ONLY y-axis1 DATA (left axis, Var1, dotted line) as a log10 scale. The dotted line would therefore look higher on the y-axis and differences between 1 and 2 would be noticeable.
I have tried several things, but does not work ( I believe I am using them in the wrong order/place) such as:
+coord_trans(y='log10')--> empty plot
scale_y_continuous(trans = log10_trans(),... --> makes both Var1 and Var 2 log10
scale_y_log10(breaks = trans_breaks("log10", function(x) 10^x),labels = trans_format("log10", math_format(10^.x)))--> makes both y axis log10 and removes y-axis2 (Var2)
# Example data: three series (a, b, c), each observed on days 1 to 3.
# Var1 repeats the same three values for every series.
data <- data.frame(
  Day = rep(c(1, 2, 3), times = 3),
  Name = rep(c("a", "b", "c"), each = 3),
  Var1 = rep(c(1090, 484, 64010), times = 3),
  Var2 = c(4, 16, 39, 2, 22, 39, 41, 10, 3)
)
# Bars show Var2 scaled up by 1000 so they share the primary axis with Var1;
# the secondary axis divides by 1000 again to label them in Var2 units.
ggplot(data) +
  geom_bar(
    aes(x = Day, y = Var2 * 1000, fill = Name),
    stat = "identity",
    position = position_stack(reverse = TRUE),
    colour = "black"
  ) +
  geom_line(
    aes(x = Day, y = Var1),
    stat = "identity",
    linetype = "dotted",
    color = "black",
    size = 0.8
  ) +
  geom_point(aes(x = Day, y = Var1), shape = 8) +
  labs(title = "", x = "", y = expression('Var1')) +
  scale_y_continuous(
    sec.axis = sec_axis(~ . / 1000, name = expression(paste("Var2")))
  ) +
  theme_classic() +
  scale_fill_grey(start = 1, end = 0.1, name = "", labels = c("a", "b", "c"))
I think the easiest way is to have the primary axis be the linear one, but put it on the right side of the plot. Then, you can have the secondary one be your log-transformed axis.
library(ggplot2)
# Same example data as in the question: series a, b, c on days 1 to 3.
day_values <- c(1, 2, 3)
data <- data.frame(
  Day = rep(day_values, times = 3),
  Name = rep(c("a", "b", "c"), each = length(day_values)),
  Var1 = rep(c(1090, 484, 64010), times = 3),
  Var2 = c(4, 16, 39, 2, 22, 39, 41, 10, 3)
)
rm(day_values)
# Scaling factor between the two axes: log10 of the chosen secondary-axis
# maximum (3e6) per unit of the primary axis, taking 80 as the primary
# maximum (presumably about the height of the tallest stacked bar).
upper <- log10(3e6) / 80

# Break positions for the secondary axis: compute "nice" breaks on the log10
# scale, then map them back to the original scale.
breakfun <- function(x) {
  log_breaks <- scales::extended_breaks()(log10(x))
  10^log_breaks
}

# Var2 is drawn on the linear primary axis (moved to the right-hand side);
# Var1 is drawn as log10(Var1) / upper, and the secondary axis inverts that
# transformation so its labels read in original Var1 units.
ggplot(data) +
  geom_bar(
    aes(x = Day, y = Var2, fill = Name),
    stat = "identity",
    position = position_stack(reverse = TRUE),
    colour = "black"
  ) +
  geom_line(
    aes(x = Day, y = log10(Var1) / upper),
    stat = "identity",
    linetype = "dotted",
    color = "black",
    size = 0.8
  ) +
  geom_point(aes(x = Day, y = log10(Var1) / upper), shape = 8) +
  labs(title = "", x = "", y = expression('Var1')) +
  scale_y_continuous(
    name = "Var2",
    position = "right",
    sec.axis = sec_axis(
      ~ 10^(. * upper),
      name = expression(paste("Var1")),
      breaks = breakfun
    )
  ) +
  theme_classic() +
  scale_fill_grey(start = 1, end = 0.1, name = "", labels = c("a", "b", "c"))
Created on 2022-02-09 by the reprex package (v2.0.1)
Here is a custom breaks function:
# Powers-of-ten axis breaks.
#
# limits: numeric vector whose first two elements are the lower and upper
#         axis limits (the form in which ggplot2 hands limits to a `breaks`
#         function).
# Returns 10^k for every integer k from the lower exponent up to
# trunc(log10(upper limit)). The lower exponent is 0 when the lower limit is
# not positive, because log10() is undefined there.
br <- function(limits) {
  # The condition is a single value, so a plain if/else is the right tool;
  # ifelse() is meant for element-wise selection over vectors.
  lower_exp <- if (limits[1] <= 0) {
    0
  } else {
    trunc(log10(limits[1]))
  }
  upper_exp <- trunc(log10(limits[2]))
  10^seq(lower_exp, upper_exp, by = 1)
}
# Same layout as the question's plot (Var2 bars scaled by 1000, Var1 line),
# with the primary-axis breaks supplied by the custom br() function.
ggplot(data) +
  geom_bar(
    aes(x = Day, y = Var2 * 1000, fill = Name),
    stat = "identity",
    position = position_stack(reverse = TRUE),
    colour = "black"
  ) +
  geom_line(
    aes(x = Day, y = Var1),
    stat = "identity",
    linetype = "dotted",
    color = "black",
    size = 0.8
  ) +
  geom_point(aes(x = Day, y = Var1), shape = 8) +
  labs(title = "", x = "", y = expression('Var1')) +
  scale_y_continuous(
    breaks = br,
    sec.axis = sec_axis(~ . / 1000, name = expression(paste("Var2")))
  ) +
  theme_classic() +
  scale_fill_grey(start = 1, end = 0.1, name = "", labels = c("a", "b", "c"))
Results aren't so pretty but you can customize the breaks as you wish.
You absolutely should read the answer @teunbrand linked to in the comment on your question. But if all you need is to display log values on the left and the original values on the right, you can use:
# Show log10(Val1) on the primary (left) axis and the original Val1 values
# on the secondary (right) axis.
# The data are already log-transformed inside aes(), so the y scale must stay
# linear: scale_y_log10() would take the logarithm a second time. The
# secondary axis inverts the transform (10^.) to recover the original values.
tibble(Day = 1:10,
       Val1 = 10 * Day) %>%
  ggplot(aes(x = Day, y = log10(Val1))) +
  geom_col() +
  scale_y_continuous(name = "log(Val1)",
                     sec.axis = sec_axis(~ 10^., name = "Val1"))

How to create an individual line plot in between box plot in r

I'm trying to create a plot like this image below where the individual data lines are in between the box plots. Image to create in R ggplot2
The closest I am getting is something like this:
Image using ggplot2 but it looks a bit cluttered with the lines/points behind.
# Per-ID lines and points drawn at the same x positions as the box plots,
# with a bootstrap error bar and the mean on top; one panel per condition.
data1 %>%
  ggplot(aes(x = Time, y = Trait)) +
  geom_line(aes(group = ID), position = "identity") +
  geom_point(
    aes(group = ID),
    position = "identity",
    shape = 21,
    colour = "black",
    size = 2
  ) +
  geom_boxplot(
    position = position_dodge(width = 0.9),
    width = 0.5,
    fill = "white"
  ) +
  stat_summary(
    geom = "errorbar",
    fun.data = mean_cl_boot,
    position = position_dodge(width = 0.9),
    width = 0.1
  ) +
  stat_summary(
    geom = "point",
    fun = mean,
    position = "identity",
    shape = 18,
    size = 3
  ) +
  facet_wrap(~Cond) +
  theme_classic()
Any tips would be greatly appreciated!
One option to achieve your desired result would be to make use of continuous x scale. Doing so makes it possible to shift the box plots to the left or to right and vice versa for the points and lines:
Making use of some random data to mimic your real data set.
# Numeric x position of each time point: "Pre" -> 1, "Post" -> 2.
data1$Time1 <- as.numeric(match(data1$Time, c("Pre", "Post")))
# Boxes are shifted outwards by 0.1 (Pre to the left, Post to the right) ...
data1$Time_box <- data1$Time1 - 0.1 * ifelse(data1$Time == "Pre", 1, -1)
# ... and lines/points inwards by the same amount.
data1$Time_lp <- data1$Time1 + 0.1 * ifelse(data1$Time == "Pre", 1, -1)
library(ggplot2)
# Continuous x scale: box plots, error bars and means use the outward-shifted
# Time_box positions, individual lines and points use Time_lp. The axis is
# relabelled at 1 and 2 with the original "Pre"/"Post" names.
ggplot(data1, aes(x = Time_box, y = Trait)) +
  geom_line(aes(x = Time_lp, group = ID), position = "identity") +
  geom_point(
    aes(x = Time_lp, group = ID),
    position = "identity",
    shape = 21,
    colour = "black",
    size = 2
  ) +
  geom_boxplot(
    aes(x = Time_box, group = Time1),
    width = 0.25,
    fill = "white"
  ) +
  stat_summary(geom = "errorbar", fun.data = mean_cl_boot, width = 0.1) +
  stat_summary(
    geom = "point",
    fun = mean,
    position = "identity",
    shape = 18,
    size = 3
  ) +
  scale_x_continuous(breaks = c(1, 2), labels = c("Pre", "Post")) +
  facet_wrap(~Cond) +
  theme_classic()
DATA
# Random example data: 10 IDs, each measured "Pre" and "Post" under two
# conditions (40 rows). The seed is fixed so the Trait values are reproducible.
set.seed(42)
data1 <- data.frame(
  ID = rep(seq_len(10), times = 4),
  Time = rep(rep(c("Pre", "Post"), each = 10), times = 2),
  Trait = runif(40),
  Cond = rep(c("MBSR", "SME"), each = 20)
)
EDIT If you want two boxplots side by side, it's basically the same. However, in that case you have to map the interaction of Time1 and the variable mapped on fill to the group aesthetic in geom_boxplot (and probably in the error bars as well):
library(ggplot2)
# Random example data with an additional Fill grouping variable.
set.seed(42)
data1 <- data.frame(
  ID = rep(1:10, 4),
  Time = rep(c("Pre", "Post"), each = 10),
  Fill = rep(c("Fill1", "Fill2"), each = 5),
  Trait = runif(40),
  Cond = rep(c("MBSR", "SME"), each = 20)
)

# Rebuilding data1 discards any previously derived columns, so the shifted
# x positions used by the plot have to be computed again here.
# Time1: numeric position of the time point ("Pre" -> 1, "Post" -> 2).
data1$Time1 <- as.numeric(factor(data1$Time, levels = c("Pre", "Post")))
# Time_box: box plots shifted outwards by 0.1.
data1$Time_box <- data1$Time1 + 0.1 * ifelse(data1$Time == "Pre", -1, 1)
# Time_lp: lines and points shifted inwards by 0.1.
data1$Time_lp <- data1$Time1 + 0.1 * ifelse(data1$Time == "Pre", 1, -1)
# Side-by-side box plots per Fill level: the box plot groups on the
# interaction of time point and Fill; lines are coloured and points filled
# by Fill.
ggplot(data1, aes(x = Time_box, y = Trait)) +
  geom_line(
    aes(x = Time_lp, group = ID, color = Fill),
    position = "identity"
  ) +
  geom_point(
    aes(x = Time_lp, group = ID, fill = Fill),
    position = "identity",
    shape = 21,
    colour = "black",
    size = 2
  ) +
  geom_boxplot(
    aes(x = Time_box, group = interaction(Time1, Fill), fill = Fill),
    width = 0.25
  ) +
  stat_summary(geom = "errorbar", fun.data = mean_cl_boot, width = 0.1) +
  stat_summary(
    geom = "point",
    fun = mean,
    position = "identity",
    shape = 18,
    size = 3
  ) +
  scale_x_continuous(breaks = c(1, 2), labels = c("Pre", "Post")) +
  facet_wrap(~Cond) +
  theme_classic()

Breaking y-axis in ggplot2 with geom_bar

I'm having a hard time dealing with this plot.
The height of the values for ANI > 96 makes it hard to read the red and blue percentage text.
I failed to break the y-axis by looking at answers from other posts in StackOverflow.
Any suggestions?
Thanks.
library(data.table)
library(ggplot2)
# Pair counts per ANI value (79 to 99); every ANI value has two consecutive
# rows, one for "yes" and one for "no".
dt <- data.table(
  "ANI" = sort(c(seq(79, 99), seq(79, 99))),
  "n_pairs" = c(
    5, 55, 13, 4366, 6692, 59568, 382873, 397996, 1104955, 282915,
    759579, 261170, 312989, 48423, 120574, 187685, 353819, 79468, 218039,
    66314, 41826, 57668, 112960, 81652, 28613,
    64656, 21939, 113656, 170578, 238967, 610234, 231853, 1412303, 5567,
    4607268, 5, 14631942, 0, 17054678, 0, 3503846, 0
  ),
  "same/diff" = rep(c("yes", "no"), 21)
)

# Total number of pairs per ANI value, repeated on both rows of that value.
# A grouped `:=` replaces the index loop over 1:nrow(dt), which grew the new
# columns one element at a time.
dt[, total := sum(n_pairs), by = ANI]
# Each row's share of its ANI total, formatted as a percent label.
dt[, percent := paste0(round(n_pairs / total * 100, 2), "%")]
# Stacked bars of pair counts per ANI with percent labels. The text layer is
# added before the bars and uses a dodged position while the bars are stacked.
ggplot(data = dt, aes(x = ANI, y = n_pairs, fill = `same/diff`)) +
  geom_text(
    aes(label = percent),
    position = position_dodge(width = 0.9),
    hjust = 0.75,
    vjust = -0.25
  ) +
  geom_bar(stat = "identity") +
  scale_x_continuous(breaks = dt$ANI) +
  labs(
    x = "ANI",
    y = "Number of pairs",
    fill = "Share one common species taxonomy?"
  ) +
  theme_classic() +
  theme(legend.position = "bottom")
Here is the list of major changes I made:
I reduced the y axis by zooming into the chart with coord_cartesian (which is called by coord_flip).
coord_flip should also improve the readability of the chart by switching x and y. I don't know if the switch is a desirable output for you.
Also now position_dodge, works as expected: two bars next to each other with the labels on top (on the left in this case).
I set geom_bar before geom_text so that the text is always in front of the bars in the chart.
I set scale_y_continuous to change the labels of the y axis (in the chart the x axis because of the switch) to improve the readability of the zeros.
# Dodged bars with their labels, flipped so ANI runs along the vertical axis;
# coord_flip(ylim = ...) zooms the count axis to 0 - 2e6 without dropping data.
ggplot(data = dt, aes(x = ANI, y = n_pairs, fill = `same/diff`)) +
  geom_col(position = position_dodge2(width = 1), width = 0.8) +
  geom_text(
    aes(label = percent),
    position = position_dodge2(width = 1),
    hjust = 0,
    size = 3
  ) +
  scale_x_continuous(breaks = dt$ANI) +
  scale_y_continuous(labels = scales::comma) +
  labs(
    x = "ANI",
    y = "Number of pairs",
    fill = "Share one common species taxonomy?"
  ) +
  theme_classic() +
  theme(legend.position = "bottom") +
  coord_flip(ylim = c(0, 2e6))
EDIT
Like this columns and labels are stacked but labels never overlap.
# Stacked bars with stacked labels; the horizontal justification depends on
# the group ("yes" -> 1, otherwise 0) so the two labels of a bar point in
# opposite directions.
ggplot(data = dt, aes(x = ANI, y = n_pairs, fill = `same/diff`)) +
  geom_col(width = 0.8) +
  geom_text(
    aes(
      label = percent,
      hjust = ifelse(`same/diff` == "yes", 1, 0)
    ),
    position = "stack",
    size = 3
  ) +
  scale_x_continuous(breaks = dt$ANI) +
  scale_y_continuous(labels = scales::comma) +
  labs(
    x = "ANI",
    y = "Number of pairs",
    fill = "Share one common species taxonomy?"
  ) +
  theme_classic() +
  theme(legend.position = "bottom") +
  coord_flip(ylim = c(0, 2e6))
Alternatively, you can avoid labels overlapping with check_overlap = TRUE, but sometimes one of the labels will not be shown.
# Stacked bars with stacked labels; check_overlap = TRUE skips any label that
# would overlap one already drawn in the same layer.
ggplot(data = dt, aes(x = ANI, y = n_pairs, fill = `same/diff`)) +
  geom_bar(stat = "identity", width = 0.8) +
  geom_text(
    aes(label = percent),
    position = "stack",
    hjust = 1,
    size = 3,
    check_overlap = TRUE
  ) +
  scale_x_continuous(breaks = dt$ANI) +
  scale_y_continuous(labels = scales::comma) +
  labs(
    x = "ANI",
    y = "Number of pairs",
    fill = "Share one common species taxonomy?"
  ) +
  theme_classic() +
  theme(legend.position = "bottom") +
  coord_flip(ylim = c(0, 2e6))

ggplot add text to the center of a donut chart in R

I am working on a donut chart using ggplot2, but I need the center of the plot to contain text.
Here's sample data (found from this site: https://www.datanovia.com/en/blog/how-to-create-a-pie-chart-in-r-using-ggplot2/):
library(dplyr)
# Counts and percentage shares per class, sorted by class in descending order,
# plus lab.ypos: the midpoint of each slice on the cumulative prop scale,
# used as the y position of the slice labels.
count.data <- data.frame(
  class = c("1st", "2nd", "3rd", "Crew"),
  n = c(325, 285, 706, 885),
  prop = c(14.8, 12.9, 32.1, 40.2)
) %>%
  arrange(desc(class)) %>%
  mutate(lab.ypos = cumsum(prop) - prop / 2)
count.data
I then modified their code to get this donut chart:
library(ggplot2)
library(dplyr)
# Fill colours, one per class.
mycols <- c("#0073C2FF", "#EFC000FF", "#868686FF", "#CD534CFF")

# Donut chart: a single stacked bar at x = 2 drawn in polar coordinates;
# extending the x range down to 0.5 leaves the hole in the middle.
ggplot(count.data, aes(x = 2, y = prop, fill = class)) +
  geom_bar(stat = "identity", color = "white") +
  coord_polar(theta = "y", start = 0) +
  geom_text(
    aes(y = lab.ypos, label = paste0("n = ", n, ", \n", prop, "%")),
    color = "white"
  ) +
  scale_fill_manual(values = mycols) +
  theme_void() +
  xlim(0.5, 2.5)
The plot looks like this:
It is exactly what I want except I need the center of the donut to have the proportion from a variable. In this case, I want the center to say 40.2% (the prop of crew, in this example).
How do I do this?
Edit
Used annotate as suggested by @aosmith and made it a direct call to crew.
like this?
# Donut chart with the Crew share written in the hole: x = 0.5 is the lower
# x limit, which is the centre of the plot in polar coordinates.
ggplot(count.data, aes(x = 2, y = prop, fill = class)) +
  geom_bar(stat = "identity", color = "white") +
  coord_polar(theta = "y", start = 0) +
  geom_text(
    aes(y = lab.ypos, label = paste0("n = ", n, ", \n", prop, "%")),
    color = "white"
  ) +
  scale_fill_manual(values = mycols) +
  theme_void() +
  xlim(0.5, 2.5) +
  annotate(
    geom = 'text',
    x = 0.5,
    y = 0,
    label = paste0(count.data$prop[count.data$class == 'Crew'], "%")
  )

Count and Percent Together using Stack Bar in R

I am trying to create a stacked bar chart with counts and percents in the same graph. With help from "Showing data values on stacked bar chart in ggplot2" and "add group total", I plotted mine as
By using code
### to plot stacked bar graph with total on the top and
### distribution of the frequency;
library(ggplot2);
library(plyr);
library(dplyr);
# Sample data: frequency of four categories (A to D) in each of four years.
Year <- rep(c("2006-07", "2007-08", "2008-09", "2009-10"), each = 4)
Category <- rep(c("A", "B", "C", "D"), times = 4)
Frequency <- c(
  168, 259, 226, 340,
  216, 431, 319, 368,
  423, 645, 234, 685,
  166, 467, 274, 251
)
Data <- data.frame(Year, Category, Frequency)

# Total frequency per year (max_pos).
sum_count <- summarise(group_by(Data, Year), max_pos = sum(Frequency))
sum_count

# pos: cumulative midpoint of each category's frequency within its year.
Data <- ddply(
  Data, .(Year), transform,
  pos = cumsum(Frequency) - Frequency / 2
)
Data
# Stacked bars per year with the frequency written at each segment's pos and
# the yearly total (from sum_count) written above each bar.
p <- ggplot(Data, aes(x = Year, y = Frequency)) +
  geom_bar(aes(fill = Category), stat = "identity") +
  geom_text(aes(y = pos, label = Frequency), size = 3) +
  geom_text(
    data = sum_count,
    aes(y = max_pos, label = max_pos),
    vjust = -0.5,
    size = 4
  )
print(p)
Now I want to overlay the percent of each group on the counts. This is my approach: merge the data in such a way that we can calculate
the percentage for each of the groups you are dealing with.
# Attach each year's total (max_pos) to every row of that year.
MergeData <- merge(Data, sum_count, by = "Year")

# Share of the year's total held by each category, in whole percent.
# The share must be based on Frequency; `pos` is the cumulative midpoint used
# for label placement and gives a wrong percentage (e.g. 8 instead of 17 for
# 168 out of 993).
MergeData <- transform(
  MergeData,
  per_cent = round((Frequency / max_pos) * 100, 0)
)

# Cumulative midpoint of each category on the percent scale, per year.
MergeData <- ddply(
  MergeData, .(Year), transform,
  per_pos = cumsum(per_cent) - (0.5 * per_cent)
)

# calculate percent and attach % sign
MergeData <- transform(
  MergeData,
  per_cent = paste(round((Frequency / max_pos) * 100, 0), "%")
)

# Data only with percents
Percent_Data <- subset(
  MergeData,
  select = c("Year", "Category", "per_cent", "per_pos")
)
I am wondering if it is possible to overlay the percent data on the image I created using the previous code, so that number and percent can be presented together.
I think you are almost there.
Use MergeData as the source for the data frame and add one more call to geom_text
# Stacked bars built from MergeData with three text layers: the frequency at
# pos, the yearly total at max_pos, and the percent label at pos (vjust
# separates the frequency and percent labels that share the same y).
p <- ggplot(MergeData, aes(x = Year, y = Frequency, group = Category)) +
  geom_bar(aes(fill = Category), stat = "identity") +
  geom_text(aes(y = pos, label = Frequency), vjust = 1, size = 3) +
  geom_text(aes(y = max_pos, label = max_pos), vjust = -0.5, size = 4) +
  geom_text(aes(x = Year, y = pos, label = per_cent), vjust = -1, size = 4)
print(p)
You may need to fiddle with hjust and vjust to get the text just how you like it.
Thank you for your response. I think it is very good.
# Same three text layers with smaller fonts, plus axis labels, a title and a
# white panel with the legend at the bottom.
p <- ggplot(MergeData, aes(x = Year, y = Frequency, group = Category)) +
  geom_bar(aes(fill = Category), stat = "identity") +
  geom_text(
    aes(y = pos, label = Frequency),
    hjust = 0.5,
    vjust = 1,
    size = 2
  ) +
  geom_text(aes(y = max_pos, label = max_pos), vjust = -0.1, size = 3) +
  geom_text(aes(x = Year, y = pos, label = per_cent), vjust = -0.4, size = 2) +
  # Set axis labels and title
  labs(
    x = "Year",
    y = " Number of People",
    title = "Distribution by Category over Year"
  ) +
  theme(
    panel.background = element_rect(fill = 'white', colour = 'white'),
    legend.position = "bottom",
    legend.title = element_text(color = "black", size = 7),
    legend.key.width = unit(1, "inch")
  )
print(p)
Now my % is on top of the number; in other words, it shows "17%" and then "168", but I want "168" and then "17%". I tried switching the position of geom_text() but it did not work. I am wondering if you know how to fix it.
Yes, it helped. I fixed the number at the center of each stack, so I needed to change the position of the percent. The code below fixed my issue. Thank you so much for your help.
# Final version: the percent label is moved below the frequency (vjust = 1.95)
# and nudged horizontally (hjust = 0.3).
p <- ggplot(MergeData, aes(x = Year, y = Frequency, group = Category)) +
  geom_bar(aes(fill = Category), stat = "identity") +
  geom_text(
    aes(y = pos, label = Frequency),
    hjust = 0.5,
    vjust = 1,
    size = 2
  ) +
  geom_text(aes(y = max_pos, label = max_pos), vjust = -0.1, size = 3) +
  geom_text(
    aes(x = Year, y = pos, label = per_cent),
    hjust = 0.3,
    vjust = 1.95,
    size = 2
  ) +
  # Set axis labels and title
  labs(
    x = "Year",
    y = " Number of People",
    title = "Distribution by Category over Year"
  ) +
  theme(
    panel.background = element_rect(fill = 'white', colour = 'white'),
    legend.position = "bottom",
    legend.title = element_text(color = "black", size = 7)
  )
print(p)

Resources