ggplot: Grouped, adjacent bars of variable width - r

I'm trying to produce barplot in which the width and height of the bars both convey information: the height is the number of hours spent on a task, the widths respectively indicate the perceived aptitude and importance associated with the task. I've managed to produce this monstrosity:
It's functional but horrible. I would really like to place the bars alongside one another (rather than overlaying them), so that each activity is represented by two touching bars of the same height (=time spent) but different widths and colors. I've been trying to to pass a width argument to this plot:
but setting 'aes(width = widthVariable)' gives me overlapping bars (similar to the first image) and the following warning message:
"position_dodge requires non-overlapping x intervals".
Is there a way of grouping my bars by activity, displaying them adjacently and varying their widths?
Here's a bit of the df I'm using:
molten = data.frame(Activity = rep(c('Administration','Working with Colleagues','Use of Social Media','Leadership Role'),2),
variable = c(rep('Importance',4),rep('Competence',4)),
value = rep(c(3.02,1.71,2.39,3.32),2),
width = c(3.48,3.52,4.01,2.98,
3.85,2.34,4.87,3.81))
The second plot is this:
ggplot(molten, aes(x=Activity, y=value, fill=variable)) + geom_bar(stat='identity',position = 'dodge')
and the first in something like this:
ggplot(molten, aes(x=Activity, y=value, fill=variable)) + geom_bar(stat='identity',aes(width = width/10))
Although I actually made it using slightly simpler dataframe, which I melt()-ed into the one above.

Not a perfect solution, but you can create a new column that combines Activity and Variable, use that as the x, and fill by variable:
molten<-mutate(molten,activity=paste(Activity,variable))
ggplot(molten, aes(x=activity, y=value,width = width/10)) +
geom_bar(stat='identity', aes(fill=variable)) +
theme(axis.text.x = element_text(angle = 45,hjust=1)) +
scale_x_discrete(breaks=molten$activity, labels=molten$Activity)

I've made some progress, building on iod's idea of mutating the original data frame.
I've made two separate geom-bars and nudged them into one another. I'd really love it if every bar touched its neighbor, but position_nudge() only takes a constant. I'm still getting into ggplot so the most obvious solution in my mind is a recycled 'nudge' vector, akin to barplot()'s color argument.
tldr: Little gaps between bars but reasonably pretty now.
molten<-mutate(molten,activity=paste(Activity,variable))
molten$importanceBars = c(value[variable=='Importance'],rep(0,nrow(molten)-sum(variable=='Importance')))
molten$competenceBars = c(rep(0,nrow(molten)-sum(variable=='Competence')),value[variable=='Competence'])
ggplot(molten, aes(x=activity,width = width/6, fill = variable)) +
geom_bar(stat='identity', aes(y=importanceBars),position=position_nudge(x=-0.2-0.35)) +
geom_bar(stat='identity', aes(y=competenceBars),position=position_nudge(x=-0.35)) +
theme(axis.text.x = element_text(angle = 45,hjust=1)) +
scale_x_discrete(breaks=molten$activity[molten$variable=='Competence'],
labels=molten$Activity[molten$variable=='Competence'])

I've done it - had to draw every bar as a rectangle, adjusting xmin and xmax accordingly.
wadjust = 5.5
gap = 0.0
minv = 1:length(value) - 0.5 + gap/2
maxv = minv + 1 -gap
minv[1:length(minv)%%2!=0] = maxv[1:length(maxv)%%2!=0] - width[order(Activity)][(1:length(width))%%2!=0]/wadjust
maxv[1:length(maxv)%%2==0] = minv[1:length(minv)%%2==0] + width[order(Activity)][(1:length(width))%%2==0]/wadjust
minv = minv +0.525
maxv = maxv +0.525
minvord = minv[order(Activity)]
maxvord = maxv[order(Activity)]
ggplot(molten, aes(x=activity,width = width/6, fill = variable)) +
geom_rect(xmin = minv,xmax = maxv, ymin = rep(0,28), ymax = value[order(Activity)],
fill = rep(c('#e1de00','#e84619'),len=28)) +
theme(axis.text.x = element_text(angle = 45,hjust=1)) +
theme(plot.margin=unit(c(1,0.5,1,2),"cm")) +
scale_x_discrete(breaks=molten$activity[molten$variable=='Importance'],
labels=molten$Activity[molten$variable=='Importance'][order(Activity[molten$variable=='Importance'])]) +
scale_y_continuous(labels = 0:3, breaks = 0:3, limits = c(0,3)) +
xlab('Activity') + ylab('Hours Spent') +
labs(title = 'Perceived Importance & Competence\nAssociated with Clerical Duties') +
theme(panel.grid.major.x = element_blank()) +
geom_vline(xintercept = (maxv[1:length(maxv)%%2!=0]+minv[1:length(minv)%%2==0])/2,col='white') +
geom_vline(xintercept = seq(len = 14, by = 2),col = 'white')

Related

Set the width and gap in geom_bar in a large dataset with a lot of unique values

I have the dataframe below:
res<-sample.int(2187, 2187)
freq<-floor(runif(2187, 95,105))
t<-data.frame(res,freq)
and Im trying to create a bar chart based on this but despite the fact that I use width and color arguments I still cannot create space between the bars which are black instead of the selected fill.
library(ggplot2)
require(scales)
ggplot(t,width=0.1)+
geom_bar(aes(x=res,y=freq ,fill = (t$res==101)),
color = "black",stat = "identity") +
scale_fill_manual(values=c("darkblue", "lightblue"), guide = F) +
theme_classic(base_size = 16)+ theme(legend.position = "none")+
scale_x_discrete(breaks = seq(80, 115, 5))+ scale_y_continuous(labels = comma)
Note that this code works nice for a dataset with much fewer unique values like:
fac<-factor(rep(c(80,85,100,100.5,100.7,101,101.5,110,105),2000000))
res<-data.frame(fac)
new<-data.frame(table(res))
require(scales)
ggplot(new,width=0.1)+
geom_bar(aes(x=res,y=Freq ,fill = (new$res==101)),
color = "black",stat = "identity") +
scale_fill_manual(values=c("darkblue", "lightblue"), guide = F) +
theme_classic(base_size = 16)+ theme(legend.position = "none")+
scale_x_discrete(breaks = seq(80, 115, 5))+ scale_y_continuous(labels = comma)
May be I am completely wrong but if I understand correctly, the OP wants to reproduce the second chart from scratch using a sample of random numbers instead of already tabulated counts.
To create a histogram / bar chart, we only need a vector of random numbers (wraped in a data.frame for ggplot) and let geom_bar() do the counting. In addition, a particular bar will be highlighted.
By using floor(), the random numbers are already binned but are still considered as continuous by ggplot(). Therefore, they need to be turned into factor.
# create data
set.seed(123L) # ensure random data are reproducible
t <- data.frame(res = floor(runif(2187, 95, 105)))
library(ggplot2)
ggplot(t) +
aes(x = as.factor(res), fill = res == 101) +
geom_bar() +
theme_classic(base_size = 16) +
scale_fill_manual(values = c("darkblue", "lightblue"), guide = FALSE) +
xlab("res") +
ylab("freq")
Edit: geom_histogram()
Ther is an alternative approach using geom_histogram().
geom_histogram() does all steps in one go: The binning (no need to use floor()) as well as counting and plotting:
set.seed(123L) # ensure random data are reproducible
t2 <- data.frame(res = runif(2187, 95,105)) # floor() omitted here
ggplot(t2) +
aes(x = res, fill = floor(res) == 101) +
geom_histogram(breaks = seq(95, 105, 1), closed = "left") +
theme_classic(base_size = 16) +
scale_fill_manual(values = c("darkblue", "lightblue"), guide = FALSE) +
xlab("res") +
ylab("freq")
Here, the breaks parameter was used to specify the bin boundaries explicitely. Alternatively, the number of bins or the width of the bins can be specifies. This gives flexibilty to play around with the parameters.
Edit 2
The OP has asked about the case where the random numbers are uniformly distributed between 100 and 1015. With an adjustment to the sequence of breaks,
set.seed(123L) # ensure random data are reproducible
t3 <- data.frame(res = runif(2187, 100, 1015))
ggplot(t3) +
aes(x = res, fill = floor(res) == 101) +
geom_histogram(breaks = seq(100, 1015, 1), closed = "left") +
theme_classic(base_size = 16) +
scale_fill_manual(values = c("darkblue", "lightblue"), guide = FALSE) +
xlab("res") +
ylab("freq")
returns
This chart contains over 900 bars for each bin of width 1 which aren't all visible depending on the screen resolution as already explained by Jon Spring.
Therefore, it might be more suitable to reduce the number of bins, e.g., to 100 bins:
ggplot(t3) +
aes(x = res, fill = floor(res) == 101) +
geom_histogram(bins = 100L) +
theme_classic(base_size = 16) +
scale_fill_manual(values = c("darkblue", "lightblue"), guide = FALSE) +
xlab("res") +
ylab("freq")
Please note that 101 is still highlighted in the lower left corner.
Edit -- added alternate solutions at bottom.
If you have over 2,000 bars, and each one has a black outline 1 pixel wide on each side, that'll take something on the order of 6,000 horizontal pixels (ignoring anti-aliasing) to see one with a different fill. Most screens have much lower resolution than that.
If you must use bars, and must show every value, one option would be to drop the outline with color = NA and set width = 1 (as a term in the geom_col/geom_bar call) so there's no distracting blank space between bars. Even then, the different color at res == 101 is only visible at certain resolutions. (That might vary on device settings and anti-aliasing.)
ggplot(t)+
geom_col(aes(x=res,y=freq , fill = (res==101)),
color = NA, width = 1) +
scale_fill_manual(values=c("darkblue", "lightblue"), guide = F) +
theme_classic(base_size = 16) +
scale_x_continuous(breaks = c(500*0:4, 101))
If you must show all 2000 points, but want to highlight one, it might make sense to use a different geom that spreads the data out to use more of the available space.
For instance, we might use geom_point or geom_jitter to plot all the coordinates in 2d space. Here, I highlight the element with res == 101. I use arrange to make sure the special dot gets plotted last so that it doesn't get occluded.
library(dplyr)
ggplot(t %>% arrange(res == 101),
aes(x = res, y = freq,
fill = res == 101,
size = res == 101)) +
geom_jitter(shape = 21, stroke = 0.1)
Or we might plot the data as a line, highlighting the special dot on its own:
ggplot(t, aes(res, freq)) +
geom_line(color = "gray70") +
geom_point(data = subset(t, res == 101)) +
expand_limits(y=0)

How to stack two ggplot points on top of each other with error bars

I am attempting to plot two points that have error bars on them on top of each other in ggplot. However, the error bars are not syncing up with the points. This is the code that I'm using, and I have my ensuing graph attached:
df = data.frame(rtix = mean(DandI_Variance$`1RTI`[1:11]),
rtiy = 50,
rtixmin = DandI_Variance$`1RTI`[11],
rtixmax = DandI_Variance$`1RTI`[1],
rtiymin = 52,
rtiymax = 42,
rtcx = mean(DandI_Variance$`1RTC`[1:11]),
rtcy = 75,
rtcxmin = DandI_Variance$`1RTC`[11],
rtcxmax = DandI_Variance$`1RTC`[1],
rtcymin = 69,
rtcymax = 79)
ggplot(data = df, aes(x = rtix, y = rtiy)) +
geom_point() +
geom_errorbar(aes(ymin = rtiymin, ymax = rtiymax, width = .07, color = "blue")) +
geom_errorbarh(aes(xmin = rtixmin, xmax = rtixmax, height = 10, color = "blue")) +
geom_point(aes(x = rtcx, y = rtcy)) +
geom_errorbar(aes(ymin = rtcymin, ymax = rtcymax, width = .07, color = "red")) +
geom_errorbarh(aes(xmin = rtcxmin, xmax = rtcxmax, height = 10, color = "red")) +
xlab("S Equalibrium") +
ylab("Time to Equalibrium") +
ylim(0, 100) +
xlim(0, 1) +
ggtitle("Performance of Models")
I think there might be some confusion caused within ggplot since you have the geom_errorbar() and geom_errorbarh() functions twice in the same call. It also just looked like you're structuring your data frame in a weird way. Rather than having one row, why not give your data frame 2 rows, each with identifying columns?
I'd try structuring the code like this as a first step (hopefully this solves it).
I've just compressed the dataframe into 2 rows and 7 columns (adding a new one for type to use for color), I've then just called the ggplot2 functions once rather than twice, and moved the width outside of the aes call (since the aes call take the inputs as names, not values, this means a width of 0.7 is actually a factor called "0.7" not what you're desiring, which is a numerical width of 0.7) and kept the color in (only because color is now using a column instead of a name, notice on your plot "blue" is actually red and vice versa, that's because of the same problem as the width issue I described above). Finally I've added the manual colour scale so we can choose which has which colour. You can switch blue and red around if you want it in the other order.
df = data.frame(rtx = c(mean(DandI_Variance$`1RTI`[1:11]),
mean(DandI_Variance$`1RTC`[1:11])),
rty = c(50,75),
rtxmin = c(DandI_Variance$`1RTI`[11],
DandI_Variance$`1RTC`[11]),
rtxmax = c(DandI_Variance$`1RTI`[1],
DandI_Variance$`1RTC`[1]),
rtymin = c(52,69),
rtymax = c(42,79),
rttype = c('I', 'C')
)
ggplot(data = df, aes(x = rtx, y = rty)) +
geom_point() +
geom_errorbar(aes(ymin = rtymin, ymax = rtymax, color = rttype), width = .07) +
geom_errorbarh(aes(xmin = rtxmin, xmax = rtxmax, color = rttype), height = 10) +
scale_color_manual(values = c("blue", "red")) +
xlab("S Equalibrium") +
ylab("Time to Equalibrium") +
ylim(0, 100) +
xlim(0, 1) +
ggtitle("Performance of Models")

apply ggplot color scale gradient to portion of data

I have a question about applying ggplot's color scale gradient. I have dataset where the response variable is a continuous variable including both positive and negative numbers, and the independent variable is a number of independent sites. I'm trying to plot the data in such a way that I can plot all of the data in the background and then apply a color scale gradient to response data that covers the negative range of the data. This is what I have so far with a example dataset that mimics the structure of my actual dataset.
tr_sim <- data.frame(site_id = seq(1,100,1), estimated_impact =
rnorm(100,18,200), impact_group = rep(c(1,2),each = 50))
rng_full <- range(tr_sim$estimated_impact)
#produce plot showing the full range of impacts across all sites and then
over the subsetted sites
impact_plot_full <- ggplot(data = tr_sim, aes(x = factor(site_id, levels =
site_id[order(estimated_impact)]), y = estimated_impact)) +
geom_bar(stat = "identity",width = 1, fill = "grey80")
impact_plot_full
impact_plot_full +
geom_bar(stat = "identity", width = 1, position = "stack", aes(y =
estimated_impact[impact_group == 1])) +
scale_fill_gradient2(low="firebrick", mid="yellow", high = "green4") +
labs(y = "Estimated Impact ($/week)", x = "Total number of sites with estimate
is 100", title = "Sites with the greatest impact after coverage loss") +
theme(axis.text.x = element_blank()) +
scale_y_continuous(breaks =
round(seq(rng_full[1],rng_full[2],by=100),digits=0))
I can plot all of the data in the background in gray and I'm attempting to plot the negative range of the response data on top of this. I get the error that 'aesthetics must be either length 1 or the same as the data(100), y,x'. I know this is occurring because the negative data is not the same length as the entire dataset, but I can not figure out a way to do this. Any suggestions would be greatly appreciated.
Thank you,
Curtis
You need to subset the data and use fill in the aes() for geom_bar.
impact_plot_full +
geom_bar(data = subset(tr_sim, estimated_impact < 0),
stat = "identity",
aes(y = estimated_impact, fill = estimated_impact)) +
scale_fill_gradient2(low = "firebrick", mid = "yellow", high =
"green4") +
theme(axis.text.x = element_blank()) +
xlab("site_id")
Hope this is what You are looking for.

How to use percentage as label in stacked bar plot?

I'm trying to display percentage numbers as labels inside the bars of a stacked bar plot in ggplot2. I found some other post from 3 years ago but I'm not able to reproduce it: How to draw stacked bars in ggplot2 that show percentages based on group?
The answer to that post is almost exactly what I'm trying to do.
Here is a simple example of my data:
df = data.frame('sample' = c('cond1','cond1','cond1','cond2','cond2','cond2','cond3','cond3','cond3','cond4','cond4','cond4'),
'class' = c('class1','class2','class3','class1','class2','class3','class1','class2','class3','class1','class2','class3'))
ggplot(data=df, aes(x=sample, fill=class)) +
coord_flip() +
geom_bar(position=position_fill(reverse=TRUE), width=0.7)
I'd like for every bar to show the percentage/fraction, so in this case they would all be 33%. In reality it would be nice if the values would be calculated on the fly, but I can also hand the percentages manually if necessary. Can anybody help?
Side question: How can I reduce the space between the bars? I found many answers to that as well but they suggest using the width parameter in position_fill(), which doesn't seem to exist anymore.
Thanks so much!
EDIT:
So far, there are two examples that show exactly what I was asking for (big thanks for responding so quickly), however they fail when applying it to my real data. Here is the example data with just another element added to show what happens:
df = data.frame('sample' = c('cond1','cond1','cond1','cond2','cond2','cond2','cond3','cond3','cond3','cond4','cond4','cond4','cond1'),
'class' = c('class1','class2','class3','class1','class2','class3','class1','class2','class3','class1','class2','class3','class2'))
Essentially, I'd like to have only one label per class/condition combination.
I think what OP wanted was labels on the actual sections of the bars. We can do this using data.table to get the count percentages and the formatted percentages and then plot using ggplot:
library(data.table)
library(scales)
dt <- setDT(df)[,list(count = .N), by = .(sample,class)][,list(class = class, count = count,
percent_fmt = paste0(formatC(count*100/sum(count), digits = 2), "%"),
percent_num = count/sum(count)
), by = sample]
ggplot(data=dt, aes(x=sample, y= percent_num, fill=class)) +
geom_bar(position=position_fill(reverse=TRUE), stat = "identity", width=0.7) +
geom_text(aes(label = percent_fmt),position = position_stack(vjust = 0.5)) + coord_flip()
Edit: Another solution where you calculate the y-value of your label in the aggregate. This is so we don't have to rely on position_stack(vjust = 0.5):
dt <- setDT(df)[,list(count = .N), by = .(sample,class)][,list(class = class, count = count,
percent_fmt = paste0(formatC(count*100/sum(count), digits = 2), "%"),
percent_num = count/sum(count),
cum_pct = cumsum(count/sum(count)),
label_y = (cumsum(count/sum(count)) + cumsum(ifelse(is.na(shift(count/sum(count))),0,shift(count/sum(count))))) / 2
), by = sample]
ggplot(data=dt, aes(x=sample, y= percent_num, fill=class)) +
geom_bar(position=position_fill(reverse=TRUE), stat = "identity", width=0.7) +
geom_text(aes(label = percent_fmt, y = label_y)) + coord_flip()
Here is a solution where you first calculate the percentages using dplyr and then plot them:
UPDATED:
options(stringsAsFactors = F)
df = data.frame(sample = c('cond1','cond1','cond1','cond2','cond2','cond2','cond3','cond3','cond3','cond4','cond4','cond4'),
class = c('class1','class2','class3','class1','class2','class3','class1','class2','class3','class1','class2','class3'))
library(dplyr)
library(scales)
df%>%
# count how often each class occurs in each sample.
count(sample, class)%>%
group_by(sample)%>%
mutate(pct = n / sum(n))%>%
ggplot(aes(x = sample, y = pct, fill = class)) +
coord_flip() +
geom_col(width=0.7)+
geom_text(aes(label = paste0(round(pct * 100), '%')),
position = position_stack(vjust = 0.5))
Use scales
library(scales)
ggplot(data=df, aes(x=sample, fill=class)) +
coord_flip() +
geom_bar(position=position_fill(reverse=TRUE), width=0.7) +
scale_y_continuous(labels =percent_format())

ggplot, facet, piechart: placing text in the middle of pie chart slices

I'm trying to produce a facetted pie-chart with ggplot and facing problems with placing text in the middle of each slice:
dat = read.table(text = "Channel Volume Cnt
AGENT high 8344
AGENT medium 5448
AGENT low 23823
KIOSK high 19275
KIOSK medium 13554
KIOSK low 38293", header=TRUE)
vis = ggplot(data=dat, aes(x=factor(1), y=Cnt, fill=Volume)) +
geom_bar(stat="identity", position="fill") +
coord_polar(theta="y") +
facet_grid(Channel~.) +
geom_text(aes(x=factor(1), y=Cnt, label=Cnt, ymax=Cnt),
position=position_fill(width=1))
The output:
What parameters of geom_text should be adjusted in order to place numerical labels in the middle of piechart slices?
Related question is Pie plot getting its text on top of each other but it doesn't handle case with facet.
UPDATE: following Paul Hiemstra advice and approach in the question above I changed code as follows:
---> pie_text = dat$Cnt/2 + c(0,cumsum(dat$Cnt)[-length(dat$Cnt)])
vis = ggplot(data=dat, aes(x=factor(1), y=Cnt, fill=Volume)) +
geom_bar(stat="identity", position="fill") +
coord_polar(theta="y") +
facet_grid(Channel~.) +
geom_text(aes(x=factor(1),
---> y=pie_text,
label=Cnt, ymax=Cnt), position=position_fill(width=1))
As I expected tweaking text coordiantes is absolute but it needs be within facet data:
NEW ANSWER: With the introduction of ggplot2 v2.2.0, position_stack() can be used to position the labels without the need to calculate a position variable first. The following code will give you the same result as the old answer:
ggplot(data = dat, aes(x = "", y = Cnt, fill = Volume)) +
geom_bar(stat = "identity") +
geom_text(aes(label = Cnt), position = position_stack(vjust = 0.5)) +
coord_polar(theta = "y") +
facet_grid(Channel ~ ., scales = "free")
To remove "hollow" center, adapt the code to:
ggplot(data = dat, aes(x = 0, y = Cnt, fill = Volume)) +
geom_bar(stat = "identity") +
geom_text(aes(label = Cnt), position = position_stack(vjust = 0.5)) +
scale_x_continuous(expand = c(0,0)) +
coord_polar(theta = "y") +
facet_grid(Channel ~ ., scales = "free")
OLD ANSWER: The solution to this problem is creating a position variable, which can be done quite easily with base R or with the data.table, plyr or dplyr packages:
Step 1: Creating the position variable for each Channel
# with base R
dat$pos <- with(dat, ave(Cnt, Channel, FUN = function(x) cumsum(x) - 0.5*x))
# with the data.table package
library(data.table)
setDT(dat)
dat <- dat[, pos:=cumsum(Cnt)-0.5*Cnt, by="Channel"]
# with the plyr package
library(plyr)
dat <- ddply(dat, .(Channel), transform, pos=cumsum(Cnt)-0.5*Cnt)
# with the dplyr package
library(dplyr)
dat <- dat %>% group_by(Channel) %>% mutate(pos=cumsum(Cnt)-0.5*Cnt)
Step 2: Creating the facetted plot
library(ggplot2)
ggplot(data = dat) +
geom_bar(aes(x = "", y = Cnt, fill = Volume), stat = "identity") +
geom_text(aes(x = "", y = pos, label = Cnt)) +
coord_polar(theta = "y") +
facet_grid(Channel ~ ., scales = "free")
The result:
I would like to speak out against the conventional way of making pies in ggplot2, which is to draw a stacked barplot in polar coordinates. While I appreciate the mathematical elegance of that approach, it does cause all sorts of headaches when the plot doesn't look quite the way it's supposed to. In particular, precisely adjusting the size of the pie can be difficult. (If you don't know what I mean, try to make a pie chart that extends all the way to the edge of the plot panel.)
I prefer drawing pies in a normal cartesian coordinate system, using geom_arc_bar() from ggforce. It requires a little bit of extra work on the front end, because we have to calculate angles ourselves, but that's easy and the level of control we get as a result is more than worth it.
I've used this approach in previous answers here and here.
The data (from the question):
dat = read.table(text = "Channel Volume Cnt
AGENT high 8344
AGENT medium 5448
AGENT low 23823
KIOSK high 19275
KIOSK medium 13554
KIOSK low 38293", header=TRUE)
The pie-drawing code:
library(ggplot2)
library(ggforce)
library(dplyr)
# calculate the start and end angles for each pie
dat_pies <- left_join(dat,
dat %>%
group_by(Channel) %>%
summarize(Cnt_total = sum(Cnt))) %>%
group_by(Channel) %>%
mutate(end_angle = 2*pi*cumsum(Cnt)/Cnt_total, # ending angle for each pie slice
start_angle = lag(end_angle, default = 0), # starting angle for each pie slice
mid_angle = 0.5*(start_angle + end_angle)) # middle of each pie slice, for the text label
rpie = 1 # pie radius
rlabel = 0.6 * rpie # radius of the labels; a number slightly larger than 0.5 seems to work better,
# but 0.5 would place it exactly in the middle as the question asks for.
# draw the pies
ggplot(dat_pies) +
geom_arc_bar(aes(x0 = 0, y0 = 0, r0 = 0, r = rpie,
start = start_angle, end = end_angle, fill = Volume)) +
geom_text(aes(x = rlabel*sin(mid_angle), y = rlabel*cos(mid_angle), label = Cnt),
hjust = 0.5, vjust = 0.5) +
coord_fixed() +
scale_x_continuous(limits = c(-1, 1), name = "", breaks = NULL, labels = NULL) +
scale_y_continuous(limits = c(-1, 1), name = "", breaks = NULL, labels = NULL) +
facet_grid(Channel~.)
To show why I think this this approach is so much more powerful than the conventional (coord_polar()) approach, let's say we want the labels on the outside of the pie rather than inside. This creates a couple of problems, such as we will have to adjust hjust and vjust depending on the side of the pie a label falls, and also we will have to make the
plot panel wider than high to make space for the labels on the side without generating excessive space above and below. Solving these problems in the polar coordinate approach is not fun, but it's trivial in the cartesian coordinates:
# generate hjust and vjust settings depending on the quadrant into which each
# label falls
dat_pies <- mutate(dat_pies,
hjust = ifelse(mid_angle>pi, 1, 0),
vjust = ifelse(mid_angle<pi/2 | mid_angle>3*pi/2, 0, 1))
rlabel = 1.05 * rpie # now we place labels outside of the pies
ggplot(dat_pies) +
geom_arc_bar(aes(x0 = 0, y0 = 0, r0 = 0, r = rpie,
start = start_angle, end = end_angle, fill = Volume)) +
geom_text(aes(x = rlabel*sin(mid_angle), y = rlabel*cos(mid_angle), label = Cnt,
hjust = hjust, vjust = vjust)) +
coord_fixed() +
scale_x_continuous(limits = c(-1.5, 1.4), name = "", breaks = NULL, labels = NULL) +
scale_y_continuous(limits = c(-1, 1), name = "", breaks = NULL, labels = NULL) +
facet_grid(Channel~.)
To tweak the position of the label text relative to the coordinate, you can use the vjust and hjust arguments of geom_text. This will determine the position of all labels simultaneously, so this might not be what you need.
Alternatively, you could tweak the coordinate of the label. Define a new data.frame where you average the Cnt coordinate (label_x[i] = Cnt[i+1] + Cnt[i]) to position the label in the center of that particular pie. Just pass this new data.frame to geom_text in replacement of the original data.frame.
In addition, piecharts have some visual interpretation flaws. In general I would not use them, especially where good alternatives exist, e.g. a dotplot:
ggplot(dat, aes(x = Cnt, y = Volume)) +
geom_point() +
facet_wrap(~ Channel, ncol = 1)
For example, from this plot it is obvious that Cnt is higher for Kiosk than for Agent, this information is lost in the piechart.
Following answer is partial, clunky and I won't accept it.
The hope is that it will solicit better solution.
text_KIOSK = dat$Cnt
text_AGENT = dat$Cnt
text_KIOSK[dat$Channel=='AGENT'] = 0
text_AGENT[dat$Channel=='KIOSK'] = 0
text_KIOSK = text_KIOSK/1.7 + c(0,cumsum(text_KIOSK)[-length(dat$Cnt)])
text_AGENT = text_AGENT/1.7 + c(0,cumsum(text_AGENT)[-length(dat$Cnt)])
text_KIOSK[dat$Channel=='AGENT'] = 0
text_AGENT[dat$Channel=='KIOSK'] = 0
pie_text = text_KIOSK + text_AGENT
vis = ggplot(data=dat, aes(x=factor(1), y=Cnt, fill=Volume)) +
geom_bar(stat="identity", position=position_fill(width=1)) +
coord_polar(theta="y") +
facet_grid(Channel~.) +
geom_text(aes(y=pie_text, label=format(Cnt,format="d",big.mark=','), ymax=Inf), position=position_fill(width=1))
It produces following chart:
As you noticed I can't move labels for green (low).

Resources