Finding the random x-values used by geom_jitter - r

I want to be able to select observations from a box plot with jittered points over top. I have been somewhat successful by having the point click find the category, look at the y-value and select the observation. The following code shows my progress so far:
# ------------------------------Load Libraries---------------------------------
library(shiny)
library(ggplot2)
library(dplyr)
# -------------------------Print Boxplot to Screen-----------------------------
ui <- fluidPage(plotOutput('irisPlot', click = 'irisClick'))
server <- function(input, output){
# --------------------------Store Clicked Points-------------------------------
clicked <- reactiveValues(rows = rep(TRUE,nrow(iris)))
# ---------------------------Modify the Dataset--------------------------------
IRIS <- reactive({iris %>% mutate(index = clicked$rows)})
# ---------------------Select Points Through Plot Click------------------------
observeEvent(
input$irisClick,{
nS <- iris %>% mutate(selected = rep(FALSE,nrow(iris)))
lvls <- levels(iris$Species)
plant <- lvls[round(input$irisClick$x)]
pxl <- which(
sqrt((iris$Sepal.Width-input$irisClick$y)^2) %in%
min(sqrt((iris$Sepal.Width-input$irisClick$y)^2))
)
point <- iris[pxl,'Sepal.Width']
nS[nS$Species == plant & nS$Sepal.Width %in% point,'selected'] <- TRUE
clicked$rows <- xor(clicked$rows, nS$selected)
})
# --------------------------Generate the Boxplot-------------------------------
output$irisPlot <- renderPlot({
set.seed(1)
ggplot(IRIS(), aes(x = Species, y = Sepal.Width))+
geom_boxplot(na.rm = TRUE,outlier.shape = NA)+
geom_jitter(
na.rm = TRUE,
width = .8,
aes(shape = index, size = index, colour = index)
)+
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
panel.border = element_rect(colour = 'black', fill = NA),
legend.position = "none"
)+
scale_shape_manual(values = c('FALSE'= 1,'TRUE'= 19))+
scale_size_manual(values = c('FALSE' = 4, 'TRUE'= 2))+
scale_colour_manual(values = c('TRUE' = "#428BCA", 'FALSE' = '#FAA634'))
})
}
shinyApp(ui, server)
As I said the code mostly works but it can be inconsistent. Sometimes it can't find a point, other times it selects a large group of points or selects a point on the opposite side of the box plot. I figure the best way to solve this is to have both an x and y coordinate to select the point however, since the x values are randomly generated I need geom_jitter() to tell me what x-values it is using for a given plot but I have not been able to find any way to access this. Any help finding this information would be greatly appreciated.

My thanks to aosmith for telling me about the layer_data() function and to Peter Ellis for suggesting that I use geom_point() instead of geom_jitter() both comments were instrumental in helping me solve my problem.
What I had to do was create a new plot object in the global environment to jitter the points. Then use the layer_data() function to return the newly created x-values.
Finally, using those x-values, I created a new plot object and layered the points over top using geom_point(). Here is the completed code for anyone interested.
# ------------------------------Load Libraries---------------------------------
library(shiny)
library(ggplot2)
library(dplyr)
# ----------------------------Generate X Coords--------------------------------
set.seed(1)
g1 <- ggplot(iris, aes(x = Species, y = Sepal.Width))+
geom_boxplot(na.rm = TRUE,outlier.shape = NA)+
geom_jitter(na.rm = TRUE,width = .8)
xPoints <- layer_data(g1, i = 2)$x
# -------------------------Print Boxplot to Screen-----------------------------
ui <- fluidPage(
plotOutput('irisPlot', click = 'irisClick')
)
server <- function(input, output){
# --------------------------Store Clicked Points-------------------------------
clicked <- reactiveValues(rows = rep(TRUE,nrow(iris)))
rand <- reactiveValues(x = rep(NA,nrow(iris)))
# ---------------------------Modify the Dataset--------------------------------
IRIS <- reactive({iris %>% mutate(index = clicked$rows)})
# ---------------------Select Points Through Plot Click------------------------
observeEvent(
input$irisClick,{
nS <-data.frame( iris, x = xPoints)
point <- nearPoints(
df = nS,
coordinfo = input$irisClick,
xvar = 'x',
yvar = 'Sepal.Width',
allRows = TRUE
)
clicked$rows <- xor(clicked$rows, point$selected_)
})
# --------------------------Generate the Boxplot-------------------------------
output$irisPlot <- renderPlot({
ggplot(IRIS(), aes(x = Species, y = Sepal.Width))+
geom_boxplot(na.rm = TRUE,outlier.shape = NA)+
geom_point(
aes(
x = xPoints,
y = iris$Sepal.Width,
shape = index,
size = index,
colour = index
),
inherit.aes = FALSE
)+
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
panel.border = element_rect(colour = 'black', fill = NA),
legend.position = "none"
)+
scale_shape_manual(values = c('FALSE'= 1,'TRUE'= 19))+
scale_size_manual(values = c('FALSE' = 4, 'TRUE'= 2))+
scale_colour_manual(values = c('TRUE' = "#428BCA", 'FALSE' = '#FAA634'))
})
output$x <- renderPlot({
})
}
shinyApp(ui, server)

Just for the benefit of people like me who might be googling this problem I solved it very easily using Peter Ellis's suggestion of jittering the points myself using jitter().
I'm making it into an answer because I thought it should be more visible, I nearly missed it when I was looking at this page.

Related

How to control stripe transparency using ggforestplot/geom_stripes?

I was hoping to have some help in modifying the stripe transparency/shading color in the ggforestplot package. Please see the image below ("lighten" indicates the stripes I need to lighten). What is the best way of modifying the following code to do that?
Thank you so much for any pointers!
# Load and attach the packages
library(ggforestplot)
library (ggplot2)
library(tidyverse)
# Reproducible dataset
df <- ggforestplot::df_linear_associations %>% filter( trait == "BMI", dplyr::row_number() <= 30)
# Draw a forestplot
ggforestplot::forestplot(
df = df,
name = name,
estimate = beta,
se = se)+
geom_point(shape = 15, size = 5) +
geom_stripes( odd ="#00000000", even = "#00000000") +
theme(legend.position="none",
panel.background = element_rect(fill = "transparent",colour = NA),
plot.background = element_rect(fill = "transparent",colour = NA))
The issue is that ggforestplot::forestplot already adds a geom_stripes layer with hard-coded default values for odd and even. Adding another geom_stripes will have no effect on this underlying stripes layer and will simply result in overplotting of the points, vertical lines, ... . To adjust the transparency you could (and TBMK need to) hack the internals:
# Load and attach the packages
library(ggforestplot)
library(ggplot2)
library(tidyverse)
# Reproducible dataset
df <- ggforestplot::df_linear_associations %>% filter( trait == "BMI", dplyr::row_number() <= 30)
# Draw a forestplot
p <- ggforestplot::forestplot(
df = df,
name = name,
estimate = beta,
se = se) +
geom_point(shape = 15, size = 5) +
theme(legend.position="none",
panel.background = element_rect(fill = "transparent"))
p$layers[[1]]$aes_params$odd <- "#00000000"
p

R Shiny: create non-reactive background in plotOutput

I'm trying to build a shiny app where I can change a plot interactively. I want the plot to change within miliseconds and as the changes only include the addition of some points this is actually possible.
The reproducible example contains an abstraction of this idea. The first example plots a scatterplot and I can interactively change the number of points. This happens basically immediately. I'll refer to this part of the plot as the "reactive layer".
library(shiny)
ui <- fluidPage(
sliderInput(inputId = "slider_input", label = "Reactive values:", min = 1, max = 100, value = 10),
plotOutput(outputId = "plotx")
)
quick_server <- function(input, output, session){
output$plotx <- renderPlot({
# reactive layer
plot(
x = sample(x = -4:4, size = input$slider_input, replace = T),
y = sample(x = -4:4, size = input$slider_input, replace = T)
)
})
}
shinyApp(ui = ui, server = quick_server)
The problem is that the plot that I want to change interactively always includes a "slow non reactive layer" of many datapoints that are unreactive and never change. Due to the size of this data set and renderPlot() always replotting it the speed with which I interactively change the "reactive layer" decreases dramatically.
library(shiny)
ui <- fluidPage(
sliderInput(inputId = "slider_input", label = "Reactive values:", min = 1, max = 100, value = 10),
plotOutput(outputId = "plotx")
)
slow_server <- function(input, output, session){
base_data <- reactiveVal(value = data.frame(x = rnorm(n = 200000), y = rnorm(n = 200000)))
output$plotx <- renderPlot({
# slow non reactive layer
plot(x = base_data()$x, y = base_data()$y)
# reactive layer
points(
x = sample(x = -4:4, size = input$slider_input, replace = T),
y = sample(x = -4:4, size = input$slider_input, replace = T),
col = "red",
cex = 5,
pch = 19
)
})
}
shinyApp(ui = ui, server = slow_server)
As the base data and the resulting plot (here a cloud of points) never changes it is quite annyoing that renderPlot() always replots everything (both "layers"). Is there a way to "isolate" the plot created from the non reactive data set? Such that the layer that is not reactive is not replotted?
I have already tried to work with ggplot2 and create a steady layer
big_data_frame <- data.frame(x = rnorm(n = 200000), y = rnorm(n = 200000))
steady_layer <- reactiveVal(value = geom_point(data = big_data_frame, mapping = aes(x = x, y = y))
And then created the plot like this
output$plotx <- renderPlot({
small_df <-
data.frame(
x = sample(x = -4:4, size = input$slider_input, replace = T),
y = sample(x = -4:4, size = input$slider_input, replace = T)
)
ggplot() +
steady_layer() +
geom_point(data = small_df, mapping = aes(x = x, y = y)
})
But this doesn't help as it is the replotting process that takes time not creating the ggplot layer itself.
Although I can imagine that the solution might be to create a .png of the big plot and use it as the background of the HTML and CSS for the output$plotx by making it a reactive UI I have not managed to manipulate the HTML and CSS successfully.
Any help is appreciated. Thanks so much in advance!
You need to understand how renderPlot works. It uses the R png function first to create a png and then sends it to the client browser. When the data of the plot changes, this png is recreated. So, replot part of the png is not possible. So adding points to an existing plot will always use the time of slow points + new points, so under the current shiny mechanism, it is not possible not to recalculate these slow points. A possible option is to use plotly with proxy . It is made of HTML and Javascript, so yes, you can do it with partial updating. See the link for details, not repeating here. For my personal experience, it is fast not so fast as milliseconds-level as you want.
So here I have a smart trick for you: why do we update on the same plot? We can use one plot as background and it is slow, but we only render it one time, and we will never touch it again. Then we update another plot that has only a few points and we stack this plot on top of the slow plot.
Here is how:
add some CSS tricks to do the stacking
rendering the slow plot
rendering the the quick plot with transparency
library(shiny)
library(ggplot2)
ui <- fluidPage(
sliderInput(inputId = "slider_input", label = "Reactive values:", min = 1, max = 100, value = 10),
div(
class = "large-plot",
plotOutput(outputId = "plot_bg"),
plotOutput(outputId = "plotx")
),
tags$style(
"
.large-plot {
position: relative;
}
#plot_bg {
position: absolute;
}
#plotx {
position: absolute;
}
"
)
)
slow_server <- function(input, output, session){
base_data <- reactiveVal(value = data.frame(x = rnorm(n = 200000), y = rnorm(n = 200000)))
output$plot_bg <- renderPlot({
ggplot(base_data()) +
geom_point(aes(x,y)) +
scale_x_continuous(breaks = -4:4) +
scale_y_continuous(breaks = -4:4) +
xlim(-5, 5) +
ylim(-5, 5)
})
output$plotx <- renderPlot({
data.frame(
x = sample(x = -4:4, size = input$slider_input, replace = T),
y = sample(x = -4:4, size = input$slider_input, replace = T)
) %>%
ggplot() +
geom_point(aes(x,y), color = "red", size = 3) +
scale_x_continuous(breaks = -4:4) +
scale_y_continuous(breaks = -4:4) +
theme(
panel.background = element_rect(fill = "transparent"),
plot.background = element_rect(fill = "transparent", color = NA),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.background = element_rect(fill = "transparent"),
legend.box.background = element_rect(fill = "transparent")
)+
xlim(-5, 5) +
ylim(-5, 5)
}, bg="transparent")
}
shinyApp(ui = ui, server = slow_server)

How to override an aes color (controlled by a variable) based on a condition?

I'm trying to graph multiple nonlinear least squares regression in r in different colors based on the value of a variable.
However, I also display the equation of the last one, and I would like the color in the nonlinear regression corresponding to the equation to be black as well.
What I've tried is shown in the geom_smooth() layer - I tried to include an ifelse() statement, but this doesn't work because of reasons described here: Different between colour argument and aes colour in ggplot2?
test <- function() {
require(ggplot2)
set.seed(1);
master <- data.frame(matrix(NA_real_, nrow = 0, ncol = 3))
for( i in 1:5 ) {
df <- data.frame(matrix(NA_real_, nrow = 50, ncol = 3))
colnames(df) <- c("xdata", "ydata", "test")
df$xdata = as.numeric(sample(1:100, size = nrow(df), replace = FALSE))
df$ydata = as.numeric(sample(1:3, size = nrow(df), prob=c(.60, .25, .15), replace = TRUE))
# browser()
df$test = i
master <- rbind(master, df)
}
df <- master
last <- 5
# based on https://stackoverflow.com/questions/18305852/power-regression-in-r-similar-to-excel
power_eqn = function(df, start = list(a=300,b=1)) {
m = nls(as.numeric(reorder(xdata,-ydata)) ~ a*ydata^b, start = start, data = df)
# View(summary(m))
# browser()
# eq <- substitute(italic(hat(y)) == a ~italic(x)^b*","~~italic(r)^2~"="~r2*","~~p~"="~italic(pvalue),
eq <- substitute(italic(y) == a ~italic(x)^b*","~~italic('se')~"="~se*","~~italic(p)~"="~pvalue,
list(a = format(coef(m)[1], digits = 6), # a
b = format(coef(m)[2], digits = 6), # b
# r2 = format(summary(m)$r.squared, digits = 3),
se = format(summary(m)$parameters[2,'Std. Error'], digits = 6), # standard error
pvalue = format(summary(m)$coefficients[2,'Pr(>|t|)'], digits=6) )) # p value (based on t statistic)
as.character(as.expression(eq))
}
plot1 <- ggplot(df, aes(x = as.numeric(reorder(xdata,-ydata)), y = ydata ) ) +
geom_point(color="black", shape=1 ) +
# PROBLEM LINE
stat_smooth(aes(color=ifelse(test==5, "black", test)), method = 'nls', formula = 'y~a*x^b', method.args = list(start= c(a =1,b=1)),se=FALSE, fullrange=TRUE) +
geom_text(x = quantile(df$xdata)[4], y = max(df$ydata), label = power_eqn(df), parse = TRUE, size=4, color="black") + # make bigger? add border around?
theme(legend.position = "none", axis.ticks.x = element_blank() ) + #, axis.title.x = "family number", axis.title.y = "number of languages" ) # axis.text.x = element_blank(),
labs( x = "xdata", y = "ydata", title="test" )
plot1
}
test()
This is the graph I got.
I would like the line corresponding to the points and equation to be black as well. Does anyone know how to do this?
I do not want to use a scale_fill_manual, etc., because my real data would have many, many more lines - unless the scale_fill_manual/etc. can be randomly generated.
You could use scale_color_manual using a custom created palette where your level of interest (in your example where test equals 5) is set to black. Below I use palettes from RColorBrewer, extend them if necessary to the number of levels needed and sets the last color to black.
library(RColorBrewer) # provides several great palettes
createPalette <- function(n, colors = 'Greens') {
max_colors <- brewer.pal.info[colors, ]$maxcolors # Get maximum colors in palette
palette <- brewer.pal(min(max_colors, n), colors) # Get RColorBrewer palette
if (n > max_colors) {
palette <- colorRampPalette(palette)(n) # make it longer i n > max_colros
}
# assume that n-th color should be black
palette[n] <- "#000000"
# return palette
palette[1:n]
}
# create a palette with 5 levels using the Spectral palette
# change from 5 to the needed number of levels in your real data.
mypalette <- createPalette(5, 'Spectral') # palettes from RColorBrewer
We can then use mypalette with scale_color_manual(values=mypalette) to color points and lines according to the test variable.
Please note that I have updated geom_point and stat_smooth to so that they use aes(color=as.factor(test)). I have also changed the call to power_eqn to only use data points where df$test==5. The black points, lines and equation should now be based on the same data.
plot1 <- ggplot(df, aes(x = as.numeric(reorder(xdata,-ydata)), y = ydata )) +
geom_point(aes(color=as.factor(test)), shape=1) +
stat_smooth(aes(color=as.factor(test)), method = 'nls', formula = 'y~a*x^b', method.args = list(start= c(a =1,b=1)),se=FALSE, fullrange=TRUE) +
geom_text(x = quantile(df$xdata)[4], y = max(df$ydata), label = power_eqn(df[df$test == 5,]), parse = TRUE, size=4, color="black") +
theme(legend.position = "none", axis.ticks.x = element_blank() ) +
labs( x = "xdata", y = "ydata", title="test" ) +
scale_color_manual(values = mypalette)
plot1
See resulting figure here (not reputation enough to include them)
I hope you find my answer useful.

Cannot control Legend in Shiny ggplot

I have two problems. First I want to be able to format the numbers in the legend. Outside of Shiny I can do this fine using format(val, big.mark = ",") in the geom_point() call. But when I get to Shiny it won't work. Also as you can see there are only two points in the legend. I want more points. If is on for each value that is ok but I also want to be able to control it. I have tried scale_fill_manual(values = factor(val)) and the same with scale_color_manual() but neither seem to work either in Shiny or just in R.
My shiny server code is below:
shinyServer(function(input, output) {
#spend <- spend[order(spend$Tier.1.Contract.Category),]
spend <- read.csv("data/Construction_data.csv")
centpop <- aggregate(cbind(Action.Obligation, Action_Absolute_Value) ~ Principal.Place.of.Performance.Country.Name + FY, spend, sum)
#centmap <- map("world", region = toupper(centpop$Principal.Place.of.Performance.Country.Name))
mapdata <- map_data("world2", region = unique(centpop$Principal.Place.of.Performance.Country.Name), na.rm = TRUE)
mapdata$countries <- toupper(mapdata$region)
centmerge <- merge(x = centpop, y = mapdata, by.x = "Principal.Place.of.Performance.Country.Name", by.y = "countries", na.rm +TRUE)
centmerge <- centmerge[order(centmerge$Principal.Place.of.Performance.Country.Name,centmerge$order),]
centavg <- aggregate(cbind(Action_Absolute_Value, Action.Obligation, lat, long, group)~ Principal.Place.of.Performance.Country.Name + FY, centmerge, mean)
output$GMap <- renderPlot({
options(scipen = 999)
centavg <- centavg[centavg$FY == input$FY,]
ggplot(data = centmerge, aes_string(x="long", y="lat", group = "group")) +
geom_polygon(stat = "identity", fill = "#CC6600", color = "black", alpha =.7) +
geom_point(data = centavg, aes_string(x = "long", y = "lat", group = "Principal.Place.of.Performance.Country.Name", size = input$SpendType), stat = "identity", color = "#990000")+
scale_size_continuous(range = c(3,8)) +
ggtitle("Contract Spending in the CENTCOM AOR") +
coord_map("polyconic")+
theme(axis.text.y = element_blank(), axis.text.x = element_blank(), axis.title.x=element_blank(),
axis.title.y=element_blank(),axis.ticks = element_blank(), panel.background = element_blank(), plot.title=element_text(face="bold", size=20))+
labs(size = "Amount ($M)")
})
and my ui is the following:
library(shiny)
spend <- read.csv("data/CENTCOMdata.csv")
shinyUI(fluidPage(
titlePanel("Contract Spend"),
sidebarLayout(
sidebarPanel(
selectInput("SpendType", label = "Select Action Obligation Type",
choices = colnames(spend)[11:12]
),
selectInput("FY", label = "Fiscal Year",
choices = unique(spend$FY),
selected = min(unique(spend$FY))
)
),
mainPanel(plotOutput("GMap")
)
)
))
it produces the chart seen below:
Thanks
UPDATE: Here is a sample of my data. It won't be enough to reproduce the whole thing but, will give a sense of the fields and data types.
Principal.Place.of.Performance.Country.Name = c("BAHRAIN","BAHRAIN","BAHRAIN","EGYPT","EGYPT","EGYPT","IRAN","IRAN","IRAQ","IRAQ","IRAQ","JORDAN","JORDAN","JORDAN","KUWAIT","KUWAIT")
FY = c(2013,2014,2015,2013,2014,2015,2013,2014,2013,2014,2015,2013,2014,2015,2013,2014)
Action_Absolute_Value = c(771321456.1,479582869.8,514214922.1,97286075.83,69121513.24,23071035.64,1382620,24423.76,2022560127,1999064521,120073800.5,266136924.4,380041091.5,54896224.32,3189939809,1982808077)
Action.Obligation = c(758755260,440975903.7,507433161.8,85876698.15,55720022.7,21882002.7,1382620,-24423.76,1146078095,647903735.5,77122340.53,244683999.1,368479216.8,49013205.78,2970687191,1752387003)
lat = c(26.1272188822428,26.1272188822428,26.1272188822428,29.0995075001436,29.0995075001436,29.0995075001436,32.9537412255211,32.9537412255211,32.83929874051,32.83929874051,32.83929874051,31.2209393637521,31.2209393637521,31.2209393637521,29.4381094614665,29.4381094614665)
long = c(50.5078544616699,50.5078544616699,50.5078544616699,31.8482420528636,31.8482420528636,31.8482420528636,53.2439389524207,53.2439389524207,45.3801684841033,45.3801684841033,45.3801684841033,36.4073101225353,36.4073101225353,36.4073101225353,47.735088857015,47.735088857015)
group = c(22,22,22,8.61176470588235,8.61176470588235,8.61176470588235,5.16814159292035,5.16814159292035,8.03225806451613,8.03225806451613,8.03225806451613,8,8,8,18.3333333333333,18.3333333333333)
data.frame(Principal.Place.of.Performance.Country.Name,FY,Action_Absolute_Value, Action.Obligation, lat, long, group)

R: Pie chart with percentage as labels using ggplot2

From a data frame I want to plot a pie chart for five categories with their percentages as labels in the same graph in order from highest to lowest, going clockwise.
My code is:
League<-c("A","B","A","C","D","E","A","E","D","A","D")
data<-data.frame(League) # I have more variables
p<-ggplot(data,aes(x="",fill=League))
p<-p+geom_bar(width=1)
p<-p+coord_polar(theta="y")
p<-p+geom_text(data,aes(y=cumsum(sort(table(data)))-0.5*sort(table(data)),label=paste(as.character(round(sort(table(data))/sum(table(data)),2)),rep("%",5),sep="")))
p
I use
cumsum(sort(table(data)))-0.5*sort(table(data))
to place the label in the corresponding portion and
label=paste(as.character(round(sort(table(data))/sum(table(data)),2)),rep("%",5),sep="")
for the labels which is the percentages.
I get the following output:
Error: ggplot2 doesn't know how to deal with data of class uneval
I've preserved most of your code. I found this pretty easy to debug by leaving out the coord_polar... easier to see what's going on as a bar graph.
The main thing was to reorder the factor from highest to lowest to get the plotting order correct, then just playing with the label positions to get them right. I also simplified your code for the labels (you don't need the as.character or the rep, and paste0 is a shortcut for sep = "".)
League<-c("A","B","A","C","D","E","A","E","D","A","D")
data<-data.frame(League) # I have more variables
data$League <- reorder(data$League, X = data$League, FUN = function(x) -length(x))
at <- nrow(data) - as.numeric(cumsum(sort(table(data)))-0.5*sort(table(data)))
label=paste0(round(sort(table(data))/sum(table(data)),2) * 100,"%")
p <- ggplot(data,aes(x="", fill = League,fill=League)) +
geom_bar(width = 1) +
coord_polar(theta="y") +
annotate(geom = "text", y = at, x = 1, label = label)
p
The at calculation is finding the centers of the wedges. (It's easier to think of them as the centers of bars in a stacked bar plot, just run the above plot without the coord_polar line to see.) The at calculation can be broken out as follows:
table(data) is the number of rows in each group, and sort(table(data)) puts them in the order they'll be plotted. Taking the cumsum() of that gives us the edges of each bar when stacked on top of each other, and multiplying by 0.5 gives us the half the heights of each bar in the stack (or half the widths of the wedges of the pie).
as.numeric() simply ensures we have a numeric vector rather than an object of class table.
Subtracting the half-widths from the cumulative heights gives the centers each bar when stacked up. But ggplot will stack the bars with the biggest on the bottom, whereas all our sort()ing puts the smallest first, so we need to do nrow - everything because what we've actually calculate are the label positions relative to the top of the bar, not the bottom. (And, with the original disaggregated data, nrow() is the total number of rows hence the total height of the bar.)
Preface: I did not make pie charts of my own free will.
Here's a modification of the ggpie function that includes percentages:
library(ggplot2)
library(dplyr)
#
# df$main should contain observations of interest
# df$condition can optionally be used to facet wrap
#
# labels should be a character vector of same length as group_by(df, main) or
# group_by(df, condition, main) if facet wrapping
#
pie_chart <- function(df, main, labels = NULL, condition = NULL) {
# convert the data into percentages. group by conditional variable if needed
df <- group_by_(df, .dots = c(condition, main)) %>%
summarize(counts = n()) %>%
mutate(perc = counts / sum(counts)) %>%
arrange(desc(perc)) %>%
mutate(label_pos = cumsum(perc) - perc / 2,
perc_text = paste0(round(perc * 100), "%"))
# reorder the category factor levels to order the legend
df[[main]] <- factor(df[[main]], levels = unique(df[[main]]))
# if labels haven't been specified, use what's already there
if (is.null(labels)) labels <- as.character(df[[main]])
p <- ggplot(data = df, aes_string(x = factor(1), y = "perc", fill = main)) +
# make stacked bar chart with black border
geom_bar(stat = "identity", color = "black", width = 1) +
# add the percents to the interior of the chart
geom_text(aes(x = 1.25, y = label_pos, label = perc_text), size = 4) +
# add the category labels to the chart
# increase x / play with label strings if labels aren't pretty
geom_text(aes(x = 1.82, y = label_pos, label = labels), size = 4) +
# convert to polar coordinates
coord_polar(theta = "y") +
# formatting
scale_y_continuous(breaks = NULL) +
scale_fill_discrete(name = "", labels = unique(labels)) +
theme(text = element_text(size = 22),
axis.ticks = element_blank(),
axis.text = element_blank(),
axis.title = element_blank())
# facet wrap if that's happening
if (!is.null(condition)) p <- p + facet_wrap(condition)
return(p)
}
Example:
# sample data
resps <- c("A", "A", "A", "F", "C", "C", "D", "D", "E")
cond <- c(rep("cat A", 5), rep("cat B", 4))
example <- data.frame(resps, cond)
Just like a typical ggplot call:
ex_labs <- c("alpha", "charlie", "delta", "echo", "foxtrot")
pie_chart(example, main = "resps", labels = ex_labs) +
labs(title = "unfacetted example")
ex_labs2 <- c("alpha", "charlie", "foxtrot", "delta", "charlie", "echo")
pie_chart(example, main = "resps", labels = ex_labs2, condition = "cond") +
labs(title = "facetted example")
It worked on all included function greatly inspired from here
ggpie <- function (data)
{
# prepare name
deparse( substitute(data) ) -> name ;
# prepare percents for legend
table( factor(data) ) -> tmp.count1
prop.table( tmp.count1 ) * 100 -> tmp.percent1 ;
paste( tmp.percent1, " %", sep = "" ) -> tmp.percent2 ;
as.vector(tmp.count1) -> tmp.count1 ;
# find breaks for legend
rev( tmp.count1 ) -> tmp.count2 ;
rev( cumsum( tmp.count2 ) - (tmp.count2 / 2) ) -> tmp.breaks1 ;
# prepare data
data.frame( vector1 = tmp.count1, names1 = names(tmp.percent1) ) -> tmp.df1 ;
# plot data
tmp.graph1 <- ggplot(tmp.df1, aes(x = 1, y = vector1, fill = names1 ) ) +
geom_bar(stat = "identity", color = "black" ) +
guides( fill = guide_legend(override.aes = list( colour = NA ) ) ) +
coord_polar( theta = "y" ) +
theme(axis.ticks = element_blank(),
axis.text.y = element_blank(),
axis.text.x = element_text( colour = "black"),
axis.title = element_blank(),
plot.title = element_text( hjust = 0.5, vjust = 0.5) ) +
scale_y_continuous( breaks = tmp.breaks1, labels = tmp.percent2 ) +
ggtitle( name ) +
scale_fill_grey( name = "") ;
return( tmp.graph1 )
} ;
An example :
sample( LETTERS[1:6], 200, replace = TRUE) -> vector1 ;
ggpie(vector1)
Output

Resources