ggalluvial ordering stratum - r

I've been trying to make some sankey diagrams with the ggalluvial package. I rather like it but I'm having problems controlling the order of the lodes. I'm using the alluvia format described at the start of the vignette.
Basically my diagram is showing subsets of level 2 and level 3 of one time point and how they move to another time point. The problem is that I can't for the life of me figure out how to force the order of the stratum as the diagram is unreadable without the order being correct. Here's my code:
library("ggalluvial")
library("ggplot2")
subsank_math = structure(list(`Winter Projection` = structure(c(2L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 5L,
5L, 5L, 5L, 5L, 5L), .Label = c("Level 5", "Level 4", "Level 3",
"Level 2", "Level 1"), class = "factor"), subgroup = structure(c(1L,
2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L,
6L, 1L, 2L, 3L, 4L, 5L, 6L), .Label = c("Level 3 (+)", "Level 3",
"Level 3 (-)", "Level 2 (+)", "Level 2", "Level 2 (-)"), class = "factor"),
n = c(119, 102, 16, 10, 12, 1, 272, 544, 182, 151, 134, 22,
40, 239, 204, 326, 663, 225, 0, 15, 12, 44, 215, 219)), row.names = c(NA,
-24L), .Names = c("Winter Projection", "subgroup", "n"), class = "data.frame")
ggplot(subsank_math,
aes(weight = n,
axis1 = subgroup, axis2 = `Winter Projection`)) +
geom_alluvium(aes(fill = subgroup),
width = 0, knot.pos = 0, reverse = FALSE) +
geom_stratum(width = 1/8, reverse = FALSE) +
geom_text(stat = "stratum", label.strata = TRUE, reverse = FALSE)
Kinda strange that it orders the first axis according to the levels of the factor but not the second.

I've just started playing with the ggalluvial package myself, so I won't claim to understand how things work, but reformatting the data frame into lode format (described near the end of the package's vignette) worked for me:
library(dplyr)
library(tidyr)
# Reshape the alluvia-format (wide) table into lode format: one row per
# (original row, axis) pair, with both axes sharing a single factor `level`.
# The explicit `levels` vector is what is meant to control stratum order.
df.lode <- subsank_math %>%
  # One id per original row. seq_len() stays empty for a zero-row input,
  # whereas seq(1, n()) would count down 1, 0.
  mutate(subject = seq_len(n())) %>%
  # Stack every column except the weight `n` and the id into key/value pairs.
  gather(x, level, -n, -subject) %>%
  mutate(level = factor(level,
                        levels = c("Level 1", "Level 2 (-)", "Level 2",
                                   "Level 2 (+)", "Level 3 (-)", "Level 3",
                                   "Level 3 (+)", "Level 4")))
> head(df.lode)
n subject x level
1 119 1 Winter Projection Level 4
2 102 2 Winter Projection Level 4
3 16 3 Winter Projection Level 4
4 10 4 Winter Projection Level 4
5 12 5 Winter Projection Level 4
6 1 6 Winter Projection Level 4
# Alluvial plot from the lode-format data: `x` is the axis, `level` the
# stratum, `subject` ties the lodes of one alluvium together and `n` is the
# weight of each alluvium.
ggplot(df.lode,
       aes(x = x,
           stratum = level,
           alluvium = subject,
           weight = n,
           label = level)) +
  geom_flow(aes(fill = level)) +
  geom_stratum() +
  geom_text(stat = "stratum") +
  # Take the legend order from the factor built above. The original referred
  # to `df$a1`, an object that is never defined in this snippet.
  scale_fill_discrete(limits = levels(df.lode$level))

Related

How to get geom_area() to not be stacked

I would like to have the two areas in my area plot to not be stacked.
At the minute I am subtracting the blue area from the red area to get the area you see here. Although, because the blue points are greater than the red points, the whole thing falls apart so I am also using max(value, 0) to prevent negative values.
How can I get it so that my area chart allows the blue to simply sit on top of the red, so that I can just have the red as a straight line along the top.
I can't just add a box because this is the percentage chart, but I need the same for the actual values as well.
ggplot(progress_targets_total, aes(x=wave, y=perc, fill=group)) + geom_area()
Here is my code. Very basic. I assume there is some aesthetic term I can just throw into the geom_area() part but the google is not helping today.
Cheers
Some data
wave value perc group
1 1 893 95.101171 Actual
2 1 46 4.898829 Target
3 2 796 90.351873 Actual
4 2 85 9.648127 Target
5 3 694 85.679012 Actual
6 3 116 14.320988 Target
structure(list(wave = c(1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L,
6L, 6L, 7L, 7L, 8L, 8L, 9L, 9L, 10L, 10L, 11L, 11L, 12L, 12L),
value = c(893, 46, 796, 85, 694, 116, 858, 86, 812, 61, 643,
157, 746, 190, 416, 454, 10, 813, 0, 914, 0, 866, 0, 794),
perc = c(95.1011714589989, 4.89882854100107, 90.3518728717367,
9.64812712826334, 85.679012345679, 14.320987654321, 90.8898305084746,
9.11016949152542, 93.0126002290951, 6.98739977090493, 80.375,
19.625, 79.7008547008547, 20.2991452991453, 47.816091954023,
52.183908045977, 1.21506682867558, 98.7849331713244, 0, 100,
0, 100, 0, 100), group = structure(c(2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L), .Label = c("Target", "Actual"), class = "factor")), row.names = c(NA,
-24L), class = "data.frame")
You could use position = 'identity' to unstack the areas:
library(ggplot2)
ggplot(progress_targets_total, aes(x=wave, y=value, fill= group)) +
geom_area(alpha = 0.4, position = 'identity')
Created on 2022-10-19 with reprex v2.0.2

R ggplot2 & gganimate: animation changes at end

I am trying to create this figure that animates over time using the gganimate library, going from the 'baseline' timepoint to the 'late' timepoint. However, for some reason, the image changes between frames 22-24 and again between frames 42-44, which throws off the visualization. I am not sure how to fix it. Many thanks!
library(ggplot2)
library(tweenr)
library(gganimate)
library(treemapify)
set.seed(1)
colors <- c("turquoise", "gold", "yellowgreen", "dodgerblue", "firebrick", "orchid4",
"grey74", "forestgreen", "deeppink2", "grey0", "slateblue", "sienna2",
"khaki2", "steelblue", "darksalmon", "darksalmon")
tweened <- tween_states(list(PID50baseline, PID50late, PID50baseline),
tweenlength = 8, statelength = 8,
ease = 'cubic-in-out', nframes = 50)
# Treemap of cluster sizes with one frame per tweened state, rendered to a GIF.
# TRUE is spelled out everywhere: the shorthand `T` is an ordinary variable
# that can be reassigned.
animated_plot <- ggplot(tweened,
                        aes(area = Number, fill = Cluster.Name,
                            subgroup = Type, frame = .frame)) +
  geom_treemap(fixed = TRUE) +
  geom_treemap_subgroup_border(fixed = TRUE) +
  geom_treemap_subgroup_text(place = "centre", grow = TRUE, alpha = 0.5,
                             colour = "black", fontface = "italic",
                             min.size = 0, fixed = TRUE) +
  scale_fill_manual(values = colors) +
  theme(legend.position = "bottom")

# Frame interval (in seconds) used when the animation is written out.
animation::ani.options(interval = 1/10)
gganimate(animated_plot, "animated_treemap_PID50.gif", title_frame = TRUE,
          ani.width = 200, ani.height = 200)
The data I used for this:
dput(PID50baseline)
# Level labels must match PID50late exactly; "Cluster 13" is kept on one line
# because a line break inside the string would become part of the level name.
structure(list(Cluster.Name = structure(c(13L, 14L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 15L, 15L), .Label = c("Cluster 13",
"Cluster 14", "Cluster 17", "Cluster 18", "Cluster 19", "Cluster 20",
"Cluster 27", "Cluster 35", "Cluster 36", "Cluster 40", "Cluster 41",
"Cluster 42", "Cluster 5", "Cluster 6", "Non-clonal"), class = "factor"),
Number = c(5L, 9L, 0L, 0L, 1L, 2L, 0L, 2L, 3L, 2L, 1L, 0L,
0L, 0L, 1L, 28L), Type = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L), .Label = c("Defective",
"Intact"), class = "factor")), .Names = c("Cluster.Name",
"Number", "Type"), class = "data.frame", row.names = c(NA, -16L))
dput(PID50late)
structure(list(Cluster.Name = structure(c(13L, 14L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 15L, 15L), .Label = c("Cluster 13",
"Cluster 14", "Cluster 17", "Cluster 18", "Cluster 19", "Cluster 20",
"Cluster 27", "Cluster 35", "Cluster 36", "Cluster 40", "Cluster 41",
"Cluster 42", "Cluster 5", "Cluster 6", "Non-clonal"), class = "factor"),
Number = c(2L, 10L, 2L, 2L, 1L, 0L, 5L, 0L, 5L, 0L, 3L, 3L,
2L, 2L, 18L, 59L), Type = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L), .Label = c("Defective",
"Intact"), class = "factor")), .Names = c("Cluster.Name",
"Number", "Type"), class = "data.frame", row.names = c(NA, -16L))
I believe treemapify omits areas with a size of 0. This could be the reason for your problem. In other words, replacing 0 with a small positive value greater than 0 (and using 16 distinct colors) gives you something like this:
tweened$Number[tweened$Number==0] <- 1e-10
colors <- unname(randomcoloR::distinctColorPalette(nlevels(tweened$Cluster.Name)))

ggplot heatmap gradient colours for different categories

I am trying to adapt a worked example from ggplot2 heatmaps: using different gradients for categories
However, the values that I want to plot are discrete (I think). I have already standardised my values (in a stored procedure) into percent values between 0 and 1. If the percent_value is 0 then I want to show white. If the percent value is 1 then I want to show the full colour. The colour grades from white to full. Each category has its own colour.
Here is my code...
library(RColorBrewer)
rm(list=ls())
yval <- c("51140/1234.5985/16:25:17" ,"51140/1234.5985/16:25:17" ,"51140/1234.5985/16:25:17" ,"51141/1234.5985/16:25:17" ,"51146/1234.5985/16:25:17" ,"51146/1234.5985/16:25:17" ,"51146/1234.5985/16:25:17" ,"51147/1234.5985/16:25:17" ,"51147/1234.5985/16:25:17" ,"51147/1234.5985/16:25:17" ,"51149/1234.5985/16:25:17" ,"51150/1234.5985/16:25:17" ,"51150/1234.5985/16:25:17" ,"51150/1234.5985/16:25:17" ,"51153/1234.5985/16:25:17" ,"51153/1234.5985/16:25:17" ,"51153/1234.5985/16:25:17")
cat <- c("cat1" ,"cat1" ,"cat1" ,"cat2" ,"cat1" ,"cat1" ,"cat1" ,"cat1" ,"cat1" ,"cat1" ,"cat2" ,"cat1" ,"cat1" ,"cat1" ,"cat1" ,"cat1" ,"cat1")
xval <- c("cat1.ant" ,"cat1.output3" ,"cat1.input5" ,"cat2.cat2_active_state" ,"cat1.input5" ,"cat1.output3" ,"cat1.ant" ,"cat1.ant" ,"cat1.output3" ,"cat1.input5" ,"cat2.cat2_active_state" ,"cat1.input5" ,"cat1.ant" ,"cat1.output3" ,"cat1.output3" ,"cat1.ant" ,"cat1.input5")
value <- c(0.75 ,1 ,1 ,0.1 ,1 ,1 ,0.75 ,0 ,1 ,1 ,1 ,1 ,0.75 ,1 ,1 ,0.75 ,1)
dat <- data.frame(xval, yval, cat, value)
n <- length(unique(dat$cat))
qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',]
col_vector = unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))
sample_colours <- sample(col_vector, n)
# 2 categories. I've hard-coded the gradient ends in this example.
# I've tried translating the value up the number line to separate the categories into different colour bands.
gradientends <- c(0, 1, 2, 3)
# Interleave two vectors element by element: v1[1], v2[1], v1[2], v2[2], ...
#
# v1, v2: vectors to merge. Elements of v1 are given the odd sort keys and
#   elements of v2 the even ones; if the lengths differ, the surplus elements
#   follow in key order.
# Returns c(v1, v2) reordered by those keys.
interleave <- function(v1, v2) {
  # seq_along() instead of 1:length(): for a zero-length input 1:0 counts
  # down (1, 0), which made the original index past the end and return NAs.
  odd_slots <- 2 * seq_along(v1) - 1
  even_slots <- 2 * seq_along(v2)
  c(v1, v2)[order(c(odd_slots, even_slots))]
}
colorends <- interleave(rep("white",n),sample_colours)
ggplot(dat, aes(x = xval, y = factor(yval))) +
geom_tile(aes(fill = value), colour = "grey80") +
geom_text(aes(label = value)) +
scale_fill_gradientn(colours = colorends) + #, values = gradientends) +
theme(axis.ticks = element_blank(),
axis.text.x = element_text(angle = 330, hjust = 0))
I have tried various approaches and it seems to me that scale_fill_gradient is probably not a good way to approach this. It appears that the scale function is "adjusting" values on the fly because depending on the values I am plotting then I get my heatmap looking correct or not.
Is there a way around it with this approach or perhaps there is a better approach?
Liam
I have figured out how to get my example working. It turns out I was getting the gradientends wrong and I should have been doing a rescale in scale_fill_gradientn(colours = colorends, values = rescale(gradientends)). To be honest, I'm not quite sure what is happening here! Presumably the gradientends are getting rescaled in the same manner as scale_fill_ rescales the rescaleoffset values it fills with, so everything lines up correctly with no overspill into neighbouring colour blocks.
Here is the working code. I have put the data in dput() format as suggested in SO guidelines. I've included the value and rescaleoffset values in the geom_text (which helps debugging). I also added another category to complicate it a bit.
rm(list=ls())
library(RColorBrewer)
dat <- structure(list(xval = structure(c(5L, 3L, 2L, 4L, 2L, 3L, 1L,
1L, 3L, 2L, 4L, 2L, 1L, 3L, 3L, 1L, 2L), .Label = c("cat1.ant",
"cat1.input5", "cat1.output3", "cat2.cat2_active_state", "cat3.ant"
), class = "factor"), yval = structure(c(1L, 1L, 1L, 2L, 3L,
3L, 3L, 4L, 4L, 4L, 5L, 6L, 6L, 6L, 7L, 7L, 7L), .Label = c("51140/1234.5985/16:25:17",
"51141/1234.5985/16:25:17", "51146/1234.5985/16:25:17", "51147/1234.5985/16:25:17",
"51149/1234.5985/16:25:17", "51150/1234.5985/16:25:17", "51153/1234.5985/16:25:17"
), class = "factor"), cat = structure(c(3L, 1L, 1L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("cat1",
"cat2", "cat3"), class = "factor"), value = c(0.75, 1, 1, 0.1,
1, 1, 0.75, 0, 1, 1, 1, 1, 0.75, 1, 1, 0.75, 1), rescaleoffset = c(200.75,
1, 1, 100.1, 1, 1, 0.75, 0, 1, 1, 101, 1, 0.75, 1, 1, 0.75, 1
)), .Names = c("xval", "yval", "cat", "value", "rescaleoffset"
), row.names = c(NA, -17L), class = "data.frame")
n <- length(unique(dat$cat))
qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',]
col_vector = unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))
sample_colours <- sample(col_vector, n)
dat$rescaleoffset <- dat$value + 100*(as.numeric(dat$cat)-1)
scalerange <- range(dat$value)
gradientends <- scalerange + rep(c(0,100,200), each=2)
colorends <- c("white", "red", "white", "green", "white", "blue")
# Heatmap with one white-to-colour gradient per category. Each category's
# values sit in their own 100-wide band of `rescaleoffset`, and `gradientends`
# holds the start and end of every band's gradient.
ggplot(dat, aes(xval, yval)) +
  geom_tile(aes(fill = rescaleoffset), colour = "white") +
  # Show both the raw value and its banded offset in each tile (debug aid).
  geom_text(aes(label = paste(format(round(value, 5), nsmall = 5),
                              format(round(rescaleoffset, 5), nsmall = 5),
                              sep='\n'))) +
  # `values` are positions within the scale's limits. Pin the limits to the
  # span of `gradientends`: left at the default (the range of the plotted
  # data), the colour stops shift whenever the data do not reach both ends.
  # rescale() is namespace-qualified because `scales` is never attached here.
  scale_fill_gradientn(colours = colorends,
                       values = scales::rescale(gradientends),
                       limits = range(gradientends)) +
  scale_x_discrete("", expand = c(0, 0)) +
  scale_y_discrete("", expand = c(0, 0)) +
  theme_grey(base_size = 9) +
  theme(axis.ticks = element_blank(),
        axis.text.x = element_text(angle = 330, hjust = 0)) +
  theme(legend.background = element_rect(fill="gray90", size=30, linetype="dotted"))
Although the values are numeric and would appear to be continuous, they actually represent discrete categoric values. Overall, I'm happy with this and is exactly what I am looking for, although some work required on the formatting and parametrisation.
EDIT: Now I am really baffled. Here is a similar set of data but it is not plotting as I expect. I expect the BCU1 category to be darkviolet (not white) because it has a value of 1.0. There is something that I am not understanding with the scaling. Could anyone help?
dat <- structure(list(heatmap_row_display = structure(c(2L, 6L, 5L,
8L, 4L, 3L, 7L, 9L, 1L, 3L, 7L, 9L, 4L, 1L, 4L, 1L, 3L, 7L, 9L
), .Label = c("051140/1084.8158/16:25:17", "051141/1084.8466/16:25:17",
"051146/1084.8803/16:25:17", "051147/1084.8876/16:25:17", "051148/1084.8965/16:25:17",
"051149/1084.9465/16:25:17", "051150/1084.9525/16:25:17", "051152/1084.9965/16:25:17",
"051153/1085.0193/16:25:17"), class = "factor"), msg_no = c(51141L,
51149L, 51148L, 51152L, 51147L, 51146L, 51150L, 51153L, 51140L,
51146L, 51150L, 51153L, 51147L, 51140L, 51147L, 51140L, 51146L,
51150L, 51153L), relative_time_ms = c(1084.8466, 1084.9465, 1084.8965,
1084.9965, 1084.8876, 1084.8803, 1084.9525, 1085.0193, 1084.8158,
1084.8803, 1084.9525, 1085.0193, 1084.8876, 1084.8158, 1084.8876,
1084.8158, 1084.8803, 1084.9525, 1085.0193), pcan_rx_datetime_adjusted = structure(c(1487089517,
1487089517, 1487089517, 1487089517, 1487089517, 1487089517, 1487089517,
1487089517, 1487089517, 1487089517, 1487089517, 1487089517, 1487089517,
1487089517, 1487089517, 1487089517, 1487089517, 1487089517, 1487089517
), class = c("POSIXct", "POSIXt"), tzone = ""), block_name = structure(c(1L,
1L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L), .Label = c("BCU1", "BCU2", "IDC1_status"), class = "factor"),
pcan_attribute = structure(c(1L, 1L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L), .Label = c("BCU1.BCU1_active_state",
"BCU2.BCU2_active_state", "IDC1_status.IDC1_ant", "IDC1_status.IDC1_input5",
"IDC1_status.IDC1_output3"), class = "factor"), data_value_as_string = c(1L,
1L, 1L, 1L, 0L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), data_value = c(1L, 1L, 1L, 1L, 0L, 3L, 3L, 3L,
3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), value = c(1,
1, 1, 1, 0, 0.75, 0.75, 0.75, 0.75, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1), colour = structure(c(2L, 2L, 1L, 1L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("cyan",
"darkviolet", "deeppink"), class = "factor"), rescaleoffset = c(1,
1, 101, 101, 200, 200.75, 200.75, 200.75, 200.75, 201, 201,
201, 201, 201, 201, 201, 201, 201, 201)), .Names = c("heatmap_row_display",
"msg_no", "relative_time_ms", "pcan_rx_datetime_adjusted", "block_name",
"pcan_attribute", "data_value_as_string", "data_value", "value",
"colour", "rescaleoffset"), row.names = c(NA, 19L), class = "data.frame")
n <- length(unique(dat$block_name))
# Do it this way to avoid reordering the colours in the data frame
sample_colours <- levels(factor(dat$colour, levels=unique(dat$colour)))
# Rescale all the values into categories of 100
dat$rescaleoffset <- dat$value + 100*(as.numeric(dat$block_name)-1)
scalerange <- range(dat$value)
# Mark the end of each gradient for each category block.
gradientends <- scalerange + rep(seq(0, (n - 1) * 100, by = 100), each=2)
# Interleave two vectors element by element: v1[1], v2[1], v1[2], v2[2], ...
# Used to interleave "white" with each of the category colours; "white"
# colours the values on the lowest end of each category's gradient range.
#
# v1, v2: vectors to merge. Elements of v1 are given the odd sort keys and
#   elements of v2 the even ones.
# Returns c(v1, v2) reordered by those keys.
interleave <- function(v1, v2) {
  # seq_along() instead of 1:length(): for a zero-length input 1:0 counts
  # down (1, 0), which made the original index past the end and return NAs.
  odd_slots <- 2 * seq_along(v1) - 1
  even_slots <- 2 * seq_along(v2)
  c(v1, v2)[order(c(odd_slots, even_slots))]
}
colorends <- interleave(rep("white",n),sample_colours)
# Heatmap of pcan_attribute against message row, filled by the banded
# `rescaleoffset` so that each block_name gets its own white-to-colour gradient.
p <- ggplot(dat, aes(pcan_attribute, heatmap_row_display)) +
geom_tile(aes(fill = rescaleoffset), colour = "white") +
geom_text(aes(label = paste(format(round(value, 1), nsmall = 1), sep='\n')), size=rel(2.0)) +
# NOTE(review): `values` are positions within the scale's limits, which
# default to the range of the plotted data. With this data rescaleoffset
# appears to span [1, 201] (no BCU1 row has value 0) while gradientends starts
# at 0, so BCU1's 1.0 falls on the first stop ("white"). Passing
# limits = range(gradientends) should realign the stops -- TODO confirm.
scale_fill_gradientn(colours = colorends, values = rescale(gradientends)) +
scale_x_discrete("", expand = c(0, 0)) +
scale_y_discrete("", expand = c(0, 0)) +
theme_grey(base_size = 9) +
theme(axis.ticks = element_blank(),
axis.text.x = element_text(angle = 330, hjust = 0))+
theme(legend.background = element_rect(fill="gray90", size=30, linetype="dotted"))
print(p)

R ggplot2 - errorbars layering over eachother

I have a data set in R which I want to get an error bar on, however it isn't plotting correctly (see photo). I have also included my data set.
ant.d<-structure(list(group.name = structure(c(1L, 18L, 20L, 24L, 8L,
13L, 15L, 17L, 12L, 19L, 21L, 22L, 23L, 9L, 11L, 16L, 2L, 3L,
4L, 5L, 6L, 7L, 10L, 14L), .Label = c("group 1", "group 10",
"group 11", "group 12", "group 13", "group 14", "group 15 ",
"group 16 ", "group 17", "group 18", "group 19", "group 2", "group 20",
"group 21", "group 22", "group 23", "group 24", "group 3", "group 4 ",
"group 5 ", "group 6", "group 7 ", "group 8 ", "group 9 "), class = "factor"),
habitat.type = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = c("edge", "forest", "Pasture"), class = "factor"),
species.richness = c(3L, 5L, 2L, 3L, 1L, 2L, 4L, 3L, 9L,
5L, 5L, 4L, 4L, 4L, 8L, 7L, 4L, 3L, 5L, 2L, 3L, 6L, 2L, 1L
), X = c(2.875, 2.875, 2.875, 2.875, 2.875, 2.875, 2.875,
2.875, 5.75, 5.75, 5.75, 5.75, 5.75, 5.75, 5.75, 5.75, 3.25,
3.25, 3.25, 3.25, 3.25, 3.25, 3.25, 3.25), se = c(2.32340059786604,
1.7996983644207, 2.84557296642458, 2.32340059786604, 4.02424788183988,
2.84557296642458, 2.01212394091994, 2.32340059786604, 1.34141596061329,
1.7996983644207, 1.7996983644207, 2.01212394091994, 2.01212394091994,
2.01212394091994, 1.42278648321229, 1.52102272991811, 2.01212394091994,
2.32340059786604, 1.7996983644207, 2.84557296642458, 2.32340059786604,
1.64289231816395, 2.84557296642458, 4.02424788183988)), .Names = c("group.name",
"habitat.type", "species.richness", "X", "se"), row.names = c(NA,
-24L), class = "data.frame")
What am I doing wrong? I've spent some time reading about error bars in R and I've not been successful.
ant.d$se <- 1.96*(sd(ant.d$species.richness, na.rm=T)/sqrt(ant.d$species.richness))
p<-ggplot(data = ant.d, aes(y = species.richness, x = habitat.type)) +
geom_bar(stat="identity",position="dodge")
p
p + geom_bar(position=dodge) + geom_errorbar(aes(ymax = species.richness + se, ymin=species.richness - se), position=dodge, width=0.25)
If I understand you correctly about what you are trying to achieve, then it's probably best to aggregate your data before plotting:
df <- aggregate(cbind(species.richness,se) ~ habitat.type, ant.d, mean)
ggplot(data = df, aes(x = habitat.type, y = species.richness)) +
geom_bar(stat="identity", fill="grey") +
geom_errorbar(stat="identity", aes(ymax = species.richness + se, ymin=species.richness - se), width=0.25)
which gives:
If you want groups within each habitat.type, you could do something like this:
ggplot(data = ant.d, aes(x = habitat.type, y = species.richness, fill = group.name)) +
geom_bar(stat="identity", position=position_dodge(0.8)) +
geom_errorbar(stat="identity", aes(ymax = species.richness + se, ymin=species.richness - se), width=0.25,
position=position_dodge(0.8)) +
scale_fill_discrete(guide = guide_legend(ncol=2))
which gives:

Reverse fill order of stacked bars with faceting

I can't figure out how to get the fill order to reverse. Basically, I'm trying to get the guide and the fill to match an intrinsic order of the words from positive to negative:
The guide, and the fill order, from top to bottom should be:
"Far better than I expected", (Filled at very top, at top of legend)
"A little better than I expected",
"About what I expected",
"A little worse than I expected",
"Far worse than I expected" (Filled at very bottom, at bottom of legend)
You'll need sample data:
dat <- structure(list(Banner = structure(c(2L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L), .Label = c("Other", "Some Company"
), class = "factor"), Response = structure(c(1L, 2L, 3L, 4L,
5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L,
1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L),
.Label = c(
"Far better than I expected",
"A little better than I expected",
"About what I expected",
"A little worse than I expected",
"Far worse than I expected"), class = "factor"), Frequency = c(1L,
6L, 9L, 0L, 0L, 29L, 71L, 149L, 32L, 6L, 1L, 7L, 16L, 1L, 0L,
38L, 90L, 211L, 24L, 6L, 0L, 0L, 8L, 1L, 1L, 6L, 13L, 109L, 35L,
9L), Proportion = c(6, 38, 56, 0, 0, 10, 25, 52, 11, 2, 4, 28,
64, 4, 0, 10, 24, 57, 7, 2, 0, 0, 80, 10, 10, 3, 8, 63, 20, 5
), Phase = c("Phase 1", "Phase 1", "Phase 1", "Phase 1", "Phase 1",
"Phase 1", "Phase 1", "Phase 1", "Phase 1", "Phase 1", "Phase 2",
"Phase 2", "Phase 2", "Phase 2", "Phase 2", "Phase 2", "Phase 2",
"Phase 2", "Phase 2", "Phase 2", "Phase 3", "Phase 3", "Phase 3",
"Phase 3", "Phase 3", "Phase 3", "Phase 3", "Phase 3", "Phase 3",
"Phase 3")), .Names = c("Banner", "Response", "Frequency", "Proportion",
"Phase"),
row.names = c(NA, 30L),
sig = character(0),
comment = "Overall, my experience was... by Company", q1 = "", q2 = "",
class = c("survcsub", "data.frame"))
Position labels
dat <- ddply(dat, .(Banner, Phase), function(x) {
x$Pos <- (cumsum(x$Proportion) - 0.5*x$Proportion)
x
})
Plot
ggplot(dat, aes(Banner, Proportion/100, fill=Response,
label=ifelse(Proportion > 5, percent(Proportion/100), ""))) +
geom_bar(position="fill", stat="identity") +
geom_text(aes(Banner, Pos/100)) +
facet_grid(~Phase) +
scale_y_continuous(labels=percent) +
labs(x="\nCompany", y="\nProportion")
What I've tried:
# Reverse the order of the factor's levels. `levels` has to be the set of
# distinct levels, so reverse levels(dat$Response) rather than the column
# itself, which repeats every level once per row.
dat$Response <- factor(dat$Response, levels = rev(levels(dat$Response)))
# No dice, reverses the colour of the scale but not the position of the fill
To change the order of values in stacked barplot you should use argument order= in aes() of geom_bar() and set name of column necessary for ordering (in this case Response). With function desc() you can set reverse order of bars.
Using your original data frame (without last line of factor()).
# Stacked proportion bars per Banner, one facet per Phase, with percentage
# labels placed at the precomputed `Pos` and blanked when Proportion <= 5.
ggplot(dat, aes(Banner, Proportion/100, fill=Response,
label=ifelse(Proportion > 5, percent(Proportion/100), ""))) +
# NOTE(review): the `order` aesthetic was deprecated in ggplot2 2.0.0, so this
# only reorders the stack on older versions. On current versions the stack can
# presumably be reversed with position = position_fill(reverse = TRUE) or by
# reordering the factor levels -- confirm against the installed version.
geom_bar(position="fill", stat="identity",aes(order=desc(Response))) +
geom_text(aes(Banner, Pos/100)) +
facet_grid(~Phase) +
scale_y_continuous(labels=percent) +
labs(x="\nCompany", y="\nProportion")
To get correct placement of labels, changed calculation of positions:
dat <- ddply(dat, .(Banner, Phase), function(x) {
x$Pos <- (100-cumsum(x$Proportion) + 0.5*x$Proportion)
x
})

Resources