Related
I'm using the excellent package ggrepel() to position text labels on bars with a bit of jitter. The only trouble is that they are appearing in reverse order. Here's some reproducible code:
library(tidyverse)
require(scales) # Used for adding percentages to bar charts
library(ggrepel)
# ingest some sample data
structure(list(Q52_bin = structure(c(3L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 1L, 2L,
2L, 1L, 3L, 2L, 3L, 3L, 1L), .Label = c("low", "medium", "high"
), class = "factor"), Q53_bin = structure(c(2L, 3L, 2L, 2L, 2L,
2L, 2L, 3L, 2L, 3L, 2L, 1L, 2L, 2L, 1L, 2L, 3L, 2L, 2L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 3L, 1L,
2L, 2L, 2L, 1L, 2L, 2L, 3L, 2L), .Label = c("low", "medium",
"high"), class = "factor"), Q57_bin = structure(c(2L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 3L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 1L,
1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 3L,
3L, 1L, 2L, 2L, 2L, 2L, 1L, 3L, 2L, 2L), .Label = c("low", "medium",
"high"), class = "factor"), Q4 = c(2, 3, 3, 5, 4, 3, 4, 5, 2,
4, 2, 3, 5, 4, 3, 3, 5, 5, 4, 5, 3, 2, 4, 1, 5, 4, 4, 4, 4, 4,
5, 3, 5, 1, 5, 5, 4, 5, 4, 1, 4, 2, 1, 5, 4)), row.names = c(NA,
-45L), class = c("tbl_df", "tbl", "data.frame"))
df <- select(climate_experience_data, Q52_bin, Q53_bin, Q57_bin, Q4)
names(df) <- c("Q52_bin", "Q53_bin", "Q57_bin", "response")
facet_names <- c(`Q52_bin` = "Spirituality", `Q53_bin` = "Politics L/R", `Q57_bin` = "Religiosity", `low`="low", `medium`="medium", `high`="high")
facet_labeller <- function(variable,value){return(facet_names[value])}
q4_levels = c("Not at all", "A little", "Some", "A lot", "A great deal")
df$response <- factor(df$response, ordered = TRUE, levels = c("5", "4", "3", "2", "1"))
df$response <- fct_recode(df$response, "Not at all" = "1", "A little" = "2", "Some" = "3", "A lot" = "4", "A great deal" = "5")
caption <- "How much have you thought about climate change before today?"
df %>%
# we need to get the data including facet info in long format, so we use pivot_longer()
pivot_longer(!response, names_to = "bin_name", values_to = "b") %>%
# add counts for plot below
count(response, bin_name, b) %>%
group_by(bin_name,b) %>%
mutate(perc=paste0(round(n*100/sum(n),1),"%")) %>%
# run ggplot
ggplot(aes(x = n, y = "", fill = response, label = perc)) +
geom_col(position=position_fill(), aes(fill=response)) +
coord_cartesian(clip = "off") +
geom_label_repel(
fill = "white",
size = 3,
min.segment.length = 0,
max.overlaps = Inf,
position = position_fill()
) +
scale_fill_brewer(palette="YlOrBr") +
scale_x_continuous(labels = scales::percent_format(), expand = c(0.05, 0.05)) +
facet_grid(vars(b), vars(bin_name), labeller=as_labeller(facet_names)) +
labs(caption = caption, x = "", y = "") +
guides(fill = guide_legend(title = NULL))
ggsave("figures/q4_faceted.png", width = 30, height = 10, units = "cm")
Here's the visual that I'm getting:
So what's going on here in terms of labels reversing? I'm concerned that there may be something about the plot that is inaccurate!
The issue is that the grouping variable used for your labels is different from the one used for the columns. To fix that you have to explicitly tell ggrepel to group the labels by response using the group aes:
library(tidyverse)
library(ggrepel)
df_long <- df %>%
pivot_longer(!response, names_to = "bin_name", values_to = "b") %>%
count(response, bin_name, b) %>%
group_by(bin_name, b) %>%
mutate(perc = paste0(round(n * 100 / sum(n), 1), "%"))
ggplot(df_long, aes(x = n, y = "")) +
geom_col(position = position_fill(), aes(fill = response)) +
coord_cartesian(clip = "off") +
geom_label_repel(
aes(group = response, label = perc),
fill = "white",
size = 3,
min.segment.length = 0,
max.overlaps = Inf,
position = position_fill()
) +
scale_fill_brewer(palette = "YlOrBr") +
scale_x_continuous(labels = scales::percent_format(), expand = c(0.05, 0.05)) +
facet_grid(vars(b), vars(bin_name), labeller = as_labeller(facet_names)) +
labs(caption = caption, x = "", y = "") +
guides(fill = guide_legend(title = NULL))
I am trying to make a stacked barplot with two variables. My desired outcome looks like this:
This is the first part of my data. There are 220 more rows:
Type Week Stage
<chr> <dbl> <dbl>
1 Captured 1 2
2 Captured 1 1
3 Captured 1 1
4 Captured 1 2
5 Captured 1 1
6 Captured 1 3
7 Captured 1 NA
8 Captured 1 3
9 Captured 1 2
10 Captured 1 1
So far I'm not getting anywhere, this is my code so far
library(data.table)
dat.m <- melt(newrstudio2, id.vars="Type")
dat.m
library(ggplot2)
ggplot(dat.m, aes(x=Type, y=value, fill=variable)) +
geom_bar(stat="identity")
I guess I need to calculate the number of observations of each stage in each week of each type? I've tried both long and wide data, but I somehow need to combine week with type? I don't know, I'm at a loss.
Alternative way:
set.seed(123)
# sample data
my_data <- data.frame(Type = sample(c("W", "C"), 220, replace = TRUE),
Week = sample(paste0("Week ", 1:4), 220, replace = TRUE),
Stage = sample(paste0('S', 1:4), 220, replace = TRUE))
head(my_data)
library(ggplot2)
ggplot(my_data, aes(x = Type, fill = Stage)) +
geom_bar(aes(y = (..count..)/sum(..count..)), position = "fill") +
facet_grid(. ~ Week, switch="both") +
scale_y_continuous(labels = scales::percent) +
ylab("Stage [%]") +
theme(strip.background = element_blank(),
strip.placement = "outside",
panel.spacing = unit(0, "lines"))
Alternatively we could use base graphics. First, what you're probably most interested in, we should reshape the data.
For this we could split the data per week and run a dcast() over it.
L <- lapply(split(d, d$week), function(x)
data.table::dcast(x, type ~ stage, value.var="stage", fun=length))
d2 <- do.call(rbind, L) # transform back into a data frame
Now – with credits to #alemol – we want the proportions.
d2[-1] <- t(apply(d2[-1], 1, prop.table))
Then we are able to plot relatively simply. Note, that barplot() additionally gives us a vector of bar coordinates which we can use later for the axis() labels.
cols <- c("#ed1c24", "#ff7f27", "#00a2e8", "#fff200") # define stage colors
par(mar=c(5, 5, 3, 5) + .1, xpd=TRUE) # set plot margins
p <- barplot(t(d2[-1]), col=cols, border="white", space=rep(c(.2, 0), 5),
font.axis=2, xaxt="n", yaxt="n", xlab="Week")
axis(1, at=p, labels=rep(c("C", "W"), 5), tick=FALSE, line=0)
axis(1, at=apply(matrix(p, , 2, byrow=TRUE), 1, mean), labels=1:5, tick=FALSE, line=1)
axis(2, at=0:10/10, labels=paste0(seq(0, 100, 10), "%"), line=0, las=2)
legend(12, .5, legend=rev(names(d2[-1])), col=rev(cols), pch=15, title="Stage")
Result:
Data:
d <- structure(list(type = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L,
1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L,
2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L,
2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L,
2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L,
2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L,
2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L), .Label = c("C", "W"), class = "factor"), week = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), stage = c(3L,
1L, 1L, 2L, 2L, 2L, 1L, 3L, 2L, 4L, 1L, 1L, 2L, 2L, 3L, 4L, 3L,
2L, 4L, 1L, 1L, 3L, 1L, 2L, 3L, 1L, 4L, 1L, 2L, 4L, 2L, 3L, 4L,
4L, 2L, 4L, 4L, 2L, 3L, 1L, 1L, 4L, 4L, 1L, 4L, 3L, 3L, 3L, 2L,
1L, 3L, 4L, 2L, 4L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 2L, 1L, 3L, 2L,
1L, 1L, 1L, 4L, 2L, 4L, 1L, 4L, 3L, 4L, 4L, 4L, 2L, 2L, 2L, 2L,
2L, 1L, 3L, 4L, 2L, 4L, 4L, 2L, 2L, 3L, 4L, 4L, 3L, 3L, 1L, 1L,
1L, 2L, 4L, 3L, 1L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 4L, 2L, 1L,
2L, 1L, 3L, 3L, 2L, 4L, 3L, 1L, 1L, 4L, 1L, 4L, 4L, 1L, 2L, 2L,
2L, 1L, 3L, 4L, 3L, 4L, 3L, 4L, 4L, 3L, 1L, 1L, 2L, 1L, 2L, 3L,
2L, 2L, 1L, 4L, 3L, 4L, 2L, 2L, 3L, 1L, 2L, 3L, 3L, 3L, 3L, 2L,
1L, 2L, 2L, 1L, 1L, 3L, 4L, 3L, 4L, 2L, 4L, 1L, 1L, 2L, 1L, 3L,
2L, 1L, 3L, 3L, 2L, 2L, 1L, 3L, 2L, 2L, 2L, 1L, 4L, 2L, 4L, 2L,
4L, 3L, 3L, 1L, 3L, 4L, 3L, 2L, 1L, 2L, 4L, 1L, 2L, 4L, 2L, 1L,
2L, 1L, 2L, 2L, 3L, 1L, 3L, 3L, 3L, 2L, 2L, 1L, 2L, 3L, 2L, 2L,
1L, 2L, 1L, 3L, 3L, 2L, 1L, 3L, 4L, 2L, 1L, 2L, 4L, 3L, 4L, 2L,
3L, 2L, 4L, 1L, 4L, 4L, 2L, 1L, 2L)), row.names = c(NA, -250L
), class = "data.frame")
Is this what you're looking for:
set.seed(123)
# sample data
my_data <- data.frame(Type = sample(paste0('T', 1:4), 220, replace = TRUE),
Week = sample(paste0('W', 1:4), 220, replace = TRUE),
Stage = sample(paste0('S', 1:4), 220, replace = TRUE))
ggplot(my_data, aes(x=Week:Type, fill = Stage)) + geom_bar()
I'm trying to remove the redundant "pro/retro" labels on the second row of panels on my plot. However, I still want to keep the top row of panel labels intact. I've tried for the past hour to selectively remove the 1st strip on the 2nd panel row and I was wondering if anyone here knows how to do this. See below for technical details.
I have the following plot:
It was generated from the following data:
absBtwnDat <- structure(list(setSize = structure(c(1L, 2L, 3L, 4L, 5L, 6L,
7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L,
6L, 7L), .Label = c("2", "3", "4", "5", "6", "7", "8"), class = "factor"),
Measure = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L), .Label = c("Actual", "Predicted"), class = "factor"),
Location = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L), .Label = c("fix", "forced"), class = "factor"),
JudgementType = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L), .Label = c("pro", "retro"), class = "factor"),
Accuracy = c(1.91388888888889, 2.95555555555556, 3.74861111111111,
4.37777777777778, 4.21527777777778, 3.0875, 2.85277777777778,
2, 2.99444444444444, 4, 4.77222222222222, 5.24444444444444,
5.18472222222222, 5.20277777777778, 1.98888888888889, 3,
3.97222222222222, 4.85972222222222, 5.70555555555556, 6.56944444444444,
7.27222222222222, 2, 3, 3.99444444444444, 4.99444444444444,
5.86944444444444, 6.75555555555556, 7.57777777777778, 1.96111111111111,
2.97777777777778, 3.78333333333333, 3.97222222222222, 4.22361111111111,
3.64722222222222, 3.68888888888889, 2, 3, 3.97222222222222,
4.67777777777778, 5.26944444444444, 5.4625, 5.8, 2, 3, 3.98333333333333,
4.87777777777778, 5.73055555555556, 6.48333333333333, 7.62916666666667,
2, 3, 3.98333333333333, 4.96666666666667, 5.96944444444444,
6.94444444444444, 7.93333333333333), LL = c(1.85, 2.87777777777778,
3.59861111111111, 4.15555555555556, 3.78888888888889, 2.73055555555556,
2.55555555555556, 2, 2.96111111111111, 4, 4.64444444444444,
5.01666666666667, 4.88333333333333, 4.88611111111111, 1.91111111111111,
3, 3.89444444444444, 4.73611111111111, 5.47777777777778,
6.20277777777778, 6.71666666666667, 2, 3, 3.96666666666667,
4.95555555555556, 5.65096686319131, 6.48333333333333, 7.17222222222222,
1.86637442123568, 2.92222222222222, 3.65, 3.61666666666667,
3.88333333333333, 3.17092476055122, 3.18888888888889, 2,
3, 3.92222222222222, 4.49444444444444, 5.0375, 5.09444444444444,
5.40555555555556, 2, 3, 3.92777777777778, 4.72222222222222,
5.52777777777778, 6.24444444444444, 7.37361111111111, 2,
3, 3.95, 4.88888888888889, 5.93333333333333, 6.88333333333333,
7.73065763697428), UL = c(1.95555555555556, 2.98333333333333,
3.84444444444444, 4.56666666666667, 4.6, 3.43611111111111,
3.17916666666667, 2, 3, 4, 4.86111111111111, 5.42777777777778,
5.48656054159421, 5.58611111111111, 2, 3, 4, 4.93888888888889,
5.83888888888889, 6.76944444444444, 7.6, 2, 3, 4, 5, 5.94166666666667,
6.88888888888889, 7.78888888888889, 1.98888888888889, 2.99444444444444,
3.87777777777778, 4.22777777777778, 4.53611111111111, 4.19722222222222,
4.20555555555556, 2, 3, 3.98888888888889, 4.78333333333333,
5.45555555555556, 5.79583333333333, 6.16666666666667, 2,
3, 3.99444444444444, 4.95, 5.85972222222222, 6.67222222222222,
7.80138888888889, 2, 3, 3.99444444444444, 4.98888888888889,
5.9875, 6.97222222222222, 7.98333333333333)), .Names = c("setSize",
"Measure", "Location", "JudgementType", "Accuracy", "LL", "UL"
), row.names = c(NA, -56L), class = "data.frame")
I visualized it using using the following code:
library(ggplot2)
p1 <- ggplot(data = absBtwnDat, aes(x = as.numeric(as.character(setSize)),
y = Accuracy, group = Measure,
colour = Measure))+
geom_point()+
geom_line(aes(linetype = Measure))+
scale_x_continuous("Trial Set Size", breaks = 2:8)+
scale_y_continuous("Accuracy (# Correct)", breaks = 0:8, limits = c(0, 8))+
geom_errorbar(aes(ymin = LL, ymax = UL), width = .1, size = .75)+
scale_colour_grey(start = .8, end = .4)+
facet_wrap(~JudgementType+Location, dir = "v")+
theme(legend.position = "top")
Just to be certain, I've highlighted unwanted strip in the following image:
With this you'll only have one row of labels per panel, but they still include both words.
p1 <- ggplot(data = absBtwnDat,
aes(x = as.numeric(as.character(setSize)), y = Accuracy,
group = Measure,
colour = Measure))+
geom_point()+
geom_line(aes(linetype = Measure))+
scale_x_continuous("Trial Set Size", breaks = 2:8)+
scale_y_continuous("Accuracy (# Correct)",
breaks = 0:8, limits = c(0, 8))+
geom_errorbar(aes(ymin = LL, ymax = UL),
width = .1, size = .75)+
scale_colour_grey(start = .8, end = .4)+
facet_wrap(~JudgementType + Location,
dir = "v",
labeller = label_wrap_gen(multi_line=FALSE)) +
theme(legend.position = "top")
p1
Here is a possible solution:
g1 <- ggplotGrob(p1)
k <- which(g1$layout$name=="strip-t-1-2")
g1$grobs[[k]]$grobs[[1]]$children[[2]]$children[[1]]$label <- ""
g1$grobs[[k]]$grobs[[1]]$children[[1]]$gp$fill <- NA
k <- which(g1$layout$name=="strip-t-2-2")
g1$grobs[[k]]$grobs[[1]]$children[[2]]$children[[1]]$label <- ""
g1$grobs[[k]]$grobs[[1]]$children[[1]]$gp$fill <- NA
library(grid)
grid.draw(g1)
I am trying to plot a group of lines and assign one label to these lines in the legend. Here's an example of the data (df2) that I am using.
structure(list(
true = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L),
.Label = c("model1", "model2"), class = "factor"),
test = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L),
.Label = c("false.model1", "false.model2", "false.model3", "true model"), class = "factor"),
est.interval = c(0, 5, 5, 7, 7, 10, 10, 11, 11, 0, 2, 2, 3, 3, 0, 2, 2, 4, 4, 0, 2, 2, 3, 3, 4, 4, 0, 7, 7, 10, 10, 13, 13, 0, 4, 4, 5, 5, 0, 5, 5, 6, 6, 7, 7, 9, 9, 0, 2, 2, 3, 3, 4, 4),
sens = c(1, 1, 0.75, 0.75, 0.5, 0.5, 0.25, 0.25, 0, 1, 1, 0.25, 0.25, 0, 1, 1, 0.5, 0.5, 0, 1, 1, 0.5, 0.5, 0.25, 0.25, 0, 1, 1, 0.75, 0.75, 0.25, 0.25, 0, 1, 1, 0.5, 0.5, 0, 1, 1, 0.75, 0.75, 0.5, 0.5, 0.25, 0.25, 0, 1, 1, 0.5, 0.5, 0.25, 0.25, 0)),
.Names = c("true", "test", "est.interval", "sens"),
class = "data.frame")
And here I plot the curves using ggplot.
ggplot(df2, aes(x=est.interval, y=sens, color=test)) +
ylim(0,1) +
geom_line(size = 0.6) +
facet_wrap(~true, nrow=1) +
scale_colour_manual(values=c(rep('#CCCCCC', 3), "#000000"), name="Estimation Model") +
guides(color=guide_legend(keywidth = 3, keyheight = 1))
I'd like to have the three grey "false.model" curves separately visible on the plot, but in the legend I just want one "false models" entry beside one grey line. Any thoughts on how to do this?
I had a problem with your dput. It seemed to be missing rownames which made it an invalid data.frame. Here a corrected dump
df2 <- structure(list(true = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("model1",
"model2"), class = "factor"), test = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L,
4L, 4L, 4L), .Label = c("false.model1", "false.model2", "false.model3",
"true model"), class = "factor"), est.interval = c(0, 5, 5, 7,
7, 10, 10, 11, 11, 0, 2, 2, 3, 3, 0, 2, 2, 4, 4, 0, 2, 2, 3,
3, 4, 4, 0, 7, 7, 10, 10, 13, 13, 0, 4, 4, 5, 5, 0, 5, 5, 6,
6, 7, 7, 9, 9, 0, 2, 2, 3, 3, 4, 4), sens = c(1, 1, 0.75, 0.75,
0.5, 0.5, 0.25, 0.25, 0, 1, 1, 0.25, 0.25, 0, 1, 1, 0.5, 0.5,
0, 1, 1, 0.5, 0.5, 0.25, 0.25, 0, 1, 1, 0.75, 0.75, 0.25, 0.25,
0, 1, 1, 0.5, 0.5, 0, 1, 1, 0.75, 0.75, 0.5, 0.5, 0.25, 0.25,
0, 1, 1, 0.5, 0.5, 0.25, 0.25, 0)), .Names = c("true", "test",
"est.interval", "sens"), row.names = c(NA, -54L), class = "data.frame")
The easiest thing to do would be to create a new factor that you can use for the coloring. For example
df2$testcol<-factor(ifelse(df2$test=="true model",1,2),
levels=1:2,
labels=c("True Model","False Model"))
Now all the false models will share a common value. Then you can do
ggplot(df2, aes(x=est.interval, y=sens, group=test, colour=testcol)) +
ylim(0,1) +
geom_line(size = 0.6) +
facet_wrap(~true, nrow=1) +
scale_colour_manual(values=c("True Model"='#CC0000',
"False Model"="#999999"), name="Estimation Model") +
guides(color=guide_legend(keywidth = 3, keyheight = 1))
to get
Is there a way to make the thinner lines in the plot (those without an y axis tick label) appear closer to the lines above (those with a label) so as to better simulate pairs of baseline / actual bars of the same activity in a gantt chart?
See gantt chart examples here and here.
mdfr <- structure(list(name = structure(c(8L, 8L, 8L, 8L, 6L, 6L, 6L,
6L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 7L, 7L, 7L, 7L, 5L, 5L, 5L,
5L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 8L, 8L, 8L, 8L, 6L, 6L, 6L,
6L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 7L, 7L, 7L, 7L, 5L, 5L, 5L,
5L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L), .Label = c("100 A", "100 B",
"101 A", "101 B", "102 A", "102 B", "103 A", "103 B"), class = "factor"),
stadio = c(2, 4, 5, 7, 2, 4, 5, 7, 2, 4, 5, 7, 2, 4, 5, 7,
1, 3, 6, 8, 1, 3, 6, 8, 1, 3, 6, 8, 1, 3, 6, 8, 2, 4, 5,
7, 2, 4, 5, 7, 2, 4, 5, 7, 2, 4, 5, 7, 1, 3, 6, 8, 1, 3,
6, 8, 1, 3, 6, 8, 1, 3, 6, 8), variable = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L), .Label = c("start_date", "end_date"), class = "factor"),
value = c("05/10/2012", "17/12/2012", "12/03/2012", "30/05/2013",
"10/01/2013", "14/10/2013", "24/10/2013", "10/01/2014", "30/09/2013",
"29/01/2014", "30/01/2014", "06/05/2014", "30/09/2013", "29/01/2014",
"30/01/2014", "06/05/2014", "05/10/2012", "17/12/2012", "12/03/2012",
"30/05/2013", "10/01/2013", "14/10/2013", "24/10/2013", "10/01/2014",
"30/09/2013", "29/01/2014", "30/01/2014", "05/06/2014", "30/09/2013",
"29/01/2014", "30/01/2014", "05/06/2014", "17/12/2012", "12/03/2012",
"30/05/2013", "30/05/2014", "14/10/2013", "24/10/2013", "10/01/2014",
"11/07/2014", "29/01/2014", "30/01/2014", "06/05/2014", "23/12/2014",
"29/01/2014", "30/01/2014", "06/05/2014", "23/12/2014", "17/12/2012",
"12/03/2012", "30/05/2013", "30/05/2014", "14/10/2013", "24/10/2013",
"10/01/2014", "11/07/2014", "29/01/2014", "30/01/2014", "05/06/2014",
"28/12/2014", "29/01/2014", "30/01/2014", "05/06/2014", "29/12/2014"
), rating = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("3",
"5"), class = "factor")), row.names = c(NA, -64L), .Names = c("name",
"stage", "variable", "value", "rating"), class = "data.frame")
names <- as.character(unique(mdfr$name))
names1 <- names[gsub("[^ B]","",names) == " B"]
names1 <- paste("No.",gsub("[ B]","",names1),sep="")
names2 <- rep("",length(names1))
new.names <- c(names1,names2)
ggplot(mdfr, aes(as.POSIXct(as.Date(value, "%d/%m/%Y")), name, colour = factor(stage))) +
geom_line(aes(size=rating)) +
labs(colour="(Baseline/Actual):", x = "", y = "") +
scale_colour_brewer(palette="RdYlGn",breaks = c("1", "3", "6","8"), guide = "none") +
scale_size_manual(breaks = levels(mdfr$rating), values = as.integer(levels(mdfr$rating)), guide = "none") +
scale_y_discrete(breaks=names, labels=new.names)
I would use facets to do this. Below you find a possible solution. This may not be the most elegant solution, but it lets you change the distance between thinner and thicker lines by changing the expand argument in scale_x_discrete.
# numbers to facet by (levels used for order of the facets)
mdfr$nr <- factor(paste0("No.", as.numeric(gsub("A|B", "", mdfr$name))),
levels=unique(paste0("No.", as.numeric(gsub("A|B", "", mdfr$name)))))
# recast your data
df <- dcast(mdfr, nr+stage+rating~variable)
# plot as before, switched x and y values
ggplot(df, aes(x=factor(rating),
ymin=as.POSIXct(as.Date(start_date, "%d/%m/%Y")),
ymax=as.POSIXct(as.Date(end_date, "%d/%m/%Y")),
color=factor(stage),
size=rating
)) +
geom_linerange() + # linerange instead of line
facet_grid(nr~., scales="free_x") + # faceting
coord_flip() + # flip coordinates back
scale_x_discrete(name="", breaks=NULL, expand=c(4,1)) + # use the expand variable to change the distances
scale_colour_brewer(palette="RdYlGn",breaks = c("1", "3", "6","8"), guide = "none") +
scale_size_manual(breaks = levels(mdfr$rating), values = as.integer(levels(mdfr$rating)), guide = "none")