How to add a new (custom) variable to a ggplot legend - r

I've run a number of models with two estimated parameters per model, with five groups and two treatments. I'm trying to graph the confidence intervals of these estimates in a large panel plot. Since I've simulated these data sets, I would like to include a dashed line for the "true value" of each parameter, which I set at the beginning of the exercise, for reference, so we can see how well the confidence interval of the model estimates includes the true value. I can do this just fine, but I'd also like to include another entry in the legend that shows "dashed black line" = True Value.
Here's an example of the code. The first set of code works and does not include the dashed black line in the legend.
# Simulated design: five groups x two treatments x two coefficient estimates
# (20 rows in total).
group <- c("group1", "group2", "group3", "group4", "group5")
treatment <- c("treatment1", "treatment2")
estimates <- c("estim1", "estim2")
parameters <- c("param1", "param2")

# Point estimates with the upper (UL) and lower (LL) limits of their
# confidence intervals. Every interval is the estimate -/+ 0.5, so each LL
# must sit below its mean (the -5 estimates have LL = -5.5, not -4.5).
means <- c(0, 0, 5, 0, -5, 0, 0, 7, -5, 10, -5, 0, 0, 0, 0, 0, -5, 0, 0, 10)
UL <- c(.5, .5, 5.5, .5, -4.5, 0.5, 0.5, 7.5, -4.5, 10.5,
        -4.5, .5, .5, .5, .5, .5, -4.5, .5, .5, 10.5)
LL <- c(-.5, -.5, 4.5, -.5, -5.5, -.5, -.5, 6.5, -5.5, 9.5,
        -5.5, -.5, -.5, -.5, -.5, -.5, -5.5, -.5, -.5, 9.5)

# Reference value per row; the plots below draw it as a dotted black segment.
values <- c(.2, -.2, 5.2, -.3, -4.7, -.1, -.2, 6.9, -5.3, 10.1,
            -4.4, 0.1, 0.2, 0.3, 0.1, -0.1, -4.9, -.2, -.2, 9.9)

df <- data.frame(
  group = rep(rep(group, each = 2), 2),
  treatment = rep(treatment, each = 10),
  estimates = rep(estimates, 10),
  LL = LL,
  means = means,
  UL = UL,
  parameters = rep(parameters, 10),
  values = values
)
# Working version: point estimates with CI error bars, plus a dotted black
# segment per estimate marking the reference value. The dotted segment has no
# entry in the legend.
ggplot(data = df, aes(x = as.factor(estimates), y = means, color = estimates)) +
  geom_point() +
  geom_errorbar(aes(ymin = LL, ymax = UL), width = .1,
                position = position_dodge(0.1)) +
  # x/xend/y/yend and linetype are constants (one value per row of df), not
  # aes() mappings, so this layer contributes nothing to the legend.
  geom_segment(x = rep(c(.6, 1.6), 10), xend = rep(c(1.4, 2.4), 10),
               y = values, yend = values, col = "black",
               linetype = 3) +
  scale_x_discrete(labels = c(expression(beta[1]), expression(beta[2]))) +
  xlab("Beta coefficient type") + ylab("Confidence Interval of Estimate") +
  ggtitle("Coefficient Estimates") +
  facet_grid(rows = vars(treatment), cols = vars(group)) +
  scale_color_manual(name = "Symbols",
                     values = c("estim1" = "#F8766D", "estim2" = "#00BFC4"),
                     labels = c(expression(beta[1]),
                                expression(beta[2]))) +
  # No layer maps shape or linetype, so these two scales have no visible
  # effect; each call must be closed and chained with `+` to parse.
  scale_shape_manual(values = c("b1" = 16,
                                "b2" = 16)) +
  scale_linetype_manual(values = c("b1" = 1,
                                   "b2" = 1))
The second set of code, does not work but is my best attempt as to what maybe I should do to try to get the dashed black line in the legend.
# Attempt at adding a "True Value" legend entry (reported as not working).
# The segment's linetype is set as a constant (linetype = 3) rather than
# mapped inside aes(), so no linetype or shape scale receives data from which
# to build a legend entry.
ggplot(data = df, aes(x = as.factor(estimates), y = means, color = estimates))+
geom_point()+
geom_errorbar(aes(ymin = LL, ymax = UL), width=.1, position = position_dodge(0.1))+
geom_segment(x = rep(c(.6, 1.6), 10), xend = rep(c(1.4, 2.4), 10),
y = values, yend = values, col = "black",
linetype = 3)+
scale_x_discrete(labels = c(expression(beta[1]), expression(beta[2])))+
xlab("Beta coefficient type")+ylab("Confidence Interval of Estimate")+
ggtitle("Coefficient Estimates")+
facet_grid(row = vars(treatment), col = vars(group))+
scale_color_manual(name = "Symbols",
values = c("estim1" = "#F8766D", "estim2" = "#00BFC4"),
# NOTE(review): the commented-out colour below has five hex digits; a hex
# colour needs six (e.g. "#000000").
#"" = "#00000"),
labels = c(expression(beta[1]),
expression(beta[2])))#,
#"True Value"))#+
# NOTE(review): the trailing `+` above is commented out, so the plot
# expression ends at scale_color_manual(); the two scale calls below form a
# separate expression that is never added to the plot.
# NOTE(review): the empty name in `"" = 0` presumably raises a zero-length
# name error when evaluated - confirm.
scale_shape_manual(values = c("b1" = 16,
"b2" = 16,
"" = 0))+
scale_linetype_manual(values = c("b1" = 1,
"b2" = 1,
"b3" = 3))
I've also thought that maybe I could try to relevel the df$estimates column to include three levels: the existing "estim1" and "estim2", plus a dummy "True Value" level with no observations. However, I'm worried that this would just add an empty "True Value" tick to the x-axis sublabels of each of my 12 plots.
Thanks for your help.

Map the linetype of your geom_segment to a string called "True value" inside aes, then add a scale_linetype_manual call. This will create a separate legend entry that matches the appearance of your segment and has the correct label.
# Legend entry for the reference line: map the segment's linetype to the
# constant label "True value" inside aes(), then style that single level with
# a manual linetype scale (3 = dotted) and drop the legend title.
ggplot(df, aes(x = as.factor(estimates), y = means, colour = estimates)) +
  geom_point() +
  geom_errorbar(
    aes(ymin = LL, ymax = UL),
    width = .1,
    position = position_dodge(0.1)
  ) +
  geom_segment(
    aes(linetype = "True value"),
    x = rep(c(.6, 1.6), 10), xend = rep(c(1.4, 2.4), 10),
    y = values, yend = values,
    colour = "black"
  ) +
  scale_x_discrete(labels = c(expression(beta[1]), expression(beta[2]))) +
  labs(
    x = "Beta coefficient type",
    y = "Confidence Interval of Estimate",
    title = "Coefficient Estimates"
  ) +
  facet_grid(rows = vars(treatment), cols = vars(group)) +
  scale_colour_manual(
    name = "Symbols",
    values = c("estim1" = "#F8766D", "estim2" = "#00BFC4"),
    labels = c(expression(beta[1]), expression(beta[2]))
  ) +
  scale_linetype_manual(values = 3, name = NULL)

Related

Forcing specific plot symbols for points in R ggplot2

I am trying to specify manually, the shape of data points in r ggplot2 but can't seem to get it to work. Below is a sample example
# Toy data: five x positions, each with a point estimate and its lower/upper
# confidence limits. Every vector passed to tibble() is defined here under
# the name the call uses.
zc <- c(1, 2, 3, 4, 5)
p.est <- c(1.65, 1.55, 0.70, 1.61, 1.25)
p.lcl <- c(1.25, 1.10, 0.50, 1.20, 1.02)
p.ucl <- c(2.20, 2.05, 0.90, 2.20, 1.50)
toy.data <- tibble(zc = zc, p.est = p.est, p.lcl = p.lcl, p.ucl = p.ucl)
Assume I want two types of plot symbols for the five points, I use scale_shape_manual() in ggplot2 but it doesn't seem to work. Below is my sample code and the resulting plot attached. I'm trying to modify so the plot symbols for the points correspond to 5 which is a diamond and 16 which is a circle.
# Question's attempt: point ranges with a manual shape scale and a reference
# line at y = 1.
# No variable is mapped to shape inside aes(), so scale_shape_manual() has
# nothing to act on.
ggplot(toy.data, aes(zc, p.est, ymin = p.lcl, ymax = p.ucl)) +
scale_shape_manual(values = c(5, 16, 5, 5, 16))+
geom_pointrange(position = position_dodge(width = 0.1))+
geom_hline(yintercept = 1)+
ylim(0.5, 2.5)
You can add shape = factor(p.est). Otherwise scale_shape doesn't apply to anything.
# Self-contained fix: map shape to a discrete variable so the manual shape
# scale has levels to assign symbols to.
zc <- c(1, 2, 3, 4, 5)
p.est <- c(1.65, 1.55, 0.70, 1.61, 1.25)
p.lcl <- c(1.25, 1.10, 0.50, 1.20, 1.02)
p.ucl <- c(2.20, 2.05, 0.90, 2.20, 1.50)
toy.data <- tibble(zc, p.est, p.lcl, p.ucl)

# NOTE(review): the five symbols are presumably assigned in factor-level order
# (sorted p.est values), not in row order - confirm against the plot.
ggplot(
  toy.data,
  aes(x = zc, y = p.est, ymin = p.lcl, ymax = p.ucl, shape = factor(p.est))
) +
  geom_pointrange(position = position_dodge(width = 0.1)) +
  geom_hline(yintercept = 1) +
  scale_shape_manual(values = c(5, 16, 5, 5, 16)) +
  ylim(0.5, 2.5)
EDITED Follow up question
# Follow-up: a two-level grouping column ("A"/"B") drives the shape, so the
# manual scale only needs two symbols.
toy.data <- tibble(
  zc,
  p.est,
  p.est.x = c("A", "B", "A", "A", "B"),
  p.lcl,
  p.ucl
)

ggplot(
  toy.data,
  aes(x = zc, y = p.est, ymin = p.lcl, ymax = p.ucl, shape = p.est.x)
) +
  geom_pointrange(position = position_dodge(width = 0.1)) +
  geom_hline(yintercept = 1) +
  scale_shape_manual(values = c(5, 16)) +
  ylim(0.5, 2.5)

Wrong color for 0 in gradient using ggplot2 in R

I'm trying to make a ggplot2 plot with the colors of points and segments mapped to a continuous variable (time, in this case). The points show up with colors I'd expect based on the gradient and the time mapped to that color, but the segment for the first point, when time = 0, does not. Here's an example:
# Concentration/effect data: one row per sampling time.
Oxy <- data.frame(Time = c(0, 0.5, 1, 1.5, 2, 4, 8, 12),
DrugConc = c(0, 8, 12, 13, 10, 7.5, 5, 2.5),
Pupil = c(0, -0.04, -0.1, -0.25, -0.23, -0.2, -0.15, -0.08))
# Build segment endpoints joining each observation to the next one.
# Only rows 1 .. nrow(Oxy) - 1 are assigned; the last row is never written by
# the loop.
# NOTE(review): each new column is created by the first single-element
# assignment, so the unassigned last row presumably ends up holding row 1's
# values - confirm with print(Oxy).
for(j in 1:(nrow(Oxy)-1)){
Oxy$Xstart[j] <- Oxy$Pupil[j]
Oxy$Xend[j] <- Oxy$Pupil[j+1]
Oxy$Ystart[j] <- Oxy$DrugConc[j]
Oxy$Yend[j] <- Oxy$DrugConc[j+1]
}
# Points and arrowed segments both inherit color = Time from the top-level
# aes().
ggplot(Oxy, aes(x = Pupil, y = DrugConc, color = Time)) +
geom_point() +
geom_segment(data = Oxy,
aes(x = Xstart, xend = Xend, y = Ystart, yend = Yend),
arrow = arrow(length = unit(8, "points"), type = "open")) +
xlab("Percent change in pupil diameter") +
ylab("Oxycodone concentration (ng/mL)")
This results in this graph:
The first segment should be dark blue, not light blue, just like the first point. Am I missing something?
The 8th row of Oxy basically overplots the first row. I visualized this by changing Time to a factor and also adding size as an aesthetic so we can easily see what ggplot() is doing.
# Reprex that deliberately reuses the question's data construction so the
# overplotting can be seen.
library(ggplot2)
Oxy <- data.frame(Time = c(0, 0.5, 1, 1.5, 2, 4, 8, 12),
DrugConc = c(0, 8, 12, 13, 10, 7.5, 5, 2.5),
Pupil = c(0, -0.04, -0.1, -0.25, -0.23, -0.2, -0.15, -0.08))
# Same endpoint loop as in the question: rows 1 .. nrow(Oxy) - 1 are assigned,
# the last row is not.
for(j in 1:(nrow(Oxy)-1)){
Oxy$Xstart[j] <- Oxy$Pupil[j]
Oxy$Xend[j] <- Oxy$Pupil[j+1]
Oxy$Ystart[j] <- Oxy$DrugConc[j]
Oxy$Yend[j] <- Oxy$DrugConc[j+1]
}
# Plot only the segments from the first four rows. Time is turned into a
# factor and also mapped to size so that each row's segment is easy to tell
# apart.
ggplot(Oxy[1:4,], aes(x = Pupil, y = DrugConc, colour = factor(Time), size = factor(Time))) +
geom_point() +
geom_segment(aes(x = Xstart, xend = Xend, y = Ystart, yend = Yend),
arrow = arrow(length = unit(8, "points"), type = "open")) +
scale_colour_brewer(type = "div")
#> Warning: Using size for a discrete variable is not advised.
# Plot rows 5 - 8 with the same mappings; this subset contains the last row,
# which the loop above left unassigned.
ggplot(Oxy[5:8,], aes(x = Pupil, y = DrugConc, colour = factor(Time), size = factor(Time))) +
geom_point() +
geom_segment(aes(x = Xstart, xend = Xend, y = Ystart, yend = Yend),
arrow = arrow(length = unit(8, "points"), type = "open")) +
scale_colour_brewer(type = "div")
#> Warning: Using size for a discrete variable is not advised.
Created on 2019-01-15 by the reprex package (v0.2.1)
In short - there's likely a bug in your input data, ggplot() is doing what it's supposed to do.

Repel geom label and text in ggplot. And ordering geom points based on size

I have 2 data frames such as these:
# Party positions on two dimensions, plus a size value (sz) per party.
df1 <- data.frame(party = c("Blue Party", "Red Party"),
                  dim1  = c(0.03, -0.04),
                  dim2  = c(-0.05, 0.02),
                  sz    = c(34, 42))

# Variable loadings on the same two dimensions; these are drawn as arrows.
df2 <- data.frame(var  = c("Economic", "Gov trust", "Inst trust",
                           "Nationalism", "Religiosity"),
                  dim1 = c(0.1, -0.5, 0, 0.6, 0.4),
                  dim2 = c(0.1, 0.6, 0, 0, 0.3))
I want to plot the parties from df1 as points defined by size and include arrows based on df2 on the same graph. I've used ggplot to do this:
# Parties from df1 as coloured points with repelled labels, plus black arrows
# and repelled text labels for the variables in df2.
ggplot(df1, aes(x = dim1, y = dim2, color = party)) +
# Point size is passed as a constant vector (df1$sz), not mapped inside aes().
# NOTE(review): with no size aesthetic mapped, scale_size_area() presumably
# has nothing to scale - confirm.
geom_point(size = df1$sz) +
scale_size_area() +
scale_x_continuous(limits = c(-1.5, 1.5)) +
scale_y_continuous(limits = c(-1.5, 1.5)) +
geom_label_repel(aes(label = party),
box.padding = 1,
point.padding = 1.5,
force = 1) +
# Segments run from each (dim1, dim2) in df2 to the origin; ends = "first"
# puts the arrowhead at the (dim1, dim2) end.
geom_segment(aes(xend=0, yend=0, x=dim1, y=dim2), data=df2,
arrow=arrow(length=unit(0.20,"cm"), ends="first", type = "closed"), color="black") +
geom_text_repel(aes(x=dim1, y=dim2, label=var),
data = df2, color = "black", size = 3, force = 1)
Resulting in this:
The functions geom_label_repel and geom_text_repel prevent the party labels and the texts from overlapping, but how can I repel the labels and texts from each other?
My second problem is that I want to order the points, with the smallest in the front and the largest at the back. How could this be done?
Appreciate the help!

ggplot background in three different colours with geom_rect not working - with data and all code

Reproducible data:
# Example survey scores: 12 rows x 5 scales, each column a mix of fixed
# values, NAs and uniform random draws. The runif() calls are evaluated in
# column order.
df <- data.frame(cbind(
  "Thriving" = c(2, 2, NA, runif(9, 2.0, 5.0)),
  "Performance" = c(2, 3.5, 2.3, 4.2, NA, runif(7, 1.9, 6.9)),
  "Mastery_Climate" = c(runif(10, 2.2, 6.5), NA, 2.3),
  "Competitive_Climate" = c(NA, runif(4, 1.0, 3.6), NA, NA, runif(5, 1.5, 2.8)),
  "Collaboration" = c(runif(8, 2.2, 7.0), NA, NA, 5.5, 2.1)
))
With this data I want to create boxplots using the following command with the packages ggplot2 and tidyr:
# Reshape to long form (one row per score) and fix the display order of the
# scales.
n <- gather(df, key = "variable", value = "value")
n$variable <- factor(
  n$variable,
  levels = c("Thriving", "Performance", "Mastery_Climate",
             "Competitive_Climate", "Collaboration")
)

# Horizontal "boxplots" whose five statistics come from min.mean.sd.max.
ggplot(n, aes(x = variable, y = value)) +
  stat_summary(
    fun.data = min.mean.sd.max,
    geom = "boxplot",
    colour = "#323232",
    fill = "#EFC76C"
  ) +
  coord_flip() +
  scale_y_continuous(breaks = seq(1, 7, by = 1)) +
  expand_limits(y = c(1, 7)) +
  labs(x = "", y = "") +
  theme(
    text = element_text(size = 12),
    panel.background = element_rect(fill = "#EAEDED"),
    plot.margin = unit(c(0, 2, 0, 1.8), "cm")
  )
The function used in stat_summary is as follows:
# Five statistics for stat_summary(geom = "boxplot"): whiskers at the minimum
# and maximum, box edges at mean -/+ one standard deviation, centre line at
# the mean.
#
# x      numeric vector of observations.
# na.rm  drop missing values before summarising? Defaults to FALSE, which
#        keeps the earlier behaviour (any NA makes every statistic NA).
#
# Returns a named numeric vector with elements ymin, lower, middle, upper and
# ymax, in that order.
min.mean.sd.max <- function(x, na.rm = FALSE) {
  if (na.rm) {
    x <- x[!is.na(x)]
  }
  # Compute the centre and spread once and reuse them for the box edges.
  centre <- mean(x)
  spread <- sd(x)
  c(
    ymin = min(x),
    lower = centre - spread,
    middle = centre,
    upper = centre + spread,
    ymax = max(x)
  )
}
Now, HERE IT COMES: everything works beautifully, however, now I would like to colour the background in three different colours, green, yellow and red. I know that I have to use geom_rect for that. However, in order to have the boxplots in the foreground, I need to pass the geom_rect argument first - but this breaks my code:
# Attempt to shade the background in three bands by adding the rectangles
# before the boxplot layer (reported to fail with "Discrete value supplied to
# continuous scale").
df %>%
gather(key = "variable", value = "value") -> n
n$variable <- factor(n$variable, levels = c("Thriving", "Performance", "Mastery_Climate", "Competitive_Climate", "Collaboration"))
ggplot(data = n, aes(y = value, x = variable)) +
# NOTE(review): the colour names sit inside aes(), so they are presumably
# treated as data values mapped through the default fill scale rather than as
# literal colours - which would explain "green" showing up as pink.
geom_rect(aes(xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = 3, fill = "green"), alpha = .01) +
geom_rect(aes(xmin = -Inf, xmax = Inf, ymin = 3, ymax = 5, fill = "yellow"), alpha = .01) +
geom_rect(aes(xmin = -Inf, xmax = Inf, ymin = 5, ymax = Inf, fill = "red"), alpha = .01) +
stat_summary(fun.data = min.mean.sd.max, geom = "boxplot", col = "#323232", fill = "#EFC76C") +
coord_flip() + scale_y_continuous(breaks = c(1, 2, 3, 4, 5, 6, 7)) +
expand_limits(y = c(1, 7)) +
labs(x = "", y = "") +
theme(text = element_text(size = 12), panel.background = element_rect(fill = "#EAEDED")) +
theme(plot.margin=unit(c(0, 2, 0, 1.8),"cm"))
As you can see, I get the error "Error: Discrete value supplied to continuous scale". From research I understand that this is because I needed to change the sequence and it now is a problem that the x-variable is a factor. However, I have been unable to solve this.
It would be great if all the other code could stay the same, it took me ages to put it together. Also, once the boxplots are in the foreground, it would be great if the grid behind would still be visible. Furthermore, I was so confused by the fill in geom_rect, I put in "green" and I get pink, or I put "yellow" and get blue - no clue why.
In any case, any help is very much appreciated. Many greetings!
Edit: Updated answer with better shading control
I think this approach is more idiomatic to ggplot: this puts the shading into a separate table with numeric y values. In a modified ggplot call, all the y values are mapped as numeric values, but the labels for those values are swapped out in the scale_y_continuous line.
# The shading bands live in their own three-row table, so each rectangle is
# drawn from exactly one row.
rects <- data.frame(
  xmin = -Inf,
  xmax = Inf,
  ymin = c(0, 3, 5),
  ymax = c(3, 5, Inf),
  fill = c("green", "yellow", "red")
)

# The factor is plotted through its numeric codes on a continuous x scale;
# the breaks and labels put the factor names back on the axis.
ggplot(n, aes(x = as.numeric(variable), y = value)) +
  geom_rect(
    data = rects,
    aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax, fill = fill),
    inherit.aes = FALSE,
    # Control the shading opacity here.
    alpha = 0.15
  ) +
  stat_summary(
    fun.data = min.mean.sd.max,
    geom = "boxplot",
    colour = "#323232",
    fill = "#EFC76C"
  ) +
  # Use the values in rects$fill as literal colour names.
  scale_fill_identity() +
  scale_x_continuous(
    breaks = as.numeric(unique(n$variable)),
    minor_breaks = NULL,
    labels = unique(n$variable)
  ) +
  scale_y_continuous(breaks = seq(1, 7, by = 1)) +
  expand_limits(y = c(1, 7)) +
  coord_flip() +
  labs(x = "", y = "") +
  theme(
    text = element_text(size = 12),
    panel.background = element_rect(fill = "#EAEDED"),
    plot.margin = unit(c(0, 2, 0, 1.8), "cm")
  )
Original answer
geom_rect's coordinates should be pulled outside of the aes() call, and then I get a working solution. However, one problem with this approach is that the background rectangles are actually getting drawn once for each element in the source data, which is why the colors are so bright even with alpha = 0.01.
# Original approach: the rectangle coordinates and fills are constants set
# outside aes(). Each geom_rect() still inherits the plot data `n`, hence the
# very small alpha.
ggplot(n, aes(x = variable, y = value)) +
  geom_rect(
    xmin = -Inf, xmax = Inf, ymin = 0, ymax = 3,
    fill = "green", alpha = .005
  ) +
  geom_rect(
    xmin = -Inf, xmax = Inf, ymin = 3, ymax = 5,
    fill = "yellow", alpha = .005
  ) +
  geom_rect(
    xmin = -Inf, xmax = Inf, ymin = 5, ymax = 7,
    fill = "red", alpha = .005
  ) +
  stat_summary(
    fun.data = min.mean.sd.max,
    geom = "boxplot",
    colour = "#323232",
    fill = "#EFC76C"
  ) +
  coord_flip() +
  scale_y_continuous(breaks = seq(1, 7, by = 1)) +
  expand_limits(y = c(1, 7)) +
  labs(x = "", y = "") +
  theme(
    text = element_text(size = 12),
    panel.background = element_rect(fill = "#EAEDED"),
    plot.margin = unit(c(0, 2, 0, 1.8), "cm")
  )

Custom graphics in R

I am trying to add the below "graphic" to a chart I am doing in R.
I could easily do the graphic in a graphics application and then 'glue' it together with the R graph. However, it could be cool to make everything in R since it has the caveat that the position of the black arrow depends on calculated number. In the below case 6.8.
Any suggestions on how I could trick R to produce something like this?
This is a start for a function in base graphics:
# Draw a strip of seven numbered boxes with a black triangular marker below
# it (base graphics).
#
# x  numeric position on the 1-7 scale; the tip of the triangle is placed at
#    horizontal coordinate x - 1.
draw <- function(x) {
  # Blank canvas without axes or axis labels.
  plot(NA, xlim = c(0, 7), ylim = c(-.3, 1),
       xaxt = "n", yaxt = "n", xlab = "", ylab = "")
  # Square-wave path through the box edges, closed back to the origin along
  # the bottom.
  lines(x = c(rep(0:7, each = 2), 0),
        y = c(rep(c(0, 1, 1, 0), times = 4), 0))
  # Top edge across the whole strip.
  lines(c(0, 7), c(1, 1))
  # Labels 1..7, centred in each box.
  text(x = seq_len(7) - 0.5, y = 0.5, labels = seq_len(7))
  # NOTE(review): y = 1.5 lies above the upper ylim of 1, so this
  # double-headed arrow is probably clipped from view - confirm.
  arrows(0, 1.5, 7, 1.5, code = 3)
  # Marker triangle with its tip at x - 1.
  polygon(x - 1 + c(-.1, 0, .1), c(-.3, -0.05, -.3), col = "black")
}
draw(4)
draw(3)
If you are interested in a base graphics function, you will probably want to make the width:height ratio fixed and change my polygon triangle into a proper arrow, add some more text and things, but this should get you started:
Here's the ggplot2 solution:
# One row per risk class 1-7. Class 6 is highlighted with a dark blue tile
# and a white label; the rest are white tiles with black labels.
# Columns are named with `=` so the data frame gets the columns number, fill
# and color and no stray global variables are created.
df_nums <- data.frame(number = 1:7,
                      fill = c(rep("white", 5), "darkblue", "white"),
                      color = c(rep("black", 5), "white", "black"))

# Caption text with its horizontal justification and position.
df_text <- data.frame(label = c("Lower Risk", "Higher Risk",
                                "Typically Lower Rewards",
                                "Typically Higher Rewards"),
                      hjust = c(0, 1, 0, 1),
                      x = c(0, 7, 0, 7),
                      y = c(2.9, 2.9, 2.1, 2.1))

arrow_x_pos <- 6.8 # position of arrow
# Risk-and-reward graphic: seven numbered tiles, captions, a grey
# double-headed scale arrow and a black marker arrow.
p1 <- ggplot(df_nums) +
  geom_tile(aes(x = number - .5, y = 1, fill = fill), size = 1, color = "black") +
  geom_text(aes(x = number - .5, y = 1, color = color, label = number), size = 8) +
  # The fill/color columns hold literal colour names, so use them as-is and
  # suppress their legends.
  scale_color_identity(guide = "none") +
  scale_fill_identity(guide = "none") +
  geom_text(data = df_text, aes(x = x, y = y, label = label, hjust = hjust),
            size = 5.5, fontface = "bold") +
  # annotate() draws the heading exactly once; a geom_text() layer here would
  # inherit df_nums and draw the same label once per row.
  annotate("text", x = 0, y = 3.5, label = "Risk and Reward Profile",
           fontface = "bold", size = 6.5, hjust = 0) +
  geom_segment(x = 0, xend = 7, y = 2.5, yend = 2.5, size = 1,
               arrow = arrow(length = unit(10, "pt"), ends = "both"),
               color = "grey70") +
  # Marker arrow at x = arrow_x_pos - 1, running from y = -.3 up to y = .2.
  geom_segment(x = arrow_x_pos - 1, xend = arrow_x_pos - 1, y = -.3, yend = .2,
               size = 4,
               arrow = arrow(length = unit(7, "pt"), type = "closed"),
               lineend = "butt", linejoin = "mitre") +
  ylim(-.2, 3.6) +
  coord_fixed() +
  theme_void()
p1

Resources