Add legend for various stat_ellipse - r

I have a plot with various data ellipses (stat_ellipse) such that I have varied the alpha level. How can I add a legend to the plot to correspond to the various ellipses (with color and alpha level)?
library(tidyverse)
library(RColorBrewer)
colors <- brewer.pal(n = 8, name = "Blues")
ggplot(faithful, aes(waiting, eruptions)) +
geom_point(alpha=0.1, color='navy') +
stat_ellipse(level=0.1, color=colors[1], size=1.1) +
stat_ellipse(level=0.2, color=colors[2], size=1.1) +
stat_ellipse(level=0.3, color=colors[3], size=1.1) +
stat_ellipse(level=0.4, color=colors[4], size=1.1) +
stat_ellipse(level=0.5, color=colors[5], size=1.1) +
stat_ellipse(level=0.6, color=colors[6], size=1.1) +
stat_ellipse(level=0.7, color=colors[7], size=1.1) +
stat_ellipse(level=0.8, color=colors[8], size=1.1) +
stat_ellipse(level=0.9, color='navy', size=1.1)

I don't know whether this is possible with stat_ellipse. Here is a solution. It consists in computing the ellipses with the car package and then using geom_path.
library(car)
levels <- seq(0.1, 0.9, by = 0.2)
ell <- dataEllipse(faithful$waiting, faithful$eruptions, draw = FALSE,
levels = levels, segments = 200)
library(purrr)
paths_list <- imap(ell, function(mat, level){
cbind(mat, level=as.numeric(level))
})
paths <- as.data.frame(do.call(rbind, paths_list))
paths$level <- as.factor(paths$level)
ggplot(faithful, aes(waiting, eruptions)) +
geom_point(alpha=0.5, color='navy') +
geom_path(aes(x=x, y=y, group=level, color=level), data=paths, size = 2)

Related

Adding custom legend after theme(legend.title=element_blank(), legend.position='none')

I have the following data:
set.seed(100)
vals <- rnorm(100)
groups <- c(rep('group a', 30), rep('group b', 70))
df <- data.frame('vals'=vals, 'groups'=groups)
I plot the distributions of vals within groups like this:
ggplot(df, aes(y=vals, x=groups, fill=groups)) +
geom_boxplot() +
theme_minimal() +
scale_fill_brewer(palette = "Set3") +
geom_hline(yintercept=0.5, color='red', lty='dashed', size=1) +
geom_hline(yintercept=-0.5, color='blue', lty='dashed', size=1) +
theme(legend.title=element_blank(), legend.position='none')
This produces the following picture.
I would like to include a legend for the blue and red horizontal lines but not for the boxplots. How do I do that?
You could use the aes of linetype for each geom_hline with scale_linetype_manual and say that the boxplot should not be shown like this:
set.seed(100)
vals <- rnorm(100)
groups <- c(rep('group a', 30), rep('group b', 70))
df <- data.frame('vals'=vals, 'groups'=groups)
library(ggplot2)
ggplot(df, aes(y=vals, x=groups, fill=groups)) +
geom_boxplot(show.legend = FALSE) +
theme_minimal() +
scale_fill_brewer(palette = "Set3") +
geom_hline(aes(yintercept=0.5, lty='red'), color='red', size=1) +
geom_hline(aes(yintercept=-0.5, lty='blue'), color='blue', size=1) +
scale_linetype_manual(name = "Legend", values = c(2, 2),
guide = guide_legend(override.aes = list(color = c("red", "blue"))))
Created on 2023-01-16 with reprex v2.0.2

Can anyone explain why creating a histogram with two conditions shows incorrect distribution in R?

I want to create a histogram with data from two different conditions (A and B in the example below). I want to plot both distributions in the same plot using geom_histogram in R.
However, it seems that for condition A, the distribution of the whole data set is shown (instead of only A).
In the example below, three cases are shown:
Plotting A and B
Plotting only A
Plotting only B
You will see that the distribution of A is not the same when you compare 1) and 2).
Can anyone explain why this occurs and how to fix this problem?
set.seed(5)
# Create test data frame
test <- data.frame(
condition=factor(rep(c("A", "B"), each=200)),
value =c(rnorm(200, mean=12, sd=2.5), rnorm(200, mean=13, sd=2.1))
)
# Create separate data sets
test_a <- test[test$condition == "A",]
test_b <- test[test$condition == "B",]
# 1) Plot A and B
ggplot(test, aes(x=value, fill=condition)) +
geom_histogram(binwidth = 0.25, alpha=.5) +
ggtitle("Test A and AB")
# 2) Plot only A
ggplot(test_a, aes(x=value, fill=condition)) +
geom_histogram(binwidth = 0.25, alpha=.5) +
ggtitle("Test A")
# 3) Plot only B
ggplot(test_b, aes(x=value, fill=condition)) +
geom_histogram(binwidth = 0.25, alpha=.5) +
ggtitle("Test B")
An alternative for visualization, not to supplant MichaelDewar's answer:
ggab <- ggplot(test, aes(x=value, fill=condition)) +
geom_histogram(binwidth = 0.25, alpha=.5, position = "identity") +
ggtitle("Test A and AB") +
xlim(5, 20) +
ylim(0, 13)
# 2) Plot only A
gga <- ggplot(test_a, aes(x=value, fill=condition)) +
geom_histogram(binwidth = 0.25, alpha=.5) +
ggtitle("Test A") +
xlim(5, 20) +
ylim(0, 13)
# 3) Plot only B
ggb <- ggplot(test_b, aes(x=value, fill=condition)) +
geom_histogram(binwidth = 0.25, alpha=.5) +
ggtitle("Test B") +
xlim(5, 20) +
ylim(0, 13)
library(patchwork) # solely for a quick side-by-side-by-side presentation
gga + ggab + ggb + plot_annotation(title = 'position = "identity"')
The key in this visualization is adding position="identity" to the first hist (the others do not need it).
Alternatively, one could use position="dodge" (this is best viewed on the console, it's a bit difficult on this small snapshot).
And for perspective, position = "stack", the default, showing "A" with a demonstrably altered histogram.
The plots are stacked in the A+B plot. So the A bars start at the top of the B bars. Also, the scaling on the axes are different. It's also possible that the bins have different endpoints.
So, yes, the A+B plot is showing the total distribution. The fill helps you see the contribution from each of the A and B.
If you want to overlay the two plots, use:
ggplot(mapping = aes(x=value, fill=condition)) +
geom_histogram(data = test_a, binwidth = 0.25, alpha=.5) +
geom_histogram(data = test_b, binwidth = 0.25, alpha=.5) +
ggtitle("Test A and AB")

Displaying the number of counts per bin in a ggplot2 hexbin graph

I'm working on a project to simulate the movement of missing ships. I've made a distribution map using this code:
library("ggplot2")
a <- rnorm(1000, 30.2, 2)
b <- rnorm(1000, 10, 5)
y <- (x + a + b) * 0.6
df <- data.frame(x,y)
p <- ggplot(df,aes(x=x,y=y)) +
ggtitle("A Priori Map") + xlab("Longtitude") + ylab("Latitude") +
scale_fill_gradientn(colors = topo.colors(10))
p + stat_binhex(show.legend = T, bins = 20)
This produces a map like this:
A hexbin map
However, instead of showing the number of counts using a color, I would like to show the actual count in a point. So if the program 'landed' on a certain point 3 times, it would display '3'.
How can this be done in R?
Here's how to add counts to the existing graph:
library(ggplot2)
theme_set(theme_bw())
set.seed(2)
a <- rnorm(1000, 30.2, 2)
b <- rnorm(1000, 10, 5)
x = rnorm(1000)
y <- (x + a + b) * 0.6
df <- data.frame(x,y)
p <- ggplot(df,aes(x=x,y=y)) +
ggtitle("A Priori Map") +
xlab("Longtitude") + ylab("Latitude") +
scale_fill_gradientn(colors = topo.colors(10)) +
stat_binhex(show.legend = T, bins = 20)
p + geom_text(stat="binhex", bins=20, aes(label=..count..), show.legend=FALSE,
colour=hcl(15,100,60), fontface="bold", size=3.5)
To remove the fill colours, you could do:
ggplot(df,aes(x=x,y=y)) +
ggtitle("A Priori Map") +
xlab("Longtitude") + ylab("Latitude") +
stat_binhex(bins = 20, fill=NA, colour="black") +
geom_text(stat="binhex", bins=20, aes(label=..count..), colour="red")
You could also use text size to highlight the regions of highest density:
ggplot(df,aes(x=x,y=y)) +
ggtitle("A Priori Map") +
xlab("Longtitude") + ylab("Latitude") +
stat_binhex(show.legend = T, bins = 20, fill=NA, colour="grey70") +
geom_text(stat="binhex", bins=20, aes(label=..count.., size=..count..), colour="red") +
scale_size_continuous(range=c(3,6)) +
guides(size=FALSE)
Which also works without the hex-grid:
ggplot(df,aes(x=x,y=y)) +
ggtitle("A Priori Map") +
xlab("Longtitude") + ylab("Latitude") +
geom_text(stat="binhex", bins=20, aes(label=..count.., size=..count..), colour="red") +
scale_size_continuous(range=c(3,6)) +
guides(size=FALSE)

Deleting an entire row of facets of unused factor level combination

I want to remove the 2nd row of facets from my plot below because there is no data for that factor combination.
library(ggplot2)
library(grid)
set.seed(5000)
# generate first df
df1 = data.frame(x=rep(rep(seq(2,8,2),4),6),
y=rep(rep(seq(2,8,2),each=4),6),
v1=c(rep("x1",32),rep("x2",64)),
v2=c(rep("y1",64),rep("y2",32)),
v3=rep(rep(c("t1","t2"),each=16),3),
v4=rbinom(96,1,0.5))
# generate second df
df2 = data.frame(x=runif(20)*10, y=runif(20)*10,
v1=sample(c("x1","x2"),20,T))
# plot
ggplot() +
geom_point(data=df1, aes(x=x, y=y, colour = factor(v4)), shape=15, size=5) +
scale_colour_manual(values = c(NA,"black")) + facet_grid(v1+v2~v3, drop = T) +
geom_point(data=df2, aes(x=x,y=y), shape=23 , colour="black", fill="white", size=4) +
coord_equal(ratio=1) + xlim(0, 10) + ylim(0, 10)
I tried to use the idea from this post..
g=ggplotGrob(y)
pos=which(g$layout$t==5 | g$layout$t==6)
g$layout=g$layout[-c(pos),]
g$grobs=g$grobs[-c(pos)]
grid.newpage()
grid.draw(g)
..but got this.
How do I eliminate the white space? Also, is there a straightforward solution to this, without having to manipulate the grobs, etc?
Just modify the data:
df2 <- rbind(cbind(df2, v2 = "y1"),
cbind(df2, v2 = "y2"))
df2 <- df2[!(df2$v1 == "x1" & df2$v2 == "y2"),]
# plot
ggplot() +
geom_point(data=df1, aes(x=x, y=y, colour = factor(v4)), shape=15, size=5) +
scale_colour_manual(values = c(NA,"black")) + facet_grid(v1+v2~v3, drop = T) +
geom_point(data=df2, aes(x=x,y=y), shape=23 , colour="black", fill="white", size=4) +
coord_equal(ratio=1) + xlim(0, 10) + ylim(0, 10)

Add multiple geom_line to ggplot

The geom_line CUR_MTH_UNEARN_REV_EUR is plotted correctly as a numeric. My goal is to add a second numeric geom_line (i.e., CUR_MTH_EARN_REV_EUR). Here's the code:
library("ggthemes")
library("gridExtra")
library("grid")
p = ggplot(f, aes(DTE_OF_REPORT_EUR, CUR_MTH_UNEARN_REV_EUR, label=(CUR_MTH_UNEARN_REV_EUR)))
+ geom_point(size=ifelse(f$CUR_MTH_UNEARN_REV_EUR<8.0, 11, 5), color=ifelse(f$CUR_MTH_UNEARN_REV_EUR<8.0, '#CC0000', 'black'))
+ geom_line(size=2,aes(group=1)) + geom_rangeframe() + theme_wsj()
+ theme(axis.text.x=element_text(angle=50, size=20, vjust=0.7))
+ geom_smooth(aes(group=1), method="loess", colour = "#CC0000", lwd=2)
+ geom_text(aes(label=CUR_MTH_UNEARN_REV_EUR), hjust=-0.5, vjust=0.5, fontface="bold")
+ ggtitle("Unearned Revenue by Service Code 'BS', in CSG Months, Jul. 2014-Aug. 2015")
+ theme(plot.title = element_text(lineheight=.8, face="bold"))
p
Text1 = textGrob("Source: Revenue Assurance and Quality Control", gp=gpar(fontsize=7))
p2 = p + annotation_custom(grob = Text1, ymin = -0.2, ymax = -30)
p2
format(round(f$CUR_MTH_UNEARN_REV_EUR, 2), nsmall = 2)
f$ScoreRounded <- round(f$CUR_MTH_UNEARN_REV_EUR, 1)
f$DTE_OF_REPORT_EUR <- factor(f$DTE_OF_REPORT_EUR, levels=unique(as.character(f$DTE_OF_REPORT_EUR)))
Hope this helps as a start. You can just add things, but you need to have the correct aes.
#data with x and two y-variables
set.seed(123)
f <- data.frame(x=1:10, var1=sample(7:10,10,T),
var2=sample(5:7,10,T))
#as you want sizing by a measure, make a flag
f$var1_threshold <- f$var1 <9
#example with adding different geoms
#not that it's unnecessary to use my data (f)
#in the call to aes, as everything I need is already
#inside f
p <- ggplot(f, aes(x=x)) +
geom_point(aes(y=var1,size=var1_threshold, color=var1_threshold))+
#colors and size in aes allows for legend generation.
scale_size_manual(values=c("FALSE"=5,"TRUE"=8)) +
scale_color_manual(values=c("FALSE"='#CC0000',"TRUE"='black')) +
geom_line(aes(y=var1),size=1) +
geom_line(aes(y=var2),size=1) +
geom_smooth(aes(y=var1), colour="#CC0000") +
geom_smooth(aes(y=var2), colour="black")

Resources