R Knitr not printing model results to file - r

I have an R script that is generating a number of plots as well as assessing a few linear models. For some reason, when I try to print out the plots and linear model statistics in a loop they don't end up in the file, but when I remove the loop they get printed to file.
R/Knitr markup that doesn't work (desired lines to print with # I want this to print):
# Reproduction of the problem: one plot plus model statistics for every
# (regionId, class, layer) group, generated inside a for loop.
library('RODBC')
library('ggplot2')
library('dplyr')
library('reshape2')
# Read the measurements through the 'yield_model' ODBC data source.
con <- odbcConnect('yield_model')
sql <- "SELECT DISTINCT [grouping],[group],regionId,class,finalPk,avgArea,layer,runTime,random,name FROM dbo.pk p LEFT OUTER JOIN dred.dbo.yasMap y ON p.class = y.id WHERE random=1"
values <- sqlQuery(con,sql,stringsAsFactors = FALSE)
values$model = "NM"
# One row per distinct (regionId, class, layer), ordered by layer, region, class.
groupings <- unique(select(values,regionId,class,layer))
groupings <- groupings[order(groupings$layer,groupings$regionId,groupings$class),]
for(i in 1:nrow(groupings)) {
# Rows that belong to the i-th group.
data <- subset(values,regionId == groupings$regionId[i] & class == groupings$class[i] & layer == groupings$layer[i])
# Distinct label values of the group; used only to build the plot title.
layer <- unique(select(data,layer))
region <- unique(select(data,regionId))
defectNo <- unique(select(data,class))
defectName <- unique(select(data,name))
# Number of distinct x values; it decides which fits are attempted below.
count <- length(unique(data$avgArea))
average <- mean(data$finalPk)
# Scatter plot with a reference line at the mean of finalPk.
# NOTE(review): `slop` looks like a typo for `slope` -- confirm the mean line
# is actually drawn flat.
myPlot <- ggplot(data=data,aes(x=avgArea,y=finalPk)) +
geom_point(size=4,color="red") +
ggtitle(paste("Defect=",defectName$name,"(", defectNo$class,"), Region=",region$regionId, ", Layer=", layer$layer,sep="")) +
geom_abline(intercept=average,slop=0,size=1,aes(color="mean"))
# A straight-line fit is added when there is more than one distinct x value.
if(count > 1) {
myPlot <- myPlot + stat_smooth(method ="lm",formula = y ~ x, se = FALSE,size=1,aes(color="linear"))
}
# Log and quadratic fits are added only with more than three distinct x values.
if (count > 3) {
myPlot <- myPlot + stat_smooth(method = "lm", formula = y ~ log(x), se = FALSE, size=1, aes(color="log"))
myPlot <- myPlot + stat_smooth(method = "lm", formula = y ~ poly(x,2), se = FALSE, size=1, aes(color="poly"))
}
# Legend colours for the series added above. The final branch can never run:
# counts 2 and 3 are already handled by the first condition.
if(count > 1 & count < 4) {
myPlot <- myPlot + scale_colour_manual(name="",values=c("mean" = "orange","linear" = "green"))
} else if(count > 3) {
myPlot <- myPlot + scale_colour_manual(name="",values=c("mean" = "orange","linear" = "green","log" = "red","poly" = "blue"))
} else if (count == 2 | count == 3) {
}
plot(myPlot)
# Inside a for loop R does not auto-print the value of a bare expression, so
# every line marked "I want this to print" computes its value and discards it.
paste("Average Pk=",average,sep="") # I want this to print
if(count > 1) {
linMod <- lm(data$finalPk ~ data$avgArea)
coef(linMod) # I want this to print
summary(linMod) # I want this to print
}
if (count > 3) {
linModLog <- lm(data$finalPk ~ log(data$avgArea))
coef(linModLog) # I want this to print
summary(linModLog) # I want this to print
linModPoly <- lm(data$finalPk ~ poly(data$avgArea,2))
coef(linModPoly) # I want this to print
summary(linModPoly) # I want this to print
}
}
R/Knitr that does print:
# Same analysis with the loop commented out and i fixed at 1. Every statement
# now runs at top level, where the value of a bare expression is auto-printed.
library('RODBC')
library('ggplot2')
library('dplyr')
library('reshape2')
# Read the measurements through the 'yield_model' ODBC data source.
con <- odbcConnect('yield_model')
sql <- "SELECT DISTINCT [grouping],[group],regionId,class,finalPk,avgArea,layer,runTime,random,name FROM dbo.pk p LEFT OUTER JOIN dred.dbo.yasMap y ON p.class = y.id WHERE random=1"
values <- sqlQuery(con,sql,stringsAsFactors = FALSE)
values$model = "NM"
# One row per distinct (regionId, class, layer), ordered by layer, region, class.
groupings <- unique(select(values,regionId,class,layer))
groupings <- groupings[order(groupings$layer,groupings$regionId,groupings$class),]
#for(i in 1:nrow(groupings)) {
# Only the first group is processed.
i <- 1
data <- subset(values,regionId == groupings$regionId[i] & class == groupings$class[i] & layer == groupings$layer[i])
# Distinct label values of the group; used only to build the plot title.
layer <- unique(select(data,layer))
region <- unique(select(data,regionId))
defectNo <- unique(select(data,class))
defectName <- unique(select(data,name))
# Number of distinct x values; it decides which fits are attempted below.
count <- length(unique(data$avgArea))
average <- mean(data$finalPk)
# Scatter plot with a reference line at the mean of finalPk.
# NOTE(review): `slop` looks like a typo for `slope` -- confirm the mean line
# is actually drawn flat.
myPlot <- ggplot(data=data,aes(x=avgArea,y=finalPk)) +
geom_point(size=4,color="red") +
ggtitle(paste("Defect=",defectName$name,"(", defectNo$class,"), Region=",region$regionId, ", Layer=", layer$layer,sep="")) +
geom_abline(intercept=average,slop=0,size=1,aes(color="mean"))
# A straight-line fit is added when there is more than one distinct x value.
if(count > 1) {
myPlot <- myPlot + stat_smooth(method ="lm",formula = y ~ x, se = FALSE,size=1,aes(color="linear"))
}
# Log and quadratic fits are added only with more than three distinct x values.
if (count > 3) {
myPlot <- myPlot + stat_smooth(method = "lm", formula = y ~ log(x), se = FALSE, size=1, aes(color="log"))
myPlot <- myPlot + stat_smooth(method = "lm", formula = y ~ poly(x,2), se = FALSE, size=1, aes(color="poly"))
}
# Legend colours for the series added above. The final branch can never run:
# counts 2 and 3 are already handled by the first condition.
if(count > 1 & count < 4) {
myPlot <- myPlot + scale_colour_manual(name="",values=c("mean" = "orange","linear" = "green"))
} else if(count > 3) {
myPlot <- myPlot + scale_colour_manual(name="",values=c("mean" = "orange","linear" = "green","log" = "red","poly" = "blue"))
} else if (count == 2 | count == 3) {
}
plot(myPlot)
# At top level this value is auto-printed.
paste("Average Pk=",average,sep="") # I want this to print
# NOTE(review): the coef()/summary() calls below sit inside if blocks, where
# only the value of the last expression is printed -- confirm which of them
# actually reach the output.
if(count > 1) {
linMod <- lm(data$finalPk ~ data$avgArea)
coef(linMod) # I want this to print
summary(linMod) # I want this to print
}
if (count > 3) {
linModLog <- lm(data$finalPk ~ log(data$avgArea))
coef(linModLog) # I want this to print
summary(linModLog) # I want this to print
linModPoly <- lm(data$finalPk ~ poly(data$avgArea,2))
coef(linModPoly) # I want this to print
summary(linModPoly) # I want this to print
}
#}
My header values (I also tried with no results portion at all):
{r echo=FALSE,results='asis'}
Hopefully somebody has some advice for me. Appreciate the help!

Given that it's everything after the plot(myPlot) statement which is giving you problems in the loop, I'm thinking the answer here might apply to you.
Try adding a plot.new call after the plot(myPlot) statement

Related

How to position a common label for multiple plots using gtable in ggplot in R?

I have been attempting to solve this issue for a considerable amount of time with no success. I am creating multiple partial dependence plots (PDPs) and using a package called zenplots to lay them out. However, I cannot figure out a way to have a common legend for the multiple plots. I have tried placing them on a grid and plotting, and tried changing the positioning of the grobs... but I can't figure it out. For example:
In the above plot, all PDPs are on the same scale and I would like a single legend. Currently, when I produce the image, it plots a legend for each individual plot. Whereas, what I want is something like the image below (which I made in photoshop):
The code I'm providing to produce the plots is somewhat long (which I hope won't deter people)... but essentially it's only the ggplot part of the code that I need to manipulate. That is, I'm creating the actual ggplot on lines 103-105 and more generally between lines 103-125, where I use ggtable to build the plots. For example, changing the color argument on line 115 to: guides(fill = FALSE, color = "colour bar") will create the legend for each plot... setting color = FALSE will remove the legends.
below is the code used to make the plots and it's application on the air quality data:
library(randomForest)
library(ggplot2)
library(dplyr)
# Lay out pairwise partial dependence plots (PDPs) with zenplots.
#
# Arguments:
#   data     data frame of predictors plus the response; rows with NA are dropped
#   fit      fitted model; used through predict(fit, <data frame>)
#   response name of the response column in `data`
#   pal      colour palette shared by the tile fill and the rug colour scales
#   gridSize grid points per variable handed to pdp_data(); capped at `nmax`
#   nmax     only used to cap `gridSize` (the row sub-sampling is commented out)
#   class    not referenced anywhere in the body
#   rug      if TRUE, add rugs of the observed data coloured by prediction
#   ...      forwarded to zenplots::zenplot()
#
# Returns whatever zenplots::zenplot() returns.
pdpLayout <- function(data,
fit,
response,
pal = rev(RColorBrewer::brewer.pal(11, "RdYlBu")),
gridSize = 10,
nmax = 500,
class = 1,
rug = TRUE,
...) {
data <- na.omit(data)
# if (is.numeric(nmax) && nmax < nrow(data)) {
# data <- data[sample(nrow(data), nmax), , drop = FALSE]
# }
gridSize <- min(gridSize, nmax)
# Model predictions for the observed rows; used later to colour the rugs.
predData <- predict(fit, data)
# Predictor names: every column except the response.
vars <- names(data)
vars <- vars[-match(response, vars)]
datap <- data[,vars]
zpath <- 1:length(vars)
zdata <- datap
# Consecutive variable pairs (i, i + 1); every second pair is reversed.
zpairs <- t(sapply(1:(length(zpath)-1), function(i){
z <- zpath[i:(i+1)]
if (i %% 2 == 0) rev(z) else z
}))
zpairs <- cbind(vars[zpairs[, 1]], vars[zpairs[, 2]])
# loop through vars and create a list of pdps for each pair
pdplist <- vector("list", nrow(zpairs))
for (i in 1:nrow(zpairs)) {
ind <- zpairs[i, ]
if (!is.na(ind[1])) {
px <- pdp_data(data, ind, gridsize = gridSize)
# Tag the rows with the pair index so they can be split apart again below.
px$.pid <- i
pdplist[[i]] <- px
} else {
pdplist[[i]] <- NULL
}
}
# Predict once on the stacked grids, then split back into one frame per pair.
pdplist <- bind_rows(pdplist)
pdplist$fit <- predict(fit, pdplist)
pdplist <- split(pdplist, pdplist$.pid)
# Average the predictions within each grid cell of each pair.
pdplist0 <- vector("list", nrow(zpairs))
j <- 1
for (i in 1:nrow(zpairs)) {
ind <- zpairs[i, ]
if (!is.na(ind[1])) {
pdplist0[[i]] <- pdplist[[j]] %>%
group_by(.data[[ind[1]]], .data[[ind[2]]]) %>%
summarise(fit = mean(fit))
j <- j + 1
} else {
pdplist0[[i]] <- NULL
}
}
pdplist <- pdplist0
pdplist0 <- NULL
names(pdplist) <- paste(zpairs[, 2], zpairs[, 1], sep = "pp")
message("Finished ice/pdp")
# Set limits for pairs
pdplist0 <- pdplist[!sapply(pdplist, is.null)]
r <- range(sapply(pdplist0, function(x) range(x$fit)))
# One common limit for all panels, widened to the ends of labeling::rpretty().
limits <- range(labeling::rpretty(r[1], r[2]))
# Zenplot graphing function
data$pred <- predData
z2index <- 0
# Panel function passed to zenplot as `plot2d`. It ignores `zargs` and picks
# the pair to draw from a counter in the enclosing function, advanced with <<-.
# NOTE(review): this assumes zenplot calls plot2d once per pair, in zpairs
# order -- confirm.
pdpnn <- function(zargs) {
z2index <<- z2index + 1
vars <- zpairs[z2index, ]
pdp <- pdplist[[z2index]]
if (!is.null(pdp)) {
# Exactly one of the two variables is a factor: draw lines, one per level.
if (is.factor(pdp[[vars[1]]]) + is.factor(pdp[[vars[2]]]) == 1) {
if (is.factor(pdp[[vars[1]]])) vars <- rev(vars)
p <- ggplot(data = pdp, aes(x = .data[[vars[1]]], y = fit, color = .data[[vars[2]]])) +
geom_line() +
geom_rug(data = data, sides = "b", aes(y = .data[["pred"]]))
} else {
# Otherwise draw a heat map of the averaged prediction; factor axes are jittered.
if (is.factor(pdp[[vars[1]]])) posx <- "jitter" else posx <- "identity"
if (is.factor(pdp[[vars[2]]])) posy <- "jitter" else posy <- "identity"
p <- ggplot(data = pdp, aes(x = .data[[vars[1]]], y = .data[[vars[2]]])) +
geom_tile(aes(fill = fit)) +
scale_fill_gradientn(name = "y-hat", colors = pal, limits = limits, oob = scales::squish)
if (rug) {
p <- p +
geom_rug(data = data, sides = "b", position = posx, aes(color = .data[["pred"]])) +
geom_rug(data = data, sides = "l", position = posy, aes(color = .data[["pred"]])) +
scale_color_manual_placeholder <- NULL
}
}
# Legends are switched off for every panel; axes and ticks are removed.
p <- p +
guides(fill = FALSE, color = FALSE) +
theme_bw() +
theme(
axis.line = element_blank(),
axis.ticks = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
panel.border = element_rect(colour = "gray", fill = NA, size = 1.5)
)
} else {
# No PDP for this slot: return an empty panel.
p <- ggplot() +
theme(panel.background = element_blank())
}
# zenplot is called with pkg = "grid", so the panel is handed back as a gtable.
ggplot_gtable(ggplot_build(p))
}
suppressMessages({
zenplots::zenplot(zdata,
pkg = "grid", labs = list(group = NULL),
plot2d = pdpnn, ...
)
})
}
#' Build the evaluation grid for a partial dependence plot.
#'
#' Every row of `d` is replicated once per grid point, with the variable(s) of
#' interest overwritten by that grid point's value(s). One variable gives a
#' 1-D grid; two variables give the full cross product of their grids.
#'
#' @param d Data frame of observations.
#' @param var Character vector naming one column of `d`, or two columns.
#'   When more than one name is given only the first two are used.
#' @param gridsize Number of evenly spaced grid values for a non-factor
#'   column. A factor column always uses all of its levels.
#' @return A data frame with `nrow(d)` rows per grid point, the columns of `d`
#'   plus `.id` (the row's index within `d`), and default row names.
pdp_data <- function(d, var, gridsize = 30) {
  # Grid for one column: every level of a factor, otherwise an evenly spaced
  # sequence over the observed (non-missing) range.
  grid_values <- function(column) {
    if (is.factor(column)) {
      levels(column)
    } else {
      seq(min(column, na.rm = TRUE), max(column, na.rm = TRUE),
        length.out = gridsize
      )
    }
  }
  # Overwriting a factor column with grid values loses its factor structure;
  # rebuild it with the original levels and ordering.
  restore_factor <- function(values, template) {
    factor(values, levels = levels(template), ordered = is.ordered(template))
  }

  if (length(var) == 1) {
    pdpvar <- d[[var]]
    gridvals <- grid_values(pdpvar)
    dnew <- do.call(rbind, lapply(gridvals, function(value) {
      d1 <- d
      d1[[var]] <- value
      d1
    }))
    if (is.factor(pdpvar)) {
      dnew[[var]] <- restore_factor(dnew[[var]], pdpvar)
    }
  } else {
    pdpvar1 <- d[[var[1]]]
    pdpvar2 <- d[[var[2]]]
    # First variable varies fastest, as in expand.grid().
    gridvals <- expand.grid(grid_values(pdpvar1), grid_values(pdpvar2))
    dnew <- do.call(rbind, lapply(seq_len(nrow(gridvals)), function(i) {
      d1 <- d
      d1[[var[1]]] <- gridvals[i, 1]
      d1[[var[2]]] <- gridvals[i, 2]
      d1
    }))
    if (is.factor(pdpvar1)) {
      dnew[[var[1]]] <- restore_factor(dnew[[var[1]]], pdpvar1)
    }
    if (is.factor(pdpvar2)) {
      dnew[[var[2]]] <- restore_factor(dnew[[var[2]]], pdpvar2)
    }
  }
  # Each replicated copy of `d` keeps the original row index; the repetition
  # is written out instead of relying on silent recycling.
  dnew$.id <- rep_len(seq_len(nrow(d)), nrow(dnew))
  rownames(dnew) <- NULL
  dnew
}
Now use some data to create the plots:
# Example: drop incomplete rows of airquality, fit a random forest for Ozone
# on all remaining columns, then lay out the pairwise PDPs.
aq <- na.omit(airquality)
rf <- randomForest(Ozone~., data = aq)
pdpLayout(aq, rf, "Ozone")
Any help or suggestions is greatly appreciated.

Add a Passing-Bablok regression line

I have to perform many comparisons between different measurement methods and I have to use the Passing-Bablok regression approach.
I would like to take advantage of ggplot2 and faceting, but I don't know how to add a geom_smooth layer based on the Passing-Bablok regression.
I was thinking about something like: https://stackoverflow.com/a/59173260/2096356
Furthermore, I would also need to show the regression line equation, with confidence interval for intercept and slope parameters, in each plot.
Edit with partial solution
I've found a partial solution combining the code provided in this post and in this answer.
## Regression algorithm
#' Point estimates of a Passing-Bablok style regression line.
#'
#' The slope is a shifted median of the slopes between all pairs of points;
#' the intercept is the median of `y - slope * x`.
#'
#' @param x,y Numeric vectors of equal length with at least two points.
#' @return A numeric vector of class "Passing_Bablok" holding the intercept
#'   and the slope, named "(Intercept)" and the expression passed as `x`.
passing_bablok.fit <- function(x, y) {
  # Capture the caller's expression for `x`; it names the slope coefficient.
  x_name <- deparse(substitute(x))
  if (length(x) != length(y) || length(x) < 2) {
    stop("`x` and `y` must have the same length and hold at least two points",
      call. = FALSE
    )
  }
  # Slopes between every pair of points i < j, computed in one vectorised
  # step instead of filling a wrongly sized vector in a double loop.
  dy <- outer(y, y, "-")
  dx <- outer(x, x, "-")
  S <- (dy / dx)[upper.tri(dy)]
  # sort() drops NA/NaN (identical points give 0/0); infinite slopes from
  # tied x values are kept.
  S.sort <- sort(S)
  N <- length(S.sort)
  # The median position is shifted by half the number of negative slopes.
  # NOTE(review): the published procedure offsets by the number of slopes
  # below -1 and averages the two middle slopes arithmetically -- confirm
  # this variant is the intended one.
  neg <- sum(S.sort < 0)
  K <- floor(neg / 2)
  if (N %% 2 == 1) {
    b <- S.sort[(N + 1) / 2 + K]
  } else {
    # Even count: geometric mean of the two middle slopes.
    b <- sqrt(S.sort[N / 2 + K] * S.sort[N / 2 + K + 1])
  }
  a <- median(y - b * x)
  res <- as.vector(c(a, b))
  names(res) <- c("(Intercept)", x_name)
  class(res) <- "Passing_Bablok"
  res
}
## Computing confidence intervals
# Bootstrap wrapper around passing_bablok.fit(), shaped like a model-fitting
# function so it can be given as `method` to geom_smooth().
#
# Arguments:
#   formula  model formula; model.frame() puts the response in the first
#            column and the predictor in the second
#   data     data frame the formula is evaluated in
#   R        number of bootstrap replicates passed to boot::boot()
#   weights  accepted but not referenced in the body
#
# Returns the boot object with "Passing_Bablok" prepended to its class.
passing_bablok <- function(formula, data, R = 100, weights = NULL){
ret <- boot::boot(
data = model.frame(formula, data),
statistic = function(data, ind) {
# Resample the rows chosen by boot for this replicate.
data <- data[ind, ]
# Turn the column names into expressions and pass them as y (first column)
# and x (second column), evaluated inside the resampled data, so the fit
# function sees the column name as the expression for `x`.
args <- rlang::parse_exprs(colnames(data))
names(args) <- c("y", "x")
rlang::eval_tidy(rlang::expr(passing_bablok.fit(!!!args)), data, env = rlang::current_env())
},
R=R
)
class(ret) <- c("Passing_Bablok", class(ret))
ret
}
## Plotting confidence bands
# predictdf method for "Passing_Bablok" fits: the fitted line at `xseq` and,
# when `se` is TRUE, a percentile band taken from the bootstrap replicates.
#
#   model  object carrying `t0` (intercept, slope) and `t` (one row of
#          intercept, slope per replicate)
#   xseq   x values at which to predict
#   se     whether to add the ymin / ymax band
#   level  coverage of the band, e.g. 0.95
predictdf.Passing_Bablok <- function(model, xseq, se, level) {
  design <- cbind(1, xseq)
  fitted_line <- as.vector(tcrossprod(model$t0, design))
  if (!se) {
    return(data.frame(x = xseq, y = fitted_line))
  }
  # One row per bootstrap replicate, one column per x value.
  replicate_lines <- tcrossprod(model$t, design)
  tail_prob <- (1 - level) / 2
  column_quantile <- function(p) {
    apply(replicate_lines, 2, function(values) quantile(values, probs = p))
  }
  data.frame(
    x = xseq,
    y = fitted_line,
    ymin = column_quantile(tail_prob),
    ymax = column_quantile(1 - tail_prob)
  )
}
An example of usage:
# Example: two independent normal samples, plotted with the Passing-Bablok
# smoother and the identity line (slope 1, intercept 0) for reference.
z <- data.frame(x = rnorm(100, mean = 100, sd = 5),
y = rnorm(100, mean = 110, sd = 8))
ggplot(z, aes(x, y)) +
geom_point() +
geom_smooth(method = passing_bablok) +
geom_abline(slope = 1, intercept = 0)
So far, I haven't been able to show the regression line equation, with confidence interval for intercept and slope parameters (as +- or in parentheses).
You've arguably done the difficult part with the PaBa regression.
Here's a basic solution using your passing_bablok.fit function:
# Simulated data: two noisy measurements of the same 101..200 sequence.
z <- data.frame(x = 101:200+rnorm(100,sd=10),
y = 101:200+rnorm(100,sd=8))
# Point estimates only (intercept, slope); as.numeric() drops names and class.
mycoefs <- as.numeric(passing_bablok.fit(x = z$x, y=z$y))
# Format fitted coefficients as a plotmath string "y = m . x +/- b".
#
#   thecoefs  numeric vector: intercept first, slope second
#
# Returns a character string meant for a text label drawn with parse = TRUE.
# The sign is carried by the operator, so the intercept is shown as an
# absolute value.
paba_eqn <- function(thecoefs) {
  intercept <- thecoefs[1]
  shown <- list(
    m = format(thecoefs[2], digits = 2),
    b = format(abs(intercept), digits = 2)
  )
  template <- if (intercept >= 0) {
    quote(italic(y) == m %.% italic(x) + b)
  } else {
    quote(italic(y) == m %.% italic(x) - b)
  }
  # substitute() quotes its first argument, so the stored template has to be
  # handed over through do.call().
  eq <- do.call(substitute, list(template, shown))
  as.character(as.expression(eq))
}
library(ggplot2)
# Scatter plot with the Passing-Bablok smoother and the identity line; the
# equation is drawn as a parsed plotmath label at a fixed position
# (x = 110, y = 220).
ggplot(z, aes(x, y)) +
geom_point() +
geom_smooth(method = passing_bablok) +
geom_abline(slope = 1, intercept = 0) +
annotate("text",x = 110, y = 220, label = paba_eqn(mycoefs), parse = TRUE)
Note the equation will vary because of rnorm in the data creation..
The solution could definitely be made more slick and robust, but it works for both positive and negative intercepts.
Equation concept sourced from: https://stackoverflow.com/a/13451587/2651663

ggplot2 making a line graph that records constantly changing variables

I have a graph and code like the following:
library(igraph)
# Six vertices in total: the first two get a = 1, the next two a = 2, the last
# two a = 3; then six edges are added and xyz is set to 3 on every vertex.
g <- make_empty_graph (2) %>%
set_vertex_attr("a", value = 1) %>%
add_vertices(2, color = 2, "a" = 2) %>%
add_vertices(2, color = 4, "a" = 3) %>%
add_edges(c(1,2, 2,1, 1,5, 5,1, 1,4 ,4,1)) %>%
set_vertex_attr("xyz", value = 3)
i <- 1
# Update the vertices until the proportion with a == 1 stops changing.
repeat {
prev_value <- mean(V(g)$a == 1)
print(V(g)$a)
print(mean(V(g)$a == 1))
print(i)
# xyz = number of neighbours of each vertex whose attribute a equals 2.
V(g)$xyz = sapply(V(g), function(x) {
NeighborList = neighbors(g, x)
length(NeighborList[NeighborList$a == 2])
})
# A vertex with exactly one such neighbour is itself set to a = 2.
V(g)$a[V(g)$xyz == 1] = 2
i <- i + 1
aa <- mean(V(g)$a == 1)
if (aa == prev_value) {
break
}
}
# Built once, after the loop, from the final i and aa: the data frame has a
# single row, so the chart below has only one point to draw.
df <- data.frame(time=i, prop=aa)
library(ggplot2)
ggplot(data=df, aes(x=time, y=prop, group=1)) +
geom_line() +
geom_point()
Whenever I try to run this however, it doesn't work. Ideally I would like an output where ggplot2 would plot a line graph that depicts the change in the proportion of nodes with an attribute "aa" value of 1 as the time variable "i" changes.
I am not sure what exactly you want. But if you want to depict the difference in aa for each iteration then you can add the ggplot inside the repeat.
# Track the proportion of vertices with a == 1 over time and redraw the line
# chart after every update step. Expects the graph `g` and the step counter
# `i` to exist already.
#
# The history is seeded with the current state before the loop. Testing
# `i == 1` inside the loop to create `df` cannot work when `i` starts at 1,
# because `i` is incremented before the test, so rbind() would have no data
# frame to append to.
df <- data.frame(time = i, prop = mean(V(g)$a == 1))
repeat {
  prev_value <- mean(V(g)$a == 1)
  print(V(g)$a)
  print(prev_value)
  print(i)
  # xyz = number of neighbours of each vertex whose attribute a equals 2.
  V(g)$xyz <- sapply(V(g), function(x) {
    NeighborList <- neighbors(g, x)
    length(NeighborList[NeighborList$a == 2])
  })
  # A vertex with exactly one such neighbour is itself set to a = 2.
  V(g)$a[V(g)$xyz == 1] <- 2
  i <- i + 1
  aa <- mean(V(g)$a == 1)
  # Append this step and redraw; print() is required inside a loop.
  df <- rbind(df, data.frame(time = i, prop = aa))
  print(
    ggplot(data = df, aes(x = time, y = prop, group = 1)) +
      geom_line() +
      geom_point()
  )
  # Stop once an update leaves the proportion unchanged.
  if (aa == prev_value) {
    break
  }
}

ggplot2: annotation_custom gives an empty layer

Some test data:
# Test data: ten random responses crossed with two two-level factors.
# The stray `c(` in front of rnorm(10) left the parentheses unbalanced, so the
# statement could not be parsed; each column is now its own argument.
ltd <- data.frame(
  r = rnorm(10),
  f1 = c(rep("L", 5), rep("H", 5)),
  f2 = rep(c("A", "B"), 5)
)
And a minimal function:
# Minimal reproduction: scatter plot of a response against one factor,
# coloured by a second factor, with a table of cell counts added through
# annotation_custom().
#
#   formula  of the form response ~ fac1 * fac2
#   data     data frame holding those three columns
#
# The last statement is an assignment, so the plot is returned invisibly.
tf <- function(formula = NULL, data = NULL) {
res <- as.character(formula[[2]]) # clean & prep data
# formula[[3]] is the right-hand-side call; its 2nd and 3rd elements are the
# two factor names.
fac1 <- as.character(formula[[3]][2])
fac2 <- as.character(formula[[3]][3])
counts <- count(data, vars = c(fac2, fac1)) # get table data ready
colnames(counts) <- c(fac2, fac1, "count")
myt <- tableGrob(counts, show.box = TRUE,
show.rownames = FALSE, show.colnames = TRUE,
show.csep = TRUE, show.rsep = TRUE,
separator = "black")
# The plot itself has no default data; data and aesthetics live on the layer.
p <- ggplot()
p <- p + geom_point(data = data,
aes_string(x = fac1, y = res, color = fac2, group = fac2))
p <- p + annotation_custom(myt) # comment out and it works
}
Run it:
# Load the packages tf() relies on, then build and print the plot.
# library() stops at once when a package is missing, whereas require() only
# returns FALSE and lets the script fail later with a less obvious error.
library("plyr")
library("gridExtra")
library("ggplot2")
tmp <- tf(formula = r~f1*f2, data = ltd)
print(tmp)
Gives Error in if (nrow(layer_data) == 0) return() : argument is of length zero
If you print the tableGrob it does exist, so I'm not sure what's going on here. If you comment out the annotation_custom it works, and I think I'm following the docs. Thanks. (ggplot2_0.9.3)
Here is solution to your problem: I relocated your data= and aes_string call to the main ggplot call. I don't know why it matters, but now the plot prints without error.
# Data and aesthetics are declared once in ggplot(), so geom_point() takes
# them from the plot defaults instead of carrying its own.
p <- ggplot(data=data, aes_string(x=fac1, y=res, color=fac2, group=fac2)) +
geom_point() +
annotation_custom(myt)

ggplot2 - Shade area above line

I have some data that is constrained below a 1:1 line. I would like to demonstrate this on a plot by lightly shading the area ABOVE the line, to draw the attention of the viewer to the area beneath the line.
I'm using qplot to generate the graphs. Quickly, I have;
qplot(x,y)+geom_abline(slope=1)
but for the life of me, can't figure out how to easily shade the above area without plotting a separate object. Is there an easy fix for this?
EDIT
Ok, Joran, here is an example data set:
# Six random points in [-2, 2] x [-2, 2] with two alternating labels.
df=data.frame(x=runif(6,-2,2),y=runif(6,-2,2),
var1=rep(c("A","B"),3),var2=rep(c("C","D"),3))
# Triangle whose corners sit at infinite coordinates; it holds only x and y
# (no var1 or var2 column).
df_poly=data.frame(x=c(-Inf, Inf, -Inf),y=c(-Inf, Inf, Inf))
and here is the code that I'm using to plot it (I took your advice and have been looking up ggplot()):
# color = var1 is declared in the top-level aes(), so every layer inherits
# it -- including geom_polygon(), whose data (df_poly) has no var1 column.
ggplot(df,aes(x,y,color=var1))+
facet_wrap(~var2)+
geom_abline(slope=1,intercept=0,lwd=0.5)+
geom_point(size=3)+
scale_color_manual(values=c("red","blue"))+
geom_polygon(data=df_poly,aes(x,y),fill="blue",alpha=0.2)
The error kicked back is: "object 'var1' not found" Something tells me that I'm implementing the argument incorrectly...
Building on @Andrie's answer, here is a more (but not completely) general solution that handles shading above or below a given line in most cases.
I did not use the method that @Andrie referenced here since I ran into issues with ggplot's tendency to automatically extend the plot extents when you add points near the edges. Instead, this builds the polygon points manually using Inf and -Inf as needed. A few notes:
The points have to be in the 'correct' order in the data frame, since ggplot plots the polygon in the order that the points appear. So it's not enough to get the vertices of the polygon, they must be ordered (either clockwise or counterclockwise) as well.
This solution assumes that the line you are plotting does not itself cause ggplot to extend the plot range. You'll see in my example that I pick a line to draw by randomly choosing two points in the data and drawing the line through them. If you try to draw a line too far away from the rest of your points, ggplot will automatically alter the plot ranges, and it becomes hard to predict what they will be.
First, here's the function that builds the polygon data frame:
# Vertices of the polygon covering everything above (or below) the line
# y = intercept + slope * x inside the plotting area.
#
#   xr, yr     x and y ranges of the data, as returned by range()
#   slope      slope of the line
#   intercept  intercept of the line
#   above      TRUE to shade above the line, FALSE to shade below it
#
# Returns a data frame with columns x and y. Vertices are listed in drawing
# order and use +/-Inf wherever the polygon runs out to a panel edge.
buildPoly <- function(xr, yr, slope = 1, intercept = 0, above = TRUE) {
  # Plot limits, assuming ggplot's default expand = c(0.05, 0).
  widen <- c(-1, 1)
  x_lim <- xr + 0.05 * diff(xr) * widen
  y_lim <- yr + 0.05 * diff(yr) * widen

  # Where the line meets the plot edges: x at the bottom/top edge, and y at
  # the left/right edge.
  x_at_y_lim <- (y_lim - intercept) / slope
  y_at_x_lim <- (slope * x_lim) + intercept

  rising <- slope >= 0
  if (rising) {
    if (above) {
      rs <- data.frame(x = -Inf, y = Inf)
      rs <- if (y_at_x_lim[1] < y_lim[1]) {
        rbind(rs, c(-Inf, -Inf), c(x_at_y_lim[1], -Inf))
      } else {
        rbind(rs, c(-Inf, y_at_x_lim[1]))
      }
      rs <- if (y_at_x_lim[2] < y_lim[2]) {
        rbind(rs, c(Inf, y_at_x_lim[2]), c(Inf, Inf))
      } else {
        rbind(rs, c(x_at_y_lim[2], Inf))
      }
    } else {
      rs <- data.frame(x = Inf, y = -Inf)
      rs <- if (y_at_x_lim[1] > y_lim[1]) {
        rbind(rs, c(-Inf, -Inf), c(-Inf, y_at_x_lim[1]))
      } else {
        rbind(rs, c(x_at_y_lim[1], -Inf))
      }
      rs <- if (y_at_x_lim[2] > y_lim[2]) {
        rbind(rs, c(x_at_y_lim[2], Inf), c(Inf, Inf))
      } else {
        rbind(rs, c(Inf, y_at_x_lim[2]))
      }
    }
  } else {
    if (above) {
      rs <- data.frame(x = Inf, y = Inf)
      rs <- if (y_at_x_lim[1] < y_lim[2]) {
        rbind(rs, c(-Inf, Inf), c(-Inf, y_at_x_lim[1]))
      } else {
        rbind(rs, c(x_at_y_lim[2], Inf))
      }
      rs <- if (y_at_x_lim[2] < y_lim[1]) {
        rbind(rs, c(x_at_y_lim[1], -Inf), c(Inf, -Inf))
      } else {
        rbind(rs, c(Inf, y_at_x_lim[2]))
      }
    } else {
      rs <- data.frame(x = -Inf, y = -Inf)
      rs <- if (y_at_x_lim[1] > y_lim[2]) {
        rbind(rs, c(-Inf, Inf), c(x_at_y_lim[2], Inf))
      } else {
        rbind(rs, c(-Inf, y_at_x_lim[1]))
      }
      rs <- if (y_at_x_lim[2] > y_lim[1]) {
        rbind(rs, c(Inf, y_at_x_lim[2]), c(Inf, -Inf))
      } else {
        rbind(rs, c(x_at_y_lim[1], -Inf))
      }
    }
  }
  rs
}
It expects the x and y ranges of your data (as in range()), the slope and intercept of the line you are going to plot, and whether you want to shade above or below the line. Here's the code I used to generate the following four examples:
#Generate some data
dat <- data.frame(x=runif(10),y=runif(10))
#Select two of the points to define the line
pts <- dat[sample(1:nrow(dat),size=2,replace=FALSE),]
#Slope and intercept of line through those points
sl <- diff(pts$y) / diff(pts$x)
int <- pts$y[1] - (sl*pts$x[1])
#Build the polygon
# above=FALSE shades the side below the line.
datPoly <- buildPoly(range(dat$x),range(dat$y),
slope=sl,intercept=int,above=FALSE)
#Make the plot
# The polygon layer brings its own data and aesthetics.
p <- ggplot(dat,aes(x=x,y=y)) +
geom_point() +
geom_abline(slope=sl,intercept = int) +
geom_polygon(data=datPoly,aes(x=x,y=y),alpha=0.2,fill="blue")
print(p)
And here are some examples of the results. If you find any bugs, of course, let me know so that I can update this answer...
EDIT
Updated to illustrate solution using OP's example data:
# Fixed seed so the random example data is reproducible.
set.seed(1)
dat <- data.frame(x=runif(6,-2,2),y=runif(6,-2,2),
var1=rep(c("A","B"),3),var2=rep(c("C","D"),3))
#Create polygon data frame
# Defaults are used: slope 1, intercept 0, shading above the line.
df_poly <- buildPoly(range(dat$x),range(dat$y))
# colour = var1 is mapped only in geom_point(), so the polygon layer (whose
# data has no var1 column) does not inherit it.
ggplot(data=dat,aes(x,y)) +
facet_wrap(~var2) +
geom_abline(slope=1,intercept=0,lwd=0.5)+
geom_point(aes(colour=var1),size=3) +
scale_color_manual(values=c("red","blue"))+
geom_polygon(data=df_poly,aes(x,y),fill="blue",alpha=0.2)
and this produces the following output:
As far as I know there is no other way other than creating a polygon with alpha-blended fill. For example:
# Shade the region above the 1:1 line with a semi-transparent triangle whose
# corners sit at infinite coordinates.
# The dangling `+` after the last layer left the expression incomplete and
# has been removed.
df <- data.frame(x = 1, y = 1)
df_poly <- data.frame(
  x = c(-Inf, Inf, -Inf),
  y = c(-Inf, Inf, Inf)
)
ggplot(df, aes(x, y)) +
  geom_blank() +
  geom_abline(slope = 1, intercept = 0) +
  geom_polygon(data = df_poly, aes(x, y), fill = "blue", alpha = 0.2)
One easy way to do this is to use geom_ribbon with the ymax value set to Inf, and the ymin value calculated by stat_function:
library(ggplot2)
# Two example curves: the identity line and a parabola.
myfun <- function(x) x
myfun2 <- function(x) x^2
# The ribbon uses the same function through stat = 'function': its lower edge
# is the computed curve (after_stat(y)) and its upper edge is Inf.
ggplot() +
geom_function(fun = myfun) +
geom_ribbon(stat = 'function', fun = myfun,
mapping = aes(ymin = after_stat(y), ymax = Inf),
fill = 'lightblue', alpha = 0.5)
# Same construction for the parabola.
ggplot() +
geom_function(fun = myfun2) +
geom_ribbon(stat = 'function', fun = myfun2,
mapping = aes(ymin = after_stat(y), ymax = Inf),
fill = 'lightblue', alpha = 0.5)
Created on 2022-05-26 by the reprex package (v2.0.1)
Based on a minimally modified version of @joran's answer:
library(ggplot2)
library(tidyr)
library(dplyr)
# Vertices of the polygon covering everything above (or below) the line
# y = intercept + slope * x inside the given panel ranges.
# By Joran Elias, @joran https://stackoverflow.com/a/6809174/1870254
#
#   slope, intercept  the line
#   above             TRUE to shade above the line, FALSE to shade below it
#   xr, yr            x and y ranges of the panel; used as given, with no
#                     expansion added
#
# Returns a data frame with columns x and y. Vertices are listed in drawing
# order and use +/-Inf wherever the polygon runs out to a panel edge.
buildPoly <- function(slope, intercept, above, xr, yr) {
  # Where the line meets the panel edges: x at the bottom/top edge, and y at
  # the left/right edge.
  x_at_y_lim <- (yr - intercept) / slope
  y_at_x_lim <- (slope * xr) + intercept

  rising <- slope >= 0
  if (rising) {
    if (above) {
      rs <- data.frame(x = -Inf, y = Inf)
      rs <- if (y_at_x_lim[1] < yr[1]) {
        rbind(rs, c(-Inf, -Inf), c(x_at_y_lim[1], -Inf))
      } else {
        rbind(rs, c(-Inf, y_at_x_lim[1]))
      }
      rs <- if (y_at_x_lim[2] < yr[2]) {
        rbind(rs, c(Inf, y_at_x_lim[2]), c(Inf, Inf))
      } else {
        rbind(rs, c(x_at_y_lim[2], Inf))
      }
    } else {
      rs <- data.frame(x = Inf, y = -Inf)
      rs <- if (y_at_x_lim[1] > yr[1]) {
        rbind(rs, c(-Inf, -Inf), c(-Inf, y_at_x_lim[1]))
      } else {
        rbind(rs, c(x_at_y_lim[1], -Inf))
      }
      rs <- if (y_at_x_lim[2] > yr[2]) {
        rbind(rs, c(x_at_y_lim[2], Inf), c(Inf, Inf))
      } else {
        rbind(rs, c(Inf, y_at_x_lim[2]))
      }
    }
  } else {
    if (above) {
      rs <- data.frame(x = Inf, y = Inf)
      rs <- if (y_at_x_lim[1] < yr[2]) {
        rbind(rs, c(-Inf, Inf), c(-Inf, y_at_x_lim[1]))
      } else {
        rbind(rs, c(x_at_y_lim[2], Inf))
      }
      rs <- if (y_at_x_lim[2] < yr[1]) {
        rbind(rs, c(x_at_y_lim[1], -Inf), c(Inf, -Inf))
      } else {
        rbind(rs, c(Inf, y_at_x_lim[2]))
      }
    } else {
      rs <- data.frame(x = -Inf, y = -Inf)
      rs <- if (y_at_x_lim[1] > yr[2]) {
        rbind(rs, c(-Inf, Inf), c(x_at_y_lim[2], Inf))
      } else {
        rbind(rs, c(-Inf, y_at_x_lim[1]))
      }
      rs <- if (y_at_x_lim[2] > yr[1]) {
        rbind(rs, c(Inf, y_at_x_lim[2]), c(Inf, -Inf))
      } else {
        rbind(rs, c(x_at_y_lim[1], -Inf))
      }
    }
  }
  rs
}
you can also extend ggplot like this:
# ggproto geom that shades the half-plane on one side of a line. It extends
# GeomPolygon and requires the aesthetics slope, intercept and above.
GeomSection <- ggproto("GeomSection", GeomPolygon,
default_aes = list(fill="blue", size=0, alpha=0.2, colour=NA, linetype="dashed"),
required_aes = c("slope", "intercept", "above"),
draw_panel = function(data, panel_params, coord) {
# Panel ranges as reported by the coord for this panel.
ranges <- coord$backtransform_range(panel_params)
# One group per input row, so each line gets its own polygon.
data$group <- seq_len(nrow(data))
# Build the polygon vertices for every row with buildPoly(slope, intercept,
# above, x range, y range); the grouping columns carry the row's other
# aesthetics along with the vertices.
# NOTE(review): unnest is piped without arguments -- confirm it is still
# needed after do() and behaves as intended with the installed tidyr.
data <- data %>% group_by_all %>% do(buildPoly(.$slope, .$intercept, .$above, ranges$x, ranges$y)) %>% unnest
# Drawing is delegated to the polygon geom.
GeomPolygon$draw_panel(data, panel_params, coord)
}
)
# Layer constructor for GeomSection.
#
#   mapping, data  optional aesthetic mapping and layer data
#   ...            extra parameters passed to the layer through `params`
#   slope, intercept, above
#                  fixed values for the line and the side to shade; when any
#                  of them is supplied, `data` and `mapping` are replaced
#   na.rm          passed on in `params`
#   show.legend    passed to layer(); forced to FALSE for fixed values
#
# Returns the layer created by layer(); it never inherits plot aesthetics.
geom_section <- function (mapping = NULL, data = NULL, ..., slope, intercept, above,
na.rm = FALSE, show.legend = NA) {
# Nothing specified at all: shade above the 1:1 line through the origin.
if (missing(mapping) && missing(slope) && missing(intercept) && missing(above)) {
slope <- 1
intercept <- 0
above <- TRUE
}
# NOTE(review): missing() is no longer TRUE for an argument once a value has
# been assigned to it, so this branch presumably also runs after the defaults
# set above -- confirm.
if (!missing(slope) || !missing(intercept)|| !missing(above)) {
# Fill in whichever of the three values was not supplied.
if (missing(slope))
slope <- 1
if (missing(intercept))
intercept <- 0
if (missing(above))
above <- TRUE
# Fixed values become the layer's own data and mapping; no legend is drawn.
data <- data.frame(intercept = intercept, slope = slope, above=above)
mapping <- aes(intercept = intercept, slope = slope, above=above)
show.legend <- FALSE
}
layer(data = data, mapping = mapping, stat = StatIdentity,
geom = GeomSection, position = PositionIdentity, show.legend = show.legend,
inherit.aes = FALSE, params = list(na.rm = na.rm, ...))
}
To be able to use it as easily as a geom_abline:
# Fixed seed so the random example data is reproducible.
set.seed(1)
dat <- data.frame(x=runif(6,-2,2),y=runif(6,-2,2),
var1=rep(c("A","B"),3),var2=rep(c("C","D"),3))
# geom_section() is called with fixed values, in the same way as geom_abline().
ggplot(data=dat,aes(x,y)) +
facet_wrap(~var2) +
geom_abline(slope=1,intercept=0,lwd=0.5)+
geom_point(aes(colour=var1),size=3) +
scale_color_manual(values=c("red","blue"))+
geom_section(slope=1, intercept=0, above=TRUE)
This variant has the additional advantage that it also works with multiple slopes and non-default limit expansions.
# Two sections taken from a data frame: slope and above are mapped from its
# columns, intercept is the constant 0, and fill is mapped to a label column.
# expand_limits(x=3) widens the x range beyond the data.
ggplot(data=dat,aes(x,y)) +
facet_wrap(~var2) +
geom_abline(slope=1,intercept=0,lwd=0.5)+
geom_point(aes(colour=var1),size=3) +
scale_color_manual(values=c("red","blue"))+
geom_section(data=data.frame(slope=c(-1,1), above=c(FALSE,TRUE), selected=c("selected","selected 2")),
aes(slope=slope, above=above, intercept=0, fill=selected), size=1) +
expand_limits(x=3)

Resources