How can I draw diamonds in R with ggplot2? - r

I am trying to replicate the following picture in R, in particular with ggplot2
I was able to draw the red rss contour lines but I've no idea how to draw a diamond (like the one in the left picture). The "expected Output" should be a way to draw a diamond with a given side length.
EDIT: Here is a short reproducible example to add the diamond randomly inside the following plot:
mlb<- read.table('https://umich.instructure.com/files/330381/download?download_frd=1', as.is=T, header=T)
str(mlb)
fit<-lm(Height~Weight+Age-1, data = as.data.frame(scale(mlb[,4:6])))
points = data.frame(x=c(0,fit$coefficients[1]),y=c(0,fit$coefficients[2]),z=c("(0,0)","OLS Coef"))
Y=scale(mlb$Height)
X = scale(mlb[,c(5,6)])
beta1=seq(-0.556, 1.556, length.out = 100)
beta2=seq(-0.661, 0.3386, length.out = 100)
df <- expand.grid(beta1 = beta1, beta2 = beta2)
b = as.matrix(df)
df$sse <- rep(t(Y)%*%Y,100*100) - 2*b%*%t(X)%*%Y + diag(b%*%t(X)%*%X%*%t(b))
base <- ggplot() +
stat_contour(data=df, aes(beta1, beta2, z = sse),breaks = round(quantile(df$sse, seq(0, 0.2, 0.03)), 0),
size = 0.5,color="darkorchid2",alpha=0.8) +
scale_x_continuous(limits = c(-0.4,1))+
scale_y_continuous(limits = c(-0.55,0.4))+
geom_point(data = points,aes(x,y))+
geom_text(data = points,aes(x,y,label=z),vjust = 2,size=3.5)
base

You can draw shapes with geom_polygon.
library(ggplot2)
df <- data.frame(x = c(1, 0, -1, 0), y = c(0, 1, 0, -1))
ggplot(df) + geom_polygon(aes(x = x, y = y))
If you want to generate the coordinates from a center and a side length, you can transform a base matrix. You can also combine this with an existing plot by supplying the coordinates to the data argument of the geom instead of to ggplot() as shown. Change the sqrt2 scaling if you want the corner-to-center as the argument instead of the side length.
diamond <- function(side_length, center) {
base <- matrix(c(1, 0, 0, 1, -1, 0, 0, -1), nrow = 2) * sqrt(2) / 2
trans <- (base * side_length) + center
as.data.frame(t(trans))
}
ggplot() + geom_polygon(data = diamond(2, c(1, 2)), mapping = aes(x = V1, y = V2))
Here's an example of adding it in to your provided data. Note that I put it before (underneath) the text, and named the arguments to be clear (probably the source of that object coercible by fortify error.
mlb <- read.table("https://umich.instructure.com/files/330381/download?download_frd=1", as.is = T, header = T)
fit <- lm(Height ~ Weight + Age - 1, data = as.data.frame(scale(mlb[, 4:6])))
points <- data.frame(x = c(0, fit$coefficients[1]), y = c(0, fit$coefficients[2]), z = c("(0,0)", "OLS Coef"))
Y <- scale(mlb$Height)
X <- scale(mlb[, c(5, 6)])
beta1 <- seq(-0.556, 1.556, length.out = 100)
beta2 <- seq(-0.661, 0.3386, length.out = 100)
df <- expand.grid(beta1 = beta1, beta2 = beta2)
b <- as.matrix(df)
df$sse <- rep(t(Y) %*% Y, 100 * 100) - 2 * b %*% t(X) %*% Y + diag(b %*% t(X) %*% X %*% t(b))
ggplot(df) +
stat_contour(aes(beta1, beta2, z = sse),
breaks = round(quantile(df$sse, seq(0, 0.2, 0.03)), 0),
size = 0.5, color = "darkorchid2", alpha = 0.8
) +
geom_polygon(data = diamond(0.1, c(0, 0)), mapping = aes(x = V1, y = V2), fill = "cadetblue1") +
scale_x_continuous(limits = c(-0.4, 1)) +
scale_y_continuous(limits = c(-0.55, 0.4)) +
geom_point(data = points, aes(x, y)) +
geom_text(data = points, aes(x, y, label = z), vjust = 2, size = 3.5)
#> Warning: Removed 4215 rows containing non-finite values (stat_contour).
Created on 2018-08-01 by the reprex package (v0.2.0).

Related

plot contours in ggplot

How do I plot contours ?
I have x, y, z. I wish to plot contour lines using V values.
# data
tbl <- tibble(x = runif(n = 1000, min = 0, max = 1),
y = runif(n = 1000, min = 0, max = 1),
V = x^2.5 + y^2)
# plots
ggplot(data = tbl,
aes(x = x,
y = y
z = V)) +
geom_contour_filled(alpha = 0.8, breaks = seq(0, 2, 0.2)) +
theme_bw()
Here is a way, solving the problem with a shameless copy&paste of the franke example in the documentation of geom_contour_filled.
The trick is to use package interp to prepare the data for plotting. In the code below the only change in the instruction to create grid is the data set being binned.
suppressPackageStartupMessages({
library(tidyverse)
library(interp)
})
set.seed(2022)
tbl <- tibble(x = runif(n = 1000, min = 0, max = 1),
y = runif(n = 1000, min = 0, max = 1),
V = x^2.5 + y^2)
grid <- with(tbl, interp::interp(x, y, V))
griddf <- subset(data.frame(x = rep(grid$x, nrow(grid$z)),
y = rep(grid$y, each = ncol(grid$z)),
z = as.numeric(grid$z)),
!is.na(z))
# plots
ggplot(data = griddf,
aes(x = x,
y = y,
z = z)) +
stat_contour_filled(alpha = 0.8, breaks = seq(0, 2, 0.2)) +
theme_bw()
Created on 2022-05-18 by the reprex package (v2.0.1)
Edit
To better control the bins, use either argument bins or argument binwidth instead of breaks. The following code has a bin width of 0.1, doubling the number of bins and now uses geom_contour_filled, like in the question.
ggplot(data = griddf,
aes(x = x,
y = y,
z = z)) +
geom_contour_filled(alpha = 0.8, binwidth = 0.1, show.legend = FALSE) +
theme_bw()
Created on 2022-05-18 by the reprex package (v2.0.1)
geom_contour_filled require binned data.
So your data should be
# data
tbl <- tibble(x = rep(seq(0,1,length.out=100),100),
y = rep(seq(0,1,length.out=100),each=100),
V = x^2.5 + y^2)

Convert plot to ggplot and add horizontal lines until specific points

I have creted the plot below using the base R plot() function but I would like to convert it to ggplot() and also add horizontal lines like in the example picture but until the crossing with the graphs and not a full continuing horizontal line until the end.
# Figure 3.1 & 3.2
# curve(logistic(pnorm(x), a=1, d=0),-3,3,ylab="Probability of x",
# main="Logistic transform of x",xlab="z score units")
# #logistic with a=1.702 is almost the same as pnorm
# curve(logistic(pnorm(x), d=1),add=TRUE)
# Set x-axis values
theta <- seq(from = -10, to = 10, by = 0.001)
# Code for plot1
B_i <- 1
B_j <- -1
P_item1_rasch <- NULL
P_item2_rasch <- NULL
for (i in 1:length(theta)){
P_item1_rasch[i] <- (exp((theta[i]-B_i)))/(1+(exp((theta[i]-B_i))))
P_item2_rasch[i] <- (exp((theta[i]-B_j)))/(1+(exp((theta[i]-B_j))))
}
#select the colors that will be used
library(RColorBrewer)
#all palette available from RColorBrewer
display.brewer.all()
#we will select the first 4 colors in the Set1 palette
cols<-brewer.pal(n=4,name="Set1")
#cols contain the names of four different colors
plot(theta, P_item1_rasch, xlim=c(-4,4), ylim=c(0,1))
lines(theta, P_item2_rasch,col=cols[2])
# Add lines at the values below, but only half as in the example Figures
# abline(h=0.5)
# abline(v=-1)
# abline(v=1)
Perhaps something like this?
theta <- seq(from = -10, to = 10, by = 0.001)
# Code for plot1
B_i <- 1
B_j <- -1
P_item0_rasch <- NULL
P_item1_rasch <- NULL
P_item2_rasch <- NULL
for (i in 1:length(theta)){
P_item0_rasch[i] <- (exp((theta[i])))/(1+(exp((theta[i]))))
P_item1_rasch[i] <- (exp((theta[i]-B_i)))/(1+(exp((theta[i]-B_i))))
P_item2_rasch[i] <- (exp((theta[i]-B_j)))/(1+(exp((theta[i]-B_j))))
}
df <- data.frame(theta = rep(theta, 3),
P_item_rasch = c(P_item0_rasch, P_item1_rasch, P_item2_rasch),
number = factor(rep(1:3, each = length(theta))))
library(ggplot2)
ggplot(df, aes(theta, P_item_rasch, color = number)) +
geom_line() +
lims(x = c(-6, 6)) +
geom_segment(x = -1, xend = 1, y = 0.5, yend = 0.5, lty = 2) +
geom_vline(xintercept = c(-1, 0, 1), lty = 2) +
scale_color_manual(values = RColorBrewer::brewer.pal(4, "Set1")[-1]) +
theme_classic() +
theme(legend.position = "none")
#> Warning: Removed 24000 row(s) containing missing values (geom_path).
Edit
The OP changed the question to alter the requirements. Here is a way to achieve them:
ggplot(df, aes(theta, P_item_rasch)) +
geom_line(aes(color = number)) +
lims(x = c(-6, 6)) +
# Line between curves
geom_segment(x = -1, xend = 1, y = 0.5, yend = 0.5, lty = 2) +
# Optional line on left
geom_segment(x = -Inf, xend = -1, y = 0.5, yend = 0.5, lty = 2) +
# Lower lines
geom_segment(data = data.frame(theta = c(-1, 0, 1), P_item_rasch = rep(-Inf, 3)),
aes(xend = theta, yend = 0.5), lty = 2) +
# Upper lines
#geom_segment(data = data.frame(theta = c(-1, 0, 1), P_item_rasch = rep(Inf, 3)),
# aes(xend = theta, yend = 0.5), lty = 2) +
scale_color_manual(values = RColorBrewer::brewer.pal(4, "Set1")[-1]) +
theme_classic() +
theme(legend.position = "none")
Created on 2020-12-06 by the reprex package (v0.3.0)

operation between stat_summary_hex plots made in ggplot2

I have two populations A and B distributed spatially with one character Z, I want to be able to make an hexbin substracting the proportion of the character in each hexbin. Here I have the code for two theoretical populations A and B
library(hexbin)
library(ggplot2)
set.seed(2)
xA <- rnorm(1000)
set.seed(3)
yA <- rnorm(1000)
set.seed(4)
zA <- sample(c(1, 0), 20, replace = TRUE, prob = c(0.2, 0.8))
hbinA <- hexbin(xA, yA, xbins = 40, IDs = TRUE)
A <- data.frame(x = xA, y = yA, z = zA)
set.seed(5)
xB <- rnorm(1000)
set.seed(6)
yB <- rnorm(1000)
set.seed(7)
zB <- sample(c(1, 0), 20, replace = TRUE, prob = c(0.4, 0.6))
hbinB <- hexbin(xB, yB, xbins = 40, IDs = TRUE)
B <- data.frame(x = xB, y = yB, z = zB)
ggplot(A, aes(x, y, z = z)) + stat_summary_hex(fun = function(z) sum(z)/length(z), alpha = 0.8) +
scale_fill_gradientn(colours = c("blue","red")) +
guides(alpha = FALSE, size = FALSE)
ggplot(B, aes(x, y, z = z)) + stat_summary_hex(fun = function(z) sum(z)/length(z), alpha = 0.8) +
scale_fill_gradientn (colours = c("blue","red")) +
guides(alpha = FALSE, size = FALSE)
here is the two resulting graphs
My goal is to make a third graph with hexbins with the values of the difference between hexbins at the same coordinates but I don't even know how to start to do it, I have done something similar in the raster Package, but I need it as hexbins
Thanks a lot
You need to make sure that both plots use the exact same binning. In order to achieve this, I think it is best to do the binning beforehand and then plot the results with stat_identity / geom_hex. With the variables from your code sample you ca do:
## find the bounds for the complete data
xbnds <- range(c(A$x, B$x))
ybnds <- range(c(A$y, B$y))
nbins <- 30
# function to make a data.frame for geom_hex that can be used with stat_identity
makeHexData <- function(df) {
h <- hexbin(df$x, df$y, nbins, xbnds = xbnds, ybnds = ybnds, IDs = TRUE)
data.frame(hcell2xy(h),
z = tapply(df$z, h#cID, FUN = function(z) sum(z)/length(z)),
cid = h#cell)
}
Ahex <- makeHexData(A)
Bhex <- makeHexData(B)
## not all cells are present in each binning, we need to merge by cellID
byCell <- merge(Ahex, Bhex, by = "cid", all = T)
## when calculating the difference empty cells should count as 0
byCell$z.x[is.na(byCell$z.x)] <- 0
byCell$z.y[is.na(byCell$z.y)] <- 0
## make a "difference" data.frame
Diff <- data.frame(x = ifelse(is.na(byCell$x.x), byCell$x.y, byCell$x.x),
y = ifelse(is.na(byCell$y.x), byCell$y.y, byCell$y.x),
z = byCell$z.x - byCell$z.y)
## plot the results
ggplot(Ahex) +
geom_hex(aes(x = x, y = y, fill = z),
stat = "identity", alpha = 0.8) +
scale_fill_gradientn (colours = c("blue","red")) +
guides(alpha = FALSE, size = FALSE)
ggplot(Bhex) +
geom_hex(aes(x = x, y = y, fill = z),
stat = "identity", alpha = 0.8) +
scale_fill_gradientn (colours = c("blue","red")) +
guides(alpha = FALSE, size = FALSE)
ggplot(Diff) +
geom_hex(aes(x = x, y = y, fill = z),
stat = "identity", alpha = 0.8) +
scale_fill_gradientn (colours = c("blue","red")) +
guides(alpha = FALSE, size = FALSE)

plotting density cauchy distribution in R

Just curious how can you generate the dcauchy distribution from Wikipedia:
Normally, you have
dcauchy(x, location = 0, scale = 1, log = FALSE)
for one line density p(x) v.s x
I assume in order to generate the diagram from wiki, a data.frame involves?
cauchy_dist <- data.frame(cauchy1 = rcauchy(10, location = 0, scale = 1, log = FALSE), cauchy2 = ....... , cauchy3 = ..... )
or you just need to
plot(x, P(x))
and then add lines to it?
You can use ggplot2's stat_function:
ggplot(data.frame(x = c(-5, 5)), aes(x)) +
stat_function(fun = dcauchy, n = 1e3, args = list(location = 0, scale = 0.5), aes(color = "a"), size = 2) +
stat_function(fun = dcauchy, n = 1e3, args = list(location = 0, scale = 1), aes(color = "b"), size = 2) +
stat_function(fun = dcauchy, n = 1e3, args = list(location = 0, scale = 2), aes(color = "c"), size = 2) +
stat_function(fun = dcauchy, n = 1e3, args = list(location = -2, scale = 1), aes(color = "d"), size = 2) +
scale_x_continuous(expand = c(0, 0)) +
scale_color_discrete(name = "",
labels = c("a" = expression(x[0] == 0*","~ gamma == 0.5),
"b" = expression(x[0] == 0*","~ gamma == 1),
"c" = expression(x[0] == 0*","~ gamma == 2),
"d" = expression(x[0] == -2*","~ gamma == 1))) +
ylab("P(x)") +
theme_bw(base_size = 24) +
theme(legend.position = c(0.8, 0.8),
legend.text.align = 0)
You could create the data as follows:
location <- c(0, 0, 0, -2)
scale <- c(0.5, 1, 2, 1)
x <- seq(-5, 5, by = 0.1)
cauchy_data <- Map(function(l, s) dcauchy(x, l, s), location, scale)
names(cauchy_data) <- paste0("cauchy", seq_along(location))
cauchy_tab <- data.frame(x = x, cauchy_data)
head(cauchy_tab)
## x cauchy1 cauchy2 cauchy3 cauchy4
## 1 -5.0 0.006303166 0.01224269 0.02195241 0.03183099
## 2 -4.9 0.006560385 0.01272730 0.02272830 0.03382677
## 3 -4.8 0.006833617 0.01324084 0.02354363 0.03600791
## 4 -4.7 0.007124214 0.01378562 0.02440091 0.03839685
## 5 -4.6 0.007433673 0.01436416 0.02530285 0.04101932
## 6 -4.5 0.007763656 0.01497929 0.02625236 0.04390481
Map is used to apply a function of multiple variables to just as many vectors element by element. Thus, the first list element of cauchy_data will contain the following
dcauchy(x, location[1], scale[1])
and so on. I then put the Cauchy data in a data frame together with the vector of x coordinates, x. So you have the desired data table.
There are, of course, many ways to plot this. I prefer to use ggplot and show you how to plot as an example:
library(tidyr)
library(ggplot2)
curve_labs <- paste(paste("x0 = ", location), paste("gamma = ", scale), sep = ", ")
plot_data <- gather(cauchy_tab, key = curve, value = "P", -x )
ggplot(plot_data, aes(x = x, y = P, colour = curve)) + geom_line() +
scale_colour_discrete(labels = curve_labs)
You could tweak the plot in many ways to get something that more closely resembles the plot from Wikipedia.

How to adjust the point size to the scale of the plot in ggplot2?

Let's generate some data:
x <- -10*cos(seq(0, pi, length.out = 100))+1
y <- 10*seq(0, pi, length.out = 100)
xerr <- rep(2, 100)
yerr <- rep(2, 100)
dd <- as.data.frame(cbind(x, y, xerr, yerr))
Here I have x and y coordinates of some points with their errors, xerr and yerr (for convenience I have set them constant). I would like to represent these errors with the size of the points. This is easily doable:
ggplot() +
geom_point(data = dd, aes(x, y, size = sqrt(xerr^2 + yerr^2)), colour = "gray") +
geom_path(data = dd, aes(x, y), colour = "red", size = .5) +
scale_size_identity() +
theme_bw()
However, the size of these points is defined on a scale that doesn't have any relation with the scale of the plot. Is there a way to adjust the dimension of the points in relation to the scale of the plot? In the above example, the radius of each point should have size equal to 2.828 and not less than one as it is now.
One way is to explicitly draw ellipses with the axes defined by the size of the errors.
x <- -10*cos(seq(0, pi, length.out = 10))+1
y <- 10*seq(0, pi, length.out = 10)
xerr <- runif(10, 1, 5)
yerr <- runif(10, 1, 5)
dd <- as.data.frame(cbind(x, y, xerr, yerr))
dd$frame <- factor(seq(1:10))
For this purpose we define our function to generate ellipses:
ellipseFun <- function(center = c(0, 0), axes = c(1, 1), npoints = 101){
tt <- seq(0,2*pi, length.out = npoints)
xx <- center[1] + axes[1] * cos(tt)
yy <- center[2] + axes[2] * sin(tt)
return(data.frame(x = xx, y = yy))
}
We then generate the matrices for all ellipses:
ddEll <- data.frame()
for(k in levels(dd$frame)){
ddEll <- rbind(ddEll, cbind(as.data.frame(with(dd[dd$frame == k,], ellipseFun(center = c(x, y), axes = c(xerr, yerr), npoints = 101))),frame = k))
}
And, finally, we can plot them:
library(ggplot2)
ggplot() +
geom_point(data = dd, aes(x, y)) +
geom_polygon(data=ddEll, aes(x = x, y = y, group = frame), colour = "gray", fill = "red", alpha = .2) +
scale_size_identity() +
theme_bw() +
xlim(c(-20, 20)) +
ylim(c(-5, 35)) +
coord_fixed()

Resources