plotting density cauchy distribution in R - r

Just curious how can you generate the dcauchy distribution from Wikipedia:
Normally, you have
dcauchy(x, location = 0, scale = 1, log = FALSE)
for one line density p(x) v.s x
I assume in order to generate the diagram from wiki, a data.frame involves?
cauchy_dist <- data.frame(cauchy1 = rcauchy(10, location = 0, scale = 1, log = FALSE), cauchy2 = ....... , cauchy3 = ..... )
or you just need to
plot(x, P(x))
and then add lines to it?

You can use ggplot2's stat_function:
ggplot(data.frame(x = c(-5, 5)), aes(x)) +
stat_function(fun = dcauchy, n = 1e3, args = list(location = 0, scale = 0.5), aes(color = "a"), size = 2) +
stat_function(fun = dcauchy, n = 1e3, args = list(location = 0, scale = 1), aes(color = "b"), size = 2) +
stat_function(fun = dcauchy, n = 1e3, args = list(location = 0, scale = 2), aes(color = "c"), size = 2) +
stat_function(fun = dcauchy, n = 1e3, args = list(location = -2, scale = 1), aes(color = "d"), size = 2) +
scale_x_continuous(expand = c(0, 0)) +
scale_color_discrete(name = "",
labels = c("a" = expression(x[0] == 0*","~ gamma == 0.5),
"b" = expression(x[0] == 0*","~ gamma == 1),
"c" = expression(x[0] == 0*","~ gamma == 2),
"d" = expression(x[0] == -2*","~ gamma == 1))) +
ylab("P(x)") +
theme_bw(base_size = 24) +
theme(legend.position = c(0.8, 0.8),
legend.text.align = 0)

You could create the data as follows:
location <- c(0, 0, 0, -2)
scale <- c(0.5, 1, 2, 1)
x <- seq(-5, 5, by = 0.1)
cauchy_data <- Map(function(l, s) dcauchy(x, l, s), location, scale)
names(cauchy_data) <- paste0("cauchy", seq_along(location))
cauchy_tab <- data.frame(x = x, cauchy_data)
head(cauchy_tab)
## x cauchy1 cauchy2 cauchy3 cauchy4
## 1 -5.0 0.006303166 0.01224269 0.02195241 0.03183099
## 2 -4.9 0.006560385 0.01272730 0.02272830 0.03382677
## 3 -4.8 0.006833617 0.01324084 0.02354363 0.03600791
## 4 -4.7 0.007124214 0.01378562 0.02440091 0.03839685
## 5 -4.6 0.007433673 0.01436416 0.02530285 0.04101932
## 6 -4.5 0.007763656 0.01497929 0.02625236 0.04390481
Map is used to apply a function of multiple variables to just as many vectors element by element. Thus, the first list element of cauchy_data will contain the following
dcauchy(x, location[1], scale[1])
and so on. I then put the Cauchy data in a data frame together with the vector of x coordinates, x. So you have the desired data table.
There are, of course, many ways to plot this. I prefer to use ggplot and show you how to plot as an example:
library(tidyr)
library(ggplot2)
curve_labs <- paste(paste("x0 = ", location), paste("gamma = ", scale), sep = ", ")
plot_data <- gather(cauchy_tab, key = curve, value = "P", -x )
ggplot(plot_data, aes(x = x, y = P, colour = curve)) + geom_line() +
scale_colour_discrete(labels = curve_labs)
You could tweak the plot in many ways to get something that more closely resembles the plot from Wikipedia.

Related

R, ggplot2: How to plot bezier curves that pass through fixed coordinates?

I am helping someone translate hand-drawn economics supply and demand functions into image files that can be included in a Word document. These have been going well using Hmisc::bezier and geom_path modeled after Andrew Heiss's recon plots and using his curve_intersect function. That is, until the author asked that one of the supply curves should pass through a specified set of coordinates. The Hmisc::bezier function only uses the first and last control point as absolute, and bends toward intermediate points so the specified intersection point does not match the curve. I tried creating a spline of 2 bezier curves with the bezier function from the bezier package (v1.1.2, https://cran.r-project.org/web/packages/bezier/bezier.pdf), but this fails with "Error in FUN(X[[i]], ...) : object 'x' not found", which I do not understand or know how to fix.
Please let me know where I am going wrong or if there is a better method! I will include the commented out attempts using various functions. Please excuse the amateurish code, as I am a relative newb at R and ggplot2.
This section not directly relevant to my question
# Graph figures for physical economics, negative oil prices paper
library(reconPlots)
library(dplyr)
library(ggplot2)
library(patchwork)
library(ggrepel)
library(bezier)
library(ggforce)
options(ggrepel.max.time = 1)
options(ggrepel.max.iter = 20000)
#Set seed value for ggrepel
set.seed(52)
# panel (a)
#Set values of curves using the bezier function, each pair of c() values
# is an xy coordinate, and the sets of coordinates control the shape of the
# curve
supply <- Hmisc::bezier(c(1, 5, 6), c(3, 4, 9)) %>%
as_data_frame()
demand <- Hmisc::bezier(c(0, 9, 9), c(6, 6, 6)) %>%
as_data_frame()
label_height <- Hmisc::bezier(c(0, 9, 9), c(8, 8, 8)) %>%
as_data_frame()
# Calculate the intersections of the two curves
intersections <- bind_rows(curve_intersect(supply, demand))
# Calculate point where the curve label(s) intersect a specified height
supply_label <- bind_rows(curve_intersect(supply, label_height))
labels <- data_frame(label = expression("PS"[CR]^DRL),
x = supply_label$x,
y = supply_label$y)
production <- ggplot(mapping = aes(x = x, y = y)) +
#Draw the supply curve. Demand is not drawn in this figure, but the
# intersections of an imaginary demand curve are used to illustrate P0
# and Q0, the intersection point, and the dotted lines
geom_path(data = supply, color = "#0073D9", size = 1) +
geom_segment(data = intersections,
aes(x = x, y = 0, xend = x, yend = y), lty = "dotted") +
geom_segment(data = intersections,
aes(x = 0, y = y, xend = x, yend = y), lty = "dotted") +
#Draw the supply curve label using the intersection calculated above, using
# GGrepel so that the labels do not overlap the curve line
geom_text_repel(data = labels
,aes(x = x, y = y, label = label)
,parse = TRUE
,direction = "x"
,force = 3
,force_pull = 0.1
,hjust = 0
,min.segment.length = 0
) +
#Draw the intersection point based on intersection function between supply
# and the phantom flat demand curve at height y=6
geom_point(data = intersections, size = 3) +
#Use scale functions to set y-axis label, axis intersection point labels,
# and limits of the viewing area
scale_x_continuous(expand = c(0, 0), breaks = intersections$x
,labels = expression(Q[CR]^{DRL-PS})
,limits=c(0,9)
) +
scale_y_continuous(expand = c(0, 0), breaks = c(intersections$y, 9)
,labels = c(expression(P[CR]==frac("$",brl))
,expression(P[CR]))
,limits=c(0,9)
) +
#Use labs function to set x-axis title and title of each graph using the
# caption function so that it displays on the bottom
labs(x = expression(frac(Barrels,Week)),
caption = expression(atop("(a) Driller Production Supply", "of Crude Oil"))
) +
#Set classic theme, x-axis title on right-hand side using larger font of
# relative size 1.2, graph title on left-hand side using same larger font
theme_classic() +
theme(axis.title.y = element_blank(),
axis.title.x = element_text(hjust = 1),
axis.text = element_text(size=rel(1.2)),
plot.caption = element_text(hjust = 0.5, size=rel(1.2))
) +
coord_equal()
# Save the intersections so we can set the same quantity, price for panel (c)
specified_intersections = intersections
# Panel (b)
supply <- Hmisc::bezier(c(3.99, 4), c(0, 9)) %>%
as_data_frame()
demand <- Hmisc::bezier(c(2, 3, 4, 5), c(9, 6.5, 6, 5.5)) %>%
as_data_frame()
demand_capacity <- Hmisc::bezier(c(5, 5), c(0, 5.5)) %>%
as_data_frame()
supply_capacity <- Hmisc::bezier(c(4.999, 5), c(0, 9)) %>%
as_data_frame()
supply_label_height <- Hmisc::bezier(c(0, 9), c(9, 9)) %>%
as_data_frame()
demand_label_height <- Hmisc::bezier(c(0, 9), c(8, 8)) %>%
as_data_frame()
capacity_label_height <- Hmisc::bezier(c(0, 9), c(9, 9)) %>%
as_data_frame()
# Calculate the intersections of the two curves
intersections <- bind_rows(curve_intersect(supply,
demand))
supply_label <- bind_rows(curve_intersect(supply
,supply_label_height))
demand_label <- bind_rows(curve_intersect(demand
,demand_label_height))
capacity_label <- bind_rows(curve_intersect(supply_capacity
,capacity_label_height))
labels <- data_frame(label = c(expression("OD"[CR]^DRL),expression("OS"[CR]^DRL)
,expression("Q"[CR]^CAP)
),
x = c(demand_label$x, supply_label$x
, capacity_label$x
),
y = c(demand_label$y, supply_label$y
, capacity_label$y
)
)
inventory <- ggplot(mapping = aes(x = x, y = y)) +
geom_path(data = supply, color = "#0073D9", size = 1) +
geom_path(data = demand, color = "#FF4036", size = 1) +
geom_path(data = demand_capacity, color = "#FF4036", size = 1) +
geom_path(data = supply_capacity, color = "#0073D9", size = 1, lty = "dashed") +
geom_segment(data = intersections,
aes(x = 0, y = y, xend = x, yend = y), lty = "dotted") +
geom_text_repel(data = labels
,aes(x = x, y = y, label = label)
,parse = TRUE
,direction = "x"
,force = 3
,force_pull = 0.1
,hjust = c(0, 0, 1)
,min.segment.length = 0
) +
geom_point(data = intersections, size = 3) +
scale_x_continuous(expand = c(0, 0), breaks = c(intersections$x
, 5),
labels = c(expression(paste(Q[CR]^{DRL-OS},phantom(12345)))
,expression(Q[CR]^CAP)
)
, limits=c(0,9)) +
scale_y_continuous(expand = c(0, 0), breaks = c(intersections$y, 9),
labels = c(expression(P[CR]),expression(P[CR]))
, limits=c(0,9)) +
labs(x = "Barrels",
caption = expression(atop("(b) Driller Storage / Ownership", "of Crude Oil"))
) +
theme_classic() +
theme(axis.title.y = element_blank(),
axis.title.x = element_text(hjust = 1),
axis.text = element_text(size=rel(1.2)),
plot.caption = element_text(hjust = 0.5, size=rel(1.2))
) +
coord_equal()
Relevant section
# panel (c)
# ggforce package method
#supply <- list(c(1, 4, specified_intersections$x, 5, 7),
# c(3, 4, specified_intersections$y, 7, 9)) %>%
# as_data_frame()
# bezier package method: Fails with "Error in FUN(X[[i]], ...) : object 'x' not found"
t <- seq(0, 2, length=10)
p <- list(c(1, 4, specified_intersections$x, 7, 8),
c(3, 4, specified_intersections$y, 6, 9))
#p <- matrix(c(1,3, 4,4, specified_intersections$x,specified_intersections$y,
# 7,6, 8,9), nrow=5, ncol=2, byrow=TRUE)
supply <- bezier(t=t, p=p) %>%
as_data_frame()
# Original: Fails because it does not pass through the specified intersection
#supply <- Hmisc::bezier(c(1, specified_intersections$x, 8),
# c(3, specified_intersections$y, 9)) %>%
# as_data_frame()
# Hmisc method: Fails because there is no way to get the two curves to appear
# contiguous
#supply1 <- Hmisc::bezier(c(1, 4, specified_intersections$x),
# c(3, 4, specified_intersections$y)) %>%
# as_data_frame()
#supply2 <- Hmisc::bezier(c(specified_intersections$x, 6, 7),
# c(specified_intersections$y, 8, 9)) %>%
# as_data_frame()
#demand <- Hmisc::bezier(c(0, 9), c(specified_intersections$y, specified_intersections$y)) %>%
# as_data_frame()
label_height <- Hmisc::bezier(c(0, 9), c(8, 8)) %>%
as_data_frame()
# Calculate the intersections of the two curves
#intersections <- bind_rows(curve_intersect(supply, demand))
#supply_label <- bind_rows(curve_intersect(supply,
# label_height))
#labels <- data_frame(label = expression("SS"[CR]^DRL),
# x = supply_label$x,
# y = supply_label$y)
sales <- ggplot(mapping = aes(x = x, y = y)) +
# ggforce package method
# geom_bspline(data = supply, color = "#0073D9", size = 1) +
# Original geom_path method
geom_path(data = supply, color = "#0073D9", size = 1) +
# Supply 1 and 2 for Hmisc method
# geom_path(data = supply1, color = "#0073D9", size = 1) +
# geom_path(data = supply2, color = "#0073D9", size = 1) +
geom_segment(data = specified_intersections,
aes(x = x, y = 0, xend = x, yend = y), lty = "dotted") +
geom_segment(data = specified_intersections,
aes(x = 0, y = y, xend = x, yend = y), lty = "dotted") +
# geom_text_repel(data = labels
# ,aes(x = x, y = y, label = label)
# ,parse = TRUE
# ,direction = "x"
# ,force = 3
# ,force_pull = 0.1
# ,hjust = 0
# ,min.segment.length = 0
# ) +
geom_point(data = specified_intersections, size = 3) +
scale_x_continuous(expand = c(0, 0), breaks = specified_intersections$x,
labels = expression(Q[CR]^{DRL-SS}), limits=c(0,9)) +
scale_y_continuous(expand = c(0, 0), breaks = c(specified_intersections$y, 9),
labels = c(expression(P[CR]),expression(P[CR]))) +
labs(x = expression(frac(Barrels,Week)),
caption = expression(atop("(c) Driller Sales Supply", "of Crude Oil"))
) +
theme_classic() +
theme(axis.title.y = element_blank(),
axis.title.x = element_text(hjust = 1),
axis.text = element_text(size=rel(1.2)),
plot.caption = element_text(hjust = 0.5, size=rel(1.2))
) +
coord_equal()
patchwork <- (production | inventory | sales)
patchwork
Graphs before implementation of fixed coordinates. Need to move panel (c) intersection point to match panel (a)
I solved the "Error in FUN(X[[i]], ...) : object 'x' not found" by printing the supply variable and noticing that the bezier function names its rows V1,V2 and not x,y. I needed to set the aesthetics of the geom_path to the correct mapping.
Relevant Section, trimmed to only the bezier method
# panel (c)
# bezier package method
t <- seq(0, 2, length = 100)
p <- matrix(c(1,3, 4,4, specified_intersections$x,specified_intersections$y,
7,6, 8,9), nrow=5, ncol=2, byrow=TRUE)
supply <- bezier::bezier(t=t, p=p, deg=2) %>%
as_data_frame()
sales <- ggplot(mapping = aes(x = x, y = y)) +
# Original geom_path method
geom_path(data = supply, mapping = aes(x = V1, y = V2),
color = "#0073D9", size = 1, inherit.aes = FALSE) +
geom_segment(data = specified_intersections,
aes(x = x, y = 0, xend = x, yend = y), lty = "dotted") +
geom_segment(data = specified_intersections,
aes(x = 0, y = y, xend = x, yend = y), lty = "dotted") +
geom_point(data = specified_intersections, size = 3) +
scale_x_continuous(expand = c(0, 0), breaks = specified_intersections$x,
labels = expression(Q[CR]^{DRL-SS}), limits=c(0,9)) +
scale_y_continuous(expand = c(0, 0), breaks = c(specified_intersections$y, 9),
labels = c(expression(P[CR]),expression(P[CR]))) +
labs(x = expression(frac(Barrels,Week)),
caption = expression(atop("(c) Driller Sales Supply", "of Crude Oil"))
) +
theme_classic() +
theme(axis.title.y = element_blank(),
axis.title.x = element_text(hjust = 1),
axis.text = element_text(size=rel(1.2)),
plot.caption = element_text(hjust = 0.5, size=rel(1.2))
) +
coord_equal()
patchwork <- (production | inventory | sales)
patchwork
This does not solve my larger problem of needing a smooth curve that passes through a specified set of coordinates, as it produces two bezier curves that do not match.
I will do some research on using functions to specify bezier curves and find out if there is some mathematical or programmatic way to specify a bezier curve that passes through a set of fixed coordinates. If I find one, I'll edit this answer.
If anyone knows how to accomplish this, I would appreciate any help!
Kinked bezier curves

Convert plot to ggplot and add horizontal lines until specific points

I have creted the plot below using the base R plot() function but I would like to convert it to ggplot() and also add horizontal lines like in the example picture but until the crossing with the graphs and not a full continuing horizontal line until the end.
# Figure 3.1 & 3.2
# curve(logistic(pnorm(x), a=1, d=0),-3,3,ylab="Probability of x",
# main="Logistic transform of x",xlab="z score units")
# #logistic with a=1.702 is almost the same as pnorm
# curve(logistic(pnorm(x), d=1),add=TRUE)
# Set x-axis values
theta <- seq(from = -10, to = 10, by = 0.001)
# Code for plot1
B_i <- 1
B_j <- -1
P_item1_rasch <- NULL
P_item2_rasch <- NULL
for (i in 1:length(theta)){
P_item1_rasch[i] <- (exp((theta[i]-B_i)))/(1+(exp((theta[i]-B_i))))
P_item2_rasch[i] <- (exp((theta[i]-B_j)))/(1+(exp((theta[i]-B_j))))
}
#select the colors that will be used
library(RColorBrewer)
#all palette available from RColorBrewer
display.brewer.all()
#we will select the first 4 colors in the Set1 palette
cols<-brewer.pal(n=4,name="Set1")
#cols contain the names of four different colors
plot(theta, P_item1_rasch, xlim=c(-4,4), ylim=c(0,1))
lines(theta, P_item2_rasch,col=cols[2])
# Add lines at the values below, but only half as in the example Figures
# abline(h=0.5)
# abline(v=-1)
# abline(v=1)
Perhaps something like this?
theta <- seq(from = -10, to = 10, by = 0.001)
# Code for plot1
B_i <- 1
B_j <- -1
P_item0_rasch <- NULL
P_item1_rasch <- NULL
P_item2_rasch <- NULL
for (i in 1:length(theta)){
P_item0_rasch[i] <- (exp((theta[i])))/(1+(exp((theta[i]))))
P_item1_rasch[i] <- (exp((theta[i]-B_i)))/(1+(exp((theta[i]-B_i))))
P_item2_rasch[i] <- (exp((theta[i]-B_j)))/(1+(exp((theta[i]-B_j))))
}
df <- data.frame(theta = rep(theta, 3),
P_item_rasch = c(P_item0_rasch, P_item1_rasch, P_item2_rasch),
number = factor(rep(1:3, each = length(theta))))
library(ggplot2)
ggplot(df, aes(theta, P_item_rasch, color = number)) +
geom_line() +
lims(x = c(-6, 6)) +
geom_segment(x = -1, xend = 1, y = 0.5, yend = 0.5, lty = 2) +
geom_vline(xintercept = c(-1, 0, 1), lty = 2) +
scale_color_manual(values = RColorBrewer::brewer.pal(4, "Set1")[-1]) +
theme_classic() +
theme(legend.position = "none")
#> Warning: Removed 24000 row(s) containing missing values (geom_path).
Edit
The OP changed the question to alter the requirements. Here is a way to achieve them:
ggplot(df, aes(theta, P_item_rasch)) +
geom_line(aes(color = number)) +
lims(x = c(-6, 6)) +
# Line between curves
geom_segment(x = -1, xend = 1, y = 0.5, yend = 0.5, lty = 2) +
# Optional line on left
geom_segment(x = -Inf, xend = -1, y = 0.5, yend = 0.5, lty = 2) +
# Lower lines
geom_segment(data = data.frame(theta = c(-1, 0, 1), P_item_rasch = rep(-Inf, 3)),
aes(xend = theta, yend = 0.5), lty = 2) +
# Upper lines
#geom_segment(data = data.frame(theta = c(-1, 0, 1), P_item_rasch = rep(Inf, 3)),
# aes(xend = theta, yend = 0.5), lty = 2) +
scale_color_manual(values = RColorBrewer::brewer.pal(4, "Set1")[-1]) +
theme_classic() +
theme(legend.position = "none")
Created on 2020-12-06 by the reprex package (v0.3.0)

How can I draw diamonds in R with ggplot2?

I am trying to replicate the following picture in R, in particular with ggplot2
I was able to draw the red rss contour lines but I've no idea how to draw a diamond (like the one in the left picture). The "expected Output" should be a way to draw a diamond with a given side length.
EDIT: Here is a short reproducible example to add the diamond randomly inside the following plot:
mlb<- read.table('https://umich.instructure.com/files/330381/download?download_frd=1', as.is=T, header=T)
str(mlb)
fit<-lm(Height~Weight+Age-1, data = as.data.frame(scale(mlb[,4:6])))
points = data.frame(x=c(0,fit$coefficients[1]),y=c(0,fit$coefficients[2]),z=c("(0,0)","OLS Coef"))
Y=scale(mlb$Height)
X = scale(mlb[,c(5,6)])
beta1=seq(-0.556, 1.556, length.out = 100)
beta2=seq(-0.661, 0.3386, length.out = 100)
df <- expand.grid(beta1 = beta1, beta2 = beta2)
b = as.matrix(df)
df$sse <- rep(t(Y)%*%Y,100*100) - 2*b%*%t(X)%*%Y + diag(b%*%t(X)%*%X%*%t(b))
base <- ggplot() +
stat_contour(data=df, aes(beta1, beta2, z = sse),breaks = round(quantile(df$sse, seq(0, 0.2, 0.03)), 0),
size = 0.5,color="darkorchid2",alpha=0.8) +
scale_x_continuous(limits = c(-0.4,1))+
scale_y_continuous(limits = c(-0.55,0.4))+
geom_point(data = points,aes(x,y))+
geom_text(data = points,aes(x,y,label=z),vjust = 2,size=3.5)
base
You can draw shapes with geom_polygon.
library(ggplot2)
df <- data.frame(x = c(1, 0, -1, 0), y = c(0, 1, 0, -1))
ggplot(df) + geom_polygon(aes(x = x, y = y))
If you want to generate the coordinates from a center and a side length, you can transform a base matrix. You can also combine this with an existing plot by supplying the coordinates to the data argument of the geom instead of to ggplot() as shown. Change the sqrt2 scaling if you want the corner-to-center as the argument instead of the side length.
diamond <- function(side_length, center) {
base <- matrix(c(1, 0, 0, 1, -1, 0, 0, -1), nrow = 2) * sqrt(2) / 2
trans <- (base * side_length) + center
as.data.frame(t(trans))
}
ggplot() + geom_polygon(data = diamond(2, c(1, 2)), mapping = aes(x = V1, y = V2))
Here's an example of adding it in to your provided data. Note that I put it before (underneath) the text, and named the arguments to be clear (probably the source of that object coercible by fortify error.
mlb <- read.table("https://umich.instructure.com/files/330381/download?download_frd=1", as.is = T, header = T)
fit <- lm(Height ~ Weight + Age - 1, data = as.data.frame(scale(mlb[, 4:6])))
points <- data.frame(x = c(0, fit$coefficients[1]), y = c(0, fit$coefficients[2]), z = c("(0,0)", "OLS Coef"))
Y <- scale(mlb$Height)
X <- scale(mlb[, c(5, 6)])
beta1 <- seq(-0.556, 1.556, length.out = 100)
beta2 <- seq(-0.661, 0.3386, length.out = 100)
df <- expand.grid(beta1 = beta1, beta2 = beta2)
b <- as.matrix(df)
df$sse <- rep(t(Y) %*% Y, 100 * 100) - 2 * b %*% t(X) %*% Y + diag(b %*% t(X) %*% X %*% t(b))
ggplot(df) +
stat_contour(aes(beta1, beta2, z = sse),
breaks = round(quantile(df$sse, seq(0, 0.2, 0.03)), 0),
size = 0.5, color = "darkorchid2", alpha = 0.8
) +
geom_polygon(data = diamond(0.1, c(0, 0)), mapping = aes(x = V1, y = V2), fill = "cadetblue1") +
scale_x_continuous(limits = c(-0.4, 1)) +
scale_y_continuous(limits = c(-0.55, 0.4)) +
geom_point(data = points, aes(x, y)) +
geom_text(data = points, aes(x, y, label = z), vjust = 2, size = 3.5)
#> Warning: Removed 4215 rows containing non-finite values (stat_contour).
Created on 2018-08-01 by the reprex package (v0.2.0).

custom varwidth in ggplot2

df = data.frame(a = c(0, 0), b = c(17, 15),
c = c(35,37), d = c(55,57),
e = c(80, 85), x = c(1, 2),
w1 = c(20, 30), w2 = c(0.2, 0.3))
ggplot(df) +
geom_boxplot(aes(x = x, ymin = a, lower = b, middle = c, upper = d, ymax = e),
stat = "identity")
I have a dataframe containing the values of each quantile for a boxplot, (a-e).
Is it possible use columns w1 or w2 to define the width of the boxplots in ggplot?
My desired result is similar to using varwidth in graphics::boxplot but with custom widths.
graphics::boxplot(mpg~gear, mtcars, varwidth = T)
Don't think this is a duplicate since it seems like the weight argument doesn't work with stat = identity.
Looks like it can be done by using stat_summary.
df = data.frame(a = c(0, 0), b = c(17, 15),
c = c(35,37), d = c(55,57),
e = c(80, 85), x = factor(c(1, 2)),
w = c(0.2, 0.3))
df2 = reshape2::melt(data = df, id = "x")
ff = function(x){
data.frame(
ymin = x[1],
lower = x[2],
middle = x[3],
upper = x[4],
ymax = x[5],
width = x[6]
)
}
ggplot(df2, aes(x, value)) + stat_summary(fun.data = ff, geom = "boxplot")
But i am not sure if this is the best way to do it.

Producing a "fuzzy" RD plot with ggplot2

My question is similar to this but the answers there will not work for me. Basically, I'm trying to produce a regression discontinuity plot with a "fuzzy" design that uses all the data for the treatment and control groups, but only plots the regression line within the "range" of the treatment and control groups.
Below, I've simulated some data and produced the fuzzy RD plot with base graphics. I'm hoping to replicate this plot with ggplot2. Note that the most important part of this is that the light blue regression line is fit using all the blue points, while the peach colored regression line is fit using all the red points, despite only being plotted over the ranges in which individuals were intended to receive treatment. That's the part I'm having a hard time replicating in ggplot.
I'd like to move to ggplot because I'd like to use faceting to produce this same plot across various units in which participants were nested. In the code below, I show a non-example using geom_smooth. When there's no fuzziness within a group, it works fine, but otherwise it fails. If I could get geom_smooth to be limited to only specific ranges, I think I'd be set. Any and all help is appreciated.
Simulate data
library(MASS)
mu <- c(0, 0)
sigma <- matrix(c(1, 0.7, 0.7, 1), ncol = 2)
set.seed(100)
d <- as.data.frame(mvrnorm(1e3, mu, sigma))
# Create treatment variable
d$treat <- ifelse(d$V1 <= 0, 1, 0)
# Introduce fuzziness
d$treat[d$treat == 1][sample(100)] <- 0
d$treat[d$treat == 0][sample(100)] <- 1
# Treatment effect
d$V2[d$treat == 1] <- d$V2[d$treat == 1] + 0.5
# Add grouping factor
d$group <- gl(9, 1e3/9)
Produce regression discontinuity plot with base
library(RColorBrewer)
pal <- brewer.pal(5, "RdBu")
color <- d$treat
color[color == 0] <- pal[1]
color[color == 1] <- pal[5]
plot(V2 ~ V1,
data = d,
col = color,
bty = "n")
abline(v = 0, col = "gray", lwd = 3, lty = 2)
# Fit model
m <- lm(V2 ~ V1 + treat, data = d)
# predicted achievement for treatment group
pred_treat <- predict(m,
newdata = data.frame(V1 = seq(-3, 0, 0.1),
treat = 1))
# predicted achievement for control group
pred_no_treat <- predict(m,
newdata = data.frame(V1 = seq(0, 4, 0.1),
treat = 0))
# Add predicted achievement lines
lines(seq(-3, 0, 0.1), pred_treat, col = pal[4], lwd = 3)
lines(seq(0, 4, 0.1), pred_no_treat, col = pal[2], lwd = 3)
# Add legend
legend("bottomright",
legend = c("Treatment", "Control"),
lty = 1,
lwd = 2,
col = c(pal[4], pal[2]),
box.lwd = 0)
non-example with ggplot
d$treat <- factor(d$treat, labels = c("Control", "Treatment"))
library(ggplot2)
ggplot(d, aes(V1, V2, group = treat)) +
geom_point(aes(color = treat)) +
geom_smooth(method = "lm", aes(color = treat)) +
facet_wrap(~group)
Notice the regression lines extending past the treatment range for groups 1 and 2.
There's probably a more graceful way to make the lines with geom_smooth, but it can be hacked together with geom_segment. Munge the data.frames outside of the plotting call if you like.
ggplot(d, aes(x = V1, y = V2, color = factor(treat, labels = c('Control', 'Treatment')))) +
geom_point(shape = 21) +
scale_color_brewer(NULL, type = 'qual', palette = 6) +
geom_vline(aes(xintercept = 0), color = 'grey', size = 1, linetype = 'dashed') +
geom_segment(data = data.frame(t(predict(m, data.frame(V1 = c(-3, 0), treat = 1)))),
aes(x = -3, xend = 0, y = X1, yend = X2), color = pal[4], size = 1) +
geom_segment(data = data.frame(t(predict(m, data.frame(V1 = c(0, 4), treat = 0)))),
aes(x = 0, xend = 4, y = X1, yend = X2), color = pal[2], size = 1)
Another option is geom_path:
df <- data.frame(V1 = c(-3, 0, 0, 4), treat = c(1, 1, 0, 0))
df <- cbind(df, V2 = predict(m, df))
ggplot(d, aes(x = V1, y = V2, color = factor(treat, labels = c('Control', 'Treatment')))) +
geom_point(shape = 21) +
geom_vline(aes(xintercept = 0), color = 'grey', size = 1, linetype = 'dashed') +
scale_color_brewer(NULL, type = 'qual', palette = 6) +
geom_path(data = df, size = 1)
For the edit with facets, if I understand what you want correctly, you can calculate a model for each group with lapply and predict for each group. Here I'm recombine with dplyr::bind_rows instead of do.call(rbind, ...) for the .id parameter to insert the group number from the list element name, though there are other ways to do the same thing.
df <- data.frame(V1 = c(-3, 0, 0, 4), treat = c('Treatment', 'Treatment', 'Control', 'Control'))
m_list <- lapply(split(d, d$group), function(x){lm(V2 ~ V1 + treat, data = x)})
df <- dplyr::bind_rows(lapply(m_list, function(x){cbind(df, V2 = predict(x, df))}), .id = 'group')
ggplot(d, aes(x = V1, y = V2, color = treat)) +
geom_point(shape = 21) +
geom_vline(aes(xintercept = 0), color = 'grey', size = 1, linetype = 'dashed') +
geom_path(data = df, size = 1) +
scale_color_brewer(NULL, type = 'qual', palette = 6) +
facet_wrap(~group)

Resources