Related
I am trying to draw a forest plot with different groups. The code I'm using looks like the following:
d = data.frame(Estimate = c(1.8,1.9,2.1,2.4,2.7,2.5),
Group = rep(c('Group A', 'Group B'), each = 3),
Method = rep(c('Method 1', 'Method 2', 'Method 3'), 2))
d$Lower = d$Estimate - 0.3
d$Upper = d$Estimate + 0.3
ggplot(data = d, aes(y = Group, x = Estimate, xmin = Lower, xmax = Upper, color = Method)) +
geom_point(size = 2, position=position_dodge(width = 0.5)) +
geom_linerange(position=position_dodge(width = 0.5)) +
geom_vline(xintercept = c(2, 2.5), linetype = "dashed")
And the resulting plot:
The vertical lines (2, 2.5) are the true group means. I want to limit these vertical lines to be within each group (i.e., the first one from bottom to the middle, the second one middle to top). Anyone know how to do this?
I've tried geom_segment() function but I think it requires a numerical y input, while it's a factor here.
Factors plotted on an axis are "really" numeric, but with labels added, so you can go ahead and add numeric segments:
ggplot(data = d, aes(y = Group, x = Estimate, xmin = Lower, xmax = Upper,
color = Method)) +
geom_point(size = 2, position=position_dodge(width = 0.5)) +
geom_linerange(position=position_dodge(width = 0.5)) +
geom_segment(data = data.frame(y = c(0.67, 1.67), x = c(2, 2.5),
xend = c(2, 2.5), yend = c(1.33, 2.33)),
aes(x, y, xend = xend, yend = yend),
inherit.aes = FALSE, linetype = 2)
Or, with a few tweaks:
ggplot(data = d, aes(y = Group, x = Estimate, xmin = Lower, xmax = Upper,
color = Method)) +
geom_linerange(position=position_dodge(width = 0.5), size = 1) +
geom_point(size = 3, position=position_dodge(width = 0.5), shape = 21,
fill = "white") +
geom_segment(data = data.frame(y = c(0.67, 1.67), x = c(2, 2.5),
xend = c(2, 2.5), yend = c(1.33, 2.33)),
aes(x, y, xend = xend, yend = yend),
inherit.aes = FALSE, linetype = 2) +
annotate("text", c(2, 2.5), c(1.5, 2.5), size = 6,
label = c("Group mean = 2", "Group mean = 2.5")) +
theme_minimal(base_size = 20) +
scale_color_brewer(palette = "Set1")
I want to create a graph where I can change the line size for each line c(1,2,3) and the alpha values for each line c(0.5,0.6,0.7). I tried to use scale_size_manual but it didn't make any difference. Any ideas on how to proceed?
var <- c("T","T","T","M","M","M","A","A","A")
val <- rnorm(12,4,5)
x <- c(1:12)
df <- data.frame(var,val,x)
ggplot(aes(x= x , y = val, color = var, group = var), data = df) +
scale_color_manual(values = c("grey","blue","black")) + geom_smooth(aes(x = x, y = val), formula = "y ~ x", method = "loess",se = FALSE, size = 1) + scale_x_continuous(breaks=seq(1, 12, 1), limits=c(1, 12)) + scale_size_manual(values = c(1,2,3))
To set the size and alpha values for your lines you have to map on aesthetics. Otherwise scale_size_manual will have no effect:
library(ggplot2)
ggplot(aes(x = x, y = val, color = var, group = var), data = df) +
scale_color_manual(values = c("grey", "blue", "black")) +
geom_smooth(aes(x = x, y = val, size = var, alpha = var), formula = "y ~ x", method = "loess", se = FALSE) +
scale_x_continuous(breaks = seq(1, 12, 1), limits = c(1, 12)) +
scale_size_manual(values = c(1, 2, 3)) +
scale_alpha_manual(values = c(.5, .6, .7))
I am making errorbar plot with different linetype
library(ggplot2)
library(plyr)
# Create dataset:
DF <- data.frame(
group = rep(c("a", "b", "c", "d"),each=10),
Ydata = c(seq(1,10,1),seq(5,50,5),seq(20,11,-1),seq(0.3,3,0.3)),
Xdata = c(seq(1,10,1),seq(5,50,5),seq(20,11,-1),seq(0.3,3,0.3)))
# Summarise data:
subDF <- ddply(DF, .(group), summarise,
X = mean(Xdata), Y = mean(Ydata),
X_sd = sd(Xdata, na.rm = T), Y_sd = sd(Ydata))
# Plot data with error bars:
ggplot(subDF, aes(x = X, y = Y,linetype = group)) +
geom_errorbar(aes(x = X,
ymin = (Y-Y_sd),
ymax = (Y+Y_sd)),
width = 1, size = 0.5) +
geom_point(cex = 3) +
scale_linetype_manual(values = c("solid","twodash","longdash","longdash"))
This give me the following plot, but I want the end whiskers to be solid. Anyone could help?
One option to achieve your desired result would be to switch to geom_linerange and add the whiskers via geom_segment like so:
library(ggplot2)
width <- .3
# Plot data with error bars:
ggplot(subDF, aes(x = X, y = Y, linetype = group)) +
geom_segment(aes(
x = X - width, xend = X + width,
y = Y - Y_sd, yend = Y - Y_sd
),
size = 0.5, linetype = "solid"
) +
geom_segment(aes(
x = X - width, xend = X + width,
y = Y + Y_sd, yend = Y + Y_sd
),
size = 0.5, linetype = "solid"
) +
geom_linerange(aes(
x = X,
ymin = (Y - Y_sd),
ymax = (Y + Y_sd)
),
size = 0.5
) +
geom_point(cex = 3) +
scale_linetype_manual(values = c("solid", "twodash", "longdash", "longdash"))
I mean, I'd want to paint only the square area P1 X (Q1-Q2).
Not the trapezoid (P2+P1) X (Q1-Q2/2).
Here's code that I used. I used ggplot and dplyr. How can I solve this problem?
How can I paint the only square area not the trapezoied area!!!!
library(ggplot2)
library(dplyr)
supply <- Hmisc::bezier(x = c(1, 8, 9),
y = c(1, 5, 9)) %>%
as_data_frame()
demand <- Hmisc::bezier(c(1, 3, 9),
c(9, 3, 1)) %>%
as_data_frame()
fun_supply <- approxfun(supply$x, supply$y, rule = 2)
fun_supply(c(2, 6, 8))
fun_demand <- approxfun(demand$x, demand$y, rule = 2)
intersection_funs <- uniroot(function(x) fun_supply(x) - fun_demand(x), c(1, 9))
intersection_funs
y_root <- fun_demand(intersection_funs$root)
curve_intersect <- function(curve1, curve2) {
# Approximate the functional form of both curves
curve1_f <- approxfun(curve1$x, curve1$y, rule = 2)
curve2_f <- approxfun(curve2$x, curve2$y, rule = 2)
# Calculate the intersection of curve 1 and curve 2 along the x-axis
point_x <- uniroot(function(x) curve1_f(x) - curve2_f(x),
c(min(curve1$x), max(curve1$x)))$root
# Find where point_x is in curve 2
point_y <- curve2_f(point_x)
# Finish
return(list(x = point_x, y = point_y))
}
intersection_xy <- curve_intersect(supply, demand)
intersection_xy
intersection_xy_df <- intersection_xy %>% as_data_frame()
demand2 <- Hmisc::bezier(c(1.5, 3.5, 9.5),
c(9.5, 3.5, 1.5)) %>%
as_data_frame()
supply2 <- Hmisc::bezier(c(1,7,8),
c(3,7,11)) %>%
as_data_frame()
#Make a data frame of the intersections of the supply curve and both demand curves
intersections <- bind_rows(curve_intersect(supply, demand),
curve_intersect(supply2, demand2))
plot_labels <- data_frame(label = c("S", "D","S[1]","D[1]"),
x = c(9, 1, 6.5, 3),
y = c(8, 8, 8, 8))
ggplot(mapping = aes(x = x, y = y)) +
geom_path(data = supply, color = "#0073D9", size = 1, linetype = "dashed") +
geom_path(data = demand, color = "#FF4036", size = 1, linetype = "dashed") +
geom_path(data = demand2, color = "#FF4036", size = 1) +
geom_path(data = supply2, color = "#0073D9", size = 1) +
geom_segment(data = intersections,
aes(x = x, y = 0, xend = x, yend = y), lty = "dotted") +
geom_segment(data = intersections,
aes(x = 0, y = y, xend = x, yend = y), lty = "dotted") +
geom_segment(data = intersections,
aes(x = x, y = y, xend = x, yend= y), lty = "dotted") +
geom_point(data = intersections, size = 3) +
geom_text(data = plot_labels,
aes(x = x, y = y, label = label), parse = TRUE) +
scale_x_continuous(expand = c(0, 0), breaks = intersections$x,
labels = expression(Q[1], Q[2])) +
scale_y_continuous(expand = c(0, 0), breaks = intersections$y,
labels = expression(P[1], P[2]))+
labs(x = "Quantity", y = "Price") +
geom_area(data =intersections, fill="#9999FF", alpha=0.5) +
theme_classic() +
coord_equal()
Could you help me to paint the area that I mentioned.
You might try adding geom_rect(data=intersections[1,], aes(xmin=0, xmax=x, ymin=0, ymax=y),fill='green', alpha=0.5) to your plot call.
So we have:
ggplot(mapping = aes(x = x, y = y)) +
geom_path(data = supply, color = "#0073D9", size = 1, linetype = "dashed") +
geom_path(data = demand, color = "#FF4036", size = 1, linetype = "dashed") +
geom_path(data = demand2, color = "#FF4036", size = 1) +
geom_path(data = supply2, color = "#0073D9", size = 1) +
geom_segment(data = intersections,
aes(x = x, y = 0, xend = x, yend = y), lty = "dotted") +
geom_segment(data = intersections,
aes(x = 0, y = y, xend = x, yend = y), lty = "dotted") +
geom_segment(data = intersections,
aes(x = x, y = y, xend = x, yend= y), lty = "dotted") +
geom_point(data = intersections, size = 3) +
geom_text(data = plot_labels,
aes(x = x, y = y, label = label), parse = TRUE) +
scale_x_continuous(expand = c(0, 0), breaks = intersections$x,
labels = expression(Q[1], Q[2])) +
scale_y_continuous(expand = c(0, 0), breaks = intersections$y,
labels = expression(P[1], P[2]))+
labs(x = "Quantity", y = "Price") +
geom_area(data =intersections, fill="#9999FF", alpha=0.5) +
theme_classic() +
coord_equal()+
geom_rect(data=intersections[1,], aes(xmin=0, xmax=x, ymin=0, ymax=y),fill='green', alpha=0.5)
Edit based on comment:
geom_rect(data=intersections, aes(xmin=x[2], xmax=x[1], ymin=0, ymax=y[1]),fill='green', alpha=0.5)
Though the answer from J Con is in depth and does provide a solution, a cleaner approach in ggplot2 may be to use the annotate function, with geom and other arguments set appropriately. (See link for help page.)
This is because using something like geom_rect involves passing positions and so on as a data.frame, which is a bit more of a hack as, conceptually, from a grammar of graphics perspective, the data layer and the annotation layer are distinct: the act of mapping data variables to graphical aesthetics in a systematic and objective way, and of marking up features within the dataset in a piecemeal and subjective way, are separate activities, and using annotate explicitly for the latter purpose makes this divide clearer in terms of the code and concepts.
Edit
To be more specific, the annotate equivalent of the following:
geom_rect(data=intersections, aes(xmin=x[2], xmax=x[1], ymin=0, ymax=y[1]),fill='green', alpha=0.5)
Would likely be as follows
annotate(
geom = "rect",
xmin = intersections$x[2], x = intersections$x[1],
ymin = 0, ymax = intersections$y[1],
fill = 'green', alpha = 0.5
)
Functionally this is exactly the same, but conceptually it makes the separation between the data layer and the annotation layer much clearer in the code expressed.
Note: Annotate could also be used for the points and text.
Given a data frame, Data of the form
x y
1 250 1.00000000
2 345 0.03567766
3 290 0.16654457
4 260 0.58363858
5 270 0.38754579
6 280 0.24713065
7 290 0.17142857
8 300 0.11709402
9 310 0.09047619
10 320 0.06439560
11 330 0.05098901
I am able to derive and plot a fit for the data with
library(ggplot2)
Data$x2<-Data$x^2
quadratic.model <- lm(log(Data$y) ~ Data$x + Data$x2)
fun_quad <- function(x){return(exp(
quadratic.model$coef[[3]] * x ^ 2 +
quadratic.model$coef[[2]] * x +
quadratic.model$coef[[1]]
))}
chartObj <- ggplot() +
stat_function(
fun = fun_quad,
aes(color = factor(0)),
size = 1.3,
linetype = "dotdash"
)+
geom_point(data = Data,
aes(x = x, y = y, fill = factor(0)),
color = "black", shape = 22, stroke = 0.7, size = 2.2) +
coord_trans(y = 'log10',
limx = c(250,350), limy = c(.025,1))+
theme_bw() +
guides(fill=F,color=F,linetype=F)
chartObj
which renders
.
I also tried plotting the CI using confint and geom_ribbon.
ribbon.ymin <- function(x){return(exp(
confint(quadratic.model)[[3]]*x^2 +
confint(quadratic.model)[[2]]*x +
confint(quadratic.model)[[1]]
))}
ribbon.ymax <- function(x){return(exp(
confint(quadratic.model)[[6]]*x^2 +
confint(quadratic.model)[[5]]*x +
confint(quadratic.model)[[4]]
))}
ribbonData <- as.data.frame(cbind(x = seq(250,350,.01)))
attach(ribbonData)
ribbonData$ymin <- ribbon.ymin(x)
ribbonData$ymax <- ribbon.ymax(x)
ribbonData$y <- fun_quad(x)
detach(ribbonData)
head(ribbonData)
chartObj <- chartObj +
geom_ribbon( data = ribbonData,
aes(x = x, y = 0:0,
ymin = ymin, ymax = ymax,
color = factor(0),fill = factor(0)),
alpha = 0.3)
however, this renders as below, which again feels obviously incorrect.
So, how do I plot the confidence interval associated with the function described by quadratic.model?
Update
I think that I've found nearly what I am looking for with the use of the predict command, specifically, shown below, however this is still leaves a bit to be desired, particularly the unevenness of the edges of the produced ribbon.
Data$x2<-Data$x^2
quadratic.model <- lm(log(Data$y) ~ Data$x + Data$x2)
fun_quad <- function(x){return(exp(
quadratic.model$coef[[3]] * x ^ 2 +
quadratic.model$coef[[2]] * x +
quadratic.model$coef[[1]]
))}
ribbonData<-predict(quadratic.model,data.frame(x=Data$x),interval="predict",level=.95)
# "predict" used over "confidence" in this example to show the rough edges better.
ribbonData<-as.data.frame(cbind(x=Data$x,fit=ribbonData[,1],lower=ribbonData[,2],upper=ribbonData[,3]))
ribbonData[,2:4]<-exp(ribbonData[,2:4])
chartObj <- ggplot() +
geom_ribbon( data = ribbonData,
aes(x = x, y = fit,
ymin = lower, ymax = upper,
color = factor(0),fill = factor(0)),
alpha = 0.3) +
stat_function(
fun = fun_quad,
aes(color = factor(0)),
size = 1.3,
linetype = "dotdash"
)+
geom_point(data = Data,
aes(x = x, y = y, fill = factor(0)),
color = "black", shape = 22, stroke = 0.7, size = 2.2) +
coord_trans(y = 'log10',
limx = c(250,350), limy = c(.025,1))+
theme_bw() +
guides(fill=F,color=F,linetype=F)
Is there a better way to represent the information presented by the plot above? To smooth out the rough edges of the ribbon?
It might "feel obviously incorrect", but it plots what it's been asked. The whole interval cannot be seen because limx and limy have been set:
ribbon <- function(x, level = 0.95) {
data.frame(
x,
ymin = exp(
confint(quadratic.model, level = level)[[3]] * x ^ 2 +
confint(quadratic.model, level = level)[[2]] * x +
confint(quadratic.model, level = level)[[1]]
),
ymax = exp(
confint(quadratic.model, level = level)[[6]]*x^2 +
confint(quadratic.model, level = level)[[5]]*x +
confint(quadratic.model, level = level)[[4]]
)
)
}
chartObj +
coord_trans(y = 'log10') +
geom_ribbon(data = ribbon(seq(250, 350, .01), level = 0.95),
aes(x = x, ymin = ymin, ymax = ymax,
color = factor(0), fill = factor(0)),
alpha = 0.3)
(NB: My answer is strictly about programming with ggplot2 and says nothing about the statistical validity of exponentiating a confidence interval).
Edit in response to OP's updated question (smooth out edges of ribbon).
predict() over more points:
quadratic.model <- lm(log(y) ~ x + x2, data = Data)
ribbonData <- data.frame(x = seq(250, 350, 0.01), x2 = seq(250, 350, 0.01) ^ 2)
ribbonData <- cbind(
ribbonData,
predict(quadratic.model, ribbonData,
interval = "prediction", level = 0.95)
)
# "predict" used over "confidence" in this example to show the rough edges better.
ribbonData[, 3:5] <- exp(ribbonData[, 3:5])
ggplot() +
geom_ribbon( data = ribbonData,
aes(x = x, y = fit,
ymin = lwr, ymax = upr,
color = factor(0),fill = factor(0)),
alpha = 0.3) +
stat_function(
fun = fun_quad,
aes(color = factor(0)),
size = 1.3,
linetype = "dotdash"
) +
geom_point(data = Data,
aes(x = x, y = y, fill = factor(0)),
color = "black", shape = 22, stroke = 0.7, size = 2.2) +
coord_trans(y = 'log10',
limx = c(250, 350), limy = c(.025, 1)) +
theme_bw() +
guides(fill = F, color = F, linetype = F)