Limiting vertical line length in R ggplot2 - r

I am trying to draw a forest plot with different groups. The code I'm using looks like the following:
# Toy forest-plot data: one point estimate per method within each group,
# with a symmetric interval of +/- 0.3 around every estimate.
d <- data.frame(
  Estimate = c(1.8, 1.9, 2.1, 2.4, 2.7, 2.5),
  Group = rep(c("Group A", "Group B"), each = 3),
  Method = rep(c("Method 1", "Method 2", "Method 3"), times = 2)
)
d[["Lower"]] <- d[["Estimate"]] - 0.3
d[["Upper"]] <- d[["Estimate"]] + 0.3
# Dodged point estimates with horizontal intervals, coloured by method.
# The two dashed vertical lines (x = 2 and x = 2.5) span the whole panel.
ggplot(
  d,
  aes(x = Estimate, y = Group, xmin = Lower, xmax = Upper, colour = Method)
) +
  geom_point(position = position_dodge(width = 0.5), size = 2) +
  geom_linerange(position = position_dodge(width = 0.5)) +
  geom_vline(xintercept = c(2, 2.5), linetype = "dashed")
And the resulting plot:
The vertical lines (2, 2.5) are the true group means. I want to limit these vertical lines to be within each group (i.e., the first one from bottom to the middle, the second one middle to top). Anyone know how to do this?
I've tried geom_segment() function but I think it requires a numerical y input, while it's a factor here.

Factors plotted on an axis are "really" numeric, but with labels added, so you can go ahead and add numeric segments:
# The discrete groups are addressed by numeric y positions here, so each
# dashed segment gets explicit y limits around its own group instead of
# running the full height of the panel.
ggplot(
  d,
  aes(x = Estimate, y = Group, xmin = Lower, xmax = Upper, colour = Method)
) +
  geom_point(position = position_dodge(width = 0.5), size = 2) +
  geom_linerange(position = position_dodge(width = 0.5)) +
  geom_segment(
    mapping = aes(x = x, y = y, xend = xend, yend = yend),
    data = data.frame(
      x = c(2, 2.5), xend = c(2, 2.5),
      y = c(0.67, 1.67), yend = c(1.33, 2.33)
    ),
    linetype = "dashed",
    # The segment data has none of the columns used by the plot-level aes().
    inherit.aes = FALSE
  )
Or, with a few tweaks:
# Same forest plot with styling tweaks: thicker intervals drawn first, hollow
# white-filled points on top, a text label above each group's dashed segment.
ggplot(
  d,
  aes(x = Estimate, y = Group, xmin = Lower, xmax = Upper, colour = Method)
) +
  geom_linerange(size = 1, position = position_dodge(width = 0.5)) +
  geom_point(
    shape = 21, fill = "white", size = 3,
    position = position_dodge(width = 0.5)
  ) +
  geom_segment(
    mapping = aes(x = x, y = y, xend = xend, yend = yend),
    data = data.frame(
      x = c(2, 2.5), xend = c(2, 2.5),
      y = c(0.67, 1.67), yend = c(1.33, 2.33)
    ),
    linetype = "dashed",
    inherit.aes = FALSE
  ) +
  annotate(
    "text",
    x = c(2, 2.5), y = c(1.5, 2.5),
    label = c("Group mean = 2", "Group mean = 2.5"),
    size = 6
  ) +
  theme_minimal(base_size = 20) +
  scale_colour_brewer(palette = "Set1")

Related

How to make vertical and horizontal lines reach the axis boundary in ggplot

I want to use a vertical and a horizontal line to mark a point in a faceted plot.
But the vertical and horizontal lines do not touch the axis boundary.
I used a simple example to show the problem.
library(tidyverse)
library(ggh4x)
# Example data: two facets (group 1 and group 2) with four observations each.
df <- data.frame(
  x = c(1, 2, 3, 4, 10, 20, 30, 40),
  y = c(1, 2, 3, -1, 10, 20, 30, -10),
  group = rep(c(1, 2), each = 4)
)
# One highlighted point per facet, together with the start coordinates of the
# horizontal (x_hline, y_hline) and vertical (x_vline, y_vline) guide lines.
add.point <- data.frame(
  group = c(1, 2),
  x = c(1, 10),
  y = c(1.5, 15),
  x_hline = c(0, 0),
  y_hline = c(1.5, 15),
  x_vline = c(1, 10),
  y_vline = c(0, 0)
)
I tried several approaches, such as expand_limits(), scale_x(y)_continuous() and coord_cartesian(), but all of them failed.
By the way, I can live with the point y < 0 not showing up in the plot.
My question: is there a way to make the vertical and horizontal lines touch the axis boundary in ggplot?
# Faceted scatter plot with a smoother, the highlighted points in red, and
# dashed guide segments running from (x_hline, y_hline) and (x_vline, y_vline)
# to each highlighted point. The outer parentheses print the plot on assignment.
(p0 <- df %>%
  ggplot(aes(x = x, y = y)) +
  geom_point() +
  # Spelled-out FALSE: `F` is an ordinary variable that can be reassigned.
  geom_smooth(se = FALSE) +
  facet_wrap(vars(group), scales = "free") +
  geom_point(data = add.point, aes(x = x, y = y), color = "red") +
  geom_segment(
    data = add.point,
    aes(x = x_hline, y = y_hline, xend = x, yend = y),
    linetype = "dashed"
  ) +
  geom_segment(
    data = add.point,
    aes(x = x_vline, y = y_vline, xend = x, yend = y),
    linetype = "dashed"
  ))
# Attempt 1: force both axes to include zero.
(p1 <- p0 + expand_limits(x = 0, y = 0))
# Attempt 2: remove the scale expansion on both axes.
(p1 <- p0 +
  scale_x_continuous(expand = c(0, 0)) +
  scale_y_continuous(expand = c(0, 0)))
# Attempt 3: set the y range through the coordinate system.
(p1 <- p0 + coord_cartesian(ylim = 0))
You could use e.g. -Inf for the endpoints of your segments and use expand_limits to extend the limits to zero:
library(ggplot2)
# Faceted plot whose dashed guide lines start at -Inf on the relevant axis
# (instead of the x_hline / y_vline zeros), while expand_limits() makes sure
# zero is included in both axis ranges.
ggplot(df, aes(x = x, y = y)) +
  geom_point() +
  # Spelled-out FALSE: `F` is an ordinary variable that can be reassigned.
  geom_smooth(se = FALSE) +
  facet_wrap(vars(group), scales = "free") +
  geom_point(data = add.point, aes(x = x, y = y), color = "red") +
  # Horizontal guide: from the left (-Inf) to the highlighted point.
  geom_segment(
    data = add.point,
    aes(x = -Inf, y = y_hline, xend = x, yend = y),
    linetype = "dashed"
  ) +
  # Vertical guide: from the bottom (-Inf) to the highlighted point.
  geom_segment(
    data = add.point,
    aes(x = x_vline, y = -Inf, xend = x, yend = y),
    linetype = "dashed"
  ) +
  expand_limits(x = 0, y = 0)

Create a special Radial bar chart (race track plot)

I was able to replicate other good answers here to create a basic radial plot, but can anyone give me a clue about which other functions, parameters or ideas I could use to turn the basic plot into something similar to this:
You could get pretty close like this:
# One row per ring: bar length (x), ring index (y) and the bar's fill colour.
df <- data.frame(
  x = c(10, 12.5, 15),
  y = 1:3,
  col = c("#fcfbfc", "#fbc3a0", "#ec6f4a")
)
library(ggplot2)
# Race-track chart: grey background rings at y = 1..3, a dark-red outline
# segment underneath each coloured bar, and two stacked point markers near the
# end of every bar. coord_polar() bends the straight segments into arcs.
ggplot(df, aes(x = 0, xend = x, y = y, yend = y, colour = col)) +
  geom_hline(yintercept = 1:3, colour = "#dfdfdf", size = 14) +
  geom_hline(yintercept = 1:3, colour = "#f7f7f7", size = 13) +
  geom_segment(colour = "#bf2c23", lineend = "round", size = 14) +
  geom_segment(lineend = "round", size = 13) +
  # The `col` column already holds colour codes, so use it as-is.
  scale_colour_identity() +
  geom_point(
    aes(x = x - 0.03 * y),
    shape = 21, fill = "white", colour = "#bf2c23", size = 5
  ) +
  geom_point(
    aes(x = x - 0.03 * y),
    shape = 21, fill = "white", colour = "#bf2c23", size = 2
  ) +
  scale_y_continuous(limits = c(0, 4)) +
  scale_x_continuous(limits = c(0, 20)) +
  coord_polar() +
  theme_void()
Here's a start. Are there particular aspects you're trying to replicate? This is a fairly customized format.
# One row per bar: its label, the radius it is drawn at, and its value.
df <- data.frame(
  type = c("on", "ia", "n"),
  radius = c(2, 3, 4),
  value = c(10, 21, 22)
)
library(ggplot2); library(ggforce)
# Radial bars built from ggforce::geom_link(): a slightly wider dark-red link
# below each coloured link acts as an outline; theta = "y" maps the value to
# the angle and the radius column to the distance from the centre.
ggplot(df) +
  geom_link(
    aes(x = radius, xend = radius, y = 0, yend = value),
    color = "#bb353c", lineend = "round", size = 17
  ) +
  geom_link(
    aes(x = radius, xend = radius, y = 0, yend = value, color = type),
    lineend = "round", size = 16
  ) +
  geom_label(
    aes(x = radius, y = 30, label = paste(type, ": ", value)),
    hjust = 1.8
  ) +
  scale_x_continuous(limits = c(0, 4)) +
  scale_y_continuous(limits = c(0, 30)) +
  scale_color_manual(
    values = c("on" = "#fff7f2", "ia" = "#f8b68f", "n" = "#e4593a")
  ) +
  guides(color = "none") +
  coord_polar(theta = "y") +
  theme_void()

Drawing elements (arrows & circle) in ggplot (R) to show the difference between two bars

I am trying to create a plot in R using ggplot that shows the difference between my two bars in a nice way.
I found an example that did part of what I wanted, but I have two major problems:
It is based on comparing groups of bars, but I only have two, so I added one group with both of them.
I would like to draw the arrow in nicer shape. I attached an image.
Code:
# Two quarterly transaction totals that share a single group.
transactions <- c(5e6, 1e6)
time <- c("Q1", "Q2")
group <- rep("A", 2)
data <- data.frame(transactions = transactions, time = time, group = group)
library(ggplot2)
# Summary function for stat_summary(): builds the label that shows the
# difference between consecutive values.
#
# x: numeric vector of the y values being summarised.
# Returns a data frame with `y` (label position, one unit above the largest
# value) and `label` (rounded successive difference followed by "cm").
fun.data <- function(x) {
  # The leftover debugging print(x) was removed: it wrote the raw values to
  # the console every time the summary was computed.
  data.frame(
    y = max(x) + 1,
    label = paste0(round(diff(x), 2), "cm")
  )
}
# Axis labels in millions; the breaks below are placed at ylab * 10^6.
ylab <- c(2.5, 5.0, 7.5, 10)
# Bars per quarter with their value as text, an arrowed line between the two
# bars, and a label computed by fun.data for the group.
gg <- ggplot(
  data,
  aes(
    x = time, y = transactions, fill = colors_hc[1],
    label = round(transactions, 0)
  )
) +
  geom_bar(stat = "identity", show.legend = FALSE) +
  geom_text(vjust = 1.1, position = position_dodge(width = 0.9)) +
  geom_line(
    aes(group = group),
    arrow = arrow(), position = position_nudge(0.1)
  ) +
  stat_summary(
    aes(x = group, y = transactions),
    fun.data = fun.data, geom = "label",
    fill = "lightgrey", fontface = "bold",
    inherit.aes = FALSE
  ) +
  expand_limits(x = c(0, NA), y = c(0, NA)) +
  scale_y_continuous(breaks = 10^6 * ylab, labels = paste0(ylab, "M"))
gg
The arrows I am aiming for:
Where I am (ignore the ugliness, didn't style it yet):
This works, but you still need to play around a bit with the axes (or rather beautify them)
library(dplyr)
library(ggplot2)
# Two quarterly transaction totals that share a single group.
transactions <- c(5e6, 1e6)
time <- c("Q1", "Q2")
group <- rep("A", 2)
my_data <- data.frame(transactions = transactions, time = time, group = group)

# Summary function for stat_summary(): label position one unit above the
# largest value, label text the integer difference between consecutive values.
fun.data <- function(x) {
  label_y <- max(x) + 1
  change <- as.integer(diff(x))
  data.frame(y = label_y, label = change)
}
# Side-by-side bars for the two quarters inside one group, each labelled with
# its value, an arrowed line across the group, and the difference label
# produced by fun.data.
ggplot(my_data, aes(x = group, y = transactions, fill = time)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    aes(label = as.integer(transactions)),
    vjust = 1.5, position = position_dodge(width = 0.9)
  ) +
  geom_line(
    aes(group = group),
    arrow = arrow(), position = position_nudge(0.1)
  ) +
  stat_summary(
    aes(x = group, y = transactions),
    fun.data = fun.data, geom = "label",
    position = position_nudge(0.05), size = 5,
    fill = "lightgrey", fontface = "bold",
    inherit.aes = FALSE
  )
Edit2:
# Height of the connector that links the two bars (also the upper y limit).
y_limit <- 6000000
ggplot(my_data, aes(x = time, y = transactions)) +
  geom_bar(stat = "identity", fill = "steelblue") +
  geom_text(aes(label = as.integer(transactions)), vjust = 2) +
  coord_cartesian(ylim = c(0, y_limit)) +
  # Riser above Q1: from the largest transaction value up to y_limit.
  geom_segment(
    aes(
      x = "Q1", xend = "Q1",
      y = max(my_data$transactions), yend = y_limit
    )
  ) +
  # Arrow above Q2: from y_limit down to the smallest transaction value.
  geom_segment(
    aes(
      x = "Q2", xend = "Q2",
      y = y_limit, yend = min(my_data$transactions)
    ),
    arrow = arrow()
  ) +
  # Horizontal connector between the two quarters at y_limit.
  geom_segment(aes(x = "Q1", xend = "Q2", y = y_limit, yend = y_limit)) +
  # Difference label, nudged half a unit to the left of Q2.
  geom_label(
    aes(
      x = "Q2",
      y = y_limit,
      label = as.integer(min(my_data$transactions) - max(my_data$transactions))
    ),
    position = position_nudge(-0.5),
    size = 10,
    fill = "lightgrey", fontface = "bold"
  )

I'd like to shade an area but I don't know how

I mean, I want to shade only the rectangular ("square") area P1 × (Q1 − Q2),
not the trapezoid (P1 + P2) × (Q1 − Q2) / 2.
Here's code that I used. I used ggplot and dplyr. How can I solve this problem?
How can I shade only the square area and not the trapezoid area?
library(ggplot2)
library(dplyr)
# Supply and demand curves as Bezier paths through three control points each.
# as_tibble() replaces the deprecated as_data_frame().
supply <- Hmisc::bezier(x = c(1, 8, 9), y = c(1, 5, 9)) %>%
  as_tibble()
demand <- Hmisc::bezier(c(1, 3, 9), c(9, 3, 1)) %>%
  as_tibble()
# Interpolating functions for both curves (rule = 2 is passed to approxfun()).
fun_supply <- approxfun(supply$x, supply$y, rule = 2)
fun_supply(c(2, 6, 8))
fun_demand <- approxfun(demand$x, demand$y, rule = 2)
# x position in [1, 9] where supply and demand are equal.
intersection_funs <- uniroot(
  function(x) fun_supply(x) - fun_demand(x),
  c(1, 9)
)
intersection_funs
# Height of the crossing point, read off the demand curve.
y_root <- fun_demand(intersection_funs$root)
# Locate the point where two curves cross.
#
# curve1, curve2: objects with numeric `x` and `y` components describing the
#   curves as sampled points.
# Returns a list with the crossing position `x` and the height `y` there.
curve_intersect <- function(curve1, curve2) {
  # Interpolating functions built from the sampled points of each curve
  f1 <- approxfun(curve1$x, curve1$y, rule = 2)
  f2 <- approxfun(curve2$x, curve2$y, rule = 2)
  # The crossing is the root of the vertical gap between the curves,
  # searched over the x-range of curve1
  gap <- function(x) f1(x) - f2(x)
  search_range <- c(min(curve1$x), max(curve1$x))
  root_x <- uniroot(gap, search_range)$root
  # The height of the crossing is read off curve2
  list(x = root_x, y = f2(root_x))
}
# Crossing point of the first supply/demand pair, printed and kept as a
# one-row tibble. as_tibble()/tibble() replace the deprecated
# as_data_frame()/data_frame().
intersection_xy <- curve_intersect(supply, demand)
intersection_xy
intersection_xy_df <- intersection_xy %>% as_tibble()
# Second pair of curves.
demand2 <- Hmisc::bezier(c(1.5, 3.5, 9.5), c(9.5, 3.5, 1.5)) %>%
  as_tibble()
supply2 <- Hmisc::bezier(c(1, 7, 8), c(3, 7, 11)) %>%
  as_tibble()
# One row per crossing point: supply with demand, then supply2 with demand2.
intersections <- bind_rows(
  curve_intersect(supply, demand),
  curve_intersect(supply2, demand2)
)
# Curve labels (parsed as plotmath by the plot) and their positions.
plot_labels <- tibble(
  label = c("S", "D", "S[1]", "D[1]"),
  x = c(9, 1, 6.5, 3),
  y = c(8, 8, 8, 8)
)
# Supply/demand diagram: the first pair of curves dashed, the second pair
# solid, dotted guides from both crossing points to the axes, and an area
# layer drawn from the two crossing points.
ggplot(mapping = aes(x = x, y = y)) +
  geom_path(data = supply, colour = "#0073D9", linetype = "dashed", size = 1) +
  geom_path(data = demand, colour = "#FF4036", linetype = "dashed", size = 1) +
  geom_path(data = demand2, colour = "#FF4036", size = 1) +
  geom_path(data = supply2, colour = "#0073D9", size = 1) +
  geom_segment(
    data = intersections,
    aes(x = x, y = 0, xend = x, yend = y),
    linetype = "dotted"
  ) +
  geom_segment(
    data = intersections,
    aes(x = 0, y = y, xend = x, yend = y),
    linetype = "dotted"
  ) +
  geom_segment(
    data = intersections,
    aes(x = x, y = y, xend = x, yend = y),
    linetype = "dotted"
  ) +
  geom_point(data = intersections, size = 3) +
  geom_text(
    data = plot_labels,
    aes(x = x, y = y, label = label),
    parse = TRUE
  ) +
  scale_x_continuous(
    breaks = intersections$x,
    labels = expression(Q[1], Q[2]),
    expand = c(0, 0)
  ) +
  scale_y_continuous(
    breaks = intersections$y,
    labels = expression(P[1], P[2]),
    expand = c(0, 0)
  ) +
  labs(x = "Quantity", y = "Price") +
  geom_area(data = intersections, fill = "#9999FF", alpha = 0.5) +
  theme_classic() +
  coord_equal()
Could you help me to paint the area that I mentioned.
You might try adding geom_rect(data=intersections[1,], aes(xmin=0, xmax=x, ymin=0, ymax=y),fill='green', alpha=0.5) to your plot call.
So we have:
# Supply/demand diagram as in the question, with one extra layer at the end:
# a green rectangle from the origin to the first crossing point.
ggplot(mapping = aes(x = x, y = y)) +
  geom_path(data = supply, colour = "#0073D9", linetype = "dashed", size = 1) +
  geom_path(data = demand, colour = "#FF4036", linetype = "dashed", size = 1) +
  geom_path(data = demand2, colour = "#FF4036", size = 1) +
  geom_path(data = supply2, colour = "#0073D9", size = 1) +
  geom_segment(
    data = intersections,
    aes(x = x, y = 0, xend = x, yend = y),
    linetype = "dotted"
  ) +
  geom_segment(
    data = intersections,
    aes(x = 0, y = y, xend = x, yend = y),
    linetype = "dotted"
  ) +
  geom_segment(
    data = intersections,
    aes(x = x, y = y, xend = x, yend = y),
    linetype = "dotted"
  ) +
  geom_point(data = intersections, size = 3) +
  geom_text(
    data = plot_labels,
    aes(x = x, y = y, label = label),
    parse = TRUE
  ) +
  scale_x_continuous(
    breaks = intersections$x,
    labels = expression(Q[1], Q[2]),
    expand = c(0, 0)
  ) +
  scale_y_continuous(
    breaks = intersections$y,
    labels = expression(P[1], P[2]),
    expand = c(0, 0)
  ) +
  labs(x = "Quantity", y = "Price") +
  geom_area(data = intersections, fill = "#9999FF", alpha = 0.5) +
  theme_classic() +
  coord_equal() +
  geom_rect(
    data = intersections[1, ],
    aes(xmin = 0, xmax = x, ymin = 0, ymax = y),
    fill = "green", alpha = 0.5
  )
Edit based on comment:
# Rectangle between the two crossing x positions, from 0 up to the first
# crossing's y value.
geom_rect(
  mapping = aes(xmin = x[2], xmax = x[1], ymin = 0, ymax = y[1]),
  data = intersections,
  fill = "green", alpha = 0.5
)
Though the answer from J Con is in depth and does provide a solution, a cleaner approach in ggplot2 may be to use the annotate function, with geom and other arguments set appropriately. (See link for help page.)
This is because using something like geom_rect involves passing positions and so on as a data.frame, which is a bit more of a hack as, conceptually, from a grammar of graphics perspective, the data layer and the annotation layer are distinct: the act of mapping data variables to graphical aesthetics in a systematic and objective way, and of marking up features within the dataset in a piecemeal and subjective way, are separate activities, and using annotate explicitly for the latter purpose makes this divide clearer in terms of the code and concepts.
Edit
To be more specific, the annotate equivalent of the following:
geom_rect(
  mapping = aes(xmin = x[2], xmax = x[1], ymin = 0, ymax = y[1]),
  data = intersections,
  fill = "green", alpha = 0.5
)
Would likely be as follows
# Rectangle between the two crossing x positions, from 0 up to the first
# crossing's y value, drawn as an annotation rather than a data layer.
annotate(
  geom = "rect",
  # A rect is defined by xmin/xmax/ymin/ymax; the right edge must be passed
  # as `xmax`, not `x`.
  xmin = intersections$x[2], xmax = intersections$x[1],
  ymin = 0, ymax = intersections$y[1],
  fill = "green", alpha = 0.5
)
Functionally this is exactly the same, but conceptually it makes the separation between the data layer and the annotation layer much clearer in the code expressed.
Note: Annotate could also be used for the points and text.

Applying log scale to y-axis for visualizing proportions with ggplot2

I am attempting to recreate some plots from a research article in R and am running into an issue with applying a log scale to y axis. The visualization I'm attempting to recreate is this:
reference plot with y log scale
I currently have a working version without the logarithmic scale applied to the y-axis:
# Daily proportion of the baseline mean on a linear y axis: points and lines
# per observation, a vertical line at day 164.5 between the two labelled
# phases, a horizontal line at 1, and one horizontal segment per phase taken
# from the `mean` column of the phase data.
Proportion_Mean_Plot <- ggplot(
  proportions,
  aes(x = days2, y = proportion_mean, group = observation)
) +
  geom_point(aes(shape = observation)) +
  geom_line() +
  scale_x_continuous(breaks = seq(from = 0, to = 335, by = 20)) +
  scale_y_continuous(breaks = seq(from = 0, to = 6, by = .5)) +
  theme_tufte() +
  geom_rangeframe() +
  theme(legend.position = "none") +
  theme(
    axis.line.x = element_line(colour = "black", size = 0.5, linetype = 1),
    axis.line.y = element_line(colour = "black", size = 0.5, linetype = 1)
  ) +
  labs(
    title = "Proportion of Baseline Mean",
    subtitle = "Daily steps within each intervention phase",
    x = "DAYS",
    y = "PROPORTION OF BASELINE \n(MEAN)"
  ) +
  geom_vline(xintercept = 164.5) +
  geom_hline(yintercept = 1) +
  annotate(
    "text",
    x = c(82, 246), y = 5,
    label = c("Intervention 1", "Intervention 2")
  ) +
  geom_segment(
    data = proportion_intervention1_data,
    aes(x = 0, y = mean, xend = end, yend = mean)
  ) +
  geom_segment(
    data = proportion_intervention2_data,
    aes(x = start, y = mean, xend = end, yend = mean),
    linetype = 4
  )
This produces a decent representation of the original:
normally scaled y-axis plot
I would like to try to apply that logarithmic scaling to more closely match it. Any help is appreciated.
As per Richard's suggestion, here is a quick example how you can use scale_y_log10:
suppressPackageStartupMessages(library(tidyverse))
set.seed(123)
# Simulate 160 values per intervention phase, floored at 0.4 and 0.01.
proportions <- tibble(
  interv_1 = pmax(0.4, rnorm(160, mean = 1.3, sd = 0.2)),
  interv_2 = pmax(0.01, rnorm(160, mean = 1.6, sd = 0.5))
)
# gather() stacks interv_1 above interv_2, so numbering the rows 1..320 puts
# the two phases one after the other on the day axis.
proportions <- proportions %>%
  gather(key = "observation", value = "proportion_mean") %>%
  mutate(days2 = seq_len(320))
# Create the plot: same layers as the linear version, but with a log10 y axis
# limited to [0.1, 5] and no x-axis expansion.
ggplot(proportions, aes(days2, proportion_mean, group = observation)) +
  geom_point(aes(shape = observation)) +
  geom_line() +
  scale_x_continuous(breaks = seq(0, 335, 20), expand = c(0, 0)) +
  scale_y_log10(breaks = c(0.1, 0.5, 1, 2, 3, 4, 5), limits = c(0.1, 5)) +
  # theme_tufte() +
  # geom_rangeframe() +
  theme(legend.position = "none") +
  theme(
    axis.line.x = element_line(colour = "black", size = 0.5, linetype = 1),
    axis.line.y = element_line(colour = "black", size = 0.5, linetype = 1)
  ) +
  labs(
    title = "Proportion of Baseline Mean",
    subtitle = "Daily steps within each intervention phase",
    x = "DAYS",
    y = "PROPORTION OF BASELINE \n(MEAN)"
  ) +
  geom_vline(xintercept = 164.5) +
  geom_hline(yintercept = 1) +
  annotate(
    "text",
    x = c(82, 246), y = 5,
    label = c("Intervention 1", "Intervention 2")
  ) +
  # Reference lines at the means plugged in for the two simulated
  # distributions (1.3 and 1.6). annotate() draws each segment once, whereas
  # geom_segment(aes(<constants>)) would redraw it for every row of
  # `proportions`.
  annotate("segment", x = 0, xend = 164.5, y = 1.3, yend = 1.3) +
  annotate(
    "segment",
    x = 164.5, xend = 320, y = 1.6, yend = 1.6,
    linetype = 4
  )

Resources