Add a legend in ggplot2 with R - r

I have a problem in adding the legend in R with ggplots when plotting the figure of sample sizes vs power. I have tried guide_legend() but it failed. Thanks so much for your help.
# Load the library and input the data
library(ggplot2)
n <- 2:10
control <- rep(150, 4)
infected <- c(150, 170, 200, 250)
all <- c(control, infected)
sigma <- c(35, 40, 45)
# Compute the population mean
mu <- mean(all)
# Compute the sum of the tau squared
tau2 <- sum((all-mu)^2)
# Compute the gamma
gamma.1 <- (n*tau2)/(sigma[1]^2)
gamma.2 <- (n*tau2)/(sigma[2]^2)
gamma.3 <- (n*tau2)/(sigma[3]^2)
# Compute the power
power.1 <- 1-pf(qf(.95, 7, 16), 7, 16, gamma.1)
power.2 <- 1-pf(qf(.95, 7, 16), 7, 16, gamma.2)
power.3 <- 1-pf(qf(.95, 7, 16), 7, 16, gamma.3)
# Plot the power vs the sample size
data <- data.frame(n, power.1, power.2, power.3)
ggplot(data, aes(x = n)) +
geom_point(aes(y=power.1), size = 3.5, color = "blue") +
geom_line(aes(y=power.1), size = 0.5) +
geom_point(aes(y=power.2), size = 3.5, color = "red") +
geom_line(aes(y=power.2), size = 0.5) +
geom_point(aes(y=power.3), size = 3.5, color = "black") +
geom_line(aes(y=power.3), size = 0.5) +
xlab("Sample Sizes") +
ylab("Power") +
ggtitle("Power versus Sample size")

You can transform your data to long format which is nice for plotting because then you have grouping variable (names of your columns are groups):
data %>%
pivot_longer(cols = contains("power"), names_to = "group", values_to = "power") %>%
ggplot(aes(n, power)) +
geom_line(aes(group = group)) +
geom_point(aes(color = group), size = 4) +
scale_color_manual(values = c("blue", "red", "black"))
Also I've added line before points.

Related

show range in a log scale

I'd like to include the range of possible values for y_val where the upper and lower bands are available. I tried using geom_errorbar which works for the continuous scale but not in log scale. How can I fix this?
x_val <- c(2, 3, 6, 12, 24)
y_val<-c(1,3,15,25,30)
y_upper<- c(1.2,3.2, 16, 28,40)
y_lower <-c(0.8,2.9, 12, 22,25)
df <- data.frame(x_val=x_val,y_val=y_val,y_upper=y_upper,
y_lower=y_lower)
ggplot(data=df,aes(x=x_val,y=y_val))+
geom_line()+
geom_point()+
geom_errorbar(ymin = y_lower, ymax = y_upper)+
scale_y_log10()+
scale_x_log10()
Per this answer, you need coord_trans(y = "log10") rather than scale_y_log10()
library(ggplot2)
x_val <- c(2, 3, 6, 12, 24)
y_val<-c(1,3,15,25,30)
y_upper<- c(1.2,3.2, 16, 28,40)
y_lower <-c(0.8,2.9, 12, 22,25)
df <- data.frame(x_val=x_val,
y_val=y_val,
y_upper=y_upper,
y_lower=y_lower)
ggplot(data=df,aes(x=x_val,y=y_val))+
geom_line()+
geom_point()+
geom_errorbar(ymin = y_lower, ymax = y_upper)+
coord_trans(y="log10", x = "log10", ylim = range(c(y_upper, y_lower)))
Created on 2021-03-16 by the reprex package (v1.0.0)
A trick could be to use geom_segment with arrow settings produced by a call to arrow().
library(ggplot2)
ggplot(data=df,aes(x=x_val,y=y_val))+
geom_line()+
geom_point()+
geom_segment(aes(xend = x_val, y = y_lower, yend = y_upper),
arrow = arrow(angle = 90, ends = "both")) +
scale_y_log10()+
scale_x_log10()

R, ggplot2: How to plot bezier curves that pass through fixed coordinates?

I am helping someone translate hand-drawn economics supply and demand functions into image files that can be included in a Word document. These have been going well using Hmisc::bezier and geom_path modeled after Andrew Heiss's recon plots and using his curve_intersect function. That is, until the author asked that one of the supply curves should pass through a specified set of coordinates. The Hmisc::bezier function only uses the first and last control point as absolute, and bends toward intermediate points so the specified intersection point does not match the curve. I tried creating a spline of 2 bezier curves with the bezier function from the bezier package (v1.1.2, https://cran.r-project.org/web/packages/bezier/bezier.pdf), but this fails with "Error in FUN(X[[i]], ...) : object 'x' not found", which I do not understand or know how to fix.
Please let me know where I am going wrong or if there is a better method! I will include the commented out attempts using various functions. Please excuse the amateurish code, as I am a relative newb at R and ggplot2.
This section not directly relevant to my question
# Graph figures for physical economics, negative oil prices paper
library(reconPlots)
library(dplyr)
library(ggplot2)
library(patchwork)
library(ggrepel)
library(bezier)
library(ggforce)
options(ggrepel.max.time = 1)
options(ggrepel.max.iter = 20000)
#Set seed value for ggrepel
set.seed(52)
# panel (a)
#Set values of curves using the bezier function, each pair of c() values
# is an xy coordinate, and the sets of coordinates control the shape of the
# curve
supply <- Hmisc::bezier(c(1, 5, 6), c(3, 4, 9)) %>%
as_data_frame()
demand <- Hmisc::bezier(c(0, 9, 9), c(6, 6, 6)) %>%
as_data_frame()
label_height <- Hmisc::bezier(c(0, 9, 9), c(8, 8, 8)) %>%
as_data_frame()
# Calculate the intersections of the two curves
intersections <- bind_rows(curve_intersect(supply, demand))
# Calculate point where the curve label(s) intersect a specified height
supply_label <- bind_rows(curve_intersect(supply, label_height))
labels <- data_frame(label = expression("PS"[CR]^DRL),
x = supply_label$x,
y = supply_label$y)
production <- ggplot(mapping = aes(x = x, y = y)) +
#Draw the supply curve. Demand is not drawn in this figure, but the
# intersections of an imaginary demand curve are used to illustrate P0
# and Q0, the intersection point, and the dotted lines
geom_path(data = supply, color = "#0073D9", size = 1) +
geom_segment(data = intersections,
aes(x = x, y = 0, xend = x, yend = y), lty = "dotted") +
geom_segment(data = intersections,
aes(x = 0, y = y, xend = x, yend = y), lty = "dotted") +
#Draw the supply curve label using the intersection calculated above, using
# GGrepel so that the labels do not overlap the curve line
geom_text_repel(data = labels
,aes(x = x, y = y, label = label)
,parse = TRUE
,direction = "x"
,force = 3
,force_pull = 0.1
,hjust = 0
,min.segment.length = 0
) +
#Draw the intersection point based on intersection function between supply
# and the phantom flat demand curve at height y=6
geom_point(data = intersections, size = 3) +
#Use scale functions to set y-axis label, axis intersection point labels,
# and limits of the viewing area
scale_x_continuous(expand = c(0, 0), breaks = intersections$x
,labels = expression(Q[CR]^{DRL-PS})
,limits=c(0,9)
) +
scale_y_continuous(expand = c(0, 0), breaks = c(intersections$y, 9)
,labels = c(expression(P[CR]==frac("$",brl))
,expression(P[CR]))
,limits=c(0,9)
) +
#Use labs function to set x-axis title and title of each graph using the
# caption function so that it displays on the bottom
labs(x = expression(frac(Barrels,Week)),
caption = expression(atop("(a) Driller Production Supply", "of Crude Oil"))
) +
#Set classic theme, x-axis title on right-hand side using larger font of
# relative size 1.2, graph title on left-hand side using same larger font
theme_classic() +
theme(axis.title.y = element_blank(),
axis.title.x = element_text(hjust = 1),
axis.text = element_text(size=rel(1.2)),
plot.caption = element_text(hjust = 0.5, size=rel(1.2))
) +
coord_equal()
# Save the intersections so we can set the same quantity, price for panel (c)
specified_intersections = intersections
# Panel (b)
supply <- Hmisc::bezier(c(3.99, 4), c(0, 9)) %>%
as_data_frame()
demand <- Hmisc::bezier(c(2, 3, 4, 5), c(9, 6.5, 6, 5.5)) %>%
as_data_frame()
demand_capacity <- Hmisc::bezier(c(5, 5), c(0, 5.5)) %>%
as_data_frame()
supply_capacity <- Hmisc::bezier(c(4.999, 5), c(0, 9)) %>%
as_data_frame()
supply_label_height <- Hmisc::bezier(c(0, 9), c(9, 9)) %>%
as_data_frame()
demand_label_height <- Hmisc::bezier(c(0, 9), c(8, 8)) %>%
as_data_frame()
capacity_label_height <- Hmisc::bezier(c(0, 9), c(9, 9)) %>%
as_data_frame()
# Calculate the intersections of the two curves
intersections <- bind_rows(curve_intersect(supply,
demand))
supply_label <- bind_rows(curve_intersect(supply
,supply_label_height))
demand_label <- bind_rows(curve_intersect(demand
,demand_label_height))
capacity_label <- bind_rows(curve_intersect(supply_capacity
,capacity_label_height))
labels <- data_frame(label = c(expression("OD"[CR]^DRL),expression("OS"[CR]^DRL)
,expression("Q"[CR]^CAP)
),
x = c(demand_label$x, supply_label$x
, capacity_label$x
),
y = c(demand_label$y, supply_label$y
, capacity_label$y
)
)
inventory <- ggplot(mapping = aes(x = x, y = y)) +
geom_path(data = supply, color = "#0073D9", size = 1) +
geom_path(data = demand, color = "#FF4036", size = 1) +
geom_path(data = demand_capacity, color = "#FF4036", size = 1) +
geom_path(data = supply_capacity, color = "#0073D9", size = 1, lty = "dashed") +
geom_segment(data = intersections,
aes(x = 0, y = y, xend = x, yend = y), lty = "dotted") +
geom_text_repel(data = labels
,aes(x = x, y = y, label = label)
,parse = TRUE
,direction = "x"
,force = 3
,force_pull = 0.1
,hjust = c(0, 0, 1)
,min.segment.length = 0
) +
geom_point(data = intersections, size = 3) +
scale_x_continuous(expand = c(0, 0), breaks = c(intersections$x
, 5),
labels = c(expression(paste(Q[CR]^{DRL-OS},phantom(12345)))
,expression(Q[CR]^CAP)
)
, limits=c(0,9)) +
scale_y_continuous(expand = c(0, 0), breaks = c(intersections$y, 9),
labels = c(expression(P[CR]),expression(P[CR]))
, limits=c(0,9)) +
labs(x = "Barrels",
caption = expression(atop("(b) Driller Storage / Ownership", "of Crude Oil"))
) +
theme_classic() +
theme(axis.title.y = element_blank(),
axis.title.x = element_text(hjust = 1),
axis.text = element_text(size=rel(1.2)),
plot.caption = element_text(hjust = 0.5, size=rel(1.2))
) +
coord_equal()
Relevant section
# panel (c)
# ggforce package method
#supply <- list(c(1, 4, specified_intersections$x, 5, 7),
# c(3, 4, specified_intersections$y, 7, 9)) %>%
# as_data_frame()
# bezier package method: Fails with "Error in FUN(X[[i]], ...) : object 'x' not found"
t <- seq(0, 2, length=10)
p <- list(c(1, 4, specified_intersections$x, 7, 8),
c(3, 4, specified_intersections$y, 6, 9))
#p <- matrix(c(1,3, 4,4, specified_intersections$x,specified_intersections$y,
# 7,6, 8,9), nrow=5, ncol=2, byrow=TRUE)
supply <- bezier(t=t, p=p) %>%
as_data_frame()
# Original: Fails because it does not pass through the specified intersection
#supply <- Hmisc::bezier(c(1, specified_intersections$x, 8),
# c(3, specified_intersections$y, 9)) %>%
# as_data_frame()
# Hmisc method: Fails because there is no way to get the two curves to appear
# contiguous
#supply1 <- Hmisc::bezier(c(1, 4, specified_intersections$x),
# c(3, 4, specified_intersections$y)) %>%
# as_data_frame()
#supply2 <- Hmisc::bezier(c(specified_intersections$x, 6, 7),
# c(specified_intersections$y, 8, 9)) %>%
# as_data_frame()
#demand <- Hmisc::bezier(c(0, 9), c(specified_intersections$y, specified_intersections$y)) %>%
# as_data_frame()
label_height <- Hmisc::bezier(c(0, 9), c(8, 8)) %>%
as_data_frame()
# Calculate the intersections of the two curves
#intersections <- bind_rows(curve_intersect(supply, demand))
#supply_label <- bind_rows(curve_intersect(supply,
# label_height))
#labels <- data_frame(label = expression("SS"[CR]^DRL),
# x = supply_label$x,
# y = supply_label$y)
sales <- ggplot(mapping = aes(x = x, y = y)) +
# ggforce package method
# geom_bspline(data = supply, color = "#0073D9", size = 1) +
# Original geom_path method
geom_path(data = supply, color = "#0073D9", size = 1) +
# Supply 1 and 2 for Hmisc method
# geom_path(data = supply1, color = "#0073D9", size = 1) +
# geom_path(data = supply2, color = "#0073D9", size = 1) +
geom_segment(data = specified_intersections,
aes(x = x, y = 0, xend = x, yend = y), lty = "dotted") +
geom_segment(data = specified_intersections,
aes(x = 0, y = y, xend = x, yend = y), lty = "dotted") +
# geom_text_repel(data = labels
# ,aes(x = x, y = y, label = label)
# ,parse = TRUE
# ,direction = "x"
# ,force = 3
# ,force_pull = 0.1
# ,hjust = 0
# ,min.segment.length = 0
# ) +
geom_point(data = specified_intersections, size = 3) +
scale_x_continuous(expand = c(0, 0), breaks = specified_intersections$x,
labels = expression(Q[CR]^{DRL-SS}), limits=c(0,9)) +
scale_y_continuous(expand = c(0, 0), breaks = c(specified_intersections$y, 9),
labels = c(expression(P[CR]),expression(P[CR]))) +
labs(x = expression(frac(Barrels,Week)),
caption = expression(atop("(c) Driller Sales Supply", "of Crude Oil"))
) +
theme_classic() +
theme(axis.title.y = element_blank(),
axis.title.x = element_text(hjust = 1),
axis.text = element_text(size=rel(1.2)),
plot.caption = element_text(hjust = 0.5, size=rel(1.2))
) +
coord_equal()
patchwork <- (production | inventory | sales)
patchwork
Graphs before implementation of fixed coordinates. Need to move panel (c) intersection point to match panel (a)
I solved the "Error in FUN(X[[i]], ...) : object 'x' not found" by printing the supply variable and noticing that the bezier function names its rows V1,V2 and not x,y. I needed to set the aesthetics of the geom_path to the correct mapping.
Relevant Section, trimmed to only the bezier method
# panel (c)
# bezier package method
t <- seq(0, 2, length = 100)
p <- matrix(c(1,3, 4,4, specified_intersections$x,specified_intersections$y,
7,6, 8,9), nrow=5, ncol=2, byrow=TRUE)
supply <- bezier::bezier(t=t, p=p, deg=2) %>%
as_data_frame()
sales <- ggplot(mapping = aes(x = x, y = y)) +
# Original geom_path method
geom_path(data = supply, mapping = aes(x = V1, y = V2),
color = "#0073D9", size = 1, inherit.aes = FALSE) +
geom_segment(data = specified_intersections,
aes(x = x, y = 0, xend = x, yend = y), lty = "dotted") +
geom_segment(data = specified_intersections,
aes(x = 0, y = y, xend = x, yend = y), lty = "dotted") +
geom_point(data = specified_intersections, size = 3) +
scale_x_continuous(expand = c(0, 0), breaks = specified_intersections$x,
labels = expression(Q[CR]^{DRL-SS}), limits=c(0,9)) +
scale_y_continuous(expand = c(0, 0), breaks = c(specified_intersections$y, 9),
labels = c(expression(P[CR]),expression(P[CR]))) +
labs(x = expression(frac(Barrels,Week)),
caption = expression(atop("(c) Driller Sales Supply", "of Crude Oil"))
) +
theme_classic() +
theme(axis.title.y = element_blank(),
axis.title.x = element_text(hjust = 1),
axis.text = element_text(size=rel(1.2)),
plot.caption = element_text(hjust = 0.5, size=rel(1.2))
) +
coord_equal()
patchwork <- (production | inventory | sales)
patchwork
This does not solve my larger problem of needing a smooth curve that passes through a specified set of coordinates, as it produces two bezier curves that do not match.
I will do some research on using functions to specify bezier curves and find out if there is some mathematical or programmatic way to specify a bezier curve that passes through a set of fixed coordinates. If I find one, I'll edit this answer.
If anyone knows how to accomplish this, I would appreciate any help!
Kinked bezier curves

Fail to change the legend title and label with ggplot2 in R

I was trying to change the legend title from group to the Greek letter "sigma" and the label "power.1, power.2, power.3" to "35, 40, 45" but it did not appear and still shows the default name and label. Could you please help me with it? Thanks so much.
# Load the library and input the data
library(ggplot2)
library(tidyr)
n <- 2:10
control <- rep(150, 4)
infected <- c(150, 170, 200, 250)
all <- c(control, infected)
sigma <- c(35, 40, 45)
# Compute the population mean
mu <- mean(all)
# Compute the sum of the tau squared
tau2 <- sum((all-mu)^2)
# Compute the gamma
gamma.1 <- (n*tau2)/(sigma[1]^2)
gamma.2 <- (n*tau2)/(sigma[2]^2)
gamma.3 <- (n*tau2)/(sigma[3]^2)
# Compute the power
power.1 <- 1-pf(qf(.95, 7, 16), 7, 16, gamma.1)
power.2 <- 1-pf(qf(.95, 7, 16), 7, 16, gamma.2)
power.3 <- 1-pf(qf(.95, 7, 16), 7, 16, gamma.3)
data <- data.frame(n, power.1, power.2, power.3)
data %>%
pivot_longer(cols = contains("power"), names_to = "group", values_to = "power") %>%
ggplot(aes(n, power)) +
geom_line(aes(color = group)) +
geom_point(aes(color = group), size = 4) +
scale_fill_discrete(name = expression(sigma), labels = c("35","40","45"))
Try this in the final part of your code. One lesson you can learn is that fill and color are different aesthetics. So, if you set color you must use scale_color_manual. Here the code:
#Code
data %>%
pivot_longer(cols = contains("power"), names_to = "group", values_to = "power") %>%
ggplot(aes(n, power)) +
geom_line(aes(color = group)) +
geom_point(aes(color = group), size = 4) +
scale_color_discrete(name = expression(sigma), labels = c("35","40","45"))
Output:
Or you can also try with guides() which will produce the same output (But first option is more direct):
#Code 2
data %>%
pivot_longer(cols = contains("power"), names_to = "group", values_to = "power") %>%
ggplot(aes(n, power)) +
geom_line(aes(color = group)) +
geom_point(aes(color = group), size = 4) +
scale_color_discrete(labels = c("35","40","45"))+
guides(color=guide_legend(title=expression(sigma)))
You should used:
scale_colour_discrete(name = expression(sigma), labels = c("35","40","45"))

shade block between two lines, values vary with facet_wrap

I'm plotting the relationships between speed and time for four different species (each in a different facet). For each species, I have a range of speeds I'm interested in, and would like to shade the area between the min and max values. However, these ranges are different for the 4th species compared to the first three.
#data to plot as points
species <- sample(letters[1:4], 40, replace = TRUE)
time <- runif(40, min = 1, max = 100)
speed <- runif(40, min = 1, max = 20)
df <- data.frame(species, time, speed)
#ranges of key speeds
sp <- letters[1:4]
minspeed <- c(5, 5, 5, 8)
maxspeed <- c(10, 10, 10, 13)
df.range <- data.frame(sp, minspeed, maxspeed)
ggplot() +
geom_hline(data = df.range, aes(yintercept = minspeed),
colour = "red") +
geom_hline(data = df.range, aes(yintercept = maxspeed),
colour = "red") +
geom_point(data=df, aes(time, speed),
shape = 1) +
facet_wrap(~species) +
theme_bw()
How do I:
get geom_hline to only plot the max and min ranges for the correct species, and
shade the area between the two lines?
For the later part, I've tried adding geom_ribbon to my plot, but I keep getting an error message that I'm unsure how to address.
geom_ribbon(data = df,
aes(ymin = minspeed, ymax = maxspeed,
x = c(0.0001, 100)),
fill = "grey",
alpha = 0.5) +
Error: Aesthetics must be either length 1 or the same as the data
(40): x, ymin, ymax
As per my comment, the following should work. Perhaps there are other unobserved differences between your actual use case & the example in your question?
colnames(df.range)[which(colnames(df.range) == "sp")] <- "species"
ggplot() +
geom_hline(data = df.range, aes(yintercept = minspeed),
colour = "red") +
geom_hline(data = df.range, aes(yintercept = maxspeed),
colour = "red") +
geom_point(data = df, aes(time, speed),
shape = 1) +
geom_rect(data = df.range,
aes(xmin = -Inf, xmax = Inf, ymin = minspeed, ymax = maxspeed),
fill = "grey", alpha = 0.5) +
facet_wrap(~species) +
theme_bw()
Data used:
df <- data.frame(species = sample(letters[1:4], 40, replace = TRUE),
time = runif(40, min = 1, max = 100),
speed = runif(40, min = 1, max = 20))
df.range <- data.frame(sp = letters[1:4],
minspeed = c(5, 5, 5, 8),
maxspeed = c(10, 10, 10, 13))

structure diagram where each members of group are connected to center and all cluster grand center in r

I am trying to create a structure diagram from the data like the following:
mydf <- data.frame ( group = rep (1:5, each = 20), z = rnorm (20, 10, 1),
x = c(rnorm (20, 2, 0.5), rnorm (20, 2, 0.5),
rnorm (20, 9, 0.5), rnorm (20, 9, 0.5),rnorm (20, 5, 0.5)),
y = c(rnorm (20, 2, 0.5), rnorm (20, 9, 0.5), rnorm (20, 2, 0.5),
rnorm (20, 9, 0.5), rnorm (20, 2, 0.5)))
means <- aggregate(. ~ group, data = mydf, mean)
gmx <-mean (mydf$x)
gmy <- mean (mydf$y)
library(ggplot2)
ggplot(mydf, aes(x, y)) +
geom_point(aes(colour= factor (group), size=z)) + theme_bw()
I want make connect every points within each cluster to its center and then the cluster center to grad mean. This will be produce a plot like the following (just rough sketch where two cluster are connected to the center, in real all cluster have the same):.........
(I would like to use the line segments of same color as of cluster if possible)
Here is an example:
library(plyr)
ms <- ddply(mydf, .(group), colwise(mean))
mydf2ms <- merge(mydf, ms, by = "group")
gm <- ddply(mydf, NULL, colwise(mean))
ms2gm <- data.frame(ms, gm)
ci <- expand.grid(1:3*2, seq(0, 2*pi, length = 180))
ci <- transform(ci, x = cos(Var2) * Var1 + gm$x, y = sin(Var2) * Var1 + gm$y)
library(ggplot2)
ggplot(mydf, aes(x, y)) +
geom_point(aes(colour= factor (group), size=z)) +
geom_segment(data = mydf2ms, mapping = aes(x = x.x, y = y.x, xend = x.y, yend = y.y, colour = factor(group))) +
geom_segment(data = ms2gm, mapping = aes(x = x, y = y, xend = x.1, yend = y.1)) +
geom_point(data = ms, colour = "black", size = 10, shape = 4) +
geom_point(data = gm, colour = "red", size = 10, shape = 4) +
geom_path(data = ci, mapping = aes(group = Var1), colour = "pink")

Resources