stat_fit_glance and generalized additive models (GAM) error - r

I am trying to add the p-value and R2 from mgcv::gam results to ggplot with facets. The sample dataframe and code are below. Is there a way to successfully paste the p-value and R2 on the ggplots?
DF <- data.frame(Site = rep(LETTERS[20:24], each = 4),
Region = rep(LETTERS[14:18], each = 4),
time = rep(LETTERS[1:10], each = 10),
group = rep(LETTERS[1:4], each = 10),
value1 = runif(n = 1000, min = 10, max = 15),
value2 = runif(n = 1000, min = 100, max = 150))
DF$time <- as.numeric(DF$time)
GAMFORMULA <- y ~ s(x,bs="cr",k=3)
plot1 <- ggplot(data=DF,
aes(x=time, y=value2)) +
geom_point(col="gray", alpha=0.8,
name="") +
geom_line(col="gray", alpha=0.8,
name="",aes(group=group)) +
geom_smooth(se=T, col="darkorange", alpha=0.8,
name="", fill="orange",
method="gam",formula=GAMFORMULA) +
theme_bw() +
theme(strip.text.x = element_text(size=10),
strip.text.y = element_text(size=10, face="bold", angle=0),
strip.background = element_rect(colour="black", fill="gray90"),
axis.text.x = element_text(size=10), # remove x-axis text
axis.text.y = element_text(size=10), # remove y-axis text
axis.ticks = element_blank(), # remove axis ticks
axis.title.x = element_text(size=18), # remove x-axis labels
axis.title.y = element_text(size=25), # remove y-axis labels
panel.background = element_blank(),
panel.grid.major = element_blank(), #remove major-grid labels
panel.grid.minor = element_blank(), #remove minor-grid labels
plot.background = element_blank()) +
labs(y="Value", x="Time", title = "") +
stat_fit_glance(method = "gam",
method.args = list(formula = GAMFORMULA),
aes(label = sprintf('R^2~"="~%.3f~~italic(p)~"="~%.2f',
stat(..r.squared..),stat(..p.value..))),
parse = TRUE)
plot1 + facet_wrap(Site~group, scales="free_y", ncol=3)
Error in sprintf("R^2~\"=\"~%.3f~~italic(p)~\"=\"~%.2f", r.squared, p.value) :
object 'r.squared' not found

My answer explains why stat_fit_glance() cannot be used to add r.sq to a plot, but I am afraid is does not provide an alternative approach.
stat_fit_glance() is a wrapper on broom:glance() that fits the model and passes the model fit object to broom:glance(). In the case of gam(), broom:glance() does not return an estimate for R2 and consequently also stat_fit_glance() is unable to return it.
To see what computed values are available one can use geom_debug() from package 'gginnards'.
library(ggpmisc)
library(gginnards)
library(mgcv)
DF <- data.frame(Site = rep(LETTERS[20:24], each = 4),
Region = rep(LETTERS[14:18], each = 4),
time = rep(LETTERS[1:10], each = 10),
group = rep(LETTERS[1:4], each = 10),
value1 = runif(n = 1000, min = 10, max = 15),
value2 = runif(n = 1000, min = 100, max = 150))
DF$time <- as.numeric(DF$time)
GAMFORMULA <- y ~ s(x,bs="cr",k=3)
plot1 <- ggplot(data=DF,
aes(x=time, y=value2)) +
geom_point(col="gray", alpha=0.8,
name="") +
geom_line(col="gray", alpha=0.8,
name="",aes(group=group)) +
geom_smooth(se=T, col="darkorange", alpha=0.8,
name="", fill="orange",
method="gam",formula=GAMFORMULA) +
theme_bw() +
theme(strip.text.x = element_text(size=10),
strip.text.y = element_text(size=10, face="bold", angle=0),
strip.background = element_rect(colour="black", fill="gray90"),
axis.text.x = element_text(size=10), # remove x-axis text
axis.text.y = element_text(size=10), # remove y-axis text
axis.ticks = element_blank(), # remove axis ticks
axis.title.x = element_text(size=18), # remove x-axis labels
axis.title.y = element_text(size=25), # remove y-axis labels
panel.background = element_blank(),
panel.grid.major = element_blank(), #remove major-grid labels
panel.grid.minor = element_blank(), #remove minor-grid labels
plot.background = element_blank()) +
labs(y="Value", x="Time", title = "") +
stat_fit_glance(method = "gam",
method.args = list(formula = GAMFORMULA),
# aes(label = sprintf('R^2~"="~%.3f~~italic(p)~"="~%.2f',
# stat(..r.squared..),stat(..p.value..))),
# parse = TRUE)
geom = "debug")
plot1 + facet_wrap(Site~group, scales="free_y", ncol=3)
Shown above are the values returned by stat_fit_glance() for the first two panels in the plot.
Note: There does not seem to be agreement on whether R-square is meaningful for GAM. However the summary() method for gam does return an adjusted R-square estimate as member r.sq.

Related

plotting p-values using ggplot stat_summary

I want to plot a dataframe (stats) with the coefficient and error bars, and automatically write the p-values above each point.
stats <- data.frame(Coefficient = c(-0.07,-0.04,-0.15173266),
p_value = c(.0765210755,0.5176050652,0.0001309025),
conf_low = c(-.1544418,-0.1686583,-0.2294873),
conf_high = c(0.007812205,0.084939487,-0.073978033),
Test = c("TestA","TestB","TestC"))
I am trying to make a function to plot the p-values above each Coefficient point. (The coord_flip in the plot below may also be throwing me off.
give.pval <- function(y){
return(c(x = Coefficient, label = stats$p_value))
}
The following ggplot is exactly what I need, except for the stat_summary line which I am doing incorrectly
ggplot(stats, aes(x = Test, y = Coefficient)) +
geom_point(aes(size = 6)) +
geom_errorbar(aes(ymax = conf_high, ymin = conf_low)) +
geom_hline(yintercept=0, linetype="dashed") +
#stat_summary(fun.data = give.pval, geom = "text") +
theme_calc() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
axis.text.x = element_text(size = 12, vjust = 0.5), axis.title.x = element_text(size = 16),
axis.text.y = element_text(size = 12), axis.title.y = element_blank(),
legend.position = "none",
plot.title = element_text(hjust = 0.5, size = 24)) +
coord_flip() +
ylab("Coefficient")
I would like the have this plot but with the appropriate p-value above each of the three Coefficient points.
Thanks for any advice.
This could be achieved with a geom_text layer where you map p_value on the label aes and some additional nudging
library(ggplot2)
stats <- data.frame(Coefficient = c(-0.07,-0.04,-0.15173266),
p_value = c(.0765210755,0.5176050652,0.0001309025),
conf_low = c(-.1544418,-0.1686583,-0.2294873),
conf_high = c(0.007812205,0.084939487,-0.073978033),
Test = c("TestA","TestB","TestC"))
ggplot(stats, aes(x = Test, y = Coefficient)) +
geom_point(aes(size = 6)) +
geom_errorbar(aes(ymax = conf_high, ymin = conf_low)) +
geom_hline(yintercept=0, linetype="dashed") +
geom_text(aes(label = p_value), nudge_x = .2) +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
axis.text.x = element_text(size = 12, vjust = 0.5), axis.title.x = element_text(size = 16),
axis.text.y = element_text(size = 12), axis.title.y = element_blank(),
legend.position = "none",
plot.title = element_text(hjust = 0.5, size = 24)) +
coord_flip() +
ylab("Coefficient")

Partial italics in facet headings of ggplot

I am wondering if there is any way to rename facet titles so that they contain partial italics and partial non-italics.
Here is some toy data
library(Hmisc)
library(dplyr)
# Plot power vs. n for various odds ratios
n <- seq(10, 1000, by=10) # candidate sample sizes
OR <- as.numeric(sort(c(seq(1/0.90,1/0.13,length.out = 9),2.9))) # candidate ORs
alpha <- c(.001, .01, .05) # alpha significance levels
# put all of these into a dataset and calculate power
powerDF <- data.frame(expand.grid(OR, n, alpha)) %>%
rename(OR = Var1, num = Var2, alph = Var3) %>%
arrange(OR) %>%
mutate(power = as.numeric(bpower(p1=.29, odds.ratio=OR, n=num, alpha = alph))) %>%
transform(OR = factor(format(round(OR,2),nsmall=2)),
alph = factor(ifelse(alph == 0.001, "p=0.001",
ifelse(alph == 0.01, "p=0.01", "p=0.05"))))
pPower <- ggplot(powerDF, aes(x = num, y = power, colour = factor(OR))) +
geom_line() +
facet_grid(factor(alph)~.) +
labs(x = "sample size") +
scale_colour_discrete(name = "Odds Ratio") +
scale_x_continuous(breaks = seq(0,1000,100)) +
scale_y_continuous(breaks = seq(0,1,.1), sec.axis = sec_axis(trans=I, breaks=NULL, name="Significance Level")) + # this is the second axis label
theme_light() +
theme(axis.title.x = element_text(size = 12, face = "bold"),
axis.title.y = element_text(size = 12, face = "bold"),
axis.text = element_text(size = 11),
panel.grid.minor = element_blank(),
panel.grid.major.y = element_line(colour = "gray95"),
panel.grid.major.x = element_line(colour = "gray95"),
strip.text = element_text(colour = 'black', face = 'bold', size = 12),
legend.text = element_text(size = 12),
legend.title = element_text(size = 12, face = "bold"))
pPower
Is there any way to get the facet headings to read "p=0.001", "p=0.01" etc, instead of "p=0.001", i.e. to get partial italics and partial non-italics?

Mean per group on a bubble plot with ggplot

I have a dataset with a lot of overlapping points and used ggplot to create a bubble plot to show that data. I need to add bars on my plot for the means of each group on the x axis (values can be 0, 1, or 2). I have tried to use geom_errorbar but haven't been able to get it to work with my data. Any help/suggestions would be greatly appreciated.
The following is my code and a script to generate fake data that is similar:
y <- seq(from=0, to=3.5, by=0.5)
x <- seq(from=0, to=2, by=1)
xnew <- sample(x, 100, replace=T)
ynew <- sample(y, 100, replace=T)
data <- data.frame(xnew,ynew)
data2 <- aggregate(data$xnew, by=list(x=data$xnew, y=data$ynew), length)
names(data2)[3] <- "Count"
ggplot(data2, aes(x = x, y = y)) +
geom_point(aes(size=Count)) +
labs(x = "Copies", y = "Score") +
aes(ymax=..y.., ymin=..y..) +
scale_x_continuous(breaks = seq(0, 2, 1)) +
scale_y_continuous(breaks = seq(0, 3, 0.5)) +
theme(legend.position = "bottom", legend.direction = "horizontal",
axis.line = element_line(size=1, colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(colour="black", size = 10),
axis.text.y = element_text(colour="black", size = 10))
I am not entirely sure that I understand your question correctly. It seems to me that in addition to the bubbles, you want to visualise the mean value of y for each value of x as a bar of some kind. (You mention error bars, but it seems that this is not a requirement, but just what you have tried. I will use geom_col() instead.)
I assume that you want to weigh the mean over y by the counts, i.e., sum(y * Count) / sum(Count). You can create a data frame that contains these values by using dplyr:
data2_mean
## # A tibble: 3 × 2
## x y
## <dbl> <dbl>
## 1 0 1.833333
## 2 1 1.750000
## 3 2 2.200000
When creating the plot, I use data2 as the data set for geom_point() and data2_mean as the data set for geom_col(). It is important to put the bars first, since the bubbles should be on top of the bars.
ggplot() +
geom_col(aes(x = x, y = y), data2_mean, fill = "gray60", width = 0.7) +
geom_point(aes(x = x, y = y, size = Count), data2) +
labs(x = "Copies", y = "Score") +
scale_x_continuous(breaks = seq(0, 2, 1)) +
scale_y_continuous(breaks = seq(0, 3, 0.5)) +
theme(legend.position = "bottom", legend.direction = "horizontal",
axis.line = element_line(size=1, colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(colour="black", size = 10),
axis.text.y = element_text(colour="black", size = 10))
Everything that I changed compared to your code comes before scale_x_continuous(). This produces the following plot:
Is this what you're after? I first calculated the group-level means using the dplyr package and then added line segments to your plot using geom_segment:
library(ggplot2)
library(dplyr)
data2 <- data2 %>% group_by(x) %>% mutate(mean.y = mean(y))
ggplot(data2, aes(x = x, y = y)) +
geom_point(aes(size=Count)) +
labs(x = "Copies", y = "Score") +
aes(ymax=..y.., ymin=..y..) +
scale_x_continuous(breaks = seq(0, 2, 1)) +
scale_y_continuous(breaks = seq(0, 3, 0.5)) +
theme(legend.position = "bottom", legend.direction = "horizontal",
axis.line = element_line(size=1, colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(colour="black", size = 10),
axis.text.y = element_text(colour="black", size = 10)) +
geom_segment(aes(y = mean.y, yend = mean.y, x = x -0.25, xend = x + 0.25))

Curious behaviour using gridExtra (ggplot)

I am trying to stack three plots on top of each other using the gridExtra package. I have tried the the first example that uses grid.arrange from here, which works absolutely fine.
However, when I try to use my own plots, I get axes for each plot but no data, with all the formatting stripped out. Minimum working example:
library(ggplot2)
library(gridExtra)
popu_H0 <- seq(10, 30, length=100)
popu_H0_norm <- dnorm(popu_H0, mean = 20, sd = 4)
popu_H0_df <- as.data.frame(cbind(popu_H0, popu_H0_norm))
plot_H0 <- ggplot(popu_H0_df, aes(x=popu_H0, y=popu_H0_norm))
plot_H0 +
geom_line() +
theme(
text = element_text(size=20),
axis.title.x = element_text(vjust=0.1),
axis.text.x = element_text(size = rel(1.8)),
legend.position = "none",
axis.title.y = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.line.y = element_blank()
) +
xlab("New label") +
annotate("text", x = 20, y = 0.05, label = "Some annotation", size = 10)
grid.arrange(plot_H0, plot_H0, plot_H0, ncol = 1, nrow = 3)
ggplot produces the expected output, but grid.arrange produces this.
You forgot to replace the plot object.
library(ggplot2)
library(gridExtra)
popu_H0 <- seq(10, 30, length=100)
popu_H0_norm <- dnorm(popu_H0, mean = 20, sd = 4)
popu_H0_df <- as.data.frame(cbind(popu_H0, popu_H0_norm))
plot_H0 <- ggplot(popu_H0_df, aes(x=popu_H0, y=popu_H0_norm))
plot_H0 <- plot_H0 + # Here you need `<-` to update the plot
geom_line() +
theme(
text = element_text(size=20),
axis.title.x = element_text(vjust=0.1),
axis.text.x = element_text(size = rel(1.8)),
legend.position = "none",
axis.title.y = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.line.y = element_blank()
) +
xlab("New label") +
annotate("text", x = 20, y = 0.05, label = "Some annotation", size = 10)
grid.arrange(plot_H0, plot_H0, plot_H0, ncol = 1, nrow = 3)

Prevent geom_points and their corresponding labels from overlapping

Thanks for the suggested duplicate, this is however not only about the labels, but is also about adjusting the points themselves so they do not overlap.
have a quick look at the plot below...
I need the coloured points, and their corresponding labels, to never overlap. They should be clustered together and all visible, perhaps with some indication that they are spaced and not 100% accurate, perhaps some sort of call out? Open to suggestions on that.
I've tried adding position = 'jitter' to both geom_point and geom_text, but that doesn't seem to be working (assume it is only for small overlaps?)
Ideas?
# TEST DATA
srvc_data <- data.frame(
Key = 1:20,
X = sample(40:80, 20, replace = T),
Y = sample(30:65, 20, replace = T)
)
srvc_data$Z <- with(srvc_data,abs(X-Y))
t1<-theme(
plot.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.line = element_line(size=.4)
)
main_plot <- ggplot(srvc_data, aes(x = X, y = Y),xlim=c(0,100), ylim=c(0,100)) +
t1 +
theme_bw() +
labs(x="X", y="Y") +
scale_x_continuous(limits = c(0, 100)) +
scale_y_continuous(limits = c(0, 100)) +
geom_abline(intercept = 0, slope = 1, colour="blue", size=34, alpha=.1)+
geom_abline(intercept = 0, slope = 1, colour="black", size=.2, alpha=.5,linetype="dashed")+
geom_point(size = 7, aes(color = Z), alpha=.7) +
scale_color_gradient("Gap %\n",low="green", high="red")+
coord_fixed()+
geom_text(aes(label=Key,size=6),show_guide = FALSE)
main_plot
Produces this plot (of course with your random data it will vary)
Thanks in advance.
Here's your plot with ggrepel geom_text_repel:
library(ggrepel)
# TEST DATA
set.seed(42)
srvc_data <- data.frame(
Key = 1:20,
X = sample(40:80, 20, replace = T),
Y = sample(30:65, 20, replace = T)
)
srvc_data$Z <- with(srvc_data,abs(X-Y))
t1<-theme(
plot.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.line = element_line(size=.4)
)
ggplot(srvc_data, aes(x = X, y = Y),xlim=c(0,100), ylim=c(0,100)) +
t1 +
theme_bw() +
labs(x="X", y="Y") +
scale_x_continuous(limits = c(0, 100)) +
scale_y_continuous(limits = c(0, 100)) +
geom_abline(intercept = 0, slope = 1, colour="blue", size=34, alpha=.1)+
geom_abline(intercept = 0, slope = 1, colour="black", size=.2, alpha=.5,linetype="dashed")+
geom_point(size = 7, aes(color = Z), alpha=.7) +
scale_color_gradient("Gap %\n",low="green", high="red")+
coord_fixed()+
geom_text_repel(aes(label=Key,size=6),show_guide = FALSE)

Resources