I am wondering if there is any way to rename facet titles so that they contain partial italics and partial non-italics.
Here is some toy data
library(Hmisc)   # bpower()
library(dplyr)
library(ggplot2) # ggplot() is used below; do not rely on Hmisc attaching it

# Plot power vs. n for various odds ratios
n <- seq(10, 1000, by = 10) # candidate sample sizes
OR <- sort(c(seq(1 / 0.90, 1 / 0.13, length.out = 9), 2.9)) # candidate ORs
alpha <- c(0.001, 0.01, 0.05) # alpha significance levels

# Put every OR x n x alpha combination into one data set and calculate power.
# `power` is computed while OR and alph are still numeric; they are turned
# into display factors only afterwards.
powerDF <- expand.grid(OR = OR, num = n, alph = alpha) %>%
  arrange(OR) %>%
  mutate(
    power = as.numeric(bpower(p1 = 0.29, odds.ratio = OR, n = num, alpha = alph)),
    OR = factor(format(round(OR, 2), nsmall = 2)),
    # Facet labels "p=0.001", "p=0.01", "p=0.05", built without comparing
    # floating-point values with `==`.
    alph = factor(paste0("p=", alph))
  )

pPower <- ggplot(powerDF, aes(x = num, y = power, colour = OR)) +
  geom_line() +
  facet_grid(alph ~ .) +
  labs(x = "sample size") +
  scale_colour_discrete(name = "Odds Ratio") +
  scale_x_continuous(breaks = seq(0, 1000, 100)) +
  scale_y_continuous(
    breaks = seq(0, 1, 0.1),
    # Identity secondary axis without ticks: it exists only to carry the
    # right-hand axis title. The formula is passed positionally so the call
    # does not depend on the name of sec_axis()'s first argument.
    sec.axis = sec_axis(~ ., breaks = NULL, name = "Significance Level")
  ) +
  theme_light() +
  theme(
    axis.title.x = element_text(size = 12, face = "bold"),
    axis.title.y = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 11),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_line(colour = "gray95"),
    panel.grid.major.x = element_line(colour = "gray95"),
    strip.text = element_text(colour = "black", face = "bold", size = 12),
    legend.text = element_text(size = 12),
    legend.title = element_text(size = 12, face = "bold")
  )
pPower
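Is there any way to get the facet headings to read "p=0.001", "p=0.01", etc. with only the "p" set in italics and the rest of the label upright, instead of the entirely non-italic "p=0.001"? In other words, can a facet label be partly italic and partly non-italic?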
Related
I have a plot where each axis has been log10 transformed. For one of my axis titles I would like to use both a square bracket ([]) and a superscript. How can I do this?
Example Data
library(dplyr)
library(ggplot2)

# Seeded toy data: 20 normal draws (mean 1000, sd 100) for each of x and y,
# plus their base-10 logarithms.
set.seed(123)
df <- data.frame(
  x = rnorm(20, 1000, 100),
  y = rnorm(20, 1000, 100)
)
df <- mutate(df, log_x = log10(x), log_y = log10(y))
Here is an example of the figure I am trying to make. I need to know how to make the -2 on the x-axis superscripted.
# Scatter plot of the log10-transformed variables. The x title keeps "m^-2"
# inside a quoted string, so it is drawn literally instead of as a superscript.
ggplot(df, aes(x = log_x, y = log_y)) +
  geom_point() +
  theme_bw() +
  theme(
    panel.background = element_blank(),
    panel.grid = element_blank(),
    axis.title = element_text(size = 16, color = "black"),
    axis.text.y = element_text(size = 16, color = "black"),
    axis.text.x = element_text(size = 16, color = "black")
  ) +
  labs(
    x = expression(log[10] ~ "[Area (m^-2)]"),
    y = expression(log[10] ~ "[ Time Variable (months)]")
  )
# MrFlick's response provides the correct answer; see below.
# Same scatter plot, with the exponent taken out of the quoted string so that
# plotmath renders it as a superscript. The pieces of the unit are joined with
# `*` (juxtaposition) rather than `~` (which inserts a space), so no gap
# appears after "(" or before ")]".
df %>%
  ggplot(aes(x = log_x, y = log_y)) +
  geom_point() +
  labs(
    x = expression(log[10] ~ "[Area (" * m^-2 * ")]"),
    y = expression(log[10] ~ "[ Time Variable (months)]")
  ) +
  theme_bw() +
  theme(
    axis.text.x = element_text(size = 16, color = "black"),
    axis.text.y = element_text(size = 16, color = "black"),
    axis.title = element_text(size = 16, color = "black"),
    panel.grid = element_blank(),
    panel.background = element_blank()
  )
I have a data frame with three groups (group1, group2, group3). I would like to show the p-values of their pairwise mean comparisons in ggplot2. I can do this, but the p-values are stacked on top of one another, which makes it difficult to see what is being compared. When I try to adjust where the p-values are drawn using the y_position argument of geom_signif(), the p-values are no longer stacked on top of one another, but the boxplots collapse (I think because the y-axis is log10). How can I keep the boxplots from collapsing and still display the p-values so that you can see what is being compared?
Example data
library(ggplot2)
library(dplyr)
library(ggsignif)

# Seed the generator so the random example data are the same on every run.
set.seed(123)

# Three groups of 10 exponential draws each, with rates 1/10, 1/100 and 1/900.
df <- data.frame(
  group = rep(c("group1", "group2", "group3"), each = 10),
  value = c(rexp(10, 1 / 10), rexp(10, 1 / 100), rexp(10, 1 / 900))
)

# Need to say what should be compared for p-value determination
my_comparisons <- list(
  c("group1", "group2"),
  c("group1", "group3"),
  c("group2", "group3")
)
Boxplots showing the distribution of value for each group. However, the p-values are drawn on top of one another, so you cannot tell which groups are being compared.
# Box plots of value by group on a log10 y-axis, with the comparison
# annotations left at geom_signif()'s default heights.
ggplot(
  mutate(df, group = factor(group, levels = c("group3", "group2", "group1"))),
  aes(x = group, y = value)
) +
  geom_signif(
    map_signif_level = function(x) paste("p =", scales::pvalue(x)),
    comparisons = my_comparisons
  ) +
  geom_boxplot(
    outlier.size = 0, outlier.shape = 1,
    outlier.fill = "white", outlier.colour = "white"
  ) +
  geom_jitter(
    size = 2, shape = 1, fill = "white", color = "black",
    position = position_jitter(0.2)
  ) +
  scale_y_log10() +
  labs(x = "", y = "value") +
  theme_bw() +
  theme(
    panel.background = element_blank(),
    panel.grid = element_blank(),
    axis.title = element_text(size = 16, color = "black"),
    axis.title.x = element_text(vjust = -0.5),
    axis.text.y = element_text(size = 16, color = "black"),
    axis.text.x = element_text(size = 16, color = "black")
  )
Adjusting the y_position() of where the p-values should display but this collapses the y-axis. I have tried several values within y_position.
# Same plot, but with bracket heights given to y_position on the raw data
# scale (2000, 1800, 1600) while the y-axis itself is log10-transformed.
ggplot(
  mutate(df, group = factor(group, levels = c("group3", "group2", "group1"))),
  aes(x = group, y = value)
) +
  geom_signif(
    map_signif_level = function(x) paste("p =", scales::pvalue(x)),
    comparisons = my_comparisons,
    y_position = c(2000, 1800, 1600)
  ) +
  geom_boxplot(
    outlier.size = 0, outlier.shape = 1,
    outlier.fill = "white", outlier.colour = "white"
  ) +
  geom_jitter(
    size = 2, shape = 1, fill = "white", color = "black",
    position = position_jitter(0.2)
  ) +
  scale_y_log10() +
  labs(x = "", y = "value") +
  theme_bw() +
  theme(
    panel.background = element_blank(),
    panel.grid = element_blank(),
    axis.title = element_text(size = 16, color = "black"),
    axis.title.x = element_text(vjust = -0.5),
    axis.text.y = element_text(size = 16, color = "black"),
    axis.text.x = element_text(size = 16, color = "black")
  )
For some reason this parameter ignores the axis transformation. You therefore need to use the log10 values of the desired positions:
# Box plots with the bracket heights supplied as log10 values, because
# y_position is not passed through the log10 axis transformation.
df %>%
  mutate(group = factor(group, levels = c("group3", "group2", "group1"))) %>%
  ggplot(aes(x = group, y = value)) +
  geom_signif(
    comparisons = my_comparisons,
    y_position = log10(c(5000, 10000, 25000)),
    map_signif_level = function(x) paste("p =", scales::pvalue(x))
  ) +
  scale_y_log10() +
  geom_boxplot(
    outlier.colour = "white", outlier.fill = "white",
    outlier.shape = 1, outlier.size = 0
  ) +
  geom_jitter(
    shape = 1, position = position_jitter(0.2), color = "black",
    fill = "white", size = 2
  ) +
  labs(x = "", y = "value") +
  theme_bw() +
  theme(
    axis.text.x = element_text(size = 16, color = "black"),
    axis.text.y = element_text(size = 16, color = "black"),
    axis.title = element_text(size = 16, color = "black"),
    axis.title.x = element_text(vjust = -0.5),
    panel.grid = element_blank(),
    panel.background = element_blank()
  )
Just to be clear: I am relatively new to R, and the code I am using is borrowed from someone else.
I have this graph for polling averages:
Here is my code: https://pastebin.com/qvQERRUH
library("tidyverse")

# Read the polls and build a real Date column from the year/month/day fields,
# so that later comparisons, sorting and the x-axis need no re-parsing.
polls <- read.csv("polls_Paris.csv") %>%
  mutate(date = as.Date(paste(year, month, day, sep = "-")))

# For every list, add a ci_<code> column holding 1.96 * sqrt(p * (100 - p) / n),
# where p is the liste_<code> column and n is the poll's `n` column
# (presumably a 95% margin of error with p in percent -- confirm against the data).
party_codes <- c("LFI", "PS", "EELV", "PP", "Griveaux", "LREM", "Villani",
                 "Agir", "LR", "RN", "LP")
for (code in party_codes) {
  share <- polls[[paste0("liste_", code)]]
  polls[[paste0("ci_", code)]] <- 1.96 * sqrt(share * (100 - share) / polls$n)
}

# Polls from the last ten months.
polls.10m <- polls[polls$date > seq(Sys.Date(), length.out = 2, by = "-10 months")[2], ]

# Polls in chronological order, capped at the 5000 most recent. The weight is
# the `date` column of the sorted frame itself, not `polls$date` from the
# unsorted global, which would be misaligned with the sorted rows.
polls.100 <- polls %>%
  arrange(date) %>%
  top_n(5000, date)

#Results = data.frame(date = as.Date("2019-12-01"), support = c(69.1,30.9))

# Build the plot before opening the graphics device, so that an error while
# constructing it cannot leave an SVG device open.
poll_plot <- polls.100 %>%
  gather(party, support,
         c(liste_LFI, liste_PS, liste_EELV, liste_PP, liste_Griveaux, liste_LREM,
           liste_Villani, liste_Agir, liste_LR, liste_RN, liste_LP),
         factor_key = TRUE) %>%
  ggplot(aes(x = date, y = support, colour = party)) +
  geom_point(size = 2.5, alpha = 0.275) +
  geom_smooth(se = FALSE, method = "loess", span = 1) +
  labs(y = NULL, x = NULL) +
  guides(colour = guide_legend(
    ncol = 1,
    override.aes = list(linetype = 0, size = 3, alpha = 1)
  )) +
  # One label and one colour per list, in the column order given to gather().
  scale_colour_manual(
    labels = c("Simonnet (LFI)", "Hidalgo (PS-PCF-G·s)", "Belliard (EELV)",
               "Gantzer (DVG)", "Griveaux (LREM-MR-UDI)",
               "Griveaux (avant diss. de Villani)", "Villani (Diss. LREM-PRG)",
               "Bournazel (Agir)", "Dati (LR)", "Federbusch (DVD-RN)",
               "Campion (SE)"),
    values = c("#cc2443", "#FF8080", "#00c000", "#ffc0c0", "#ffeb00", "#ffeb00",
               "#FF7F50", "#adc1fd", "#0066CC", "#0D378A", "#808080")
  ) +
  theme(
    plot.margin = margin(t = 0, unit = "cm"),
    plot.background = element_blank(),
    panel.background = element_rect(fill = "grey92", colour = NA),
    panel.border = element_blank(),
    legend.background = element_rect(fill = "transparent", colour = NA),
    legend.key = element_rect(fill = "transparent", colour = NA),
    legend.title = element_blank(),
    strip.background = element_rect(fill = "transparent", colour = NA),
    panel.grid.major = element_line(colour = "#FFFFFF"),
    panel.grid.minor = element_line(colour = "#FFFFFF", size = 0.25),
    axis.ticks = element_line(colour = "grey20"),
    axis.line = element_blank(),
    plot.title = element_text(size = 12, hjust = 0),
    plot.subtitle = element_text(size = 12, hjust = 0),
    plot.caption = element_text(size = 12, colour = "#212121"),
    axis.title = element_text(size = 12, face = "plain"),
    axis.text = element_text(size = 12, face = "plain", colour = "grey30"),
    legend.position = "right",
    legend.text = element_text(size = 12),
    strip.text = element_text(size = 12, face = "plain"),
    legend.margin = margin(t = 0, unit = "cm")
  ) +
  scale_y_continuous(breaks = seq(0, 33, 5), minor_breaks = seq(0, 33, 1),
                     limits = c(0, 33), expand = c(0, 0)) +
  scale_x_date(breaks = "6 months", minor_breaks = "1 month", expand = c(0, 0))
#geom_point(data = Results, colour = c("#808080", "#E81B23"), size=4, shape=5) +
#geom_point(data = Results, colour = c("#808080", "#E81B23"), size=3.5, shape=18)

svg('Opinion polling for the 2020 Paris municipal election.svg', width = 12, height = 6)
# Explicit print(): auto-printing does not happen when the script is source()d.
print(poll_plot)
dev.off()
As you can see, Griveaux's line is split to separate the periods before and after Villani's dissident candidacy; it is actually two separate lines (they are also separate in the dataset). Griveaux's name therefore has to appear twice.
How do I remove the legend key of a single series (that is, remove the key for both its dots and its regression line)?
Here is a hack. To remove a legend key, remove it from the breaks argument to scale_*_manual or equivalent but you must keep the same number of values as there are unique values in the color/fill aesthetic.
This is better shown with an example. I will use built-in data set iris.
To remove the legend key relative to "versicolor",
# df1 (defined in the snippet below) takes its Species column from iris,
# so the levels can be inspected on iris directly.
levels(iris$Species)
#[1] "setosa" "versicolor" "virginica"
just don't include it in the breaks.
library(ggplot2)
df1 <- iris[3:5]

# Hide the "versicolor" legend key by leaving it out of `breaks`, while still
# giving every level a colour. The values are named so that each colour is
# tied to its level; unnamed values may instead be matched to `breaks`,
# which would leave "versicolor" without a colour.
ggplot(df1, aes(Petal.Length, Petal.Width, color = Species)) +
  geom_point() +
  geom_smooth(se = FALSE, method = "loess", span = 1) +
  scale_color_manual(
    breaks = c("setosa", "virginica"),
    values = c(setosa = "red", versicolor = "green", virginica = "blue")
  )
I am trying to add the p-value and R2 from mgcv::gam results to ggplot with facets. The sample dataframe and code are below. Is there a way to successfully paste the p-value and R2 on the ggplots?
library(ggplot2)
library(ggpmisc) # stat_fit_glance()
library(mgcv)    # gam()

# Sample data: the shorter vectors are recycled to the 1000 rows of value1/value2.
DF <- data.frame(Site = rep(LETTERS[20:24], each = 4),
                 Region = rep(LETTERS[14:18], each = 4),
                 time = rep(LETTERS[1:10], each = 10),
                 group = rep(LETTERS[1:4], each = 10),
                 value1 = runif(n = 1000, min = 10, max = 15),
                 value2 = runif(n = 1000, min = 100, max = 150))
# Map the letters A..J to 1..10. Going through factor() is required because
# as.numeric() applied directly to letter strings returns NA.
DF$time <- as.numeric(factor(DF$time))

GAMFORMULA <- y ~ s(x, bs = "cr", k = 3)

plot1 <- ggplot(data = DF, aes(x = time, y = value2)) +
  geom_point(col = "gray", alpha = 0.8) +
  geom_line(aes(group = group), col = "gray", alpha = 0.8) +
  geom_smooth(se = TRUE, col = "darkorange", alpha = 0.8, fill = "orange",
              method = "gam", formula = GAMFORMULA) +
  theme_bw() +
  theme(strip.text.x = element_text(size = 10),
        strip.text.y = element_text(size = 10, face = "bold", angle = 0),
        strip.background = element_rect(colour = "black", fill = "gray90"),
        axis.text.x = element_text(size = 10),  # x-axis tick label size
        axis.text.y = element_text(size = 10),  # y-axis tick label size
        axis.ticks = element_blank(),           # remove axis ticks
        axis.title.x = element_text(size = 18), # x-axis title size
        axis.title.y = element_text(size = 25), # y-axis title size
        panel.background = element_blank(),
        panel.grid.major = element_blank(),     # remove major grid lines
        panel.grid.minor = element_blank(),     # remove minor grid lines
        plot.background = element_blank()) +
  labs(y = "Value", x = "Time", title = "") +
  # This label is the call the question is about: it fails with
  # "object 'r.squared' not found" (see the error quoted after this snippet).
  stat_fit_glance(method = "gam",
                  method.args = list(formula = GAMFORMULA),
                  aes(label = sprintf('R^2~"="~%.3f~~italic(p)~"="~%.2f',
                                      stat(..r.squared..), stat(..p.value..))),
                  parse = TRUE)
plot1 + facet_wrap(Site ~ group, scales = "free_y", ncol = 3)
Error in sprintf("R^2~\"=\"~%.3f~~italic(p)~\"=\"~%.2f", r.squared, p.value) :
object 'r.squared' not found
My answer explains why stat_fit_glance() cannot be used to add r.sq to a plot, but I am afraid it does not provide an alternative approach.
stat_fit_glance() is a wrapper around broom::glance(): it fits the model and passes the model fit object to broom::glance(). In the case of gam(), broom::glance() does not return an estimate for R2, and consequently stat_fit_glance() is also unable to return it.
To see what computed values are available one can use geom_debug() from package 'gginnards'.
library(ggpmisc)
library(gginnards)
library(mgcv)

# Sample data: the shorter vectors are recycled to the 1000 rows of value1/value2.
DF <- data.frame(Site = rep(LETTERS[20:24], each = 4),
                 Region = rep(LETTERS[14:18], each = 4),
                 time = rep(LETTERS[1:10], each = 10),
                 group = rep(LETTERS[1:4], each = 10),
                 value1 = runif(n = 1000, min = 10, max = 15),
                 value2 = runif(n = 1000, min = 100, max = 150))
# Map the letters A..J to 1..10. Going through factor() is required because
# as.numeric() applied directly to letter strings returns NA.
DF$time <- as.numeric(factor(DF$time))

GAMFORMULA <- y ~ s(x, bs = "cr", k = 3)

plot1 <- ggplot(data = DF, aes(x = time, y = value2)) +
  geom_point(col = "gray", alpha = 0.8) +
  geom_line(aes(group = group), col = "gray", alpha = 0.8) +
  geom_smooth(se = TRUE, col = "darkorange", alpha = 0.8, fill = "orange",
              method = "gam", formula = GAMFORMULA) +
  theme_bw() +
  theme(strip.text.x = element_text(size = 10),
        strip.text.y = element_text(size = 10, face = "bold", angle = 0),
        strip.background = element_rect(colour = "black", fill = "gray90"),
        axis.text.x = element_text(size = 10),  # x-axis tick label size
        axis.text.y = element_text(size = 10),  # y-axis tick label size
        axis.ticks = element_blank(),           # remove axis ticks
        axis.title.x = element_text(size = 18), # x-axis title size
        axis.title.y = element_text(size = 25), # y-axis title size
        panel.background = element_blank(),
        panel.grid.major = element_blank(),     # remove major grid lines
        panel.grid.minor = element_blank(),     # remove minor grid lines
        plot.background = element_blank()) +
  labs(y = "Value", x = "Time", title = "") +
  # geom = "debug" replaces the text label so that the values computed by
  # stat_fit_glance() can be inspected instead of drawn.
  stat_fit_glance(method = "gam",
                  method.args = list(formula = GAMFORMULA),
                  # aes(label = sprintf('R^2~"="~%.3f~~italic(p)~"="~%.2f',
                  # stat(..r.squared..),stat(..p.value..))),
                  # parse = TRUE)
                  geom = "debug")
plot1 + facet_wrap(Site ~ group, scales = "free_y", ncol = 3)
Shown above are the values returned by stat_fit_glance() for the first two panels in the plot.
Note: There does not seem to be agreement on whether R-square is meaningful for GAM. However the summary() method for gam does return an adjusted R-square estimate as member r.sq.
I am wondering if there is any way to get a manual right-side y-axis label when there is no scale, only facet headings.
Here's an example
library(dplyr)
library(Hmisc)   # bpower()
library(ggplot2) # ggplot() is used below; do not rely on Hmisc attaching it

# Plot power vs. n for various odds ratios (base prob. = .29, the p1 passed to bpower())
# The outer parentheses print each vector as it is assigned.
(n <- seq(10, 1000, by = 10)) # candidate sample sizes
(OR <- sort(c(seq(1 / 0.90, 1 / 0.13, length.out = 9), 2.9))) # candidate odds ratios, spanning the 95% CI centered around an odds ratio of 2.9
alpha <- c(0.001, 0.01, 0.05)

# Put every OR x n x alpha combination into one data set and calculate power.
# `power` is computed while OR is still numeric; OR becomes a display factor
# only afterwards.
powerDF <- expand.grid(OR = OR, num = n, alph = alpha) %>%
  arrange(OR) %>%
  mutate(
    power = as.numeric(bpower(p1 = 0.29, odds.ratio = OR, n = num, alpha = alph)),
    OR = factor(format(round(OR, 2), nsmall = 2))
  )

# now plot
pPower <- ggplot(powerDF, aes(x = num, y = power, colour = OR)) +
  geom_line() +
  facet_grid(factor(alph) ~ .) +
  labs(x = "sample size") +
  scale_colour_discrete(name = "Odds Ratio") +
  scale_x_continuous(breaks = seq(0, 1000, 100)) +
  scale_y_continuous(breaks = seq(0, 1, 0.1)) +
  theme_light() +
  theme(
    axis.title.x = element_text(size = 12, face = "bold"),
    axis.title.y = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 11),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_line(colour = "gray95"),
    panel.grid.major.x = element_line(colour = "gray95"),
    strip.text = element_text(colour = "black", face = "bold", size = 12),
    legend.text = element_text(size = 12),
    legend.title = element_text(size = 12, face = "bold")
  )
(Please forgive the cluttered axes labels, I had to reduce the size of the image to allow it to be uploaded).
I was wondering if there was any way to have an axis label saying 'significance level' down the right hand side of the graph?
Adding the following to scale_y_continuous seems one way to go (although a bunch of warnings)
# Extra argument for scale_y_continuous(): an identity secondary axis with no
# ticks, used only to place a title on the right-hand side. The formula is
# passed positionally so the call does not depend on the `trans` argument name.
sec.axis = sec_axis(~ ., breaks = NULL, name = "Significance")
Alternatively, you can add an additional strip that spans all the panels:
library(grid)
library(gtable)

# Turn the plot into a gtable so its layout can be edited directly.
g <- ggplotGrob(pPower)

# Strip-like grob: a grey rectangle with the rotated title drawn on top.
rect <- grobTree(
  rectGrob(gp = gpar(col = "grey70", fill = "grey70")),
  textGrob(label = "Significance", gp = gpar(col = "black"), rot = -90)
)

# Insert a new column after column 6, as wide as column 6, and place the
# grob in column 7 across rows 7 to 11.
g <- gtable_add_cols(g, widths = g$widths[6], pos = 6)
g <- gtable_add_grob(g, grobs = rect, t = 7, l = 7, b = 11)

grid.newpage()
grid.draw(g)