ggplot2: using square brackets and superscript in axis title - r

I have a plot where each axis has been log10 transformed. For one of my axis titles I would like to use both a square bracket ([]) and a superscript. How can I do this?
Example Data
library(dplyr)
library(ggplot2)
set.seed(123)
df <- data.frame(matrix(ncol = 2, nrow = 20))
colnames(df)[1:2] <- c('x','y')
df$x <- rnorm(20,1000,100)
df$y <- rnorm(20,1000,100)
df <- df %>%
mutate(log_x = log10(x),
log_y = log10(y))
Here is an example of the figure I am trying to make. I need to know how to make the -2 on the x-axis superscripted.
df %>%
ggplot(aes(x = log_x, y = log_y)) +
geom_point() +
labs(x = expression(log[10]~"[Area (m^-2)]"),
y = expression(log[10]~"[ Time Variable (months)]")) +
theme_bw() +
theme(axis.text.x = element_text(size = 16, color = "black"),
axis.text.y = element_text(size = 16, color = "black"),
axis.title = element_text(size = 16, color = "black"),
panel.grid = element_blank(),
panel.background = element_blank())

#MrFlick response provides the correct answer, see below.
df %>%
ggplot(aes(x = log_x, y = log_y)) +
geom_point() +
labs(x = expression(log[10]~"[Area"~ (m^-2) ~"]"),
y = expression(log[10]~"[ Time Variable (months)]")) +
theme_bw() +
theme(axis.text.x = element_text(size = 16, color = "black"),
axis.text.y = element_text(size = 16, color = "black"),
axis.title = element_text(size = 16, color = "black"),
panel.grid = element_blank(),
panel.background = element_blank())

Related

How to display p-values above boxplots on exponential (log10) y-axis?

I have a data frame with three groups (group1, group2, group3). I would like to show the p-value of their mean comparisons in ggplot2 which I can do however, the values are stacked ontop of one another making it difficult to see what is being compared. When I try to adjust where the p-values are located using the y_position() function, the boxplots collapse (I think because the y-axis is log10) but the p-values are no longer stacked ontop of one another. How can I keep the boxplots from collapsing and keep the p-values displayed so that you can see what is being compared?
Example data
library(ggplot2)
library(dplyr)
library(ggsignif)
df <- data.frame(matrix(ncol = 2, nrow = 30))
colnames(df)[1:2] <- c("group", "value")
df$group <- rep(c("group1","group2","group3"), each = 10)
df[1:10,2] <- rexp(10, 1/10)
df[11:20,2] <- rexp(10, 1/100)
df[21:30,2] <- rexp(10, 1/900)
# Need to say what should be compared for p-value determination
my_comparisons <- list(c("group1", "group2"),
c("group1", "group3"),
c("group2", "group3"))
Boxplots showing the distribution of value for each group however the p-values are ontop of one another so you cannot compare among groups.
df %>%
mutate(group = factor(group, levels = c("group3","group2","group1"))) %>%
ggplot(aes(x = group, y = value)) +
geom_signif(comparisons = my_comparisons,
map_signif_level = function(x) paste("p =", scales::pvalue(x))) +
scale_y_log10() +
geom_boxplot(outlier.colour="white", outlier.fill = "white", outlier.shape = 1, outlier.size = 0) +
geom_jitter(shape=1, position=position_jitter(0.2), color = "black", fill = "white", size = 2) +
labs(x = "",
y = "value") +
theme_bw() +
theme(axis.text.x = element_text(size = 16, color = "black"),
axis.text.y = element_text(size = 16, color = "black"),
axis.title = element_text(size = 16, color = "black"),
axis.title.x = element_text(vjust = -0.5),
panel.grid = element_blank(),
panel.background = element_blank())
Adjusting the y_position() of where the p-values should display but this collapses the y-axis. I have tried several values within y_position.
df %>%
mutate(group = factor(group, levels = c("group3","group2","group1"))) %>%
ggplot(aes(x = group, y = value)) +
geom_signif(y_position = c(2000,1800,1600),
comparisons = my_comparisons,
map_signif_level = function(x) paste("p =", scales::pvalue(x))) +
scale_y_log10() +
geom_boxplot(outlier.colour="white", outlier.fill = "white", outlier.shape = 1, outlier.size = 0) +
geom_jitter(shape=1, position=position_jitter(0.2), color = "black", fill = "white", size = 2) +
labs(x = "",
y = "value") +
theme_bw() +
theme(axis.text.x = element_text(size = 16, color = "black"),
axis.text.y = element_text(size = 16, color = "black"),
axis.title = element_text(size = 16, color = "black"),
axis.title.x = element_text(vjust = -0.5),
panel.grid = element_blank(),
panel.background = element_blank())
For some reason this parameter ignores the axis transformation. You therefore need to use the log10 values of the desired positions:
df %>%
mutate(group = factor(group, levels = c("group3","group2","group1"))) %>%
ggplot(aes(x = group, y = value)) +
geom_signif(comparisons = my_comparisons,
y_position = log10(c(5000, 10000, 25000)),
map_signif_level = function(x) paste("p =", scales::pvalue(x))) +
scale_y_log10() +
geom_boxplot(outlier.colour="white", outlier.fill = "white",
-outlier.shape = 1, outlier.size = 0) +
geom_jitter(shape=1, position=position_jitter(0.2), color = "black",
fill = "white", size = 2) +
labs(x = "",
y = "value") +
theme_bw() +
theme(axis.text.x = element_text(size = 16, color = "black"),
axis.text.y = element_text(size = 16, color = "black"),
axis.title = element_text(size = 16, color = "black"),
axis.title.x = element_text(vjust = -0.5),
panel.grid = element_blank(),
panel.background = element_blank())

Waffle chart with time on x axis

I would like to plot a waffle chart with time on the x-axis in R, similar to this one below. Can anyone help, please? Thanks.
The dataset is here:
df <- data.frame(spec = c("Rehab", "Cardiology", "Endocrine", "Respiratory", "General Surgery"),
start.month = c(11,11,7,3,1) )
Perhaps something like this?
library(ggplot2)
df2 <- do.call(rbind, lapply(seq(nrow(df)), function(i)
data.frame(month = factor(month.abb[12:df$start.month[i]], levels = month.abb),
spec = df$spec[i])))
df2$spec <- factor(df2$spec, levels = names(rev(sort(table(df2$spec)))))
ggplot(df2, aes(month, spec, colour = spec)) +
geom_point(size = 8, shape = 15) +
coord_cartesian(ylim = c(1, 9)) +
labs(y = "Cumulative no. of specialties",
x = "Months",
colour = "Specialties") +
scale_color_manual(values = c("#ffc000", "#ed7d31", "#5a9bd5",
"#70ad46", "#44546b")) +
theme_classic() +
theme(axis.text.y.left = element_blank(),
axis.ticks.length.y = unit(0, "points"),
legend.background = element_rect(colour = "black"),
axis.text = element_text(size = 16),
axis.title = element_text(size = 16))

dplyr + ggplot2. Use column calculated with dplyr inside scale_x_continuous() within the same pipeline

Is there a way to use a column calculated with dplyr in scale_x_continuous() from ggplot2 in the same pipeline?
p2 <- chat %>%
count(author) %>%
ggplot(aes(x = reorder(author, n), y = n, fill = n)) +
geom_bar(stat = "identity") +
coord_flip() +
theme_classic() +
scale_fill_viridis() +
scale_x_continuous(breaks = seq(0, **max(n)**, by = 250))
theme(
axis.title.x = element_blank(), axis.title.y = element_blank(),
legend.position = "none",
plot.title = element_text(size = 13, face = "bold", hjust = 0.5),
plot.subtitle = element_text(color = '#666664', size = 10, hjust = 0.5))
Basically, I'm counting the number of times a different author (factor column) appears in the dataframe. However, R is not letting me use n (which is then name of the column that count() returns) in scale_x_continuous. But it does within the ggplot() function.
Is there a way to do so? Or am I forced to do something like:
data <- chat %>%
count(author)
p2 <- ggplot(data, aes(x = reorder(author, n), y = n, fill = n)) +
geom_bar(stat = "identity") +
coord_flip() +
theme_classic() +
scale_fill_viridis() +
scale_x_continuous(breaks = seq(0, **max(data$n)**, by = 250))
theme(
axis.title.x = element_blank(), axis.title.y = element_blank(),
legend.position = "none",
plot.title = element_text(size = 13, face = "bold", hjust = 0.5),
plot.subtitle = element_text(color = '#666664', size = 10, hjust = 0.5))
Thanks in advance!
You can use curly braces and dot notation (relevant information in last part of the accepted answer in this question, and here):
library(tidyverse)
library(viridis)
#> Loading required package: viridisLite
p2 <- iris %>%
sample_n(100) %>%
count(Species) %>%
{
ggplot(., aes(x = reorder(Species, n), y = n, fill = n)) +
geom_bar(stat = "identity") +
coord_flip() +
theme_classic() +
scale_fill_viridis() +
scale_y_continuous(breaks = seq(0, max(.$n), by = 20)) +
theme(
axis.title.x = element_blank(), axis.title.y = element_blank(),
legend.position = "none",
plot.title = element_text(size = 13, face = "bold", hjust = 0.5),
plot.subtitle = element_text(color = '#666664', size = 10, hjust = 0.5)
)
}
p2
Created on 2019-11-24 by the reprex package (v0.3.0)
Please note that you did not provide any reproducible example, so I took iris as starting point and did some row sampling to get different frequencies for the Species count. If you update your question with a reproducible example I will update my answer.

Partial italics in facet headings of ggplot

I am wondering if there is any way to rename facet titles so that they contain partial italics and partial non-italics.
Here is some toy data
library(Hmisc)
library(dplyr)
# Plot power vs. n for various odds ratios
n <- seq(10, 1000, by=10) # candidate sample sizes
OR <- as.numeric(sort(c(seq(1/0.90,1/0.13,length.out = 9),2.9))) # candidate ORs
alpha <- c(.001, .01, .05) # alpha significance levels
# put all of these into a dataset and calculate power
powerDF <- data.frame(expand.grid(OR, n, alpha)) %>%
rename(OR = Var1, num = Var2, alph = Var3) %>%
arrange(OR) %>%
mutate(power = as.numeric(bpower(p1=.29, odds.ratio=OR, n=num, alpha = alph))) %>%
transform(OR = factor(format(round(OR,2),nsmall=2)),
alph = factor(ifelse(alph == 0.001, "p=0.001",
ifelse(alph == 0.01, "p=0.01", "p=0.05"))))
pPower <- ggplot(powerDF, aes(x = num, y = power, colour = factor(OR))) +
geom_line() +
facet_grid(factor(alph)~.) +
labs(x = "sample size") +
scale_colour_discrete(name = "Odds Ratio") +
scale_x_continuous(breaks = seq(0,1000,100)) +
scale_y_continuous(breaks = seq(0,1,.1), sec.axis = sec_axis(trans=I, breaks=NULL, name="Significance Level")) + # this is the second axis label
theme_light() +
theme(axis.title.x = element_text(size = 12, face = "bold"),
axis.title.y = element_text(size = 12, face = "bold"),
axis.text = element_text(size = 11),
panel.grid.minor = element_blank(),
panel.grid.major.y = element_line(colour = "gray95"),
panel.grid.major.x = element_line(colour = "gray95"),
strip.text = element_text(colour = 'black', face = 'bold', size = 12),
legend.text = element_text(size = 12),
legend.title = element_text(size = 12, face = "bold"))
pPower
Is there any way to get the facet headings to read "p=0.001", "p=0.01" etc, instead of "p=0.001", i.e. to get partial italics and partial non-italics?

Set y limits in Bar Chart ggplot2

I have created a bar chart which shows the sales of products in a particular category. This is the bar chart. As you can see it is not very clear so I am trying to set limits for the Y axis.
I create the bar chart with the following line:
bakerySales <- ggplot(sales_bakery, aes(ProductName, ProductSales))+
stat_summary(fun.y=sum,geom="bar",colour="red",fill="red",show.legend =
FALSE)
I then go on to apply a theme to the bar chart using:
bakerySales <- bakerySales +
theme(axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_text(colour = "black", size = 14, angle = 60,
hjust = 1),
axis.text.y = element_text(colour = "black", size = 14),
panel.background = element_rect(fill = "white"),
panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
axis.line = element_line(colour = "black", size = 1),
legend.position = "none",
plot.title = element_text(lineheight = 8, face = "bold"))
I have tried to set the limits for the y axis using:
bakerySales <- bakerySales + ylim(5000,10000)
When I do this I lose the content of the bar chart, It looks like this.
Can someone please tell me where I am going wrong.
Thanks
If you want to zoom in on specifix ylimits, you could use the coord_cartesian function. I do not have the bakerysales dataset, this is an example using mtcars data:
ggplot(mtcars, aes(x = gear, y = qsec)) +
stat_summary(fun.y=sum,geom="bar",colour="red",fill="red",show.legend = FALSE) +
coord_cartesian(ylim = c(200, 300))
Maybe you want
+ coord_cartesian(ylim = c(5000,10000))
df <- data.frame(x = c("a","b"), y = c(1000, 2000))
ggplot(df, aes(x=x,y=y)) +
geom_bar(stat="identity") +
coord_cartesian(ylim = c(500,3000))

Resources