I tried to produce 12 boxplots per ggplots stat_summary() functions, as you can see below in the reproducible example. I used stat_summary() instead of geom_boxplot(), because I want to whiskers to end at the 1st and 99th percentile of the data or to be individualized so to speak. I coded two functions, one for the whiskers and one for the outliers and used them as arguments in stat_summary(). This is the result:
I see two problems with this plot:
Not all outliers are coloured in red.
Outliers cut the whiskers, which is not supposed to happen by definition of my functions.
The help file has not been helping me in solving this issue. Comments are welcome.
The code:
library(stats)
library(ggplot2)
library(dplyr)
# Example Data
{
set.seed(123)
indexnumber_of_entity = rep(c(1:30),
each = 12)
month = rep(c(1:12),
each = 1,
times = 30)
variable_of_interest = runif(n = 360,
min = 0,
max = 100)
Data = as.data.frame(cbind(indexnumber_of_entity,
month,
variable_of_interest)) %>% mutate_at(.vars = c(1,2,3),
as.numeric)
Data_Above_99th_Percentile = filter(Data,
variable_of_interest > stats::quantile(Data$variable_of_interest,
0.99))
Data_Below_1st_Percentile = filter(Data,
variable_of_interest < stats::quantile(Data$variable_of_interest,
0.01))
}
# Functions that enable individualizing boxplots
{
Individualized_Boxplot_Quantiles <- function(x){
d <- data.frame(ymin = stats::quantile(x,0.01),
lower = stats::quantile(x,0.25),
middle = stats::quantile(x,0.5),
upper = stats::quantile(x,0.75),
ymax = stats::quantile(x,0.99),
row.names = NULL)
d[1, ]
}
Definition_of_Outliers = function(x)
{
subset(x,
stats::quantile(x,0.99) < x | stats::quantile(x,0.01) > x)
}
}
# Producing the ggplot
ggplot(data = Data) +
aes(x = month,
y = variable_of_interest,
group = month) +
stat_summary(fun.data = Individualized_Boxplot_Quantiles,
geom="boxplot",
lwd = 0.5) +
stat_summary(fun.y = Definition_of_Outliers,
geom="point",
size = 1) +
labs(title = "Distributions of Variable of Interest based on months",
x = "Month",
y = "Variable of Interest") +
theme(plot.title = element_text(size = 20,
hjust = 0.5,
face = "bold"),
axis.ticks.x = element_blank(),
axis.text.x = element_text(size = 12,
face = "bold"),
axis.text.y = element_text(size = 12,
face = "bold"),
axis.title.x = element_text(size = 16,
face = "bold",
vjust = -3),
axis.title.y = element_text(size = 16,
face = "bold",
vjust = 3)) +
scale_x_continuous(breaks = c(seq(from = 1,
to = 12,
by = 1))) +
scale_y_continuous(breaks = c(seq(from = 0,
to = 100,
by = 10))) +
geom_point(data = Data_Above_99th_Percentile,
colour = "red",
size = 1) +
geom_point(data = Data_Below_1st_Percentile,
colour = "red",
size = 1)
You can simplify the functions a little bit like this:
boxplot_quantiles <- function(x) {
y <- as.data.frame(t(stats::quantile(x, c(0.01, 0.25, 0.5, 0.75, 0.99))))
setNames(y, c('ymin', 'lower', 'middle', 'upper', 'ymax'))
}
outliers <- function(x) {
subset(x, stats::quantile(x,0.99) < x | stats::quantile(x,0.01) > x)
}
You can rely on the summary functions, since the Data_above_99th_Percentile and Data_Below_1st_Percentile were not groupwise calculations in your own code.
ggplot(data = Data, aes(x = month, y = variable_of_interest, group = month)) +
stat_summary(fun = outliers, geom = "point", col = 'red', size = 1) +
stat_summary(fun.data = boxplot_quantiles, geom = "boxplot", lwd = 0.5) +
scale_x_continuous('Month', breaks = 1:12) +
scale_y_continuous('Variable of Interest' , breaks = 0:10 * 10) +
labs(title = "Distributions of Variable of Interest based on months") +
theme(text = element_text(face = 'bold', size = 12),
plot.title = element_text(size = 20, hjust = 0.5),
axis.ticks.x = element_blank(),
axis.title.x = element_text(size = 16, margin = margin(20, 0, 0, 0)),
axis.title.y = element_text(size = 16, vjust = 3))
Edit
As long as you perform groupwise operations on the filtered data frames, your alternative method of drawing the outliers will work too. Note that I have added these in colored layers above the existing plot so that the red points are overplotted with blue and green dots:
Data_Above_99th_Percentile <- Data %>%
group_by(month) %>%
filter(variable_of_interest > quantile(variable_of_interest,0.99))
Data_Below_1st_Percentile <- Data %>%
group_by(month) %>%
filter(variable_of_interest < quantile(variable_of_interest, 0.01))
ggplot(data = Data, aes(x = month, y = variable_of_interest, group = month)) +
stat_summary(fun = outliers, geom = "point", col = 'red', size = 1) +
stat_summary(fun.data = boxplot_quantiles, geom = "boxplot", lwd = 0.5) +
scale_x_continuous('Month', breaks = 1:12) +
scale_y_continuous('Variable of Interest' , breaks = 0:10 * 10) +
labs(title = "Distributions of Variable of Interest based on months") +
theme(text = element_text(face = 'bold', size = 12),
plot.title = element_text(size = 20, hjust = 0.5),
axis.ticks.x = element_blank(),
axis.title.x = element_text(size = 16, margin = margin(20, 0, 0, 0)),
axis.title.y = element_text(size = 16, vjust = 3)) +
geom_point(data = Data_Below_1st_Percentile, color = 'green') +
geom_point(data = Data_Above_99th_Percentile, color = 'blue')
Related
I verified that when using geom_line the resulting graph is not formed by solid lines, the lines look like a ladder.
Would anyone know how to get solid lines?
Here is the code used.
data base
0;0
0.000700;1.050
0.001750;1.100
0.003800;1.150
0.029110;1.200
0.130900;1.240
0.341428;1.303
`library(tidyverse)
library(reprex)
SAT <- read.delim("Curva_Tipica.txt", header = FALSE, encoding = "UTF-8")
SAT <- str_split(SAT[[1]],";")
length_SAT <- length(SAT) + 1
curva_VxI <- tibble(
.rows = length_SAT,
I = 0,
V = 0,
)
for (linha in 2:length_SAT) {
curva_VxI$I[linha] <- as.double(SAT[[linha-1]][1])
curva_VxI$V[linha] <- as.double(SAT[[linha-1]][2])
}
Xac <- (curva_VxI$V[length_SAT] - curva_VxI$V[length_SAT-1])/(curva_VxI$I[length_SAT] - curva_VxI$I[length_SAT-1])
Vj <- curva_VxI$V[length_SAT] - Xac*curva_VxI$I[length_SAT]
Xac_grafico <- tibble(
x = c(0, 1),
y = c(0, Xac)
)
Vj_grafico <- tibble(
x = c(0),
y = c(Vj)
)
theme_set(theme_bw())
ggplot() +
geom_point(
data = curva_VxI,
aes(x = I, y = V),
color = "orange",
size = 2
) +
geom_line(
data = curva_VxI,
aes(x = I, y = V),
color = "orange",
linewidth = 1
) +
geom_point(
data = Xac_grafico,
aes(x = x, y = y),
color = "blue",
) +
geom_line(
data = Xac_grafico,
aes(x = x, y = y),
color = "blue",
linewidth = 1,
linetype = "solid",
) +
labs(
y = "V (pu)",
x = "I (pu)"
) +
coord_cartesian(
xlim = c(0, 1),
ylim = c(0, 1.5),
expand = FALSE,
clip = "off"
) +
scale_y_continuous(sec.axis = sec_axis(~.*1, "Xac (pu)")) +
theme(
axis.title.x = element_text(margin = margin(t = 10), size = 12, face = "bold"),
axis.title.y = element_text(margin = margin(r = 10), size = 12, face = "bold"),
axis.text.x = element_text(size = 10),
axis.text.y = element_text(size = 10),
legend.position = "top",
# plot.background = element_rect(fill = "gray60"),
plot.margin = margin(t = 1, r = 1, b = 0.5, l = 0.5, unit = "cm"),
)`
code output
expected result
result obtained
I have a dataframe of daily mean temperature observations for a year from two locations (i.e., Site 1 & Site 2). The three temperature variables are:
Air temperature
Water temperature
Difference = Air - Water
I would like to produce a four-panel figure where the top and bottom rows are Site 1 and Site 2 respectively, the left column displays Air and Water and the right column shows Difference. I am specifically not using facet_wrap() or facet_grid() because the y-axes are two different variables (i.e., temperature and temperature difference), and the legends are custom.
I can create the four-panel figure however, I would like the y-axis title for each column not to be repeated. Is there a way to extend each y-axis title across the stacked plots for each column?
Here is the output from the figure code below
The ideal figure would look something more like this
Example Data
library(data.table)
library(dplyr)
library(ggplot2)
library(patchwork)
set.seed(321)
# Create the example air and water temperature time series
df1 <- data.frame(matrix(ncol = 4, nrow = 365*4))
colnames(df1)[1:4] <- c("Location","Variable", "Date", "Temperature")
df1[1:730,1] <- "Site 1"
df1[731:NROW(df1),1] <- "Site 2"
df1[c(1:365,731:1095),2] <- "Air"
df1[c(366:730,1096:NROW(df1)),2] <- "Water"
df1$Date <- rep(seq.Date(as.Date("2021-01-01"),as.Date("2021-12-31"),"1 day"),4)
df1$noise <- rep(runif(365),4)
df1$t <- rep(seq(0,1*pi,,365),4)
for (i in 1:NROW(df1)) {
df1$Temperature[1:365] <- 20*sin(df1$t)+df1$noise*8
df1$Temperature[365:730] <- 17*sin(df1$t)+df1$noise*2
df1$Temperature[731:1095] <- 25*sin(df1$t)+df1$noise*6
df1$Temperature[1096:NROW(df1)] <- 18*sin(df1$t)+df1$noise*1.5
}
# Take the difference between air and water temperature
df1 <- df1[,1:4]
site1 <- df1[df1$Location == 'Site 1',]
site1 <- site1 %>%
tidyr::pivot_wider(names_from = Variable, values_from = Temperature) %>%
mutate(Difference = Air - Water)
site2 <- df1[df1$Location == 'Site 2',]
site2 <- site2 %>%
tidyr::pivot_wider(names_from = Variable, values_from = Temperature) %>%
mutate(Difference = Air - Water)
Code for figure
fig1a <- site1 %>%
ggplot() +
geom_line(aes(x = Date, y = Air), color = "red", size = 1) +
geom_line(aes(x = Date, y = Water), size = 1, alpha = 0.7) +
scale_x_date(date_breaks = "2 month",
date_labels = "%b",
expand = c(0, 0)) +
labs(x = NULL,
y = expression(paste("Temperature ( ", degree, "C)"))) +
annotate("rect", fill = "white", color = "black",
xmin = as.Date("2021-01-15", "%Y-%m-%d"),
xmax = as.Date("2021-03-15", "%Y-%m-%d"),
ymin = 26, ymax = 30) +
annotate("segment", color = "red", size = 1,
x = as.Date("2021-01-20", "%Y-%m-%d"),
xend = as.Date("2021-01-30", "%Y-%m-%d"),
y = 29, yend = 29) +
annotate("segment", color = "black", size = 1,
x = as.Date("2021-01-20", "%Y-%m-%d"),
xend = as.Date("2021-01-30", "%Y-%m-%d"),
y = 27, yend = 27) +
annotate("text", x = as.Date("2021-02-01", "%Y-%m-%d"), y = 29,
label = 'Air',
size = 5, fontface = 1, hjust = 0) +
annotate("text", x = as.Date("2021-02-01", "%Y-%m-%d"), y = 27,
label = "Water",
size = 5, fontface = 1, hjust = 0) +
annotate("text", x = as.Date("2021-11-30", "%Y-%m-%d"), y = 30,
label = "(a",
size = 6, fontface = 1, hjust = 0) +
theme_bw() +
theme(plot.margin = unit(c(0, 1, 0, 1), "lines"),
panel.grid = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 14, color = "white"),
axis.text.y = element_text(size = 14, color = "black"),
panel.border = element_blank()) +
guides(fill = guide_legend(nrow = 2)) +
coord_cartesian(clip = 'off', ylim = c(0, 32)) +
annotation_custom(grid::rectGrob(gp = grid::gpar(fill = NA)))
fig1b <- site2 %>%
ggplot() +
geom_line(aes(x = Date, y = Air), color = "red", size = 1) +
geom_line(aes(x = Date, y = Water), size = 1, alpha = 0.7) +
scale_x_date(date_breaks = "2 month",
date_labels = "%b",
expand = c(0, 0)) +
labs(x = NULL,
y = expression(paste("Temperature ( ", degree, "C)"))) +
annotate("text", x = as.Date("2021-11-30", "%Y-%m-%d"), y = 30,
label = "(b",
size = 6, fontface = 1, hjust = 0) +
theme_bw() +
theme(plot.margin = unit(c(0, 1, 2, 0), "lines"),
panel.grid = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 16, color = "black", angle = 90, vjust = 0.5, hjust = 1),
axis.text.y = element_text(size = 16, color = "black"),
panel.border = element_blank()) +
guides(fill = guide_legend(nrow = 2)) +
coord_cartesian(clip = 'off', ylim = c(0,32)) +
annotation_custom(grid::rectGrob(gp = grid::gpar(fill = NA))) +
annotate(geom = "text", x = as.Date("2021-06-15", "%Y-%m-%d"), y = -7, label = 2021, size = 6)
fig1c <- site1 %>%
ggplot() +
geom_line(aes(x = Date, y = Difference), size = 1) +
scale_x_date(date_breaks = "2 month",
date_labels = "%b",
expand = c(0, 0)) +
labs(x = NULL,
y = expression(paste(Delta~" Temperature ( ", degree, "C)"))) +
annotate("rect", fill = "white", color = "black",
xmin = as.Date("2021-01-15", "%Y-%m-%d"),
xmax = as.Date("2021-06-01", "%Y-%m-%d"),
ymin = 10.25, ymax = 12.5) +
annotate("text", x = as.Date("2021-01-20", "%Y-%m-%d"), y = 12,
label = 'Pos. = Air > Water',
size = 5, fontface = 1, hjust = 0) +
annotate("text", x = as.Date("2021-01-20", "%Y-%m-%d"), y = 11,
label = "Neg. = Water > Air",
size = 5, fontface = 1, hjust = 0) +
annotate("text", x = as.Date("2021-11-30", "%Y-%m-%d"), y = 12,
label = "(c",
size = 6, fontface = 1, hjust = 0) +
theme_bw() +
theme(plot.margin = unit(c(0, 1, 0, 0), "lines"),
panel.grid = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 14, color = "white"),
axis.text.y = element_text(size = 14, color = "black"),
panel.border = element_blank()) +
guides(fill = guide_legend(nrow = 2)) +
coord_cartesian(clip = 'off', ylim = c(-2, 12)) +
annotation_custom(grid::rectGrob(gp = grid::gpar(fill = NA)))
fig1d <- site2 %>%
ggplot() +
geom_line(aes(x = Date, y = Difference), size = 1) +
scale_x_date(date_breaks = "2 month",
date_labels = "%b",
expand = c(0, 0)) +
labs(x = NULL,
y = expression(paste(Delta~" Temperature ( ", degree, "C)"))) +
annotate("text", x = as.Date("2021-11-30", "%Y-%m-%d"), y = 12,
label = "(d",
size = 6, fontface = 1, hjust = 0) +
theme_bw() +
theme(plot.margin = unit(c(1, 1, 2, 0), "lines"),
panel.grid = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 16, color = "black", angle = 90, vjust = 0.5, hjust = 1),
axis.text.y = element_text(size = 16, color = "black"),
panel.border = element_blank()) +
guides(fill = guide_legend(nrow = 2)) +
coord_cartesian(clip = 'off', ylim = c(-2,12)) +
annotation_custom(grid::rectGrob(gp = grid::gpar(fill = NA))) +
annotate(geom = "text", x = as.Date("2021-06-15", "%Y-%m-%d"), y = -5, label = 2021, size = 6)
# width = 1200 height = 900
fig1a + fig1c + fig1b + fig1d + plot_layout(ncol = 2)
I figured out how to do it, which required using facet_grid. I ended up needing to create two facets that I then stitched together, see below.
# Take the difference between air and water temperature
df1 <- df1[,1:4]
site1 <- df1[df1$Location == 'Site 1',]
site1 <- site1 %>%
tidyr::pivot_wider(names_from = Variable, values_from = Temperature) %>%
mutate(Difference = Air - Water) %>%
tidyr::pivot_longer(cols = c('Water','Air','Difference'),
names_to = 'Variable',
values_to = 'Temperature')
site2 <- df1[df1$Location == 'Site 2',]
site2 <- site2 %>%
tidyr::pivot_wider(names_from = Variable, values_from = Temperature) %>%
mutate(Difference = Air - Water) %>%
tidyr::pivot_longer(cols = c('Water','Air','Difference'),
names_to = 'Variable',
values_to = 'Temperature')
df1 <- rbind(site1,site2)
# This creates the first column of figures
p1 <- df1 %>%
subset(!df1$Variable == 'Difference',) %>%
mutate(var_air_water = ## Here is the new variable
if_else(Variable %in% c("Air", "Water"),
true = "Air & Water",
false = Variable)) %>%
ggplot() +
geom_line(aes(x = Date, y = Temperature, group = Variable, color = Variable)) +
scale_color_manual(values = c("black", "red")) +
scale_x_date(date_breaks = "2 month",
date_labels = "%b",
expand = c(0, 0)) +
scale_y_continuous(breaks = seq(0,30,5)) +
labs(x = NULL,
y = expression(paste("Temperature ( ", degree, "C)"))) +
theme_bw() +
theme(plot.margin = unit(c(0, 1, 2, 0), "lines"),
panel.grid = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 16, color = "black", angle = 90, vjust = 0.5, hjust = 1),
axis.text.y = element_text(size = 16, color = "black"),
legend.title = element_blank(),
legend.text = element_text(size = 16),
legend.position = c(0.15,0.95),
legend.background = element_blank(),
strip.text = element_blank(),
strip.background = element_blank(),
panel.border = element_blank()) +
guides(fill = guide_legend(nrow = 2)) +
coord_cartesian(clip = 'off', ylim = c(0,33)) +
annotation_custom(grid::rectGrob(gp = grid::gpar(fill = NA))) +
annotate(geom = "text", x = as.Date("2021-06-15", "%Y-%m-%d"), y = -8, label = 2021, size = 6) +
facet_grid(Location~var_air_water)
# This adds the text to the corresponding figures
dat_text1 <- data.frame(
label = c("(a","(b"),
Location = c('Site 1','Site 2'),
x = c(as.Date("2021-12-01", "%Y-%m-%d"),
as.Date("2021-12-01", "%Y-%m-%d")),
y = c(32,32))
p1 <- p1 + geom_text(
data = dat_text1,
mapping = aes(x = x, y = y, label = label,
hjust = 0,
vjust = 1),
size = 5)
# This creates the second column of figures
p2 <- df1 %>%
mutate(var_air_water = ## Here is the new variable
if_else(Variable %in% c("Air", "Water"),
true = "Air & Water",
false = Variable)) %>%
subset(!var_air_water == c('Air & Water'),) %>%
ggplot() +
geom_line(aes(x = Date, y = Temperature, group = Variable, color = Variable)) +
scale_color_manual(values = "black") +
scale_x_date(date_breaks = "2 month",
date_labels = "%b",
expand = c(0, 0)) +
scale_y_continuous(breaks = seq(0,12,2)) +
labs(x = NULL,
y = expression(paste(Delta~" Temperature ( ", degree, "C)"))) +
theme_bw() +
theme(plot.margin = unit(c(0, 1, 2, 0), "lines"),
panel.grid = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 16, color = "black", angle = 90, vjust = 0.5, hjust = 1),
axis.text.y = element_text(size = 16, color = "black"),
legend.position = 'none',
strip.background = element_rect(fill = "gray80"),
strip.background.x = element_blank(),
strip.text.x = element_blank(),
strip.text.y = element_text(size = 16)) +
guides(fill = guide_legend(nrow = 2)) +
coord_cartesian(clip = 'off', ylim = c(0,12)) +
annotation_custom(grid::rectGrob(gp = grid::gpar(fill = NA))) +
annotate(geom = "text", x = as.Date("2021-06-15", "%Y-%m-%d"), y = -3, label = 2021, size = 6) +
facet_grid(Location~var_air_water)
# Like above, this adds the text to the corresponding figures
dat_text2 <- data.frame(
label = c("Pos. = Air > Water", "Neg. = Water > Air", "(c","(d"),
Location = c('Site 1','Site 1','Site 1','Site 2'),
x = c(as.Date("2021-01-15", "%Y-%m-%d"),
as.Date("2021-01-15", "%Y-%m-%d"),
as.Date("2021-12-01", "%Y-%m-%d"),
as.Date("2021-12-01", "%Y-%m-%d")),
y = c(12,11,12,12))
p2 <- p2 + geom_text(
data = dat_text2,
mapping = aes(x = x, y = y, label = label,
hjust = 0,
vjust = 1,
size = 10),
size = 5)
# width = 1200 height = 900
p1 + p2 + plot_layout(ncol = 2)
Here is my data which produces a heat map. What I am hoping to do is produce multiple difference heatmaps with an outline around each of x categories.
data <- data.frame(id=c("john","john","john","kate","kate","kate","chris","chris","chris"),
group=c("geo","his","math","geo","his","math","geo","his","math"),
grade=c(65,76,87,67,89,98,99,97,96),
class=c("A","A","A","A","A","A","B","B","B"))
data
mine.heatmap <- ggplot(data = data, mapping = aes(x = id, y = group, fill = grade)) +
geom_tile() +
xlab(label = "id") +
ylab(label="group") +
labs(fill="grade")+
scale_fill_gradient2(low = "#800080",
high = "#FF8C00",mid = "white")
x <- mine.heatmap + facet_grid(
cols = vars(class), scales = "free", space = "free"
)
x + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1, size = 18, margin = margin(b=2)))+
theme(axis.text.y= element_text(angle = 0, vjust = 0.5, hjust=1, size = 18)) +
theme(legend.text = element_text(size=14))+
theme(legend.title = element_text(size=14))+
theme(strip.text = element_text(size=14))+
theme(axis.title.x = element_text(size=18)) +theme(axis.title.y = element_text(size=18))
Original Heat map:
What I am hoping to get are the following heatmaps:
One option to achieve your desired result would be to
put your plotting code in a function which takes as one argument the id for which you want to draw a outline.
Use some data wrangling to convert the categories to be plotted on the x and y aes to numerics per facet variable.
Add a geom_rect to your plotting code to draw the outline which uses the numerics computed in step 2.
library(ggplot2)
library(dplyr)
mine_heatmap <- function(x) {
p <- ggplot(data = data, mapping = aes(x = id, y = group, fill = grade)) +
geom_tile() +
# Add outline via a geom_rect
geom_rect(
data = subset(data, id == x),
aes(
xmin = id_num - .5, xmax = id_num + .5,
ymin = min(group_num) - .5, ymax = max(group_num) + .5
), fill = NA, color = "black", size = 1
) +
labs(x = "id", y = "group", fill = "grade") +
scale_fill_gradient2(
low = "#800080",
high = "#FF8C00", mid = "white"
)
p <- p + facet_grid(
cols = vars(class), scales = "free", space = "free"
)
p + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size = 18, margin = margin(b = 2))) +
theme(axis.text.y = element_text(angle = 0, vjust = 0.5, hjust = 1, size = 18)) +
theme(legend.text = element_text(size = 14)) +
theme(legend.title = element_text(size = 14)) +
theme(strip.text = element_text(size = 14)) +
theme(axis.title.x = element_text(size = 18)) + theme(axis.title.y = element_text(size = 18))
}
# Convert id and group to numerics per facet variable
data <- data |>
group_by(class) |>
mutate(
id_num = as.numeric(factor(id)),
group_num = as.numeric(factor(group))
) |>
ungroup()
mine_heatmap("john")
mine_heatmap("kate")
mine_heatmap("chris")
I am using the qicharts2() package to construct a p-chart in R. It is necessary to have a variable UCL / LCL, but the way the qic() natively constructs this is not what I'm looking for. See the below two images:
What qic() produces:
What I need it to produce:
I'm unsure how to change this and couldn't find much to help control the UCL/LCL in the help vignette. Any help on how to control these aesthetics or the calculation going into them is appreciated (I am not a statistician).
Sample:
df <- data.frame(Date = sample(seq(as.Date('1999/01/01'), as.Date('2000/01/01'), by="day"), 25),
Values = sample(seq(from = 0, to = 1, by = .1), size = 25, replace = TRUE),
Totals = sample(seq(from = 0, to = 50, by = 1), size = 25, replace = TRUE))
qic(data = df, y = Values, x = Date, n = Totals, chart = 'p', point.size = 2)
Thanks to the comments from #markus, the key was to save the qic() gg object to a variable and access the layers. Using the code below demonstrates how this works:
df <- data.frame(Date = sample(seq(as.Date('1999/01/01'), as.Date('2000/01/01'), by="day"), 25),
Values = sample(seq(from = 0, to = 1, by = .1), size = 25, replace = TRUE),
Totals = sample(seq(from = 0, to = 50, by = 1), size = 25, replace = TRUE))
p <- qic(data = df, y = Values, x = Date, n = Totals, chart = 'p', point.size = 2, show.labels = TRUE, decimals = 0) +
geom_line(color = "steelblue") + theme_bw() +
ylim(c(0,1)) +
ggtitle("Sample qic() Plot") +
xlab("") +
ylab("") +
theme(plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
axis.title.y = element_text(face = "bold", size = 12)) +
theme(axis.text.x = element_text(angle = 65, hjust = 1, size = 12, face = "bold"),
axis.text.y = element_text(size = 12, face = "bold")) +
theme(legend.position = "none")
p$layers[[1]] <- NULL;
p$layers <- c(p$layers, geom_step(data = p$data, aes(y = ucl), linetype = "dotted", col = "grey50", size = 1), geom_step(data = p$data, aes(y = lcl), linetype = "dotted", col = "grey50", size = 1));
p
Output:
I have to create graph
Following is my sample data frame
data <- data.frame(
"Tissue" = c("Adrenal gland", "Appendix", "Appendix"),
"protein.expression" = c("No detect","No detect", "Medium"),
"cell.type" = c("Glandular cells" ,"Lymphoid tissu","Glandular cells")
)
Left y axis is unique tissue type. Left axis have comma separated cell types.
I am not sure how to get the celltypes corresponding to each tissue (on left y axis) to right axis (in comma separated form)
My code is
p1 <- ggplot(dat %>% filter(facet==1), aes(
x = tissue,
y = factor(protein.expression, levels = unique(protein.expression, decreasing = F), ordered = TRUE),
fill = protein.expression,
label = cell.type
)) +
geom_point(stat = 'identity', aes(col = protein.expression), size = 12) +
geom_text(size = 6, fontface = "bold", colour = "white") +
geom_label() +
# facet_grid(cell.type ~ ., scales = "free", space = "free") +
scale_fill_manual(values = myPalette, drop = FALSE) +
scale_color_manual(values = myPalette, drop = FALSE) +
theme_classic() +
labs(title = "Protein Atlas") +
guides(fill = guide_legend(title = "Protein expression")) +
ylab("Cell types measured per tissue") +
# ylim(1,4) +
coord_flip() +
theme(axis.text.x = element_text(size = 25, vjust = 0.5, hjust = .9),
axis.text.y = element_text(size = 25),
legend.position = "none",
axis.title.x = element_text(size = 30),
axis.title.y = element_text(size = 30, margin = margin(t = 0, r = 20, b = 0, l = 0)),
legend.title = element_text(size = 30),
legend.text = element_text(size = 25),
legend.key.size = unit(2, 'cm'),
axis.ticks.length=unit(.01, "cm"),
strip.text.y = element_text(angle = 0))
the cell types are with in the dots. I want them to be on the right side, comma sepearated and if possible color coded by corresponding protein expression label.
So this is a bit of a hack but it might work for you.
I introduce a third column in the graph to hold the labels as per my original post.
I pre-process your data to try and spread out the labels in this third column around the Tissue variable to that they don't appear all on top of each other.
my pre-processing is pretty ugly but works ok. Note that I only catered for a max of 4 cell.types as per your comment.
It gives me this graph:
My code:
data = data.frame("Tissue"=c("Adrenal gland", "Appendix", "Appendix"), "protein.expression" = c("No detect","No detect", "Medium"), "cell.type" = c("Glandular cells" ,"Lymphoid tissu","Glandular cells"))
# Pre-processing section.
# Step 1: find out the n of cell.types per tissue type
counters <- data %>% group_by(Tissue) %>% summarise(count = n())
# Step 2: Join n back to original data. Transform protein.expression to ordered factor
data <- data %>%
inner_join(counters, by="Tissue") %>%
mutate(protein = factor(protein.expression, levels=unique(protein.expression, decreasing = F), ordered=TRUE),
positionTissue = as.numeric(Tissue))
results <- data.frame()
# Step 3: Spread the cell.type labels around the position of the Tissue. 4 scenarios catered for.
for(t in unique(data$Tissue)){
subData <- filter(data, Tissue == t)
subData$spreader <- as.numeric(subData$Tissue)
if(length(unique(subData$cell.type)) == 2){
subData <- subData %>%
mutate(x=factor(cell.type, levels=unique(cell.type, decreasing = F),ordered=TRUE),
spreader = ifelse(as.numeric(x)==1,as.numeric(Tissue)-0.1,as.numeric(Tissue)+0.1)) %>%
select(-x)
results <- rbind(results, subData)
} else if(length(unique(subData$cell.type)) == 3){
subData <- subData %>%
mutate(x=factor(cell.type, levels=unique(cell.type, decreasing = F),ordered=TRUE),
spreader = ifelse(as.numeric(x)==1,as.numeric(Tissue)-0.15,
ifelse(as.numeric(x)==3,as.numeric(Tissue)+0.15,as.numeric(Tissue)))) %>%
select(-x)
results <- rbind(results, subData)
} else if(length(unique(subData$cell.type)) == 4){
subData <- subData %>%
mutate(x=factor(cell.type, levels=unique(cell.type, decreasing = F),ordered=TRUE),
spreader = ifelse(as.numeric(x)==1,as.numeric(Tissue)-0.2,
ifelse(as.numeric(x)==2,as.numeric(Tissue)-0.1,
ifelse(as.numeric(x)==3,as.numeric(Tissue)+0.1,
ifelse(as.numeric(x)==4,as.numeric(Tissue)+0.2,as.numeric(Tissue)))))) %>%
select(-x)
results <- rbind(results, subData)
} else{
results <- rbind(results, subData)
}
}
# Plot the data based on the new label position "spreader" variable
ggplot(results, aes(x = positionTissue, y = protein, label=cell.type)) +
geom_point(stat='identity', aes(col=protein.expression), size=12) +
geom_text(aes(y=0.5,label=Tissue), size=8, fontface="bold", angle=90)+
geom_label(aes(y="zzz", x=spreader, fill=protein), colour="white") +
theme_classic() +
scale_x_continuous(limits = c(min(as.numeric(data$Tissue))-0.5,max(as.numeric(data$Tissue))+0.5))+
scale_y_discrete(breaks=c("Medium","No detect")) +
labs(title="Protein Atlas") +
guides(fill=guide_legend(title="Protein expression"))+
ylab("Cell types measured per tissue") +
xlab("") +
#ylim(1,4) +
coord_flip()+
theme(axis.text.x = element_text(size = 25),
axis.text.y = element_text(colour = NA),
legend.position = "none",
axis.title.x = element_text(size=30),
axis.title.y = element_text(size = 30, margin = margin(t = 0, r = 20, b = 0, l = 0)),
legend.title = element_text(size = 30),
legend.text = element_text(size = 25),
legend.key.size = unit(2, 'cm'),
axis.ticks.length=unit(.01, "cm"),
strip.text.y = element_text(angle = 0))
Edit #2:
Update to retain label colours by creating n positions where n is the number of cell.types:
data = data %>%
mutate(position = paste("z",cell.type))
Then you can use this new position variable instead of the static "zzz" I suggested in my original post. Your labels will have the correct colours, but your chart will look odd if there are a lot of cell.types.
geom_label(aes(y=position, label = cell.type)) +
EDIT #1: Update to avoid overlapping labels by grouping cell.types to a single label per tissue.
Creating a new label field that concatenates the individual labels for each tissue type:
data = data %>%
group_by(Tissue) %>%
mutate(label = paste(cell.type, collapse = "; "))
And amend the ggplot call to use this new field instead of the existing cell.type field:
geom_text(aes(y="zzz", label = label), size = 6, fontface = "bold", colour = "white")+
or:
geom_label(aes(y="zzz", label = label),) +
ORIGINAL POST:
You could plot your labels at a third position (e.g. "zzz") and then hide that position from the set of axis labels using scale_x_discrete(breaks=c()).
ggplot(data, aes(x = Tissue, y = factor(protein.expression,
levels=unique(protein.expression,
decreasing = F),
ordered=TRUE), fill = protein.expression,
label = cell.type))+
geom_point(stat='identity', aes(col=protein.expression), size=12) +
geom_text(aes(y="zzz"), size = 6, fontface = "bold", colour = "white")+
geom_label(aes(y="zzz"),) +
# facet_grid(cell.type ~ ., scales = "free", space = "free") +
# scale_fill_manual(values = myPalette, drop = FALSE) +
# scale_color_manual(values = myPalette, drop = FALSE) +
theme_classic() +
scale_y_discrete(breaks=c("Medium","No detect"))+
labs(title="Protein Atlas") +
guides(fill=guide_legend(title="Protein expression"))+
ylab("Cell types measured per tissue") +
#ylim(1,4) +
coord_flip()+
theme(axis.text.x = element_text(size = 25, vjust = 0.5, hjust = .9),
axis.text.y = element_text(size = 25),
legend.position = "none",
axis.title.x = element_text(size=30),
axis.title.y = element_text(size = 30, margin = margin(t = 0, r = 20, b = 0, l = 0)),
legend.title = element_text(size = 30),
legend.text = element_text(size = 25),
legend.key.size = unit(2, 'cm'),
axis.ticks.length=unit(.01, "cm"),
strip.text.y = element_text(angle = 0))