I am using the qicharts2() package to construct a p-chart in R. It is necessary to have a variable UCL / LCL, but the way the qic() natively constructs this is not what I'm looking for. See the below two images:
What qic() produces:
What I need it to produce:
I'm unsure how to change this and couldn't find much to help control the UCL/LCL in the help vignette. Any help on how to control these aesthetics or the calculation going into them is appreciated (I am not a statistician).
Sample:
df <- data.frame(Date = sample(seq(as.Date('1999/01/01'), as.Date('2000/01/01'), by="day"), 25),
Values = sample(seq(from = 0, to = 1, by = .1), size = 25, replace = TRUE),
Totals = sample(seq(from = 0, to = 50, by = 1), size = 25, replace = TRUE))
qic(data = df, y = Values, x = Date, n = Totals, chart = 'p', point.size = 2)
Thanks to the comments from #markus, the key was to save the qic() gg object to a variable and access the layers. Using the code below demonstrates how this works:
df <- data.frame(Date = sample(seq(as.Date('1999/01/01'), as.Date('2000/01/01'), by="day"), 25),
Values = sample(seq(from = 0, to = 1, by = .1), size = 25, replace = TRUE),
Totals = sample(seq(from = 0, to = 50, by = 1), size = 25, replace = TRUE))
p <- qic(data = df, y = Values, x = Date, n = Totals, chart = 'p', point.size = 2, show.labels = TRUE, decimals = 0) +
geom_line(color = "steelblue") + theme_bw() +
ylim(c(0,1)) +
ggtitle("Sample qic() Plot") +
xlab("") +
ylab("") +
theme(plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
axis.title.y = element_text(face = "bold", size = 12)) +
theme(axis.text.x = element_text(angle = 65, hjust = 1, size = 12, face = "bold"),
axis.text.y = element_text(size = 12, face = "bold")) +
theme(legend.position = "none")
p$layers[[1]] <- NULL;
p$layers <- c(p$layers, geom_step(data = p$data, aes(y = ucl), linetype = "dotted", col = "grey50", size = 1), geom_step(data = p$data, aes(y = lcl), linetype = "dotted", col = "grey50", size = 1));
p
Output:
Related
I verified that when using geom_line the resulting graph is not formed by solid lines, the lines look like a ladder.
Would anyone know how to get solid lines?
Here is the code used.
data base
0;0
0.000700;1.050
0.001750;1.100
0.003800;1.150
0.029110;1.200
0.130900;1.240
0.341428;1.303
`library(tidyverse)
library(reprex)
SAT <- read.delim("Curva_Tipica.txt", header = FALSE, encoding = "UTF-8")
SAT <- str_split(SAT[[1]],";")
length_SAT <- length(SAT) + 1
curva_VxI <- tibble(
.rows = length_SAT,
I = 0,
V = 0,
)
for (linha in 2:length_SAT) {
curva_VxI$I[linha] <- as.double(SAT[[linha-1]][1])
curva_VxI$V[linha] <- as.double(SAT[[linha-1]][2])
}
Xac <- (curva_VxI$V[length_SAT] - curva_VxI$V[length_SAT-1])/(curva_VxI$I[length_SAT] - curva_VxI$I[length_SAT-1])
Vj <- curva_VxI$V[length_SAT] - Xac*curva_VxI$I[length_SAT]
Xac_grafico <- tibble(
x = c(0, 1),
y = c(0, Xac)
)
Vj_grafico <- tibble(
x = c(0),
y = c(Vj)
)
theme_set(theme_bw())
ggplot() +
geom_point(
data = curva_VxI,
aes(x = I, y = V),
color = "orange",
size = 2
) +
geom_line(
data = curva_VxI,
aes(x = I, y = V),
color = "orange",
linewidth = 1
) +
geom_point(
data = Xac_grafico,
aes(x = x, y = y),
color = "blue",
) +
geom_line(
data = Xac_grafico,
aes(x = x, y = y),
color = "blue",
linewidth = 1,
linetype = "solid",
) +
labs(
y = "V (pu)",
x = "I (pu)"
) +
coord_cartesian(
xlim = c(0, 1),
ylim = c(0, 1.5),
expand = FALSE,
clip = "off"
) +
scale_y_continuous(sec.axis = sec_axis(~.*1, "Xac (pu)")) +
theme(
axis.title.x = element_text(margin = margin(t = 10), size = 12, face = "bold"),
axis.title.y = element_text(margin = margin(r = 10), size = 12, face = "bold"),
axis.text.x = element_text(size = 10),
axis.text.y = element_text(size = 10),
legend.position = "top",
# plot.background = element_rect(fill = "gray60"),
plot.margin = margin(t = 1, r = 1, b = 0.5, l = 0.5, unit = "cm"),
)`
code output
expected result
result obtained
I tried to produce 12 boxplots per ggplots stat_summary() functions, as you can see below in the reproducible example. I used stat_summary() instead of geom_boxplot(), because I want to whiskers to end at the 1st and 99th percentile of the data or to be individualized so to speak. I coded two functions, one for the whiskers and one for the outliers and used them as arguments in stat_summary(). This is the result:
I see two problems with this plot:
Not all outliers are coloured in red.
Outliers cut the whiskers, which is not supposed to happen by definition of my functions.
The help file has not been helping me in solving this issue. Comments are welcome.
The code:
library(stats)
library(ggplot2)
library(dplyr)
# Example Data
{
set.seed(123)
indexnumber_of_entity = rep(c(1:30),
each = 12)
month = rep(c(1:12),
each = 1,
times = 30)
variable_of_interest = runif(n = 360,
min = 0,
max = 100)
Data = as.data.frame(cbind(indexnumber_of_entity,
month,
variable_of_interest)) %>% mutate_at(.vars = c(1,2,3),
as.numeric)
Data_Above_99th_Percentile = filter(Data,
variable_of_interest > stats::quantile(Data$variable_of_interest,
0.99))
Data_Below_1st_Percentile = filter(Data,
variable_of_interest < stats::quantile(Data$variable_of_interest,
0.01))
}
# Functions that enable individualizing boxplots
{
Individualized_Boxplot_Quantiles <- function(x){
d <- data.frame(ymin = stats::quantile(x,0.01),
lower = stats::quantile(x,0.25),
middle = stats::quantile(x,0.5),
upper = stats::quantile(x,0.75),
ymax = stats::quantile(x,0.99),
row.names = NULL)
d[1, ]
}
Definition_of_Outliers = function(x)
{
subset(x,
stats::quantile(x,0.99) < x | stats::quantile(x,0.01) > x)
}
}
# Producing the ggplot
ggplot(data = Data) +
aes(x = month,
y = variable_of_interest,
group = month) +
stat_summary(fun.data = Individualized_Boxplot_Quantiles,
geom="boxplot",
lwd = 0.5) +
stat_summary(fun.y = Definition_of_Outliers,
geom="point",
size = 1) +
labs(title = "Distributions of Variable of Interest based on months",
x = "Month",
y = "Variable of Interest") +
theme(plot.title = element_text(size = 20,
hjust = 0.5,
face = "bold"),
axis.ticks.x = element_blank(),
axis.text.x = element_text(size = 12,
face = "bold"),
axis.text.y = element_text(size = 12,
face = "bold"),
axis.title.x = element_text(size = 16,
face = "bold",
vjust = -3),
axis.title.y = element_text(size = 16,
face = "bold",
vjust = 3)) +
scale_x_continuous(breaks = c(seq(from = 1,
to = 12,
by = 1))) +
scale_y_continuous(breaks = c(seq(from = 0,
to = 100,
by = 10))) +
geom_point(data = Data_Above_99th_Percentile,
colour = "red",
size = 1) +
geom_point(data = Data_Below_1st_Percentile,
colour = "red",
size = 1)
You can simplify the functions a little bit like this:
boxplot_quantiles <- function(x) {
y <- as.data.frame(t(stats::quantile(x, c(0.01, 0.25, 0.5, 0.75, 0.99))))
setNames(y, c('ymin', 'lower', 'middle', 'upper', 'ymax'))
}
outliers <- function(x) {
subset(x, stats::quantile(x,0.99) < x | stats::quantile(x,0.01) > x)
}
You can rely on the summary functions, since the Data_above_99th_Percentile and Data_Below_1st_Percentile were not groupwise calculations in your own code.
ggplot(data = Data, aes(x = month, y = variable_of_interest, group = month)) +
stat_summary(fun = outliers, geom = "point", col = 'red', size = 1) +
stat_summary(fun.data = boxplot_quantiles, geom = "boxplot", lwd = 0.5) +
scale_x_continuous('Month', breaks = 1:12) +
scale_y_continuous('Variable of Interest' , breaks = 0:10 * 10) +
labs(title = "Distributions of Variable of Interest based on months") +
theme(text = element_text(face = 'bold', size = 12),
plot.title = element_text(size = 20, hjust = 0.5),
axis.ticks.x = element_blank(),
axis.title.x = element_text(size = 16, margin = margin(20, 0, 0, 0)),
axis.title.y = element_text(size = 16, vjust = 3))
Edit
As long as you perform groupwise operations on the filtered data frames, your alternative method of drawing the outliers will work too. Note that I have added these in colored layers above the existing plot so that the red points are overplotted with blue and green dots:
Data_Above_99th_Percentile <- Data %>%
group_by(month) %>%
filter(variable_of_interest > quantile(variable_of_interest,0.99))
Data_Below_1st_Percentile <- Data %>%
group_by(month) %>%
filter(variable_of_interest < quantile(variable_of_interest, 0.01))
ggplot(data = Data, aes(x = month, y = variable_of_interest, group = month)) +
stat_summary(fun = outliers, geom = "point", col = 'red', size = 1) +
stat_summary(fun.data = boxplot_quantiles, geom = "boxplot", lwd = 0.5) +
scale_x_continuous('Month', breaks = 1:12) +
scale_y_continuous('Variable of Interest' , breaks = 0:10 * 10) +
labs(title = "Distributions of Variable of Interest based on months") +
theme(text = element_text(face = 'bold', size = 12),
plot.title = element_text(size = 20, hjust = 0.5),
axis.ticks.x = element_blank(),
axis.title.x = element_text(size = 16, margin = margin(20, 0, 0, 0)),
axis.title.y = element_text(size = 16, vjust = 3)) +
geom_point(data = Data_Below_1st_Percentile, color = 'green') +
geom_point(data = Data_Above_99th_Percentile, color = 'blue')
What is this type of data visualization plot called and how do I recreate it in R?
Image Source: https://www.pewresearch.org/global/2020/04/30/worldwide-optimism-about-future-of-gender-equality-even-as-many-see-advantages-for-men/pg_2020-04-30_global-gender-equality_0-02/
My Google search only resulted in regular bubble plots like this: https://r-graph-gallery.com/320-the-basis-of-bubble-plot.html
Something like this?
df <- data.frame(Question = rep(c("Getting\nhigh-paying jobs",
"Being leaders in\ntheir community",
"Expressing their\npolitical views",
"Getting a good\neducation"), 3),
Answer = rep(c("Men have more\nopportunities",
"Women have more\nopportunities",
"Both about\nthe same"), each = 4),
Value = c(54, 44, 31, 11, 3, 4, 3, 6, 38, 49, 63, 81))
library(ggplot2)
ggplot(df, aes(y = factor(Question, rev(unique(Question))),
x = factor(Answer, unique(Answer)),
fill = factor(Answer, unique(Answer)))) +
geom_point(shape = 21, aes(size = Value, color = after_scale(fill))) +
geom_text(aes(label = Value, color = Answer)) +
annotate("segment", x = rep(-Inf, 3), xend = rep(Inf, 3),
y = 1:3 + 0.5, yend = 1:3 + 0.5, linetype = 2, alpha = 0.5) +
scale_y_discrete() +
scale_x_discrete(position = "top") +
scale_size_continuous(range = c(5, 30)) +
scale_fill_manual(values = c("#959e4a", "#0f6599", "#dddac8")) +
scale_color_manual(values = c("black", "white", "white")) +
ggtitle(paste("Many think men have more opportunities than women",
"when it comes to getting high-paid jobs", sep = "\n")) +
theme_void() +
theme(legend.position = "none",
axis.text.x = element_text(face = 2),
axis.text.y = element_text(hjust = 1, face = 2),
plot.margin = margin(30, 30, 30, 30),
plot.title = element_text(size = 16, face = 2, family = "serif",
margin = margin(20, 0, 50, 0)))
Here's an example. Lots more formatting tweaks could be done, but I'd think of this fundamentally as a geom_point and a geom_text layer, the rest is tidying up.
library(ggplot2)
fake_data <- data.frame(x = rep(LETTERS[1:3], each = 4),
y = letters[1:4],
val = (1:12) / 12)
ggplot(fake_data, aes(x=1, y = 1, label = scales::percent(val))) +
geom_point(aes(size = val, color = x), alpha = 0.3) +
geom_text() +
scale_size_area(max_size = 20) +
guides(size = "none", color = "none") +
facet_grid(y ~ x, switch = "y") +
theme_void() +
theme(strip.text = element_text())
Let
df <- data.frame("Method" = rep(c("Method1", "Method2", "Method3", "Method4", "Method5"), each = 3, times = 1),
"Type" = rep(c("A", "B", "C"), 5),
"Value" = c(runif(5, 0, 1), runif(5, 0.2, 1.2), runif(5, 0.4, 1.4)))
I created a boxplot
get_box_stats <- function(y, upper_limit = max(df$Value) * 1.42) {
return(data.frame(
y = upper_limit,
label = paste(
length(y), "\n",
round(quantile(y, 0.25), 2), "\n",
round(median(y), 2), "\n",
round(quantile(y, 0.75), 2), "\n"
)
))
}
ggplot(df, aes(factor(Type), Value)) +
labs(fill = "Method") +
stat_summary(size = 4.6, fun.data = get_box_stats, geom = "text", position = position_dodge(.9),
hjust = 0.5, vjust = 1, aes(group = factor(Type)))+
geom_boxplot(coef = 0, aes(fill = factor(Type))) + theme_classic()+
theme(legend.position = "top", axis.text.x = element_text(size = 15),
axis.text.y = element_text(size = 15),
axis.title.x = element_text(size = 15),
axis.title.y = element_text(size = 15),
legend.title=element_text(size = 15),
legend.text=element_text(size = 15)) +
geom_dotplot(aes(fill = factor(Type)), dotsize = 0.8, binaxis = 'y', stackdir = 'center',
position = position_dodge(0.75))+
xlab("Method")
This results in a boxplot
QUESTION: As you can see, for stats are not perfectly centered, i.e for Method B -- values 1 and 5. Is there a way to fix this?
The problem lies in your use of paste in your summary function. By default, paste adds a space character between each element you want to paste together. Your summary string therefore has a space before and after every line break, but not before the first line. Since a space takes up some room, the aligment is off. Instead of adding in all those newline characters, specify that you want to use just a newline character as a separator using the sep argument:
get_box_stats <- function(y, upper_limit = max(df$Value) * 1.42) {
return(data.frame(
y = upper_limit,
label = paste(
length(y),
round(quantile(y, 0.25), 2),
round(median(y), 2),
round(quantile(y, 0.75), 2), sep = "\n"
)
))
}
ggplot(df, aes(factor(Type), Value)) +
labs(fill = "Method") +
stat_summary(size = 4.6, fun.data = get_box_stats, geom = "text",
hjust = 0.5, vjust = 1, aes(group = factor(Type)))+
geom_boxplot(coef = 0, aes(fill = factor(Type))) + theme_classic()+
theme(legend.position = "top", axis.text.x = element_text(size = 15),
axis.text.y = element_text(size = 15),
axis.title.x = element_text(size = 15),
axis.title.y = element_text(size = 15),
legend.title=element_text(size = 15),
legend.text=element_text(size = 15)) +
geom_dotplot(aes(fill = factor(Type)), dotsize = 0.8, binaxis = 'y',
stackdir = 'center',
position = position_dodge(0.75))+
xlab("Method")
Sample-data:
df <- data.frame("SL" = runif(50, 2.2, 5.8), "LMX" = runif(50, 1.8, 5.5))
I have many different variables for each of which I want to make a box plot with the code below. So that all panels will have the same size, I determined the plot margin so that it is not influenced by the length of the name of the variable. Therefore, now I want to add the variable name outside of the panel to the left.
However, this turns out to be more difficult than expected. I know that this issue has been raised before, but none of the solutions works with me (rnorm or geom_text).
Any help is much appreciated, thank you :)
df %>%
select("Servant Leadership" = SL) %>%
gather(key = "variable", value = "value") -> n
n$variable <- factor(n$variable, levels = c("Servant Leadership"))
ggplot(data = n, aes(y = value, x = as.numeric(variable))) +
stat_summary(fun.data = min.mean.sd.max, geom = "boxplot", col = "#323232", fill = "#EFC76C") +
scale_fill_identity() +
scale_x_continuous(breaks = as.numeric(unique(n$variable)), minor_breaks = NULL,
labels = "", expand = c(0.12, 0.12)) +
scale_y_continuous(breaks = c(1, 2, 3, 4, 5, 6, 7)) +
expand_limits(y = c(1, 7)) + coord_flip() + labs(x = "", y = "") +
theme(text = element_text(size = 15), panel.background = element_rect(fill = "#EAEDED"),
panel.border = element_rect(fill=NA, color = "grey", size = 0.5, linetype = "solid")) +
theme(plot.margin=unit(c(0.2, 0.2, 0, 4),"cm"))
I forgot this code which I ran before:
min.mean.sd.max <- function(x) {
r <- c(min(x), mean(x) - sd(x), mean(x), mean(x) + sd(x), max(x))
names(r) <- c("ymin", "lower", "middle", "upper", "ymax")
r
}
And this are the packages which I use (maybe not all in this code however):
library(reshape)
library(scales)
library(ggplot2)
library(dplyr)
library(tidyr)
Based on the answer by Tung I amended the code for the box plot in the following way:
ggplot(data = n, aes(y = value, x = as.numeric(variable))) +
stat_summary(fun.data = min.mean.sd.max, geom = "boxplot", col = "#323232", fill = "#EFC76C") +
scale_fill_identity() +
scale_x_continuous(breaks = as.numeric(unique(n$variable)), minor_breaks = NULL,
labels = "", expand = c(0.12, 0.12)) +
scale_y_continuous(breaks = c(1, 2, 3, 4, 5, 6, 7)) +
expand_limits(y = c(1, 7)) + coord_flip(clip = "off") + labs(x = "", y = "") +
theme(text = element_text(size = 18), panel.background = element_rect(fill = "#EAEDED"),
panel.border = element_rect(fill=NA, color = "grey", size = 0.5, linetype = "solid")) +
geom_text(x = 1, y = 0.5, inherit.aes = FALSE, label = "Servant Leadership", check_overlap = TRUE, hjust = 1,
fontface = 'plain', size = 6, color = "#4E4E4E") +
theme(plot.margin=unit(c(0.05, 4.5, 0, 9.5),"cm"))