ggbetweenstats: logarithmic y axis removes grouped analysis from plot - r

I am conducting a kruskal-wallis test to determine statistically significance between three groups of a measurement. I use ggbetweenstats to determine between which group there is a statistically significant association.
Here is the code for sample data and the plot:
sampledata <- structure(list(ID = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18, 19, 20), group = c(1, 2, 3, 1, 2, 3,
1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2), measurement = c(0,
1, 200, 30, 1000, 6000, 1, 0, 0, 10000, 20000, 700, 65, 1, 8,
11000, 13000, 7000, 500, 3000)), class = "data.frame", row.names = c(NA,
20L))
library(ggstatsplot)
library(ggplot2)
ggbetweenstats(
data = sampledata,
x = group,
y = measurement,
type = "nonparametric",
plot.type = "box",
pairwise.comparisons = TRUE,
pairwise.display = "all",
centrality.plotting = FALSE,
bf.message = FALSE
)
You can see the results from the kruskal wallis test on the top of the plot as well as the groupes analysis in the plot. Now I want to change y axis to logarithmic scale:
ggbetweenstats(
data = sampledata,
x = group,
y = measurement,
type = "nonparametric",
plot.type = "box",
pairwise.comparisons = TRUE,
pairwise.display = "all",
centrality.plotting = FALSE,
bf.message = FALSE
) +
ggplot2::scale_y_continuous(trans=scales::pseudo_log_trans(sigma = 1, base = exp(1)), limits = c(0,25000), breaks = c(0,1,10,100,1000,10000)
)
However, this removes the grouped analysis. I have tried different scaling solutions and browsed SO for a solution but couldn't find anything. Thank you for your help!

It seems that the y_position parameter in the geom_signif component is not affected by the y axis transformation. You will need to pass the log values of the desired bracket heights manually. In theory, you can pass these via the ggsignif.args parameter, but it seems that in the latest version of ggstatsplot this isn't possible because the y_position is hard-coded.
One way tound this is to store the plot then change the y positions after the fact. Here's a full reprex with the latest versions of ggplot2, ggstatsplot and their dependencies (at the time of writing)
sampledata <- structure(list(ID = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18, 19, 20), group = c(1, 2, 3, 1, 2, 3,
1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2), measurement = c(0,
1, 200, 30, 1000, 6000, 1, 0, 0, 10000, 20000, 700, 65, 1, 8,
11000, 13000, 7000, 500, 3000)), class = "data.frame", row.names = c(NA,
20L))
library(ggstatsplot)
library(ggplot2)
library(scales)
p <- ggbetweenstats(
data = sampledata,
x = group,
y = measurement,
type = "nonparametric",
plot.type = "box",
pairwise.comparisons = TRUE,
pairwise.display = "all",
centrality.plotting = FALSE,
bf.message = FALSE
) + scale_y_continuous(trans = pseudo_log_trans(sigma = 1, base = exp(1)),
limits = c(0, exp(13)),
breaks = c(0, 10^(0:5)),
labels = comma)
#> Scale for y is already present.
#> Adding another scale for y, which will replace the existing scale.
i <- which(sapply(p$layers, function(x) inherits(x$geom, "GeomSignif")))
p$layers[[i]]$stat_params$y_position <- c(10, 10.8, 11.6)
p
Created on 2023-01-15 with reprex v2.0.2

Related

how to make a distinct graph plot based on one variable

I have a data like this
df<- structure(list(14, FALSE, c(1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12,
13, 6), c(0, 0, 0, 0, 0, 6, 6, 6, 6, 6, 6, 6, 0), c(0, 1, 2,
3, 4, 12, 5, 6, 7, 8, 9, 10, 11), c(0, 1, 2, 3, 4, 12, 5, 6,
7, 8, 9, 10, 11), c(0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13), c(0, 6, 6, 6, 6, 6, 6, 13, 13, 13, 13, 13, 13, 13, 13
), list(c(1, 0, 1), structure(list(), names = character(0)),
list(name = c("Bestman", "Tera1", "Tera2", "Tera3", "Tera4",
"Tera5", "Tetra", "Brownie1", "Brownie2", "Brownie3", "Brownie4",
"Brownie5", "Brownie6", "Brownie7")), list()), <environment>), class = "igraph")
I can plot it like this
plot(df)
if I want to remove the label , I can do this
plot(df,vertex.label=NA)
but it removes it for all and I want to keep the core
I want to be able to plot it with ggplot , removing the label on each node but leave the main core label on , remove the line around the circles
Your dput is not directly reproducible (igraphs contain an environment which isn't included in a dput). The graph I can recover has two linked central nodes, each with several child nodes.
We can draw graphs with plentiful customization options using the tidygraph and ggraph packages:
library(ggraph)
library(tidygraph)
library(extrafont)
as_tbl_graph(g) %>%
activate(nodes) %>%
mutate(type = ifelse(name %in% c("Bestman", "Tetra"), "root", "branch")) %>%
mutate(group = ifelse(name == "Bestman" | grepl("Tera", name),
"Bestman", "Tera")) %>%
ggraph(layout = "igraph", algorithm = "nicely") +
geom_edge_link(width = 2, alpha = 0.1) +
geom_node_circle(aes(r = ifelse(type == "root", 0.4, 0.1), fill = group),
color = NA) +
geom_node_text(aes(label = ifelse(type == "root", name, "")), size = 5,
color = "gray40", family = "Roboto Condensed", fontface = 2) +
theme_graph() +
coord_equal() +
scale_fill_brewer(palette = "Pastel2", guide = "none")

"sample sizes in the longitudinal and event processes differ" in JointModel in r

I am trying to perform a joint model analysis with simulated data. I believe I have formatted the data properly, but I receive this error:
"Error in jointModel(lmeFitJ, coxFit, timeVar = "time.point") :
sample sizes in the longitudinal and event processes differ; maybe you forgot the cluster() argument."
I only see this mentioned in the source code for JM and in one brief and unresolved troubleshooting thread. Where have I messed up? Thank you for any help!
Minimal complete example with first 4 participants:
#required packages
library(readxl, nlme, JM)
#long_data
structure(list(particip.id = c(1, 1, 1, 1, 2, 2, 3, 4, 4, 4,
4), time.point = c(1, 2, 3, 4, 1, 2, 1, 1, 2, 3, 4), school4me = c("DPU",
"DPU", "DPU", "DPU", "DPU", "DPU", "DPU", "DPU", "DPU", "DPU",
"DPU"), hours.a = c(3, 3, 2, 3, 0, 0, 6, 10, 13, 16, 15), hours.b = c(4,
6, 0, 0, 0, 1, 3, 7, 15, 9, 10), enrolled = c(1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1), TimeQ = c(4, 4, 4, 4, 2.9369807105977, 2.9369807105977,
1.50240888306871, 4, 4, 4, 4)), row.names = c(NA, -11L), class = c("tbl_df",
"tbl", "data.frame"))
#short_data
structure(list(particip.id = c(1, 2, 3, 4), time.point = c(3,
2, 3, 4), school4me = c("DPU", "DPU", "DPU", "DPU"), enrolled = c(0,
0, 0, 1), TimeQ = c(2.376576055, 1.152660467, 2.300307851, 4),
actual = c(1, 1, 1, 0)), row.names = c(NA, -4L), class = c("tbl_df",
"tbl", "data.frame"))
#Analysis
lmeFitJ <- lme(hours.a ~ time.point + time.point:school4me, data=long_data, random = ~time.point | particip.id)
coxFit <- coxph(Surv(TimeQ, actual) ~ school4me, data = short_data, x = TRUE)
fitJOINT <- jointModel(lmeFitJ, coxFit, timeVar = "time.point")
#analysis produces: "Error in jointModel(lmeFitJ, coxFit, timeVar = "time.point") : sample sizes in
#the longitudinal and event processes differ; maybe you forgot the cluster() argument."
In the source code you can find
if (is.null(survObject$model))
stop("\nplease refit the Cox model including in the ",
"call to coxph() the argument 'model = TRUE'.")
and
nT <- length(unique(idT))
if (LongFormat && is.null(survObject$model$cluster))
stop("\nuse argument 'model = TRUE' and cluster() in coxph().")
Unfortunately the longitudinal process warning is occurring first so you don't see them.
("sample sizes in the longitudinal and event processes differ; ",
"maybe you forgot the cluster() argument.\n")
Try adding model = TRUE and cluster(particip.id) to your coxFit i.e.
coxFit <- coxph(Surv(TimeQ, actual) ~ school4me + cluster(particip.id), data = short_data, x = TRUE, model = TRUE)

Add points to splines

I wish to add points directly on top of the curved spline.
The code here does not work because geom_point places the dots as if the lines were straight. See points #2, #3. I've tried using stat_bspline2 with geom = "point" without success.
Help is much appreciated.
library(tidyverse)
library(ggforce)
data <- tibble (
x = c(10, 15, 17, 17, 20, 22, 22, 23, 25, 25, 27, 29),
y = c(5, 7, 4, 4, 0, 5, 5, 6, 5, 5, 4, 5.5),
g = c("A", "A", "A", "B", "B", "B", "C", "C", "C", "D","D","D"),
pt = c(1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1)
)
data <- data %>%
mutate(pt_x = ifelse(pt == 1, x, NA),
pt_y = ifelse(pt == 1, y, NA))
ggplot(data) +
stat_bspline2(aes(x=x, y=y, color = ..group.., group = g), size = 4, n = 300, geom = "bspline0") +
scale_color_gradientn(colours = c("red", "pink", "green", "white"), guide = F) +
geom_point(aes(pt_x, pt_y), size = 7)

How to mirror the outer positions with the variable with R

I have a data frame:
tes <- data.frame(x = c(1, 1, 1, 2, 2, 2, 3, 3, 3),
y = c(1, 2, 3, 1, 2, 3, 1, 2, 3),
d = c(10, 20, 30, 100, 11, 12, 403, 43, 21))
They look like this on the plot
ggplot(aes(x = x, y = y), data = tes) + geom_point(aes(color = factor(d)), size = 5)
I'd like to "mirror the outer rows in this data to obtain such data and plot
tes1 <- data.frame(x = c(0, 0, 0, 0,0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4),
y = c(0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4),
d = c(10, 10, 20, 30, 30, 10, 10, 20, 30, 30, 100, 100, 11, 12, 12, 403, 403, 43, 21, 21, 403, 403, 43, 21, 21))
ggplot(aes(x = x, y = y), data = tes1) + geom_point(aes(color = factor(d)), size = 4)
Does this do what you're after?
Explanation: We first convert tes into a flattened table with ftable(xtabs(...). Then we simply replicate the first and last column, and first and last row. We then give new column and row names to reflect the extra "flanking" rows and columns, and finally convert back to a long dataframe with data.frame(table(...))
# Convert to table then matrix
m <- ftable(xtabs(d ~ x + y, data = tes));
class(m) <- "matrix";
# Replicate first and last column/row by binding to the beginning
# and end, respectively of the matrix
m <- cbind(m[, 1], m, m[, ncol(m)]);
m <- rbind(m[1, ], m, m[nrow(m), ]);
# Set column/row names
rownames(m) <- seq(min(tes$x) - 1, max(tes$x) + 1);
colnames(m) <- seq(min(tes$y) - 1, max(tes$y) + 1);
# Convert back to long dataframe
tes.ext <- data.frame(as.table(m));
colnames(tes.ext) <- colnames(tes);
# Plot
ggplot(aes(x = x, y = y), data = tes.ext) + geom_point(aes(color = factor(d)), size = 5)
Data
tes <- data.frame(x = c(1, 1, 1, 2, 2, 2, 3, 3, 3),
y = c(1, 2, 3, 1, 2, 3, 1, 2, 3),
d = c(10, 20, 30, 100, 11, 12, 403, 43, 21))

Grey Background in R When Using qcc (quality control charts) Plot

I'm having a problem where my graph is always on a light grey background which looks awful in LaTeX. I've tried using par(bg=NA), par(bg="white") which is what everyone suggests but that literally does nothing...
Here's the code:
# install.packages('qcc')
library(qcc)
nonconforming <- c(3, 4, 6, 5, 2, 8, 9, 4, 2, 6, 4, 8, 0, 7, 20, 6, 1, 5, 7)
samplesize <- rep(50, 19)
control <- qcc(nonconforming, type = "p", samplesize, plot = "FALSE")
warn.limits <- limits.p(control$center, control$std.dev, control$sizes, 2)
par(mar = c(5, 3, 1, 3), bg = "blue")
plot(control, restore.par = FALSE, title = "P Chart for Medical Insurance Claims",
xlab = "Day", ylab = "Proportion Defective")
abline(h = warn.limits, lty = 3, col = "blue")
v2 <- c("LWL", "UWL") # the labels for warn.limits
mtext(side = 4, text = v2, at = warn.limits, col = "blue", las = 2)
Check out ?qcc.options() -- specifically, the bg.margin option. The following will change your plot to have a lightgreen background (note: probably not a good choice for LaTeX, but it illustrates the point):
library(qcc)
nonconforming <- c(3, 4, 6, 5, 2, 8, 9, 4, 2, 6, 4, 8, 0, 7, 20, 6, 1, 5, 7)
samplesize <- rep(50, 19)
old <- qcc.options() # save the original options
qcc.options(bg.margin = "lightgreen")
par(mar = c(5, 3, 1, 3))
control <- qcc(nonconforming, type = "p", samplesize, plot = "FALSE")
warn.limits <- limits.p(control$center, control$std.dev, control$sizes, 2)
plot(control, restore.par = FALSE, title = "P Chart for Medical Insurance Claims",
xlab = "Day", ylab = "Proportion Defective")
abline(h = warn.limits, lty = 3, col = "blue")
v2 <- c("LWL", "UWL") # the labels for warn.limits
mtext(side = 4, text = v2, at = warn.limits, col = "blue", las = 2)
qcc.options(old) # reset the old options

Resources