I have two custom classes that have different types of constructors and different types of formatting methods, one class relying on the attributes of the object.
as.test_1 <- function(x, test_attribute, ...) {
attributes(x) <- list("test_attribute" = test_attribute)
x <-
structure(x, class = c("test_1", setdiff(class(x), "test_1")))
return(x)
}
format.test_1 <- function(x, ...) {
paste0(x, attributes(x)$test_attribute)
}
print.test_1 <- function(x, ...) {
cat(format(x, ...), "\n")
}
as.test_2 <- function(x, test_attribute, ...) {
x <-
structure(x, class = c("test_2", setdiff(class(x), "test_2")))
return(x)
}
format.test_2 <- function(x, ...) {
paste0(x, "test")
}
print.test_2 <- function(x, ...) {
cat(format(x, ...), "\n")
}
Now I want to to plot some data that contains these classes:
library(data.table)
data_1 <-
data.table(a = as.test_1(c(1, 2, 3, 4), test_attribute = "test"),
b = as.factor(c("a", "b", "c", "d")))
data_2 <-
data.table(a = as.test_2(c(1, 2, 3, 4), test_attribute = "test"),
b = as.factor(c("a", "b", "c", "d")))
Also I want to use the format method to format the y-axis. This is my first try, which works for the test_2 class that does not rely on the attributes of the variable for formatting:
library(ggplot2)
plot_data <- function(data) {
g <- ggplot(data, aes(x = b, y = a))
g <- g + geom_bar(stat = "identity")
expand <- c(0, 0)
attributes(expand) <- attributes(data$a)
limits <- c(0, 4)
attributes(limits) <- attributes(data$a)
g <- g + scale_y_continuous(
expand = expand,
limits = limits,
oob = scales::squish,
labels = getS3method(f = "format", class = class(data$a)[[1]])
)
return(print(g))
}
However for the test_1 class it seems like the attributes get lost somewhere in the call of scale_y_continuous.
plot_data(data_1)
plot_data(data_2)
Does someone know how to fix this?
Thanks!
Related
I have adapted the codes below which I referred from https://statsandr.com/blog/how-to-do-a-t-test-or-anova-for-many-variables-at-once-in-r-and-communicate-the-results-in-a-better-way/#to-go-even-further into my dataset:
Day<-c(1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2)
Group<-c("A","A","A","B","B","B","C","C","C","A","A","A","A","B","B","B","C","C","C")
Rain<-c(4,4,6,5,3,4,5,5,3,6,6,6,5,3,3,3,2,5,2)
UV<-c(6,6,7,8,5,6,5,6,6,6,7,7,8,8,5,6,8,5,7)
dat<-data.frame(Day,Group,Rain,UV)
x <- which(names(dat) == "Group")
y <- which(names(dat) == "Rain"
| names(dat) == "UV")
method1 <- "kruskal.test"
method2 <- "wilcox.test"
my_comparisons <- list(c("A", "B"), c("A", "C"), c("B", "C")) # comparisons for post-hoc test
library(ggpubr)
for (i in y) {
for (j in x) {
p <- ggboxplot(dat,
x = colnames(dat[j]), y = colnames(dat[i]),
color = colnames(dat[j]),
legend = "none",
palette = "npg",
add = "jitter"
)
print(
p + stat_compare_means(aes(label = paste0(..method.., ", p-value = ", ..p.format..)),
method = method1, label.y = max(dat[, i], na.rm = TRUE)
)
+ stat_compare_means(comparisons = my_comparisons, method = method2, label = "p.format")
)
}
}
How do I further repeat this function through different "Day"? Thanks.
I think you want to see the results for each day, right? You can add a third loop like this:
for (h in unique(dat$Day)) {
for (i in y) {
for (j in x) {
dat_tmp <- dat[dat$Day == h,] # create a subset of the data for each day
p <- ggboxplot(dat_tmp,
x = colnames(dat_tmp[j]), y = colnames(dat_tmp[i]),
color = colnames(dat_tmp[j]),
legend = "none",
palette = "npg",
add = "jitter"
)
print(
p + stat_compare_means(aes(label = paste0(..method.., ", p-value = ", ..p.format..)),
method = method1, label.y = max(dat_tmp[, i], na.rm = TRUE)
)
+ stat_compare_means(comparisons = my_comparisons, method = method2, label = "p.format")
)
}
}
}
I added a third loop to your code and created dat_tmp inside the loop, which becomes the dataset that you use for the analyses of each day.
I hope I am missing something painfully obvious here.
I wish to update (e.g., fix title, labs, etc.) on a ggplot object produced from gratia::draw(). Not really sure why I am unable to update the object.
Is there a simple solution?
# devtools::install_github('gavinsimpson/gratia')
library('mgcv')
library('gratia')
dat <- gamSim(1, n = 400, dist = "normal", scale = 2, verbose = FALSE)
mod <- gam(y ~ s(x0), data = dat, method = "REML")
draw(mod)
p = draw(mod)
# P is a ggobject.
class(p)
#> [1] "gg" "ggplot"
So, why can't I update p?
p + ggtitle("My title")
Created on 2019-02-26 by the reprex package (v0.2.1)
The object returned by draw.gam is the output of cowplot::plot_grid (not a pure ggplot2 graphical object).
I made a small change into draw.gam function of gratia. .
Now the p object can be manipulated:
# The modified draw.gam function
mydraw.gam <- function (object, parametric = TRUE, select = NULL, scales = c("free",
"fixed"), align = "hv", axis = "lrtb", n = 100, unconditional = FALSE,
overall_uncertainty = TRUE, dist = 0.1, ...)
{
scales <- match.arg(scales)
S <- smooths(object)
select <- gratia:::check_user_select_smooths(smooths = S, select = select)
d <- gratia:::smooth_dim(object)
take <- d <= 2L
select <- select[take]
S <- S[take]
d <- d[take]
is_re <- vapply(object[["smooth"]], gratia:::is_re_smooth, logical(1L))
is_by <- vapply(object[["smooth"]], gratia:::is_by_smooth, logical(1L))
if (any(is_by)) {
S <- vapply(strsplit(S, ":"), `[[`, character(1L), 1L)
}
npara <- 0
nsmooth <- length(S)
if (isTRUE(parametric)) {
terms <- parametric_terms(object)
npara <- length(terms)
p <- vector("list", length = npara)
}
g <- l <- vector("list", length = nsmooth)
for (i in unique(S)) {
eS <- evaluate_smooth(object, smooth = i, n = n, unconditional = unconditional,
overall_uncertainty = overall_uncertainty, dist = dist)
l[S == i] <- split(eS, eS[["smooth"]])
}
l <- l[select]
d <- d[select]
g <- g[select]
if (length(g) == 0L) {
message("Unable to draw any of the model terms.")
return(invisible(g))
}
for (i in seq_along(l)) {
g[[i]] <- draw(l[[i]])
}
if (isTRUE(parametric)) {
for (i in seq_along(terms)) {
p[[i]] <- evaluate_parametric_term(object, term = terms[i])
g[[i + length(g)]] <- draw(p[[i]])
}
}
if (isTRUE(identical(scales, "fixed"))) {
wrapper <- function(x) {
range(x[["est"]] + (2 * x[["se"]]), x[["est"]] -
(2 * x[["se"]]))
}
ylims <- range(unlist(lapply(l, wrapper)))
if (isTRUE(parametric)) {
ylims <- range(ylims, unlist(lapply(p, function(x) range(x[["upper"]],
x[["lower"]]))))
}
gg <- seq_along(g)[c(d == 1L, rep(TRUE, npara))]
for (i in gg) {
g[[i]] <- g[[i]] + lims(y = ylims)
}
}
g
}
# Example no. 1
dat <- gamSim(1, n = 400, dist = "normal", scale = 2, verbose = FALSE)
mod <- gam(y ~ s(x0), data = dat, method = "REML")
p <- mydraw.gam(mod)
p[[1]] + ggtitle("My title")
# Example no. 2
mod <- gam(y ~ s(x0) + x1, data = dat, method = "REML")
p <- mydraw.gam(mod)
# Plot graphs separately
p[[1]] + ggtitle("My title")
p[[2]] + ggtitle("My title")
# Arrange the two plots on the same figure
cowplot::plot_grid(plotlist = p)
Reposts from Gavin Simpson and Hao Ye, respectively:
I think the only way to change the title(s) on the individual plots of smooths would be to use draw(evaluate_smooth(model, "smooth"), title = "My title") individually at the moment.
You might be able to hack a title in a different way:
draw(mod) +
ggplot2::coord_cartesian(clip = "off") +
ggplot2::theme(plot.margin = ggplot2::unit(c(0.05, 0, 0, 0), "npc")) +
ggplot2::annotate("text", x = 0.5, y = 1, vjust = 0, label = "TITLE", size = 6)
I wonder how you can simplify these two :
plot (payroll,wins)
id = identify(payroll, wins,labels = code, n = 5)
plot (payroll,wins)
with(data, text(payroll, wins, labels = code, pos = 1, cex=0.5))
using other alternatives - pch() dan as.numeric()?
Not sure it's easier but you change pch during identification as below (taken from the R-help). Every time you click empty point change to filled-in dot.
# data simulation
data <- data.frame(payroll = rnorm(10), wins = rnorm(10), code = letters[1:10])
identifyPch <- function(x, y = NULL, n = length(x), plot = FALSE, pch = 19, ...)
{
xy <- xy.coords(x, y)
x <- xy$x
y <- xy$y
sel <- rep(FALSE, length(x))
while (sum(sel) < n) {
ans <- identify(x[!sel], y[!sel], labels = which(!sel), n = 1, plot = plot, ...)
if(!length(ans)) {
break
}
ans <- which(!sel)[ans]
points(x[ans], y[ans], pch = pch)
sel[ans] <- TRUE
}
## return indices of selected points
which(sel)
}
if(dev.interactive()) { ## use it
with(data, plot(payroll,wins))
id = with(data, identifyPch(payroll, wins))
}
Using the solution supplied by AkselA. in How to formatting numbers by column in a table (tableGrob)
I've tried to increase the font size in the table.
From this website, I believe the solution is in the "Accessing existing grobs in the table" section using:
g$grobs[ind][[1]][["gp"]] <- gpar(fontsize=15, fontface="bold").
But when I try to change the code I keep getting errors.
I was able to figure out the answer to this question. I was able to use the solution provided by '42-' in Text alignment and font size in gtable
Changing his code fragment and adding it to the end of the referenced question.
g$grobs[] <-
lapply(g$grobs[],
function(x) modifyList( x, list(gp=list(fontsize=25, cex=1) ) ) )
The full code would be:
library(gtable)
library(grid)
library(gridExtra)
library(zoo)
data(iris)
iris <- iris[1:4, 1:3]
rownames(iris) <- as.character(as.yearmon(
seq(as.Date("2000/1/1"), as.Date("2000/4/1"), by = "month")))
iris$RankColumn <- 1:nrow(iris)
# a simple function to scale each row or column to the range [0, 1]
# will convert characters to numerics if in a sensible format
norm <- function(x, mar=2) {
rnames <- rownames(x)
x <- apply(x, 2, as.numeric)
x <- apply(x, mar, function(y){(y-min(y))/(max(y)-min(y))})
rownames(x) <- rnames
x
}
# function to pad with zero
# by default does not pad integers
zeropad <- function(x, nz=1, exc.int=TRUE) {
if (is.integer(x) & exc.int) {
x
} else {
sprintf(paste0("%.", nz, "f"), x)
}
}
bluecol <- colorRamp(c("#3366EE", "#AABBFF", "#DDDDFF"))(norm(iris))
bluecol <- rgb(bluecol[, 1], bluecol[, 2], bluecol[, 3], max=255)
tt <- ttheme_default(core=list(bg_params=list(fill=bluecol)))
# convert floats to zero-padded characters
iris[1:ncol(iris)] <- sapply(iris, zeropad, 2)
g <- tableGrob(iris, theme=tt)
g <- gtable_add_grob(g,
grobs = rectGrob(gp = gpar(fill = NA, lwd = 2)),
t = 2, b = nrow(g), l = 1, r = ncol(g))
g <- gtable_add_grob(g,
grobs = rectGrob(gp = gpar(fill = NA, lwd = 2)),
t = 1, l = 1, r = ncol(g))
g$grobs[] <-
lapply(g$grobs[],
function(x) modifyList( x, list(gp=list(fontsize=25, cex=1) ) ) )
plot.new()
grid.draw(g)
I wish to plot mean (or other function) of reaction time as a function of the location of the target in the x y plane.
As test data:
library(ggplot2)
xs <- runif(100,-1,1)
ys <- runif(100,-1,1)
rts <- rnorm(100)
testDF <- data.frame("x"=xs,"y"=ys,"rt"=rts)
I know I can do this:
p <- ggplot(data = testDF,aes(x=x,y=y))+geom_bin2d(bins=10)
What I would like to be able to do, is the same thing but plot a function of the data in each bin rather than counts. Can I do this?
Or do I need to generate the conditional means first in R (e.g. drt <- tapply(testDF$rt,list(cut(testDF$x,10),cut(testDF$y,10)),mean)) and then plot that?
Thank you.
Update With the release of ggplot2 0.9.0, much of this functionality is covered by the new additions of stat_summary2d and stat_summary_bin.
here is a gist for this answer: https://gist.github.com/1341218
here is a slight modification of stat_bin2d so as to accept arbitrary function:
StatAggr2d <- proto(Stat, {
objname <- "aggr2d"
default_aes <- function(.) aes(fill = ..value..)
required_aes <- c("x", "y", "z")
default_geom <- function(.) GeomRect
calculate <- function(., data, scales, binwidth = NULL, bins = 30, breaks = NULL, origin = NULL, drop = TRUE, fun = mean, ...) {
range <- list(
x = scales$x$output_set(),
y = scales$y$output_set()
)
# Determine binwidth, if omitted
if (is.null(binwidth)) {
binwidth <- c(NA, NA)
if (is.integer(data$x)) {
binwidth[1] <- 1
} else {
binwidth[1] <- diff(range$x) / bins
}
if (is.integer(data$y)) {
binwidth[2] <- 1
} else {
binwidth[2] <- diff(range$y) / bins
}
}
stopifnot(is.numeric(binwidth))
stopifnot(length(binwidth) == 2)
# Determine breaks, if omitted
if (is.null(breaks)) {
if (is.null(origin)) {
breaks <- list(
fullseq(range$x, binwidth[1]),
fullseq(range$y, binwidth[2])
)
} else {
breaks <- list(
seq(origin[1], max(range$x) + binwidth[1], binwidth[1]),
seq(origin[2], max(range$y) + binwidth[2], binwidth[2])
)
}
}
stopifnot(is.list(breaks))
stopifnot(length(breaks) == 2)
stopifnot(all(sapply(breaks, is.numeric)))
names(breaks) <- c("x", "y")
xbin <- cut(data$x, sort(breaks$x), include.lowest=TRUE)
ybin <- cut(data$y, sort(breaks$y), include.lowest=TRUE)
if (is.null(data$weight)) data$weight <- 1
ans <- ddply(data.frame(data, xbin, ybin), .(xbin, ybin), function(d) data.frame(value = fun(d$z)))
within(ans,{
xint <- as.numeric(xbin)
xmin <- breaks$x[xint]
xmax <- breaks$x[xint + 1]
yint <- as.numeric(ybin)
ymin <- breaks$y[yint]
ymax <- breaks$y[yint + 1]
})
}
})
stat_aggr2d <- StatAggr2d$build_accessor()
and usage:
ggplot(data = testDF,aes(x=x,y=y, z=rts))+stat_aggr2d(bins=3)
ggplot(data = testDF,aes(x=x,y=y, z=rts))+
stat_aggr2d(bins=3, fun = function(x) sum(x^2))
As well, here is a slight modification of stat_binhex:
StatAggrhex <- proto(Stat, {
objname <- "aggrhex"
default_aes <- function(.) aes(fill = ..value..)
required_aes <- c("x", "y", "z")
default_geom <- function(.) GeomHex
calculate <- function(., data, scales, binwidth = NULL, bins = 30, na.rm = FALSE, fun = mean, ...) {
try_require("hexbin")
data <- remove_missing(data, na.rm, c("x", "y"), name="stat_hexbin")
if (is.null(binwidth)) {
binwidth <- c(
diff(scales$x$input_set()) / bins,
diff(scales$y$input_set() ) / bins
)
}
try_require("hexbin")
x <- data$x
y <- data$y
# Convert binwidths into bounds + nbins
xbnds <- c(
round_any(min(x), binwidth[1], floor) - 1e-6,
round_any(max(x), binwidth[1], ceiling) + 1e-6
)
xbins <- diff(xbnds) / binwidth[1]
ybnds <- c(
round_any(min(y), binwidth[1], floor) - 1e-6,
round_any(max(y), binwidth[2], ceiling) + 1e-6
)
ybins <- diff(ybnds) / binwidth[2]
# Call hexbin
hb <- hexbin(
x, xbnds = xbnds, xbins = xbins,
y, ybnds = ybnds, shape = ybins / xbins,
IDs = TRUE
)
value <- tapply(data$z, hb#cID, fun)
# Convert to data frame
data.frame(hcell2xy(hb), value)
}
})
stat_aggrhex <- StatAggrhex$build_accessor()
and usage:
ggplot(data = testDF,aes(x=x,y=y, z=rts))+stat_aggrhex(bins=3)
ggplot(data = testDF,aes(x=x,y=y, z=rts))+
stat_aggrhex(bins=3, fun = function(x) sum(x^2))
This turned out to be harder than I expected.
You can almost trick ggplot into doing this, by providing a weights aesthetic, but that only gives you the sum of the weights in the bin, not the mean (and you have to specify drop=FALSE to retain negative bin values). You can also retrieve either counts or density within a bin, but neither of those really solves the problem.
Here's what I ended up with:
## breaks vector (slightly coarser than the 10x10 spec above;
## even 64 bins is a lot for binning only 100 points)
bvec <- seq(-1,1,by=0.25)
## helper function
tmpf <- function(x,y,z,FUN=mean,breaks) {
midfun <- function(x) (head(x,-1)+tail(x,-1))/2
mids <- list(x=midfun(breaks$x),y=midfun(breaks$y))
tt <- tapply(z,list(cut(x,breaks$x),cut(y,breaks$y)),FUN)
mt <- melt(tt)
## factor order gets scrambled (argh), reset it
mt$X1 <- factor(mt$X1,levels=rownames(tt))
mt$X2 <- factor(mt$X2,levels=colnames(tt))
transform(X,
x=mids$x[mt$X1],
y=mids$y[mt$X2])
}
ggplot(data=with(testDF,tmpf(x,y,rt,breaks=list(x=bvec,y=bvec))),
aes(x=x,y=y,fill=value))+
geom_tile()+
scale_x_continuous(expand=c(0,0))+ ## expand to fill plot region
scale_y_continuous(expand=c(0,0))
This assumes equal bin widths, etc., could be extended ... it really is too bad that (as far as I can tell) stat_bin2d doesn't accept a user-specified function.