How to use plot inside a function?

How to use plot inside a function? - r

I have a list and I want to plot all data frames in it by function MyPlot, but there are several problems:
It just plot the last data frame (L2)
The names of each data frames can not be extracted by name = deparse(substitute(df))
If I use jpeg instead of pdf there is an error:
"Error in switch(units, in = res, cm = res/2.54, mm = res/25.4, px = 1) * :
non-numeric argument to binary operator"
Any help would be appreciated.
L1 = data.frame(A = c(1:4) , B = c(1:4) , C = c(1:4))
L2 = data.frame(A = c(5:8) , B = c(8:11), G = c(1:4) )
L=list(L1,L2)
names(L) = c('L1' , 'L2')
MyPlot <- function(df){
name = deparse(substitute(df))
jpeg(paste(name) , ".jpg")
#pdf(paste0(name,".pdf"), onefile = TRUE, paper = "A4")
P = ggplot(df, aes(A , B)) + geom_point()
#print(P)
dev.off()
}
Plot_jpeg = L %>% lapply(MyPlot)

This might not be what you want, but:
L1 = data.frame(A = c(1:4) , B = c(1:4) , C = c(1:4))
L2 = data.frame(A = c(5:8) , B = c(8:11), G = c(1:4) )
L=list(L1,L2)
names(L) = c('L1' , 'L2')
MyPlot <- function(df, name){
P = ggplot(df, aes(A , B)) + geom_point()
ggsave(P, glue::glue("{name}.jpeg")
return(P)
}
Plots_list = purrr::map2(L, names(L),
function(.x, .y) MyPlot(.x, .y))
As MrFlick suggests,
a more idiomatic purrr option could be purrr::imap(L, ~MyPlot(.x, .y)) since imap(x, ...) is short hand for map2(x, names(x), ...). I am using map2 because I would rather be explicit, less things to remember.

Related

cant get correct mapping when use ggproto to creat a new geom function

I want draw a multisequence align result in a whole view,so i draw a picture with follow code:
library(ggmsa)
library(Biostrings)
aln_data <- system.file("extdata", "seedSample.fa", package ="ggmsa")
df <- tidy_msa(aln_data)
###df like this
name position character
#1 Homo 1 -
#2 fascicularis 1 G
#3 mulatta 1 -
#4 Homo 2 -
#5 fascicularis 2 T
#6 mulatta 2 -
max_pos <- df %>% select(position) %>% max()
height_n <- length(unique(df$name))
df$y<- rep(seq(1,height_n,1),times=max_pos)
#caculate the seq frequce
fre_list<-list()
for (i in 1:length(unique(df$position))){
single_pos <- df[df$position == i,]
most_fre <- names(sort(table(single_pos$character),decreasing = T)[1])
fre_list[[i]] <- most_fre
}
df$fre <- rep(unlist(fre_list),each=height_n)
df$y <- ifelse(df$character == df$fre,0,df$y)
df$y0 <- ifelse(df$y ==0,0,df$y-1)
ggplot(df)+geom_segment(aes(x=position,xend=position,y=y0,yend=y),color='grey',alpha=0.8)+
theme_bw()+ggplot2::theme(panel.grid=element_blank(),
axis.text.x = element_blank(),axis.text.y = element_blank(),
axis.ticks=element_blank(),axis.title.x = element_blank(),
axis.title.y = element_blank())+
scale_y_continuous(expand=c(0,0))+
scale_x_continuous(expand=c(0,0))
it works well ,then i want to change it to a new geom function like follow:
StatMsawind <- ggplot2::ggproto('StatMsawind',Stat,required_aes= c('x','y'),
compute_group =function(data,scales){
View(data)
pos_number <- data %>% select(x) %>% max()
print(pos_number)
height_n <- nrow(data)/pos_number
print(height_n)
out<- data.frame('y'=rep(seq(1,height_n,1),times=pos_number))
out$cha <- data$y
fre_list<-list()
for (i in 1:pos_number){
single_pos <- data[data$x == i,]
most_fre <- names(sort(table(single_pos$y),decreasing = T)[1])
fre_list[[i]] <- most_fre
}
out$fre <- rep(unlist(fre_list),each=height_n)
out$y <- ifelse(out$cha == out$fre,0,out$y)
out$ystart <- ifelse(out$y == 0,0,out$y-1)
out
print(out)
})
stat_msawind <- function(data=NULL,mapping =NULL,geom = 'msawind',position='identity',
inherit.aes=TRUE,...){
ggplot2::layer(stat = StatMsawind, data = data, mapping = mapping, geom = geom,
position = position,inherit.aes = inherit.aes,
params = list(...))
}
GeomMsawind <- ggplot2::ggproto('GeomMsawind',ggplot2::Geom,
required_aes=c('x','y','ystart'),
default_aes=aes(color='grey',alpha=0.8),
draw_key = draw_key_abline,
draw_panel = function(data,panel_scales,coord){
coords <- coord$transform(data,panel_scales)
print(coords)
grid::segmentsGrob(x0=coords$x,x1=coords$x,y0=coords$ystart,y1=coords$y,
gp=grid::gpar(col=ggplot2::alpha(coords$color,coords$alpha)))
})
geom_msawind <- function(data=NULL,mapping =NULL,stat='msawind',position='identity',inherit.aes=TRUE,...){
ggplot2::layer(data = data,mapping = mapping,stat = stat, geom=GeomMsawind,position=position,inherit.aes = inherit.aes,
params = list(color='grey',alpha=0.8,...))
}
ggplot(df,aes(position,character))+geom_msawind()
i get this error:
Warning message:
Computation failed in `stat_msawind()`:
wrong sign in 'by' argument
when i chcek the data in StatMsawind function ,i find that the data shape is no equal to the passing variables df i send in.i can't figure out why it can't stat correct, what should i do to make it correct
thanks very much

Cannot update/edit ggplot2 object exported from a package (`gratia`) in R

I hope I am missing something painfully obvious here.
I wish to update (e.g., fix title, labs, etc.) on a ggplot object produced from gratia::draw(). Not really sure why I am unable to update the object.
Is there a simple solution?
# devtools::install_github('gavinsimpson/gratia')
library('mgcv')
library('gratia')
dat <- gamSim(1, n = 400, dist = "normal", scale = 2, verbose = FALSE)
mod <- gam(y ~ s(x0), data = dat, method = "REML")
draw(mod)
p = draw(mod)
# P is a ggobject.
class(p)
#> [1] "gg" "ggplot"
So, why can't I update p?
p + ggtitle("My title")
Created on 2019-02-26 by the reprex package (v0.2.1)

The object returned by draw.gam is the output of cowplot::plot_grid (not a pure ggplot2 graphical object).
I made a small change into draw.gam function of gratia. .
Now the p object can be manipulated:
# The modified draw.gam function
mydraw.gam <- function (object, parametric = TRUE, select = NULL, scales = c("free",
"fixed"), align = "hv", axis = "lrtb", n = 100, unconditional = FALSE,
overall_uncertainty = TRUE, dist = 0.1, ...)
{
scales <- match.arg(scales)
S <- smooths(object)
select <- gratia:::check_user_select_smooths(smooths = S, select = select)
d <- gratia:::smooth_dim(object)
take <- d <= 2L
select <- select[take]
S <- S[take]
d <- d[take]
is_re <- vapply(object[["smooth"]], gratia:::is_re_smooth, logical(1L))
is_by <- vapply(object[["smooth"]], gratia:::is_by_smooth, logical(1L))
if (any(is_by)) {
S <- vapply(strsplit(S, ":"), `[[`, character(1L), 1L)
}
npara <- 0
nsmooth <- length(S)
if (isTRUE(parametric)) {
terms <- parametric_terms(object)
npara <- length(terms)
p <- vector("list", length = npara)
}
g <- l <- vector("list", length = nsmooth)
for (i in unique(S)) {
eS <- evaluate_smooth(object, smooth = i, n = n, unconditional = unconditional,
overall_uncertainty = overall_uncertainty, dist = dist)
l[S == i] <- split(eS, eS[["smooth"]])
}
l <- l[select]
d <- d[select]
g <- g[select]
if (length(g) == 0L) {
message("Unable to draw any of the model terms.")
return(invisible(g))
}
for (i in seq_along(l)) {
g[[i]] <- draw(l[[i]])
}
if (isTRUE(parametric)) {
for (i in seq_along(terms)) {
p[[i]] <- evaluate_parametric_term(object, term = terms[i])
g[[i + length(g)]] <- draw(p[[i]])
}
}
if (isTRUE(identical(scales, "fixed"))) {
wrapper <- function(x) {
range(x[["est"]] + (2 * x[["se"]]), x[["est"]] -
(2 * x[["se"]]))
}
ylims <- range(unlist(lapply(l, wrapper)))
if (isTRUE(parametric)) {
ylims <- range(ylims, unlist(lapply(p, function(x) range(x[["upper"]],
x[["lower"]]))))
}
gg <- seq_along(g)[c(d == 1L, rep(TRUE, npara))]
for (i in gg) {
g[[i]] <- g[[i]] + lims(y = ylims)
}
}
g
}
# Example no. 1
dat <- gamSim(1, n = 400, dist = "normal", scale = 2, verbose = FALSE)
mod <- gam(y ~ s(x0), data = dat, method = "REML")
p <- mydraw.gam(mod)
p[[1]] + ggtitle("My title")
# Example no. 2
mod <- gam(y ~ s(x0) + x1, data = dat, method = "REML")
p <- mydraw.gam(mod)
# Plot graphs separately
p[[1]] + ggtitle("My title")
p[[2]] + ggtitle("My title")
# Arrange the two plots on the same figure
cowplot::plot_grid(plotlist = p)

Reposts from Gavin Simpson and Hao Ye, respectively:
I think the only way to change the title(s) on the individual plots of smooths would be to use draw(evaluate_smooth(model, "smooth"), title = "My title") individually at the moment.
You might be able to hack a title in a different way:
draw(mod) +
ggplot2::coord_cartesian(clip = "off") +
ggplot2::theme(plot.margin = ggplot2::unit(c(0.05, 0, 0, 0), "npc")) +
ggplot2::annotate("text", x = 0.5, y = 1, vjust = 0, label = "TITLE", size = 6)

Custom column names as arguments in the functions of stability R package

I developed the stability R package which can be installed from CRAN.
install.packages("stability")
However, I have difficulty in making it to take custom column names as function arguments. Here is an example of add_anova function
library(stability)
data(ge_data)
YieldANOVA <-
add_anova(
.data = ge_data
, .y = Yield
, .rep = Rep
, .gen = Gen
, .env = Env
)
YieldANOVA
The above code works fine. However, when I change the column names of the data.frame, it doesn't work as below:
df1 <- ge_data
names(df1) <- c("G", "Institute", "R", "Block", "E", "Y")
fm1 <-
add_anova(
.data = df1
, .y = Y
, .rep = Rep
, .gen = G
, .env = E
)
Error in model.frame.default(formula = terms(.data$Y ~ .data$E + .data$Rep:.data$E + :
invalid type (NULL) for variable '.data$Rep'
Similarly another function stab_reg
fm1Reg <-
stab_reg(
.data = df1
, .y = Y
, .gen = G
, .env = E
)
Error in eval(predvars, data, env) : object 'Gen' not found
The codes of these functions can be accessed by
getAnywhere(add_anova.default)
function (.data, .y, .rep, .gen, .env)
{
Y <- enquo(.y)
Rep <- enquo(.rep)
G <- enquo(.gen)
E <- enquo(.env)
fm1 <- lm(formula = terms(.data$Y ~ .data$E + .data$Rep:.data$E +
.data$G + .data$G:.data$E, keep.order = TRUE), data = .data)
fm1ANOVA <- anova(fm1)
rownames(fm1ANOVA) <- c("Env", "Rep(Env)", "Gen", "Gen:Env",
"Residuals")
fm1ANOVA[1, 4] <- fm1ANOVA[1, 3]/fm1ANOVA[2, 3]
fm1ANOVA[2, 4] <- NA
fm1ANOVA[1, 5] <- 1 - pf(as.numeric(fm1ANOVA[1, 4]), fm1ANOVA[1,
1], fm1ANOVA[2, 1])
fm1ANOVA[2, 5] <- 1 - pf(as.numeric(fm1ANOVA[2, 4]), fm1ANOVA[2,
1], fm1ANOVA[5, 1])
class(fm1ANOVA) <- c("anova", "data.frame")
return(list(anova = fm1ANOVA))
}
<bytecode: 0xc327c28>
<environment: namespace:stability>
and
getAnywhere(stab_reg.default)
function (.data, .y, .rep, .gen, .env)
{
Y <- enquo(.y)
Rep <- enquo(.rep)
G <- enquo(.gen)
E <- enquo(.env)
g <- length(levels(.data$G))
e <- length(levels(.data$E))
r <- length(levels(.data$Rep))
g_means <- .data %>% dplyr::group_by(!!G) %>% dplyr::summarize(Mean = mean(!!Y))
names(g_means) <- c("G", "Mean")
DataNew <- .data %>% dplyr::group_by(!!G, !!E) %>% dplyr::summarize(GEMean = mean(!!Y)) %>%
dplyr::group_by(!!E) %>% dplyr::mutate(EnvMean = mean(GEMean))
IndvReg <- lme4::lmList(GEMean ~ EnvMean | Gen, data = DataNew)
IndvRegFit <- summary(IndvReg)
StabIndvReg <- tibble::as_tibble(data.frame(g_means, Slope = coef(IndvRegFit)[,
, 2][, 1], LCI = confint(IndvReg)[, , 2][, 1], UCI = confint(IndvReg)[,
, 2][, 2], R.Sqr = IndvRegFit$r.squared, RMSE = IndvRegFit$sigma,
SSE = IndvRegFit$sigma^2 * IndvRegFit$df[, 2], Delta = IndvRegFit$sigma^2 *
IndvRegFit$df[, 2]/r))
MeanSlopePlot <- ggplot(data = StabIndvReg, mapping = aes(x = Slope,
y = Mean)) + geom_point() + geom_text(aes(label = G),
size = 2.5, vjust = 1.25, colour = "black") + geom_vline(xintercept = 1,
linetype = "dotdash") + geom_hline(yintercept = mean(StabIndvReg$Mean),
linetype = "dotdash") + labs(x = "Slope", y = "Mean") +
scale_x_continuous(sec.axis = dup_axis(), labels = scales::comma) +
scale_y_continuous(sec.axis = dup_axis(), labels = scales::comma) +
theme_bw()
return(list(StabIndvReg = StabIndvReg, MeanSlopePlot = MeanSlopePlot))
}
<bytecode: 0xe431010>
<environment: namespace:stability>

One of the problems in the data 'df1' is the column name is 'R' instead of "Rep" which was passed into the function. Second, the terms passed into the formula are quosures. we could change it to string with quo_names and then construct formula with paste
add_anova1 <- function (.data, .y, .rep, .gen, .env) {
y1 <- quo_name(enquo(.y))
r1 <- quo_name(enquo(.rep))
g1 <- quo_name(enquo(.gen))
e1 <- quo_name(enquo(.env))
fm <- formula(paste0(y1, "~", paste(e1, paste(r1, e1, sep=":"),
g1, paste(g1, e1, sep=":"), sep="+")))
fm1 <- lm(terms(fm, keep.order = TRUE), data = .data)
fm1ANOVA <- anova(fm1)
rownames(fm1ANOVA) <- c("Env", "Rep(Env)", "Gen", "Gen:Env",
"Residuals")
fm1ANOVA[1, 4] <- fm1ANOVA[1, 3]/fm1ANOVA[2, 3]
fm1ANOVA[2, 4] <- NA
fm1ANOVA[1, 5] <- 1 - pf(as.numeric(fm1ANOVA[1, 4]), fm1ANOVA[1,
1], fm1ANOVA[2, 1])
fm1ANOVA[2, 5] <- 1 - pf(as.numeric(fm1ANOVA[2, 4]), fm1ANOVA[2,
1], fm1ANOVA[5, 1])
class(fm1ANOVA) <- c("anova", "data.frame")
return(list(anova = fm1ANOVA))
}
YieldANOVA2 <- add_anova1(
.data = df1
, .y = Y
, .rep = R
, .gen = G
, .env = E
)
-checking with the output generated using 'ge_data' without changing the column names
all.equal(YieldANOVA, YieldANOVA2, check.attributes = FALSE)
#[1] TRUE
Similarly stab_reg could be changed

R redefine a string to argument

following on from some help earlier I think all I need for this to work is a way to define the variable dimxST below as not a string as I need that to point to the data frame....
cpkstudy <- function(x,y){
dxST <- paste(x,"$",y, sep = "")
dLSL <- paste(y, "LSL", sep = "")
dUSL <- paste(y, "USL", sep = "")
dTar <- paste(y, "Target", sep = "")
dimxST <-
dimLSL <- PivSpecs[[dLSL]]
dimUSL <- PivSpecs[[dUSL]]
dimTar <- PivSpecs[[dTar]]
ss.study.ca(dimxST, LSL = dimLSL, USL = dimUSL, Target = dimTar,
alpha = 0.05, f.na.rm = TRUE, f.main = "Six Sigma Study")
}
cpkstudy("cam1","D1")
link to the previous post

This is a different direction, and you may find the learning curve a bit steeper, but it's a lot more powerful. Instead of passing everything in as strings, we pass them without quotes, and use the rlang package to figure out where to evaluate D1.
# the same dummy data frame from Katia's answer
cam1 <- data.frame(D1 = rnorm(10),
D2 = rnorm(10))
PivSpecs <- list(D1LSL = 740, D1USL = 760, D1Target = 750)
library(rlang)
cpkstudy <- function(df, y){
quo_y <- enquo(y)
dLSL <- paste0(quo_name(quo_y), "LSL")
dUSL <- paste0(quo_name(quo_y), "USL")
dTar <- paste0(quo_name(quo_y), "Target")
dimxST <- eval_tidy(quo_y, data = df)
dimLSL <- PivSpecs[[dLSL]]
dimUSL <- PivSpecs[[dUSL]]
dimTar <- PivSpecs[[dTar]]
print(dimxST)
print (paste("dimLSL=", dimLSL) )
print (paste("dimUSL=", dimUSL) )
print (paste("dimTar=", dimTar) )
# ss.study.ca(dimxST, LSL = dimLSL, USL = dimUSL, Target = dimTar,
# alpha = 0.05, f.na.rm = TRUE, f.main = "Six Sigma Study")
}
# notice that I am not quoting cam1 and D1
cpkstudy(cam1, D1)
If you want to learn more about this, I would suggest looking at https://dplyr.tidyverse.org/articles/programming.html as an overview (the dplyr package imports some of the functions used in rlang), and http://rlang.r-lib.org/index.html for a more complete list of all the functions and examples.

You can use function get() to get object value from its string representation. In this solution I did not evaluate ss.study.ca() function itself, since I do not have your real-case input data, instead I just print the values that would go there:
cpkstudy <- function(x,y){
#dxST <- paste0(x,"$",y)
dLSL <- paste0(y, "LSL")
dUSL <- paste0(y, "USL")
dTar <- paste0(y, "Target")
dimxST <- get(x)[,y]
print(dimxST)
dimLSL <- PivSpecs[[dLSL]]
dimUSL <- PivSpecs[[dUSL]]
dimTar <- PivSpecs[[dTar]]
print (paste("dimLSL=", dimLSL) )
print (paste("dimUSL=", dimUSL) )
print (paste("dimTar=", dimTar) )
#ss.study.ca(dimxST, LSL = dimLSL, USL = dimUSL, Target = dimTar,
# alpha = 0.05, f.na.rm = TRUE, f.main = "Six Sigma Study")
}
# create some dummy dataframe to test with this example
cam1 <- data.frame(D1 = rnorm(10),
D2 = rnorm(10))
# define a list that will be used within a function
PivSpecs <- list(D1LSL = 740, D1USL = 760, D1Target = 750)
#test function
cpkstudy("cam1","D1")
#[1] 1.82120625 -0.08857998 -0.08452232 -0.43263828 0.17974556 -0.91141414 #-2.30595203 -1.24014396 -1.83814577 -0.24812598
#[1] "dimLSL= 740"
#[1] "dimUSL= 760"
#[1] "dimTar= 750"
I also changed your paste() commands on paste0() which has sep="" as a default.

Create a one-row dataframe where every column is the same as an existing variable

I often write code like this:
answer.df = data.frame(x = numeric(0), y = numeric(0), z = numeric(0))
for (i in 1:100) {
x = do_stuff(i)
y = do_more_stuff(i)
z = yet_more_stuff(i)
# Is there a better way of doing this:
temp.df = data.frame(x = x, y = y, z = z)
answer.df = rbind(answer.df, temp.df)
}
My question is, in the line temp.df = data.frame(x = x, y = y, z = z), is there a neater way of doing this? Imagine it with ten or more variables to understand my problem.

Try this:
do.call("rbind", lapply(1:100, function(i) list(x = xfun(i), y = yfun(i))))
Also try rbindlist from data.table which may have some performance advantages:
library(data.table)
rbindlist(lapply(1:100, function(i) list(x = xfun(i), y = yfun(i))))