Using xyplot within custom function with panel function in R - r

I'm trying to generate a custom xyplot in the lattice graphics package. It is a plot of two lines with range limits plotted.
# Raw dataframe in long format
df <- data.frame(
Response = c(runif(100), rnorm(100)),
Trial = c(rep("A", 100), rep("C", 100)),
Year = 1:10,
Rep = rep(1:10, each = 10))
# Aggregate the data (take mean/min/max across "Rep" variable
gdf <- do.call(data.frame, aggregate(Response ~ Year + Trial, data = df,
FUN = function(x) c(avg = mean(x), mini = min(x), maxi = max(x))))
# Plot using xyplot (without making this a function)
my.panel.bands <- function(x, y, upper, lower, fill, col,
subscripts, ..., font, fontface){
upper <- upper[subscripts]
lower <- lower[subscripts]
panel.polygon(c(x, rev(x)), c(upper, rev(lower)),
col = fill, alpha = 0.2, border = FALSE, ...)
}
f1 <- formula(Response.avg ~ Year)
p <- xyplot(x = f1, data = gdf, groups = Trial,
col=c("red", "blue"), pch = 16,
scales = list(x = list(rot = 45)),
xlab = 'Year', ylab = 'Response',
layout = c(1, 1),
ylim = c(min(gdf$Response.mini), max(gdf$Response.maxi)),
upper = gdf$Response.maxi,
lower = gdf$Response.mini,
panel = function(x, y, ...){
panel.superpose(x, y, panel.groups = my.panel.bands,
type = 'l', fill = c("red", "blue"),...)
panel.xyplot(x, y, type = 'b', cex = 0.6, lty = 1, ...)
}
)
png("panel_plot.png")
print(p)
dev.off()
However, if I try to make a custom function out of this xyplot command then I get a very different plot to what I was expecting. I'm assuming I'm doing something incorrect with passing the grouping variable or in using the panel function.
panel_plot <- function(f, df, grouper, xlabel, ylabel,
ylim, upper_border, lower_border, mfcol){
p <- xyplot(x = f, data = df, groups = eval(grouper),
col = c("red", "blue"), pch = 16,
scales = list(x = list(rot = 45)),
xlab = xlabel, ylab = ylabel,
ylim = ylim,
layout = mfcol,
upper = upper_border,
lower = lower_border,
panel = function(x, y, ...){
panel.superpose(x, y, panel.groups = my.panel.bands,
type = 'l', fill = c("red", "blue"),...)
panel.xyplot(x, y, type = 'b', cex = 0.6, lty = 1, ...)
}
)
return(p)
}
f1 <- formula(Response.avg ~ Year)
p <- panel_plot(f1, gdf, grouper = Trial,
xlabel = "Year", ylabel = "Response",
ylim = c(min(gdf$Response.mini),max(gdf$Response.maxi)),
upper_border = gdf$Response.maxi,
lower_border = gdf$Response.mini,
mfcol = c(1, 1))
png("panel_plot_asfunction.png")
print(p)
dev.off()
Finally, if I pass the name of the variable to the group argument as a string and modify the panel_plot() function to redefine a new variable in the data.frame, then it works as expected but this seems like a strange way to do things.
panel_plot <- function(f, df, grouper, xlabel, ylabel,
ylim, upper_border, lower_border, mfcol){
df$grouper <- df[, grouper]
p <- xyplot(x = f, data = df, groups = grouper,
...
p <- panel_plot(f1, gdf, grouper = "Trial",
...
How do I define the panel_plot function so that I don't have to create a dummy column and can pass the variable name (Trial) to this function so that it is not passed as a string?
I've tried using the suggestion here but using eval() on the variable name provided the unexpected figure above.

Actually, consider combining both solutions of your linked SO post using match.call() list and eval(call(), ...). By themselves alone neither worked on my end.
panel_plot <- function(f, df, grouper, xlabel, ylabel,
ylim, upper_border, lower_border, mfcol){
ll <- as.list(match.call(expand.dots = FALSE)[-1])
my_panel <- function(x, y, ...){
panel.superpose(x, y, panel.groups = my.panel.bands,
type = 'l', fill = c("red", "blue"),...)
panel.xyplot(x, y, type = 'b', cex = 0.6, lty = 1, ...)
}
p <- eval(call("xyplot",
x = ll$f,
data = ll$df,
groups = ll$grouper,
xlab = ll$xlabel, ylab = ll$ylabel,
ylim = ll$ylim,
layout = ll$mfcol,
upper = ll$upper_border,
lower = ll$lower_border,
panel = my_panel
)
)
return(p)
}
f1 <- formula(Response.avg ~ Year)
p <- panel_plot(f1, gdf, grouper = Trial,
xlabel = "Year", ylabel = "Response",
ylim = c(min(gdf$Response.mini), max(gdf$Response.maxi)),
upper_border = gdf$Response.maxi,
lower_border = gdf$Response.mini,
mfcol = c(1, 1))
p
However, aesthetics are not exactly the same such as red and blue lines and points. Possibly this is because eval(call(...)) requires all arguments to be specified? Try adjusting arguments or my_panel.
Alternatively, use your dummy column but still pass unquoted name and evaluate column name with eval. Here, aesthetics render as desired non-function version.
panel_plot <- function(f, df, grouper, xlabel, ylabel,
ylim, upper_border, lower_border, mfcol){
df$grouper <- eval(as.name(deparse(substitute(grouper))), df, .GlobalEnv)
p <- xyplot(x = f, data = df, groups = grouper,
...
}

Related

Call objects based on one class with the different attributes

Is it possible to call objects with different attributes based on one class?
For example here i want to have zlab attribute for one object and not for another one.
I tried to use do.call with structure, but it doesn't work, as I thought. Or maybe it should be constructed in some other way?
chart <- function(domain, n, f, col, zlab, ...) {
x <- y <- seq(min(domain), max(domain), length.out = n)
fun <- function(x, y) {}
body(fun) <- substitute(f)
do.call(
lapply, structure(list(x = x, y = y, f = fun,
col=col, zlab = zlab, ...), ...),
class = "myChart")
}
# plot method
plot.myChart <- function(x, ...) {
z = outer(x$x, x$y, x$f)
persp(x$x, x$y, z, col = x$col, zlab = x$zlab, ...)
}
# object call
chr1 <- chart(c(-6, 6),
n = 30,
sqrt(x^2 + y^2),
col = 'blue',
zlab = "Height")
chr2 <- chart(c(-5, 5),
n = 30,
x^2 + y^2,
col = 'green')
plot(chr1)
plot(chr2)
You can simply add defaults for each argument, so that if any are skipped, they have a sensible fallback. This is a bit harder with f, but can be done inside the function using a combination of missing, match.call and quote:
chart <- function(domain = c(-1, 1), n = 10, f, col = 'gray90', zlab = '', ...)
{
x <- y <- seq(min(domain), max(domain), length.out = n)
f <- if(missing(f)) quote(x + y) else match.call()$f
structure(list(x = x, y = y, f = as.function(c(alist(x=, y=), f)),
col = col, zlab = zlab, ...),
class = "myChart")
}
# plot method
plot.myChart <- function(x, ...) {
z = outer(x$x, x$y, x$f)
persp(x$x, x$y, z, col = x$col, zlab = x$zlab, ...)
}
# object call
chr1 <- chart(c(-6, 6),
n = 30,
sqrt(x^2 + y^2),
col = 'blue',
zlab = "Height")
chr2 <- chart(c(-5, 5),
n = 30,
x^2 + y^2,
col = 'green')
plot(chr1)
plot(chr2)
Note we even get a result with no arguments passed to chart at all.
chr3 <- chart()
plot(chr3)
Created on 2022-05-30 by the reprex package (v2.0.1)
Maybe this helps
chart <- function(domain, n, f, col, zlab, ...) {
x <- y <- seq(min(domain), max(domain), length.out = n)
fun <- function(x, y) {}
body(fun) <- substitute(f)
structure(c(list(x = x, y = y, f = fun, col=col, zlab = zlab),
c(...)), class = "myChart")
}
-testing
chr1 <- chart(c(-6, 6),
n = 30,
sqrt(x^2 + y^2),
col = 'blue',
zlab = "Height")
chr2 <- chart(c(-5, 5),
n = 30,
x^2 + y^2,
zlab = NULL,
col = 'green')
plot(chr1)
plot(chr2)

Add the new regression line but keep the regression lines from previous runs in R

Background
I have a function called TPN (R code is below the picture). When you run this function, it produces two plots (see picture below). The bottom-row plot samples from the top-row plot and then adds a red regression line. Each time you run the TPN function, the bottom-row plot produces a new red-colored regression line.
Question
In the bottom-row plot, I was wondering if there is a way I could KEEP the regression lines from previous runs each time I run the TPN function (see picture below)?
That is, each time that I run a new TPN function the regression line from a previous run is kept in its place (probably in a color other than "red" for distinction purposes), and the new regression line is just added to he bottom-row plot?
############## Input Values #################
TPN = function( each.sub.pop.n = 150,
sub.pop.means = 20:10,
predict.range = 10:0,
sub.pop.sd = .75,
n.sample = 2 ) {
#############################################
par( mar = c(2, 4.1, 2.1, 2.1) )
m = matrix( c(1, 2), nrow = 2, ncol = 1 ); layout(m)
set.seed(2460986)
Vec.rnorm <- Vectorize(function(n, mean, sd) rnorm(n, mean, sd), 'mean')
y <- c( Vec.rnorm(each.sub.pop.n, sub.pop.means, sub.pop.sd) )
set.seed(NULL)
x <- rep(predict.range, each = each.sub.pop.n)
plot(x, y, ylim = range(y)) ## Top-Row Plot
sample <- lapply(split(y, x), function(z) sample(z, n.sample, replace = TRUE))
sample <- data.frame(y = unlist(sample),
x = as.numeric(rep(names(sample), each = n.sample)))
x = sample$x ; y = sample$y
plot(x, y, ylim = range(y)) #### BOTTOM-ROW PLOT
abline(lm(y ~ x), col = 'red') # Regression Line
}
## TEST HERE:
TPN()
It ain't that easy. I made another function and edit the first one as well.
To summarize what I have done:
I made the first function to set par(new = TRUE) at the end of it. Also, set the color for points in the bottom row plot to be white only for formatting. You can get rid of col = 'white', bg = 'white' if you wish.
Then, in the second function top row plot does not get plotted and yaxis won't be added to the bottom row plot from each "test".
Look below:
############## Input Values #################
TPN = function( each.sub.pop.n = 150,
sub.pop.means = 20:10,
predict.range = 10:0,
sub.pop.sd = .75,
n.sample = 2 ) {
#############################################
par( mar = c(2, 4.1, 2.1, 2.1) )
m = matrix( c(1, 2), nrow = 2, ncol = 1 ); layout(m)
set.seed(2460986)
Vec.rnorm <- Vectorize(function(n, mean, sd) rnorm(n, mean, sd), 'mean')
y <- c( Vec.rnorm(each.sub.pop.n, sub.pop.means, sub.pop.sd) )
set.seed(NULL)
x <- rep(predict.range, each = each.sub.pop.n)
par(new = FALSE)
plot(x, y, ylim = range(y)) ## Top-Row Plot
sample <- lapply(split(y, x), function(z) sample(z, n.sample, replace = TRUE))
sample <- data.frame(y = unlist(sample),
x = as.numeric(rep(names(sample), each = n.sample)))
x = sample$x ; y = sample$y
plot(x, y, ylim = range(y), col = 'white', bg = 'white') #### BOTTOM-ROW PLOT
abline(lm(y ~ x), col = 'red') # Regression Line
par(new = TRUE)
}
The second one does not plot the top row one:
############## Input Values #################
TPN2 = function( each.sub.pop.n = 150,
sub.pop.means = 20:10,
predict.range = 10:0,
sub.pop.sd = .75,
n.sample = 2 ) {
#############################################
par( mar = c(2, 4.1, 2.1, 2.1) )
m = matrix( c(1, 2), nrow = 2, ncol = 1 ); layout(m)
set.seed(2460986)
Vec.rnorm <- Vectorize(function(n, mean, sd) rnorm(n, mean, sd), 'mean')
y <- c( Vec.rnorm(each.sub.pop.n, sub.pop.means, sub.pop.sd) )
set.seed(NULL)
x <- rep(predict.range, each = each.sub.pop.n)
#par(new = FALSE) #comment-out
#plot(x, y, ylim = range(y)) ##Top-Row Plot #comment-out
sample <- lapply(split(y, x), function(z) sample(z, n.sample, replace = TRUE))
sample <- data.frame(y = unlist(sample),
x = as.numeric(rep(names(sample), each = n.sample)))
x = sample$x ; y = sample$y
plot(x, y, ylim = range(y), axes = FALSE, col = 'white', bg = 'white') ##BOTTOM-ROW PLOT
abline(lm(y ~ x), col = 'blue') # Regression Line
par(new = TRUE)
}
Then your test would be like this:
## TEST HERE:
TPN()
TPN2()
TPN2()
TPN2()
This is the output:
A simple way to do what you want is to change your main effect (currently none) to return an accumulation of previous regressions and your side effect (plotting) to loop through these previous regressions (in blue) in addition to the current one (in red).
Another tip: you can use the abline(reg=lm(y~x)) argument and just accumulate the lm objects in a list. It's not necessary to store coefficients and intercepts separately as suggested in the other answer. Keeping the lm objects is also a good idea in case you want to go back and look at average R-squared, etc. -- you couldn't do that using only the coefficients.
Your new function could look like:
TPN.accum <- function( each.sub.pop.n = 150,
sub.pop.means = 20:10,
predict.range = 10:0,
sub.pop.sd = .75,
n.sample = 2,
lm.history = list() # the accumulator
){
par( mar = c(2, 4.1, 2.1, 2.1) )
m <- matrix( c(1, 2), nrow = 2, ncol = 1 ); layout(m)
set.seed(2460986)
Vec.rnorm <- Vectorize(function(n, mean, sd) rnorm(n, mean, sd), 'mean')
y <- c( Vec.rnorm(each.sub.pop.n, sub.pop.means, sub.pop.sd) )
set.seed(NULL)
x <- rep(predict.range, each = each.sub.pop.n)
plot(x, y, ylim = range(y)) ### Top-Row Plot
sample <- lapply(split(y, x), function(z) sample(z, n.sample, replace = TRUE))
sample <- data.frame(y = unlist(sample),
x = as.numeric(rep(names(sample), each = n.sample)))
x <- sample$x ; y <- sample$y
lm.current <- lm(y~x) # the current regression
plot(x, y, ylim = range(y)) ### Bottom-Row Plot
abline(reg = lm.current, col = 'red') # plot current regression (red)
for( i in seq_along(lm.history) ){
abline(reg=lm.history[[i]], col='blue') # plot any previous regressions (blue)
}
return(c(lm.history, list(lm.current))) # append current regression to accumulator
}
To initialize it and then run it repeatedly, just do something like:
tpn.history <- TPN.accum()
for (i in 1:5) tpn.history <- TPN.accum(lm.history=tpn.history)
And your output will look like:
I propose two possibilities:
Use par(mfg) to define on which panel to draw, so that you can add new points or lines on any of the two. For the color, I propose to add options saying if this is the first plot or the last plot of the series.
Store the coefficients of the abline to be used on other plots.
Use par(mfg)
I used some transparent color so that we do not see all superimposition of each iteration. Depending on what you want to achieve, you can modify this.
############## Input Values #################
TPN <- function(each.sub.pop.n = 150,
sub.pop.means = 20:10,
predict.range = 10:0,
sub.pop.sd = .75,
n.sample = 2,
plot = TRUE,
first = FALSE,
last = FALSE) {
#############################################
if (plot & first) {
plot.new()
m <- matrix( c(1, 2), nrow = 2, ncol = 1 ); layout(m)
par( mar = c(2, 4.1, 2.1, 2.1) )
}
set.seed(2460986)
Vec.rnorm <- Vectorize(function(n, mean, sd) rnorm(n, mean, sd), 'mean')
y <- c( Vec.rnorm(each.sub.pop.n, sub.pop.means, sub.pop.sd) )
set.seed(NULL)
x <- rep(predict.range, each = each.sub.pop.n)
if (plot) {
par(mfg = c(1,1)) ## Top-Row Plot
if (first) {
plot(x, y, ylim = range(y), col = "transparent")
} else if (last) {
plot(x, y, ylim = range(y))
}
}
sample <- lapply(split(y, x), function(z) sample(z, n.sample, replace = TRUE))
sample <- data.frame(y = unlist(sample),
x = as.numeric(rep(names(sample), each = n.sample)))
x = sample$x ; y = sample$y
if (plot) {
par(mfg = c(2,1)) #### BOTTOM-ROW PLOT
if (first) {
plot(x, y, ylim = range(y), col = "transparent")
}
if (last) {
points(x, y)
}
abline(lm(y ~ x), col = c('blue', 'red')[(last) + 1]) # Regression Line
}
}
## TEST HERE:
n <- 10
for (i in 1:n) {
TPN(first = ifelse(i == 1, TRUE, FALSE), last = ifelse(i == n, TRUE, FALSE))
}
Store the abline coefficients
There is no need of transparent color here because, a new plot is created for each iteration.
############## Input Values #################
TPN <- function(each.sub.pop.n = 150,
sub.pop.means = 20:10,
predict.range = 10:0,
sub.pop.sd = .75,
n.sample = 2,
plot = TRUE,
coefs = FALSE,
coefsup = NULL) {
#############################################
if (plot) {
m <- matrix( c(1, 2), nrow = 2, ncol = 1 )
layout(m)
par( mar = c(2, 4.1, 2.1, 2.1) )
}
set.seed(2460986)
Vec.rnorm <- Vectorize(function(n, mean, sd) rnorm(n, mean, sd), 'mean')
y <- c( Vec.rnorm(each.sub.pop.n, sub.pop.means, sub.pop.sd) )
set.seed(NULL)
x <- rep(predict.range, each = each.sub.pop.n)
if (plot) {
plot(x, y, ylim = range(y))
}
sample <- lapply(split(y, x), function(z) sample(z, n.sample, replace = TRUE))
sample <- data.frame(y = unlist(sample),
x = as.numeric(rep(names(sample), each = n.sample)))
x = sample$x ; y = sample$y
if (plot) {
plot(x, y, ylim = range(y))
# Add the previous lines if exists
if (!is.null(coefsup)) {
apply(coefsup, 1, function(x) abline(a = x[1], b = x[2], col = "blue"))
}
abline(lm(y ~ x), col = 'red') # Regression Line
}
if (coefs) {return(coef(lm(y ~ x)))}
}
# TEST with coefs
n <- 10
coefsup <- NULL
for (i in 1:n) {
coefsup <- rbind(coefsup, TPN(coefs = TRUE, coefsup = coefsup))
}
In both cases, the output is what you expect:

R function doesn't do what I want

I don't inderstand why my function doesn't work, can you help me to find my error.
VehiculeFunction <- function(data){
my.data <- data[data$GAMME =="M1",]
ma.col = rgb(red = 0.1,blue = 1,green = 0.1, alpha = 0.2)
X <- my.data$GMF.24
Y <- my.data$Cout.24
X11()
plot(X, Y, pch=20, las = 1, col = ma.col, xlim = c(0, 10), ylim = c(0,10))
identify(X, Y, labels = my.data$NITG, cex = 0.7)
}
This one works perfectly, and when I add two variables it returns "numeric(0)"
VehiculeFunction <- function(data, x, y){
my.data <- data[data$GAMME =="M1",]
ma.col = rgb(red = 0.1,blue = 1,green = 0.1, alpha = 0.2)
X <- my.data$x
Y <- my.data$y
X11()
plot(X, Y, pch=20, las = 1, col = ma.col, xlim = c(0, 10), ylim = c(0,10))
identify(X, Y, labels = my.data$NITG, cex = 0.7)
}
VehiculeFunction(data.vehicule, GMF.24, Cout.24)
numeric(0)
thank you Oleg, and if i want to add a condition on another variable of my dataframe, i want to take all the three first of "RANG_NITG_PROJET_K"
my.data1 <- my.data[my.data$RANG_NITG_PROJET_K == 1|2|3,] ?
but | think it's false because when i do this
my.data1 <- data.vehicule[data.vehicule$RANG_NITG_PROJET_K == 1,]
my.data2 <- data.vehicule[data.vehicule$RANG_NITG_PROJET_K == 2,]
my.data3 <- data.vehicule[data.vehicule$RANG_NITG_PROJET_K == 3,]
my.data <- rbind(my.data1, my.data2, my.data3)
it gives me two different dataframe ?
Try the following. You cannot access my.data columns (or list elements) using the $ operator in this case and you need to pass strings for x and y:
VehiculeFunction <- function(data, x, y, gamme){
my.data <- data[data$GAMME == gamme,]
ma.col = rgb(red = 0.1,blue = 1,green = 0.1, alpha = 0.2)
X <- my.data[[x]] # <- change from $ to [[]]
Y <- my.data[[y]] # <- change from $ to [[]]
X11()
plot(X, Y, pch=20, las = 1, col = ma.col, xlim = c(0, 10), ylim = c(0,10))
identify(X, Y, labels = my.data$NITG, cex = 0.7)
}
VehiculeFunction(data.vehicule, "GMF.24", "Cout.24", "M1") # <- change to strings

Warning message in predict.gam - why is this happening?

Reference: Page 22 of, http://www.statoek.wiso.uni-goettingen.de/mitarbeiter/ogi/pub/r_workshop.pdf
My question is self-contained in this post though. The data is in http://134.76.173.220/R_workshop
car <- read.table("car.dat", header = TRUE)
attach(car)
library(mgcv)
fit <- gam(MPG~s(SP))
plot(HP, MPG)
x <- seq(0, 350, length = 500)
y <- predict(fit, data.frame(HP = x))
lines(x, y, col = "red", lwd = 2)
Error im receiving:
Warning message:
In predict.gam(fit, data.frame(HP = x)) :
not all required variables have been supplied in newdata!
It seems to work if you scrap the 'attach' adjust the code accordingly and use SP as suggested by #GeorgeDontas
fit <- gam(MPG~s(SP), data=car)
plot(car$HP, car$MPG)
x <- seq(0, 350, length = 500)
y <- predict(fit, data.frame(SP = x))
lines(x, y, col = "red", lwd = 2)
I think it is supposed to be MPG~HP as that is a good fit and SP is poor (with default parameters)
par(mfrow=c(1,2))
fit <- gam(MPG~s(SP), data=car)
plot(car$SP, car$MPG)
x <- seq(0, 350, length = 500)
y <- predict(fit, data.frame(SP = x))
lines(x, y, col = "red", lwd = 2)
fit <- gam(MPG~s(HP), data=car)
plot(car$HP, car$MPG)
x <- seq(0, 350, length = 500)
y <- predict(fit, data.frame(HP = x))
lines(x, y, col = "red", lwd = 2)

Plot two size vector length with xyplot

Please note that this is a simplified version (and therefore duplicate of my earlier post):
https://stackoverflow.com/questions/18358694/xyplot-2-separate-data-frame-lengths
It may well be that it contained too much information, however quite basic was asked.
So here again:
I would like to plot 2 columns of different length with xyplot (only xyplot please).
The data:
Data <- data.frame(var1=rnorm(10,0,1),prob=seq(0.023,0.365,length=10))
Long <- data.frame(var2=rnorm(20,2,3))
How I would plot the Long (var2) vector of length 20 onto the plot of "Data" where (prob~var1) is plotted first.
You can use approx() inside your call to xyplot to interpolate the values of Data$prob after passing Long$var2 as a separate variable in the call. Notice the custom prepanel plot to adjust the limits.
lattice::xyplot(prob ~ var1, data = Data, z = Long$var2,
xlab = "Var1, Var2",
prepanel = function(x, y, z, ...) {
list(xlim = range(x, z),
ylim = range(y))
},
panel = function(x, y, z, ...) {
b <- approx(y, n = length(z))$y
panel.xyplot(x, y, ...)
panel.xyplot(z, b, col = "orange", ...)
})
EDIT: Actually, it is much cleaner to just reshape the data first.
dd <- rbind(data.frame(var = "var1",
val = Data$var1,
prob = Data$prob),
data.frame(var = "var2",
val = Long$var2,
prob = approx(Data$prob, n = 20)$y))
xyplot(prob ~ val, data = dd, groups = var, auto.key = TRUE)

Resources