How to generate this R function for random effect model? - r

I'm trying to create a code such that Y = 5g1(X1) + 3g2(X2) + 4g3(X3) + 6g4(X4) + sqrt(1.74)*eps (the functions g, are defined in the code).
X = (X1,...,Xp) should be an nxp dimensional design matrix, however I'm not sure about how to generate that based on this information where Xj = W+U is simulated according to a random effects model. I tried using X = do.call(cbind, replicate(p, X, simplify=FALSE)) but this just replicates each Xj, i'm not sure that's what should be done, they should be different.
Any advice on what i have missed would be appreciated and any improvements on the code too to make it more concise.
n<- 400
p<- 1000
W = runif(n)
U = runif(n)
eps = rnorm(n)
for (j in 1:p){
X = W+U
X = as.matrix(X)
return(X)} #This is a nx1 matrix...
#alternatively write: X = do.call(cbind, replicate(p, X, simplify=FALSE))
g1 = X
g2 = (2*X-1)^2
g3 = sin(2*pi*X)/(2-sin(2*pi*X))
g4 = 0.1*sin(2*pi*X) + 0.2*cos(2*pi*X) + 0.3*sin(2*pi*X)^2 + 0.4*cos(2*pi*X)^3 + 0.5*sin(2*pi*X)^3
Y = 5*g1 + 3*g2 + 4*g3 + 6*g4 + sqrt(1.74)*eps
return(Y)
}

I am not sure to capture the logic of your calculation, eventually it is something like this:
n <- 40 # 400
p <- 100 # 1000
X <- replicate(p, runif(n) + runif(n)) ## W+U
y <- function(X) {
g1 <- X
g2 <- (2*X-1)^2
g3 <- sin(2*pi*X)/(2-sin(2*pi*X))
g4 <- 0.1*sin(2*pi*X) + 0.2*cos(2*pi*X) + 0.3*sin(2*pi*X)^2 + 0.4*cos(2*pi*X)^3 + 0.5*sin(2*pi*X)^3
eps <- rnorm(length(X))
Y <- 5*g1 + 3*g2 + 4*g3 + 6*g4 + sqrt(1.74)*eps
return(Y)
}
Y <- apply(X, 2, FUN=y)
Also the variant without apply() works:
Y <- y(X)
To compare both variants:
set.seed(42)
Y1 <- apply(X, 2, FUN=y)
set.seed(42)
Y2 <- y(X)
identical(Y1, Y2)

Related

How to depict a graph of an implicit differentiation equation on R?

I'm now learning about calculus and want to depict a graph of x^2 + 6x + y^4 = 7, which I can using online graphing tool desmos.
But when I'm not sure how this is achievable on R. The first thing I thought is convert it in a form of y = f(x), but return (x^2 + 6*x - 7)^(1/4) gave me a different result.
At the same time, it seems impossible to return a equation in a function (return (x^2 + 6*x + y^4 = 7)). So how can I depict it on R?
Here is a sample code I usually use to depict a continuous graph.
f <- function(x) {
return () # return an equation
}
ggplot(data.frame(x=seq(-10,10,length.out=10)), aes(x)) + stat_function(fun=f)
You can have separate functions for the positive and negative solutions for y
f1 <- function(x) (7 - x^2 - 6*x)^(1/4)
f2 <- function(x) -f1(x)
Now just create a vector each for positive and negative values along the domain of x:
x <- seq(-7, 1, length = 1000)
y1 <- f1(x)
y2 <- f2(x)
And plot:
ggplot() +
geom_line(aes(x, y1)) +
geom_line(aes(x, y2))
You can use contourLines:
f <- function(x,y) x^2 + 6*x + y^4
x <- seq(-10, 3, len = 200)
y <- seq(-3, 3, len = 200)
z <- outer(x, y, f)
cr <- contourLines(x, y, z, levels = 7)
plot(cr[[1]]$x, cr[[1]]$y, type = "l")
library(ggplot2)
dat <- data.frame(x = cr[[1]]$x, y = cr[[1]]$y)
ggplot(dat) + geom_path(aes(x, y))

Add a Passing-Bablok regression line

I have to perform many comparisons between different measurement methods and I have to use the Passing-Bablok regression approach.
I would like to take advantage of ggplot2 and faceting, but I don't know how to add a geom_smooth layer based on the Passing-Bablok regression.
I was thinking about something like: https://stackoverflow.com/a/59173260/2096356
Furthermore, I would also need to show the regression line equation, with confidence interval for intercept and slope parameters, in each plot.
Edit with partial solution
I've found a partial solution combining the code provided in this post and in this answer.
## Regression algorithm
passing_bablok.fit <- function(x, y) {
x_name <- deparse(substitute(x))
lx <- length(x)
l <- lx*(lx - 1)/2
k <- 0
S <- rep(NA, lx)
for (i in 1:(lx - 1)) {
for (j in (i + 1):lx) {
k <- k + 1
S[k] <- (y[i] - y[j])/(x[i] - x[j])
}
}
S.sort <- sort(S)
N <- length(S.sort)
neg <- length(subset(S.sort,S.sort < 0))
K <- floor(neg/2)
if (N %% 2 == 1) {
b <- S.sort[(N+1)/2+K]
} else {
b <- sqrt(S.sort[N / 2 + K]*S.sort[N / 2 + K + 1])
}
a <- median(y - b * x)
res <- as.vector(c(a,b))
names(res) <- c("(Intercept)", x_name)
class(res) <- "Passing_Bablok"
res
}
## Computing confidence intervals
passing_bablok <- function(formula, data, R = 100, weights = NULL){
ret <- boot::boot(
data = model.frame(formula, data),
statistic = function(data, ind) {
data <- data[ind, ]
args <- rlang::parse_exprs(colnames(data))
names(args) <- c("y", "x")
rlang::eval_tidy(rlang::expr(passing_bablok.fit(!!!args)), data, env = rlang::current_env())
},
R=R
)
class(ret) <- c("Passing_Bablok", class(ret))
ret
}
## Plotting confidence bands
predictdf.Passing_Bablok <- function(model, xseq, se, level) {
pred <- as.vector(tcrossprod(model$t0, cbind(1, xseq)))
if(se) {
preds <- tcrossprod(model$t, cbind(1, xseq))
data.frame(
x = xseq,
y = pred,
ymin = apply(preds, 2, function(x) quantile(x, probs = (1-level)/2)),
ymax = apply(preds, 2, function(x) quantile(x, probs = 1-((1-level)/2)))
)
} else {
return(data.frame(x = xseq, y = pred))
}
}
An example of usage:
z <- data.frame(x = rnorm(100, mean = 100, sd = 5),
y = rnorm(100, mean = 110, sd = 8))
ggplot(z, aes(x, y)) +
geom_point() +
geom_smooth(method = passing_bablok) +
geom_abline(slope = 1, intercept = 0)
So far, I haven't been able to show the regression line equation, with confidence interval for intercept and slope parameters (as +- or in parentheses).
You've arguably done with difficult part with the PaBa regression.
Here's a basic solution using your passing_bablok.fit function:
z <- data.frame(x = 101:200+rnorm(100,sd=10),
y = 101:200+rnorm(100,sd=8))
mycoefs <- as.numeric(passing_bablok.fit(x = z$x, y=z$y))
paba_eqn <- function(thecoefs) {
l <- list(m = format(thecoefs[2], digits = 2),
b = format(abs(thecoefs[1]), digits = 2))
if(thecoefs[1] >= 0){
eq <- substitute(italic(y) == m %.% italic(x) + b,l)
} else {
eq <- substitute(italic(y) == m %.% italic(x) - b,l)
}
as.character(as.expression(eq))
}
library(ggplot2)
ggplot(z, aes(x, y)) +
geom_point() +
geom_smooth(method = passing_bablok) +
geom_abline(slope = 1, intercept = 0) +
annotate("text",x = 110, y = 220, label = paba_eqn(mycoefs), parse = TRUE)
Note the equation will vary because of rnorm in the data creation..
The solution could definitely be made more slick and robust, but it works for both positive and negative intercepts.
Equation concept sourced from: https://stackoverflow.com/a/13451587/2651663

How to set a logarithmic scale across multiple ggplot2 contour plots?

I am attempting to create three contour plots, each illustrating the following function applied to two input vectors and a fixed alpha:
alphas <- c(1, 5, 25)
x_vals <- seq(0, 25, length.out = 100)
y_vals <- seq(0, 50, length.out = 100)
my_function <- function(x, y, alpha) {
z <- (1 / (x + alpha)) * (1 / (y + alpha))
}
for each alpha in the vector alphas, I am creating a contour plot of z values—relative to the minimal z value—over x and y axes.
I do so with the following code (probably not best practices; I'm still learning the basics with R):
plots <- list()
for(i in seq_along(alphas)) {
z_table <- sapply(x_vals, my_function, y = y_vals, alpha = alphas[i])
x <- rep(x_vals, each = 100)
y <- rep(y_vals, 100)
z <- unlist(flatten(list(z_table)))
z_rel <- z / min(z)
d <- data.frame(cbind(x, y, z_rel))
plots[[i]] <- ggplot(data = d, aes(x = x, y = y, z = z_rel)) +
geom_contour_filled()
}
When alpha = 1:
When alpha = 25:
I want to display these plots in one grouping using ggarrange(), with one logarithmic color scale (as relative z varies so much from plot to plot). Is there a way to do this?
You can build a data frame with all the data for all alphas combined, with a column indicating the alpha, so you can facet your graph:
I basically removed the plot[[i]] part, and stacked up the d's created in the former loop:
d = numeric()
for(i in seq_along(alphas)) {
z_table <- sapply(x_vals, my_function, y = y_vals, alpha = alphas[i])
x <- rep(x_vals, each = 100)
y <- rep(y_vals, 100)
z <- unlist(flatten(list(z_table)))
z_rel <- z / min(z)
d <- rbind(d, cbind(x, y, z_rel))}
d = as.data.frame(d)
Then we create the alphas column:
d$alpha = factor(paste("alpha =", alphas[rep(1:3, each=nrow(d)/length(alphas))]),
levels = paste("alpha =", alphas[1:3]))
Then build the log scale inside the contour:
ggplot(data = d, aes(x = x, y = y, z = z_rel)) +
geom_contour_filled(breaks=round(exp(seq(log(1), log(1400), length = 14)),1)) +
facet_wrap(~alpha)
Output:

using grid.arrange with loop holding multiple plots

I am trying to create a plot where for each i there is a density graph and a histogram side by side. For this instance i = 1..3
The problem I have is creating the list to pass to grid.arrange. However I do it it seems to repeat itself somehow.
df:
x1 x2 x3
1 108.28 17.05 1484.10
2 152.36 16.59 750.33
3 95.04 10.91 766.42
4 65.45 14.14 1110.46
5 62.97 9.52 1031.29
6 263.99 25.33 195.26
7 265.19 18.54 193.83
8 285.06 15.73 191.11
9 92.01 8.10 1175.16
10 165.68 11.13 211.15
X <- df
mu.X <- colMeans(X)
cov.X <- cov(X)
eg <- eigen(cov.X)
myprinboot = function(
X,
iter = 10000,
alpha = 0.05,
prettyPlot = T
){
# Find the dimensions of X
nrX <- dim(X)[1]
nx <- dim(X)[2]
# Make matrices of suitable sizes to hold the booted parameter estimates
# lambda
# each cov matrix will have nx lambdas
lambda.mat <- matrix(NA, nr = nx, nc = iter)
# e vectors nx components each and one vector per eigen value
# Each cov matrix will therefore produce a nx X nx matrix of components
Y.mat <- matrix(NA, nr = nx, nc = iter * nx)
# For loop to fill the matrices created above
for (i in 1:iter)
{
# ind will contain random integers used to make random samples of the X matrix
# Must use number of rows nrX to index
ind <- sample(1:nrX,nrX,replace=TRUE)
# eigen will produce lambdas in decreasing order of size
# make an object then remove extract the list entries using $
eigvalvec <- eigen(cov(X[ind,]))
lambda.mat[,i] <- eigvalvec$values
colstart <- 1 + nx * (i - 1)
colend <- colstart + nx - 1
Y.mat[,colstart:colend] = eigvalvec$vectors
}
if(prettyPlot){
p <- list()
i <- 0
for(j in 1:(2*nx))
{
if (j %% 2 == 0){
p[[j]] <- ggplot(NULL, aes(lambda.mat[i,])) +
geom_histogram(color = 'black', fill = 'green', alpha = .5) +
xlab(substitute(lambda[i])) +
ggtitle(substitute(paste("Histogram of the pc variance ", lambda[i])))
} else {
i <- i + 1
p[[j]] <- ggplot(NULL, aes(lambda.mat[i,])) +
geom_density(fill = 'blue', alpha = .5) +
xlab((substitute(lambda[i]))) +
ggtitle(substitute(paste("Density plot of the pc variance ", lambda[i])))
}
do.call(grid.arrange, p)
}
do.call(grid.arrange, p)
} else {
layout(matrix(1:(2*nx),nr=nx,nc=2,byrow=TRUE))
for(i in 1:nx)
{
plot(density(lambda.mat[i,]),xlab=substitute(lambda[i]),
main=substitute(paste("Density plot of the pc variance ", lambda[i])
))
hist(lambda.mat[i,],xlab=substitute(lambda[i]),
main=substitute(paste("Histogram of the pc variance ", lambda[i])))
}
}
library(rgl)
plot3d(t(lambda.mat))
list(lambda.mat = lambda.mat, Y.mat = Y.mat)
}
pc <- myprinboot(X = Y, iter=1000, alpha=0.5)
Output
Anyone have any clue what I am doing wrong or is this just not possible?
I don't understand your code, Jay, as it seems to do lots of things and use both base and ggplot plotting, but if all you want is to create a combined histogram and density plot for each j, why not loop over j and inside that for j loop do something like this:
d <- your density plot created so that it depends on j only
h <- your histogram plot created so that it depends on j only
p[[j]] <- grid.arrange(d,h,ncol=2)
Then, when you come out of the loop, you'll have an object p which consists of a list of plots, with each plot consisting of a combination of density plot and histogram.
Then you could use the cowplot package (after installing it) to do something like this:
cowplot::plot_grid(plotlist = p, ncol = 2)
where the number of columns may need to be changed. See here for other ways to plot a list of plots: How do I arrange a variable list of plots using grid.arrange?
I don't know enough about your problem to understand why you treat the case of j even and j odd differently. But the underlying idea should be the same as what I suggested here.
I eventually got this working as follows.
getHist <- function(x, i){
lam <- paste('$\\lambda_', i, '$', sep='')
p <- qplot(x[i,],
geom="histogram",
fill = I('green'),
color = I('black'),
alpha = I(.5),
main=TeX(paste("Histogram of the pc variance ", lam, sep='')),
xlab=TeX(lam),
ylab="Count",
show.legend=F)
return(p)
}
getDens <- function(x, i){
lam <- paste('$\\lambda_', i, '$', sep='')
p <- qplot(x[i,],
geom="density",
fill = I('blue'),
alpha = I(.5),
main=TeX(paste("Density plot of the pc variance ", lam, sep='')),
xlab=TeX(lam),
ylab="Density",
show.legend=F)
return(p)
}
fp <- lapply(1:3, function(x) arrangeGrob(getHist(lambda.mat, x), getDens(lambda.mat, x), ncol=2))
print(marrangeGrob(fp, nrow = 3, ncol=1, top = textGrob("Lambda.mat Histogram and Density Plot",gp=gpar(fontsize=18))))

Q-Q plot with ggplot2::stat_qq, colours, multiple groups with Q-Q lines

I need to do something similar to what's shown in this excellent question:
Q-Q plot with ggplot2::stat_qq, colours, single group
but unfortunately there's a slight difference which is blocking me. Unlike the original question, I do want to separate the quantile computations by group, but I also want to add a QQ-line for each group. Following the OP's code, I can create the quantile-quantile plots by group:
library(dplyr)
library(ggplot2)
library(broom) ## for augment()
set.seed(1001)
N <- 1000
G <- 10
dd <- data_frame(x = runif(N),
group = factor(sample(LETTERS[1:G], size=N, replace=TRUE)),
y = rnorm(N) + 2*x + as.numeric(group))
m1 <- lm(y~x, data=dd)
dda <- cbind(augment(m1), group=dd$group)
sample_var <- "y"
group_var <- "group"
p <- ggplot(dda)+stat_qq(aes_string(sample=sample_var, colour=group_var))
p
How can I add the quantile-quantile lines for each group? NOTE: ideally I would like to specify the sample column and the group column at runtime. That's why I used aes_string.
EDIT to better clarify my problem, I add code to compute quantile-quantile lines when there's only one group. I need to generalize the code to multiple groups.
library(dplyr)
library(ggplot2)
library(broom) ## for augment()
# this section of the code is the same as before, EXCEPT G = 1, because for
# now the code only works for 1 group
set.seed(1001)
N <- 1000
G <- 1
dd <- data_frame(x = runif(N),
group = factor(sample(LETTERS[1:G], size=N, replace=TRUE)),
y = rnorm(N) + 2*x + as.numeric(group))
m1 <- lm(y~x, data=dd)
dda <- cbind(augment(m1), group=dd$group)
sample_var <- "y"
group_var <- "group"
# code to compute the slope and the intercept of the qq-line: basically,
# I would need to compute the slopes and the intercepts of the qq-lines
# for each group
vec <- dda[, sample_var]
y <- quantile(vec[!is.na(vec)], c(0.25, 0.75))
x <- qnorm(c(0.25, 0.75))
slope <- diff(y)/diff(x)
int <- y[1] - slope * x[1]
# now plot with ggplot2
p <- ggplot(dda)+stat_qq(aes_string(sample=sample_var, colour=group_var))+geom_abline(slope = slope, intercept = int)
p
Turning the code to calculate the qqlines into a function and then using lapply to create a separate data.frame for your qqlines is one approach.
library(dplyr)
library(ggplot2)
library(broom) ## for augment()
set.seed(1001)
N <- 1000
G <- 3
dd <- data_frame(x = runif(N),
group = factor(sample(LETTERS[1:G], size=N, replace=TRUE)),
y = rnorm(N) + 2*x + as.numeric(group))
m1 <- lm(y~x, data=dd)
dda <- cbind(augment(m1), group=dd$group)
sample_var <- "y"
group_var <- "group"
# code to compute the slope and the intercept of the qq-line
qqlines <- function(vec, group) {
x <- qnorm(c(0.25, 0.75))
y <- quantile(vec[!is.na(vec)], c(0.25, 0.75))
slope <- diff(y)/diff(x)
int <- y[1] - slope * x[1]
data.frame(slope, int, group)
}
slopedf <- do.call(rbind,lapply(unique(dda$group), function(grp) qqlines(dda[dda$group == grp,sample_var], grp)))
# now plot with ggplot2
p <- ggplot(dda)+stat_qq(aes_string(sample=sample_var, colour=group_var)) +
geom_abline(data = slopedf, aes(slope = slope, intercept = int, colour = group))
p
A more concise alternative. In ggplot2 v.3.0.0 and above you can use stat_qq_line:
ggplot(dda, aes(sample = y, colour = group)) +
stat_qq() +
stat_qq_line()
Output:
Data, from Jeremy Voisey's answer:
library(ggplot2)
library(broom)
set.seed(1001)
N <- 1000
G <- 3
dd <- data_frame(
x = runif(N),
group = factor(sample(LETTERS[1:G], size = N, replace = TRUE)),
y = rnorm(N) + 2 * x + as.numeric(group)
)
m1 <- lm(y ~ x, data = dd)
dda <- cbind(augment(m1), group = dd$group)

Resources