I am working on data replacement in a matrix. The replaced data will be calculated by calculating the sd of the (1+2k)X(1+2k) matrix centered on the value.
replace.loop = function(n, m, k, pad){
#search the value row by row, column by column
for (i in n) {
for (j in m) {
pad[i,j] = sd(as.vector(pad[(i-k):(i+k),(j-k):(j+k)]))
}
}
return(pad) #return the matrix that finishing calculation
}
Is there any way to rewrite this function with any apply() function? I am an R starter learner, so I am not sure which apply() function I should use.
for example:
X = matrix(c(.5,.5,.4,.4,.3,.5,.5,.4,.3,.3,.4,.4,.3,.2,.2,.4,.4,.3,.2,.1,.3,.3,.2,.2,.1), ncol=5)
k = 1
pad.X = matrix(0, dim(X)[1]+2*k, dim(X)[2]+2*k)
n = (k+1):(dim(X)[1]+k); m = (k+1):(dim(X)[2]+k)
pad.X[n, m] = X
Thanks!
I created a copy of pad called padOut. mapply will return the results as a matrix, which you can then assign to the relevant portion of padOut using matrix indexing:
replace.apply = function(n, m, k, pad){
kk <- -k:k
idx <- expand.grid(n, m)
padOut <- pad
padOut[as.matrix(idx)] <- mapply(function(i) sd(pad[idx[i,1] + kk, idx[i, 2] + kk]), 1:(length(m)*length(n)))
return(padOut)
}
With your example data:
X = matrix(c(.5,.5,.4,.4,.3,.5,.5,.4,.3,.3,.4,.4,.3,.2,.2,.4,.4,.3,.2,.1,.3,.3,.2,.2,.1), ncol=5)
k = 1
pad.X = matrix(0, dim(X)[1]+2*k, dim(X)[2]+2*k)
n = (k+1):(dim(X)[1]+k); m = (k+1):(dim(X)[2]+k)
pad.X[n, m] = X
replace.loop = function(n, m, k, pad){
#search the value row by row, column by column
padOut <- pad
for (i in n) {
for (j in m) {
padOut[i,j] = sd(as.vector(pad[(i-k):(i+k),(j-k):(j+k)]))
}
}
return(padOut) #return the matrix that finishing calculation
}
pad1 <- replace.loop(n, m, k, pad.X)
pad2 <- replace.apply(n, m, k, pad.X)
identical(pad1, pad2)
#> [1] TRUE
Related
below is a function to extract p-values from multiple Chi-Square tests and display them as a matrix. I'm trying to do the same, but to extract residuals instead. Any help is appreciated.
Sample data:
df <- data.frame(first_column = c(rep("E1_C1",5), rep("E1_C2",3), rep("E2_C2",7),rep("E3_C3",5)),
second_column = c(rep("E1_C1",3), rep("E1_C2",10), rep("E2_C2",4),rep("E3_C3",3)),
third_column = c(rep("E1_C1",7), rep("E1_C2",4), rep("E2_C2",3),rep("E3_C3",6)),
fourth_column = c(rep("E1_C1",4), rep("E1_C2",6), rep("E2_C2",6),rep("E3_C3",4))
)
Chi-square matrix function for P-Values:
chisqmatrix <- function(x) {
names = colnames(x); num = length(names)
m = matrix(nrow=num,ncol=num,dimnames=list(names,names))
for (i in 1:(num-1)) {
for (j in (i+1):num) {
#browser()
m[j,i] = chisq.test(x[, i, drop = TRUE],x[, j, drop = TRUE])$p.value
}
}
return (m)
}
Generate Chi-Square p-value matrix
res <- chisqmatrix(df)
res[, -ncol(res)]
In your case, the returned residuals is a 4x4 matrix. Instead of using a matrix to take the results, the following solution uses a list instead. This way you can have matrices of different sizes.
With minimal changes from your original code:
chisqlist <- function(x) {
names = colnames(x); num = length(names)
m = list()
index = 1
for (i in 1:(num-1)) {
for (j in (i+1):num) {
#browser()
m[[index]] = chisq.test(x[, i, drop = TRUE],x[, j, drop = TRUE])$residuals
index=index+1
}
}
return (m)
}
Edit:
I do prefer # Onyambu's answer, which I didn't see. It would be faster than a nested for loop.
Simply change your function from requesting $p.value to requesting $residuals. This will provide (observed - expected) / sqrt(expected). If you desire standardized residuals request $stdres.
chisqmatrix <- function(x) {
names = colnames(x); num = length(names)
m = matrix(nrow=num,ncol=num,dimnames=list(names,names))
for (i in 1:(num-1)) {
for (j in (i+1):num) {
#browser()
m[j,i] = chisq.test(x[, i, drop = TRUE],x[, j, drop = TRUE])$residuals
}
}
return (m)
}
i need to be able to use the 20 vectors in later caluclations ,so i want to create a 20x20 matrix which i can store each line from my code into a row,or if you can suggest a better way to be able to reference the code.
lambda=12
mu=2
answer = numeric(20)
k = 0
for (i in 1:20) {
Pi_i <- numeric(i)
m <- 1:i
k = k+((1/factorial(i)*(lambda/mu)^i))
for (j in m) {
Pi_i[j] = (k^-1)*((lambda/mu)^j)/factorial(j)
}
cat(Pi_i,"\n\n")
}
You could create an empty 20 x 20 matrix and write each vector into the ith row. You cannot have empty cells in a matrix, so you will have to append the correct number of NA values to the end of each vector:
lambda=12
mu=2
answer = numeric(20)
k = 0
result <- matrix(0, ncol = 20, nrow = 20)
for (i in 1:20) {
Pi_i <- numeric(i)
m <- 1:i
k = k+((1/factorial(i)*(lambda/mu)^i))
for (j in m) {
Pi_i[j] = (k^-1)*((lambda/mu)^j)/factorial(j)
}
result[i,] <- c(Pi_i, rep(NA, 20 - i))
}
result
I'm trying to create a function that simulate dolling m dice, n times, calculate the minimum of the outcome of the dice in each roll and then calculate the mean of the minimum and store the values into a vector.
mindice = function(n = 10, m = 3)
{
v <- vector
for(i in 1:10)
{
minima= numeric(n)
for(i in 1:n)
{
minima[i] = min(sample(6,m,replace=T))
}
v[i] = mean(minima)
}
v
}
I'm calling the function like that: mindice()
and the output is: Error in v[i] <- mean(minima) : object of type 'closure' is not subsettable
any help will be appreciated
I don't know if this fits your expectations. This function returns both the vector for the minimum per roll and their average
mindice <- function(n = 10, m = 3)
{
minima <- vector("numeric")
for(i in 1:n){
minima = c(minima, min(sample(6,m,replace=T)))
}
list(minimum_values=minima,
mean_minima=mean(minima))
}
mindice()
The comment about using v = vector() rather than v = vector should solve the bug in the way you've written the function. If you're interested in a more R-like way to do this function, try this:
mindice2 = function(n = 10, m = 3) {
rolls = sample(6, size = m * n, replace = TRUE)
rolls = matrix(rolls, nrow = m)
minima = apply(rolls, MARGIN = 2, FUN = min)
cum_mean = cumsum(minima) / seq_along(minima)
return(cum_mean)
}
Rather than sampling each roll separately, we do all the rolls at once (more efficient), and then put them in a matrix where each column is a roll of m dice. We can then use apply to find the minima, and cumsum to calculate the mean minimum after each roll.
I am new to R and trying to build a accumulative binomial distribution table and got stuck in the loop.
r = readline("please enter an interger n:")
p = seq(from = 0.1, to = 1,by = 0.1 )
r = seq(from = 0, to = 100)
n <- ""
for (each in r) {
x=qbinom(x,r,p)
}
print(x)
As an alternate to the loop: you can use expand.grid to create all permutations of k and p, and further avoid the loop as pbinom can take vectors.
# Create input values
p = 1:9/10
k = 0:25
n = 25
# Create permutations of `k` and `p`: use this to make grid of values
ex <- expand.grid(p=p, k=k)
# Find probabilities for each value set
ex$P <- with(ex, pbinom(k, n, p ))
# Reshape to your required table format
round(reshape2::dcast(k ~ p, data=ex, value.var = "P"), 3)
Loop approach
# Values to match new example
p = 1:19/20
k = 0:25
n = 4
# Create matrix to match the dimensions of our required output
# We will fill this as we iterate through the loop
mat1 <- mat2 <- matrix(0, ncol=length(p), nrow=length(k), dimnames=list(k, p))
# Loop through the values of k
# We will also use the fact that you can pass vectors to `pbinom`
# so for each value of `k`, we pass the vector of `p`
# So we will update each row of our output matrix with
# each iteration of the loop
for(i in seq_along(k)){
mat1[i, ] <- pbinom(k[i], n, p)
}
Just for completeness, we could of updated the columns of our output matrix instead - that is for each value of p pass the vector k
for(j in seq_along(p)){
mat2[, j] <- pbinom(k, n, p[j])
}
# Check that they give the same result
all.equal(mat1, mat2)
Functions like f=tanh the derivative can be written as df=f*(delta(i,j)-f). When g is a vector or column matrix, this can programmed as follows using a loop, transpose, and apply to calculate df.
set.seed(5)
g1<-matrix(rnorm(3),ncol=1)
f1<-tanh(g1)
df1a = matrix(NA,nrow=nrow(f1),ncol=nrow(f1))
df1b = matrix(NA,nrow=nrow(f1),ncol=nrow(f1))
for (i in seq(1,nrow(f1))) {
for (j in seq(1,nrow(f1))) {
df1a[i,j] = -f1[j]*f1[i]
df1b[i,j] = -f1[j]*f1[i]
}
df1b[i,i] = df1b[i,i]+f1[i]
}
df2a <- -f1 %*% t(f1)
df2b <- diag(as.list(f1))+df2a
df3a <- (-1)*apply(f1,1,'*',f1)
df3b <- diag(as.list(f1))+df3a
does_m1_m2_match = all.equal(df1b, df2b, tolerance = 1e-5)
does_m1_m3_match = all.equal(df1b, df3b, tolerance = 1e-5)
does_m1_m2_match
does_m1_m3_match
How to extend this when g is a matrix, and should be treated as a collection of column vectors. Here is the loop implementation. How to do the apply implementation?
ddf2a = array(NA,c(nrow(f2),nrow(f2),ncol(f2)))
ddf2b = array(NA,c(nrow(f2),nrow(f2),ncol(f2)))
for (k in seq(1,ncol(f2))) {
for (i in seq(1,nrow(f2))) {
for (j in seq(1,nrow(f2))) {
ddf2a[i,j,k] = -f2[j]*f2[i]
ddf2b[i,j,k] = -f2[j,k]*f2[i,k]
}
ddf2b[i,i,k] = ddf2b[i,i,k]+f2[i,k]
}
}
for (k in seq(1,ncol(f2))) {
does_m1_md_match = all.equal(df1b, ddf2b[,,k], tolerance = 1e-5)
print(paste('k',k,does_m1_md_match))
}