function ifelse calculation - r

I am trying to write the function in R, but keep getting an error. Within 1 simulation run I generate random values from 2 intevals - in order to generate 2 different output values.
se.m if the input parameter lies within [0, 1]
se.st if the input parameter lies wiothin [1, 5]
(floating point in neglegible)
Then, these randomly generated values are used as input in the following function:
This is the code I have used:
fuchs08 <- function(n){
x.m=se.m=x.st=se.st=NULL
for(i in 1:n){
se.m[i] <- ifelse (runif(n = 1, min = 0, max = 1) < 0.33, 0, 0.12 * (x.m[i]^2) - 0.04 * x.m[i])
se.st[i] <- ifelse (runif(n = 1, min = 1, max = 5) < 3.06, 0.12 * (x.m[i]^2) - 0.04 * x.m[i], 1)
}
return(cbind(se.m, se.st))
}
I dont get any results. I believe the error is in the ifelse statement, but cannot find a solution to it.
> fuchs08(5)
se.m se.st
[1,] 0 NA
[2,] NA 1
[3,] NA 1
[4,] NA NA
[5,] 0 1
The overall idea is add this function to a list of functions called funktionen. Then I run a simulation 100 times. Simulation 1 randomly chooses a function from the list funktionen and executes it. (Function creates two outputs for the aforementioned intevals: se.m and se.st which are combined with the outputs from simulation 2:99) Therefore the function needs to be in the format: function(n) in order to run the random function selection. Here is my code for that part:
funktionen <- list(akbas, bell.glade, borter, fell.hartford, fuchs07, fuchs08)
fxn_list_sample <- sample(1:5, 100, replace=T)
fxn_list_result <- unlist(sapply(fxn_list_sample, function(x) do.call(funktionen[[x]], args=list(n=1))))
results <- as.data.frame(t(fxn_list_result))
colnames(results) <- c("se.m", "se.st")
results <- melt(results)
results$value <-round(results$value, 4)
separate(results, variable, into = c("Parameter", "Intensitaet")) %>%
mutate(Intensitaet = c(3, 2) [(Intensitaet == "m")+1])
Any suggestions how to fix this?

To answer your trouble with ifelse():
ifelse() needs three arguments to make sense (condition, yes, no). it works with only the condition if the condition evaluates to NA, hence the NA's in your results and works with two arguments if the condition evalutates to TRUE, hence the 1 in your resutls. As Konrad says in the comment, the use of ifelse seems redundant. For illustration:
> ifelse(1==1)
Error in ifelse(1 == 1) : argument "yes" is missing, with no default
> ifelse(NA)
[1] NA
> ifelse(1==1, 4)
[1] 4
> ifelse(1!=1, 4)
Error in ifelse(1 != 1, 4) : argument "no" is missing, with no default
> ifelse(1!=1, 4, 10)
[1] 10
Regarding your original problem, I am not sure if I understand you question correctly, but maybe this does what you want:
fuchs08 <- function(x){
ifelse(x<1/3, 0,
ifelse(x<=3.06, 0.12*x^2-0.04*x, 1))
}
fuchs08_with_n_inputs_two_outputcols <- function(n) {
df <- data.frame(input=runif(n, 0, 5))
df$se.m <- ifelse(df$input<1, fuchs08(df$input), NA)
df$se.st <- ifelse(df$input>1 & df$input<5, fuchs08(df$input), NA)
return(df)
}
fuchs08_with_n_inputs_two_outputcols(10)
Edit: replaced n by x to avoid confusion and added a second function after having read your answer (the name is long for the sake of clarity...). It is not the output in your answer but may easily be transformed to that. I think it would be helpfull to give an example of the output you want and which format it should have (data.frame, named vector...?)

I think ifelse & if-and-else are both awkward. You could try something like:
fuchs08<-function(n,min,max) {
x<-runif(n,min,max)
y<-x
y[x<1/3]<-0
y[x>=1/3 & x<=3.06]<-0.12*y[x>=1/3 & x<=3.06]^2-0.04*y[x>=1/3 & x<=3.06]
y[x>3.06]<-1
return(y)
}
(want<-cbind(fuchs08(100,0,1),fuchs08(100,1,5)))

This seems to work. However, not very elegant answer. Feel free to give me tipps to improve it, reduce rebundant elements, etc.
fuchs08 <- function(n) {
x.m=se.m=x.st=se.st=NULL
for(i in 1:n){
print(x.m[i] <- runif(n = 1, min = 0, max = 1))
se.m[i] <- ifelse (runif(n = 1, min = 0, max = 1) < 0.33, 0, 0.12 * x.m[i]^2 - 0.04* x.m[i])
print(x.st[i] <- runif(n = 1, min = 1, max = 5))
se.st[i] <- ifelse (runif(n = 1, min = 1, max = 5) < 3.06, 0.12 * x.st[i]^2 - 0.04* x.st[i], 1)
}
return(cbind(se.m, se.st))
}
fuchs08(10)
The whole code is:
library(reshape2)
library(stringr)
install.packages("dplyr")
install.packages("tidyr")
library(dplyr)
library(tidyr)
install.packages("data.table")
library(data.table)
# AKBAS u.a. (2009)
akbas <- function(n){
x.m=se.m=x.st=se.st=NULL
for(i in 1:n){
print(x.m[i] <- runif(n = 1, min = 0, max = 1))
se.m[i] <- 0.17 * (x.m[i]^2) - 0.03 * x.m[i]
print(x.st[i] <- runif(n = 1, min = 1, max = 5))
se.st[i] <- 0.17 * (x.st[i]^2) - 0.03 * x.st[i]
}
akbasr<-return(cbind(se.m, se.st))
}
# FUCHS u.a.(2007)
fuchs07 <- function(n){
x.m=se.m=x.st=se.st=NULL #solves indexing problem
for(i in 1:n){
print(x.m[i] <- runif(n = 1, min = 0, max = 1))
se.m[i] <- 0.11 * (x.m[i]^2) - 0.02 * x.m[i]
print(x.st[i] <- runif(n = 1, min = 1, max = 5))
se.st[i] <- 0.11 * (x.st[i]^2) - 0.02 * x.st[i]
}
return(cbind(se.m, se.st))
}
# BELL AND GLADE (2004)
bell.glade <- function(n){
x.m=se.m=x.st=se.st=NULL
for(i in 1:n){
se.m[i] <- ifelse (runif(n = 1, min = 0, max = 1) < 1, 0.2, 0.2)
se.st[i] <- ifelse (runif(n = 1, min = 0, max = 1) < 1, 0.5, 0.5)
}
return(cbind(se.m, se.st))
}
# BORTER (1999b,a)
borter <- function(n){
x.m=se.m=x.st=se.st=NULL
for(i in 1:n){
se.m[i] <- ifelse (runif(n = 1, min = 0, max = 1) < 1, 0.1, 0.1)
se.st[i] <- ifelse (runif(n = 1, min = 0, max = 1) < 1, 0.5, 0.5)
}
return(cbind(se.m, se.st))
}
# FELL UND HARTFORD (1997)
fell.hartford <- function(n){
x.m=se.m=x.st=se.st=NULL
for(i in 1:n){
se.m[i] <- ifelse (runif(n = 1, min = 0, max = 1) < 0.25, 0.1, 0.4)
se.st[i] <- ifelse (runif(n = 1, min = 1, max = 5) < 1.5, 0.4, 0.7)
}
return(cbind(se.m, se.st))
}
# FUCH (2008, 2009)
fuchs08 <- function(n) {
x.m=se.m=x.st=se.st=NULL
for(i in 1:n){
print(x.m[i] <- runif(n = 1, min = 0, max = 1))
se.m[i] <- ifelse (runif(n = 1, min = 0, max = 1) < 0.33, 0, 0.12 * x.m[i]^2 - 0.04* x.m[i])
print(x.st[i] <- runif(n = 1, min = 1, max = 5))
se.st[i] <- ifelse (runif(n = 1, min = 1, max = 5) < 3.06, 0.12 * x.st[i]^2 - 0.04* x.st[i], 1)
}
return(cbind(se.m, se.st))
}
funktionen <- list(akbas, bell.glade, borter, fell.hartford, fuchs07, fuchs08)
fxn_list_sample <- sample(1:5, 100, replace=T)
fxn_list_result <- unlist(sapply(fxn_list_sample, function(x) do.call(funktionen[[x]], args=list(n=1))))
results <- as.data.frame(t(fxn_list_result))
colnames(results) <- c("se.m", "se.st")
results <- melt(results)
results$value <-round(results$value, 4)
separate(results, variable, into = c("Parameter", "Intensitaet")) %>%
mutate(Intensitaet = c(3, 2) [(Intensitaet == "m")+1])
write.csv(results, "murgang-test.csv")

Related

Operations between list objects in R with apply function

I have a 1x5 matrix A and a list of 5 B with matrices (double [100x100]) in R. Each argument of A corresponds to one B and I want to create the following function
C = ( max(A[,i],A[,j]) * B[[i]] * B[[j]] ) / 2
for example to calculate C between 1 and 2 I can use the following
set.seed(123)
A <- matrix(c(5.2,6.9,32,40,8.3), ncol = 5 )
B <- list(matrix(rnorm(100 * 100, mean = 0, sd = 1), 100, 100),
matrix(rnorm(100 * 100, mean = 0, sd = 1), 100, 100),
matrix(rnorm(100 * 100, mean = 0, sd = 1), 100, 100),
matrix(rnorm(100 * 100, mean = 0, sd = 1), 100, 100),
matrix(rnorm(100 * 100, mean = 0, sd = 1), 100, 100))
C_1.2<- max(A[,1],A[,2]) * (unlist(B[[1]])*unlist(B[[2]]))
I want to create a list in R that does the above for all possible combinations with the use of an apply function? I want the names to be C_j.j. I manage to it as follows:
combinations <- combn(1:5, 2)
result_list <- lapply(1:ncol(combinations), function(i){
j <- combinations[1, i]
k <- combinations[2, i]
C <- (max(A[, j], A[, k]) * B[[j]] * B[[k]]) / 2
list(C)
})
names(result_list) <- paste0("C_", combinations[1, ], ".", combinations[2, ])
However I can't produce the C_1.1, C_2.2,C_3.3,C_4.4,C_5.5. How can I fix that? I am thinking when i=j, then to calculate C_i.i <- (A[,i] * B[[i]]^2) / 2 .
A solution based on the answers is
out2 <- apply(expand.grid(seq_along(B), seq_along(B)), 1, \(i)
(max(A[, i]) * (B[[i[1]]] * B[[i[2]]])), simplify = FALSE)
names(out2) <- paste0("C_", do.call(paste,
c(expand.grid(seq_along(B), seq_along(B)), sep = ".")))
or
C <- lapply(1:5, function(i) {
lapply(1:5, function(j) {
if (i == j) {
C_i.i <- (A[,i] * B[[i]]^2)
} else {
C_i.j <- max(A[,i],A[,j]) * (unlist(B[[i]])*unlist(B[[j]]))
}
})
})
which however creates all possible scenarios and doubles the computational time since we know that C_1.2 = C_2.1 and ton calculation is needed.
We may do this directly in combn
out <- combn(seq_along(B), 2, FUN = function(i)
(max(A[, i]) * (B[[i[1]]] * B[[i[2]]])), simplify = FALSE)
names(out) <- paste0("C_", combn(seq_along(B), 2, FUN = paste, collapse = "."))
-checking the output with OP's output
> C_1.2<- max(A[,1],A[,2]) * (unlist(B[[1]])*unlist(B[[2]]))
> all.equal(out[[1]], C_1.2)
[1] TRUE
If we want all combinations, use expand.grid
out2 <- apply(expand.grid(seq_along(B), seq_along(B)), 1, \(i)
(max(A[, i]) * (B[[i[1]]] * B[[i[2]]])), simplify = FALSE)
names(out2) <- paste0("C_", do.call(paste,
c(expand.grid(seq_along(B), seq_along(B)), sep = ".")))

MCMC for estimating negative binomial distribution

I want to estimate parameters of negative binomial distribution using MCMC Metropolis-Hastings algorithm. In other words, I have sample:
set.seed(42)
y <- rnbinom(20, size = 3, prob = 0.2)
and I want to write algorithm that will estimate parameter of size and parameter of prob.
My work so far
I defined prior distribution of size as Poisson:
prior_r <- function(r) {
return(dpois(r, lambda = 2, log = T))
}
And prior distribution of prob as uniform on [0, 1]:
prior_prob <- function(prob) {
return(dunif(prob, min = 0, max = 1, log = T))
}
Moreover for simplicity I defined loglikelihood and joint probability functions:
loglikelihood <- function(data, r, prob) {
loglikelihoodValue <- sum(dnorm(data, mean = r, sd = prob, log = T))
return(loglikelihoodValue)
}
joint <- function(r, prob) {
data <- y
return(loglikelihood(data, r, prob) + prior_r(r) + prior_prob(prob))
}
Finally, the whole algorithm:
run_mcmc <- function(startvalue, iterations) {
chain <- array(dim = c(iterations + 1, 2))
chain[1, ] <- startvalue
for (i in 1:iterations) {
proposal_r <- rpois(1, lambda = chain[i, 1])
proposal_prob <- chain[i, 2] + runif(1, min = -0.2, max = 0.2)
quotient <- joint(proposal_r, proposal_prob) - joint(chain[i, 1], chain[i, 2])
if (runif(1, 0, 1) < min(1, exp(quotient))) chain[i + 1, ] <- c(proposal_r, proposal_prob)
else chain[i + 1, ] <- chain[i, ]
}
return(chain)
}
The problem
Problem that I'm having is that when I run it with starting values even very close to correct ones:
iterations <- 2000
startvalue <- c(4, 0.25)
res <- run_mcmc(startvalue, iterations)
I'll obtain posterior distribution which is obviously wrong. For example
> colMeans(res)
[1] 11.963018 0.994533
As you can see, size is located very close to point 12, and probability is located in point 1.
Do you know what's the cause of those phenomeons?
Change dnorm in loglikelihood to dnbinom and fix the proposal for prob so it doesn't go outside (0,1):
set.seed(42)
y <- rnbinom(20, size = 3, prob = 0.2)
prior_r <- function(r) {
return(dpois(r, lambda = 2, log = T))
}
prior_prob <- function(prob) {
return(dunif(prob, min = 0, max = 1, log = TRUE))
}
loglikelihood <- function(data, r, prob) {
loglikelihoodValue <- sum(dnbinom(data, size = r, prob = prob, log = TRUE))
return(loglikelihoodValue)
}
joint <- function(r, prob) {
return(loglikelihood(y, r, prob) + prior_r(r) + prior_prob(prob))
}
run_mcmc <- function(startvalue, iterations) {
chain <- array(dim = c(iterations + 1, 2))
chain[1, ] <- startvalue
for (i in 1:iterations) {
proposal_r <- rpois(1, lambda = chain[i, 1])
proposal_prob <- chain[i, 2] + runif(1, min = max(-0.2, -chain[i,2]), max = min(0.2, 1 - chain[i,2]))
quotient <- joint(proposal_r, proposal_prob) - joint(chain[i, 1], chain[i, 2])
if (runif(1, 0, 1) < min(1, exp(quotient))) {
chain[i + 1, ] <- c(proposal_r, proposal_prob)
} else {
chain[i + 1, ] <- chain[i, ]
}
}
return(chain)
}
iterations <- 2000
startvalue <- c(4, 0.25)
res <- run_mcmc(startvalue, iterations)
colMeans(res)
#> [1] 3.1009495 0.1988177

Why does function return NULL?

A beginner in R over here, so apologies for the basic question.
Why does ATE return a null vector instead of saving the values of the difference of the means?
fun.cluster <- function(M, N){
set.seed(02139)
J <- 1:M # vector J_i
df <- as.data.frame(matrix(data=1:N, nrow = N, ncol = 1)) #data frame of all original values
df$cluster <- cut(df$V1, M, labels = 1:M) #breaking the dataframe into clusters
df$cluster <- as.numeric(df$cluster)
Y1 <- as.vector(sample(J, 5)) # assigning treatment
df$treatment <- ifelse(df$cluster %in% Y1, df$treatment <- 1, df$treatment <- 0)
#Inducing intracluster correlation:
mu_0j <- runif(n = 50, min = -1, max = 1)
df$V1[df$treatment==0] <- mu_0j
mu_1j <- runif(n=50, min = -0.5, max = 1.5)
df$V1[df$treatment==0] <- mu_1j
# drawing values
y_0i <- rnorm(n = 50, mean = mu_0j, sd = 1)
y_1i <- rnorm(n = 50, mean = mu_1j, sd = 1)
D_i <- as.vector(c(y_0i, y_1i))
# calculating ATE:
ATE[i] <- mean(y_1i - y_0i)
}
ATE <- c()
for(i in 1:10){
fun.cluster(M = 10, N = 100)
}

Count with Varying Levels Parameters in R

I want R to count how many times my simulated ARIMA data conform to ARIMA(1,0,0) which I have achieved with:
library(forecast)
library(forecast)
cnt <- 0
num <- 60
phi <- 0.8
for(i in 1:10) {
epselon <- rnorm(num, mean=0, sd=1)
ar1 <- arima.sim(n = num, model=list(ar=phi, order = c(1, 0, 0)),
sd=1)
ar2 <- auto.arima(ar1)
if(all(arimaorder(ar2) == c(1, 0, 0))) cnt <- cnt + 1}
cnt
The above is just for a single case when sd=1, n=60, and ar=0.8.
I want a case when I have varying levels of N <- c(15, 20), SD <- c(1, 2) ^ 2, and phi = c(0.8, 0.9) for sample size, standard diviation and AR parameter respectively.
I have traid this:
library(forecast)
N <- c(15, 20)
SD <- c(1, 2) ^ 2
phi = c(0.8, 0.9)
## generate all combos
all_combos <- expand.grid(N = N, SD = SD, phi = phi)
epselon = function(n) rnorm(n, mean = 0, sd = SD)
## create function
fx_arima <- function(n, SD, phi) {
cnt <- 0
num <- 60
phi <- 0.8
for(i in 1:10) {
epselon <- rnorm(num, mean=0, sd=1)
ar1 <- arima.sim(n = num, model=list(ar=phi, order = c(1, 0, 0)), sd=1)
ar2 <- auto.arima(ar1)
if(all(arimaorder(ar2) == c(1, 0, 0))) cnt <- cnt + 1}
cnt
}
## find arima for all combos using Map
set.seed(123L)
res = Map(fx_arima, all_combos[["N"]], all_combos[["SD"]],
all_combos[["phi"]])
## or a little bit more work:
set.seed(123L)
res2 = by(all_combos, all_combos["N"],
function(DF) {
res = mapply(fx_arima, DF[["N"]], DF[["SD"]], DF[["phi"]])
colnames(res) = paste("SD", DF[["SD"]], "phi", DF[["phi"]], sep = "_")
res
})
res2
## write to csv
Map(function(file, DF) write.csv(DF, paste0("N_", file, ".csv")),
names(res2), res2)
which I mirror from arima.sim() function with varying: sample sizes, phi values and sd values and R Count How Many Time auto.arima() Confirmarima.sim() to be True
I got this error message
Error in `colnames<-`(`*tmp*`, value = c("SD_1_phi_0.2", "SD_4_phi_0.2", : attempt to set 'colnames' on an object with less than two dimensions
Traceback:
How can I solve this such that will have my result to show in varying form suuch that first row will be the label while the second row will be the count itself. The result will in two sheets; the first will be for 'N=15' and the second will be for 'N=20'.
If I understood your problem correctly, the error comes from function colnames because your function does not return a "pure" matrix-like object. If, instead, you use the function names in your last chunk of code as follows:
res2 = by(all_combos, all_combos["N"],
function(DF) {
res = mapply(fx_arima, DF[["N"]], DF[["SD"]], DF[["phi"]])
names(res) = paste("SD", DF[["SD"]], "phi", DF[["phi"]], sep = "_")
return(res)
})
res2
You will get:
> res2
N: 15
SD_1_phi_0.8 SD_4_phi_0.8 SD_1_phi_0.9 SD_4_phi_0.9
1 3 7 5
---------------------------------------------------------------------------
N: 20
SD_1_phi_0.8 SD_4_phi_0.8 SD_1_phi_0.9 SD_4_phi_0.9
3 4 5 2
With elements accessible by name and index:
> res2$`15`["SD_1_phi_0.8"]
SD_1_phi_0.8
1
> res2$`15`[1]
SD_1_phi_0.8
1

How to create a vector when using For Loop?

I am trying to conduct a two sided sign test from 10,000 random normal samples of size 30. I am trying to extract the p-values given from the binom.test and put them into a vector but can't quite figure out how to execute this.
set.seed(100)
sample <- matrix(rnorm(300000, mean=0.1, sd=1), 10000, 30)
success <- ifelse(sample>=0, 1, 0)
success
#sample[1,]
#success[1,]
#sum(success[1,])
#for loop
for(i in 1:10000){
pvalue<- binom.test(sum(success[i,]), 30, p=0.5,
alternative = c("two.sided"),
conf.level = 0.95)$p.value
p_values_success <- ifelse(pvalue<=0.05, 1, 0)
}
I guess what you are trying to do is
pvalue <- numeric(length = 1000L)
p_values_success <- numeric(length = 1000L)
for(i in 1:10000) {
pvalue[i] <- binom.test(sum(success[i,]), 30, p=0.5,
alternative = c("two.sided"),
conf.level = 0.95)$p.value
p_values_success[i] <- ifelse(pvalue[i]<=0.05, 1, 0)
}
However, if I had to rewrite you code completely from scratch I would do
set.seed(100)
sample <- matrix(rnorm(300000, mean=0.1, sd=1), 10000, 30)
success[] <- as.integer(sample >=0)
t(apply(success, 1, function(x) {
p_val <- binom.test(sum(x), 30, p=0.5,alternative = c("two.sided"),
conf.level = 0.95)$p.value
c(p_val, as.integer(p_val<=0.05))
}))
This will return a 2-column matrix where 1st column is pvalue and the second one is p_values_success.
You could also do:
apply(success, 1,
FUN = function(x)
ifelse(
binom.test(sum(x), 30, p = 0.5,
alternative = "two.sided", conf.level = 0.95)$p.value <= 0.05, 1, 0
)
)

Resources