How to create a plot of power of test in R? - r

I want to create a comparison for normal test with Shapiro-Wilks, Kolmogorov-Smirnov, Anderson-Darling, Cramer von Mises dan Adjusted Jarque-Bera methods based on the power of test (1-beta) on sample sizes n = 10,20,30,40 and 50.
testnormal=function(n,m,alfa)
{
require(nortest)
require(normtest)
require(xlsx)
pvalue=matrix(0,m,5)
decision=matrix(0,m,5)
for (i in 1:m)
{
data=runif(n,2,5)
test1=shapiro.test(data)
pv1=test1$p.value
pvalue[i,1]=pv1
if (pv1<alfa)
{
decision[i,1]=1
}
test2=ks.test(data,"pnorm",mean=mean(data),sd=sd(data))
pv2=test2$p.value
pvalue[i,2]=pv2
if (pv2<alfa)
{
decision[i,2]=1
}
test3=ad.test(data)
pv3=test3$p.value
pvalue[i,3]=pv3
if (pv3<alfa)
{
decision[i,3]=1
}
test4=cvm.test(data)
pv4=test4$p.value
pvalue[i,4]=pv4
if (pv4<alfa)
{
decision[i,4]=1
}
test5=ajb.norm.test(data)
pv5=test5$p.value
pvalue[i,5]=pv5
if (pv2<alfa)
{
decision[i,5]=1
}
}
result1=data.frame(pvalue)
result2=data.frame(decision)
colnames(result1)=c("SW","KS","AD","CvM","AJB")
colnames(result2)=c("SW","KS","AD","CvM","AJB")
write.xlsx(result1,"testnormal_pvalue.xlsx")
write.xlsx(result2,"testnormal_decision.xlsx")
one_min_beta=t(1-(colSums(decision)/m))
test.of.power=data.frame(one_min_beta)
colnames(test.of.power)=c("SW","KS","AD","CvM","AJB")
return(test.of.power)
}
simulation=testnormal(10,100,0.05)
simulation2=testnormal(20,100,0.05)
simulation3=testnormal(30,100,0.05)
simulation4=testnormal(40,100,0.05)
simulation5=testnormal(50,100,0.05)
output=rbind(simulation,simulation2,simulation3,simulation4,simulation5)
output
I want to graph the power of the test to see trends in the up and down trend of the power of the test over the sample size, anyone can help please?

I went through your code and rewrote along the way to better understand what you want (what is the excel stuff for?). I have broken it down to smaller functions to allow you to have more control in these kinds of simulation studies. The code is not particularly efficient.
But does this give you what you want?
library("nortest")
library("normtest")
library("dplyr")
library("ggplot2")
# Function for doing all tests and putting it into a data.frame
tests <- function(data) {
list_of_tests <- list(
SW = shapiro.test(data),
KS = ks.test(data, pnorm, mean = mean(data), sd = sd(data)),
AD = ad.test(data) ,
CMV = cvm.test(data),
AJB = ajb.norm.test(data)
)
# Combine to tibble
res <- bind_rows(lapply(list_of_tests, unclass))
res[c("method", "p.value")] # Keep only method and p-value cols
}
# Test it with e.g. 'tests(data = runif(8, 2, 5))'
# Function for repeated simulation and testing, combine results and derive power
testnormal <- function(n, m, alpha) {
# Important that runif is inside replicate
test_res <-
bind_rows(replicate(tests(data = runif(n, 2, 5)), n = m,
simplify = FALSE))
test_of_powers <-
test_res %>%
group_by(method) %>%
summarize(power = mean(p.value < alpha)) %>%
mutate(n = n, m = m, alpha = alpha)
return(test_of_powers)
}
# Repeat over a number of simulations:
sims <- expand.grid(n = c(10, 20, 30, 40, 50),
m = 1000,
alpha = 0.05)
output <- bind_rows(
mapply(testnormal, n = sims$n, m = sims$m, alpha = sims$alpha,
SIMPLIFY = FALSE)
)
Actually doing the plot:
# Plot it
ggplot(output, aes(x = n, y = power, col = method)) +
geom_line()
This way should make it easier to plot as well as making simulations over other grids of values (e.g. varying alpha) or expand your range of n, etc.

Related

FDA with R: How to get max() of curve in fda-object?

I have some functional data as fda-object. Now I got the first derivative and want to have the coordinates of the maximum value of each single curve. How is this possible?
For better understanding I include some fictional data I took from here:
library(fdaoutlier);
library(fda);
set.seed(95139);
n_obs <- 50;
n_curves <- 100
mod4 <- simulation_model4(n = n_curves, p = n_obs, outlier_rate = .5, seed = 50, plot = FALSE)
index1 <- mod4$true_outliers;
curves_mat <- mod4$data;
n_order = 4;
knots = c(seq(0,n_obs,5))
n_basis = length(knots) + n_order - 2;
spline_basis = create.bspline.basis(rangeval = c(0,n_obs), nbasis = n_basis, norder = n_order)
df1 <- curves_mat[index1,]
df1_obj <- Data2fd(argvals = 1:n_obs, y = t(df1), basisobj = spline_basis, lambda = 0.5)
So, how can I get the coordinates of the maximum value of each single curve of df1_obj?
Some kind of workaround, maybe somebody can add a better solution:
eval.fd() gives a discrete representation of the curves, and so one can get a maximum of them.
fine_df1 <- eval.fd(seq(0,50,length=500),df1_obj);
max_df1 <- array(NA,2);
for(c in c(1:dim(fine_df1)[2])){
cur <- fine_df1[,c];
m <- max(cur);
i <- which(cur %in% m);
max_df1 <- rbind(max_df1, c(i,m));
}
max_df1 <- max_df1[2:dim(max_df1)[1],];
plot(max_df1);

Computing Economic Models in R: How to apply shocks to parameter values in the euler equation?

Hi everyone im using R to try and simulate some economic models. We do this primarily through the use of the euler equation. I've figured out that applying shocks to values which are defined within the function (in this case it is k is pretty simple as seen in the code below, however I'm interested in applying a shock to parameters like delta, theta and rho.
For what its worth I'm using the R package deSolve. Any help is appreciated.
library('deSolve')
##############################################
#Computing the neoclassical growth model in R#
##############################################
#parameters and state space
A<-1
theta<- 0.1
alpha<-0.5
delta<-0.3
rho<-0.9
kinital <- c(k = 1)
times <- seq(from = 0, to = 100, by = 0.2)
#define euler equation
euler <- function(t, k, parms)
list((1/theta)*alpha*A*k^(alpha-1)-delta-rho)
#Compute
out <- ode(y = kinital, times = times, func = euler,
parms = NULL)
plot(out, main = "Euler equation", lwd = 2)
#########################
#Temporary Capital Shock#
########################
eventdat <- data.frame(var = c("k"),
time = c(30) ,
value = c(10),
method = c("add"))
eventdat1 <- data.frame(var = c("k"),
time = c(30) ,
value = c(-5),
method = c("add"))
out3<-ode(y=kinital,times=times,func=euler,events=list(data=eventdat))
out4<-ode(y=kinital,times=times,func=euler,events=list(data=eventdat1))
plot(out,out3,out4,main="Temporary Shock",lwd=3)
Not a great fix but the way to deal with this type of problem is by conditioning your values to take place over some interval. I do this for depreciation as follows:
##############################
#Temporary Depreciation Shock#
##############################
#New Vars
A<-1
theta<- 0.1
alpha<-0.5
delta<-0.3
rho<-0.9
kinital <- c(k = 17)
times <- seq(from = 0, to = 400, by = 0.2)
#Redefine Euler
euler2<-function(t,k,prams){
list((1/theta)*alpha*A*k^(alpha-1)-delta-rho)}
euler3<-function(t,k,prams){
list((1/theta)*alpha*A*k^(alpha-1)-(delta+0.05*(t>=30&t<=40))-rho)}
#Output
doutbase<-ode(y=kinital,times=times, func=euler2, parms=NULL)
doutchange<-ode(y=kinital,times=times, func=euler3, parms=NULL)
#plots
plot(doutbase,doutchange,main="Change in depreciation at t=30 until t=40",lwd=2)
A colleague off of stackexchange suggested a cleaner bit of code which is a bit cleaner. This is seen below:
A<-1
theta<- 0.1
alpha <- 0.5
rho<-0.9
init <- c(k = 17, delta = 0.3)
times <- seq(from = 0, to = 400, by = 0.2)
euler.function<-function(t,y, prams){
k <- y[1]
delta <- y[2]
dk <- (1/theta)*alpha*A*k^(alpha-1)-delta-rho
list(c(dk, 0))}
deventdat<- data.frame(var = c("delta", "delta"),
time = c(30, 51) ,
value = c(0.1, -0.1),
method = c("add"))
res<-ode(y=init,times=times, func=euler.function, parms=NULL, events=list(data=deventdat))
plot(res,lwd=2)

Plotting statistical power vs replicates and calculating mean of coefficients

I need to plot the statistical power vs. the number of replicates and in this case the number of replicates (n) is 3, but I can't figure out how to plot it.
This is what I have:
library(car)
n <- 3
nsims <- 1000
p = coef = vector()
for (i in 1:nsims) {
treat <- rnorm(n, mean = 460, sd = 110)
cont <- rnorm(n, mean = 415, sd = 110)
df <- data.frame(
y = c(treat, cont),
x = rep(c("treat", "cont"), each = n)
)
model <- glm(y ~ x, data = df)
p[i] = Anova(model)$P
coef[i] = coef(model)[2]
}
hist(p, col = 'skyblue')
sum(p < 0.05)/nsims
Can someone help me plot this?
Also, I need to calculate the mean of the coefficients using only models where p < 0.05. This is simulating the following process: if you perform the experiment, and p > 0.05, you report 'no effect’, but if p < 0.05 you report ‘significant effect’. But I'm not sure how to set that up from what I have.
Would I just do this?
mean(coef)
But I don't know how to include only those with p < 0.05.
Thank you!
Disclaimer: I spend a decent amount of time simulating experiments for work so I have strong opinions on this.
If that's everything because it's for a study assignment then fine, if you are planning to go further with this I recommend
adding the tidyverse to your arsenal.
Encapsulating functionality
First allows me to put a single iteration into a function to decouple its logic from the result subsetting (the encapsulation).
sim <- function(n) {
treat <- rnorm(n, 460, 110)
cont <- rnorm(n, 415, 110)
data <- data.frame(y = c(treat, cont), x = rep(c("treat", "cont"), each = n))
model <- glm(y ~ x, data = data)
p <- car::Anova(model)$P
coef <- coef(model)[2]
data.frame(n, p, coef)
}
Now we can simulate
nsims <- 1000
sims <- do.call(
rbind,
# We are now using the parameter as opposed to the previous post.
lapply(
rep(c(3, 5, 10, 20, 50, 100), each = nsims),
sim
)
)
# Aggregations
power_smry <- aggregate(p ~ n, sims, function(x) {mean(x < 0.05)})
coef_smry <- aggregate(coef ~ n, sims[sims$p < 0.05, ], mean)
# Plots
plot(p ~ n, data = power_smry
If you do this in the tidyverse this is one possible approach
crossing(
n = rep(c(3, 5, 10, 20, 50, 100))
# Add any number of other inputs here that you want to explore (like lift).
) %>%
rowwise() %>%
# This looks complicated but will be less so if you have multiple
# varying hyperparameters defined in crossing.
mutate(results = list(bind_rows(rerun(nsims, sim(n))))) %>%
pull(results) %>%
bind_rows() %>%
group_by(n) %>%
# The more metrics you want to summarize in different ways the easier compared to base.
summarize(
power = mean(p < 0.05),
coef = mean(coef[p < 0.05])
)

Function that will generate iter samples of size n from a gamma distribution with shape parameter alpha and rate parameter beta

The function needs to return the mean and standard deviation of each sample.
This is what I have:
sample_gamma <- function(alpha, beta, n, iter) {
mean = alpha/beta
var = alpha/(beta)^2
sd = sqrt(var)
gamma = rgamma(n,shape = alpha, scale = 1/beta)
sample_gamma = data.frame(mean = replicate(n = iter, expr = mean))
}
I'm very lost for this. I also need to create a data frame for this function.
Thank you for your time.
Edit:
sample_gamma <- function(alpha, beta, n, iter) {
output <- rgamma(iter, alpha, 1/beta)
output_1 <- matrix(output, ncol = iter)
means <- apply(output_1, 2, mean)
sds <- apply(output_1, 2, sd)
mystats <- data.frame(means, sds)
return(mystats)
}
This works except for the sds. It's returning NAs.
It's not really clear to me what you want. But say you want to create 10 samples of size 1000, alpha = 1, beta = 2. Then you can create a single stream of rgamma realizations, dimension them into a matrix, then get your stats with apply, and finally create a data frame with those vectors:
output <- rgamma(10*1000, 1, 1/2)
output <- matrix(output, ncol = 10)
means <- apply(output, 2, mean)
sds <- apply(output, 2, sd)
mystats <- data.frame(means, sds)
You could wrap your function around that code, replacing the hard values with parameters.

rgenoud - How to pass parameters to the function?

I have a function that currently plays nice with rgenoud. It has one parameter (xx) and rgenoud will optimize xx perfectly.
However, I would like to add a second parameter to my function that wouldnt be optimized by rgendoud . For example, I would like my function to either fit a model with a gaussian link or a poisson link and to specify that when I call rgenoud.
Any idea?
thanks
edit: here is a minimal working example of what I mean. How would you get the last line to work?
adstock reflect the fact that TV advertising should have an impact on the number of quotes of future weeks.
Adstock[t] = Ads[t] + rate* Ads[t-1] + rate^2*Ads[t-2] + .... + rate^max_memory * Ads[t-max_memory]
We want rgenoud to figure out what rate and max_memory will return the model with the best fit. Best fit is defined as the lowest RMSE.
set.seed(107)
library(fpp)
library(rgenoud)
adstock_k <- function(x, adstock_rate = 0, max_memory = 12){
learn_rates <- rep(adstock_rate, max_memory+1) ^ c(0:max_memory)
adstocked_advertising <- stats::filter(c(rep(0, max_memory), x), learn_rates, method="convolution")
adstocked_advertising <- adstocked_advertising[!is.na(adstocked_advertising)]
return(as.numeric(adstocked_advertising))
}
getRMSE <- function(x, y) {
mean((x-y)^2) %>% sqrt
}
df <- data.frame(insurance) %>%
mutate(Quotes = round (Quotes*1000, digits = 0 ))
df$idu <- as.numeric(rownames(df))
my_f <- function(xx){
adstock_rate <- xx[1]
adstock_memory <- xx[2]
df.temp <- df %>%
mutate(adstock = adstock_k(TV.advert, adstock_rate/100, adstock_memory ))
mod <- lm(data=df.temp, Quotes ~ adstock )
getRMSE( df.temp$Quotes, predict(mod))
}
domaine <- cbind(c(30,1), c(85, 8))
#this works
min_f <- genoud(my_f, nvars = 2, max = F, pop.size=1000, wait.generations=10, Domains = domaine, data.type.int = T)
#here I try to add a second parameter to the function.
my_f2 <- function(xx,first_n_weeks=20){
adstock_rate <- xx[1]
adstock_memory <- xx[2]
df.temp <- df %>%
filter(idu<= first_n_weeks) %>%
mutate(adstock = adstock_k(TV.advert, adstock_rate/100, adstock_memory ))
mod <- lm(data=df.temp, Quotes ~ adstock )
getRMSE( df.temp$Quotes, predict(mod))
}
#this doesnt work
min_f2 <- genoud(my_f2(first_n_week=10), nvars = 2, max = F, pop.size=1000, wait.generations=10, Domains = domaine, data.type.int = T)
Include the argument in the call to genoud, e.g.
genoud(my_f2, nvars = 2, max = F, pop.size=1000, wait.generations=10, Domains = domaine, data.type.int = T, first_n_weeks = 10)

Resources