FDA with R: How to get max() of curve in fda-object? - r

I have some functional data as fda-object. Now I got the first derivative and want to have the coordinates of the maximum value of each single curve. How is this possible?
For better understanding I include some fictional data I took from here:
library(fdaoutlier);
library(fda);
set.seed(95139);
n_obs <- 50;
n_curves <- 100
mod4 <- simulation_model4(n = n_curves, p = n_obs, outlier_rate = .5, seed = 50, plot = FALSE)
index1 <- mod4$true_outliers;
curves_mat <- mod4$data;
n_order = 4;
knots = c(seq(0,n_obs,5))
n_basis = length(knots) + n_order - 2;
spline_basis = create.bspline.basis(rangeval = c(0,n_obs), nbasis = n_basis, norder = n_order)
df1 <- curves_mat[index1,]
df1_obj <- Data2fd(argvals = 1:n_obs, y = t(df1), basisobj = spline_basis, lambda = 0.5)
So, how can I get the coordinates of the maximum value of each single curve of df1_obj?

Some kind of workaround, maybe somebody can add a better solution:
eval.fd() gives a discrete representation of the curves, and so one can get a maximum of them.
fine_df1 <- eval.fd(seq(0,50,length=500),df1_obj);
max_df1 <- array(NA,2);
for(c in c(1:dim(fine_df1)[2])){
cur <- fine_df1[,c];
m <- max(cur);
i <- which(cur %in% m);
max_df1 <- rbind(max_df1, c(i,m));
}
max_df1 <- max_df1[2:dim(max_df1)[1],];
plot(max_df1);

Related

Computing Economic Models in R: How to apply shocks to parameter values in the euler equation?

Hi everyone im using R to try and simulate some economic models. We do this primarily through the use of the euler equation. I've figured out that applying shocks to values which are defined within the function (in this case it is k is pretty simple as seen in the code below, however I'm interested in applying a shock to parameters like delta, theta and rho.
For what its worth I'm using the R package deSolve. Any help is appreciated.
library('deSolve')
##############################################
#Computing the neoclassical growth model in R#
##############################################
#parameters and state space
A<-1
theta<- 0.1
alpha<-0.5
delta<-0.3
rho<-0.9
kinital <- c(k = 1)
times <- seq(from = 0, to = 100, by = 0.2)
#define euler equation
euler <- function(t, k, parms)
list((1/theta)*alpha*A*k^(alpha-1)-delta-rho)
#Compute
out <- ode(y = kinital, times = times, func = euler,
parms = NULL)
plot(out, main = "Euler equation", lwd = 2)
#########################
#Temporary Capital Shock#
########################
eventdat <- data.frame(var = c("k"),
time = c(30) ,
value = c(10),
method = c("add"))
eventdat1 <- data.frame(var = c("k"),
time = c(30) ,
value = c(-5),
method = c("add"))
out3<-ode(y=kinital,times=times,func=euler,events=list(data=eventdat))
out4<-ode(y=kinital,times=times,func=euler,events=list(data=eventdat1))
plot(out,out3,out4,main="Temporary Shock",lwd=3)
Not a great fix but the way to deal with this type of problem is by conditioning your values to take place over some interval. I do this for depreciation as follows:
##############################
#Temporary Depreciation Shock#
##############################
#New Vars
A<-1
theta<- 0.1
alpha<-0.5
delta<-0.3
rho<-0.9
kinital <- c(k = 17)
times <- seq(from = 0, to = 400, by = 0.2)
#Redefine Euler
euler2<-function(t,k,prams){
list((1/theta)*alpha*A*k^(alpha-1)-delta-rho)}
euler3<-function(t,k,prams){
list((1/theta)*alpha*A*k^(alpha-1)-(delta+0.05*(t>=30&t<=40))-rho)}
#Output
doutbase<-ode(y=kinital,times=times, func=euler2, parms=NULL)
doutchange<-ode(y=kinital,times=times, func=euler3, parms=NULL)
#plots
plot(doutbase,doutchange,main="Change in depreciation at t=30 until t=40",lwd=2)
A colleague off of stackexchange suggested a cleaner bit of code which is a bit cleaner. This is seen below:
A<-1
theta<- 0.1
alpha <- 0.5
rho<-0.9
init <- c(k = 17, delta = 0.3)
times <- seq(from = 0, to = 400, by = 0.2)
euler.function<-function(t,y, prams){
k <- y[1]
delta <- y[2]
dk <- (1/theta)*alpha*A*k^(alpha-1)-delta-rho
list(c(dk, 0))}
deventdat<- data.frame(var = c("delta", "delta"),
time = c(30, 51) ,
value = c(0.1, -0.1),
method = c("add"))
res<-ode(y=init,times=times, func=euler.function, parms=NULL, events=list(data=deventdat))
plot(res,lwd=2)

How to create a plot of power of test in R?

I want to create a comparison for normal test with Shapiro-Wilks, Kolmogorov-Smirnov, Anderson-Darling, Cramer von Mises dan Adjusted Jarque-Bera methods based on the power of test (1-beta) on sample sizes n = 10,20,30,40 and 50.
testnormal=function(n,m,alfa)
{
require(nortest)
require(normtest)
require(xlsx)
pvalue=matrix(0,m,5)
decision=matrix(0,m,5)
for (i in 1:m)
{
data=runif(n,2,5)
test1=shapiro.test(data)
pv1=test1$p.value
pvalue[i,1]=pv1
if (pv1<alfa)
{
decision[i,1]=1
}
test2=ks.test(data,"pnorm",mean=mean(data),sd=sd(data))
pv2=test2$p.value
pvalue[i,2]=pv2
if (pv2<alfa)
{
decision[i,2]=1
}
test3=ad.test(data)
pv3=test3$p.value
pvalue[i,3]=pv3
if (pv3<alfa)
{
decision[i,3]=1
}
test4=cvm.test(data)
pv4=test4$p.value
pvalue[i,4]=pv4
if (pv4<alfa)
{
decision[i,4]=1
}
test5=ajb.norm.test(data)
pv5=test5$p.value
pvalue[i,5]=pv5
if (pv2<alfa)
{
decision[i,5]=1
}
}
result1=data.frame(pvalue)
result2=data.frame(decision)
colnames(result1)=c("SW","KS","AD","CvM","AJB")
colnames(result2)=c("SW","KS","AD","CvM","AJB")
write.xlsx(result1,"testnormal_pvalue.xlsx")
write.xlsx(result2,"testnormal_decision.xlsx")
one_min_beta=t(1-(colSums(decision)/m))
test.of.power=data.frame(one_min_beta)
colnames(test.of.power)=c("SW","KS","AD","CvM","AJB")
return(test.of.power)
}
simulation=testnormal(10,100,0.05)
simulation2=testnormal(20,100,0.05)
simulation3=testnormal(30,100,0.05)
simulation4=testnormal(40,100,0.05)
simulation5=testnormal(50,100,0.05)
output=rbind(simulation,simulation2,simulation3,simulation4,simulation5)
output
I want to graph the power of the test to see trends in the up and down trend of the power of the test over the sample size, anyone can help please?
I went through your code and rewrote along the way to better understand what you want (what is the excel stuff for?). I have broken it down to smaller functions to allow you to have more control in these kinds of simulation studies. The code is not particularly efficient.
But does this give you what you want?
library("nortest")
library("normtest")
library("dplyr")
library("ggplot2")
# Function for doing all tests and putting it into a data.frame
tests <- function(data) {
list_of_tests <- list(
SW = shapiro.test(data),
KS = ks.test(data, pnorm, mean = mean(data), sd = sd(data)),
AD = ad.test(data) ,
CMV = cvm.test(data),
AJB = ajb.norm.test(data)
)
# Combine to tibble
res <- bind_rows(lapply(list_of_tests, unclass))
res[c("method", "p.value")] # Keep only method and p-value cols
}
# Test it with e.g. 'tests(data = runif(8, 2, 5))'
# Function for repeated simulation and testing, combine results and derive power
testnormal <- function(n, m, alpha) {
# Important that runif is inside replicate
test_res <-
bind_rows(replicate(tests(data = runif(n, 2, 5)), n = m,
simplify = FALSE))
test_of_powers <-
test_res %>%
group_by(method) %>%
summarize(power = mean(p.value < alpha)) %>%
mutate(n = n, m = m, alpha = alpha)
return(test_of_powers)
}
# Repeat over a number of simulations:
sims <- expand.grid(n = c(10, 20, 30, 40, 50),
m = 1000,
alpha = 0.05)
output <- bind_rows(
mapply(testnormal, n = sims$n, m = sims$m, alpha = sims$alpha,
SIMPLIFY = FALSE)
)
Actually doing the plot:
# Plot it
ggplot(output, aes(x = n, y = power, col = method)) +
geom_line()
This way should make it easier to plot as well as making simulations over other grids of values (e.g. varying alpha) or expand your range of n, etc.

Creating a function that prints out multiple data-frames in r

I have a function that I have written to create a simulation that demonstrates the central limit theorem. I'm not sure if its possible or if I am better off just making separate functions but currently it only stores that data frame containing the mean values of all the trials.
# create function to perform CLT simulation
# where n = sample size, t = number of trials, pop = which population is being used, popmean = population mean,
cltsim <- function(n, t, pop, popmean, popsd, poptitle){
popsim <- data.frame()
# Run the simulation
for(i in n) { # for each value of n
col <- c()
for(j in t) { #loop through each value of t
trial <- 1:j
counter <- j #set up an egg timer based on whichever t value we're on
value <- c()
while(counter > 0) { # and extract n samples from the population
bucket <- sample(pop, i, replace = TRUE)
xbar <- mean(bucket) #calculate the sample mean
value <- c(value, xbar) # and add it to a vector
counter <- counter - 1 #egg timer counts down and loops back until it hits 0
}
sbar <- sd(value) #calculate the sample standard deviation
col <- cbind(trial, value, sbar, i, j) #merge all info together
popsim <- rbind(popsim, col) # attach it to empty dataframe
}
}
#clean up so just the finished data frame is left
rm(col, bucket, value, counter, i, j, n, sbar, t, xbar, trial)
#tidy up data frame in order to graph it
names(popsim) <- c("trial#", "value", "sdev", "samples", "trials")
#view the rows of data in popsim data table
popsim
}
when I try to add any more code that requires creating datatables it doesnt store them, below are the blocks of code I wish to add to the function
g1 <- ggplot(popsim, aes(x = value)) + geom_density(fill = "#09AB30") +
facet_grid(samples ~ trials, labeller = label_both) +
ggtitle(paste("Demonstrating The Central Limit Theorem with Simulation using", poptitle)) +
geom_vline(xintercept = popmean, linetype = "dashed")
g1
and
#create data frame of simulated sample standard deviations \
sdmatrix <- matrix(unique(popsim$sdev), nrow = 4, ncol = 4)
sdf <- as.data.frame(sdmatrix, row.names = c("t10", "t100", "t1000", "t10000"))
names(sdf) <- c("s1", "s10", "s30", "s50")
sdf <- t(sdf)
rm(sdmatrix)
sdf
exvals <- pop1sd/sqrt(c(1, 10, 30, 50))
dfex <- as.data.frame(exvals, row.names = c("s1", "s10", "s30", "s50"))
names(dfex) <- "Predicted Standard Deviations"
dfex
Ive had a look around and I cant find a solution anywhere, am I better off just writing different functions for them? Any advice or input on how to make this lot of code more effective/efficient would be greatly appreciated.
thanks in advance

issue with disag_model() function from disaggregation R package

I was trying to use the disaggregation package to evaluate if it could be used on the dataset I have. My original data are disaggregated, so I've aggregated them to use the disag_model function from disaggregation package and compare "fitted values" with actual values.
However when I run the function the R session aborts.
I tried to execute the disag_model function step by step and I saw that the problem is due to the use of nlminb() to optimize the a posteriori density function, but I cannot understand why it's happening and how to solve it.
Thanks for your help.
You can find the data I used at this link: https://www.dropbox.com/sh/au7l0e11trzfo19/AACpfRSUpd4gRCveUsh5JX6Ea?dl=0
Please download the folder to run the code.
This is the code I used:
library(tidyverse)
library(raster)
library(disaggregation)
library(sp)
path<- "yourPath/Data"
load(file.path(path, "myRS"))
load(file.path(path, "RAST"))
Data <- read.csv(file = paste(path, "/sim_data.csv", sep = ""))
Data$HasRes <- ifelse(Data$PN50 > runif(nrow(Data)), 1, 0)
for (i in 1:nlayers(myRS)) {
myRS#layers[[i]]#file#name<-file.path(path, "predStackl10")
}
DFCov <-
as.data.frame(raster::extract(myRS, Data[c("XCoord", "YCoord")]))
Data <- cbind(Data, DFCov)
# Remove NA
NAs <- which(is.na(rowSums(Data[names(myRS)])))
Data <- Data[-NAs, ]
Data$ISO3 <- as.factor(Data$ISO3)
world_shape <-
shapefile(file.path(path, "World.shp"))
lmic_shape <-
world_shape[(world_shape#data$ISO3 %in% levels(Data$ISO3)),]
plot(lmic_shape)
# I would like to convert Data in a SpatialPointsDataFrame object
PN50 <- Data
coordinates(PN50) <- c("XCoord", "YCoord")
is.projected(PN50) # see if a projection is defined
proj4string(PN50) <- CRS("+proj=longlat +datum=WGS84")
# compute the mean P50 within each state
PN50_mean <- aggregate(x = PN50,
by = list(Data$ISO3),
FUN = mean)
# compute the centroid of the observations coordinates for each state
PN50_centroid <-
Data %>% group_by(ISO3) %>% summarise(meanX = mean(XCoord), meanY = mean(YCoord))
# assign to each mean the centroid coordinates
PN50_agg <-
as.data.frame(
cbind(
PN50_mean = PN50_mean#data$PN50,
XCoord = PN50_centroid$meanX,
YCoord = PN50_centroid$meanY
)
)
PN50_agg$XCoord <- as.numeric(PN50_agg$XCoord)
PN50_agg$YCoord <- as.numeric(PN50_agg$YCoord)
PN50_agg$ISO3 <- as.character(PN50_centroid$ISO3)
samsiz <-
Data %>% group_by(ISO3) %>% summarise(sz = sum(SampleSize))
PN50_agg$sample_size <- as.numeric(samsiz$sz)
PN50_agg$case <- round(PN50_agg$PN50_mean * PN50_agg$sample_size)
# I would like having data in a SpatialPolygonsDataFrame format to use the disaggrgation package
library(sp)
coordinates(PN50_agg) <- c("XCoord", "YCoord")
proj4string(PN50_agg) <- CRS("+proj=longlat +datum=WGS84")
PN50_polyg <- lmic_shape
PN50_polyg#data <-
full_join(PN50_polyg#data, PN50_agg#data, by = "ISO3")
# covariates raster
covariate_stack <-
getCovariateRasters(path, shape = raster(x = paste0(path, '/multi.tif')))
names(covariate_stack)
covariate_stack2 <- dropLayer(covariate_stack, nlayers(covariate_stack))
names(covariate_stack2)
plot(covariate_stack2)
covariate_stack2 <- raster::stack(covariate_stack2)
covariate_stack2<-brick(covariate_stack2)
# population raster
extracted <- raster::extract(raster(x = paste0(path, '/multi.tif')), PN50_polyg)
n_cells <- sapply(extracted, length)
PN50_polyg#data$pop_per_cell <- PN50_polyg#data$sample_size / n_cells
population_raster <-
rasterize(PN50_polyg, covariate_stack2, field = 'pop_per_cell')
# prepare data for disag_model()
dis_data <- prepare_data(
polygon_shapefile = PN50_polyg,
covariate_rasters = covariate_stack2,
aggregation_raster = population_raster,
mesh.args = list(
max.edge = c(5, 40),
cut = 0.0005,
offset = 1
),
id_var = "ISO3",
response_var = "case",
sample_size_var = "sample_size",
na.action = TRUE,
ncores = 8
)
# Rho and p(Rho<Rho_min)
dist <- pointDistance(PN50_agg#coords, lonlat = F, allpairs = T)
rownames(dist) <- PN50_agg$ISO3
colnames(dist) <- PN50_agg$ISO3
flattenDist <- function(dist) {
up <- upper.tri(dist)
flat <- data_frame(row = rownames(dist)[row(dist)[up]],
column = rownames(dist)[col(dist)[up]],
dist = dist[up])
return(flat)
}
pair_dist <- flattenDist(dist)
d <- pair_dist$dist
k <- 0.036
CorMatern <- k * d * besselK(k * d, 1)
limits <- sp::bbox(PN50_polyg)
hypontenuse <-
sqrt((limits[1, 2] - limits[1, 1]) ^ 2 + (limits[2, 2] - limits[2, 1]) ^
2)
prior_rho <- hypontenuse / 3
p_rho <- sum(d[CorMatern <= 0.1] < prior_rho) / length(d[CorMatern <= 0.1])
# sigma and p(sigma>sigma_max)
sigma_boost <- function(data, i) {
sd(data[i] / mean(data[i]))
}
sigma <-
boot(data = dis_data$polygon_data$response,
statistic = sigma_boost,
10000)
prior_sigma <- sigma$t0
p_sigma <- sum(sigma$t >= sigma$t0) / length(sigma$t)
default_priors <-
list(
priormean_intercept = 0,
priorsd_intercept = 4,
priormean_slope = 0,
priorsd_slope = 2,
prior_rho_min = prior_rho,
prior_rho_prob = p_rho,
prior_sigma_max = prior_sigma,
prior_sigma_prob = p_sigma,
prior_iideffect_sd_max = 0.1,
prior_iideffect_sd_prob = 0.01
)
fitted_model <- disag_model(
data = dis_data,
iterations = 1000,
family = "binomial",
link = "logit",
# priors = default_priors,
field = TRUE,
iid = TRUE,
silent = TRUE
)
I was able to run the disag_model function using your dis_data object. There were no errors or crashes. I ran the following lines.
fitted_model <- disag_model(
data = dis_data,
iterations = 1000,
family = "binomial",
link = "logit",
field = TRUE,
iid = TRUE,
silent = TRUE
)
I am running on a Windows machine with 64GB RAM and 8 cores. It took over an hour and used all of my RAM for a while and up to 50% of my CPU, which is not surprising as you are fitting 5.5M pixels over the whole world. Therefore, I suspect it is related to your computer running out of resources. I suggest you try a smaller example to test it out first. Try fewer polygons and fewer pixels in each polygon.

Fit student t to incomplete distribution

Thanks to a closed form formula (I work on risk neutral density, with this king of formula: RND formula, page 8), I have an incomplete distribution of this type:
My idea would be to fit this density with a student-t.
I already tried the MASS and fitdistrplus packages but just can't find how to perform my task. Everything I can do for now is to get the fitted parameters (m=1702.041, s=6.608536, df=15.18036), but from here I don't know how to get my fitted values for my distribution.
A sample of code:
temp = matrix(nrow=1000, ncol=3)
colnames(temp) = c("strikes", "first_density", "mulitply_first_density")
temp = as.data.frame(temp)
# we generate fake data
temp$strikes = seq(1000,2000,length=1000)
temp$first_density = runif(1000,max=0.006, min=1e-10)
# we multiply our first density to generate our sample
temp$mulitply_first_density = temp$first_density*1000000
# we generate our sample
vec = vector()
for (i in 1:nrow(temp))
{
vec = c(vec, rep(temp$strike[i], temp$mulitply_first_density[i]))
}
# we laod our library
library("MASS")
# we fir our parameters
fitted_parameters = fitdistr(vec, "t")
The formula for the t-density function using the location and scale parameters is given in the examples of the documentation as mydt.
#simulated data
set.seed(42)
x <- rt(1e4, 7, 10)
plot(density(x))
library(MASS)
fitted_parameters = fitdistr(x, "t", start = list(df = 10, m = 10, s = 5))
# df m s
# 3.81901649 10.56816146 2.66905346
#( 0.15295551) ( 0.03448627) ( 0.03361758)
mydt <- function(x, m, s, df) dt((x-m)/s, df)/s
curve(do.call(mydt, c(list(x), as.list(fitted_parameters$estimate))), add = TRUE, col = "red")
legend("topright", legend = c("kernel density estimate", "fitted t distribution"),
col = c("black", "red"), lty = 1)

Resources