Impose constraints on the coefficient matrix of a var model in R - r

How can I impose constraints on the coefficient matrix of a VAR model in R?
Part of my code follows:
# Fit a VAR(2) on the quarterly data, restrict it with method "ser" and
# compute bootstrapped impulse responses to a shock in pbipc_log_d.
# NOTE(review): VAR(), restrict() and irf() presumably come from the `vars`
# package, which has to be loaded before this snippet runs — confirm.
library(readxl)

dat_pc_log_d <- read_excel("C:/Users/Desktop/dat_pc_log_d.xlsx")

# Drop the columns that must not enter the model. attach() is intentionally
# not used: nothing below refers to bare column names, and an attached copy
# would not reflect the removals made here.
dat_pc_log_d$itcrm <- NULL
dat_pc_log_d[["...1"]] <- NULL  # presumably an auto-named index column

# Quarterly series running from 2004 Q1 to 2019 Q1.
data <- ts(dat_pc_log_d, start = c(2004, 1), end = c(2019, 1), frequency = 4)

VAR_modelo <- VAR(data, p = 2)
VAR_modelo_restriccion <- restrict(VAR_modelo, method = "ser", thresh = 2.0)

ir_pib <- irf(
  VAR_modelo_restriccion,
  impulse = "pbipc_log_d",
  response = c("pbipc_log_d", "expopc_log_d", "pbiagr_log_d"),
  boot = TRUE,
  ci = 0.95
)
I need to ensure the exogeneity of one variable; to do that, I have to force the coefficients on some lags of the other variables to zero. How can I do that?
thanks

# Fit a VAR(2), impose manual zero restrictions through `resmat`, and compute
# bootstrapped impulse responses to a shock in itcrm.
# NOTE(review): VAR(), restrict() and irf() presumably come from the `vars`
# package, which has to be loaded before this snippet runs — confirm.
library(readxl)

dat_pc_log_d <- read_excel("C:/Users//dat_pc_log_d.xlsx")
# attach() is intentionally not used: no bare column names are referenced.
dat_pc_log_d[["...1"]] <- NULL  # presumably an auto-named index column

# Quarterly series running from 2004 Q1 to 2019 Q1.
data <- ts(dat_pc_log_d, start = c(2004, 1), end = c(2019, 1), frequency = 4)
VAR_modelo <- VAR(data, p = 2)

# Restriction matrix: one row per equation (8), one column per regressor (17).
# A 1 keeps the coefficient, a 0 forces it to zero.
# Rows 1-7 are unrestricted; row 8 keeps only columns 8, 16 and 17.
# NOTE(review): presumably row 8 is the itcrm equation, columns 8 and 16 are
# its own first and second lag and column 17 is the constant — verify against
# the regressor order of the fitted model before relying on this.
restriccion <- matrix(1, nrow = 8, ncol = 17)
restriccion[8, ] <- 0
restriccion[8, c(8, 16, 17)] <- 1

VAR_modelo_restriccion <- restrict(
  VAR_modelo,
  method = "man",
  resmat = restriccion
)

# The forecast horizon argument of irf() is `n.ahead` (the original `nhead`
# is not a parameter of irf()).
ir_pib <- irf(
  VAR_modelo_restriccion,
  impulse = "itcrm",
  response = c("pbipc_log_d", "expopc_log_d", "inverpc_log_d", "pbiagr_log_d"),
  boot = TRUE,
  n.ahead = 20,
  ci = 0.68
)

Related

uwot is throwing an error when running the Monocle3 R package's "find_gene_modules()" function, likely because of how my data is formatted

I am trying to run the Monocle3 function find_gene_modules() on a cell_data_set (cds) but am getting a variety of errors. I have not had any other issues before this. I am working with an imported Seurat object. My first error stated that the number of rows was not the same between my cds and the cds@preprocess_aux$gene_loadings values. I took a look and it seems my gene loadings were stored as a list under cds@preprocess_aux@listData$gene_loadings. I then ran the following code to make a data frame version of the gene loadings:
# Build a one-column data frame from the first dimnames entry of the Seurat
# count matrix and store it as the cds gene loadings, using that column as
# the row names. S4 slots are accessed with `@` (a `#` would start a comment).
# NOTE(review): the resulting gene_loadings holds only names and no numeric
# values, which presumably is what triggers "No numeric columns found" later
# on — confirm.
test <- seurat@assays$RNA@counts@Dimnames[[1]]
test <- as.data.frame(test)
cds@preprocess_aux$gene_loadings <- test
rownames(cds@preprocess_aux$gene_loadings) <-
  cds@preprocess_aux$gene_loadings[, 1]
This created a cds@preprocess_aux$gene_loadings data frame with the same number of rows and the same row names as my cds. It resolved my original error but led to a new error, thrown by uwot:
15:34:02 UMAP embedding parameters a = 1.577 b = 0.8951
Error in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components, :
No numeric columns found
Running traceback() produces the following information.
> traceback()
4: stop("No numeric columns found")
3: uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
metric = metric, n_epochs = n_epochs, alpha = learning_rate,
scale = scale, init = init, init_sdev = init_sdev, spread = spread,
min_dist = min_dist, set_op_mix_ratio = set_op_mix_ratio,
local_connectivity = local_connectivity, bandwidth = bandwidth,
gamma = repulsion_strength, negative_sample_rate = negative_sample_rate,
a = a, b = b, nn_method = nn_method, n_trees = n_trees, search_k = search_k,
method = "umap", approx_pow = approx_pow, n_threads = n_threads,
n_sgd_threads = n_sgd_threads, grain_size = grain_size, y = y,
target_n_neighbors = target_n_neighbors, target_weight = target_weight,
target_metric = target_metric, pca = pca, pca_center = pca_center,
pca_method = pca_method, pcg_rand = pcg_rand, fast_sgd = fast_sgd,
ret_model = ret_model || "model" %in% ret_extra, ret_nn = ret_nn ||
"nn" %in% ret_extra, ret_fgraph = "fgraph" %in% ret_extra,
batch = batch, opt_args = opt_args, epoch_callback = epoch_callback,
tmpdir = tempdir(), verbose = verbose)
2: uwot::umap(as.matrix(preprocess_mat), n_components = max_components,
metric = umap.metric, min_dist = umap.min_dist, n_neighbors = umap.n_neighbors,
fast_sgd = umap.fast_sgd, n_threads = cores, verbose = verbose,
nn_method = umap.nn_method, ...)
1: find_gene_modules(cds[pr_deg_ids, ], reduction_method = "UMAP",
max_components = 2, umap.metric = "cosine", umap.min_dist = 0.1,
umap.n_neighbors = 15L, umap.fast_sgd = FALSE, umap.nn_method = "annoy",
k = 20, leiden_iter = 1, partition_qval = 0.05, weight = FALSE,
resolution = 0.001, random_seed = 0L, cores = 1, verbose = T)
I really have no idea what I am doing wrong or how to proceed from here. Does anyone with experience with uwot know where my error is coming from? Really appreciate the help!

mlrCPO - Task conversion TOCPO

I would like to build a CPO for the mlr::makeClassificationViaRegression wrapper. The wrapper builds regression models that predict for the positive class whether a particular example belongs to it (1) or not (-1). It also calculates predicted probabilities using a softmax.
After reading the documentation and vignettes for makeCPOTargetOp, my attempt is as follows:
# Target-operation CPO that converts a two-class classification task into a
# regression task (target +1 for the positive class, -1 otherwise) and maps
# regression predictions back to class labels or probabilities on inversion.
cpoClassifViaRegr <- makeCPOTargetOp(
  cpo.name = "ClassifViaRegr",
  dataformat = "task", # Not sure - will this work if input is df with unknown target values?
  # properties.data = c('numerics', 'factors', 'ordered', 'missings'), #Is this needed?
  properties.adding = "twoclass", # See https://mlrcpo.mlr-org.com/articles/a_4_custom_CPOs.html#task-type-and-conversion
  properties.needed = character(0),
  properties.target = c("classif", "twoclass"),
  task.type.out = "regr",
  predict.type.map = c(response = "response", prob = "response"),
  constant.invert = TRUE,
  # The control object is the task description; only its `positive` and
  # `negative` fields are read below.
  cpo.train = function(data, target) {
    getTaskDesc(data)
  },
  # Replace the class target by a numeric +1 / -1 column and wrap the result
  # in a regression task.
  cpo.retrafo = function(data, target, control) {
    td <- getTaskData(target, target.extra = TRUE)
    target.name <- paste0(control$positive, ".prob")
    data <- td$data
    # Compare against the positive level stored in the control object
    # (the original referenced an undefined variable `pos`).
    data[[target.name]] <- ifelse(td$target == control$positive, 1, -1)
    makeRegrTask(
      id = paste0(getTaskId(target), control$positive, "."),
      data = data,
      target = target.name,
      weights = target$weights,
      blocking = target$blocking
    )
  },
  cpo.train.invert = NULL, # Since constant.invert = TRUE
  # Map regression predictions back to the classification scale.
  cpo.invert = function(target, control.invert, predict.type) {
    if (predict.type == "response") {
      # Positive prediction -> positive class, otherwise negative class
      # (the original returned the positive level in both branches).
      factor(ifelse(target > 0, control.invert$positive, control.invert$negative))
    } else {
      levs <- c(control.invert$positive, control.invert$negative)
      # NOTE(review): the softmax is applied to one element at a time, and
      # exp(x) / sum(exp(x)) of a single value is always 1 — verify that this
      # yields the intended probabilities.
      propVectorToMatrix(vnapply(target, function(x) exp(x) / sum(exp(x))), levs)
    }
  }
)
It seems to work as expected, the demo below shows that the inverted prediction is identical to the prediction obtained using the makeClassificationViaRegr wrapper:
# Compare predictions from the classification-via-regression wrapper with
# predictions obtained through the CPO and its inverter.
# Rows 1-140 of the sonar task are used for training, rows 141-208 for testing.
lrn <- makeLearner("regr.lm")

# Wrapper -----------------------------------------------------------------
lrn2 <- makeClassificationViaRegressionWrapper(lrn)
model <- train(lrn2, sonar.task, subset = 1:140)
predictions <- predict(
  model,
  newdata = getTaskData(sonar.task)[141:208, 1:60]
)

# CPO ---------------------------------------------------------------------
sonar.train <- subsetTask(sonar.task, 1:140)
sonar.test <- subsetTask(sonar.task, 141:208)

# Transform the training task, fit the plain regression learner on it, then
# push the test task through the same retrafo before predicting.
trafd <- sonar.train %>>% cpoClassifViaRegr()
mod <- train(lrn, trafd)
retr <- sonar.test %>>% retrafo(trafd)
pred <- predict(mod, retr)

# Invert the regression prediction and compare it with the wrapper's result.
invpred <- invert(inverter(retr), pred)
identical(predictions$data$response, invpred$data$response)
The problem is that after the CPO has converted the task from twoclass to regr, there is no way for me to specify predict.type = 'prob'. In the case of the wrapper, the properties of the base regr learner are modified to accept predict.type = prob (see here). But the CPO is unable to modify the learner in this way, so how can I tell my model to return predicted probabilities instead of the predicted response?
I was thinking I could specify a include.prob parameter, i.e. cpoClassifViaRegr(include.prob = T). If set to TRUE, the cpo.invert returns the predicted probabilities in addition to the predicted response. Would something like this work?

Generating correlated variables

I am studying the effects of skewness and kurtosis on the Pearson corrections to bivariate correlations for range restriction. Currently I am using R and "rcorrvar" as it should allow me to generate correlated vectors with a specifiable skew and kurtosis. When I run it as below
# Call exactly as issued in the question. It fails because `k_CAT` and `pois`
# are reported as unused arguments by rcorrvar().
rcorrvar(
  n = 100,
  k_cont = 2,
  k_CAT = 2,
  pois = 2,
  k_nb = 0,
  method = c("Fleishman", "Polynomial"),
  means = 0,
  vars = 1,
  skews = 2,
  skurts = 4,
  fifths = NULL,
  sixths = NULL,
  Six = list(),
  marginal = list(),
  support = list(),
  nrand = 100,
  lam = NULL,
  size = NULL,
  prob = NULL,
  mu = NULL,
  Sigma = NULL,
  rho = NULL,
  cstart = NULL,
  seed = 1234,
  errorloop = FALSE,
  epsilon = 0.001,
  maxit = 1000,
  extra_correct = TRUE
)
Error in rcorrvar(n = 100, k_cont = 2, k_CAT = 2, pois = 2, k_nb = 0, :
unused arguments (k_CAT = 2, pois = 2)
How do I correct these errors?
Assuming that the rcorrvar function you're using is from the SimMultiCorrData package, it appears as though you may have misspelled the two variables - they're supposed to be k_cat and k_pois.
Please note that R's variables are case-sensitive.

Time series forecasting using R

I have a problem forecasting this non-stationary data (https://drive.google.com/file/d/14o5hHe8zxR0onRWq0mZNcYqI101O0Dkw/view?usp=sharing) using auto.arima.
Please review my code.
# Forecasting script: reads a sales CSV, builds a monthly series, fits three
# baseline forecasts, runs unit-root tests, then fits ARIMA / ETS / neural-net
# models on the differenced data and plots their forecasts.
# Read Data
r = read.csv('../Amazon/Amazon1.csv', header = TRUE, stringsAsFactors = FALSE)
# Time Series construction
# The first 25 columns are transposed so that they become the time axis of a
# monthly series starting in January 2016.
# NOTE(review): the object is named `ts`, the same name as the ts() function.
ts = ts(t(r[,1:25]), frequency = 12, start = c(2016,01) )
# plotting Time series
# Only the first two series are plotted; row 1 supplies the legend labels.
ts.plot(ts[,1:2],type = 'b', xlab = 'Monthly Cycle', ylab = 'Number of Sales', main = "(TIME SERIES) Amazon Sales Cycle of multiple products for 24 months",col=c(rep("black",1),rep("red",2)))
legend("topleft",cex=.65,legend = ts[1,1:2], col = 1:ncol(ts), lty = 1)
# Working sample: rows 2 to 20 of the first series.
# NOTE(review): row 1 is used as legend text above, so it presumably holds
# labels rather than sales figures — confirm.
set1 = ts[2:20,1]
#set2 = ts[15:20,1]
# as.numeric() returns a plain numeric vector, so set1 carries no time-series
# attributes (frequency, start) from here on.
set1 = as.numeric(set1)
#set2 = as.numeric(set2)
# Building Forecasting models
# Three baselines with a 4-step horizon and 90% / 95% intervals:
# mean forecast, naive forecast, and random walk with drift.
mf = meanf(set1,h=4,level=c(90,95),fan=FALSE,lambda=NULL)
plot(mf)
mn = naive(set1,h=4,level=c(90,95),fan=FALSE,lambda=NULL)
plot(mn)
md = rwf(set1,h=4,drift=T,level=c(90,95),fan=FALSE,lambda=NULL)
plot(md)
# Checking Accuracy
accuracy(mf)
accuracy(mn)
accuracy(md)
# Identifying Stationarity/Non-Stationarity(unit Root testing)
adf = adf.test(set1)
adf
kpss = kpss.test(set1)
kpss
ndiffs(set1)
# First difference of the sample.
diff_data = diff(set1)
adf.test(diff_data) # Rerunning unit test on differenced data
# Identifying Seasonality/Trend
# NOTE(review): set1 is a plain numeric vector at this point; verify that
# stl() accepts it without a seasonal frequency.
Stl = stl(set1,s.window='periodic')
# ARIMA modelling
# All three models are fit on diff(diff_data), i.e. on set1 differenced twice.
# NOTE(review): confirm that two differences agree with the ndiffs() result.
ar_set1 = forecast::auto.arima(diff(diff_data), approximation=FALSE,trace=FALSE, stationary = TRUE)
forecast(ar_set1, h = 5)
ar_set2 = forecast::ets(diff(diff_data))
# NOTE(review): approximation / trace / stationary are copied from the
# auto.arima() call; verify that nnetar() accepts them.
ar_set3 = forecast::nnetar(diff(diff_data), approximation=FALSE,trace=FALSE, stationary = TRUE)
# Prediction
# NOTE(review): confirm that predict() with n.ahead / se.fit is supported for
# each of the three model classes; the plots below use forecast(..., h = 6).
predict(ar_set1, n.ahead = 5,se.fit = TRUE)
predict(ar_set2, n.ahead = 5,se.fit = TRUE)
predict(ar_set3, n.ahead = 5,se.fit = TRUE)
# For each model: plot the 6-step forecast and overlay the fitted values as a
# green line.
plot(forecast(ar_set1,h=6))
points(1:length(diff(diff_data)),fitted(ar_set1),type="l",col="green")
plot(forecast(ar_set2,h=6))
points(1:length(diff(diff_data)),fitted(ar_set2),type="l",col="green")
plot(forecast(ar_set3,h=6))
points(1:length(diff(diff_data)),fitted(ar_set3),type="l",col="green")
# NOTE(review): the models were fit on diff(diff_data) but diff_data is passed
# as `test` here — verify that this is the intended use of accuracy().
accuracy(ar_set1, test = diff_data)
accuracy(ar_set2, test = diff_data)
accuracy(ar_set3, test = diff_data)
I'm unable to get the forecast values using any of those three methods. Where am I going wrong?

Error for knn: no missing values are allowed - I don't have any missing values in the data

I am working on a data mining project (as a total coding outsider) and am trying to run a K-Nearest Neighbor analysis. However, I keep getting the "no missing values are allowed" error. My data does not have missing values so something must be wrong with my code. Can anyone help?
# K-nearest-neighbour prediction of `neighborhood`: load the data, shuffle,
# split 75% / 25%, min-max scale the features with the training-set range,
# then classify the test rows and tabulate the result.
AirbnbNYCApril <- read.delim(
  file = file.choose(),
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE
)
str(AirbnbNYCApril)
AirbnbNYCApril <- na.omit(AirbnbNYCApril)

# Reproducible shuffle followed by a 75% / 25% train / test split.
set.seed(1)
n <- nrow(AirbnbNYCApril)
n_train <- round(0.75 * n)
Train_indices <- seq_len(n_train)
Test_indices <- (n_train + 1):n
AirbnbNYCApril_shuffle <- AirbnbNYCApril[sample(n), ]
AirbnbNYCApril_Train <- AirbnbNYCApril_shuffle[Train_indices, ]
AirbnbNYCApril_Test <- AirbnbNYCApril_shuffle[Test_indices, ]

# Separate the class labels from the predictors.
Train_labels <- AirbnbNYCApril_Train$neighborhood
Test_labels <- AirbnbNYCApril_Test$neighborhood
AirbnbNYCApril_Train$neighborhood <- NULL
AirbnbNYCApril_Test$neighborhood <- NULL

# Min-max scale every feature using the TRAINING minimum and maximum, and
# apply the same transformation to the test set. Looping over the column
# names removes the copy-paste errors of the original: a misspelled
# `latitutde` column and a missing test-set scaling for `accommodates`.
# NOTE(review): a feature that is constant in the training set gives a 0 / 0
# division here — confirm that none of these columns is constant.
scaled_features <- c("reviews", "accommodates", "price", "latitude", "longitude")
for (feature in scaled_features) {
  train_min <- min(AirbnbNYCApril_Train[[feature]])
  train_max <- max(AirbnbNYCApril_Train[[feature]])
  train_span <- train_max - train_min
  AirbnbNYCApril_Train[[feature]] <-
    (AirbnbNYCApril_Train[[feature]] - train_min) / train_span
  AirbnbNYCApril_Test[[feature]] <-
    (AirbnbNYCApril_Test[[feature]] - train_min) / train_span
}

# The first column is dropped from both predictor sets before classification.
# NOTE(review): check with str() that every remaining column is numeric;
# non-numeric columns are a possible cause of the "no missing values are
# allowed" error — confirm.
neighborhood_prediction <- knn(
  train = AirbnbNYCApril_Train[, -1],
  test = AirbnbNYCApril_Test[, -1],
  cl = Train_labels,
  k = 5
)
confusion_matrix <- table(Test_labels, neighborhood_prediction)
print(confusion_matrix)

Resources