UPD: replaced merge to inner_join, new error:
Error in UseMethod("tbl_vars") :
no applicable method for 'tbl_vars' applied to an object of class "function"
Calls: inner_join ... tbl_vars -> new_sel_vars -> structure -> tbl_vars_dispatch
I am trying to run my R-script from command line, but it is return error:
Error in if (nx >= 2^31 || ny >= 2^31) stop("long vectors are not supported") :
missing value where TRUE/FALSE needed
Calls: merge -> merge.data.frame
Execution halted
What it's mean?
Where are no one problems, when I run similar code from R or Rstudio.
How can I fix this issue?
Part of R-script
clonotypes_tables = function(name, cell, mode){
sub = subset(metadata, metadata$donor == as.character(name))
sub = subset(sub, sub$cell_type == as.character(cell))
if (nrow(sub) > 1){
sub = sub[order(sub$time_point), ]
if (file.exists(paste(getwd(), sub$file_name[1], sep="/")) & file.exists(paste(getwd(), sub$file_name[2], sep="/"))){
point1 = read.table(sub$file_name[1], header = T)
#cat("check1")
point2 = read.table(sub$file_name[2], header = T)
if (nrow(point1) >= 1000 & nrow(point2) >= 1000){
#common.clonotype = merge(point1[1:1000,], point2[1:1000,], by = c("cdr3aa", "v"))
if (mode == "CDR3_V"){
common.clonotype = merge(point1, point2, by = c("cdr3aa", "v"))
common.clonotype$clon = paste(common.clonotype$cdr3aa, common.clonotype$v, sep = "~")
}
else{
common.clonotype = merge(point1, point2, by = c("cdr3aa"))
common.clonotype$clon = common.clonotype$cdr3aa
}
common.clonotype = common.clonotype[,c("clon", "freq.x", "freq.y")]
colnames(common.clonotype) = c("Clonotypes", "0.5", "1")
dim(common.clonotype)
common.clonotype = common.clonotype[order(common.clonotype[2], decreasing = T), ]
common.clonotype
}
#return(common.clonotype)
}
else{
print(paste(name, cell, "hasn't two time points", sep = " "))
}
}
}
I guess you ran your R-script from command line on larger files or different files. The built-in (base) merge function won't merge data frames with more than 2^31 rows. Check the merge.data.frame code:
...
nx <- nrow(x <- as.data.frame(x))
ny <- nrow(y <- as.data.frame(y))
if (nx >= 2^31 || ny >= 2^31)
stop("long vectors are not supported")
...
Try alternative merge functions such as ..._join in dplyr library or the most efficient data.table framework.
Im using chainer and im try to do topic modeling. The code for the training phase contains the following:
optimizer = O.Adam()
optimizer.setup(self.train_model)
clip = chainer.optimizer.GradientClipping(5.0)
optimizer.add_hook(clip)
j = 0
msgs = defaultdict(list)
for epoch in range(epochs):
print "epoch : ",epoch
data = prepare_topics(cuda.to_cpu(self.train_model.mixture.weights.W.data).copy(),
cuda.to_cpu(self.train_model.mixture.factors.W.data).copy(),
cuda.to_cpu(self.train_model.sampler.W.data).copy(),
self.words)
top_words = print_top_words_per_topic(data)
if j % 100 == 0 and j > 100:
coherence = topic_coherence(top_words)
for j in range(self.n_topics):
print j, coherence[(j, 'cv')]
kw = dict(top_words=top_words, coherence=coherence, epoch=epoch)
data['doc_lengths'] = self.doc_lengths
data['term_frequency'] = self.term_frequency
for d, f in utils.chunks(self.batchsize, self.doc_ids, self.flattened):
t0 = time.time()
self.train_model.cleargrads()
l = self.train_model.fit_partial(d.copy(), f.copy(), update_words = update_words, update_topics = update_topics)
prior = self.train_model.prior()
loss = prior * self.fraction
loss.backward()
optimizer.update()
msg = ("J:{j:05d} E:{epoch:05d} L:{loss:1.3e} "
"P:{prior:1.3e} R:{rate:1.3e}")
prior.to_cpu()
loss.to_cpu()
t1 = time.time()
dt = t1 - t0
rate = self.batchsize / dt
msgs["E"].append(epoch)
msgs["L"].append(float(l))
j += 1
logs = dict(loss=float(l), epoch=epoch, j=j, prior=float(prior.data), rate=rate)
print msg.format(**logs)
print "\n ================================= \n"
#serializers.save_hdf5("lda2vec.hdf5", self.model)
msgs["loss_per_epoch"].append(float(l))
whn i execute the code i get for example:
J:00200 E:00380 L:0.000e+00 P:-2.997e+04 R:2.421e+04
only the L(loss) dont change, can someone please help to know why this value remain zero?
I want to debug functions in ShadowCAT package. https://github.com/Karel-Kroeze/ShadowCAT/tree/master/R
Take any internal functions from this package, they are getting called via validate_and_run() function. If I go though it I am directly presented an output and I am not able to run through each line of the code I am interested in. What I think validate_and_run() creating an environment to call the functions.
For e.g. I am trying to debug shadowcat function from the package using following code:
library(devtools)
install_github("Karel-Kroeze/ShadowCAT")
library(ShadowCAT)
debug(shadowcat)
alpha_beta <- simulate_testbank(model = "GPCM", number_items = 100,
number_dimensions = 3, number_itemsteps = 3)
model <- "GPCM"
start_items <- list(type = 'fixed', item_keys = c("item33", "item5", "item23"), n = 3)
stop_test <- list(min_n = 4, max_n = 30, target = c(.1, .1, .1))
estimator <- "maximum_aposteriori"
information_summary <- "posterior_determinant"
prior_form <- "normal"
prior_parameters <- list(mu = c(0, 0, 0), Sigma = diag(3))
# Initial call: get key of first item to adminster
call1 <- shadowcat(answers = NULL, estimate = c(0, 0, 0), variance = as.vector(diag(3) * 25),
model = model, alpha = alpha_beta$alpha, beta = alpha_beta$beta,
start_items = start_items, stop_test = stop_test,
estimator = estimator, information_summary = information_summary,
prior_form = prior_form, prior_parameters = prior_parameters)
In above shadowcat() function there ane many internal functions written but I do not see they are getting called anywhere in the shadowcat(). My speculation is that it is getting called in validate_and_run() function.
My question is how can I debug those internal functions inside the shadowcat() and see what each variable is storing and what are the inputs of the internal functions when they are getting called?
EDIT 1:
In any usual R function, when one debugs it, you can move your debugging cursor (yellow highlighted line) line by line by clicking on next in RStudio. Also, once you have gone over that line of code , you can see the value of the variable by printing the variable name on console. This I am not able to do in shadowcat() function. Internal function codes are written but they are never called in visible form. I need to see where they are getting called and need to debug through them
Any leads appreciated.
EDIT 2
Main body of the code:
function (answers, estimate, variance, model, alpha, beta, start_items,
stop_test, estimator, information_summary, prior_form = NULL,
prior_parameters = NULL, guessing = NULL, eta = NULL, constraints_and_characts = NULL,
lower_bound = NULL, upper_bound = NULL, safe_eap = FALSE,
eap_estimation_procedure = "riemannsum")
{
result <- function() {
switch_to_maximum_aposteriori <- estimator == "maximum_likelihood" &&
!is.null(lower_bound) && !is.null(upper_bound)
estimator <- get_estimator(switch_to_maximum_aposteriori = switch_to_maximum_aposteriori)
prior_form <- get_prior_form(switch_to_maximum_aposteriori = switch_to_maximum_aposteriori)
prior_parameters <- get_prior_parameters(switch_to_maximum_aposteriori = switch_to_maximum_aposteriori)
beta <- get_beta()
guessing <- get_guessing()
number_items <- nrow(alpha)
number_dimensions <- ncol(alpha)
number_itemsteps_per_item <- number_non_missing_cells_per_row(beta)
lp_constraints_and_characts <- get_lp_constraints_and_characts(number_items = number_items)
item_keys <- rownames(alpha)
item_keys_administered <- names(answers)
item_keys_available <- get_item_keys_available(item_keys_administered = item_keys_administered,
item_keys = item_keys)
attr(estimate, "variance") <- matrix(variance, ncol = number_dimensions)
estimate <- update_person_estimate(estimate = estimate,
answers_vector = unlist(answers), item_indices_administered = match(item_keys_administered,
item_keys), number_dimensions = number_dimensions,
alpha = alpha, beta = beta, guessing = guessing,
number_itemsteps_per_item = number_itemsteps_per_item,
estimator = estimator, prior_form = prior_form, prior_parameters = prior_parameters)
continue_test <- !terminate_test(number_answers = length(answers),
estimate = estimate, min_n = stop_test$min_n, max_n = stop_test$max_n,
variance_target = stop_test$target, cutoffs = stop_test$cutoffs)
if (continue_test) {
index_new_item <- get_next_item(start_items = start_items,
information_summary = information_summary, lp_constraints = lp_constraints_and_characts$lp_constraints,
lp_characters = lp_constraints_and_characts$lp_chars,
estimate = estimate, model = model, answers = unlist(answers),
prior_form = prior_form, prior_parameters = prior_parameters,
available = match(item_keys_available, item_keys),
administered = match(item_keys_administered,
item_keys), number_items = number_items, number_dimensions = number_dimensions,
estimator = estimator, alpha = alpha, beta = beta,
guessing = guessing, number_itemsteps_per_item = number_itemsteps_per_item,
stop_test = stop_test, eap_estimation_procedure = eap_estimation_procedure)
key_new_item <- item_keys[index_new_item]
}
else {
key_new_item <- NULL
}
list(key_new_item = as.scalar2(key_new_item), continue_test = as.scalar2(continue_test),
estimate = as.vector(estimate), variance = as.vector(attr(estimate,
"variance")), answers = answers)
}
update_person_estimate <- function(estimate, answers_vector,
item_indices_administered, number_dimensions, alpha,
beta, guessing, number_itemsteps_per_item, estimator,
prior_form, prior_parameters) {
if (length(answers) > start_items$n)
estimate_latent_trait(estimate = estimate, answers = answers_vector,
prior_form = prior_form, prior_parameters = prior_parameters,
model = model, administered = item_indices_administered,
number_dimensions = number_dimensions, estimator = estimator,
alpha = alpha, beta = beta, guessing = guessing,
number_itemsteps_per_item = number_itemsteps_per_item,
safe_eap = safe_eap, eap_estimation_procedure = eap_estimation_procedure)
else estimate
}
get_item_keys_available <- function(item_keys_administered,
item_keys) {
if (is.null(item_keys_administered))
item_keys
else item_keys[-which(item_keys %in% item_keys_administered)]
}
get_beta <- function() {
if (model == "GPCM" && is.null(beta) && !is.null(eta))
row_cumsum(eta)
else beta
}
get_guessing <- function() {
if (is.null(guessing))
matrix(0, nrow = nrow(as.matrix(alpha)), ncol = 1,
dimnames = list(rownames(alpha), NULL))
else guessing
}
get_estimator <- function(switch_to_maximum_aposteriori) {
if (switch_to_maximum_aposteriori)
"maximum_aposteriori"
else estimator
}
get_prior_form <- function(switch_to_maximum_aposteriori) {
if (switch_to_maximum_aposteriori)
"uniform"
else prior_form
}
get_prior_parameters <- function(switch_to_maximum_aposteriori) {
if (switch_to_maximum_aposteriori)
list(lower_bound = lower_bound, upper_bound = upper_bound)
else prior_parameters
}
get_lp_constraints_and_characts <- function(number_items) {
if (is.null(constraints_and_characts))
NULL
else constraints_lp_format(max_n = stop_test$max_n, number_items = number_items,
characteristics = constraints_and_characts$characteristics,
constraints = constraints_and_characts$constraints)
}
validate <- function() {
if (is.null(estimate))
return(add_error("estimate", "is missing"))
if (is.null(variance))
return(add_error("variance", "is missing"))
if (!is.vector(variance))
return(add_error("variance", "should be entered as vector"))
if (sqrt(length(variance)) != round(sqrt(length(variance))))
return(add_error("variance", "should be a covariance matrix turned into a vector"))
if (is.null(model))
return(add_error("model", "is missing"))
if (is.null(alpha))
return(add_error("alpha", "is missing"))
if (is.null(start_items))
return(add_error("start_items", "is missing"))
if (is.null(stop_test))
return(add_error("stop_test", "is missing"))
if (is.null(estimator))
return(add_error("estimator", "is missing"))
if (is.null(information_summary))
return(add_error("information_summary", "is missing"))
if (!is.matrix(alpha) || is.null(rownames(alpha)))
return(add_error("alpha", "should be a matrix with item keys as row names"))
if (!is.null(beta) && (!is.matrix(beta) || is.null(rownames(beta))))
return(add_error("beta", "should be a matrix with item keys as row names"))
if (!is.null(eta) && (!is.matrix(eta) || is.null(rownames(eta))))
return(add_error("eta", "should be a matrix with item keys as row names"))
if (!is.null(guessing) && (!is.matrix(guessing) || ncol(guessing) !=
1 || is.null(rownames(guessing))))
return(add_error("guessing", "should be a single column matrix with item keys as row names"))
if (!is.null(start_items$type) && start_items$type ==
"random_by_dimension" && length(start_items$n_by_dimension) %not_in%
c(1, length(estimate)))
return(add_error("start_items", "length of n_by_dimension should be a scalar or vector of the length of estimate"))
if (!row_names_are_equal(rownames(alpha), list(alpha,
beta, eta, guessing)))
add_error("alpha_beta_eta_guessing", "should have equal row names, in same order")
if (!is.null(beta) && !na_only_end_rows(beta))
add_error("beta", "can only contain NA at the end of rows, no values allowed after an NA in a row")
if (!is.null(eta) && !na_only_end_rows(eta))
add_error("eta", "can only contain NA at the end of rows, no values allowed after an NA in a row")
if (length(estimate) != ncol(alpha))
add_error("estimate", "length should be equal to the number of columns of the alpha matrix")
if (length(estimate)^2 != length(variance))
add_error("variance", "should have a length equal to the length of estimate squared")
if (is.null(answers) && !is.positive.definite(matrix(variance,
ncol = sqrt(length(variance)))))
add_error("variance", "matrix is not positive definite")
if (model %not_in% c("3PLM", "GPCM", "SM", "GRM"))
add_error("model", "of unknown type")
if (model != "GPCM" && is.null(beta))
add_error("beta", "is missing")
if (model == "GPCM" && is.null(beta) && is.null(eta))
add_error("beta_and_eta", "are both missing; define at least one of them")
if (model == "GPCM" && !is.null(beta) && !is.null(eta) &&
!all(row_cumsum(eta) == beta))
add_error("beta_and_eta", "objects do not match")
if (estimator != "maximum_likelihood" && is.null(prior_form))
add_error("prior_form", "is missing")
if (estimator != "maximum_likelihood" && is.null(prior_parameters))
add_error("prior_parameters", "is missing")
if (!is.null(prior_form) && prior_form %not_in% c("normal",
"uniform"))
add_error("prior_form", "of unknown type")
if (!is.null(prior_form) && !is.null(prior_parameters) &&
prior_form == "uniform" && (is.null(prior_parameters$lower_bound) ||
is.null(prior_parameters$upper_bound)))
add_error("prior_form_is_uniform", "so prior_parameters should contain lower_bound and upper_bound")
if (!is.null(prior_form) && !is.null(prior_parameters) &&
prior_form == "normal" && (is.null(prior_parameters$mu) ||
is.null(prior_parameters$Sigma)))
add_error("prior_form_is_normal", "so prior_parameters should contain mu and Sigma")
if (!is.null(prior_parameters$mu) && length(prior_parameters$mu) !=
length(estimate))
add_error("prior_parameters_mu", "should have same length as estimate")
if (!is.null(prior_parameters$Sigma) && (!is.matrix(prior_parameters$Sigma) ||
!all(dim(prior_parameters$Sigma) == c(length(estimate),
length(estimate))) || !is.positive.definite(prior_parameters$Sigma)))
add_error("prior_parameters_sigma", "should be a square positive definite matrix, with dimensions equal to the length of estimate")
if (!is.null(prior_parameters$lower_bound) && !is.null(prior_parameters$upper_bound) &&
(length(prior_parameters$lower_bound) != length(estimate) ||
length(prior_parameters$upper_bound) != length(estimate)))
add_error("prior_parameters_bounds", "should contain lower and upper bound of the same length as estimate")
if (is.null(stop_test$max_n))
add_error("stop_test", "contains no max_n")
if (!is.null(stop_test$max_n) && stop_test$max_n > nrow(alpha))
add_error("stop_test_max_n", "is larger than the number of items in the item bank")
if (!is.null(stop_test$max_n) && !is.null(stop_test$cutoffs) &&
(!is.matrix(stop_test$cutoffs) || nrow(stop_test$cutoffs) <
stop_test$max_n || ncol(stop_test$cutoffs) !=
length(estimate) || any(is.na(stop_test$cutoffs))))
add_error("stop_test_cutoffs", "should be a matrix without missing values, and number of rows equal to max_n and number of columns equal to the number of dimensions")
if (start_items$n == 0 && information_summary == "posterior_expected_kullback_leibler")
add_error("start_items", "requires n > 0 for posterior expected kullback leibler information summary")
if (!is.null(start_items$type) && start_items$type ==
"random_by_dimension" && length(start_items$n_by_dimension) ==
length(estimate) && start_items$n != sum(start_items$n_by_dimension))
add_error("start_items_n", "contains inconsistent information. Total length of start phase and sum of length per dimension do not match (n != sum(n_by_dimension)")
if (!is.null(start_items$type) && start_items$type ==
"random_by_dimension" && length(start_items$n_by_dimension) ==
1 && start_items$n != sum(rep(start_items$n_by_dimension,
length(estimate))))
add_error("start_items_n", "contains inconsistent information. Total length of start phase and sum of length per dimension do not match")
if (!is.null(stop_test$cutoffs) && !is.matrix(stop_test$cutoffs))
add_error("stop_test", "contains cutoff values in non-matrix format")
if (!all(names(answers) %in% rownames(alpha)))
add_error("answers", "contains non-existing key")
if (estimator %not_in% c("maximum_likelihood", "maximum_aposteriori",
"expected_aposteriori"))
add_error("estimator", "of unknown type")
if (information_summary %not_in% c("determinant", "posterior_determinant",
"trace", "posterior_trace", "posterior_expected_kullback_leibler"))
add_error("information_summary", "of unknown type")
if (estimator == "maximum_likelihood" && information_summary %in%
c("posterior_determinant", "posterior_trace", "posterior_expected_kullback_leibler"))
add_error("estimator_is_maximum_likelihood", "so using a posterior information summary makes no sense")
if (estimator != "maximum_likelihood" && (!is.null(lower_bound) ||
!is.null(upper_bound)))
add_error("bounds", "can only be defined if estimator is maximum likelihood")
if (!is.null(lower_bound) && length(lower_bound) %not_in%
c(1, length(estimate)))
add_error("lower_bound", "length of lower bound should be a scalar or vector of the length of estimate")
if (!is.null(upper_bound) && length(upper_bound) %not_in%
c(1, length(estimate)))
add_error("upper_bound", "length of upper bound should be a scalar or vector of the length of estimate")
if (!no_missing_information(constraints_and_characts$characteristics,
constraints_and_characts$constraints))
add_error("constraints_and_characts", "constraints and characteristics should either be defined both or not at all")
if (!characteristics_correct_format(constraints_and_characts$characteristics,
number_items = nrow(alpha)))
add_error("characteristics", "should be a data frame with number of rows equal to the number of items in the item bank")
if (!constraints_correct_structure(constraints_and_characts$constraints))
add_error("constraints_structure", "should be a list of length three lists, with elements named 'name', 'op', 'target'")
if (!constraints_correct_names(constraints_and_characts$constraints,
constraints_and_characts$characteristics))
add_error("constraints_name_elements", "should be defined as described in the details section of constraints_lp_format()")
if (!constraints_correct_operators(constraints_and_characts$constraints))
add_error("constraints_operator_elements", "should be defined as described in the details section of constraints_lp_format()")
if (!constraints_correct_targets(constraints_and_characts$constraints))
add_error("constraints_target_elements", "should be defined as described in the details section of constraints_lp_format()")
}
invalid_result <- function() {
list(errors = errors())
}
validate_and_run()
}
EDIT 3
validate_and_run() function:
function ()
{
.errors <- list()
add_error <- function(key, value = TRUE) {
.errors[key] <<- value
}
errors <- function() {
.errors
}
validate_and_runner <- function() {
if (exists("validate", parent.frame(), inherits = FALSE))
do.call("validate", list(), envir = parent.frame())
if (exists("test_inner_functions", envir = parent.frame(n = 2),
inherits = FALSE))
get("result", parent.frame())
else if (length(errors()) == 0)
do.call("result", list(), envir = parent.frame())
else do.call("invalid_result", list(), envir = parent.frame())
}
for (n in ls(environment())) assign(n, get(n, environment()),
parent.frame())
do.call("validate_and_runner", list(), envir = parent.frame())
}