I want to debug functions in ShadowCAT package. https://github.com/Karel-Kroeze/ShadowCAT/tree/master/R
Take any internal functions from this package, they are getting called via validate_and_run() function. If I go though it I am directly presented an output and I am not able to run through each line of the code I am interested in. What I think validate_and_run() creating an environment to call the functions.
For e.g. I am trying to debug shadowcat function from the package using following code:
library(devtools)
install_github("Karel-Kroeze/ShadowCAT")
library(ShadowCAT)
debug(shadowcat)
alpha_beta <- simulate_testbank(model = "GPCM", number_items = 100,
number_dimensions = 3, number_itemsteps = 3)
model <- "GPCM"
start_items <- list(type = 'fixed', item_keys = c("item33", "item5", "item23"), n = 3)
stop_test <- list(min_n = 4, max_n = 30, target = c(.1, .1, .1))
estimator <- "maximum_aposteriori"
information_summary <- "posterior_determinant"
prior_form <- "normal"
prior_parameters <- list(mu = c(0, 0, 0), Sigma = diag(3))
# Initial call: get key of first item to adminster
call1 <- shadowcat(answers = NULL, estimate = c(0, 0, 0), variance = as.vector(diag(3) * 25),
model = model, alpha = alpha_beta$alpha, beta = alpha_beta$beta,
start_items = start_items, stop_test = stop_test,
estimator = estimator, information_summary = information_summary,
prior_form = prior_form, prior_parameters = prior_parameters)
In above shadowcat() function there ane many internal functions written but I do not see they are getting called anywhere in the shadowcat(). My speculation is that it is getting called in validate_and_run() function.
My question is how can I debug those internal functions inside the shadowcat() and see what each variable is storing and what are the inputs of the internal functions when they are getting called?
EDIT 1:
In any usual R function, when one debugs it, you can move your debugging cursor (yellow highlighted line) line by line by clicking on next in RStudio. Also, once you have gone over that line of code , you can see the value of the variable by printing the variable name on console. This I am not able to do in shadowcat() function. Internal function codes are written but they are never called in visible form. I need to see where they are getting called and need to debug through them
Any leads appreciated.
EDIT 2
Main body of the code:
function (answers, estimate, variance, model, alpha, beta, start_items,
stop_test, estimator, information_summary, prior_form = NULL,
prior_parameters = NULL, guessing = NULL, eta = NULL, constraints_and_characts = NULL,
lower_bound = NULL, upper_bound = NULL, safe_eap = FALSE,
eap_estimation_procedure = "riemannsum")
{
result <- function() {
switch_to_maximum_aposteriori <- estimator == "maximum_likelihood" &&
!is.null(lower_bound) && !is.null(upper_bound)
estimator <- get_estimator(switch_to_maximum_aposteriori = switch_to_maximum_aposteriori)
prior_form <- get_prior_form(switch_to_maximum_aposteriori = switch_to_maximum_aposteriori)
prior_parameters <- get_prior_parameters(switch_to_maximum_aposteriori = switch_to_maximum_aposteriori)
beta <- get_beta()
guessing <- get_guessing()
number_items <- nrow(alpha)
number_dimensions <- ncol(alpha)
number_itemsteps_per_item <- number_non_missing_cells_per_row(beta)
lp_constraints_and_characts <- get_lp_constraints_and_characts(number_items = number_items)
item_keys <- rownames(alpha)
item_keys_administered <- names(answers)
item_keys_available <- get_item_keys_available(item_keys_administered = item_keys_administered,
item_keys = item_keys)
attr(estimate, "variance") <- matrix(variance, ncol = number_dimensions)
estimate <- update_person_estimate(estimate = estimate,
answers_vector = unlist(answers), item_indices_administered = match(item_keys_administered,
item_keys), number_dimensions = number_dimensions,
alpha = alpha, beta = beta, guessing = guessing,
number_itemsteps_per_item = number_itemsteps_per_item,
estimator = estimator, prior_form = prior_form, prior_parameters = prior_parameters)
continue_test <- !terminate_test(number_answers = length(answers),
estimate = estimate, min_n = stop_test$min_n, max_n = stop_test$max_n,
variance_target = stop_test$target, cutoffs = stop_test$cutoffs)
if (continue_test) {
index_new_item <- get_next_item(start_items = start_items,
information_summary = information_summary, lp_constraints = lp_constraints_and_characts$lp_constraints,
lp_characters = lp_constraints_and_characts$lp_chars,
estimate = estimate, model = model, answers = unlist(answers),
prior_form = prior_form, prior_parameters = prior_parameters,
available = match(item_keys_available, item_keys),
administered = match(item_keys_administered,
item_keys), number_items = number_items, number_dimensions = number_dimensions,
estimator = estimator, alpha = alpha, beta = beta,
guessing = guessing, number_itemsteps_per_item = number_itemsteps_per_item,
stop_test = stop_test, eap_estimation_procedure = eap_estimation_procedure)
key_new_item <- item_keys[index_new_item]
}
else {
key_new_item <- NULL
}
list(key_new_item = as.scalar2(key_new_item), continue_test = as.scalar2(continue_test),
estimate = as.vector(estimate), variance = as.vector(attr(estimate,
"variance")), answers = answers)
}
update_person_estimate <- function(estimate, answers_vector,
item_indices_administered, number_dimensions, alpha,
beta, guessing, number_itemsteps_per_item, estimator,
prior_form, prior_parameters) {
if (length(answers) > start_items$n)
estimate_latent_trait(estimate = estimate, answers = answers_vector,
prior_form = prior_form, prior_parameters = prior_parameters,
model = model, administered = item_indices_administered,
number_dimensions = number_dimensions, estimator = estimator,
alpha = alpha, beta = beta, guessing = guessing,
number_itemsteps_per_item = number_itemsteps_per_item,
safe_eap = safe_eap, eap_estimation_procedure = eap_estimation_procedure)
else estimate
}
get_item_keys_available <- function(item_keys_administered,
item_keys) {
if (is.null(item_keys_administered))
item_keys
else item_keys[-which(item_keys %in% item_keys_administered)]
}
get_beta <- function() {
if (model == "GPCM" && is.null(beta) && !is.null(eta))
row_cumsum(eta)
else beta
}
get_guessing <- function() {
if (is.null(guessing))
matrix(0, nrow = nrow(as.matrix(alpha)), ncol = 1,
dimnames = list(rownames(alpha), NULL))
else guessing
}
get_estimator <- function(switch_to_maximum_aposteriori) {
if (switch_to_maximum_aposteriori)
"maximum_aposteriori"
else estimator
}
get_prior_form <- function(switch_to_maximum_aposteriori) {
if (switch_to_maximum_aposteriori)
"uniform"
else prior_form
}
get_prior_parameters <- function(switch_to_maximum_aposteriori) {
if (switch_to_maximum_aposteriori)
list(lower_bound = lower_bound, upper_bound = upper_bound)
else prior_parameters
}
get_lp_constraints_and_characts <- function(number_items) {
if (is.null(constraints_and_characts))
NULL
else constraints_lp_format(max_n = stop_test$max_n, number_items = number_items,
characteristics = constraints_and_characts$characteristics,
constraints = constraints_and_characts$constraints)
}
validate <- function() {
if (is.null(estimate))
return(add_error("estimate", "is missing"))
if (is.null(variance))
return(add_error("variance", "is missing"))
if (!is.vector(variance))
return(add_error("variance", "should be entered as vector"))
if (sqrt(length(variance)) != round(sqrt(length(variance))))
return(add_error("variance", "should be a covariance matrix turned into a vector"))
if (is.null(model))
return(add_error("model", "is missing"))
if (is.null(alpha))
return(add_error("alpha", "is missing"))
if (is.null(start_items))
return(add_error("start_items", "is missing"))
if (is.null(stop_test))
return(add_error("stop_test", "is missing"))
if (is.null(estimator))
return(add_error("estimator", "is missing"))
if (is.null(information_summary))
return(add_error("information_summary", "is missing"))
if (!is.matrix(alpha) || is.null(rownames(alpha)))
return(add_error("alpha", "should be a matrix with item keys as row names"))
if (!is.null(beta) && (!is.matrix(beta) || is.null(rownames(beta))))
return(add_error("beta", "should be a matrix with item keys as row names"))
if (!is.null(eta) && (!is.matrix(eta) || is.null(rownames(eta))))
return(add_error("eta", "should be a matrix with item keys as row names"))
if (!is.null(guessing) && (!is.matrix(guessing) || ncol(guessing) !=
1 || is.null(rownames(guessing))))
return(add_error("guessing", "should be a single column matrix with item keys as row names"))
if (!is.null(start_items$type) && start_items$type ==
"random_by_dimension" && length(start_items$n_by_dimension) %not_in%
c(1, length(estimate)))
return(add_error("start_items", "length of n_by_dimension should be a scalar or vector of the length of estimate"))
if (!row_names_are_equal(rownames(alpha), list(alpha,
beta, eta, guessing)))
add_error("alpha_beta_eta_guessing", "should have equal row names, in same order")
if (!is.null(beta) && !na_only_end_rows(beta))
add_error("beta", "can only contain NA at the end of rows, no values allowed after an NA in a row")
if (!is.null(eta) && !na_only_end_rows(eta))
add_error("eta", "can only contain NA at the end of rows, no values allowed after an NA in a row")
if (length(estimate) != ncol(alpha))
add_error("estimate", "length should be equal to the number of columns of the alpha matrix")
if (length(estimate)^2 != length(variance))
add_error("variance", "should have a length equal to the length of estimate squared")
if (is.null(answers) && !is.positive.definite(matrix(variance,
ncol = sqrt(length(variance)))))
add_error("variance", "matrix is not positive definite")
if (model %not_in% c("3PLM", "GPCM", "SM", "GRM"))
add_error("model", "of unknown type")
if (model != "GPCM" && is.null(beta))
add_error("beta", "is missing")
if (model == "GPCM" && is.null(beta) && is.null(eta))
add_error("beta_and_eta", "are both missing; define at least one of them")
if (model == "GPCM" && !is.null(beta) && !is.null(eta) &&
!all(row_cumsum(eta) == beta))
add_error("beta_and_eta", "objects do not match")
if (estimator != "maximum_likelihood" && is.null(prior_form))
add_error("prior_form", "is missing")
if (estimator != "maximum_likelihood" && is.null(prior_parameters))
add_error("prior_parameters", "is missing")
if (!is.null(prior_form) && prior_form %not_in% c("normal",
"uniform"))
add_error("prior_form", "of unknown type")
if (!is.null(prior_form) && !is.null(prior_parameters) &&
prior_form == "uniform" && (is.null(prior_parameters$lower_bound) ||
is.null(prior_parameters$upper_bound)))
add_error("prior_form_is_uniform", "so prior_parameters should contain lower_bound and upper_bound")
if (!is.null(prior_form) && !is.null(prior_parameters) &&
prior_form == "normal" && (is.null(prior_parameters$mu) ||
is.null(prior_parameters$Sigma)))
add_error("prior_form_is_normal", "so prior_parameters should contain mu and Sigma")
if (!is.null(prior_parameters$mu) && length(prior_parameters$mu) !=
length(estimate))
add_error("prior_parameters_mu", "should have same length as estimate")
if (!is.null(prior_parameters$Sigma) && (!is.matrix(prior_parameters$Sigma) ||
!all(dim(prior_parameters$Sigma) == c(length(estimate),
length(estimate))) || !is.positive.definite(prior_parameters$Sigma)))
add_error("prior_parameters_sigma", "should be a square positive definite matrix, with dimensions equal to the length of estimate")
if (!is.null(prior_parameters$lower_bound) && !is.null(prior_parameters$upper_bound) &&
(length(prior_parameters$lower_bound) != length(estimate) ||
length(prior_parameters$upper_bound) != length(estimate)))
add_error("prior_parameters_bounds", "should contain lower and upper bound of the same length as estimate")
if (is.null(stop_test$max_n))
add_error("stop_test", "contains no max_n")
if (!is.null(stop_test$max_n) && stop_test$max_n > nrow(alpha))
add_error("stop_test_max_n", "is larger than the number of items in the item bank")
if (!is.null(stop_test$max_n) && !is.null(stop_test$cutoffs) &&
(!is.matrix(stop_test$cutoffs) || nrow(stop_test$cutoffs) <
stop_test$max_n || ncol(stop_test$cutoffs) !=
length(estimate) || any(is.na(stop_test$cutoffs))))
add_error("stop_test_cutoffs", "should be a matrix without missing values, and number of rows equal to max_n and number of columns equal to the number of dimensions")
if (start_items$n == 0 && information_summary == "posterior_expected_kullback_leibler")
add_error("start_items", "requires n > 0 for posterior expected kullback leibler information summary")
if (!is.null(start_items$type) && start_items$type ==
"random_by_dimension" && length(start_items$n_by_dimension) ==
length(estimate) && start_items$n != sum(start_items$n_by_dimension))
add_error("start_items_n", "contains inconsistent information. Total length of start phase and sum of length per dimension do not match (n != sum(n_by_dimension)")
if (!is.null(start_items$type) && start_items$type ==
"random_by_dimension" && length(start_items$n_by_dimension) ==
1 && start_items$n != sum(rep(start_items$n_by_dimension,
length(estimate))))
add_error("start_items_n", "contains inconsistent information. Total length of start phase and sum of length per dimension do not match")
if (!is.null(stop_test$cutoffs) && !is.matrix(stop_test$cutoffs))
add_error("stop_test", "contains cutoff values in non-matrix format")
if (!all(names(answers) %in% rownames(alpha)))
add_error("answers", "contains non-existing key")
if (estimator %not_in% c("maximum_likelihood", "maximum_aposteriori",
"expected_aposteriori"))
add_error("estimator", "of unknown type")
if (information_summary %not_in% c("determinant", "posterior_determinant",
"trace", "posterior_trace", "posterior_expected_kullback_leibler"))
add_error("information_summary", "of unknown type")
if (estimator == "maximum_likelihood" && information_summary %in%
c("posterior_determinant", "posterior_trace", "posterior_expected_kullback_leibler"))
add_error("estimator_is_maximum_likelihood", "so using a posterior information summary makes no sense")
if (estimator != "maximum_likelihood" && (!is.null(lower_bound) ||
!is.null(upper_bound)))
add_error("bounds", "can only be defined if estimator is maximum likelihood")
if (!is.null(lower_bound) && length(lower_bound) %not_in%
c(1, length(estimate)))
add_error("lower_bound", "length of lower bound should be a scalar or vector of the length of estimate")
if (!is.null(upper_bound) && length(upper_bound) %not_in%
c(1, length(estimate)))
add_error("upper_bound", "length of upper bound should be a scalar or vector of the length of estimate")
if (!no_missing_information(constraints_and_characts$characteristics,
constraints_and_characts$constraints))
add_error("constraints_and_characts", "constraints and characteristics should either be defined both or not at all")
if (!characteristics_correct_format(constraints_and_characts$characteristics,
number_items = nrow(alpha)))
add_error("characteristics", "should be a data frame with number of rows equal to the number of items in the item bank")
if (!constraints_correct_structure(constraints_and_characts$constraints))
add_error("constraints_structure", "should be a list of length three lists, with elements named 'name', 'op', 'target'")
if (!constraints_correct_names(constraints_and_characts$constraints,
constraints_and_characts$characteristics))
add_error("constraints_name_elements", "should be defined as described in the details section of constraints_lp_format()")
if (!constraints_correct_operators(constraints_and_characts$constraints))
add_error("constraints_operator_elements", "should be defined as described in the details section of constraints_lp_format()")
if (!constraints_correct_targets(constraints_and_characts$constraints))
add_error("constraints_target_elements", "should be defined as described in the details section of constraints_lp_format()")
}
invalid_result <- function() {
list(errors = errors())
}
validate_and_run()
}
EDIT 3
validate_and_run() function:
function ()
{
.errors <- list()
add_error <- function(key, value = TRUE) {
.errors[key] <<- value
}
errors <- function() {
.errors
}
validate_and_runner <- function() {
if (exists("validate", parent.frame(), inherits = FALSE))
do.call("validate", list(), envir = parent.frame())
if (exists("test_inner_functions", envir = parent.frame(n = 2),
inherits = FALSE))
get("result", parent.frame())
else if (length(errors()) == 0)
do.call("result", list(), envir = parent.frame())
else do.call("invalid_result", list(), envir = parent.frame())
}
for (n in ls(environment())) assign(n, get(n, environment()),
parent.frame())
do.call("validate_and_runner", list(), envir = parent.frame())
}
RStudio provides a nice function View (with uppercase V) to take a look into the data, but with R it's still nasty to get orientation in a large data set. The most common options are...
names(df)
str(df)
If you're coming from SPSS, R seems like a downgrade in this respect. I wondered whether there is a more user-friendly option? I did not find a ready-one, so I'd like to share my solution with you.
Using RStudio's built-in function View, it's white simple to have a variable listing for a data.frame similar to the one in SPSS. This function creates a new data.frame with the variable information and displays in the RStudio GUI via View.
# Better variables view
Varlist = function(sia) {
# Init varlist output
varlist = data.frame(row.names = names(sia))
varlist[["comment"]] = NA
varlist[["type"]] = NA
varlist[["values"]] = NA
varlist[["NAs"]] = NA
# Fill with meta information
for (var in names(sia)) {
if (!is.null(comment(sia[[var]]))) {
varlist[[var, "comment"]] = comment(sia[[var]])
}
varlist[[var, "NAs"]] = sum(is.na(sia[[var]]))
if (is.factor(sia[[var]])) {
varlist[[var, "type"]] = "factor"
varlist[[var, "values"]] = paste(levels(sia[[var]]), collapse=", ")
} else if (is.character(sia[[var]])) {
varlist[[var, "type"]] = "character"
} else if (is.logical(sia[[var]])) {
varlist[[var, "type"]] = "logical"
n = sum(!is.na(sia[[var]]))
if (n > 0) {
varlist[[var, "values"]] = paste(round(sum(sia[[var]], na.rm=T) / n * 100), "% TRUE", sep="")
}
} else if (is.numeric(sia[[var]])) {
varlist[[var, "type"]] = typeof(sia[[var]])
n = sum(!is.na(sia[[var]]))
if (n > 0) {
varlist[[var, "values"]] = paste(min(sia[[var]], na.rm=T), "...", max(sia[[var]], na.rm=T))
}
} else {
varlist[[var, "type"]] = typeof(sia[[var]])
}
}
View(varlist)
}
My recommendation is to store that as a file (e.g., Varlist.R) and whever you need it, just type:
source("Varlist.R")
Varlist(df)
Again please take note of the uppercase V using as function name.
Limitation: When working with data.frame, the listing will not be updated unless Varlist(df) is run again.
Note: R has a built-in option to view data with print. If working with pure R, just replace the View(varlist) by print(varlist). Yet, depending on screen size, Hmisc::describe() could be a better option for the console.
If you find a bug in a package, it's usually possible to patch the problem with fixInNamespace, e.g. fixInNamespace("mean.default", "base").
For S4 methods, I'm not sure how to do it though. The method I'm looking at is in the gWidgetstcltk package. You can see the source code with
getMethod(".svalue", c("gTabletcltk", "guiWidgetsToolkittcltk"))
I can't find the methods with fixInNamespace.
fixInNamespace(".svalue", "gWidgetstcltk")
Error in get(subx, envir = ns, inherits = FALSE) :
object '.svalue' not found
I thought setMethod might do the trick, but
setMethod(".svalue", c("gTabletcltk", "guiWidgetsToolkittcltk"),
definition = function (obj, toolkit, index = NULL, drop = NULL, ...)
{
widget = getWidget(obj)
sel <- unlist(strsplit(tclvalue(tcl(widget, "selection")),
" "))
if (length(sel) == 0) {
return(NA)
}
theChildren <- .allChildren(widget)
indices <- sapply(sel, function(i) match(i, theChildren))
inds <- which(visible(obj))[indices]
if (!is.null(index) && index == TRUE) {
return(inds)
}
if (missing(drop) || is.null(drop))
drop = TRUE
chosencol <- tag(obj, "chosencol")
if (drop)
return(obj[inds, chosencol, drop = drop])
else return(obj[inds, ])
},
where = "package:gWidgetstcltk"
)
Error in setMethod(".svalue", c("gTabletcltk", "guiWidgetsToolkittcltk"), :
the environment "gWidgetstcltk" is locked; cannot assign methods for function ".svalue"
Any ideas?
How about the old-school way of getting the source, applying the change and rebuilding?
you can first get the generic out, and then fix the generic by setMethod in your global environment, and then assign it back to that namespace
.svalue <- gWidgetstcltk:::.svalue
setMethod(".svalue", c("gTabletcltk", "guiWidgetsToolkittcltk"),
definition = function (obj, toolkit, index = NULL, drop = NULL, ...)
{
widget = getWidget(obj)
sel <- unlist(strsplit(tclvalue(tcl(widget, "selection")),
" "))
if (length(sel) == 0) {
return(NA)
}
theChildren <- .allChildren(widget)
indices <- sapply(sel, function(i) match(i, theChildren))
inds <- which(visible(obj))[indices]
if (!is.null(index) && index == TRUE) {
return(inds)
}
if (missing(drop) || is.null(drop))
drop = TRUE
chosencol <- tag(obj, "chosencol")
if (drop)
return(obj[inds, chosencol, drop = drop])
else return(obj[inds, ])
}#,
#where = "package:gWidgetstcltk"
)
assignInNamespace(".svalue", .svalue, ns = "gWidgetstcltk")