Finding Outliers and ChiSquare Matrix of a purely Categorical Dataset - r

I have been assigned the task of making a prediction model. The data set given to me is purely categorical and consists of 92 variables. A portion of it is given below:
Dataset <- structure(list(Age.Group = structure(c(1L, 2L, 3L, 3L, 4L, 4L,
4L, 1L, 4L, 4L, 2L, 1L, 2L, 5L, 3L, 2L, 1L, 4L, 1L, 4L, 4L, 3L,
4L, 2L, 2L, 1L, 4L, 2L, 3L, 2L, 4L, 4L, 3L, 3L, 3L, 3L, 5L, 3L,
2L, 2L, 2L, 2L, 4L, 2L, 3L, 4L, 3L, 3L, 1L, 4L), .Label = c("1",
"2", "3", "4", "5"), class = "factor"), Sex = structure(c(2L,
2L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L,
2L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L,
2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L,
1L), .Label = c("Female", "Male"), class = "factor"), LOS = structure(c(2L,
2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L,
2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L,
2L), .Label = c("Abnormal", "Normal"), class = "factor"), Day.to.Operation = structure(c(1L,
2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L,
1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L,
1L), .Label = c("Abnormal", "Normal"), class = "factor"), Admit.Source = structure(c(2L,
2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L), .Label = c("Emergency", "Outpatient clinic"), class = "factor"),
Insurance.Payors = structure(c(3L, 1L, 3L, 3L, 1L, 1L, 1L,
3L, 1L, 3L, 1L, 3L, 1L, 1L, 5L, 1L, 1L, 2L, 1L, 5L, 1L, 5L,
1L, 3L, 1L, 3L, 1L, 1L, 1L, 3L, 3L, 5L, 1L, 1L, 1L, 5L, 5L,
1L, 1L, 1L, 1L, 1L, 3L, 5L, 1L, 1L, 1L, 1L, 3L, 4L), .Label = c("Basic medical insurance for urban residents",
"Basic medical insurance for urban residents Others", "Free Medical Care",
"New Rural Cooperative Medical Care", "Self payment"), class = "factor"),
Current.Recent.Smoker...1.year. = structure(c(1L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L,
1L, 2L), .Label = c("No", "Yes"), class = "factor"), Hypertension = structure(c(1L,
1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L,
2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L,
2L, 2L, 1L, 2L), .Label = c("No", "Yes"), class = "factor"),
Dyslipidemia = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L,
2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L,
1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L), .Label = c("No",
"Yes"), class = "factor"), Family.History.of.Premature.CAD = structure(c(2L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L,
1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 1L, 2L), .Label = c("No", "Yes"), class = "factor"),
MI.History = structure(c(1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L,
1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L), .Label = c("No",
"Yes"), class = "factor"), Heart.Failure.History = structure(c(1L,
2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L,
1L, 1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"),
PCI.History = structure(c(2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L,
2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L), .Label = c("No",
"Yes"), class = "factor"), BMI.Group = structure(c(3L, 2L,
3L, 2L, 3L, 1L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 3L, 2L, 3L, 3L,
3L, 3L, 3L, 4L, 2L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 4L, 2L, 3L, 3L, 3L, 2L, 3L, 2L, 3L,
3L, 4L, 2L), .Label = c("2", "3", "4", "5"), class = "factor"),
Cerebrovascular.Disease = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = c("No", "Yes"), class = "factor"), Peripheral.Arterial.Disease = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"),
Chronic.Lung.Disease = structure(c(1L, 1L, 1L, 1L, 1L, 2L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("No",
"Yes"), class = "factor"), Diabetes.Mellitus = structure(c(2L,
1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 1L, 2L, 1L), .Label = c("No", "Yes"), class = "factor"),
Diabetes.Therapy = structure(c(4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 3L, 4L, 2L, 4L, 4L, 1L, 2L, 4L, 4L, 4L, 2L, 2L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 4L,
2L, 4L, 4L, 4L, 4L, 2L, 4L, 2L, 4L, 4L, 4L, 4L, 2L), .Label = c("Diet",
"Insulin", "N/A", "Oral"), class = "factor"), Heart.Rate = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L,
1L, 2L, 2L, 2L), .Label = c("Abnormal", "Normal"), class = "factor"),
CAD.Presentation = structure(c(3L, 5L, 5L, 4L, 5L, 5L, 4L,
1L, 5L, 5L, 5L, 5L, 4L, 4L, 5L, 1L, 5L, 5L, 5L, 3L, 5L, 5L,
5L, 1L, 5L, 5L, 5L, 5L, 5L, 3L, 4L, 1L, 5L, 5L, 5L, 5L, 3L,
5L, 4L, 3L, 5L, 4L, 5L, 5L, 2L, 5L, 5L, 3L, 1L, 1L), .Label = c("Non STEMI 7 days",
"Silent myocardial ischemia 14 days", "Stable angina 42 days",
"STEMI 7 days", "Unstable angina 60 days"), class = "factor"),
STEMI.Non.STEMI.Onset.Date = structure(c(1L, 1L, 2L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L,
1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L,
1L), .Label = c("0", "1", "17"), class = "factor"), STEMI.Non.STEMI.Estimated.Time = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"),
Anginal.Classification.w.in.2.Weeks = structure(c(2L, 4L,
3L, 5L, 1L, 5L, 4L, 1L, 5L, 4L, 5L, 2L, 2L, 3L, 1L, 1L, 2L,
5L, 5L, 3L, 2L, 5L, 2L, 2L, 2L, 4L, 1L, 2L, 3L, 5L, 2L, 4L,
3L, 5L, 4L, 4L, 5L, 2L, 1L, 3L, 2L, 1L, 3L, 1L, 5L, 2L, 3L,
2L, 1L, 2L), .Label = c("CCS I", "CCS II", "CCS III", "CCS IV",
"No symptoms"), class = "factor"), Anti.Anginal.Drug.Therapy.within.2.Weeks = structure(c(2L,
1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 2L,
2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L,
1L, 2L, 2L, 2L), .Label = c("No", "Yes"), class = "factor")), .Names = c("Age.Group",
"Sex", "LOS", "Day.to.Operation", "Admit.Source", "Insurance.Payors",
"Current.Recent.Smoker...1.year.", "Hypertension", "Dyslipidemia",
"Family.History.of.Premature.CAD", "MI.History", "Heart.Failure.History",
"PCI.History", "BMI.Group", "Cerebrovascular.Disease", "Peripheral.Arterial.Disease",
"Chronic.Lung.Disease", "Diabetes.Mellitus", "Diabetes.Therapy",
"Heart.Rate", "CAD.Presentation", "STEMI.Non.STEMI.Onset.Date",
"STEMI.Non.STEMI.Estimated.Time", "Anginal.Classification.w.in.2.Weeks",
"Anti.Anginal.Drug.Therapy.within.2.Weeks"), class = "data.frame", row.names = c(NA,
-50L))
I have performed the string cleaning and missing data treatment as of now. I need help in my next task which is to remove outliers and compute a chi square matrix from this categorical dataset. I am new to data analysis and am quite confused at this point. I would be extremely grateful if I could get help regarding this.

Related

How to randomly change 50% of a column observations based on another column condition?

I'm analyzing a survey and I need to do an interaction.plot() between variable disclosure_1 and TYPE_1 to see how they affect a third variable ADTRUST.
The survey randomly showed a different scenario to each participant. DISCLOSURE_1 is the code to indicate the type of disclosure that was shown to a respondent (B = Before, D = During, A = After, N = None).
TYPE_1 indicates the terminology used (DF = Deepfake, SM = Synthetic Media).
When creating the survey I dumbly only used DF for N because I thought there was no need to create an SM scenario if there was no disclosure shown (so no difference in terminology used). It still makes no sense logically, but when plotting the interaction plot the variable N does not appear. And since the study wants to analyze:
how disclosure impacts ADTRUST
how disclosure positioning impacts ADTRUST
how different terminology used in disclosure impact ADTRUST
I need to randomly substitute 50% of the observations with SM instead of DF under TYPE_1 but ONLY if the column DISCLOSURE_1 is == N.
I have no clue how to do that. Could somebody please help?
NOTE!!!! The structure is part of a bigger dataset. The dput was done only for columns [25:26], so keep in mind I need to be able to precisely select the columns in the code.
Thank youu
structure(list(DISCLOSURE_1 = structure(c(4L, 3L, 1L, 3L, 1L,
1L, 4L, 3L, 4L, 3L, 4L, 1L, 3L, 3L, 4L, 1L, 3L, 3L, 1L, 3L, 3L,
4L, 2L, 1L, 1L, 4L, 2L, 3L, 3L, 1L, 4L, 1L, 1L, 4L, 4L, 1L, 3L,
2L, 2L, 1L, 4L, 1L, 1L, 1L, 4L, 3L, 4L, 3L, 2L, 3L, 1L, 1L, 3L,
1L, 1L, 2L, 1L, 2L, 2L, 2L, 3L, 2L, 1L, 3L, 3L, 3L, 3L, 2L, 2L,
3L, 3L, 2L, 1L, 2L, 3L, 1L, 2L, 2L, 1L, 1L, 1L, 3L, 2L, 2L, 3L,
3L, 2L, 3L, 3L, 2L, 1L, 3L, 1L, 1L, 4L, 4L, 1L, 2L, 4L, 3L, 1L,
1L, 1L, 3L, 2L, 3L, 1L, 2L, 2L, 3L, 4L, 2L, 4L, 2L, 3L, 3L, 2L,
4L, 4L, 3L, 4L, 1L, 1L, 3L, 3L, 1L, 3L, 2L, 3L, 3L, 1L, 2L, 1L,
4L, 1L, 2L, 3L, 3L, 1L, 4L, 2L, 3L, 2L, 1L, 1L, 2L, 2L, 1L, 4L,
2L, 4L, 1L, 4L, 2L, 1L, 1L, 1L, 3L, 2L, 3L, 2L, 4L, 3L, 1L, 3L,
1L, 1L, 3L, 2L, 3L, 2L, 4L, 3L, 3L, 1L, 1L, 3L, 2L, 2L, 1L, 1L,
3L, 3L, 4L, 2L, 2L, 3L, 3L, 1L, 2L, 1L, 2L, 3L, 2L, 3L, 3L, 3L,
3L, 4L, 3L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 4L, 1L, 2L, 2L, 3L, 3L,
1L, 4L, 4L, 1L, 2L, 4L, 2L, 1L, 1L, 4L, 1L, 1L, 2L, 1L, 2L, 2L,
3L, 3L, 3L, 3L, 2L, 1L, 4L, 1L, 1L, 2L, 2L, 4L, 2L, 3L, 1L, 2L,
3L, 3L, 2L, 4L, 3L, 2L, 2L, 4L, 4L, 2L, 1L, 1L, 2L, 2L, 3L, 1L,
1L, 4L, 3L, 1L, 3L, 3L, 3L, 2L, 2L, 2L, 4L, 2L, 4L, 4L, 4L, 1L,
3L, 1L, 3L, 1L, 3L, 1L, 2L, 4L, 3L, 2L, 2L, 2L, 1L, 2L, 2L, 3L,
2L, 1L, 2L, 4L, 1L, 2L, 3L, 2L, 1L, 2L, 4L, 4L, 2L, 3L, 2L, 1L,
2L, 2L, 2L, 4L, 2L, 1L, 3L, 1L, 3L, 3L, 4L, 1L, 2L, 3L, 2L, 2L,
3L, 4L, 3L, 2L, 3L, 3L, 2L, 1L, 1L, 3L, 3L, 3L, 3L, 1L, 2L, 2L,
1L, 2L, 2L, 1L, 1L, 3L, 3L, 1L, 2L, 1L, 1L, 3L, 2L, 1L, 2L, 3L,
2L, 3L, 1L, 2L, 3L, 4L, 2L, 1L, 1L, 3L), .Label = c("A", "B",
"D", "N"), class = "factor"), TYPE_1 = structure(c(1L, 1L, 2L,
1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L,
2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L,
2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L,
2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L,
1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L,
2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L,
2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L,
1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 1L,
2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 1L,
2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L,
2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L,
1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L,
2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L,
1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L,
2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L), .Label = c("DF",
"SM"), class = "factor")), row.names = c(NA, -367L), class = c("tbl_df",
"tbl", "data.frame"))
With data as your data.frame, this will switch TYPE_1 from DF to SM for exactly half (rounded down) of the rows where DISCLOSURE_1 is N:
# Logical mask of rows where DISCLOSURE_1 is "N" and TYPE_1 is still "DF".
blnN <- data$DISCLOSURE_1 == "N" & data$TYPE_1 == "DF"
# which() drops NA comparisons, so rows with missing values are never picked
# and the half-count below cannot become NA.
idxN <- which(blnN)
# Flip exactly half of the candidates (rounded down). Sampling positions with
# sample.int() avoids sample()'s special case in which a single number k is
# treated as 1:k, and %/% gives an explicit integer size.
pick <- idxN[sample.int(length(idxN), length(idxN) %/% 2L)]
data$TYPE_1[pick] <- "SM"
If the 50% requirement is approximate, you can use runif() > 0.5
library(dplyr)
table(df)
TYPE_1
DISCLOSURE_1 DF SM
A 52 51
B 52 53
D 55 51
N 53 0
# Randomly switch roughly half of the "N"/"DF" rows to "SM".
# replace() keeps TYPE_1's existing type (a factor stays a factor with the
# same levels), whereas ifelse() on as.character() would return a character
# column. Rows whose condition is NA are left unchanged.
mut <- df |>
  mutate(
    TYPE_1 = replace(
      TYPE_1,
      DISCLOSURE_1 == "N" & TYPE_1 == "DF" & runif(n()) > 0.5,
      "SM"
    )
  )
table(mut)
TYPE_1
DISCLOSURE_1 DF SM
A 52 51
B 52 53
D 55 51
N 27 26

Using the likert package to plot two or multiple groups in R

I'm working with the likert package and I can produce grouped results. However, I can't figure out how can I work with two (or multiple) groups, i.e., grouping the results by two columns (sex and country)
In the wrong format, something like that
likert(ds[,3:7], grouping=ds[,1:2]) %>%
plot()
or:
likert(ds[,3:7], grouping=ds[,1]) %>%
plot() +
facet_wrap(~country)
The desired result can be this one
Or even better something like this one
Data and codes are below
ds <-structure(list(sex_female = structure(c(1L, 2L, 2L, 2L, 1L, 2L,
1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L,
1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L,
2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L,
1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L,
1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L), .Label = c("male",
"female"), class = "factor"), country = structure(c(2L, 2L, 2L,
1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L,
2L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L,
2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L,
2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L,
2L), .Label = c("br", "sp"), class = "factor"), eat1_c = structure(c(1L,
1L, 1L, 3L, 4L, 4L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 4L,
2L, 1L, 2L, 1L, 1L, 1L, 4L, 4L, 1L, 2L, 3L, 1L, 2L, 4L, 1L, 2L,
1L, 2L, 1L, 1L, 2L, 3L, 1L, 3L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 4L,
1L, 1L, 2L, 1L, 4L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 4L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
4L, 3L, 3L, 4L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 3L, 2L, 3L, 1L, 1L,
1L, 1L, 3L), .Label = c("0", "1", "2", "3"), class = "factor"),
eat2_c = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 4L, 1L, 1L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L,
NA, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, NA, 3L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
1L), .Label = c("0", "1", "2", "3"), class = "factor"), eat3_c = structure(c(2L,
3L, 3L, 4L, 4L, NA, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, NA, 2L,
1L, 1L, 3L, 2L, 3L, 1L, 1L, 3L, NA, 2L, 1L, 2L, 3L, 1L, 2L,
1L, 3L, 1L, 2L, 1L, 1L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, NA, 3L, 1L, 1L, 4L, 1L, 1L, 2L, 1L, 1L, 1L,
3L, 1L, 4L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 4L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 1L, 4L, 3L, 1L, 4L, 3L, 2L, 1L, 1L, 2L,
1L, 3L, 1L, 1L, 2L, 1L, 2L, 1L, 1L), .Label = c("0", "1",
"2", "3"), class = "factor"), eat4_c = structure(c(1L, 1L,
1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 3L, 1L, 1L, 1L, 4L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 3L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L,
1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 3L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 4L, 1L, 1L, 1L,
3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("0", "1", "2",
"3"), class = "factor"), eat5_c = structure(c(2L, 1L, 1L,
1L, 2L, 3L, 1L, 4L, 1L, 1L, 3L, 1L, 4L, 2L, 3L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 1L, 3L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 3L, 3L,
1L, 1L, 3L, 1L, 3L, 4L, 1L, 3L, 1L, 1L, 1L, 2L, 1L, 1L, 1L,
1L, 1L, 3L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 3L, 1L, 2L, 2L, 1L, 1L, 1L,
1L, 4L, 2L, 2L, 3L, 3L, 3L, 1L, 3L, 1L, 4L, 2L, 2L, 1L, 1L,
1L, 1L, 3L, 1L, 4L, 1L, 1L), .Label = c("0", "1", "2", "3"
), class = "factor")), row.names = c(NA, -100L), class = "data.frame")
library(likert)
# Summarise the items in columns 3 to 7 of ds, grouped by column 1, and plot
# the resulting likert object.
plot(likert(ds[, 3:7], grouping = ds[, 1]))

Getting specific combination of interaction as variable in logistic regression with R

I have this dataset and want to perform a regression analysis on it. I have two predictive variables, urban_rural and religious. Now I want to have two specific interaction variables: 1.) Urban/not religious and 2.) Rural/religious. I know that interaction is possible through the sign *, but this does not give me the desired combination of interaction. I guess one has to set the reference variable manually?
structure(list(urban_rural = structure(c(1L, 1L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L,
1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L,
1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L,
2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), .Label = c("Urban", "Rural", "Refugee camp"
), class = "factor"), religious = structure(c(2L, 1L, 2L, 2L,
3L, 2L, 2L, 3L, 1L, 3L, 3L, 1L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 2L, 3L, 2L, 2L, 2L, 3L, 3L, 3L,
2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 3L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 2L, 2L, 2L, 2L,
2L, 2L, 3L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 1L, 3L, 1L, 2L, 2L, 2L,
1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L,
2L, 1L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 3L, 2L, 2L, 3L,
2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 3L, 2L, 1L, 3L, 1L, 2L, 3L, 2L,
2L, 1L, 2L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 3L, 2L,
3L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 3L, 2L, 2L, 3L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L,
1L, 2L, 2L, 2L, 2L, 3L, 2L, 3L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
3L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L,
2L, 2L, 3L, 2L, 3L, 1L), .Label = c("Religious", "Somewhat religious",
"Not religious"), class = "factor"), family_role_recoded = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L,
1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L,
2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L,
1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L,
2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L,
1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L,
2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L,
1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L), .Label = c("Agree/strongly agree",
"Disagree/strongly disagree", "Don't know"), class = "factor")), row.names = c(NA,
250L), class = "data.frame")
I used these regression models:
# Three binomial (logit link) GLMs for family_role_recoded. Each one is fitted
# only on rows where family_role_recoded is not "Don't know" and urban_rural
# is not "Refugee camp".

# Model 1: urban_rural as the only predictor.
model1 <- glm(family_role_recoded ~ urban_rural,
family=binomial(link='logit'),
subset = (family_role_recoded != "Don't know" & urban_rural != "Refugee camp"),
data=dataset)
# Model 2: religious as the only predictor.
model2 <- glm(family_role_recoded ~ religious,
family=binomial(link='logit'),
subset = (family_role_recoded != "Don't know" & urban_rural != "Refugee camp"),
data=dataset)
# Model 3: both predictors as additive main effects (no interaction term).
model3 <- glm(family_role_recoded ~ urban_rural + religious,
family=binomial(link='logit'),
subset = (family_role_recoded != "Don't know" & urban_rural != "Refugee camp"),
data=dataset)
Does anyone have an idea how to solve this problem?
If you first set the reference level for religious to "Somewhat religious", we can look at the results of the full interaction model:
library(broom)
# Make "Somewhat religious" the baseline level of religious, so the other
# levels' coefficients are expressed relative to it.
dataset$religious <- relevel(dataset$religious, ref = "Somewhat religious")
# Logistic regression with both main effects and their full interaction.
fit0 <- glm(family_role_recoded ~ urban_rural * religious,
            data = dataset, family = binomial())
# Print the coefficient table (this is the call that produces the tibble
# output shown next).
tidy(fit0)
# A tibble: 6 x 5
term estimate std.error statistic p.value
<chr> <dbl> <dbl> <dbl> <dbl>
1 (Intercept) -0.902 0.181 -4.99 6.03e-7
2 urban_ruralRural -0.484 0.532 -0.910 3.63e-1
3 religiousReligious -0.0141 0.456 -0.0308 9.75e-1
4 religiousNot religious 1.47 0.391 3.76 1.67e-4
5 urban_ruralRural:religiousReligious 0.995 1.14 0.876 3.81e-1
6 urban_ruralRural:religiousNot religio… 0.201 0.993 0.203 8.39e-1
You have one of the terms rural/religious. Intuitively, the Urban/Not religious term would be the flip of urban_ruralRural:religiousNot religio. We can also manually define the interaction terms we need:
# 0/1 indicator columns for the two specific combinations of interest.
dataset$Rural_religious <- as.numeric(
  dataset$urban_rural == "Rural" & dataset$religious == "Religious"
)
dataset$Urban_not_religious <- as.numeric(
  dataset$urban_rural == "Urban" & dataset$religious == "Not religious"
)
# Logistic regression without an intercept ("0 +"), using the main effects
# plus the two hand-built indicators instead of the full interaction.
fit <- glm(
  family_role_recoded ~ 0 + urban_rural + religious +
    Urban_not_religious + Rural_religious,
  family = binomial(),
  data = dataset
)
tidy(fit)
# A tibble: 6 x 5
term estimate std.error statistic p.value
<chr> <dbl> <dbl> <dbl> <dbl>
1 urban_ruralUrban -0.902 0.181 -4.99 0.000000603
2 urban_ruralRural -1.39 0.500 -2.77 0.00556
3 religiousReligious -0.0141 0.456 -0.0308 0.975
4 religiousNot religious 1.67 0.913 1.83 0.0667
5 Urban_not_religious -0.201 0.993 -0.203 0.839
6 Rural_religious 0.995 1.14 0.876 0.381
You need to do a post hoc test. For that you can use the R package "emmeans"

bestglm : Error in levels(x)[x] : only 0's may be mixed with negative subscripts

I was trying to use the bestglm function with the AIC criterion to come up with a logistic regression model.
The following is a summary of the data set I ran it on:
dataset summary
The following is the line I ran:
best1 <- bestglm(trainset, IC="AIC", family=binomial)
The following is the error message I have received:
Error in levels(x)[x] : only 0's may be mixed with negative subscripts
In addition: Warning messages:
1: In model.response(mf, "numeric") :
using type = "numeric" with a factor response will be ignored
2: In Ops.factor(y, z$residuals) : ‘-’ not meaningful for factors
dput(testset)
structure(list(EyeContact = structure(c(2L, 1L, 2L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L,
1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L,
2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L,
2L), .Label = c("N", "Y"), class = "factor"), Post.Processing = structure(c(2L,
2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L,
1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L), .Label = c("N", "Y"), class = "factor"),
HairColour = structure(c(3L, 2L, 2L, 2L, 2L, 2L, 4L, 2L,
1L, 3L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 2L, 2L, 2L, 2L,
1L, 1L, 4L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L,
2L, 4L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 4L, 1L,
2L, 1L, 1L, 2L, 2L, 3L, 3L, 1L, 2L, 1L, 4L, 2L, 2L, 1L, 1L,
4L, 1L, 2L, 3L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 4L,
1L, 2L, 1L, 1L, 4L), .Label = c("BL", "BR", "NULL", "O"), class = "factor"),
Animals = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = c("N", "Y"), class = "factor"),
Age = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L), .Label = c("21", "22", "23"), class = "factor"),
Backview = structure(c(1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = c("N", "Y"), class = "factor"),
SkinTone = structure(c(3L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L,
2L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 3L, 3L, 2L, 2L, 1L,
2L, 2L, 2L, 2L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L,
2L, 2L, 3L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L,
2L, 1L, 1L, 2L), .Label = c("Dark", "Fair", "NULL"), class = "factor"),
Smile = structure(c(5L, 3L, 1L, 1L, 5L, 4L, 1L, 1L, 5L, 1L,
4L, 4L, 1L, 1L, 4L, 3L, 1L, 2L, 2L, 1L, 4L, 3L, 5L, 5L, 1L,
3L, 1L, 5L, 5L, 2L, 5L, 1L, 2L, 5L, 1L, 2L, 2L, 1L, 4L, 5L,
5L, 4L, 3L, 5L, 2L, 4L, 2L, 3L, 5L, 3L, 5L, 4L, 1L, 5L, 5L,
4L, 5L, 5L, 5L, 1L, 5L, 2L, 2L, 1L, 5L, 5L, 3L, 5L, 4L, 4L,
5L, 4L, 1L, 3L, 2L, 1L, 1L, 5L, 4L, 5L, 4L, 5L, 5L, 1L, 2L,
4L, 3L, 5L, 5L, 1L, 5L, 1L, 4L, 1L, 4L, 5L, 1L, 5L, 4L, 4L,
5L, 5L, 1L), .Label = c("CS", "NS", "NULL", "O", "ST"), class = "factor"),
HairLength = structure(c(1L, 3L, 2L, 2L, 2L, 1L, 3L, 3L,
1L, 2L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 3L, 2L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 2L, 1L, 1L, 1L, 1L, 3L, 1L,
1L, 3L, 3L, 1L, 2L, 3L, 3L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 3L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 3L, 1L, 3L, 2L, 1L, 1L, 1L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 3L, 3L, 1L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 1L, 1L, 1L, 3L), .Label = c("L", "NULL", "SM"), class = "factor"),
HairTexture = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 3L, 1L, 3L, 3L, 3L, 1L,
3L, 3L, 1L, 3L, 3L, 3L, 3L, 1L, 2L, 1L, 3L, 1L, 3L, 1L, 3L,
1L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 3L, 1L, 3L, 2L, 1L, 3L,
3L, 3L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 3L, 3L, 2L,
1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 3L, 1L,
1L, 3L, 3L, 2L, 1L, 3L, 1L, 3L, 3L, 3L, 3L, 1L, 3L, 3L, 1L,
1L, 3L, 3L, 3L, 3L), .Label = c("C", "NULL", "S"), class = "factor"),
HairStyle = structure(c(1L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 3L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 1L, 1L, 1L,
1L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L,
1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L,
1L, 1L, 1L, 1L), .Label = c("LD", "NULL", "T"), class = "factor"),
Outfit = structure(c(2L, 1L, 2L, 1L, 3L, 1L, 1L, 4L, 1L,
4L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 1L, 2L, 3L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 1L, 4L, 4L, 2L, 1L, 1L, 2L, 3L, 3L, 4L,
1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L,
4L, 3L, 4L, 1L, 1L, 1L, 2L, 3L, 1L, 3L, 1L, 1L, 1L, 1L, 3L,
2L, 2L, 1L, 2L, 1L, 1L, 3L, 1L, 1L, 1L, 3L, 3L, 1L, 1L, 1L,
1L, 1L, 4L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 3L, 1L, 1L, 1L, 1L,
2L, 4L, 1L, 4L), .Label = c("D", "I", "NULL", "O"), class = "factor"),
Background = structure(c(2L, 4L, 1L, 4L, 3L, 1L, 1L, 2L,
1L, 1L, 1L, 1L, 1L, 1L, 4L, 2L, 1L, 4L, 1L, 4L, 1L, 1L, 4L,
1L, 3L, 2L, 1L, 1L, 4L, 2L, 1L, 1L, 1L, 4L, 1L, 1L, 1L, 1L,
1L, 4L, 2L, 1L, 1L, 1L, 3L, 3L, 1L, 1L, 4L, 1L, 3L, 1L, 1L,
1L, 2L, 1L, 1L, 1L, 2L, 3L, 2L, 1L, 2L, 4L, 4L, 4L, 1L, 4L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 3L, 2L, 1L,
1L, 1L, 3L, 2L, 4L, 2L, 4L, 1L, 1L, 4L, 3L, 3L, 1L, 2L, 4L,
1L, 3L, 4L, 4L, 3L), .Label = c("I", "N", "NULL", "P"), class = "factor"),
TypeofShot = structure(c(1L, 4L, 1L, 4L, 2L, 4L, 1L, 1L,
4L, 1L, 1L, 2L, 1L, 1L, 4L, 3L, 4L, 1L, 1L, 3L, 4L, 3L, 3L,
3L, 4L, 4L, 2L, 1L, 3L, 1L, 3L, 4L, 1L, 4L, 1L, 1L, 2L, 1L,
1L, 4L, 1L, 1L, 4L, 4L, 2L, 1L, 3L, 4L, 1L, 1L, 2L, 1L, 4L,
4L, 3L, 1L, 4L, 1L, 3L, 1L, 4L, 1L, 1L, 1L, 1L, 3L, 1L, 1L,
2L, 2L, 1L, 4L, 1L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 2L, 3L, 3L,
3L, 4L, 2L, 3L, 3L, 1L, 3L, 4L, 1L, 3L, 2L, 1L, 1L, 1L, 3L,
2L, 1L, 4L, 3L, 4L), .Label = c("CU", "ECU", "LS", "MS"), class = "factor"),
Obstruction = structure(c(1L, 2L, 1L, 1L, 1L, 3L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 3L, 1L, 2L, 1L,
1L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 3L, 1L, 3L, 1L, 1L, 3L, 1L, 2L, 1L, 1L, 3L,
1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 1L, 1L, 3L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L,
1L, 1L, 1L, 1L, 1L), .Label = c("N", "NULL", "Y"), class = "factor"),
Makeup = structure(c(4L, 4L, 2L, 2L, 3L, 2L, 2L, 3L, 2L,
2L, 1L, 1L, 2L, 2L, 3L, 4L, 1L, 2L, 2L, 4L, 1L, 4L, 2L, 3L,
4L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 2L, 1L, 2L, 3L, 3L, 1L, 2L, 1L, 3L, 2L,
4L, 2L, 2L, 2L, 3L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 1L, 2L, 2L,
3L, 2L, 3L, 2L, 4L, 4L, 2L, 3L, 2L, 1L, 2L, 3L, 3L, 1L, 2L,
1L, 2L, 4L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 4L, 2L, 2L,
1L, 3L, 1L, 1L), .Label = c("H", "L", "N", "NULL"), class = "factor"),
Results = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = c("N", "Y"), class = "factor"),
prediction = c(9.32475933917106e-09, 0.0385259384817495,
0.0678681154072461, 0.234968717458685, 0.0290199853775816,
0.171162293958793, 0.00129264601900783, 0.00675484440459677,
0.128155946032347, 0.133539709174044, 0.118744423809008,
0.060206929901843, 0.128155946032347, 0.146426608321148,
0.0552623520735392, 0.227991153820736, 2.54581077993876e-08,
0.0195543511193415, 0.128155946032347, 0.256310568145846,
0.0520386124569491, 0.111383163512112, 0.0402597164944323,
0.0141022328039524, 0.55471858422641, 0.128155946032347,
0.35526622136263, 0.128155946032347, 0.382743622548627, 0.00485338573377989,
0.128155946032347, 0.0324058895421302, 0.320728574893713,
0.320728574893713, 0.35526622136263, 0.146426608321148, 0.0179540767871002,
0.398798221640772, 0.362407391381727, 0.00485338573377989,
0.00129264601900783, 0.128155946032347, 0.0823507208338033,
0.00675484440459677, 0.0195543511193415, 0.320728574893713,
0.128155946032347, 0.174534177022049, 0.0477307982973154,
0.0625662879441275, 0.0174929064796301, 0.135882446473831,
0.00696631574219797, 0.419831884479578, 0.0862150002573959,
0.128155946032347, 0.0698582713166507, 0.128155946032347,
0.174534177022049, 0.146426608321148, 0.0234463612462439,
0.0141022328039524, 0.0239924885903984, 0.0290199853775816,
3.15391485574326e-09, 1.14002192545012e-08, 0.0345251778805331,
0.208346726243955, 0.0203551415502053, 0.020830802150735,
0.128155946032347, 0.197915823620481, 0.146426608321148,
9.32475933917106e-09, 9.32475933917106e-09, 0.128155946032347,
0.0552623520735392, 0.016802787713206, 0.0345251778805331,
0.146426608321148, 0.00675484440459677, 0.00579370288906212,
0.320728574893713, 0.00316694181006374, 0.320728574893713,
0.146426608321148, 1.66951123737628e-08, 0.0466701670833381,
0.0402597164944323, 0.382743622548627, 0.128155946032347,
0.128155946032347, 0.118744423809008, 0.171162293958793,
0.0402597164944323, 0.146426608321148, 0.0895467055067367,
0.0110101302622226, 0.05872534886842, 0.35526622136263, 0.0141022328039524,
0.118744423809008, 0.00414031965843898)), .Names = c("EyeContact",
"Post.Processing", "HairColour", "Animals", "Age", "Backview",
"SkinTone", "Smile", "HairLength", "HairTexture", "HairStyle",
"Outfit", "Background", "TypeofShot", "Obstruction", "Makeup",
"Results", "prediction"), row.names = c(2L, 3L, 9L, 17L, 19L,
22L, 23L, 28L, 29L, 41L, 42L, 45L, 47L, 53L, 55L, 67L, 68L, 69L,
72L, 78L, 80L, 81L, 82L, 83L, 84L, 90L, 94L, 95L, 101L, 103L,
106L, 111L, 113L, 116L, 118L, 119L, 120L, 122L, 123L, 128L, 130L,
134L, 136L, 138L, 144L, 146L, 148L, 150L, 152L, 161L, 162L, 163L,
165L, 168L, 174L, 175L, 180L, 181L, 183L, 194L, 204L, 207L, 210L,
213L, 214L, 215L, 221L, 224L, 230L, 234L, 235L, 236L, 237L, 239L,
240L, 244L, 249L, 250L, 255L, 259L, 262L, 272L, 277L, 278L, 280L,
281L, 284L, 289L, 296L, 297L, 304L, 306L, 308L, 316L, 321L, 323L,
327L, 329L, 332L, 335L, 337L, 339L, 340L), class = "data.frame")
The model is running, it gives some output but the print method doesn't work.
> print(best1)
AIC
Best Model:
Error in levels(x)[x] : only 0's may be mixed with negative subscripts
In addition: Warning messages:
1: In model.response(mf, "numeric") :
using type = "numeric" with a factor response will be ignored
2: In Ops.factor(y, z$residuals) : '-' not meaningful for factors
but best1 structure is correct and best1$BestModel is provided
best1$BestModel
Call: glm(formula = y ~ ., family = family, data = Xi, weights = weights)
Coefficients:
(Intercept) Post.ProcessingY Age22 Age23
-40.416 -244.338 59.277 -41.652
SkinToneFair SkinToneNULL SmileNS SmileNULL
245.316 -5.102 -80.986 -142.908
SmileO SmileST HairLengthNULL HairLengthSM
-121.258 -80.482 -159.677 -20.045
OutfitI OutfitNULL OutfitO BackgroundN
41.652 -41.653 -410.492 19.895
BackgroundNULL BackgroundP TypeofShotECU TypeofShotLS
-82.640 -208.283 16.369 -101.467
TypeofShotMS MakeupL MakeupN MakeupNULL
101.819 39.438 -122.850 285.187
Degrees of Freedom: 102 Total (i.e. Null); 79 Residual
Null Deviance: 69.99
Residual Deviance: 5.545 AIC: 53.55
You could replace the print.bestglm method with
# Replacement print method for objects of class "bestglm".
#
# Prints x$Title, then the selected model: the full summary() of x$BestModel
# when any entry of x$ModelReport$NumDF exceeds 1, otherwise only the
# coefficient table of that summary. If neither x$ModelReport$Bestk > 0 nor
# x$ModelReport$IncludeInterceptQ holds, a note about the null model is
# printed instead.
#
# Arguments:
#   x    object with components Title, BestModel and ModelReport.
#   ...  accepted for compatibility with the print() generic; unused here.
#
# Value: x, invisibly (the usual contract for print methods), so the result
# can be assigned or piped without being printed a second time.
print.bestglm <- function(x, ...) {
  cat(x$Title, fill = TRUE)
  report <- x$ModelReport
  if ((report$Bestk > 0) || (report$IncludeInterceptQ)) {
    cat("Best Model:", fill = TRUE)
    fit_summary <- summary(x$BestModel)
    if (any(report$NumDF > 1)) {
      # At least one term spans several coefficients: show the full summary.
      print(fit_summary)
    } else {
      # Every term has a single coefficient: the coefficient table suffices.
      print(fit_summary$coefficients)
    }
  } else {
    cat("Best Model is the null model with no parameters.", fill = TRUE)
  }
  invisible(x)
}
The problem is that the code uses a deprecated feature. It calls aov on a glm object, which is wrong. I think that using the replacement function should solve the problem.

What is the meaning of the warning message about log(P) when calculating a polychoric correlation with 'hetcor'?

When calculating a polychoric correlation in R (library(polycor), function hetcor) I get the warning message In log(P) : NaNs produced. I wasn't able to figure out what this warning message means. I suppose it has to do with the calculation of the p-values for testing bivariate normality.
Thus my questions are:
What characteristics of this dataset result in this warning?
What's the meaning of this warning?
Is this warning problematic in terms of using the polychoric correlation matrix for further analyses?
Data subset:
# Reproducible subset of the data: five components named item1 .. item5, each
# an ordered factor with levels "0" < "1" < "2" < "3". The outer structure()
# only sets names (no class = "data.frame"), so `foo` is a plain named list.
foo <- structure(list(item1 = structure(c(4L, 4L, 4L, 2L, 2L, 2L,
2L, 2L, 4L, 2L, 2L, 3L, 2L, 3L, 2L, 2L, 2L, 3L, 2L, 2L, 3L, 1L,
2L, 2L, 3L, 3L, 3L, 2L, 2L, 1L, 1L, 2L, 3L, 2L, 2L, 3L, 2L, 3L,
2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 3L, 3L, 2L, 3L, 3L, 3L, 2L, 2L, 2L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 3L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L,
1L, 2L, 2L, 4L, 2L, 4L, 2L, 2L, 3L, 1L, 2L, 1L, 2L, 2L, 2L, 1L,
2L, 2L, 3L, 2L, 2L, 2L, 3L, 1L, 2L, 2L, 2L, 2L, 4L, 2L, 2L, 2L,
2L, 2L, 2L, 4L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 3L, 3L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 3L, 3L, 3L
), .Label = c("0", "1", "2", "3"), class = c("ordered", "factor"
)), item2 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 3L, 2L, 1L, 3L, 2L, 1L, 1L, 3L,
1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 3L, 2L, 2L, 1L,
3L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 3L, 1L, 1L,
2L, 3L, 2L, 1L, 2L, 2L, 3L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L,
1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L,
2L, 2L, 3L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L,
2L, 1L, 2L, 1L, 2L, 1L, 3L, 2L, 1L, 3L, 1L, 1L, 1L, 2L, 2L, 1L,
2L, 1L, 3L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 4L, 1L, 1L, 1L,
1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 4L, 1L, 1L, 3L), .Label = c("0",
"1", "2", "3"), class = c("ordered", "factor")), item3 = structure(c(4L,
4L, 4L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 4L, 1L, 2L, 1L, 1L, 1L,
1L, 2L, 1L, 4L, 2L, 2L, 1L, 3L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 3L, 1L, 1L, 1L, 2L, 1L, 1L,
2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 3L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 3L,
1L, 3L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 1L, 3L, 2L, 1L), .Label = c("0", "1", "2", "3"), class = c("ordered",
"factor")), item4 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 1L, 1L, 1L, 3L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 3L, 2L, 1L,
1L, 3L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L,
2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 3L, 1L, 2L, 3L, 2L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L,
1L, 2L, 1L, 2L, 3L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
1L, 2L, 2L, 2L, 3L, 1L, 1L, 2L, 2L, 2L, 1L, 3L, 1L, 1L, 1L, 2L,
2L, 1L, 1L, 1L, 2L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 4L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 4L, 1L, 2L, 3L), .Label = c("0",
"1", "2", "3"), class = c("ordered", "factor")), item5 = structure(c(4L,
4L, 4L, 1L, 1L, 1L, 1L, 2L, 3L, 2L, 2L, 4L, 2L, 3L, 2L, 1L, 1L,
3L, 3L, 3L, 4L, 3L, 2L, 1L, 3L, 3L, 4L, 1L, 2L, 1L, 1L, 1L, 2L,
2L, 2L, 3L, 3L, 3L, 3L, 1L, 1L, 3L, 4L, 2L, 1L, 2L, 2L, 2L, 2L,
3L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 3L, 3L, 1L,
2L, 1L, 1L, 3L, 1L, 2L, 2L, 1L, 3L, 2L, 1L, 2L, 2L, 1L, 1L, 2L,
1L, 2L, 4L, 2L, 2L, 1L, 2L, 2L, 4L, 2L, 4L, 1L, 1L, 2L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 3L, 2L, 3L, 2L, 1L, 3L, 2L, 1L, 1L, 3L, 3L,
1L, 4L, 1L, 1L, 1L, 1L, 2L, 3L, 3L, 3L, 2L, 1L, 3L, 2L, 1L, 1L,
1L, 1L, 2L, 3L, 4L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 3L, 1L,
3L, 3L, 4L, 3L, 3L), .Label = c("0", "1", "2", "3"), class = c("ordered",
"factor"))), .Names = c("item1", "item2", "item3", "item4",
"item5"))
Computation of correlation matrix:
# Compute the correlation matrix with hetcor() from the polycor package
# (the package must be attached first, e.g. library(polycor)). This is the
# call reported to emit the warning "In log(P) : NaNs produced".
hetcor(foo)
Comment: the real dataset contains about 2500 rows (and more variables), but when evaluating the contingency tables a sparse matrix doesn't seem to be an issue.
A short (and belated) answer to a very old question. The warning is because some of the cells in the cross tabulation of the variables (for example, variables 1 and 2) have 0 values in the cells. This can lead to problems in estimation.
The polychoric (and tetrachoric) correlations are normal theory approximations of what would happen if bivariate normal (and continuous) data were converted into categorical (dichotomous for tetrachorics, polytomous for polychorics) data. The normal theory approximation assumes that all cells have some value. However, the correlations can be found with 0 cell values, but with a warning. The resulting correlations are correct, but unstable, in that if we add a small correction for continuity (i.e., add .1 or .5 to the 0 cells), the values change a great deal. This problem is discussed by Gunther and Hofler for the case of tetrachoric correlations, where they compare solutions with and without the correction for continuity.
(See A. Gunther and M. Hofler, "Different results on tetrachorical correlations in Mplus and Stata - Stata announces modified procedure", Int J Methods Psychiatr Res, 15(3):157-66, 2006, for a discussion of this problem with tetrachoric correlations.)
Using the polychoric function in the psych package, we find the same answer as the hetcor function from polycor if we do not apply the correction for continuity, but somewhat different values if we do correct for continuity. I recommend the correction.
See the help function for polychoric in psych for a longer discussion of this problem.

Resources