I'm using C5.0 to make a decision tree, and it's using my class label in the tree. A snippet of my data is below.
trainX
V1 V2 V3 V4 V5 V6
1 39 State-gov 77516 Bachelors 13 Never-married
2 50 Self-emp-not-inc 83311 Bachelors 13 Married-civ-spouse
3 38 Private 215646 HS-grad 9 Divorced
4 53 Private 234721 11th 7 Married-civ-spouse
5 28 Private 338409 Bachelors 13 Married-civ-spouse
V7 V8 V9 V10 V11 V12 V13 V14
1 Adm-clerical Not-in-family White Male 2174 0 40 United-States
2 Exec-managerial Husband White Male 0 0 13 United-States
3 Handlers-cleaners Not-in-family White Male 0 0 40 United-States
4 Handlers-cleaners Husband Black Male 0 0 40 United-States
5 Prof-specialty Wife Black Female 0 0 40 Cuba
trainY
[1] <=50K <=50K <=50K <=50K <=50K
There are cases in my data of >50K as well, this snippet of 5 just did not contain any.
When I make my tree, this is the code I use
library(C50)
trainX = X[1:100,]
trainY = Y[1:100]
testX = X[101:150,]
testY = Y[101:150]
model = C5.0(trainX, trainY)
summary(model)
And the output I get is...
Decision tree:
<=50K (100/25)
Evaluation on training data (100 cases):
Decision Tree
----------------
Size Errors
1 25(25.0%) <<
(a) (b) <-classified as
---- ----
75 (a): class <=50K
25 (b): class >50K
What am I doing wrong that it's using the classification as part of the tree?
EDIT - DPUTS below of Head. Still gives me the same issue, where its making a Decision Tree using the split as <=50K or >50K, which is my "Y" output and thus shouldn't be part of the decision making process.
trainX
structure(list(V1 = c(39L, 50L, 38L, 53L, 28L, 37L), V2 = structure(c(8L,
7L, 5L, 5L, 5L, 5L), .Label = c(" ?", " Federal-gov", " Local-gov",
" Never-worked", " Private", " Self-emp-inc", " Self-emp-not-inc",
" State-gov", " Without-pay"), class = "factor"), V3 = c(77516L,
83311L, 215646L, 234721L, 338409L, 284582L), V4 = structure(c(10L,
10L, 12L, 2L, 10L, 13L), .Label = c(" 10th", " 11th", " 12th",
" 1st-4th", " 5th-6th", " 7th-8th", " 9th", " Assoc-acdm", " Assoc-voc",
" Bachelors", " Doctorate", " HS-grad", " Masters", " Preschool",
" Prof-school", " Some-college"), class = "factor"), V5 = c(13L,
13L, 9L, 7L, 13L, 14L), V6 = structure(c(5L, 3L, 1L, 3L, 3L,
3L), .Label = c(" Divorced", " Married-AF-spouse", " Married-civ-spouse",
" Married-spouse-absent", " Never-married", " Separated", " Widowed"
), class = "factor"), V7 = structure(c(2L, 5L, 7L, 7L, 11L, 5L
), .Label = c(" ?", " Adm-clerical", " Armed-Forces", " Craft-repair",
" Exec-managerial", " Farming-fishing", " Handlers-cleaners",
" Machine-op-inspct", " Other-service", " Priv-house-serv", " Prof-specialty",
" Protective-serv", " Sales", " Tech-support", " Transport-moving"
), class = "factor"), V8 = structure(c(2L, 1L, 2L, 1L, 6L, 6L
), .Label = c(" Husband", " Not-in-family", " Other-relative",
" Own-child", " Unmarried", " Wife"), class = "factor"), V9 = structure(c(5L,
5L, 5L, 3L, 3L, 5L), .Label = c(" Amer-Indian-Eskimo", " Asian-Pac-Islander",
" Black", " Other", " White"), class = "factor"), V10 = structure(c(2L,
2L, 2L, 2L, 1L, 1L), .Label = c(" Female", " Male"), class = "factor"),
V11 = c(2174L, 0L, 0L, 0L, 0L, 0L), V12 = c(0L, 0L, 0L, 0L,
0L, 0L), V13 = c(40L, 13L, 40L, 40L, 40L, 40L), V14 = structure(c(40L,
40L, 40L, 40L, 6L, 40L), .Label = c(" ?", " Cambodia", " Canada",
" China", " Columbia", " Cuba", " Dominican-Republic", " Ecuador",
" El-Salvador", " England", " France", " Germany", " Greece",
" Guatemala", " Haiti", " Holand-Netherlands", " Honduras",
" Hong", " Hungary", " India", " Iran", " Ireland", " Italy",
" Jamaica", " Japan", " Laos", " Mexico", " Nicaragua", " Outlying-US(Guam-USVI-etc)",
" Peru", " Philippines", " Poland", " Portugal", " Puerto-Rico",
" Scotland", " South", " Taiwan", " Thailand", " Trinadad&Tobago",
" United-States", " Vietnam", " Yugoslavia"), class = "factor")), .Names = c("V1",
"V2", "V3", "V4", "V5", "V6", "V7", "V8", "V9", "V10", "V11",
"V12", "V13", "V14"), row.names = c(NA, 6L), class = "data.frame")
trainY
structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c(" <=50K", " >50K"
), class = "factor")
After reading in trainX, trainY, the easiest way to reproduce this problem would be
library(C50)
test = C5.0(x=trainX, y=trainY)
My actual train Y :
structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L,
1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L,
1L, 2L, 1L, 2L, 2L, 1L, 1L), .Label = c(" <=50K", " >50K"), class = "factor")
My actual trainX
structure(list(age = c(39L, 50L, 38L, 53L, 28L, 37L, 49L, 52L,
31L, 42L, 37L, 30L, 23L, 32L, 40L, 34L, 25L, 32L, 38L, 43L, 40L,
54L, 35L, 43L, 59L, 56L, 19L, 54L, 39L, 49L, 23L, 20L, 45L, 30L,
22L, 48L, 21L, 19L, 31L, 48L, 31L, 53L, 24L, 49L, 25L, 57L, 53L,
44L, 41L, 29L, 25L, 18L, 47L, 50L, 47L, 43L, 46L, 35L, 41L, 30L,
30L, 32L, 48L, 42L, 29L, 36L, 28L, 53L, 49L, 25L, 19L, 31L, 29L,
23L, 79L, 27L, 40L, 67L, 18L, 31L, 18L, 52L, 46L, 59L, 44L, 53L,
49L, 33L, 30L, 43L, 57L, 37L, 28L, 30L, 34L, 29L, 48L, 37L, 48L,
32L), workClass = structure(c(8L, 7L, 5L, 5L, 5L, 5L, 5L, 7L,
5L, 5L, 5L, 8L, 5L, 5L, 5L, 5L, 7L, 5L, 5L, 7L, 5L, 5L, 2L, 5L,
5L, 3L, 5L, 1L, 5L, 5L, 3L, 5L, 5L, 2L, 8L, 5L, 5L, 5L, 5L, 7L,
5L, 7L, 5L, 5L, 5L, 2L, 5L, 5L, 8L, 5L, 5L, 5L, 5L, 2L, 6L, 5L,
5L, 5L, 5L, 5L, 5L, 1L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 1L, 5L, 5L,
7L, 5L, 5L, 5L, 5L, 1L, 5L, 3L, 5L, 5L, 5L, 5L, 5L, 5L, 3L, 5L,
5L, 2L, 5L, 5L, 5L, 5L, 3L, 3L, 7L, 5L, 5L, 2L), .Label = c(" ?",
" Federal-gov", " Local-gov", " Never-worked", " Private", " Self-emp-inc",
" Self-emp-not-inc", " State-gov", " Without-pay"), class = "factor"),
fnlwgt = c(77516L, 83311L, 215646L, 234721L, 338409L, 284582L,
160187L, 209642L, 45781L, 159449L, 280464L, 141297L, 122272L,
205019L, 121772L, 245487L, 176756L, 186824L, 28887L, 292175L,
193524L, 302146L, 76845L, 117037L, 109015L, 216851L, 168294L,
180211L, 367260L, 193366L, 190709L, 266015L, 386940L, 59951L,
311512L, 242406L, 197200L, 544091L, 84154L, 265477L, 507875L,
88506L, 172987L, 94638L, 289980L, 337895L, 144361L, 128354L,
101603L, 271466L, 32275L, 226956L, 51835L, 251585L, 109832L,
237993L, 216666L, 56352L, 147372L, 188146L, 59496L, 293936L,
149640L, 116632L, 105598L, 155537L, 183175L, 169846L, 191681L,
200681L, 101509L, 309974L, 162298L, 211678L, 124744L, 213921L,
32214L, 212759L, 309634L, 125927L, 446839L, 276515L, 51618L,
159937L, 343591L, 346253L, 268234L, 202051L, 54334L, 410867L,
249977L, 286730L, 212563L, 117747L, 226296L, 115585L, 191277L,
202683L, 171095L, 249409L), education = structure(c(10L,
10L, 12L, 2L, 10L, 13L, 7L, 12L, 13L, 10L, 16L, 10L, 10L,
8L, 9L, 6L, 12L, 12L, 2L, 13L, 11L, 12L, 7L, 2L, 12L, 10L,
12L, 16L, 12L, 12L, 8L, 16L, 10L, 16L, 16L, 2L, 16L, 12L,
16L, 8L, 7L, 10L, 10L, 12L, 12L, 10L, 12L, 13L, 9L, 9L, 16L,
12L, 15L, 10L, 12L, 16L, 5L, 9L, 12L, 12L, 10L, 6L, 12L,
11L, 16L, 12L, 16L, 12L, 16L, 16L, 16L, 10L, 10L, 16L, 16L,
12L, 8L, 1L, 2L, 6L, 12L, 10L, 12L, 12L, 12L, 12L, 12L, 13L,
7L, 11L, 9L, 16L, 16L, 12L, 10L, 16L, 11L, 16L, 8L, 12L), .Label = c(" 10th",
" 11th", " 12th", " 1st-4th", " 5th-6th", " 7th-8th", " 9th",
" Assoc-acdm", " Assoc-voc", " Bachelors", " Doctorate",
" HS-grad", " Masters", " Preschool", " Prof-school", " Some-college"
), class = "factor"), educationNum = c(13L, 13L, 9L, 7L,
13L, 14L, 5L, 9L, 14L, 13L, 10L, 13L, 13L, 12L, 11L, 4L,
9L, 9L, 7L, 14L, 16L, 9L, 5L, 7L, 9L, 13L, 9L, 10L, 9L, 9L,
12L, 10L, 13L, 10L, 10L, 7L, 10L, 9L, 10L, 12L, 5L, 13L,
13L, 9L, 9L, 13L, 9L, 14L, 11L, 11L, 10L, 9L, 15L, 13L, 9L,
10L, 3L, 11L, 9L, 9L, 13L, 4L, 9L, 16L, 10L, 9L, 10L, 9L,
10L, 10L, 10L, 13L, 13L, 10L, 10L, 9L, 12L, 6L, 7L, 4L, 9L,
13L, 9L, 9L, 9L, 9L, 9L, 14L, 5L, 16L, 11L, 10L, 10L, 9L,
13L, 10L, 16L, 10L, 12L, 9L), marital = structure(c(5L, 3L,
1L, 3L, 3L, 3L, 4L, 3L, 5L, 3L, 3L, 3L, 5L, 5L, 3L, 3L, 5L,
5L, 3L, 1L, 3L, 6L, 3L, 3L, 1L, 3L, 5L, 3L, 1L, 3L, 5L, 5L,
1L, 3L, 3L, 5L, 5L, 2L, 3L, 3L, 3L, 3L, 3L, 6L, 5L, 3L, 3L,
1L, 3L, 5L, 3L, 5L, 3L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 4L,
3L, 3L, 1L, 3L, 1L, 3L, 3L, 5L, 5L, 6L, 3L, 5L, 3L, 5L, 3L,
3L, 5L, 3L, 5L, 3L, 3L, 3L, 1L, 1L, 3L, 3L, 5L, 5L, 3L, 1L,
1L, 3L, 3L, 5L, 3L, 3L, 1L, 5L), .Label = c(" Divorced",
" Married-AF-spouse", " Married-civ-spouse", " Married-spouse-absent",
" Never-married", " Separated", " Widowed"), class = "factor"),
occ = structure(c(2L, 5L, 7L, 7L, 11L, 5L, 9L, 5L, 11L, 5L,
5L, 11L, 2L, 13L, 4L, 15L, 6L, 8L, 13L, 5L, 11L, 9L, 6L,
15L, 14L, 14L, 4L, 1L, 5L, 4L, 12L, 13L, 5L, 2L, 9L, 8L,
8L, 2L, 13L, 11L, 8L, 11L, 14L, 2L, 7L, 11L, 8L, 5L, 4L,
11L, 5L, 9L, 11L, 5L, 5L, 14L, 8L, 9L, 2L, 8L, 13L, 1L, 15L,
11L, 14L, 4L, 2L, 2L, 5L, 1L, 11L, 13L, 13L, 8L, 11L, 9L,
2L, 1L, 9L, 6L, 13L, 9L, 9L, 13L, 4L, 13L, 12L, 11L, 13L,
11L, 11L, 4L, 8L, 13L, 12L, 7L, 11L, 13L, 5L, 9L), .Label = c(" ?",
" Adm-clerical", " Armed-Forces", " Craft-repair", " Exec-managerial",
" Farming-fishing", " Handlers-cleaners", " Machine-op-inspct",
" Other-service", " Priv-house-serv", " Prof-specialty",
" Protective-serv", " Sales", " Tech-support", " Transport-moving"
), class = "factor"), relationship = structure(c(2L, 1L,
2L, 1L, 6L, 6L, 2L, 1L, 2L, 1L, 1L, 1L, 4L, 2L, 1L, 1L, 4L,
5L, 1L, 5L, 1L, 5L, 1L, 1L, 5L, 1L, 4L, 1L, 2L, 1L, 2L, 4L,
4L, 4L, 1L, 5L, 4L, 6L, 1L, 1L, 1L, 1L, 1L, 5L, 2L, 1L, 1L,
5L, 1L, 2L, 6L, 4L, 6L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
1L, 1L, 2L, 1L, 2L, 6L, 1L, 4L, 4L, 4L, 1L, 2L, 3L, 4L, 1L,
1L, 4L, 1L, 2L, 1L, 6L, 1L, 2L, 4L, 1L, 1L, 2L, 2L, 1L, 5L,
5L, 6L, 1L, 2L, 1L, 1L, 5L, 4L), .Label = c(" Husband", " Not-in-family",
" Other-relative", " Own-child", " Unmarried", " Wife"), class = "factor"),
race = structure(c(5L, 5L, 5L, 3L, 3L, 5L, 3L, 5L, 5L, 5L,
3L, 2L, 5L, 3L, 2L, 1L, 5L, 5L, 5L, 5L, 5L, 3L, 3L, 5L, 5L,
5L, 5L, 2L, 5L, 5L, 5L, 3L, 5L, 5L, 3L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 3L, 5L, 5L, 5L, 5L, 4L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 3L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 3L, 2L, 5L, 5L, 5L, 5L, 5L, 3L
), .Label = c(" Amer-Indian-Eskimo", " Asian-Pac-Islander",
" Black", " Other", " White"), class = "factor"), sex = structure(c(2L,
2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L,
2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L), .Label = c(" Female",
" Male"), class = "factor"), capGain = c(2174L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 14084L, 5178L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 5013L, 2407L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 14344L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), capLoss = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 2042L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 1408L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1902L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 1573L, 0L, 0L, 1902L, 0L, 0L, 0L), hours = c(40L,
13L, 40L, 40L, 40L, 40L, 16L, 45L, 50L, 40L, 80L, 40L, 30L,
50L, 40L, 45L, 35L, 40L, 50L, 45L, 60L, 20L, 40L, 40L, 40L,
40L, 40L, 60L, 80L, 40L, 52L, 44L, 40L, 40L, 15L, 40L, 40L,
25L, 38L, 40L, 43L, 40L, 50L, 40L, 35L, 40L, 38L, 40L, 40L,
43L, 40L, 30L, 60L, 55L, 60L, 40L, 40L, 40L, 48L, 40L, 40L,
40L, 40L, 45L, 58L, 40L, 40L, 40L, 50L, 40L, 32L, 40L, 70L,
40L, 20L, 40L, 40L, 2L, 22L, 40L, 30L, 40L, 40L, 48L, 40L,
35L, 40L, 50L, 40L, 50L, 40L, 40L, 25L, 35L, 40L, 50L, 60L,
48L, 40L, 40L), country = structure(c(40L, 40L, 40L, 40L,
6L, 40L, 24L, 40L, 40L, 40L, 40L, 20L, 40L, 40L, 1L, 27L,
40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 36L,
40L, 40L, 40L, 40L, 40L, 40L, 40L, 34L, 40L, 40L, 1L, 40L,
40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 1L,
17L, 40L, 40L, 40L, 27L, 34L, 40L, 40L, 40L, 1L, 40L, 40L,
40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 27L,
40L, 40L, 40L, 40L, 40L, 6L, 40L, 40L, 40L, 40L, 40L, 40L,
40L, 40L, 40L, 40L, 40L, 1L, 40L, 40L, 40L, 40L, 10L, 40L
), .Label = c(" ?", " Cambodia", " Canada", " China", " Columbia",
" Cuba", " Dominican-Republic", " Ecuador", " El-Salvador",
" England", " France", " Germany", " Greece", " Guatemala",
" Haiti", " Holand-Netherlands", " Honduras", " Hong", " Hungary",
" India", " Iran", " Ireland", " Italy", " Jamaica", " Japan",
" Laos", " Mexico", " Nicaragua", " Outlying-US(Guam-USVI-etc)",
" Peru", " Philippines", " Poland", " Portugal", " Puerto-Rico",
" Scotland", " South", " Taiwan", " Thailand", " Trinadad&Tobago",
" United-States", " Vietnam", " Yugoslavia"), class = "factor")), .Names = c("age",
"workClass", "fnlwgt", "education", "educationNum", "marital",
"occ", "relationship", "race", "sex", "capGain", "capLoss", "hours",
"country"), row.names = c(NA, 100L), class = "data.frame")
The code you provided constructs a factor with 1 level (<=50k) because the first vector input contains only 1Ls. You should assign these labels accordingly or use an easier way to construct your response variable - something like trainY <- as.factor(...).
I changed the way trainY is constructed to:
y <- structure(c(1L, 2L, 1L, 1L, 2L, 1L), .Label = c(" <=50K", " >50K"), class = "factor")
and after re-training the tree with same commands i got:
Decision tree:
V14 = Cuba: >50K (1)
V14 in {?,Cambodia,Canada,China,Columbia,Dominican-Republic,Ecuador,
El-Salvador,England,France,Germany,Greece,Guatemala,Haiti,
Holand-Netherlands,Honduras,Hong,Hungary,India,Iran,Ireland,Italy,
Jamaica,Japan,Laos,Mexico,Nicaragua,Outlying-US(Guam-USVI-etc),Peru,
Philippines,Poland,Portugal,Puerto-Rico,Scotland,South,Taiwan,Thailand,
Trinadad&Tobago,United-States,Vietnam,Yugoslavia}: <=50K (5/1)
Make sure you don't have only one class in the response when passing args to C5.0. hth
UPDATE
After plotting some of the predictors vs response I noticed that education and educationNum show the clearest division in the data (Doctorate implies >50K immediately). Next step was to tweak some of the very useful C5.0 Control options - they are well documented in the C5.0 package documentation and the official informal tutorial page - check them out they give you broad control over the classification controls.
For example:
C5.0(x = trainX,y = trainY,control = C5.0Control(subset = T, winnow = T,minCases = 4,fuzzyThreshold = T))
Decision tree:
educationNum <= 13 (14.5): <=50K (95/20)
educationNum >= 16 (14.5): >50K (5)
similiarly, doing some "feature engineering" which in this case meant just leaving out some of the columns from the original dataframe produced :
C5.0(x = trainX[ ,c(1:5, 9:13)], y = trainY)
Decision tree:
educationNum <= 14: <=50K (95/20)
educationNum > 14: >50K (5)
I believe that there is no one general "out of the box" C5.0 defaults setting that would produce satisfying results for all kinds of problems, so it really comes down to trying out different parameter settings, features etc...but as with all things R there is plenty of material around to give you some direction.
I have a dataframe df
df<-structure(list(subject = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L,
23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L,
36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L,
49L, 50L, 51L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L,
25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L,
38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L,
51L), sex = c(1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L,
1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L,
2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L,
1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L,
2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L), age = c(29L, 54L, 67L,
36L, 48L, 37L, 25L, 46L, 37L, 33L, 25L, 26L, 28L, 59L, 46L, 50L,
55L, 56L, 37L, 30L, 38L, 30L, 50L, 39L, 29L, 46L, 48L, 46L, 55L,
32L, 66L, 35L, 48L, 54L, 38L, 31L, 42L, 36L, 27L, 63L, 45L, 31L,
26L, 38L, 43L, 52L, 36L, 43L, 65L, 46L, 42L, 29L, 54L, 67L, 36L,
48L, 37L, 25L, 46L, 37L, 33L, 25L, 26L, 28L, 59L, 46L, 50L, 55L,
56L, 37L, 30L, 38L, 30L, 50L, 39L, 29L, 46L, 48L, 46L, 55L, 32L,
66L, 35L, 48L, 54L, 38L, 31L, 42L, 36L, 27L, 63L, 45L, 31L, 26L,
38L, 43L, 52L, 36L, 43L, 65L, 46L, 42L), edu = c(4L, 3L, 3L,
3L, 4L, 2L, 3L, 3L, 1L, 3L, 4L, 4L, 5L, 1L, 1L, 2L, 2L, 3L, 4L,
4L, 4L, 4L, 4L, 4L, 2L, 2L, 1L, 2L, 2L, 4L, 2L, 4L, 4L, 3L, 3L,
4L, 5L, 3L, 3L, 4L, 1L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 6L, 1L, 3L,
4L, 3L, 3L, 3L, 4L, 2L, 3L, 3L, 1L, 3L, 4L, 4L, 5L, 1L, 1L, 2L,
2L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 2L, 1L, 2L, 2L, 4L, 2L, 4L,
4L, 3L, 3L, 4L, 5L, 3L, 3L, 4L, 1L, 1L, 3L, 1L, 3L, 1L, 3L, 1L,
6L, 1L, 3L), biz_exp = c(5L, 15L, 3L, 4L, 10L, 6L, 0L, 5L, 8L,
5L, 0L, 8L, 3L, 23L, 5L, 7L, 5L, 11L, 4L, 4L, 11L, 3L, 15L, 4L,
4L, 6L, 6L, 5L, 13L, 2L, 13L, 6L, 8L, 27L, 7L, 3L, 11L, 5L, 1L,
4L, 8L, 8L, 4L, 15L, 18L, 30L, 9L, 14L, 18L, 21L, 16L, 5L, 15L,
3L, 4L, 10L, 6L, 0L, 5L, 8L, 5L, 0L, 8L, 3L, 23L, 5L, 7L, 5L,
11L, 4L, 4L, 11L, 3L, 15L, 4L, 4L, 6L, 6L, 5L, 13L, 2L, 13L,
6L, 8L, 27L, 7L, 3L, 11L, 5L, 1L, 4L, 8L, 8L, 4L, 15L, 18L, 30L,
9L, 14L, 18L, 21L, 16L), turnov = c(36L, NA, 12L, 9L, 48L, 9L,
8L, 24L, 4L, 250L, NA, 600L, 6L, 6L, 10L, 10L, 5L, 4L, 250L,
200L, 50L, 150L, 48L, NA, 9L, 6L, 2L, NA, NA, 3L, 7L, 23L, 75L,
7L, 5L, NA, 20L, 450L, 5L, 32L, 21L, 12L, 6L, 4L, 24L, 7L, 10L,
12L, 12L, 14L, 18L, 36L, NA, 12L, 9L, 48L, 9L, 8L, 24L, 4L, 250L,
NA, 600L, 6L, 6L, 10L, 10L, 5L, 4L, 250L, 200L, 50L, 150L, 48L,
NA, 9L, 6L, 2L, NA, NA, 3L, 7L, 23L, 75L, 7L, 5L, NA, 20L, 450L,
5L, 32L, 21L, 12L, 6L, 4L, 24L, 7L, 10L, 12L, 12L, 14L, 18L),
loc_pr = c(1L, 1L, 1L, 6L, 1L, 6L, 4L, 1L, 8L, 5L, 1L, 3L,
1L, 1L, 1L, 1L, 5L, 8L, 2L, 1L, 1L, 1L, 1L, 2L, 8L, 2L, 4L,
4L, 2L, 2L, 2L, 1L, 4L, 5L, 4L, 4L, 4L, 4L, NA, 4L, 5L, 5L,
5L, 8L, 1L, 2L, 4L, 3L, 3L, 4L, 3L, 1L, 1L, 1L, 6L, 1L, 6L,
4L, 1L, 8L, 5L, 1L, 3L, 1L, 1L, 1L, 1L, 5L, 8L, 2L, 1L, 1L,
1L, 1L, 2L, 8L, 2L, 4L, 4L, 2L, 2L, 2L, 1L, 4L, 5L, 4L, 4L,
4L, 4L, NA, 4L, 5L, 5L, 5L, 8L, 1L, 2L, 4L, 3L, 3L, 4L, 3L
), type = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 5L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 4L, 2L, 1L, 1L, 2L, 4L, 1L, 2L, 1L,
1L, 4L, 1L, 3L, 1L, 1L, 1L, 3L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 5L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 4L, 2L, 1L,
1L, 2L, 4L, 1L, 2L, 1L, 1L, 4L, 1L, 3L, 1L, 1L, 1L, 3L, 2L
), age_rec = c(2L, 4L, 4L, 100L, 4L, 100L, 100L, 4L, 100L,
2L, 1L, 2L, 2L, 4L, 4L, 4L, 4L, 100L, 3L, 2L, 3L, 2L, 4L,
3L, 100L, 27L, 100L, 100L, 4L, 2L, 100L, 2L, 4L, 30L, 3L,
2L, 59L, 8L, 100L, 27L, 3L, 59L, 2L, 59L, 3L, 59L, 3L, 3L,
4L, 64L, 3L, 2L, 4L, 4L, 100L, 4L, 100L, 100L, 4L, 100L,
2L, 1L, 2L, 2L, 4L, 4L, 4L, 4L, 100L, 3L, 2L, 3L, 2L, 4L,
3L, 100L, 27L, 100L, 100L, 4L, 2L, 100L, 2L, 4L, 30L, 3L,
2L, 59L, 8L, 100L, 27L, 3L, 59L, 2L, 59L, 3L, 59L, 3L, 3L,
4L, 64L, 3L), biz_exp_rec = c(2L, 4L, 2L, 3L, 3L, 3L, 1L,
2L, 3L, 2L, 1L, 3L, 2L, 4L, 2L, 3L, 2L, 4L, 2L, 2L, 4L, 2L,
4L, 2L, 2L, 4L, 4L, 4L, 4L, 1L, 4L, 3L, 3L, 4L, 3L, 2L, 3L,
3L, 2L, 4L, 3L, 2L, 2L, 3L, 4L, 4L, 3L, 4L, 4L, 4L, 4L, 2L,
4L, 2L, 3L, 3L, 3L, 1L, 2L, 3L, 2L, 1L, 3L, 2L, 4L, 2L, 3L,
2L, 4L, 2L, 2L, 4L, 2L, 4L, 2L, 2L, 4L, 4L, 4L, 4L, 1L, 4L,
3L, 3L, 4L, 3L, 2L, 3L, 3L, 2L, 4L, 3L, 2L, 2L, 3L, 4L, 4L,
3L, 4L, 4L, 4L, 4L), turnov_rec = structure(c(3L, NA, 3L,
2L, 3L, 3L, 1L, 3L, 3L, 4L, NA, 4L, 2L, 2L, 2L, 2L, 2L, 4L,
4L, 4L, 3L, 4L, 3L, 5L, 2L, 3L, 3L, 2L, NA, 2L, 4L, 3L, 4L,
4L, 2L, NA, 4L, 2L, 1L, 2L, 3L, 3L, 2L, 4L, 3L, 4L, 2L, 3L,
3L, 4L, 3L, 3L, NA, 3L, 2L, 3L, 3L, 1L, 3L, 3L, 4L, NA, 4L,
2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 3L, 4L, 3L, NA, 2L, 3L, 3L,
2L, NA, 2L, 4L, 3L, 4L, 4L, 2L, NA, 4L, 2L, 1L, 2L, 3L, 3L,
2L, 4L, 3L, 4L, 2L, 3L, 3L, 4L, 3L), .Label = c("1", "2",
"3", "4", "MA"), class = "factor"), bundle = c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), investment = c(86L,
100L, 100L, 75L, 100L, 59L, 68L, 86L, 80L, 100L, 86L, 100L,
100L, 100L, 100L, 100L, 100L, 93L, 64L, 100L, 24L, 18L, 89L,
75L, 80L, 29L, 54L, 65L, 100L, 27L, 59L, 30L, 59L, 43L, 59L,
59L, 5L, 26L, 100L, 75L, 59L, 5L, 59L, 74L, 59L, 79L, 75L,
75L, 86L, 66L, 86L, 55L, 100L, 68L, 1L, 75L, 1L, 1L, 79L,
1L, 54L, 48L, 33L, 55L, 90L, 85L, 39L, 70L, 1L, 45L, 54L,
33L, 3L, 44L, 75L, 1L, 1L, 1L, 1L, 96L, 26L, 1L, 23L, 66L,
1L, 89L, 83L, 52L, 61L, 1L, 88L, 45L, 72L, 60L, 1L, 60L,
2L, 86L, 10L, 63L, 1L, 88L)), .Names = c("subject", "sex",
"age", "edu", "biz_exp", "turnov", "loc_pr", "type", "age_rec",
"biz_exp_rec", "turnov_rec", "bundle", "investment"), class = "data.frame", row.names = c(NA,
-102L))
In this dataframe investment is my dependent variable and the other variables are my independent variables. My subjects are crossed within type of bundle. First of all, I would like know whether my subjects do bundle or not (bundle= 1 means that people bundle and bundle=0 means that people do not bundle), it will have an effect on the investment.
I have done this mixed effect linear model but I am not sure if this is correct as my p-value are equal to zero.
library(nlme)
model <- lme(investment~bundle, random = ~1|subject/bundle, data=df)
I have also tried to make an anova with repeated measures as such:
aov(investment~bundle+ Error(subject/bundle), data=df)
It works but not sure if the model formula is right
Anyone could help me with that?
I have a dataframe looking like this:
chr <- c(1,1,1,1,1)
b1 <- c('HP', 'HP', 'CP', 'CP', 'KP')
b2 <- c('HP', 'HP', 'CP', 'CP', 'KP')
b3 <- c('CP', 'KP', 'CP', 'HP', 'CP')
b4 <- c('CP', 'KP', 'CP', 'HP', 'CP')
b5 <- c('CP', 'CP', 'KP', 'KP', 'HP')
b6 <- c('CP', 'CP', 'KP', 'KP', 'HP')
b7 <- c('CP', 'KP', 'HP', 'CP', 'CP')
b8 <- c('CP', 'KP', 'HP', 'CP', 'CP')
df <- data.frame(chr, b1,b2,b3,b4,b5,b6,b7,b8)
I want to write a function that looks at each 'b' column and asks if it contains the value 'HP'. If it does, and the other six 'b' columns contain 'CP' or 'KP', I want to change the value 'HP' into 'CP' or 'KP' depending on which is the majority. If CP is the majority, change the HP to CP. If KP is the majority, change HP to KP.
(note that the value of b1 and b2, b3 and b4 etc is always the same, so really only 4 columns need to be looked at, b1, b3, b5, and b7).
To clarify, if the columns are e.g. HP HP CP CP CP CP KP KP, I want to change the two HPs into CPs (and leave the other columns the same).
So, the example I gave would become:
chr <- c(1,1,1,1,1)
b1 <- c('CP', 'KP', 'CP', 'CP', 'KP')
b2 <- c('CP', 'KP', 'CP', 'CP', 'KP')
b3 <- c('CP', 'KP', 'CP', 'CP', 'CP')
b4 <- c('CP', 'KP', 'CP', 'CP', 'CP')
b5 <- c('CP', 'CP', 'KP', 'KP', 'CP')
b6 <- c('CP', 'CP', 'KP', 'KP', 'CP')
b7 <- c('CP', 'KP', 'CP', 'CP', 'CP')
b8 <- c('CP', 'KP', 'CP', 'CP', 'CP')
df <- data.frame(chr, b1,b2,b3,b4,b5,b6,b7,b8)
df
I have written a function (just for df$b1) with if statements, but it doesn't work.
(note the rules for whether the HP changes to KP or CP depend on how many other CPs or KPs there are):
fun <- function(df){
if(df$b1 == 'HP' && df$b3 == 'CP' && df$b5 == 'CP' && df$b7 == 'CP') {df$b1 <- 'KP'}
if(df$b1 == 'HP' && df$b3 == 'KP' && df$b5 == 'CP' && df$b7 == 'CP') {df$b1 <- 'CP'}
if(df$b1 == 'HP' && df$b3 == 'CP' && df$b5 == 'KP' && df$b7 == 'CP') {df$b1 <- 'CP'}
if(df$b1 == 'HP' && df$b3 == 'CP' && df$b5 == 'CP' && df$b7 == 'KP') {df$b1 <- 'CP'}
if(df$b1 == 'HP' && df$b3 == 'KP' && df$b5 == 'KP' && df$b7 == 'CP') {df$b1 <- 'KP'}
if(df$b1 == 'HP' && df$b3 == 'KP' && df$b5 == 'CP' && df$b7 == 'KP') {df$b1 <- 'KP'}
if(df$b1 == 'HP' && df$b3 == 'CP' && df$b5 == 'KP' && df$b7 == 'KP') {df$b1 <- 'KP'}
if(df$b1 == 'HP' && df$b3 == 'KP' && df$b5 == 'KP' && df$b7 == 'KP') {df$b1 <- 'CP'}
df$b2 <-df$b1
}
Thanks very much for any help. I'm really stuck on this one.
EDIT: This is a sample of my actual data which is more complex than the example I gave above.
structure(list(chr = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L), pos_c = c(2373L, 2406L, 2418L, 2419L,
2447L, 2450L, 2468L, 2524L, 2533L, 2535L, 2536L, 2542L, 2623L,
2709L, 3942L, 11716L, 11893L, 11898L, 12190L, 12396L, 26639L,
26640L, 26643L, 26646L, 26655L, 26657L, 26661L, 26667L, 26670L,
26676L, 26679L, 26684L, 26685L, 26688L, 26694L, 26703L, 26710L,
26712L, 26713L, 26723L, 26733L, 26737L, 26738L, 26739L, 26742L,
26743L, 26748L, 26761L, 26765L, 26766L, 26778L, 26781L, 26790L,
26792L, 26796L, 26802L, 26805L, 26811L, 26814L, 26819L, 26820L,
26823L, 26829L, 26838L, 26846L, 26847L, 26848L, 26872L, 26873L,
26874L, 26877L, 26878L, 26883L, 26889L, 26901L, 26904L, 26907L,
26916L, 26923L, 26925L, 26927L, 26931L, 26937L, 26940L, 26946L,
26954L, 26958L, 26961L, 26963L, 26964L, 26970L, 26981L, 26982L,
26983L, 26991L, 26994L, 26997L, 27007L, 27008L, 27009L, 27012L,
27015L, 27018L, 27027L, 202471L, 203660L, 203668L, 203669L, 203670L,
203672L, 203678L, 203683L, 203686L, 203687L, 203690L, 203704L,
203705L, 203711L, 203714L, 203732L, 203749L, 203752L, 203754L,
203755L, 203903L, 203910L, 203911L, 203912L, 203913L, 203914L,
203915L, 203922L, 203924L, 203933L, 203937L, 203939L, 203945L,
203948L, 203951L, 203957L, 203960L, 203961L, 203963L, 203969L,
203972L, 203973L, 203974L, 203975L, 203981L, 203991L, 204220L,
204227L, 204230L, 204232L, 204242L, 204245L, 204262L, 204272L,
204278L, 204282L, 204290L), c1 = c(101L, 60L, 63L, 64L, 100L,
97L, 94L, 83L, 80L, 48L, 46L, 51L, 69L, 46L, 23L, 79L, 63L, 59L,
53L, 85L, 13L, 12L, 1L, 9L, 11L, 13L, 9L, 14L, 14L, 12L, 15L,
9L, 15L, 14L, 14L, 2L, 2L, 8L, 3L, 0L, 0L, 4L, 2L, 1L, 4L, 4L,
8L, 39L, 7L, 5L, 2L, 41L, 69L, 79L, 89L, 120L, 128L, 90L, 134L,
107L, 169L, 120L, 103L, 48L, 58L, 132L, 62L, 19L, 9L, 13L, 12L,
12L, 17L, 251L, 8L, 367L, 367L, 264L, 5L, 170L, 113L, 234L, 134L,
143L, 189L, 224L, 255L, 296L, 448L, 239L, 169L, 80L, 312L, 84L,
403L, 397L, 430L, 529L, 544L, 556L, 565L, 549L, 555L, 4L, 11L,
0L, 18L, 18L, 19L, 19L, 18L, 18L, 17L, 17L, 15L, 15L, 16L, 15L,
13L, 14L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 2L, 3L, 2L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 1L, 13L, 2L, 10L, 4L, 10L, 24L, 33L, 33L, 63L, 42L), c2 = c(101L,
60L, 63L, 64L, 100L, 97L, 94L, 83L, 80L, 48L, 46L, 51L, 69L,
46L, 23L, 79L, 63L, 59L, 53L, 85L, 13L, 12L, 1L, 9L, 11L, 13L,
9L, 14L, 14L, 12L, 15L, 9L, 15L, 14L, 14L, 2L, 2L, 8L, 3L, 0L,
0L, 4L, 2L, 1L, 4L, 4L, 8L, 39L, 7L, 5L, 2L, 41L, 69L, 79L, 89L,
120L, 128L, 90L, 134L, 107L, 169L, 120L, 103L, 48L, 58L, 132L,
62L, 19L, 9L, 13L, 12L, 12L, 17L, 251L, 8L, 367L, 367L, 264L,
5L, 170L, 113L, 234L, 134L, 143L, 189L, 224L, 255L, 296L, 448L,
239L, 169L, 80L, 312L, 84L, 403L, 397L, 430L, 529L, 544L, 556L,
565L, 549L, 555L, 4L, 11L, 0L, 18L, 18L, 19L, 19L, 18L, 18L,
17L, 17L, 15L, 15L, 16L, 15L, 13L, 14L, 0L, 1L, 0L, 0L, 0L, 0L,
0L, 2L, 3L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 13L, 2L, 10L, 4L, 10L, 24L,
33L, 33L, 63L, 42L), c3 = c(37L, 0L, 0L, 0L, 42L, 46L, 46L, 21L,
26L, 6L, 2L, 7L, 11L, 4L, 0L, 4L, 1L, 0L, 0L, 2L, 29L, 29L, 0L,
22L, 23L, 23L, 26L, 27L, 29L, 24L, 32L, 26L, 35L, 32L, 32L, 3L,
3L, 10L, 1L, 5L, 1L, 6L, 1L, 0L, 5L, 11L, 6L, 81L, 15L, 14L,
0L, 92L, 157L, 174L, 168L, 236L, 221L, 143L, 228L, 251L, 292L,
273L, 281L, 33L, 39L, 260L, 57L, 53L, 24L, 22L, 26L, 37L, 37L,
484L, 16L, 721L, 724L, 436L, 7L, 367L, 163L, 411L, 167L, 373L,
275L, 599L, 637L, 773L, 866L, 615L, 223L, 63L, 531L, 59L, 878L,
868L, 911L, 939L, 975L, 995L, 980L, 931L, 958L, 12L, 16L, 0L,
12L, 13L, 12L, 11L, 9L, 12L, 11L, 11L, 10L, 1L, 0L, 0L, 0L, 1L,
1L, 2L, 1L, 0L, 1L, 1L, 0L, 2L, 2L, 2L, 0L, 0L, 0L, 0L, 0L, 1L,
0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 2L, 28L,
5L, 28L, 3L, 12L, 39L, 40L, 50L, 90L, 80L), c4 = c(37L, 0L, 0L,
0L, 42L, 46L, 46L, 21L, 26L, 6L, 2L, 7L, 11L, 4L, 0L, 4L, 1L,
0L, 0L, 2L, 29L, 29L, 0L, 22L, 23L, 23L, 26L, 27L, 29L, 24L,
32L, 26L, 35L, 32L, 32L, 3L, 3L, 10L, 1L, 5L, 1L, 6L, 1L, 0L,
5L, 11L, 6L, 81L, 15L, 14L, 0L, 92L, 157L, 174L, 168L, 236L,
221L, 143L, 228L, 251L, 292L, 273L, 281L, 33L, 39L, 260L, 57L,
53L, 24L, 22L, 26L, 37L, 37L, 484L, 16L, 721L, 724L, 436L, 7L,
367L, 163L, 411L, 167L, 373L, 275L, 599L, 637L, 773L, 866L, 615L,
223L, 63L, 531L, 59L, 878L, 868L, 911L, 939L, 975L, 995L, 980L,
931L, 958L, 12L, 16L, 0L, 12L, 13L, 12L, 11L, 9L, 12L, 11L, 11L,
10L, 1L, 0L, 0L, 0L, 1L, 1L, 2L, 1L, 0L, 1L, 1L, 0L, 2L, 2L,
2L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L,
1L, 1L, 0L, 1L, 1L, 2L, 28L, 5L, 28L, 3L, 12L, 39L, 40L, 50L,
90L, 80L), c5 = c(96L, 77L, 74L, 72L, 96L, 96L, 92L, 80L, 79L,
79L, 76L, 76L, 66L, 55L, 64L, 78L, 110L, 100L, 165L, 171L, 38L,
41L, 2L, 38L, 33L, 37L, 21L, 40L, 41L, 21L, 37L, 19L, 45L, 30L,
22L, 22L, 28L, 34L, 30L, 31L, 25L, 40L, 34L, 33L, 34L, 46L, 41L,
96L, 48L, 51L, 38L, 93L, 152L, 155L, 155L, 193L, 195L, 189L,
222L, 213L, 284L, 248L, 230L, 56L, 70L, 208L, 82L, 85L, 67L,
64L, 64L, 83L, 71L, 495L, 77L, 570L, 577L, 499L, 55L, 292L, 236L,
352L, 244L, 296L, 351L, 391L, 440L, 483L, 653L, 417L, 194L, 57L,
460L, 57L, 538L, 520L, 573L, 731L, 753L, 770L, 772L, 757L, 761L,
35L, 73L, 66L, 70L, 70L, 71L, 70L, 74L, 79L, 82L, 83L, 85L, 69L,
68L, 71L, 71L, 70L, 73L, 72L, 72L, 74L, 103L, 107L, 106L, 107L,
109L, 106L, 106L, 105L, 106L, 105L, 108L, 104L, 105L, 106L, 106L,
103L, 112L, 112L, 113L, 112L, 109L, 114L, 114L, 115L, 120L, 114L,
97L, 125L, 103L, 124L, 107L, 116L, 145L, 139L, 138L, 177L, 139L
), c6 = c(96L, 77L, 74L, 72L, 96L, 96L, 92L, 80L, 79L, 79L, 76L,
76L, 66L, 55L, 64L, 78L, 110L, 100L, 165L, 171L, 38L, 41L, 2L,
38L, 33L, 37L, 21L, 40L, 41L, 21L, 37L, 19L, 45L, 30L, 22L, 22L,
28L, 34L, 30L, 31L, 25L, 40L, 34L, 33L, 34L, 46L, 41L, 96L, 48L,
51L, 38L, 93L, 152L, 155L, 155L, 193L, 195L, 189L, 222L, 213L,
284L, 248L, 230L, 56L, 70L, 208L, 82L, 85L, 67L, 64L, 64L, 83L,
71L, 495L, 77L, 570L, 577L, 499L, 55L, 292L, 236L, 352L, 244L,
296L, 351L, 391L, 440L, 483L, 653L, 417L, 194L, 57L, 460L, 57L,
538L, 520L, 573L, 731L, 753L, 770L, 772L, 757L, 761L, 35L, 73L,
66L, 70L, 70L, 71L, 70L, 74L, 79L, 82L, 83L, 85L, 69L, 68L, 71L,
71L, 70L, 73L, 72L, 72L, 74L, 103L, 107L, 106L, 107L, 109L, 106L,
106L, 105L, 106L, 105L, 108L, 104L, 105L, 106L, 106L, 103L, 112L,
112L, 113L, 112L, 109L, 114L, 114L, 115L, 120L, 114L, 97L, 125L,
103L, 124L, 107L, 116L, 145L, 139L, 138L, 177L, 139L), c7 = c(28L,
3L, 1L, 1L, 52L, 50L, 60L, 49L, 50L, 3L, 2L, 2L, 37L, 11L, 0L,
1L, 2L, 2L, 0L, 1L, 28L, 30L, 1L, 17L, 23L, 28L, 11L, 30L, 32L,
13L, 32L, 19L, 39L, 18L, 17L, 23L, 29L, 46L, 37L, 25L, 21L, 42L,
32L, 29L, 30L, 41L, 44L, 141L, 72L, 64L, 25L, 93L, 219L, 234L,
218L, 294L, 277L, 184L, 294L, 273L, 382L, 293L, 280L, 131L, 132L,
386L, 157L, 99L, 77L, 75L, 68L, 66L, 88L, 615L, 55L, 746L, 740L,
685L, 27L, 305L, 158L, 511L, 151L, 326L, 371L, 605L, 650L, 727L,
886L, 623L, 314L, 170L, 734L, 162L, 937L, 908L, 987L, 964L, 997L,
1002L, 1007L, 960L, 980L, 28L, 75L, 61L, 96L, 98L, 97L, 96L,
93L, 101L, 99L, 100L, 98L, 91L, 90L, 90L, 89L, 87L, 76L, 75L,
75L, 76L, 88L, 92L, 87L, 86L, 88L, 87L, 85L, 87L, 87L, 83L, 86L,
87L, 86L, 86L, 89L, 83L, 83L, 84L, 84L, 86L, 83L, 86L, 88L, 87L,
88L, 84L, 81L, 118L, 90L, 120L, 90L, 101L, 127L, 134L, 140L,
172L, 160L), c8 = c(28L, 3L, 1L, 1L, 52L, 50L, 60L, 49L, 50L,
3L, 2L, 2L, 37L, 11L, 0L, 1L, 2L, 2L, 0L, 1L, 28L, 30L, 1L, 17L,
23L, 28L, 11L, 30L, 32L, 13L, 32L, 19L, 39L, 18L, 17L, 23L, 29L,
46L, 37L, 25L, 21L, 42L, 32L, 29L, 30L, 41L, 44L, 141L, 72L,
64L, 25L, 93L, 219L, 234L, 218L, 294L, 277L, 184L, 294L, 273L,
382L, 293L, 280L, 131L, 132L, 386L, 157L, 99L, 77L, 75L, 68L,
66L, 88L, 615L, 55L, 746L, 740L, 685L, 27L, 305L, 158L, 511L,
151L, 326L, 371L, 605L, 650L, 727L, 886L, 623L, 314L, 170L, 734L,
162L, 937L, 908L, 987L, 964L, 997L, 1002L, 1007L, 960L, 980L,
28L, 75L, 61L, 96L, 98L, 97L, 96L, 93L, 101L, 99L, 100L, 98L,
91L, 90L, 90L, 89L, 87L, 76L, 75L, 75L, 76L, 88L, 92L, 87L, 86L,
88L, 87L, 85L, 87L, 87L, 83L, 86L, 87L, 86L, 86L, 89L, 83L, 83L,
84L, 84L, 86L, 83L, 86L, 88L, 87L, 88L, 84L, 81L, 118L, 90L,
120L, 90L, 101L, 127L, 134L, 140L, 172L, 160L), k1 = c(39L, 64L,
68L, 69L, 38L, 38L, 41L, 51L, 54L, 84L, 83L, 84L, 57L, 50L, 43L,
58L, 72L, 71L, 29L, 35L, 0L, 0L, 10L, 1L, 1L, 0L, 3L, 0L, 0L,
1L, 0L, 3L, 0L, 0L, 0L, 14L, 14L, 9L, 15L, 18L, 24L, 20L, 20L,
27L, 28L, 10L, 28L, 27L, 59L, 64L, 73L, 43L, 19L, 7L, 27L, 5L,
23L, 30L, 29L, 65L, 10L, 46L, 27L, 160L, 168L, 95L, 175L, 255L,
265L, 271L, 270L, 76L, 269L, 77L, 14L, 12L, 11L, 118L, 382L,
204L, 220L, 181L, 290L, 290L, 114L, 209L, 89L, 159L, 7L, 144L,
95L, 9L, 180L, 411L, 105L, 125L, 97L, 19L, 3L, 3L, 2L, 12L, 1L,
540L, 1L, 32L, 14L, 14L, 13L, 13L, 15L, 14L, 12L, 11L, 12L, 11L,
12L, 13L, 13L, 9L, 18L, 17L, 8L, 18L, 6L, 2L, 1L, 2L, 1L, 2L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 4L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 0L, 2L, 1L, 21L, 28L, 49L, 50L, 54L, 45L,
44L), k2 = c(39L, 64L, 68L, 69L, 38L, 38L, 41L, 51L, 54L, 84L,
83L, 84L, 57L, 50L, 43L, 58L, 72L, 71L, 29L, 35L, 0L, 0L, 10L,
1L, 1L, 0L, 3L, 0L, 0L, 1L, 0L, 3L, 0L, 0L, 0L, 14L, 14L, 9L,
15L, 18L, 24L, 20L, 20L, 27L, 28L, 10L, 28L, 27L, 59L, 64L, 73L,
43L, 19L, 7L, 27L, 5L, 23L, 30L, 29L, 65L, 10L, 46L, 27L, 160L,
168L, 95L, 175L, 255L, 265L, 271L, 270L, 76L, 269L, 77L, 14L,
12L, 11L, 118L, 382L, 204L, 220L, 181L, 290L, 290L, 114L, 209L,
89L, 159L, 7L, 144L, 95L, 9L, 180L, 411L, 105L, 125L, 97L, 19L,
3L, 3L, 2L, 12L, 1L, 540L, 1L, 32L, 14L, 14L, 13L, 13L, 15L,
14L, 12L, 11L, 12L, 11L, 12L, 13L, 13L, 9L, 18L, 17L, 8L, 18L,
6L, 2L, 1L, 2L, 1L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 4L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 0L, 2L, 1L, 21L,
28L, 49L, 50L, 54L, 45L, 44L), k3 = c(84L, 122L, 120L, 120L,
92L, 88L, 90L, 107L, 98L, 114L, 120L, 117L, 91L, 64L, 59L, 100L,
113L, 109L, 56L, 136L, 1L, 0L, 29L, 7L, 4L, 6L, 5L, 6L, 6L, 9L,
7L, 11L, 7L, 10L, 9L, 44L, 46L, 38L, 51L, 60L, 79L, 75L, 80L,
83L, 80L, 41L, 97L, 61L, 133L, 135L, 180L, 100L, 50L, 28L, 75L,
18L, 79L, 94L, 100L, 117L, 47L, 74L, 68L, 393L, 390L, 191L, 416L,
504L, 532L, 545L, 545L, 181L, 556L, 175L, 19L, 24L, 19L, 312L,
766L, 389L, 416L, 418L, 639L, 475L, 239L, 293L, 70L, 135L, 37L,
122L, 84L, 42L, 408L, 886L, 93L, 115L, 65L, 67L, 35L, 37L, 47L,
50L, 54L, 942L, 9L, 43L, 29L, 29L, 29L, 29L, 28L, 27L, 25L, 25L,
26L, 32L, 33L, 32L, 33L, 30L, 26L, 23L, 24L, 23L, 8L, 1L, 2L,
2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 3L, 4L, 3L, 3L, 3L, 3L, 3L, 3L,
4L, 4L, 3L, 3L, 4L, 3L, 2L, 2L, 0L, 7L, 3L, 65L, 73L, 111L, 98L,
133L, 107L, 64L), k4 = c(84L, 122L, 120L, 120L, 92L, 88L, 90L,
107L, 98L, 114L, 120L, 117L, 91L, 64L, 59L, 100L, 113L, 109L,
56L, 136L, 1L, 0L, 29L, 7L, 4L, 6L, 5L, 6L, 6L, 9L, 7L, 11L,
7L, 10L, 9L, 44L, 46L, 38L, 51L, 60L, 79L, 75L, 80L, 83L, 80L,
41L, 97L, 61L, 133L, 135L, 180L, 100L, 50L, 28L, 75L, 18L, 79L,
94L, 100L, 117L, 47L, 74L, 68L, 393L, 390L, 191L, 416L, 504L,
532L, 545L, 545L, 181L, 556L, 175L, 19L, 24L, 19L, 312L, 766L,
389L, 416L, 418L, 639L, 475L, 239L, 293L, 70L, 135L, 37L, 122L,
84L, 42L, 408L, 886L, 93L, 115L, 65L, 67L, 35L, 37L, 47L, 50L,
54L, 942L, 9L, 43L, 29L, 29L, 29L, 29L, 28L, 27L, 25L, 25L, 26L,
32L, 33L, 32L, 33L, 30L, 26L, 23L, 24L, 23L, 8L, 1L, 2L, 2L,
2L, 2L, 2L, 4L, 4L, 4L, 4L, 3L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 4L,
4L, 3L, 3L, 4L, 3L, 2L, 2L, 0L, 7L, 3L, 65L, 73L, 111L, 98L,
133L, 107L, 64L), k5 = c(0L, 14L, 14L, 14L, 1L, 0L, 0L, 8L, 7L,
5L, 5L, 5L, 0L, 3L, 0L, 8L, 2L, 3L, 18L, 15L, 0L, 2L, 38L, 3L,
5L, 1L, 18L, 1L, 2L, 2L, 3L, 21L, 2L, 15L, 1L, 26L, 22L, 17L,
27L, 33L, 41L, 39L, 42L, 45L, 51L, 14L, 50L, 31L, 82L, 84L, 108L,
55L, 24L, 16L, 51L, 33L, 44L, 55L, 54L, 87L, 15L, 20L, 27L, 285L,
297L, 151L, 293L, 343L, 363L, 374L, 376L, 57L, 382L, 24L, 25L,
10L, 8L, 103L, 551L, 301L, 320L, 276L, 364L, 340L, 49L, 272L,
171L, 195L, 24L, 180L, 161L, 11L, 254L, 663L, 188L, 229L, 158L,
26L, 3L, 3L, 6L, 10L, 6L, 708L, 0L, 9L, 0L, 3L, 0L, 1L, 0L, 2L,
0L, 0L, 1L, 9L, 9L, 9L, 10L, 10L, 6L, 6L, 1L, 6L, 2L, 0L, 5L,
3L, 2L, 3L, 4L, 2L, 3L, 2L, 2L, 1L, 3L, 0L, 0L, 4L, 1L, 0L, 1L,
5L, 2L, 0L, 1L, 2L, 0L, 2L, 5L, 1L, 3L, 3L, 43L, 50L, 78L, 75L,
87L, 78L, 59L), k6 = c(0L, 14L, 14L, 14L, 1L, 0L, 0L, 8L, 7L,
5L, 5L, 5L, 0L, 3L, 0L, 8L, 2L, 3L, 18L, 15L, 0L, 2L, 38L, 3L,
5L, 1L, 18L, 1L, 2L, 2L, 3L, 21L, 2L, 15L, 1L, 26L, 22L, 17L,
27L, 33L, 41L, 39L, 42L, 45L, 51L, 14L, 50L, 31L, 82L, 84L, 108L,
55L, 24L, 16L, 51L, 33L, 44L, 55L, 54L, 87L, 15L, 20L, 27L, 285L,
297L, 151L, 293L, 343L, 363L, 374L, 376L, 57L, 382L, 24L, 25L,
10L, 8L, 103L, 551L, 301L, 320L, 276L, 364L, 340L, 49L, 272L,
171L, 195L, 24L, 180L, 161L, 11L, 254L, 663L, 188L, 229L, 158L,
26L, 3L, 3L, 6L, 10L, 6L, 708L, 0L, 9L, 0L, 3L, 0L, 1L, 0L, 2L,
0L, 0L, 1L, 9L, 9L, 9L, 10L, 10L, 6L, 6L, 1L, 6L, 2L, 0L, 5L,
3L, 2L, 3L, 4L, 2L, 3L, 2L, 2L, 1L, 3L, 0L, 0L, 4L, 1L, 0L, 1L,
5L, 2L, 0L, 1L, 2L, 0L, 2L, 5L, 1L, 3L, 3L, 43L, 50L, 78L, 75L,
87L, 78L, 59L), k7 = c(0L, 36L, 42L, 44L, 0L, 0L, 0L, 3L, 3L,
49L, 50L, 51L, 0L, 0L, 0L, 0L, 0L, 0L, 31L, 158L, 0L, 1L, 28L,
14L, 11L, 9L, 27L, 14L, 12L, 14L, 14L, 28L, 14L, 32L, 19L, 41L,
37L, 26L, 39L, 57L, 85L, 75L, 82L, 87L, 87L, 37L, 91L, 54L, 124L,
138L, 206L, 150L, 44L, 18L, 92L, 38L, 76L, 95L, 101L, 155L, 20L,
90L, 48L, 375L, 344L, 135L, 379L, 519L, 537L, 549L, 563L, 67L,
557L, 91L, 43L, 30L, 35L, 125L, 784L, 491L, 519L, 324L, 627L,
503L, 215L, 296L, 68L, 203L, 42L, 173L, 58L, 43L, 222L, 812L,
64L, 98L, 36L, 65L, 36L, 45L, 42L, 50L, 43L, 962L, 0L, 36L, 0L,
0L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 15L, 17L, 15L, 13L, 12L, 25L,
27L, 8L, 26L, 7L, 2L, 5L, 5L, 4L, 5L, 5L, 5L, 5L, 6L, 5L, 4L,
6L, 0L, 0L, 5L, 0L, 1L, 0L, 5L, 3L, 0L, 0L, 4L, 0L, 1L, 4L, 2L,
9L, 3L, 59L, 77L, 123L, 107L, 144L, 119L, 79L), k8 = c(0L, 36L,
42L, 44L, 0L, 0L, 0L, 3L, 3L, 49L, 50L, 51L, 0L, 0L, 0L, 0L,
0L, 0L, 31L, 158L, 0L, 1L, 28L, 14L, 11L, 9L, 27L, 14L, 12L,
14L, 14L, 28L, 14L, 32L, 19L, 41L, 37L, 26L, 39L, 57L, 85L, 75L,
82L, 87L, 87L, 37L, 91L, 54L, 124L, 138L, 206L, 150L, 44L, 18L,
92L, 38L, 76L, 95L, 101L, 155L, 20L, 90L, 48L, 375L, 344L, 135L,
379L, 519L, 537L, 549L, 563L, 67L, 557L, 91L, 43L, 30L, 35L,
125L, 784L, 491L, 519L, 324L, 627L, 503L, 215L, 296L, 68L, 203L,
42L, 173L, 58L, 43L, 222L, 812L, 64L, 98L, 36L, 65L, 36L, 45L,
42L, 50L, 43L, 962L, 0L, 36L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L,
1L, 15L, 17L, 15L, 13L, 12L, 25L, 27L, 8L, 26L, 7L, 2L, 5L, 5L,
4L, 5L, 5L, 5L, 5L, 6L, 5L, 4L, 6L, 0L, 0L, 5L, 0L, 1L, 0L, 5L,
3L, 0L, 0L, 4L, 0L, 1L, 4L, 2L, 9L, 3L, 59L, 77L, 123L, 107L,
144L, 119L, 79L), b1 = structure(c(7L, 3L, 3L, 3L, 7L, 7L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 7L, 1L, 1L, 7L,
7L, 7L, 1L, 7L, 1L, 1L, 1L, 1L, 7L, 1L, 1L, 1L, 7L, 7L, 7L, 7L,
5L, 5L, 7L, 7L, 5L, 5L, 7L, 7L, 3L, 5L, 5L, 5L, 3L, 7L, 7L, 7L,
1L, 7L, 7L, 7L, 3L, 1L, 7L, 7L, 7L, 7L, 3L, 7L, 5L, 5L, 5L, 5L,
7L, 5L, 7L, 7L, 1L, 1L, 3L, 5L, 3L, 7L, 3L, 3L, 3L, 7L, 3L, 7L,
3L, 1L, 7L, 7L, 7L, 3L, 5L, 7L, 7L, 7L, 1L, 1L, 1L, 1L, 1L, 1L,
5L, 1L, 5L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 5L, 5L, 7L, 5L, 5L, 6L, 6L, 2L, 6L, 2L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 6L, 6L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L,
1L, 7L, 7L, 7L, 7L, 3L, 7L, 7L, 3L, 7L), .Label = c("CP", "HF",
"HP", "KF", "KP", "NF", "NP"), class = "factor"), b2 = structure(c(7L,
3L, 3L, 3L, 7L, 7L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 7L, 1L, 1L, 7L, 7L, 7L, 1L, 7L, 1L, 1L, 1L, 1L, 7L, 1L,
1L, 1L, 7L, 7L, 7L, 7L, 5L, 5L, 7L, 7L, 5L, 5L, 7L, 7L, 3L, 5L,
5L, 5L, 3L, 7L, 7L, 7L, 1L, 7L, 7L, 7L, 3L, 1L, 7L, 7L, 7L, 7L,
3L, 7L, 5L, 5L, 5L, 5L, 7L, 5L, 7L, 7L, 1L, 1L, 3L, 5L, 3L, 7L,
3L, 3L, 3L, 7L, 3L, 7L, 3L, 1L, 7L, 7L, 7L, 3L, 5L, 7L, 7L, 7L,
1L, 1L, 1L, 1L, 1L, 1L, 5L, 1L, 5L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 5L, 5L, 7L, 5L, 5L, 6L, 6L, 2L, 6L,
2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 6L, 6L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 2L, 1L, 7L, 7L, 7L, 7L, 3L, 7L, 7L, 3L, 7L
), .Label = c("CP", "HF", "HP", "KF", "KP", "NF", "NP"), class = "factor"),
b3 = structure(c(3L, 5L, 5L, 5L, 3L, 3L, 3L, 5L, 7L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 1L, 1L, 5L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 1L, 7L, 7L, 5L, 5L, 7L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 7L, 5L, 3L, 5L, 5L, 5L, 3L, 7L, 7L, 3L,
7L, 7L, 7L, 3L, 3L, 7L, 7L, 7L, 5L, 5L, 3L, 5L, 5L, 5L, 5L,
5L, 7L, 5L, 7L, 7L, 1L, 1L, 3L, 5L, 3L, 7L, 3L, 7L, 3L, 7L,
3L, 7L, 1L, 1L, 7L, 7L, 7L, 3L, 5L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 5L, 3L, 5L, 7L, 3L, 7L, 7L, 7L, 3L, 3L, 3L, 7L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 2L, 2L, 2L,
6L, 4L, 4L, 4L, 4L, 6L, 4L, 6L, 6L, 6L, 6L, 6L, 6L, 4L, 4L,
6L, 6L, 4L, 6L, 2L, 7L, 1L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 3L,
7L), .Label = c("CP", "HF", "HP", "KF", "KP", "NF", "NP"), class = "factor"),
b4 = structure(c(3L, 5L, 5L, 5L, 3L, 3L, 3L, 5L, 7L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 1L, 1L, 5L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 1L, 7L, 7L, 5L, 5L, 7L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 7L, 5L, 3L, 5L, 5L, 5L, 3L, 7L, 7L, 3L,
7L, 7L, 7L, 3L, 3L, 7L, 7L, 7L, 5L, 5L, 3L, 5L, 5L, 5L, 5L,
5L, 7L, 5L, 7L, 7L, 1L, 1L, 3L, 5L, 3L, 7L, 3L, 7L, 3L, 7L,
3L, 7L, 1L, 1L, 7L, 7L, 7L, 3L, 5L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 5L, 3L, 5L, 7L, 3L, 7L, 7L, 7L, 3L, 3L, 3L, 7L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 2L, 2L, 2L,
6L, 4L, 4L, 4L, 4L, 6L, 4L, 6L, 6L, 6L, 6L, 6L, 6L, 4L, 4L,
6L, 6L, 4L, 6L, 2L, 7L, 1L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 3L,
7L), .Label = c("CP", "HF", "HP", "KF", "KP", "NF", "NP"), class = "factor"),
b5 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 1L, 3L, 1L, 4L,
1L, 2L, 1L, 1L, 4L, 1L, 4L, 1L, 4L, 4L, 2L, 4L, 2L, 2L, 2L,
4L, 2L, 2L, 2L, 2L, 4L, 2L, 4L, 2L, 2L, 4L, 2L, 1L, 1L, 4L,
4L, 4L, 4L, 4L, 4L, 1L, 4L, 4L, 4L, 4L, 2L, 4L, 4L, 4L, 3L,
3L, 4L, 4L, 1L, 4L, 1L, 1L, 1L, 3L, 2L, 4L, 2L, 2L, 2L, 4L,
2L, 4L, 4L, 1L, 4L, 4L, 4L, 2L, 3L, 4L, 2L, 4L, 1L, 1L, 1L,
1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 1L, 4L, 2L, 2L, 4L, 4L, 2L,
4L), .Label = c("CP", "HP", "KP", "NP"), class = "factor"),
b6 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 1L, 3L, 1L, 4L,
1L, 2L, 1L, 1L, 4L, 1L, 4L, 1L, 4L, 4L, 2L, 4L, 2L, 2L, 2L,
4L, 2L, 2L, 2L, 2L, 4L, 2L, 4L, 2L, 2L, 4L, 2L, 1L, 1L, 4L,
4L, 4L, 4L, 4L, 4L, 1L, 4L, 4L, 4L, 4L, 2L, 4L, 4L, 4L, 3L,
3L, 4L, 4L, 1L, 4L, 1L, 1L, 1L, 3L, 2L, 4L, 2L, 2L, 2L, 4L,
2L, 4L, 4L, 1L, 4L, 4L, 4L, 2L, 3L, 4L, 2L, 4L, 1L, 1L, 1L,
1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 1L, 4L, 2L, 2L, 4L, 4L, 2L,
4L), .Label = c("CP", "HP", "KP", "NP"), class = "factor"),
b7 = structure(c(2L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 4L,
4L, 4L, 2L, 2L, 5L, 1L, 1L, 1L, 4L, 4L, 2L, 2L, 4L, 3L, 6L,
6L, 6L, 3L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 3L, 3L, 3L, 3L, 6L,
6L, 3L, 6L, 6L, 6L, 6L, 3L, 6L, 3L, 3L, 4L, 3L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 3L, 2L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 4L, 4L,
4L, 6L, 4L, 2L, 6L, 2L, 2L, 2L, 4L, 3L, 6L, 3L, 6L, 3L, 6L,
3L, 6L, 6L, 2L, 6L, 6L, 6L, 6L, 4L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 4L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 6L, 6L, 6L, 6L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 6L, 2L, 6L, 3L, 3L, 6L, 3L, 3L,
6L), .Label = c("CF", "CP", "HP", "KP", "NF", "NP"), class = "factor"),
b8 = structure(c(2L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 4L,
4L, 4L, 2L, 2L, 5L, 1L, 1L, 1L, 4L, 4L, 2L, 2L, 4L, 3L, 6L,
6L, 6L, 3L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 3L, 3L, 3L, 3L, 6L,
6L, 3L, 6L, 6L, 6L, 6L, 3L, 6L, 3L, 3L, 4L, 3L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 3L, 2L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 4L, 4L,
4L, 6L, 4L, 2L, 6L, 2L, 2L, 2L, 4L, 3L, 6L, 3L, 6L, 3L, 6L,
3L, 6L, 6L, 2L, 6L, 6L, 6L, 6L, 4L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 4L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 6L, 6L, 6L, 6L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 6L, 2L, 6L, 3L, 3L, 6L, 3L, 3L,
6L), .Label = c("CF", "CP", "HP", "KP", "NF", "NP"), class = "factor")), .Names = c("chr",
"pos_c", "c1", "c2", "c3", "c4", "c5", "c6", "c7", "c8", "k1",
"k2", "k3", "k4", "k5", "k6", "k7", "k8", "b1", "b2", "b3", "b4",
"b5", "b6", "b7", "b8"), class = "data.frame", row.names = c(NA,
-161L))
You can try:
t(apply(df[,-1], 1, function(rg){
occ_rg <- table(rg)
rg[grep("HP",rg)] <- names(occ_rg)[which.max(occ_rg)]
return(rg)}))
So, to have your new df:
df <- data.frame(chr=df[, 1], t(apply(df[,-1], 1, function(rg){
occ_rg <- table(rg)
rg[grep("HP",rg)] <- names(occ_rg)[which.max(occ_rg)]
return(rg)})),
stringsAsFactors=F)
# chr b1 b2 b3 b4 b5 b6 b7 b8
#1 1 CP CP CP CP CP CP CP CP
#2 1 KP KP KP KP CP CP KP KP
#3 1 CP CP CP CP KP KP CP CP
#4 1 CP CP CP CP KP KP CP CP
#5 1 KP KP CP CP CP CP CP CP
EDIT
If you have other columns and the columns you want to change are the only ones beginning with "b", you can do :
df[, grepl("^b", colnames(df))] <- t(apply(df[, grepl("^b", colnames(df))],
1,
function(rg){
occ_rg <- table(rg)
rg[grep("HP",rg)] <- names(occ_rg)[which.max(occ_rg)]
return(rg)}))
Example:
With this df:
# chr c1 b1 b2 b3 b4 b5 b6 b7 b8 c2
#1 1 1 HP HP CP CP CP CP CP CP 11
#2 1 2 HP HP KP KP CP CP KP KP 12
#3 1 3 CP CP CP CP KP KP HP HP 13
#4 1 4 CP CP HP HP KP KP CP CP 14
#5 1 5 KP KP CP CP HP HP CP CP 15
You get:
# chr c1 b1 b2 b3 b4 b5 b6 b7 b8 c2
#1 1 1 CP CP CP CP CP CP CP CP 11
#2 1 2 KP KP KP KP CP CP KP KP 12
#3 1 3 CP CP CP CP KP KP CP CP 13
#4 1 4 CP CP CP CP KP KP CP CP 14
#5 1 5 KP KP CP CP CP CP CP CP 15
EDIT 2
If you have other values than "HP", "CP" and "KP" and want to replace "HP" by either "CP" or "KP", depending on which occurs the most, you can do:
df[, grepl("^b", colnames(df))] <- t(apply(df[, grepl("^b", colnames(df))],
1,
function(rg){
occ_rg <- table(rg)
occ_rg <- occ_rg[grepl("KP|CP", names(occ_rg))]
rg[grep("HP",rg)] <- names(occ_rg)[which.max(occ_rg)]
return(rg)}))
Explanation (for edit2):
df[, grepl("^b", colnames(df))] <- # only the columns beginning with b are considered (so the other ones will remain untouched)
t( # the results of apply will be transposed
apply(df[, grepl("^b", colnames(df))], # apply on df with only the columns beginning by b
1, # by row
function(rg){ # a function that takes a vector "rg" as input
occ_rg <- table(rg) # computes the table
occ_rg <- occ_rg[grepl("KP|CP", names(occ_rg))] # keep only the occurrences of either "KP" or "CP"
rg[grep("HP",rg)] <- names(occ_rg)[which.max(occ_rg)] # replace in the vector rg the "HP" elements by "KP" or "CP" depending on which occurs the most
return(rg) # finally returns the vector rg
}))