I have a model:
# Fit a multiclass boosted-tree model: every column except the first is used
# as a predictor, and the label is one of 8 class codes.
model <- xgboost(
  data = as.matrix(data[, -1]),
  label = data$Ethnicity,
  num_class = 8,
  nrounds = 50,
  objective = "multi:softmax",
  lambda = 1,
  eval_metric = "merror"
)
data is a matrix of 94 variables from random survey questions; the label is Ethnicity, a 0-7 variable coding race/ethnicity, so that every number from 0 to 7 represents one ethnicity.
I found which variables are most important in the prediction:
# Tabulate per-feature importance (Gain, Cover, Frequency) for the fitted model.
xgb.importance(model = model)
## Feature Gain Cover Frequency
## 1: q97 0.0924173556 0.0388402250 0.016981237
## 2: q9 0.0603595554 0.0199381316 0.012749847
## 3: q7 0.0456855077 0.0447756304 0.066922777
## 4: q6 0.0436987577 0.0485072162 0.041311731
## 5: q8 0.0319606309 0.0212999077 0.015199599
## 6: q99 0.0276115402 0.0201090242 0.007961695
## 7: q89 0.0245865711 0.0249913356 0.023829408
## 8: q13 0.0197648132 0.0190748590 0.010912533
## 9: q81 0.0194462208 0.0140010066 0.021880742
## 10: q71 0.0192126872 0.0194684164 0.019709370
Now I am stuck. My question is: how do I describe or display the relationship between these variables and the labels? TIA!
Here are some data from dput(head(data)):
structure(list(r = c(2, 6, 4, 4, 4, 4), q6 = c(1.73, 1.5, 1.9,
NA, 1.63, 1.7), q7 = c(54.43, 51.26, 66.68, NA, 68.49, 59.88),
q8 = c(2, 2, 1, 2, 1, 2), q9 = c(5, 5, 5, 5, 4, 5), q10 = c(5,
1, 1, 1, 3, 1), q11 = c(1, 1, 1, 2, 1, 1), q12 = c(1, 1,
1, 4, 1, 1), q13 = c(1, 1, 1, 4, 1, 1), q14 = c(1, 1, 1,
1, 1, 1), q15 = c(1, 1, 1, 1, 1, 1), q16 = c(1, 1, 3, 1,
1, 1), q17 = c(2, 1, NA, 1, 1, 1), q18 = c(3, 1, NA, 2, 1,
1), q19 = c(2, 1, NA, 1, 1, 1), q20 = c(2, 1, NA, 2, 1, 1
), q21 = c(2, 2, NA, 2, 1, 2), q22 = c(2, 1, 1, 1, 4, 2),
q23 = c(2, 1, NA, 1, 5, 2), q24 = c(1, 2, 1, 2, 1, 1), q25 = c(1,
2, 1, 2, 2, 1), q26 = c(2, 2, 1, 1, 1, 1), q27 = c(2, 2,
1, 2, 1, 1), q28 = c(2, 2, 2, 2, 1, 1), q29 = c(1, 1, NA,
1, 1, 3), q30 = c(1, 1, NA, 1, 1, 3), q31 = c(1, 2, NA, 1,
1, 1), q32 = c(6, 1, NA, 6, 6, 1), q33 = c(NA, 1, NA, 2,
5, 1), q34 = c(NA, 1, NA, 2, 4, 1), q35 = c(NA, 1, NA, 5,
5, 1), q36 = c(2, 1, NA, 3, 3, 1), q37 = c(1, 1, NA, 1, 1,
1), q38 = c(6, 1, NA, 4, 1, 1), q39 = c(1, 2, 2, 1, 1, 2),
q40 = c(3, 1, NA, 2, 7, 1), q41 = c(6, 1, 2, 5, 6, 3), q42 = c(5,
1, 5, 5, 5, 6), q43 = c(1, 1, 1, 2, 2, 2), q44 = c(1, 1,
1, 2, 2, NA), q45 = c(1, 1, 1, 5, 7, 4), q46 = c(1, 1, 1,
6, 5, 7), q47 = c(7, 1, NA, 7, 7, 6), q48 = c(6, 1, 7, 5,
5, 6), q49 = c(4, 1, NA, 6, 1, 4), q50 = c(1, 1, 1, 2, 3,
1), q51 = c(1, 1, 1, 1, 1, 1), q52 = c(1, 1, 1, 1, 1, 1),
q53 = c(1, 1, 1, 2, 3, 1), q54 = c(1, 1, 1, 1, 2, 1), q55 = c(1,
1, 1, 2, 1, 1), q56 = c(1, 1, 1, 1, 1, 1), q57 = c(1, 1,
1, 4, 4, 2), q58 = c(1, 1, 1, 1, 1, 1), q59 = c(1, 2, 2,
2, 1, 1), q60 = c(1, 2, 1, 1, 1, 1), q61 = c(7, 1, 2, 5,
6, 6), q62 = c(3, 1, 3, 5, 7, 5), q63 = c(3, 1, 3, 2, 4,
5), q64 = c(3, 1, 3, 3, 3, 2), q65 = c(2, 1, 2, 2, 2, 3),
q66 = c(4, 1, NA, 4, 4, 2), q67 = c(2, 3, 3, 2, 3, 2), q68 = c(1,
1, 2, 1, 1, 1), q69 = c(2, 3, 3, 2, 3, 3), q70 = c(2, 4,
4, 2, 1, 1), q71 = c(3, 2, 3, 1, 3, 2), q72 = c(4, 4, 4,
2, 3, 2), q73 = c(1, 2, 1, 1, 1, 2), q74 = c(2, 2, 3, 2,
2, 2), q75 = c(2, 2, 2, 2, 2, 1), q76 = c(7, 2, 2, 2, 2,
1), q77 = c(3, 3, 4, 4, 2, 7), q78 = c(1, 2, 4, 2, 1, 3),
q79 = c(4, 8, 6, 3, 1, 2), q80 = c(6, 4, 4, 3, 1, 4), q81 = c(5,
NA, 1, 4, 2, 1), q82 = c(7, 1, 6, 5, 2, 7), q83 = c(1, 1,
1, 6, 1, 6), q84 = c(1, 1, 1, 2, 1, 2), q85 = c(2, 2, 1,
2, 2, 2), q86 = c(1, 1, NA, 1, 1, 1), q87 = c(2, 2, NA, 2,
2, 1), q88 = c(4, 5, 5, 3, 1, 2), q89 = c(4, 2, 2, 4, 2,
4), q90 = c(2, 1, NA, NA, 1, 2), q91 = c(1, 1, 1, 3, 3, 1
), q92 = c(1, 1, 1, 2, 2, 5), q93 = c(4, 5, 7, 4, 7, 2),
q94 = c(3, 3, 2, 2, 3, 2), q95 = c(1, 4, 1, 1, 1, 4), q96 = c(1,
1, 1, 1, 1, 1), q97 = c(1, 1, 3, 1, 2, 3), q98 = c(1, 2,
2, 1, 1, 1), q99 = c(1, 1, 1, 1, 1, 2)), row.names = c(NA,
6L), class = "data.frame")
I am trying to fit a gbm model using the caret package. I know other people have had the same problem, but none of the solutions provided in the comments on those questions has resolved my error. Here is my reproducible code:
library(dplyr)
library(MASS)
library(caret)
library(gbm)
Clean_winter_diff<-structure(list(Total = c(2L, 3L, 4L, 2L, 3L, 4L, 2L, 3L, 2L,
3L, 2L, 3L, 2L, 2L, 3L, 6L, 7L, 2L, 19L, 2L, 3L, 4L, 3L, 9L,
2L, 5L, 4L, 7L, 2L, 2L, 2L, 3L, 2L, 2L, 6L, 5L, 2L, 11L, 2L,
6L, 3L, 7L, 9L, 2L, 5L, 5L, 2L, 3L, 6L, 2L, 2L, 8L, 5L, 2L, 9L,
2L, 2L, 8L, 4L, 2L, 5L, 2L, 2L, 3L, 2L, 10L, 4L, 2L, 4L, 6L,
23L, 2L, 3L, 4L, 2L, 12L, 5L, 2L, 6L, 3L, 9L, 14L, 4L, 2L, 2L,
8L, 2L, 3L, 2L, 5L, 4L, 4L, 2L, 11L, 4L, 2L, 6L, 9L, 2L, 2L,
7L, 2L, 3L, 2L, 4L, 4L, 2L, 2L, 2L, 3L, 8L, 2L, 4L, 2L, 2L, 5L,
2L, 4L, 3L, 2L, 2L, 6L, 5L, 14L, 2L, 2L, 6L, 4L, 3L, 2L, 2L,
5L, 6L, 3L, 2L, 2L, 10L, 3L, 5L, 4L, 2L, 6L, 10L, 6L, 3L, 11L,
2L, 2L, 7L, 5L, 3L, 3L, 4L, 2L, 2L, 3L, 2L, 3L, 10L, 2L, 3L,
3L, 2L, 2L, 7L, 6L, 2L, 2L, 3L, 2L, 2L, 8L, 3L, 4L, 2L, 5L, 3L,
2L, 8L, 5L, 2L, 2L, 4L, 10L, 3L, 8L, 2L, 3L, 3L, 2L, 4L, 5L,
2L, 2L, 3L, 2L, 2L, 2L, 2L, 4L, 2L, 4L, 2L, 2L, 4L, 2L, 8L, 9L,
2L, 6L, 2L, 3L, 3L, 3L, 7L, 2L, 5L, 2L, 2L, 2L, 3L, 6L, 2L, 2L,
4L, 3L, 2L, 3L, 4L, 2L, 3L, 20L, 5L, 2L), Site = c(1, 1, 2, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 2, 1,
1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 2, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1,
1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 2,
1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 2, 2, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 2, 2, 1, 1, 2, 1, 1),
Night = c(0, 0, 1, 0, 0, 0.25, 0, 0.666666667, 0.5, 0, 0,
0, 0, 0.5, 0, 0.5, 0.428571429, 0, 0.6315789, 1, 0.666666667,
0.25, 1, 0.333333333, 1, 0.2, 1, 0, 0, 0, 1, 0.666666667,
0, 0.5, 0.166666667, 0, 0.5, 0.909090909, 1, 0.333333333,
1, 0, 0.222222222, 0, 0, 0.2, 0, 0, 0.333333333, 0, 0.5,
0.375, 0, 0, 0.222222222, 0, 0, 0.25, 0, 0, 0, 0, 0, 0.666666667,
0, 0.2, 0.75, 0, 1, 1, 0.869565217, 0, 0, 0, 1, 1, 0.2, 0.5,
0.333333333, 0, 0, 1, 0.25, 0.5, 0, 0, 0.5, 0, 0, 0.4, 0,
0.75, 1, 0.363636364, 0, 1, 1, 0.222222222, 0.5, 0, 0.142857143,
0, 0, 0, 0.25, 0, 0, 1, 0, 0.333333333, 0.25, 0.5, 0, 0.5,
0, 0.4, 0.5, 0.25, 0, 0, 0, 0, 0, 0.785714286, 0, 0, 0.833333333,
0, 0, 0, 0, 1, 0.5, 0, 0.5, 0, 0.6, 0, 0.2, 0, 1, 0.166666667,
1, 0, 0.666666667, 1, 0, 0, 0.285714286, 0.2, 0, 0, 0, 0,
0.5, 0, 0.5, 0.666666667, 0.4, 1, 0, 0, 0, 1, 0.857142857,
1, 0.5, 0, 0.666666667, 0, 0.5, 0.5, 0, 0, 0, 0.2, 0.333333333,
0, 0, 0.2, 1, 1, 0.25, 0.8, 0.333333333, 0.5, 0, 0.666666667,
0.333333333, 0, 1, 0.6, 1, 0, 0.333333333, 0, 0.5, 0, 0,
0.75, 0, 0.5, 1, 1, 0, 1, 0.375, 0.666666667, 0.5, 0.333333333,
0, 0.666666667, 0, 0.333333333, 0.428571429, 0, 0.4, 0.5,
1, 0.5, 0.333333333, 0.5, 0.5, 1, 0.5, 0.666666667, 0.5,
1, 0.5, 0, 0.666666667, 0.5, 0.2, 1), Day = c(1, 1, 0, 1,
1, 0.75, 1, 0.333333333, 0.5, 1, 1, 1, 1, 0.5, 1, 0.5, 0.571428571,
1, 0.368421053, 0, 0.333333333, 0.75, 0, 0.666666667, 0,
0.8, 0, 1, 1, 1, 0, 0.333333333, 1, 0.5, 0.833333333, 1,
0.5, 0.090909091, 0, 0.666666667, 0, 1, 0.777777778, 1, 1,
0.8, 1, 1, 0.666666667, 1, 0.5, 0.625, 1, 1, 0.777777778,
1, 1, 0.75, 1, 1, 1, 1, 1, 0.333333333, 1, 0.8, 0.25, 1,
0, 0, 0.130434783, 1, 1, 1, 0, 0, 0.8, 0.5, 0.666666667,
1, 1, 0, 0.75, 0.5, 1, 1, 0.5, 1, 1, 0.6, 1, 0.25, 0, 0.636363636,
1, 0, 0, 0.777777778, 0.5, 1, 0.857142857, 1, 1, 1, 0.75,
1, 1, 0, 1, 0.666666667, 0.75, 0.5, 1, 0.5, 1, 0.6, 0.5,
0.75, 1, 1, 1, 1, 1, 0.214285714, 1, 1, 0.166666667, 1, 1,
1, 1, 0, 0.5, 1, 0.5, 1, 0.4, 1, 0.8, 1, 0, 0.833333333,
0, 1, 0.333333333, 0, 1, 1, 0.714285714, 0.8, 1, 1, 1, 1,
0.5, 1, 0.5, 0.333333333, 0.6, 0, 1, 1, 1, 0, 0.142857143,
0, 0.5, 1, 0.333333333, 1, 0.5, 0.5, 1, 1, 1, 0.8, 0.666666667,
1, 1, 0.8, 0, 0, 0.75, 0.2, 0.666666667, 0.5, 1, 0.333333333,
0.666666667, 1, 0, 0.4, 0, 1, 0.666666667, 1, 0.5, 1, 1,
0.25, 1, 0.5, 0, 0, 1, 0, 0.625, 0.333333333, 0.5, 0.666666667,
1, 0.333333333, 1, 0.666666667, 0.571428571, 1, 0.6, 0.5,
0, 0.5, 0.666666667, 0.5, 0.5, 0, 0.5, 0.333333333, 0.5,
0, 0.5, 1, 0.333333333, 0.5, 0.8, 0), Distance_forest = c(0.527747223,
0.680189568, 0, 0.310562619, 0.328173668, 0.278522078, 0.722954456,
0.784333633, 0.633598813, 0.106383899, 0.525329032, 0.246038608,
0.575318257, 0, 0.767179738, 0.443355317, 0.876859332, 0.19139315,
0, 0.037535778, 0.432922864, 0.131314978, 0, 0, 0.093159023,
0.128161967, 0, 0, 0.006470757, 0.30307544, 0, 0.568211372,
0.263593171, 0.131057648, 0.168134106, 0.367657292, 0.717686941,
0.163080941, 0, 0.202433621, 0.3842, 0, 0, 0.165167085, 0.929924705,
2.120840521, 0.484698725, 1.078311772, 0.366644583, 0.340810601,
0.298239859, 0.195581001, 0.02421172, 0, 0.464407271, 0.198840768,
0.054828399, 0.489438607, 0.295818359, 0.110773002, 0.496209018,
0.67346593, 0.214433884, 0.108712722, 0.529136166, 0.639769867,
0, 0.396732499, 0.483450073, 0.001882719, 0.248622382, 0.925764277,
0.175704519, 0.622952019, 0, 0, 1.142940058, 1.133076471,
0.224133662, 1.083342909, 0.745420612, 0.377062959, 0.08050045,
0.162178412, 1.361054023, 0.123874613, 0.49008657, 0.638751698,
0.167293055, 0.306236508, 0.581962136, 0.269203966, 0.01981849,
0.389124993, 0.333741945, 0.089434216, 0, 0.172470454, 0.174222306,
0.298973407, 0.139883014, 0.455618893, 0.612636301, 0.372548564,
0.35343891, 0.583316416, 0.291550392, 0.530795339, 0.07577014,
0.844212848, 0.106972082, 0.992915959, 0.044859616, 0.820739224,
0.799670156, 0.316242417, 0.319460412, 0.810118761, 0.500966406,
0.377834056, 0.940032033, 0.151399734, 0.28102882, 0.212952188,
0.073000622, 0.370545468, 0.872918616, 0, 0.104900131, 0.081847421,
0.216958479, 0.008668498, 0.007014128, 0.495791646, 0.02399882,
0.297470809, 0.490666846, 0.415433354, 0.301854897, 0.365931213,
0.692253337, 0.165305616, 0.640148893, 0.835302988, 0.768199373,
0.153852261, 0.134893226, 0.540233724, 0.335663076, 0.102341147,
0.195486707, 0.362254712, 0.324739821, 1.697227338, 0.520683209,
0.020203443, 0, 0.275300664, 0.259782193, 0.051199078, 0.217527413,
0.550995487, 0.656144105, 0.277954065, 0.091362713, 0.769716859,
0.817754331, 0.531972108, 0.330715097, 0.795027122, 0.818699405,
0.113381995, 0.73975023, 0.342823482, 0.760817657, 0.817530729,
0.700152145, 0.88797978, 0.29428625, 0.108928974, 0.074075782,
0.747234676, 0, 0.543069, 0.262442933, 0.262835131, 0.356383731,
0.371421971, 0.015478187, 0.601986047, 0, 0.048889129, 0.406113218,
0.127855407, 0.396601367, 0.294174095, 1.112770231, 0.066093385,
0.833489821, 0.27603216, 0.261494516, 0.139170942, 0.36716509,
0.303017066, 0.245362186, 0, 0.071559882, 0.08333732, 0.617973146,
0.075376835, 0.778806939, 0, 0.484474765, 0.09264197, 0.605744884,
0.568592372, 0.464302103, 0.219293483, 0.115301111, 0.636074027,
0.69132069, 0.448515825, 0.150593216, 0, 0.668861867, 0.664099955,
0.386919408, 0.568691441, 0.328245416, 0.441309029, 0.216574999,
0.191497106, 0.372996079, 0.211736755), Altitude_diff = c(-0.093344147,
-0.032953796, -0.166307236, -0.082168137, -0.074024556, 0.011625801,
-0.035469849, 0.023688222, -0.035174545, 0.009125112, -0.148026001,
-0.136813009, -0.140504929, -0.155278686, -0.141057312, -0.154625722,
-0.138962751, 0.021278778, -0.112632, -0.121742996, -0.104769694,
-0.062242187, -0.105238068, -0.118123369, -0.116926834, -0.057471783,
-0.099749664, -0.138632839, -0.086083588, -0.086340958, -0.109178192,
-0.09964916, -0.086616302, -0.113422317, -0.145193425, -0.139987988,
-0.12330925, -0.062, -0.073519485, -0.0852851, -0.087, -0.041133632,
-0.02300371, 0.145411285, 0.007278729, 0.043087274, 0.12858374,
0.074364258, 0.444998927, -0.018522705, -0.028386627, 0.007190659,
-0.045301581, -0.057804062, 0.132843404, 0.021017105, -0.078413605,
-0.046420864, 0.058002304, -0.081611237, 0.079912634, -0.050522034,
-0.024949936, -0.084849548, -0.062893188, -0.041188028, -0.051312736,
-0.01290921, -0.072736145, -0.079543025, -0.016072741, -0.019319687,
-0.0213343, 0.020119728, -0.071389999, -0.088737882, 0.073720496,
-0.019645096, -0.059846527, 0.08921346, -0.027587019, -0.064136113,
-0.06246801, -0.049053955, 0.119930542, 0.013316631, -0.060812866,
-0.010882792, -0.072900299, -0.00263418, 0.055887116, -0.057,
-0.152, -0.082, -0.134, -0.157, -0.117, -0.128, 0.022, -0.129,
0.121, 0.126, 0.091, -0.075, 0.014, -0.071, 0.009, -0.137,
-0.13, -0.131, -0.054, -0.132, -0.093, -0.134, -0.143, -0.127,
-0.089, -0.058, -0.057, -0.057, -0.055, -0.15, -0.17, -0.106,
-0.177, -0.009, 0.008, -0.08, 0.067, -0.131, -0.029, -0.016,
0.048, -0.154, -0.133, -0.109, -0.056, 0.029, -0.091, 0.031,
0.032, 0.022, 0.029, 0.06, 0.075, -0.099, 0.075, 0.202, -0.022,
0.013, 0.118, -0.022, -0.034, 0.224, -0.003, 0.095, 0.03,
0.04, 0.105, -0.013, 0.031, -0.038, -0.043, -0.01, 0.046,
-0.096, -0.028, -0.033, -0.023, 0.066, 0.063, -0.041, -0.001,
-0.005, -0.025, 0.047, -0.025, -0.028, -0.002, 0.065, -0.019,
-0.133, -0.045, 0.0479274, -0.0969804, -0.0511209, -0.1380578,
-0.0619915, -0.1375449, 0.028642, -0.139097, -0.0267313,
-0.0866448, -0.0664405, -0.0098812, 0.0950015, -0.0905839,
-0.1271573, -0.1345035, 0.0696888, 0.1161573, -0.001593,
-0.139097, -0.139097, -0.0351609, -0.1168084, -0.0487204,
-0.0427109, 0.0139613, -0.0361378, -0.073785, -0.0521353,
0.0207491, -0.0398732, -0.0512241, -0.0480128, -0.0133375,
-0.0047241, 0.0556789, -0.0389344, -0.0307192, -0.0410356,
-0.0436031, -0.0513303, 0.0914526, 0.108031, 0.078924, -0.0482411,
-0.0010576, -0.0543727, 0.1055158, -0.0347792, 0.0091985,
-0.0066721), Revisits = c(0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1,
0, 0, 1, 0, 2, 1, 0, 4, 0, 1, 2, 2, 1, 0, 2, 2, 1, 0, 0,
1, 1, 0, 1, 2, 1, 1, 5, 1, 2, 1, 3, 3, 0, 2, 1, 0, 0, 2,
0, 0, 1, 1, 1, 0, 0, 0, 2, 0, 0, 1, 0, 0, 2, 0, 7, 3, 1,
1, 3, 7, 0, 0, 1, 1, 7, 1, 0, 1, 1, 2, 9, 1, 1, 0, 2, 1,
0, 0, 0, 0, 1, 1, 3, 1, 1, 4, 2, 0, 0, 2, 0, 0, 0, 0, 0,
1, 1, 0, 1, 2, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 6, 0,
0, 2, 2, 0, 0, 0, 3, 0, 1, 0, 0, 2, 0, 0, 1, 0, 3, 3, 2,
2, 5, 0, 0, 4, 4, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0,
0, 4, 3, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1,
0, 4, 0, 1, 1, 1, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 1, 0, 1, 0, 1, 1, 0, 2, 0, 0, 0, 1, 4, 1, 2, 1, 0, 0,
0, 0, 1, 1, 0, 0, 0, 2, 2, 0, 0, 1, 2, 1), Ratio = c(0, 0,
2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 2, 0, 3, 7, 0, 4.75, 0,
3, 2, 1.5, 9, 0, 2.5, 2, 7, 0, 0, 2, 3, 0, 2, 3, 5, 2, 2.2,
2, 3, 3, 2.333333333, 3, 0, 2.5, 5, 0, 0, 3, 0, 0, 8, 5,
2, 0, 0, 0, 4, 0, 0, 5, 0, 0, 1.5, 0, 1.428571429, 1.333333333,
2, 4, 2, 3.285714286, 0, 0, 4, 2, 1.714285714, 5, 0, 6, 3,
4.5, 1.555555556, 4, 2, 0, 4, 2, 0, 0, 0, 0, 4, 2, 3.666666667,
4, 2, 1.5, 4.5, 0, 0, 3.5, 0, 0, 0, 0, 0, 2, 2, 0, 3, 4,
0, 4, 2, 0, 0, 2, 0, 3, 0, 0, 6, 5, 2.333333333, 0, 0, 3,
2, 0, 0, 0, 1.666666667, 0, 3, 0, 0, 5, 0, 0, 4, 0, 2, 3.333333333,
3, 1.5, 2.2, 0, 0, 1.75, 1.25, 3, 0, 4, 0, 2, 0, 2, 0, 0,
2, 0, 0, 0, 0, 1.75, 2, 2, 0, 3, 0, 0, 8, 0, 0, 0, 0, 3,
0, 8, 0, 2, 2, 0, 2.5, 3, 8, 2, 3, 3, 2, 4, 2.5, 2, 2, 3,
2, 2, 2, 2, 4, 2, 4, 2, 2, 4, 2, 8, 9, 2, 3, 2, 3, 3, 3,
1.75, 2, 2.5, 2, 2, 2, 3, 6, 2, 2, 4, 3, 2, 1.5, 2, 2, 3,
20, 2.5, 2), Area = c(0.032426, 0.035282, 0.113383, 0.035693,
0.041549, 0.058353, 0.031573, 0.057897, 0.034298, 0.075203,
0.038044, 0.039534, 0.035463, 0.056159, 0.0319, 0.152971,
0.063424, 0.033137, 0.184546, 0.054271, 0.043699, 0.070929,
0.086888, 0.182135, 0.055882, 0.063176, 0.072119, 0.1096,
0.035482, 0.040162, 0.056385, 0.042962, 0.032754, 0.062732,
0.056648, 0.035606, 0.062001, 0.117763, 0.062311, 0.089266,
0.078665, 0.091633, 0.065517, 0.037454, 0.060411, 0.073355,
0.035344, 0.033497, 0.119351, 0.044972, 0.031568, 0.114325,
0.068984, 0.061986, 0.109741, 0.033782, 0.031849, 0.105872,
0.055202, 0.031857, 0.064647, 0.031718, 0.032588, 0.076284,
0.036021, 0.216575, 0.100172, 0.06227, 0.060081, 0.063876,
0.224969, 0.045917, 0.037024, 0.077219, 0.054039, 0.158028,
0.067884, 0.034719, 0.120346, 0.044812, 0.080923, 0.171879,
0.069136, 0.0417, 0.032867, 0.11509, 0.053077, 0.062925,
0.033554, 0.07492, 0.114556, 0.096677, 0.049153, 0.161404,
0.073527, 0.045258, 0.08603, 0.091654, 0.033591, 0.033243,
0.060307, 0.048489, 0.041845, 0.031375, 0.046293, 0.034473,
0.044909, 0.052535, 0.060832, 0.082261, 0.086662, 0.031981,
0.053075, 0.057269, 0.031764, 0.039376, 0.061771, 0.051374,
0.081914, 0.04886, 0.040433, 0.056631, 0.086457, 0.118001,
0.033169, 0.033734, 0.064399, 0.065725, 0.043722, 0.062459,
0.032385, 0.07605, 0.055818, 0.067326, 0.034017, 0.033867,
0.052257, 0.062541, 0.073173, 0.069685, 0.034166, 0.096639,
0.081452, 0.116512, 0.064753, 0.12308, 0.033466, 0.050626,
0.068697, 0.105123, 0.066668, 0.075515, 0.076373, 0.046068,
0.032637, 0.067892, 0.059513, 0.032345, 0.076412, 0.055942,
0.057757, 0.070641, 0.038058, 0.04051, 0.049283, 0.063954,
0.040222, 0.043146, 0.062292, 0.05944, 0.032226, 0.121329,
0.086029, 0.040616, 0.033843, 0.037219, 0.066294, 0.034378,
0.117405, 0.095093, 0.032398, 0.062489, 0.060033, 0.0006219274,
0.0004771933187, 0.0005009547997, 0.0004406716919, 0.0005174510498,
0.0004356966248, 0.0006026420288, 0.0004355072708, 0.0005670226318,
0.0004853354187, 0.0005070045624, 0.0005619193115, 0.0006754835205,
0.0004834161072, 0.0004468427429, 0.000439496521, 0.0006436887817,
0.0006849831238, 0.0005693302002, 0.0004349030151, 0.0004349030151,
0.0005387456665, 0.0004572155151, 0.0005252477493, 0.0005314183146,
0.0005879613037, 0.0005381040955, 0.0005002150269, 0.0005234927775,
0.000592482015, 0.0005348047689, 0.0005223570905, 0.0005260328979,
0.0005637895386, 0.0005767995911, 0.000629678894, 0.0005354559326,
0.0005431971436, 0.0005328845113, 0.0005311777954, 0.0005214696045,
0.0006679819946, 0.0006827795207, 0.0006529239502, 0.0005282859904,
0.0005745828705, 0.0005196272583, 0.0006795158081, 0.0005336247467,
0.0005789768311, 0.0005680122375), Distance_main = c(1.131059754,
0.9597414435, 0, 1.256349606, 1.078548275, 1.855321885, 4.111540893,
5.445573732, 4.717162654, 3.192720443, 1.230485339, 4.582202671,
2.234386271, 4.464622586, 1.793303323, 3.049223638, 2.517519578,
2.538484406, 0.2589592261, 0.8107408556, 1.265087883, 2.583951508,
0.5704173619, 0.150727288, 0, 2.880491806, 0.4688362577,
1.032252927, 1.711598417, 2.621504704, 0.5018857525, 0.9121811232,
1.467942423, 0.5364545556, 1.956558175, 1.903428792, 1.556986206,
0.3888441615, 0.2643162488, 0.06508233719, 1.137, 1.050285586,
1.40077366, 3.600281886, 2.354502437, 1.899786116, 3.690234235,
2.808763349, 0.7511081312, 1.271708613, 2.662284706, 2.675257642,
3.518963652, 3.64493179, 2.047243432, 2.681735548, 3.55460067,
4.471868465, 4.870529144, 4.073487063, 3.088843029, 4.176214051,
3.878882256, 3.798820098, 3.638531617, 3.78621757, 3.517110032,
3.885770398, 3.298820012, 3.207448044, 3.236561986, 4.13860818,
5.461401614, 3.068585968, 2.839888067, 2.545155836, 2.390539028,
3.996152667, 2.813447134, 2.336287582, 3.609633571, 1.994576758,
2.756891326, 2.963835872, 2.077835347, 1.981514275, 1.698439482,
4.559660757, 1.832220975, 1.538482109, 0.4012068882, 1.011597874,
0.2762621903, 0.6604082443, 1.726855522, 0.4426442882, 1.389697061,
2.265330127, 4.673539548, 2.833166846, 3.247307991, 1.550221184,
1.913466888, 1.02140226, 1.419304966, 4.649917894, 3.021104929,
1.138684662, 0.9702250537, 0.8674368023, 1.363686091, 2.237998135,
3.078402963, 2.612860775, 2.659002418, 0.7922293863, 0.5605036917,
2.918464369, 2.607222198, 2.72011864, 3.293449501, 0.2339249027,
0.09269339846, 0.4948047539, 0.988393193, 3.35986433, 3.283307665,
0.4664049454, 3.579501178, 0.9978282525, 2.513329669, 1.751686648,
2.364558742, 0.3028119337, 0.2667488345, 0.5316889235, 4.034444068,
3.413510363, 0.5591667383, 3.303219295, 1.845610995, 2.029920015,
1.968676774, 1.642599316, 2.259782135, 1.840349328, 2.169684459,
1.466603062, 1.35662262, 1.287059026, 1.114386511, 0.1013909283,
0.5191928737, 2.069483497, 2.864063592, 3.741153421, 3.675316052,
2.612341652, 2.535722998, 4.374650663, 0.9801658265, 4.516729836,
4.200885496, 3.757806231, 2.911160806, 0.08124990183, 4.160713125,
4.82011578, 3.805524153, 2.356340037, 2.528406371, 2.849670115,
4.335904978, 2.334369917, 1.682493793, 0.9721257977, 2.886626751,
1.678288529, 3.207466146, 2.493581595, 1.024302173, 0.2878921523,
1.951664026, 0.001168478, 1.9688079e-05, 0.000181543742,
0.000169602217, 0.000342252497, 3.8581815e-05, 0.000831689834,
0, 0.000310111829, 0.000123848133, 0.00027892549, 0.000474703505,
0.000605096677, 0.001312503032, 0.000397102961, 0.001565818974,
0.001649622681, 0.0018610356, 0.001417062691, 0.000275126286,
0.000431104276, 0.003826022716, 8.0019175e-05, 0.004124439051,
0.004485276435, 0.004514712379, 0.00294698083, 0.001935731554,
0.002986659776, 0.002716345238, 0.002434957234, 0.002476156054,
0.001893628041, 0.001454772675, 0.00099942015, 0.001028825627,
0.001531671726, 0.001566268214, 0.001890167805, 0.000937548652,
0.000653203203, 0.000456625581, 0.001139386805, 0.001135244462,
0, 0.001190210739, 0.000552443287, 0.002855486907, 0.001430594014,
0.000594097595, 0.000339933191)), row.names = c(NA, -234L
), class = "data.frame")
# Shift Site from the codes 1/2 to 0/1.
mydata <- transform(Clean_winter_diff, Site = Site - 1)

# Separating training and test data: 70% of the rows go to the training set.
alpha <- 0.7
inTrain_diff <- sample(1:nrow(mydata), alpha * nrow(mydata))
train.set.diff <- mydata[inTrain_diff, ]
test.set.diff <- mydata[-inTrain_diff, ]

# Boosted model fitted on the full data set with 5-fold cross-validation.
winter.boost <- gbm(
  Site ~ .,
  data = mydata,
  n.trees = 10000,
  shrinkage = 0.01,
  interaction.depth = 6,
  cv.folds = 5,
  verbose = FALSE
)

# Estimate the best number of trees from the cross-validation results.
best.iter <- gbm.perf(winter.boost, method = "cv")
best.iter
summary(winter.boost)

# Using caret to get model performance at the best iteration.
set.seed(123)
fitControl <- trainControl(method = "cv", number = 5, returnResamp = "all")
model2 <- train(
  Site ~ .,
  data = mydata[complete.cases(mydata), ],
  method = "gbm",
  distribution = "bernoulli",
  trControl = fitControl,
  verbose = FALSE,
  tuneGrid = data.frame(
    .n.trees = best.iter,
    .shrinkage = 0.01,
    .interaction.depth = 1,
    .n.minobsinnode = 1
  )
)
And these are the errors I get:
RMSE Rsquared MAE
Min. : NA Min. : NA Min. : NA
1st Qu.: NA 1st Qu.: NA 1st Qu.: NA
Median : NA Median : NA Median : NA
Mean :NaN Mean :NaN Mean :NaN
3rd Qu.: NA 3rd Qu.: NA 3rd Qu.: NA
Max. : NA Max. : NA Max. : NA
NA's :1 NA's :1 NA's :1
Error: Stopping
In addition: Warning messages:
1: In train.default(x, y, weights = w, ...) :
You are trying to do regression and your outcome only has two possible values Are you trying to do classification? If so, use a 2 level factor as your outcome column.
2: In nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo, :
There were missing values in resampled performance measures
I have checked the data frame for missing values; there are none. What is the problem?
sessionInfo():
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 17134)
Matrix products: default
locale:
[1] LC_COLLATE=Norwegian Bokmål_Norway.1252 LC_CTYPE=Norwegian Bokmål_Norway.1252 LC_MONETARY=Norwegian Bokmål_Norway.1252
[4] LC_NUMERIC=C LC_TIME=Norwegian Bokmål_Norway.1252
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] caret_6.0-86 ggplot2_3.3.2 lattice_0.20-38 mlbench_2.1-1 gbm_2.1.8 MASS_7.3-51.4 randomForest_4.6-14
[8] tree_1.0-40 ISLR_1.2 dplyr_1.0.0 rpart.plot_3.0.8 rpart_4.1-15
loaded via a namespace (and not attached):
[1] tinytex_0.24 tidyselect_1.1.0 xfun_0.15 purrr_0.3.4 reshape2_1.4.4 splines_3.6.2
[7] colorspace_1.4-1 vctrs_0.3.1 generics_0.0.2 stats4_3.6.2 survival_3.1-8 prodlim_2019.11.13
[13] rlang_0.4.7 ModelMetrics_1.2.2.2 pillar_1.4.6 glue_1.4.1 withr_2.2.0 foreach_1.5.0
[19] lifecycle_0.2.0 plyr_1.8.6 lava_1.6.7 stringr_1.4.0 timeDate_3043.102 munsell_0.5.0
[25] gtable_0.3.0 recipes_0.1.13 codetools_0.2-16 parallel_3.6.2 class_7.3-15 Rcpp_1.0.5
[31] scales_1.1.1 ipred_0.9-9 stringi_1.4.6 grid_3.6.2 tools_3.6.2 magrittr_1.5
[37] tibble_3.0.3 crayon_1.3.4 pkgconfig_2.0.3 ellipsis_0.3.1 Matrix_1.2-18 data.table_1.13.0
[43] pROC_1.16.2 lubridate_1.7.9 gower_0.2.2 rstudioapi_0.11 iterators_1.0.12 R6_2.4.1
[49] nnet_7.3-12 nlme_3.1-142 compiler_3.6.2
>
You are doing a classification, so you need to set the dependent variable to a factor for train in caret to work:
# Convert the outcome to a factor before training, so that train() treats the
# problem as classification.
set.seed(123)
fitControl <- trainControl(method = "cv", number = 5, returnResamp = "all")
mydata$Site <- factor(mydata$Site)
model2 <- train(
  Site ~ .,
  data = mydata[complete.cases(mydata), ],
  method = "gbm",
  distribution = "bernoulli",
  trControl = fitControl,
  verbose = FALSE,
  tuneGrid = data.frame(
    .n.trees = 400,
    .shrinkage = 0.01,
    .interaction.depth = 1,
    .n.minobsinnode = 1
  )
)
model2
Stochastic Gradient Boosting
234 samples
9 predictor
2 classes: '0', '1'
No pre-processing
Resampling: Cross-Validated (5 fold)
Summary of sample sizes: 187, 187, 187, 188, 187
Resampling results:
Accuracy Kappa
0.9232192 0.5550649
Tuning parameter 'n.trees' was held constant at a value
Tuning parameter 'n.minobsinnode' was held constant at
a value of 1
hth1 is a data frame that I already have.
> hth1
Source: local data frame [13 x 14]
Groups: team [13]
team CSK DC DD GL KKR KTK KXIP MI PW RCB RPSG
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 CSK 0 8 11 0 11 2 9 10 4 10 0
2 DC 2 0 8 0 2 1 7 5 3 8 0
3 DD 5 3 0 0 7 2 8 5 2 10 2
4 GL 0 0 2 0 0 0 0 0 0 1 0
5 KKR 5 7 10 2 0 0 5 10 3 15 0
6 KTK 0 0 0 0 2 0 1 0 1 2 0
7 KXIP 8 3 10 2 14 0 0 11 2 6 1
8 MI 12 5 13 2 8 1 7 0 3 11 1
9 PW 2 1 4 0 2 0 4 3 0 1 0
10 RCB 9 3 7 2 3 0 12 8 4 0 1
11 RPSG 0 0 0 2 2 0 1 1 0 1 0
12 RR 8 2 7 0 14 1 7 6 2 7 0
13 SH 3 0 4 0 5 0 4 5 2 5 2
# ... with 2 more variables: RR <dbl>, SH <dbl>
Why do the data frame returned by bind_rows() and the original data frame differ?
> h <- list(hth1)
> hth_b1 <- bind_rows(h)
> identical(hth1, hth_b1)
[1] FALSE
> class(hth_b1)
[1] "grouped_df" "tbl_df" "tbl" "data.frame"
> class(hth1)
[1] "grouped_df" "tbl_df" "tbl" "data.frame"
> setequal(hth1, hth_b1)
TRUE
> anti_join(hth1, hth_b1)
Joining, by = c("team", "CSK", "DC", "DD", "GL", "KKR", "KTK", "KXIP", "MI", "PW", "RCB", "RPSG", "RR", "SH")
Source: local data frame [0 x 14]
Groups: team [13]
# ... with 14 variables: team <chr>, CSK <dbl>, DC <dbl>, DD <dbl>, GL <dbl>,
# KKR <dbl>, KTK <dbl>, KXIP <dbl>, MI <dbl>, PW <dbl>, RCB <dbl>,
# RPSG <dbl>, RR <dbl>, SH <dbl>
What am I missing? I have been stuck here for a long time.
Update 1:
As requested by Benjamin, I ran the dput() function on both data frames. Here is the output.
> dput(hth_b1)
structure(list(team = c("CSK", "DC", "DD", "GL", "KKR", "KTK",
"KXIP", "MI", "PW", "RCB", "RPSG", "RR", "SH"), CSK = c(0, 2,
5, 0, 5, 0, 8, 12, 2, 9, 0, 8, 3), DC = c(8, 0, 3, 0, 7, 0, 3,
5, 1, 3, 0, 2, 0), DD = c(11, 8, 0, 2, 10, 0, 10, 13, 4, 7, 0,
7, 4), GL = c(0, 0, 0, 0, 2, 0, 2, 2, 0, 2, 2, 0, 0), KKR = c(11,
2, 7, 0, 0, 2, 14, 8, 2, 3, 2, 14, 5), KTK = c(2, 1, 2, 0, 0,
0, 0, 1, 0, 0, 0, 1, 0), KXIP = c(9, 7, 8, 0, 5, 1, 0, 7, 4,
12, 1, 7, 4), MI = c(10, 5, 5, 0, 10, 0, 11, 0, 3, 8, 1, 6, 5
), PW = c(4, 3, 2, 0, 3, 1, 2, 3, 0, 4, 0, 2, 2), RCB = c(10,
8, 10, 1, 15, 2, 6, 11, 1, 0, 1, 7, 5), RPSG = c(0, 0, 2, 0,
0, 0, 1, 1, 0, 1, 0, 0, 2), RR = c(9, 7, 9, 0, 1, 1, 8, 10, 3,
9, 0, 0, 7), SH = c(3, 0, 4, 3, 4, 0, 4, 3, 0, 4, 0, 0, 0)), .Names = c("team",
"CSK", "DC", "DD", "GL", "KKR", "KTK", "KXIP", "MI", "PW", "RCB",
"RPSG", "RR", "SH"), row.names = c(NA, -13L), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), vars = list(team), indices = list(
0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L), group_sizes = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), biggest_group_size = 1L, labels = structure(list(
team = c("CSK", "DC", "DD", "GL", "KKR", "KTK", "KXIP", "MI",
"PW", "RCB", "RPSG", "RR", "SH")), row.names = c(NA, -13L
), class = "data.frame", vars = list(team), .Names = "team"))
>
> dput(hth1)
structure(list(team = c("CSK", "DC", "DD", "GL", "KKR", "KTK",
"KXIP", "MI", "PW", "RCB", "RPSG", "RR", "SH"), CSK = c(0, 2,
5, 0, 5, 0, 8, 12, 2, 9, 0, 8, 3), DC = c(8, 0, 3, 0, 7, 0, 3,
5, 1, 3, 0, 2, 0), DD = c(11, 8, 0, 2, 10, 0, 10, 13, 4, 7, 0,
7, 4), GL = c(0, 0, 0, 0, 2, 0, 2, 2, 0, 2, 2, 0, 0), KKR = c(11,
2, 7, 0, 0, 2, 14, 8, 2, 3, 2, 14, 5), KTK = c(2, 1, 2, 0, 0,
0, 0, 1, 0, 0, 0, 1, 0), KXIP = c(9, 7, 8, 0, 5, 1, 0, 7, 4,
12, 1, 7, 4), MI = c(10, 5, 5, 0, 10, 0, 11, 0, 3, 8, 1, 6, 5
), PW = c(4, 3, 2, 0, 3, 1, 2, 3, 0, 4, 0, 2, 2), RCB = c(10,
8, 10, 1, 15, 2, 6, 11, 1, 0, 1, 7, 5), RPSG = c(0, 0, 2, 0,
0, 0, 1, 1, 0, 1, 0, 0, 2), RR = c(9, 7, 9, 0, 1, 1, 8, 10, 3,
9, 0, 0, 7), SH = c(3, 0, 4, 3, 4, 0, 4, 3, 0, 4, 0, 0, 0)), .Names = c("team",
"CSK", "DC", "DD", "GL", "KKR", "KTK", "KXIP", "MI", "PW", "RCB",
"RPSG", "RR", "SH"), class = c("grouped_df", "tbl_df", "tbl",
"data.frame"), row.names = c(NA, -13L), vars = list(team), labels = structure(list(
team = c("CSK", "DC", "DD", "GL", "KKR", "KTK", "KXIP", "MI",
"PW", "RCB", "RPSG", "RR", "SH")), class = "data.frame", row.names = c(NA,
-13L), vars = list(team), drop = TRUE, .Names = "team"), indices = list(
0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L), drop = TRUE, group_sizes = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), biggest_group_size = 1L)
There is a difference between the two outputs: hth1 has an extra drop = TRUE.
I don't understand why it is not there in the other one.
A reproducible example:
library(tidyverse)

# A grouped copy of mtcars, and the same data passed through bind_rows().
test1 <- group_by(mtcars, cyl)
test2 <- bind_rows(list(test1))

# Strict object identity fails, while the row/column comparison passes.
identical(test1, test2) # FALSE
all_equal(test1, test2) # TRUE
You can check both their attributes and you can see the rownames differ:
rownames(test1)
[1] "Mazda RX4" "Mazda RX4 Wag" "Datsun 710"
[4] "Hornet 4 Drive" "Hornet Sportabout" "Valiant"
[7] "Duster 360" "Merc 240D" "Merc 230"
[10] "Merc 280" "Merc 280C" "Merc 450SE"
[13] "Merc 450SL" "Merc 450SLC" "Cadillac Fleetwood"
[16] "Lincoln Continental" "Chrysler Imperial" "Fiat 128"
[19] "Honda Civic" "Toyota Corolla" "Toyota Corona"
[22] "Dodge Challenger" "AMC Javelin" "Camaro Z28"
[25] "Pontiac Firebird" "Fiat X1-9" "Porsche 914-2"
[28] "Lotus Europa" "Ford Pantera L" "Ferrari Dino"
[31] "Maserati Bora" "Volvo 142E"
rownames(test2)
[1] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" "12" "13"
[14] "14" "15" "16" "17" "18" "19" "20" "21" "22" "23" "24" "25" "26"
[27] "27" "28" "29" "30" "31" "32"
Never expect tibbles to treat your rownames with respect; they may be silently dropped at any time.
Forgive the formatting on this answer, but it would appear that you have labels attached to one object but not to the other. Where the labels got attached or removed isn't something I can know without looking at the code that generates the objects. I've bolded the difference in your objects below.
Note: not formatting this as code is a deliberate choice. Formatting as code prevents me from marking the difference in the structure in bold text
dput(hth_b1)
structure(list(team = c("CSK", "DC", "DD", "GL", "KKR", "KTK",
"KXIP", "MI", "PW", "RCB", "RPSG", "RR", "SH"), CSK = c(0, 2,
5, 0, 5, 0, 8, 12, 2, 9, 0, 8, 3), DC = c(8, 0, 3, 0, 7, 0, 3,
5, 1, 3, 0, 2, 0), DD = c(11, 8, 0, 2, 10, 0, 10, 13, 4, 7, 0,
7, 4), GL = c(0, 0, 0, 0, 2, 0, 2, 2, 0, 2, 2, 0, 0), KKR = c(11,
2, 7, 0, 0, 2, 14, 8, 2, 3, 2, 14, 5), KTK = c(2, 1, 2, 0, 0,
0, 0, 1, 0, 0, 0, 1, 0), KXIP = c(9, 7, 8, 0, 5, 1, 0, 7, 4,
12, 1, 7, 4), MI = c(10, 5, 5, 0, 10, 0, 11, 0, 3, 8, 1, 6, 5
), PW = c(4, 3, 2, 0, 3, 1, 2, 3, 0, 4, 0, 2, 2), RCB = c(10,
8, 10, 1, 15, 2, 6, 11, 1, 0, 1, 7, 5), RPSG = c(0, 0, 2, 0,
0, 0, 1, 1, 0, 1, 0, 0, 2), RR = c(9, 7, 9, 0, 1, 1, 8, 10, 3,
9, 0, 0, 7), SH = c(3, 0, 4, 3, 4, 0, 4, 3, 0, 4, 0, 0, 0)), .Names = c("team",
"CSK", "DC", "DD", "GL", "KKR", "KTK", "KXIP", "MI", "PW", "RCB",
"RPSG", "RR", "SH"), row.names = c(NA, -13L), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), vars = list(team), indices = list(
0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L), group_sizes = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), biggest_group_size = 1L , labels = structure(list(
team = c("CSK", "DC", "DD", "GL", "KKR", "KTK", "KXIP", "MI",
"PW", "RCB", "RPSG", "RR", "SH")), row.names = c(NA, -13L
), class = "data.frame", vars = list(team), .Names = "team"))
dput(hth1)
structure(list(team = c("CSK", "DC", "DD", "GL", "KKR", "KTK",
"KXIP", "MI", "PW", "RCB", "RPSG", "RR", "SH"), CSK = c(0, 2,
5, 0, 5, 0, 8, 12, 2, 9, 0, 8, 3), DC = c(8, 0, 3, 0, 7, 0, 3,
5, 1, 3, 0, 2, 0), DD = c(11, 8, 0, 2, 10, 0, 10, 13, 4, 7, 0,
7, 4), GL = c(0, 0, 0, 0, 2, 0, 2, 2, 0, 2, 2, 0, 0), KKR = c(11,
2, 7, 0, 0, 2, 14, 8, 2, 3, 2, 14, 5), KTK = c(2, 1, 2, 0, 0,
0, 0, 1, 0, 0, 0, 1, 0), KXIP = c(9, 7, 8, 0, 5, 1, 0, 7, 4,
12, 1, 7, 4), MI = c(10, 5, 5, 0, 10, 0, 11, 0, 3, 8, 1, 6, 5
), PW = c(4, 3, 2, 0, 3, 1, 2, 3, 0, 4, 0, 2, 2), RCB = c(10,
8, 10, 1, 15, 2, 6, 11, 1, 0, 1, 7, 5), RPSG = c(0, 0, 2, 0,
0, 0, 1, 1, 0, 1, 0, 0, 2), RR = c(9, 7, 9, 0, 1, 1, 8, 10, 3,
9, 0, 0, 7), SH = c(3, 0, 4, 3, 4, 0, 4, 3, 0, 4, 0, 0, 0)), .Names = c("team",
"CSK", "DC", "DD", "GL", "KKR", "KTK", "KXIP", "MI", "PW", "RCB",
"RPSG", "RR", "SH"), class = c("grouped_df", "tbl_df", "tbl",
"data.frame"), row.names = c(NA, -13L), vars = list(team), labels = structure(list(
team = c("CSK", "DC", "DD", "GL", "KKR", "KTK", "KXIP", "MI",
"PW", "RCB", "RPSG", "RR", "SH")), class = "data.frame", row.names = c(NA,
-13L), vars = list(team), drop = TRUE, .Names = "team"), indices = list(
0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L), drop = TRUE, group_sizes = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), biggest_group_size = 1L)
In the example below, I will add labels to the mtcars data frame, then run it through bind_rows, and you'll see that the labels are no longer present. This is what I believe is happening to your data.
# Demonstration: label the columns of a data frame, pass it through
# bind_rows(), and inspect what is left of the labels afterwards.
library(Hmisc)
# Work on a copy so the built-in mtcars itself is not modified.
mtcars2 <- mtcars
# Use the upper-cased column names as labels; self = FALSE presumably targets
# the individual columns rather than the data frame as a whole
# (confirm against the Hmisc documentation).
label(mtcars2, self = FALSE) <- toupper(names(mtcars))
library(dplyr)
# Round-trip the labelled data frame through bind_rows().
mtcars3 <- bind_rows(mtcars2)
# Compare the round-tripped copy against the labelled original.
identical(mtcars2, mtcars3)
# Show whichever labels remain on the round-tripped copy.
label(mtcars3)