Mutate_if syntax? - r

I have a dataset that looks like
structure(list(ID = 1:100, A = c(1, 1, 1, 0, 0, 0, 1, 0, 1, 1,
0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0,
0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0,
1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0,
1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1,
0, 1, 1, 1, 1, 1), B = c(-0.779571910800539, -1.01813937566596,
-0.617199891458882, 0.0309580500019241, 0.543273110365976, -0.0433300396605711,
0.230179974373525, -0.183807679340284, 1.23013876772693, -0.447068495884132,
-0.529019912858711, -0.423813233701193, -2.02301749716477, 0.107354643198155,
-0.182036878855649, -0.0686544314102692, -0.242211690200168,
0.235711424228903, -0.737085567507625, 1.08272499166402, 1.46797946789107,
0.676133655269793, 0.0970319828391364, -0.175265540837544, 1.01932401639564,
-1.6120456930373, -0.237498813763703, -1.0793071544667, 0.34060211076922,
0.358651319904244, 1.14185300245182, 0.643831607010375, -1.48935271976024,
1.52070114310115, 0.13758246936271, 0.677489791752007, -0.0421866338789382,
-0.963909996107064, -0.419518874496373, -1.94843733945541, -0.856606011022689,
0.950271505971139, -0.00501879225795071, -0.907348953277799,
0.176003279346265, 0.849120713832199, -0.682289211320935, 0.618834674100358,
-0.266654135174762, 1.38431159868239, 0.464047120137739, -0.478626559461985,
0.149837396236788, -1.22592409132424, 0.658992970998059, -0.755502690343619,
-1.64278237304159, 0.9123549798475, 0.212894692780789, -0.670549407572393,
2.37707712870178, -0.0295080172428597, -0.823140252108969, -0.428902533453998,
-0.435036177848892, 0.98534295091355, 1.24538388550067, 0.763169631787973,
0.0481870286750498, 0.373727588477095, 0.515173230638657, -0.980950523005618,
2.34498921196051, 1.16497367254483, 0.803207456941987, -1.20555741222113,
-1.69603664220648, -0.59655174894536, -0.471190748123387, 1.53055765388398,
0.426904841661558, -0.385574044956116, -1.05023815909094, -1.45225542235577,
-0.545485253245417, 0.173122341859165, -1.23651408987118, 0.438591835746343,
-0.826135255947115, 0.371873486298494, -0.422519474801474, -0.34343504002476,
-0.508591050193541, -1.64448384253113, -0.217712097435782, -0.396102247417337,
-0.324089563130585, 1.3108035615729, -1.74881781621313, -0.887343297491297
), C = c(2, 1, 2, 2, 1, 2, 1, 2, 3, 1, 1, 3, 2, 2, 3, 3, 2, 1,
1, 2, 3, 2, 3, 2, 2, 2, 3, 3, 3, 2, 2, 1, 3, 3, 2, 3, 3, 3, 3,
3, 1, 1, 2, 1, 1, 3, 3, 2, 3, 3, 1, 3, 1, 1, 2, 1, 1, 2, 1, 2,
2, 3, 2, 3, 3, 1, 2, 1, 3, 3, 2, 3, 3, 3, 3, 3, 2, 3, 3, 3, 1,
3, 3, 3, 3, 1, 1, 2, 1, 3, 2, 3, 3, 3, 1, 2, 2, 3, 3, 2), D = c(3,
2, 0, 1, 0, 2, 1, 1, 1, 2, 1, 3, 1, 0, 1, 2, 1, 1, 1, 3, 0, 3,
0, 0, 1, 3, 0, 3, 2, 1, 3, 1, 3, 0, 2, 1, 2, 0, 2, 2, 0, 0, 0,
3, 3, 3, 3, 2, 3, 2, 2, 1, 0, 2, 1, 0, 2, 0, 2, 2, 2, 3, 0, 1,
0, 3, 3, 1, 2, 1, 1, 0, 1, 0, 3, 1, 1, 1, 0, 2, 0, 3, 2, 3, 2,
2, 3, 3, 1, 2, 3, 3, 1, 2, 3, 2, 3, 3, 0, 2), E = c(0, 1, 0,
1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0,
1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1,
0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0,
1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0,
1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1), F = c(14L, 12L, 8L, 5L,
13L, 8L, 8L, 9L, 11L, 13L, 11L, 8L, 12L, 9L, 8L, 17L, 11L, 13L,
7L, 13L, 9L, 9L, 11L, 7L, 11L, 13L, 14L, 10L, 12L, 15L, 5L, 12L,
7L, 8L, 10L, 11L, 5L, 10L, 2L, 10L, 9L, 14L, 4L, 10L, 6L, 14L,
10L, 6L, 14L, 2L, 7L, 11L, 9L, 8L, 11L, 9L, 15L, 10L, 16L, 11L,
7L, 8L, 12L, 17L, 5L, 13L, 15L, 11L, 10L, 7L, 6L, 12L, 10L, 8L,
7L, 8L, 11L, 14L, 6L, 4L, 9L, 11L, 9L, 13L, 7L, 9L, 9L, 12L,
10L, 6L, 10L, 5L, 14L, 10L, 13L, 6L, 8L, 8L, 7L, 12L)), .Names = c("ID",
"A", "B", "C", "D", "E", "F"), row.names = c(NA, -100L), class = "data.frame")
However, my actual dataset has 100 columns. I would like to change any variable that has less than or equal to 5 unique values to a factor. I am using dplyr with the following code:
df %>%
mutate_if(is.integer, as.numeric) %>%
mutate_if(length(unique(.)) <= 5, as.factor(.))
But I get the error:
Error: length(.p) == length(vars) is not TRUE
Any thoughts? I want to convert those with less than or equal to 5 unique values into factors.

You're close, just missing a couple ~s to signal a purrr-style anonymous function:
library(dplyr)
df %>% mutate_if(~length(unique(.x)) <= 5, ~as.factor(.x))
result:
> df %>% mutate_if(~length(unique(.x)) <= 5, ~as.factor(.x)) %>% glimpse()
Observations: 100
Variables: 7
$ ID <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,...
$ A <fct> 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0...
$ B <dbl> -0.779571911, -1.018139376, -0.617199891, 0.030958050, 0.543273110, -0.043330040, 0.230179974, -0.183807679, 1.230138768, -0.44706...
$ C <fct> 2, 1, 2, 2, 1, 2, 1, 2, 3, 1, 1, 3, 2, 2, 3, 3, 2, 1, 1, 2, 3, 2, 3, 2, 2, 2, 3, 3, 3, 2, 2, 1, 3, 3, 2, 3, 3, 3, 3, 3, 1, 1, 2, 1...
$ D <fct> 3, 2, 0, 1, 0, 2, 1, 1, 1, 2, 1, 3, 1, 0, 1, 2, 1, 1, 1, 3, 0, 3, 0, 0, 1, 3, 0, 3, 2, 1, 3, 1, 3, 0, 2, 1, 2, 0, 2, 2, 0, 0, 0, 3...
$ E <fct> 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0...
$ F <int> 14, 12, 8, 5, 13, 8, 8, 9, 11, 13, 11, 8, 12, 9, 8, 17, 11, 13, 7, 13, 9, 9, 11, 7, 11, 13, 14, 10, 12, 15, 5, 12, 7, 8, 10, 11, 5...
Also note: if I remember correctly, . and .x can be used interchangeably in the anonymous functions. I am in the habit of using .x in case there is more than 1 argument (e.g., purrr::map2).

Related

Correlation between two variables produces NA

I am calculating the correlation between each variable and the target feature in a data frame. It works great aside from one variable, Age, which does not produce a correlation; instead I get an NA. I removed all NA values before even starting the analysis, so the data is clean.
This is the code: (PD is the target variable and I want to compare it with all other variables. PD is binary)
pearsons = c()
for (i in 1:length(colnames(Train_set))){
pearsons[i] = cor(Train_set[,i], Train_set$PD, method = 'pearson')
}
This is the data structure (only some of it):
> glimpse(Train_set)
Rows: 1,219
Columns: 56
$ PD <dbl> 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1,…
$ gender <int> 2, 2, 2, 1, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2, 1, 2, 2, 2, 2,…
$ cancer_type <int> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,…
$ Treatment <int> 5, 6, 6, 6, 5, 6, 5, 6, 5, 5, 5, 5, 6, 6, 6, 6, 6, 5, 5, 6, 6, 6, 6, 5, 6, 5, 5, 6, 5,…
$ totaldata_new.Age <int> 50, 66, 51, 60, 31, 70, 51, 56, 65, 62, 55, 69, 32, 82, 60, 49, 56, 59, 50, 51, 70, 74…
$ Adipocytes <dbl> 0.000000000, 0.000000000, 0.005592077, 0.005844092, 0.038175712, 0.000000000, 0.005063…
$ B.cells <dbl> 0.045214394, 1.300478781, 0.184967801, 0.032890485, 0.041641426, 0.006477740, 0.653999…
$ Basophils <dbl> 0.120695085, 0.065615816, 0.362173522, 0.039214941, 0.225555640, 0.056926623, 0.019076…
totaldata_new.Age is the Age variable. I tried setting it as.numeric() and as.integer() but both didn't work.
This is the training set,
structure(list(PD = c(0, 0, 1, 1, 1, 1, 0, 0, 1, 1), gender = c(2L,
2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L), cancer_type = c(3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), Treatment = c(5L, 6L, 6L, 6L,
5L, 6L, 5L, 6L, 5L, 5L), totaldata_new.Age = c(50L, 66L, 51L,
60L, 31L, 70L, 51L, 56L, 65L, 62L), Adipocytes = c(0, 0, 0.00559207695850587,
0.00584409167696122, 0.0381757121622292, 0, 0.00506330308366599,
0, 0.0156430635414994, 0), B.cells = c(0.0452143935493372, 1.30047878079526,
0.184967800962064, 0.0328904854435036, 0.0416414264467815, 0.00647774047514386,
0.653999365837062, 0.0331653878504112, 0.0286461940371656, 0.0888471904628742
), Basophils = c(0.120695085116671, 0.0656158162440011, 0.362173521572841,
0.0392149412975555, 0.225555640419744, 0.0569266227666268, 0.0190762558461507,
0.0733199539844435, 0.20291673586147, 0.0757313145147394), CD4..memory.T.cells = c(0,
0.24081994997988, 0, 0.0084070550945875, 0, 0, 0.0704387567897827,
0, 0.0177784010286187, 0.00653794301542519), CD4..naive.T.cells = c(0,
0.222121262122827, 0, 0, 0, 0, 0.0337776019379054, 0, 0, 0)), row.names = c("Pt10",
"Pt101", "Pt103", "Pt106", "Pt11", "Pt17", "Pt18", "Pt26", "Pt27",
"Pt28"), class = "data.frame")
Why is this variable producing NA, while other variables give good results of the correlation?
Looks like there are NA values in the columns of interest.
To avoid this problem, there is the parameter 'use' in the 'cor'-function, which the help explains as:
"giving a method for computing covariances in the presence of missing
values."
I'd recommend changing your code to:
pearsons[i] = cor(Train_set[,i], Train_set$PD, method = 'pearson',
use = "complete.obs")
Hope that helps!
Samuel

Describe or display the relationship between variables and the labels xgboost?

I have a model:
model<-xgboost(data=as.matrix(data[,-1]),label=data$Ethnicity, num_class=8, nrounds=50,objective="multi:softmax",lambda=1, eval_metric="merror")
data is a matrix of 94 variables of random survey questions, and the label is Ethnicity, which is a 0-7 variable coding race/ethnicity so that every number from 0 to 7 represents an ethnicity.
I found which variables are most important in the prediction:
xgb.importance(model=model)
## Feature Gain Cover Frequency
## 1: q97 0.0924173556 0.0388402250 0.016981237
## 2: q9 0.0603595554 0.0199381316 0.012749847
## 3: q7 0.0456855077 0.0447756304 0.066922777
## 4: q6 0.0436987577 0.0485072162 0.041311731
## 5: q8 0.0319606309 0.0212999077 0.015199599
## 6: q99 0.0276115402 0.0201090242 0.007961695
## 7: q89 0.0245865711 0.0249913356 0.023829408
## 8: q13 0.0197648132 0.0190748590 0.010912533
## 9: q81 0.0194462208 0.0140010066 0.021880742
## 10: q71 0.0192126872 0.0194684164 0.019709370
Now I am stuck, my question is how do I describe or display the relationship between these variables and the labels? TIA!
Here are some data from dput(head(data)):
structure(list(r = c(2, 6, 4, 4, 4, 4), q6 = c(1.73, 1.5, 1.9,
NA, 1.63, 1.7), q7 = c(54.43, 51.26, 66.68, NA, 68.49, 59.88),
q8 = c(2, 2, 1, 2, 1, 2), q9 = c(5, 5, 5, 5, 4, 5), q10 = c(5,
1, 1, 1, 3, 1), q11 = c(1, 1, 1, 2, 1, 1), q12 = c(1, 1,
1, 4, 1, 1), q13 = c(1, 1, 1, 4, 1, 1), q14 = c(1, 1, 1,
1, 1, 1), q15 = c(1, 1, 1, 1, 1, 1), q16 = c(1, 1, 3, 1,
1, 1), q17 = c(2, 1, NA, 1, 1, 1), q18 = c(3, 1, NA, 2, 1,
1), q19 = c(2, 1, NA, 1, 1, 1), q20 = c(2, 1, NA, 2, 1, 1
), q21 = c(2, 2, NA, 2, 1, 2), q22 = c(2, 1, 1, 1, 4, 2),
q23 = c(2, 1, NA, 1, 5, 2), q24 = c(1, 2, 1, 2, 1, 1), q25 = c(1,
2, 1, 2, 2, 1), q26 = c(2, 2, 1, 1, 1, 1), q27 = c(2, 2,
1, 2, 1, 1), q28 = c(2, 2, 2, 2, 1, 1), q29 = c(1, 1, NA,
1, 1, 3), q30 = c(1, 1, NA, 1, 1, 3), q31 = c(1, 2, NA, 1,
1, 1), q32 = c(6, 1, NA, 6, 6, 1), q33 = c(NA, 1, NA, 2,
5, 1), q34 = c(NA, 1, NA, 2, 4, 1), q35 = c(NA, 1, NA, 5,
5, 1), q36 = c(2, 1, NA, 3, 3, 1), q37 = c(1, 1, NA, 1, 1,
1), q38 = c(6, 1, NA, 4, 1, 1), q39 = c(1, 2, 2, 1, 1, 2),
q40 = c(3, 1, NA, 2, 7, 1), q41 = c(6, 1, 2, 5, 6, 3), q42 = c(5,
1, 5, 5, 5, 6), q43 = c(1, 1, 1, 2, 2, 2), q44 = c(1, 1,
1, 2, 2, NA), q45 = c(1, 1, 1, 5, 7, 4), q46 = c(1, 1, 1,
6, 5, 7), q47 = c(7, 1, NA, 7, 7, 6), q48 = c(6, 1, 7, 5,
5, 6), q49 = c(4, 1, NA, 6, 1, 4), q50 = c(1, 1, 1, 2, 3,
1), q51 = c(1, 1, 1, 1, 1, 1), q52 = c(1, 1, 1, 1, 1, 1),
q53 = c(1, 1, 1, 2, 3, 1), q54 = c(1, 1, 1, 1, 2, 1), q55 = c(1,
1, 1, 2, 1, 1), q56 = c(1, 1, 1, 1, 1, 1), q57 = c(1, 1,
1, 4, 4, 2), q58 = c(1, 1, 1, 1, 1, 1), q59 = c(1, 2, 2,
2, 1, 1), q60 = c(1, 2, 1, 1, 1, 1), q61 = c(7, 1, 2, 5,
6, 6), q62 = c(3, 1, 3, 5, 7, 5), q63 = c(3, 1, 3, 2, 4,
5), q64 = c(3, 1, 3, 3, 3, 2), q65 = c(2, 1, 2, 2, 2, 3),
q66 = c(4, 1, NA, 4, 4, 2), q67 = c(2, 3, 3, 2, 3, 2), q68 = c(1,
1, 2, 1, 1, 1), q69 = c(2, 3, 3, 2, 3, 3), q70 = c(2, 4,
4, 2, 1, 1), q71 = c(3, 2, 3, 1, 3, 2), q72 = c(4, 4, 4,
2, 3, 2), q73 = c(1, 2, 1, 1, 1, 2), q74 = c(2, 2, 3, 2,
2, 2), q75 = c(2, 2, 2, 2, 2, 1), q76 = c(7, 2, 2, 2, 2,
1), q77 = c(3, 3, 4, 4, 2, 7), q78 = c(1, 2, 4, 2, 1, 3),
q79 = c(4, 8, 6, 3, 1, 2), q80 = c(6, 4, 4, 3, 1, 4), q81 = c(5,
NA, 1, 4, 2, 1), q82 = c(7, 1, 6, 5, 2, 7), q83 = c(1, 1,
1, 6, 1, 6), q84 = c(1, 1, 1, 2, 1, 2), q85 = c(2, 2, 1,
2, 2, 2), q86 = c(1, 1, NA, 1, 1, 1), q87 = c(2, 2, NA, 2,
2, 1), q88 = c(4, 5, 5, 3, 1, 2), q89 = c(4, 2, 2, 4, 2,
4), q90 = c(2, 1, NA, NA, 1, 2), q91 = c(1, 1, 1, 3, 3, 1
), q92 = c(1, 1, 1, 2, 2, 5), q93 = c(4, 5, 7, 4, 7, 2),
q94 = c(3, 3, 2, 2, 3, 2), q95 = c(1, 4, 1, 1, 1, 4), q96 = c(1,
1, 1, 1, 1, 1), q97 = c(1, 1, 3, 1, 2, 3), q98 = c(1, 2,
2, 1, 1, 1), q99 = c(1, 1, 1, 1, 1, 2)), row.names = c(NA,
6L), class = "data.frame")

Filter data frame to get only rows that have a value in column and another value in any column after first value, R

I am looking for a methodology to filter the following data frame so that I end up with only the rows that have a 1 in some column and a 2 in any other column after the column containing the 1. I am thankful for any help!
data_rel1 <- structure(list(job1category = c(NA, 1, 2, 2, 1, 1, 2, 1, 1, 1,
1, 1, 1, 1, NA, 1, 1, 4, 1, 1, NA, NA, 1, 1, 1, 1, 1, 1, 2, 1,
1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 6, 1, 1, 1, 1, NA, 7, NA, 5, 1,
5, NA, 2, 5, 1, NA, 1, 5, 1, 1, 1, 1, 5, 1, 5, 4, 1, 4, 5, 4,
NA, 5, NA, 5, 4, 3, 6, 1, 4, 4, 5, 4, 1, NA, 1, NA, 1, NA, 1,
1, 1, 1, 1, 4, 1, 1, 1, NA, 1, NA), job2category = c(3, 2, 1,
2, 3, 1, 2, 2, 1, 1, 1, NA, 2, 1, NA, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 5, 3, 3, 1, 1, 2, 4, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1,
1, 1, 7, 7, 1, 1, 1, NA, 4, 1, 1, NA, 2, 1, 1, 1, 1, NA, 5, NA,
4, 5, 4, NA, 5, 2, 4, 4, 2, 7, 5, NA, 5, 2, NA, 4, NA, 1, 5,
NA, 1, NA, 1, 1, 1, 1, 5, 2, NA, 4, 1, 1, 1, NA, 1, NA), job3category = c(3,
2, 1, 2, 3, 1, 2, 2, 1, 1, 1, NA, 2, 1, NA, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 5, 3, 3, 1, 1, 2, 4, 1, 1, 1, 1, 1, 1, 1, 1, 2,
2, 1, 1, 1, 7, 7, 1, 1, 1, NA, 4, 1, 1, NA, 2, 1, 1, 1, 1, NA,
5, NA, 4, 5, 4, NA, 5, 2, 4, 4, 2, 7, 5, NA, 5, 2, NA, 4, NA,
1, 5, NA, 1, NA, 1, 1, 1, 1, 5, 2, NA, 4, 1, 1, 1, NA, 1, NA),
job4category = c(3, 2, 1, 2, 3, 1, 2, 2, 1, 1, 1, NA, 2,
1, NA, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 3, 3, 1, 1, 2,
4, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 7, 7, 1, 1, 1,
NA, 4, 1, 1, NA, 2, 1, 1, 1, 1, NA, 5, NA, 4, 5, 4, NA, 5,
2, 4, 4, 2, 7, 5, NA, 5, 2, NA, 4, NA, 1, 5, NA, 1, NA, 1,
1, 1, 1, 5, 2, NA, 4, 1, 1, 1, NA, 1, NA)), row.names = c(NA,
-100L), class = c("tbl_df", "tbl", "data.frame"))
You can try this with an apply -
data_rel1[apply(data_rel1, 1, function(x) {
inds <- which(x == 1)
length(inds) && any(which(x == 2) > min(inds))
}), ]
# job1category job2category job3category job4category
# <dbl> <dbl> <dbl> <dbl>
#1 1 2 2 2
#2 1 2 2 2
#3 1 2 2 2
#4 1 2 2 2
#5 1 2 2 2
#6 1 2 2 2
#7 1 2 2 2
#8 1 2 2 2

Something is wrong; all the RMSE metric values are missing; using caret train function

I am trying to fit a gbm model using the caret package. I know other people have had the same problem, but all the solutions provided in the comments of those questions have not worked for my error. Here is my reproducible code:
library(dplyr)
library(MASS)
library(caret)
library(gbm)
Clean_winter_diff<-structure(list(Total = c(2L, 3L, 4L, 2L, 3L, 4L, 2L, 3L, 2L,
3L, 2L, 3L, 2L, 2L, 3L, 6L, 7L, 2L, 19L, 2L, 3L, 4L, 3L, 9L,
2L, 5L, 4L, 7L, 2L, 2L, 2L, 3L, 2L, 2L, 6L, 5L, 2L, 11L, 2L,
6L, 3L, 7L, 9L, 2L, 5L, 5L, 2L, 3L, 6L, 2L, 2L, 8L, 5L, 2L, 9L,
2L, 2L, 8L, 4L, 2L, 5L, 2L, 2L, 3L, 2L, 10L, 4L, 2L, 4L, 6L,
23L, 2L, 3L, 4L, 2L, 12L, 5L, 2L, 6L, 3L, 9L, 14L, 4L, 2L, 2L,
8L, 2L, 3L, 2L, 5L, 4L, 4L, 2L, 11L, 4L, 2L, 6L, 9L, 2L, 2L,
7L, 2L, 3L, 2L, 4L, 4L, 2L, 2L, 2L, 3L, 8L, 2L, 4L, 2L, 2L, 5L,
2L, 4L, 3L, 2L, 2L, 6L, 5L, 14L, 2L, 2L, 6L, 4L, 3L, 2L, 2L,
5L, 6L, 3L, 2L, 2L, 10L, 3L, 5L, 4L, 2L, 6L, 10L, 6L, 3L, 11L,
2L, 2L, 7L, 5L, 3L, 3L, 4L, 2L, 2L, 3L, 2L, 3L, 10L, 2L, 3L,
3L, 2L, 2L, 7L, 6L, 2L, 2L, 3L, 2L, 2L, 8L, 3L, 4L, 2L, 5L, 3L,
2L, 8L, 5L, 2L, 2L, 4L, 10L, 3L, 8L, 2L, 3L, 3L, 2L, 4L, 5L,
2L, 2L, 3L, 2L, 2L, 2L, 2L, 4L, 2L, 4L, 2L, 2L, 4L, 2L, 8L, 9L,
2L, 6L, 2L, 3L, 3L, 3L, 7L, 2L, 5L, 2L, 2L, 2L, 3L, 6L, 2L, 2L,
4L, 3L, 2L, 3L, 4L, 2L, 3L, 20L, 5L, 2L), Site = c(1, 1, 2, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 2, 1,
1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 2, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1,
1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 2,
1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 2, 2, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 2, 2, 1, 1, 2, 1, 1),
Night = c(0, 0, 1, 0, 0, 0.25, 0, 0.666666667, 0.5, 0, 0,
0, 0, 0.5, 0, 0.5, 0.428571429, 0, 0.6315789, 1, 0.666666667,
0.25, 1, 0.333333333, 1, 0.2, 1, 0, 0, 0, 1, 0.666666667,
0, 0.5, 0.166666667, 0, 0.5, 0.909090909, 1, 0.333333333,
1, 0, 0.222222222, 0, 0, 0.2, 0, 0, 0.333333333, 0, 0.5,
0.375, 0, 0, 0.222222222, 0, 0, 0.25, 0, 0, 0, 0, 0, 0.666666667,
0, 0.2, 0.75, 0, 1, 1, 0.869565217, 0, 0, 0, 1, 1, 0.2, 0.5,
0.333333333, 0, 0, 1, 0.25, 0.5, 0, 0, 0.5, 0, 0, 0.4, 0,
0.75, 1, 0.363636364, 0, 1, 1, 0.222222222, 0.5, 0, 0.142857143,
0, 0, 0, 0.25, 0, 0, 1, 0, 0.333333333, 0.25, 0.5, 0, 0.5,
0, 0.4, 0.5, 0.25, 0, 0, 0, 0, 0, 0.785714286, 0, 0, 0.833333333,
0, 0, 0, 0, 1, 0.5, 0, 0.5, 0, 0.6, 0, 0.2, 0, 1, 0.166666667,
1, 0, 0.666666667, 1, 0, 0, 0.285714286, 0.2, 0, 0, 0, 0,
0.5, 0, 0.5, 0.666666667, 0.4, 1, 0, 0, 0, 1, 0.857142857,
1, 0.5, 0, 0.666666667, 0, 0.5, 0.5, 0, 0, 0, 0.2, 0.333333333,
0, 0, 0.2, 1, 1, 0.25, 0.8, 0.333333333, 0.5, 0, 0.666666667,
0.333333333, 0, 1, 0.6, 1, 0, 0.333333333, 0, 0.5, 0, 0,
0.75, 0, 0.5, 1, 1, 0, 1, 0.375, 0.666666667, 0.5, 0.333333333,
0, 0.666666667, 0, 0.333333333, 0.428571429, 0, 0.4, 0.5,
1, 0.5, 0.333333333, 0.5, 0.5, 1, 0.5, 0.666666667, 0.5,
1, 0.5, 0, 0.666666667, 0.5, 0.2, 1), Day = c(1, 1, 0, 1,
1, 0.75, 1, 0.333333333, 0.5, 1, 1, 1, 1, 0.5, 1, 0.5, 0.571428571,
1, 0.368421053, 0, 0.333333333, 0.75, 0, 0.666666667, 0,
0.8, 0, 1, 1, 1, 0, 0.333333333, 1, 0.5, 0.833333333, 1,
0.5, 0.090909091, 0, 0.666666667, 0, 1, 0.777777778, 1, 1,
0.8, 1, 1, 0.666666667, 1, 0.5, 0.625, 1, 1, 0.777777778,
1, 1, 0.75, 1, 1, 1, 1, 1, 0.333333333, 1, 0.8, 0.25, 1,
0, 0, 0.130434783, 1, 1, 1, 0, 0, 0.8, 0.5, 0.666666667,
1, 1, 0, 0.75, 0.5, 1, 1, 0.5, 1, 1, 0.6, 1, 0.25, 0, 0.636363636,
1, 0, 0, 0.777777778, 0.5, 1, 0.857142857, 1, 1, 1, 0.75,
1, 1, 0, 1, 0.666666667, 0.75, 0.5, 1, 0.5, 1, 0.6, 0.5,
0.75, 1, 1, 1, 1, 1, 0.214285714, 1, 1, 0.166666667, 1, 1,
1, 1, 0, 0.5, 1, 0.5, 1, 0.4, 1, 0.8, 1, 0, 0.833333333,
0, 1, 0.333333333, 0, 1, 1, 0.714285714, 0.8, 1, 1, 1, 1,
0.5, 1, 0.5, 0.333333333, 0.6, 0, 1, 1, 1, 0, 0.142857143,
0, 0.5, 1, 0.333333333, 1, 0.5, 0.5, 1, 1, 1, 0.8, 0.666666667,
1, 1, 0.8, 0, 0, 0.75, 0.2, 0.666666667, 0.5, 1, 0.333333333,
0.666666667, 1, 0, 0.4, 0, 1, 0.666666667, 1, 0.5, 1, 1,
0.25, 1, 0.5, 0, 0, 1, 0, 0.625, 0.333333333, 0.5, 0.666666667,
1, 0.333333333, 1, 0.666666667, 0.571428571, 1, 0.6, 0.5,
0, 0.5, 0.666666667, 0.5, 0.5, 0, 0.5, 0.333333333, 0.5,
0, 0.5, 1, 0.333333333, 0.5, 0.8, 0), Distance_forest = c(0.527747223,
0.680189568, 0, 0.310562619, 0.328173668, 0.278522078, 0.722954456,
0.784333633, 0.633598813, 0.106383899, 0.525329032, 0.246038608,
0.575318257, 0, 0.767179738, 0.443355317, 0.876859332, 0.19139315,
0, 0.037535778, 0.432922864, 0.131314978, 0, 0, 0.093159023,
0.128161967, 0, 0, 0.006470757, 0.30307544, 0, 0.568211372,
0.263593171, 0.131057648, 0.168134106, 0.367657292, 0.717686941,
0.163080941, 0, 0.202433621, 0.3842, 0, 0, 0.165167085, 0.929924705,
2.120840521, 0.484698725, 1.078311772, 0.366644583, 0.340810601,
0.298239859, 0.195581001, 0.02421172, 0, 0.464407271, 0.198840768,
0.054828399, 0.489438607, 0.295818359, 0.110773002, 0.496209018,
0.67346593, 0.214433884, 0.108712722, 0.529136166, 0.639769867,
0, 0.396732499, 0.483450073, 0.001882719, 0.248622382, 0.925764277,
0.175704519, 0.622952019, 0, 0, 1.142940058, 1.133076471,
0.224133662, 1.083342909, 0.745420612, 0.377062959, 0.08050045,
0.162178412, 1.361054023, 0.123874613, 0.49008657, 0.638751698,
0.167293055, 0.306236508, 0.581962136, 0.269203966, 0.01981849,
0.389124993, 0.333741945, 0.089434216, 0, 0.172470454, 0.174222306,
0.298973407, 0.139883014, 0.455618893, 0.612636301, 0.372548564,
0.35343891, 0.583316416, 0.291550392, 0.530795339, 0.07577014,
0.844212848, 0.106972082, 0.992915959, 0.044859616, 0.820739224,
0.799670156, 0.316242417, 0.319460412, 0.810118761, 0.500966406,
0.377834056, 0.940032033, 0.151399734, 0.28102882, 0.212952188,
0.073000622, 0.370545468, 0.872918616, 0, 0.104900131, 0.081847421,
0.216958479, 0.008668498, 0.007014128, 0.495791646, 0.02399882,
0.297470809, 0.490666846, 0.415433354, 0.301854897, 0.365931213,
0.692253337, 0.165305616, 0.640148893, 0.835302988, 0.768199373,
0.153852261, 0.134893226, 0.540233724, 0.335663076, 0.102341147,
0.195486707, 0.362254712, 0.324739821, 1.697227338, 0.520683209,
0.020203443, 0, 0.275300664, 0.259782193, 0.051199078, 0.217527413,
0.550995487, 0.656144105, 0.277954065, 0.091362713, 0.769716859,
0.817754331, 0.531972108, 0.330715097, 0.795027122, 0.818699405,
0.113381995, 0.73975023, 0.342823482, 0.760817657, 0.817530729,
0.700152145, 0.88797978, 0.29428625, 0.108928974, 0.074075782,
0.747234676, 0, 0.543069, 0.262442933, 0.262835131, 0.356383731,
0.371421971, 0.015478187, 0.601986047, 0, 0.048889129, 0.406113218,
0.127855407, 0.396601367, 0.294174095, 1.112770231, 0.066093385,
0.833489821, 0.27603216, 0.261494516, 0.139170942, 0.36716509,
0.303017066, 0.245362186, 0, 0.071559882, 0.08333732, 0.617973146,
0.075376835, 0.778806939, 0, 0.484474765, 0.09264197, 0.605744884,
0.568592372, 0.464302103, 0.219293483, 0.115301111, 0.636074027,
0.69132069, 0.448515825, 0.150593216, 0, 0.668861867, 0.664099955,
0.386919408, 0.568691441, 0.328245416, 0.441309029, 0.216574999,
0.191497106, 0.372996079, 0.211736755), Altitude_diff = c(-0.093344147,
-0.032953796, -0.166307236, -0.082168137, -0.074024556, 0.011625801,
-0.035469849, 0.023688222, -0.035174545, 0.009125112, -0.148026001,
-0.136813009, -0.140504929, -0.155278686, -0.141057312, -0.154625722,
-0.138962751, 0.021278778, -0.112632, -0.121742996, -0.104769694,
-0.062242187, -0.105238068, -0.118123369, -0.116926834, -0.057471783,
-0.099749664, -0.138632839, -0.086083588, -0.086340958, -0.109178192,
-0.09964916, -0.086616302, -0.113422317, -0.145193425, -0.139987988,
-0.12330925, -0.062, -0.073519485, -0.0852851, -0.087, -0.041133632,
-0.02300371, 0.145411285, 0.007278729, 0.043087274, 0.12858374,
0.074364258, 0.444998927, -0.018522705, -0.028386627, 0.007190659,
-0.045301581, -0.057804062, 0.132843404, 0.021017105, -0.078413605,
-0.046420864, 0.058002304, -0.081611237, 0.079912634, -0.050522034,
-0.024949936, -0.084849548, -0.062893188, -0.041188028, -0.051312736,
-0.01290921, -0.072736145, -0.079543025, -0.016072741, -0.019319687,
-0.0213343, 0.020119728, -0.071389999, -0.088737882, 0.073720496,
-0.019645096, -0.059846527, 0.08921346, -0.027587019, -0.064136113,
-0.06246801, -0.049053955, 0.119930542, 0.013316631, -0.060812866,
-0.010882792, -0.072900299, -0.00263418, 0.055887116, -0.057,
-0.152, -0.082, -0.134, -0.157, -0.117, -0.128, 0.022, -0.129,
0.121, 0.126, 0.091, -0.075, 0.014, -0.071, 0.009, -0.137,
-0.13, -0.131, -0.054, -0.132, -0.093, -0.134, -0.143, -0.127,
-0.089, -0.058, -0.057, -0.057, -0.055, -0.15, -0.17, -0.106,
-0.177, -0.009, 0.008, -0.08, 0.067, -0.131, -0.029, -0.016,
0.048, -0.154, -0.133, -0.109, -0.056, 0.029, -0.091, 0.031,
0.032, 0.022, 0.029, 0.06, 0.075, -0.099, 0.075, 0.202, -0.022,
0.013, 0.118, -0.022, -0.034, 0.224, -0.003, 0.095, 0.03,
0.04, 0.105, -0.013, 0.031, -0.038, -0.043, -0.01, 0.046,
-0.096, -0.028, -0.033, -0.023, 0.066, 0.063, -0.041, -0.001,
-0.005, -0.025, 0.047, -0.025, -0.028, -0.002, 0.065, -0.019,
-0.133, -0.045, 0.0479274, -0.0969804, -0.0511209, -0.1380578,
-0.0619915, -0.1375449, 0.028642, -0.139097, -0.0267313,
-0.0866448, -0.0664405, -0.0098812, 0.0950015, -0.0905839,
-0.1271573, -0.1345035, 0.0696888, 0.1161573, -0.001593,
-0.139097, -0.139097, -0.0351609, -0.1168084, -0.0487204,
-0.0427109, 0.0139613, -0.0361378, -0.073785, -0.0521353,
0.0207491, -0.0398732, -0.0512241, -0.0480128, -0.0133375,
-0.0047241, 0.0556789, -0.0389344, -0.0307192, -0.0410356,
-0.0436031, -0.0513303, 0.0914526, 0.108031, 0.078924, -0.0482411,
-0.0010576, -0.0543727, 0.1055158, -0.0347792, 0.0091985,
-0.0066721), Revisits = c(0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1,
0, 0, 1, 0, 2, 1, 0, 4, 0, 1, 2, 2, 1, 0, 2, 2, 1, 0, 0,
1, 1, 0, 1, 2, 1, 1, 5, 1, 2, 1, 3, 3, 0, 2, 1, 0, 0, 2,
0, 0, 1, 1, 1, 0, 0, 0, 2, 0, 0, 1, 0, 0, 2, 0, 7, 3, 1,
1, 3, 7, 0, 0, 1, 1, 7, 1, 0, 1, 1, 2, 9, 1, 1, 0, 2, 1,
0, 0, 0, 0, 1, 1, 3, 1, 1, 4, 2, 0, 0, 2, 0, 0, 0, 0, 0,
1, 1, 0, 1, 2, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 6, 0,
0, 2, 2, 0, 0, 0, 3, 0, 1, 0, 0, 2, 0, 0, 1, 0, 3, 3, 2,
2, 5, 0, 0, 4, 4, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0,
0, 4, 3, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1,
0, 4, 0, 1, 1, 1, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 1, 0, 1, 0, 1, 1, 0, 2, 0, 0, 0, 1, 4, 1, 2, 1, 0, 0,
0, 0, 1, 1, 0, 0, 0, 2, 2, 0, 0, 1, 2, 1), Ratio = c(0, 0,
2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 2, 0, 3, 7, 0, 4.75, 0,
3, 2, 1.5, 9, 0, 2.5, 2, 7, 0, 0, 2, 3, 0, 2, 3, 5, 2, 2.2,
2, 3, 3, 2.333333333, 3, 0, 2.5, 5, 0, 0, 3, 0, 0, 8, 5,
2, 0, 0, 0, 4, 0, 0, 5, 0, 0, 1.5, 0, 1.428571429, 1.333333333,
2, 4, 2, 3.285714286, 0, 0, 4, 2, 1.714285714, 5, 0, 6, 3,
4.5, 1.555555556, 4, 2, 0, 4, 2, 0, 0, 0, 0, 4, 2, 3.666666667,
4, 2, 1.5, 4.5, 0, 0, 3.5, 0, 0, 0, 0, 0, 2, 2, 0, 3, 4,
0, 4, 2, 0, 0, 2, 0, 3, 0, 0, 6, 5, 2.333333333, 0, 0, 3,
2, 0, 0, 0, 1.666666667, 0, 3, 0, 0, 5, 0, 0, 4, 0, 2, 3.333333333,
3, 1.5, 2.2, 0, 0, 1.75, 1.25, 3, 0, 4, 0, 2, 0, 2, 0, 0,
2, 0, 0, 0, 0, 1.75, 2, 2, 0, 3, 0, 0, 8, 0, 0, 0, 0, 3,
0, 8, 0, 2, 2, 0, 2.5, 3, 8, 2, 3, 3, 2, 4, 2.5, 2, 2, 3,
2, 2, 2, 2, 4, 2, 4, 2, 2, 4, 2, 8, 9, 2, 3, 2, 3, 3, 3,
1.75, 2, 2.5, 2, 2, 2, 3, 6, 2, 2, 4, 3, 2, 1.5, 2, 2, 3,
20, 2.5, 2), Area = c(0.032426, 0.035282, 0.113383, 0.035693,
0.041549, 0.058353, 0.031573, 0.057897, 0.034298, 0.075203,
0.038044, 0.039534, 0.035463, 0.056159, 0.0319, 0.152971,
0.063424, 0.033137, 0.184546, 0.054271, 0.043699, 0.070929,
0.086888, 0.182135, 0.055882, 0.063176, 0.072119, 0.1096,
0.035482, 0.040162, 0.056385, 0.042962, 0.032754, 0.062732,
0.056648, 0.035606, 0.062001, 0.117763, 0.062311, 0.089266,
0.078665, 0.091633, 0.065517, 0.037454, 0.060411, 0.073355,
0.035344, 0.033497, 0.119351, 0.044972, 0.031568, 0.114325,
0.068984, 0.061986, 0.109741, 0.033782, 0.031849, 0.105872,
0.055202, 0.031857, 0.064647, 0.031718, 0.032588, 0.076284,
0.036021, 0.216575, 0.100172, 0.06227, 0.060081, 0.063876,
0.224969, 0.045917, 0.037024, 0.077219, 0.054039, 0.158028,
0.067884, 0.034719, 0.120346, 0.044812, 0.080923, 0.171879,
0.069136, 0.0417, 0.032867, 0.11509, 0.053077, 0.062925,
0.033554, 0.07492, 0.114556, 0.096677, 0.049153, 0.161404,
0.073527, 0.045258, 0.08603, 0.091654, 0.033591, 0.033243,
0.060307, 0.048489, 0.041845, 0.031375, 0.046293, 0.034473,
0.044909, 0.052535, 0.060832, 0.082261, 0.086662, 0.031981,
0.053075, 0.057269, 0.031764, 0.039376, 0.061771, 0.051374,
0.081914, 0.04886, 0.040433, 0.056631, 0.086457, 0.118001,
0.033169, 0.033734, 0.064399, 0.065725, 0.043722, 0.062459,
0.032385, 0.07605, 0.055818, 0.067326, 0.034017, 0.033867,
0.052257, 0.062541, 0.073173, 0.069685, 0.034166, 0.096639,
0.081452, 0.116512, 0.064753, 0.12308, 0.033466, 0.050626,
0.068697, 0.105123, 0.066668, 0.075515, 0.076373, 0.046068,
0.032637, 0.067892, 0.059513, 0.032345, 0.076412, 0.055942,
0.057757, 0.070641, 0.038058, 0.04051, 0.049283, 0.063954,
0.040222, 0.043146, 0.062292, 0.05944, 0.032226, 0.121329,
0.086029, 0.040616, 0.033843, 0.037219, 0.066294, 0.034378,
0.117405, 0.095093, 0.032398, 0.062489, 0.060033, 0.0006219274,
0.0004771933187, 0.0005009547997, 0.0004406716919, 0.0005174510498,
0.0004356966248, 0.0006026420288, 0.0004355072708, 0.0005670226318,
0.0004853354187, 0.0005070045624, 0.0005619193115, 0.0006754835205,
0.0004834161072, 0.0004468427429, 0.000439496521, 0.0006436887817,
0.0006849831238, 0.0005693302002, 0.0004349030151, 0.0004349030151,
0.0005387456665, 0.0004572155151, 0.0005252477493, 0.0005314183146,
0.0005879613037, 0.0005381040955, 0.0005002150269, 0.0005234927775,
0.000592482015, 0.0005348047689, 0.0005223570905, 0.0005260328979,
0.0005637895386, 0.0005767995911, 0.000629678894, 0.0005354559326,
0.0005431971436, 0.0005328845113, 0.0005311777954, 0.0005214696045,
0.0006679819946, 0.0006827795207, 0.0006529239502, 0.0005282859904,
0.0005745828705, 0.0005196272583, 0.0006795158081, 0.0005336247467,
0.0005789768311, 0.0005680122375), Distance_main = c(1.131059754,
0.9597414435, 0, 1.256349606, 1.078548275, 1.855321885, 4.111540893,
5.445573732, 4.717162654, 3.192720443, 1.230485339, 4.582202671,
2.234386271, 4.464622586, 1.793303323, 3.049223638, 2.517519578,
2.538484406, 0.2589592261, 0.8107408556, 1.265087883, 2.583951508,
0.5704173619, 0.150727288, 0, 2.880491806, 0.4688362577,
1.032252927, 1.711598417, 2.621504704, 0.5018857525, 0.9121811232,
1.467942423, 0.5364545556, 1.956558175, 1.903428792, 1.556986206,
0.3888441615, 0.2643162488, 0.06508233719, 1.137, 1.050285586,
1.40077366, 3.600281886, 2.354502437, 1.899786116, 3.690234235,
2.808763349, 0.7511081312, 1.271708613, 2.662284706, 2.675257642,
3.518963652, 3.64493179, 2.047243432, 2.681735548, 3.55460067,
4.471868465, 4.870529144, 4.073487063, 3.088843029, 4.176214051,
3.878882256, 3.798820098, 3.638531617, 3.78621757, 3.517110032,
3.885770398, 3.298820012, 3.207448044, 3.236561986, 4.13860818,
5.461401614, 3.068585968, 2.839888067, 2.545155836, 2.390539028,
3.996152667, 2.813447134, 2.336287582, 3.609633571, 1.994576758,
2.756891326, 2.963835872, 2.077835347, 1.981514275, 1.698439482,
4.559660757, 1.832220975, 1.538482109, 0.4012068882, 1.011597874,
0.2762621903, 0.6604082443, 1.726855522, 0.4426442882, 1.389697061,
2.265330127, 4.673539548, 2.833166846, 3.247307991, 1.550221184,
1.913466888, 1.02140226, 1.419304966, 4.649917894, 3.021104929,
1.138684662, 0.9702250537, 0.8674368023, 1.363686091, 2.237998135,
3.078402963, 2.612860775, 2.659002418, 0.7922293863, 0.5605036917,
2.918464369, 2.607222198, 2.72011864, 3.293449501, 0.2339249027,
0.09269339846, 0.4948047539, 0.988393193, 3.35986433, 3.283307665,
0.4664049454, 3.579501178, 0.9978282525, 2.513329669, 1.751686648,
2.364558742, 0.3028119337, 0.2667488345, 0.5316889235, 4.034444068,
3.413510363, 0.5591667383, 3.303219295, 1.845610995, 2.029920015,
1.968676774, 1.642599316, 2.259782135, 1.840349328, 2.169684459,
1.466603062, 1.35662262, 1.287059026, 1.114386511, 0.1013909283,
0.5191928737, 2.069483497, 2.864063592, 3.741153421, 3.675316052,
2.612341652, 2.535722998, 4.374650663, 0.9801658265, 4.516729836,
4.200885496, 3.757806231, 2.911160806, 0.08124990183, 4.160713125,
4.82011578, 3.805524153, 2.356340037, 2.528406371, 2.849670115,
4.335904978, 2.334369917, 1.682493793, 0.9721257977, 2.886626751,
1.678288529, 3.207466146, 2.493581595, 1.024302173, 0.2878921523,
1.951664026, 0.001168478, 1.9688079e-05, 0.000181543742,
0.000169602217, 0.000342252497, 3.8581815e-05, 0.000831689834,
0, 0.000310111829, 0.000123848133, 0.00027892549, 0.000474703505,
0.000605096677, 0.001312503032, 0.000397102961, 0.001565818974,
0.001649622681, 0.0018610356, 0.001417062691, 0.000275126286,
0.000431104276, 0.003826022716, 8.0019175e-05, 0.004124439051,
0.004485276435, 0.004514712379, 0.00294698083, 0.001935731554,
0.002986659776, 0.002716345238, 0.002434957234, 0.002476156054,
0.001893628041, 0.001454772675, 0.00099942015, 0.001028825627,
0.001531671726, 0.001566268214, 0.001890167805, 0.000937548652,
0.000653203203, 0.000456625581, 0.001139386805, 0.001135244462,
0, 0.001190210739, 0.000552443287, 0.002855486907, 0.001430594014,
0.000594097595, 0.000339933191)), row.names = c(NA, -234L
), class = "data.frame")
# Recode the outcome column by shifting Site down by one.
mydata <- transform(Clean_winter_diff, Site = Site - 1)

# Random 70/30 row split. Note: both subsets are created here, but the model
# fits below are run on the full `mydata`, not on `train.set.diff`.
alpha <- 0.7
inTrain_diff <- sample(1:nrow(mydata), alpha * nrow(mydata))
train.set.diff <- mydata[inTrain_diff, ]
test.set.diff <- mydata[-inTrain_diff, ]

# Boosted model fitted directly with gbm, using its built-in 5-fold CV.
winter.boost <- gbm(
  Site ~ .,
  data = mydata,
  n.trees = 10000,
  shrinkage = 0.01,
  interaction.depth = 6,
  cv.folds = 5,
  verbose = FALSE
)

# Tree count selected by gbm.perf's "cv" method; print it, then summarise
# the fitted model.
best.iter <- gbm.perf(winter.boost, method = "cv")
best.iter
summary(winter.boost)

# Re-fit through caret at a single tuning point (n.trees = best.iter) with
# 5-fold cross-validation, keeping all resampling results.
set.seed(123)
fitControl <- trainControl(method = "cv", number = 5, returnResamp = "all")
model2 <- train(
  Site ~ .,
  data = mydata[complete.cases(mydata), ],
  method = "gbm",
  distribution = "bernoulli",
  trControl = fitControl,
  verbose = FALSE,
  tuneGrid = data.frame(
    .n.trees = best.iter,
    .shrinkage = 0.01,
    .interaction.depth = 1,
    .n.minobsinnode = 1
  )
)
And these are the errors I get:
RMSE Rsquared MAE
Min. : NA Min. : NA Min. : NA
1st Qu.: NA 1st Qu.: NA 1st Qu.: NA
Median : NA Median : NA Median : NA
Mean :NaN Mean :NaN Mean :NaN
3rd Qu.: NA 3rd Qu.: NA 3rd Qu.: NA
Max. : NA Max. : NA Max. : NA
NA's :1 NA's :1 NA's :1
Error: Stopping
In addition: Warning messages:
1: In train.default(x, y, weights = w, ...) :
You are trying to do regression and your outcome only has two possible values Are you trying to do classification? If so, use a 2 level factor as your outcome column.
2: In nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo, :
There were missing values in resampled performance measures
I have checked the dataframe for missing values, there are none. What is the problem?
sessionInfo():
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 17134)
Matrix products: default
locale:
[1] LC_COLLATE=Norwegian Bokmål_Norway.1252 LC_CTYPE=Norwegian Bokmål_Norway.1252 LC_MONETARY=Norwegian Bokmål_Norway.1252
[4] LC_NUMERIC=C LC_TIME=Norwegian Bokmål_Norway.1252
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] caret_6.0-86 ggplot2_3.3.2 lattice_0.20-38 mlbench_2.1-1 gbm_2.1.8 MASS_7.3-51.4 randomForest_4.6-14
[8] tree_1.0-40 ISLR_1.2 dplyr_1.0.0 rpart.plot_3.0.8 rpart_4.1-15
loaded via a namespace (and not attached):
[1] tinytex_0.24 tidyselect_1.1.0 xfun_0.15 purrr_0.3.4 reshape2_1.4.4 splines_3.6.2
[7] colorspace_1.4-1 vctrs_0.3.1 generics_0.0.2 stats4_3.6.2 survival_3.1-8 prodlim_2019.11.13
[13] rlang_0.4.7 ModelMetrics_1.2.2.2 pillar_1.4.6 glue_1.4.1 withr_2.2.0 foreach_1.5.0
[19] lifecycle_0.2.0 plyr_1.8.6 lava_1.6.7 stringr_1.4.0 timeDate_3043.102 munsell_0.5.0
[25] gtable_0.3.0 recipes_0.1.13 codetools_0.2-16 parallel_3.6.2 class_7.3-15 Rcpp_1.0.5
[31] scales_1.1.1 ipred_0.9-9 stringi_1.4.6 grid_3.6.2 tools_3.6.2 magrittr_1.5
[37] tibble_3.0.3 crayon_1.3.4 pkgconfig_2.0.3 ellipsis_0.3.1 Matrix_1.2-18 data.table_1.13.0
[43] pROC_1.16.2 lubridate_1.7.9 gower_0.2.2 rstudioapi_0.11 iterators_1.0.12 R6_2.4.1
[49] nnet_7.3-12 nlme_3.1-142 compiler_3.6.2
>
You are doing a classification, so you need to set the dependent variable to a factor for train in caret to work:
# Convert the outcome to a factor so that train() fits a classification model.
mydata$Site <- factor(mydata$Site)

# 5-fold cross-validation, keeping all resampling results.
set.seed(123)
fitControl <- trainControl(method = "cv", number = 5, returnResamp = "all")

# Fit gbm through caret at a single, fixed tuning point.
model2 <- train(
  Site ~ .,
  data = mydata[complete.cases(mydata), ],
  method = "gbm",
  distribution = "bernoulli",
  trControl = fitControl,
  verbose = FALSE,
  tuneGrid = data.frame(
    .n.trees = 400,
    .shrinkage = 0.01,
    .interaction.depth = 1,
    .n.minobsinnode = 1
  )
)
model2
Stochastic Gradient Boosting
234 samples
9 predictor
2 classes: '0', '1'
No pre-processing
Resampling: Cross-Validated (5 fold)
Summary of sample sizes: 187, 187, 187, 188, 187
Resampling results:
Accuracy Kappa
0.9232192 0.5550649
Tuning parameter 'n.trees' was held constant at a value
Tuning parameter 'n.minobsinnode' was held constant at
a value of 1

dplyr: How does bind_rows() change the original dataframe

hth1 is a data frame that I already have.
> hth1
Source: local data frame [13 x 14]
Groups: team [13]
team CSK DC DD GL KKR KTK KXIP MI PW RCB RPSG
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 CSK 0 8 11 0 11 2 9 10 4 10 0
2 DC 2 0 8 0 2 1 7 5 3 8 0
3 DD 5 3 0 0 7 2 8 5 2 10 2
4 GL 0 0 2 0 0 0 0 0 0 1 0
5 KKR 5 7 10 2 0 0 5 10 3 15 0
6 KTK 0 0 0 0 2 0 1 0 1 2 0
7 KXIP 8 3 10 2 14 0 0 11 2 6 1
8 MI 12 5 13 2 8 1 7 0 3 11 1
9 PW 2 1 4 0 2 0 4 3 0 1 0
10 RCB 9 3 7 2 3 0 12 8 4 0 1
11 RPSG 0 0 0 2 2 0 1 1 0 1 0
12 RR 8 2 7 0 14 1 7 6 2 7 0
13 SH 3 0 4 0 5 0 4 5 2 5 2
# ... with 2 more variables: RR <dbl>, SH <dbl>
Why do the data frame returned by bind_rows() and the original data frame differ?
> h <- list(hth1)
> hth_b1 <- bind_rows(h)
> identical(hth1, hth_b1)
[1] FALSE
> class(hth_b1)
[1] "grouped_df" "tbl_df" "tbl" "data.frame"
> class(hth1)
[1] "grouped_df" "tbl_df" "tbl" "data.frame"
> setequal(hth1, hth_b1)
TRUE
> anti_join(hth1, hth_b1)
Joining, by = c("team", "CSK", "DC", "DD", "GL", "KKR", "KTK", "KXIP", "MI", "PW", "RCB", "RPSG", "RR", "SH")
Source: local data frame [0 x 14]
Groups: team [13]
# ... with 14 variables: team <chr>, CSK <dbl>, DC <dbl>, DD <dbl>, GL <dbl>,
# KKR <dbl>, KTK <dbl>, KXIP <dbl>, MI <dbl>, PW <dbl>, RCB <dbl>,
# RPSG <dbl>, RR <dbl>, SH <dbl>
What am I missing? I have been stuck here for a long time.
Update 1:
As requested by Benjamin, I ran the dput() function on both data frames. Here is the output.
> dput(hth_b1)
structure(list(team = c("CSK", "DC", "DD", "GL", "KKR", "KTK",
"KXIP", "MI", "PW", "RCB", "RPSG", "RR", "SH"), CSK = c(0, 2,
5, 0, 5, 0, 8, 12, 2, 9, 0, 8, 3), DC = c(8, 0, 3, 0, 7, 0, 3,
5, 1, 3, 0, 2, 0), DD = c(11, 8, 0, 2, 10, 0, 10, 13, 4, 7, 0,
7, 4), GL = c(0, 0, 0, 0, 2, 0, 2, 2, 0, 2, 2, 0, 0), KKR = c(11,
2, 7, 0, 0, 2, 14, 8, 2, 3, 2, 14, 5), KTK = c(2, 1, 2, 0, 0,
0, 0, 1, 0, 0, 0, 1, 0), KXIP = c(9, 7, 8, 0, 5, 1, 0, 7, 4,
12, 1, 7, 4), MI = c(10, 5, 5, 0, 10, 0, 11, 0, 3, 8, 1, 6, 5
), PW = c(4, 3, 2, 0, 3, 1, 2, 3, 0, 4, 0, 2, 2), RCB = c(10,
8, 10, 1, 15, 2, 6, 11, 1, 0, 1, 7, 5), RPSG = c(0, 0, 2, 0,
0, 0, 1, 1, 0, 1, 0, 0, 2), RR = c(9, 7, 9, 0, 1, 1, 8, 10, 3,
9, 0, 0, 7), SH = c(3, 0, 4, 3, 4, 0, 4, 3, 0, 4, 0, 0, 0)), .Names = c("team",
"CSK", "DC", "DD", "GL", "KKR", "KTK", "KXIP", "MI", "PW", "RCB",
"RPSG", "RR", "SH"), row.names = c(NA, -13L), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), vars = list(team), indices = list(
0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L), group_sizes = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), biggest_group_size = 1L, labels = structure(list(
team = c("CSK", "DC", "DD", "GL", "KKR", "KTK", "KXIP", "MI",
"PW", "RCB", "RPSG", "RR", "SH")), row.names = c(NA, -13L
), class = "data.frame", vars = list(team), .Names = "team"))
>
> dput(hth1)
structure(list(team = c("CSK", "DC", "DD", "GL", "KKR", "KTK",
"KXIP", "MI", "PW", "RCB", "RPSG", "RR", "SH"), CSK = c(0, 2,
5, 0, 5, 0, 8, 12, 2, 9, 0, 8, 3), DC = c(8, 0, 3, 0, 7, 0, 3,
5, 1, 3, 0, 2, 0), DD = c(11, 8, 0, 2, 10, 0, 10, 13, 4, 7, 0,
7, 4), GL = c(0, 0, 0, 0, 2, 0, 2, 2, 0, 2, 2, 0, 0), KKR = c(11,
2, 7, 0, 0, 2, 14, 8, 2, 3, 2, 14, 5), KTK = c(2, 1, 2, 0, 0,
0, 0, 1, 0, 0, 0, 1, 0), KXIP = c(9, 7, 8, 0, 5, 1, 0, 7, 4,
12, 1, 7, 4), MI = c(10, 5, 5, 0, 10, 0, 11, 0, 3, 8, 1, 6, 5
), PW = c(4, 3, 2, 0, 3, 1, 2, 3, 0, 4, 0, 2, 2), RCB = c(10,
8, 10, 1, 15, 2, 6, 11, 1, 0, 1, 7, 5), RPSG = c(0, 0, 2, 0,
0, 0, 1, 1, 0, 1, 0, 0, 2), RR = c(9, 7, 9, 0, 1, 1, 8, 10, 3,
9, 0, 0, 7), SH = c(3, 0, 4, 3, 4, 0, 4, 3, 0, 4, 0, 0, 0)), .Names = c("team",
"CSK", "DC", "DD", "GL", "KKR", "KTK", "KXIP", "MI", "PW", "RCB",
"RPSG", "RR", "SH"), class = c("grouped_df", "tbl_df", "tbl",
"data.frame"), row.names = c(NA, -13L), vars = list(team), labels = structure(list(
team = c("CSK", "DC", "DD", "GL", "KKR", "KTK", "KXIP", "MI",
"PW", "RCB", "RPSG", "RR", "SH")), class = "data.frame", row.names = c(NA,
-13L), vars = list(team), drop = TRUE, .Names = "team"), indices = list(
0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L), drop = TRUE, group_sizes = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), biggest_group_size = 1L)
There is a difference between the two outputs: hth1 has an extra drop = TRUE attribute.
I don't understand why it is not there in the other one.
A reproducible example:
library(tidyverse)

# A grouped copy of mtcars, and the same object after a round trip through
# bind_rows() as a one-element list.
test1 <- group_by(mtcars, cyl)
test2 <- bind_rows(list(test1))

# Strict comparison (attributes included) versus dplyr's looser comparison.
identical(test1, test2)  # FALSE
all_equal(test1, test2)  # TRUE
You can check both their attributes and you can see the rownames differ:
rownames(test1)
[1] "Mazda RX4" "Mazda RX4 Wag" "Datsun 710"
[4] "Hornet 4 Drive" "Hornet Sportabout" "Valiant"
[7] "Duster 360" "Merc 240D" "Merc 230"
[10] "Merc 280" "Merc 280C" "Merc 450SE"
[13] "Merc 450SL" "Merc 450SLC" "Cadillac Fleetwood"
[16] "Lincoln Continental" "Chrysler Imperial" "Fiat 128"
[19] "Honda Civic" "Toyota Corolla" "Toyota Corona"
[22] "Dodge Challenger" "AMC Javelin" "Camaro Z28"
[25] "Pontiac Firebird" "Fiat X1-9" "Porsche 914-2"
[28] "Lotus Europa" "Ford Pantera L" "Ferrari Dino"
[31] "Maserati Bora" "Volvo 142E"
rownames(test2)
[1] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" "12" "13"
[14] "14" "15" "16" "17" "18" "19" "20" "21" "22" "23" "24" "25" "26"
[27] "27" "28" "29" "30" "31" "32"
Never expect tibbles to treat your rownames with respect; they may be silently dropped at any time.
Forgive the formatting on this answer, but it would appear that you have labels attached to one object, and not in the other. Where the labels got attached or removed isn't something I can know without looking at code that generates the objects. I've bolded the difference in your objects below.
Note: not formatting this as code is a deliberate choice. Formatting as code prevents me from marking the difference in the structure in bold text
dput(hth_b1)
structure(list(team = c("CSK", "DC", "DD", "GL", "KKR", "KTK",
"KXIP", "MI", "PW", "RCB", "RPSG", "RR", "SH"), CSK = c(0, 2,
5, 0, 5, 0, 8, 12, 2, 9, 0, 8, 3), DC = c(8, 0, 3, 0, 7, 0, 3,
5, 1, 3, 0, 2, 0), DD = c(11, 8, 0, 2, 10, 0, 10, 13, 4, 7, 0,
7, 4), GL = c(0, 0, 0, 0, 2, 0, 2, 2, 0, 2, 2, 0, 0), KKR = c(11,
2, 7, 0, 0, 2, 14, 8, 2, 3, 2, 14, 5), KTK = c(2, 1, 2, 0, 0,
0, 0, 1, 0, 0, 0, 1, 0), KXIP = c(9, 7, 8, 0, 5, 1, 0, 7, 4,
12, 1, 7, 4), MI = c(10, 5, 5, 0, 10, 0, 11, 0, 3, 8, 1, 6, 5
), PW = c(4, 3, 2, 0, 3, 1, 2, 3, 0, 4, 0, 2, 2), RCB = c(10,
8, 10, 1, 15, 2, 6, 11, 1, 0, 1, 7, 5), RPSG = c(0, 0, 2, 0,
0, 0, 1, 1, 0, 1, 0, 0, 2), RR = c(9, 7, 9, 0, 1, 1, 8, 10, 3,
9, 0, 0, 7), SH = c(3, 0, 4, 3, 4, 0, 4, 3, 0, 4, 0, 0, 0)), .Names = c("team",
"CSK", "DC", "DD", "GL", "KKR", "KTK", "KXIP", "MI", "PW", "RCB",
"RPSG", "RR", "SH"), row.names = c(NA, -13L), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), vars = list(team), indices = list(
0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L), group_sizes = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), biggest_group_size = 1L , labels = structure(list(
team = c("CSK", "DC", "DD", "GL", "KKR", "KTK", "KXIP", "MI",
"PW", "RCB", "RPSG", "RR", "SH")), row.names = c(NA, -13L
), class = "data.frame", vars = list(team), .Names = "team"))
dput(hth1)
structure(list(team = c("CSK", "DC", "DD", "GL", "KKR", "KTK",
"KXIP", "MI", "PW", "RCB", "RPSG", "RR", "SH"), CSK = c(0, 2,
5, 0, 5, 0, 8, 12, 2, 9, 0, 8, 3), DC = c(8, 0, 3, 0, 7, 0, 3,
5, 1, 3, 0, 2, 0), DD = c(11, 8, 0, 2, 10, 0, 10, 13, 4, 7, 0,
7, 4), GL = c(0, 0, 0, 0, 2, 0, 2, 2, 0, 2, 2, 0, 0), KKR = c(11,
2, 7, 0, 0, 2, 14, 8, 2, 3, 2, 14, 5), KTK = c(2, 1, 2, 0, 0,
0, 0, 1, 0, 0, 0, 1, 0), KXIP = c(9, 7, 8, 0, 5, 1, 0, 7, 4,
12, 1, 7, 4), MI = c(10, 5, 5, 0, 10, 0, 11, 0, 3, 8, 1, 6, 5
), PW = c(4, 3, 2, 0, 3, 1, 2, 3, 0, 4, 0, 2, 2), RCB = c(10,
8, 10, 1, 15, 2, 6, 11, 1, 0, 1, 7, 5), RPSG = c(0, 0, 2, 0,
0, 0, 1, 1, 0, 1, 0, 0, 2), RR = c(9, 7, 9, 0, 1, 1, 8, 10, 3,
9, 0, 0, 7), SH = c(3, 0, 4, 3, 4, 0, 4, 3, 0, 4, 0, 0, 0)), .Names = c("team",
"CSK", "DC", "DD", "GL", "KKR", "KTK", "KXIP", "MI", "PW", "RCB",
"RPSG", "RR", "SH"), class = c("grouped_df", "tbl_df", "tbl",
"data.frame"), row.names = c(NA, -13L), vars = list(team), labels = structure(list(
team = c("CSK", "DC", "DD", "GL", "KKR", "KTK", "KXIP", "MI",
"PW", "RCB", "RPSG", "RR", "SH")), class = "data.frame", row.names = c(NA,
-13L), vars = list(team), drop = TRUE, .Names = "team"), indices = list(
0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L), drop = TRUE, group_sizes = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), biggest_group_size = 1L)
In the example below, I will add labels to the mtcars data frame, then run it through bind_rows, and you'll see that the labels are no longer present. This is what I believe is happening to your data.
# Attach Hmisc first and dplyr second, as in the original example.
library(Hmisc)
library(dplyr)

# Copy mtcars and label each column with its upper-cased name.
mtcars2 <- mtcars
label(mtcars2, self = FALSE) <- toupper(names(mtcars2))

# Pass the labelled copy through bind_rows(), then compare it with the
# labelled original and inspect the labels on the result.
mtcars3 <- bind_rows(mtcars2)
identical(mtcars2, mtcars3)
label(mtcars3)

Resources