Related
I am trying to define the model for my test and training datasets, but I get the following error:
Error in eval(predvars, data, env) : object 'avg_rating' not found
But all of my datasets have the "avg_rating" column.
This is my code
# Fit a linear model of avg_rating on every other column via caret::train().
# Rows containing NA are dropped, "scale" and "center" pre-processing is
# requested, and trainControl(method = "none") asks for a single fit without
# resampling.
# NOTE(review): the error quoted above means `data` has no `avg_rating`
# column. The dump that follows carries .Dimnames "Resample1", which looks like
# the index matrix returned by createDataPartition() rather than a data frame --
# confirm, and if so pass the data frame subset by these indices instead.
lm_model <- train(
  avg_rating ~ .,
  data = trainingindex,
  method = "lm",
  na.action = na.omit,
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "none")
)
structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 13L, 14L,
15L, 16L, 17L, 18L, 19L, 21L, 23L, 24L, 25L, 27L, 28L, 29L, 30L,
31L, 32L, 33L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L,
45L, 46L, 47L, 48L, 49L, 52L, 53L, 55L, 58L, 61L, 62L, 63L, 65L,
66L, 67L, 68L, 69L, 70L, 71L, 74L, 77L, 78L, 80L, 81L, 83L, 84L,
85L, 86L, 87L, 88L, 90L, 91L, 92L, 93L, 94L, 96L, 97L, 99L, 102L,
103L, 104L, 105L, 106L, 107L, 108L, 109L, 110L, 111L, 113L, 115L,
116L, 118L, 119L, 120L, 121L, 122L, 123L, 124L, 125L, 126L, 127L,
128L, 129L, 130L, 131L, 132L, 133L, 134L, 135L, 136L, 137L, 138L,
139L, 140L, 141L, 142L, 143L, 144L, 145L, 146L, 147L, 148L, 150L,
152L, 154L, 155L, 157L, 158L, 160L, 161L, 162L, 165L, 166L, 167L,
168L, 170L, 171L, 172L, 173L, 174L, 175L, 176L, 177L, 178L, 179L,
180L, 181L, 182L, 185L, 187L, 188L, 189L, 190L, 191L, 192L, 193L,
194L, 195L, 196L, 197L, 199L, 200L, 201L, 202L, 203L, 204L, 205L,
207L, 208L, 209L, 210L, 213L, 214L, 216L, 217L, 219L, 220L, 221L,
223L, 224L, 225L, 226L, 227L, 228L, 230L, 231L, 232L, 233L, 234L,
235L, 236L, 237L, 238L, 239L, 240L, 242L, 243L, 244L, 245L, 246L,
247L, 248L, 249L, 250L, 251L, 252L, 253L, 254L, 255L, 257L, 259L,
260L, 261L, 262L, 263L, 264L, 266L, 267L, 268L, 271L, 272L, 273L,
274L, 275L, 276L, 277L, 278L, 280L, 281L, 282L, 284L, 285L, 286L,
287L, 288L, 290L, 291L, 294L, 295L, 296L, 297L, 298L, 299L, 300L,
301L, 302L, 303L, 304L, 305L, 308L, 309L, 310L, 311L, 312L, 313L,
314L, 315L, 317L, 318L, 319L, 320L, 321L, 322L, 323L, 324L, 326L,
327L, 329L, 330L, 331L, 332L, 333L, 334L, 335L, 336L, 337L, 338L,
340L, 341L, 343L, 344L, 345L, 346L, 348L, 349L, 350L, 351L, 353L,
354L, 355L, 356L, 357L, 358L, 359L, 360L, 361L, 363L, 364L, 365L,
366L, 367L, 368L, 369L, 370L, 371L, 372L, 373L, 374L, 375L, 376L,
377L, 378L, 379L, 380L, 381L, 382L, 383L, 384L, 385L, 386L, 387L,... 3687L), .Dim = c(2952L, 1
), .Dimnames = list(NULL, "Resample1"))
15L, 16L, 17L, 18L, 19L, 21L, 23L, 24L, 25L, 27L, 28L, 29L, 30L,
31L, 32L, 33L, 35L, 36L), .Dim = c(30L, 1L), .Dimnames = list(
NULL, "Resample1"))
I have the data below. I am having a problem partitioning it using caret's createDataPartition.
gg <- structure(c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L,
5L, 5L, 6L, 6L, 6L, 145L, 145L, 145L, 146L, 146L, 146L, 147L,
147L, 147L, 148L, 148L, 148L, 149L, 149L, 149L, 150L, 150L, 150L,
193L, 193L, 193L, 194L, 194L, 194L, 195L, 195L, 195L, 196L, 196L,
196L, 197L, 197L, 197L, 198L, 198L, 198L, 199L, 199L, 199L, 200L,
200L, 200L, 201L, 201L, 201L, 202L, 202L, 202L, 203L, 203L, 203L,
204L, 204L, 204L, 205L, 205L, 205L, 206L, 206L, 206L, 207L, 207L,
207L, 208L, 208L, 208L, 209L, 209L, 209L, 210L, 210L, 210L, 211L,
211L, 211L, 212L, 212L, 212L, 213L, 213L, 213L, 214L, 214L, 214L,
215L, 215L, 215L, 216L, 216L, 216L, 217L, 217L, 217L, 218L, 218L,
218L, 219L, 219L, 219L, 220L, 220L, 220L, 221L, 221L, 221L, 222L,
222L, 222L, 223L, 223L, 223L, 224L, 224L, 224L, 225L, 225L, 225L,
226L, 226L, 226L, 227L, 227L, 227L, 228L, 228L, 228L, 229L, 229L,
229L, 230L, 230L, 230L, 231L, 231L, 231L, 232L, 232L, 232L, 233L,
233L, 233L, 234L, 234L, 234L, 235L, 235L, 235L, 236L, 236L, 236L,
237L, 237L, 237L, 238L, 238L, 238L, 239L, 239L, 239L, 240L, 240L,
240L, 7L, 7L, 7L, 8L, 8L, 8L, 9L, 9L, 9L, 10L, 10L, 10L, 11L,
11L, 11L, 12L, 12L, 12L, 13L, 13L, 13L, 14L, 14L, 14L, 15L, 15L,
15L, 16L, 16L, 16L, 17L, 17L, 17L, 18L, 18L, 18L, 19L, 19L, 19L,
20L, 20L, 20L, 21L, 21L, 21L, 22L, 22L, 22L, 23L, 23L, 23L, 24L,
24L, 24L, 25L, 25L, 25L, 26L, 26L, 26L, 27L, 27L, 27L, 28L, 28L,
28L, 29L, 29L, 29L, 30L, 30L, 30L, 31L, 31L, 31L, 32L, 32L, 32L,
33L, 33L, 33L, 34L, 34L, 34L, 35L, 35L, 35L, 36L, 36L, 36L, 37L,
37L, 37L, 38L, 38L, 38L, 39L, 39L, 39L, 40L, 40L, 40L, 41L, 41L,
41L, 42L, 42L, 42L, 43L, 43L, 43L, 44L, 44L, 44L, 45L, 45L, 45L,
46L, 46L, 46L, 47L, 47L, 47L, 48L, 48L, 48L, 49L, 49L, 49L, 50L,
50L, 50L, 51L, 51L, 51L, 52L, 52L, 52L, 53L, 53L, 53L, 54L, 54L,
54L, 55L, 55L, 55L, 56L, 56L, 56L, 57L, 57L, 57L, 58L, 58L, 58L,
59L, 59L, 59L, 60L, 60L, 60L, 61L, 61L, 61L, 62L, 62L, 62L, 63L,
63L, 63L, 64L, 64L, 64L, 65L, 65L, 65L, 66L, 66L, 66L, 67L, 67L,
67L, 68L, 68L, 68L, 69L, 69L, 69L, 70L, 70L, 70L, 71L, 71L, 71L,
72L, 72L, 72L, 73L, 73L, 73L, 74L, 74L, 74L, 75L, 75L, 75L, 76L,
76L, 76L, 77L, 77L, 77L, 78L, 78L, 78L, 79L, 79L, 79L, 80L, 80L,
80L, 81L, 81L, 81L, 82L, 82L, 82L, 83L, 83L, 83L, 84L, 84L, 84L,
85L, 85L, 85L, 86L, 86L, 86L, 87L, 87L, 87L, 88L, 88L, 88L, 89L,
89L, 89L, 90L, 90L, 90L, 91L, 91L, 91L, 92L, 92L, 92L, 93L, 93L,
93L, 94L, 94L, 94L, 95L, 95L, 95L, 96L, 96L, 96L, 97L, 97L, 97L,
98L, 98L, 98L, 99L, 99L, 99L, 100L, 100L, 100L, 101L, 101L, 101L,
102L, 102L, 102L, 103L, 103L, 103L, 104L, 104L, 104L, 105L, 105L,
105L, 106L, 106L, 106L, 107L, 107L, 107L, 108L, 108L, 108L, 109L,
109L, 109L, 110L, 110L, 110L, 111L, 111L, 111L, 112L, 112L, 112L,
113L, 113L, 113L, 114L, 114L, 114L, 115L, 115L, 115L, 116L, 116L,
116L, 117L, 117L, 117L, 118L, 118L, 118L, 119L, 119L, 119L, 120L,
120L, 120L, 121L, 121L, 121L, 122L, 122L, 122L, 123L, 123L, 123L,
124L, 124L, 124L, 125L, 125L, 125L, 126L, 126L, 126L, 127L, 127L,
127L, 128L, 128L, 128L, 129L, 129L, 129L, 130L, 130L, 130L, 131L,
131L, 131L, 132L, 132L, 132L, 151L, 151L, 151L, 152L, 152L, 152L,
153L, 153L, 153L, 154L, 154L, 154L, 155L, 155L, 155L, 156L, 156L,
156L, 157L, 157L, 157L, 158L, 158L, 158L, 159L, 159L, 159L, 160L,
160L, 160L, 161L, 161L, 161L, 162L, 162L, 162L, 163L, 163L, 163L,
164L, 164L, 164L, 165L, 165L, 165L, 166L, 166L, 166L, 167L, 167L,
167L, 168L, 168L, 168L, 169L, 169L, 169L, 170L, 170L, 170L, 171L,
171L, 171L, 172L, 172L, 172L, 173L, 173L, 173L, 174L, 174L, 174L,
175L, 175L, 175L, 176L, 176L, 176L, 177L, 177L, 177L, 178L, 178L,
178L, 179L, 179L, 179L, 180L, 180L, 180L, 181L, 181L, 181L, 182L,
182L, 182L, 183L, 183L, 183L, 184L, 184L, 184L, 185L, 185L, 185L,
186L, 186L, 186L, 187L, 187L, 187L, 188L, 188L, 188L, 189L, 189L,
189L, 190L, 190L, 190L, 191L, 191L, 191L, 192L, 192L, 192L, 133L,
133L, 133L, 134L, 134L, 134L, 135L, 135L, 135L, 136L, 136L, 136L,
137L, 137L, 137L, 138L, 138L, 138L, 139L, 139L, 139L, 140L, 140L,
140L, 141L, 141L, 141L, 142L, 142L, 142L, 143L, 143L, 143L, 144L,
144L, 144L, 241L, 241L, 241L, 242L, 242L, 242L, 243L, 243L, 243L,
244L, 244L, 244L, 245L, 245L, 245L, 246L, 246L, 246L, 385L, 385L,
385L, 386L, 386L, 386L, 387L, 387L, 387L, 388L, 388L, 388L, 389L,
389L, 389L, 390L, 390L, 390L, 433L, 433L, 433L, 434L, 434L, 434L,
435L, 435L, 435L, 436L, 436L, 436L, 437L, 437L, 437L, 438L, 438L,
438L, 439L, 439L, 439L, 440L, 440L, 440L, 441L, 441L, 441L, 442L,
442L, 442L, 443L, 443L, 443L, 444L, 444L, 444L, 445L, 445L, 445L,
446L, 446L, 446L, 447L, 447L, 447L, 448L, 448L, 448L, 449L, 449L,
449L, 450L, 450L, 450L, 451L, 451L, 451L, 452L, 452L, 452L, 453L,
453L, 453L, 454L, 454L, 454L, 455L, 455L, 455L, 456L, 456L, 456L,
457L, 457L, 457L, 458L, 458L, 458L, 459L, 459L, 459L, 460L, 460L,
460L, 461L, 461L, 461L, 462L, 462L, 462L, 463L, 463L, 463L, 464L,
464L, 464L, 465L, 465L, 465L, 466L, 466L, 466L, 467L, 467L, 467L,
468L, 468L, 468L, 469L, 469L, 469L, 470L, 470L, 470L, 471L, 471L,
471L, 472L, 472L, 472L, 473L, 473L, 473L, 474L, 474L, 474L, 475L,
475L, 475L, 476L, 476L, 476L, 477L, 477L, 477L, 478L, 478L, 478L,
479L, 479L, 479L, 480L, 480L, 480L, 247L, 247L, 247L, 248L, 248L,
248L, 249L, 249L, 249L, 250L, 250L, 250L, 251L, 251L, 251L, 252L,
252L, 252L, 253L, 253L, 253L, 254L, 254L, 254L, 255L, 255L, 255L,
256L, 256L, 256L, 257L, 257L, 257L, 258L, 258L, 258L, 259L, 259L,
259L, 260L, 260L, 260L, 261L, 261L, 261L, 262L, 262L, 262L, 263L,
263L, 263L, 264L, 264L, 264L, 265L, 265L, 265L, 266L, 266L, 266L,
267L, 267L, 267L, 268L, 268L, 268L, 269L, 269L, 269L, 270L, 270L,
270L, 271L, 271L, 271L, 272L, 272L, 272L, 273L, 273L, 273L, 274L,
274L, 274L, 275L, 275L, 275L, 276L, 276L, 276L, 277L, 277L, 277L,
278L, 278L, 278L, 279L, 279L, 279L, 280L, 280L, 280L, 281L, 281L,
281L, 282L, 282L, 282L, 283L, 283L, 283L, 284L, 284L, 284L, 285L,
285L, 285L, 286L, 286L, 286L, 287L, 287L, 287L, 288L, 288L, 288L,
289L, 289L, 289L, 290L, 290L, 290L, 291L, 291L, 291L, 292L, 292L,
292L, 293L, 293L, 293L, 294L, 294L, 294L, 295L, 295L, 295L, 296L,
296L, 296L, 297L, 297L, 297L, 298L, 298L, 298L, 299L, 299L, 299L,
300L, 300L, 300L, 301L, 301L, 301L, 302L, 302L, 302L, 303L, 303L,
303L, 304L, 304L, 304L, 305L, 305L, 305L, 306L, 306L, 306L, 307L,
307L, 307L, 308L, 308L, 308L, 309L, 309L, 309L, 310L, 310L, 310L,
311L, 311L, 311L, 312L, 312L, 312L, 319L, 319L, 319L, 320L, 320L,
320L, 321L, 321L, 321L, 322L, 322L, 322L, 323L, 323L, 323L, 324L,
324L, 324L, 325L, 325L, 325L, 326L, 326L, 326L, 327L, 327L, 327L,
328L, 328L, 328L, 329L, 329L, 329L, 330L, 330L, 330L, 331L, 331L,
331L, 332L, 332L, 332L, 333L, 333L, 333L, 334L, 334L, 334L, 335L,
335L, 335L, 336L, 336L, 336L, 337L, 337L, 337L, 338L, 338L, 338L,
339L, 339L, 339L, 340L, 340L, 340L, 341L, 341L, 341L, 342L, 342L,
342L, 343L, 343L, 343L, 344L, 344L, 344L, 345L, 345L, 345L, 346L,
346L, 346L, 347L, 347L, 347L, 348L, 348L, 348L, 349L, 349L, 349L,
350L, 350L, 350L, 351L, 351L, 351L, 352L, 352L, 352L, 353L, 353L,
353L, 354L, 354L, 354L, 355L, 355L, 355L, 356L, 356L, 356L, 357L,
357L, 357L, 358L, 358L, 358L, 359L, 359L, 359L, 360L, 360L, 360L,
361L, 361L, 361L, 362L, 362L, 362L, 363L, 363L, 363L, 364L, 364L,
364L, 365L, 365L, 365L, 366L, 366L, 366L, 367L, 367L, 367L, 368L,
368L, 368L, 369L, 369L, 369L, 370L, 370L, 370L, 371L, 371L, 371L,
372L, 372L, 372L, 391L, 391L, 391L, 392L, 392L, 392L, 393L, 393L,
393L, 394L, 394L, 394L, 395L, 395L, 395L, 396L, 396L, 396L, 397L,
397L, 397L, 398L, 398L, 398L, 399L, 399L, 399L, 400L, 400L, 400L,
401L, 401L, 401L, 402L, 402L, 402L, 403L, 403L, 403L, 404L, 404L,
404L, 405L, 405L, 405L, 406L, 406L, 406L, 407L, 407L, 407L, 408L,
408L, 408L, 409L, 409L, 409L, 410L, 410L, 410L, 411L, 411L, 411L,
412L, 412L, 412L, 413L, 413L, 413L, 414L, 414L, 414L, 415L, 415L,
415L, 416L, 416L, 416L, 417L, 417L, 417L, 418L, 418L, 418L, 419L,
419L, 419L, 420L, 420L, 420L, 421L, 421L, 421L, 422L, 422L, 422L,
423L, 423L, 423L, 424L, 424L, 424L, 425L, 425L, 425L, 426L, 426L,
426L, 427L, 427L, 427L, 428L, 428L, 428L, 429L, 429L, 429L, 430L,
430L, 430L, 431L, 431L, 431L, 432L, 432L, 432L, 373L, 373L, 373L,
374L, 374L, 374L, 375L, 375L, 375L, 376L, 376L, 376L, 377L, 377L,
377L, 378L, 378L, 378L, 379L, 379L, 379L, 380L, 380L, 380L, 381L,
381L, 381L, 382L, 382L, 382L, 383L, 383L, 383L, 384L, 384L, 384L,
313L, 313L, 313L, 314L, 314L, 314L, 315L, 315L, 315L, 316L, 316L,
316L, 317L, 317L, 317L, 318L, 318L, 318L), .Label = c("CUR:0:L1",
"CUR:0:L2", "CUR:0:L3", "CUR:0:L4", "CUR:0:L5", "CUR:0:L6", "CUR:00A:L1",
"CUR:00A:L2", "CUR:00A:L3", "CUR:00A:L4", "CUR:00A:L5", "CUR:00A:L6",
"CUR:00B:L1", "CUR:00B:L2", "CUR:00B:L3", "CUR:00B:L4", "CUR:00B:L5",
"CUR:00B:L6", "CUR:00C:L1", "CUR:00C:L2", "CUR:00C:L3", "CUR:00C:L4",
"CUR:00C:L5", "CUR:00C:L6", "CUR:00D:L1", "CUR:00D:L2", "CUR:00D:L3",
"CUR:00D:L4", "CUR:00D:L5", "CUR:00D:L6", "CUR:00F:L1", "CUR:00F:L2",
"CUR:00F:L3", "CUR:00F:L4", "CUR:00F:L5", "CUR:00F:L6", "CUR:00H:L1",
"CUR:00H:L2", "CUR:00H:L3", "CUR:00H:L4", "CUR:00H:L5", "CUR:00H:L6",
"CUR:00I:L1", "CUR:00I:L2", "CUR:00I:L3", "CUR:00I:L4", "CUR:00I:L5",
"CUR:00I:L6", "CUR:00J:L1", "CUR:00J:L2", "CUR:00J:L3", "CUR:00J:L4",
"CUR:00J:L5", "CUR:00J:L6", "CUR:00K:L1", "CUR:00K:L2", "CUR:00K:L3",
"CUR:00K:L4", "CUR:00K:L5", "CUR:00K:L6", "CUR:00L:L1", "CUR:00L:L2",
"CUR:00L:L3", "CUR:00L:L4", "CUR:00L:L5", "CUR:00L:L6", "CUR:00N:L1",
"CUR:00N:L2", "CUR:00N:L3", "CUR:00N:L4", "CUR:00N:L5", "CUR:00N:L6",
"CUR:00O:L1", "CUR:00O:L2", "CUR:00O:L3", "CUR:00O:L4", "CUR:00O:L5",
"CUR:00O:L6", "CUR:00P:L1", "CUR:00P:L2", "CUR:00P:L3", "CUR:00P:L4",
"CUR:00P:L5", "CUR:00P:L6", "CUR:00Q:L1", "CUR:00Q:L2", "CUR:00Q:L3",
"CUR:00Q:L4", "CUR:00Q:L5", "CUR:00Q:L6", "CUR:00R:L1", "CUR:00R:L2",
"CUR:00R:L3", "CUR:00R:L4", "CUR:00R:L5", "CUR:00R:L6", "CUR:00T:L1",
"CUR:00T:L2", "CUR:00T:L3", "CUR:00T:L4", "CUR:00T:L5", "CUR:00T:L6",
"CUR:00U:L1", "CUR:00U:L2", "CUR:00U:L3", "CUR:00U:L4", "CUR:00U:L5",
"CUR:00U:L6", "CUR:00V:L1", "CUR:00V:L2", "CUR:00V:L3", "CUR:00V:L4",
"CUR:00V:L5", "CUR:00V:L6", "CUR:00W:L1", "CUR:00W:L2", "CUR:00W:L3",
"CUR:00W:L4", "CUR:00W:L5", "CUR:00W:L6", "CUR:00X:L1", "CUR:00X:L2",
"CUR:00X:L3", "CUR:00X:L4", "CUR:00X:L5", "CUR:00X:L6", "CUR:00Z:L1",
"CUR:00Z:L2", "CUR:00Z:L3", "CUR:00Z:L4", "CUR:00Z:L5", "CUR:00Z:L6",
"CUR:01A:L1", "CUR:01A:L2", "CUR:01A:L3", "CUR:01A:L4", "CUR:01A:L5",
"CUR:01A:L6", "CUR:01B:L1", "CUR:01B:L2", "CUR:01B:L3", "CUR:01B:L4",
"CUR:01B:L5", "CUR:01B:L6", "CUR:1:L1", "CUR:1:L2", "CUR:1:L3",
"CUR:1:L4", "CUR:1:L5", "CUR:1:L6", "CUR:10:L1", "CUR:10:L2",
"CUR:10:L3", "CUR:10:L4", "CUR:10:L5", "CUR:10:L6", "CUR:11:L1",
"CUR:11:L2", "CUR:11:L3", "CUR:11:L4", "CUR:11:L5", "CUR:11:L6",
"CUR:12:L1", "CUR:12:L2", "CUR:12:L3", "CUR:12:L4", "CUR:12:L5",
"CUR:12:L6", "CUR:13:L1", "CUR:13:L2", "CUR:13:L3", "CUR:13:L4",
"CUR:13:L5", "CUR:13:L6", "CUR:16:L1", "CUR:16:L2", "CUR:16:L3",
"CUR:16:L4", "CUR:16:L5", "CUR:16:L6", "CUR:18:L1", "CUR:18:L2",
"CUR:18:L3", "CUR:18:L4", "CUR:18:L5", "CUR:18:L6", "CUR:19:L1",
"CUR:19:L2", "CUR:19:L3", "CUR:19:L4", "CUR:19:L5", "CUR:19:L6",
"CUR:2:L1", "CUR:2:L2", "CUR:2:L3", "CUR:2:L4", "CUR:2:L5", "CUR:2:L6",
"CUR:3:L1", "CUR:3:L2", "CUR:3:L3", "CUR:3:L4", "CUR:3:L5", "CUR:3:L6",
"CUR:4:L1", "CUR:4:L2", "CUR:4:L3", "CUR:4:L4", "CUR:4:L5", "CUR:4:L6",
"CUR:5:L1", "CUR:5:L2", "CUR:5:L3", "CUR:5:L4", "CUR:5:L5", "CUR:5:L6",
"CUR:6:L1", "CUR:6:L2", "CUR:6:L3", "CUR:6:L4", "CUR:6:L5", "CUR:6:L6",
"CUR:7:L1", "CUR:7:L2", "CUR:7:L3", "CUR:7:L4", "CUR:7:L5", "CUR:7:L6",
"CUR:8:L1", "CUR:8:L2", "CUR:8:L3", "CUR:8:L4", "CUR:8:L5", "CUR:8:L6",
"CUR:9:L1", "CUR:9:L2", "CUR:9:L3", "CUR:9:L4", "CUR:9:L5", "CUR:9:L6",
"PRI:0:L1", "PRI:0:L2", "PRI:0:L3", "PRI:0:L4", "PRI:0:L5", "PRI:0:L6",
"PRI:00A:L1", "PRI:00A:L2", "PRI:00A:L3", "PRI:00A:L4", "PRI:00A:L5",
"PRI:00A:L6", "PRI:00B:L1", "PRI:00B:L2", "PRI:00B:L3", "PRI:00B:L4",
"PRI:00B:L5", "PRI:00B:L6", "PRI:00C:L1", "PRI:00C:L2", "PRI:00C:L3",
"PRI:00C:L4", "PRI:00C:L5", "PRI:00C:L6", "PRI:00D:L1", "PRI:00D:L2",
"PRI:00D:L3", "PRI:00D:L4", "PRI:00D:L5", "PRI:00D:L6", "PRI:00F:L1",
"PRI:00F:L2", "PRI:00F:L3", "PRI:00F:L4", "PRI:00F:L5", "PRI:00F:L6",
"PRI:00H:L1", "PRI:00H:L2", "PRI:00H:L3", "PRI:00H:L4", "PRI:00H:L5",
"PRI:00H:L6", "PRI:00I:L1", "PRI:00I:L2", "PRI:00I:L3", "PRI:00I:L4",
"PRI:00I:L5", "PRI:00I:L6", "PRI:00J:L1", "PRI:00J:L2", "PRI:00J:L3",
"PRI:00J:L4", "PRI:00J:L5", "PRI:00J:L6", "PRI:00K:L1", "PRI:00K:L2",
"PRI:00K:L3", "PRI:00K:L4", "PRI:00K:L5", "PRI:00K:L6", "PRI:00L:L1",
"PRI:00L:L2", "PRI:00L:L3", "PRI:00L:L4", "PRI:00L:L5", "PRI:00L:L6",
"PRI:00N:L1", "PRI:00N:L2", "PRI:00N:L3", "PRI:00N:L4", "PRI:00N:L5",
"PRI:00N:L6", "PRI:00O:L1", "PRI:00O:L2", "PRI:00O:L3", "PRI:00O:L4",
"PRI:00O:L5", "PRI:00O:L6", "PRI:00P:L1", "PRI:00P:L2", "PRI:00P:L3",
"PRI:00P:L4", "PRI:00P:L5", "PRI:00P:L6", "PRI:00Q:L1", "PRI:00Q:L2",
"PRI:00Q:L3", "PRI:00Q:L4", "PRI:00Q:L5", "PRI:00Q:L6", "PRI:00R:L1",
"PRI:00R:L2", "PRI:00R:L3", "PRI:00R:L4", "PRI:00R:L5", "PRI:00R:L6",
"PRI:00T:L1", "PRI:00T:L2", "PRI:00T:L3", "PRI:00T:L4", "PRI:00T:L5",
"PRI:00T:L6", "PRI:00U:L1", "PRI:00U:L2", "PRI:00U:L3", "PRI:00U:L4",
"PRI:00U:L5", "PRI:00U:L6", "PRI:00V:L1", "PRI:00V:L2", "PRI:00V:L3",
"PRI:00V:L4", "PRI:00V:L5", "PRI:00V:L6", "PRI:00W:L1", "PRI:00W:L2",
"PRI:00W:L3", "PRI:00W:L4", "PRI:00W:L5", "PRI:00W:L6", "PRI:00X:L1",
"PRI:00X:L2", "PRI:00X:L3", "PRI:00X:L4", "PRI:00X:L5", "PRI:00X:L6",
"PRI:00Z:L1", "PRI:00Z:L2", "PRI:00Z:L3", "PRI:00Z:L4", "PRI:00Z:L5",
"PRI:00Z:L6", "PRI:01A:L1", "PRI:01A:L2", "PRI:01A:L3", "PRI:01A:L4",
"PRI:01A:L5", "PRI:01A:L6", "PRI:01B:L1", "PRI:01B:L2", "PRI:01B:L3",
"PRI:01B:L4", "PRI:01B:L5", "PRI:01B:L6", "PRI:1:L1", "PRI:1:L2",
"PRI:1:L3", "PRI:1:L4", "PRI:1:L5", "PRI:1:L6", "PRI:10:L1",
"PRI:10:L2", "PRI:10:L3", "PRI:10:L4", "PRI:10:L5", "PRI:10:L6",
"PRI:11:L1", "PRI:11:L2", "PRI:11:L3", "PRI:11:L4", "PRI:11:L5",
"PRI:11:L6", "PRI:12:L1", "PRI:12:L2", "PRI:12:L3", "PRI:12:L4",
"PRI:12:L5", "PRI:12:L6", "PRI:13:L1", "PRI:13:L2", "PRI:13:L3",
"PRI:13:L4", "PRI:13:L5", "PRI:13:L6", "PRI:16:L1", "PRI:16:L2",
"PRI:16:L3", "PRI:16:L4", "PRI:16:L5", "PRI:16:L6", "PRI:18:L1",
"PRI:18:L2", "PRI:18:L3", "PRI:18:L4", "PRI:18:L5", "PRI:18:L6",
"PRI:19:L1", "PRI:19:L2", "PRI:19:L3", "PRI:19:L4", "PRI:19:L5",
"PRI:19:L6", "PRI:2:L1", "PRI:2:L2", "PRI:2:L3", "PRI:2:L4",
"PRI:2:L5", "PRI:2:L6", "PRI:3:L1", "PRI:3:L2", "PRI:3:L3", "PRI:3:L4",
"PRI:3:L5", "PRI:3:L6", "PRI:4:L1", "PRI:4:L2", "PRI:4:L3", "PRI:4:L4",
"PRI:4:L5", "PRI:4:L6", "PRI:5:L1", "PRI:5:L2", "PRI:5:L3", "PRI:5:L4",
"PRI:5:L5", "PRI:5:L6", "PRI:6:L1", "PRI:6:L2", "PRI:6:L3", "PRI:6:L4",
"PRI:6:L5", "PRI:6:L6", "PRI:7:L1", "PRI:7:L2", "PRI:7:L3", "PRI:7:L4",
"PRI:7:L5", "PRI:7:L6", "PRI:8:L1", "PRI:8:L2", "PRI:8:L3", "PRI:8:L4",
"PRI:8:L5", "PRI:8:L6", "PRI:9:L1", "PRI:9:L2", "PRI:9:L3", "PRI:9:L4",
"PRI:9:L5", "PRI:9:L6"), class = "factor")
I wanted to use caret to partition my data, so this is what I did:
library(caret)
# Ask caret for a 70% training partition of gg; list = FALSE requests the
# selected positions as a matrix instead of a list.
# NOTE(review): gg is a factor, and createDataPartition() samples within each
# level of a factor. Every level of gg occurs only 3 times, so the per-level
# sample size at p = 0.7 presumably rounds up to all 3 -- confirm against the
# caret documentation; this would explain why every row is returned.
train.rows <- createDataPartition(
  y = gg,
  p = 0.7,
  list = FALSE
)
> length(train.rows)
[1] 1440
However, train.rows contains every element of gg even though I asked for a 0.7 partition. What am I missing here?
Try it without class = "factor".
Then your partitioned vector will be:
# Draw a single 70% partition of gg, returned as a matrix of positions.
indexes <- caret::createDataPartition(
  y = gg,
  times = 1,
  p = 0.7,
  list = FALSE
)

# Elements at the selected positions form the training set; negative indexing
# keeps everything else as the test set.
train <- gg[indexes]
test <- gg[-indexes]
This is a little strange. I have converted data from .csv to .xts before, but this time, for some reason, I cannot.
Here is my data set (dput() of half the real data set, since the complete one exceeded the character limit; and yes, the problem persists with this half):
structure(list(time = structure(c(347L, 369L, 391L, 413L, 435L,
457L, 479L, 501L, 522L, 543L, 564L, 585L, 605L, 624L, 641L, 12L,
33L, 54L, 75L, 96L, 117L, 138L, 159L, 180L, 201L, 222L, 243L,
264L, 285L, 306L, 327L, 349L, 371L, 393L, 415L, 437L, 459L, 481L,
503L, 524L, 545L, 566L, 587L, 607L, 626L, 643L, 14L, 35L, 56L,
77L, 98L, 119L, 140L, 161L, 182L, 203L, 224L, 245L, 266L, 287L,
308L, 329L, 351L, 373L, 395L, 417L, 439L, 461L, 483L, 505L, 526L,
547L, 568L, 589L, 609L, 628L, 16L, 37L, 58L, 79L, 100L, 121L,
142L, 163L, 184L, 205L, 226L, 247L, 268L, 289L, 310L, 331L, 353L,
375L, 397L, 419L, 441L, 463L, 485L, 507L, 528L, 549L, 570L, 591L,
611L, 630L, 645L, 18L, 39L, 60L, 81L, 102L, 123L, 144L, 165L,
186L, 207L, 228L, 249L, 270L, 291L, 312L, 333L, 355L, 377L, 399L,
421L, 443L, 465L, 487L, 509L, 530L, 551L, 572L, 593L, 613L, 632L,
20L, 41L, 62L, 83L, 104L, 125L, 146L, 167L, 188L, 209L, 230L,
251L, 272L, 293L, 314L, 335L, 357L, 379L, 401L, 423L, 445L, 467L,
489L, 511L, 532L, 553L, 574L, 595L, 615L, 634L, 647L, 1L, 22L,
43L, 64L, 85L, 106L, 127L, 148L, 169L, 190L, 211L, 232L, 253L,
274L, 295L, 316L, 337L, 359L, 381L, 403L, 425L, 447L, 469L, 491L,
513L, 534L, 555L, 576L, 597L, 617L, 636L, 3L, 24L, 45L, 66L,
87L, 108L, 129L, 150L, 171L, 192L, 213L, 234L, 255L, 276L, 297L,
318L, 339L, 361L, 383L, 405L, 427L, 449L, 471L, 493L, 515L, 536L,
557L, 578L, 5L, 26L, 47L, 68L, 89L, 110L, 131L, 152L, 173L, 194L,
215L, 236L, 257L, 278L, 299L, 320L, 341L, 363L, 385L, 407L, 429L,
451L, 473L, 495L, 517L, 538L, 559L, 580L, 600L, 619L, 638L, 7L,
28L, 49L, 70L, 91L, 112L, 133L, 154L, 175L, 196L, 217L, 238L,
259L, 280L, 301L, 322L, 343L, 365L, 387L, 409L, 431L, 453L, 475L,
497L, 519L, 540L, 561L, 582L, 602L, 621L, 9L, 30L, 51L, 72L,
93L, 114L, 135L, 156L, 177L, 198L, 219L, 240L, 261L, 282L, 303L,
324L, 345L, 367L, 389L, 411L, 433L, 455L, 477L, 499L, 520L, 541L,
562L, 583L, 603L, 622L, 640L, 10L, 31L, 52L, 73L, 94L, 115L,
136L, 157L, 178L, 199L, 220L, 241L, 262L, 283L, 304L, 325L, 346L,
368L, 390L, 412L, 434L, 456L, 478L, 500L, 521L, 542L, 563L, 584L,
604L, 623L, 11L, 32L, 53L, 74L, 95L, 116L, 137L, 158L, 179L,
200L, 221L, 242L, 263L, 284L, 305L, 326L, 348L, 370L, 392L, 414L,
436L, 458L, 480L, 502L, 523L, 544L, 565L, 586L, 606L, 625L, 642L,
13L, 34L, 55L, 76L, 97L, 118L, 139L, 160L, 181L, 202L, 223L,
244L, 265L, 286L, 307L, 328L, 350L, 372L, 394L, 416L, 438L, 460L,
482L, 504L, 525L, 546L, 567L, 588L, 608L, 627L, 644L, 15L, 36L,
57L, 78L, 99L, 120L, 141L, 162L, 183L, 204L, 225L, 246L, 267L,
288L, 309L, 330L, 352L, 374L, 396L, 418L, 440L, 462L, 484L, 506L,
527L, 548L, 569L, 590L, 610L, 629L, 17L, 38L, 59L, 80L, 101L,
122L, 143L, 164L, 185L, 206L, 227L, 248L, 269L, 290L, 311L, 332L,
354L, 376L, 398L, 420L, 442L, 464L, 486L, 508L, 529L, 550L, 571L,
592L, 612L, 631L, 646L, 19L, 40L, 61L, 82L, 103L, 124L, 145L,
166L, 187L, 208L, 229L, 250L, 271L, 292L, 313L, 334L, 356L, 378L,
400L, 422L, 444L, 466L, 488L, 510L, 531L, 552L, 573L, 594L, 614L,
633L, 21L, 42L, 63L, 84L, 105L, 126L, 147L, 168L, 189L, 210L,
231L, 252L, 273L, 294L, 315L, 336L, 358L, 380L, 402L, 424L, 446L,
468L, 490L, 512L, 533L, 554L, 575L, 596L, 616L, 635L, 648L, 2L,
23L, 44L, 65L, 86L, 107L, 128L, 149L, 170L, 191L, 212L, 233L,
254L, 275L, 296L, 317L, 338L, 360L, 382L, 404L, 426L, 448L, 470L,
492L, 514L, 535L, 556L, 577L, 598L, 618L, 637L, 4L, 25L, 46L,
67L, 88L, 109L, 130L, 151L, 172L, 193L, 214L, 235L, 256L, 277L,
298L, 319L, 340L, 362L, 384L, 406L, 428L, 450L, 472L, 494L, 516L,
537L, 558L, 579L, 599L, 6L, 27L, 48L, 69L, 90L, 111L, 132L, 153L,
174L, 195L, 216L, 237L, 258L, 279L, 300L, 321L, 342L, 364L, 386L,
408L, 430L, 452L, 474L, 496L, 518L, 539L, 560L, 581L, 601L, 620L,
639L, 8L, 29L, 50L, 71L, 92L, 113L, 134L, 155L, 176L, 197L, 218L,
239L, 260L, 281L, 302L, 323L, 344L, 366L, 388L, 410L, 432L, 454L,
476L, 498L), .Label = c("01/01/2015", "01/01/2016", "01/02/2015",
"01/02/2016", "01/03/2015", "01/03/2016", "01/04/2015", "01/04/2016",
"01/05/2015", "01/06/2015", "01/07/2015", "01/08/2014", "01/08/2015",
"01/09/2014", "01/09/2015", "01/10/2014", "01/10/2015", "01/11/2014",
"01/11/2015", "01/12/2014", "01/12/2015", "02/01/2015", "02/01/2016",
"02/02/2015", "02/02/2016", "02/03/2015", "02/03/2016", "02/04/2015",
"02/04/2016", "02/05/2015", "02/06/2015", "02/07/2015", "02/08/2014",
"02/08/2015", "02/09/2014", "02/09/2015", "02/10/2014", "02/10/2015",
"02/11/2014", "02/11/2015", "02/12/2014", "02/12/2015", "03/01/2015",
"03/01/2016", "03/02/2015", "03/02/2016", "03/03/2015", "03/03/2016",
"03/04/2015", "03/04/2016", "03/05/2015", "03/06/2015", "03/07/2015",
"03/08/2014", "03/08/2015", "03/09/2014", "03/09/2015", "03/10/2014",
"03/10/2015", "03/11/2014", "03/11/2015", "03/12/2014", "03/12/2015",
"04/01/2015", "04/01/2016", "04/02/2015", "04/02/2016", "04/03/2015",
"04/03/2016", "04/04/2015", "04/04/2016", "04/05/2015", "04/06/2015",
"04/07/2015", "04/08/2014", "04/08/2015", "04/09/2014", "04/09/2015",
"04/10/2014", "04/10/2015", "04/11/2014", "04/11/2015", "04/12/2014",
"04/12/2015", "05/01/2015", "05/01/2016", "05/02/2015", "05/02/2016",
"05/03/2015", "05/03/2016", "05/04/2015", "05/04/2016", "05/05/2015",
"05/06/2015", "05/07/2015", "05/08/2014", "05/08/2015", "05/09/2014",
"05/09/2015", "05/10/2014", "05/10/2015", "05/11/2014", "05/11/2015",
"05/12/2014", "05/12/2015", "06/01/2015", "06/01/2016", "06/02/2015",
"06/02/2016", "06/03/2015", "06/03/2016", "06/04/2015", "06/04/2016",
"06/05/2015", "06/06/2015", "06/07/2015", "06/08/2014", "06/08/2015",
"06/09/2014", "06/09/2015", "06/10/2014", "06/10/2015", "06/11/2014",
"06/11/2015", "06/12/2014", "06/12/2015", "07/01/2015", "07/01/2016",
"07/02/2015", "07/02/2016", "07/03/2015", "07/03/2016", "07/04/2015",
"07/04/2016", "07/05/2015", "07/06/2015", "07/07/2015", "07/08/2014",
"07/08/2015", "07/09/2014", "07/09/2015", "07/10/2014", "07/10/2015",
"07/11/2014", "07/11/2015", "07/12/2014", "07/12/2015", "08/01/2015",
"08/01/2016", "08/02/2015", "08/02/2016", "08/03/2015", "08/03/2016",
"08/04/2015", "08/04/2016", "08/05/2015", "08/06/2015", "08/07/2015",
"08/08/2014", "08/08/2015", "08/09/2014", "08/09/2015", "08/10/2014",
"08/10/2015", "08/11/2014", "08/11/2015", "08/12/2014", "08/12/2015",
"09/01/2015", "09/01/2016", "09/02/2015", "09/02/2016", "09/03/2015",
"09/03/2016", "09/04/2015", "09/04/2016", "09/05/2015", "09/06/2015",
"09/07/2015", "09/08/2014", "09/08/2015", "09/09/2014", "09/09/2015",
"09/10/2014", "09/10/2015", "09/11/2014", "09/11/2015", "09/12/2014",
"09/12/2015", "10/01/2015", "10/01/2016", "10/02/2015", "10/02/2016",
"10/03/2015", "10/03/2016", "10/04/2015", "10/04/2016", "10/05/2015",
"10/06/2015", "10/07/2015", "10/08/2014", "10/08/2015", "10/09/2014",
"10/09/2015", "10/10/2014", "10/10/2015", "10/11/2014", "10/11/2015",
"10/12/2014", "10/12/2015", "11/01/2015", "11/01/2016", "11/02/2015",
"11/02/2016", "11/03/2015", "11/03/2016", "11/04/2015", "11/04/2016",
"11/05/2015", "11/06/2015", "11/07/2015", "11/08/2014", "11/08/2015",
"11/09/2014", "11/09/2015", "11/10/2014", "11/10/2015", "11/11/2014",
"11/11/2015", "11/12/2014", "11/12/2015", "12/01/2015", "12/01/2016",
"12/02/2015", "12/02/2016", "12/03/2015", "12/03/2016", "12/04/2015",
"12/04/2016", "12/05/2015", "12/06/2015", "12/07/2015", "12/08/2014",
"12/08/2015", "12/09/2014", "12/09/2015", "12/10/2014", "12/10/2015",
"12/11/2014", "12/11/2015", "12/12/2014", "12/12/2015", "13/01/2015",
"13/01/2016", "13/02/2015", "13/02/2016", "13/03/2015", "13/03/2016",
"13/04/2015", "13/04/2016", "13/05/2015", "13/06/2015", "13/07/2015",
"13/08/2014", "13/08/2015", "13/09/2014", "13/09/2015", "13/10/2014",
"13/10/2015", "13/11/2014", "13/11/2015", "13/12/2014", "13/12/2015",
"14/01/2015", "14/01/2016", "14/02/2015", "14/02/2016", "14/03/2015",
"14/03/2016", "14/04/2015", "14/04/2016", "14/05/2015", "14/06/2015",
"14/07/2015", "14/08/2014", "14/08/2015", "14/09/2014", "14/09/2015",
"14/10/2014", "14/10/2015", "14/11/2014", "14/11/2015", "14/12/2014",
"14/12/2015", "15/01/2015", "15/01/2016", "15/02/2015", "15/02/2016",
"15/03/2015", "15/03/2016", "15/04/2015", "15/04/2016", "15/05/2015",
"15/06/2015", "15/07/2015", "15/08/2014", "15/08/2015", "15/09/2014",
"15/09/2015", "15/10/2014", "15/10/2015", "15/11/2014", "15/11/2015",
"15/12/2014", "15/12/2015", "16/01/2015", "16/01/2016", "16/02/2015",
"16/02/2016", "16/03/2015", "16/03/2016", "16/04/2015", "16/04/2016",
"16/05/2015", "16/06/2015", "16/07/2015", "16/08/2014", "16/08/2015",
"16/09/2014", "16/09/2015", "16/10/2014", "16/10/2015", "16/11/2014",
"16/11/2015", "16/12/2014", "16/12/2015", "17/01/2015", "17/01/2016",
"17/02/2015", "17/02/2016", "17/03/2015", "17/03/2016", "17/04/2015",
"17/04/2016", "17/05/2015", "17/06/2015", "17/07/2014", "17/07/2015",
"17/08/2014", "17/08/2015", "17/09/2014", "17/09/2015", "17/10/2014",
"17/10/2015", "17/11/2014", "17/11/2015", "17/12/2014", "17/12/2015",
"18/01/2015", "18/01/2016", "18/02/2015", "18/02/2016", "18/03/2015",
"18/03/2016", "18/04/2015", "18/04/2016", "18/05/2015", "18/06/2015",
"18/07/2014", "18/07/2015", "18/08/2014", "18/08/2015", "18/09/2014",
"18/09/2015", "18/10/2014", "18/10/2015", "18/11/2014", "18/11/2015",
"18/12/2014", "18/12/2015", "19/01/2015", "19/01/2016", "19/02/2015",
"19/02/2016", "19/03/2015", "19/03/2016", "19/04/2015", "19/04/2016",
"19/05/2015", "19/06/2015", "19/07/2014", "19/07/2015", "19/08/2014",
"19/08/2015", "19/09/2014", "19/09/2015", "19/10/2014", "19/10/2015",
"19/11/2014", "19/11/2015", "19/12/2014", "19/12/2015", "20/01/2015",
"20/01/2016", "20/02/2015", "20/02/2016", "20/03/2015", "20/03/2016",
"20/04/2015", "20/04/2016", "20/05/2015", "20/06/2015", "20/07/2014",
"20/07/2015", "20/08/2014", "20/08/2015", "20/09/2014", "20/09/2015",
"20/10/2014", "20/10/2015", "20/11/2014", "20/11/2015", "20/12/2014",
"20/12/2015", "21/01/2015", "21/01/2016", "21/02/2015", "21/02/2016",
"21/03/2015", "21/03/2016", "21/04/2015", "21/04/2016", "21/05/2015",
"21/06/2015", "21/07/2014", "21/07/2015", "21/08/2014", "21/08/2015",
"21/09/2014", "21/09/2015", "21/10/2014", "21/10/2015", "21/11/2014",
"21/11/2015", "21/12/2014", "21/12/2015", "22/01/2015", "22/01/2016",
"22/02/2015", "22/02/2016", "22/03/2015", "22/03/2016", "22/04/2015",
"22/04/2016", "22/05/2015", "22/06/2015", "22/07/2014", "22/07/2015",
"22/08/2014", "22/08/2015", "22/09/2014", "22/09/2015", "22/10/2014",
"22/10/2015", "22/11/2014", "22/11/2015", "22/12/2014", "22/12/2015",
"23/01/2015", "23/01/2016", "23/02/2015", "23/02/2016", "23/03/2015",
"23/03/2016", "23/04/2015", "23/04/2016", "23/05/2015", "23/06/2015",
"23/07/2014", "23/07/2015", "23/08/2014", "23/08/2015", "23/09/2014",
"23/09/2015", "23/10/2014", "23/10/2015", "23/11/2014", "23/11/2015",
"23/12/2014", "23/12/2015", "24/01/2015", "24/01/2016", "24/02/2015",
"24/02/2016", "24/03/2015", "24/03/2016", "24/04/2015", "24/04/2016",
"24/05/2015", "24/06/2015", "24/07/2014", "24/07/2015", "24/08/2014",
"24/08/2015", "24/09/2014", "24/09/2015", "24/10/2014", "24/10/2015",
"24/11/2014", "24/11/2015", "24/12/2014", "24/12/2015", "25/01/2015",
"25/01/2016", "25/02/2015", "25/02/2016", "25/03/2015", "25/03/2016",
"25/04/2015", "25/05/2015", "25/06/2015", "25/07/2014", "25/07/2015",
"25/08/2014", "25/08/2015", "25/09/2014", "25/09/2015", "25/10/2014",
"25/10/2015", "25/11/2014", "25/11/2015", "25/12/2014", "25/12/2015",
"26/01/2015", "26/01/2016", "26/02/2015", "26/02/2016", "26/03/2015",
"26/03/2016", "26/04/2015", "26/05/2015", "26/06/2015", "26/07/2014",
"26/07/2015", "26/08/2014", "26/08/2015", "26/09/2014", "26/09/2015",
"26/10/2014", "26/10/2015", "26/11/2014", "26/11/2015", "26/12/2014",
"26/12/2015", "27/01/2015", "27/01/2016", "27/02/2015", "27/02/2016",
"27/03/2015", "27/03/2016", "27/04/2015", "27/05/2015", "27/06/2015",
"27/07/2014", "27/07/2015", "27/08/2014", "27/08/2015", "27/09/2014",
"27/09/2015", "27/10/2014", "27/10/2015", "27/11/2014", "27/11/2015",
"27/12/2014", "27/12/2015", "28/01/2015", "28/01/2016", "28/02/2015",
"28/02/2016", "28/03/2015", "28/03/2016", "28/04/2015", "28/05/2015",
"28/06/2015", "28/07/2014", "28/07/2015", "28/08/2014", "28/08/2015",
"28/09/2014", "28/09/2015", "28/10/2014", "28/10/2015", "28/11/2014",
"28/11/2015", "28/12/2014", "28/12/2015", "29/01/2015", "29/01/2016",
"29/02/2016", "29/03/2015", "29/03/2016", "29/04/2015", "29/05/2015",
"29/06/2015", "29/07/2014", "29/07/2015", "29/08/2014", "29/08/2015",
"29/09/2014", "29/09/2015", "29/10/2014", "29/10/2015", "29/11/2014",
"29/11/2015", "29/12/2014", "29/12/2015", "30/01/2015", "30/01/2016",
"30/03/2015", "30/03/2016", "30/04/2015", "30/05/2015", "30/06/2015",
"30/07/2014", "30/07/2015", "30/08/2014", "30/08/2015", "30/09/2014",
"30/09/2015", "30/10/2014", "30/10/2015", "30/11/2014", "30/11/2015",
"30/12/2014", "30/12/2015", "31/01/2015", "31/01/2016", "31/03/2015",
"31/03/2016", "31/05/2015", "31/07/2014", "31/07/2015", "31/08/2014",
"31/08/2015", "31/10/2014", "31/10/2015", "31/12/2014", "31/12/2015"
), class = "factor"), index = c(11.54043, 14.27814, 11.5583,
12.37828, 12.54057, 12.10189, 12.12189, 12.28188, 11.96189, 12.35303,
13.023, 12.55187, 11.04192, 8.722033, 6.952167, 6.732189, 9.022016,
8.432052, 5.882287, 5.276563, 4.731485, 4.403024, 4.651509, 6.319038,
7.818936, 7.948929, 6.809, 6.199048, 6.749004, 6.499023, 5.899076,
4.529247, 4.02078, 3.760833, 3.617566, 3.36093, 3.950794, 4.230742,
4.320727, 4.720667, 4.570688, 4.080769, 4.360721, 4.580687, 4.730665,
4.630679, 4.960635, 4.180751, 4.270736, 4.210746, 4.440708, 3.670853,
3.570877, 3.650858, 3.740838, 3.880808, 3.840816, 3.240964, 3.160988,
3.250961, 3.580874, 3.560879, 5.380586, 4.510697, 4.390716, 4.260737,
3.890806, 3.36093, 3.721801, 3.591829, 3.560497, 4.120431, 4.55039,
4.4404, 4.470397, 4.670381, 3.660484, 3.730475, 3.160559, 3.320533,
3.380523, 3.600492, 3.030583, 3.260542, 2.970594, 3.040581, 2.99059,
3.40052, 3.730475, 3.430516, 3.530501, 2.970594, 3.820464, 3.830463,
3.870458, 3.700479, 3.710477, 3.680481, 3.490507, 3.740474, 3.260542,
3.318999, 3.298999, 3.328999, 3.368284, 3.41828, 3.238295, 3.008317,
2.878331, 2.788342, 2.598366, 2.488382, 2.468385, 2.448388, 2.548373,
2.308412, 2.448388, 2.658358, 2.048463, 2.568371, 2.838336, 2.868332,
2.998318, 3.358285, 3.118306, 2.618364, 2.478384, 3.1783, 3.018316,
3.07831, 2.898329, 2.938325, 2.88833, 2.848335, 2.948324, 2.908328,
2.958322, 2.968321, 2.736638, 2.927969, 2.95236, 2.92152, 4.159778,
3.274662, 3.716456, 4.321648, 4.33252, 4.942867, 4.324445, 3.925162,
3.485163, 3.945088, 3.467801, 3.84071, 3.542677, 3.207959, 3.097636,
3.229113, 3.049058, 3.487368, 2.946642, 3.194158, 3.033129, 2.741163,
2.646968, 2.514944, 2.612467, 2.806449, 2.708465, 2.567833, 2.783192,
2.99844, 2.858031, 2.860846, 2.422666, 2.08108, 2.192705, 2.407469,
2.951197, 2.425093, 2.561358, 2.162087, 2.164641, 2.295119, 1.817072,
1.385466, 2.399334, 2.859039, 2.098575, 2.406024, 2.369869, 2.744476,
3.224035, 2.8761, 2.99883, 3.079353, 2.99788, 2.957237, 2.329897,
2.556688, 2.261765, 2.211449, 2.077952, 2.172062, 2.501332, 2.271251,
2.567649, 1.985015, 2.011745, 2.378133, 1.937532, 2.295658, 1.967439,
1.922405, 1.77076, 1.877509, 1.903558, 1.843825, 2.033853, 2.107302,
2.038126, 2.054973, 1.993873, 2.042604, 1.981318, 2.286632, 1.902597,
2.202905, 2.262768, 2.493253, 2.105771, 2.113826, 2.7515, 2.085522,
2.613089, 2.118656, 2.310738, 2.626212, 2.629956, 2.752603, 2.746964,
2.766788, 2.696453, 2.159032, 2.134599, 1.714365, 1.55678, 1.626582,
1.607851, 1.532417, 1.571745, 1.500041, 1.543227, 1.480322, 1.762261,
1.515217, 1.304601, 1.447073, 1.475861, 1.498862, 1.573622, 1.515242,
1.606151, 1.581706, 1.443625, 1.442918, 1.450428, 1.56483, 1.502704,
1.555937, 1.593459, 1.459013, 1.365548, 1.530271, 1.522306, 1.164105,
1.449812, 1.34549, 1.277848, 1.140585, 1.035555, 1.161103, 1.085743,
1.174396, 1.188879, 1.245301, 0.985737, 1.169837, 1.21196, 1.132433,
1.199008, 1.16729, 1.176818, 1.202165, 1.191286, 1.199928, 1.16782,
1.163427, 1.147315, 1.152607, 1.229492, 1.464407, 1.35002, 1.326579,
1.254948, 1.333277, 0.965398, 1.246482, 1.068102, 1.05843, 1.15212,
1.182821, 1.328945, 1.261149, 1.319696, 0.815034, 1.242683, 1.222728,
1.351629, 1.311053, 1.299895, 1.161236, 0.913985, 1.021523, 0.974081,
1.312736, 0.84724, 0.784337, 0.910343, 0.911839, 0.988695, 1.204447,
1.188309, 1.209292, 1.269653, 1.131285, 1.196762, 1.122018, 1.278813,
1.306997, 1.507417, 1.808925, 1.422698, 1.362512, 1.456492, 1.339841,
1.408134, 1.464803, 1.472624, 1.507043, 1.55663, 1.48721, 1.481805,
1.350952, 1.394053, 1.505662, 1.552468, 1.835227, 1.529406, 1.542733,
2.472506, 2.051214, 2.04605, 2.332706, 2.51142, 2.856563, 2.625034,
2.642861, 2.351145, 2.318266, 2.551799, 2.332817, 2.073351, 1.730547,
2.268209, 2.08866, 1.918522, 2.225836, 2.343466, 2.1983, 2.214688,
2.249369, 2.320987, 2.158788, 2.250545, 1.86419, 1.960187, 2.145659,
1.785818, 1.812893, 1.670426, 1.759863, 1.930967, 1.911622, 1.682475,
1.77137, 1.566444, 1.802325, 1.586361, 1.294167, 1.483635, 1.699373,
1.980278, 1.628827, 2.130249, 1.65064, 1.830685, 2.334663, 2.239406,
2.374907, 2.174426, 2.11795, 1.962688, 1.970793, 2.334288, 1.97112,
2.109338, 2.380336, 1.974693, 2.231339, 1.150346, 1.248199, 1.104014,
1.145332, 1.376, 1.365866, 1.431675, 1.411714, 1.470395, 1.463537,
1.479107, 1.571953, 1.582307, 1.425284, 1.357404, 1.459058, 1.29251,
2.079904, 2.043994, 2.02053, 1.854421, 2.024019, 2.027243, 2.024739,
2.020098, 2.072994, 1.89817, 1.970579, 1.925721, 1.940698, 1.958429,
1.97927, 1.990377, 2.545347, 2.343933, 2.110605, 2.372304, 2.614607,
2.65837, 1.253188, 2.371879, 2.48065, 2.581769, 2.201459, 1.705221,
2.662408, 1.769794, 2.160805, 1.933198, 2.318748, 2.279574, 2.206514,
1.86008, 2.221785, 2.732116, 2.876525, 2.45854, 2.093711, 1.990731,
2.119744, 1.88928, 1.906683, 1.711405, 1.290373, 1.965132, 1.639966,
1.579937, 1.896039, 1.955329, 1.970785, 1.41028, 1.963055, 1.935048,
1.958985, 1.912964, 1.915689, 1.844459, 2.267502, 2.263569, 2.260751,
1.863576, 1.810112, 1.739387, 1.646463, 1.552307, 1.871372, 1.735762,
1.694135, 1.627406, 1.789137, 1.636116, 1.65404, 1.655442, 1.466584,
1.630533, 1.474457, 1.505985, 1.435338, 1.537106, 1.521365, 1.464372,
1.450722, 1.387195, 1.432416, 1.409623, 1.943541, 1.895353, 1.727831,
1.915016, 2.142965, 1.78175, 1.757019, 4.046341, 2.268203, 1.695811,
1.714067, 1.689575, 1.810448, 1.587102, 1.83034, 1.513751, 1.535203,
1.531233, 1.43809, 1.390571, 1.292746, 1.3538, 1.201273, 1.481288,
1.600983, 1.438571, 1.583992, 1.766542, 1.717157, 1.773975, 1.95323,
2.0458, 1.965663, 1.868745, 1.862877, 1.717166, 1.85268, 1.865566,
2.831913, 1.858382, 1.926938, 1.911859, 2.364972, 2.271169, 2.147911,
2.273932, 2.173164, 2.235003, 2.160419, 2.58684, 2.440009, 2.334429,
2.374356, 2.637341, 2.751997, 2.662583, 2.570964, 2.643219, 2.196613,
2.226018, 2.142688, 2.403963, 2.384954, 2.661776, 2.711935, 2.714279,
2.329776, 2.370735, 2.100872, 1.943771, 1.575529, 1.544865, 1.51201,
1.443336, 1.655716, 1.664355, 1.717507, 1.717282, 1.806321, 1.788896,
1.803193, 1.401859, 1.762782, 1.537422, 2.145965, 2.305251, 2.110511,
1.934735, 1.946052, 2.138253, 2.025721, 1.993805, 2.072526, 1.888899,
1.803845, 1.830216, 1.821895, 1.843385, 1.999159, 1.951067, 1.889941,
2.360204, 2.645206, 2.347469, 2.241971, 2.043113, 1.962672, 1.903516,
1.609725, 1.71036, 1.801525, 1.748996, 1.566542, 1.588622, 1.507817,
1.629962, 1.669554, 1.624924, 1.555608, 1.474775, 1.438227, 1.664659,
1.499378)), .Names = c("time", "index"), class = "data.frame", row.names = c(NA,
-648L))
So, what I generally do is to write this code:
# Asker's script: read a CSV, parse the date column, sort, and convert to xts.
# NOTE(review): as.xts() below is an xts function, but only fBasics is attached
# here -- confirm that xts is loaded some other way.
library(fBasics)
# NOTE(review): the file is read into `pw_index`, yet every later line works on
# `index` -- presumably the same data under another name; verify.
pw_index <- read.csv("~/data/index.csv",
header=T)
# Set time in date format
# NOTE(review): "%y" is the two-digit-year conversion, while the values shown
# (e.g. 17/07/2014) carry four-digit years; the 2020 dates in the output below
# are consistent with only "20" being consumed as the year. "%Y" looks intended.
index$time <- as.Date(index$time, format="%d/%m/%y")
# Sort the rows by the parsed date.
index <- index[order(index$time), ]
# Save the date in a separate identifier as character
dates = as.character(index$time)
# Re-sort the rows, this time by the character form of the dates.
index <- index[order(dates), ]
# Convert the data frame to an .xts object:
index_xts <- as.xts(index$index, order.by=index$time)
# Print the first rows of the resulting series.
head(index_xts)
If I initially inspect the dataset via head() I obtain this:
time index
<fctr> <dbl>
1 17/07/2014 11.54043
2 18/07/2014 14.27814
3 19/07/2014 11.55830
4 20/07/2014 12.37828
5 21/07/2014 12.54057
6 22/07/2014 12.10189
However, what I obtain after running the code is a completely messed-up dataset (the last observation should be from 2016...):
[,1]
2020-01-01 2.708465
2020-01-01 2.268203
2020-01-02 2.567833
2020-01-02 1.695811
2020-01-03 2.783192
2020-01-03 1.714067
Who knows what's going on?
Your code is somewhat convoluted, and I'm not entirely sure what you're trying to do. For converting the data in your data.frame into an xts object you can do the following:
library(xts)
# Build the series straight from the data frame `df`.
# - `df$time` is a factor of "dd/mm/yyyy" strings, so convert it to character
#   and parse it with the four-digit-year format "%Y" ("%y" would read only
#   two digits of the year).
# - The observations are daily, so a Date index is used: it carries no time
#   zone and cannot shift by a day when the session time zone changes.
# - The result is not called `xts`, which would shadow the constructor.
index_xts <- xts(
  x = df$index,
  order.by = as.Date(as.character(df$time), format = "%d/%m/%Y")
)
# Show the most recent observations.
tail(index_xts)
# [,1]
#2016-04-19 1.624924
#2016-04-20 1.555608
#2016-04-21 1.474775
#2016-04-22 1.438227
#2016-04-23 1.664659
#2016-04-24 1.499378
I assume that df is your data.frame the content of which you provided with dput.
Hi programming fellows,
Please consider the following data frame:
df <- structure(list(date = structure(c(1251350100.288, 1251351900,
1251353699.712, 1251355500.288, 1251357300, 1251359099.712), class = c("POSIXct",
"POSIXt")), mix.ratio.csi = c(442.78316237477, 436.757082063885,
425.742872761246, 395.770804307671, 386.758335309866, 392.115887652156
), mix.ratio.licor = c(447.141491945547, 441.319548211994, 430.854166343173,
402.232640566763, 393.683007533694, 398.388336602215), ToKeep = c(FALSE,
FALSE, TRUE, TRUE, TRUE, TRUE)), .Names = c("date", "value1",
"value2", "ToKeep"), index = structure(integer(0), ToKeep = c(1L,
2L, 8L, 52L, 53L, 54L, 55L, 85L, 86L, 87L, 88L, 89L, 92L, 93L,
94L, 95L, 96L, 97L, 98L, 99L, 100L, 102L, 103L, 105L, 106L, 192L,
193L, 220L, 223L, 225L, 228L, 229L, 260L, 263L, 264L, 265L, 266L,
267L, 305L, 306L, 307L, 308L, 309L, 310L, 311L, 312L, 313L, 314L,
315L, 352L, 353L, 354L, 375L, 376L, 378L, 379L, 380L, 383L, 411L,
412L, 413L, 414L, 415L, 416L, 418L, 419L, 445L, 453L, 463L, 464L,
465L, 466L, 467L, 468L, 497L, 504L, 547L, 548L, 549L, 586L, 589L,
630L, 631L, 632L, 633L, 634L, 635L, 636L, 644L, 645L, 646L, 647L,
648L, 649L, 650L, 651L, 674L, 675L, 676L, 677L, 678L, 682L, 687L,
690L, 691L, 724L, 725L, 726L, 727L, 728L, 729L, 730L, 731L, 732L,
733L, 734L, 735L, 736L, 739L, 740L, 741L, 742L, 768L, 771L, 772L,
773L, 774L, 775L, 776L, 777L, 778L, 779L, 3L, 4L, 5L, 6L, 7L,
9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L,
22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L,
35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L,
48L, 49L, 50L, 51L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L,
65L, 66L, 67L, 68L, 69L, 70L, 71L, 72L, 73L, 74L, 75L, 76L, 77L,
78L, 79L, 80L, 81L, 82L, 83L, 84L, 90L, 91L, 101L, 104L, 107L,
108L, 109L, 110L, 111L, 112L, 113L, 114L, 115L, 116L, 117L, 118L,
119L, 120L, 121L, 122L, 123L, 124L, 125L, 126L, 127L, 128L, 129L,
130L, 131L, 132L, 133L, 134L, 135L, 136L, 137L, 138L, 139L, 140L,
141L, 142L, 143L, 144L, 145L, 146L, 147L, 148L, 149L, 150L, 151L,
152L, 153L, 154L, 155L, 156L, 157L, 158L, 159L, 160L, 161L, 162L,
163L, 164L, 165L, 166L, 167L, 168L, 169L, 170L, 171L, 172L, 173L,
174L, 175L, 176L, 177L, 178L, 179L, 180L, 181L, 182L, 183L, 184L,
185L, 186L, 187L, 188L, 189L, 190L, 191L, 194L, 195L, 196L, 197L,
198L, 199L, 200L, 201L, 202L, 203L, 204L, 205L, 206L, 207L, 208L,
209L, 210L, 211L, 212L, 213L, 214L, 215L, 216L, 217L, 218L, 219L,
221L, 222L, 224L, 226L, 227L, 230L, 231L, 232L, 233L, 234L, 235L,
236L, 237L, 238L, 239L, 240L, 241L, 242L, 243L, 244L, 245L, 246L,
247L, 248L, 249L, 250L, 251L, 252L, 253L, 254L, 255L, 256L, 257L,
258L, 259L, 261L, 262L, 268L, 269L, 270L, 271L, 272L, 273L, 274L,
275L, 276L, 277L, 278L, 279L, 280L, 281L, 282L, 283L, 284L, 285L,
286L, 287L, 288L, 289L, 290L, 291L, 292L, 293L, 294L, 295L, 296L,
297L, 298L, 299L, 300L, 301L, 302L, 303L, 304L, 316L, 317L, 318L,
319L, 320L, 321L, 322L, 323L, 324L, 325L, 326L, 327L, 328L, 329L,
330L, 331L, 332L, 333L, 334L, 335L, 336L, 337L, 338L, 339L, 340L,
341L, 342L, 343L, 344L, 345L, 346L, 347L, 348L, 349L, 350L, 351L,
355L, 356L, 357L, 358L, 359L, 360L, 361L, 362L, 363L, 364L, 365L,
366L, 367L, 368L, 369L, 370L, 371L, 372L, 373L, 374L, 377L, 381L,
382L, 384L, 385L, 386L, 387L, 388L, 389L, 390L, 391L, 392L, 393L,
394L, 395L, 396L, 397L, 398L, 399L, 400L, 401L, 402L, 403L, 404L,
405L, 406L, 407L, 408L, 409L, 410L, 417L, 420L, 421L, 422L, 423L,
424L, 425L, 426L, 427L, 428L, 429L, 430L, 431L, 432L, 433L, 434L,
435L, 436L, 437L, 438L, 439L, 440L, 441L, 442L, 443L, 444L, 446L,
447L, 448L, 449L, 450L, 451L, 452L, 454L, 455L, 456L, 457L, 458L,
459L, 460L, 461L, 462L, 469L, 470L, 471L, 472L, 473L, 474L, 475L,
476L, 477L, 478L, 479L, 480L, 481L, 482L, 483L, 484L, 485L, 486L,
487L, 488L, 489L, 490L, 491L, 492L, 493L, 494L, 495L, 496L, 498L,
499L, 500L, 501L, 502L, 503L, 505L, 506L, 507L, 508L, 509L, 510L,
511L, 512L, 513L, 514L, 515L, 516L, 517L, 518L, 519L, 520L, 521L,
522L, 523L, 524L, 525L, 526L, 527L, 528L, 529L, 530L, 531L, 532L,
533L, 534L, 535L, 536L, 537L, 538L, 539L, 540L, 541L, 542L, 543L,
544L, 545L, 546L, 550L, 551L, 552L, 553L, 554L, 555L, 556L, 557L,
558L, 559L, 560L, 561L, 562L, 563L, 564L, 565L, 566L, 567L, 568L,
569L, 570L, 571L, 572L, 573L, 574L, 575L, 576L, 577L, 578L, 579L,
580L, 581L, 582L, 583L, 584L, 585L, 587L, 588L, 590L, 591L, 592L,
593L, 594L, 595L, 596L, 597L, 598L, 599L, 600L, 601L, 602L, 603L,
604L, 605L, 606L, 607L, 608L, 609L, 610L, 611L, 612L, 613L, 614L,
615L, 616L, 617L, 618L, 619L, 620L, 621L, 622L, 623L, 624L, 625L,
626L, 627L, 628L, 629L, 637L, 638L, 639L, 640L, 641L, 642L, 643L,
652L, 653L, 654L, 655L, 656L, 657L, 658L, 659L, 660L, 661L, 662L,
663L, 664L, 665L, 666L, 667L, 668L, 669L, 670L, 671L, 672L, 673L,
679L, 680L, 681L, 683L, 684L, 685L, 686L, 688L, 689L, 692L, 693L,
694L, 695L, 696L, 697L, 698L, 699L, 700L, 701L, 702L, 703L, 704L,
705L, 706L, 707L, 708L, 709L, 710L, 711L, 712L, 713L, 714L, 715L,
716L, 717L, 718L, 719L, 720L, 721L, 722L, 723L, 737L, 738L, 743L,
744L, 745L, 746L, 747L, 748L, 749L, 750L, 751L, 752L, 753L, 754L,
755L, 756L, 757L, 758L, 759L, 760L, 761L, 762L, 763L, 764L, 765L,
766L, 767L, 769L, 770L, 780L, 781L, 782L, 783L, 784L, 785L, 786L,
787L, 788L, 789L)), row.names = c(NA, 6L), class = "data.frame")
I need to create a new data.frame with the following structure:
1) if column 'ToKeep' is TRUE, then columns 'date', 'value1' and 'value2' remain the same;
2) if column 'ToKeep' is FALSE, then columns 'value1' and 'value2' receive NA (and 'date' remains the same).
I have been trying to use ifelse so far, but still haven't found the right indexing procedure:
# Attempt to blank out columns 2 and 3 based on column 4 (ToKeep).
# NOTE(review): lapply() walks over the elements of df[, 4] (the ToKeep flags),
# not over the value columns, so `x` is a flag rather than a measurement and
# the list it returns has one element per row instead of one per column.
df[, c(2,3)] <- lapply(df[, 4], function(x) ifelse(x == FALSE, NA, x))
Any suggestion?
Thanks in advance,
Thiago.
You can use the logical column to subset the rows, choose the columns you want, then assign the NA values with [<-
# Work on a copy so that the original data set `df` is not overwritten.
df2 <- df
# Rows whose `ToKeep` flag is FALSE get NA in both value columns; the `date`
# column is never part of the assignment.
df2[!df2[["ToKeep"]], c("value1", "value2")] <- NA
which results in
df2
# date value1 value2 ToKeep
# 1 2009-08-26 22:15:00 NA NA FALSE
# 2 2009-08-26 22:45:00 NA NA FALSE
# 3 2009-08-26 23:14:59 425.7429 430.8542 TRUE
# 4 2009-08-26 23:45:00 395.7708 402.2326 TRUE
# 5 2009-08-27 00:15:00 386.7583 393.6830 TRUE
# 6 2009-08-27 00:44:59 392.1159 398.3883 TRUE
You could replace the lapply command with
# Rebuild columns 2 and 3 in place: each value is kept where `ToKeep` is TRUE
# and replaced by NA where it is FALSE.
df[2:3] <- lapply(
  df[2:3],
  function(values) ifelse(df[, "ToKeep"], values, NA)
)
# Print the masked data frame.
df
# date value1 value2 ToKeep
#1 2009-08-27 01:15:00 NA NA FALSE
#2 2009-08-27 01:45:00 NA NA FALSE
#3 2009-08-27 02:14:59 425.7429 430.8542 TRUE
#4 2009-08-27 02:45:00 395.7708 402.2326 TRUE
#5 2009-08-27 03:15:00 386.7583 393.6830 TRUE
#6 2009-08-27 03:44:59 392.1159 398.3883 TRUE
Or instead of ifelse, you can use replace
# Same masking as with ifelse(), expressed with replace(): the positions where
# `ToKeep` is FALSE are overwritten with NA in columns 2 and 3.
df[2:3] <- lapply(
  df[2:3],
  function(values) replace(values, !df[, "ToKeep"], NA)
)
I have toyed with a number of ideas to do this, but so far have only come up with some rather inelegant solutions. I'm sure I could make it work, but the code would neither be pretty nor efficient. Here's the problem:
I have a series of integer pairs that are presented as rows in a two-column data frame. The goal is three-fold:
You need to "eliminate" all the rows in this data frame. To "eliminate" a row, you must select either one of the units from that pair and send/save it to a vector of "selected" elements.
You must find the smallest possible combination of "selected elements" that will eliminate all the pairs in the data frame.
The code must be computationally efficient because it will be applied to rather large datasets.
For instance, one would choose items "1" and "2" from the following list of pairs:
1 3
1 4
2 5
3 2
The data below can be used as a working example.
Thanks!
Vincent
Update for some context:
Hi Cipi and SiggyF.
I understand your concerns about this being homework, so in case you read this again, here's some context that I hope may dispel your doubts.
I am working with time-series cross-sectional data in which N is much larger than T. I would like to use panel-corrected standard errors like those proposed in Beck & Katz (1995). The package "pcse" is mostly able to do this just fine. When you have an unbalanced panel, it essentially creates a "rectangular" dataset (every time unit has the full number of observations) by filling in missing values for the omitted observations in every panel. Then, pcse computes a matrix Sigma.hat which is essentially the weighted average of the outer product of the residuals within time periods (think of it as averaging over an N X N X T array to bring it down to an N X N Sigma.hat).
The problem is that if any two units have zero contemporaneous observations, then the corresponding cell in Sigma.hat will be NA, and pcse won't be able to use it to get the sandwich estimator of the variance-covariance matrix. In my example, the data frame numbers correspond to the index of the missing values in Sigma.hat. I want to trim down Sigma.hat automatically, to get an estimate of the VCOV that uses the most information possible, hence my desire to keep as many of the numbers in the data frame as possible.
This is probably very unclear to anyone who hasn't looked into pcse, but I hope you get the gist of it.
Sorry to have given an impression of impropriety, but I assure you, this is legit.
test<-structure(list(row = c(27L, 44L,
45L, 111L, 128L, 129L, 195L, 212L,
213L, 279L, 296L, 297L, 363L, 380L,
381L, 7L, 91L, 175L, 259L, 343L, 44L,
45L, 70L, 128L, 129L, 154L, 212L,
213L, 238L, 296L, 297L, 322L, 380L,
381L, 406L, 7L, 37L, 48L, 91L, 121L,
132L, 175L, 205L, 216L, 259L, 289L,
300L, 343L, 373L, 384L, 7L, 37L, 48L,
91L, 121L, 132L, 175L, 205L, 216L,
259L, 289L, 300L, 343L, 373L, 384L,
44L, 45L, 128L, 129L, 212L, 213L,
296L, 297L, 380L, 381L, 37L, 121L,
205L, 289L, 373L, 27L, 44L, 45L, 111L,
128L, 129L, 195L, 212L, 213L, 279L,
296L, 297L, 363L, 380L, 381L, 7L,
91L, 175L, 259L, 343L, 44L, 45L, 70L,
128L, 129L, 154L, 212L, 213L, 238L,
296L, 297L, 322L, 380L, 381L, 406L,
7L, 37L, 48L, 91L, 121L, 132L, 175L,
205L, 216L, 259L, 289L, 300L, 343L,
373L, 384L, 7L, 37L, 48L, 91L, 121L,
132L, 175L, 205L, 216L, 259L, 289L,
300L, 343L, 373L, 384L, 44L, 45L,
128L, 129L, 212L, 213L, 296L, 297L,
380L, 381L, 37L, 121L, 205L, 289L,
373L, 27L, 44L, 45L, 111L, 128L,
129L, 195L, 212L, 213L, 279L, 296L,
297L, 363L, 380L, 381L, 7L, 91L,
175L, 259L, 343L, 44L, 45L, 70L, 128L,
129L, 154L, 212L, 213L, 238L, 296L,
297L, 322L, 380L, 381L, 406L, 7L,
37L, 48L, 91L, 121L, 132L, 175L, 205L,
216L, 259L, 289L, 300L, 343L, 373L,
384L, 7L, 37L, 48L, 91L, 121L, 132L,
175L, 205L, 216L, 259L, 289L, 300L,
343L, 373L, 384L, 44L, 45L, 128L,
129L, 212L, 213L, 296L, 297L, 380L,
381L, 37L, 121L, 205L, 289L, 373L,
27L, 44L, 45L, 111L, 128L, 129L, 195L,
212L, 213L, 279L, 296L, 297L, 363L,
380L, 381L, 7L, 91L, 175L, 259L, 343L,
44L, 45L, 70L, 128L, 129L, 154L,
212L, 213L, 238L, 296L, 297L, 322L,
380L, 381L, 406L, 7L, 37L, 48L, 91L,
121L, 132L, 175L, 205L, 216L, 259L,
289L, 300L, 343L, 373L, 384L, 7L, 37L,
48L, 91L, 121L, 132L, 175L, 205L,
216L, 259L, 289L, 300L, 343L, 373L,
384L, 44L, 45L, 128L, 129L, 212L,
213L, 296L, 297L, 380L, 381L, 37L,
121L, 205L, 289L, 373L, 27L, 44L,
45L, 111L, 128L, 129L, 195L, 212L,
213L, 279L, 296L, 297L, 363L, 380L,
381L, 7L, 91L, 175L, 259L, 343L, 44L,
45L, 70L, 128L, 129L, 154L, 212L,
213L, 238L, 296L, 297L, 322L, 380L,
381L, 406L, 7L, 37L, 48L, 91L, 121L,
132L, 175L, 205L, 216L, 259L, 289L,
300L, 343L, 373L, 384L, 7L, 37L, 48L,
91L, 121L, 132L, 175L, 205L, 216L,
259L, 289L, 300L, 343L, 373L, 384L,
44L, 45L, 128L, 129L, 212L, 213L,
296L, 297L, 380L, 381L, 37L, 121L,
205L, 289L, 373L), col = c(7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 27L, 27L, 27L, 27L, 27L,
37L, 37L, 37L, 37L, 37L, 37L, 37L,
37L, 37L, 37L, 37L, 37L, 37L, 37L,
37L, 44L, 44L, 44L, 44L, 44L, 44L,
44L, 44L, 44L, 44L, 44L, 44L, 44L,
44L, 44L, 45L, 45L, 45L, 45L, 45L,
45L, 45L, 45L, 45L, 45L, 45L, 45L,
45L, 45L, 45L, 48L, 48L, 48L, 48L,
48L, 48L, 48L, 48L, 48L, 48L, 70L,
70L, 70L, 70L, 70L, 91L, 91L, 91L,
91L, 91L, 91L, 91L, 91L, 91L, 91L,
91L, 91L, 91L, 91L, 91L, 111L, 111L,
111L, 111L, 111L, 121L, 121L, 121L,
121L, 121L, 121L, 121L, 121L, 121L,
121L, 121L, 121L, 121L, 121L, 121L,
128L, 128L, 128L, 128L, 128L, 128L,
128L, 128L, 128L, 128L, 128L, 128L,
128L, 128L, 128L, 129L, 129L, 129L,
129L, 129L, 129L, 129L, 129L, 129L,
129L, 129L, 129L, 129L, 129L, 129L,
132L, 132L, 132L, 132L, 132L, 132L,
132L, 132L, 132L, 132L, 154L, 154L,
154L, 154L, 154L, 175L, 175L, 175L,
175L, 175L, 175L, 175L, 175L, 175L,
175L, 175L, 175L, 175L, 175L, 175L,
195L, 195L, 195L, 195L, 195L, 205L,
205L, 205L, 205L, 205L, 205L, 205L,
205L, 205L, 205L, 205L, 205L, 205L,
205L, 205L, 212L, 212L, 212L, 212L,
212L, 212L, 212L, 212L, 212L, 212L,
212L, 212L, 212L, 212L, 212L, 213L,
213L, 213L, 213L, 213L, 213L, 213L,
213L, 213L, 213L, 213L, 213L, 213L,
213L, 213L, 216L, 216L, 216L, 216L,
216L, 216L, 216L, 216L, 216L, 216L,
238L, 238L, 238L, 238L, 238L, 259L,
259L, 259L, 259L, 259L, 259L, 259L,
259L, 259L, 259L, 259L, 259L, 259L,
259L, 259L, 279L, 279L, 279L, 279L,
279L, 289L, 289L, 289L, 289L, 289L,
289L, 289L, 289L, 289L, 289L, 289L,
289L, 289L, 289L, 289L, 296L, 296L,
296L, 296L, 296L, 296L, 296L, 296L,
296L, 296L, 296L, 296L, 296L, 296L,
296L, 297L, 297L, 297L, 297L, 297L,
297L, 297L, 297L, 297L, 297L, 297L,
297L, 297L, 297L, 297L, 300L, 300L,
300L, 300L, 300L, 300L, 300L, 300L,
300L, 300L, 322L, 322L, 322L, 322L,
322L, 343L, 343L, 343L, 343L, 343L,
343L, 343L, 343L, 343L, 343L, 343L,
343L, 343L, 343L, 343L, 363L, 363L,
363L, 363L, 363L, 373L, 373L, 373L,
373L, 373L, 373L, 373L, 373L, 373L,
373L, 373L, 373L, 373L, 373L, 373L,
380L, 380L, 380L, 380L, 380L, 380L,
380L, 380L, 380L, 380L, 380L, 380L,
380L, 380L, 380L, 381L, 381L, 381L,
381L, 381L, 381L, 381L, 381L, 381L,
381L, 381L, 381L, 381L, 381L, 381L,
384L, 384L, 384L, 384L, 384L, 384L,
384L, 384L, 384L, 384L, 406L, 406L,
406L, 406L, 406L)), .Names = c("row",
"col" ), row.names = c(NA, -400L),
class = "data.frame")
OK, if you consider your elements as vertices and your pairs as edges of a graph, your problem becomes a case of the well-known (and NP-complete) vertex cover problem. You can easily find an approximate solution, guaranteed to be within a factor of two of optimal, by choosing an arbitrary edge, selecting both of its vertices, removing all eliminated edges, and repeating (lather, rinse, repeat). You can do incrementally better with more complicated approximation algorithms, but finding the optimal solution for a large graph is probably not feasible.
Here is a simple function to do this. (Note that R is not my native language, so this is probably hideously non-idiomatic; any suggestions for improvement would be appreciated.)
#' Randomised approximate vertex cover for a set of pairs
#'
#' Repeatedly draws one of the remaining pairs at random, selects both of its
#' elements, and discards every pair that contains either of them, until no
#' pairs are left. Uses R's random number generator, so call `set.seed()`
#' beforehand for reproducible output.
#'
#' @param dat A data frame with columns `row` and `col`; each row is one pair
#'   that has to be eliminated.
#' @param result Elements that are already selected; the newly selected
#'   elements are appended to it. Defaults to `NULL`.
#' @return `result` followed by the selected elements, two per drawn pair, in
#'   the order in which they were drawn. When `dat` has no rows, `result` is
#'   returned as is.
good <- function(dat, result = NULL) {
  stopifnot(is.data.frame(dat), all(c("row", "col") %in% names(dat)))

  # Work on the two columns as plain vectors: cheaper than re-subsetting the
  # data frame on every step.
  rows <- dat[["row"]]
  cols <- dat[["col"]]

  # Each step removes at least the drawn pair, so there can never be more
  # steps than pairs; preallocate one slot per possible step.
  picked <- vector("list", length(rows))
  n_picked <- 0L

  # Iterate instead of recursing so that large inputs cannot exhaust the
  # evaluation depth; the emptiness check comes before any sampling.
  while (length(rows) > 0L) {
    pick <- sample.int(length(rows), 1L)
    u <- rows[pick]
    v <- cols[pick]

    n_picked <- n_picked + 1L
    picked[[n_picked]] <- c(u, v)

    # Keep only the pairs that touch neither selected element. which() treats
    # an NA comparison as "do not keep".
    keep <- which(rows != u & rows != v & cols != u & cols != v)
    rows <- rows[keep]
    cols <- cols[keep]
  }

  c(result, unlist(picked[seq_len(n_picked)]))
}
I'd run this a number of times and keep the best one. (It might also be useful to keep track of the size of the worst one, as half of that size gives you a lower bound on the optimal size: the edges it sampled share no vertices, so any solution must select at least one element per sampled edge.) It might be useful to postprocess the result to remove excess vertices.
Running 10000 iterations (and removing redundant vertices) gives the following 19 element solution to your sample problem.
7 37 45 48 91 121 128 132 175 205 212 216 259 279 289 300 343 373 384
We also know that the optimal solution must have at least 15 vertices.