Error in package msm: *** caught segfault *** 'memory not mapped' - r

I am trying to run a multistate model using the package msm and I am encountering the following error:
*** caught segfault ***
address 0x607c00032c60, cause 'memory not mapped'
The data
dat.long <- structure(list(id = c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L,
4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L, 7L, 7L, 7L, 8L, 8L, 8L, 9L, 9L,
9L, 10L, 10L, 10L, 11L, 11L, 11L, 12L, 12L, 12L, 13L, 13L, 13L,
14L, 14L, 14L, 15L, 15L, 15L, 16L, 16L, 16L, 17L, 17L, 17L, 18L,
18L, 18L, 19L, 19L, 19L, 20L, 20L, 20L, 21L, 21L, 21L, 22L, 22L,
22L, 23L, 23L, 23L, 24L, 24L, 24L, 25L, 25L, 25L, 26L, 26L, 26L,
27L, 27L, 27L, 28L, 28L, 28L, 29L, 29L, 29L, 30L, 30L, 30L, 31L,
31L, 31L, 32L, 32L, 32L, 33L, 33L, 33L, 34L, 34L, 34L, 35L, 35L,
35L, 36L, 36L, 36L, 37L, 37L, 37L, 38L, 38L, 38L, 39L, 39L, 39L,
40L, 40L, 40L, 41L, 41L, 41L, 42L, 42L, 42L, 43L, 43L, 43L, 44L,
44L, 44L, 45L, 45L, 45L, 46L, 46L, 46L, 47L, 47L, 47L, 48L, 48L,
48L, 49L, 49L, 49L, 50L, 50L, 50L, 51L, 51L, 51L, 52L, 52L, 52L,
53L, 53L, 53L, 54L, 54L, 54L, 55L, 55L, 55L, 56L, 56L, 56L, 57L,
57L, 57L, 58L, 58L, 58L, 59L, 59L, 59L, 60L, 60L, 60L, 61L, 61L,
61L, 62L, 62L, 62L, 63L, 63L, 63L, 64L, 64L, 64L, 65L, 65L, 65L,
66L, 66L, 66L, 67L, 67L, 67L, 68L, 68L, 68L, 69L, 69L, 69L, 70L,
70L, 70L, 71L, 71L, 71L, 72L, 72L, 72L, 73L, 73L, 73L, 74L, 74L,
74L, 75L, 75L, 75L, 76L, 76L, 76L, 77L, 77L, 77L, 78L, 78L, 78L,
79L, 79L, 79L, 80L, 80L, 80L, 81L, 81L, 81L, 82L, 82L, 82L, 83L,
83L, 83L, 84L, 84L, 84L, 85L, 85L, 85L, 86L, 86L, 86L, 87L, 87L,
87L, 88L, 88L, 88L, 89L, 89L, 89L, 90L, 90L, 90L, 91L, 91L, 91L,
92L, 92L, 92L, 93L, 93L, 93L, 94L, 94L, 94L, 95L, 95L, 95L, 96L,
96L, 96L, 97L, 97L, 97L, 98L, 98L, 98L, 99L, 99L, 99L), time = c(1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L,
1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L,
1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L,
1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L,
1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L,
1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L,
1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), age = c(63L, 67L, 71L, 65L,
69L, 73L, 60L, 64L, 69L, 62L, 65L, 69L, 64L, 68L, 72L, 64L, 68L,
72L, 64L, 68L, 72L, 64L, 68L, 72L, 64L, 68L, 73L, 65L, 69L, 73L,
61L, 65L, 68L, 63L, 67L, 72L, 64L, 69L, 73L, 61L, 65L, 69L, 61L,
65L, 69L, 64L, 68L, 72L, 63L, 67L, 71L, 61L, 65L, 69L, 64L, 68L,
72L, 65L, 69L, 73L, 63L, 67L, 71L, 61L, 64L, 68L, 63L, 67L, 71L,
63L, 68L, 72L, 62L, 66L, 70L, 64L, 68L, 72L, 62L, 66L, 70L, 65L,
69L, 73L, 63L, 66L, 70L, 62L, 66L, 70L, 62L, 65L, 70L, 62L, 66L,
70L, 63L, 67L, 71L, 62L, 66L, 71L, 62L, 66L, 70L, 63L, 67L, 71L,
64L, 67L, 72L, 61L, 65L, 69L, 64L, 67L, 71L, 64L, 69L, 72L, 62L,
66L, 70L, 62L, 66L, 70L, 63L, 67L, 71L, 64L, 68L, 72L, 62L, 66L,
70L, 60L, 64L, 68L, 63L, 67L, 71L, 64L, 68L, 73L, 64L, 68L, 72L,
64L, 68L, 72L, 64L, 68L, 71L, 62L, 65L, 69L, 61L, 65L, 69L, 64L,
68L, 72L, 60L, 65L, 69L, 62L, 66L, 70L, 60L, 64L, 68L, 63L, 67L,
71L, 63L, 67L, 71L, 64L, 68L, 72L, 65L, 69L, 73L, 61L, 65L, 69L,
63L, 67L, 71L, 63L, 67L, 71L, 62L, 67L, 71L, 64L, 68L, 72L, 64L,
68L, 72L, 63L, 67L, 71L, 64L, 68L, 72L, 64L, 68L, 72L, 62L, 66L,
71L, 61L, 65L, 69L, 63L, 68L, 72L, 60L, 65L, 69L, 61L, 65L, 69L,
63L, 68L, 72L, 62L, 67L, 70L, 64L, 68L, 73L, 61L, 65L, 69L, 62L,
66L, 70L, 62L, 65L, 70L, 61L, 65L, 69L, 64L, 68L, 72L, 62L, 66L,
70L, 61L, 65L, 69L, 61L, 66L, 70L, 63L, 67L, 71L, 61L, 65L, 70L,
62L, 67L, 71L, 60L, 64L, 69L, 61L, 66L, 70L, 61L, 65L, 69L, 62L,
67L, 72L, 63L, 67L, 71L, 60L, 64L, 69L, 61L, 65L, 69L, 65L, 69L,
73L, 62L, 66L, 70L, 60L, 64L, 68L), mci = structure(c(2L, 2L,
2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L,
2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L,
2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L,
2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L,
2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L,
1L, 1L, 1L, 1L, 2L, 1L, 2L), .Label = c("1", "2"), class = "factor")), .Names = c("id",
"time", "age", "mci"), reshapeLong = structure(list(varying = structure(list(
age = c("age_R2", "b_age_R2", "c_age"), mci = c("mci_w1",
"mci_w2", "mci_w3")), .Names = c("age", "mci")), v.names = c("age",
"mci"), idvar = "id", timevar = "time"), .Names = c("varying",
"v.names", "idvar", "timevar")), row.names = c("1.1", "1.2",
"1.3", "2.1", "2.2", "2.3", "3.1", "3.2", "3.3", "4.1", "4.2",
"4.3", "5.1", "5.2", "5.3", "6.1", "6.2", "6.3", "7.1", "7.2",
"7.3", "8.1", "8.2", "8.3", "9.1", "9.2", "9.3", "10.1", "10.2",
"10.3", "11.1", "11.2", "11.3", "12.1", "12.2", "12.3", "13.1",
"13.2", "13.3", "14.1", "14.2", "14.3", "15.1", "15.2", "15.3",
"16.1", "16.2", "16.3", "17.1", "17.2", "17.3", "18.1", "18.2",
"18.3", "19.1", "19.2", "19.3", "20.1", "20.2", "20.3", "21.1",
"21.2", "21.3", "22.1", "22.2", "22.3", "23.1", "23.2", "23.3",
"24.1", "24.2", "24.3", "25.1", "25.2", "25.3", "26.1", "26.2",
"26.3", "27.1", "27.2", "27.3", "28.1", "28.2", "28.3", "29.1",
"29.2", "29.3", "30.1", "30.2", "30.3", "31.1", "31.2", "31.3",
"32.1", "32.2", "32.3", "33.1", "33.2", "33.3", "34.1", "34.2",
"34.3", "35.1", "35.2", "35.3", "36.1", "36.2", "36.3", "37.1",
"37.2", "37.3", "38.1", "38.2", "38.3", "39.1", "39.2", "39.3",
"40.1", "40.2", "40.3", "41.1", "41.2", "41.3", "42.1", "42.2",
"42.3", "43.1", "43.2", "43.3", "44.1", "44.2", "44.3", "45.1",
"45.2", "45.3", "46.1", "46.2", "46.3", "47.1", "47.2", "47.3",
"48.1", "48.2", "48.3", "49.1", "49.2", "49.3", "50.1", "50.2",
"50.3", "51.1", "51.2", "51.3", "52.1", "52.2", "52.3", "53.1",
"53.2", "53.3", "54.1", "54.2", "54.3", "55.1", "55.2", "55.3",
"56.1", "56.2", "56.3", "57.1", "57.2", "57.3", "58.1", "58.2",
"58.3", "59.1", "59.2", "59.3", "60.1", "60.2", "60.3", "61.1",
"61.2", "61.3", "62.1", "62.2", "62.3", "63.1", "63.2", "63.3",
"64.1", "64.2", "64.3", "65.1", "65.2", "65.3", "66.1", "66.2",
"66.3", "67.1", "67.2", "67.3", "68.1", "68.2", "68.3", "69.1",
"69.2", "69.3", "70.1", "70.2", "70.3", "71.1", "71.2", "71.3",
"72.1", "72.2", "72.3", "73.1", "73.2", "73.3", "74.1", "74.2",
"74.3", "75.1", "75.2", "75.3", "76.1", "76.2", "76.3", "77.1",
"77.2", "77.3", "78.1", "78.2", "78.3", "79.1", "79.2", "79.3",
"80.1", "80.2", "80.3", "81.1", "81.2", "81.3", "82.1", "82.2",
"82.3", "83.1", "83.2", "83.3", "84.1", "84.2", "84.3", "85.1",
"85.2", "85.3", "86.1", "86.2", "86.3", "87.1", "87.2", "87.3",
"88.1", "88.2", "88.3", "89.1", "89.2", "89.3", "90.1", "90.2",
"90.3", "91.1", "91.2", "91.3", "92.1", "92.2", "92.3", "93.1",
"93.2", "93.3", "94.1", "94.2", "94.3", "95.1", "95.2", "95.3",
"96.1", "96.2", "96.3", "97.1", "97.2", "97.3", "98.1", "98.2",
"98.3", "99.1", "99.2", "99.3"), class = "data.frame")
I then run the multistate model as follows.
library(msm)
#construct the qmatrix(all transitions are allowed.)
Q <- matrix(c(1,1,1,1),
nrow = 2, ncol = 2, byrow=TRUE,
dimnames=list(from=1:2,to=1:2))
#specify the initial values
crudeinits <- crudeinits.msm(mci ~ age, subject=id, data=dat.long, qmatrix=Q)
#the model
mci.msm <- msm(mci ~ age, subject = id, qmatrix = crudeinits, data = dat.long)
This than results in the R session been terminated with the above error. I am currently unsure how to resolve this issue so any help would be appreciated.
Thanks

After contacting the maintainer of the msm package the issue was resolved by redefining the state variable numeric rather than a factor. The full reply is below.
Thanks for this report. This might be related to the state variable
being a factor rather than numeric. I can make it work for me by redefining
dat.long$mci <- as.numeric(dat.long$mci).
I couldn't reproduce the crash, but it didn't converge with the state as
a factor.
I don't think it's documented explicitly, but factor states were
supposed to work as long as their levels are named 1,2,.... So it's a
bug, which seems to have been introduced in 1.4, but I'll fix it for the
next release.

Related

Equatiomatic not building my equation from logistic model

I have the following data, the response variable which is categorical is gleason_score, while the predictor, a continuous variable is age. I have fitted the data using glm function to have a logistic regression model.
structure(list(age = c(69L, 60L, 78L, 73L, 80L, 78L, 89L, 75L,
66L, 74L, 72L, 80L, 63L, 100L, 67L, 73L, 75L, 83L, 72L, 73L,
50L, 75L, 70L, 56L, 75L, 70L, 90L, 65L, 70L, 80L, 73L, 70L, 70L,
75L, 71L, 65L, 65L, 72L, 67L, 65L, 70L, 75L, 85L, 75L, 70L, 86L,
74L, 78L, 64L, 70L, 65L, 65L, 70L, 74L, 77L, 75L, 65L, 80L, 70L,
70L, 58L, 58L, 65L, 78L, 76L, 80L, 66L, 71L, 70L, 55L, 70L, 90L,
78L, 67L, 65L, 60L, 69L, 80L, 72L, 76L, 68L, 77L, 88L, 69L, 79L,
77L, 78L, 66L, 80L, 72L, 81L, 80L, 70L, 86L, 87L, 70L, 80L, 66L,
60L, 50L, 69L, 63L, 75L, 68L, 68L, 75L, 63L, 74L, 54L, 81L, 72L,
70L, 68L, 55L, 75L, 75L, 65L, 72L, 77L, 64L, 64L, 76L, 83L, 95L,
85L, 70L, 75L, 75L, 61L, 95L, 72L, 81L, 87L, 70L, 77L, 70L, 65L,
77L, 70L, 65L, 70L, 75L, 68L, 93L, 65L, 65L, 75L, 78L, 86L),
gleason_score = structure(c(2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L,
2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L,
2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L,
2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 2L, 2L), .Label = c("0", "1"), class = "factor")), class = "data.frame", row.names = c(NA,
-149L))
Using the following code:
attach(data2)
glm.fit=glm(gleason_score ~ age, family=binomial(link = "logit"))
plot(x=age, y=gleason_score)
lines(age, glm.fit$fitted.values)
summary(glm.fit)
I have these results:
Call:
glm(formula = gleason_score ~ age, family = binomial(link = "logit"))
Deviance Residuals:
Min 1Q Median 3Q Max
-2.1515 0.4451 0.5806 0.6530 1.0105
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -2.17146 1.87588 -1.158 0.2470
age 0.05155 0.02651 1.944 0.0518 .
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 141.02 on 148 degrees of freedom
Residual deviance: 137.00 on 147 degrees of freedom
AIC: 141
Number of Fisher Scoring iterations: 4
I use the following code to build the equation:
equatiomatic::extract_eq(glm.fit, wrap = FALSE,use_coefs = TRUE)
but I have this error message:
Error in model$data[which(model$y == 1)[1], outcome_nm] : object of type 'environment' is not subsettable
Kindly assist in tracing my error
Do not use attach(data2).. Instead, pass data2 to the data argument of the glm() call.
glm.fit=glm(gleason_score ~ age, data=data2, family=binomial(link = "logit"))
equatiomatic::extract_eq(glm.fit, wrap = FALSE,use_coefs = TRUE)
Output:
$$
\log\left[ \frac { \widehat{P( \operatorname{gleason\_score} = \operatorname{1} )} }{ 1 - \widehat{P( \operatorname{gleason\_score} = \operatorname{1} )} } \right] = -2.17 + 0.05(\operatorname{age})
$$
To see why, compare glm.fit$data when glm.fit is created
with attach(data2) vs.
without using attach and instead passing data2 to data arg
The 2nd approach is correct.
Under the first approach (yours), glm.fit$data returns this:
<environment: R_GlobalEnv>
Under the second approach (correct), glm.fit$data returns the actual data (note only first six rows shown here)
age gleason_score
1 69 1
2 60 1
3 78 0
4 73 1
5 80 1
6 78 1

Why is prediction error discrete in adabag?

I've got the table of 55 observations with 5 variables (F,H,R,T,U) and 1 classifier variable ("Group") in which I have two groups.
I'm doing data sampling by splitting the data into the training set (70%) and test set (30%). Then I run adaboosting and check how it works.
I want to get the adaboost error distribution for 100 samplings. But the distribution occurs to be discrete, outputting only five value variants: 0, 0.0588235294117647, 0.117647058823529 0.176470588235294 and 0.235294117647059.It doesn't change with mfinal argument. I guess there should be more! How it works?
I use the folowing code:
predictions<-list()
for (i in 1:100){
train.ind<-sample(nrow(df), nrow(df) * 0.7)
assign(paste0("ada",i), do.call(boosting,
c(formula=Group~F + H + R + T + U,
data=substitute(df[train.ind,]), mfinal=50, boos=FALSE,
coeflearn='Breiman'),envir = parent.frame()))
assign(paste0("pred",i), predict(ada,df[-train.ind,]))
predictions[[i]]<-get(paste0("pred",i))$error
}
hist(100*unlist(predictions),breaks=10,
main="Error probability [%] ntrees=10. 100 sampling operations", xlab="AdaBoost error")
dput(df)
structure(list(Group = structure(c(2L, 2L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L
), .Label = c("Canines", "Sled"), class = "factor"), F = c(0.263150566678734,
0.260347316635598, 0.26437277258488, 0.265710057607949, 0.254866055219663,
0.263294264681227, 0.261901194801303, 0.257318268395066, 0.26420207103455,
0.252093225560912, 0.255473253732324, 0.259067858940115, 0.259528043446917,
0.267331491048901, 0.260246447333382, 0.26035486437815, 0.254553215708594,
0.274074579975413, 0.262896904742862, 0.260504330262876, 0.258329960879536,
0.262664861154909, 0.256148832094211, 0.258509128895957, 0.256292083925698,
0.262358651734143, 0.254578103664353, 0.255386025800537, 0.264120912009577,
0.275232714712253, 0.265375720277527, 0.267601768121804, 0.262932226832642,
0.263633189245163, 0.262826186070212, 0.261058637786334, 0.262979366135887,
0.259232168979912, 0.252933156025384, 0.263963451214447, 0.258511197058683,
0.261957295373665, 0.253412282699461, 0.260748166588172, 0.263136039863289,
0.255317062006506, 0.258822015633545, 0.252757763183064, 0.260840486010478,
0.258620689655172, 0.263738813871524, 0.26241134751773, 0.26405425581719,
0.263685152057245, 0.262062787572784), H = c(0.242711147002311,
0.243850477245014, 0.245132979060713, 0.241794831140003, 0.235370262206577,
0.241392449436832, 0.236787894677703, 0.240434935369935, 0.234076675284456,
0.236978505926275, 0.23489414817613, 0.236461115627298, 0.241377100655228,
0.240778565421122, 0.238954656595734, 0.237237027626932, 0.23562891291975,
0.228247507171151, 0.235543469567304, 0.238348073568565, 0.237639956832591,
0.237993655975811, 0.23053394888479, 0.237553985998722, 0.238716430501961,
0.241044553515742, 0.23579805839771, 0.244646715997643, 0.245211405561299,
0.248463204730402, 0.237910443860818, 0.23772859908127, 0.242517289073306,
0.230376515634971, 0.239386381312522, 0.242971498213445, 0.248246377553633,
0.245227816034538, 0.237968589560153, 0.235998092571798, 0.235639593181493,
0.240320284697509, 0.239383587641388, 0.237939850635807, 0.240409493084614,
0.239705089012767, 0.235291279312896, 0.237725562711216, 0.251017166425148,
0.244410329082034, 0.247581475626206, 0.244082639531298, 0.248022977743474,
0.246127343801762, 0.246345535241663), R = c(0.23238005068085,
0.233913128793082, 0.232906768805408, 0.234580624702711, 0.23729616240706,
0.232552468336102, 0.23566425708828, 0.233370934038501, 0.23413197660754,
0.241255572873247, 0.240609653949119, 0.233790113420818, 0.239086204963073,
0.233644719452121, 0.23849468613068, 0.236846146329206, 0.239755264655663,
0.225925420024587, 0.239355887920232, 0.237429996633718, 0.23819641170916,
0.232039177131833, 0.223832380603256, 0.235838907338977, 0.236669843303285,
0.234916072348618, 0.238304558463179, 0.235904655883701, 0.232124394623714,
0.222879222527955, 0.233232723139038, 0.233871666714818, 0.235947441217151,
0.242585880964708, 0.234693056561268, 0.233941777691605, 0.229366135886539,
0.23539800906269, 0.239803390172875, 0.236505714593364, 0.24647853698133,
0.235569395017794, 0.242526379716086, 0.236207360559779, 0.234180854122081,
0.240408036487878, 0.239601762794737, 0.245058343429191, 0.234449894103222,
0.237875925051173, 0.230698942666106, 0.233475177304965, 0.231384358432554,
0.233114688928642, 0.230655428424067), T = c(0.261758235638105,
0.261889077326307, 0.257587479549, 0.257914486549337, 0.272467520166701,
0.262760817545838, 0.265646653432713, 0.268875862196498, 0.267589277073454,
0.269672695639567, 0.269022944142428, 0.270680912011768, 0.260008650934782,
0.258245224077857, 0.262304209940204, 0.265561961665713, 0.270062606715993,
0.271752492828849, 0.262203737769602, 0.263717599534841, 0.265833670578713,
0.267302305737446, 0.289484838417743, 0.268097977766344, 0.268321642269056,
0.261680722401497, 0.271319279474757, 0.264062602318119, 0.258543287805409,
0.253424858029389, 0.263481112722616, 0.260797966082108, 0.258603042876902,
0.263404414155158, 0.263094376055998, 0.262028086308617, 0.259408120423941,
0.26014200592286, 0.269294864241588, 0.263532741620391, 0.259370672778494,
0.262153024911032, 0.264677749943065, 0.265104622216242, 0.262273612930016,
0.264569812492848, 0.266284942258822, 0.264458330676529, 0.253692453461153,
0.25909305621162, 0.257980767836164, 0.260030835646007, 0.256538408006782,
0.25707281521235, 0.260936248761486), U = c(0.276642254462421,
0.275750907536407, 0.274138521440258, 0.279385339041277, 0.283770344294126,
0.273124933319108, 0.276770665567999, 0.272796198013943, 0.273326789343435,
0.278824893979485, 0.282917535762971, 0.269035729493284, 0.276381346021371,
0.275681845488406, 0.280473043309851, 0.274957072857482, 0.279453614114969,
0.265400901516186, 0.284438401450319, 0.275270067631668, 0.277080803992985,
0.268341093323935, 0.26334299428362, 0.27494270078114, 0.277070411973316,
0.276364671746617, 0.277622940087166, 0.275489489882784, 0.275412200032649,
0.267636555236813, 0.275475938484053, 0.27914367434201, 0.281161825726141,
0.287341513046201, 0.274277898463271, 0.272041104617345, 0.268317034458041,
0.277054269097656, 0.276448903327891, 0.282483963758864, 0.288513266166897,
0.280409252669039, 0.283610415243301, 0.27874587902846, 0.274619094771137,
0.275604453090517, 0.286100299160421, 0.288513039597016, 0.270078586556683,
0.280480764184118, 0.274123602187187, 0.277940178846747, 0.273784368554907,
0.282369310276287, 0.277372857201026)), na.action = structure(c(`2` = 2L,
`4` = 4L, `19` = 18L, `24` = 20L, `28` = 24L, `29` = 25L, `30` = 26L,
`32` = 28L, `33` = 29L, `42` = 38L, `54` = 46L, `69` = 54L, `74` = 58L,
`77` = 59L, `79` = 60L, `80` = 61L, `83` = 62L), class = "omit"), row.names = c(5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 15L, 16L, 17L, 18L, 20L,
25L, 26L, 27L, 31L, 41L, 44L, 46L, 47L, 48L, 50L, 51L, 52L, 55L,
57L, 64L, 65L, 66L, 67L, 68L, 70L, 71L, 72L, 85L, 86L, 87L, 88L,
89L, 90L, 91L, 92L, 93L, 94L, 95L, 96L, 97L, 98L, 99L, 100L,
101L, 102L, 103L), class = "data.frame")

Nonlinear model convergence

I have a time series data set and each time series has datapoint of 30-year from different/same species. I am developing a forecasting model using the first 23 years of data from each time series data point and I am using the rest 7 years as test set to know the predictive ability of model but the nonlinear model (Model 6 and Model 7) don't give succinct result?
Data:
DD <- structure(list(Plot = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("A",
"B", "C", "D"), class = "factor"), Species = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L), .Label = c("BD", "BG"), class = "factor"), Year = c(37L,
38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L,
51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L,
64L, 65L, 66L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L,
47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L,
60L, 61L, 62L, 63L, 64L, 65L, 66L, 37L, 38L, 39L, 40L, 41L, 42L,
43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L,
56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 37L, 38L,
39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L,
52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L,
65L, 66L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L,
48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 60L,
61L, 62L, 63L, 64L, 65L, 66L, 37L, 38L, 39L, 40L, 41L, 42L, 43L,
44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L,
57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 37L, 38L, 39L,
40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L,
53L, 54L, 55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L,
66L), Count = c(81L, 45L, 96L, 44L, 24L, 8L, 28L, 32L, 39L, 29L,
40L, 17L, 4L, 12L, 18L, 11L, 63L, 98L, 78L, 76L, 67L, 36L, 56L,
43L, 81L, 8L, 14L, 20L, 25L, 19L, 135L, 91L, 171L, 88L, 59L,
1L, 1L, 1L, 2L, 1L, 11L, 9L, 34L, 15L, 32L, 21L, 33L, 43L, 39L,
20L, 6L, 3L, 9L, 9L, 28L, 16L, 15L, 2L, 1L, 1L, 34L, 16L, 19L,
35L, 32L, 7L, 2L, 30L, 29L, 25L, 28L, 11L, 31L, 31L, 28L, 27L,
34L, 110L, 87L, 103L, 72L, 19L, 46L, 43L, 107L, 32L, 26L, 31L,
12L, 29L, 23L, 40L, 50L, 23L, 34L, 11L, 9L, 4L, 24L, 55L, 14L,
16L, 51L, 43L, 2L, 13L, 8L, 96L, 52L, 118L, 32L, 1L, 8L, 17L,
34L, 29L, 38L, 15L, 4L, 38L, 2L, 1L, 1L, 1L, 1L, 1L, 3L, 3L,
4L, 6L, 4L, 4L, 10L, 6L, 7L, 9L, 15L, 30L, 25L, 36L, 13L, 17L,
43L, 36L, 60L, 50L, 26L, 13L, 13L, 27L, 18L, 56L, 96L, 16L, 54L,
2L, 2L, 9L, 5L, 5L, 6L, 2L, 6L, 2L, 3L, 4L, 3L, 136L, 71L, 116L,
28L, 23L, 76L, 64L, 98L, 58L, 26L, 13L, 13L, 13L, 18L, 19L, 24L,
18L, 17L, 3L, 23L, 19L, 9L, 11L, 13L, 20L, 29L, 29L, 17L, 20L,
26L, 71L, 63L, 53L, 54L, 20L, 22L, 18L, 93L, 50L, 18L, 12L, 12L,
31L), LogCount = c(1.908385019, 1.653212514, 1.982271233, 1.643462676,
1.380211242, 0.903089987, 1.447158031, 1.505109978, 1.591064607,
1.462397998, 1.602059991, 1.230448921, 0.602059991, 1.079181206,
1.255272505, 1.041392685, 1.799340549, 1.991226076, 1.892094603,
1.880813592, 1.826074803, 1.556302501, 1.748188027, 1.633468456,
1.908485019, 0.903089987, 1.146128035, 1.301029996, 1.397940009,
1.278753601, 2.130333768, 1.95904139, 2.2329961, 1.94448267,
1.770852012, 0, 0, 0, 0.30102999, 0, 1.0411392685, 0.954242509,
1.531478917, 1.176031259, 1.505149978, 1.322219295, 1.51851394,
1.6334684456, 1.591064607, 1.301029996, 0.77815125, 0.477121255,
0.954242509, 0.954242509, 1.447158031, 1.204119983, 1.176091259,
0.301029996, 0, 0, 1.531478917, 1.204119983, 1.278753501, 1.544068044,
1.505149978, 0.084509804, 0.301029996, 1.477121255, 1.462397998,
1.397940009, 1.447158031, 1.041392685, 1.491361694, 1.491361694,
1.447158031, 1.431363754, 1.531478917, 2.041392685, 1.939519253,
2.012837225, 1.857332495, 1.278753601, 1.662757382, 1.633468456,
2.029383778, 1.505149978, 1.414973348, 1.491361594, 1.079181245,
1.462397998, 1.361727835, 1.602059991, 1.698970004, 1.361727836,
1.531478917, 1.041392685, 0.954242509, 0.602059991, 1.380211242,
1.740362689, 1.146128036, 1.204119983, 1.707570176, 1.633468456,
0.301029996, 1.113943352, 0.903089987, 1.982271233, 1.716003344,
2.071882007, 1.50514997, 0, 0.903089987, 1.230448921, 1.53147891,
1.2397998, 1.57978359, 1.176091259, 0.602059991, 1.57978359,
0.301029996, 0, 0, 0, 0, 0, 0.477121255, 0.477121255, 0.602059991,
0.77815125, 0.602059991, 0.602059991, 1, 0.77815125, 0.84509804,
0.95424509, 1.176091259, 1.477121255, 1.39790009, 1.555302501,
1.113943352, 1.230448921, 1.633468456, 1.555302501, 1.77815125,
1.698970004, 1.414973348, 1.113943352, 1.113943352, 1.431353754,
1.255272505, 1.748188027, 1.982271233, 1.204119983, 1.73239376,
1.431363754, 1.361727835, 0.954242509, 0.698970004, 0.698970004,
0.77815125, 0.301029996, 0.77815125, 0.301029996, 0.477121255,
0.602059991, 0.477121255, 2.133538908, 1.851258349, 2.064457989,
1.447158031, 1.361727836, 1.880813592, 1.806179974, 1.991226076,
1.763427994, 1.414973348, 1.113943352, 1.113943352, 1.113943352,
1.255272505, 1.278753601, 1.380211242, 1.255272505, 1.230446921,
0.477121255, 1.361727835, 1.278753601, 0.954242509, 1.0411392685,
1.113943352, 1.301029996, 1.462397998, 1.462397998, 1.230448921,
1.301029995, 1.414973348, 1.851258349, 1.799340549, 1.72427587,
1.73239376, 1.301029996, 1.342422681, 1.255272505, 1.968482949,
1.698970004, 1.255272505, 1.079181246, 1.079181246, 1.491361694
), Diff = c(-0.255272505, 0.329058719, -0.338818557, -0.263241434,
-0.077121255, 0.544068044, 0.057991947, 0.085910629, -0.128666609,
0.139661993, -0.37161107, -0.62838893, 0.477121255, 0.176091259,
-0.21387982, 0.757947864, 0.191885527, -0.099131473, -0.011281011,
-0.054738789, -0.269772302, 0.191885526, -0.114719571, 0.275016563,
-1.005395032, 0.243038049, 0.15490196, 0.096910013, -0.119186408,
NA, -0.171292376, 0.273954718, -0.288513438, -0.17363066, -1.770852012,
0, 0, 0.301029996, -0.301029996, 1.041392685, -0.087150176, 0.577235408,
-0.355387658, 0.329058719, -0.182930683, 0.196294545, 0.110954516,
-0.042403849, -0.290034611, -0.522878746, -0.301029995, 0.477121254,
0, 0.492915522, -0.243038048, -0.028028724, -0.875061263, -0.301029996,
0, 1.531078917, -0.32735893, 0.070633618, 0.265310043, -0.038918066,
-0.660051938, -0.544068044, 1.176091259, -0.014723257, -0.064457989,
0.049218022, -0.405765346, 0.449969009, 0, -0.044203663, -0.015794267,
0.100115153, 0.509913768, -0.101873432, 0.073317972, -0.155504729,
-0.578578895, 0.384054231, -0.029289376, 0.395915322, -0.5202338,
-0.09017663, 0.076388346, -0.412180448, 0.383216752, -0.100670162,
0.240332155, 0.096910013, -0.337242168, 0.169751081, -0.490086232,
-0.087150176, -0.352182518, 0.778151251, 0.360151447, -0.594234653,
0.057991947, 0.503450193, -0.07410172, -1.33243846, 0.812913356,
-0.210853365, 1.079181246, -0.266267889, 0.355878663, -0.566732029,
-1.505149978, 0.903089987, 0.327358934, 0.301029996, -0.069080919,
0.117385599, -0.403692338, -0.574031268, 0.977723606, -1.278753601,
-0.301029996, 0, 0, 0, 0, 0.477121255, 0, 0.124938736, 0.176091259,
-0.176091259, 0, 0.397490009, -0.2218485, 0.06690679, 0.10914469,
0.22184875, 0.301029996, -0.079181206, 0.158362092, -0.442359149,
0.116505569, 0.403019535, -0.077165955, 0.221848749, -0.079181206,
-0.283996656, -0.301029996, 0, 0.317420412, -0.176091259, 0.492915522,
0.23483206, -0.77815125, 0.528273777, -0.301029996, -0.069635928,
-0.407485327, -0.255272505, 0, 0.079181246, -0.477121254, 0.477121254,
-0.477121254, 0.176091259, 0.124938736, -0.124938736, 1.656417653,
-0.282280559, 0.21319964, -0.617299958, -0.085430195, 0.5191085756,
-0.074533518, 0.185045102, -0.227798082, -0.348454546, -0.301029996,
0, 0, 0.141329153, 0.023481096, 0.101457641, -0.124938737, -0.024823584,
-0.753327666, 0.884606581, -0.082974235, -0.324511092, 0.087150176,
0.072550667, 0.187086644, 0.161368002, 0, -0.231949077, 0.070581075,
0.113903352, 0.436285001, -0.00519178, -0.075054679, 0.00811789,
-0.431363764, 0.041392685, -0.087150176, 0.713210444, -0.269512945,
-0.443697499, -0.176091259, 0, 0.412180448, -0.148939013)), class = "data.frame", row.names = c(NA,
-210L))
Code:
for(f in 1:11){
for(b in 1:5){
for (c in 1:5){
#To select test sets 1,2,3,4, and 5 years beyond the training set:
#Calculate the mean of abundance for the training set years.
Model1<-lm(mean~1, data=DD1)
#
Output2:
2 3 0.676209994477288 1.9365051784348e-09 4.44089209850063e-16
3 53 11.9236453578109 2.06371097988267e-09 1.13686837721616e-13
4 31 1.94583877614293 1.11022302462516e-15 1.99840144432528e-15
5 4 8.06660449042397 1.48071350736245e-08 3.19744231092045e-14
6 5 10.5321102149558 9.31706267692789e-10 1.4210854715202e-14
..
First, please see the time series graph of counts for different species and plots below.
library(ggplot2)
ggplot(DD, aes(Year, Count)) +
geom_point() +
geom_line() +
facet_grid(Plot ~ Species) +
scale_y_log10()
It is seen that there is no obvious trend which can be fitted by power or log-power function using nls.
Second, as I understand you are trying to use nls to predict outside the training data set. Usually it is not quite an effective to use least square models because of auto-correlated nature of time-series.
Third, the simplest prediction algorithm is Holt-Winters (see "dirty" implementation below). You can use as well a ton of other algorithms like ARIMA, exponential smoothing state space etc.
x <- ts(DD[DD$Species == "BG" & DD$Plot == "elq1a3", ]$LogCount)
m <- HoltWinters(x, gamma = FALSE)
library(forecast)
f <- forecast(m, 2)
plot(f, main = "elq1a3 at BG")
Fourth, about your algorithm in question, it throws
Error in qr.solve(QR.B, cc) : singular matrix 'a' in solve.
The reason is that in the first step of for-loop (at f = b = c = 1 DD2 data frame contains just one row. And executing
Model6<-nls(Diff~1+Count^T,start=list(T=1),trace=TRUE,algorithm ="plinear",data=DD2)
means that you are trying to fit a curve using only one data point, which is impossible.
However if you change f value in for-loop from 1:11 to 2:11 another error thrown:
Error in nls(Diff ~ 1 + Count^T, start = list(T = 1), trace = TRUE,
algorithm = "plinear", : step factor 0.000488281 reduced below
minFactor 0.000976562.
In this case you cannot use "naive" approach used by plinear algorithm with self-starting inital value and, e.g. nls.control(min.factor = 1e-5). You must feed all initial coefficients explicitely with default Gauss-Newton algorithm. Quite exausting, please try yourself :)

Broom::tidy error with dataframe of nnet::multinom models

I am generating multinom models using nnet, with a model fitted for each city in the dataset. When I attempt to use tidy with these models, I get the following error:
Error in probs[i, -1, drop = FALSE] : subscript out of bounds
However, if I produce a model for each City separately, and then use tidy I do not receive an error for any of the models. I am also able to use glace without an error.
What might be causing this error?
library(broom)
library(dplyr)
library(nnet)
dfstack <- structure(list(Var1 = c(73L, 71L, 66L, 75L, 96L, 98L, 98L, 65L,
75L, 74L, 71L, 98L, 100L, 87L, 78L, 50L, 73L, 82L, 70L, 70L,
31L, 34L, 32L, 100L, 100L, 100L, 54L, 51L, 36L, 48L, 66L, 60L,
59L, 72L, 76L, 90L, 85L, 76L, 55L, 53L, 42L, 54L, 54L, 10L, 34L,
18L, 6L, 16L, 63L, 41L, 68L, 55L, 52L, 57L, 64L, 61L, 68L, 44L,
33L, 19L, 38L, 54L, 44L, 87L, 100L, 100L, 63L, 75L, 76L, 100L,
100L, 64L, 95L, 90L, 99L, 98L, 87L, 62L, 62L, 88L, 79L, 85L),
Status = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("A",
"B", "C"), class = "factor"), City = structure(c(3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L,
3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), .Label = c("Denver", "Miami", "NYC"), class = "factor"),
ID = structure(c(52L, 63L, 74L, 77L, 78L, 79L, 80L, 81L,
82L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 64L,
31L, 42L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 32L, 1L, 12L,
23L, 25L, 26L, 27L, 28L, 29L, 30L, 2L, 3L, 4L, 5L, 65L, 66L,
67L, 68L, 69L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 6L,
7L, 8L, 9L, 10L, 11L, 13L, 70L, 71L, 72L, 73L, 75L, 76L,
41L, 43L, 44L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L,
24L), .Label = c("Denver1", "Denver10", "Denver11", "Denver12",
"Denver13", "Denver14", "Denver15", "Denver16", "Denver17",
"Denver18", "Denver19", "Denver2", "Denver20", "Denver21",
"Denver22", "Denver23", "Denver24", "Denver25", "Denver26",
"Denver27", "Denver28", "Denver29", "Denver3", "Denver30",
"Denver4", "Denver5", "Denver6", "Denver7", "Denver8", "Denver9",
"Miami1", "Miami10", "Miami11", "Miami12", "Miami13", "Miami14",
"Miami15", "Miami16", "Miami17", "Miami18", "Miami19", "Miami2",
"Miami20", "Miami21", "Miami3", "Miami4", "Miami5", "Miami6",
"Miami7", "Miami8", "Miami9", "NYC1", "NYC10", "NYC11", "NYC12",
"NYC13", "NYC14", "NYC15", "NYC16", "NYC17", "NYC18", "NYC19",
"NYC2", "NYC20", "NYC21", "NYC22", "NYC23", "NYC24", "NYC25",
"NYC26", "NYC27", "NYC28", "NYC29", "NYC3", "NYC30", "NYC31",
"NYC4", "NYC5", "NYC6", "NYC7", "NYC8", "NYC9"), class = "factor")), class = "data.frame", row.names = c(NA, -82L), .Names = c("Var1", "Status", "City", "ID"))
Model.List <- dfstack %>% group_by(City) %>% do(modfits = multinom(Status~Var1, data=.))
tidy(Model.List, modfits) # produces error
glance(Model.List, modfits) # no error
# no error when each city on its own
df1 <- dfstack %>% filter(City == "NYC") %>% do(modfit1 = multinom(Status~Var1, data=.))
tidy(df1, modfit1)
df2 <- dfstack %>% filter(City == "Miami") %>% do(modfit1 = multinom(Status~Var1, data=.))
tidy(df2, modfit1)
df3 <- dfstack %>% filter(City == "Denver") %>% do(modfit1 = multinom(Status~Var1, data=.))
tidy(df3, modfit1)
Don't ask me to explain why, but I figured it out.
tidy.multinom calls summary.multinom which calls vcov.multinom which calls multinomHess. The error was being generated down in multinomHess, which is only run when the Hessian matrix is not generated in the original call to multinom. That is to say, you don't necessarily need to spend the time calculating the Hessian matrix if you don't intend to use the summary object.
For some reason, when the multinom objects are formed within the do call, summary.multinom is unable to calculate the Hessian matrix. This can be circumvented by calling multinom with Hess = TRUE. See below:
Model.List <-
dfstack %>%
group_by(City) %>%
do(modfits = multinom(Status~Var1,
data=.,
Hess = TRUE))
tidy(Model.List, modfits)
glance(Model.List, modfits)
In your original code, glance did not cast an error because glance.multinom does not rely on summary.multinom.

dplyr n_distinct() in filter takes forever where as base length(unique()) works like charm

I have a data frame such as this:
structure(list(x = c(1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L,
6L, 6L, 7L, 7L, 8L, 8L, 9L, 9L, 10L, 10L, 11L, 11L, 12L, 12L,
13L, 13L, 14L, 14L, 15L, 15L, 16L, 16L, 17L, 17L, 18L, 18L, 19L,
19L, 20L, 20L, 21L, 21L, 22L, 22L, 23L, 23L, 24L, 24L, 25L, 25L,
26L, 26L, 27L, 27L, 28L, 28L, 29L, 29L, 30L, 30L, 31L, 31L, 32L,
32L, 33L, 33L, 34L, 34L, 35L, 35L, 36L, 36L, 37L, 37L, 38L, 38L,
39L, 39L, 40L, 40L, 41L, 41L, 42L, 42L, 43L, 43L, 44L, 44L, 45L,
45L, 46L, 46L, 47L, 47L, 48L, 48L, 49L, 49L, 50L, 50L, 51L, 51L,
52L, 52L, 53L, 53L, 54L, 54L, 55L, 55L, 56L, 56L, 57L, 57L, 58L,
58L, 59L, 59L, 60L, 60L, 61L, 61L, 62L, 62L, 63L, 63L, 64L, 64L,
65L, 65L, 66L, 66L, 67L, 67L, 68L, 68L, 69L, 69L, 70L, 70L, 71L,
71L, 72L, 72L, 73L, 73L, 74L, 74L, 75L, 75L, 76L, 76L, 77L, 77L,
78L, 78L, 79L, 79L, 80L, 80L, 81L, 81L, 82L, 82L, 83L, 83L, 84L,
84L, 85L, 85L, 86L, 86L, 87L, 87L, 88L, 88L, 89L, 89L, 90L, 90L,
91L, 91L, 92L, 92L, 93L, 93L, 94L, 94L, 95L, 95L, 96L, 96L, 97L,
97L, 98L, 98L, 99L, 99L, 100L, 100L), y = structure(c(1L, 2L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L,
2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L,
2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L,
1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L,
1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L,
1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, 1L, 2L), .Label = c("one", "two"), class = "factor")), class = "data.frame", row.names = c(NA,
-200L), .Names = c("x", "y"))
I am trying to filter groups of x that have two distinct y values using:
library(dplyr)
df %>% group_by(x) %>% filter(n_distinct(y) > 1)
On a large data set, this almost never finishes.
Changing to this works reasonably fast for the full data set:
library(dplyr)
df %>% group_by(x) %>% filter(length(unique(y)) > 1)
Any idea why n_distinct() is super slow to never finishing?

Resources