Gtsummary output with mgcv gam - r

I have the following data set:
structure(list(Age = c(83L, 26L, 26L, 20L, 20L, 77L, 32L, 21L,
15L, 75L, 27L, 81L, 81L, 15L, 24L, 16L, 35L, 27L, 30L, 31L, 24L,
24L, 31L, 79L, 30L, 19L, 20L, 42L, 62L, 83L, 79L, 18L, 26L, 66L,
23L, 83L, 77L, 80L, 57L, 42L, 32L, 76L, 85L, 29L, 65L, 79L, 9L,
34L, 20L, 16L, 34L, 22L, 19L, 23L, 25L, 14L, 53L, 28L, 79L, 22L,
22L, 21L, 82L, 81L, 16L, 19L, 77L, 15L, 18L, 15L, 78L, 24L, 16L,
14L, 29L, 18L, 50L, 17L, 43L, 8L, 14L, 85L, 31L, 20L, 30L, 23L,
78L, 29L, 6L, 61L, 14L, 22L, 10L, 83L, 15L, 13L, 15L, 15L, 29L,
8L, 9L, 15L, 8L, 9L, 15L, 9L, 34L, 8L, 9L, 9L, 16L, 8L, 25L,
21L, 23L, 13L, 56L, 10L, 7L, 27L, 8L, 8L, 8L, 8L, 80L, 80L, 6L,
15L, 42L, 25L, 23L, 21L, 8L, 11L, 43L, 69L, 34L, 34L, 14L, 12L,
10L, 22L, 78L, 16L, 76L, 12L, 10L, 16L, 6L, 13L, 66L, 11L, 26L,
12L, 16L, 13L, 24L, 76L, 10L, 65L, 20L, 13L, 25L, 14L, 12L, 15L,
43L, 51L, 27L, 15L, 24L, 34L, 63L, 17L, 15L, 9L, 12L, 17L, 82L,
75L, 24L, 44L, 69L, 11L, 10L, 12L, 10L, 10L, 70L, 54L, 45L, 42L,
84L, 54L, 23L, 23L, 14L, 81L, 17L, 42L, 44L, 16L, 15L, 43L, 45L,
50L, 53L, 23L, 53L, 49L, 13L, 69L, 14L, 65L, 14L, 13L, 22L, 67L,
59L, 52L, 54L, 44L, 78L, 62L, 69L, 10L, 63L, 57L, 22L, 12L, 62L,
9L, 82L, 53L, 54L, 66L, 49L, 63L, 51L, 9L, 45L, 49L, 77L, 49L,
61L, 62L, 57L, 67L, 16L, 65L, 75L, 45L, 16L, 55L, 17L, 64L, 67L,
56L, 52L, 63L, 10L, 62L, 14L, 66L, 68L, 15L, 13L, 43L, 47L, 55L,
69L, 21L, 67L, 34L, 52L, 15L, 31L, 64L, 55L, 13L, 48L, 71L, 64L,
13L, 25L, 34L, 50L, 61L, 70L, 33L, 57L, 51L, 46L, 57L, 69L, 46L,
8L, 11L, 46L, 71L, 33L, 38L, 56L, 17L, 29L, 28L, 6L, 8L), Sex = structure(c(1L,
1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L,
1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L,
2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L,
2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L,
2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L,
2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L,
2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L,
2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L,
2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L,
2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L,
2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L,
2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L,
2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L,
1L, 2L, 2L), .Label = c("Male", "Female"), class = "factor"),
mean_AD_scaled = c(3.15891332561581, -0.0551328105526693,
0.582747640515478, 1.94179165777054, 1.7064645993306, 2.37250948563045,
1.015775832203, 1.36189033704266, -1.05640048650493, 0.184814975542474,
-0.143366705302007, 1.81560178585347, 2.06325078470728, -0.473088628698217,
0.414641167726219, 0.199887349084444, -0.60620959209809,
-0.17879228399189, -1.03483709078065, -1.43497010225613,
-0.958595084469815, 1.0203965598582, -1.44731404613503, -1.17191867788498,
-2.02547709312595, -1.22395687266857, -1.09952727795348,
-1.0830246791849, 1.21072653232248, 1.69997357714829, 1.53648783201423,
0.208688735094353, 0.0862394522314924, 1.08662698958276,
-0.731299290763917, 2.29307697689102, -0.660008064083659,
-1.21425334459264, 1.10191939777498, -2.0957781638801, -1.14947514355972,
0.248845058764562, 2.6526135953958, 0.197907037232212, -0.222469162066061,
1.92880961340592, 1.23328008397287, -1.17288683034607, -0.308282675662673,
-1.02603570477074, -1.32647101621898, -1.58316343919798,
-0.0440210607151585, -0.388375288352846, -0.935491446193807,
-0.63789458173376, 0.454577456746182, -1.77391147749773,
0.709267564407921, 0.125735671950958, -0.821073428064989,
-0.126534054558056, 0.519597695894384, 0.188005477971066,
0.212319306823438, -1.45807374053215, 1.5856655763446, -1.25641198358011,
-0.910847565366061, -1.1191763722206, 0.25300371365424, -0.750772357310844,
0.37932560636146, -0.871791414947088, -1.92771569802088,
-1.1752191976387, 0.210449012296334, -0.347778895382139,
-0.132254955464496, 0.953616043508016, -0.0862677135627232,
0.838977990728951, -1.8993092246739, -0.0254281327692267,
0.298022803094927, -1.21559555595915, 0.0134079829994995,
-0.763094297724715, 0.334768589686298, -1.12568939786794,
-2.11786964276497, -0.0434709740895377, 0.388237009696492,
1.30050066962355, -0.260645173884043, -0.60620959209809,
1.05945271027717, -0.275717547426008, -0.0238878902174922,
0.496604074943496, 0.534009965485611, -0.692903244295693,
-0.566933407028871, 0.125625654625835, -0.518305749324122,
1.79381835547894, -0.790708646330802, -0.227860010997131,
0.347420582075538, 0.784189362817269, -0.660118081408782,
1.29962053102256, -0.561652575422924, -0.710395998990384,
-1.29315777017148, -0.457356151205503, -1.01756437073621,
0.146528946399368, -1.07136284272178, -1.42968927065019,
0.798601632408495, -0.799730066990963, -0.431348055546223,
0.569545561500617, 2.32168148142323, 0.472070211440872, 1.65145593676866,
-0.814142336582189, -0.544489872703603, -0.315433801795725,
0.382626126115175, -0.623812364117908, 0.216279930527897,
-0.606099574772967, -0.367207954999011, 0.719829227619811,
-0.749122097433987, 0.934693063586709, -0.79026857703031,
-0.371872689584264, 0.0769979969210905, -0.793899148759394,
1.50414273842782, 0.730280873506577, -0.290569886317732,
0.303743704001367, 0.390877425499463, -1.00359217044547,
-0.534918365417827, 0.325967203676389, 0.129036191704673,
0.34434009697207, -0.141386393449775, -0.363401355549725,
-0.395416397160769, -0.0235578382421178, -1.13583299524436,
1.16781977552417, -1.31890182425046, 0.139377820266317, 0.0160483988024708,
0.481311666751279, -1.05475022662807, 0.839858129329941,
0.652498624644007, -0.350199276534864, -0.262075399110649,
0.178543988010412, -1.13198238886502, -0.05117218684821,
-1.29678834190056, 0.429603523943066, 1.05098137624263, -0.956504755292464,
0.502765045150433, -0.81678275238516, -1.50263075720731,
-0.826684311646306, 2.40100397283753, 2.06633126981075, -0.470558230220369,
0.484942238480364, 0.822035322659877, 0.143888530596397,
0.384056351341786, -0.63580425255641, 0.358422314587926,
-0.372422776209885, 0.0607154328027556, -0.113221958218067,
1.02710761669075, -0.349649189909243, 2.27195365046724, -0.507634068787109,
-0.326105482332738, -1.0396778530861, 1.06484355920824, 1.32151397872221,
-0.185173288849074, -0.651888785489516, -0.171311105883464,
-0.104200537557911, -0.693673365571561, -1.26609350819101,
0.411230630647381, -0.929770545287362, -0.481009876107135,
0.386146680519137, 0.0482834750637615, -0.198265350538812,
0.790020281048832, 0.926001694901924, -1.08918564939184,
0.50298507980068, -0.0694350628187722, 1.04966116834114,
0.00878725534429612, 1.48742010500899, 0.750194009353997,
0.423772605711498, -0.596418050162068, -0.652636903300361,
-0.308942779613417, 0.314437388003408, 0.679562886624478,
-1.24312189070515, -0.432712270377761, 0.00427654501421597,
-0.197935298563442, 0.228821905592019, 1.06957430418856,
-1.61612462980509, 1.9499329398297, -0.263285589687014, 0.156430505660519,
-0.322254875953402, -0.451085163673446, -0.35526007349056,
0.10780284795577, 0.408700232169533, -0.957604928543701,
-1.05662052115517, 1.00345389178912, -0.238751726184391,
0.300003114947154, -0.397946795638617, -0.0802167606809086,
0.943714484246865, 1.10973062785877, 1.76279346979401, 1.62087112038423,
0.25533608094687, 0.226841593739787, 0.869672824438507, -1.44960240649761,
-0.450315042397579, -0.199629565370345, 0.29813282042005,
0.760425620590513, 1.87391096816911, -0.454275666102039,
-0.0559029318285365, -0.343048150401812, -1.01371376435687,
0.68880434193488, -0.29222014619459, 1.16132875334186, -1.95715633422403,
-0.534368278792206, -0.560112332871189, 1.84508642898666,
-1.19150176175703, -0.772203732244971, -0.3443683583033,
-1.45684154649076, -0.633823940704178, -1.77454957798344,
0.279539892474118, -0.875532004001301, 1.26001429397797,
-0.536590628759707, 2.1869102581465, 0.211109116247078, 0.130246382281038,
-0.355810160116181, -0.898085555651692, -0.429741802599415,
1.13360438741065, 1.61338994227581, 0.588688576072169, 0.454137387445685,
0.747113524250528, 0.460848444278238, -0.38177424884541,
-0.169990897981981, -0.747361820232001, -0.760123829946369,
0.208028631143609, -1.28748087619509, 2.33950428809329, -0.973029357526068,
-1.06091119683501, 0.917530360867389, -0.35041931118511,
-1.90613029883158, -1.15057531681095, 0.65348878057012, 0.43147381847017
)), row.names = c(NA, -308L), class = c("tbl_df", "tbl",
"data.frame"))
I am using this gam model:
m1 <- gam(mean_AD_scaled ~ s(Age, bs = 'ad', k = -1) + Sex + ti(Age, by = Sex, bs ='fs'),
data = DF,
method = 'REML',
family = gaussian)
Output:
Family: gaussian
Link function: identity
Formula:
mean_AD_scaled ~ s(Age, bs = "ad", k = -1) + Sex + ti(Age,
by = Sex, bs = "fs")
Parametric coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.04691 0.06976 0.672 0.502
SexFemale -0.12950 0.09428 -1.374 0.171
Approximate significance of smooth terms:
edf Ref.df F p-value
s(Age) 2.980 3.959 8.72 2.24e-06 ***
ti(Age):SexMale 2.391 2.873 23.47 < 2e-16 ***
ti(Age):SexFemale 1.000 1.000 43.40 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Rank: 48/49
R-sq.(adj) = 0.34 Deviance explained = 35.6%
-REML = 375.4 Scale est. = 0.63867 n = 308
But when I use gtsummary, I get a repeated value for each gender 'interaction':
tbl_regression(m1, tidy_fun = tidy_gam)
I see the following in a publication, which I am trying to replicate with gender and age:
I am not sure how to fix this. My goal is to print a table for a manuscript so any other gam-related information that can be added like edf and R^2.

I think you've found a bug in the handling of these types of interactions. While we work on a fix to the bug, this code should get you what you need. Thanks
library(gtsummary)
#> #BlackLivesMatter
library(mgcv)
packageVersion("gtsummary")
#> [1] ‘1.5.2’
m1 <- gam(marker ~ s(age, bs = 'ad', k = -1) + grade + ti(age, by = grade, bs ='fs'),
data = gtsummary::trial,
method = 'REML',
family = gaussian)
tbl_regression(m1, tidy_fun = gtsummary::tidy_gam) %>%
modify_table_body(
~ .x %>%
dplyr::select(-n_obs) %>%
dplyr::distinct()
) %>%
as_kable() # convert to kable to display on SO
Characteristic
Beta
95% CI
p-value
Grade
I
—
—
II
-0.39
-0.70, -0.08
0.014
III
-0.13
-0.43, 0.18
0.4
s(age)
>0.9
ti(age):gradeI
0.6
ti(age):gradeII
>0.9
ti(age):gradeIII
0.6
Created on 2022-02-21 by the reprex package (v2.0.1)

Related

R plotly multiple plots only show last figure

I would like to make an interactive graphs based on user input. However I'm struggle to make more than one graphs using R plotly. Suppose I have following data and codes,
dput(norwd5)
structure(list(LENGTH_OF_STAY = c(57L, 28L, 15L, 28L, 14L, 49L,
15L, 22L, 17L, 81L, 34L, 24L, 31L, 38L, 33L, 22L, 21L, 49L, 188L,
21L, 21L, 36L, 24L, 23L, 48L, 54L, 42L, 62L, 13L, 139L, 29L,
49L, 15L, 7L, 43L, 28L, 31L, 22L, 23L, 26L, 33L, 30L, 127L, 22L,
22L, 15L, 28L, 26L, 15L, 31L, 22L, 89L, 28L, 60L, 54L, 37L, 20L,
135L, 155L, 51L, 15L, 8L, 38L, 16L, 16L, 22L, 30L, 14L, 16L,
18L, 14L, 272L, 25L, 22L, 18L, 21L, 188L, 264L, 34L, 34L, 136L,
23L, 142L, 25L, 32L, 58L, 163L, 16L, 35L, 23L, 50L, 71L, 10L,
19L, 22L, 24L, 45L, 29L, 15L, 82L), PRE_OPERATIVE_LOS = c(2L,
2L, 3L, 1L, 3L, 6L, 3L, 7L, 2L, 2L, 11L, 2L, 6L, 3L, 6L, 3L,
5L, 3L, 179L, 2L, 5L, 3L, 4L, 2L, 5L, 6L, 2L, 4L, 2L, 6L, 3L,
2L, 2L, 6L, 6L, 1L, 4L, 5L, 6L, 5L, 0L, 4L, 6L, 2L, 4L, 4L, 7L,
4L, 4L, 6L, 2L, 4L, 3L, 3L, 2L, 6L, 4L, 110L, 63L, 6L, 4L, 7L,
5L, 1L, 6L, 1L, 4L, 2L, 6L, 3L, 2L, 8L, 2L, 2L, 4L, 3L, 6L, 171L,
5L, 4L, 116L, 6L, 47L, 3L, 7L, 3L, 60L, 1L, 3L, 20L, 31L, 49L,
9L, 8L, 3L, 4L, 35L, 7L, 4L, 9L), POST_OPERATIVE_LOS = c(55L,
26L, 12L, 27L, 11L, 43L, 12L, 15L, 15L, 79L, 23L, 22L, 25L, 35L,
27L, 19L, 16L, 46L, 9L, 19L, 16L, 33L, 20L, 21L, 43L, 48L, 40L,
58L, 11L, 133L, 26L, 47L, 13L, 1L, 37L, 27L, 27L, 17L, 17L, 21L,
33L, 26L, 121L, 20L, 18L, 11L, 21L, 22L, 11L, 25L, 20L, 85L,
25L, 57L, 52L, 31L, 16L, 25L, 92L, 45L, 11L, 1L, 33L, 15L, 10L,
21L, 26L, 12L, 10L, 15L, 12L, 264L, 23L, 20L, 14L, 18L, 182L,
93L, 29L, 30L, 20L, 17L, 95L, 22L, 25L, 55L, 103L, 15L, 32L,
3L, 19L, 22L, 1L, 11L, 19L, 20L, 10L, 22L, 11L, 73L), digoxin_any = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L,
2L, 1L, 2L), .Label = c("0:No", "1.Yes"), class = "factor")), row.names = c(NA,
-100L), class = c("data.table", "data.frame"), .internal.selfref = <pointer: 0x0000012f36b61ef0>)
num <- c('PRE_OPERATIVE_LOS','POST_OPERATIVE_LOS')
plist <- scan(text=num,what = "",quiet = T)
groups <- 'digoxin_any'
bygrp <- scan(text=groups,what="",quiet=T)
norwd5[, (bygrp) := lapply(.SD, as.factor), .SDcols = bygrp]
plotList = list()
for(i in length(plist)){
gplot <- ggplot(norwd5,aes_string(x=plist[i],group=bygrp,color=bygrp))+geom_histogram(aes(y=..density..),position = "dodge")+geom_density(alpha=.5) +theme(legend.position = "left")
plotList[[i]] <- plotly_build(gplot)
}
for(i in length(plist)){
print(plotList[[i]])
}
The goal is to show both graphs for PRE_OPERATIVE_LOS and POST_OPERATIVE_LOS. However, the codes above only show histogram for POST_OPERATIVE_LOS.
I checked maybe subplot is the way to go but how to make subplot work in a loop? Any hints?
Thanks!
There is an error in your first loop and calling each subplot won't make both appear at the same time.
First-- the issue with your first for call- when you wrote
for(i in length(plist))
You wrote for i in 2 or i == 2, meaning that you never looped. If you modify it to a range of values, now it's written: for i in 1 to 2.
for(i in 1:length(plist))
So you're aware, if you had written for(i in plist) it would have done both loops, but instead of a value, i would be the strings.
Okay, so now there are two graphs. From the plotly library, you can use the function subplot. You will want to turn the legend off for one of them, though.
subplot(plotList[[1]],
style(plotList[[2]], showlegend = FALSE))
If you wanted the outline color, that's more than okay! However, if you wanted to bars to be filled, you need to assign fill instead of color.
If you change color = bygrp to fill = bygrp, this is how this would change:
If you leave the color assignment and add fill = bygrp (so you have both), this is how this would change:

Nonlinear model convergence

I have a time series data set and each time series has datapoint of 30-year from different/same species. I am developing a forecasting model using the first 23 years of data from each time series data point and I am using the rest 7 years as test set to know the predictive ability of model but the nonlinear model (Model 6 and Model 7) don't give succinct result?
Data:
DD <- structure(list(Plot = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("A",
"B", "C", "D"), class = "factor"), Species = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L), .Label = c("BD", "BG"), class = "factor"), Year = c(37L,
38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L,
51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L,
64L, 65L, 66L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L,
47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L,
60L, 61L, 62L, 63L, 64L, 65L, 66L, 37L, 38L, 39L, 40L, 41L, 42L,
43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L,
56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 37L, 38L,
39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L,
52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L,
65L, 66L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L,
48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 60L,
61L, 62L, 63L, 64L, 65L, 66L, 37L, 38L, 39L, 40L, 41L, 42L, 43L,
44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L,
57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 37L, 38L, 39L,
40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L,
53L, 54L, 55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L,
66L), Count = c(81L, 45L, 96L, 44L, 24L, 8L, 28L, 32L, 39L, 29L,
40L, 17L, 4L, 12L, 18L, 11L, 63L, 98L, 78L, 76L, 67L, 36L, 56L,
43L, 81L, 8L, 14L, 20L, 25L, 19L, 135L, 91L, 171L, 88L, 59L,
1L, 1L, 1L, 2L, 1L, 11L, 9L, 34L, 15L, 32L, 21L, 33L, 43L, 39L,
20L, 6L, 3L, 9L, 9L, 28L, 16L, 15L, 2L, 1L, 1L, 34L, 16L, 19L,
35L, 32L, 7L, 2L, 30L, 29L, 25L, 28L, 11L, 31L, 31L, 28L, 27L,
34L, 110L, 87L, 103L, 72L, 19L, 46L, 43L, 107L, 32L, 26L, 31L,
12L, 29L, 23L, 40L, 50L, 23L, 34L, 11L, 9L, 4L, 24L, 55L, 14L,
16L, 51L, 43L, 2L, 13L, 8L, 96L, 52L, 118L, 32L, 1L, 8L, 17L,
34L, 29L, 38L, 15L, 4L, 38L, 2L, 1L, 1L, 1L, 1L, 1L, 3L, 3L,
4L, 6L, 4L, 4L, 10L, 6L, 7L, 9L, 15L, 30L, 25L, 36L, 13L, 17L,
43L, 36L, 60L, 50L, 26L, 13L, 13L, 27L, 18L, 56L, 96L, 16L, 54L,
2L, 2L, 9L, 5L, 5L, 6L, 2L, 6L, 2L, 3L, 4L, 3L, 136L, 71L, 116L,
28L, 23L, 76L, 64L, 98L, 58L, 26L, 13L, 13L, 13L, 18L, 19L, 24L,
18L, 17L, 3L, 23L, 19L, 9L, 11L, 13L, 20L, 29L, 29L, 17L, 20L,
26L, 71L, 63L, 53L, 54L, 20L, 22L, 18L, 93L, 50L, 18L, 12L, 12L,
31L), LogCount = c(1.908385019, 1.653212514, 1.982271233, 1.643462676,
1.380211242, 0.903089987, 1.447158031, 1.505109978, 1.591064607,
1.462397998, 1.602059991, 1.230448921, 0.602059991, 1.079181206,
1.255272505, 1.041392685, 1.799340549, 1.991226076, 1.892094603,
1.880813592, 1.826074803, 1.556302501, 1.748188027, 1.633468456,
1.908485019, 0.903089987, 1.146128035, 1.301029996, 1.397940009,
1.278753601, 2.130333768, 1.95904139, 2.2329961, 1.94448267,
1.770852012, 0, 0, 0, 0.30102999, 0, 1.0411392685, 0.954242509,
1.531478917, 1.176031259, 1.505149978, 1.322219295, 1.51851394,
1.6334684456, 1.591064607, 1.301029996, 0.77815125, 0.477121255,
0.954242509, 0.954242509, 1.447158031, 1.204119983, 1.176091259,
0.301029996, 0, 0, 1.531478917, 1.204119983, 1.278753501, 1.544068044,
1.505149978, 0.084509804, 0.301029996, 1.477121255, 1.462397998,
1.397940009, 1.447158031, 1.041392685, 1.491361694, 1.491361694,
1.447158031, 1.431363754, 1.531478917, 2.041392685, 1.939519253,
2.012837225, 1.857332495, 1.278753601, 1.662757382, 1.633468456,
2.029383778, 1.505149978, 1.414973348, 1.491361594, 1.079181245,
1.462397998, 1.361727835, 1.602059991, 1.698970004, 1.361727836,
1.531478917, 1.041392685, 0.954242509, 0.602059991, 1.380211242,
1.740362689, 1.146128036, 1.204119983, 1.707570176, 1.633468456,
0.301029996, 1.113943352, 0.903089987, 1.982271233, 1.716003344,
2.071882007, 1.50514997, 0, 0.903089987, 1.230448921, 1.53147891,
1.2397998, 1.57978359, 1.176091259, 0.602059991, 1.57978359,
0.301029996, 0, 0, 0, 0, 0, 0.477121255, 0.477121255, 0.602059991,
0.77815125, 0.602059991, 0.602059991, 1, 0.77815125, 0.84509804,
0.95424509, 1.176091259, 1.477121255, 1.39790009, 1.555302501,
1.113943352, 1.230448921, 1.633468456, 1.555302501, 1.77815125,
1.698970004, 1.414973348, 1.113943352, 1.113943352, 1.431353754,
1.255272505, 1.748188027, 1.982271233, 1.204119983, 1.73239376,
1.431363754, 1.361727835, 0.954242509, 0.698970004, 0.698970004,
0.77815125, 0.301029996, 0.77815125, 0.301029996, 0.477121255,
0.602059991, 0.477121255, 2.133538908, 1.851258349, 2.064457989,
1.447158031, 1.361727836, 1.880813592, 1.806179974, 1.991226076,
1.763427994, 1.414973348, 1.113943352, 1.113943352, 1.113943352,
1.255272505, 1.278753601, 1.380211242, 1.255272505, 1.230446921,
0.477121255, 1.361727835, 1.278753601, 0.954242509, 1.0411392685,
1.113943352, 1.301029996, 1.462397998, 1.462397998, 1.230448921,
1.301029995, 1.414973348, 1.851258349, 1.799340549, 1.72427587,
1.73239376, 1.301029996, 1.342422681, 1.255272505, 1.968482949,
1.698970004, 1.255272505, 1.079181246, 1.079181246, 1.491361694
), Diff = c(-0.255272505, 0.329058719, -0.338818557, -0.263241434,
-0.077121255, 0.544068044, 0.057991947, 0.085910629, -0.128666609,
0.139661993, -0.37161107, -0.62838893, 0.477121255, 0.176091259,
-0.21387982, 0.757947864, 0.191885527, -0.099131473, -0.011281011,
-0.054738789, -0.269772302, 0.191885526, -0.114719571, 0.275016563,
-1.005395032, 0.243038049, 0.15490196, 0.096910013, -0.119186408,
NA, -0.171292376, 0.273954718, -0.288513438, -0.17363066, -1.770852012,
0, 0, 0.301029996, -0.301029996, 1.041392685, -0.087150176, 0.577235408,
-0.355387658, 0.329058719, -0.182930683, 0.196294545, 0.110954516,
-0.042403849, -0.290034611, -0.522878746, -0.301029995, 0.477121254,
0, 0.492915522, -0.243038048, -0.028028724, -0.875061263, -0.301029996,
0, 1.531078917, -0.32735893, 0.070633618, 0.265310043, -0.038918066,
-0.660051938, -0.544068044, 1.176091259, -0.014723257, -0.064457989,
0.049218022, -0.405765346, 0.449969009, 0, -0.044203663, -0.015794267,
0.100115153, 0.509913768, -0.101873432, 0.073317972, -0.155504729,
-0.578578895, 0.384054231, -0.029289376, 0.395915322, -0.5202338,
-0.09017663, 0.076388346, -0.412180448, 0.383216752, -0.100670162,
0.240332155, 0.096910013, -0.337242168, 0.169751081, -0.490086232,
-0.087150176, -0.352182518, 0.778151251, 0.360151447, -0.594234653,
0.057991947, 0.503450193, -0.07410172, -1.33243846, 0.812913356,
-0.210853365, 1.079181246, -0.266267889, 0.355878663, -0.566732029,
-1.505149978, 0.903089987, 0.327358934, 0.301029996, -0.069080919,
0.117385599, -0.403692338, -0.574031268, 0.977723606, -1.278753601,
-0.301029996, 0, 0, 0, 0, 0.477121255, 0, 0.124938736, 0.176091259,
-0.176091259, 0, 0.397490009, -0.2218485, 0.06690679, 0.10914469,
0.22184875, 0.301029996, -0.079181206, 0.158362092, -0.442359149,
0.116505569, 0.403019535, -0.077165955, 0.221848749, -0.079181206,
-0.283996656, -0.301029996, 0, 0.317420412, -0.176091259, 0.492915522,
0.23483206, -0.77815125, 0.528273777, -0.301029996, -0.069635928,
-0.407485327, -0.255272505, 0, 0.079181246, -0.477121254, 0.477121254,
-0.477121254, 0.176091259, 0.124938736, -0.124938736, 1.656417653,
-0.282280559, 0.21319964, -0.617299958, -0.085430195, 0.5191085756,
-0.074533518, 0.185045102, -0.227798082, -0.348454546, -0.301029996,
0, 0, 0.141329153, 0.023481096, 0.101457641, -0.124938737, -0.024823584,
-0.753327666, 0.884606581, -0.082974235, -0.324511092, 0.087150176,
0.072550667, 0.187086644, 0.161368002, 0, -0.231949077, 0.070581075,
0.113903352, 0.436285001, -0.00519178, -0.075054679, 0.00811789,
-0.431363764, 0.041392685, -0.087150176, 0.713210444, -0.269512945,
-0.443697499, -0.176091259, 0, 0.412180448, -0.148939013)), class = "data.frame", row.names = c(NA,
-210L))
Code:
for(f in 1:11){
for(b in 1:5){
for (c in 1:5){
#To select test sets 1,2,3,4, and 5 years beyond the training set:
#Calculate the mean of abundance for the training set years.
Model1<-lm(mean~1, data=DD1)
#
Output2:
2 3 0.676209994477288 1.9365051784348e-09 4.44089209850063e-16
3 53 11.9236453578109 2.06371097988267e-09 1.13686837721616e-13
4 31 1.94583877614293 1.11022302462516e-15 1.99840144432528e-15
5 4 8.06660449042397 1.48071350736245e-08 3.19744231092045e-14
6 5 10.5321102149558 9.31706267692789e-10 1.4210854715202e-14
..
First, please see the time series graph of counts for different species and plots below.
library(ggplot2)
ggplot(DD, aes(Year, Count)) +
geom_point() +
geom_line() +
facet_grid(Plot ~ Species) +
scale_y_log10()
It is seen that there is no obvious trend which can be fitted by power or log-power function using nls.
Second, as I understand you are trying to use nls to predict outside the training data set. Usually it is not quite an effective to use least square models because of auto-correlated nature of time-series.
Third, the simplest prediction algorithm is Holt-Winters (see "dirty" implementation below). You can use as well a ton of other algorithms like ARIMA, exponential smoothing state space etc.
x <- ts(DD[DD$Species == "BG" & DD$Plot == "elq1a3", ]$LogCount)
m <- HoltWinters(x, gamma = FALSE)
library(forecast)
f <- forecast(m, 2)
plot(f, main = "elq1a3 at BG")
Fourth, about your algorithm in question, it throws
Error in qr.solve(QR.B, cc) : singular matrix 'a' in solve.
The reason is that in the first step of for-loop (at f = b = c = 1 DD2 data frame contains just one row. And executing
Model6<-nls(Diff~1+Count^T,start=list(T=1),trace=TRUE,algorithm ="plinear",data=DD2)
means that you are trying to fit a curve using only one data point, which is impossible.
However if you change f value in for-loop from 1:11 to 2:11 another error thrown:
Error in nls(Diff ~ 1 + Count^T, start = list(T = 1), trace = TRUE,
algorithm = "plinear", : step factor 0.000488281 reduced below
minFactor 0.000976562.
In this case you cannot use "naive" approach used by plinear algorithm with self-starting inital value and, e.g. nls.control(min.factor = 1e-5). You must feed all initial coefficients explicitely with default Gauss-Newton algorithm. Quite exausting, please try yourself :)

Use rbind() in nested for loop with apply() in r

How can you use rbind in a for loop that runs through a list of dataframes? I tried to follow Looping through list of data frames in R but receive the following:
Error in apply(dataFramesList, 2, function(x) { :
dim(X) must have a positive length
I have two dataframes, dfTraining and dfAccuracy (code to reproduce dataframes is below), and need to add a row for any of the crop types missing from either of two columns, CROP or CROP_LABEL. I believe my problem is in my last line of code.
My code block is:
dataFramesList <- list(dfTraining, dfAccuracy)
apply(dataFramesList, 2, function(x){
cropNumbers <- seq(1,23, by = 1)
cropNumbers <- cropNumbers[-c(3)]
cropNumbers <- append(cropNumbers, 34)
listofCROPandCROP_LABELColumns <- list(dataFrameList$CROP, dataFrameList$CROP_LABEL)
missingCROP <- NULL
for (i in listofCROPandCROP_LABELColumns){
for (j in cropNumbers){
if (!j %in% i){
# If crop number is missing from CROP_LABEL, add missingCROP observation (row)
# Make row for missing crop type
missingCrop <- list(FREQUENCY = 0, AA = 1, CROP = j, CROP_LABEL = j, ACRES = 0)
dataFrameList <- rbind(dataFrameList, missingCrop)
}
}
}
})
My dfAccuracy dataframe:
structure(list(FREQUENCY = c(4L, 2L, 1L, 1L, 1L, 1L, 65L, 1L,
1L, 4L, 1L, 5L, 5L, 2L, 4L, 1L, 1L, 1L, 1L, 4L, 9L, 2L, 1L, 1L,
1L, 2L, 4L, 1L, 2L, 18L, 1L, 10L, 3L, 1L, 7L, 1L, 1L, 1L, 3L,
1L, 7L, 1L), AA = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L),
CROP = c(1L, 4L, 12L, 13L, 14L, 18L, 1L, 1L, 1L, 1L, 1L,
4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 7L, 13L,
13L, 13L, 13L, 14L, 14L, 14L, 18L, 18L, 18L, 18L, 18L, 19L,
19L, 21L, 21L, 21L, 21L), CROP_LABEL = c(1L, 4L, 14L, 13L,
12L, 18L, 1L, 4L, 5L, 6L, 18L, 1L, 4L, 6L, 14L, 18L, 12L,
14L, 18L, 1L, 6L, 14L, 18L, 18L, 4L, 6L, 13L, 21L, 12L, 14L,
18L, 1L, 6L, 14L, 18L, 21L, 1L, 19L, 6L, 13L, 21L, 34L),
ACRES = c(331.737184484, 193.772138572, 26.48543619, 73.2696289437,
112.470306056, 66.6556450342, 3905.71121736, 24.9581079934,
39.9287379709, 259.662359273, 85.2786247851, 306.051491303,
368.342995232, 154.82030835, 265.754349805, 70.3722566979,
35.4066607701, 139.336463432, 58.4307705147, 251.070357093,
471.031628349, 150.965736858, 28.2780117926, 35.3426930108,
34.5730542194, 67.7383953308, 144.442123948, 33.2746560126,
69.4072817311, 1219.65459596, 92.4840910734, 582.983473317,
191.957841327, 35.708775262, 319.638682538, 60.6889287642,
82.6244195055, 36.2898952104, 267.422844756, 72.8352758659,
489.746546145, 65.5392893502)), row.names = c(25L, 26L, 27L,
29L, 30L, 31L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 67L, 68L, 69L,
70L, 71L, 72L, 73L, 74L, 75L, 76L, 77L, 78L, 79L, 80L, 81L, 82L,
83L, 84L, 85L, 86L, 87L, 88L, 89L, 90L, 91L, 92L, 93L, 94L, 95L
), class = "data.frame")
and my dfTraining dataframe is:
structure(list(FREQUENCY = c(7L, 1L, 1L, 4L, 2L, 6L, 1L, 107L,
1L, 21L, 1L, 1L, 1L, 2L, 1L, 19L, 3L, 1L, 1L, 12L, 1L, 2L, 32L,
2L, 2L, 29L, 2L, 18L, 1L), AA = c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L), CROP = c(1L, 1L, 4L, 4L, 12L, 13L, 21L,
1L, 1L, 4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L, 7L, 12L, 13L, 14L, 14L,
14L, 18L, 18L, 18L, 19L, 21L, 34L), CROP_LABEL = c(1L, 4L, 1L,
4L, 12L, 13L, 21L, 1L, 6L, 4L, 6L, 1L, 5L, 14L, 18L, 6L, 14L,
1L, 12L, 13L, 1L, 6L, 14L, 6L, 14L, 18L, 19L, 21L, 34L), ACRES = c(624.940370218,
26.9188766351, 37.8773839813, 291.79294767, 140.949264214, 391.571023675,
44.5217011939, 6806.02216989, 72.7500299887, 1676.12121152, 14.8739557721,
67.0700291739, 59.7438207953, 82.6713019474, 75.62666152, 1370.78710769,
145.215281276, 41.7380537313, 66.5236760194, 679.91208779, 70.9661875374,
38.8514254734, 1749.63365551, 109.917242057, 79.7758083723, 1660.85759895,
96.8771921798, 1428.71888481, 69.473161379)), row.names = c(18L,
19L, 20L, 21L, 22L, 23L, 24L, 38L, 39L, 40L, 41L, 42L, 43L, 44L,
45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L,
58L, 59L), class = "data.frame")

Broom::tidy error with dataframe of nnet::multinom models

I am generating multinom models using nnet, with a model fitted for each city in the dataset. When I attempt to use tidy with these models, I get the following error:
Error in probs[i, -1, drop = FALSE] : subscript out of bounds
However, if I produce a model for each City separately, and then use tidy I do not receive an error for any of the models. I am also able to use glace without an error.
What might be causing this error?
library(broom)
library(dplyr)
library(nnet)
dfstack <- structure(list(Var1 = c(73L, 71L, 66L, 75L, 96L, 98L, 98L, 65L,
75L, 74L, 71L, 98L, 100L, 87L, 78L, 50L, 73L, 82L, 70L, 70L,
31L, 34L, 32L, 100L, 100L, 100L, 54L, 51L, 36L, 48L, 66L, 60L,
59L, 72L, 76L, 90L, 85L, 76L, 55L, 53L, 42L, 54L, 54L, 10L, 34L,
18L, 6L, 16L, 63L, 41L, 68L, 55L, 52L, 57L, 64L, 61L, 68L, 44L,
33L, 19L, 38L, 54L, 44L, 87L, 100L, 100L, 63L, 75L, 76L, 100L,
100L, 64L, 95L, 90L, 99L, 98L, 87L, 62L, 62L, 88L, 79L, 85L),
Status = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("A",
"B", "C"), class = "factor"), City = structure(c(3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L,
3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), .Label = c("Denver", "Miami", "NYC"), class = "factor"),
ID = structure(c(52L, 63L, 74L, 77L, 78L, 79L, 80L, 81L,
82L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 64L,
31L, 42L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 32L, 1L, 12L,
23L, 25L, 26L, 27L, 28L, 29L, 30L, 2L, 3L, 4L, 5L, 65L, 66L,
67L, 68L, 69L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 6L,
7L, 8L, 9L, 10L, 11L, 13L, 70L, 71L, 72L, 73L, 75L, 76L,
41L, 43L, 44L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L,
24L), .Label = c("Denver1", "Denver10", "Denver11", "Denver12",
"Denver13", "Denver14", "Denver15", "Denver16", "Denver17",
"Denver18", "Denver19", "Denver2", "Denver20", "Denver21",
"Denver22", "Denver23", "Denver24", "Denver25", "Denver26",
"Denver27", "Denver28", "Denver29", "Denver3", "Denver30",
"Denver4", "Denver5", "Denver6", "Denver7", "Denver8", "Denver9",
"Miami1", "Miami10", "Miami11", "Miami12", "Miami13", "Miami14",
"Miami15", "Miami16", "Miami17", "Miami18", "Miami19", "Miami2",
"Miami20", "Miami21", "Miami3", "Miami4", "Miami5", "Miami6",
"Miami7", "Miami8", "Miami9", "NYC1", "NYC10", "NYC11", "NYC12",
"NYC13", "NYC14", "NYC15", "NYC16", "NYC17", "NYC18", "NYC19",
"NYC2", "NYC20", "NYC21", "NYC22", "NYC23", "NYC24", "NYC25",
"NYC26", "NYC27", "NYC28", "NYC29", "NYC3", "NYC30", "NYC31",
"NYC4", "NYC5", "NYC6", "NYC7", "NYC8", "NYC9"), class = "factor")), class = "data.frame", row.names = c(NA, -82L), .Names = c("Var1", "Status", "City", "ID"))
Model.List <- dfstack %>% group_by(City) %>% do(modfits = multinom(Status~Var1, data=.))
tidy(Model.List, modfits) # produces error
glance(Model.List, modfits) # no error
# no error when each city on its own
df1 <- dfstack %>% filter(City == "NYC") %>% do(modfit1 = multinom(Status~Var1, data=.))
tidy(df1, modfit1)
df2 <- dfstack %>% filter(City == "Miami") %>% do(modfit1 = multinom(Status~Var1, data=.))
tidy(df2, modfit1)
df3 <- dfstack %>% filter(City == "Denver") %>% do(modfit1 = multinom(Status~Var1, data=.))
tidy(df3, modfit1)
Don't ask me to explain why, but I figured it out.
tidy.multinom calls summary.multinom which calls vcov.multinom which calls multinomHess. The error was being generated down in multinomHess, which is only run when the Hessian matrix is not generated in the original call to multinom. That is to say, you don't necessarily need to spend the time calculating the Hessian matrix if you don't intend to use the summary object.
For some reason, when the multinom objects are formed within the do call, summary.multinom is unable to calculate the Hessian matrix. This can be circumvented by calling multinom with Hess = TRUE. See below:
Model.List <-
dfstack %>%
group_by(City) %>%
do(modfits = multinom(Status~Var1,
data=.,
Hess = TRUE))
tidy(Model.List, modfits)
glance(Model.List, modfits)
In your original code, glance did not cast an error because glance.multinom does not rely on summary.multinom.

dplyr n_distinct() in filter takes forever where as base length(unique()) works like charm

I have a data frame such as this:
structure(list(x = c(1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L,
6L, 6L, 7L, 7L, 8L, 8L, 9L, 9L, 10L, 10L, 11L, 11L, 12L, 12L,
13L, 13L, 14L, 14L, 15L, 15L, 16L, 16L, 17L, 17L, 18L, 18L, 19L,
19L, 20L, 20L, 21L, 21L, 22L, 22L, 23L, 23L, 24L, 24L, 25L, 25L,
26L, 26L, 27L, 27L, 28L, 28L, 29L, 29L, 30L, 30L, 31L, 31L, 32L,
32L, 33L, 33L, 34L, 34L, 35L, 35L, 36L, 36L, 37L, 37L, 38L, 38L,
39L, 39L, 40L, 40L, 41L, 41L, 42L, 42L, 43L, 43L, 44L, 44L, 45L,
45L, 46L, 46L, 47L, 47L, 48L, 48L, 49L, 49L, 50L, 50L, 51L, 51L,
52L, 52L, 53L, 53L, 54L, 54L, 55L, 55L, 56L, 56L, 57L, 57L, 58L,
58L, 59L, 59L, 60L, 60L, 61L, 61L, 62L, 62L, 63L, 63L, 64L, 64L,
65L, 65L, 66L, 66L, 67L, 67L, 68L, 68L, 69L, 69L, 70L, 70L, 71L,
71L, 72L, 72L, 73L, 73L, 74L, 74L, 75L, 75L, 76L, 76L, 77L, 77L,
78L, 78L, 79L, 79L, 80L, 80L, 81L, 81L, 82L, 82L, 83L, 83L, 84L,
84L, 85L, 85L, 86L, 86L, 87L, 87L, 88L, 88L, 89L, 89L, 90L, 90L,
91L, 91L, 92L, 92L, 93L, 93L, 94L, 94L, 95L, 95L, 96L, 96L, 97L,
97L, 98L, 98L, 99L, 99L, 100L, 100L), y = structure(c(1L, 2L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L,
2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L,
2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L,
1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L,
1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L,
1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, 1L, 2L), .Label = c("one", "two"), class = "factor")), class = "data.frame", row.names = c(NA,
-200L), .Names = c("x", "y"))
I am trying to filter groups of x that have two distinct y values using:
library(dplyr)
df %>% group_by(x) %>% filter(n_distinct(y) > 1)
On a large data set, this almost never finishes.
Changing to this works reasonably fast for the full data set:
library(dplyr)
df %>% group_by(x) %>% filter(length(unique(y)) > 1)
Any idea why n_distinct() is super slow to never finishing?

Resources