How to get the confidence interval after sum in R? - r
I have a dataset, which is on an event basis, like below,
Young<- structure(list(Year = c(2011L, 2011L, 2011L, 2011L, 2011L, 2011L,
2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L,
2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L,
2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L,
2012L, 2012L, 2012L, 2012L, 2012L, 2012L), Month = c(10L, 10L,
10L, 12L, 12L, 12L, 3L, 3L, 3L, 8L, 8L, 8L, 9L, 9L, 9L, 1L, 1L,
1L, 2L, 2L, 2L, 3L, 3L, 3L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L,
7L, 7L, 7L, 8L, 8L, 8L), Day = c(19L, 19L, 19L, 20L, 20L, 20L,
28L, 28L, 28L, 16L, 16L, 16L, 19L, 19L, 19L, 24L, 24L, 24L, 9L,
9L, 9L, 20L, 20L, 20L, 7L, 7L, 7L, 12L, 12L, 12L, 28L, 28L, 28L,
25L, 25L, 25L, 9L, 9L, 9L), Hour = c("11:00:00", "12:00:00",
"12:00:00", "16:00:00", "16:00:00", "16:00:00", "15:00:00", "16:00:00",
"16:00:00", "13:00:00", "13:00:00", "14:00:00", "13:00:00", "13:00:00",
"14:00:00", "15:00:00", "15:00:00", "16:00:00", "15:00:00", "15:00:00",
"15:00:00", "16:00:00", "16:00:00", "17:00:00", "14:00:00", "15:00:00",
"15:00:00", "16:00:00", "16:00:00", "16:00:00", "15:00:00", "15:00:00",
"16:00:00", "15:00:00", "16:00:00", "16:00:00", "14:00:00", "15:00:00",
"15:00:00"), Treatment = c("Control", "Control", "Control", "Control",
"Control", "Control", "Control", "Control", "Control", "Control",
"Control", "Control", "Control", "Control", "Control", "Control",
"Control", "Control", "Control", "Control", "Control", "Control",
"Control", "Control", "Control", "Control", "Control", "Control",
"Control", "Control", "Control", "Control", "Control", "Control",
"Control", "Control", "Control", "Control", "Control"), Age = c("Young",
"Young", "Young", "Young", "Young", "Young", "Young", "Young",
"Young", "Young", "Young", "Young", "Young", "Young", "Young",
"Young", "Young", "Young", "Young", "Young", "Young", "Young",
"Young", "Young", "Young", "Young", "Young", "Young", "Young",
"Young", "Young", "Young", "Young", "Young", "Young", "Young",
"Young", "Young", "Young"), Plot = c(2L, 4L, 3L, 3L, 2L, 4L,
3L, 4L, 2L, 4L, 2L, 3L, 3L, 2L, 4L, 4L, 2L, 3L, 2L, 4L, 3L, 2L,
4L, 3L, 2L, 4L, 3L, 3L, 4L, 2L, 4L, 2L, 3L, 2L, 4L, 3L, 3L, 2L,
4L), CutUncut = c("Uncut", "Uncut", "Uncut", "Uncut", "Uncut",
"Uncut", "Uncut", "Uncut", "Uncut", "Uncut", "Uncut", "Uncut",
"Uncut", "Uncut", "Uncut", "Uncut", "Uncut", "Uncut", "Uncut",
"Uncut", "Uncut", "Uncut", "Uncut", "Uncut", "Uncut", "Uncut",
"Uncut", "Uncut", "Uncut", "Uncut", "Uncut", "Uncut", "Uncut",
"Uncut", "Uncut", "Uncut", "Uncut", "Uncut", "Uncut"), DOY = c(293L,
293L, 293L, 355L, 355L, 355L, 88L, 88L, 88L, 229L, 229L, 229L,
263L, 263L, 263L, 24L, 24L, 24L, 40L, 40L, 40L, 80L, 80L, 80L,
128L, 128L, 128L, 164L, 164L, 164L, 180L, 180L, 180L, 207L, 207L,
207L, 222L, 222L, 222L), Season = c("Autumn", "Autumn", "Autumn",
"Winter", "Winter", "Winter", "Spring", "Spring", "Spring", "Summer",
"Summer", "Summer", "Autumn", "Autumn", "Autumn", "Winter", "Winter",
"Winter", "Winter", "Winter", "Winter", "Spring", "Spring", "Spring",
"Spring", "Spring", "Spring", "Summer", "Summer", "Summer", "Summer",
"Summer", "Summer", "Summer", "Summer", "Summer", "Summer", "Summer",
"Summer"), ParNEE = c(290.7248731, 599.7403381, 620.7083338,
32.78885425, 62.01252568, 45.52391483, 1071.670139, 1093.367386,
800.6788483, 419.0408238, 412.778134, 1327.117535, 839.7914373,
644.0409088, 279.2245603, 191.8809568, 198.8807589, 140.3205729,
467.648241, 165.6642216, 96.16016992, 762.2635568, 540.4431615,
785.3492862, 1732.119114, 1129.006272, 1057.400471, 600.8070811,
494.626709, 716.4676621, 874.3964178, 766.3067725, 722.804594,
1841.223679, 1857.914923, 1851.643175, 1885.19335, 1874.080518,
1886.155761), TsoilNEE = c(8.20641798, 8.87167614, 9.25483531,
5.35562467, 5.001391598, 5.142751522, 5.447442626, 2.774806172,
5.73145326, 16.39260095, 16.63689439, 16.00032251, 13.80826586,
13.70293495, 14.05993565, -148.3114899, -125.1212398, -39.50172764,
-0.153912547, 0.250569622, -0.180588761, 7.026564873, 6.613643378,
6.750276902, 10.9441073, 9.815122309, 10.96556178, 13.72630399,
13.91668339, 14.75620705, 16.07427164, 17.02269846, 15.89705194,
17.48706912, 18.1624207, 17.27015855, 15.88739078, -313.4308453,
-200.2484216), TairNEE = c(11.84885994, 12.25500113, 13.33651023,
9.929997869, 9.802112232, 9.682167101, 22.46247535, 25.07250631,
25.27596815, 27.62653943, 29.58273819, 29.58725575, 24.72666853,
24.55716774, 20.72507946, 9.805187988, 9.747996729, 9.705873269,
4.765671568, 3.773874476, 3.891349753, 20.38603849, 20.30943844,
20.69971814, 22.20306747, 24.75430113, 24.77324059, 23.4390849,
23.41247396, 23.06330721, 35.67870836, 34.35199338, 33.98895943,
40.66750538, 37.41804298, 37.19870659, 29.66119092, 31.58723976,
30.10279262), FluxNEE = c(-3.206353246, -4.163998348, -9.420162768,
0.037262928, -1.336588509, -0.224057957, -0.508366229, -0.303823561,
0.451989541, -4.629461578, -4.128336306, -19.31431488, -17.41555463,
-10.76842377, -3.773743494, -4.005118396, -3.10139429, -3.896511575,
-0.230811062, -0.387848425, 0.106525664, -2.093040652, -2.140873245,
-3.342366374, -4.283554855, -3.90440069, -6.694529951, -10.79442231,
-7.57943893, -8.668064652, -9.796745843, -9.068490841, -11.23867962,
-2.881690108, -6.139469398, -11.55337646, -17.73289009, -11.81309017,
-10.19259206), ParER = c(0.079582144, 0.078538246, 0.076666822,
0.075928068, 0.071107485, 0.067917311, 0.079550601, 0.079545707,
0.076062537, 0.066680977, 0.044227931, 0.064819571, 0.084869874,
0.085468009, 0.073554044, 0.078895503, 0.070483716, 0.079795927,
0.071635126, 0.074950603, 0.085683638, 0.067656389, 0.061385558,
0.063857453, 0.071950763, 0.09015295, 0.077798096, 0.05698102,
0.075428953, 0.048109063, 0.06919603, 0.068823704, 0.082922817,
1843.888325, 1855.967312, 1856.078984, 1887.559762, 1877.441029,
1881.282226), TsoilER = c(8.212540539, 8.913773301, 9.37379959,
5.352583624, 5.003265737, 5.129246633, 3.585953441, 4.227443559,
6.270562446, 16.35403573, 16.89910109, 15.72318925, 13.51008097,
13.64622863, 14.01804184, -136.13081, -112.691455, -29.0108879,
-0.250624887, 0.234876446, -0.192253951, 7.242462111, 6.581003774,
7.304472456, 10.73760849, 9.970181595, 11.00729429, 13.62692044,
13.94769746, 14.73160985, 16.18365114, 16.87261231, 15.8735923,
17.35071047, 18.44087514, 16.87495921, 15.93538216, -217.3594646,
18.1232224), TairER = c(11.52528319, 12.64744211, 13.63468403,
9.899656846, 9.727256358, 9.639747971, 24.83383448, 25.18722803,
25.49742845, 27.23686553, 28.59409676, 29.95920761, 24.58571143,
24.69913034, 20.61829429, 9.669081306, 9.732249093, 9.544825508,
5.041718517, 3.73260756, 3.753481235, 20.24927008, 20.38445081,
21.03628444, 22.96717296, 23.93519561, 24.59680342, 23.274129,
23.18265753, 23.30700413, 35.65016436, 34.33754475, 33.67091287,
40.44720666, 37.56765173, 37.26631998, 29.50898978, 31.78674575,
30.593999), FluxER = c(1.057273055, 1.131077804, 1.286326278,
0.933058574, 1.162736073, 1.056358927, 2.210981831, 1.401988009,
1.500512866, 5.052906165, 4.628101935, 6.530351267, 3.258632563,
2.530428826, 2.847726462, 1.041944383, 0.591929214, 0.888592631,
0.085761062, 0.137706908, 0.124532256, 1.102969996, 1.210864425,
1.550348555, 1.981517329, 1.691527815, 3.072262372, 4.667099959,
3.568560761, 3.798178884, 6.782221267, 5.772727381, 7.887602279,
7.772539297, 7.873300514, 10.72683275, 8.24859913, 5.51910253,
5.920298815), Photosynth = c(-4.263626301, -5.295076152, -10.706489046,
-0.895795646, -2.499324582, -1.280416884, -2.71934806, -1.70581157,
-1.048523325, -9.682367743, -8.756438241, -25.844666147, -20.674187193,
-13.298852596, -6.621469956, -5.047062779, -3.693323504, -4.785104206,
-0.316572124, -0.525555333, -0.018006592, -3.196010648, -3.35173767,
-4.892714929, -6.265072184, -5.595928505, -9.766792323, -15.461522269,
-11.147999691, -12.466243536, -16.57896711, -14.841218222, -19.126281899,
-10.654229405, -14.012769912, -22.28020921, -25.98148922, -17.3321927,
-16.112890875), DayNumber = c(4705L, 4705L, 4705L, 4767L, 4767L,
4767L, 4500L, 4500L, 4500L, 4641L, 4641L, 4641L, 4675L, 4675L,
4675L, 4802L, 4802L, 4802L, 4818L, 4818L, 4818L, 4858L, 4858L,
4858L, 4906L, 4906L, 4906L, 4942L, 4942L, 4942L, 4958L, 4958L,
4958L, 4985L, 4985L, 4985L, 5000L, 5000L, 5000L), Date = c("2011-10-19",
"2011-10-19", "2011-10-19", "2011-12-20", "2011-12-20", "2011-12-20",
"2011-03-28", "2011-03-28", "2011-03-28", "2011-08-16", "2011-08-16",
"2011-08-16", "2011-09-19", "2011-09-19", "2011-09-19", "2012-01-24",
"2012-01-24", "2012-01-24", "2012-02-09", "2012-02-09", "2012-02-09",
"2012-03-20", "2012-03-20", "2012-03-20", "2012-05-07", "2012-05-07",
"2012-05-07", "2012-06-12", "2012-06-12", "2012-06-12", "2012-06-28",
"2012-06-28", "2012-06-28", "2012-07-25", "2012-07-25", "2012-07-25",
"2012-08-09", "2012-08-09", "2012-08-09"), Location = c(19L,
21L, 20L, 20L, 19L, 21L, 20L, 21L, 19L, 21L, 19L, 20L, 20L, 19L,
21L, 21L, 19L, 20L, 19L, 21L, 20L, 19L, 21L, 20L, 19L, 21L, 20L,
20L, 21L, 19L, 21L, 19L, 20L, 19L, 21L, 20L, 20L, 19L, 21L),
MossBiomass = c(0.81122449, 0.870408163, 0.532653061, 0.532653061,
0.81122449, 0.870408163, 0.532653061, 0.870408163, 0.81122449,
0.870408163, 0.81122449, 0.532653061, 0.532653061, 0.81122449,
0.870408163, 0.870408163, 0.81122449, 0.532653061, 0.81122449,
0.870408163, 0.532653061, 0.81122449, 0.870408163, 0.532653061,
0.81122449, 0.870408163, 0.532653061, 0.532653061, 0.870408163,
0.81122449, 0.870408163, 0.81122449, 0.532653061, 0.81122449,
0.870408163, 0.532653061, 0.532653061, 0.81122449, 0.870408163
), CallunaBiomass = c(0.730612245, 0.766326531, 0.774489796,
0.774489796, 0.730612245, 0.766326531, 0.774489796, 0.766326531,
0.730612245, 0.766326531, 0.730612245, 0.774489796, 0.774489796,
0.730612245, 0.766326531, 0.766326531, 0.730612245, 0.774489796,
0.730612245, 0.766326531, 0.774489796, 0.730612245, 0.766326531,
0.774489796, 0.730612245, 0.766326531, 0.774489796, 0.774489796,
0.766326531, 0.730612245, 0.766326531, 0.730612245, 0.774489796,
0.730612245, 0.766326531, 0.774489796, 0.774489796, 0.730612245,
0.766326531), TotalBiomass = c(1.541836735, 1.636734694,
1.307142857, 1.307142857, 1.541836735, 1.636734694, 1.307142857,
1.636734694, 1.541836735, 1.636734694, 1.541836735, 1.307142857,
1.307142857, 1.541836735, 1.636734694, 1.636734694, 1.541836735,
1.307142857, 1.541836735, 1.636734694, 1.307142857, 1.541836735,
1.636734694, 1.307142857, 1.541836735, 1.636734694, 1.307142857,
1.307142857, 1.636734694, 1.541836735, 1.636734694, 1.541836735,
1.307142857, 1.541836735, 1.636734694, 1.307142857, 1.307142857,
1.541836735, 1.636734694), Efflux = c(NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA), SM = c(0.284347997493598, 0.284347997493598,
0.284347997493598, 0.245463893974316, 0.245463893974316,
0.245463893974316, 0.222652267105158, 0.222652267105158,
0.222652267105158, 0.252836852714222, 0.252836852714222,
0.252836852714222, 0.278406340793348, 0.278406340793348,
0.278406340793348, 0.259937125926954, 0.259937125926954,
0.259937125926954, 0.232801146590399, 0.232801146590399,
0.232801146590399, 0.227096876335852, 0.227096876335852,
0.227096876335852, 0.229079210892252, 0.229079210892252,
0.229079210892252, 0.230726906942308, 0.230726906942308,
0.230726906942308, 0.232011241533852, 0.232011241533852,
0.232011241533852, 0.220105334847203, 0.220105334847203,
0.220105334847203, 0.236470079582821, 0.236470079582821,
0.236470079582821), SoilTemp = c(9.950000286, 10.04999971,
10.04999971, 4.700000048, 4.700000048, 4.700000048, NA, NA,
NA, 15.75, 15.75, 16.10000038, 13.3499999, 13.3499999, 13.54999971,
4.200000048, 4.200000048, 4.299999952, 0.100000001, 0.100000001,
0.100000001, 6.700000048, 6.700000048, 6.75, 9.950000286,
10.25, 10.25, 14.19999981, 14.19999981, 14.19999981, 16.44999981,
16.44999981, 16.65000057, 17.19999981, 17.5, 17.5, 16.64999962,
16.80000019, 16.80000019), RelHumid = c(0.88, 0.84, 0.84,
0.86, 0.86, 0.86, 0.68, 0.68, 0.68, 0.6, 0.6, 0.54, 0.76,
0.76, 0.75, 0.93, 0.93, 0.94, 0.87, 0.87, 0.87, 0.62, 0.62,
0.64, 0.45, 0.43, 0.43, 0.74, 0.74, 0.74, 0.63, 0.63, 0.64,
0.43, 0.46, 0.46, 0.58, 0.56, 0.56), AirTemp = c(8.9, 10.4,
10.4, 6.6, 6.6, 6.6, 9.4, 9.7, 9.7, 20.9, 20.9, 21.2, 16.1,
16.1, 16.2, 4.9, 4.9, 4.8, 0.1, 0.1, 0.1, 13.4, 13.4, 12.9,
14, 14.7, 14.7, 17.2, 17.2, 17.2, 26.8, 26.8, 26.9, 30.3,
29.8, 29.8, 21.6, 21.6, 21.6), Solar = c(166.6, 272.8, 272.8,
12.8, 12.8, 12.8, 221.6, 258.2, 258.2, 578.6, 578.6, 525.5,
364.4, 364.4, 309.4, 111.7, 111.7, 73.2, 144.7, 144.7, 144.7,
368, 368, 227.1, 767.2, 651.9, 651.9, 375.4, 375.4, 375.4,
446.8, 446.8, 391.8, 796.5, 730.6, 730.6, 269.2, 787.4, 787.4
), PAR = c(300.3, 503.5, 503.5, 14.6, 14.6, 14.6, 419.3,
479.7, 479.7, 1100.5, 1100.5, 996.1, 703.1, 703.1, 607.9,
194.1, 194.1, 120.8, 263.7, 263.7, 263.7, 714.1, 714.1, 428.5,
1433.7, 1192, 1192, 705, 705, 705, 869.8, 869.8, 750.7, 1536.3,
1411.7, 1411.7, 540.2, 1534.4, 1534.4)), row.names = c(11L,
12L, 13L, 61L, 62L, 64L, 94L, 95L, 96L, 140L, 141L, 143L, 165L,
166L, 168L, 206L, 208L, 210L, 227L, 228L, 231L, 254L, 256L, 258L,
273L, 279L, 281L, 301L, 302L, 303L, 324L, 325L, 331L, 341L, 348L,
349L, 365L, 372L, 373L), class = "data.frame")
Then I use these data to fit a model
# Fit a saturating light-response curve scaled by soil temperature:
#   -Photosynth = (ParNEE * a * SoilTemp) / (ParNEE + Pk)
# The response is negated because Photosynth is stored as negative values.
GPPyoung <- nls(
  -Photosynth ~ (ParNEE * (a * SoilTemp)) / (ParNEE + Pk),
  data = Young,
  start = list(a = 0.1, Pk = 100),
  lower = 0.0001,
  algorithm = "port"
)

# Fit summaries.
AIC(GPPyoung) # 201.99
coef(GPPyoung)
summary(GPPyoung)
confint(GPPyoung) # 95% confidence intervals for the parameters

# Keep the fitted parameters for predicting on other data.
Pk <- coef(GPPyoung)["Pk"]
a <- coef(GPPyoung)["a"]
From this fit I get estimates of the parameters Pk and a. I then apply the fitted model to another dataset, Meteo, which is hourly, to predict the hourly values of Photosynth. Finally, I use the sum() function to get the annual value of Photosynth.
My question is how to calculate the confidence interval, or the uncertainty, of the annual value of Photosynth, because it is a sum rather than an average. We really want to know the uncertainty of the whole model.
# Hourly prediction from the fitted parameters a and Pk, using the PAR and
# SoilTemp columns of Meteo.
Meteo$GPPyoung <- with(Meteo, (PAR * (a * SoilTemp)) / (PAR + Pk))
Meteo<- structure(list(Date = c("1/06/2010 0:00", "1/06/2010 1:00", "1/06/2010 2:00",
"1/06/2010 3:00", "1/06/2010 4:00", "1/06/2010 5:00", "1/06/2010 6:00",
"1/06/2010 7:00", "1/06/2010 8:00", "1/06/2010 9:00", "1/06/2010 10:00",
"1/06/2010 11:00", "1/06/2010 12:00", "1/06/2010 13:00", "1/06/2010 14:00",
"1/06/2010 15:00", "1/06/2010 16:00", "1/06/2010 17:00", "1/06/2010 18:00",
"1/06/2010 19:00", "1/06/2010 20:00", "1/06/2010 21:00", "1/06/2010 22:00",
"1/06/2010 23:00", "2/06/2010 0:00", "2/06/2010 1:00", "2/06/2010 2:00",
"2/06/2010 3:00", "2/06/2010 4:00", "2/06/2010 5:00", "2/06/2010 6:00",
"2/06/2010 7:00", "2/06/2010 8:00", "2/06/2010 9:00", "2/06/2010 10:00",
"2/06/2010 11:00", "2/06/2010 12:00", "2/06/2010 13:00", "2/06/2010 14:00",
"2/06/2010 15:00", "2/06/2010 16:00", "2/06/2010 17:00", "2/06/2010 18:00",
"2/06/2010 19:00", "2/06/2010 20:00", "2/06/2010 21:00", "2/06/2010 22:00",
"2/06/2010 23:00", "3/06/2010 0:00", "3/06/2010 1:00"), Year = c(2010L,
2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L,
2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L,
2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L,
2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L,
2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L,
2010L, 2010L, 2010L, 2010L), Month = c(6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L), Day = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
3L), Hour = c(0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 0L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L,
15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 0L, 1L), RelHumid = c(0.95,
0.96, 0.97, 0.97, 0.97, 0.97, 0.97, 0.97, 0.98, 0.95, 0.82, 0.76,
0.7, 0.67, 0.62, 0.63, 0.59, 0.64, 0.65, 0.69, 0.73, 0.78, 0.84,
0.91, 0.94, 0.96, 0.96, 0.97, 0.97, 0.97, 0.97, 0.98, 0.93, 0.73,
0.65, 0.6, 0.52, 0.45, 0.42, 0.41, 0.42, 0.44, 0.46, 0.45, 0.43,
0.49, 0.62, 0.78, 0.9, 0.94), AirTemp = c(6.7, 6.2, 5.3, 4.2,
4, 3.4, 3.9, 5.7, 9.7, 11.6, 14, 15.2, 17.2, 18.2, 19.6, 19,
19.7, 19.1, 18.6, 17.6, 16.6, 15.9, 14.3, 13.3, 11.2, 9.2, 6.9,
5.7, 4.9, 4.1, 3.6, 6.9, 11.5, 14.7, 16.4, 18.3, 20.1, 21.4,
22.1, 22.8, 22.8, 22.8, 22.3, 21.6, 20, 17.9, 14.9, 10.6, 7,
5.8), Solar = c(1.8, 1.8, 1.8, 1.8, 1.8, 1.8, 12.8, 86.1, 212.4,
254.5, 375.4, 368, 629.9, 640.9, 705, 448.6, 465.1, 373.5, 234.4,
166.6, 102.5, 53.1, 12.8, 1.8, 1.8, 1.8, 1.8, 1.8, 1.8, 1.8,
14.6, 113.5, 238, 395.5, 529.2, 679.3, 792.8, 862.4, 884.4, 851.4,
785.5, 699.5, 554.8, 390, 227.1, 87.9, 12.8, 1.8, 1.8, 1.8),
PAR = c(0, 0, 0, 0, 0, 0, 14.6, 155.6, 391.8, 485.2, 734.3,
721.4, 1259.8, 1292.7, 1422.7, 869.8, 917.4, 734.3, 454.1,
311.3, 184.9, 86.1, 12.8, 0, 0, 0, 0, 0, 0, 0, 18.3, 197.8,
459.6, 791, 1073, 1355, 1618.7, 1759.6, 1779.8, 1730.3, 1571,
1386.1, 1080.3, 754.4, 448.6, 168.5, 22, 0, 0, 0), SoilTemp = c(10.23626137,
9.870399475, 9.595145226, 9.342157364, 9.180820465, 9.042314529,
9.019209862, 9.180820465, 9.549196243, 9.984869003, 10.41872025,
10.87355137, 11.3039856, 11.91307545, 12.81039906, 13.16781235,
13.3907938, 13.52444077, 13.25704098, 12.87747669, 12.56419373,
12.25024033, 11.9355793, 11.62017345, 11.23612499, 10.76001549,
10.28190517, 9.893303871, 9.61811161, 9.36518383, 9.203886032,
9.226944923, 9.572173119, 9.961985588, 10.32752895, 10.69184017,
11.19086361, 12.13794422, 13.3907938, 14.83355808, 16.04715157,
16.37726593, 15.95906639, 14.83355808, 13.90255451, 12.98920727,
12.3400116, 11.62017345, 10.94161892, 10.44150639), Hour1 = c("0:00:00",
"1:00:00", "2:00:00", "3:00:00", "4:00:00", "5:00:00", "6:00:00",
"7:00:00", "8:00:00", "9:00:00", "10:00:00", "11:00:00",
"12:00:00", "13:00:00", "14:00:00", "15:00:00", "16:00:00",
"17:00:00", "18:00:00", "19:00:00", "20:00:00", "21:00:00",
"22:00:00", "23:00:00", "0:00:00", "1:00:00", "2:00:00",
"3:00:00", "4:00:00", "5:00:00", "6:00:00", "7:00:00", "8:00:00",
"9:00:00", "10:00:00", "11:00:00", "12:00:00", "13:00:00",
"14:00:00", "15:00:00", "16:00:00", "17:00:00", "18:00:00",
"19:00:00", "20:00:00", "21:00:00", "22:00:00", "23:00:00",
"0:00:00", "1:00:00"), DayNumber = c(4200, 4200, 4200, 4200,
4200, 4200, 4200, 4200, 4200, 4200, 4200, 4200, 4200, 4200,
4200, 4200, 4200, 4200, 4200, 4200, 4200, 4200, 4200, 4200,
4201, 4201, 4201, 4201, 4201, 4201, 4201, 4201, 4201, 4201,
4201, 4201, 4201, 4201, 4201, 4201, 4201, 4201, 4201, 4201,
4201, 4201, 4201, 4201, 4202, 4202), Measurement.Time = structure(list(
sec = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), min = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L), hour = c(0L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 0L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 0L, 1L), mday = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L), mon = c(5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L), year = c(110L, 110L, 110L, 110L, 110L,
110L, 110L, 110L, 110L, 110L, 110L, 110L, 110L, 110L,
110L, 110L, 110L, 110L, 110L, 110L, 110L, 110L, 110L,
110L, 110L, 110L, 110L, 110L, 110L, 110L, 110L, 110L,
110L, 110L, 110L, 110L, 110L, 110L, 110L, 110L, 110L,
110L, 110L, 110L, 110L, 110L, 110L, 110L, 110L, 110L),
wday = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L), yday = c(151L,
151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L,
151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L,
151L, 151L, 151L, 151L, 151L, 152L, 152L, 152L, 152L,
152L, 152L, 152L, 152L, 152L, 152L, 152L, 152L, 152L,
152L, 152L, 152L, 152L, 152L, 152L, 152L, 152L, 152L,
152L, 152L, 153L, 153L), isdst = c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), zone = c("CEST", "CEST", "CEST", "CEST",
"CEST", "CEST", "CEST", "CEST", "CEST", "CEST", "CEST",
"CEST", "CEST", "CEST", "CEST", "CEST", "CEST", "CEST",
"CEST", "CEST", "CEST", "CEST", "CEST", "CEST", "CEST",
"CEST", "CEST", "CEST", "CEST", "CEST", "CEST", "CEST",
"CEST", "CEST", "CEST", "CEST", "CEST", "CEST", "CEST",
"CEST", "CEST", "CEST", "CEST", "CEST", "CEST", "CEST",
"CEST", "CEST", "CEST", "CEST"), gmtoff = c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_)), class = c("POSIXlt",
"POSIXt")), GPPyoung = c(0, 0, 0, 0, 0, 0, 0.590594499464422,
4.25789413130195, 7.13964983293584, 8.09364257311046, 9.59430060918616,
9.96591619422887, 11.703172902607, 12.3885249598014, 13.5318467495935,
12.6479996031782, 13.0198166013451, 12.4542695460619, 10.4923575609493,
8.71893654495587, 6.4730880637325, 3.76653362912719, 0.689650416319457,
0, 0, 0, 0, 0, 0, 0, 0.74554835122207, 4.94356227894899,
7.60948634247972, 9.35134022285773, 10.3784721078367, 11.2061336432827,
12.0500200644512, 13.2185959001941, 14.6046656499378, 16.1185068720173,
17.205706913021, 17.2282182581709, 16.0592648798236, 13.7569296564082,
10.9537918957097, 6.32934387849538, 1.1861821961113, 0, 0,
0)), row.names = 745:794, class = "data.frame")
# Total of the hourly predictions in Meteo. Each hourly value is multiplied by
# 60 * 60 * 12 * 1e-6 before summing -- presumably seconds per hour, 12 g C
# per mol and umol -> mol, i.e. umol m-2 s-1 to g C m-2 per hour (TODO confirm
# the units).
# The total is stored under its own name: assigning it to `Young` would
# overwrite the `Young` data frame that the model fit and the bootstrap use.
annual_gpp_young <- sum(Meteo$GPPyoung * 60 * 60 * 12 * (1 / 1000000),
                        na.rm = TRUE)
My idea is this: the parameters Pk and a follow a normal distribution, so maybe I could loop over a range of values of Pk and a, compute the model results for each pair, and then choose the confidence intervals from the resulting range of results. Finally, could I sum them up to get the annual confidence interval?
I am not sure about this idea.
I'm adding this as a partial solution with some caveats. First is that I'm assuming your initial model is correct, that is the observations can be treated as independent of each other given the covariates and that the shape of the curve is appropriate. If I was doing this for real I would think very hard about the resampling, that is whether I need to sample observations in clusters corresponding to the experimental design.
Second, the bootstrapping doesn't always work: the model sometimes fails to converge, so you'll need to find a way around this if you want to run a reasonable number of bootstrap replicates.
You can use bootstrapping to estimate the confidence interval for the sum you requested as follows. I'm using predict to get the fitted values for the new data, and so I need to make the Meteo dataframe have predictor names that correspond to the names in the model. Then I use boot to estimate the confidence interval.
When I ran the bootstrap just to get the CIs for the model coefficients, they were very close to the CIs reported by running confint on the original model, so it seems to work OK. That also suggests it might be OK to resample the coefficients from the joint distribution of the estimates (you can get the covariance from vcov(GPPyoung)), so maybe the bootstrapping isn't the best way!
# The model was fitted with a predictor called ParNEE, so predict() needs a
# column of that name in Meteo; copy the PAR column under that name.
Meteo[["ParNEE"]] <- Meteo[["PAR"]]
#' Bootstrap statistic: refit the model on a resample and sum its predictions
#'
#' Written to be passed as the `statistic` argument of `boot::boot()`, which
#' calls it with the data and a vector of resampled row indices.
#'
#' @param dat Data frame with columns `Photosynth`, `ParNEE` and `SoilTemp`.
#' @param w Row indices selecting the (re)sample of `dat` to fit on.
#' @param newdata Data frame with `ParNEE` and `SoilTemp` columns to predict
#'   for. Defaults to the global `Meteo`, which the original version read
#'   implicitly.
#' @return A single number: the sum of the predictions for `newdata`, each
#'   multiplied by 60 * 60 * 12 * 1e-6. `NA_real_` (with a warning) when the
#'   model cannot be fitted to this resample.
getEstimates <- function(dat, w, newdata = Meteo) {
  fit <- tryCatch(
    nls(
      -Photosynth ~ (ParNEE * (a * SoilTemp)) / (ParNEE + Pk),
      data = dat[w, ],
      start = list(a = 0.1, Pk = 100),
      lower = 0.0001,
      algorithm = "port"
    ),
    # nls() raises an error when a resample does not converge. Letting that
    # error propagate would abort the whole boot() run, so report it and mark
    # the replicate as missing instead.
    error = function(e) {
      warning("nls() failed on this resample: ", conditionMessage(e),
              call. = FALSE)
      NULL
    }
  )
  if (is.null(fit)) {
    return(NA_real_)
  }
  sum(predict(fit, newdata = newdata) * 60 * 60 * 12 * (1 / 1000000),
      na.rm = TRUE)
}
# boot provides boot() and boot.ci().
library("boot")

# 100 ordinary nonparametric bootstrap replicates: each one resamples the rows
# of Young and recomputes the statistic with getEstimates().
b1 <- boot(data = Young, statistic = getEstimates, R = 100)
print(b1)

# Confidence intervals for the bootstrapped statistic.
boot.ci(boot.out = b1)
> b1
ORDINARY NONPARAMETRIC BOOTSTRAP
Call:
boot(data = Young, statistic = getEstimates, R = 100)
Bootstrap Statistics :
original bias std. error
t1* 14.19008 0.1035427 0.9388517
> boot.ci(b1)
BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
Based on 100 bootstrap replicates
CALL :
boot.ci(boot.out = b1)
Intervals :
Level Normal Basic
95% (12.25, 15.93 ) (11.71, 15.98 )
Level Percentile BCa
95% (12.40, 16.67 ) (12.38, 16.46 )
Calculations and Intervals on Original Scale
Some basic intervals may be unstable
Some percentile intervals may be unstable
Some BCa intervals may be unstable
Related
ANOVA error: why is each row of output *not* identified by a unique combination of keys?
I have a two-way ANOVA test (w/repeated measures) that I'm using with four almost identical datasets: > res.aov <- anova_test( + data = LST_Weather_dataset_N, dv = LST, wid = Month, + within = c(Buffer, TimePeriod), + effect.size = "ges", + detailed = TRUE, + ) Where: LST = surface temperature deviation in C Month = 1-12 Buffer = a value 100-1900 - one of 19 areas outward from the boundary of a solar power plant (each 100m wide) TimePeriod = a factor with a value of 1 or 2 corresponding to pre-/post-construction of a solar power plant. For one dataset I get the error: Error: Each row of output must be identified by a unique combination of keys. Keys are shared for 38 rows: * 10, 11 * 217, 218 * 240, 241 * 263, 264 * 286, 287 * 309, 310 * 332, 333 ... As far as I can tell I have unique combinations. dplyr::count(LST_Weather_dataset_N, LST, Month, Buffer, TimePeriod, sort = TRUE) returns LST Month Buffer TimePeriod n 1 -6.309045316 12 100 2 1 2 -5.655279925 9 1000 2 1 3 -5.224196295 12 200 2 1 4 -5.194473224 9 1100 2 1 5 -5.025429891 12 400 2 1 6 -4.987575966 9 700 2 1 7 -4.979453868 12 600 2 1 8 -4.825298768 12 300 2 1 9 -4.668994574 12 500 2 1 10 -4.652282192 12 700 2 1 ... 'n' is always 1. I can't work out why this is happening. 
Extract of datafram below: > dput(LST_Weather_dataset_N[sample(1:nrow(LST_Weather_dataset_N), 50),]) structure(list(Buffer = c(1400L, 700L, 300L, 1400L, 100L, 200L, 1700L, 100L, 800L, 1900L, 1100L, 100L, 700L, 800L, 1400L, 400L, 1300L, 200L, 1200L, 500L, 1200L, 1300L, 400L, 1000L, 1300L, 1100L, 100L, 300L, 300L, 600L, 1100L, 1400L, 1500L, 1600L, 1700L, 1800L, 1700L, 1300L, 1200L, 300L, 1100L, 1900L, 1700L, 700L, 1400L, 1200L, 1600L, 1700L, 1900L, 1300L), Date = c("02/05/2014", "18/01/2017", "19/06/2014", "25/12/2013", "15/09/2017", "08/04/2017", "22/08/2014", "21/07/2014", "13/07/2017", "25/12/2013", "22/10/2013", "02/05/2014", "07/03/2017", "15/03/2014", "13/07/2017", "19/06/2014", "25/12/2013", "17/10/2017", "16/04/2014", "06/10/2013", "15/09/2017", "18/01/2017", "10/01/2014", "17/12/2016", "13/07/2017", "19/06/2014", "07/03/2017", "15/03/2014", "11/02/2014", "22/10/2013", "06/10/2013", "15/09/2017", "16/04/2014", "18/01/2017", "15/03/2014", "21/07/2014", "17/10/2017", "15/09/2017", "10/01/2014", "23/09/2014", "16/04/2014", "22/10/2013", "11/06/2017", "26/05/2017", "19/06/2014", "14/08/2017", "11/02/2014", "26/02/2017", "26/02/2017", "11/02/2014"), LST = c(1.255502397, 4.33385966, 3.327025603, -0.388631166, -0.865430798, 4.386292648, -0.243018665, 3.276865987, 0.957036835, -0.065821795, 0.69731779, 4.846851651, -1.437700684, 1.003808572, 0.572460421, 2.995902374, -0.334633662, -1.231447567, 0.644520741, 0.808262029, -3.392959991, 2.324569449, 2.346707612, -3.124354627, 0.58719862, 1.904859254, 1.701580958, 2.792443253, 1.638270039, 1.460743317, 0.699767335, -3.015643366, 0.930527864, 1.309519336, 0.477789664, 0.147584938, -0.498188865, -3.506795723, -1.007487965, 1.149604087, 1.192366386, 0.197471474, 0.999391224, -0.190613618, 1.27324015, 2.686622796, 0.573109026, 0.97847983, 0.395005095, -0.40855426), Month = c(5L, 1L, 6L, 12L, 9L, 4L, 8L, 7L, 7L, 12L, 10L, 5L, 3L, 3L, 7L, 6L, 12L, 10L, 4L, 10L, 9L, 1L, 1L, 12L, 7L, 6L, 3L, 3L, 2L, 10L, 10L, 9L, 4L, 1L, 3L, 7L, 
10L, 9L, 1L, 9L, 4L, 10L, 6L, 5L, 6L, 8L, 2L, 2L, 2L, 2L), Year = c(2014L, 2017L, 2014L, 2013L, 2017L, 2017L, 2014L, 2014L, 2017L, 2013L, 2013L, 2014L, 2017L, 2014L, 2017L, 2014L, 2013L, 2017L, 2014L, 2013L, 2017L, 2017L, 2014L, 2016L, 2017L, 2014L, 2017L, 2014L, 2014L, 2013L, 2013L, 2017L, 2014L, 2017L, 2014L, 2014L, 2017L, 2017L, 2014L, 2014L, 2014L, 2013L, 2017L, 2017L, 2014L, 2017L, 2014L, 2017L, 2017L, 2014L ), JulianDay = c(122L, 18L, 170L, 359L, 258L, 98L, 234L, 202L, 194L, 359L, 295L, 122L, 66L, 74L, 194L, 170L, 359L, 290L, 106L, 279L, 258L, 18L, 10L, 352L, 194L, 170L, 66L, 74L, 42L, 295L, 279L, 258L, 106L, 18L, 74L, 202L, 290L, 258L, 10L, 266L, 106L, 295L, 162L, 146L, 170L, 226L, 42L, 57L, 57L, 42L), TimePeriod = c(1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L), Temperature = c(28L, 9L, 31L, 12L, 27L, 21L, 29L, 36L, 38L, 12L, 23L, 28L, 12L, 21L, 38L, 31L, 12L, 23L, 25L, 22L, 27L, 9L, 11L, 7L, 38L, 31L, 12L, 21L, 14L, 23L, 22L, 27L, 25L, 9L, 21L, 36L, 23L, 27L, 11L, 31L, 25L, 23L, 29L, 27L, 31L, 34L, 14L, 16L, 16L, 14L), Humidity = c(6L, 34L, 7L, 31L, 29L, 22L, 34L, 15L, 19L, 31L, 16L, 6L, 14L, 14L, 19L, 7L, 31L, 12L, 9L, 12L, 29L, 34L, 33L, 18L, 19L, 7L, 14L, 14L, 31L, 16L, 12L, 29L, 9L, 34L, 14L, 15L, 12L, 29L, 33L, 18L, 9L, 16L, 8L, 13L, 7L, 13L, 31L, 31L, 31L, 31L), Wind_speed = c(6L, 0L, 6L, 7L, 13L, 33L, 6L, 20L, 9L, 7L, 0L, 6L, 0L, 6L, 9L, 6L, 7L, 6L, 0L, 7L, 13L, 0L, 0L, 35L, 9L, 6L, 0L, 6L, 6L, 0L, 7L, 13L, 0L, 0L, 6L, 20L, 6L, 13L, 0L, 0L, 0L, 0L, 24L, 11L, 6L, 24L, 6L, 26L, 26L, 6L), Wind_gust = c(0L, 0L, 0L, 0L, 0L, 54L, 0L, 46L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 48L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 46L, 0L, 0L, 0L, 0L, 0L, 0L, 48L, 0L, 0L, 39L, 0L, 41L, 41L, 0L), Wind_trend = c(1L, 0L, 1L, 1L, 2L, 2L, 0L, 1L, 2L, 1L, 0L, 1L, 0L, 1L, 2L, 1L, 1L, 0L, 0L, 2L, 
2L, 0L, 1L, 1L, 2L, 1L, 0L, 1L, 1L, 0L, 2L, 2L, 0L, 0L, 1L, 1L, 0L, 2L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), Wind_direction = c(0, 0, 0, 337.5, 360, 22.5, 0, 22.5, 0, 337.5, 0, 0, 0, 0, 0, 0, 337.5, 180, 0, 247.5, 360, 0, 0, 180, 0, 0, 0, 0, 337.5, 0, 247.5, 360, 0, 0, 0, 22.5, 180, 360, 0, 0, 0, 0, 360, 22.5, 0, 360, 337.5, 360, 360, 337.5), Pressure = c(940.2, 943.64, 937.69, 951.37, 932.69, 933.94, 937.07, 938.01, 937.69, 951.37, 939.72, 940.2, 948.33, 947.71, 937.69, 937.69, 951.37, 943.32, 932.69, 944.71, 932.69, 943.64, 942.31, 943.01, 937.69, 937.69, 948.33, 947.71, 941.94, 939.72, 944.71, 932.69, 932.69, 943.64, 947.71, 938.01, 943.32, 932.69, 942.31, 938.94, 932.69, 939.72, 928.31, 931.12, 937.69, 932.37, 941.94, 936.13, 936.13, 941.94), Pressure_trend = c(1L, 2L, 0L, 2L, 0L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 0L, 2L, 1L, 2L, 1L, 0L, 2L, 2L, 2L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 2L, 2L, 1L, 1L, 1L, 0L, 2L, 1L, 2L, 1L, 0L, 0L, 0L, 1L, 1L, 2L, 2L, 1L)), row.names = c(179L, 14L, 195L, 426L, 306L, 118L, 299L, 229L, 244L, 436L, 374L, 153L, 90L, 91L, 256L, 197L, 424L, 348L, 137L, 355L, 328L, 26L, 7L, 419L, 254L, 211L, 78L, 81L, 43L, 359L, 373L, 332L, 143L, 32L, 109L, 263L, 393L, 330L, 23L, 309L, 135L, 398L, 224L, 166L, 217L, 290L, 69L, 72L, 76L, 63L), class = "data.frame")
Well, this is a bit embarrassing. The error arose as there were not, in fact, paired months of the data. Rather than there being 38 data (19x2) for each month, due to an error in determining the month value one month had 57 data (19x3). Correcting this, and checking that each month had the same number of paired data for the ANOVA allowed the test to run successfully. > res.aov <- anova_test( + data = LST_Weather_dataset_N, dv = LST, wid = Month, + within = c(Buffer, TimePeriod), + effect.size = "ges", + detailed = TRUE, + ) > get_anova_table(res.aov, correction = "auto") ANOVA Table (type III tests) Effect DFn DFd SSn SSd F p p<.05 ges 1 (Intercept) 1 11 600.135 974.584 6.774 2.50e-02 * 0.189 2 Buffer 18 198 332.217 331.750 11.015 2.05e-21 * 0.115 3 TimePeriod 1 11 29.561 977.945 0.333 5.76e-01 0.011 4 Buffer:TimePeriod 18 198 13.055 283.797 0.506 9.53e-01 0.005 I still don't understand how the error message was telling me this, though.
Aesthetic Mapping Temperature Data in ggplot2
I am very new to r and I think I am just missing something very simple. At least I hope. I am trying to plot temperature change by site using aes in ggplot2. However, when I run the code no data is pulling up in the chart. This is the code I am using. My data is called diss, x=order, and y=Tempdiff. tempplot<-ggplot(data = diss,mapping = aes(order,Tempdiff))+geom_segment(data = diss , mapping = aes(x=order,y=Tempdiff, xend=order, yend=Tempdiff, color="red")) plot(tempplot) There are no errors when I run the code and the graph pulls up, it is just empty. Is there something in my code that I am unaware of that is removing the data or have I just missed a critical code in actually adding the data? If I have left something out that is necessary for answering this question I am happy to add more. Thank you in advance! Addition: > dput(diss) structure(list(Paper = c(1L, 2L, 3L, 4L, 5L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 9L, 9L, 10L, 10L, 10L), Specimen = structure(c(4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 2L, 2L, 4L, 4L, 4L), .Label = c("", "Ochotona_collaris", "Ochotona_curzoniae", "Ochotona_principes"), class = "factor"), Site = structure(c(15L, 16L, 6L, 10L, 3L, 7L, 14L, 18L, 8L, 17L, 17L, 17L, 17L, 11L, 2L, 13L, 9L, 9L, 4L, 12L, 5L), .Label = c("", "Arapaho_Basin", "Bodi ", "Bouldera_Pass", "Calf_Robe", "Columbia_River_Gorge", "Craters_of_Moon", "Grand_Teton ", "Kluane_Lake", "Lava_Butte Flow_OR", "Loveland_Pass", "Mad_Wolf", "Niwat_Ridge", "Rocky_Mt", "South_Couliee", "Sugar_Hill_Modoc", "Tibet_China", "Yellowstone"), class = "factor"), elevation = c(2565L, 2203L, 316L, 1200L, 2600L, 1700L, 3462L, 2424L, 2815L, 3846L, 3846L, 3846L, 3846L, 3654L, 3878L, 3744L, 2000L, 2000L, 2133L, 2026L, 2004L), yr1 = c(2012L, 1894L, 2011L, 2011L, 1972L, 2010L, 2010L, 2010L, 2010L, 2005L, 2006L, 2007L, 2008L, 1981L, 1981L, 1981L, 1999L, 2003L, 2007L, 2008L, 2008L), yr1pop = c(9L, 4L, 9L, 9L, 48L, 55L, 39L, 71L, 54L, 182L, 219L, 223L, 231L, 6L, 4L, 
10L, 5L, 3L, 2L, 3L, 7L), yr1temp = c(25, 30, 32.1, 20, 20, 27.1, 15.4, 21.2, 18, 11, 13, 14, 12.5, 24.7, 26.5, 28.4, 12, 17, 20, 20, 20), yr2 = c(2013L, 1910L, 2012L, 2012L, 2006L, 2011L, 2011L, 2011L, 2011L, 2006L, 2007L, 2008L, 2009L, 1983L, 1983L, 1983L, 2002L, 2006L, 2009L, 2009L, 2009L), yr2temp = c(26, 31.4, 33.2, 17.1, 24.76, 30.7, 18.2, 22, 20, 13, 14, 12.5, 13, 26.1, 27.3, 28.3, 15, 16, 20, 20, 10), yr2pop = c(9L, 2L, 9L, 7L, 4L, 40L, 20L, 37L, 37L, 219L, 223L, 231L, 214L, 5L, 4L, 8L, 4L, 2L, 2L, 7L, 2L), delta_temp = c(1, 1.4, 1.1, 2.9, 4.76, 3.6, 2.8, 0.8, 2, 2, 1, 1.5, 0.5, 1.4, 0.8, 0.1, 3, 1, 0, 0, 10), avg_temp = c(25.5, 30.7, 32.65, 18.55, 44.76, 28.9, 16.8, 21.6, 19, 12, 13.5, 13.25, 12.75, 25.4, 26.9, 28.35, 13, 16.5, 20, 20, 15), delta_pop = c(0L, 2L, 0L, 2L, 44L, 15L, 19L, 34L, 17L, 37L, 4L, 8L, 17L, 1L, 0L, 2L, 1L, 1L, 0L, 4L, 5L), avg_pop = c(9, 3, 9, 8, 26, 47.5, 29.5, 54, 45.5, 200.5, 221, 227, 222.5, 5.5, 4, 9, 4.5, 3.5, 2, 4, 4.5), SE_temp = c(0.499, 0.7, 0.6, 1.45, 2.381, 1.8, 1.4, 0.4, 1, 1.414, 0.707, 1.06, 0.353, 0.989, 0.565, 0.07, 2.121, 0.707, 0, 0, 7.07), CI_low_temp = c(25.025, 30.035, 32.03, 17.172, 20.119, 27.19, 15.47, 21.22, 18.05, 11.05, 13.025, 12.537, 12.512, 24.735, 26.52, 28.302, 12.075, 16.025, 20, 20, 10.25), CI_upper_temp = c(25.975, 31.365, 33.17, 19.927, 24.641, 30.61, 18.13, 21.98, 19.95, 12.95, 13.974, 13.962, 12.987, 26.065, 27.78, 28.397, 14.925, 16.975, 20, 20, 19.75), SE_pop = c(0, 1.414, 0, 1.414, 31.112, 10.606, 13.425, 24.041, 12.02, 26.162, 2.828, 5.656, 12.02, 0.707, 0, 1.414, 0.707, 0.707, 0, 2.828, 3.535), CI_pop_low = c(9, 2.05, 9, 7.05, 5.1, 40.375, 20.475, 37.85, 37.425, 182.925, 219.1, 223.2, 214.425, 5.025, 4, 8.05, 4.025, 2.025, 2, 3.1, 2.125), CI_pop_upper = c(9, 3.95, 9, 8.95, 46.9, 54.625, 38.525, 70.15, 53.575, 218.075, 222.9, 230.8, 230.575, 5.975, 4, 9.95, 4.975, 2.975, 2, 6.9, 6.875), Tempdiff = c(1, 1.4, 1.1, -2.9, 4.76, 3.6, 2.8, 0.800000000000001, 2, 2, 1, -1.5, 0.5, 1.4, 
0.800000000000001, -0.0999999999999979, 3, -1, 0, 0, -10), popdiff = c(0L, -2L, 0L, -2L, -44L, -15L, -19L, -34L, -17L, 37L, 4L, 8L, -17L, -1L, 0L, -2L, -1L, -1L, 0L, 4L, -5L), order = 1:21), .Names = c("Paper", "Specimen", "Site", "elevation", "yr1", "yr1pop", "yr1temp", "yr2", "yr2temp", "yr2pop", "delta_temp", "avg_temp", "delta_pop", "avg_pop", "SE_temp", "CI_low_temp", "CI_upper_temp", "SE_pop", "CI_pop_low", "CI_pop_upper", "Tempdiff", "popdiff", "order"), row.names = c(NA, 21L), class = "data.frame")
Trouble with GLMM with glmer in R: Error in pwrssUpdate...halvings failed to reduce deviance in pwrssUpdate
Here's a snipped of randomly selected data from my full dataframe: canopy<-structure(list(Stage = structure(c(6L, 5L, 3L, 6L, 7L, 5L, 4L, 7L, 2L, 7L, 5L, 1L, 1L, 4L, 3L, 6L, 5L, 7L, 4L, 4L), .Label = c("milpa", "robir", "jurup che", "pak che kor", "mehen che", "nu kux che", "tam che"), class = c("ordered", "factor")), ID = c(44L, 34L, 18L, 64L, 54L, 59L, 28L, 51L, 11L, 56L, 33L, 1L, 7L, 25L, 58L, 48L, 36L, 51L, 27L, 66L), Sample = c(4L, 2L, 2L, 10L, 6L, 9L, 4L, 3L, 3L, 8L, 1L, 1L, 7L, 1L, 10L, 8L, 4L, 3L, 3L, 10L), Subsample = c(2L, 3L, 4L, 3L, 2L, 1L, 3L, 2L, 4L, 3L, 1L, 3L, 2L, 4L, 1L, 1L, 3L, 1L, 1L, 4L), Size..ha. = c(0.5, 0.5, 0.5, 0.5, 6, 0.5, 0.5, 0.25, 0.5, 6, 1, 1, 0.5, 2, 1, 0.5, 1, 0.25, 0.5, 2), Avg.Subsample.Canopy = c(94.8, 94.8, 97.92, 96.88, 97.14, 92.46, 93.24, 97.4, 25.64, 97.4, 94.8, 33.7, 13.42, 98.18, 85.44, 96.36, 97.4, 95.58, 85.7, 92.2), dec = c(0.948, 0.948, 0.9792, 0.9688, 0.9714, 0.9246, 0.9324, 0.974, 0.2564, 0.974, 0.948, 0.337, 0.1342, 0.9818, 0.8544, 0.9636, 0.974, 0.9558, 0.857, 0.922)), .Names = c("Stage", "ID", "Sample", "Subsample", "Size..ha.", "Avg.Subsample.Canopy", "dec"), row.names = c(693L, 537L, 285L, 1017L, 853L, 929L, 441L, 805L, 173L, 889L, 513L, 9L, 101L, 397L, 913L, 753L, 569L, 801L, 417L, 1053L), class = "data.frame") I am trying to code a GLMM of dec as a function of Stage and Size..ha. The GLMM is necessary because each row represents a point Subsample measured within a larger Sample area. I am also using a binomial distribution given dec are proportional data. I tried the model: canopy.binomial.mod<-glmer(dec~Stage*Size..ha.+(1|Sample),family="binomial",data=canopy) summary(canopy.binomial.mod) but get the error: Error in pwrssUpdate(pp, resp, tol = tolPwrss, GQmat = GQmat, compDev = compDev, : (maxstephalfit) PIRLS step-halvings failed to reduce deviance in pwrssUpdate I've seen online that this can be a result of needing to scale a predictor variable, so I tried: cs. 
<- function(x) scale(x,scale=TRUE,center=TRUE) canopy.binomial.mod<-glmer(dec~Stage*cs.(Size..ha.)+(1|Sample),family="binomial",data=canopy.rmna) summary(canopy.binomial.mod) Which doesn't seem to help. I also thought that maybe I'm asking too much of the model and it's not converging due to too many predictor variables, so let's remove the Size variable, which is of less interest to me. canopy.binomial.mod<-glmer(dec~Stage+(1|Sample),family="binomial",data=canopy.rmna) summary(canopy.binomial.mod) Still no luck. Any ideas how to address this?
Multiple NA's for the last variables of linear regression model in R
I am trying to run a linear regression model where I have dummy variables in my data to indicate if a certain predictor variable is not present. I have a total of 15 predictor variables. No matter the order of my predictor variables, the last five variables always result in NA. This problem is almost exactly the same as the one asked here: linear regression "NA" estimate just for last coefficient I tried adding -1 or +0 to the code lm(H~id11+id21+id22+id23+id24+id31+id41+id42+id43+id52+id71+id81+id82+id90+id95, data=macro.shed) And that resulted in only one less value being NA. So now I have 4, instead of 5, predictor variables being NA. I am reading in my data from csv documents. This is my code: watershed = read.csv("nlcd_2000_watershed.csv") macro_2000 = read.csv("wapp_macro_2000.csv") temp1 = matrix(watershed$Area,ncol=15,byrow=T) nlcd_watershed = data.frame(cbind(unique(watershed$WaterID),temp1)) names(nlcd_watershed)=c("WaterID",paste("id",unique(watershed$Value),sep="")) macro.shed = merge(macro_2000,nlcd_watershed,by.x="WaterID",by.y="WaterID") data.frame(unique(watershed$Value),unique(watershed$NLCD)) This is my data for macro.shed: dput(macro.shed) structure(list(WaterID = c(1L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 10L, 10L, 10L, 10L, 11L), ID = structure(c(1L, 16L, 2L, 9L, 10L, 11L, 12L, 13L, 15L, 8L, 3L, 4L, 5L, 6L, 7L, 14L), .Label = c("L1", "L10", "L11", "L12", "L13", "L14", "L15", "L16", "L2", "L3", "L4", "L5", "L6", "L7", "L8", "L9"), class = "factor"), Date = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "8/20/2001", class = "factor"), UTMX = c(607308L, 607112L, 598526L, 592235L, 603094L, 597749L, 605523L, 608668L, 600517L, 601806L, 597548L, 593815L, 591453L, 607187L, 606851L, 589528L), UTMY = c(4639040L, 4643780L, 4622470L, 4608350L, 4629780L, 4623340L, 4634330L, 4636950L, 4628160L, 4630380L, 4621720L, 4611960L, 4607960L, 4636480L, 4636020L, 4605120L), Watershed = structure(c(1L, 1L, 2L, 3L, 4L, 5L, 6L, 
7L, 8L, 9L, 10L, 10L, 10L, 10L, 10L, 11L), .Label = c("Cold Spring Creek", "Drake Brook", "Dutchess County Airport", "East Branch Wappinger", "Great Spring Creek", "Grist Mill Creek", "Hunns Lake Creek", "Little Wappinger", "Upton Lake Creek", "Wappinger Creek", "Wappinger Falls"), class = "factor"), richness = c(37L, 20L, 32L, 14L, 23L, 20L, 23L, 28L, 25L, 32L, 31L, 30L, 23L, 33L, 19L, 19L), H = c(0.9, 1, 0.9, 0.8, 1, 0.8, 0.7, 1, 1, 1, 1, 1, 1, 1, 0.9, 1), EPT = c(18L, 14L, 13L, 3L, 15L, 12L, 15L, 19L, 15L, 21L, 17L, 16L, 13L, 20L, 13L, 12L), DOM = c(62.1, 61.5, 64.1, 73.7, 53.4, 74, 80.3, 59.2, 55.6, 56.8, 57.4, 59.4, 54.2, 59.8, 66, 52.2), PMA = c(58.1, 51, 59.3, 39.9, 58.4, 45.2, 54.5, 75.3, 56.2, 64.3, 66, 53.7, 55.6, 60.4, 52.3, 42.4), FBI = c(3.8, 3.4, 4, 3.9, 3.6, 4.2, 5.2, 3.8, 3.5, 4.1, 3.7, 3.7, 4, 3.8, 3.5, 3.6), BAP = c(8.3, 6.8, 7.8, 3.9, 7.4, 6, 6.8, 8.4, 7.5, 8.2, 8.3, 7.8, 6.8, 8.3, 6.6, 6), Insects.sample = c(7123L, 516L, 2061L, 1341L, 921L, 961L, 580L, 1567L, 1180L, 4226L, 4133L, 1400L, 2325L, 2596L, 687L, 609L), id11 = c(216900L, 216900L, 4923900L, 131400L, 1806300L, 0L, 41945400L, 250200L, 200700L, 1908000L, 4500L, 4500L, 4500L, 4500L, 4500L, 25427700L), id21 = c(83700L, 83700L, 1163700L, 1290600L, 0L, 0L, 11841300L, 2824200L, 110700L, 136800L, 9000L, 9000L, 9000L, 9000L, 9000L, 9145800L ), id22 = c(111600L, 111600L, 596700L, 7245000L, 63900L, 11700L, 7293600L, 5060700L, 323100L, 179100L, 55800L, 55800L, 55800L, 55800L, 55800L, 3876300L), id23 = c(413100L, 413100L, 611100L, 1817100L, 0L, 0L, 11107800L, 208800L, 1713600L, 33300L, 204300L, 204300L, 204300L, 204300L, 204300L, 6268500L ), id24 = c(239400L, 239400L, 4547700L, 193500L, 26100L, 10800L, 48636900L, 88200L, 1139400L, 41400L, 16200L, 16200L, 16200L, 16200L, 16200L, 14818500L), id31 = c(63900L, 63900L, 14319000L, 526500L, 139500L, 0L, 58785300L, 398700L, 1723500L, 73800L, 0L, 0L, 0L, 0L, 0L, 31161600L), id41 = c(384300L, 384300L, 4142700L, 0L, 86400L, 0L, 9641700L, 357300L, 3166200L, 
392400L, 0L, 0L, 0L, 0L, 0L, 963900L), id42 = c(729000L, 729000L, 508500L, 209700L, 13500L, 0L, 4072500L, 682200L, 2137500L, 31500L, 10800L, 10800L, 10800L, 10800L, 10800L, 3993300L), id43 = c(1224000L, 1224000L, 1266300L, 1532700L, 0L, 418500L, 6607800L, 695700L, 1356300L, 10800L, 78300L, 78300L, 78300L, 78300L, 78300L, 5419800L), id52 = c(16200L, 16200L, 57600L, 600300L, 17100L, 0L, 1730700L, 958500L, 120600L, 101700L, 20700L, 20700L, 20700L, 20700L, 20700L, 0L), id71 = c(22500L, 22500L, 780300L, 208800L, 5400L, 0L, 1139400L, 533700L, 7085700L, 582300L, 0L, 0L, 0L, 0L, 0L, 198000L), id81 = c(221400L, 221400L, 3398400L, 0L, 1649700L, 0L, 287100L, 155700L, 6300900L, 1511100L, 13500L, 13500L, 13500L, 13500L, 13500L, 264600L ), id82 = c(665100L, 665100L, 1513800L, 41400L, 447300L, 0L, 3083400L, 132300L, 616500L, 53100L, 2943900L, 2943900L, 2943900L, 2943900L, 2943900L, 931500L), id90 = c(2142000L, 2142000L, 826200L, 215100L, 0L, 17705700L, 630000L, 1156500L, 590400L, 15300L, 4598100L, 4598100L, 4598100L, 4598100L, 4598100L, 311400L), id95 = c(4628700L, 4628700L, 113400L, 4897800L, 0L, 10526400L, 358200L, 2281500L, 1431900L, 33300L, 4982400L, 4982400L, 4982400L, 4982400L, 4982400L, 0L)), .Names = c("WaterID", "ID", "Date", "UTMX", "UTMY", "Watershed", "richness", "H", "EPT", "DOM", "PMA", "FBI", "BAP", "Insects.sample", "id11", "id21", "id22", "id23", "id24", "id31", "id41", "id42", "id43", "id52", "id71", "id81", "id82", "id90", "id95"), row.names = c(NA, -16L ), class = "data.frame") How do I make it so that the last variables are not resulting in NAs?
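You're trying to fit 15 predictors (16 coefficients if you include an intercept) with only 16 observations. That's not enough data to calculate that many parameters, which is why you're only getting estimates for some of them. Worse, the land-cover columns (id11 to id95) are identical for rows that share a WaterID, so there are only 11 distinct predictor rows; the design matrix can have rank at most 11, which matches the 5 NA coefficients you see with an intercept and the 4 you see without one. You'll need to use some sort of regularisation or model selection, but even then your estimates will be sensitive to the method you choose.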
To add on to the answer provided by @Pete, many of your variables have high collinearity. To visualize this easily: library(corrplot) corPlot <- cor( macro.shed[, c(15:29)]) corPlot <- cor(x) corrplot(corPlot, method = "number")
batch plots with ggplot2
I have a problem with a "serial" plotting with ggplot2. This is my df: structure(list(ID = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("P1", "P21", "P24", "P25" ), class = "factor"), Date = structure(c(21L, 22L, 24L, 25L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 1L, 2L, 3L, 4L, 6L, 7L, 8L, 9L, 10L, 12L, 22L, 23L, 26L, 27L, 29L, 30L, 31L, 32L, 1L, 2L, 3L, 4L, 5L, 7L, 9L, 8L, 11L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L), .Label = c("1996-05-30", "1996-12-06", "1997-03-18", "1997-06-27", "1997-09-29", "1997-09-30", "1997-11-24", "1998-03-13", "1998-05-07", "1998-07-09", "1998-07-14", "1998-10-26", "1998-10-30", "1999-03-15", "1999-06-16", "1999-09-06", "1999-11-10", "2000-03-15", "2000-06-21", "2000-11-28", "2007-09-04", "2007-10-16", "2008-05-21", "2008-05-22", "2008-08-06", "2008-08-12", "2008-10-16", "2008-10-17", "2009-07-01", "2009-10-14", "2010-07-02", "2010-09-29", "2011-06-09", "2011-08-23" ), class = "factor"), T = c(11.1, 11.1, 10.9, 10.9, 10.6, 11, 10.5, 11.2, 10.9, 10.7, 11.1, 10.9, 10.2, 10.2, 10.9, 10.2, 9.9, 10, 10.3, 10.5, 10.1, 11.1, 11.1, 11.1, 10.9, 11.1, 10.7, 11.3, 11, 11.4, 10.2, 10.2, 10.7, 10.3, 9.9, 10.2, 10.1, 10.2, 10.2, 10.2, 10.6, 10.7, 10.2, 10.3, 11, 10.6), ph = c(6.76, 6.72, 6.9, 6.91, 6.96, 6.98, 6.94, 7.02, 7, 6.92, 6.94, 6.5, 6.4, 6.7, 6.52, 6.6, 6.6, 6.5, 6.55, 6.51, 6.59, 6.72, 6.76, 6.82, 6.8, 6.76, 6.76, 6.88, 6.82, 6.7, 6.7, 6.9, 6.71, 6.9, 6.8, 6.7, 6.69, 6.79, 6.69, 6.68, 6.5, 6.67, 6.65, 6.73, 6.78, 6.68), EC = c(1499L, 2120L, 881L, 902L, 870L, 541L, 891L, 876L, 860L, 868L, 877L, 3630L, 3400L, 2470L, 2330L, 1810L, 2190L, 2810L, 2200L, 2440L, 1111L, 2120L, 1654L, 1746L, 1781L, 761L, 1627L, 1733L, 1633L, 2440L, 3130L, 3180L, 2530L, 2710L, 2450L, 2630L, 3610L, 2190L, 973L, 3650L, 3060L, 3280L, 2930L, 879L, 3040L, 3030L), Month = structure(c(9L, 8L, 6L, 1L, 8L, 3L, 8L, 3L, 
9L, 4L, 1L, 6L, 2L, 5L, 4L, 9L, 7L, 5L, 6L, 3L, 8L, 8L, 6L, 1L, 8L, 3L, 8L, 3L, 9L, 6L, 2L, 5L, 4L, 9L, 7L, 6L, 5L, 3L, 8L, 5L, 4L, 9L, 7L, 5L, 4L, 7L), .Label = c("August", "December", "July", "June", "March", "May", "November", "October", "September"), class = "factor"), Year = c(2007L, 2007L, 2008L, 2008L, 2008L, 2009L, 2009L, 2010L, 2010L, 2011L, 2011L, 1996L, 1996L, 1997L, 1997L, 1997L, 1997L, 1998L, 1998L, 1998L, 1998L, 2007L, 2008L, 2008L, 2008L, 2009L, 2009L, 2010L, 2010L, 1996L, 1996L, 1997L, 1997L, 1997L, 1997L, 1998L, 1998L, 1998L, 1998L, 1999L, 1999L, 1999L, 1999L, 2000L, 2000L, 2000L)), .Names = c("ID", "Date", "T", "ph", "EC", "Month", "Year"), class = "data.frame", row.names = c(NA, -46L)) What I want to do is a function that plot for every ID, EC value versus Month and grouping them in Years. First I changed the Month column in an ordered factor: df$Month<-factor(df$Month, levels=month.name, ordered=T) Then I tried to do that with this code: by (df,df$ID,function(i){ ggplot(df) + geom_point(aes(i$Month, i$EC, group=i$Year)) }) But I receive an error. What is weird is that the same code works for lattice: by (df,df$ID,function(i){ xyplot(i$EC~i$Month, data=df, group=i$Year, main=list(unique(i$ID)), xlab="Months", ylab="EC",type=c('p','l','g'), auto.key=list(columns=3,lines=TRUE)) }) I really don't know where I'm missing with ggplot. Any suggestions?
You should pass i to the ggplot() call, because i is now the name of the subsetted data frame, and you don't need to use i$ inside aes(). by(df,df$ID,function(i) { ggplot(i) + geom_point(aes(Month, EC, group=Year)) })