batch plots with ggplot2 - r

I have a problem with "serial" (batch) plotting in ggplot2. This is my df:
structure(list(ID = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L), .Label = c("P1", "P21", "P24", "P25"
), class = "factor"), Date = structure(c(21L, 22L, 24L, 25L,
28L, 29L, 30L, 31L, 32L, 33L, 34L, 1L, 2L, 3L, 4L, 6L, 7L, 8L,
9L, 10L, 12L, 22L, 23L, 26L, 27L, 29L, 30L, 31L, 32L, 1L, 2L,
3L, 4L, 5L, 7L, 9L, 8L, 11L, 13L, 14L, 15L, 16L, 17L, 18L, 19L,
20L), .Label = c("1996-05-30", "1996-12-06", "1997-03-18", "1997-06-27",
"1997-09-29", "1997-09-30", "1997-11-24", "1998-03-13", "1998-05-07",
"1998-07-09", "1998-07-14", "1998-10-26", "1998-10-30", "1999-03-15",
"1999-06-16", "1999-09-06", "1999-11-10", "2000-03-15", "2000-06-21",
"2000-11-28", "2007-09-04", "2007-10-16", "2008-05-21", "2008-05-22",
"2008-08-06", "2008-08-12", "2008-10-16", "2008-10-17", "2009-07-01",
"2009-10-14", "2010-07-02", "2010-09-29", "2011-06-09", "2011-08-23"
), class = "factor"), T = c(11.1, 11.1, 10.9, 10.9, 10.6, 11,
10.5, 11.2, 10.9, 10.7, 11.1, 10.9, 10.2, 10.2, 10.9, 10.2, 9.9,
10, 10.3, 10.5, 10.1, 11.1, 11.1, 11.1, 10.9, 11.1, 10.7, 11.3,
11, 11.4, 10.2, 10.2, 10.7, 10.3, 9.9, 10.2, 10.1, 10.2, 10.2,
10.2, 10.6, 10.7, 10.2, 10.3, 11, 10.6), ph = c(6.76, 6.72, 6.9,
6.91, 6.96, 6.98, 6.94, 7.02, 7, 6.92, 6.94, 6.5, 6.4, 6.7, 6.52,
6.6, 6.6, 6.5, 6.55, 6.51, 6.59, 6.72, 6.76, 6.82, 6.8, 6.76,
6.76, 6.88, 6.82, 6.7, 6.7, 6.9, 6.71, 6.9, 6.8, 6.7, 6.69, 6.79,
6.69, 6.68, 6.5, 6.67, 6.65, 6.73, 6.78, 6.68), EC = c(1499L,
2120L, 881L, 902L, 870L, 541L, 891L, 876L, 860L, 868L, 877L,
3630L, 3400L, 2470L, 2330L, 1810L, 2190L, 2810L, 2200L, 2440L,
1111L, 2120L, 1654L, 1746L, 1781L, 761L, 1627L, 1733L, 1633L,
2440L, 3130L, 3180L, 2530L, 2710L, 2450L, 2630L, 3610L, 2190L,
973L, 3650L, 3060L, 3280L, 2930L, 879L, 3040L, 3030L), Month = structure(c(9L,
8L, 6L, 1L, 8L, 3L, 8L, 3L, 9L, 4L, 1L, 6L, 2L, 5L, 4L, 9L, 7L,
5L, 6L, 3L, 8L, 8L, 6L, 1L, 8L, 3L, 8L, 3L, 9L, 6L, 2L, 5L, 4L,
9L, 7L, 6L, 5L, 3L, 8L, 5L, 4L, 9L, 7L, 5L, 4L, 7L), .Label = c("August",
"December", "July", "June", "March", "May", "November", "October",
"September"), class = "factor"), Year = c(2007L, 2007L, 2008L,
2008L, 2008L, 2009L, 2009L, 2010L, 2010L, 2011L, 2011L, 1996L,
1996L, 1997L, 1997L, 1997L, 1997L, 1998L, 1998L, 1998L, 1998L,
2007L, 2008L, 2008L, 2008L, 2009L, 2009L, 2010L, 2010L, 1996L,
1996L, 1997L, 1997L, 1997L, 1997L, 1998L, 1998L, 1998L, 1998L,
1999L, 1999L, 1999L, 1999L, 2000L, 2000L, 2000L)), .Names = c("ID",
"Date", "T", "ph", "EC", "Month", "Year"), class = "data.frame", row.names = c(NA,
-46L))
What I want is a function that plots, for every ID, the EC value versus Month, grouped by Year. First I converted the Month column to an ordered factor:
# Order the months chronologically (month.name runs January..December).
# TRUE is spelled out: `T` is an ordinary, reassignable variable, and this
# data frame itself has a column called `T`.
df$Month <- factor(df$Month, levels = month.name, ordered = TRUE)
Then I tried to do that with this code:
# Failing attempt: the plot is created from the full `df`, while the
# aesthetics index columns of the per-ID subset `i` that by() passes in.
by (df,df$ID,function(i){
ggplot(df) +
geom_point(aes(i$Month, i$EC, group=i$Year))
})
But I receive an error. What is weird is that the same code works for lattice:
# Lattice version: for each ID, plot EC against Month with one group per
# Year, using the ID as the plot title.
by(df, df$ID, function(i) {
  xyplot(
    i$EC ~ i$Month,
    data = df,
    group = i$Year,
    main = list(unique(i$ID)),
    xlab = "Months",
    ylab = "EC",
    type = c('p', 'l', 'g'),
    auto.key = list(columns = 3, lines = TRUE)
  )
})
I really don't know what I'm missing with ggplot. Any suggestions?

You should pass i to the ggplot() call, because i is now the name of the subsetted data frame. You also don't need to use i$ inside aes().
# One scatter plot per ID: by() hands each ID's rows to the function as `i`,
# so `i` is the plot data and its columns are named directly in aes().
by(df, df$ID, function(i) {
  ggplot(data = i) +
    geom_point(mapping = aes(x = Month, y = EC, group = Year))
})

Related

How to get the confidence interval after sum in R?

I have a dataset, which is on an event basis, like below,
Young<- structure(list(Year = c(2011L, 2011L, 2011L, 2011L, 2011L, 2011L,
2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L,
2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L,
2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L,
2012L, 2012L, 2012L, 2012L, 2012L, 2012L), Month = c(10L, 10L,
10L, 12L, 12L, 12L, 3L, 3L, 3L, 8L, 8L, 8L, 9L, 9L, 9L, 1L, 1L,
1L, 2L, 2L, 2L, 3L, 3L, 3L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L,
7L, 7L, 7L, 8L, 8L, 8L), Day = c(19L, 19L, 19L, 20L, 20L, 20L,
28L, 28L, 28L, 16L, 16L, 16L, 19L, 19L, 19L, 24L, 24L, 24L, 9L,
9L, 9L, 20L, 20L, 20L, 7L, 7L, 7L, 12L, 12L, 12L, 28L, 28L, 28L,
25L, 25L, 25L, 9L, 9L, 9L), Hour = c("11:00:00", "12:00:00",
"12:00:00", "16:00:00", "16:00:00", "16:00:00", "15:00:00", "16:00:00",
"16:00:00", "13:00:00", "13:00:00", "14:00:00", "13:00:00", "13:00:00",
"14:00:00", "15:00:00", "15:00:00", "16:00:00", "15:00:00", "15:00:00",
"15:00:00", "16:00:00", "16:00:00", "17:00:00", "14:00:00", "15:00:00",
"15:00:00", "16:00:00", "16:00:00", "16:00:00", "15:00:00", "15:00:00",
"16:00:00", "15:00:00", "16:00:00", "16:00:00", "14:00:00", "15:00:00",
"15:00:00"), Treatment = c("Control", "Control", "Control", "Control",
"Control", "Control", "Control", "Control", "Control", "Control",
"Control", "Control", "Control", "Control", "Control", "Control",
"Control", "Control", "Control", "Control", "Control", "Control",
"Control", "Control", "Control", "Control", "Control", "Control",
"Control", "Control", "Control", "Control", "Control", "Control",
"Control", "Control", "Control", "Control", "Control"), Age = c("Young",
"Young", "Young", "Young", "Young", "Young", "Young", "Young",
"Young", "Young", "Young", "Young", "Young", "Young", "Young",
"Young", "Young", "Young", "Young", "Young", "Young", "Young",
"Young", "Young", "Young", "Young", "Young", "Young", "Young",
"Young", "Young", "Young", "Young", "Young", "Young", "Young",
"Young", "Young", "Young"), Plot = c(2L, 4L, 3L, 3L, 2L, 4L,
3L, 4L, 2L, 4L, 2L, 3L, 3L, 2L, 4L, 4L, 2L, 3L, 2L, 4L, 3L, 2L,
4L, 3L, 2L, 4L, 3L, 3L, 4L, 2L, 4L, 2L, 3L, 2L, 4L, 3L, 3L, 2L,
4L), CutUncut = c("Uncut", "Uncut", "Uncut", "Uncut", "Uncut",
"Uncut", "Uncut", "Uncut", "Uncut", "Uncut", "Uncut", "Uncut",
"Uncut", "Uncut", "Uncut", "Uncut", "Uncut", "Uncut", "Uncut",
"Uncut", "Uncut", "Uncut", "Uncut", "Uncut", "Uncut", "Uncut",
"Uncut", "Uncut", "Uncut", "Uncut", "Uncut", "Uncut", "Uncut",
"Uncut", "Uncut", "Uncut", "Uncut", "Uncut", "Uncut"), DOY = c(293L,
293L, 293L, 355L, 355L, 355L, 88L, 88L, 88L, 229L, 229L, 229L,
263L, 263L, 263L, 24L, 24L, 24L, 40L, 40L, 40L, 80L, 80L, 80L,
128L, 128L, 128L, 164L, 164L, 164L, 180L, 180L, 180L, 207L, 207L,
207L, 222L, 222L, 222L), Season = c("Autumn", "Autumn", "Autumn",
"Winter", "Winter", "Winter", "Spring", "Spring", "Spring", "Summer",
"Summer", "Summer", "Autumn", "Autumn", "Autumn", "Winter", "Winter",
"Winter", "Winter", "Winter", "Winter", "Spring", "Spring", "Spring",
"Spring", "Spring", "Spring", "Summer", "Summer", "Summer", "Summer",
"Summer", "Summer", "Summer", "Summer", "Summer", "Summer", "Summer",
"Summer"), ParNEE = c(290.7248731, 599.7403381, 620.7083338,
32.78885425, 62.01252568, 45.52391483, 1071.670139, 1093.367386,
800.6788483, 419.0408238, 412.778134, 1327.117535, 839.7914373,
644.0409088, 279.2245603, 191.8809568, 198.8807589, 140.3205729,
467.648241, 165.6642216, 96.16016992, 762.2635568, 540.4431615,
785.3492862, 1732.119114, 1129.006272, 1057.400471, 600.8070811,
494.626709, 716.4676621, 874.3964178, 766.3067725, 722.804594,
1841.223679, 1857.914923, 1851.643175, 1885.19335, 1874.080518,
1886.155761), TsoilNEE = c(8.20641798, 8.87167614, 9.25483531,
5.35562467, 5.001391598, 5.142751522, 5.447442626, 2.774806172,
5.73145326, 16.39260095, 16.63689439, 16.00032251, 13.80826586,
13.70293495, 14.05993565, -148.3114899, -125.1212398, -39.50172764,
-0.153912547, 0.250569622, -0.180588761, 7.026564873, 6.613643378,
6.750276902, 10.9441073, 9.815122309, 10.96556178, 13.72630399,
13.91668339, 14.75620705, 16.07427164, 17.02269846, 15.89705194,
17.48706912, 18.1624207, 17.27015855, 15.88739078, -313.4308453,
-200.2484216), TairNEE = c(11.84885994, 12.25500113, 13.33651023,
9.929997869, 9.802112232, 9.682167101, 22.46247535, 25.07250631,
25.27596815, 27.62653943, 29.58273819, 29.58725575, 24.72666853,
24.55716774, 20.72507946, 9.805187988, 9.747996729, 9.705873269,
4.765671568, 3.773874476, 3.891349753, 20.38603849, 20.30943844,
20.69971814, 22.20306747, 24.75430113, 24.77324059, 23.4390849,
23.41247396, 23.06330721, 35.67870836, 34.35199338, 33.98895943,
40.66750538, 37.41804298, 37.19870659, 29.66119092, 31.58723976,
30.10279262), FluxNEE = c(-3.206353246, -4.163998348, -9.420162768,
0.037262928, -1.336588509, -0.224057957, -0.508366229, -0.303823561,
0.451989541, -4.629461578, -4.128336306, -19.31431488, -17.41555463,
-10.76842377, -3.773743494, -4.005118396, -3.10139429, -3.896511575,
-0.230811062, -0.387848425, 0.106525664, -2.093040652, -2.140873245,
-3.342366374, -4.283554855, -3.90440069, -6.694529951, -10.79442231,
-7.57943893, -8.668064652, -9.796745843, -9.068490841, -11.23867962,
-2.881690108, -6.139469398, -11.55337646, -17.73289009, -11.81309017,
-10.19259206), ParER = c(0.079582144, 0.078538246, 0.076666822,
0.075928068, 0.071107485, 0.067917311, 0.079550601, 0.079545707,
0.076062537, 0.066680977, 0.044227931, 0.064819571, 0.084869874,
0.085468009, 0.073554044, 0.078895503, 0.070483716, 0.079795927,
0.071635126, 0.074950603, 0.085683638, 0.067656389, 0.061385558,
0.063857453, 0.071950763, 0.09015295, 0.077798096, 0.05698102,
0.075428953, 0.048109063, 0.06919603, 0.068823704, 0.082922817,
1843.888325, 1855.967312, 1856.078984, 1887.559762, 1877.441029,
1881.282226), TsoilER = c(8.212540539, 8.913773301, 9.37379959,
5.352583624, 5.003265737, 5.129246633, 3.585953441, 4.227443559,
6.270562446, 16.35403573, 16.89910109, 15.72318925, 13.51008097,
13.64622863, 14.01804184, -136.13081, -112.691455, -29.0108879,
-0.250624887, 0.234876446, -0.192253951, 7.242462111, 6.581003774,
7.304472456, 10.73760849, 9.970181595, 11.00729429, 13.62692044,
13.94769746, 14.73160985, 16.18365114, 16.87261231, 15.8735923,
17.35071047, 18.44087514, 16.87495921, 15.93538216, -217.3594646,
18.1232224), TairER = c(11.52528319, 12.64744211, 13.63468403,
9.899656846, 9.727256358, 9.639747971, 24.83383448, 25.18722803,
25.49742845, 27.23686553, 28.59409676, 29.95920761, 24.58571143,
24.69913034, 20.61829429, 9.669081306, 9.732249093, 9.544825508,
5.041718517, 3.73260756, 3.753481235, 20.24927008, 20.38445081,
21.03628444, 22.96717296, 23.93519561, 24.59680342, 23.274129,
23.18265753, 23.30700413, 35.65016436, 34.33754475, 33.67091287,
40.44720666, 37.56765173, 37.26631998, 29.50898978, 31.78674575,
30.593999), FluxER = c(1.057273055, 1.131077804, 1.286326278,
0.933058574, 1.162736073, 1.056358927, 2.210981831, 1.401988009,
1.500512866, 5.052906165, 4.628101935, 6.530351267, 3.258632563,
2.530428826, 2.847726462, 1.041944383, 0.591929214, 0.888592631,
0.085761062, 0.137706908, 0.124532256, 1.102969996, 1.210864425,
1.550348555, 1.981517329, 1.691527815, 3.072262372, 4.667099959,
3.568560761, 3.798178884, 6.782221267, 5.772727381, 7.887602279,
7.772539297, 7.873300514, 10.72683275, 8.24859913, 5.51910253,
5.920298815), Photosynth = c(-4.263626301, -5.295076152, -10.706489046,
-0.895795646, -2.499324582, -1.280416884, -2.71934806, -1.70581157,
-1.048523325, -9.682367743, -8.756438241, -25.844666147, -20.674187193,
-13.298852596, -6.621469956, -5.047062779, -3.693323504, -4.785104206,
-0.316572124, -0.525555333, -0.018006592, -3.196010648, -3.35173767,
-4.892714929, -6.265072184, -5.595928505, -9.766792323, -15.461522269,
-11.147999691, -12.466243536, -16.57896711, -14.841218222, -19.126281899,
-10.654229405, -14.012769912, -22.28020921, -25.98148922, -17.3321927,
-16.112890875), DayNumber = c(4705L, 4705L, 4705L, 4767L, 4767L,
4767L, 4500L, 4500L, 4500L, 4641L, 4641L, 4641L, 4675L, 4675L,
4675L, 4802L, 4802L, 4802L, 4818L, 4818L, 4818L, 4858L, 4858L,
4858L, 4906L, 4906L, 4906L, 4942L, 4942L, 4942L, 4958L, 4958L,
4958L, 4985L, 4985L, 4985L, 5000L, 5000L, 5000L), Date = c("2011-10-19",
"2011-10-19", "2011-10-19", "2011-12-20", "2011-12-20", "2011-12-20",
"2011-03-28", "2011-03-28", "2011-03-28", "2011-08-16", "2011-08-16",
"2011-08-16", "2011-09-19", "2011-09-19", "2011-09-19", "2012-01-24",
"2012-01-24", "2012-01-24", "2012-02-09", "2012-02-09", "2012-02-09",
"2012-03-20", "2012-03-20", "2012-03-20", "2012-05-07", "2012-05-07",
"2012-05-07", "2012-06-12", "2012-06-12", "2012-06-12", "2012-06-28",
"2012-06-28", "2012-06-28", "2012-07-25", "2012-07-25", "2012-07-25",
"2012-08-09", "2012-08-09", "2012-08-09"), Location = c(19L,
21L, 20L, 20L, 19L, 21L, 20L, 21L, 19L, 21L, 19L, 20L, 20L, 19L,
21L, 21L, 19L, 20L, 19L, 21L, 20L, 19L, 21L, 20L, 19L, 21L, 20L,
20L, 21L, 19L, 21L, 19L, 20L, 19L, 21L, 20L, 20L, 19L, 21L),
MossBiomass = c(0.81122449, 0.870408163, 0.532653061, 0.532653061,
0.81122449, 0.870408163, 0.532653061, 0.870408163, 0.81122449,
0.870408163, 0.81122449, 0.532653061, 0.532653061, 0.81122449,
0.870408163, 0.870408163, 0.81122449, 0.532653061, 0.81122449,
0.870408163, 0.532653061, 0.81122449, 0.870408163, 0.532653061,
0.81122449, 0.870408163, 0.532653061, 0.532653061, 0.870408163,
0.81122449, 0.870408163, 0.81122449, 0.532653061, 0.81122449,
0.870408163, 0.532653061, 0.532653061, 0.81122449, 0.870408163
), CallunaBiomass = c(0.730612245, 0.766326531, 0.774489796,
0.774489796, 0.730612245, 0.766326531, 0.774489796, 0.766326531,
0.730612245, 0.766326531, 0.730612245, 0.774489796, 0.774489796,
0.730612245, 0.766326531, 0.766326531, 0.730612245, 0.774489796,
0.730612245, 0.766326531, 0.774489796, 0.730612245, 0.766326531,
0.774489796, 0.730612245, 0.766326531, 0.774489796, 0.774489796,
0.766326531, 0.730612245, 0.766326531, 0.730612245, 0.774489796,
0.730612245, 0.766326531, 0.774489796, 0.774489796, 0.730612245,
0.766326531), TotalBiomass = c(1.541836735, 1.636734694,
1.307142857, 1.307142857, 1.541836735, 1.636734694, 1.307142857,
1.636734694, 1.541836735, 1.636734694, 1.541836735, 1.307142857,
1.307142857, 1.541836735, 1.636734694, 1.636734694, 1.541836735,
1.307142857, 1.541836735, 1.636734694, 1.307142857, 1.541836735,
1.636734694, 1.307142857, 1.541836735, 1.636734694, 1.307142857,
1.307142857, 1.636734694, 1.541836735, 1.636734694, 1.541836735,
1.307142857, 1.541836735, 1.636734694, 1.307142857, 1.307142857,
1.541836735, 1.636734694), Efflux = c(NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA), SM = c(0.284347997493598, 0.284347997493598,
0.284347997493598, 0.245463893974316, 0.245463893974316,
0.245463893974316, 0.222652267105158, 0.222652267105158,
0.222652267105158, 0.252836852714222, 0.252836852714222,
0.252836852714222, 0.278406340793348, 0.278406340793348,
0.278406340793348, 0.259937125926954, 0.259937125926954,
0.259937125926954, 0.232801146590399, 0.232801146590399,
0.232801146590399, 0.227096876335852, 0.227096876335852,
0.227096876335852, 0.229079210892252, 0.229079210892252,
0.229079210892252, 0.230726906942308, 0.230726906942308,
0.230726906942308, 0.232011241533852, 0.232011241533852,
0.232011241533852, 0.220105334847203, 0.220105334847203,
0.220105334847203, 0.236470079582821, 0.236470079582821,
0.236470079582821), SoilTemp = c(9.950000286, 10.04999971,
10.04999971, 4.700000048, 4.700000048, 4.700000048, NA, NA,
NA, 15.75, 15.75, 16.10000038, 13.3499999, 13.3499999, 13.54999971,
4.200000048, 4.200000048, 4.299999952, 0.100000001, 0.100000001,
0.100000001, 6.700000048, 6.700000048, 6.75, 9.950000286,
10.25, 10.25, 14.19999981, 14.19999981, 14.19999981, 16.44999981,
16.44999981, 16.65000057, 17.19999981, 17.5, 17.5, 16.64999962,
16.80000019, 16.80000019), RelHumid = c(0.88, 0.84, 0.84,
0.86, 0.86, 0.86, 0.68, 0.68, 0.68, 0.6, 0.6, 0.54, 0.76,
0.76, 0.75, 0.93, 0.93, 0.94, 0.87, 0.87, 0.87, 0.62, 0.62,
0.64, 0.45, 0.43, 0.43, 0.74, 0.74, 0.74, 0.63, 0.63, 0.64,
0.43, 0.46, 0.46, 0.58, 0.56, 0.56), AirTemp = c(8.9, 10.4,
10.4, 6.6, 6.6, 6.6, 9.4, 9.7, 9.7, 20.9, 20.9, 21.2, 16.1,
16.1, 16.2, 4.9, 4.9, 4.8, 0.1, 0.1, 0.1, 13.4, 13.4, 12.9,
14, 14.7, 14.7, 17.2, 17.2, 17.2, 26.8, 26.8, 26.9, 30.3,
29.8, 29.8, 21.6, 21.6, 21.6), Solar = c(166.6, 272.8, 272.8,
12.8, 12.8, 12.8, 221.6, 258.2, 258.2, 578.6, 578.6, 525.5,
364.4, 364.4, 309.4, 111.7, 111.7, 73.2, 144.7, 144.7, 144.7,
368, 368, 227.1, 767.2, 651.9, 651.9, 375.4, 375.4, 375.4,
446.8, 446.8, 391.8, 796.5, 730.6, 730.6, 269.2, 787.4, 787.4
), PAR = c(300.3, 503.5, 503.5, 14.6, 14.6, 14.6, 419.3,
479.7, 479.7, 1100.5, 1100.5, 996.1, 703.1, 703.1, 607.9,
194.1, 194.1, 120.8, 263.7, 263.7, 263.7, 714.1, 714.1, 428.5,
1433.7, 1192, 1192, 705, 705, 705, 869.8, 869.8, 750.7, 1536.3,
1411.7, 1411.7, 540.2, 1534.4, 1534.4)), row.names = c(11L,
12L, 13L, 61L, 62L, 64L, 94L, 95L, 96L, 140L, 141L, 143L, 165L,
166L, 168L, 206L, 208L, 210L, 227L, 228L, 231L, 254L, 256L, 258L,
273L, 279L, 281L, 301L, 302L, 303L, 324L, 325L, 331L, 341L, 348L,
349L, 365L, 372L, 373L), class = "data.frame")
Then I use these data to fit a model
# Fit -Photosynth = ParNEE * a * SoilTemp / (ParNEE + Pk) on the Young data
# by non-linear least squares, with a lower bound of 0.0001 on the parameters.
GPPyoung <- nls(
  -Photosynth ~ (ParNEE * (a * SoilTemp)) / (ParNEE + Pk),
  data = Young,
  start = list(a = 0.1, Pk = 100),
  lower = 0.0001,
  algorithm = "port"
)
AIC(GPPyoung)      # 201.99
coef(GPPyoung)     # parameter estimates
summary(GPPyoung)  # full summary of the fit
confint(GPPyoung)  # 95% confidence intervals for the parameters
# Keep the fitted parameters so predictions can be computed by hand
Pk <- coef(GPPyoung)["Pk"]
a <- coef(GPPyoung)["a"]
Based on these data, I got estimates for the parameters Pk and a. I then applied this model to another dataset, Meteo, which is an hourly dataset, to get hourly values of Photosynth. Finally, I used the sum() function to get the annual value of Photosynth.
But now my question is how to calculate the confidence interval, or the uncertainty, of the annual value of Photosynth, because it is a sum rather than an average. We really want to know the uncertainty of the whole model.
# Apply the fitted parameters `a` and `Pk` to the Meteo data by hand,
# using the same formula as the model.
Meteo$GPPyoung <- with(Meteo, (PAR * (a * SoilTemp)) / (PAR + Pk))
Meteo<- structure(list(Date = c("1/06/2010 0:00", "1/06/2010 1:00", "1/06/2010 2:00",
"1/06/2010 3:00", "1/06/2010 4:00", "1/06/2010 5:00", "1/06/2010 6:00",
"1/06/2010 7:00", "1/06/2010 8:00", "1/06/2010 9:00", "1/06/2010 10:00",
"1/06/2010 11:00", "1/06/2010 12:00", "1/06/2010 13:00", "1/06/2010 14:00",
"1/06/2010 15:00", "1/06/2010 16:00", "1/06/2010 17:00", "1/06/2010 18:00",
"1/06/2010 19:00", "1/06/2010 20:00", "1/06/2010 21:00", "1/06/2010 22:00",
"1/06/2010 23:00", "2/06/2010 0:00", "2/06/2010 1:00", "2/06/2010 2:00",
"2/06/2010 3:00", "2/06/2010 4:00", "2/06/2010 5:00", "2/06/2010 6:00",
"2/06/2010 7:00", "2/06/2010 8:00", "2/06/2010 9:00", "2/06/2010 10:00",
"2/06/2010 11:00", "2/06/2010 12:00", "2/06/2010 13:00", "2/06/2010 14:00",
"2/06/2010 15:00", "2/06/2010 16:00", "2/06/2010 17:00", "2/06/2010 18:00",
"2/06/2010 19:00", "2/06/2010 20:00", "2/06/2010 21:00", "2/06/2010 22:00",
"2/06/2010 23:00", "3/06/2010 0:00", "3/06/2010 1:00"), Year = c(2010L,
2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L,
2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L,
2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L,
2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L,
2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L,
2010L, 2010L, 2010L, 2010L), Month = c(6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L), Day = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
3L), Hour = c(0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 0L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L,
15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 0L, 1L), RelHumid = c(0.95,
0.96, 0.97, 0.97, 0.97, 0.97, 0.97, 0.97, 0.98, 0.95, 0.82, 0.76,
0.7, 0.67, 0.62, 0.63, 0.59, 0.64, 0.65, 0.69, 0.73, 0.78, 0.84,
0.91, 0.94, 0.96, 0.96, 0.97, 0.97, 0.97, 0.97, 0.98, 0.93, 0.73,
0.65, 0.6, 0.52, 0.45, 0.42, 0.41, 0.42, 0.44, 0.46, 0.45, 0.43,
0.49, 0.62, 0.78, 0.9, 0.94), AirTemp = c(6.7, 6.2, 5.3, 4.2,
4, 3.4, 3.9, 5.7, 9.7, 11.6, 14, 15.2, 17.2, 18.2, 19.6, 19,
19.7, 19.1, 18.6, 17.6, 16.6, 15.9, 14.3, 13.3, 11.2, 9.2, 6.9,
5.7, 4.9, 4.1, 3.6, 6.9, 11.5, 14.7, 16.4, 18.3, 20.1, 21.4,
22.1, 22.8, 22.8, 22.8, 22.3, 21.6, 20, 17.9, 14.9, 10.6, 7,
5.8), Solar = c(1.8, 1.8, 1.8, 1.8, 1.8, 1.8, 12.8, 86.1, 212.4,
254.5, 375.4, 368, 629.9, 640.9, 705, 448.6, 465.1, 373.5, 234.4,
166.6, 102.5, 53.1, 12.8, 1.8, 1.8, 1.8, 1.8, 1.8, 1.8, 1.8,
14.6, 113.5, 238, 395.5, 529.2, 679.3, 792.8, 862.4, 884.4, 851.4,
785.5, 699.5, 554.8, 390, 227.1, 87.9, 12.8, 1.8, 1.8, 1.8),
PAR = c(0, 0, 0, 0, 0, 0, 14.6, 155.6, 391.8, 485.2, 734.3,
721.4, 1259.8, 1292.7, 1422.7, 869.8, 917.4, 734.3, 454.1,
311.3, 184.9, 86.1, 12.8, 0, 0, 0, 0, 0, 0, 0, 18.3, 197.8,
459.6, 791, 1073, 1355, 1618.7, 1759.6, 1779.8, 1730.3, 1571,
1386.1, 1080.3, 754.4, 448.6, 168.5, 22, 0, 0, 0), SoilTemp = c(10.23626137,
9.870399475, 9.595145226, 9.342157364, 9.180820465, 9.042314529,
9.019209862, 9.180820465, 9.549196243, 9.984869003, 10.41872025,
10.87355137, 11.3039856, 11.91307545, 12.81039906, 13.16781235,
13.3907938, 13.52444077, 13.25704098, 12.87747669, 12.56419373,
12.25024033, 11.9355793, 11.62017345, 11.23612499, 10.76001549,
10.28190517, 9.893303871, 9.61811161, 9.36518383, 9.203886032,
9.226944923, 9.572173119, 9.961985588, 10.32752895, 10.69184017,
11.19086361, 12.13794422, 13.3907938, 14.83355808, 16.04715157,
16.37726593, 15.95906639, 14.83355808, 13.90255451, 12.98920727,
12.3400116, 11.62017345, 10.94161892, 10.44150639), Hour1 = c("0:00:00",
"1:00:00", "2:00:00", "3:00:00", "4:00:00", "5:00:00", "6:00:00",
"7:00:00", "8:00:00", "9:00:00", "10:00:00", "11:00:00",
"12:00:00", "13:00:00", "14:00:00", "15:00:00", "16:00:00",
"17:00:00", "18:00:00", "19:00:00", "20:00:00", "21:00:00",
"22:00:00", "23:00:00", "0:00:00", "1:00:00", "2:00:00",
"3:00:00", "4:00:00", "5:00:00", "6:00:00", "7:00:00", "8:00:00",
"9:00:00", "10:00:00", "11:00:00", "12:00:00", "13:00:00",
"14:00:00", "15:00:00", "16:00:00", "17:00:00", "18:00:00",
"19:00:00", "20:00:00", "21:00:00", "22:00:00", "23:00:00",
"0:00:00", "1:00:00"), DayNumber = c(4200, 4200, 4200, 4200,
4200, 4200, 4200, 4200, 4200, 4200, 4200, 4200, 4200, 4200,
4200, 4200, 4200, 4200, 4200, 4200, 4200, 4200, 4200, 4200,
4201, 4201, 4201, 4201, 4201, 4201, 4201, 4201, 4201, 4201,
4201, 4201, 4201, 4201, 4201, 4201, 4201, 4201, 4201, 4201,
4201, 4201, 4201, 4201, 4202, 4202), Measurement.Time = structure(list(
sec = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), min = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L), hour = c(0L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 0L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 0L, 1L), mday = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L), mon = c(5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L), year = c(110L, 110L, 110L, 110L, 110L,
110L, 110L, 110L, 110L, 110L, 110L, 110L, 110L, 110L,
110L, 110L, 110L, 110L, 110L, 110L, 110L, 110L, 110L,
110L, 110L, 110L, 110L, 110L, 110L, 110L, 110L, 110L,
110L, 110L, 110L, 110L, 110L, 110L, 110L, 110L, 110L,
110L, 110L, 110L, 110L, 110L, 110L, 110L, 110L, 110L),
wday = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L), yday = c(151L,
151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L,
151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L, 151L,
151L, 151L, 151L, 151L, 151L, 152L, 152L, 152L, 152L,
152L, 152L, 152L, 152L, 152L, 152L, 152L, 152L, 152L,
152L, 152L, 152L, 152L, 152L, 152L, 152L, 152L, 152L,
152L, 152L, 153L, 153L), isdst = c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), zone = c("CEST", "CEST", "CEST", "CEST",
"CEST", "CEST", "CEST", "CEST", "CEST", "CEST", "CEST",
"CEST", "CEST", "CEST", "CEST", "CEST", "CEST", "CEST",
"CEST", "CEST", "CEST", "CEST", "CEST", "CEST", "CEST",
"CEST", "CEST", "CEST", "CEST", "CEST", "CEST", "CEST",
"CEST", "CEST", "CEST", "CEST", "CEST", "CEST", "CEST",
"CEST", "CEST", "CEST", "CEST", "CEST", "CEST", "CEST",
"CEST", "CEST", "CEST", "CEST"), gmtoff = c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_)), class = c("POSIXlt",
"POSIXt")), GPPyoung = c(0, 0, 0, 0, 0, 0, 0.590594499464422,
4.25789413130195, 7.13964983293584, 8.09364257311046, 9.59430060918616,
9.96591619422887, 11.703172902607, 12.3885249598014, 13.5318467495935,
12.6479996031782, 13.0198166013451, 12.4542695460619, 10.4923575609493,
8.71893654495587, 6.4730880637325, 3.76653362912719, 0.689650416319457,
0, 0, 0, 0, 0, 0, 0, 0.74554835122207, 4.94356227894899,
7.60948634247972, 9.35134022285773, 10.3784721078367, 11.2061336432827,
12.0500200644512, 13.2185959001941, 14.6046656499378, 16.1185068720173,
17.205706913021, 17.2282182581709, 16.0592648798236, 13.7569296564082,
10.9537918957097, 6.32934387849538, 1.1861821961113, 0, 0,
0)), row.names = 745:794, class = "data.frame")
# Sum the predictions after scaling by 60 * 60 * 12 * 1e-6
# (presumably seconds per hour, g C per mol and umol -> mol -- confirm).
# The total gets its own name: assigning it to `Young` would overwrite the
# `Young` data frame that the model is fitted to.
GPPyoung_total <- sum(
  (Meteo$GPPyoung * 60 * 60 * 12 * (1 / 1000000)),
  na.rm = TRUE
)
My idea is: the parameters Pk and a follow a normal distribution, so maybe I can loop over the model with a range of values for Pk and a to get a range of results, and then choose the confidence intervals based on those results. Finally, I would sum them up to get the annual confidence interval?
I am not sure about this idea.
I'm adding this as a partial solution with some caveats. First is that I'm assuming your initial model is correct, that is the observations can be treated as independent of each other given the covariates and that the shape of the curve is appropriate. If I was doing this for real I would think very hard about the resampling, that is whether I need to sample observations in clusters corresponding to the experimental design.
Second, the bootstrapping doesn't always work: the model sometimes fails to converge, so you'll need to find a way around this if you want to run a reasonable number of bootstrap replicates.
You can use bootstrapping to estimate the confidence interval for the sum you requested as follows. I'm using predict to get the fitted values for the new data, and so I need to make the Meteo dataframe have predictor names that correspond to the names in the model. Then I use boot to estimate the confidence interval.
When I ran the bootstrap just to get the CIs for the model coefficients, they were very close to the CIs reported by running confint on the original model, so it seems to work OK. That also suggests it might be OK to resample the coefficients from the joint distribution of the estimates (you can get the covariance from vcov(GPPyoung)), so maybe the bootstrapping isn't the best way!
# The model formula uses ParNEE as its predictor name, so expose Meteo's PAR
# column under that name before calling predict().
Meteo[["ParNEE"]] <- Meteo[["PAR"]]
# Bootstrap statistic for boot(): refit the model on the resampled rows and
# return the summed, scaled prediction for `newdata`.
#
# dat     - data frame with the columns Photosynth, ParNEE and SoilTemp.
# w       - row indices of the bootstrap resample, as supplied by boot().
# newdata - data frame to predict for; needs ParNEE and SoilTemp columns.
#           Defaults to the global `Meteo`, which the function used before.
#
# Returns a single number, or NA_real_ (with a warning) when nls() fails on
# this resample, so that one bad replicate does not abort the whole bootstrap.
getEstimates <- function(dat, w, newdata = Meteo) {
  fit <- tryCatch(
    nls(-Photosynth ~ (ParNEE * (a * SoilTemp)) / (ParNEE + Pk),
        data = dat[w, ],
        start = list(a = 0.1, Pk = 100), lower = 0.0001, algorithm = "port"),
    error = function(e) {
      warning("nls() failed on this resample: ", conditionMessage(e),
              call. = FALSE)
      NULL
    }
  )
  if (is.null(fit)) {
    return(NA_real_)
  }
  # Scale the predictions by 60 * 60 * 12 * 1e-6 before summing.
  sum((predict(fit, newdata = newdata) * 60 * 60 * 12 * (1 / 1000000)),
      na.rm = TRUE)
}
library(boot)
# Run 100 bootstrap replicates of getEstimates over Young, then show the
# replicate summary and the confidence intervals for the summed prediction.
b1 <- boot(data = Young, statistic = getEstimates, R = 100)
b1
boot.ci(boot.out = b1)
> b1
ORDINARY NONPARAMETRIC BOOTSTRAP
Call:
boot(data = Young, statistic = getEstimates, R = 100)
Bootstrap Statistics :
original bias std. error
t1* 14.19008 0.1035427 0.9388517
> boot.ci(b1)
BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
Based on 100 bootstrap replicates
CALL :
boot.ci(boot.out = b1)
Intervals :
Level Normal Basic
95% (12.25, 15.93 ) (11.71, 15.98 )
Level Percentile BCa
95% (12.40, 16.67 ) (12.38, 16.46 )
Calculations and Intervals on Original Scale
Some basic intervals may be unstable
Some percentile intervals may be unstable
Some BCa intervals may be unstable

Separate a rainfall time series based on duration of days using dplyr in R

I have the following data:
dat <- structure(list(Year = c(1979L, 1979L, 1979L, 1979L, 1979L, 1979L,
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L,
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L,
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1980L, 1980L,
1980L, 1980L, 1980L, 1980L, 1980L, 1980L, 1980L, 1980L, 1980L,
1980L, 1980L, 1980L, 1980L, 1980L, 1980L, 1980L, 1980L, 1980L,
1980L, 1980L, 1980L, 1980L, 1980L, 1980L, 1980L, 1980L, 1980L,
1980L, 1980L, 1981L, 1981L, 1981L, 1981L, 1981L, 1981L, 1981L,
1981L, 1981L, 1981L, 1981L, 1981L, 1981L, 1981L, 1981L, 1981L,
1981L, 1981L, 1981L, 1981L, 1981L, 1981L, 1981L, 1981L, 1981L,
1981L, 1981L, 1981L, 1981L, 1981L, 1981L, 1982L, 1982L, 1982L,
1982L, 1982L, 1982L, 1982L, 1982L, 1982L, 1982L, 1982L, 1982L,
1982L, 1982L, 1982L, 1982L, 1982L, 1982L, 1982L, 1982L, 1982L,
1982L, 1982L, 1982L, 1982L, 1982L, 1982L, 1982L, 1982L, 1982L,
1982L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L,
1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L,
1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L,
1983L, 1983L, 1983L, 1983L, 1983L, 1984L, 1984L, 1984L, 1984L,
1984L, 1984L, 1984L, 1984L, 1984L, 1984L, 1984L, 1984L, 1984L,
1984L, 1984L, 1984L, 1984L, 1984L, 1984L, 1984L, 1984L, 1984L,
1984L, 1984L, 1984L, 1984L, 1984L, 1984L, 1984L, 1984L, 1984L,
1985L, 1985L, 1985L, 1985L, 1985L, 1985L, 1985L, 1985L, 1985L,
1985L, 1985L, 1985L, 1985L, 1985L, 1985L, 1985L, 1985L, 1985L,
1985L, 1985L, 1985L, 1985L, 1985L, 1985L, 1985L, 1985L, 1985L,
1985L, 1985L, 1985L, 1985L, 1986L, 1986L, 1986L, 1986L, 1986L,
1986L, 1986L, 1986L, 1986L, 1986L, 1986L, 1986L, 1986L, 1986L,
1986L, 1986L, 1986L, 1986L, 1986L, 1986L, 1986L, 1986L, 1986L,
1986L, 1986L, 1986L, 1986L, 1986L, 1986L, 1986L, 1986L, 1987L,
1987L, 1987L, 1987L, 1987L, 1987L, 1987L, 1987L, 1987L, 1987L,
1987L, 1987L, 1987L, 1987L, 1987L, 1987L, 1987L, 1987L, 1987L,
1987L, 1987L, 1987L, 1987L, 1987L, 1987L, 1987L, 1987L, 1987L,
1987L, 1987L, 1987L), Month = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L),
Day = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L,
13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L,
25L, 26L, 27L, 28L, 29L, 30L, 31L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L,
19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L,
31L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L,
14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L,
26L, 27L, 28L, 29L, 30L, 31L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L,
20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L,
15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L,
27L, 28L, 29L, 30L, 31L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L,
21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L,
15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L,
27L, 28L, 29L, 30L, 31L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L,
21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L,
15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L,
27L, 28L, 29L, 30L, 31L), Rainfall = c(1, 35.5, 20.3, 2.5,
32, 66.8, 0, 0, 1.8, 0, 5.3, 0, 0, 0, 11.7, 40.4, 45.7, 15.3,
21.6, 10.5, 26.2, 54.1, 1.5, 26.9, 39.4, 21.6, 1.3, 95.6,
10.2, 0, 5.1, 9.1, 31.5, 24.6, 18.5, 0, 37.1, 111.3, 4.3,
21.1, 3.1, 0, 0, 0, 15.8, 30.7, 6.4, 68.6, 97.5, 64.3, 47.3,
0, 2.3, 8.7, 53.9, 6.9, 20.9, 94, 7.4, 0, 1.3, 0, 38.6, 2.5,
10.4, 22.6, 6.3, 20.2, 3.6, 30.8, 0.8, 0.5, 34.3, 315.9,
246.3, 131.1, 11.8, 4.9, 2.1, 0.6, 0, 0, 0.5, 0.3, 122.2,
85.9, 40.2, 9.4, 15.4, 27.7, 2.3, 41.1, 248.9, 0.8, 0, 9.3,
10.9, 6.4, 0.3, 0.8, 0, 56.2, 150.6, 80, 12.2, 3.3, 0, 10.4,
95.3, 53, 55.9, 6, 46.2, 19.8, 1, 1.5, 10, 12, 47.8, 10.9,
80.5, 144.5, 250.6, 17.1, 1, 0, 15, 7.3, 69, 14.2, 6.6, 4.1,
0, 0.8, 2.5, 6.1, 44.5, 0, 23.1, 0.5, 2.5, 11.2, 5.4, 6.1,
0.5, 0, 0, 0, 0, 3.8, 0, 15.6, 16.3, 0.8, 1.3, 88.7, 18.3,
0.9, 1.3, 1.5, 0.5, 0.6, 5.3, 60.2, 0, 0.8, 19.1, 83.8, 62.7,
51.6, 3.3, 33.8, 7.6, 8.1, 0, 0, 16.3, 19, 28.2, 35.6, 2.9,
1.5, 17.8, 38.8, 15.8, 18.8, 203.7, 13.5, 30, 129.8, 5.6,
2, 0, 0, 0, 0, 0, 0, 0, 1.3, 0, 1, 2.5, 0, 3.8, 9.4, 1, 0,
24.6, 11.4, 11.7, 11.4, 0, 0, 0, 2.3, 8.6, 3, 14.4, 20.4,
4.2, 1.4, 20.8, 58.4, 42.9, 45.4, 67.6, 19.5, 4, 8, 24.9,
6.9, 75, 52.2, 110.4, 76.1, 10.6, 44.6, 248, 119.2, 1.2,
2, 9.5, 4.6, 29, 5.7, 49.6, 0, 0, 3, 22.4, 3, 3.2, 0, 26.2,
11, 8.8, 0, 4.2, 2, 9.4, 23.6, 76.8, 1, 60.6, 25.4, 6.2,
15.9, 9.2, 2, 0, 23.2, 22.8, 15, 4.2, 10.5, 2.8, 6, 26)), na.action = structure(c(`2000` =
2000L,
`2007` = 2007L, `3515` = 3515L, `3653` = 3653L, `10318` = 10318L,
`10319` = 10319L, `10320` = 10320L, `10321` = 10321L, `10322` = 10322L,
`10323` = 10323L, `10324` = 10324L, `10325` = 10325L, `10326` = 10326L,
`10327` = 10327L, `10328` = 10328L, `10329` = 10329L, `10330` = 10330L,
`10331` = 10331L, `10332` = 10332L, `10333` = 10333L, `10334` = 10334L,
`10335` = 10335L, `10336` = 10336L, `10337` = 10337L, `10338` = 10338L,
`10339` = 10339L, `10340` = 10340L, `10341` = 10341L, `10342` = 10342L,
`10343` = 10343L, `10344` = 10344L, `10345` = 10345L, `10346` = 10346L,
`10347` = 10347L, `10501` = 10501L, `10502` = 10502L, `10503` = 10503L,
`10504` = 10504L, `10505` = 10505L, `10506` = 10506L, `10507` = 10507L,
`10508` = 10508L, `10509` = 10509L, `10510` = 10510L, `10511` = 10511L,
`10512` = 10512L, `10513` = 10513L, `10514` = 10514L, `10515` = 10515L,
`10516` = 10516L, `10517` = 10517L, `10518` = 10518L, `10519` = 10519L,
`10520` = 10520L, `10521` = 10521L, `10522` = 10522L, `10523` = 10523L,
`10524` = 10524L, `10525` = 10525L, `10526` = 10526L, `10527` = 10527L,
`10528` = 10528L, `10529` = 10529L, `10530` = 10530L, `10531` = 10531L,
`10593` = 10593L, `10594` = 10594L, `10595` = 10595L, `10596` = 10596L,
`10597` = 10597L, `10598` = 10598L, `10599` = 10599L, `10600` = 10600L,
`10601` = 10601L, `10602` = 10602L, `10603` = 10603L, `10604` = 10604L,
`10605` = 10605L, `10606` = 10606L, `10607` = 10607L, `10608` = 10608L,
`10609` = 10609L, `10610` = 10610L, `10611` = 10611L, `10612` = 10612L,
`10613` = 10613L, `10614` = 10614L, `10615` = 10615L, `10616` = 10616L,
`10617` = 10617L, `10618` = 10618L, `10619` = 10619L, `10620` = 10620L,
`10621` = 10621L, `10622` = 10622L, `10623` = 10623L, `10624` = 10624L,
`10625` = 10625L, `10626` = 10626L, `10627` = 10627L, `10628` = 10628L,
`10629` = 10629L, `10630` = 10630L, `10631` = 10631L, `10632` = 10632L,
`10633` = 10633L, `10634` = 10634L, `10635` = 10635L, `10636` = 10636L,
`10637` = 10637L, `10638` = 10638L, `10639` = 10639L, `10640` = 10640L,
`10641` = 10641L, `10642` = 10642L, `10643` = 10643L, `10644` = 10644L,
`10645` = 10645L, `10646` = 10646L, `10647` = 10647L, `10648` = 10648L,
`10649` = 10649L, `10650` = 10650L, `10651` = 10651L, `10652` = 10652L,
`10653` = 10653L, `10654` = 10654L, `10655` = 10655L, `10656` = 10656L,
`10657` = 10657L, `10658` = 10658L, `10659` = 10659L, `10660` = 10660L,
`10661` = 10661L, `10662` = 10662L, `10663` = 10663L, `10664` = 10664L,
`10665` = 10665L, `10666` = 10666L, `10667` = 10667L, `10668` = 10668L,
`10669` = 10669L, `10670` = 10670L, `10671` = 10671L, `10672` = 10672L,
`10673` = 10673L, `10674` = 10674L, `10675` = 10675L, `10676` = 10676L,
`10677` = 10677L, `10678` = 10678L, `10679` = 10679L, `10680` = 10680L,
`10681` = 10681L, `10682` = 10682L, `10683` = 10683L, `10775` = 10775L,
`10776` = 10776L, `10777` = 10777L, `10778` = 10778L, `10779` = 10779L,
`10780` = 10780L, `10781` = 10781L, `10782` = 10782L, `10783` = 10783L,
`10784` = 10784L, `10785` = 10785L, `10786` = 10786L, `10787` = 10787L,
`10788` = 10788L, `10789` = 10789L, `10790` = 10790L, `10791` = 10791L,
`10792` = 10792L, `10793` = 10793L, `10794` = 10794L, `10795` = 10795L,
`10796` = 10796L, `10797` = 10797L, `10798` = 10798L, `10799` = 10799L,
`10800` = 10800L, `10801` = 10801L, `10802` = 10802L, `10803` = 10803L,
`10804` = 10804L, `10805` = 10805L, `10928` = 10928L, `10929` = 10929L,
`10930` = 10930L, `10931` = 10931L, `10932` = 10932L, `10933` = 10933L,
`10934` = 10934L, `10935` = 10935L, `10936` = 10936L, `10937` = 10937L,
`10938` = 10938L, `10939` = 10939L, `10940` = 10940L, `10941` = 10941L,
`10942` = 10942L, `10943` = 10943L, `10944` = 10944L, `10945` = 10945L,
`10946` = 10946L, `10947` = 10947L, `10948` = 10948L, `10949` = 10949L,
`10950` = 10950L, `10951` = 10951L, `10952` = 10952L, `10953` = 10953L,
`10954` = 10954L, `10955` = 10955L, `10956` = 10956L, `10957` = 10957L,
`10958` = 10958L, `11306` = 11306L, `11307` = 11307L, `11308` = 11308L,
`11309` = 11309L, `11310` = 11310L, `11311` = 11311L, `11312` = 11312L,
`11444` = 11444L, `11445` = 11445L, `11446` = 11446L, `11447` = 11447L,
`11448` = 11448L, `11449` = 11449L, `11450` = 11450L, `11451` = 11451L,
`11452` = 11452L, `11453` = 11453L, `11454` = 11454L, `11455` = 11455L,
`11456` = 11456L, `11457` = 11457L, `11458` = 11458L, `11459` = 11459L,
`11460` = 11460L, `11461` = 11461L, `11462` = 11462L, `11463` = 11463L,
`11464` = 11464L, `11465` = 11465L, `11466` = 11466L, `11467` = 11467L,
`11468` = 11468L, `11469` = 11469L, `11470` = 11470L, `11471` = 11471L,
`11472` = 11472L, `11473` = 11473L, `11474` = 11474L, `11658` = 11658L,
`11659` = 11659L, `11660` = 11660L, `11661` = 11661L, `11662` = 11662L,
`11663` = 11663L, `11664` = 11664L, `11665` = 11665L, `11666` = 11666L,
`11667` = 11667L, `11668` = 11668L, `11669` = 11669L, `11670` = 11670L,
`11671` = 11671L, `11672` = 11672L, `11673` = 11673L, `11674` = 11674L,
`11675` = 11675L, `11676` = 11676L, `11677` = 11677L, `11678` = 11678L,
`11679` = 11679L, `11680` = 11680L, `11681` = 11681L, `11682` = 11682L,
`11683` = 11683L, `11684` = 11684L, `11685` = 11685L, `11686` = 11686L,
`11687` = 11687L, `11688` = 11688L, `12389` = 12389L, `12390` = 12390L,
`12391` = 12391L, `12392` = 12392L, `12393` = 12393L, `12394` = 12394L,
`12395` = 12395L, `12396` = 12396L, `12397` = 12397L, `12398` = 12398L,
`12399` = 12399L, `12400` = 12400L, `12401` = 12401L, `12402` = 12402L,
`12403` = 12403L, `12404` = 12404L, `12405` = 12405L, `12406` = 12406L,
`12407` = 12407L, `12408` = 12408L, `12409` = 12409L, `12410` = 12410L,
`12411` = 12411L, `12412` = 12412L, `12413` = 12413L, `12414` = 12414L,
`12415` = 12415L, `12416` = 12416L, `12417` = 12417L, `12418` = 12418L,
`12419` = 12419L, `13270` = 13270L, `13271` = 13271L, `13272` = 13272L,
`13273` = 13273L, `13274` = 13274L, `13275` = 13275L, `13276` = 13276L,
`13277` = 13277L, `13278` = 13278L, `13279` = 13279L, `13280` = 13280L,
`13281` = 13281L, `13282` = 13282L, `13283` = 13283L, `13284` = 13284L,
`13285` = 13285L, `13286` = 13286L, `13287` = 13287L, `13288` = 13288L,
`13289` = 13289L, `13290` = 13290L, `13291` = 13291L, `13292` = 13292L,
`13293` = 13293L, `13294` = 13294L, `13295` = 13295L, `13296` = 13296L,
`13297` = 13297L, `13298` = 13298L, `13299` = 13299L, `13300` = 13300L,
`13362` = 13362L, `13363` = 13363L, `13364` = 13364L, `13365` = 13365L,
`13366` = 13366L, `13367` = 13367L, `13368` = 13368L, `13369` = 13369L,
`13370` = 13370L, `13371` = 13371L, `13372` = 13372L, `13373` = 13373L,
`13374` = 13374L, `13375` = 13375L, `13376` = 13376L, `13377` = 13377L,
`13378` = 13378L, `13379` = 13379L, `13380` = 13380L, `13381` = 13381L,
`13382` = 13382L, `13383` = 13383L, `13384` = 13384L, `13385` = 13385L,
`13386` = 13386L, `13387` = 13387L, `13388` = 13388L, `13389` = 13389L,
`13390` = 13390L, `13391` = 13391L, `13392` = 13392L, `14050` = 14050L
), class = "omit"), row.names = c(1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L,
21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 366L,
367L, 368L, 369L, 370L, 371L, 372L, 373L, 374L, 375L, 376L, 377L,
378L, 379L, 380L, 381L, 382L, 383L, 384L, 385L, 386L, 387L, 388L,
389L, 390L, 391L, 392L, 393L, 394L, 395L, 396L, 732L, 733L, 734L,
735L, 736L, 737L, 738L, 739L, 740L, 741L, 742L, 743L, 744L, 745L,
746L, 747L, 748L, 749L, 750L, 751L, 752L, 753L, 754L, 755L, 756L,
757L, 758L, 759L, 760L, 761L, 762L, 1097L, 1098L, 1099L, 1100L,
1101L, 1102L, 1103L, 1104L, 1105L, 1106L, 1107L, 1108L, 1109L,
1110L, 1111L, 1112L, 1113L, 1114L, 1115L, 1116L, 1117L, 1118L,
1119L, 1120L, 1121L, 1122L, 1123L, 1124L, 1125L, 1126L, 1127L,
1462L, 1463L, 1464L, 1465L, 1466L, 1467L, 1468L, 1469L, 1470L,
1471L, 1472L, 1473L, 1474L, 1475L, 1476L, 1477L, 1478L, 1479L,
1480L, 1481L, 1482L, 1483L, 1484L, 1485L, 1486L, 1487L, 1488L,
1489L, 1490L, 1491L, 1492L, 1827L, 1828L, 1829L, 1830L, 1831L,
1832L, 1833L, 1834L, 1835L, 1836L, 1837L, 1838L, 1839L, 1840L,
1841L, 1842L, 1843L, 1844L, 1845L, 1846L, 1847L, 1848L, 1849L,
1850L, 1851L, 1852L, 1853L, 1854L, 1855L, 1856L, 1857L, 2193L,
2194L, 2195L, 2196L, 2197L, 2198L, 2199L, 2200L, 2201L, 2202L,
2203L, 2204L, 2205L, 2206L, 2207L, 2208L, 2209L, 2210L, 2211L,
2212L, 2213L, 2214L, 2215L, 2216L, 2217L, 2218L, 2219L, 2220L,
2221L, 2222L, 2223L, 2558L, 2559L, 2560L, 2561L, 2562L, 2563L,
2564L, 2565L, 2566L, 2567L, 2568L, 2569L, 2570L, 2571L, 2572L,
2573L, 2574L, 2575L, 2576L, 2577L, 2578L, 2579L, 2580L, 2581L,
2582L, 2583L, 2584L, 2585L, 2586L, 2587L, 2588L, 2923L, 2924L,
2925L, 2926L, 2927L, 2928L, 2929L, 2930L, 2931L, 2932L, 2933L,
2934L, 2935L, 2936L, 2937L, 2938L, 2939L, 2940L, 2941L, 2942L,
2943L, 2944L, 2945L, 2946L, 2947L, 2948L, 2949L, 2950L, 2951L,
2952L, 2953L), class = "data.frame")
This data has four columns: Year, Month, Day, Rainfall
I will be applying this for data with years from 1979 to 2017.
I would like to:
count, for each year, the number of spells of consecutive days with rainfall above 1 mm, grouped by spell duration:
1-3 days, 4-7 days, 8-14 days, and above 14 days.
I have the following script:
library(dplyr)
# Attempt: flag days with Rainfall > 1 within each Year/Month group, keep the
# flagged rows, add a per-`grp` row count `n`, and split the result by `n`.
dat %>%
group_by(Year,Month) %>%
# Logical flag: TRUE on days with more than 1 mm of rain.
mutate(extreme = Rainfall > 1)
# NOTE(review): the mutate() line above does not end in %>%, so as written the
# chain stops there and filter() below starts a new expression with no data
# piped into it.
filter(extreme) %>%
# NOTE(review): `grp` is never created in the visible code -- presumably a
# run-id step was lost when the script was pasted; confirm.
add_count(grp) %>%
ungroup %>%
select(-extreme, -grp) %>%
# One list element per distinct value of n.
group_split(n)
The problem with this script is that it gives all intervals like 1 day, 2 day, 3 day consecutive, etc. I want to get the counts based on the above intervals only.
My expected output is a data frame containing the Year and counts for the four intervals.
I would like to ask for help on how to do this in R correctly.
Sincerely,
You can try the following :
library(dplyr)
dat %>%
  # Flag the days on which rainfall exceeded 1 mm.
  mutate(Rain_above_1 = Rainfall > 1) %>%
  # rleid() gives every run of consecutive rows with the same flag its own id;
  # multiplying by the flag turns all dry runs into id 0. Counting by Year and
  # id then yields the length `n` (in days) of each wet spell in each year.
  count(Year, grp = Rain_above_1 * data.table::rleid(Rain_above_1)) %>%
  # Drop the dry runs (id 0).
  filter(grp != 0) %>%
  # Bucket the spell lengths into the requested duration classes and count the
  # spells per year and class. Explicit labels give readable column names
  # instead of interval notation such as `(-Inf,3]`.
  count(
    Year,
    grp1 = cut(
      n,
      breaks = c(-Inf, 3, 7, 14, Inf),
      labels = c("1-3", "4-7", "8-14", ">14")
    )
  ) %>%
  # One column per duration class. names_sort = TRUE orders the columns by
  # factor level (shortest to longest spell) instead of by first appearance in
  # the data; classes that do not occur in a year are filled with 0.
  tidyr::pivot_wider(
    names_from = grp1,
    values_from = n,
    values_fill = 0,
    names_sort = TRUE
  )
# Year `1-3` `4-7` `8-14` `>14`
# <int> <int> <int> <int> <int>
#1 1979 3 1 0 1
#2 1980 1 4 0 0
#3 1981 0 1 2 0
#4 1982 1 2 1 0
#5 1983 5 2 0 0
#6 1984 3 0 2 0
#7 1985 4 2 0 0
#8 1986 0 0 0 1
#9 1987 1 3 1 0

adding rows with values of '0' for missing years

I have a question that is somewhat similar to others that have been posted, but after looking thoroughly at several posts, I can't get the code to work. Any help would be much appreciated.
My data frame looks like, this:
'data.frame': 501 obs. of 5 variables:
$ Tattoo.MUM : Factor w/ 250 levels "1004","1007",..: 76 76 76 81 81 81 85 85 85 85 ...
$ OffspringMUMs: int 4 4 4 4 4 4 11 11 11 11 ...
$ YearBIRTH.CUB: int 1988 1990 1991 1988 1991 2007 1989 1991 1992 1993 ...
$ YearBIRTH.MUM: int 1991 1991 NA NA NA NA 1987 1987 1987 1987 ...
$ OFFSpYR : int 2 1 1 1 2 1 1 4 3 3 ...
A few lines here:
structure(list(Tattoo.MUM = structure(c(6L, 6L, 6L, 6L, 7L, 7L,
7L, 8L, 9L, 11L, 11L, 11L, 11L, 5L, 1L, 4L, 2L, 3L, 3L, 10L,
10L, 10L, 10L, 10L), .Label = c("10454", "1045A", "1045X", "12392",
"1601", "22", "27", "29", "41", "424X", "60"), class = "factor"),
OffspringMUMs = c(11L, 11L, 11L, 11L, 5L, 5L, 5L, 1L, 3L,
7L, 7L, 7L, 7L, 1L, 2L, 1L, 1L, 4L, 4L, 6L, 6L, 6L, 6L, 6L
), YearBIRTH.CUB = c(1989L, 1991L, 1992L, 1993L, 1990L, 1991L,
1993L, 1989L, 1988L, 1988L, 1989L, 1991L, 1994L, 2015L, 2012L,
2015L, 2005L, 2009L, 2010L, 1996L, 1998L, 2000L, 2001L, 2006L
), YearBIRTH.MUM = c(1987L, 1987L, 1987L, 1987L, NA, NA,
NA, NA, NA, 1987L, 1987L, 1987L, 1987L, NA, NA, NA, NA, 2005L,
2005L, 1994L, 1994L, 1994L, 1994L, 1994L), OFFSpYR = c(1L,
4L, 3L, 3L, 1L, 1L, 3L, 1L, 3L, 3L, 1L, 2L, 1L, 1L, 2L, 1L,
1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L)), .Names = c("Tattoo.MUM",
"OffspringMUMs", "YearBIRTH.CUB", "YearBIRTH.MUM", "OFFSpYR"), class = "data.frame", row.names = c(NA,
-24L))
I want to add new rows for all missing years (YearBIRTH.CUB) in Tattoo.MUM keeping the rest of the values the same and adding '0' to OFFSpYR.
Like so:
structure(list(Tattoo.MUM = structure(c(6L, 6L, 6L, 6L, 6L, 7L,
7L, 7L, 7L, 8L, 9L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 5L, 1L,
4L, 2L, 3L, 3L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L), .Label = c("10454", "1045A", "1045X", "12392", "1601",
"22", "27", "29", "41", "424X", "60"), class = "factor"), OffspringMUMs = c(11L,
11L, 11L, 11L, 11L, 5L, 5L, 5L, 5L, 1L, 3L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 1L, 2L, 1L, 1L, 4L, 4L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L), YearBIRTH.CUB = c(1989L, 1990L, 1991L, 1992L, 1993L,
1990L, 1991L, 1992L, 1993L, 1989L, 1988L, 1988L, 1989L, 1990L,
1991L, 1992L, 1993L, 1994L, 2015L, 2012L, 2015L, 2005L, 2009L,
2010L, 1996L, 1997L, 1998L, 1999L, 2000L, 2001L, 2002L, 2003L,
2004L, 2005L, 2006L), YearBIRTH.MUM = c(1987L, 1987L, 1987L,
1987L, 1987L, NA, NA, NA, NA, NA, NA, 1987L, 1987L, 1987L, 1987L,
1987L, 1987L, 1987L, NA, NA, NA, NA, 2005L, 2005L, 1994L, 1994L,
1994L, 1994L, 1994L, 1994L, 1994L, 1994L, 1994L, 1994L, 1994L
), OFFSpYR = c(1L, 0L, 4L, 3L, 3L, 1L, 1L, 0L, 3L, 1L, 3L, 3L,
1L, 0L, 2L, 0L, 0L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 0L, 1L, 0L,
1L, 2L, 0L, 0L, 0L, 0L, 1L)), .Names = c("Tattoo.MUM", "OffspringMUMs",
"YearBIRTH.CUB", "YearBIRTH.MUM", "OFFSpYR"), class = "data.frame", row.names = c(NA,
-35L))
I've tried:
library(tidyr)
library(dplyr)
# Attempt 1 (tidyr/dplyr): within each Tattoo.MUM/OffspringMUMs group, expand
# YearBIRTH.CUB to every year in the group's range.
# NOTE(review): tidyr::fill() propagates neighbouring values along a column; it
# is not a "replace NA with a constant" helper, so `OFFSpYR=0` presumably does
# not do what was intended here -- the `fill = list(...)` argument of
# complete() is the usual way to supply a constant; confirm.
df1 <- pedMUM %>% group_by(Tattoo.MUM, OffspringMUMs) %>% complete(YearBIRTH.CUB = full_seq(YearBIRTH.CUB,1)) %>% fill(OFFSpYR=0)
library(data.table)
# Attempt 2 (data.table): join pedMUM onto the cross join (CJ) of the unique
# id values and every year between the overall min and max YearBIRTH.CUB.
# NOTE(review): CJ() crosses its arguments independently, so this presumably
# pairs each mum with OffspringMUMs/YearBIRTH.MUM values belonging to other
# mums, and the year range is global rather than per mum; roll=T looks like it
# would carry earlier values forward instead of inserting 0 -- verify.
df1 <- setDT(pedMUM)[CJ(Tattoo.MUM=Tattoo.MUM, OffspringMUMs=OffspringMUMs, YearBIRTH.MUM=YearBIRTH.MUM, YearBIRTH.CUB=seq(min(YearBIRTH.CUB), max(YearBIRTH.CUB)), unique=TRUE),
on=.(Tattoo.MUM, OffspringMUMs, YearBIRTH.CUB), roll=T]
I am obviously using tidyr, dplyr, and data.table wrongly because none have given me the results I want.
I've had a look at the following posts:
Add rows with missing years by group
Adding rows with values of "0" to a dataframe with missing data
Find missing month after grouping with dplyr
And even tried loops:
R code - clever loop to add rows
but I get confused when I try to determine the year sequence for each Tattoo.MUM within the loop.
Would anyone be able to point me in the right direction?
I haven't used complete() before, but the following seems to work. nesting() keeps a set of variables together so that only the combinations present in the data are used, full_seq() expands a variable to the full sequence of its values, and fill = list() sets the value used to fill in the newly created blanks.
pedMUM <- structure(list(Tattoo.MUM = structure(c(6L, 6L, 6L, 6L, 7L, 7L,
7L, 8L, 9L, 11L, 11L, 11L, 11L, 5L, 1L, 4L, 2L, 3L, 3L, 10L,
10L, 10L, 10L, 10L), .Label = c("10454", "1045A", "1045X", "12392",
"1601", "22", "27", "29", "41", "424X", "60"), class = "factor"),
OffspringMUMs = c(11L, 11L, 11L, 11L, 5L, 5L, 5L, 1L, 3L,
7L, 7L, 7L, 7L, 1L, 2L, 1L, 1L, 4L, 4L, 6L, 6L, 6L, 6L, 6L
), YearBIRTH.CUB = c(1989L, 1991L, 1992L, 1993L, 1990L, 1991L,
1993L, 1989L, 1988L, 1988L, 1989L, 1991L, 1994L, 2015L, 2012L,
2015L, 2005L, 2009L, 2010L, 1996L, 1998L, 2000L, 2001L, 2006L
), YearBIRTH.MUM = c(1987L, 1987L, 1987L, 1987L, NA, NA,
NA, NA, NA, 1987L, 1987L, 1987L, 1987L, NA, NA, NA, NA, 2005L,
2005L, 1994L, 1994L, 1994L, 1994L, 1994L), OFFSpYR = c(1L,
4L, 3L, 3L, 1L, 1L, 3L, 1L, 3L, 3L, 1L, 2L, 1L, 1L, 2L, 1L,
1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L)), .Names = c("Tattoo.MUM",
"OffspringMUMs", "YearBIRTH.CUB", "YearBIRTH.MUM", "OFFSpYR"), class = "data.frame", row.names = c(NA,
-24L))
library(tidyr)
library(dplyr)
# Add a row with OFFSpYR = 0 for every year missing between each mum's first
# and last cub year, keeping her other attributes unchanged.
df1 <- pedMUM %>%
  # Record each mum's first and last cub year on every one of her rows.
  group_by(Tattoo.MUM) %>%
  mutate(
    minyear = min(YearBIRTH.CUB, na.rm = TRUE),
    maxyear = max(YearBIRTH.CUB, na.rm = TRUE)
  ) %>%
  # Drop the grouping before completing: tidyr >= 1.2.0 refuses to complete on
  # a grouping column (Tattoo.MUM appears inside nesting() below), and the
  # result should not stay grouped anyway.
  ungroup() %>%
  complete(
    # Keep the per-mum attributes together so that only combinations that
    # actually occur in the data are expanded.
    nesting(Tattoo.MUM, minyear, maxyear, OffspringMUMs, YearBIRTH.MUM),
    # Every year between the overall first and last cub year.
    YearBIRTH.CUB = full_seq(YearBIRTH.CUB, 1),
    # Newly created rows get zero offspring; 0L keeps the column integer.
    fill = list(OFFSpYR = 0L)
  ) %>%
  # Keep only the years inside each mum's own range.
  filter(YearBIRTH.CUB >= minyear & YearBIRTH.CUB <= maxyear) %>%
  # Drop the helper columns and restore the original column order.
  select(all_of(names(pedMUM)))

Aesthetic Mapping Temperature Data in ggplot2

I am very new to r and I think I am just missing something very simple. At least I hope. I am trying to plot temperature change by site using aes in ggplot2. However, when I run the code no data is pulling up in the chart. This is the code I am using.
My data is called diss, x=order, and y=Tempdiff.
# Build a plot of Tempdiff against order from `diss`, drawn with a single
# geom_segment() layer, then display it.
# NOTE(review): the segment layer sets xend to the same column as x (order) and
# yend to the same column as y (Tempdiff), so every segment starts and ends at
# the same point and has zero length -- nothing visible is drawn. A segment
# needs a different end point (e.g. yend = 0) to show up.
# NOTE(review): color="red" sits inside aes(), so it is treated as a mapped
# data value rather than as the literal colour red.
tempplot<-ggplot(data = diss,mapping =
aes(order,Tempdiff))+geom_segment(data = diss , mapping =
aes(x=order,y=Tempdiff, xend=order, yend=Tempdiff, color="red"))
plot(tempplot)
There are no errors when I run the code and the graph pulls up, it is just empty. Is there something in my code that I am unaware of that is removing the data or have I just missed a critical code in actually adding the data?
If I have left something out that is necessary for answering this question I am happy to add more. Thank you in advance!
Addition:
> dput(diss)
structure(list(Paper = c(1L, 2L, 3L, 4L, 5L, 6L, 6L, 6L, 6L,
7L, 7L, 7L, 7L, 8L, 8L, 8L, 9L, 9L, 10L, 10L, 10L), Specimen = structure(c(4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 2L,
2L, 4L, 4L, 4L), .Label = c("", "Ochotona_collaris", "Ochotona_curzoniae",
"Ochotona_principes"), class = "factor"), Site = structure(c(15L,
16L, 6L, 10L, 3L, 7L, 14L, 18L, 8L, 17L, 17L, 17L, 17L, 11L,
2L, 13L, 9L, 9L, 4L, 12L, 5L), .Label = c("", "Arapaho_Basin",
"Bodi ", "Bouldera_Pass", "Calf_Robe", "Columbia_River_Gorge",
"Craters_of_Moon", "Grand_Teton ", "Kluane_Lake", "Lava_Butte Flow_OR",
"Loveland_Pass", "Mad_Wolf", "Niwat_Ridge", "Rocky_Mt", "South_Couliee",
"Sugar_Hill_Modoc", "Tibet_China", "Yellowstone"), class = "factor"),
elevation = c(2565L, 2203L, 316L, 1200L, 2600L, 1700L, 3462L,
2424L, 2815L, 3846L, 3846L, 3846L, 3846L, 3654L, 3878L, 3744L,
2000L, 2000L, 2133L, 2026L, 2004L), yr1 = c(2012L, 1894L,
2011L, 2011L, 1972L, 2010L, 2010L, 2010L, 2010L, 2005L, 2006L,
2007L, 2008L, 1981L, 1981L, 1981L, 1999L, 2003L, 2007L, 2008L,
2008L), yr1pop = c(9L, 4L, 9L, 9L, 48L, 55L, 39L, 71L, 54L,
182L, 219L, 223L, 231L, 6L, 4L, 10L, 5L, 3L, 2L, 3L, 7L),
yr1temp = c(25, 30, 32.1, 20, 20, 27.1, 15.4, 21.2, 18, 11,
13, 14, 12.5, 24.7, 26.5, 28.4, 12, 17, 20, 20, 20), yr2 = c(2013L,
1910L, 2012L, 2012L, 2006L, 2011L, 2011L, 2011L, 2011L, 2006L,
2007L, 2008L, 2009L, 1983L, 1983L, 1983L, 2002L, 2006L, 2009L,
2009L, 2009L), yr2temp = c(26, 31.4, 33.2, 17.1, 24.76, 30.7,
18.2, 22, 20, 13, 14, 12.5, 13, 26.1, 27.3, 28.3, 15, 16,
20, 20, 10), yr2pop = c(9L, 2L, 9L, 7L, 4L, 40L, 20L, 37L,
37L, 219L, 223L, 231L, 214L, 5L, 4L, 8L, 4L, 2L, 2L, 7L,
2L), delta_temp = c(1, 1.4, 1.1, 2.9, 4.76, 3.6, 2.8, 0.8,
2, 2, 1, 1.5, 0.5, 1.4, 0.8, 0.1, 3, 1, 0, 0, 10), avg_temp = c(25.5,
30.7, 32.65, 18.55, 44.76, 28.9, 16.8, 21.6, 19, 12, 13.5,
13.25, 12.75, 25.4, 26.9, 28.35, 13, 16.5, 20, 20, 15), delta_pop = c(0L,
2L, 0L, 2L, 44L, 15L, 19L, 34L, 17L, 37L, 4L, 8L, 17L, 1L,
0L, 2L, 1L, 1L, 0L, 4L, 5L), avg_pop = c(9, 3, 9, 8, 26,
47.5, 29.5, 54, 45.5, 200.5, 221, 227, 222.5, 5.5, 4, 9,
4.5, 3.5, 2, 4, 4.5), SE_temp = c(0.499, 0.7, 0.6, 1.45,
2.381, 1.8, 1.4, 0.4, 1, 1.414, 0.707, 1.06, 0.353, 0.989,
0.565, 0.07, 2.121, 0.707, 0, 0, 7.07), CI_low_temp = c(25.025,
30.035, 32.03, 17.172, 20.119, 27.19, 15.47, 21.22, 18.05,
11.05, 13.025, 12.537, 12.512, 24.735, 26.52, 28.302, 12.075,
16.025, 20, 20, 10.25), CI_upper_temp = c(25.975, 31.365,
33.17, 19.927, 24.641, 30.61, 18.13, 21.98, 19.95, 12.95,
13.974, 13.962, 12.987, 26.065, 27.78, 28.397, 14.925, 16.975,
20, 20, 19.75), SE_pop = c(0, 1.414, 0, 1.414, 31.112, 10.606,
13.425, 24.041, 12.02, 26.162, 2.828, 5.656, 12.02, 0.707,
0, 1.414, 0.707, 0.707, 0, 2.828, 3.535), CI_pop_low = c(9,
2.05, 9, 7.05, 5.1, 40.375, 20.475, 37.85, 37.425, 182.925,
219.1, 223.2, 214.425, 5.025, 4, 8.05, 4.025, 2.025, 2, 3.1,
2.125), CI_pop_upper = c(9, 3.95, 9, 8.95, 46.9, 54.625,
38.525, 70.15, 53.575, 218.075, 222.9, 230.8, 230.575, 5.975,
4, 9.95, 4.975, 2.975, 2, 6.9, 6.875), Tempdiff = c(1, 1.4,
1.1, -2.9, 4.76, 3.6, 2.8, 0.800000000000001, 2, 2, 1, -1.5,
0.5, 1.4, 0.800000000000001, -0.0999999999999979, 3, -1,
0, 0, -10), popdiff = c(0L, -2L, 0L, -2L, -44L, -15L, -19L,
-34L, -17L, 37L, 4L, 8L, -17L, -1L, 0L, -2L, -1L, -1L, 0L,
4L, -5L), order = 1:21), .Names = c("Paper", "Specimen",
"Site", "elevation", "yr1", "yr1pop", "yr1temp", "yr2", "yr2temp",
"yr2pop", "delta_temp", "avg_temp", "delta_pop", "avg_pop", "SE_temp",
"CI_low_temp", "CI_upper_temp", "SE_pop", "CI_pop_low", "CI_pop_upper",
"Tempdiff", "popdiff", "order"), row.names = c(NA, 21L), class = "data.frame")

Multiple NA's for the last variables of linear regression model in R

I am trying to run a linear regression model where I have dummy variables in my data to indicate if a certain predictor variable is not present. I have a total of 15 predictor variables.
No matter the order of my predictor variables, the last five variables always result in NA.
This problem is almost exactly the same as the one asked here: linear regression "NA" estimate just for last coefficient
I tried adding -1 or +0 to the code
# Fit H against all fifteen id* predictor columns of macro.shed.
lm(
  H ~ id11 + id21 + id22 + id23 + id24 + id31 + id41 + id42 + id43 + id52 +
    id71 + id81 + id82 + id90 + id95,
  data = macro.shed
)
And that resulted in only one less value being NA. So now I have 4, instead of 5, predictor variables being NA.
I am reading in my data from csv documents.
This is my code:
# Read the two input tables.
watershed <- read.csv("nlcd_2000_watershed.csv")
macro_2000 <- read.csv("wapp_macro_2000.csv")

# Reshape the long Area column into a matrix with 15 columns, filled row by
# row.
# NOTE(review): assumes the file is ordered by WaterID and lists the same 15
# Value codes in the same order for every watershed -- confirm.
temp1 <- matrix(watershed$Area, ncol = 15, byrow = TRUE)

# Prepend the watershed id and name the area columns id<Value>.
nlcd_watershed <- data.frame(cbind(unique(watershed$WaterID), temp1))
names(nlcd_watershed) <- c("WaterID", paste0("id", unique(watershed$Value)))

# Attach the area columns to every row of macro_2000 with the same WaterID.
macro.shed <- merge(
  macro_2000,
  nlcd_watershed,
  by.x = "WaterID",
  by.y = "WaterID"
)

# Lookup table pairing the unique Value codes with the unique NLCD entries.
data.frame(unique(watershed$Value), unique(watershed$NLCD))
This is my data for macro.shed:
dput(macro.shed)
structure(list(WaterID = c(1L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 10L, 10L, 10L, 10L, 11L), ID = structure(c(1L, 16L,
2L, 9L, 10L, 11L, 12L, 13L, 15L, 8L, 3L, 4L, 5L, 6L, 7L, 14L), .Label = c("L1",
"L10", "L11", "L12", "L13", "L14", "L15", "L16", "L2", "L3",
"L4", "L5", "L6", "L7", "L8", "L9"), class = "factor"), Date = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "8/20/2001", class = "factor"),
UTMX = c(607308L, 607112L, 598526L, 592235L, 603094L, 597749L,
605523L, 608668L, 600517L, 601806L, 597548L, 593815L, 591453L,
607187L, 606851L, 589528L), UTMY = c(4639040L, 4643780L,
4622470L, 4608350L, 4629780L, 4623340L, 4634330L, 4636950L,
4628160L, 4630380L, 4621720L, 4611960L, 4607960L, 4636480L,
4636020L, 4605120L), Watershed = structure(c(1L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 10L, 10L, 10L, 10L, 11L), .Label = c("Cold Spring Creek",
"Drake Brook", "Dutchess County Airport", "East Branch Wappinger",
"Great Spring Creek", "Grist Mill Creek", "Hunns Lake Creek",
"Little Wappinger", "Upton Lake Creek", "Wappinger Creek",
"Wappinger Falls"), class = "factor"), richness = c(37L,
20L, 32L, 14L, 23L, 20L, 23L, 28L, 25L, 32L, 31L, 30L, 23L,
33L, 19L, 19L), H = c(0.9, 1, 0.9, 0.8, 1, 0.8, 0.7, 1, 1,
1, 1, 1, 1, 1, 0.9, 1), EPT = c(18L, 14L, 13L, 3L, 15L, 12L,
15L, 19L, 15L, 21L, 17L, 16L, 13L, 20L, 13L, 12L), DOM = c(62.1,
61.5, 64.1, 73.7, 53.4, 74, 80.3, 59.2, 55.6, 56.8, 57.4,
59.4, 54.2, 59.8, 66, 52.2), PMA = c(58.1, 51, 59.3, 39.9,
58.4, 45.2, 54.5, 75.3, 56.2, 64.3, 66, 53.7, 55.6, 60.4,
52.3, 42.4), FBI = c(3.8, 3.4, 4, 3.9, 3.6, 4.2, 5.2, 3.8,
3.5, 4.1, 3.7, 3.7, 4, 3.8, 3.5, 3.6), BAP = c(8.3, 6.8,
7.8, 3.9, 7.4, 6, 6.8, 8.4, 7.5, 8.2, 8.3, 7.8, 6.8, 8.3,
6.6, 6), Insects.sample = c(7123L, 516L, 2061L, 1341L, 921L,
961L, 580L, 1567L, 1180L, 4226L, 4133L, 1400L, 2325L, 2596L,
687L, 609L), id11 = c(216900L, 216900L, 4923900L, 131400L,
1806300L, 0L, 41945400L, 250200L, 200700L, 1908000L, 4500L,
4500L, 4500L, 4500L, 4500L, 25427700L), id21 = c(83700L,
83700L, 1163700L, 1290600L, 0L, 0L, 11841300L, 2824200L,
110700L, 136800L, 9000L, 9000L, 9000L, 9000L, 9000L, 9145800L
), id22 = c(111600L, 111600L, 596700L, 7245000L, 63900L,
11700L, 7293600L, 5060700L, 323100L, 179100L, 55800L, 55800L,
55800L, 55800L, 55800L, 3876300L), id23 = c(413100L, 413100L,
611100L, 1817100L, 0L, 0L, 11107800L, 208800L, 1713600L,
33300L, 204300L, 204300L, 204300L, 204300L, 204300L, 6268500L
), id24 = c(239400L, 239400L, 4547700L, 193500L, 26100L,
10800L, 48636900L, 88200L, 1139400L, 41400L, 16200L, 16200L,
16200L, 16200L, 16200L, 14818500L), id31 = c(63900L, 63900L,
14319000L, 526500L, 139500L, 0L, 58785300L, 398700L, 1723500L,
73800L, 0L, 0L, 0L, 0L, 0L, 31161600L), id41 = c(384300L,
384300L, 4142700L, 0L, 86400L, 0L, 9641700L, 357300L, 3166200L,
392400L, 0L, 0L, 0L, 0L, 0L, 963900L), id42 = c(729000L,
729000L, 508500L, 209700L, 13500L, 0L, 4072500L, 682200L,
2137500L, 31500L, 10800L, 10800L, 10800L, 10800L, 10800L,
3993300L), id43 = c(1224000L, 1224000L, 1266300L, 1532700L,
0L, 418500L, 6607800L, 695700L, 1356300L, 10800L, 78300L,
78300L, 78300L, 78300L, 78300L, 5419800L), id52 = c(16200L,
16200L, 57600L, 600300L, 17100L, 0L, 1730700L, 958500L, 120600L,
101700L, 20700L, 20700L, 20700L, 20700L, 20700L, 0L), id71 = c(22500L,
22500L, 780300L, 208800L, 5400L, 0L, 1139400L, 533700L, 7085700L,
582300L, 0L, 0L, 0L, 0L, 0L, 198000L), id81 = c(221400L,
221400L, 3398400L, 0L, 1649700L, 0L, 287100L, 155700L, 6300900L,
1511100L, 13500L, 13500L, 13500L, 13500L, 13500L, 264600L
), id82 = c(665100L, 665100L, 1513800L, 41400L, 447300L,
0L, 3083400L, 132300L, 616500L, 53100L, 2943900L, 2943900L,
2943900L, 2943900L, 2943900L, 931500L), id90 = c(2142000L,
2142000L, 826200L, 215100L, 0L, 17705700L, 630000L, 1156500L,
590400L, 15300L, 4598100L, 4598100L, 4598100L, 4598100L,
4598100L, 311400L), id95 = c(4628700L, 4628700L, 113400L,
4897800L, 0L, 10526400L, 358200L, 2281500L, 1431900L, 33300L,
4982400L, 4982400L, 4982400L, 4982400L, 4982400L, 0L)), .Names = c("WaterID",
"ID", "Date", "UTMX", "UTMY", "Watershed", "richness", "H", "EPT",
"DOM", "PMA", "FBI", "BAP", "Insects.sample", "id11", "id21",
"id22", "id23", "id24", "id31", "id41", "id42", "id43", "id52",
"id71", "id81", "id82", "id90", "id95"), row.names = c(NA, -16L
), class = "data.frame")
How do I make it so that the last variables are not resulting in NAs?
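You're trying to fit 15 predictors (16 coefficients if you include the intercept) with only 16 observations.
That's not enough data to calculate that many parameters, which is why you're only getting estimates for some of them. In addition, the predictor values are merged in per watershed, so the 16 rows contain only 11 distinct predictor combinations (one per WaterID); at most 11 coefficients can be estimated, which matches the 5 NAs you get with an intercept (16 - 11) and the 4 NAs without one (15 - 11).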
You'll need to use some sort of regularisation or model selection, but even then your estimates will be sensitive to the method you choose.
To add to the answer provided by @Pete, many of your variables are highly collinear. To visualize this easily:
library(corrplot)
# Correlation matrix of columns 15 to 29 of macro.shed (the id* predictors).
corPlot <- cor(macro.shed[, 15:29])
# Show the pairwise correlations as numbers to spot collinear predictors.
corrplot(corPlot, method = "number")

Resources