I have a long-format data set measuring the height of trees once a week for 8 months. Also recorded are the pot number ('pot'), the date of measurement ('date'), weeks since the start of the experiment ('no.week'), the germination date ('germination'), and weeks since germination ('after.germ').
I want to average tree height over 3-week periods, starting at the week of germination.
For example, the experiment started on 3/25. Pot 3 germinated on 4/15 (no.week = 2). Pot 4 germinated on 4/29 (no.week = 4). I want to average the height of pot 3 starting on 4/15 and pot 4 starting on 4/29, and continue averaging every 3 weeks for the duration of the experiment.
The key is starting the average at different points for each pot.
Any advice and tips would be great!
Subset:
pot table germination week no.week after.germ date height stem
61 3 2 4/15/2022 w1 1 NA 3/25/2022 NA NA
62 3 2 4/15/2022 w2 2 NA 4/15/2022 NA NA
63 3 2 4/15/2022 w3 3 1 4/22/2022 4.6 NA
64 3 2 4/15/2022 w4 4 2 4/29/2022 18.5 NA
65 3 2 4/15/2022 w5 5 3 5/6/2022 18.1 1
66 3 2 4/15/2022 w6 6 4 5/13/2022 18.1 1
67 3 2 4/15/2022 w7 7 5 5/20/2022 17.8 1
68 3 2 4/15/2022 w8 8 6 5/26/2022 19.4 1
69 3 2 4/15/2022 w9 9 7 6/3/2022 18.8 1
70 3 2 4/15/2022 w10 10 8 6/10/2022 19.3 1
71 3 2 4/15/2022 w11 11 9 6/17/2022 18.3 1
72 3 2 4/15/2022 w12 12 10 6/24/2022 18.6 1
73 3 2 4/15/2022 w13 13 11 7/1/2022 19.2 1
74 3 2 4/15/2022 w14 14 12 7/8/2022 19.2 1
75 3 2 4/15/2022 w15 15 13 7/15/2022 18.9 1
76 3 2 4/15/2022 w16 16 14 7/22/2022 15.3 1
77 3 2 4/15/2022 w17 17 15 7/29/2022 19.1 1
78 3 2 4/15/2022 w18 18 16 8/5/2022 19.0 1
79 3 2 4/15/2022 w19 19 17 8/12/2022 19.0 1
80 3 2 4/15/2022 w20 20 18 8/19/2022 19.8 1
81 3 2 4/15/2022 w21 21 19 8/26/2022 18.2 1
82 3 2 4/15/2022 w22 22 20 9/2/2022 19.2 1
83 3 2 4/15/2022 w24 24 21 9/16/2022 18.1 1
84 3 2 4/15/2022 w23 23 22 9/22/2022 19.2 1
85 3 2 4/15/2022 w25 25 23 9/30/2022 15.4 1
86 3 2 4/15/2022 w26 26 24 10/7/2022 18.4 1
87 3 2 4/15/2022 w27 27 25 10/14/2022 19.2 1
88 3 2 4/15/2022 w28 28 26 10/21/2022 19.0 1
89 3 2 4/15/2022 w29 29 27 10/29/2022 18.7 1
90 3 2 4/15/2022 w30 30 28 11/4/2022 19.3 1
91 6 4 4/29/2022 w1 1 NA 3/25/2022 NA NA
92 6 4 4/29/2022 w2 2 NA 4/15/2022 NA NA
93 6 4 4/29/2022 w3 3 NA 4/22/2022 NA NA
94 6 4 4/29/2022 w4 4 1 4/29/2022 16.7 NA
95 6 4 4/29/2022 w5 5 2 5/6/2022 17.5 1
96 6 4 4/29/2022 w6 6 3 5/13/2022 18.8 NA
97 6 4 4/29/2022 w7 7 4 5/20/2022 18.0 NA
98 6 4 4/29/2022 w8 8 5 5/26/2022 17.2 NA
99 6 4 4/29/2022 w9 9 6 6/3/2022 17.7 NA
100 6 4 4/29/2022 w10 10 7 6/10/2022 17.9 NA
101 6 4 4/29/2022 w11 11 8 6/17/2022 18.7 NA
102 6 4 4/29/2022 w12 12 9 6/24/2022 18.1 NA
103 6 4 4/29/2022 w13 13 10 7/1/2022 17.3 NA
104 6 4 4/29/2022 w14 14 11 7/8/2022 13.8 NA
105 6 4 4/29/2022 w15 15 12 7/15/2022 18.4 1
106 6 4 4/29/2022 w16 16 13 7/22/2022 19.0 1
107 6 4 4/29/2022 w17 17 14 7/29/2022 18.8 1
108 6 4 4/29/2022 w18 18 15 8/5/2022 NA 1
109 6 4 4/29/2022 w19 19 16 8/12/2022 19.0 1
110 6 4 4/29/2022 w20 20 17 8/19/2022 19.3 1
111 6 4 4/29/2022 w21 21 18 8/26/2022 18.6 1
112 6 4 4/29/2022 w22 22 19 9/2/2022 18.2 1
113 6 4 4/29/2022 w24 24 20 9/16/2022 18.0 1
114 6 4 4/29/2022 w23 23 21 9/22/2022 18.8 1
115 6 4 4/29/2022 w25 25 22 9/30/2022 19.7 1
116 6 4 4/29/2022 w26 26 23 10/7/2022 17.4 1
117 6 4 4/29/2022 w27 27 24 10/14/2022 18.8 1
118 6 4 4/29/2022 w28 28 25 10/21/2022 19.9 1
119 6 4 4/29/2022 w29 29 26 10/29/2022 17.9 1
120 6 4 4/29/2022 w30 30 27 11/4/2022 19.5 1
211 10 2 4/29/2022 w1 1 NA 3/25/2022 NA NA
212 10 2 4/29/2022 w2 2 NA 4/15/2022 NA NA
213 10 2 4/29/2022 w3 3 NA 4/22/2022 NA NA
214 10 2 4/29/2022 w4 4 NA 4/29/2022 NA NA
215 10 2 4/29/2022 w5 5 1 5/6/2022 9.5 1
216 10 2 4/29/2022 w6 6 2 5/13/2022 15.4 NA
217 10 2 4/29/2022 w7 7 3 5/20/2022 14.3 NA
218 10 2 4/29/2022 w8 8 4 5/26/2022 15.8 NA
219 10 2 4/29/2022 w9 9 5 6/3/2022 16.1 NA
220 10 2 4/29/2022 w10 10 6 6/10/2022 16.1 NA
221 10 2 4/29/2022 w11 11 7 6/17/2022 15.9 NA
222 10 2 4/29/2022 w12 12 8 6/24/2022 16.3 NA
223 10 2 4/29/2022 w13 13 9 7/1/2022 16.2 NA
224 10 2 4/29/2022 w14 14 10 7/8/2022 16.4 NA
225 10 2 4/29/2022 w15 15 11 7/15/2022 15.7 1
226 10 2 4/29/2022 w16 16 12 7/22/2022 15.5 1
227 10 2 4/29/2022 w17 17 13 7/29/2022 15.7 1
228 10 2 4/29/2022 w18 18 14 8/5/2022 15.5 1
229 10 2 4/29/2022 w19 19 15 8/12/2022 16.0 1
230 10 2 4/29/2022 w20 20 16 8/19/2022 15.9 1
231 10 2 4/29/2022 w21 21 17 8/26/2022 15.7 1
232 10 2 4/29/2022 w22 22 18 9/2/2022 15.5 1
233 10 2 4/29/2022 w24 24 19 9/16/2022 15.1 1
234 10 2 4/29/2022 w23 23 20 9/22/2022 15.8 1
235 10 2 4/29/2022 w25 25 21 9/30/2022 15.8 1
236 10 2 4/29/2022 w26 26 22 10/7/2022 15.1 1
237 10 2 4/29/2022 w27 27 23 10/14/2022 15.9 1
238 10 2 4/29/2022 w28 28 24 10/21/2022 16.5 1
239 10 2 4/29/2022 w29 29 25 10/29/2022 15.7 1
240 10 2 4/29/2022 w30 30 26 11/4/2022 16.2 1
271 14 2 4/15/2022 w1 1 NA 3/25/2022 NA NA
272 14 2 4/15/2022 w2 2 NA 4/15/2022 NA NA
273 14 2 4/15/2022 w3 3 1 4/22/2022 5.8 NA
274 14 2 4/15/2022 w4 4 2 4/29/2022 19.7 NA
275 14 2 4/15/2022 w5 5 3 5/6/2022 20.1 1
276 14 2 4/15/2022 w6 6 4 5/13/2022 19.4 1
277 14 2 4/15/2022 w7 7 5 5/20/2022 20.0 1
278 14 2 4/15/2022 w8 8 6 5/26/2022 19.6 1
279 14 2 4/15/2022 w9 9 7 6/3/2022 19.6 1
280 14 2 4/15/2022 w10 10 8 6/10/2022 20.2 1
281 14 2 4/15/2022 w11 11 9 6/17/2022 21.1 1
282 14 2 4/15/2022 w12 12 10 6/24/2022 21.3 1
283 14 2 4/15/2022 w13 13 11 7/1/2022 19.4 NA
284 14 2 4/15/2022 w14 14 12 7/8/2022 20.3 NA
285 14 2 4/15/2022 w15 15 13 7/15/2022 19.5 1
286 14 2 4/15/2022 w16 16 14 7/22/2022 19.3 1
287 14 2 4/15/2022 w17 17 15 7/29/2022 22.4 1
288 14 2 4/15/2022 w18 18 16 8/5/2022 20.0 1
289 14 2 4/15/2022 w19 19 17 8/12/2022 20.0 1
290 14 2 4/15/2022 w20 20 18 8/19/2022 20.4 1
291 14 2 4/15/2022 w21 21 19 8/26/2022 19.6 1
I calculated a rolling average, but a rolling average isn't quite what I'm looking for, since I need averages over distinct 3-week periods that start at a different point for each pot.
library(dplyr)
library(zoo)
# rolling mean of height over the 3 most recent measurements, within each pot
cg22_avg <- cg22_long %>%
  dplyr::group_by(pot) %>%
  dplyr::mutate(`3wkavg` = rollmean(height, 3, align = "right", na.pad = TRUE))
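To make the target concrete with the subset above: for pot 3 (germinated 4/15) the first value I want is the average of the heights measured on 4/15, 4/22 and 4/29, the second is the average of 5/6, 5/13 and 5/20, and so on. Roughly:
# pot 3, first two 3-week blocks after germination
mean(c(NA, 4.6, 18.5), na.rm = TRUE)   # 4/15, 4/22, 4/29
mean(c(18.1, 18.1, 17.8))              # 5/6, 5/13, 5/20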
Perhaps this use of the slider package will help:
library(dplyr)
quux %>%
  # parse the character dates so they can be compared and used as the window index
  mutate(across(c(germination, date), ~ as.Date(., format = "%m/%d/%Y"))) %>%
  dplyr::filter(date >= germination) %>%   # keep only rows on/after each pot's germination
  group_by(pot) %>%
  # per row: mean height over the window of weeks ending at that row's date
  mutate(avg3 = slider::slide_period_dbl(.x = height, .i = date, .period = "week",
                                         .f = mean, .before = 3)) %>%
  ungroup()
# # A tibble: 103 × 10
# pot table germination week no.week after.germ date height stem avg3
# <int> <int> <date> <chr> <int> <int> <date> <dbl> <int> <dbl>
# 1 3 2 2022-04-15 w2 2 NA 2022-04-15 NA NA NA
# 2 3 2 2022-04-15 w3 3 1 2022-04-22 4.6 NA NA
# 3 3 2 2022-04-15 w4 4 2 2022-04-29 18.5 NA NA
# 4 3 2 2022-04-15 w5 5 3 2022-05-06 18.1 1 NA
# 5 3 2 2022-04-15 w6 6 4 2022-05-13 18.1 1 14.8
# 6 3 2 2022-04-15 w7 7 5 2022-05-20 17.8 1 18.1
# 7 3 2 2022-04-15 w8 8 6 2022-05-26 19.4 1 18.4
# 8 3 2 2022-04-15 w9 9 7 2022-06-03 18.8 1 18.5
# 9 3 2 2022-04-15 w10 10 8 2022-06-10 19.3 1 18.8
# 10 3 2 2022-04-15 w11 11 9 2022-06-17 18.3 1 19.0
# # … with 93 more rows
# # ℹ Use `print(n = ...)` to see more rows
I chose .period="week", .before=3, which includes each row's own week plus the three weeks before it. An alternative would be .period="day", .before=20 if you want a 3-week window that does not include the observation from exactly 3 weeks before the current date. There is plenty of room to play with these arguments.
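If you instead want distinct (non-overlapping) 3-week blocks anchored at each pot's germination, here is one sketch using the quux data below; it leans on the existing after.germ counter of weeks since germination, so only post-germination weeks are used:
library(dplyr)
quux %>%
  filter(!is.na(after.germ)) %>%                       # drop pre-germination weeks
  group_by(pot, block = (after.germ - 1) %/% 3) %>%    # block 0 = weeks 1-3, block 1 = weeks 4-6, ...
  summarise(avg3 = mean(height, na.rm = TRUE), .groups = "drop")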
Data
quux <- structure(list(pot = c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L), table = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), germination = c("4/15/2022", "4/15/2022", "4/15/2022", "4/15/2022", "4/15/2022", "4/15/2022", "4/15/2022", "4/15/2022", "4/15/2022", "4/15/2022", "4/15/2022", "4/15/2022", "4/15/2022", "4/15/2022", "4/15/2022", "4/15/2022", "4/15/2022", "4/15/2022", "4/15/2022", "4/15/2022", "4/15/2022", "4/15/2022", "4/15/2022", "4/15/2022", "4/15/2022", "4/15/2022", "4/15/2022", "4/15/2022", "4/15/2022", "4/15/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/29/2022", "4/15/2022", "4/15/2022", "4/15/2022", "4/15/2022", "4/15/2022", "4/15/2022", "4/15/2022", "4/15/2022", "4/15/2022", "4/15/2022", "4/15/2022", "4/15/2022", "4/15/2022", "4/15/2022", "4/15/2022", "4/15/2022", "4/15/2022", "4/15/2022", "4/15/2022", "4/15/2022", "4/15/2022"), week = c("w1", "w2", "w3", "w4", "w5", "w6", "w7", "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15", "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w24", "w23", "w25", "w26", "w27", "w28", "w29", "w30", "w1", "w2", "w3", "w4", "w5", "w6", "w7", "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15", "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w24", "w23", "w25", "w26", "w27", "w28", "w29", "w30", "w1", "w2", "w3", "w4", "w5", "w6", "w7", "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15", "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w24", "w23", "w25", "w26", "w27", "w28", "w29", "w30", "w1", "w2", "w3", "w4", "w5", "w6", "w7", "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15", "w16", "w17", "w18", "w19", "w20", "w21"), no.week = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 24L, 23L, 25L, 26L, 27L, 28L, 29L, 30L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 24L, 23L, 25L, 26L, 27L, 28L, 29L, 30L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 
16L, 17L, 18L, 19L, 20L, 21L, 22L, 24L, 23L, 25L, 26L, 27L, 28L, 29L, 30L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L), after.germ = c(NA, NA, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, NA, NA, NA, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, NA, NA, NA, NA, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, NA, NA, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L), date = c("3/25/2022", "4/15/2022", "4/22/2022", "4/29/2022", "5/6/2022", "5/13/2022", "5/20/2022", "5/26/2022", "6/3/2022", "6/10/2022", "6/17/2022", "6/24/2022", "7/1/2022", "7/8/2022", "7/15/2022", "7/22/2022", "7/29/2022", "8/5/2022", "8/12/2022", "8/19/2022", "8/26/2022", "9/2/2022", "9/16/2022", "9/22/2022", "9/30/2022", "10/7/2022", "10/14/2022", "10/21/2022", "10/29/2022", "11/4/2022", "3/25/2022", "4/15/2022", "4/22/2022", "4/29/2022", "5/6/2022", "5/13/2022", "5/20/2022", "5/26/2022", "6/3/2022", "6/10/2022", "6/17/2022", "6/24/2022", "7/1/2022", "7/8/2022", "7/15/2022", "7/22/2022", "7/29/2022", "8/5/2022", "8/12/2022", "8/19/2022", "8/26/2022", "9/2/2022", "9/16/2022", "9/22/2022", "9/30/2022", "10/7/2022", "10/14/2022", "10/21/2022", "10/29/2022", "11/4/2022", "3/25/2022", "4/15/2022", "4/22/2022", "4/29/2022", "5/6/2022", "5/13/2022", "5/20/2022", "5/26/2022", "6/3/2022", "6/10/2022", "6/17/2022", "6/24/2022", "7/1/2022", "7/8/2022", "7/15/2022", "7/22/2022", "7/29/2022", "8/5/2022", "8/12/2022", "8/19/2022", "8/26/2022", "9/2/2022", "9/16/2022", "9/22/2022", "9/30/2022", "10/7/2022", "10/14/2022", "10/21/2022", "10/29/2022", "11/4/2022", "3/25/2022", "4/15/2022", "4/22/2022", "4/29/2022", "5/6/2022", "5/13/2022", "5/20/2022", "5/26/2022", "6/3/2022", "6/10/2022", "6/17/2022", "6/24/2022", "7/1/2022", "7/8/2022", "7/15/2022", "7/22/2022", "7/29/2022", "8/5/2022", "8/12/2022", "8/19/2022", "8/26/2022"), height = c(NA, NA, 4.6, 18.5, 18.1, 18.1, 17.8, 19.4, 18.8, 19.3, 18.3, 18.6, 19.2, 19.2, 18.9, 15.3, 19.1, 19, 19, 19.8, 18.2, 19.2, 18.1, 19.2, 15.4, 18.4, 19.2, 19, 18.7, 19.3, NA, NA, NA, 16.7, 17.5, 18.8, 18, 17.2, 17.7, 17.9, 18.7, 18.1, 17.3, 13.8, 18.4, 19, 18.8, NA, 19, 19.3, 18.6, 18.2, 18, 18.8, 19.7, 17.4, 18.8, 19.9, 17.9, 19.5, NA, NA, NA, NA, 9.5, 15.4, 14.3, 15.8, 16.1, 16.1, 15.9, 16.3, 16.2, 16.4, 15.7, 15.5, 15.7, 15.5, 16, 15.9, 15.7, 15.5, 15.1, 15.8, 15.8, 15.1, 15.9, 16.5, 15.7, 16.2, NA, NA, 5.8, 19.7, 20.1, 19.4, 20, 19.6, 19.6, 20.2, 21.1, 21.3, 19.4, 20.3, 19.5, 19.3, 22.4, 20, 20, 20.4, 19.6), stem = c(NA, NA, NA, NA, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, NA, NA, NA, NA, 1L, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, NA, NA, NA, NA, 1L, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, NA, NA, NA, NA, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, NA, NA, 1L, 1L, 1L, 1L, 1L, 1L, 1L)), class = "data.frame", row.names = c("61", "62", "63", "64", "65", "66", "67", "68", "69", "70", "71", "72", "73", "74", "75", "76", "77", "78", "79", "80", "81", "82", "83", "84", "85", "86", "87", "88", "89", "90", "91", "92", "93", "94", "95", "96", "97", "98", "99", "100", "101", "102", "103", "104", "105", "106", "107", "108", 
"109", "110", "111", "112", "113", "114", "115", "116", "117", "118", "119", "120", "211", "212", "213", "214", "215", "216", "217", "218", "219", "220", "221", "222", "223", "224", "225", "226", "227", "228", "229", "230", "231", "232", "233", "234", "235", "236", "237", "238", "239", "240", "271", "272", "273", "274", "275", "276", "277", "278", "279", "280", "281", "282", "283", "284", "285", "286", "287", "288", "289", "290", "291"))
I have a time-series dataset DF where the first column is the timestep and the second column is the cell number (cellNo.). How can I drop all rows except the one with max(DF$cellNo.) for each timestep?
> head(DF, n=100)
timestep cellNo.
1 1 1
2 1 2
3 1 3
4 1 4
5 1 5
6 1 6
7 1 7
8 1 8
9 1 9
10 1 10
11 1 11
12 1 12
13 1 13
14 1 14
15 1 15
16 1 16
17 1 17
18 1 18
19 1 19
20 1 20
21 1 21
22 1 22
23 1 23
24 1 24
25 1 25
26 1 26
27 1 27
28 1 28
29 1 29
30 1 30
31 1 31
32 1 32
33 2 1
34 2 2
35 2 3
36 2 4
37 2 5
38 2 6
39 2 7
40 2 8
41 2 9
42 2 10
43 2 11
44 2 12
45 2 13
46 2 14
47 2 15
48 2 16
49 2 17
50 2 18
51 2 19
52 2 20
53 2 21
54 2 22
55 2 23
56 2 24
57 2 25
58 2 26
59 2 27
60 2 28
61 2 29
62 2 30
63 2 31
64 2 32
65 3 1
66 3 2
67 3 3
68 3 4
69 3 5
70 3 6
71 3 7
72 3 8
73 3 9
74 3 10
75 3 11
76 3 12
77 3 13
78 3 14
79 3 15
80 3 16
81 3 17
82 3 18
83 3 19
84 3 20
85 3 21
86 3 22
87 3 23
88 3 24
89 3 25
90 3 26
91 3 27
92 3 28
93 3 29
94 3 30
95 3 31
96 3 32
97 4 1
98 4 2
99 4 3
100 4 4
If you want only max(cellNo.) per timestep, you could use aggregate():
aggregate(cellNo. ~ timestep, DF, max)
# timestep cellNo.
# 1 1 32
# 2 2 32
# 3 3 32
# 4 4 4
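If DF eventually has more columns and you want to keep the whole rows carrying the per-timestep maximum (rather than just the maxima), a base-R sketch:
# keep only the row(s) whose cellNo. equals the maximum within its timestep
DF[DF$cellNo. == ave(DF$cellNo., DF$timestep, FUN = max), ]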
Try this
# dput your data
df <- structure(list(timestep = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L), cellNo. = c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L,
29L, 30L, 31L, 32L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L,
24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L,
19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L,
32L, 1L, 2L, 3L, 4L)), .Names = c("timestep", "cellNo."), class = "data.frame", row.names = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13",
"14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24",
"25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35",
"36", "37", "38", "39", "40", "41", "42", "43", "44", "45", "46",
"47", "48", "49", "50", "51", "52", "53", "54", "55", "56", "57",
"58", "59", "60", "61", "62", "63", "64", "65", "66", "67", "68",
"69", "70", "71", "72", "73", "74", "75", "76", "77", "78", "79",
"80", "81", "82", "83", "84", "85", "86", "87", "88", "89", "90",
"91", "92", "93", "94", "95", "96", "97", "98", "99", "100"))
library(dplyr)
df %>% group_by(timestep) %>% summarise(max = max(cellNo.))
#Source: local data frame [4 x 2]
#timestep max
# (int) (int)
#1 1 32
#2 2 32
#3 3 32
#4 4 4
With data.table:
library(data.table)
setDT(df)[, .(Max = max(cellNo.)), by = timestep]
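If you need the entire row (all columns) rather than just the maximum value, one possible variant:
# keeps the first row attaining the maximum cellNo. within each timestep
setDT(df)[, .SD[which.max(cellNo.)], by = timestep]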