How to group by in base R

I would like to express the following SQL query using base R (without any particular package):
select month, day, count(*) as count, avg(dep_delay) as avg_delay
from flights
group by month, day
having count > 1000
It selects the mean departure delay and the number of flights per day on busy days (days with more than 1000 flights). The dataset is nycflights13, which contains information on flights that departed from NYC in 2013.
Notice I can easily write this in dplyr as:
flights %>%
  group_by(month, day) %>%
  summarise(count = n(), avg_delay = mean(dep_delay, na.rm = TRUE)) %>%
  filter(count > 1000)

Since I was reminded earlier about the elegance of by (tip of the hat to @Parfait), here is a solution using by:
res <- by(flights, list(flights$month, flights$day), function(x)
  if (nrow(x) > 1000) {
    c(month = unique(x$month),
      day = unique(x$day),
      count = nrow(x),
      avg_delay = mean(x$dep_delay, na.rm = TRUE))
  })
# Combine the per-group vectors (result is a matrix) and order by month, day
df <- do.call(rbind, res)
df <- df[order(df[, 1], df[, 2]), ]
# month day count avg_delay
#[1,] 7 8 1004 37.296646
#[2,] 7 9 1001 30.711499
#[3,] 7 10 1004 52.860702
#[4,] 7 11 1006 23.609392
#[5,] 7 12 1002 25.096154
#[6,] 7 17 1001 13.670707
#[7,] 7 18 1003 20.626789
#[8,] 7 25 1003 19.674134
#[9,] 7 31 1001 6.280843
#[10,] 8 7 1001 8.680402
#[11,] 8 8 1001 43.349947
#[12,] 8 12 1001 8.308157
#[13,] 11 27 1014 16.697651
#[14,] 12 2 1004 9.021978
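Since each group returns a named numeric vector, do.call(rbind, res) produces a matrix rather than a data.frame. If you want a proper data.frame (for $-access or mixed column types), a one-line conversion afterwards does it:
# Convert the combined matrix to a data.frame; column names carry over
df <- as.data.frame(df)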

As commented, you can use a combination of subset and aggregate. I changed the order of day & month to receive the same ordering as your dplyr approach, and used na.action = NULL to count rows including NAs.
library(nycflights13)
#> Warning: package 'nycflights13' was built under R version 3.4.4
subset(aggregate(dep_delay ~ day + month, flights,
                 function(x) cbind(count = length(x), avg_delay = mean(x, na.rm = TRUE)),
                 na.action = NULL),
       dep_delay[, 1] > 1000)
#> day month dep_delay.1 dep_delay.2
#> 189 8 7 1004.000000 37.296646
#> 190 9 7 1001.000000 30.711499
#> 191 10 7 1004.000000 52.860702
#> 192 11 7 1006.000000 23.609392
#> 193 12 7 1002.000000 25.096154
#> 198 17 7 1001.000000 13.670707
#> 199 18 7 1003.000000 20.626789
#> 206 25 7 1003.000000 19.674134
#> 212 31 7 1001.000000 6.280843
#> 219 7 8 1001.000000 8.680402
#> 220 8 8 1001.000000 43.349947
#> 224 12 8 1001.000000 8.308157
#> 331 27 11 1014.000000 16.697651
#> 336 2 12 1004.000000 9.021978
Created on 2018-04-05 by the reprex package (v0.2.0).
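One quirk to be aware of: aggregate() stores the cbind() result as a single two-column matrix column named dep_delay, which is why the filter condition reads dep_delay[, 1] and the printed names are dep_delay.1/dep_delay.2. Here is a sketch of flattening it into ordinary columns, assuming the subset() result above has been assigned to res:
res <- do.call(data.frame, res)             # expands the matrix column into two columns
names(res)[3:4] <- c("count", "avg_delay")  # rename dep_delay.1 / dep_delay.2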

Not a particularly elegant solution, but this will do what you want using base R:
flights_split <- split(flights, f = list(flights$month, flights$day))
result <- lapply(flights_split, function(x) {
  if (nrow(x) > 1000) {
    data.frame(month = unique(x$month), day = unique(x$day),
               avg_delay = mean(x$dep_delay, na.rm = TRUE), count = nrow(x))
  } else {
    NULL
  }
})
do.call(rbind, result)
# month day avg_delay count
# 12.2 12 2 9.021978 1004
# 8.7 8 7 8.680402 1001
# 7.8 7 8 37.296646 1004
# 8.8 8 8 43.349947 1001
# 7.9 7 9 30.711499 1001
# 7.10 7 10 52.860702 1004
# 7.11 7 11 23.609392 1006
# 7.12 7 12 25.096154 1002
# 8.12 8 12 8.308157 1001
# 7.17 7 17 13.670707 1001
# 7.18 7 18 20.626789 1003
# 7.25 7 25 19.674134 1003
# 11.27 11 27 16.697651 1014
# 7.31 7 31 6.280843 1001
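A small refinement: split() on two factors creates one element for every month/day combination, including impossible dates (month 2, day 30, say) that yield zero-row data frames for lapply to visit. Passing drop = TRUE skips those empty groups:
# Drop month/day combinations that never occur in the data
flights_split <- split(flights, f = list(flights$month, flights$day), drop = TRUE)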

Here is my solution:
grp <- expand.grid(mth = unique(flights$month), d = unique(flights$day))
out <- mapply(function(mth, d) {
  sub_data <- subset(flights, month == mth & day == d)
  df <- data.frame(
    month = mth,
    day = d,
    count = nrow(sub_data),
    avg_delay = mean(sub_data$dep_delay, na.rm = TRUE)
  )
  df[df$count > 1000, ]
}, grp$mth, grp$d, SIMPLIFY = FALSE)  # keep a list of data frames so rbind works
res <- do.call(rbind, out)
This is a lot slower than the dplyr solution.
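Most of that slowdown comes from subset() rescanning all of flights once for each of the 365 month/day combinations, where dplyr makes a single grouped pass. A rough way to see the gap (a sketch, assuming dplyr and nycflights13 are loaded; exact timings vary by machine):
# Wrap each approach in a function and time it
f_base <- function() {
  grp <- expand.grid(mth = unique(flights$month), d = unique(flights$day))
  out <- Map(function(mth, d) {
    sub_data <- subset(flights, month == mth & day == d)
    data.frame(month = mth, day = d, count = nrow(sub_data),
               avg_delay = mean(sub_data$dep_delay, na.rm = TRUE))
  }, grp$mth, grp$d)
  res <- do.call(rbind, out)
  res[res$count > 1000, ]
}
f_dplyr <- function() {
  flights %>%
    group_by(month, day) %>%
    summarise(count = n(), avg_delay = mean(dep_delay, na.rm = TRUE)) %>%
    filter(count > 1000)
}
system.time(f_base())   # one full scan of flights per group
system.time(f_dplyr())  # a single grouped pass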

Related

How to create a loop code from big dataframe in R?

I have a data series of daily snow depth values over a 60-year period. I would like to see the number of days with a snow depth higher than 30 cm for each season, for example from July 1980 to June 1981. What would the code for this look like? I know how I could calculate the number of days above 30 cm for each season individually, but not how to compute it for all seasons at once.
I have uploaded my dataframe on wetransfer: Dataframe
Thank you so much for your help in advance.
Pernilla
Something like this would work
library(dplyr)
library(lubridate)
df <- read.csv('BayrischerWald_Brennes_SH_daily_merged.txt', sep = ';')
df_season <- df %>%
  mutate(season = (Day %>% ymd() - days(181)) %>% floor_date("year") %>% year())
df_group_by_season <- df_season %>%
  filter(!is.na(SHincm)) %>%
  group_by(season) %>%
  summarize(days_above_30 = sum(SHincm > 30)) %>%
  ungroup()
df_group_by_season
#> # A tibble: 61 × 2
#> season days_above_30
#> <dbl> <int>
#> 1 1961 1
#> 2 1962 0
#> 3 1963 0
#> 4 1964 0
#> 5 1965 0
#> 6 1966 0
#> 7 1967 129
#> 8 1968 60
#> 9 1969 107
#> 10 1970 43
#> # … with 51 more rows
Created on 2022-01-15 by the reprex package (v2.0.1)
Here is an approach using the aggregate() function. After reading the data, convert the Date field to a date object and get rid of the rows with missing values for the date:
snow <- read.table("BayrischerWald_Brennes_SH_daily_merged.txt", header=TRUE, sep=";")
snow$Day <- as.Date(snow$Day)
str(snow)
# 'data.frame': 51606 obs. of 2 variables:
# $ Day : Date, format: "1961-11-01" "1961-11-02" "1961-11-03" "1961-11-04" ...
# $ SHincm: int 0 0 0 0 2 9 19 22 15 5 ...
snow <- snow[!is.na(snow$Day), ]
str(snow)
# 'data.frame': 21886 obs. of 2 variables:
# $ Day : Date, format: "1961-11-01" "1961-11-02" "1961-11-03" "1961-11-04" ...
# $ SHincm: int 0 0 0 0 2 9 19 22 15 5 ...
Notice more than half of your data has missing values for the date. Now we need to divide the data by ski season:
brks <- as.Date(paste(1961:2022, "07-01", sep="-"))
lbls <- paste(1961:2021, 1962:2022, sep="/")
snow$Season <- cut(snow$Day, breaks=brks, labels=lbls)
Now we use aggregate() to get the number of days with over 30 cm of snow:
days30cm <- aggregate(SHincm~Season, snow, subset=snow$SHincm > 30, length)
colnames(days30cm)[2] <- "Over30cm"
head(days30cm, 10)
# Season Over30cm
# 1 1961/1962 1
# 2 1967/1968 129
# 3 1968/1969 60
# 4 1969/1970 107
# 5 1970/1971 43
# 6 1972/1973 101
# 7 1973/1974 119
# 8 1974/1975 188
# 9 1975/1976 126
# 10 1976/1977 112
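Note one difference from the dplyr answer above: because subset= removes the non-qualifying rows before aggregation, seasons with zero days over 30 cm (1962/1963 through 1966/1967, for instance) drop out of the result entirely. If you want those seasons listed with a 0, move the comparison inside the aggregation function, along these lines:
days30cm <- aggregate(SHincm ~ Season, snow, function(x) sum(x > 30))
colnames(days30cm)[2] <- "Over30cm"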
In addition, you can get other statistics such as the maximum snow of the season or the total cm of snow:
maxsnow <- aggregate(SHincm~Season, snow, max)
totalsnow <- aggregate(SHincm~Season, snow, sum)

Using dplyr mutate function to create new variable conditionally based on current row

I am working on creating conditional averages for a large data set that involves the number of flu cases seen during each week for several years. The data is organized with one row per Flu.Year and Week.Number, with a count column (sample data below).
What I want to do is create a new column that tabulates the average number of cases for that same week in previous years. For instance, for the row where Week.Number is 1 and Flu.Year is 2017, I would like the new column to give the average count for any year with Week.Number==1 & Flu.Year<2017. Normally, I would use the case_when() function to conditionally tabulate something like this. For instance, when calculating the average weekly volume I used this code:
mutate(average = case_when(
  Flu.Year == 2016 ~ mean(chcc$count[chcc$Flu.Year == 2016]),
  Flu.Year == 2017 ~ mean(chcc$count[chcc$Flu.Year == 2017]),
  Flu.Year == 2018 ~ mean(chcc$count[chcc$Flu.Year == 2018]),
  Flu.Year == 2019 ~ mean(chcc$count[chcc$Flu.Year == 2019]),
),
However, there are four years of data * 52 weeks, which is a lot of conditions to spell out. Is there a way to code this elegantly in dplyr? The problem I keep running into is that I want to use values in the count column from other rows, selected according to the current row's Week.Number and Flu.Year, and I am not sure how to accomplish that. Please let me know if there is further information / detail I can provide.
Thanks,
Steven
dat <- tibble(Flu.Year = rep(2016:2019, each = 52),
              Week.Number = rep(1:52, 4),
              count = sample(1000, size = 52*4, replace = TRUE))
It's bad form and, in some cases, an error to use $-indexing within dplyr verbs.
I think a better way to get that average field is to group_by(Flu.Year) and calculate it straight-up.
library(dplyr)
set.seed(42)
dat <- tibble(
  Flu.Year = sample(2016:2020, size = 100, replace = TRUE),
  count = sample(1000, size = 100, replace = TRUE)
)
dat %>%
  group_by(Flu.Year) %>%
  mutate(average = mean(count)) %>%
  # just to show a quick summary
  slice(1:3) %>%
  ungroup()
# # A tibble: 15 x 3
# Flu.Year count average
# <int> <int> <dbl>
# 1 2016 734 578.
# 2 2016 356 578.
# 3 2016 411 578.
# 4 2017 217 436.
# 5 2017 453 436.
# 6 2017 920 436.
# 7 2018 963 558
# 8 2018 609 558
# 9 2018 536 558
# 10 2019 943 543.
# 11 2019 740 543.
# 12 2019 536 543.
# 13 2020 627 494.
# 14 2020 218 494.
# 15 2020 389 494.
An alternative approach is to generate a summary table (just one row per year) and join it back in to the original data.
dat %>%
  group_by(Flu.Year) %>%
  summarize(average = mean(count))
# # A tibble: 5 x 2
# Flu.Year average
# <int> <dbl>
# 1 2016 578.
# 2 2017 436.
# 3 2018 558
# 4 2019 543.
# 5 2020 494.
dat %>%
  group_by(Flu.Year) %>%
  summarize(average = mean(count)) %>%
  full_join(dat, by = "Flu.Year")
# # A tibble: 100 x 3
# Flu.Year average count
# <int> <dbl> <int>
# 1 2016 578. 734
# 2 2016 578. 356
# 3 2016 578. 411
# 4 2016 578. 720
# 5 2016 578. 851
# 6 2016 578. 822
# 7 2016 578. 465
# 8 2016 578. 679
# 9 2016 578. 30
# 10 2016 578. 180
# # ... with 90 more rows
The result, after chat:
tibble(Flu.Year = rep(2016:2018, each = 3), Week.Number = rep(1:3, 3), count = 1:9) %>%
  arrange(Flu.Year, Week.Number) %>%
  group_by(Week.Number) %>%
  mutate(year_week.average = lag(cumsum(count) / seq_along(count)))
# # A tibble: 9 x 4
# # Groups: Week.Number [3]
# Flu.Year Week.Number count year_week.average
# <int> <int> <int> <dbl>
# 1 2016 1 1 NA
# 2 2016 2 2 NA
# 3 2016 3 3 NA
# 4 2017 1 4 1
# 5 2017 2 5 2
# 6 2017 3 6 3
# 7 2018 1 7 2.5
# 8 2018 2 8 3.5
# 9 2018 3 9 4.5
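Incidentally, dplyr ships a cummean() helper, so the running average can be written a little more directly; this line is equivalent to the cumsum()/seq_along() version above:
mutate(year_week.average = lag(cummean(count)))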
We can use aggregate from base R
aggregate(count ~ Flu.Year, dat, FUN = mean)
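And if you want that per-year average attached back onto every row (the base R analogue of the grouped mutate() above), ave() does it in one line:
# Grouped mean, broadcast back to each row
dat$average <- ave(dat$count, dat$Flu.Year, FUN = mean)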

Why does R throw an error on iterative calculation

I'm looking at covid-19 data to calculate estimates for the reproductive number R0.
library(ggplot2)
library(dplyr)
library(tidyr)
library(stringr)
library(TTR)
# Get COVID cases, available from:
url <- "https://static.usafacts.org/public/data/covid-19/covid_confirmed_usafacts.csv"
DoubleCOV <- read.csv(url, stringsAsFactors = FALSE)
names(DoubleCOV)[1] <- "countyFIPS"
DoubleCovid <- pivot_longer(DoubleCOV, cols = starts_with("X"),
                            values_to = "cases",
                            names_to = c("X", "date_infected"),
                            names_sep = "X") %>%
  mutate(infected = as.Date(date_infected, format = "%m.%d.%y"),
         countyFIPS = str_pad(as.character(countyFIPS), 5, pad = "0"))
#data is by county, summarise for the state of interest
stateData <- DoubleCovid %>% filter(State == "AL") %>% filter(cases != 0) %>%
  group_by(infected) %>% summarise(sum(cases)) %>%
  mutate(DaysSince = infected - min(infected))
names(stateData)[2] <- "cumCases"
#3 day moving average to smooth a little
stateData <- stateData %>% mutate(MA = runMean(cumCases,3))
#calculate doubling rate (DR) and then R0 infectious period/doubling rate
for (j in 4:nrow(stateData)) {
  stateData$DR[j] <- log(2) / log(stateData$MA[j] / stateData$MA[j - 1])
  stateData$R0[j] <- 14 / stateData$DR[j]
}
CDplot <- stateData %>%
  ggplot(mapping = aes(x = as.numeric(DaysSince), y = R0)) +
  geom_line(color = "firebrick")
print(CDplot)
So in the above the state of interest is Alabama, hence filter(State == "AL") and this works.
But if I change the state to "NY" I get
Error in `$<-.data.frame`(`*tmp*`, "DR", value = c(NA, NA, NA, 0.733907206043719 :
replacement has 4 rows, data has 39
head(stateData) yields
infected cumCases DaysSince MA
<date> <int> <drtn> <dbl>
1 2020-03-02 1 0 days NA
2 2020-03-03 2 1 days NA
3 2020-03-04 11 2 days 4.67
4 2020-03-05 23 3 days 12
5 2020-03-06 25 4 days 19.7
6 2020-03-07 77 5 days 41.7
The moving average values in rows 3 and 4 (4.67 and 12) would yield a doubling rate of 0.734, which aligns with the value in the error message (value = c(NA, NA, NA, 0.733907206043719), but why does it throw an error after that?
Bonus question: I know loops are frowned upon in R...is there a way to get the moving average and R0 calculation without one?
You have to initialise the new variables before you can access them using the j index. Due to recycling, Alabama, which has 28 rows (divisible by 4), does not return an error, only warnings about the uninitialised columns. New York, however, has 39 rows, which is not divisible by 4, so recycling fails and R returns an error. You shouldn't ignore warnings; sometimes you can get away with it, but it's not a good idea.
Try this to see what R (you) is trying to do:
stateData[4]
You should get all rows of the 4th column, not the 4th row.
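For contrast, the two indexing forms side by side:
stateData[4]    # all rows of the 4th column
stateData[4, ]  # the 4th row, all columns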
Solution: initialise your DR and R0 columns first.
stateData$DR <- NA
stateData$R0 <- NA
for (j in 4:nrow(stateData)) {
  stateData$DR[j] <- log(2) / log(stateData$MA[j] / stateData$MA[j - 1])
  stateData$R0[j] <- 14 / stateData$DR[j]
}
For the bonus question, you can use lag in the same mutate with MA:
stateData <- stateData %>% mutate(MA = runMean(cumCases, 3),
                                  DR = log(2) / log(MA / lag(MA)),
                                  R0 = 14 / DR)
stateData
# A tibble: 28 x 6
infected cumCases DaysSince MA DR R0
<date> <int> <drtn> <dbl> <dbl> <dbl>
1 2020-03-13 5 0 days NA NA NA
2 2020-03-14 11 1 days NA NA NA
3 2020-03-15 22 2 days 12.7 NA NA
4 2020-03-16 29 3 days 20.7 1.42 9.89
5 2020-03-17 39 4 days 30 1.86 7.53
6 2020-03-18 51 5 days 39.7 2.48 5.64
7 2020-03-19 78 6 days 56 2.01 6.96
8 2020-03-20 106 7 days 78.3 2.07 6.78
9 2020-03-21 131 8 days 105 2.37 5.92
10 2020-03-22 167 9 days 135. 2.79 5.03
# ... with 18 more rows
I'm using Alabama's data.

Subsetting data by date range across years in R

I have a long-term sightings data set of identified individuals (~16,000 records from 1979-2019) and I would like to subset the same date range (YYYY-09-01 to YYYY(+1)-08-31) across years in R. I have successfully done so for each "year" (and obtained the unique IDs) using:
library(dplyr)
library(lubridate)
year79 <- data %>%
  select(ID, Sex, AgeClass, Age, Date, Month, Year) %>%
  filter(Date >= as.Date("1978-09-01") & Date <= as.Date("1979-08-31")) %>%
  filter(!duplicated(ID))
year80 <- data %>%
  select(ID, Sex, AgeClass, Age, Date, Month, Year) %>%
  filter(Date >= as.Date("1979-09-01") & Date <= as.Date("1980-08-31")) %>%
  filter(!duplicated(ID))
I would like to clean up the code and ideally not need to specify each range (just have it iterate through). I am new to R and stuck on how to do this. Any suggestions?
FYI "Month" and "Year" are included for producing a table via melt and cast later on.
example data:
ID Year Month Day Date AgeClass Age Sex
1 1034 1979 4 17 1979-04-17 U 3 F
2 1127 1979 5 3 1979-05-03 A 13 F
3 1222 1979 5 3 1979-05-03 U 0 F
4 1303 1979 6 16 1979-06-16 U 0 F
5 1153 1980 4 16 1980-04-16 C 0 F
6 1014 1980 4 16 1980-04-16 U 6 F
ID Year Month Day Date AgeClass Age Sex
16428 2503 2019 5 8 2019-05-08 U NA F
16429 3760 2019 5 8 2019-05-08 A 12 F
16430 4080 2019 5 9 2019-05-09 A 9 F
16431 4095 2019 5 9 2019-05-09 A 9 U
16432 1204 2019 5 11 2019-05-11 A 37 F
16433 1204 2019 5 11 2019-05-11 A NA F
#> sessionInfo()
R version 3.5.1 (2018-07-02)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows >= 8 x64 (build 9200)
Every year has 122 days from Sept 1 to Dec 31 inclusive, so you could add a variable marking the "fiscal year" for each row:
set.seed(42)
library(dplyr)
library(lubridate)  # for year() below
my_data <- tibble(ID = 1:6,
                  Date = as.Date("1978-09-01") + c(-1, 0, 1, 364, 365, 366))
my_data
# There are 122 days from each Aug 31 (last of the FY) to the end of the CY.
# lubridate::ymd(19781231) - lubridate::ymd(19780831)
my_data %>%
  mutate(FY = year(Date + 122))
## A tibble: 6 x 3
# ID Date FY
# <int> <date> <dbl>
#1 1 1978-08-31 1978
#2 2 1978-09-01 1979
#3 3 1978-09-02 1979
#4 4 1979-08-31 1979
#5 5 1979-09-01 1980
#6 6 1979-09-02 1980
You could keep the data in one table and do subsequent analysis using group_by(FY), or use %>% split(.$FY) to put each FY into its own element of a list. From my limited experience, I think it's generally an anti-pattern to create separate data frames for annual subsets of your data, as that makes your code harder to maintain, troubleshoot, and modify.
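For example, to count the unique IDs per season in a single pass (a sketch, assuming your full data frame is called data and has the ID and Date columns shown in the question):
library(dplyr)
library(lubridate)
data %>%
  mutate(FY = year(Date + 122)) %>%   # season label, as above
  group_by(FY) %>%
  summarise(unique_IDs = n_distinct(ID))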

Iteration for time series data, using purrr

I have a bunch of time series data stacked on top of one another in a data frame; one series for each region in a country. I'd like to apply the seas() function (from the seasonal package) to each series, iteratively, to make the series seasonally adjusted. To do this, I first have to convert the series to a ts class. I'm struggling to do all this using purrr.
Here's a minimum worked example:
library(seasonal)
library(tidyverse)
set.seed(1234)
df <- data.frame(region = rep(1:10, each = 20),
                 quarter = rep(1:20, 10),
                 var = sample(5:200, 200, replace = TRUE))
For each region (indexed by a number) I'd like to perform the following operations. Here's the first region as an example:
tem1 <- df %>% filter(region == 1)
tem2 <- ts(data = tem1$var, frequency = 4, start = c(1990, 1))
tem3 <- seas(tem2)
tem4 <- as.data.frame(tem3$data)
I'd then like to stack the output (ie. the multiple tem4 data frames, one for each region), along with the region and quarter identifiers.
So, the start of the output for region 1 would be this:
final seasonaladj trend irregular region quarter
1 27 27 96.95 -67.97279 1 1
2 126 126 96.95 27.87381 1 2
3 124 124 96.95 27.10823 1 3
4 127 127 96.95 30.55075 1 4
5 173 173 96.95 75.01355 1 5
6 130 130 96.95 32.10672 1 6
The data for region 2 would be below this etc.
I started with the following but without luck so far. Basically, I'm struggling to get the time series into the tibble:
seas.adjusted <- df %>%
  group_by(region) %>%
  mutate(data.ts = map(.x = data$var,
                       .f = as.ts,
                       start = 1990,
                       freq = 4))
I don't know much about the seasonal adjustment part, so there may be things I missed, but I can help with moving your calculations into a map-friendly function.
After grouping by region, you can nest the data so there's a nested data frame for each region. Then you can run essentially the same code as you had, but inside a function in map. Unnesting the resulting column gives you a long-shaped data frame of adjustments.
Like I said, I don't have the expertise to know whether those last two columns having NAs is expected or not.
Edit: Based on @wibeasley's question about retaining the quarter column, I'm adding a mutate that adds a column of the quarters listed in the nested data frame.
library(seasonal)
library(tidyverse)
set.seed(1234)
df <- data.frame(region = rep(1:10, each = 20),
                 quarter = rep(1:20, 10),
                 var = sample(5:200, 200, replace = TRUE))
df %>%
  group_by(region) %>%
  nest() %>%
  mutate(data.ts = map(data, function(x) {
    tem2 <- ts(x$var, frequency = 4, start = c(1990, 1))
    tem3 <- seas(tem2)
    as.data.frame(tem3$data) %>%
      mutate(quarter = x$quarter)
  })) %>%
  unnest(data.ts)
#> # A tibble: 200 x 8
#> region final seasonaladj trend irregular quarter seasonal adjustfac
#> <int> <dbl> <dbl> <dbl> <dbl> <int> <dbl> <dbl>
#> 1 1 27 27 97.0 -68.0 1 NA NA
#> 2 1 126 126 97.0 27.9 2 NA NA
#> 3 1 124 124 97.0 27.1 3 NA NA
#> 4 1 127 127 97.0 30.6 4 NA NA
#> 5 1 173 173 97.0 75.0 5 NA NA
#> 6 1 130 130 97.0 32.1 6 NA NA
#> 7 1 6 6 97.0 -89.0 7 NA NA
#> 8 1 50 50 97.0 -46.5 8 NA NA
#> 9 1 135 135 97.0 36.7 9 NA NA
#> 10 1 105 105 97.0 8.81 10 NA NA
#> # ... with 190 more rows
I also gave a bit more thought to doing this without nesting, and instead tried doing it with a split. Passing that list of data frames into imap_dfr let me take each split piece of the data frame and its name (in this case, the value of region), then return everything rbinded back together into one data frame. I sometimes shy away from nested data just because I have trouble seeing what's going on, so this is an alternative that is maybe more transparent.
df %>%
  split(.$region) %>%
  imap_dfr(function(x, reg) {
    tem2 <- ts(x$var, frequency = 4, start = c(1990, 1))
    tem3 <- seas(tem2)
    as.data.frame(tem3$data) %>%
      mutate(region = reg, quarter = x$quarter)
  }) %>%
  select(region, quarter, everything()) %>%
  head()
#> region quarter final seasonaladj trend irregular seasonal adjustfac
#> 1 1 1 27 27 96.95 -67.97274 NA NA
#> 2 1 2 126 126 96.95 27.87378 NA NA
#> 3 1 3 124 124 96.95 27.10823 NA NA
#> 4 1 4 127 127 96.95 30.55077 NA NA
#> 5 1 5 173 173 96.95 75.01353 NA NA
#> 6 1 6 130 130 96.95 32.10669 NA NA
Created on 2018-08-12 by the reprex package (v0.2.0).
I put all the action inside of f(), and then called it with purrr::map_df(). The re-inclusion of quarter is a hack.
f <- function(.region) {
  d <- df %>%
    dplyr::filter(region == .region)
  y <- d %>%
    dplyr::pull(var) %>%
    ts(frequency = 4, start = c(1990, 1)) %>%
    seas()
  y$data %>%
    as.data.frame() %>%
    # dplyr::select(-seasonal, -adjustfac) %>%
    dplyr::mutate(
      quarter = d$quarter
    )
}
purrr::map_df(1:10, f, .id = "region")
results:
region final seasonaladj trend irregular quarter seasonal adjustfac
1 1 27.00000 27.00000 96.95000 -6.797279e+01 1 NA NA
2 1 126.00000 126.00000 96.95000 2.787381e+01 2 NA NA
3 1 124.00000 124.00000 96.95000 2.710823e+01 3 NA NA
4 1 127.00000 127.00000 96.95000 3.055075e+01 4 NA NA
5 1 173.00000 173.00000 96.95000 7.501355e+01 5 NA NA
6 1 130.00000 130.00000 96.95000 3.210672e+01 6 NA NA
7 1 6.00000 6.00000 96.95000 -8.899356e+01 7 NA NA
8 1 50.00000 50.00000 96.95000 -4.647254e+01 8 NA NA
9 1 135.00000 135.00000 96.95000 3.671077e+01 9 NA NA
10 1 105.00000 105.00000 96.95000 8.806955e+00 10 NA NA
...
96 5 55.01724 55.01724 60.25848 9.130207e-01 16 1.9084928 1.9084928
97 5 60.21549 60.21549 59.43828 1.013076e+00 17 1.0462424 1.0462424
98 5 58.30626 58.30626 58.87065 9.904130e-01 18 0.1715082 0.1715082
99 5 61.68175 61.68175 58.07827 1.062045e+00 19 1.0537962 1.0537962
100 5 59.30138 59.30138 56.70798 1.045733e+00 20 2.5294523 2.5294523
...
