How to create list of 2 from dataframe? - r

I have transportation data between UK cities:
from
to
Transit: if there is transit between these two cities = 1,
otherwise =0
weight: average number of passengers
Here is given sample from my data:
# Sample of the transport data: one row per (from, to) city pair, a 0/1
# Transit flag and the average passenger count for that pair.
df2 <- data.frame(
  from = rep(
    c("London", "Liverpool", "Manchester", "Bristol"),
    times = c(4, 3, 2, 1)
  ),
  to = c(
    "Liverpool", "Manchester", "Bristol", "Derby",
    "Manchester", "Bristol", "Derby",
    "Bristol", "Derby",
    "Derby"
  ),
  Transit = c(1, 0, 1, 1, 1, 1, 1, 1, 0, 1),
  ave.pas = c(10, 0, 11, 24, 40, 45, 12, 34, 9, 29)
)
output
from to Transit weight
1 London Liverpool 1 10
2 London Manchester 0 0
3 London Bristol 1 11
4 London Derby 1 24
5 Liverpool Manchester 1 40
6 Liverpool Bristol 1 45
7 Liverpool Derby 1 12
8 Manchester Bristol 1 34
9 Manchester Derby 0 9
10 Bristol Derby 1 29
Now I want to convert it in the list of 2 to get data like this (this is different data but idea is to get the same from my df):
$nodes
# A tibble: 16 x 2
id label
<int> <chr>
1 1 France
2 2 Belgium
3 3 Germany
4 4 Danemark
5 5 Croatia
6 6 Slovenia
7 7 Hungary
8 8 Spain
9 9 Italy
10 10 Netherlands
11 11 UK
12 12 Austria
13 13 Poland
14 14 Switzerland
15 15 Czech republic
16 16 Slovania
$edges
# A tibble: 18 x 3
from to weight
<int> <int> <dbl>
1 1 3 9
2 2 1 4
3 1 8 3
4 1 9 4
5 1 10 2
6 1 11 3
7 3 12 2
8 3 13 2
9 2 3 3
10 3 14 2
11 3 15 2
12 3 10 2
13 4 3 2
14 5 3 2
15 5 16 2
16 5 7 2
17 6 3 2
18 7 16 2.5

In base R:
# Build the node table (one integer id per distinct city, in order of first
# appearance) and the edge table (only rows with Transit == 1), then replace
# the city names in the edge endpoints by the matching node ids.
f2 <- c("from", "to")
nodes <- data.frame(label = unique(unlist(df2[f2], use.names = FALSE)))
nodes$id <- seq_along(nodes$label)
edges <- df2[df2$Transit == 1, c(f2, "ave.pas")]
edges[f2] <- lapply(edges[f2], function(city) match(city, nodes$label))
nodes
# label id
# 1 London 1
# 2 Liverpool 2
# 3 Manchester 3
# 4 Bristol 4
# 5 Derby 5
edges
# from to ave.pas
# 1 1 2 10
# 3 1 4 11
# 4 1 5 24
# 5 2 3 40
# 6 2 4 45
# 7 2 5 12
# 8 3 4 34
# 10 4 5 29

Create the dataframe of unique factor levels and create ids using as.numeric, then use match to replace the values with the id.
df1 <- data.frame(id = as.numeric(factor(unique(unlist(df2[c(1,2)])), levels = unique(unlist(df2[c(1,2)])))),
label = factor(unique(unlist(df2[c(1,2)])), levels = unique(unlist(df2[c(1,2)]))))
# id label
#1 1 London
#2 2 Liverpool
#3 3 Manchester
#4 4 Bristol
#5 5 Derby
df2$from <- df1$id[match(df2$from, df1$label)]
df2$to <- df1$id[match(df2$to, df1$label)]
# from to Transit ave.pas
#1 1 2 1 10
#2 1 3 0 0
#3 1 4 1 11
#4 1 5 1 24
#5 2 3 1 40
#6 2 4 1 45
#7 2 5 1 12
#8 3 4 1 34
#9 3 5 0 9
#10 4 5 1 29
Edit: you don't actually need to convert to a factor (this then comes very close to @sindri_baldur's answer):
# Distinct values of the first two columns of df2, in order of first
# appearance; each one gets a consecutive integer id.
un <- unique(unlist(df2[c(1, 2)]))
df1 <- data.frame(id = seq_along(un), label = un)
# Replace the values in both columns by their ids. lapply() always returns
# one vector per column, whereas sapply() may simplify its result to a
# matrix or a bare vector depending on the number of rows.
df2[c(1, 2)] <- lapply(df2[c(1, 2)], match, df1$label)

Related

Grouping similar elements together

I am trying to group similar entities together and can't find an easy way to do so.
For example, here is a table:
Names Initial_Group Final_Group
1 James,Gordon 6 A
2 James,Gordon 6 A
3 James,Gordon 6 A
4 James,Gordon 6 A
5 James,Gordon 6 A
6 James,Gordon 6 A
7 Amanda 1 A
8 Amanda 1 A
9 Amanda 1 A
10 Gordon,Amanda 5 A
11 Gordon,Amanda 5 A
12 Gordon,Amanda 5 A
13 Gordon,Amanda 5 A
14 Gordon,Amanda 5 A
15 Gordon,Amanda 5 A
16 Gordon,Amanda 5 A
17 Gordon,Amanda 5 A
18 Edward,Gordon,Amanda 4 A
19 Edward,Gordon,Amanda 4 A
20 Edward,Gordon,Amanda 4 A
21 Anna 2 B
22 Anna 2 B
23 Anna 2 B
24 Anna,Leonard 3 B
25 Anna,Leonard 3 B
26 Anna,Leonard 3 B
I am unsure how to get the 'Final_Group' field, in the table above.
For that, I need to assign any element that has any connections to another element, and group them together:
For example, rows 1 to 20 need to be grouped together because they are all connected by at least one shared element.
So for rows 1 to 6, 'James, Gordon' appear, and since "Gordon" is in rows 10:20, they all have to be grouped. Likewise, since 'Amanda' appears in rows 7:9, these have to be grouped with "James,Gordon", "Gordon, Amanda", and "Edward, Gordon, Amanda".
Below is code to generate the initial data:
# Manually generating data
# One row per observation; Names holds a comma-separated list of people.
Names <- rep(
  c("James,Gordon", "Amanda", "Gordon,Amanda",
    "Edward,Gordon,Amanda", "Anna", "Anna,Leonard"),
  times = c(6, 3, 8, 3, 3, 3)
)
# Group id of each distinct Names string, using the ids shown in the table
# above (6, 1, 5, 4, 2, 3) rather than 1..6 in row order, so that the
# generated data matches the printed table.
Initial_Group <- rep(c(6L, 1L, 5L, 4L, 2L, 3L), times = c(6, 3, 8, 3, 3, 3))
# Desired result: rows 1-20 form group A, rows 21-26 form group B.
Final_Group <- rep(c("A", "B"), times = c(20, 6))
data <- data.frame(Names, Initial_Group, Final_Group)
# Grouping
# Attempt to number each distinct Names string. This only distinguishes
# identical strings; it does not link strings that merely share a person.
# The result is printed, not assigned back to `data`.
# NOTE(review): `%>%`, select(), mutate() and group_indices() look like
# dplyr functions, but no library() call is visible before this point;
# confirm dplyr is attached.
# NOTE(review): group_indices() appears to be deprecated in recent dplyr
# releases in favour of cur_group_id(); confirm against the installed version.
data %>%
select(Names) %>%
mutate(Initial_Group=group_indices(.,Names))
Does anyone know of any way to do this in R?
This is a long one but you could do:
library(tidyverse)
library(igraph)
# Label every row with the connected component its names belong to.
# NOTE(review): this pipeline reads `df`, while the question builds its data
# frame as `data`; presumably `df` is the same table, so confirm.
df %>%
# Work on the distinct Names strings only.
select(Names)%>%
distinct() %>%
# Split each string into its first name ('first') and everything after it
# ('second'); extra = 'merge' keeps the remainder together and
# fill = 'right' leaves 'second' as NA when there is only one name.
separate(Names, c('first', 'second'), extra = 'merge', fill = 'right')%>%
# One row per (first, other name) pair, i.e. one edge per pair.
separate_rows(second) %>%
# Single-name rows have no partner: replace the NA by a running counter
# ("1", "2", ...) so the row still yields an edge and a vertex.
mutate(second = coalesce(second, as.character(cumsum(is.na(second)))))%>%
# Treat the two columns as an edge list and find connected components.
graph_from_data_frame()%>%
components()%>%
# Named vector: vertex name -> component number.
getElement('membership')%>%
# For each vertex name (.y), give the component number (.x) to the rows of
# df whose Names contains that name and 0 to all other rows.
imap(~str_detect(df$Names, .y)*.x) %>%
# Element-wise maximum over all vertices: the component number of each row.
invoke(pmax, .)%>%
# Append the component as a letter (value) and as a number (value1).
cbind(df, value = LETTERS[.], value1 = .)
Names Initial_Group Final_Group value value1
1 James,Gordon 6 A A 1
2 James,Gordon 6 A A 1
3 James,Gordon 6 A A 1
4 James,Gordon 6 A A 1
5 James,Gordon 6 A A 1
6 James,Gordon 6 A A 1
7 Amanda 1 A A 1
8 Amanda 1 A A 1
9 Amanda 1 A A 1
10 Gordon,Amanda 5 A A 1
11 Gordon,Amanda 5 A A 1
12 Gordon,Amanda 5 A A 1
13 Gordon,Amanda 5 A A 1
14 Gordon,Amanda 5 A A 1
15 Gordon,Amanda 5 A A 1
16 Gordon,Amanda 5 A A 1
17 Gordon,Amanda 5 A A 1
18 Edward,Gordon,Amanda 4 A A 1
19 Edward,Gordon,Amanda 4 A A 1
20 Edward,Gordon,Amanda 4 A A 1
21 Anna 2 B B 2
22 Anna 2 B B 2
23 Anna 2 B B 2
24 Anna,Leonard 3 B B 2
25 Anna,Leonard 3 B B 2
26 Anna,Leonard 3 B B 2
Check the column called value
I may have misunderstood the question; I am assuming that your focus is on Final_Group. If not, please let me know.
My approach is based on distance between samples.
data <- data %>%
mutate(Names = sapply(Names, function(x) as.vector(str_split(x, ","))))
# Add one logical indicator column per person: TRUE when that person occurs
# in the row's Names entry. All columns are written to `data` (the original
# loop wrote the Leonard column to an undefined object, `dummy`), and the
# row count is taken from the data instead of being hard-coded as 26.
# Assumes data$Names is a list column of character vectors, as produced by
# the str_split() step just before this loop.
for (person in c("James", "Gordon", "Amanda", "Edward", "Anna", "Leonard")) {
  data[[person]] <- vapply(
    data$Names,
    function(members) person %in% members,
    logical(1),
    USE.NAMES = FALSE
  )
}
# Cluster the rows hierarchically (complete linkage) using only the
# per-person indicator columns: the Names list column and the two group
# columns are dropped before computing distances.
hc <- data %>%
  select(-Names, -Final_Group, -Initial_Group) %>%
  dist() %>%
  hclust(method = "complete")
# cutree() requires either the number of groups (k) or a cut height (h).
# k = 2 matches the two groups shown in the output below.
cutree(hc, k = 2)
[1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2
plot(hc)
now that's similar to Final_Group

Use apply to create a list of adjacency matrices from dataframe in R

I have an edgelist of friendships with 5 different schools over 3 waves. I'd like to create a list for each school that contains 3 adjacency matrices (one for each wave). I can do this one by one, but I would like to use a loop or an apply function to automate it.
This is the code I have used for one school and wave:
school1_w1 <- filter(edges, school == 1 & wave == 1) %>%
graph_from_data_frame(., directed = TRUE) %>%
as_adjacency_matrix() %>% as.matrix()
school1_w2 <- filter(edges, school == 1 & wave == 2) %>%
graph_from_data_frame(., directed = TRUE) %>%
as_adjacency_matrix() %>% as.matrix()
school1_w3 <- filter(edges, school == 1 & wave == 3) %>%
graph_from_data_frame(., directed = TRUE) %>%
as_adjacency_matrix() %>% as.matrix()
school1 <- list(school1_w1, school1_w2, school1_w3)
How can I do this for all 5 schools with an apply or loop? Sample data below:
ego alter wave school
1 4 1 1
1 4 2 1
1 3 3 1
2 3 1 1
2 4 2 1
2 4 3 1
3 1 1 1
3 2 2 1
3 3 3 1
4 1 1 1
4 1 2 1
4 1 3 1
5 8 1 2
5 6 2 2
5 7 3 2
6 7 1 2
6 7 2 2
6 7 3 2
7 8 1 2
7 6 2 2
7 6 3 2
8 7 1 2
8 7 2 2
8 7 3 2
9 10 1 3
9 11 2 3
9 12 3 3
10 11 1 3
10 11 2 3
10 9 3 3
11 12 1 3
11 10 2 3
11 12 3 3
12 9 1 3
12 10 2 3
12 10 3 3
13 14 1 4
13 15 2 4
13 16 3 4
14 16 1 4
14 16 2 4
14 13 3 4
15 16 1 4
15 16 2 4
15 16 3 4
16 15 1 4
16 15 2 4
16 15 3 4
17 20 1 5
17 18 2 5
17 18 3 5
18 19 1 5
18 20 2 5
18 19 3 5
19 17 1 5
19 17 2 5
19 17 3 5
20 18 1 5
20 17 2 5
20 17 3 5
We can use split + lapply :
library(igraph)
# Split the edge list into one data frame per school/wave combination and
# turn each piece into a plain adjacency matrix of a directed graph.
result <- lapply(
  split(edges, list(edges$school, edges$wave)),
  function(piece) {
    network <- graph_from_data_frame(piece, directed = TRUE)
    as.matrix(as_adjacency_matrix(network))
  }
)
Or with by :
result <- by(edges, list(edges$school, edges$wave), function(x) {
graph_from_data_frame(x, directed = TRUE) %>%
as_adjacency_matrix() %>% as.matrix()
})

R: How to split a row in a dataframe into a number of rows, conditional on a value in a cell?

I have a data.frame which looks like the following:
# Example data: for each id, a sequence of age intervals (age_from, age_to)
# with a value y per interval.
id <- rep(c("a", "b"), each = 4)
age_from <- c(0, 2, 3, 7, 0, 1, 2, 6)
age_to <- c(2, 3, 7, 10, 1, 2, 6, 10)
y <- c(100, 150, 100, 250, 300, 200, 100, 150)
df <- data.frame(id = id, age_from = age_from, age_to = age_to, y = y)
# Length of each interval in years.
df$years <- with(df, age_to - age_from)
Which gives a df that looks like:
id age_from age_to y years
1 a 0 2 100 2
2 a 2 3 150 1
3 a 3 7 100 4
4 a 7 10 250 3
5 b 0 1 300 1
6 b 1 2 200 1
7 b 2 6 100 4
8 b 6 10 150 4
Instead of having an unequal number of years per row, I would like to have 20 rows, 10 for each id, with each row accounting for one year. This would also involve averaging the y column across the number of years listed in the years column.
I believe this may have to be done using a loop 1:n with the n equaling a value in the years column. Although I am not sure how to start with this.
You can use rep to repeat the rows by the number of given years.
x <- df[rep(seq_len(nrow(df)), df$years),]
x
# id age_from age_to y years
#1 a 0 2 50.00000 2
#1.1 a 0 2 50.00000 2
#2 a 2 3 150.00000 1
#3 a 3 7 25.00000 4
#3.1 a 3 7 25.00000 4
#3.2 a 3 7 25.00000 4
#3.3 a 3 7 25.00000 4
#4 a 7 10 83.33333 3
#4.1 a 7 10 83.33333 3
#4.2 a 7 10 83.33333 3
#5 b 0 1 300.00000 1
#6 b 1 2 200.00000 1
#7 b 2 6 25.00000 4
#7.1 b 2 6 25.00000 4
#7.2 b 2 6 25.00000 4
#7.3 b 2 6 25.00000 4
#8 b 6 10 37.50000 4
#8.1 b 6 10 37.50000 4
#8.2 b 6 10 37.50000 4
#8.3 b 6 10 37.50000 4
If by averaging the y column across the number of years you mean dividing y by the number of years (the output shown above already includes this step):
x$y <- x$y / x$years
In case age_from should go from 0 to 9 and age_to from 1 to 10 for each id:
x$age_from <- x$age_from + ave(x$age_from, x$id, x$age_from, FUN=seq_along) - 1
#x$age_from <- ave(x$age_from, x$id, FUN=seq_along) - 1 #Alternative
x$age_to <- x$age_from + 1
Here is a solution with tidyr and dplyr.
First of all we complete age_from from 0 to 9 as you wanted, by keeping only the existing ids.
You will have several NAs on age_to, y and years. So, we fill them by dragging down each value in order to complete the immediately following values that are NA.
Now you can divide y by years (I assumed you meant this by setting the average value so to leave the sum consistent).
At that point, you only need to recalculate age_to accordingly.
Remember to ungroup at the end!
library(tidyr)
library(dplyr)
# Expand to one row per id and starting age 0..9, carry the last known
# y / years / age_to down into the new rows within each id, then spread y
# evenly over the years and make every row span exactly one year.
df %>%
  complete(id, age_from = 0:9) %>%
  group_by(id) %>%
  fill(y, years, age_to) %>%
  mutate(
    y = y / years,
    age_to = age_from + 1
  ) %>%
  ungroup()
# A tibble: 20 x 5
id age_from age_to y years
<chr> <dbl> <dbl> <dbl> <dbl>
1 a 0 1 50 2
2 a 1 2 50 2
3 a 2 3 150 1
4 a 3 4 25 4
5 a 4 5 25 4
6 a 5 6 25 4
7 a 6 7 25 4
8 a 7 8 83.3 3
9 a 8 9 83.3 3
10 a 9 10 83.3 3
11 b 0 1 300 1
12 b 1 2 200 1
13 b 2 3 25 4
14 b 3 4 25 4
15 b 4 5 25 4
16 b 5 6 25 4
17 b 6 7 37.5 4
18 b 7 8 37.5 4
19 b 8 9 37.5 4
20 b 9 10 37.5 4
A tidyverse solution.
library(tidyverse)
# Expand df to one row per id and year of age, with y spread evenly over
# the years of the original interval.
df %>%
# Make every existing row a one-year interval starting at age_from.
mutate(age_to = age_from + 1) %>%
group_by(id) %>%
# Within each id, add the missing (age_from, age_to) pairs 0-1, ..., 9-10.
complete(nesting(age_from = 0:9, age_to = 1:10)) %>%
# New rows have NA in y and years: carry the previous values down.
fill(y, years) %>%
# Divide the interval total by its length in years.
# Note: there is no ungroup() here, so the result stays grouped by id (see
# the "# Groups: id [2]" line in the printed output).
mutate(y = y / years)
# A tibble: 20 x 5
# Groups: id [2]
id age_from age_to y years
<chr> <dbl> <dbl> <dbl> <dbl>
1 a 0 1 50 2
2 a 1 2 50 2
3 a 2 3 150 1
4 a 3 4 25 4
5 a 4 5 25 4
6 a 5 6 25 4
7 a 6 7 25 4
8 a 7 8 83.3 3
9 a 8 9 83.3 3
10 a 9 10 83.3 3
11 b 0 1 300 1
12 b 1 2 200 1
13 b 2 3 25 4
14 b 3 4 25 4
15 b 4 5 25 4
16 b 5 6 25 4
17 b 6 7 37.5 4
18 b 7 8 37.5 4
19 b 8 9 37.5 4
20 b 9 10 37.5 4

R: Separate data into combinations of two columns

I have some data where each id is measured by different types, each of which can have different values (type_val). The measured value is val. A small dummy data set looks like this:
df <- data.frame(id=rep(letters[1:2],6),
type=c(rep('t1',6), rep('t2',6)),
type_val=rep(c(1,1,2,2,3,3),2),
val=1:12)
Then df is:
id type type_val val
1 a t1 1 1
2 b t1 1 2
3 a t1 2 3
4 b t1 2 4
5 a t1 3 5
6 b t1 3 6
7 a t2 1 7
8 b t2 1 8
9 a t2 2 9
10 b t2 2 10
11 a t2 3 11
12 b t2 3 12
I need to spread/cast data so that all combinations of type and type_val for each id are row-wise. I think this must be a job for pkgs reshape2 or tidyr but I have completely failed to generate anything other than errors.
The outcome data structure - somewhat redundant - would be something like this (hope I got it right!) where pairs of type (as given by combinations of the type_val) are columns type_t1 and type_t2, and their associated values (val in df) are val_t1 and val_t2 - the column names are of course arbitrary:
id type_t1 type_t2 val_t1 val_t2
1 a 1 1 1 7
2 a 1 2 1 9
3 a 1 3 1 11
4 a 2 1 3 7
5 a 2 2 3 9
6 a 2 3 3 11
7 a 3 1 5 7
8 a 3 2 5 9
9 a 3 3 5 11
10 b 1 1 2 8
11 b 1 2 2 10
12 b 1 3 2 12
13 b 2 1 4 8
14 b 2 2 4 10
15 b 2 3 4 12
16 b 3 1 6 8
17 b 3 2 6 10
18 b 3 3 6 12
UPDATE
Note that (@Sotos)
> spread(df, type, val)
id type_val t1 t2
1 a 1 1 7
2 a 2 3 9
3 a 3 5 11
4 b 1 2 8
5 b 2 4 10
6 b 3 6 12
is not the desired output - it fails to deliver the wide format defined by combinations of type and type_val in df.
how about this:
# Pair every t1 row with every t2 row of the same id, then drop the two
# `type` columns (positions 2 and 5 of the merged frame) and give the
# remaining columns descriptive names.
df1 <- df[df$type == "t1", ]
df2 <- df[df$type == "t2", ]
DF <- merge(df1, df2, by = "id")[, -c(2, 5)]
names(DF) <- c("id", "type_t1", "val_t1", "type_t2", "val_t2")
Here is something more generic that will work with an arbitrary number of unique type:
library(dplyr)
# Merge a list of data frames (.data) into one, left to right, joining each
# successive pair on the column(s) named in ID.
reduce_merge <- function(.data, ID) {
  merge_pair <- function(left, right) {
    merge(left, right, by = ID)
  }
  Reduce(merge_pair, .data)
}
# Rename the `cols` columns of .data by appending the value found in the
# first row of the `identifier` column, separated by `sep`
# (e.g. "val" becomes "val_t1" when the first `type` value is "t1").
#
# .data       data frame whose columns are renamed
# cols        character vector of column names to rename
# identifier  name of the column that supplies the suffix
# sep         separator placed between the old name and the suffix
#
# Returns .data with the selected columns renamed.
batch_rename <- function(.data, cols, identifier, sep = "_") {
  # Vectorised paste() + setNames() builds the old -> new name map in one
  # step and always yields a named character vector, unlike sapply().
  new_names <- setNames(paste(cols, .data[1, identifier], sep = sep), cols)
  plyr::rename(.data, new_names)
}
# Spread several value columns at once, producing every combination of rows
# across the distinct values of `key` within each `grp`.
#
# Steps: split .data into one data frame per value of `key`, suffix the
# `vals` columns of each piece with that value (batch_rename), merge the
# pieces on `grp` (reduce_merge), drop the leftover key columns and return
# the grouping column(s) followed by the value columns in sorted order.
#
# .data  data frame in long format
# grp    name(s) of the column(s) to merge on
# key    name of the column whose distinct values define the pieces
# vals   names of the columns to spread
multi_spread <- function(.data, grp, key, vals) {
  merged <- .data %>%
    plyr::dlply(key, subset) %>%
    lapply(batch_rename, vals, key) %>%
    reduce_merge(grp) %>%
    # merge() suffixes the duplicated key column as "<key>.x", "<key>.y", ...
    select(-starts_with(paste0(key, ".")))
  value_cols <- sort(setdiff(colnames(merged), c(grp, key, vals)))
  # Select by `grp` rather than a hard-coded `id`, so the function also works
  # when the grouping column has another name; all_of() marks both vectors as
  # external character vectors of column names.
  select(merged, all_of(grp), all_of(value_cols))
}
# Your example
df <- data.frame(id=rep(letters[1:2],6),
type=c(rep('t1',6), rep('t2',6)),
type_val=rep(c(1,1,2,2,3,3),2),
val=1:12)
df %>% multi_spread('id', 'type', c('type_val', 'val'))
id type_val_t1 type_val_t2 val_t1 val_t2
1 a 1 1 1 7
2 a 1 2 1 9
3 a 1 3 1 11
4 a 2 1 3 7
5 a 2 2 3 9
6 a 2 3 3 11
7 a 3 1 5 7
8 a 3 2 5 9
9 a 3 3 5 11
10 b 1 1 2 8
11 b 1 2 2 10
12 b 1 3 2 12
13 b 2 1 4 8
14 b 2 2 4 10
15 b 2 3 4 12
16 b 3 1 6 8
17 b 3 2 6 10
18 b 3 3 6 12
# An example with three unique values of 'type'
df <- data.frame(id = rep(letters[1:2], 9),
type = c(rep('t1', 6), rep('t2', 6), rep('t3', 6)),
type_val = rep(c(1, 1, 2, 2, 3, 3), 3),
val = 1:18)
df %>% multi_spread('id', 'type', c('type_val', 'val'))
id type_val_t1 type_val_t2 type_val_t3 val_t1 val_t2 val_t3
1 a 1 1 1 1 7 13
2 a 1 1 2 1 7 15
3 a 1 1 3 1 7 17
4 a 1 2 1 1 9 13
5 a 1 2 2 1 9 15
6 a 1 2 3 1 9 17
7 a 1 3 1 1 11 13
8 a 1 3 2 1 11 15
9 a 1 3 3 1 11 17
10 a 2 1 1 3 7 13
11 a 2 1 2 3 7 15
12 a 2 1 3 3 7 17
13 a 2 2 1 3 9 13
14 a 2 2 2 3 9 15
15 a 2 2 3 3 9 17
16 a 2 3 1 3 11 13
17 a 2 3 2 3 11 15
18 a 2 3 3 3 11 17
19 a 3 1 1 5 7 13
20 a 3 1 2 5 7 15
21 a 3 1 3 5 7 17
22 a 3 2 1 5 9 13
23 a 3 2 2 5 9 15
24 a 3 2 3 5 9 17
25 a 3 3 1 5 11 13
26 a 3 3 2 5 11 15
27 a 3 3 3 5 11 17
28 b 1 1 1 2 8 14
29 b 1 1 2 2 8 16
30 b 1 1 3 2 8 18
31 b 1 2 1 2 10 14
32 b 1 2 2 2 10 16
33 b 1 2 3 2 10 18
34 b 1 3 1 2 12 14
35 b 1 3 2 2 12 16
36 b 1 3 3 2 12 18
37 b 2 1 1 4 8 14
38 b 2 1 2 4 8 16
39 b 2 1 3 4 8 18
40 b 2 2 1 4 10 14
41 b 2 2 2 4 10 16
42 b 2 2 3 4 10 18
43 b 2 3 1 4 12 14
44 b 2 3 2 4 12 16
45 b 2 3 3 4 12 18
46 b 3 1 1 6 8 14
47 b 3 1 2 6 8 16
48 b 3 1 3 6 8 18
49 b 3 2 1 6 10 14
50 b 3 2 2 6 10 16
51 b 3 2 3 6 10 18
52 b 3 3 1 6 12 14
53 b 3 3 2 6 12 16
54 b 3 3 3 6 12 18

Combining split() and cumsum()

I am trying to produce stats for cumulative goals by season by a particular soccer player. I have used the cut function to obtain the season from the game dates. I have data which corresponds to this dataframe
df.raw <-
data.frame(Game = 1:20,
Goals=c(1,0,0,2,1,0,3,2,0,0,0,1,0,4,1,2,0,0,0,3),
season = gl(4,5,labels = c("2001", "2002","2003", "2004")))
In real life, the number of games per season may not be constant
I want to end up with data that looks like this
# Desired result: the raw data plus the game number within each season
# (seasonGame) and the running goal total within each season (cumGoals).
df.seasoned <- data.frame(
  Game = 1:20,
  seasonGame = rep(1:5, times = 4),
  Goals = c(1, 0, 0, 2, 1, 0, 3, 2, 0, 0, 0, 1, 0, 4, 1, 2, 0, 0, 0, 3),
  cumGoals = c(1, 1, 1, 3, 4, 0, 3, 5, 5, 5, 0, 1, 1, 5, 6, 2, 2, 2, 2, 5),
  season = gl(4, 5, labels = c("2001", "2002", "2003", "2004"))
)
With the goals cumulatively summed within year and a game number for the season
# Running goal total within each season.
df.raw$cumGoals <- with(df.raw, ave(Goals, season, FUN = cumsum))
# Game number within each season. seq_along() always returns 1..length(x);
# seq() on a length-one group with value x would instead expand to 1:x, so
# it misbehaves for a season that has only a single game.
df.raw$seasonGame <- with(df.raw, ave(Game, season, FUN = seq_along))
df.raw
Or with transform ... the original transform, that is:
# Same result in a single transform() call: game number and running goal
# total within each season. seq_along() is used instead of seq() because
# seq() on a length-one group with value x expands to 1:x.
df.seas <- transform(
  df.raw,
  seasonGame = ave(Game, season, FUN = seq_along),
  cumGoals = ave(Goals, season, FUN = cumsum)
)
df.seas
Game Goals season seasonGame cumGoals
1 1 1 2001 1 1
2 2 0 2001 2 1
3 3 0 2001 3 1
4 4 2 2001 4 3
5 5 1 2001 5 4
6 6 0 2002 1 0
7 7 3 2002 2 3
8 8 2 2002 3 5
9 9 0 2002 4 5
10 10 0 2002 5 5
snipped
Another job for ddply and transform (from the plyr package):
# Per season: number the games and accumulate the goals. seq_along(Goals)
# counts the rows of the current piece using a column of the data, instead
# of relying on `piece`, which is not a column of df.raw.
ddply(df.raw, .(season), transform,
      seasonGame = seq_along(Goals),
      cumGoals = cumsum(Goals))
Game Goals season seasonGame cumGoals
1 1 1 2001 1 1
2 2 0 2001 2 1
3 3 0 2001 3 1
4 4 2 2001 4 3
5 5 1 2001 5 4
6 6 0 2002 1 0
7 7 3 2002 2 3
8 8 2 2002 3 5
9 9 0 2002 4 5
10 10 0 2002 5 5
11 11 0 2003 1 0
12 12 1 2003 2 1
13 13 0 2003 3 1
14 14 4 2003 4 5
15 15 1 2003 5 6
16 16 2 2004 1 2
17 17 0 2004 2 2
18 18 0 2004 3 2
19 19 0 2004 4 2
20 20 3 2004 5 5
Here is a solution using data.table which is very fast.
library(data.table)
# Convert to a data.table, then compute per season the game number and the
# running goal total.
df.raw.tab <- data.table(df.raw)
df.raw.tab[
  ,
  .(seasonGame = seq_len(.N), cumGoals = cumsum(Goals)),
  by = season
]

Resources