Summarise Data using group_by across several columns - r

Let's say I have a dataframe where all columns are factors:
set.seed(123)
gender <- sample(1:2,12,replace=T)
country <- c('FIN', 'FIN', 'EST', 'NIG','NIG','JAM', 'FIN', 'NIG', 'EST', 'NIG','NIG','JAM','FIN', 'FIN', 'EST', 'NIG','NIG','JAM', 'FIN', 'NIG', 'EST', 'NIG','NIG','JAM')
a <- sample(1:5,24,,replace=T)
b <- sample(1:5,24,,replace=T)
c <- sample(1:5,24,,replace=T)
d <- sample(1:5,24,,replace=T)
# Join the variables to create a data frame
df <- data.frame(gender,country,a,b,c,d)
df <- df %>% mutate_at(c("gender","country","a","b","c","d"), as.factor)
I want a table that will give a proportion percentage and n across every column a:d by Gender and Region so the end result would look something like this:
I've done this for a using the code (but I want to do this also for all the other columns b,c,d without replicating the code):
df %>%
group_by(gender,country,a) %>%
summarise(totaln=n()) %>%
group_by(country,gender) %>%
mutate(percentage=totaln/sum(totaln)*100)
so for a I get this:
gender country a totaln percentage
<fct> <fct> <fct> <int> <dbl>
1 1 EST 1 2 50
2 1 EST 3 1 25
3 1 EST 4 1 25
4 1 FIN 1 1 25
5 1 FIN 2 3 75
6 1 NIG 1 1 25
7 1 NIG 2 1 25
8 1 NIG 3 1 25
9 1 NIG 4 1 25
10 2 FIN 1 1 50
11 2 FIN 3 1 50
12 2 JAM 1 2 50
13 2 JAM 3 2 50
14 2 NIG 3 1 16.7
15 2 NIG 4 1 16.7
16 2 NIG 5 4 66.7

What you probably first want to do is to transform your data in a tidy-format by using pivot_longer on a:d, and then group also by this variable, from there it is almost the same code as you wrote:
df_summary <- df %>% pivot_longer(c(a, b, c, d), names_to = "abcd") %>%
group_by(gender, country, abcd, value) %>%
summarise(totaln = n()) %>%
group_by(country, gender, abcd) %>%
mutate(percentag = totaln / sum(totaln) * 100)
df_summary
#> # A tibble: 64 × 6
#> # Groups: country, gender, abcd [24]
#> gender country abcd value totaln percentag
#> <fct> <fct> <chr> <fct> <int> <dbl>
#> 1 1 EST a 1 1 25
#> 2 1 EST a 3 2 50
#> 3 1 EST a 4 1 25
#> 4 1 EST b 2 1 25
#> 5 1 EST b 3 1 25
#> 6 1 EST b 4 2 50
#> 7 1 EST c 2 1 25
#> 8 1 EST c 5 3 75
#> 9 1 EST d 1 1 25
#> 10 1 EST d 2 2 50
#> # … with 54 more rows
To create your desired output shape, you can do:
df_summary %>%
ungroup() %>%
mutate(summary = paste0(round(percentag), "% (n = ", totaln, ")")) %>%
select(gender, country, abcd, value, summary) %>%
pivot_wider(names_from = c(gender, country), values_from = summary, values_fill = "0")
Created on 2022-06-16 by the reprex package (v2.0.1)

Two options. Both retain the 0 combinations, that can easily be filtered out if needed.
Long format:
df %>%
tidyr::pivot_longer(-c(gender, country), names_to = "ltr", values_to = "val") %>%
group_by(gender, country, ltr) %>%
summarize(
totaln = sum(as.numeric(table(val))),
percentage = 100 * as.numeric(table(val)) / totaln
) %>%
ungroup()
# `summarise()` has grouped output by 'gender', 'country', 'ltr'. You can override using the `.groups` argument.
# # A tibble: 120 x 5
# gender country ltr totaln percentage
# <fct> <fct> <chr> <dbl> <dbl>
# 1 1 EST a 4 50
# 2 1 EST a 4 0
# 3 1 EST a 4 25
# 4 1 EST a 4 25
# 5 1 EST a 4 0
# 6 1 EST b 4 25
# 7 1 EST b 4 50
# 8 1 EST b 4 25
# 9 1 EST b 4 0
# 10 1 EST b 4 0
# # ... with 110 more rows
Wide format:
df %>%
group_by(gender, country) %>%
summarize(across(a:d,
list(
perc = ~ 100 * as.numeric(table(.)) / sum(as.numeric(table(.))),
total = ~ as.numeric(table(.)),
newval = ~ factor(names(table(.)), levels = levels(.))
)
)) %>%
ungroup()
# `summarise()` has grouped output by 'gender', 'country'. You can override using the `.groups` argument.
# # A tibble: 30 x 14
# gender country a_perc a_total a_newval b_perc b_total b_newval c_perc c_total c_newval d_perc d_total d_newval
# <fct> <fct> <dbl> <dbl> <fct> <dbl> <dbl> <fct> <dbl> <dbl> <fct> <dbl> <dbl> <fct>
# 1 1 EST 50 2 1 25 1 1 75 3 1 0 0 1
# 2 1 EST 0 0 2 50 2 2 0 0 2 50 2 2
# 3 1 EST 25 1 3 25 1 3 0 0 3 25 1 3
# 4 1 EST 25 1 4 0 0 4 0 0 4 25 1 4
# 5 1 EST 0 0 5 0 0 5 25 1 5 0 0 5
# 6 1 FIN 25 1 1 25 1 1 25 1 1 0 0 1
# 7 1 FIN 75 3 2 50 2 2 25 1 2 50 2 2
# 8 1 FIN 0 0 3 0 0 3 25 1 3 25 1 3
# 9 1 FIN 0 0 4 0 0 4 25 1 4 25 1 4
# 10 1 FIN 0 0 5 25 1 5 0 0 5 0 0 5
# # ... with 20 more rows

Related

How to calculate cumulative sum for each group in time?

For each unique ID and rep, I want to calculate the cumulative number of babies at each age?
For instance, A1, the cumulative sum should look like 1,3,6
I tried the folowing method
id <- c("A","A","A","A","A","A","B","B","B","B","B","B","B","B","B")
rep <- c(1,1,1,2,2,2,1,1,1,1,2,2,2,2,2)
age <- c(0,1,2,0,1,2,0,1,2,3,0,1,2,3,4)
babies <- c(1,2,3,0,1,3,0,1,5,1,0,0,12,1,1)
df <- data.frame(id,rep,age,babies)
df$csum <- ave(df$babies, c(df$id,df$age, df$age), FUN=cumsum)
The result is cumulative sum is calculated over ID alone but not replicate or age. Any suggestions?
How about this:
library(dplyr)
id <- c("A","A","A","A","A","A","B","B","B","B","B","B","B","B","B")
rep <- c(1,1,1,2,2,2,1,1,1,1,2,2,2,2,2)
age <- c(0,1,2,0,1,2,0,1,2,3,0,1,2,3,4)
babies <- c(1,2,3,0,1,3,0,1,5,1,0,0,12,1,1)
df <- data.frame(id,rep,age,babies)
df %>%
group_by(id, rep) %>%
arrange(age, .by_group = TRUE) %>%
mutate(csum = cumsum(babies))
#> # A tibble: 15 × 5
#> # Groups: id, rep [4]
#> id rep age babies csum
#> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 A 1 0 1 1
#> 2 A 1 1 2 3
#> 3 A 1 2 3 6
#> 4 A 2 0 0 0
#> 5 A 2 1 1 1
#> 6 A 2 2 3 4
#> 7 B 1 0 0 0
#> 8 B 1 1 1 1
#> 9 B 1 2 5 6
#> 10 B 1 3 1 7
#> 11 B 2 0 0 0
#> 12 B 2 1 0 0
#> 13 B 2 2 12 12
#> 14 B 2 3 1 13
#> 15 B 2 4 1 14
Created on 2022-12-08 by the reprex package (v2.0.1)

reset a ranking when a variable exceeds a value using dplyr

Suppose I have the following data:
df <- tibble(ID=c(1,2,3,4,5,6,7,8,9,10),
ID2=c(1,1,1,1,2,2,2,3,4,4),
VAR=c(25,10,120,60,85,90,20,40,60,150))
I want to add a new column with a ranking that would be reset either when the ID2 changes or when VAR is greater than 100.
The desired result is:
# A tibble: 10 x 4
ID ID2 VAR RANK
<dbl> <dbl> <dbl> <dbl>
1 1 1 25 1
2 2 1 10 2
3 3 1 120 1
4 4 1 60 2
5 5 2 85 1
6 6 2 90 2
7 7 2 20 3
8 8 3 40 1
9 9 4 60 1
10 10 4 150 1
I know how to add a new column with a ranking that would be reset only when the ID2 changes:
df %>%
arrange(ID2) %>%
group_by(ID2) %>%
mutate(RANK = row_number())
... but treating both conditions at the same time is more difficult. How should I do using dplyr?
You can group_by ID2 and cumsum(VAR > 100), i.e.:
library(dplyr)
df %>%
group_by(ID2, cumVAR = cumsum(VAR > 100)) %>%
mutate(RANK = row_number())
output
# A tibble: 10 x 5
# Groups: ID2, cumVAR [6]
ID ID2 VAR cumVAR RANK
<dbl> <dbl> <dbl> <int> <int>
1 1 1 25 0 1
2 2 1 10 0 2
3 3 1 120 1 1
4 4 1 60 1 2
5 5 2 85 1 1
6 6 2 90 1 2
7 7 2 20 1 3
8 8 3 40 1 1
9 9 4 60 1 1
10 10 4 150 2 1
rowid from data.table would be useful as well
library(dplyr)
library(data.table)
df %>%
mutate(RANK = rowid(ID2, cumsum(VAR > 100)))
-output
# A tibble: 10 × 4
ID ID2 VAR RANK
<dbl> <dbl> <dbl> <int>
1 1 1 25 1
2 2 1 10 2
3 3 1 120 1
4 4 1 60 2
5 5 2 85 1
6 6 2 90 2
7 7 2 20 3
8 8 3 40 1
9 9 4 60 1
10 10 4 150 1

Parse one column of json and bind with other column to make dataframe

I have data that takes the format:
have <- structure(list(V1 = c(4L, 28L, 2L),
V2 = c("[{\"group\":1,\"topic\":\"A\"},{\"group\":1,\"topic\":\"B\"},{\"group\":2,\"topic\":\"C\"},{\"group\":2,\"topic\":\"T\"},{\"group\":2,\"topic\":\"U\"},{\"group\":3,\"topic\":\"V\"},{\"group\":3,\"topic\":\"D\"},{\"group\":3,\"topic\":\"R\"},{\"group\":4,\"topic\":\"A\"},{\"group\":4,\"topic\":\"Q\"},{\"group\":4,\"topic\":\"S\"},{\"group\":4,\"topic\":\"W\"},{\"group\":6,\"topic\":\"O\"},{\"group\":6,\"topic\":\"P\"},{\"group\":6,\"topic\":\"E\"},{\"group\":6,\"topic\":\"F\"},{\"group\":6,\"topic\":\"G\"},{\"group\":6,\"topic\":\"H\"},{\"group\":6,\"topic\":\"I\"},{\"group\":6,\"topic\":\"J\"},{\"group\":6,\"topic\":\"K\"},{\"group\":6,\"topic\":\"L\"},{\"group\":6,\"topic\":\"M\"},{\"group\":6,\"topic\":\"N\"}]",
"[]",
"[{\"group\":2,\"topic\":\"C\"},{\"group\":3,\"topic\":\"D\"},{\"group\":6,\"topic\":\"O\"},{\"group\":6,\"topic\":\"P\"},{\"group\":6,\"topic\":\"E\"},{\"group\":6,\"topic\":\"G\"},{\"group\":6,\"topic\":\"M\"}]")
),
row.names = c(NA, 3L),
class = "data.frame")
The contents of V2 are nested groupings for each row like [{"group":1,"topic":"A"},{"group":1,"topic":"B"}...]
I want to get a wide dataframe that has an indicator (1/0) for each combination of group+topic (see also_have) for each row. Something like this:
# A tibble: 3 x 4
id topic_id_1 topic_id_2 topic_id_3 topic_id_4 ...
<dbl> <dbl> <dbl> <dbl>
1 4 1 1 0
2 28 0 0 0
3 2 0 0 0
The first step is to parse the json.
I can use purrr::map(have$V2, jsonlite::fromJSON) to unnest into a list, but I'm not sure how to bind the V1 column (that we might rename to id) to each element of the resulting list (note that list element two is empty because V1==28 is empty). Here's a snippet of what the first element might look like with the id (V1) added.
[[1]]
group topic id
1 1 A 4
2 1 B 4
3 2 C 4
4 2 T 4
...
Alternatively, I think purrr::map_df(have$V2, jsonlite::fromJSON) would get me closer to what I ultimately need, but here too I'm not sure how to add the row id (V1).
df <- purrr::map_df(have$V2, jsonlite::fromJSON)
df
What I get:
group topic
1 1 A
2 1 B
3 2 C
4 2 T
...
What I want (notice `V1==28` does not appear):
group topic id
1 1 A 4
2 1 B 4
3 2 C 4
4 2 T 4
5 2 U 4
6 3 V 4
7 3 D 4
8 3 R 4
9 4 A 4
10 4 Q 4
11 4 S 4
12 4 W 4
13 6 O 4
14 6 P 4
15 6 E 4
16 6 F 4
17 6 G 4
18 6 H 4
19 6 I 4
20 6 J 4
21 6 K 4
22 6 L 4
23 6 M 4
24 6 N 4
25 2 C 2
26 3 D 2
27 6 O 2
28 6 P 2
29 6 E 2
30 6 G 2
31 6 M 2
STOP.
I think if I can get the above dataframe with id I can get the rest of the way. The ultimate goal is to join this info with also_have and then pivot wide.
# join
also_have <- expand_grid(c(1:6), c(LETTERS)) %>%
mutate(topic_id = 1:n()) %>%
magrittr::set_colnames(c("group", "topic", "topic_id")) %>%
select(topic_id, group, topic)
# pivot wide
# A tibble: 3 x 4
id topic_id_1 topic_id_2 topic_id_3 topic_id_4 ...
<dbl> <dbl> <dbl> <dbl>
1 4 1 1 0
2 28 0 0 0
3 2 0 0 0
Update:
Applying #akrun's solution:
purrr::map_dfr(setNames(have$V2, have$V1),
jsonlite::fromJSON,
.id = 'V1') %>%
rename(id = V1) %>%
left_join(also_have, by=c("group", "topic")) %>%
select(-group, -topic) %>%
mutate(value = 1) %>%
pivot_wider(id_cols = id,
names_from = topic_id,
names_prefix = "topic_id",
values_from = value,
values_fill = 0
) %>%
full_join(tibble(id = as.character(have$V1))) %>%
replace(is.na(.), 0)
# A tibble: 3 x 25
id topic_id1 topic_id2 topic_id29 topic_id46 topic_id47 topic_id74 topic_id56
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 4 1 1 1 1 1 1 1
2 2 0 0 1 0 0 0 1
3 28 0 0 0 0 0 0 0
# … with 17 more variables: topic_id70 <dbl>, topic_id79 <dbl>, topic_id95 <dbl>,
# topic_id97 <dbl>, topic_id101 <dbl>, topic_id145 <dbl>, topic_id146 <dbl>,
# topic_id135 <dbl>, topic_id136 <dbl>, topic_id137 <dbl>, topic_id138 <dbl>,
# topic_id139 <dbl>, topic_id140 <dbl>, topic_id141 <dbl>, topic_id142 <dbl>,
# topic_id143 <dbl>, topic_id144 <dbl>
We could pass a named vector and then use .id in map_dfr
purrr::map_dfr(setNames(have$V2, have$V1), jsonlite::fromJSON, .id = 'id')
-output
id group topic
1 4 1 A
2 4 1 B
3 4 2 C
4 4 2 T
5 4 2 U
6 4 3 V
7 4 3 D
8 4 3 R
9 4 4 A
10 4 4 Q
11 4 4 S
12 4 4 W
...
Or this can be done within in dplyr framework itself after using rowwise
library(tidyr)
have %>%
rowwise %>%
transmute(ID = V1, V2 = list(fromJSON(V2))) %>%
ungroup %>%
unnest(c(V2), keep_empty = TRUE) %>%
select(-V2)
# A tibble: 32 x 3
ID group topic
<int> <int> <chr>
1 4 1 A
2 4 1 B
3 4 2 C
4 4 2 T
5 4 2 U
6 4 3 V
7 4 3 D
8 4 3 R
9 4 4 A
10 4 4 Q
# … with 22 more rows
For the second step do a join
out <- have %>%
rowwise %>%
transmute(ID = V1, V2 = list(fromJSON(V2))) %>%
ungroup %>%
unnest(c(V2), keep_empty = TRUE) %>%
select(-V2) %>% right_join(also_have)
out
Joining, by = c("group", "topic")
# A tibble: 163 x 4
ID group topic topic_id
<int> <int> <chr> <int>
1 4 1 A 1
2 4 1 B 2
3 4 2 C 29
4 4 2 T 46
5 4 2 U 47
6 4 3 V 74
7 4 3 D 56
8 4 3 R 70
9 4 4 A 79
10 4 4 Q 95
# … with 153 more rows

R Reshape Wide To Long Using Column Stub Strings

data1=data.frame("School"=c(1,1,2,2,3,3,4,4),
"Fund"=c(0,1,0,1,0,1,0,1),
"Total_A_Grade5"=c(22,20,21,24,24,26,25,22),
"Group1_A_Grade5"=c(10,6,6,10,9,9,9,10),
"Group2_A_Grade5"=c(5,9,9,8,10,8,8,6),
"Total_B_Grade5"=c(23,33,19,21,19,23,20,21),
"Group1_B_Grade5"=c(8,7,7,10,9,9,5,5),
"Group2_B_Grade5"=c(6,10,7,6,6,5,9,9),
"Total_A_Grade6"=c(18,24,16,24,26,25,16,19),
"Group1_A_Grade6"=c(7,7,5,9,10,9,5,7),
"Group2_A_Grade6"=c(5,8,6,7,10,8,8,9),
"Total_B_Grade6"=c(26,23,22,24,21,22,24,19),
"Group1_B_Grade6"=c(10,10,6,10,7,8,8,7),
"Group2_B_Grade6"=c(9,6,9,6,7,6,9,9),
"Total_A_Grade7"=c(20,19,18,25,16,21,19,26),
"Group1_A_Grade7"=c(9,7,7,9,7,7,5,8),
"Group2_A_Grade7"=c(8,5,7,9,6,5,5,9),
"Total_B_Grade7"=c(25,21,24,25,18,18,27,18),
"Group1_B_Grade7"=c(10,10,10,7,5,6,8,5),
"Group2_B_Grade7"=c(9,6,8,10,8,6,10,6))
data2=data.frame("School"=c(1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1),
"Fund"=c(0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1),
"Type"=c('Total','Total','Group1','Group1','Group2','Group2','Total','Total','Group1','Group1','Group2','Group2','Total','Total','Group1','Group1','Group2','Group2','Total','Total','Group1','Group1','Group2','Group2'),
"Class"=c('A','A','A','A','A','A','B','B','B','B','B','B','A','A','A','A','A','A','B','B','B','B','B','B'),
"Grade"=c(5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6),
"Score"=c(22,20,10,6,5,9,23,33,8,7,6,10,18,24,7,7,5,8,26,23,10,10,9,6))
I have 'data1' and want to reshape to make 'data2' which just shows example for School 1 grade 5 and 6 but I want all of data1 reshaped.
The column names of 'data1' contain rich information. For example, Group2_B_Grade6 indicated 'Type' = Group2, 'Class' = B, 'Grade' = 6. I wish to reshape 'data1' and then use these stubs separated by "_" as colnames to prepare 'data2'
data3=data.frame("School"=c(1,1,2,2,3,3,4,4),
"Fund"=c(0,1,0,1,0,1,0,1),
"Grade_5"=c(22,20,21,24,24,26,25,22),
"Grade_6"=c(10,6,6,10,9,9,9,10),
"Grade_7"=c(5,9,9,8,10,8,8,6))
You can do this directly with pivot_longer with some regex in names_pattern.
tidyr::pivot_longer(data1,
cols = -c(School, Fund),
names_to = c('Type', 'Class', 'Grade'),
names_pattern = '(.*?)_([A-Z])_Grade(\\d+)',
values_to = 'Score')
# A tibble: 144 x 6
# School Fund Type Class Grade Score
# <dbl> <dbl> <chr> <chr> <chr> <dbl>
# 1 1 0 Total A 5 22
# 2 1 0 Group1 A 5 10
# 3 1 0 Group2 A 5 5
# 4 1 0 Total B 5 23
# 5 1 0 Group1 B 5 8
# 6 1 0 Group2 B 5 6
# 7 1 0 Total A 6 18
# 8 1 0 Group1 A 6 7
# 9 1 0 Group2 A 6 5
#10 1 0 Total B 6 26
# … with 134 more rows
Using dplyr (and tidyr):
library(dplyr)
library(tidyr)
data2 <- data1 %>%
pivot_longer(-c(School, Fund)) %>%
separate(name, into = c('Type', 'Class', 'Grade')) %>%
extract(Grade, 'Grade', "([0-9]+)")
data2
#> # A tibble: 144 x 6
#> School Fund Type Class Grade value
#> <dbl> <dbl> <chr> <chr> <chr> <dbl>
#> 1 1 0 Total A 5 22
#> 2 1 0 Group1 A 5 10
#> 3 1 0 Group2 A 5 5
#> 4 1 0 Total B 5 23
#> 5 1 0 Group1 B 5 8
#> 6 1 0 Group2 B 5 6
#> 7 1 0 Total A 6 18
#> 8 1 0 Group1 A 6 7
#> 9 1 0 Group2 A 6 5
#> 10 1 0 Total B 6 26
#> # … with 134 more rows
Created on 2020-04-06 by the reprex package (v0.3.0)
We can use melt from data.table
library(data.table)
melt(setDT(data1), id.var = c('School', 'Fund'))[,
c('Type', 'Class', 'Grade') := tstrsplit(variable, "_")][,
Grade := sub('Grade', '', Grade)][, variable := NULL][]
# School Fund value Type Class Grade
# 1: 1 0 22 Total A 5
# 2: 1 1 20 Total A 5
# 3: 2 0 21 Total A 5
# 4: 2 1 24 Total A 5
# 5: 3 0 24 Total A 5
# ---
#140: 2 1 10 Group2 B 7
#141: 3 0 8 Group2 B 7
#142: 3 1 6 Group2 B 7
#143: 4 0 10 Group2 B 7
#144: 4 1 6 Group2 B 7

Aggregate rows with specific shared value

I want to aggregate my data as follows:
Aggregate only for successive rows where status = 0
Keep age and sum up points
Example data:
da <- data.frame(userid = c(1,1,1,1,2,2,2,2), status = c(0,0,0,1,1,1,0,0), age = c(10,10,10,11,15,16,16,16), points = c(2,2,2,6,3,5,5,5))
da
userid status age points
1 1 0 10 2
2 1 0 10 2
3 1 0 10 2
4 1 1 11 6
5 2 1 15 3
6 2 1 16 5
7 2 0 16 5
8 2 0 16 5
I would like to have:
da2
userid status age points
1 1 0 10 6
2 1 1 11 6
3 2 1 15 3
4 2 1 16 5
5 2 0 16 10
da %>%
mutate(grp = with(rle(status),
rep(seq_along(values), lengths)) + cumsum(status != 0)) %>%
group_by_at(vars(-points)) %>%
summarise(points = sum(points)) %>%
ungroup() %>%
select(-grp)
## A tibble: 5 x 4
# userid status age points
# <dbl> <dbl> <dbl> <dbl>
#1 1 0 10 6
#2 1 1 11 6
#3 2 0 16 10
#4 2 1 15 3
#5 2 1 16 5
You can use group_by from dplyr:
da %>% group_by(da$userid, cumsum(da$status), da$status)
%>% summarise(age=max(age), points=sum(points))
Output:
`da$userid` `cumsum(da$status)` `da$status` age points
<dbl> <dbl> <dbl> <dbl> <dbl>
1 1 0 0 10 6
2 1 1 1 11 6
3 2 2 1 15 3
4 2 3 0 16 10
5 2 3 1 16 5
Exactly the same idea as above :
library(dplyr)
data1 <- data %>% group_by(userid, age, status) %>%
filter(status == 0) %>%
summarise(points = sum(points))
data2 <- data %>%
group_by(userid, age, status) %>%
filter(status != 0) %>%
summarise(points = sum(points))
data <- rbind(data1,
data2)
We need to be more carreful with your specification of status equal to 0. I think the code of Quang Hoang works only for your specific example.
I hope it will help.

Resources