I have a list named lst which contains three iterations and in each iteration there is 2 years of projections. I want to subset and extract from my data gender =2 in such a way that finally I have two lists. one list with gender 1 and second list with gender2.
iteration1 <- list(year1 =data.frame(age=c(10,11,12,13), district=c(1,2,3,4),gender=c(1,2,2,1)
,weight=c(12.2,11.3,11.2,10.1)),
year2 =data.frame(age=c(10,11,12,13,10,10), district=c(1,2,3,4,2,1),gender=c(1,2,2,1,1,1),weight=c(12.2,11.3,11.2,10.1,12.2,13.1)))
iteration2 <- list(year1 =data.frame(age=c(10,11,12,13), district=c(1,2,3,4),gender=c(2,2,1,1)
,weight=c(12.2,11.3,11.2,10.1)),
year2 =data.frame(age=c(10,11,12,13,13,13,12), district=c(1,2,3,4,1,3,3),gender=c(2,2,1,1,2,2,2),weight=c(12.2,11.3,11.2,10.1,10.9,11.9,15.1)))
iteration3 <- list(year1 =data.frame(age=c(10,11,12,13), district=c(1,2,3,4),gender=c(2,2,1,1)
,weight=c(12.2,11.3,11.2,10.1)),
year2 =data.frame(age=c(10,11,12,13,10,10,11,12), district=c(1,2,3,4,4,3,2,2),gender=c(2,2,1,1,2,2,1,2),weight=c(12.2,11.3,11.2,10.1,13.5,12.8,13.9,14.9)))
lst <- list(iteration1 = iteration1, iteration2 = iteration2, iteration3= iteration3 )
I hope this is what you have in mind:
library(purrr)
map(1:2, function(a){
lst %>%
map_dfr(~ .x %>%
map_dfr(~ .x %>%
filter(gender == a)))
}) %>%
set_names(paste("gender", 1:length(.)))
$`gender 1`
age district gender weight
1 10 1 1 12.2
2 13 4 1 10.1
3 10 1 1 12.2
4 13 4 1 10.1
5 10 2 1 12.2
6 10 1 1 13.1
7 12 3 1 11.2
8 13 4 1 10.1
9 12 3 1 11.2
10 13 4 1 10.1
11 12 3 1 11.2
12 13 4 1 10.1
13 12 3 1 11.2
14 13 4 1 10.1
15 11 2 1 13.9
$`gender 2`
age district gender weight
1 11 2 2 11.3
2 12 3 2 11.2
3 11 2 2 11.3
4 12 3 2 11.2
5 10 1 2 12.2
6 11 2 2 11.3
7 10 1 2 12.2
8 11 2 2 11.3
9 13 1 2 10.9
10 13 3 2 11.9
11 12 3 2 15.1
12 10 1 2 12.2
13 11 2 2 11.3
14 10 1 2 12.2
15 11 2 2 11.3
16 10 4 2 13.5
17 10 3 2 12.8
18 12 2 2 14.9
You may also do this
library(tidyverse)
map_dfr(lst, \(x) map_dfr(x, ~.x)) %>% split(.$gender)
#> $`1`
#> age district gender weight
#> 1 10 1 1 12.2
#> 4 13 4 1 10.1
#> 5 10 1 1 12.2
#> 8 13 4 1 10.1
#> 9 10 2 1 12.2
#> 10 10 1 1 13.1
#> 13 12 3 1 11.2
#> 14 13 4 1 10.1
#> 17 12 3 1 11.2
#> 18 13 4 1 10.1
#> 24 12 3 1 11.2
#> 25 13 4 1 10.1
#> 28 12 3 1 11.2
#> 29 13 4 1 10.1
#> 32 11 2 1 13.9
#>
#> $`2`
#> age district gender weight
#> 2 11 2 2 11.3
#> 3 12 3 2 11.2
#> 6 11 2 2 11.3
#> 7 12 3 2 11.2
#> 11 10 1 2 12.2
#> 12 11 2 2 11.3
#> 15 10 1 2 12.2
#> 16 11 2 2 11.3
#> 19 13 1 2 10.9
#> 20 13 3 2 11.9
#> 21 12 3 2 15.1
#> 22 10 1 2 12.2
#> 23 11 2 2 11.3
#> 26 10 1 2 12.2
#> 27 11 2 2 11.3
#> 30 10 4 2 13.5
#> 31 10 3 2 12.8
#> 33 12 2 2 14.9
Created on 2021-06-19 by the reprex package (v2.0.0)
An option with bind_rows
library(dplyr)
library(purrr)
map_dfr(lst, bind_rows) %>%
group_split(gender)
-output
[[1]]
# A tibble: 15 x 4
age district gender weight
<dbl> <dbl> <dbl> <dbl>
1 10 1 1 12.2
2 13 4 1 10.1
3 10 1 1 12.2
4 13 4 1 10.1
5 10 2 1 12.2
6 10 1 1 13.1
7 12 3 1 11.2
8 13 4 1 10.1
9 12 3 1 11.2
10 13 4 1 10.1
11 12 3 1 11.2
12 13 4 1 10.1
13 12 3 1 11.2
14 13 4 1 10.1
15 11 2 1 13.9
[[2]]
# A tibble: 18 x 4
age district gender weight
<dbl> <dbl> <dbl> <dbl>
1 11 2 2 11.3
2 12 3 2 11.2
3 11 2 2 11.3
4 12 3 2 11.2
5 10 1 2 12.2
6 11 2 2 11.3
7 10 1 2 12.2
8 11 2 2 11.3
9 13 1 2 10.9
10 13 3 2 11.9
11 12 3 2 15.1
12 10 1 2 12.2
13 11 2 2 11.3
14 10 1 2 12.2
15 11 2 2 11.3
16 10 4 2 13.5
17 10 3 2 12.8
18 12 2 2 14.9
Using purrr:
library(dplyr)
library(purrr)
lst1 <- map(lst, ~ map(., filter, gender == 1))
lst2 <- map(lst, ~ map(., filter, gender == 2))
lst1
$iteration1
$iteration1$year1
age district gender weight
1 10 1 1 12.2
2 13 4 1 10.1
$iteration1$year2
age district gender weight
1 10 1 1 12.2
2 13 4 1 10.1
3 10 2 1 12.2
4 10 1 1 13.1
$iteration2
$iteration2$year1
age district gender weight
1 12 3 1 11.2
2 13 4 1 10.1
$iteration2$year2
age district gender weight
1 12 3 1 11.2
2 13 4 1 10.1
$iteration3
$iteration3$year1
age district gender weight
1 12 3 1 11.2
2 13 4 1 10.1
$iteration3$year2
age district gender weight
1 12 3 1 11.2
2 13 4 1 10.1
3 11 2 1 13.9
lst2
$iteration1
$iteration1$year1
age district gender weight
1 11 2 2 11.3
2 12 3 2 11.2
$iteration1$year2
age district gender weight
1 11 2 2 11.3
2 12 3 2 11.2
$iteration2
$iteration2$year1
age district gender weight
1 10 1 2 12.2
2 11 2 2 11.3
$iteration2$year2
age district gender weight
1 10 1 2 12.2
2 11 2 2 11.3
3 13 1 2 10.9
4 13 3 2 11.9
5 12 3 2 15.1
$iteration3
$iteration3$year1
age district gender weight
1 10 1 2 12.2
2 11 2 2 11.3
$iteration3$year2
age district gender weight
1 10 1 2 12.2
2 11 2 2 11.3
3 10 4 2 13.5
4 10 3 2 12.8
5 12 2 2 14.9
Using base R, you'd get the same output with the following lines:
lst1 <- lapply(lst, function(x) lapply(x, function(y) subset(y, gender == 1)))
lst2 <- lapply(lst, function(x) lapply(x, function(y) subset(y, gender == 2)))
suppose I have the next data frame.
table<-data.frame(group=c(0,5,10,15,20,25,30,35,40,0,5,10,15,20,25,30,35,40,0,5,10,15,20,25,30,35,40),plan=c(1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3),price=c(1,4,5,6,8,9,12,12,12,3,5,6,7,10,12,20,20,20,5,6,8,12,15,20,22,28,28))
group plan price
1 0 1 1
2 5 1 4
3 10 1 5
4 15 1 6
5 20 1 8
6 25 1 9
7 30 1 12
8 35 1 12
9 40 1 12
10 0 2 3
11 5 2 5
12 10 2 6
13 15 2 7
14 20 2 10
15 25 2 12
16 30 2 20
17 35 2 20
18 40 2 20
19 0 3 5
20 5 3 6
21 10 3 8
22 15 3 12
23 20 3 15
24 25 3 20
25 30 3 22
26 35 3 28
27 40 3 28
So, I want to group the columns so that for each "plan" with "group" greater than 20, group me 2-in-2 records (average of the next record) and when the largest number is repeated , Leave the latter without duplicates.
The example below shows how to result would be.
data.frame(group=c(0,5,10,15,20,30,0,5,10,15,20,30,0,5,10,15,20,30,40),plan=c(1,1,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,3,3),price=c(1,4,5,6,8.5,12,3,5,6,7,11,20,5,6,8,12,17.5,25,28))
group plan price
1 0 1 1.0
2 5 1 4.0
3 10 1 5.0
4 15 1 6.0
5 20 1 8.5
6 30 1 12.0
7 0 1 3.0
8 5 2 5.0
9 10 2 6.0
10 15 2 7.0
11 20 2 11.0
12 30 2 20.0
13 0 3 5.0
14 5 3 6.0
15 10 3 8.0
16 15 3 12.0
17 20 3 17.5
18 30 3 25.0
19 40 3 28.0
Thanks!
You could try this using the dplyr package:
library(dplyr)
table %>%
group_by(plan) %>%
mutate(group=ifelse(group<20,group,10*floor(group/10))) %>%
group_by(plan,group) %>%
summarise(price=mean(price)) %>%
## Keep the last row per group only if the price is different from the previous average price
group_by(plan) %>%
filter(!(row_number()==n() & price==lag(price)))
This returns:
plan group price
<dbl> <dbl> <dbl>
1 1 0 1.0
2 1 5 4.0
3 1 10 5.0
4 1 15 6.0
5 1 20 8.5
6 1 30 12.0
7 2 0 3.0
8 2 5 5.0
9 2 10 6.0
10 2 15 7.0
11 2 20 11.0
12 2 30 20.0
13 3 0 5.0
14 3 5 6.0
15 3 10 8.0
16 3 15 12.0
17 3 20 17.5
18 3 30 25.0
19 3 40 28.0
How about:
dat<-data.frame(group=c(0,5,10,15,20,25,30,35,40,0,5,10,15,20,25,30,35,40,0,5,10,15,20,25,30,35,40),plan=c(1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3),price=c(1,4,5,6,8,9,12,12,12,3,5,6,7,10,12,20,20,20,5,6,8,12,15,20,22,28,28))
s <- split(dat, ifelse(dat$group>20, ">20", "<=20"))
s20 <- s[[">20"]] # easier to read
tens <- which(s20$group %% 10 == 0)
tens
# [1] 2 4 6 8 10 12
subgroup <- rep(1:length(tens), each = nrow(s20)/length(tens)) # can handle different freqs
subgroup
# [1] 1 1 2 2 3 3 4 4 5 5 6 6
ToAddBack <- s20[tens,]
ToAddBack[,"price"] <- aggregate(s20$price, by = list(subgroup), mean)[2]
newdat <- rbind(s[["<=20"]], ToAddBack)
finaldat <- newdat[order(newdat$plan, newdat$group),]
Where your finaldat is a little different from your example as I think you left out some rows by accident:
finaldat
group plan price
1 0 1 1.0
2 5 1 4.0
3 10 1 5.0
4 15 1 6.0
5 20 1 8.0
7 30 1 10.5
9 40 1 12.0
10 0 2 3.0
11 5 2 5.0
12 10 2 6.0
13 15 2 7.0
14 20 2 10.0
16 30 2 16.0
18 40 2 20.0
19 0 3 5.0
20 5 3 6.0
21 10 3 8.0
22 15 3 12.0
23 20 3 15.0
25 30 3 21.0
27 40 3 28.0