R diff lead difference between two columns? - r

Here is my dataframe:
structure(list(replicate = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L,
7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 10L, 10L, 10L,
10L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 13L, 13L, 13L, 13L,
14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L), press_id = c(1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L), start_time = c(164429106370979,
164429411618825, 164429837271940, 164430399454285, 164429106370980,
164429411618826, 164429837271941, 164430399454286, 164429106370981,
164429411618827, 164429837271942, 164430399454287, 164429106370982,
164429411618828, 164429837271943, 164430399454288, 164429106370983,
164429411618829, 164429837271944, 164430399454289, 164429106370984,
164429411618830, 164429837271945, 164430399454290, 164429106370985,
164429411618831, 164429837271946, 164430399454291, 164429106370986,
164429411618832, 164429837271947, 164430399454292, 164429106370987,
164429411618833, 164429837271948, 164430399454293, 164429106370988,
164429411618834, 164429837271949, 164430399454294, 164429106370989,
164429411618835, 164429837271950, 164430399454295, 164429106370990,
164429411618836, 164429837271951, 164430399454296, 164429106370991,
164429411618837, 164429837271952, 164430399454297, 164429106370992,
164429411618838, 164429837271953, 164430399454298, 164429106370993,
164429411618839, 164429837271954, 164430399454299), end_time = c(164429182443825,
164429512525748, 164429903243170, 164430465927555, 164429182443826,
164429512525749, 164429903243171, 164430465927556, 164429182443827,
164429512525750, 164429903243172, 164430465927557, 164429182443828,
164429512525751, 164429903243173, 164430465927558, 164429182443829,
164429512525752, 164429903243174, 164430465927559, 164429182443830,
164429512525753, 164429903243175, 164430465927560, 164429182443831,
164429512525754, 164429903243176, 164430465927561, 164429182443832,
164429512525755, 164429903243177, 164430465927562, 164429182443833,
164429512525756, 164429903243178, 164430465927563, 164429182443834,
164429512525757, 164429903243179, 164430465927564, 164429182443835,
164429512525758, 164429903243180, 164430465927565, 164429182443836,
164429512525759, 164429903243181, 164430465927566, 164429182443837,
164429512525760, 164429903243182, 164430465927567, 164429182443838,
164429512525761, 164429903243183, 164430465927568, 164429182443839,
164429512525762, 164429903243184, 164430465927569)), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -60L), vars = c("replicate",
"press_id"), drop = TRUE, indices = list(0L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L,
18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L,
30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L,
42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L,
54L, 55L, 56L, 57L, 58L, 59L), group_sizes = c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), biggest_group_size = 1L, labels = structure(list(
replicate = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 7L, 7L,
7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L,
11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 13L, 13L, 13L, 13L,
14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L), press_id = c(1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L,
1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L)), class = "data.frame", row.names = c(NA,
-60L), vars = c("replicate", "press_id"), drop = TRUE, indices = list(
0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L,
14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L,
26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L,
38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L,
50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L), group_sizes = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), biggest_group_size = 1L, labels = structure(list(
replicate = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 7L, 7L,
7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L,
11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 13L, 13L, 13L, 13L,
14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L), press_id = c(1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L,
1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L)), class = "data.frame", row.names = c(NA,
-60L), vars = c("replicate", "press_id"), drop = TRUE, .Names = c("replicate",
"press_id")), .Names = c("replicate", "press_id")), .Names = c("replicate",
"press_id", "start_time", "end_time"))
I want to get the inter press_id time diff for example:
replicate press_id start_time end_time time_diff
1 1 1.644291e+14 1.644292e+14 0 (it's a first row)
1 2 1.644294e+14 1.644295e+14 1.644294e+14 - 1.644292e+14
1 3 1.644298e+14 1.644299e+14 1.644298e+14 - 1.644295e+14
1 4 1.644304e+14 1.644305e+14 .....
2 1 1.644291e+14 1.644292e+14
2 2 1.644294e+14 1.644295e+14
2 3 1.644298e+14 1.644299e+14
2 4 1.644304e+14 1.644305e+14
I am trying to do this using mutate, lag, lead and diff but without any luck. I have grouped, and ungrouped the dataset, nothing helped me.
df %>%
group_by(replicate) %>%
mutate(d = ifelse(row_number() == 1, 0, lead(start_time) - end_time))

df %>%
group_by(replicate) %>%
mutate(d = start_time - lag(end_time))
And if you want zeroes except NAs for the first row of each unique value in the replicate column, you could do:
df %>%
group_by(replicate) %>%
mutate(d = start_time - lag(end_time),
d = ifelse(is.na(d), 0, d))
Or just:
df %>%
group_by(replicate) %>%
mutate(d = ifelse(row_number() == 1, 0, start_time - lag(end_time)))

Related

Editing a Row Value in R

I have a data frame that looks like this
Pick Name Team Round Player Position Position..
1 1 Javi Texans 1 Patrick Mahomes QB QB1
2 2 Justin Chiefs 1 Russell Wilson QB QB2
3 3 Blake Titans 1 Lamar Jackson QB QB3
4 4 Connor Dolphins 1 Deshaun Watson QB QB4
5 5 Isaac Jaguars 1 Carson Wentz QB QB5
6 6 Fitz Rams 1 Dak Prescott QB QB6
with more rows of course and some of the rows in the Player, Position and Position... Column are empty because they haven't been drafted yet. Is there a way to just manually insert the names, pos, pos... of the newly drafted players.
I tried
Redraft[112, "Player"] <- "Calvin Ridley"; Redraft
Since the empty cells start on row 112, but it just came up as N/A
When I do that I also get an error message:
Warning message:
In `[<-.factor`(`*tmp*`, iseq, value = "Calvin Ridley") :
invalid factor level, NA generated
and the data frame looks like
08 108 Jack Packers 4 TE3 Darren Waller TE
109 109 Justin Saints 4 LT6 Taylor Lewan LT
110 110 Sam Steelers 4 FS5 Kevin Byard FS
111 111 Jeremy Falcons 4 LB7 Isaiah Simmons LB
112 112 Will Bills 4 1 <NA>
113 113 Jeremy Colts 4 1
And heres the whole data frame:
structure(list(Pick = 1:384, Name = structure(c(12L, 14L, 1L,
2L, 7L, 5L, 8L, 6L, 9L, 12L, 9L, 2L, 10L, 16L, 11L, 13L, 20L,
13L, 17L, 14L, 8L, 3L, 3L, 19L, 5L, 19L, 7L, 1L, 6L, 4L, 18L,
15L, 15L, 18L, 4L, 6L, 1L, 7L, 19L, 5L, 19L, 3L, 3L, 8L, 14L,
17L, 13L, 20L, 13L, 11L, 16L, 10L, 2L, 9L, 12L, 9L, 6L, 8L, 5L,
7L, 2L, 1L, 14L, 12L, 12L, 14L, 1L, 2L, 7L, 5L, 8L, 6L, 9L, 12L,
9L, 2L, 10L, 16L, 11L, 13L, 20L, 13L, 17L, 14L, 8L, 3L, 3L, 19L,
5L, 19L, 7L, 1L, 6L, 4L, 18L, 15L, 15L, 18L, 4L, 6L, 1L, 7L,
19L, 5L, 19L, 3L, 3L, 8L, 14L, 17L, 13L, 20L, 13L, 11L, 16L,
10L, 2L, 9L, 12L, 9L, 6L, 8L, 5L, 7L, 2L, 1L, 14L, 12L, 12L,
14L, 1L, 2L, 7L, 5L, 8L, 6L, 9L, 12L, 9L, 2L, 10L, 16L, 11L,
13L, 20L, 13L, 17L, 14L, 8L, 3L, 3L, 19L, 5L, 19L, 7L, 1L, 6L,
4L, 18L, 15L, 15L, 18L, 4L, 6L, 1L, 7L, 19L, 5L, 19L, 3L, 3L,
8L, 14L, 17L, 13L, 20L, 13L, 11L, 16L, 10L, 2L, 9L, 12L, 9L,
6L, 8L, 5L, 7L, 2L, 1L, 14L, 12L, 12L, 14L, 1L, 2L, 7L, 5L, 8L,
6L, 9L, 12L, 9L, 2L, 10L, 16L, 11L, 13L, 20L, 13L, 17L, 14L,
8L, 3L, 3L, 19L, 5L, 19L, 7L, 1L, 6L, 4L, 18L, 15L, 15L, 18L,
4L, 6L, 1L, 7L, 19L, 5L, 19L, 3L, 3L, 8L, 14L, 17L, 13L, 20L,
13L, 11L, 16L, 10L, 2L, 9L, 12L, 9L, 6L, 8L, 5L, 7L, 2L, 1L,
14L, 12L, 12L, 14L, 1L, 2L, 7L, 5L, 8L, 6L, 9L, 12L, 9L, 2L,
10L, 16L, 11L, 13L, 20L, 13L, 17L, 14L, 8L, 3L, 3L, 19L, 5L,
19L, 7L, 1L, 6L, 4L, 18L, 15L, 15L, 18L, 4L, 6L, 1L, 7L, 19L,
5L, 19L, 3L, 3L, 8L, 14L, 17L, 13L, 20L, 13L, 11L, 16L, 10L,
2L, 9L, 12L, 9L, 6L, 8L, 5L, 7L, 2L, 1L, 14L, 12L, 12L, 14L,
1L, 2L, 7L, 5L, 8L, 6L, 9L, 12L, 9L, 2L, 10L, 16L, 11L, 13L,
20L, 13L, 17L, 14L, 8L, 3L, 3L, 19L, 5L, 19L, 7L, 1L, 6L, 4L,
18L, 15L, 15L, 18L, 4L, 6L, 1L, 7L, 19L, 5L, 19L, 3L, 3L, 8L,
14L, 17L, 13L, 20L, 13L, 11L, 16L, 10L, 2L, 9L, 12L, 9L, 6L,
8L, 5L, 7L, 2L, 1L, 14L, 12L), .Label = c("Blake", "Connor",
"Dakota", "FFB", "Fitz", "Haydon", "Isaac", "Jack", "Jackson",
"Jacob", "Jacob H", "Javi", "Jeremy", "Justin", "Nick", "Pete",
"Sam", "Simon", "Tucker", "Will"), class = "factor"), Team = structure(c(30L,
10L, 31L, 13L, 17L, 24L, 18L, 6L, 3L, 8L, 7L, 28L, 9L, 21L, 14L,
11L, 4L, 15L, 29L, 27L, 20L, 1L, 25L, 5L, 23L, 26L, 32L, 19L,
12L, 16L, 22L, 2L, 2L, 22L, 16L, 12L, 19L, 32L, 26L, 23L, 5L,
25L, 1L, 20L, 27L, 29L, 15L, 4L, 11L, 14L, 21L, 9L, 28L, 7L,
8L, 3L, 6L, 18L, 24L, 17L, 13L, 31L, 10L, 30L, 30L, 10L, 31L,
13L, 17L, 24L, 18L, 6L, 3L, 8L, 7L, 28L, 9L, 21L, 14L, 11L, 4L,
15L, 29L, 27L, 20L, 1L, 25L, 5L, 23L, 26L, 32L, 19L, 12L, 16L,
22L, 2L, 2L, 22L, 16L, 12L, 19L, 32L, 26L, 23L, 5L, 25L, 1L,
20L, 27L, 29L, 15L, 4L, 11L, 14L, 21L, 9L, 28L, 7L, 8L, 3L, 6L,
18L, 24L, 17L, 13L, 31L, 10L, 30L, 30L, 10L, 31L, 13L, 17L, 24L,
18L, 6L, 3L, 8L, 7L, 28L, 9L, 21L, 14L, 11L, 4L, 15L, 29L, 27L,
20L, 1L, 25L, 5L, 23L, 26L, 32L, 19L, 12L, 16L, 22L, 2L, 2L,
22L, 16L, 12L, 19L, 32L, 26L, 23L, 5L, 25L, 1L, 20L, 27L, 29L,
15L, 4L, 11L, 14L, 21L, 9L, 28L, 7L, 8L, 3L, 6L, 18L, 24L, 17L,
13L, 31L, 10L, 30L, 30L, 10L, 31L, 13L, 17L, 24L, 18L, 6L, 3L,
8L, 7L, 28L, 9L, 21L, 14L, 11L, 4L, 15L, 29L, 27L, 20L, 1L, 25L,
5L, 23L, 26L, 32L, 19L, 12L, 16L, 22L, 2L, 2L, 22L, 16L, 12L,
19L, 32L, 26L, 23L, 5L, 25L, 1L, 20L, 27L, 29L, 15L, 4L, 11L,
14L, 21L, 9L, 28L, 7L, 8L, 3L, 6L, 18L, 24L, 17L, 13L, 31L, 10L,
30L, 30L, 10L, 31L, 13L, 17L, 24L, 18L, 6L, 3L, 8L, 7L, 28L,
9L, 21L, 14L, 11L, 4L, 15L, 29L, 27L, 20L, 1L, 25L, 5L, 23L,
26L, 32L, 19L, 12L, 16L, 22L, 2L, 2L, 22L, 16L, 12L, 19L, 32L,
26L, 23L, 5L, 25L, 1L, 20L, 27L, 29L, 15L, 4L, 11L, 14L, 21L,
9L, 28L, 7L, 8L, 3L, 6L, 18L, 24L, 17L, 13L, 31L, 10L, 30L, 30L,
10L, 31L, 13L, 17L, 24L, 18L, 6L, 3L, 8L, 7L, 28L, 9L, 21L, 14L,
11L, 4L, 15L, 29L, 27L, 20L, 1L, 25L, 5L, 23L, 26L, 32L, 19L,
12L, 16L, 22L, 2L, 2L, 22L, 16L, 12L, 19L, 32L, 26L, 23L, 5L,
25L, 1L, 20L, 27L, 29L, 15L, 4L, 11L, 14L, 21L, 9L, 28L, 7L,
8L, 3L, 6L, 18L, 24L, 17L, 13L, 31L, 10L, 30L), .Label = c("49ers",
"Bears", "Bengals", "Bills", "Broncos", "Browns", "Buccaneers",
"Cardinals", "Chargers", "Chiefs", "Colts", "Cowboys", "Dolphins",
"Eagles", "Falcons", "Giants", "Jaguars", "Jets", "Lions", "Packers",
"Panthers", "Patriots", "Raiders", "Rams", "Ravens", "Redskins",
"Saints", "Seahawks", "Steelers", "Texans", "Titans", "Vikings"
), class = "factor"), Round = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L),
Pos.. = structure(c(49L, 60L, 70L, 71L, 72L, 73L, 74L, 75L,
27L, 76L, 43L, 50L, 51L, 92L, 52L, 53L, 54L, 55L, 77L, 56L,
57L, 58L, 89L, 59L, 85L, 61L, 103L, 3L, 106L, 35L, 62L, 36L,
63L, 42L, 10L, 107L, 18L, 64L, 108L, 65L, 109L, 19L, 11L,
110L, 86L, 37L, 111L, 12L, 20L, 112L, 66L, 21L, 38L, 13L,
90L, 78L, 81L, 30L, 14L, 15L, 82L, 39L, 16L, 17L, 93L, 94L,
4L, 22L, 95L, 96L, 2L, 97L, 67L, 5L, 68L, 87L, 83L, 84L,
6L, 31L, 44L, 98L, 99L, 100L, 7L, 28L, 101L, 32L, 29L, 8L,
33L, 88L, 69L, 79L, 102L, 9L, 104L, 40L, 23L, 24L, 105L,
25L, 45L, 80L, 46L, 26L, 47L, 91L, 48L, 34L, 41L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = c("1", "C1", "CB1", "CB10", "CB11", "CB12",
"CB13", "CB14", "CB15", "CB2", "CB3", "CB4", "CB5", "CB6",
"CB7", "CB8", "CB9", "DE1", "DE2", "DE3", "DE4", "DE5", "DE6",
"DE7", "DE8", "DE9", "DT1", "DT2", "DT3", "FS1", "FS2", "FS3",
"FS4", "FS5", "LB1", "LB2", "LB3", "LB4", "LB5", "LB6", "LB7",
"LG1", "LT1", "LT2", "LT3", "LT4", "LT5", "LT6", "QB1", "QB10",
"QB11", "QB12", "QB13", "QB14", "QB15", "QB16", "QB17", "QB18",
"QB19", "QB2", "QB20", "QB21", "QB22", "QB23", "QB24", "QB25",
"QB26", "QB27", "QB28", "QB3", "QB4", "QB5", "QB6", "QB7",
"QB8", "QB9", "RB1", "RB2", "RB3", "RB4", "RG1", "RT1", "RT2",
"RT3", "SS1", "SS2", "SS3", "SS4", "TE1", "TE2", "TE3", "WR1",
"WR10", "WR11", "WR12", "WR13", "WR14", "WR15", "WR16", "WR17",
"WR18", "WR19", "WR2", "WR20", "WR21", "WR3", "WR4", "WR5",
"WR6", "WR7", "WR8", "WR9"), class = "factor"), Player = structure(c(87L,
91L, 72L, 38L, 14L, 24L, 79L, 78L, 3L, 57L, 90L, 70L, 107L,
31L, 39L, 10L, 56L, 68L, 20L, 4L, 94L, 93L, 45L, 52L, 51L,
44L, 80L, 97L, 62L, 67L, 40L, 98L, 101L, 89L, 50L, 9L, 85L,
104L, 19L, 41L, 109L, 58L, 106L, 81L, 37L, 26L, 29L, 27L,
18L, 86L, 60L, 84L, 17L, 74L, 105L, 95L, 111L, 63L, 76L,
55L, 92L, 110L, 12L, 15L, 64L, 96L, 34L, 25L, 61L, 103L,
54L, 21L, 53L, 49L, 59L, 108L, 71L, 83L, 77L, 82L, 102L,
7L, 65L, 2L, 69L, 32L, 22L, 75L, 43L, 5L, 8L, 46L, 35L, 42L,
23L, 88L, 6L, 11L, 60L, 48L, 16L, 13L, 30L, 36L, 73L, 33L,
99L, 28L, 100L, 66L, 47L, NA, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("", "A.J. Brown",
"Aaron Donald", "Aaron Rodgers", "Adoree' Jackson", "Allen Robinson",
"Amari Cooper", "Anthony Harris", "Antonio Brown", "Baker Mayfield",
"Bobby Wagner", "Byron Jones ", "Cameron Jordan", "Carson Wentz",
"Casey Hayward", "CeDee Lamb", "Chandler Jones", "Chase Young",
"Chris Godwin", "Christian McCaffrey", "Cooper Kupp", "Courtland Sutton",
"D.J. Moore", "Dak Prescott", "Danielle Hunter", "Darius Leonard",
"Darius Slay", "Darren Waller", "DaVante Adams", "David Bakhtiari",
"DeAndre Hopkins", "Deforest Buckner", "Demarcus Lawrence",
"Denzel Ward", "Derek Carr", "Derrick Henry", "Derwin James",
"Deshaun Watson", "Drew Brees", "Drew Lock", "Dwayne Haskins",
"Ezekiel Elliott", "Fletcher Cox", "Gardner Minshew", "George Kittle",
"Harrison Smith", "Isaiah Simmons", "J.J. Watt", "Jaire Alexander",
"Jalen Ramsey", "Jamal Adams", "Jared Goff", "Jarrett Stidham",
"Jason Kelce", "Jeffrey Okudah", "Jimmy Garappolo", "Joe Burrow",
"Joey Bosa", "Jordan Love", "Josh Allen", "Juju Smith-Schuster",
"Julio Jones", "Justin Simmons", "Keenan Allen", "Kenny Golladay",
"Kevin Byard", "Khalil Mack", "Kirk Cousins", "Kyle Fuller",
"Kyler Murray ", "La'el Collins", "Lamar Jackson ", "Laremy Tunsil",
"Marcus Peters", "Marcus Williams", "Marlon Humphrey", "Marshon Lattimore",
"Matt Ryan", "Matthew Stafford", "Michael Thomas", "Mike Evans",
"Minkah Fitzpatrick", "Mitchell Schwartz ", "Myles Garrett",
"Nick Bosa", "Odell Beckham Jr.", "Patrick Mahomes ", "Patrick Peterson",
"Quenton Nelson", "Ronnie Stanley", "Russell Wilson ", "Ryan Ramczyk",
"Ryan Tannehill", "Sam Darnold", "Saquon Barkley", "Stefon Diggs",
"Stephon Gilmore", "T.J. Watt", "Taylor Decker", "Taylor Lewan",
"Teddy Bridgewater", "Terron Armstead", "Terry McLaurin",
"Tom Brady", "Travis Kelce", "Tre White", "Tua Tagovailoa",
"Tyrann Mathieu", "Tyreek Hill", "Von Miller", "Zack Martin"
), class = "factor"), Position = structure(c(10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 5L, 10L, 9L, 10L, 10L, 16L, 10L,
10L, 10L, 10L, 11L, 10L, 10L, 10L, 15L, 10L, 14L, 10L, 16L,
3L, 16L, 7L, 10L, 7L, 10L, 8L, 3L, 16L, 4L, 10L, 16L, 10L,
16L, 4L, 3L, 16L, 14L, 7L, 16L, 3L, 4L, 16L, 10L, 4L, 7L,
3L, 15L, 11L, 12L, 6L, 3L, 3L, 13L, 7L, 3L, 3L, 16L, 16L,
3L, 4L, 16L, 16L, 2L, 16L, 10L, 3L, 10L, 14L, 13L, 13L, 3L,
6L, 9L, 16L, 16L, 16L, 3L, 5L, 16L, 6L, 5L, 3L, 6L, 14L,
10L, 11L, 16L, 3L, 16L, 7L, 4L, 4L, 16L, 4L, 9L, 11L, 9L,
4L, 9L, 15L, 9L, 6L, 7L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("", "C",
"CB", "DE", "DT", "FS", "LB", "LG", "LT", "QB", "RB", "RG",
"RT", "SS", "TE", "WR"), class = "factor")), row.names = c(NA,
384L), class = "data.frame")
You're dealing with a factor column. "Calvin Ridley" isn't yet a level of the factor. After adding it you can rename the cell.
class(Redraft$Player)
# [1] "factor"
levels(Redraft$Player) <- c(levels(Redraft$Player), "Calvin Ridley")
Redraft[112, "Player"] <- "Calvin Ridley"
Redraft[112, "Player"]
# [1] Calvin Ridley
# 112 Levels: A.J. Brown Aaron Donald Aaron Rodgers Adoree' Jackson Allen Robinson ... Calvin Ridley
jay.sf's answer is correct, of course, but I'd add my 2ยข since I think it's missing the point.
The reason you have factors instead of plain strings here in the first place, is kind of a historical accident with R being a statistical language. In practice, you rarely want to be dealing with factors in a dataframe of this kind. You probably want your player names to be plain-old strings.
Typically when you read a dataframe from a file, e.g. via read.csv, you have the option to pass the argument stringsAsFactors = TRUE, to ensure that strings are kept as strings rather than converted to factors. Some people (e.g. this guy) feel so strongly against this bizzare default behaviour, that they include a line in their .Rprofile to make importing data with stringsAsFactors=T as their default. (but this is dangerous for writing code that works the same across users with different .Rprofile initializations!)
If you already have the dataset, you can convert your factors to strings instead:
df[ , 'Player'] <- as.character( df[ , 'Player' ] )
You can now continue with your analysis without worrying about factors and their annoyances.
E.g. setting a new name is as simple as you'd expect:
df[112,'Player'] <- 'Calvin Ridley'

barchart - axis ticks are not adjusted according to the bars

I have to draw a bar chart in R ggplot2 with multiple variables (i.e each bar for BMI, weight, cholesterol, Blood pressure etc) in each group ( i.e. different populations ex: Indian, Korean, Philipinos etc.) But the bars are overflowing to the next group in the axis. for example: the bars of the Indian group is overflowing to Korean group. The axis marks are not adjusted accordingly. I have attached the figure .. can someone please help. Following is my code. dput(data) is also given.
p = ggplot(data = t,
aes(x = factor(Population, levels = names(sort(table(Population), increasing = TRUE))),
y = Snp_Count,
group = factor(Trait, levels = c("BMI", "DBP", "HDL", "Height", "LDL", "TC", "TG", "WC", "Weight"),
ordered = TRUE)))
p = p + geom_bar(aes(fill = Trait),
position = position_dodge(preserve = "single"),
stat = "identity") +
scale_fill_manual(values = c("#28559A", "#3EB650", "#E56B1F", "#A51890", "#FCC133", "#663300", "#6666ff", "#ff3300", "#ff66ff")) +
coord_flip()
structure(list(Trait = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L), .Label = c("BMI",
"DBP", "HDL", "HT", "LDL", "TC", "TG", "WC", "Weight"), class = "factor"),
Association = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = "Direct", class = "factor"), TraitClass = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Anthropometric",
"BP", "Lipid"), class = "factor"), Population = structure(c(2L,
3L, 4L, 5L, 7L, 8L, 10L, 11L, 12L, 13L, 22L, 24L, 3L, 5L,
11L, 22L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L,
14L, 15L, 18L, 20L, 28L, 5L, 7L, 13L, 14L, 1L, 3L, 5L, 7L,
9L, 11L, 12L, 16L, 18L, 20L, 22L, 5L, 6L, 7L, 10L, 12L, 18L,
20L, 3L, 5L, 6L, 7L, 8L, 11L, 12L, 13L, 14L, 15L, 18L, 19L,
20L, 21L, 22L, 23L, 26L, 28L, 3L, 4L, 5L, 8L, 12L, 22L, 24L,
3L, 5L, 7L, 8L, 17L, 25L, 27L), .Label = c("ACB", "AFR",
"ASW", "ASW/ACB", "CEU", "CHB", "EAS", "Filipino", "FIN",
"GBR", "Hispanic", "Hispanic/Latinos", "JPT", "Korean", "Kuwaiti",
"Micronesian", "Moroccan", "MXL", "Mylopotamos", "Orcadian",
"Pomak", "SAS", "Saudi_Arabian", "Seychellois", "Surinamese",
"Taiwanese", "Turkish", "YRI"), class = "factor"), Snp_Count = c(3L,
12L, 6L, 17L, 2L, 10L, 1L, 6L, 3L, 3L, 10L, 6L, 1L, 1L, 1L,
1L, 2L, 1L, 10L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 3L, 1L, 1L,
2L, 1L, 2L, 20L, 5L, 4L, 1L, 1L, 2L, 7L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 8L, 2L, 4L, 3L, 1L, 2L, 1L, 4L, 20L, 5L,
11L, 2L, 4L, 3L, 4L, 2L, 3L, 4L, 1L, 1L, 1L, 2L, 2L, 1L,
2L, 3L, 2L, 4L, 4L, 1L, 4L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L
), Gene_Count = c(3L, 9L, 7L, 9L, 2L, 8L, 1L, 7L, 3L, 2L,
8L, 7L, 1L, 1L, 1L, 1L, 2L, 1L, 4L, 1L, 1L, 1L, 1L, 2L, 2L,
1L, 2L, 1L, 1L, 1L, 1L, 1L, 9L, 6L, 5L, 1L, 1L, 2L, 5L, 2L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 6L, 2L, 3L, 3L, 1L, 2L, 1L, 3L,
10L, 4L, 7L, 1L, 3L, 3L, 4L, 1L, 3L, 5L, 1L, 1L, 1L, 3L,
3L, 1L, 1L, 2L, 2L, 3L, 3L, 1L, 3L, 2L, 3L, 3L, 2L, 3L, 2L,
2L, 2L)), class = "data.frame", row.names = c(NA, -86L))
The total width of each group in your barchart is 0.9 by default, which means that 90% of the area is covered. When you increase the width of the individual bars to 3 they will overlap with other groups, the maximum value for with should thus be 1 and then it will touch the other groups.
I'd suggest in your situation to use facet_wrap instead of a dodged barchart.
Note: geom_col is the same as geom_bar(stat = "identity).
my.df$Trait <- factor(my.df$Trait, levels = c("BMI", "DBP", "HDL", "HT", "LDL", "TC", "TG", "WC", "Weight"))
my.df$Population <- factor(my.df$Population, levels = names(sort(table(my.df$Population), increasing = TRUE)))
ggplot(my.df, aes(x = Trait, y = Snp_Count, fill = Trait)) +
geom_col(width = 1) +
scale_fill_manual(values = c("#28559A", "#3EB650", "#E56B1F", "#A51890", "#FCC133", "#663300", "#6666ff", "#ff3300", "#ff66ff")) +
# Split the data by Population, allow flexible scales and spacing for y axis (Trait)
facet_grid(Population ~ ., scales = "free_y", space = "free_y", switch = "y") +
coord_flip() +
theme(axis.text.y = element_blank(), # Remove Trait labels (indicated by color)
axis.ticks.y = element_blank(), # Remove tick marks
strip.background = element_blank(),
strip.text.y = element_text(angle = 180, hjust = 1), # Rotate Population labels
panel.spacing.y = unit(3, "pt")) # Spacing between groups
Data
my.df <-
structure(list(Trait = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L),
.Label = c("BMI", "DBP", "HDL", "HT", "LDL", "TC", "TG", "WC", "Weight"), class = "factor"),
Population = structure(c(2L, 3L, 4L, 5L, 7L, 8L, 10L, 11L,
12L, 13L, 22L, 24L, 3L, 5L, 11L, 22L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 18L, 20L, 28L, 5L,
7L, 13L, 14L, 1L, 3L, 5L, 7L, 9L, 11L, 12L, 16L, 18L, 20L,
22L, 5L, 6L, 7L, 10L, 12L, 18L, 20L, 3L, 5L, 6L, 7L, 8L,
11L, 12L, 13L, 14L, 15L, 18L, 19L, 20L, 21L, 22L, 23L, 26L,
28L, 3L, 4L, 5L, 8L, 12L, 22L, 24L, 3L, 5L, 7L, 8L, 17L,
25L, 27L),
.Label = c("ACB", "AFR", "ASW", "ASW/ACB", "CEU",
"CHB", "EAS", "Filipino", "FIN", "GBR", "Hispanic", "Hispanic/Latinos",
"JPT", "Korean", "Kuwaiti", "Micronesian", "Moroccan", "MXL",
"Mylopotamos", "Orcadian", "Pomak", "SAS", "Saudi_Arabian",
"Seychellois", "Surinamese", "Taiwanese", "Turkish", "YRI"), class = "factor"),
Snp_Count = c(3L, 12L, 6L, 17L, 2L,
10L, 1L, 6L, 3L, 3L, 10L, 6L, 1L, 1L, 1L, 1L, 2L, 1L, 10L,
1L, 1L, 2L, 1L, 2L, 1L, 1L, 3L, 1L, 1L, 2L, 1L, 2L, 20L,
5L, 4L, 1L, 1L, 2L, 7L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 8L,
2L, 4L, 3L, 1L, 2L, 1L, 4L, 20L, 5L, 11L, 2L, 4L, 3L, 4L,
2L, 3L, 4L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 3L, 2L, 4L, 4L, 1L,
4L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L)),
class = "data.frame", row.names = c(NA, -86L))

How to create histograms for each unique combination of levels from two factors?

I cannot figure out how to use a loop to plot one histogram for each unique combination of levels from TWO factors.
Here is my data: https://www.dropbox.com/sh/exsjhu23fnpwf4r/AABvitLBN1nRMpXcyYMVIOIDa?dl=0
# perhaps need to have factors
df$freq <- as.factor(df$freq)
df$time <- as.factor(df$time)
I learned how to use a loop to plot histograms for ONE factor levels:
# space for plots
windows(width=19, height=10)
par(las=1, cex.lab=0.75, cex.axis=0.6, bty="n", mgp=c(1, 0.6, 0),
oma=c(2, 4, 2, 0) + 0.1, mar=c(4, 0, 3, 3) + 0.1)
a <- layout(matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
18, 19, 20, 21), nrow=3, ncol=7, byrow=T))
layout.show(a)
# loop
for (i in 1:length(unique(df$freq))) {
value <- subset(df, freq == unique (df$freq)[i])
hist(value$thr, main=paste0("freq: ", unique(df$freq)[i]))
}
I tried variations of this loop for TWO factors but that unfortunately does not work:
for (i in 1:length(unique(df[c("freq", "time")]))) {
value <- subset(df, freq == unique (df$freq)[i] & time == unique(df$time)[i])
hist(value$thr, main=paste0("freq: ", unique(df$freq)[i]))
}
I would also like to learn how to label each histogram based on the levels of TWO factors (not just one)...
It's more convenient to use by here.
For the titles we prefer characters to factors.
df1[c("freq", "time")] <- lapply(df1[c("freq", "time")], as.character)
Then open windows,
windows(width=19, height=10)
par(las=1, cex.lab=0.75, cex.axis=0.6, bty="n", mgp=c(1, 0.6, 0),
oma=c(2, 4, 2, 0) + 0.1, mar=c(4, 0, 3, 3) + 0.1)
a <- layout(matrix(1:21, 3, 7))
layout.show(a)
and plot.
by(df1, df1[c("freq", "time")], function(x)
hist(x$thr, main=paste("freq:", paste(x[1, c(1, 3)], collapse=","))))
Result
Edit
To get the specific order we probably have to do some more stuff.
df1[c("freq", "time")] <- lapply(df1[c("freq", "time")], as.character)
windows(width=19, height=10)
par(las=1, cex.lab=0.75, cex.axis=0.6, bty="n", mgp=c(1, 0.6, 0),
oma=c(2, 4, 2, 0) + 0.1, mar=c(4, 0, 3, 3) + 0.1)
a <- layout(matrix(1:21, 3, 7, byrow=TRUE)) # with byrow
layout.show(a)
l <- split(df1, df1[c("freq", "time")])
m <- t(sapply(l, function(x) x[1, c(1, 3)])) # matrix of first rows of each subset
m[, 2] <- sub("m", "", m[, 2]) # use the values...
m <- apply(m, 1:2, as.numeric) # ... make numeric
Now we obtain the histograms within a lapply over the list ordered by m.
lapply(l[order(m[, 2], m[, 1])], function(x)
hist(x$thr, main=paste("freq:", paste(x[1, c(1, 3)], collapse=","))))
New Result
Data
df1 <- structure(list(freq = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L), .Label = c("4",
"8", "12.5", "16", "20", "25", "31.5"), class = "factor"), thr = c(60L,
25L, 20L, 15L, 15L, 30L, 35L, 60L, 25L, 10L, 15L, 15L, 30L, 35L,
55L, 30L, 15L, 15L, 10L, 25L, 40L, 50L, 25L, 15L, 10L, 15L, 20L,
40L, 50L, 30L, 10L, 15L, 15L, 20L, 25L, 50L, 25L, 10L, 10L, 10L,
20L, 25L, 45L, 20L, 10L, 10L, 10L, 20L, 25L, 45L, 15L, 10L, 10L,
10L, 20L, 30L, 60L, 30L, 10L, 10L, 10L, 15L, 30L, 50L, 25L, 10L,
10L, 10L, 20L, 30L, 45L, 25L, 15L, 10L, 15L, 30L, 35L, 50L, 25L,
15L, 10L, 15L, 25L, 35L, 60L, 25L, 10L, 10L, 15L, 20L, 30L, 60L,
25L, 5L, 5L, 10L, 20L, 30L, 45L, 20L, 5L, 10L, 10L, 20L, 30L,
45L, 20L, 10L, 10L, 10L, 20L, 30L, 60L, 30L, 15L, 10L, 15L, 25L,
30L, 55L, 25L, 10L, 10L, 10L, 20L, 30L, 55L, 35L, 10L, 10L, 10L,
20L, 30L, 60L, 35L, 15L, 10L, 10L, 15L, 25L, 50L, 30L, 10L, 10L,
10L, 20L, 25L, 55L, 25L, 10L, 10L, 15L, 25L, 25L, 65L, 30L, 10L,
10L, 15L, 20L, 30L, 60L, 30L, 15L, 15L, 15L, 15L, 30L, 55L, 35L,
15L, 15L, 15L, 25L, 35L, 55L, 35L, 15L, 15L, 15L, 25L, 35L, 60L,
35L, 15L, 15L, 15L, 25L, 35L, 60L, 30L, 10L, 10L, 15L, 25L, 35L,
55L, 30L, 15L, 10L, 10L, 25L, 30L, 50L, 25L, 10L, 10L, 10L, 20L,
30L, 55L, 30L, 10L, 10L, 15L, 20L, 30L, 55L, 30L, 10L, 15L, 20L,
25L, 35L, 55L, 25L, 15L, 15L, 15L, 25L, 40L, 50L, 20L, 10L, 10L,
20L, 30L, 40L, 45L, 25L, 10L, 10L, 10L, 20L, 30L, 50L, 25L, 10L,
10L, 10L, 20L, 25L, 55L, 20L, 10L, 10L, 15L, 25L, 35L, 50L, 20L,
10L, 10L, 15L, 25L, 30L, 45L, 20L, 15L, 10L, 10L, 20L, 30L, 50L,
20L, 15L, 15L, 15L, 20L, 30L, 60L, 35L, 15L, 10L, 15L, 25L, 30L,
60L, 35L, 15L, 15L, 15L, 30L, 35L, 55L, 25L, 10L, 15L, 15L, 25L,
35L, 50L, 30L, 10L, 15L, 15L, 25L, 35L, 55L, 25L, 20L, 15L, 15L,
25L, 30L, 55L, 25L, 15L, 15L, 15L, 30L, 35L), time = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L), .Label = c("3m", "6m", "9m"), class = "factor")), row.names = c(NA,
-322L), class = "data.frame")

Linear model with repeated measures factors

I have a dataframe df
df<-structure(list(subject = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L,
23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L,
36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L,
49L, 50L, 51L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L,
25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L,
38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L,
51L), sex = c(1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L,
1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L,
2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L,
1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L,
2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L), age = c(29L, 54L, 67L,
36L, 48L, 37L, 25L, 46L, 37L, 33L, 25L, 26L, 28L, 59L, 46L, 50L,
55L, 56L, 37L, 30L, 38L, 30L, 50L, 39L, 29L, 46L, 48L, 46L, 55L,
32L, 66L, 35L, 48L, 54L, 38L, 31L, 42L, 36L, 27L, 63L, 45L, 31L,
26L, 38L, 43L, 52L, 36L, 43L, 65L, 46L, 42L, 29L, 54L, 67L, 36L,
48L, 37L, 25L, 46L, 37L, 33L, 25L, 26L, 28L, 59L, 46L, 50L, 55L,
56L, 37L, 30L, 38L, 30L, 50L, 39L, 29L, 46L, 48L, 46L, 55L, 32L,
66L, 35L, 48L, 54L, 38L, 31L, 42L, 36L, 27L, 63L, 45L, 31L, 26L,
38L, 43L, 52L, 36L, 43L, 65L, 46L, 42L), edu = c(4L, 3L, 3L,
3L, 4L, 2L, 3L, 3L, 1L, 3L, 4L, 4L, 5L, 1L, 1L, 2L, 2L, 3L, 4L,
4L, 4L, 4L, 4L, 4L, 2L, 2L, 1L, 2L, 2L, 4L, 2L, 4L, 4L, 3L, 3L,
4L, 5L, 3L, 3L, 4L, 1L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 6L, 1L, 3L,
4L, 3L, 3L, 3L, 4L, 2L, 3L, 3L, 1L, 3L, 4L, 4L, 5L, 1L, 1L, 2L,
2L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 2L, 1L, 2L, 2L, 4L, 2L, 4L,
4L, 3L, 3L, 4L, 5L, 3L, 3L, 4L, 1L, 1L, 3L, 1L, 3L, 1L, 3L, 1L,
6L, 1L, 3L), biz_exp = c(5L, 15L, 3L, 4L, 10L, 6L, 0L, 5L, 8L,
5L, 0L, 8L, 3L, 23L, 5L, 7L, 5L, 11L, 4L, 4L, 11L, 3L, 15L, 4L,
4L, 6L, 6L, 5L, 13L, 2L, 13L, 6L, 8L, 27L, 7L, 3L, 11L, 5L, 1L,
4L, 8L, 8L, 4L, 15L, 18L, 30L, 9L, 14L, 18L, 21L, 16L, 5L, 15L,
3L, 4L, 10L, 6L, 0L, 5L, 8L, 5L, 0L, 8L, 3L, 23L, 5L, 7L, 5L,
11L, 4L, 4L, 11L, 3L, 15L, 4L, 4L, 6L, 6L, 5L, 13L, 2L, 13L,
6L, 8L, 27L, 7L, 3L, 11L, 5L, 1L, 4L, 8L, 8L, 4L, 15L, 18L, 30L,
9L, 14L, 18L, 21L, 16L), turnov = c(36L, NA, 12L, 9L, 48L, 9L,
8L, 24L, 4L, 250L, NA, 600L, 6L, 6L, 10L, 10L, 5L, 4L, 250L,
200L, 50L, 150L, 48L, NA, 9L, 6L, 2L, NA, NA, 3L, 7L, 23L, 75L,
7L, 5L, NA, 20L, 450L, 5L, 32L, 21L, 12L, 6L, 4L, 24L, 7L, 10L,
12L, 12L, 14L, 18L, 36L, NA, 12L, 9L, 48L, 9L, 8L, 24L, 4L, 250L,
NA, 600L, 6L, 6L, 10L, 10L, 5L, 4L, 250L, 200L, 50L, 150L, 48L,
NA, 9L, 6L, 2L, NA, NA, 3L, 7L, 23L, 75L, 7L, 5L, NA, 20L, 450L,
5L, 32L, 21L, 12L, 6L, 4L, 24L, 7L, 10L, 12L, 12L, 14L, 18L),
loc_pr = c(1L, 1L, 1L, 6L, 1L, 6L, 4L, 1L, 8L, 5L, 1L, 3L,
1L, 1L, 1L, 1L, 5L, 8L, 2L, 1L, 1L, 1L, 1L, 2L, 8L, 2L, 4L,
4L, 2L, 2L, 2L, 1L, 4L, 5L, 4L, 4L, 4L, 4L, NA, 4L, 5L, 5L,
5L, 8L, 1L, 2L, 4L, 3L, 3L, 4L, 3L, 1L, 1L, 1L, 6L, 1L, 6L,
4L, 1L, 8L, 5L, 1L, 3L, 1L, 1L, 1L, 1L, 5L, 8L, 2L, 1L, 1L,
1L, 1L, 2L, 8L, 2L, 4L, 4L, 2L, 2L, 2L, 1L, 4L, 5L, 4L, 4L,
4L, 4L, NA, 4L, 5L, 5L, 5L, 8L, 1L, 2L, 4L, 3L, 3L, 4L, 3L
), type = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 5L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 4L, 2L, 1L, 1L, 2L, 4L, 1L, 2L, 1L,
1L, 4L, 1L, 3L, 1L, 1L, 1L, 3L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 5L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 4L, 2L, 1L,
1L, 2L, 4L, 1L, 2L, 1L, 1L, 4L, 1L, 3L, 1L, 1L, 1L, 3L, 2L
), age_rec = c(2L, 4L, 4L, 100L, 4L, 100L, 100L, 4L, 100L,
2L, 1L, 2L, 2L, 4L, 4L, 4L, 4L, 100L, 3L, 2L, 3L, 2L, 4L,
3L, 100L, 27L, 100L, 100L, 4L, 2L, 100L, 2L, 4L, 30L, 3L,
2L, 59L, 8L, 100L, 27L, 3L, 59L, 2L, 59L, 3L, 59L, 3L, 3L,
4L, 64L, 3L, 2L, 4L, 4L, 100L, 4L, 100L, 100L, 4L, 100L,
2L, 1L, 2L, 2L, 4L, 4L, 4L, 4L, 100L, 3L, 2L, 3L, 2L, 4L,
3L, 100L, 27L, 100L, 100L, 4L, 2L, 100L, 2L, 4L, 30L, 3L,
2L, 59L, 8L, 100L, 27L, 3L, 59L, 2L, 59L, 3L, 59L, 3L, 3L,
4L, 64L, 3L), biz_exp_rec = c(2L, 4L, 2L, 3L, 3L, 3L, 1L,
2L, 3L, 2L, 1L, 3L, 2L, 4L, 2L, 3L, 2L, 4L, 2L, 2L, 4L, 2L,
4L, 2L, 2L, 4L, 4L, 4L, 4L, 1L, 4L, 3L, 3L, 4L, 3L, 2L, 3L,
3L, 2L, 4L, 3L, 2L, 2L, 3L, 4L, 4L, 3L, 4L, 4L, 4L, 4L, 2L,
4L, 2L, 3L, 3L, 3L, 1L, 2L, 3L, 2L, 1L, 3L, 2L, 4L, 2L, 3L,
2L, 4L, 2L, 2L, 4L, 2L, 4L, 2L, 2L, 4L, 4L, 4L, 4L, 1L, 4L,
3L, 3L, 4L, 3L, 2L, 3L, 3L, 2L, 4L, 3L, 2L, 2L, 3L, 4L, 4L,
3L, 4L, 4L, 4L, 4L), turnov_rec = structure(c(3L, NA, 3L,
2L, 3L, 3L, 1L, 3L, 3L, 4L, NA, 4L, 2L, 2L, 2L, 2L, 2L, 4L,
4L, 4L, 3L, 4L, 3L, 5L, 2L, 3L, 3L, 2L, NA, 2L, 4L, 3L, 4L,
4L, 2L, NA, 4L, 2L, 1L, 2L, 3L, 3L, 2L, 4L, 3L, 4L, 2L, 3L,
3L, 4L, 3L, 3L, NA, 3L, 2L, 3L, 3L, 1L, 3L, 3L, 4L, NA, 4L,
2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 3L, 4L, 3L, NA, 2L, 3L, 3L,
2L, NA, 2L, 4L, 3L, 4L, 4L, 2L, NA, 4L, 2L, 1L, 2L, 3L, 3L,
2L, 4L, 3L, 4L, 2L, 3L, 3L, 4L, 3L), .Label = c("1", "2",
"3", "4", "MA"), class = "factor"), bundle = c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), investment = c(86L,
100L, 100L, 75L, 100L, 59L, 68L, 86L, 80L, 100L, 86L, 100L,
100L, 100L, 100L, 100L, 100L, 93L, 64L, 100L, 24L, 18L, 89L,
75L, 80L, 29L, 54L, 65L, 100L, 27L, 59L, 30L, 59L, 43L, 59L,
59L, 5L, 26L, 100L, 75L, 59L, 5L, 59L, 74L, 59L, 79L, 75L,
75L, 86L, 66L, 86L, 55L, 100L, 68L, 1L, 75L, 1L, 1L, 79L,
1L, 54L, 48L, 33L, 55L, 90L, 85L, 39L, 70L, 1L, 45L, 54L,
33L, 3L, 44L, 75L, 1L, 1L, 1L, 1L, 96L, 26L, 1L, 23L, 66L,
1L, 89L, 83L, 52L, 61L, 1L, 88L, 45L, 72L, 60L, 1L, 60L,
2L, 86L, 10L, 63L, 1L, 88L)), .Names = c("subject", "sex",
"age", "edu", "biz_exp", "turnov", "loc_pr", "type", "age_rec",
"biz_exp_rec", "turnov_rec", "bundle", "investment"), class = "data.frame", row.names = c(NA,
-102L))
In this dataframe investment is my dependent variable and the other variables are my independent variables. My subjects are crossed within type of bundle. First of all, I would like know whether my subjects do bundle or not (bundle= 1 means that people bundle and bundle=0 means that people do not bundle), it will have an effect on the investment.
I have done this mixed effect linear model but I am not sure if this is correct as my p-value are equal to zero.
library(nlme)
model <- lme(investment~bundle, random = ~1|subject/bundle, data=df)
I have also tried to make an anova with repeated measures as such:
aov(investment~bundle+ Error(subject/bundle), data=df)
It works but not sure if the model formula is right
Anyone could help me with that?

Calculating seasonal index from tbats components

I have aggregated retail weekly data with seasonal periods of 52.2 (a 53rd week every five years). I want to use this aggregated data to calculate a seasonal index that can be applied to each item within the category to derive its de-seasonalised demand.
Using stl, I would calculate the seasonal index as "seasonal" / "trend" + 1 (normalised to 52). I switched to tbats because my seasonality was not an integer and I have multiple seasonal periods (52.2 and 261)
I am using tbats with seasonal.periods = 52.2 and extract the components using tbats.components. The components are "observed", "level" and "season". Google has not revealed much in terms of what these components are and how to consume them. I also extracted the residuals
I noticed that "observed" is the log of my data. I also notice that season is changing over time (which is exactly what I want)
My questions are:
1.Is "season" a natural log too?
2.How can I extract the future "season" values? I can run a forecast on the data so I am assuming that there must be a projected "season"
3. What would be the best approach to calculating an "index" considering that it will be divided into the granular data. I am currently using: exp("season") / centered moving average(exp("season"))
My Data:
weeklyu <-structure(list(V1 = c(8L, 5L, 7L, 3L, 1L, 2L, 3L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 8L, 2L,
4L, 8L, 6L, 7L, 8L, 9L, 15L, 15L, 13L, 9L, 16L, 19L, 16L, 16L,
10L, 31L, 45L, 90L, 185L, 34L, 8L, 19L, 11L, 19L, 21L, 8L, 5L,
7L, 6L, 3L, 10L, 2L, 2L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 16L, 22L, 18L, 23L, 11L, 5L, 8L, 21L, 18L, 11L, 26L,
28L, 9L, 3L, 6L, 3L, 6L, 1L, 5L, 3L, 3L, 2L, 1L, 4L, 1L, 1L,
3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 5L,
19L, 11L, 17L, 23L, 50L, 52L, 23L, 18L, 22L, 44L, 37L, 22L, 30L,
32L, 47L, 34L, 30L, 26L, 25L, 44L, 87L, 65L, 30L, 17L, 12L, 2L,
16L, 14L, 17L, 6L, 7L, 3L, 6L, 7L, 8L, 11L, 12L, 4L, 1L, 3L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L)), .Names = "V1", class = "data.frame", row.names = c(NA,
-188L))
My Code:
wklytbat <- tbats(msts(weeklyu, seasonal.periods = 52.2, ts.frequency=52.2), use.parallel=FALSE)
extract season:
seasu <-data.table(exp(as.numeric(tbats.components(wklytbat)[,'season'])))

Resources