I have the following data set:
structure(list(count = c("0-0", "1-0", "2-0", "2-1", "0-0", "0-1",
"0-2", "1-2", "1-2", "0-0", "0-1", "1-1", "1-2", "2-2", "2-2",
"0-0", "1-0", "1-1", "2-1", "3-1", "3-2", "0-0", "1-0", "1-1",
"0-0", "0-1", "1-1", "1-2", "0-0", "1-0", "1-1", "0-0", "0-1",
"0-0", "1-0", "1-1", "1-2", "0-0", "0-1", "0-2", "0-0", "0-1",
"0-2", "1-2", "1-2", "0-0", "0-0", "0-1", "0-0", "0-0", "0-0",
"1-0", "2-0", "0-0", "1-0", "2-0", "3-0", "0-0", "0-0", "1-0",
"1-1", "0-0", "0-0", "1-0", "2-0", "0-0", "0-1", "0-2", "0-2",
"0-0", "1-0", "1-1", "2-1", "2-2", "2-2", "0-0", "1-0", "2-0",
"2-1", "2-2", "0-0", "0-1", "0-0", "0-0", "0-1", "0-2", "0-2",
"1-2", "2-2", "0-0", "1-0", "1-1", "0-0", "1-0", "0-0", "0-1",
"1-1", "1-2"), pitchResult = c("Ball", "Ball",
"Foul", "Ground Out", "Foul", "Strike Looking", "Ball", "Foul",
"Ground Out", "Strike Looking", "Ball", "Foul", "Ball", "Foul",
"Ground Out", "Ball", "Strike Looking", "Ball", "Ball", "Strike Swinging",
"Single on a Fly Ball", "Ball", "Strike Swinging", "Double Play",
"Strike Looking", "Ball", "Strike Looking", "Ground Out", "Ball",
"Strike Swinging", "Ground Out", "Foul", "Single on a Fly Ball",
"Ball", "Strike Swinging", "Strike Swinging", "Strikeout (Swinging)",
"Strike Looking", "Foul", "Strikeout (Swinging)", "Strike Looking",
"Strike Looking", "Ball", "Foul", "Fly Out", "Fly Out", "Strike Looking",
"Fly Out", "Double on a Fly Ball", "Hit By Pitch", "Ball", "Ball",
"Fly Out", "Ball", "Ball", "Ball", "Walk", "Double Play", "Ball",
"Strike Looking", "Single on a Ground Ball", "Fly Out", "Ball",
"Ball", "Fly Out", "Strike Looking", "Foul", "Foul", "Single on a Ground Ball",
"Ball", "Strike Looking", "Ball", "Foul", "Foul", "Home Run on a 402.65 ft Fly Ball",
"Ball", "Ball", "Strike Swinging", "Foul", "Fly Out", "Strike Swinging",
"Line Out", "Fly Out", "Strike Looking", "Foul", "Foul", "Ball",
"Ball", "Single on a Ground Ball", "Ball", "Strike Looking",
"Fielder's Choice", "Ball", "Ground Out", "Foul", "Ball", "Strike Swinging",
"Single on a Line Drive"), gameId = c(536158720L,
536158720L, 536158720L, 536158720L, 536158720L, 536158720L, 536158720L,
536158720L, 536158720L, 536158720L, 536158720L, 536158720L, 536158720L,
536158720L, 536158720L, 536158720L, 536158720L, 536158720L, 536158720L,
536158720L, 536158720L, 536158720L, 536158720L, 536158720L, 536158720L,
536158720L, 536158720L, 536158720L, 536158720L, 536158720L, 536158720L,
536158720L, 536158720L, 536158720L, 536158720L, 536158720L, 536158720L,
536158720L, 536158720L, 536158720L, 536158720L, 536158720L, 536158720L,
536158720L, 536158720L, 536158720L, 536158720L, 536158720L, 536158720L,
536158720L, 536158720L, 536158720L, 536158720L, 536158720L, 536158720L,
536158720L, 536158720L, 536158720L, 536158720L, 536158720L, 536158720L,
536158720L, 536158720L, 536158720L, 536158720L, 536158720L, 536158720L,
536158720L, 536158720L, 536158720L, 536158720L, 536158720L, 536158720L,
536158720L, 536158720L, 536158720L, 536158720L, 536158720L, 536158720L,
536158720L, 536158720L, 536158720L, 536158720L, 536158720L, 536158720L,
536158720L, 536158720L, 536158720L, 536158720L, 536158720L, 536158720L,
536158720L, 536158720L, 536158720L, 536158720L, 536158720L, 536158720L,
536158720L), inn = c("Top 1", "Top 1",
"Top 1", "Top 1", "Top 1", "Top 1", "Top 1", "Top 1", "Top 1",
"Top 1", "Top 1", "Top 1", "Top 1", "Top 1", "Top 1", "Top 2",
"Top 2", "Top 2", "Top 2", "Top 2", "Top 2", "Top 2", "Top 2",
"Top 2", "Top 2", "Top 2", "Top 2", "Top 2", "Top 3", "Top 3",
"Top 3", "Top 3", "Top 3", "Top 3", "Top 3", "Top 3", "Top 3",
"Top 3", "Top 3", "Top 3", "Top 4", "Top 4", "Top 4", "Top 4",
"Top 4", "Top 4", "Top 4", "Top 4", "Top 5", "Top 5", "Top 5",
"Top 5", "Top 5", "Top 5", "Top 5", "Top 5", "Top 5", "Top 5",
"Top 6", "Top 6", "Top 6", "Top 6", "Top 6", "Top 6", "Top 6",
"Top 6", "Top 6", "Top 6", "Top 6", "Top 6", "Top 6", "Top 6",
"Top 6", "Top 6", "Top 6", "Top 6", "Top 6", "Top 6", "Top 6",
"Top 6", "Top 7", "Top 7", "Top 7", "Top 7", "Top 7", "Top 7",
"Top 7", "Top 7", "Top 7", "Top 7", "Top 7", "Top 7", "Top 8",
"Top 8", "Top 8", "Top 8", "Top 8", "Top 8"
), batter = c("Player A", "Player A", "Player A", "Player A", "Player B", "Player B",
"Player B", "Player B", "Player B", "Player C", "Player C", "Player C", "Player C",
"Player C", "Player C", "Player D", "Player D", "Player D", "Player D", "Player D",
"Player D", "Player E", "Player E", "Player E", "Player F", "Player F", "Player F",
"Player F", "Player G", "Player G", "Player G", "Player H", "Player H", "Player I",
"Player I", "Player I", "Player I", "Player A", "Player A", "Player A", "Player B",
"Player B", "Player B", "Player B", "Player B", "Player C", "Player D", "Player D",
"Player E", "Player F", "Player G", "Player G", "Player G", "Player H", "Player H",
"Player H", "Player H", "Player I", "Player A", "Player A", "Player A", "Player B",
"Player C", "Player C", "Player C", "Player D", "Player D", "Player D", "Player D",
"Player E", "Player E", "BPlayer E", "Player E", "Player E", "Player E", "Player F", "Player F",
"Player F", "Player F", "Player F", "Player G", "Player G", "Player H", "Player I",
"Player I", "Player I", "Player I", "Player I", "Player I", "Player A", "Player A", "Player A",
"Player B", "Player B", "Player C", "Player C", "Player C", "Player C"), pitcher = c("Player 1", "Player 1", "Player 1", "Player 1", "Player 1",
"Player 1", "Player 1", "Player 1", "Player 1", "Player 1", "Player 1", "Player 1",
"Player 1", "Player 1", "Player 1", "Player 1", "Player 1", "Player 1", "Player 1",
"Player 1", "Player 1", "Player 1", "Player 1", "Player 1", "Player 1", "Player 1",
"Player 1", "Player 1", "Player 1", "Player 1", "Player 1", "Player 1", "Player 1",
"Player 1", "Player 1", "Player 1", "Player 1", "Player 1", "Player 1", "Player 1",
"Player 1", "Player 1", "Player 1", "Player 1", "Player 1", "Player 1", "Player 1",
"Player 1", "Player 1", "Player 1", "Player 1", "Player 1", "Player 1", "Player 1",
"Player 1", "Player 1", "Player 1", "Player 1", "Player 1", "Player 1", "Player 1",
"Player 1", "Player 1", "Player 1", "Player 1", "Player 1", "Player 1", "Player 1",
"Player 1", "Player 1", "Player 1", "Player 1", "Player 1", "Player 1", "Player 1",
"Player 1", "Player 1", "Player 1", "Player 1", "Player 1", "Player 1", "Player 1",
"Player 2", "Player 2", "Player 2", "Player 2", "Player 2", "Player 2",
"Player 2", "Player 2", "Player 2", "Player 2", "Player 2", "Player 2",
"Player 2", "Player 2", "Player 2", "Player 2"
), bb = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)), row.names = c(NA,
100L), class = "data.frame")
Where Players A-I take at bats (groups of pitches) against Player 1 and 2... If Player A Walked or got Hit by the pitch in their at bat it is denoted in the bb column on the pitch where that event occurred. What I want to do is have that bb column show a 1 for the entire at bat if they were walked or hit by the pitch instead of just on the row where the event happened.
I am not familiar with baseball rules, but I tried to get a result that matches your description. Note that your data has 100 rows, but the 99th and 100th rows are "NA", so I only use rows 1 to 98. Here is my attempt:
library(dplyr)
library(stringr)

# Flag every pitch of an at bat (same batter within the same inning) with 1
# when that at bat contains a pitch marked bb == 1 whose result is a walk or
# a hit-by-pitch, and with 0 otherwise.
#
# The pattern must be the alternation "Walk|Hit". Written as "[Walk|Hit]" it
# is a character class that matches any single one of the characters
# W, a, l, k, |, H, i, t, so it would also match "Ball", "Foul", etc.
# The per-row test sits inside any() so that the flag is computed once per
# group and then recycled to every row of the at bat.
dat[1:98, ] %>%
  group_by(batter, inn) %>%
  mutate(
    wanted_bb = as.numeric(any(bb == 1 & str_detect(pitchResult, "Walk|Hit")))
  ) %>%
  select(-bb) %>%
  as.data.frame()
The result:
count pitchResult gameId inn batter pitcher wanted_bb
1 0-0 Ball 536158720 Top 1 Player A Player 1 0
2 1-0 Ball 536158720 Top 1 Player A Player 1 0
3 2-0 Foul 536158720 Top 1 Player A Player 1 0
4 2-1 Ground Out 536158720 Top 1 Player A Player 1 0
5 0-0 Foul 536158720 Top 1 Player B Player 1 0
6 0-1 Strike Looking 536158720 Top 1 Player B Player 1 0
7 0-2 Ball 536158720 Top 1 Player B Player 1 0
8 1-2 Foul 536158720 Top 1 Player B Player 1 0
9 1-2 Ground Out 536158720 Top 1 Player B Player 1 0
10 0-0 Strike Looking 536158720 Top 1 Player C Player 1 0
11 0-1 Ball 536158720 Top 1 Player C Player 1 0
12 1-1 Foul 536158720 Top 1 Player C Player 1 0
13 1-2 Ball 536158720 Top 1 Player C Player 1 0
14 2-2 Foul 536158720 Top 1 Player C Player 1 0
15 2-2 Ground Out 536158720 Top 1 Player C Player 1 0
16 0-0 Ball 536158720 Top 2 Player D Player 1 0
17 1-0 Strike Looking 536158720 Top 2 Player D Player 1 0
18 1-1 Ball 536158720 Top 2 Player D Player 1 0
19 2-1 Ball 536158720 Top 2 Player D Player 1 0
20 3-1 Strike Swinging 536158720 Top 2 Player D Player 1 0
21 3-2 Single on a Fly Ball 536158720 Top 2 Player D Player 1 0
22 0-0 Ball 536158720 Top 2 Player E Player 1 0
23 1-0 Strike Swinging 536158720 Top 2 Player E Player 1 0
24 1-1 Double Play 536158720 Top 2 Player E Player 1 0
25 0-0 Strike Looking 536158720 Top 2 Player F Player 1 0
26 0-1 Ball 536158720 Top 2 Player F Player 1 0
27 1-1 Strike Looking 536158720 Top 2 Player F Player 1 0
28 1-2 Ground Out 536158720 Top 2 Player F Player 1 0
29 0-0 Ball 536158720 Top 3 Player G Player 1 0
30 1-0 Strike Swinging 536158720 Top 3 Player G Player 1 0
31 1-1 Ground Out 536158720 Top 3 Player G Player 1 0
32 0-0 Foul 536158720 Top 3 Player H Player 1 0
33 0-1 Single on a Fly Ball 536158720 Top 3 Player H Player 1 0
34 0-0 Ball 536158720 Top 3 Player I Player 1 0
35 1-0 Strike Swinging 536158720 Top 3 Player I Player 1 0
36 1-1 Strike Swinging 536158720 Top 3 Player I Player 1 0
37 1-2 Strikeout (Swinging) 536158720 Top 3 Player I Player 1 0
38 0-0 Strike Looking 536158720 Top 3 Player A Player 1 0
39 0-1 Foul 536158720 Top 3 Player A Player 1 0
40 0-2 Strikeout (Swinging) 536158720 Top 3 Player A Player 1 0
41 0-0 Strike Looking 536158720 Top 4 Player B Player 1 0
42 0-1 Strike Looking 536158720 Top 4 Player B Player 1 0
43 0-2 Ball 536158720 Top 4 Player B Player 1 0
44 1-2 Foul 536158720 Top 4 Player B Player 1 0
45 1-2 Fly Out 536158720 Top 4 Player B Player 1 0
46 0-0 Fly Out 536158720 Top 4 Player C Player 1 0
47 0-0 Strike Looking 536158720 Top 4 Player D Player 1 0
48 0-1 Fly Out 536158720 Top 4 Player D Player 1 0
49 0-0 Double on a Fly Ball 536158720 Top 5 Player E Player 1 0
50 0-0 Hit By Pitch 536158720 Top 5 Player F Player 1 1
51 0-0 Ball 536158720 Top 5 Player G Player 1 0
52 1-0 Ball 536158720 Top 5 Player G Player 1 0
53 2-0 Fly Out 536158720 Top 5 Player G Player 1 0
54 0-0 Ball 536158720 Top 5 Player H Player 1 1
55 1-0 Ball 536158720 Top 5 Player H Player 1 1
56 2-0 Ball 536158720 Top 5 Player H Player 1 1
57 3-0 Walk 536158720 Top 5 Player H Player 1 1
58 0-0 Double Play 536158720 Top 5 Player I Player 1 0
59 0-0 Ball 536158720 Top 6 Player A Player 1 0
60 1-0 Strike Looking 536158720 Top 6 Player A Player 1 0
61 1-1 Single on a Ground Ball 536158720 Top 6 Player A Player 1 0
62 0-0 Fly Out 536158720 Top 6 Player B Player 1 0
63 0-0 Ball 536158720 Top 6 Player C Player 1 0
64 1-0 Ball 536158720 Top 6 Player C Player 1 0
65 2-0 Fly Out 536158720 Top 6 Player C Player 1 0
66 0-0 Strike Looking 536158720 Top 6 Player D Player 1 0
67 0-1 Foul 536158720 Top 6 Player D Player 1 0
68 0-2 Foul 536158720 Top 6 Player D Player 1 0
69 0-2 Single on a Ground Ball 536158720 Top 6 Player D Player 1 0
70 0-0 Ball 536158720 Top 6 Player E Player 1 0
71 1-0 Strike Looking 536158720 Top 6 Player E Player 1 0
72 1-1 Ball 536158720 Top 6 BPlayer E Player 1 0
73 2-1 Foul 536158720 Top 6 Player E Player 1 0
74 2-2 Foul 536158720 Top 6 Player E Player 1 0
75 2-2 Home Run on a 402.65 ft Fly Ball 536158720 Top 6 Player E Player 1 0
76 0-0 Ball 536158720 Top 6 Player F Player 1 0
77 1-0 Ball 536158720 Top 6 Player F Player 1 0
78 2-0 Strike Swinging 536158720 Top 6 Player F Player 1 0
79 2-1 Foul 536158720 Top 6 Player F Player 1 0
80 2-2 Fly Out 536158720 Top 6 Player F Player 1 0
81 0-0 Strike Swinging 536158720 Top 7 Player G Player 1 0
82 0-1 Line Out 536158720 Top 7 Player G Player 1 0
83 0-0 Fly Out 536158720 Top 7 Player H Player 2 0
84 0-0 Strike Looking 536158720 Top 7 Player I Player 2 0
85 0-1 Foul 536158720 Top 7 Player I Player 2 0
86 0-2 Foul 536158720 Top 7 Player I Player 2 0
87 0-2 Ball 536158720 Top 7 Player I Player 2 0
88 1-2 Ball 536158720 Top 7 Player I Player 2 0
89 2-2 Single on a Ground Ball 536158720 Top 7 Player I Player 2 0
90 0-0 Ball 536158720 Top 7 Player A Player 2 0
91 1-0 Strike Looking 536158720 Top 7 Player A Player 2 0
92 1-1 Fielder's Choice 536158720 Top 7 Player A Player 2 0
93 0-0 Ball 536158720 Top 8 Player B Player 2 0
94 1-0 Ground Out 536158720 Top 8 Player B Player 2 0
95 0-0 Foul 536158720 Top 8 Player C Player 2 0
96 0-1 Ball 536158720 Top 8 Player C Player 2 0
97 1-1 Strike Swinging 536158720 Top 8 Player C Player 2 0
98 1-2 Single on a Line Drive 536158720 Top 8 Player C Player 2 0
An approach similar to Abdur Rohman's, but with a slightly different interpretation of your description:
library(dplyr)
library(stringr)

# Number the at bats: the running id goes up by one each time the batter
# differs from the batter on the previous row (the first row has no
# predecessor, so its comparison is NA and is treated as "no change").
# Every row of an at bat then gets new_bb = 1 if any of its pitch results
# mentions "Walk" or "Hit", else 0. The helper id is dropped at the end.
df[1:98, ] %>%
  mutate(grp = cumsum(coalesce(batter != lag(batter), FALSE))) %>%
  group_by(batter, grp) %>%
  mutate(new_bb = as.integer(any(str_detect(pitchResult, "Walk|Hit")))) %>%
  ungroup() %>%
  select(-grp)
This returns
# A tibble: 98 x 8
count pitchResult gameId inn batter pitcher bb new_bb
<chr> <chr> <int> <chr> <chr> <chr> <dbl> <int>
1 0-0 Ball 536158720 Top 1 Player A Player 1 0 0
2 1-0 Ball 536158720 Top 1 Player A Player 1 0 0
3 2-0 Foul 536158720 Top 1 Player A Player 1 0 0
4 2-1 Ground Out 536158720 Top 1 Player A Player 1 0 0
5 0-0 Foul 536158720 Top 1 Player B Player 1 0 0
6 0-1 Strike Looking 536158720 Top 1 Player B Player 1 0 0
7 0-2 Ball 536158720 Top 1 Player B Player 1 0 0
8 1-2 Foul 536158720 Top 1 Player B Player 1 0 0
9 1-2 Ground Out 536158720 Top 1 Player B Player 1 0 0
10 0-0 Strike Looking 536158720 Top 1 Player C Player 1 0 0
...
48 0-1 Fly Out 536158720 Top 4 Player D Player 1 0 0
49 0-0 Double on a Fly Ball 536158720 Top 5 Player E Player 1 0 0
50 0-0 Hit By Pitch 536158720 Top 5 Player F Player 1 1 1
51 0-0 Ball 536158720 Top 5 Player G Player 1 0 0
52 1-0 Ball 536158720 Top 5 Player G Player 1 0 0
53 2-0 Fly Out 536158720 Top 5 Player G Player 1 0 0
54 0-0 Ball 536158720 Top 5 Player H Player 1 0 1
55 1-0 Ball 536158720 Top 5 Player H Player 1 0 1
56 2-0 Ball 536158720 Top 5 Player H Player 1 0 1
57 3-0 Walk 536158720 Top 5 Player H Player 1 1 1
58 0-0 Double Play 536158720 Top 5 Player I Player 1 0 0
59 0-0 Ball 536158720 Top 6 Player A Player 1 0 0
...
95 0-0 Foul 536158720 Top 8 Player C Player 2 0 0
96 0-1 Ball 536158720 Top 8 Player C Player 2 0 0
97 1-1 Strike Swinging 536158720 Top 8 Player C Player 2 0 0
98 1-2 Single on a Line Drive 536158720 Top 8 Player C Player 2 0 0
I don't know if the last batter of an inn (whatever this means) can be the new first batter of a new inn. In this case you need to include inn in the group_by statement.
I have census data of Male and Female populations organized by age group:
library(tidyverse)

# Remote source file and the local path it is cached under.
url <- "https://www2.census.gov/programs-surveys/popest/datasets/2010-2018/counties/asrh/cc-est2018-alldata-54.csv"
csv_path <- "./datafiles/cc-est2018-alldata-54.csv"

# Download only when no local copy exists. download.file() does not create
# missing directories, so make sure the target folder is there first.
if (!file.exists(csv_path)) {
  dir.create(dirname(csv_path), recursive = TRUE, showWarnings = FALSE)
  download.file(url, destfile = csv_path, mode = "wb")
}

# Keep the rows with AGEGRP != 0 and YEAR == 1, and only the columns used
# below. NOTE(review): AGEGRP 0 is presumably the all-ages total and YEAR 1
# the first estimate in the series -- confirm against the file layout.
popSample <- read.csv(csv_path) %>%
  filter(AGEGRP != 0 & YEAR == 1) %>%
  select("STNAME", "CTYNAME", "AGEGRP", "TOT_POP", "TOT_MALE", "TOT_FEMALE")

# Treat the age-group code as categorical so it can drive the facets.
popSample$AGEGRP <- as.factor(popSample$AGEGRP)
I then plot the Male and Female population relationships, faceted by age group (1-18, which is currently treated as an int):
# Scatter plot of TOT_FEMALE against TOT_MALE (one point per row of
# popSample), one panel per AGEGRP value, both axes on a log10 scale,
# with a linear-model fit drawn on top of the points.
g <- ggplot(data = popSample, mapping = aes(x = TOT_MALE, y = TOT_FEMALE)) +
  geom_point(colour = "darkblue", alpha = 0.5) +
  stat_smooth(method = "lm", colour = "darkred", size = 0.75) +
  facet_wrap(vars(AGEGRP)) +
  scale_x_log10() +
  scale_y_log10() +
  labs(
    title = "F vs. M Population across all Age Groups",
    x = "Total Male (log10)",
    y = "Total Female (log10)"
  ) +
  theme_light()
g
Which results in this plot: https://share.getcloudapp.com/v1ur6O4e
The problem: I am trying to convert the column AGEGRP from ‘int’ to ‘factor’, and change the factors labels from “1”, “2”, “3”, … “18” to "AgeGroup1", "AgeGroup2", "AgeGroup3", … "AgeGroup18"
When I try this code, my AGEGRP column's observation values are all replaced with NAs:
popSample$AGEGRP <- factor(popSample$AGEGRP, levels = c("0 to 4", "5 to 9", "10 to 14", "15 to 19", "20 to 24", "25 to 29", "30 to 34", "35 to 39", "40 to 44", "45 to 49", "50 to 54", "55 to 59", "60 to 64", "65 to 69", "70 to 74", "75 to 79", "80 to 84", "85+"))
https://share.getcloudapp.com/qGuo1O4y
Thank you for your help,
# `levels` must list the values that actually occur in the data (the codes
# 1..18); the display names go in `labels`. Passing the display names as
# `levels` matches none of the existing values, so every entry becomes NA.
# This works whether AGEGRP is still an integer or already a factor with
# levels "1".."18", because factor() compares values and levels as text.
popSample$AGEGRP <- factor(
  popSample$AGEGRP,
  levels = 1:18,
  labels = c(
    "0 to 4", "5 to 9", "10 to 14", "15 to 19", "20 to 24", "25 to 29",
    "30 to 34", "35 to 39", "40 to 44", "45 to 49", "50 to 54", "55 to 59",
    "60 to 64", "65 to 69", "70 to 74", "75 to 79", "80 to 84", "85+"
  )
)
Need to add all levels though.
Alternatively
# Rename the existing levels by position: the first level becomes "0 to 4",
# the second "5 to 9", and so on.
# NOTE(review): assumes popSample$AGEGRP is already a factor whose 18 levels
# are in age-group order -- confirm before running.
levels(popSample$AGEGRP) <- c(
  "0 to 4", "5 to 9", "10 to 14", "15 to 19", "20 to 24", "25 to 29",
  "30 to 34", "35 to 39", "40 to 44", "45 to 49", "50 to 54", "55 to 59",
  "60 to 64", "65 to 69", "70 to 74", "75 to 79", "80 to 84", "85+"
)
should work as well.
Read in the csv again:
library(tidyverse)

# Read the CSV straight from its URL, keep the rows with a non-zero AGEGRP
# and YEAR equal to 1, and retain only the six columns used afterwards.
# AGEGRP is left as read (no factor conversion here).
url <- "https://www2.census.gov/programs-surveys/popest/datasets/2010-2018/counties/asrh/cc-est2018-alldata-54.csv"
popSample <- read.csv(url) %>%
  filter(AGEGRP != 0, YEAR == 1) %>%
  select(STNAME, CTYNAME, AGEGRP, TOT_POP, TOT_MALE, TOT_FEMALE)
If you just want to add a prefix "AgeGroup" to your facet labels, you do:
# Same faceted scatter plot, but the panel titles are built on the fly:
# the labeller prepends "AgeGroup" to each AGEGRP value, so the data itself
# is not modified.
ggplot(data = popSample, mapping = aes(x = TOT_MALE, y = TOT_FEMALE)) +
  geom_point(colour = "darkblue", alpha = 0.5) +
  stat_smooth(method = "lm", colour = "darkred", size = 0.75) +
  facet_wrap(
    vars(AGEGRP),
    labeller = labeller(AGEGRP = function(value) paste0("AgeGroup", value))
  ) +
  scale_x_log10() +
  scale_y_log10() +
  labs(
    title = "F vs. M Population across all Age Groups",
    x = "Total Male (log10)",
    y = "Total Female (log10)"
  ) +
  theme_light()
If you need new factor levels, then you need to re-create the factor (like @Annet's answer below):
# Display names for the 18 age groups, in code order (1 -> "0 to 4", ...).
lvls <- c(
  "0 to 4", "5 to 9", "10 to 14", "15 to 19", "20 to 24", "25 to 29",
  "30 to 34", "35 to 39", "40 to 44", "45 to 49", "50 to 54", "55 to 59",
  "60 to 64", "65 to 69", "70 to 74", "75 to 79", "80 to 84", "85+"
)

# Use each AGEGRP value as a position in lvls to look up its display name,
# then build a factor whose level order follows lvls.
# NOTE(review): if AGEGRP was already turned into a factor, the lookup goes
# through its integer codes, which presumably line up with 1..18 -- confirm,
# or re-read the CSV so AGEGRP is a plain integer.
popSample$AGEGRP <- factor(lvls[popSample$AGEGRP], levels = lvls)
Then plot:
# Faceted scatter plot of TOT_FEMALE against TOT_MALE on log10 axes with a
# linear fit; the panel titles come directly from the AGEGRP values.
ggplot(data = popSample, mapping = aes(x = TOT_MALE, y = TOT_FEMALE)) +
  geom_point(colour = "darkblue", alpha = 0.5) +
  stat_smooth(method = "lm", colour = "darkred", size = 0.75) +
  facet_wrap(vars(AGEGRP)) +
  scale_x_log10() +
  scale_y_log10() +
  labs(
    title = "F vs. M Population across all Age Groups",
    x = "Total Male (log10)",
    y = "Total Female (log10)"
  ) +
  theme_light()
To change all the factor labels with one function, you can use forcats::fct_relabel (forcats ships as part of the tidyverse, which you've already got loaded). The changed factor labels will carry over to the plot facets and the order stays the same.
First few entries:
# First four entries before relabelling
head(popSample$AGEGRP, 4)
#> [1] 1 2 3 4
#> Levels: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
# First four entries after prefixing every level name with "AgeGroup"
# (the result is only printed here, not assigned back to popSample)
head(
  forcats::fct_relabel(popSample$AGEGRP, function(lvl) paste0("AgeGroup", lvl)),
  4
)
#> [1] AgeGroup1 AgeGroup2 AgeGroup3 AgeGroup4
#> 18 Levels: AgeGroup1 AgeGroup2 AgeGroup3 AgeGroup4 AgeGroup5 ... AgeGroup18
Or with base R, reassign the levels:
# Base R equivalent: overwrite the level names in place, prefixing each
# existing name with "AgeGroup", then show the first four entries.
levels(popSample$AGEGRP) <- sprintf("AgeGroup%s", levels(popSample$AGEGRP))
head(popSample$AGEGRP, 4)
#> [1] AgeGroup1 AgeGroup2 AgeGroup3 AgeGroup4
#> 18 Levels: AgeGroup1 AgeGroup2 AgeGroup3 AgeGroup4 AgeGroup5 ... AgeGroup18