Trying to manually reorder a bar chart in R - r

I need this chart to show the bars in a different order.
The "pretest" bar needs to be first. Nothing happens other than my labels changing. Would really appreciate some help!
This research only makes sense if the table is in the correct order.
I have been using this code trying to change the order.
# Tests in the order the bars should appear. ggplot2 draws a discrete axis in
# factor-level order, so this single vector drives column selection, reshaping
# and the final level order.
test_order <- c("pretest_result", "C1_reps", "C2_reps", "P1_reps", "P2_reps")

# Mean repetitions and t-based confidence limits (t.test() default level) per
# test and training type, using only rows that have a pretest result.
plot_data2 <- main_data %>%
  dplyr::select(training_type, all_of(test_order)) %>%
  drop_na(pretest_result) %>%
  # pivot_longer() replaces the superseded gather(); one row per horse x test.
  pivot_longer(all_of(test_order), names_to = "test", values_to = "reps") %>%
  group_by(test, training_type) %>%
  summarise(
    mean = mean(reps),
    lci = t.test(reps)$conf.int[[1]],
    uci = t.test(reps)$conf.int[[2]],
    .groups = "drop"  # return an ungrouped tibble
  ) %>%
  mutate(test = factor(test, levels = test_order))
This is my code for the plot.
# Dodged bars of the group means with their confidence limits as error bars.
ggplot(plot_data2, aes(x = test, y = mean, fill = training_type)) +
  geom_col(position = position_dodge()) +
  geom_errorbar(
    aes(ymin = lci, ymax = uci),
    width = .2,
    position = position_dodge(.9)  # 0.9 is the default bar width, so bars and error bars line up
  ) +
  scale_y_continuous(breaks = 1:5) +
  # Named labels are matched to factor levels by name. Positional labels would
  # be applied to whatever order the bars happen to be in and hide a wrong order.
  scale_x_discrete(labels = c(
    pretest_result = "Pretest",
    C1_reps = "C1",
    C2_reps = "C2",
    P1_reps = "P1",
    P2_reps = "P2"
  )) +
  labs(x = "Test type", y = "Average repetitions", fill = "Training type") +
  theme_bw()
This is my data
main_data <- structure(
list(
Horse = c("Skori", "Raudhetta", "Emma", "Freyr",
"Nick", "Hilda", "Aleiga", "Sinfonia", "Saga", "Fengur", "Herkules",
"Rumur", "Gaia", "Frøya", "Fanta", "Lindus", "Betty", "Sjamina",
"Dimma", "Astrix", "Presley", "Odin", "Poineten", "Gåte", "Skori",
"Raudhetta", "Emma", "Freyr", "Nick", "Hilda", "Aleiga", "Sinfonia",
"Saga", "Fengur", "Herkules", "Rumur", "Gaia", "Frøya", "Fanta",
"Lindus", "Betty", "Sjamina", "Dimma", "Astrix", "Presley", "Odin",
"Poineten", "Gåte"),
C1_reps = c(5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 5, 5, 5, 5, 6,
4, 5, 6, 6, 4, 5, 5, 5, 4, 5, 5, 4, 5, 6, 6, 5, 5, 5, 5, 6, 5,
4, 4, 5, 4, 5),
C2_reps = c(5, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 6, 6, 5, 6, 5, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 6, 5, 5, 6, 6, 6, 4, 5),
Compliance = c(3,
4, 4, 3, 3, 2, 1, 4, 4, 4, 3, 4, 1, 4, 1, 3, 3, 4, 4, 4, 3, 4,
1, 3, 4, 4, 4, 4, 3, 3, 4, 4, 4, 4, 4, 3, 4, 3, 4, 3, 4, 3, 4,
4, 4, 3, 4, 3),
P1_reps = c(5, 5, 4, 5, 5, 4, 3, 4, 6, 7, 7,
0, 4, 6, 3, 6, 7, 0, 7, 4, 4, 3, 6, 5, 1, 0, 1, 0, 1, 1, 0, 2,
1, 0, 0, 0, 1, 2, 1, 3, 0, 0, 0, 1, 1, 2, 2, 1),
P2_reps = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 6, 6, 2, 6, 7, 4,
6, 5, 6, 4, 4, 6, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, 0, 1, 1, 1, 0, 0, 0, 0, 0, 2, 1, 0),
Test.group = c(1, 1,
1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3),
training_type = c("PC", "PC", "PC", "PC", "PC",
"PC", "PC", "PC", "PC", "PC", "PC", "PC", "PC", "PC", "PC", "PC",
"PC", "PC", "PC", "PC", "PC", "PC", "PC", "PC", "TT", "TT", "TT",
"TT", "TT", "TT", "TT", "TT", "TT", "TT", "TT", "TT", "TT", "TT",
"TT", "TT", "TT", "TT", "TT", "TT", "TT", "TT", "TT", "TT"),
pretest_result = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, 3, 6, 2, 4, 0, 0, 3, 5, 4, 2, 9, 0, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, 6, 4, 2, 2, 1, 0, 5, 4, 6,
6, 16, 0)
), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-48L))

Bar order is determined by order of factor levels. To reorder bars, you need to reorder factor levels by adding something like the following after your first call to mutate(). List the levels of your factor in the order you want your bars to appear
mutate(test = forcats::fct_relevel(test, "pretest_result", "C1_reps", "C2_reps", "P1_reps", "P2_reps"))

add ordered = T parameter to your script
...
# Current step: plain (unordered) factor with explicit levels.
mutate(test = factor(
  test,
  levels = c("pretest_result", "C1_reps", "C2_reps", "P1_reps", "P2_reps")
))
...
...
# Same step with an ordered factor. Spell out TRUE: `T` is an ordinary
# variable that can be reassigned.
mutate(test = factor(
  test,
  levels = c("pretest_result", "C1_reps", "C2_reps", "P1_reps", "P2_reps"),
  ordered = TRUE
))

Related

R remove rows with NA in groups of columns containing the same string

I have a dataframe that contains multiple variables, each measured with multiple items at two different time points. What I want is to remove all rows with NA entries in groups of columns containing the same part of a string. Some of these groups contain multiple columns (e.g., grep("learn")), some only one (e.g., T1_age). This is my original dataframe (a part of it):
data <- data.frame(
T1_age = c(39, 30, 20, 48, 27, 55, 37, 50, 50, 37),
T1_sex = c(2, 1, 1, 2, 2, 1, 1, 2, 1, 1),
T2_learn1 = c(2, NA, 3, 4, 1, NA, NA, 2, 4, 4),
T2_learn2 = c(1, NA, 4, 4, 1, NA, NA, 2, 4, 4),
T2_learn3 = c(2, NA, 4, 4, 1, NA, NA, 3, 4, 4),
T2_learn4 = c(2, NA, 2, 5, 5, NA, NA, 5, 5, 5),
T2_learn5 = c(4, NA, 3, 4, 3, NA, NA, 3, 4, 3),
T2_aut1 = c(NA, NA, 4, 4, 4, NA, NA, 3, 5, 4),
T2_aut2 = c(NA, NA, 4, 4, 4, NA, NA, 3, 5, 5),
T2_aut3 = c(NA, NA, 4, 4, 3, NA, NA, 3, 5, 5),
T2_ssup1 = c(1, NA, 4, 5, 4, NA, NA, 2, 4, 3),
T2_ssup2 = c(3, NA, 4, 5, 5, NA, NA, 3, 4, 4),
T2_ssup3 = c(4, NA, 4, 5, 5, NA, NA, 4, 4, 4),
T2_ssup4 = c(2, NA, 3, 5, 5, NA, NA, 3, 4, 4),
T3_learn1 = c(3, NA, NA, 4, 4, NA, NA, 3, 3, 4),
T3_learn2 = c(1, NA, NA, 4, 3, NA, NA, 3, 3, 4),
T3_learn3 = c(3, NA, NA, 4, 4, NA, NA, 3, 3, 5),
T3_learn4 = c(4, NA, NA, 5, 4, NA, NA, 4, 5, 5),
T3_learn5 = c(4, NA, NA, 3, 4, NA, NA, 3, 3, 4),
T3_aut1 = c(NA, NA, NA, 4, 4, NA, NA, 3, 5, 5),
T3_aut2 = c(NA, NA, NA, 3, 4, NA, NA, 3, 5, 5),
T3_aut3 = c(NA, NA, NA, 3, 2, NA, NA, 3, 5, 5),
T3_ssup1 = c(3, NA, NA, 5, 4, NA, NA, 2, 4, 1),
T3_ssup2 = c(3, NA, NA, 5, 5, NA, NA, 4, 5, 5),
T3_ssup3 = c(4, NA, NA, 5, 5, NA, NA, 4, 5, 3),
T3_ssup4 = c(3, NA, NA, 5, 5, NA, NA, 4, 5, 4)
)
Now I already found a very horrible solution and I believe that could be improved. So this code basically does what I want:
library(dplyr)
library(tidyr)
# Keep a row only if it has at least one non-missing value in each item group
# (learn, aut, ssup), then drop rows with a missing age or sex.
data <- data %>%
  filter(rowSums(!is.na(.[, grep("learn", colnames(.))])) > 0) %>%
  filter(rowSums(!is.na(.[, grep("aut", colnames(.))])) > 0) %>%
  filter(rowSums(!is.na(.[, grep("ssup", colnames(.))])) > 0) %>%
  drop_na(T1_age, T1_sex)
So the new data frame (and what I want to achieve) looks like this:
data2 <- data.frame(
T1_age = c(20, 48, 27, 50, 50, 37),
T1_sex = c(1, 2, 2, 2, 1, 1),
T2_learn1 = c(3, 4, 1, 2, 4, 4),
T2_learn2 = c(4, 4, 1, 2, 4, 4),
T2_learn3 = c(4, 4, 1, 3, 4, 4),
T2_learn4 = c(2, 5, 5, 5, 5, 5),
T2_learn5 = c(3, 4, 3, 3, 4, 3),
T2_aut1 = c(4, 4, 4, 3, 5, 4),
T2_aut2 = c(4, 4, 4, 3, 5, 5),
T2_aut3 = c(4, 4, 3, 3, 5, 5),
T2_ssup1 = c(4, 5, 4, 2, 4, 3),
T2_ssup2 = c(4, 5, 5, 3, 4, 4),
T2_ssup3 = c(4, 5, 5, 4, 4, 4),
T2_ssup4 = c(3, 5, 5, 3, 4, 4),
T3_learn1 = c(NA, 4, 4, 3, 3, 4),
T3_learn2 = c(NA, 4, 3, 3, 3, 4),
T3_learn3 = c(NA, 4, 4, 3, 3, 5),
T3_learn4 = c(NA, 5, 4, 4, 5, 5),
T3_learn5 = c(NA, 3, 4, 3, 3, 4),
T3_aut1 = c(NA, 4, 4, 3, 5, 5),
T3_aut2 = c(NA, 3, 4, 3, 5, 5),
T3_aut3 = c(NA, 3, 2, 3, 5, 5),
T3_ssup1 = c(NA, 5, 4, 2, 4, 1),
T3_ssup2 = c(NA, 5, 5, 4, 5, 5),
T3_ssup3 = c(NA, 5, 5, 4, 5, 3),
T3_ssup4 = c(NA, 5, 5, 4, 5, 4)
)
Could you help me improve this a bit? Thank you!!!
You may iterate over grep in an sapply and check if the rowSums in the slices reach their number of columns.
# Name fragments identifying each group of item columns.
V <- c("learn", "aut", "ssup")

# For every group: TRUE for rows in which all of the group's columns are NA.
# lapply() always returns a list of logical vectors, so this also works when
# `data` has a single row (sapply() would simplify to a plain vector there and
# rowSums() would fail).
all_missing <- lapply(V, \(v) {
  X <- data[grep(v, names(data))]
  rowSums(is.na(X)) == ncol(X)
})

# Drop a row as soon as any one group is entirely missing.
res <- data[!Reduce(`|`, all_missing), ]
stopifnot(all.equal(res, data2, check.attributes = FALSE))
Or probably just checking if the sums of NA's in the "hot" columns reach the number of columns (without the demographics) is enough.
# One-pass variant: count the NAs over all columns matching any pattern in V
# and keep rows whose count differs from the number of columns after the first
# two.
# NOTE(review): this removes a row only when every matched column is NA; a row
# missing one complete group but not the others is kept. Confirm that this is
# enough for the data at hand.
res1 <- data[
  rowSums(is.na(data[grepl(paste(V, collapse = "|"), names(data))])) !=
    ncol(data[-(1:2)]),
]
stopifnot(all.equal(res1, data2, check.attributes = FALSE))
data2 is the result data frame you provide in OP. dim(data)[2] gives the same as ncol(data).
Note: R version 4.1.2 (2021-11-01)

What is another way to compute tertiles using tidyverse (or any other packages) in R?

I have WVS 6th wave dataframe. Computed the outgroup trust index (outgroup_index) and I want to divide this vector into 3 groups according to tertiles.
I use base R functions to do that:
# Recoding will be based on tertiles
# Find the tertiles of the index: its 0, 1/3, 2/3 and 1 quantiles
tertiles <- quantile(filtered_df$outgroup_index, probs = (0:3) / 3)
# Cut the target variable into tertiles. include.lowest = TRUE keeps the
# minimum value in the first interval instead of turning it into NA.
filtered_df$index_recoded <- cut(
  filtered_df$outgroup_index,
  breaks = tertiles,
  include.lowest = TRUE
)
But I am wondering about other possible and more neat ways to do it (preferably using dplyr/tidyverse or any other packages)?
Data:
structure(list(V2 = structure(c(643, 643, 643, 643, 643, 643,
643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643,
643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643,
643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643,
643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643,
643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643,
643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643,
643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643,
643, 643, 643, 643), label = "Country/region", format.spss = "F4.0", labels = c(`Not asked in survey` = -4,
Algeria = 12, Azerbaijan = 31, Argentina = 32, Australia = 36,
Armenia = 51, Brazil = 76, Belarus = 112, Chile = 152, China = 156,
`Taiwan ROC` = 158, Colombia = 170, Cyprus = 196, Ecuador = 218,
Estonia = 233, Georgia = 268, Palestine = 275, Germany = 276,
Ghana = 288, Haiti = 332, `Hong Kong SAR` = 344, India = 356,
Iraq = 368, Japan = 392, Kazakhstan = 398, Jordan = 400, `South Korea` = 410,
Kuwait = 414, Kyrgyzstan = 417, Lebanon = 422, Libya = 434, Malaysia = 458,
Mexico = 484, Morocco = 504, Netherlands = 528, `New Zealand` = 554,
Nigeria = 566, Pakistan = 586, Peru = 604, Philippines = 608,
Poland = 616, Qatar = 634, Romania = 642, Russia = 643, Rwanda = 646,
Singapore = 702, Slovenia = 705, `South Africa` = 710, Zimbabwe = 716,
Spain = 724, Sweden = 752, Thailand = 764, `Trinidad and Tobago` = 780,
Tunisia = 788, Turkey = 792, Ukraine = 804, Egypt = 818, `United States` = 840,
Uruguay = 858, Uzbekistan = 860, Yemen = 887), class = c("haven_labelled",
"vctrs_vctr", "double")), V105 = structure(c(4, 3, 3, 4, 3, 4,
4, 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 2, 2, 2, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 2, 2, 2, 1, 1,
2, 4, 2, 2, 2, 1, 2, 1, 4, 2, 1, 4, 2, 3, 3, 2, 3, 2, 3, 2, 3,
2, 2, 3, 3, 3, 3, 3, 3, NA, 3, 3, 4, 2, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 2, 2, 3, 3, 3, 2, 3, NA), label = "Trust: People you meet for the first time (B)", format.spss = "F3.0", labels = c(`SE:Inapplicable ; RU:Inappropriate response; HT: Dropped out` = -5,
`Not asked` = -4, `Not applicable` = -3, `No answer` = -2, `Don<U+00B4>t know` = -1,
`Trust completely` = 1, `Trust somewhat` = 2, `Do not trust very much` = 3,
`Do not trust at all` = 4), class = c("haven_labelled", "vctrs_vctr",
"double")), V106 = structure(c(3, 2, NA, 4, 2, 4, 4, 3, 3, 4,
3, 3, 4, 4, 4, 4, NA, NA, NA, NA, 3, 2, 2, 2, 2, 2, 2, 3, 3,
3, 3, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 2, 2, 2, 1, 1, 2, 2,
2, 2, 1, 1, 2, 1, 4, 2, 1, 4, 2, 3, 3, 2, 2, 2, 3, 2, 3, 2, 2,
NA, 3, NA, 3, 3, 3, 2, 3, 3, 4, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 2, 2, 2, 3, 2, 2, 2, 3), label = "Trust: People of another religion (B)", format.spss = "F3.0", labels = c(`DE,SE:Inapplicable ; RU:Inappropriate response; HT: Dropped` = -5,
`Not asked` = -4, `Not applicable` = -3, `No answer` = -2, `Don<U+00B4>t know` = -1,
`Trust completely` = 1, `Trust somewhat` = 2, `Do not trust very much` = 3,
`Do not trust at all` = 4), class = c("haven_labelled", "vctrs_vctr",
"double")), V107 = structure(c(3, 4, NA, 4, 2, 4, 4, 3, 3, 4,
3, 3, 4, 4, 4, 4, 3, 2, NA, NA, 3, 2, 2, 2, 2, 2, 2, 3, 3, 3,
3, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 1, 2, 2, 2, 2, 1, 1, 2, 1, 2,
2, 1, 1, 2, 1, 4, 2, 1, 3, 2, 3, 2, 2, 2, 2, 3, 2, 3, 2, 2, NA,
3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 3, 2, 3, 2, 2, 2, 3), label = "Trust: People of another nationality (B)", format.spss = "F3.0", labels = c(`DE,SE:Inapplicable ; RU:Inappropriate response; HT: Dropped` = -5,
`Not asked` = -4, `Not applicable` = -3, `No answer` = -2, `Don<U+00B4>t know` = -1,
`Trust completely` = 1, `Trust somewhat` = 2, `Do not trust very much` = 3,
`Do not trust at all` = 4), class = c("haven_labelled", "vctrs_vctr",
"double")), V248 = structure(c(9, 8, 5, 8, 8, 8, 8, 9, 7, 9,
9, 5, 5, 6, 5, 5, 5, 5, 5, 4, 9, 9, 4, 9, 9, 3, 6, 9, 8, 9, 9,
9, NA, 9, 5, 9, 5, 7, 9, 5, 5, 9, 9, 8, 9, 9, 5, 5, 5, 9, 9,
8, 5, 8, 9, 9, 5, 8, 9, 9, 9, 7, 7, 5, 4, 6, 9, 6, 6, 9, 9, 5,
6, 7, 5, 4, 7, 7, 5, 5, 5, 5, 8, 9, 8, 9, 9, 9, 9, 9, 9, 9, 5,
9, 9, 5, 9, 8, 9, 5, 5), label = "Highest educational level attained", format.spss = "F3.0", labels = c(`AU: Inapplicable (No-school education) DE,SE:Inapplicable ;` = -5,
`Not asked` = -4, `Not applicable` = -3, `No answer` = -2, `Don<U+00B4>t know` = -1,
`No formal education` = 1, `Incomplete primary school` = 2, `Complete primary school` = 3,
`Incomplete secondary school: technical/ vocational type` = 4,
`Complete secondary school: technical/ vocational type` = 5,
`Incomplete secondary school: university-preparatory type` = 6,
`Complete secondary school: university-preparatory type` = 7,
`Some university-level education, without degree` = 8, `University - level education, with degree` = 9
), class = c("haven_labelled", "vctrs_vctr", "double")), V59 = structure(c(9,
5, 6, 8, 6, 7, NA, 8, 5, 3, 4, 7, 2, 1, 1, 6, 8, 6, NA, NA, 1,
5, NA, 6, 1, 2, 9, 5, 6, NA, NA, 3, 6, 6, 4, NA, 6, 6, NA, NA,
3, 9, 8, 10, 9, 6, 10, 9, 8, 9, 9, 10, 6, 4, 4, 6, 4, 10, 3,
3, 4, 3, 5, 4, 7, 3, 3, 4, 3, 7, 4, 6, 4, 1, 1, 6, 1, 1, 6, 1,
1, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 7, 3, 1, 5, 6, 7, 2, 4, 5
), label = "Satisfaction with financial situation of household", format.spss = "F3.0", labels = c(`HT: Dropped out survey;DE,SE:Inapplicable ; RU:Inappropriate` = -5,
`Not asked` = -4, `No answer` = -2, `Don<U+00B4>t know` = -1,
Dissatisfied = 1, `2` = 2, `3` = 3, `4` = 4, `5` = 5, `6` = 6,
`7` = 7, `8` = 8, `9` = 9, Satisfied = 10), class = c("haven_labelled",
"vctrs_vctr", "double")), V237 = structure(c(3, 2, 2, 2, NA,
1, 2, 2, 1, 2, 2, 2, 2, 3, 2, 1, 1, 3, 2, 2, NA, 2, 2, 3, 4,
2, 2, 1, NA, 1, 1, 1, NA, NA, NA, 1, NA, 1, 1, NA, 2, 1, 2, 1,
1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3,
2, 3, 2, 1, 2, 3, 2, 2, 2, NA, 2, 2, 4, 2, 2, 2, 1, 1, 2, 1,
2, 3, 2, 2, 1, 2, 2, 2, 3, 3, 2, 3, 2, 2, NA, 3), label = "Family savings during past year", format.spss = "F3.0", labels = c(`DE,SE:Inapplicable ; RU:Inappropriate response; BH: Missing;` = -5,
`Not asked` = -4, `Not applicable` = -3, `No answer` = -2, `Don<U+00B4>t know` = -1,
`Save money` = 1, `Just get by` = 2, `Spent some savings and borrowed money` = 3,
`Spent savings and borrowed money` = 4), class = c("haven_labelled",
"vctrs_vctr", "double")), V105_rec = c(1, 2, 2, 1, 2, 1, 1, 1,
1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 3, 3, 3, 4, 4, 3, 1,
3, 3, 3, 4, 3, 4, 1, 3, 4, 1, 3, 2, 2, 3, 2, 3, 2, 3, 2, 3, 3,
2, 2, 2, 2, 2, 2, NA, 2, 2, 1, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 3, 3, 2, 2, 2, 3, 2, NA), V106_rec = c(2, 3, NA, 1, 3,
1, 1, 2, 2, 1, 2, 2, 1, 1, 1, 1, NA, NA, NA, NA, 2, 3, 3, 3,
3, 3, 3, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 3, 3,
3, 4, 4, 3, 3, 3, 3, 4, 4, 3, 4, 1, 3, 4, 1, 3, 2, 2, 3, 3, 3,
2, 3, 2, 3, 3, NA, 2, NA, 2, 2, 2, 3, 2, 2, 1, 3, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 3, 3, 3, 2), V107_rec = c(2,
1, NA, 1, 3, 1, 1, 2, 2, 1, 2, 2, 1, 1, 1, 1, 2, 3, NA, NA, 2,
3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 4,
3, 3, 3, 3, 4, 4, 3, 4, 3, 3, 4, 4, 3, 4, 1, 3, 4, 2, 3, 2, 3,
3, 3, 3, 2, 3, 2, 3, 3, NA, 2, 3, 2, 2, 2, 3, 2, 2, 2, 3, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 2, 3, 2, 3, 3, 3, 2), outgroup_index = c(1.66666666666667,
2, 2, 1, 2.66666666666667, 1, 1, 1.66666666666667, 1.66666666666667,
1, 1.66666666666667, 2, 1, 1, 1, 1, 1.5, 2.5, 2, 2, 2, 3, 3,
3, 3, 3, 2.66666666666667, 2, 2, 2, 2, 1.33333333333333, 1.33333333333333,
2, 2, 2, 2, 2, 2, 2, 2, 2.66666666666667, 2, 3, 3, 3, 4, 4, 3,
2.66666666666667, 3, 3, 3.66666666666667, 4, 3, 4, 1, 3, 4, 1.33333333333333,
3, 2, 2.33333333333333, 3, 2.66666666666667, 3, 2, 3, 2, 3, 3,
2, 2, 2.5, 2, 2, 2, 3, 2, 2, 1.33333333333333, 3, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 3, 2.66666666666667, 2.66666666666667, 2,
2.66666666666667, 3, 2.66666666666667, 2), V59_rec = structure(c(5,
3, 3, 4, 3, 4, NA, 4, 3, 2, 2, 4, 1, 1, 1, 3, 4, 3, NA, NA, 1,
3, NA, 3, 1, 1, 5, 3, 3, NA, NA, 2, 3, 3, 2, NA, 3, 3, NA, NA,
2, 5, 4, 5, 5, 3, 5, 5, 4, 5, 5, 5, 3, 2, 2, 3, 2, 5, 2, 2, 2,
2, 3, 2, 4, 2, 2, 2, 2, 4, 2, 3, 2, 1, 1, 3, 1, 1, 3, 1, 1, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 4, 2, 1, 3, 3, 4, 1, 2, 3), labels = c(`Not satisfied at all` = 1,
`Rather not satisfied` = 2, `Neither satisfied, nor not satisfied` = 3,
`Rather satisfied` = 4, Satisfied = 5), class = c("haven_labelled",
"vctrs_vctr", "double")), V248_dummy = structure(c(1, 1, 0, 1,
1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1,
0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1,
0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1,
0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0), labels = c(`A university education and higher` = 1,
`No university education` = 0), class = c("haven_labelled", "vctrs_vctr",
"double")), V237_rec = structure(c(3, 2, 2, 2, NA, 1, 2, 2, 1,
2, 2, 2, 2, 3, 2, 1, 1, 3, 2, 2, NA, 2, 2, 3, 3, 2, 2, 1, NA,
1, 1, 1, NA, NA, NA, 1, NA, 1, 1, NA, 2, 1, 2, 1, 1, 1, 1, 1,
1, 1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 2, 3, 2, 1,
2, 3, 2, 2, 2, NA, 2, 2, 3, 2, 2, 2, 1, 1, 2, 1, 2, 3, 2, 2,
1, 2, 2, 2, 3, 3, 2, 3, 2, 2, NA, 3), labels = c(`Save money` = 1,
`Just get by` = 2, `Spent savings and borrowed money` = 3), class = c("haven_labelled",
"vctrs_vctr", "double"))), row.names = c(NA, -101L), class = c("tbl_df",
"tbl", "data.frame"), label = "filelabel")
A bit unintuitive, but ggplot2 has the functionality you are looking for.
# cut_number() bins on quantiles (groups of roughly equal size), which is what
# tertiles are; cut_interval() would create bins of equal width instead.
filtered_df %>%
  mutate(index_recoded = ggplot2::cut_number(outgroup_index, 3))
And to verify the levels are the same:
# smaller dput would be nice
# `Data` stands for the data frame from the question's dput() output.
start <- Data
# Compare the base R tertile recoding with the ggplot2 quantile binning,
# element by element.
all(
  {
    # Base R: explicit tertile breaks passed to cut().
    filtered_df <- start
    tertiles <- quantile(filtered_df$outgroup_index, c(0:3) / 3)
    filtered_df$index_recoded <- with(
      filtered_df,
      cut(outgroup_index,
        tertiles,
        include.lowest = TRUE
      )
    )
    filtered_df$index_recoded
  } == {
    # ggplot2: cut_number() splits on quantiles into three groups.
    tv_df <- start
    tv_df %>%
      mutate(index_recoded = ggplot2::cut_number(outgroup_index, 3)) %>%
      pull(index_recoded)
  }
)
[1] TRUE
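cut has a simpler syntax if you want to divide the data into a fixed number of equal-width intervals (note that these are not tertiles unless the values happen to be evenly spread).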
filtered_df$index_recoded <- cut(filtered_df$outgroup_index, 3)
You can also use it with labels = FALSE to get 1, 2 and 3 as output.
filtered_df$index_recoded <- cut(filtered_df$outgroup_index, 3, labels = FALSE)

How to create ggplot graphs with the three groups into one plot?

My codes are:
ggplot(data=df2, aes(x=stress, fill=as.factor(JP_Gender))) + geom_density(alpha=.3)
ggplot(data=df1, aes(x=CGstress)) + geom_density(alpha=.3)
My dataset 1:
structure(list(CGstress = c(4, 1, 10, 8, 9.5, 5, 5, 6, 6, 6,
7, 3, 4.5, 8, 9, 1, 5, 1, 5.5, 4, 1, 7, 9, 8, 3, NA, 10, 9, 5,
3, NA, 10, 6, NA, 10, 7)), row.names = c(NA, -36L), class = c("tbl_df",
"tbl", "data.frame"))
My dataset 2:
structure(list(stress = c(7, 2, 5, 6, 7, 1, 6, 10, 9, 10, 10,
10, 10, 8, 9, 4, 7, 6, 4, 9, 4, 8, 3.5, 7, 6, 6, 1, 7, 9, 8,
10, 6, 3, 1, 1, 1, 9, 6, 4), JP_Gender = structure(c(1, 2, 1,
2, 2, 1, 1, 2, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1), label = "What is your gender?", format.stata = "%12.0g", labels = c(Male = 1,
Female = 2, Transgender = 3, Other = 4), class = c("haven_labelled",
"vctrs_vctr", "double"))), row.names = c(NA, -39L), class = c("tbl_df",
"tbl", "data.frame"))
The code above gives me two separate graphs. How can I combine them into one plot, and how do I label the legend?
You can try combining the two datasets and then plot :
library(dplyr)
library(ggplot2)

# Legend text for every group. The first four entries follow the value labels
# of JP_Gender (Male = 1, Female = 2, Transgender = 3, Other = 4); the last one
# marks the observations coming from df1. Using text instead of the number 3
# for df1 avoids a clash with gender code 3.
group_levels <- c("Male", "Female", "Transgender", "Other", "CGstress")

df1 %>%
  rename(stress = CGstress) %>%
  mutate(group = "CGstress") %>%
  bind_rows(
    df2 %>%
      # Translate the numeric gender code into its label.
      mutate(group = group_levels[as.integer(JP_Gender)]) %>%
      select(stress, group)
  ) %>%
  # Fix the legend order; levels that do not occur are dropped by ggplot2.
  mutate(group = factor(group, levels = group_levels)) %>%
  ggplot(aes(x = stress, fill = group)) +
  geom_density(alpha = .3) +
  labs(fill = "Group")

How to remove rows from dataframe without rewriting it?

df = structure(list(V1 = c(1, 2, 2, 3, 4, 5, 5, 6, 7), V2 = c(3.5, 3, 2.5, 2, 3, 2, 3, 5, 4), V3 = c(6.5, 8, 9, 5, 7, 4, 3, 6, 7)), row.names = c(NA, 9L), class = "data.frame")
trash = c(2,3)
How to remove the rows having the IDs in trash without rewriting the df?
I don't think there are inplace operations in r, even if you do
df <- structure(
  list(
    V1 = c(1, 2, 2, 3, 4, 5, 5, 6, 7),
    V2 = c(3.5, 3, 2.5, 2, 3, 2, 3, 5, 4),
    V3 = c(6.5, 8, 9, 5, 7, 4, 3, 6, 7)
  ),
  row.names = c(NA, 9L),
  class = "data.frame"
)
trash <- c(2, 3)
# Subsetting creates a new data frame that is then bound to `df` again.
# `trash` is used as row positions. A logical mask is used instead of
# `df[-trash, ]` because a negative index returns zero rows when `trash` is
# empty. If `trash` holds values of V1 rather than positions, use
# `df[!(df$V1 %in% trash), ]`.
df <- df[!(seq_len(nrow(df)) %in% trash), ]
It should still rewrite df.

Add legend to graph in R

For a sample dataframe:
df <- structure(list(antibiotic = c(0.828080341411847, 1.52002304506738,
1.31925434545302, 1.66681722567074, 1.17791610945551, 0.950096368502059,
1.10507733691997, 1.0568193215304, 1.03853131016669, 1.02313195567946,
0.868629787234043, 0.902126485349154, 1.12005679002801, 1.88261441540084,
0.137845900627507, 1.07040656448604, 1.41496470588235, 1.30978543173373,
1.16931780610558, 1.05894439450366, 1.24805122785724, 1.21318238007025,
0.497310305098053, 0.872362356327429, 0.902584749481137, 0.999731895498823,
0.907560340983954, 1.05930840957587, 1.40457554864091, 1.09747179272879,
0.944219456216072, 1.10363111431903, 0.974649273935516, 0.989983064420841,
1.14784471036171, 1.17232858907798, 1.44675812720393, 0.727078405331282,
1.36341361598635, 1.06120293299474, 1.06920290856811, 0.711007267992205,
1.39034247642439, 0.710873996527168, 1.30529753573398, 0.781191310196629,
0.921788181250106, 0.932214675722466, 0.752289683770589, 0.942392026874501
), year = c(3, 1, 4, 1, 2, 4, 1, 3, 4, 3, 4, 1, 2, 3, 4, 1, 1,
4, 1, 1, 1, 1, 4, 1, 3, 3, 1, 4, 1, 4, 2, 1, 1, 1, 3, 4, 3, 2,
2, 2, 3, 3, 1, 2, 3, 2, 3, 4, 4, 1), imd.decile = c(8, 2, 5,
5, 4, 3, 2, 8, 6, 4, 3, 6, 9, 2, 5, 3, 5, 6, 4, 2, 9, 11, 2,
8, 3, 5, 7, 8, 7, 4, 9, 7, 6, 4, 8, 10, 5, 6, 6, 11, 6, 4, 2,
4, 10, 8, 2, 8, 4, 3)), .Names = c("antibiotic", "year", "imd.decile"
), row.names = c(17510L, 6566L, 24396L, 2732L, 13684L, 28136L,
1113L, 15308L, 28909L, 21845L, 23440L, 1940L, 8475L, 22406L,
27617L, 4432L, 3411L, 27125L, 6891L, 6564L, 1950L, 5683L, 25240L,
5251L, 20058L, 18068L, 5117L, 29066L, 2807L, 24159L, 12309L,
6044L, 7629L, 2336L, 16583L, 23921L, 17465L, 14911L, 8879L, 13929L,
17409L, 19421L, 7239L, 11570L, 15283L, 8283L, 16246L, 27950L,
23723L, 4411L), class = "data.frame")
I am trying to graph imd.decile by antibiotic for each year
library(ggplot2)
# Mean of `antibiotic` at each `imd.decile`, drawn as one line per `year`.
# `fun` replaces the deprecated `fun.y` argument of stat_summary().
p <- ggplot(df, aes(x = imd.decile, y = antibiotic, group = factor(year))) +
  stat_summary(geom = "line", fun = mean)
p
How do I add the wave to colour the corresponding graph and add a legend (I can't seem to use the aes command correctly).

Resources