t-test of one group versus many groups in tidyverse - r

I have the following tibble
# Example data: 5 people x 4 categories x 3 replicates (60 rows).
# `value` is sampled without a seed, so the numbers differ on every run.
test_tbl <- tibble(
  name = rep(c("John", "Allan", "George", "Peter", "Paul"), each = 12),
  category = rep(rep(LETTERS[1:4], each = 3), 5),
  replicate = rep(1:3, 20),
  # Spell out TRUE: `T` is an ordinary variable and can be reassigned.
  value = sample.int(n = 1e5, size = 60, replace = TRUE)
)
# A tibble: 60 x 4
name category replicate value
<chr> <chr> <int> <int>
1 John A 1 71257
2 John A 2 98887
3 John A 3 87354
4 John B 1 25352
5 John B 2 69913
6 John B 3 43086
7 John C 1 24957
8 John C 2 33928
9 John C 3 79854
10 John D 1 32842
11 John D 2 19156
12 John D 3 50283
13 Allan A 1 98188
14 Allan A 2 26208
15 Allan A 3 69329
16 Allan B 1 32696
17 Allan B 2 81240
18 Allan B 3 54689
19 Allan C 1 77044
20 Allan C 2 97776
# … with 40 more rows
I want to group_by(name, category) and perform 3 t.test calls, comparing category B, C and D with category A.
I would like to store the estimate and p.value from the output. The expected result is something like this:
# A tibble: 5 x 7
name B_vs_A_estimate B_vs_A_p_value C_vs_A_estimate C_vs_A_p_value D_vs_A_estimate D_vs_A_p_value
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 John -0.578 0.486 0.198 0.309 0.631 0.171
2 Allan 0.140 0.644 0.728 0.283 0.980 0.485
3 George -0.778 0.320 -0.424 0.391 -0.154 0.589
4 Peter -0.435 0.470 -0.156 0.722 0.315 0.0140
5 Paul 0.590 0.0150 -0.473 0.475 0.681 0.407
I would prefer a solution using tidyverse and/or broom.

There are many ways to achieve the desired output, but this one is perhaps the most intuitive and the easiest to debug (you can put a browser() anywhere).
# Part 1: for every person, run a t-test of category A against B, C and D.
test_tbl %>%
  group_by(name) %>%
  do({
    # Rows of the current person (do #1).
    sub_tbl <- .
    # One row per comparison, labelled "<g1>_vs_<g2>".
    expand.grid(g1 = "A", g2 = c("B", "C", "D"), stringsAsFactors = FALSE) %>%
      mutate(test = as.character(glue::glue("{g1}_vs_{g2}"))) %>%
      rowwise() %>%
      do({
        # Current comparison (do #2).
        gs <- .
        t_res <- t.test(sub_tbl %>% filter(category == gs$g1) %>% pull(value),
                        sub_tbl %>% filter(category == gs$g2) %>% pull(value))
        # NOTE: `estimate` holds the t statistic (t_res$statistic), not the
        # difference in means.
        data.frame(test = gs$test, estimate = t_res$statistic, p_value = t_res$p.value,
                   stringsAsFactors = FALSE)
      })
  }) %>%
  ungroup() %>%
  # Part 2: reshape to one column per test/statistic combination.
  # pivot_longer()/pivot_wider() replace the superseded gather()/spread();
  # names_sort = TRUE keeps spread()'s alphabetical column order.
  pivot_longer(cols = c(estimate, p_value), names_to = "statistic", values_to = "val") %>%
  mutate(test_statistic = paste(test, statistic, sep = "_")) %>%
  select(-test, -statistic) %>%
  pivot_wider(names_from = test_statistic, values_from = val, names_sort = TRUE)
Result
# A tibble: 5 x 7
name A_vs_B_estimate A_vs_B_p_value A_vs_C_estimate A_vs_C_p_value A_vs_D_estimate A_vs_D_p_value
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Allan -0.270 0.803 -1.03 0.396 1.55 0.250
2 George 0.201 0.855 0.221 0.838 1.07 0.380
3 John -1.59 0.249 0.0218 0.984 -0.410 0.704
4 Paul 0.116 0.918 -1.62 0.215 -1.53 0.212
5 Peter 0.471 0.664 0.551 0.611 0.466 0.680
It groups the records by name then apply a function (do #1). Save the sub dataframe in sub_tbl, expand all the test cases (expand.grid) and create a test name with the two letters combined. Now, for each combination apply the function to run the t-tests (do #2). That anonymous function performs the test between group 1 (g1) and group 2 (g2) and returns a dataframe with the results.
The second part basically rearranges the columns to have the final output.

# Per person: t-test of category A against category B.
test_tbl %>%
  dplyr::group_by(name) %>%
  dplyr::summarise(
    # Difference of the two group means reported by the test (A minus B).
    estimate_AB = with(
      t.test(value[category %in% c("A", "B")] ~ category[category %in% c("A", "B")]),
      estimate[1] - estimate[2]
    ),
    # p-value of the same test.
    pvalue_AB =
      t.test(value[category %in% c("A", "B")] ~ category[category %in% c("A", "B")])$p.value
  )
Here is what I did for testing the A against B by group. I think that you could extend my approach, or try to incorporate the code from the first solution.

EDIT: cleaner code
# For each person, run a t-test of category A against B, C and D and collect
# the statistic and p-value of every test in a single tibble.
unique(test_tbl$name) %>%
  map_dfr(function(nm) {
    dat <- test_tbl %>% filter(name == nm)
    # Name the column explicitly: a bare pull() takes whichever column is last.
    ref_values <- dat %>% filter(category == "A") %>% pull(value)
    map(LETTERS[2:4], function(cat) {
      t.test(ref_values, dat %>% filter(category == cat) %>% pull(value))
    }) %>%
      map_dfr(broom::glance) %>%
      select(statistic, p.value) %>%
      mutate(
        name = nm,
        cross_cat = paste0(LETTERS[2:4], " versus A")
      )
  })

We can use
library(dplyr)
library(purrr)
library(stringr)
library(tidyr)
# One row per person, one p-value column per category compared against A.
test_tbl %>%
  split(.$name) %>%
  map_dfr(function(person_tbl) {
    # Reference sample: this person's category A values.
    ref_values <- person_tbl$value[person_tbl$category == 'A']
    person_tbl %>%
      filter(category != 'A') %>%
      group_by(category) %>%
      summarise(out = t.test(value, ref_values)$p.value) %>%
      mutate(category = str_c(category, '_vs_A_p_value'))
  }, .id = 'name') %>%
  pivot_wider(names_from = category, values_from = out)

Related

How to scale up a transmute in tidyverse?

I have a tibble with many variables organised this way:
# Example input: a label column `A` plus numeric columns named <stem><number>.
# `abb3` and `acc4` have no lower-numbered column with the same stem.
tibble(
A = rep("A",10),
xyz1 = rnorm(10),
xyz2 = rnorm(10),
xyz3 = rnorm(10),
abc1 = rnorm(10),
abc2 = rnorm(10),
abb3 = rnorm(10),
acc4 = rnorm(10)
)
where xyz, abc, etc. are placeholder. After the placeholder there is a number. Assume it can be any integer.
In my tibble, that number can be any integer.
I want to transmute it according to the formula
xyzn = xyzn - 'xyzn-1', where n stands for the integer suffix.
Where 'xyzn-1' does not exist, the result can be dropped and left out of the transmute.
Schematic output:
tibble(
A = A
xyz2 = xyz2 - xyz1,
xyz3 = xyz3 - xyz2,
abc2 = abc2 - abc1
)
Perhaps this helps
library(dplyr)
library(tidyr)
df1 %>%
  # Tag every row so its values can be matched up again after reshaping.
  mutate(rn = row_number()) %>%
  # One row per (original row, numeric suffix), one column per name stem.
  pivot_longer(
    cols = -c(A, rn),
    names_to = c(".value", "ind"),
    names_pattern = "(\\D+)(\\d+)",
    names_transform = list(ind = as.integer)
  ) %>%
  arrange(A, rn, ind) %>%
  group_by(A, rn) %>%
  # Difference to the previous suffix; the first suffix has no predecessor.
  mutate(across(-ind, function(col) c(NA, diff(col)))) %>%
  ungroup() %>%
  pivot_wider(names_from = ind, values_from = xyz:acc, names_sep = "") %>%
  select(-rn) %>%
  # Drop columns that contain nothing but NA.
  select(where(function(col) any(complete.cases(col))))
-output
# A tibble: 10 × 4
A xyz2 xyz3 abc2
<chr> <dbl> <dbl> <dbl>
1 A -1.60 1.75 -1.53
2 A 2.89 -3.81 0.0701
3 A -0.657 0.920 -0.912
4 A 0.305 0.395 -0.477
5 A -0.289 1.39 1.38
6 A -0.103 0.426 -1.38
7 A -2.16 1.44 -0.913
8 A -0.260 -0.249 -1.45
9 A -1.15 1.14 -1.42
10 A -0.306 0.198 -0.118
Or may be
# Base R: within each name stem, subtract column n - 1 from column n and bind
# the differences next to the label column.
cbind(
  df1[1],
  do.call(cbind, unname(Filter(length, lapply(
    split.default(df1[-1], sub("\\d+$", "", names(df1)[-1])),
    \(x) {
      idx <- as.integer(sub("\\D+", "", names(x)))
      i1 <- order(idx)
      x <- x[i1]
      # Keep column n only when column n - 1 exists for the same stem, so a
      # gap such as abc1/abc3 yields no difference.
      has_prev <- which(diff(idx[i1]) == 1L) + 1L
      # Stems without any consecutive pair give a zero-column result, which
      # Filter(length, ...) removes.
      x[has_prev] - x[has_prev - 1L]
    }
  ))))
)
-output
A abc2 xyz2 xyz3
1 A -1.52667071 -1.5985160 1.7533450
2 A 0.07013841 2.8939503 -3.8113492
3 A -0.91213998 -0.6573093 0.9197824
4 A -0.47712113 0.3049918 0.3945995
5 A 1.37871603 -0.2886773 1.3933839
6 A -1.37608993 -0.1031296 0.4264927
7 A -0.91313982 -2.1630265 1.4407289
8 A -1.45439105 -0.2598476 -0.2493127
9 A -1.41590040 -1.1490018 1.1383060
10 A -0.11775196 -0.3061306 0.1984115
Usually the {dplyover} package can help with this kind of problem (disclaimer: I'm the maintainer). However, in your specific case the problem is not that easy to solve due to the specific conditions for variable selection.
In the approach below we first construct the variable names that we want to subtract from each other myvars1 and myvars2.
After that we can use dplyover::across2() together with all_of().
See the code comments for what we do in each step:
library(dplyr)
library(stringr)
library(dplyover) # https://timteafan.github.io/dplyover/
# get all variable stems
# NOTE(review): "[0-9]$" here and "\\d$" / "[^1]$" below each match a single
# trailing character, so two-digit suffixes are presumably not handled - confirm.
all_stems <- dplyover::cut_names("[0-9]$", .vars = names(df1))
# exclude stems which don't start with 1
use_stems <- all_stems[paste0(all_stems, 1) %in% names(df1)]
# construct regex pattern to select all vars with > 1
patrn <- paste0("(", paste(use_stems, collapse = "|"), ")[^1]$")
# select vars with > 1
myvars1 <- grep(patrn, names(df1), value = TRUE)
# select vars to subtract from `myvars1` (same name with the last digit reduced by 1)
myvars2 <- str_replace(myvars1, "\\d$", ~ as.numeric(.x) - 1)
# use `dplyover::across2()` with `all_of()`
df1 %>%
transmute(
A = A, # dplyover doesn't support the `.keep` argument so we need a workaround
across2(all_of(myvars1),
all_of(myvars2),
~ .x - .y,
.names = "{xcol}")
)
#> # A tibble: 10 × 4
#> A xyz2 xyz3 abc2
#> <chr> <dbl> <dbl> <dbl>
#> 1 A 0.847 -1.19 0.413
#> 2 A 1.00 0.946 -3.26
#> 3 A 0.856 -1.11 -2.62
#> 4 A -0.325 1.47 1.11
#> 5 A -1.18 0.0830 2.78
#> 6 A -2.65 -0.520 -0.337
#> 7 A 0.197 -0.447 0.347
#> 8 A -0.484 1.18 -0.717
#> 9 A -1.94 1.81 1.05
#> 10 A -3.80 1.36 0.202
The data from OP:
# Example data from the question.
# The trailing markers appear to show which selection each column ends up in:
# 1 = `myvars1` (subtracted from), 2 = `myvars2` (subtracted) - confirm.
df1 <- tibble(
A = rep("A",10),
xyz1 = rnorm(10), # 2
xyz2 = rnorm(10), # 1 2
xyz3 = rnorm(10), # 1
abc1 = rnorm(10), # 2
abc2 = rnorm(10), # 1
abb3 = rnorm(10), #
acc4 = rnorm(10) #
)
Created on 2023-01-07 with reprex v2.0.2

Nested group_by operation in dplyr: does the second call include the first call?

In my data below, I first want to group_by(study), get the mean of X for each unique study value, and subtract it from each X value in that study.
Second, while group_by(study) is still in effect, I want to further group_by(outcome) within each study, get the mean of X for each unique outcome value within each study, and subtract it from each X value in that outcome and study.
I'm using the following workaround, but it doesn't seem to achieve my goal, because the group_by(outcome) call appears to ignore the previous group_by(study).
Is there a way to achieve what I described above?
library(dplyr)
# Reproducible toy data: 2 studies x 2 outcomes, each combination twice.
set.seed(0)
(data <- expand.grid(study = 1:2, outcome = rep(1:2, times = 2)))
# One standard-normal draw per row.
data[["X"]] <- rnorm(nrow(data))
# Sort by study; the outer parentheses print the result.
(data <- data %>% arrange(study))
# study outcome X
#1 1 1 1.2629543
#2 1 2 1.3297993
#3 1 1 0.4146414
#4 1 2 -0.9285670
#5 2 1 -0.3262334
#6 2 2 1.2724293
#7 2 1 -1.5399500
#8 2 2 -0.2947204
# Attempt from the question: centre X within study, then within outcome.
data %>%
group_by(study) %>%
mutate(X_between_st = mean(X), X_within_st = X-X_between_st) %>%
# NOTE: without `.add = TRUE` this replaces the grouping by study, so the
# means below are taken over outcome only.
group_by(outcome) %>%
mutate(X_between_ou = mean(X), X_within_ou = X-X_between_ou)
Yes, the second group_by overwrites the previous group_by which can be checked with group_vars function.
library(dplyr)
# Show which grouping is active after a second, plain group_by().
data %>%
group_by(study) %>%
mutate(X_between_st = mean(X), X_within_st = X-X_between_st) %>%
group_by(outcome) %>%
# Lists the grouping variables in effect at this point of the pipeline.
group_vars()
#[1] "outcome"
As you can see at this stage the data is grouped only by outcome.
You can achieve your goal by including .add = TRUE in group_by which will add to the existing groups.
# Same check, but adding `outcome` to the existing grouping.
data %>%
group_by(study) %>%
mutate(X_between_st = mean(X), X_within_st = X-X_between_st) %>%
# `.add = TRUE` keeps `study` and adds `outcome` as a second grouping variable.
group_by(outcome, .add = TRUE) %>%
group_vars()
#[1] "study" "outcome"
So ultimately, now the code would become -
data %>%
  # Level 1: centre X on its study mean.
  group_by(study) %>%
  mutate(
    X_between_st = mean(X),
    X_within_st = X - X_between_st
  ) %>%
  # Level 2: centre X on the mean of its outcome within the study.
  group_by(outcome, .add = TRUE) %>%
  mutate(
    X_between_ou = mean(X),
    X_within_ou = X - X_between_ou
  )
# study outcome X X_between_st X_within_st X_between_ou X_within_ou
# <int> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 1 1 1.26 0.520 0.743 0.839 0.424
#2 1 2 1.33 0.520 0.810 0.201 1.13
#3 1 1 0.415 0.520 -0.105 0.839 -0.424
#4 1 2 -0.929 0.520 -1.45 0.201 -1.13
#5 2 1 -0.326 -0.222 -0.104 -0.933 0.607
#6 2 2 1.27 -0.222 1.49 0.489 0.784
#7 2 1 -1.54 -0.222 -1.32 -0.933 -0.607
#8 2 2 -0.295 -0.222 -0.0726 0.489 -0.784
We may use cur_group
# Report the active grouping variable name(s) via names(cur_group()).
data %>%
group_by(study) %>%
summarise(grps = names(cur_group())) %>%
# The value repeats for every group, so the first row is enough.
slice(1) %>%
pull(grps)
[1] "study"

Stack (rbind) two different table or tibble together

I want to stack two different table or tibble in R. But if I use rbind() or bind_rows(), I have a table but that is not what I want. Both don't have any common ID or variables. For example,
# Two unrelated summary tables. The summary columns are unnamed, so they take
# their names from the expression text (e.g. `mean(mpg)`).
xx <- mtcars %>%
group_by(vs) %>%
summarize(mean(mpg), sd(mpg))
yy <- mtcars %>%
group_by(am) %>%
summarise(mean(wt), sd(wt))
I want to have this outcome:
am mean(wt) sd(wt)
0 3.77 0.777
1 2.41 0.617
vs mean(mpg) sd(mpg)
0 16.6 3.86
1 24.6 5.38
I have tried multiple different ways to do it, but haven't had a luck because of my limited R skill. I will appreciate if someone helps this problem. Thank you.
Try formatting the data with common standard names:
# Format data: prepend each table's column names as its first row.
nxx <- rbind(colnames(xx), xx)
nyy <- rbind(colnames(yy), yy)
# Assign common names V1, V2, ... so both tables line up by position.
# seq_len() stays empty for a zero-column table, whereas 1:0 counts down.
nxx <- set_names(nxx, paste0('V', seq_len(ncol(nxx))))
nyy <- set_names(nyy, paste0('V', seq_len(ncol(nyy))))
# Bind
ndf <- rbind(nxx, nyy)
Output:
# A tibble: 6 x 3
V1 V2 V3
<chr> <chr> <chr>
1 vs mean(mpg) sd(mpg)
2 0 16.6166666666667 3.86069941849919
3 1 24.5571428571429 5.37897821090647
4 am mean(wt) sd(wt)
5 0 3.76889473684211 0.777400146838225
6 1 2.411 0.616981631277085
We can use tidyverse methods. Convert the columns to character in each dataset, then add the first row (add_row) as the column name, and use bind_rows to bind the datasets
library(dplyr)
library(tibble)
library(stringr)
# Turn a summary table into character rows: its column names first, then its
# values rounded to 2 digits, under the positional names v1, v2, ...
as_labelled_rows <- function(tbl) {
  tbl %>%
    mutate(across(everything(), ~ as.character(round(., 2)))) %>%
    # Insert the original column names as the first data row.
    add_row(!!! setNames(names(.), names(.)), .before = 1) %>%
    rename_with(~ str_c('v', seq_along(.)))
}

# Same steps for both tables, then stack them.
bind_rows(as_labelled_rows(xx), as_labelled_rows(yy))
# A tibble: 6 x 3
# v1 v2 v3
# <chr> <chr> <chr>
#1 vs mean(mpg) sd(mpg)
#2 0 16.62 3.86
#3 1 24.56 5.38
#4 am mean(wt) sd(wt)
#5 0 3.77 0.78
#6 1 2.41 0.62
Or this can be made slightly more compact
library(zeallot)
library(stringr)
library(purrr)
# Apply the same steps to both tables and row-bind the results.
list(xx, yy) %>%
# NOTE(review): destructure() presumably splits the table into a list of
# columns - confirm against the zeallot documentation.
map_dfr( ~ destructure(.x) %>%
# Each column becomes its name (.y) followed by its values rounded to 2 digits.
imap_dfr(~ c(.y, round(.x, 2))) %>%
# Replace the column names with v1, v2, ...
rename_with(~ str_c('v', seq_along(.))))
# A tibble: 6 x 3
# v1 v2 v3
# <chr> <chr> <chr>
#1 vs mean(mpg) sd(mpg)
#2 0 16.62 3.86
#3 1 24.56 5.38
#4 am mean(wt) sd(wt)
#5 0 3.77 0.78
#6 1 2.41 0.62
Or using base R
# Base R: put each table's header above its rounded values, drop the column
# names so the tables line up by position, then stack them.
do.call(
  rbind,
  Map(function(tbl) {
    with_header <- rbind(colnames(tbl), round(tbl, 2))
    unname(with_header)
  }, list(xx, yy))
)

R - Merging columns and renaming values

To perform an ANOVA I am looking to merge this data in a new variable called CompensationGroup. Thereby, people who have been in "Compensationproject1" should be displayed as 1, people who have been in "Compensationproject2" should be displayed as 2...
library(tidyverse)
# Collapse the three indicator columns into one `CompensationGroup` column.
data %>%
  # Recode each indicator to its project number; everything else becomes NA.
  mutate(Compensationproject2 = case_when(
    Compensationproject2 == 1 ~ 2,
    TRUE ~ NA_real_
  )) %>%
  mutate(Compensationproject3 = case_when(
    Compensationproject3 == 1 ~ 3,
    TRUE ~ NA_real_
  )) %>%
  # Paste the non-missing codes together, keeping the source columns.
  unite("CompensationGroup", c(Compensationproject1, Compensationproject2, Compensationproject3), remove = FALSE, na.rm = TRUE) %>%
  # Keep the first digit as the group label (a character value).
  mutate(CompensationGroup = str_extract(CompensationGroup, '\\d'))
Here is an option that does not require the use of pivot. However, it requires a bunch of other tidyverse functions such as mutate, case_when, unite and str_extract.
library(tidyverse)
# Toy data: one indicator column per project, 1 where the person took part.
df <- tribble(~id, ~Comp1, ~Comp2, ~Comp3,
              1, 1, NA, NA,
              2, NA, 1, NA,
              3, NA, NA, 1)
df %>%
  # Recode each indicator to its project number; everything else becomes NA.
  mutate(Comp2 = case_when(
    Comp2 == 1 ~ 2,
    TRUE ~ NA_real_
  )) %>%
  mutate(Comp3 = case_when(
    Comp3 == 1 ~ 3,
    TRUE ~ NA_real_
  )) %>%
  # Paste the non-missing codes together, keeping the source columns.
  unite("group", c(Comp1, Comp2, Comp3), remove = FALSE, na.rm = TRUE) %>%
  # Keep the first digit as the group label (a character value).
  mutate(group = str_extract(group, '\\d'))
Good luck!
In the absence of a reproducible example I give you an example with toy data using the pivot_longer function from the tidyverse package.
library(tidyverse)
# Toy wide-format data: one row per Category, one column per measure.
data <- tibble::tribble(
~Category, ~MeasureA, ~MeasureB, ~MeasureC, ~MeasureD,
1, 0.0930158825381708, 0.0138786762728455, 0.0659906858706141, 0.0677226540871513,
2, 0.103201113378404, 0.0149293889876177, 0.0644022070960172, 0.0605524137103402,
3, 0.12028743617311, 0.0209951412575897, 0.0598004419601402, 0.0584817396677436,
4, 0.0996307145670469, 0.016288452837476, 0.0624144782432749, 0.0538275028212587
)
# Print the table.
data
# A tibble: 4 x 5
Category MeasureA MeasureB MeasureC MeasureD
<dbl> <dbl> <dbl> <dbl> <dbl>
1 1 0.0930 0.0139 0.0660 0.0677
2 2 0.103 0.0149 0.0644 0.0606
3 3 0.120 0.0210 0.0598 0.0585
4 4 0.0996 0.0163 0.0624 0.0538
The following tells R to take columns 2:5 which hold the values, put the values into a column called Value and to put the label from the column names into a column called Measurement whilst removing the Measure label that is prefixed in the Measure columns.
# Wide to long: columns 2:5 become Measurement (name without the "Measure"
# prefix) and Value.
pivot_longer(
  data,
  cols = 2:5,
  names_to = "Measurement",
  names_prefix = "Measure",
  values_to = "Value"
)
# A tibble: 16 x 3
Category Measurement Value
<dbl> <chr> <dbl>
1 1 A 0.0930
2 1 B 0.0139
3 1 C 0.0660
4 1 D 0.0677
5 2 A 0.103
6 2 B 0.0149
7 2 C 0.0644
8 2 D 0.0606
9 3 A 0.120
10 3 B 0.0210
11 3 C 0.0598
12 3 D 0.0585
13 4 A 0.0996
14 4 B 0.0163
15 4 C 0.0624
16 4 D 0.0538
Found an answer myself:
# Start everyone in group 1, then overwrite with 2 or 3 wherever the matching
# project column is non-missing (the higher project number wins).
data[, "CompensationGroup"] <- 1
for (i in 2:3) {
  # `[[` returns a plain vector for data frames and tibbles alike, and the
  # logical mask replaces the `which(is.na(...) == F)` round trip.
  in_project <- !is.na(data[[paste0("Compensationproject", i)]])
  data[in_project, "CompensationGroup"] <- as.numeric(i)
}

How to write a function that conducts paired t-tests on all group/variable combinations in a data frame

I have a data frame similar to data created below:
# Build a 50-row example data set: 10 IDs x 5 chemical variables, a Location
# label, and ten integer measurement columns sampled from 1:100.
# TRUE/FALSE are spelled out because `T` and `F` can be reassigned.
ID <- data.frame(ID = rep(c(12, 122, 242, 329, 595, 130, 145, 245, 654, 878), each = 5))
Var <- data.frame(Variable = c("Copper", "Iron", "Lead", "Zinc", "CaCO"))
n <- 10
# Repeat the five variable names once per ID.
Variable <- do.call("rbind", replicate(n, Var, simplify = FALSE))
Location <- rep(c("Alpha", "Beta", "Gamma"), times = c(20, 20, 10))
Location <- data.frame(Location)
# Seed once so the ten sampled columns are reproducible.
set.seed(1)
FirstPt <- data.frame(FirstPt = sample(1:100, 50, replace = TRUE))
LastPt <- data.frame(LastPt = sample(1:100, 50, replace = TRUE))
First3 <- data.frame(First3 = sample(1:100, 50, replace = TRUE))
First5 <- data.frame(First5 = sample(1:100, 50, replace = TRUE))
First7 <- data.frame(First7 = sample(1:100, 50, replace = TRUE))
First10 <- data.frame(First10 = sample(1:100, 50, replace = TRUE))
Last3 <- data.frame(Last3 = sample(1:100, 50, replace = TRUE))
Last5 <- data.frame(Last5 = sample(1:100, 50, replace = TRUE))
Last7 <- data.frame(Last7 = sample(1:100, 50, replace = TRUE))
Last10 <- data.frame(Last10 = sample(1:100, 50, replace = TRUE))
data <- cbind(ID, Location, Variable, FirstPt, LastPt, First3, First5, First7,
              First10, Last3, Last5, Last7, Last10)
This may be a two part question, but I want to write a function that groups all Variables that are the same (for instance, all the observations that are Copper) and conducts a paired t test between all possible combinations of the numeric columns (FirstPt:Last10). I want it to return the p values in a data frame like this:
Test P-Value
FirstPt.vs.LastPt …
FirstPt.vs.First3 …
ect... …
This will likely be a second function, but I also want to do this after the observations are grouped by Location so that the output data frame will look like this:
Test P-Value
FirstPt.vs.LastPt.InAlpha
FirstPt.vs.LastPt.InBeta
ect...
You can do both of these with one function:
library(tidyverse)
#' Pairwise t-tests between the measurement columns within each group
#'
#' @param .data Data frame holding the integer measurement columns
#'   `FirstPt:Last10` and the grouping columns.
#' @param groups Grouping columns wrapped in `vars()`, e.g. `vars(Variable)`.
#' @param paired Passed on to `t.test()`. `FALSE` (the default) keeps the
#'   unpaired test; `TRUE` runs paired tests.
#' @return A tibble with one row per group and column pair: `test`
#'   ("<col1>.vs.<col2>.<group values>") and `p.value`.
t.test.by.group.combos <- function(.data, groups, paired = FALSE){
  # Turn the captured `vars(a, b)` call into c("a", "b"): coercing the call to
  # character puts the function name first, which `[-1]` drops.
  by <- gsub(x = rlang::quo_get_expr(enquo(groups)), pattern = "\\((.*)?\\)", replacement = "\\1")[-1]
  .data %>%
    group_by(!!!groups) %>%
    # Keep the grouping columns plus the integer measurement columns.
    select_if(is.integer) %>%
    group_split() %>%
    map(.,
        ~pivot_longer(., cols = (FirstPt:Last10), names_to = "name", values_to = "val") %>%
          nest(data = val) %>%
          # Number the measurement columns so every unordered pair is kept
          # exactly once. De-duplicating on the p-value instead would also
          # drop distinct pairs that happen to share a p-value.
          mutate(idx = row_number()) %>%
          full_join(., ., by = by) %>%
          filter(idx.x < idx.y) %>%
          mutate(test = paste(name.x, "vs", name.y, !!!groups, sep = "."),
                 p.value = map2_dbl(data.x, data.y,
                                    ~t.test(unlist(.x), unlist(.y), paired = paired)$p.value)) %>%
          select(test, p.value)
    ) %>%
    bind_rows()
}
t.test.by.group.combos(data, vars(Variable))
#> # A tibble: 225 x 2
#> test p.value
#> <chr> <dbl>
#> 1 FirstPt.vs.LastPt.CaCO 0.511
#> 2 FirstPt.vs.First3.CaCO 0.184
#> 3 FirstPt.vs.First5.CaCO 0.494
#> 4 FirstPt.vs.First7.CaCO 0.354
#> 5 FirstPt.vs.First10.CaCO 0.893
#> 6 FirstPt.vs.Last3.CaCO 0.496
#> 7 FirstPt.vs.Last5.CaCO 0.909
#> 8 FirstPt.vs.Last7.CaCO 0.439
#> 9 FirstPt.vs.Last10.CaCO 0.146
#> 10 LastPt.vs.First3.CaCO 0.578
#> # … with 215 more rows
t.test.by.group.combos(data, vars(Variable, Location))
#> # A tibble: 674 x 2
#> test p.value
#> <chr> <dbl>
#> 1 FirstPt.vs.LastPt.CaCO.Alpha 0.850
#> 2 FirstPt.vs.First3.CaCO.Alpha 0.822
#> 3 FirstPt.vs.First5.CaCO.Alpha 0.895
#> 4 FirstPt.vs.First7.CaCO.Alpha 0.810
#> 5 FirstPt.vs.First10.CaCO.Alpha 0.645
#> 6 FirstPt.vs.Last3.CaCO.Alpha 0.870
#> 7 FirstPt.vs.Last5.CaCO.Alpha 0.465
#> 8 FirstPt.vs.Last7.CaCO.Alpha 0.115
#> 9 FirstPt.vs.Last10.CaCO.Alpha 0.474
#> 10 LastPt.vs.First3.CaCO.Alpha 0.991
#> # … with 664 more rows
This is kind of a lengthy function, but in general we group by the groups argument, then we select the groups and any integer columns, then we split the dataframe by the groups. After, we map all the combinations of variables and perform t.tests for each combo. Lastly, we rejoin all the groups into one dataframe.
I think this is what you want. The key was to use group_by and do from tidyverse.
# t-test p-values for every pair of measurement columns (column 4 onwards),
# computed separately for each Location.
measure_cols <- colnames(data)[4:ncol(data)]
# combn() yields the pairs in the same order as nested i < j loops.
col_pairs <- combn(measure_cols, 2, simplify = FALSE)
# Collect one small table per pair and bind once, instead of growing `df`
# with rbind() on every iteration.
df <- lapply(col_pairs, function(pair) {
  data %>%
    group_by(Location) %>%
    # The default t.test() method has no `data` argument, so none is passed.
    do(data.frame(pval = t.test(.[[pair[1]]], .[[pair[2]]])$p.value)) %>%
    ungroup() %>%
    mutate(Test = paste0(pair[1], '.vs.', pair[2]))
}) %>%
  bind_rows()
df$Test <- paste0(df$Test, '.In', df$Location)
You can probably achieve what you want using the code below:
library(dplyr)
library(tidyr)
data %>%
  pivot_longer(cols = FirstPt:Last10) %>%
  group_by(Variable) %>%
  # combn() must see each column name once. The long-format `name` column
  # repeats every name once per observation, which would otherwise produce
  # thousands of duplicated pairs and self-comparisons such as FirstPt_FirstPt.
  summarise(p_value = list(combn(unique(name), 2, function(x)
    t.test(value[name == x[1]], value[name == x[2]])$p.value)),
    test = list(combn(unique(name), 2, paste, collapse = "_"))) %>%
  unnest(cols = c(test, p_value))
# Variable p_value test
# <fct> <dbl> <chr>
# 1 CaCO 0.915 FirstPt_LastPt
# 2 CaCO 0.529 FirstPt_First3
# 3 CaCO 0.337 FirstPt_First5
# 4 CaCO 0.350 FirstPt_First7
# 5 CaCO 0.395 FirstPt_First10
# 6 CaCO 0.765 FirstPt_Last3
# 7 CaCO 0.204 FirstPt_Last5
# 8 CaCO 0.873 FirstPt_Last7
# 9 CaCO 0.479 FirstPt_Last10
# … with 216 more rows
To do it grouped by Location you can add that into group_by command and keep rest of the code as it is.

Resources