Dataset Dput
structure(list(V1 = structure(c(4, 4, 2, 2, 2, 2, 2, 2, 4, 4,
2, 3, 2, 3, 4, 2, 2, 2, 3, 3, 2, 3, 1, 3, 3, 3, 3, 4, 1, 2, 4,
1, 2, 3, 2, 3, 1, 1, 2, 2, 4, 3, 2, 1, 2, 3, 3, 4, 3, 3, 2, 3,
1, 4, 3, 2, 3, 4, 1, 3, 3, 3, 2, 2, 1, 2, 3, 4, 4, 2, 4, 3, 2,
3, 3, 3, 3, 2, 4, 3, 3, 3, 2, 2, 3, 4, 2, 4, 4, 2, 2, 3, 3), format.spss = "F8.0"),
V2 = structure(c(4, 4, 3, 4, 3, 4, 3, 2, 4, 1, 3, 3, 3, 4,
3, 3, 2, 3, 4, 3, 1, 4, 2, 3, 4, 2, 4, 3, 3, 2, 3, 2, 3,
3, 4, 3, 3, 3, 3, 3, 3, 2, 4, 2, 2, 2, 4, 3, 4, 4, 2, 4,
2, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 4, 3, 3, 4, 4, 4, 4, 4,
3, 4, 3, 3, 3, 4, 2, 4, 3, 4, 3, 3, 2, 3, 3, 4, 3, 4, 3,
4, 4, 3), format.spss = "F8.0"), V3 = structure(c(4, 4, 4,
4, 4, 4, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4), format.spss = "F8.0"),
V4 = structure(c(4, 4, 3, 4, 3, 4, 2, 1, 3, 2, 3, 1, 4, 4,
2, 3, 2, 2, 2, 4, 1, 2, 2, 2, 3, 2, 3, 2, 2, 1, 3, 1, 1,
2, 4, 1, 1, 2, 3, 2, 2, 1, 1, 1, 3, 2, 4, 3, 3, 3, 3, 3,
3, 4, 3, 1, 4, 3, 4, 3, 2, 3, 2, 1, 4, 1, 4, 1, 2, 4, 4,
4, 3, 3, 3, 2, 2, 1, 4, 3, 2, 3, 2, 1, 3, 4, 1, 2, 4, 3,
4, 2, 2), format.spss = "F8.0"), V5 = structure(c(3, 3, 3,
4, 3, 4, 3, 1, 1, 1, 1, 2, 1, 2, 2, 2, 1, 2, 2, 2, 3, 2,
2, 2, 2, 4, 2, 3, 2, 3, 4, 1, 4, 2, 3, 3, 2, 2, 3, 2, 2,
3, 3, 2, 3, 3, 3, 2, 2, 2, 3, 2, 3, 3, 2, 2, 3, 3, 2, 3,
2, 2, 3, 3, 3, 2, 3, 3, 3, 4, 3, 2, 3, 3, 3, 3, 3, 3, 4,
3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 4, 3, 3), format.spss = "F8.0"),
V6 = structure(c(4, 4, 3, 4, 3, 4, 4, 1, 3, 3, 3, 3, 2, 3,
4, 2, 4, 3, 3, 3, 3, 4, 4, 3, 3, 3, 4, 4, 4, 3, 4, 4, 3,
3, 3, 4, 2, 2, 3, 3, 3, 4, 2, 4, 3, 4, 4, 4, 3, 4, 2, 4,
3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 3, 1, 4, 4, 4, 4, 4, 4,
4, 3, 4, 4, 4, 4, 2, 4, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3,
4, 4, 4), format.spss = "F8.0"), V7 = structure(c(4, 4, 2,
4, 2, 4, 4, 3, 3, 3, 2, 2, 4, 4, 3, 3, 1, 4, 3, 3, 1, 2,
4, 3, 4, 2, 4, 4, 3, 3, 2, 2, 3, 2, 4, 3, 3, 3, 3, 3, 3,
1, 4, 3, 2, 2, 4, 3, 4, 4, 2, 4, 2, 3, 4, 3, 3, 3, 4, 3,
4, 4, 3, 4, 4, 3, 4, 4, 4, 4, 3, 4, 4, 4, 3, 3, 4, 3, 4,
3, 3, 3, 3, 2, 2, 4, 4, 4, 4, 2, 4, 4, 3), format.spss = "F8.0"),
V8 = structure(c(4, 4, 2, 1, 2, 1, 1, 1, 3, 3, 2, 3, 2, 3,
4, 2, 2, 2, 3, 3, 2, 3, 1, 3, 3, 3, 3, 4, 1, 2, 4, 1, 2,
3, 2, 3, 1, 1, 2, 2, 3, 1, 1, 1, 2, 3, 3, 4, 3, 3, 2, 3,
1, 3, 4, 2, 3, 4, 1, 3, 3, 3, 2, 2, 1, 2, 3, 4, 4, 2, 4,
3, 4, 4, 4, 4, 3, 2, 4, 3, 3, 3, 2, 2, 3, 4, 2, 4, 4, 2,
1, 3, 4), format.spss = "F8.0"), V9 = structure(c(4, 4, 4,
4, 4, 4, 4, 4, 3, 3, 2, 3, 3, 3, 3, 2, 3, 3, 2, 3, 4, 4,
4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 4, 3, 2, 4, 3, 4,
4, 4, 4, 4, 4, 3, 3, 3, 3, 4, 4, 4, 4, 4, 3, 4, 3, 2, 4,
3, 3, 4, 4, 4, 3, 4, 4, 4, 4, 4, 3, 4, 3, 4, 3, 4, 4, 4,
4, 3, 4, 4, 4, 4, 4, 3, 2, 4, 4, 4, 4, 4), format.spss = "F8.0"),
V10 = structure(c(4, 4, 2, 4, 2, 4, 3, 2, 3, 3, 3, 2, 4,
4, 2, 2, 1, 3, 4, 4, 1, 4, 2, 3, 3, 2, 4, 3, 2, 3, 3, 1,
3, 2, 4, 3, 2, 3, 3, 3, 3, 1, 2, 4, 2, 3, 4, 4, 3, 3, 2,
4, 2, 4, 3, 3, 4, 3, 4, 3, 4, 4, 4, 1, 4, 3, 3, 4, 3, 4,
4, 3, 3, 3, 3, 3, 4, 1, 4, 3, 3, 3, 3, 2, 3, 4, 4, 2, 4,
2, 4, 4, 3), format.spss = "F8.0"), V11 = structure(c(3,
3, 1, 4, 1, 4, 1, 1, 1, 1, 2, 1, 1, 1, 3, 2, 2, 2, 2, 1,
2, 3, 1, 2, 3, 3, 2, 1, 2, 2, 2, 3, 2, 2, 3, 2, 1, 2, 2,
1, 1, 4, 3, 1, 3, 2, 3, 1, 2, 1, 2, 1, 2, 2, 1, 2, 2, 3,
2, 2, 2, 2, 2, 2, 1, 1, 1, 3, 3, 4, 2, 1, 2, 2, 3, 3, 3,
3, 4, 3, 2, 3, 3, 2, 2, 2, 2, 1, 3, 1, 4, 1, 3), format.spss = "F8.0"),
V12 = structure(c(4, 4, 3, 2, 3, 2, 3, 1, 3, 3, 3, 3, 2,
3, 3, 2, 4, 3, 3, 4, 4, 3, 3, 4, 4, 3, 3, 3, 4, 3, 4, 4,
3, 3, 3, 4, 2, 2, 3, 3, 3, 4, 2, 4, 3, 4, 4, 4, 3, 4, 2,
4, 3, 3, 3, 3, 4, 3, 3, 2, 2, 1, 1, 3, 1, 4, 4, 4, 4, 4,
4, 4, 3, 3, 2, 2, 2, 2, 4, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
3, 2, 3, 4), format.spss = "F8.0")), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -93L))
EFA Before Scree Plot
I have done the work of performing exploratory factor analysis on the data:
fa3 <- fa(hwk2,
nfactors = 3,
n.obs = 93,
rotate = "oblimin",
max.iter = 100)
fa3
Which gives me this:
MR1 MR3 MR2 h2 u2 com
V1 0.03 0.87 -0.05 0.77 0.23 1.0
V2 0.75 0.05 0.09 0.63 0.37 1.0
V3 0.13 0.06 0.67 0.53 0.47 1.1
V4 0.50 0.07 0.08 0.31 0.69 1.1
V5 0.03 -0.06 0.88 0.77 0.23 1.0
V6 0.00 0.47 0.32 0.37 0.63 1.8
V7 0.80 -0.08 -0.04 0.60 0.40 1.0
V8 0.05 0.88 -0.03 0.80 0.20 1.0
V9 -0.22 0.02 0.58 0.34 0.66 1.3
V10 0.75 0.10 0.01 0.63 0.37 1.0
V11 0.03 0.00 0.53 0.29 0.71 1.0
V12 -0.24 0.52 0.14 0.28 0.72 1.6
MR1 MR3 MR2
SS loadings 2.18 2.09 2.03
Proportion Var 0.18 0.17 0.17
Cumulative Var 0.18 0.36 0.53
Proportion Explained 0.35 0.33 0.32
Cumulative Proportion 0.35 0.68 1.00
With factor correlations of
MR1 MR3 MR2
MR1 1.00 0.32 0.19
MR3 0.32 1.00 0.15
MR2 0.19 0.15 1.00
Basic Scree
Making a normal scree plot from there is quite simple. I just add this to my script:
scree(hwk2,
pc=T,
factors = F,
main = "Scree Plot of Eigenvalues")
Which creates this:
What I Want
However, I want to graph simulated parallel analysis with it. In Jamovi this is super easy to accomplish:
However, I don't see an option for this so far. There is another version of scree I have tried fa.parallel but the legend comes out really strange:
fa.parallel(
hwk2,
n.obs = 93,
fm = "minres",
nfactors = 3,
main = "Parallel Analysis Scree Plots",
n.iter = 100,
error.bars = FALSE,
se.bars = FALSE,
SMC = FALSE,
ylabel = NULL,
show.legend = F,
sim = TRUE,
quant = .95,
use = "pairwise",
plot = TRUE,
correct = .5
)
I get either this if I remove the legend:
Or I get this annoying one with the legend:
Basically, I just need factor analysis and don't need principal components in the plot, but I can't figure out how to remove it.
The only problem is that there are Heywood cases, so the fa analysis isn't trustworthy.
library(psych)
fa.parallel(
hwk2,
n.obs = 93,
fa = "fa", # you want only "fa", not "pc"
show.legend = TRUE, # show legend
fm = "minres",
nfactors = 3,
main = "Parallel Analysis Scree Plots",
n.iter = 100,
error.bars = FALSE,
se.bars = FALSE,
SMC = FALSE,
ylabel = NULL,
sim = TRUE,
quant = .95,
use = "pairwise",
plot = TRUE,
correct = .5
)
I've converted a data frame into wide format and now want to compute paired t-tests to obtain p-values. I have managed to do this for each pair of columns individually, but it's a lot more code than I feel is necessary. I'm still very new to R, data and coding generally, and couldn't easily see a solution here on Stack Overflow.
My wide data frame is:
> head(df_wide)
# A tibble: 6 x 21
Assessor `Appearance1 `Appearance2 `Aroma_1 `Aroma_2 `Flavour_1 `Flavour_2
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 10 10 10 10 10 10
2 6 7 7 5 8 4
# ... with 14 more variables
I want to perform a paired T-Test over the attributes, i.e. Appearance1 and Appearance2, Aroma1 and Aroma2, etc. The 14 other variables are all <dbl> and are also attributes to be included as paired columns for the T-Test.
Ideally, the output would be a vector of just the p-values, rather than having all the information. I've managed to do that coding for individual pairs, but I wanted to know if this would be possible to do as part of performing the T-Test over multiple pairs of columns.
Here is the code I have for the first two attributes:
p_values <- c(t.test(df_wide$`Appearance1`, df_wide$`Appearance2`, paired = T)[["p.value"]],
t.test(df_wide$`Aroma1`, df_wide$`Aroma2`, paired = T)[["p.value"]])
This creates the vector I want, but is cumbersome and error-prone. Ideally, I'd be able to perform it over all the pairs at once without needing to use column names.
I do have the original data frame in long format, if it would be easier to do it using that (EDIT: used dput() for first 20 rows instead of head():
> dput(df_test[1:20,])
structure(list(Assessor = c(1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10),
Product = c("MC", "MV", "MC", "MV", "MV", "MC", "MC", "MV", "MV", "MC", "MC", "MV", "MC", "MV", "MC", "MV", "MV", "MC", "MV", "MC"),
Appearance = c(10, 10, 6, 7, 9, 6, 7, 8, 9, 8, 10, 8, 6, 6, 9, 8, 8, 8, 9, 9),
Aroma = c(10, 10, 7, 5, 9, 8, 6, 7, 5, 7, 9, 8, 6, 6, 5, 3, 6, 7, 9, 6),
Flavour = c(10, 10, 8, 4, 10, 7, 7, 6, 8, 8, 9, 10, 8, 8, 6, 8, 7, 9, 9, 8),
Texture = c(10, 10, 8, 8, 9, 6, 7, 8, 8, 8, 9, 10, 8, 8, 9, 8, 8, 9, 9, 8),
`JAR Colour` = c(3, 2, 2, 3, 3, 3, 3, 3, 3, 2, 3, 2, 3, 2, 3, 3, 3, 3, 3, 3),
`JAR Strength Chocolate` = c(2, 2, 3, 2, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 3, 3, 2),
`JAR Strength Vanilla` = c(3, 3, 3, 2, 3, 2, 3, 3, 2, 3, 2, 3, 3, 3, 2, 2, 3, 3, 2, 3),
`JAR Sweetness` = c(2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 2, 3, 3, 3, 3, 2, 3, 3, 3, 3),
`JAR Creaminess` = c(3, 3, 3, 3, 3, 1, 3, 2, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3),
`Overall Acceptance` = c(9, 10, 8, 4, 10, 5, 7, 7, 8, 8, 9, 10, 8, 8, 8, 8, 8, 9, 8, 8)),
row.names = c(NA, -20L), class = c("tbl_df", "tbl", "data.frame"))
The Product variable is the one which was used to make the paired columns in the wide format data frame. Thanks in advance.
if I understand correctly
df <- structure(list(Assessor = c(1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10),
Product = c("MC", "MV", "MC", "MV", "MV", "MC", "MC", "MV", "MV", "MC", "MC", "MV", "MC", "MV", "MC", "MV", "MV", "MC", "MV", "MC"),
Appearance = c(10, 10, 6, 7, 9, 6, 7, 8, 9, 8, 10, 8, 6, 6, 9, 8, 8, 8, 9, 9),
Aroma = c(10, 10, 7, 5, 9, 8, 6, 7, 5, 7, 9, 8, 6, 6, 5, 3, 6, 7, 9, 6),
Flavour = c(10, 10, 8, 4, 10, 7, 7, 6, 8, 8, 9, 10, 8, 8, 6, 8, 7, 9, 9, 8),
Texture = c(10, 10, 8, 8, 9, 6, 7, 8, 8, 8, 9, 10, 8, 8, 9, 8, 8, 9, 9, 8),
`JAR Colour` = c(3, 2, 2, 3, 3, 3, 3, 3, 3, 2, 3, 2, 3, 2, 3, 3, 3, 3, 3, 3),
`JAR Strength Chocolate` = c(2, 2, 3, 2, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 3, 3, 2),
`JAR Strength Vanilla` = c(3, 3, 3, 2, 3, 2, 3, 3, 2, 3, 2, 3, 3, 3, 2, 2, 3, 3, 2, 3),
`JAR Sweetness` = c(2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 2, 3, 3, 3, 3, 2, 3, 3, 3, 3),
`JAR Creaminess` = c(3, 3, 3, 3, 3, 1, 3, 2, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3),
`Overall Acceptance` = c(9, 10, 8, 4, 10, 5, 7, 7, 8, 8, 9, 10, 8, 8, 8, 8, 8, 9, 8, 8)),
row.names = c(NA, -20L), class = c("tbl_df", "tbl", "data.frame"))
head(df)
#> # A tibble: 6 x 12
#> Assessor Product Appearance Aroma Flavour Texture `JAR Colour`
#> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1 MC 10 10 10 10 3
#> 2 1 MV 10 10 10 10 2
#> 3 2 MC 6 7 8 8 2
#> 4 2 MV 7 5 4 8 3
#> 5 3 MV 9 9 10 9 3
#> 6 3 MC 6 8 7 6 3
#> # ... with 5 more variables: JAR Strength Chocolate <dbl>,
#> # JAR Strength Vanilla <dbl>, JAR Sweetness <dbl>, JAR Creaminess <dbl>,
#> # Overall Acceptance <dbl>
library(tidyverse)
map_df(df[-c(1:2)], ~t.test(.x ~ df$Product, paired = TRUE)$p.value)
#> # A tibble: 1 x 10
#> Appearance Aroma Flavour Texture `JAR Colour` `JAR Strength Chocolate`
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 0.496 0.576 1 0.309 0.678 1
#> # ... with 4 more variables: JAR Strength Vanilla <dbl>, JAR Sweetness <dbl>,
#> # JAR Creaminess <dbl>, Overall Acceptance <dbl>
sapply(df[-c(1:2)], function(x) t.test(x ~ df$Product, paired = TRUE)$p.value)
#> Appearance Aroma Flavour
#> 0.4961016 0.5763122 1.0000000
#> Texture JAR Colour JAR Strength Chocolate
#> 0.3092332 0.6783097 1.0000000
#> JAR Strength Vanilla JAR Sweetness JAR Creaminess
#> 0.6783097 1.0000000 0.4433319
#> Overall Acceptance
#> 0.7803523
Created on 2021-06-22 by the reprex package (v2.0.0)
I'm analysing some data, and I've come a difficulty - I don't know how to summarise my whole dataset using the variable names as the "group".
A sample data:
structure(list(x4 = c(3, 4, 7, 4, 5, 1, 5, 2, 7, 1), x5 = c(2,
4, 4, 4, 5, 3, 6, 1, 7, 1), x6 = c(3, 5, 4, 7, 5, 4, 6, 4, 6,
2), x7 = c(4, 1, 6, 4, 6, 4, 6, 2, 7, 2), x9 = c(5, 5, 4, 5,
6, 3, 7, 5, 6, 1), x10 = c(3, 6, 5, 4, 6, 5, 6, 3, 6, 1), x11 = c(6,
7, 7, 7, 6, 7, 7, 5, 7, 4), x12 = c(6, 7, 6, 7, 6, 4, 6, 6, 7,
5), x14 = c(5, 7, 5, 6, 4, 6, 6, 5, 6, 4), x15 = c(4, 7, 7, 7,
6, 4, 6, 5, 6, 1), x16 = c(4, 7, 7, 7, 6, 5, 7, 3, 6, 4), x17 = c(4,
5, 5, 7, 6, 6, 7, 4, 6, 2), x18 = c(3, 4, 7, 7, 6, 5, 6, 4, 6,
2), x19 = c(5, 7, 5, 7, 6, 6, 6, 3, 6, 1), x22 = c(4, 4, 5, 7,
6, 7, 6, 5, 6, 2), x26 = c(6, 7, 5, 4, 6, 7, 7, 4, 6, 4), x29 = c(4,
7, 2, 7, 6, 4, 7, 3, 6, 1), x33 = c(3, 7, 7, 7, 6, 5, 6, 3, 6,
3), x34 = c(5, 5, 4, 7, 6, 7, 7, 5, 6, 2), x35 = c(4, 4, 7, 7,
5, 7, 6, 4, 6, 2), x36 = c(4, 7, 6, 7, 6, 5, 5, 4, 6, 2), x37 = c(3,
4, 7, 4, 5, 4, 6, 3, 5, 2), x49 = c(4, 7, 7, 7, 6, 5, 5, 6, 6,
3), x50 = c(4, 7, 6, 5, 5, 5, 6, 5, 7, 4)), row.names = c(NA,
-10L), class = "data.frame", .Names = c("x4", "x5", "x6", "x7",
"x9", "x10", "x11", "x12", "x14", "x15", "x16", "x17", "x18",
"x19", "x22", "x26", "x29", "x33", "x34", "x35", "x36", "x37",
"x49", "x50"))
I just want some statistics, like this:
summary <- dados_afc %>%
summarise_all(funs(mean, sd, mode, median))
But the result is a df with one observation and lots of variable. I wanted it to have 5 columns: varname, mean, sd, mode, median, but I'm not sure how to do it. Any tips?
Note: I am not aware of a built-in way to get mode from R. See here for some discussion:
Is there a built-in function for finding the mode?
# From the top answer there:
Mode <- function(x) {
ux <- unique(x)
ux[which.max(tabulate(match(x, ux)))]
}
To treat each column as a group, you can use tidyr::gather to convert your "wide" data into long form, and then dplyr::group_by to create groups with their own summary calculations:
library(tidyverse)
summary <- dados_afc %>%
gather(group, value) %>%
group_by(group) %>%
summarise_all(funs(mean, sd, Mode, median))
> summary
# A tibble: 24 x 5
group mean sd Mode median
<chr> <dbl> <dbl> <dbl> <dbl>
1 x10 4.5 1.72 6 5
2 x11 6.3 1.06 7 7
3 x12 6 0.943 6 6
4 x14 5.4 0.966 6 5.5
5 x15 5.3 1.89 7 6
6 x16 5.6 1.51 7 6
7 x17 5.2 1.55 6 5.5
8 x18 5 1.70 6 5.5
9 x19 5.2 1.87 6 6
10 x22 5.2 1.55 6 5.5
I have a vector of numbers below which has a repeating pattern (usually 2, 3, 4, 5, 6 before starting over again, but sometimes one or more will not be in there due to holidays, etc). I want to mark the second occurrence in each of these sets (usually 3 but not always if for example 2 isnt there it would be 4 that I want marked). Any ideas how to flag what essentially is the 2nd business day of a week?
code example:
test_vector <- c(2, 3, 4, 5, 6, 2, 3, 4, 5, 6, 2, 3, 4, 5, 6, 2, 3, 4, 5, 6,
2, 3, 4, 2, 3, 4, 6, 2, 3, 4, 5, 6, 2, 3, 4, 5, 6, 2, 3, 4, 5,
6, 2, 3, 4, 5, 6, 2, 3, 4, 5, 6, 2, 3, 4, 5, 6, 2, 3, 4, 5, 6,
2, 3, 4, 5, 6, 2, 3, 4, 5, 6, 2, 3, 4, 5, 6, 2, 3, 4, 5, 6, 2,
3, 4, 5, 6, 2, 3, 4, 5, 3, 4, 5, 6, 2, 3, 4, 5, 6, 2, 3, 4, 5,
6, 2, 3, 4, 5, 6, 3, 4, 5, 6, 2, 3, 4, 5, 6, 2, 3, 4, 5, 6, 3,
4, 5, 6, 2, 3, 4, 5, 6, 2, 3, 4, 5, 6, 2, 3, 4, 5, 6, 2, 3, 4,
5, 6, 2, 3, 4, 5, 6, 2, 3, 4, 5, 6, 2, 3, 4, 5, 6, 2, 3, 4, 5,
6, 2, 3, 4, 5, 6, 2, 3, 4, 5, 6, 2, 3, 4, 5, 6, 2, 3, 4, 5, 6,
2, 3, 4, 5, 6, 3, 4, 5, 6, 2)
inds <- which(c(TRUE, diff(test_vector) != 1L) & #find start of week
c(TRUE, diff(test_vector[-1]) == 1L, FALSE) #protect against one-day weeks
) + 1L
test_vector[inds]
#[1] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 3 3 3 4 3 3 4 3 3 3 3 3 3 3 3 3 3 3 3 3 4
Not sure what you what as far as a flag, but this will let you know where that value exists.
for(i in 1:length(unique(test_vector))){
print(paste0(unique(test_vector)[i], " at position ", which(test_vector == unique(test_vector)[i])[2]))
}
To see the next sets you would change the [2] to 4 or 6 or 8 or however many sets you have.