Dataset Dput
structure(list(V1 = structure(c(4, 4, 2, 2, 2, 2, 2, 2, 4, 4,
2, 3, 2, 3, 4, 2, 2, 2, 3, 3, 2, 3, 1, 3, 3, 3, 3, 4, 1, 2, 4,
1, 2, 3, 2, 3, 1, 1, 2, 2, 4, 3, 2, 1, 2, 3, 3, 4, 3, 3, 2, 3,
1, 4, 3, 2, 3, 4, 1, 3, 3, 3, 2, 2, 1, 2, 3, 4, 4, 2, 4, 3, 2,
3, 3, 3, 3, 2, 4, 3, 3, 3, 2, 2, 3, 4, 2, 4, 4, 2, 2, 3, 3), format.spss = "F8.0"),
V2 = structure(c(4, 4, 3, 4, 3, 4, 3, 2, 4, 1, 3, 3, 3, 4,
3, 3, 2, 3, 4, 3, 1, 4, 2, 3, 4, 2, 4, 3, 3, 2, 3, 2, 3,
3, 4, 3, 3, 3, 3, 3, 3, 2, 4, 2, 2, 2, 4, 3, 4, 4, 2, 4,
2, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 4, 3, 3, 4, 4, 4, 4, 4,
3, 4, 3, 3, 3, 4, 2, 4, 3, 4, 3, 3, 2, 3, 3, 4, 3, 4, 3,
4, 4, 3), format.spss = "F8.0"), V3 = structure(c(4, 4, 4,
4, 4, 4, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4), format.spss = "F8.0"),
V4 = structure(c(4, 4, 3, 4, 3, 4, 2, 1, 3, 2, 3, 1, 4, 4,
2, 3, 2, 2, 2, 4, 1, 2, 2, 2, 3, 2, 3, 2, 2, 1, 3, 1, 1,
2, 4, 1, 1, 2, 3, 2, 2, 1, 1, 1, 3, 2, 4, 3, 3, 3, 3, 3,
3, 4, 3, 1, 4, 3, 4, 3, 2, 3, 2, 1, 4, 1, 4, 1, 2, 4, 4,
4, 3, 3, 3, 2, 2, 1, 4, 3, 2, 3, 2, 1, 3, 4, 1, 2, 4, 3,
4, 2, 2), format.spss = "F8.0"), V5 = structure(c(3, 3, 3,
4, 3, 4, 3, 1, 1, 1, 1, 2, 1, 2, 2, 2, 1, 2, 2, 2, 3, 2,
2, 2, 2, 4, 2, 3, 2, 3, 4, 1, 4, 2, 3, 3, 2, 2, 3, 2, 2,
3, 3, 2, 3, 3, 3, 2, 2, 2, 3, 2, 3, 3, 2, 2, 3, 3, 2, 3,
2, 2, 3, 3, 3, 2, 3, 3, 3, 4, 3, 2, 3, 3, 3, 3, 3, 3, 4,
3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 4, 3, 3), format.spss = "F8.0"),
V6 = structure(c(4, 4, 3, 4, 3, 4, 4, 1, 3, 3, 3, 3, 2, 3,
4, 2, 4, 3, 3, 3, 3, 4, 4, 3, 3, 3, 4, 4, 4, 3, 4, 4, 3,
3, 3, 4, 2, 2, 3, 3, 3, 4, 2, 4, 3, 4, 4, 4, 3, 4, 2, 4,
3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 3, 1, 4, 4, 4, 4, 4, 4,
4, 3, 4, 4, 4, 4, 2, 4, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3,
4, 4, 4), format.spss = "F8.0"), V7 = structure(c(4, 4, 2,
4, 2, 4, 4, 3, 3, 3, 2, 2, 4, 4, 3, 3, 1, 4, 3, 3, 1, 2,
4, 3, 4, 2, 4, 4, 3, 3, 2, 2, 3, 2, 4, 3, 3, 3, 3, 3, 3,
1, 4, 3, 2, 2, 4, 3, 4, 4, 2, 4, 2, 3, 4, 3, 3, 3, 4, 3,
4, 4, 3, 4, 4, 3, 4, 4, 4, 4, 3, 4, 4, 4, 3, 3, 4, 3, 4,
3, 3, 3, 3, 2, 2, 4, 4, 4, 4, 2, 4, 4, 3), format.spss = "F8.0"),
V8 = structure(c(4, 4, 2, 1, 2, 1, 1, 1, 3, 3, 2, 3, 2, 3,
4, 2, 2, 2, 3, 3, 2, 3, 1, 3, 3, 3, 3, 4, 1, 2, 4, 1, 2,
3, 2, 3, 1, 1, 2, 2, 3, 1, 1, 1, 2, 3, 3, 4, 3, 3, 2, 3,
1, 3, 4, 2, 3, 4, 1, 3, 3, 3, 2, 2, 1, 2, 3, 4, 4, 2, 4,
3, 4, 4, 4, 4, 3, 2, 4, 3, 3, 3, 2, 2, 3, 4, 2, 4, 4, 2,
1, 3, 4), format.spss = "F8.0"), V9 = structure(c(4, 4, 4,
4, 4, 4, 4, 4, 3, 3, 2, 3, 3, 3, 3, 2, 3, 3, 2, 3, 4, 4,
4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 4, 3, 2, 4, 3, 4,
4, 4, 4, 4, 4, 3, 3, 3, 3, 4, 4, 4, 4, 4, 3, 4, 3, 2, 4,
3, 3, 4, 4, 4, 3, 4, 4, 4, 4, 4, 3, 4, 3, 4, 3, 4, 4, 4,
4, 3, 4, 4, 4, 4, 4, 3, 2, 4, 4, 4, 4, 4), format.spss = "F8.0"),
V10 = structure(c(4, 4, 2, 4, 2, 4, 3, 2, 3, 3, 3, 2, 4,
4, 2, 2, 1, 3, 4, 4, 1, 4, 2, 3, 3, 2, 4, 3, 2, 3, 3, 1,
3, 2, 4, 3, 2, 3, 3, 3, 3, 1, 2, 4, 2, 3, 4, 4, 3, 3, 2,
4, 2, 4, 3, 3, 4, 3, 4, 3, 4, 4, 4, 1, 4, 3, 3, 4, 3, 4,
4, 3, 3, 3, 3, 3, 4, 1, 4, 3, 3, 3, 3, 2, 3, 4, 4, 2, 4,
2, 4, 4, 3), format.spss = "F8.0"), V11 = structure(c(3,
3, 1, 4, 1, 4, 1, 1, 1, 1, 2, 1, 1, 1, 3, 2, 2, 2, 2, 1,
2, 3, 1, 2, 3, 3, 2, 1, 2, 2, 2, 3, 2, 2, 3, 2, 1, 2, 2,
1, 1, 4, 3, 1, 3, 2, 3, 1, 2, 1, 2, 1, 2, 2, 1, 2, 2, 3,
2, 2, 2, 2, 2, 2, 1, 1, 1, 3, 3, 4, 2, 1, 2, 2, 3, 3, 3,
3, 4, 3, 2, 3, 3, 2, 2, 2, 2, 1, 3, 1, 4, 1, 3), format.spss = "F8.0"),
V12 = structure(c(4, 4, 3, 2, 3, 2, 3, 1, 3, 3, 3, 3, 2,
3, 3, 2, 4, 3, 3, 4, 4, 3, 3, 4, 4, 3, 3, 3, 4, 3, 4, 4,
3, 3, 3, 4, 2, 2, 3, 3, 3, 4, 2, 4, 3, 4, 4, 4, 3, 4, 2,
4, 3, 3, 3, 3, 4, 3, 3, 2, 2, 1, 1, 3, 1, 4, 4, 4, 4, 4,
4, 4, 3, 3, 2, 2, 2, 2, 4, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
3, 2, 3, 4), format.spss = "F8.0")), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -93L))
EFA Before Scree Plot
I have done the work of performing exploratory factor analysis on the data:
# Exploratory factor analysis on hwk2: three factors, oblimin (oblique)
# rotation, at most 100 iterations. The fitted object is kept in fa3.
# NOTE(review): n.obs is presumably only needed when a correlation matrix is
# supplied instead of raw data -- confirm against the psych documentation.
fa3 <- fa(hwk2, nfactors = 3, n.obs = 93, rotate = "oblimin", max.iter = 100)
# Auto-print the solution (loadings, variance accounted for, factor correlations).
fa3
Which gives me this:
MR1 MR3 MR2 h2 u2 com
V1 0.03 0.87 -0.05 0.77 0.23 1.0
V2 0.75 0.05 0.09 0.63 0.37 1.0
V3 0.13 0.06 0.67 0.53 0.47 1.1
V4 0.50 0.07 0.08 0.31 0.69 1.1
V5 0.03 -0.06 0.88 0.77 0.23 1.0
V6 0.00 0.47 0.32 0.37 0.63 1.8
V7 0.80 -0.08 -0.04 0.60 0.40 1.0
V8 0.05 0.88 -0.03 0.80 0.20 1.0
V9 -0.22 0.02 0.58 0.34 0.66 1.3
V10 0.75 0.10 0.01 0.63 0.37 1.0
V11 0.03 0.00 0.53 0.29 0.71 1.0
V12 -0.24 0.52 0.14 0.28 0.72 1.6
MR1 MR3 MR2
SS loadings 2.18 2.09 2.03
Proportion Var 0.18 0.17 0.17
Cumulative Var 0.18 0.36 0.53
Proportion Explained 0.35 0.33 0.32
Cumulative Proportion 0.35 0.68 1.00
With factor correlations of
MR1 MR3 MR2
MR1 1.00 0.32 0.19
MR3 0.32 1.00 0.15
MR2 0.19 0.15 1.00
Basic Scree
Making a normal scree plot from there is quite simple. I just add this to my script:
# Plain scree plot of hwk2: draw the principal-component eigenvalues only
# (pc = TRUE) and leave the factor eigenvalues out (factors = FALSE).
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned, which would silently change what gets plotted.
scree(hwk2,
  pc = TRUE,
  factors = FALSE,
  main = "Scree Plot of Eigenvalues")
Which creates this:
What I Want
However, I want to graph simulated parallel analysis with it. In Jamovi this is super easy to accomplish:
However, I don't see an option for this in scree so far. I have also tried another version of the scree plot, fa.parallel, but the legend comes out really strange:
# Parallel-analysis scree plot of hwk2 with the legend switched off.
# No `fa =` argument is given here, so which eigenvalue series are drawn is
# left to fa.parallel's default.
fa.parallel(
  hwk2,
  n.obs = 93,
  fm = "minres",
  nfactors = 3,
  main = "Parallel Analysis Scree Plots",
  n.iter = 100,
  error.bars = FALSE,
  se.bars = FALSE,
  SMC = FALSE,
  ylabel = NULL,
  show.legend = FALSE, # spelled out: `F` is a reassignable variable, not a constant
  sim = TRUE,
  quant = .95,
  use = "pairwise",
  plot = TRUE,
  correct = .5
)
I get either this if I remove the legend:
Or I get this annoying one with the legend:
Basically, I just need factor analysis and don't need principal components in the plot, but I can't figure out how to remove it.
The only problem is that there are Heywood cases, so the fa analysis isn't trustworthy.
library(psych)
fa.parallel(
hwk2,
n.obs = 93,
fa = "fa", # you want only "fa", not "pc"
show.legend = TRUE, # show legend
fm = "minres",
nfactors = 3,
main = "Parallel Analysis Scree Plots",
n.iter = 100,
error.bars = FALSE,
se.bars = FALSE,
SMC = FALSE,
ylabel = NULL,
sim = TRUE,
quant = .95,
use = "pairwise",
plot = TRUE,
correct = .5
)
I'd like to find the rank of a value in a sorted vector, i.e., given a sorted (increasing) vector and a value, find the index of the value in the vector if it is present (or the mean of indices if more than once), or the index of the greatest element less than the value, if it is not present, but within the range of the vector, or something reasonable if the value is outside the range of the vector altogether.
Let's say xx is the vector and x is the value. mean(which(xx == x)) covers the value-present case, and max(which(xx < x)) covers the value-not-present-and-in-range case. 1 and length(xx) are probably reasonable outputs for the not-in-range case.
So I could do that, but I'd like to avoid creating a Boolean vector the size of xx, and also there are just enough wrinkles that I'd prefer to call a built-in or library function instead of rolling my own. Perhaps there is something simple which I've overlooked.
Here's an example. The first value, 7, is present in the vector. The second, 7.3, is not present. I'd like to get the outputs 82.5 and 86, respectively.
> sort (floor (runif (100) * 10)) -> xx
> xx
[1] 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
[38] 2 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6
[75] 6 6 6 6 7 7 7 7 7 7 7 7 8 8 8 8 8 8 8 8 8 8 8 9 9 9
> mean (which (xx == 7))
[1] 82.5
> max (which (xx <= 7.3))
[1] 86
EDIT: with hints from akrun, I've come up with the following. Note that, when there are duplicates, it makes use of the fact that match returns the least index and findInterval returns the greatest.
# assume xx is sorted already
#' Mean rank of a value within an already-sorted vector
#'
#' @param xx Numeric vector sorted in increasing order. It is not re-sorted
#'   here; `findInterval()` itself rejects an unsorted vector.
#' @param x Value(s) to look up. May be a vector; each element is handled
#'   independently.
#' @return One result per element of `x`:
#'   * the mean of the first and last index at which the value occurs in `xx`
#'     when it is present (so ties get their average position);
#'   * otherwise the index of the greatest element of `xx` that is below it,
#'     which is 0 when the value is below `xx[1]` and `length(xx)` when it is
#'     above the last element;
#'   * `NA` when the element of `x` is `NA`.
mean.rank.in <- function(xx, x) {
  # Greatest index i with xx[i] <= x; 0 when x lies below the whole vector.
  last_at_or_below <- findInterval(x, xx)
  # Least index holding exactly x, or NA when x does not occur in xx.
  first_equal <- match(x, xx)
  # Present: average the two ends of the run of ties. Absent (or NA): the
  # findInterval() answer is already the desired result.
  ifelse(
    is.na(first_equal),
    last_at_or_below,
    (first_equal + last_at_or_below) / 2
  )
}
Here are some checks:
# Sanity checks for mean.rank.in(). Each bare expression is meant to be run
# at the console so that its value is printed.

# Sorted sample with heavy ties (the vector from the question).
xx <- c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3,
3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7,
7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9)
mean.rank.in(xx, 7) == 82.5 # expect TRUE
mean.rank.in(xx, 7.3) == 86 # expect TRUE
# vapply() instead of sapply(): the result is always a numeric vector.
vapply(xx, function(x) mean.rank.in(xx, x), numeric(1)) # looks right
# Mean ranks of all 100 elements must add up to 1 + 2 + ... + 100.
sum(vapply(xx, function(x) mean.rank.in(xx, x), numeric(1))) == 5050 # expect TRUE

# Sample without ties: every element ranks at its own position.
yy <- sort(runif(100))
all(vapply(yy, function(y) mean.rank.in(yy, y), numeric(1)) == 1:100) # expect TRUE

# Values that are NOT in yy: shift each point up by half the smallest gap, so
# yy1[i] lies strictly between yy[i] and yy[i + 1]. They are ranked against
# yy (not against yy1 itself, which would only repeat the check above), so
# this exercises the value-absent branch.
dyy <- min(diff(yy))
yy1 <- yy + dyy / 2
all(vapply(yy1, function(y) mean.rank.in(yy, y), numeric(1)) == 1:100) # expect TRUE

# Values outside the range of the vector.
mean.rank.in(yy, yy[[1]] - 1) == 0 # expect TRUE
mean.rank.in(yy, yy[[100]] + 1) == 100 # expect TRUE
Here is one option with rank
rank(xx)[match(7, xx)]
#[1] 82.5
and with findInterval
findInterval(7.3, xx)
#[1] 86
data
xx <- c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3,
3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7,
7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9)
If I have
a=[1,3,5,7,9]
b=[2,4,6,8,10]
and I want to create every combination of length 5 of the two lists with ordering.
So far I can get every possible combination through:
ab=hcat(a,b)
collect(combinations(ab,5))
but I want to receive only the 32 (in this case) ordered combinations.
A function similar to what I am looking for would be the Tuples[Transpose@{a,b}] function in Mathematica.
EDIT:
Mathematica output would be as follows
(* Pair up a[[i]] with b[[i]] via Transpose, then take one element from each
   pair in every possible way. The prefix-application operator is "@"; the
   "#" that had replaced it is the Slot symbol and does not apply Transpose. *)
a = {1, 3, 5, 7, 9};
b = {2, 4, 6, 8, 10};
combin = Tuples[Transpose@{a, b}]
Length[combin]
Out[1]:= {{1, 3, 5, 7, 9}, {1, 3, 5, 7, 10}, {1, 3, 5, 8, 9}, {1, 3, 5, 8,
10}, {1, 3, 6, 7, 9}, {1, 3, 6, 7, 10}, {1, 3, 6, 8, 9}, {1, 3, 6,
8, 10}, {1, 4, 5, 7, 9}, {1, 4, 5, 7, 10}, {1, 4, 5, 8, 9}, {1, 4,
5, 8, 10}, {1, 4, 6, 7, 9}, {1, 4, 6, 7, 10}, {1, 4, 6, 8, 9}, {1,
4, 6, 8, 10}, {2, 3, 5, 7, 9}, {2, 3, 5, 7, 10}, {2, 3, 5, 8,
9}, {2, 3, 5, 8, 10}, {2, 3, 6, 7, 9}, {2, 3, 6, 7, 10}, {2, 3, 6,
8, 9}, {2, 3, 6, 8, 10}, {2, 4, 5, 7, 9}, {2, 4, 5, 7, 10}, {2, 4,
5, 8, 9}, {2, 4, 5, 8, 10}, {2, 4, 6, 7, 9}, {2, 4, 6, 7, 10}, {2,
4, 6, 8, 9}, {2, 4, 6, 8, 10}}
Out[2]:= 32
Here's a v0.5 solution using Base.product.
With
a = [1,3,5,7,9]
b = [2,4,6,8,10]
To create an array of tuples
julia> vec(collect(Base.product(zip(a, b)...)))
32-element Array{Tuple{Int64,Int64,Int64,Int64,Int64},1}:
(1,3,5,7,9)
(2,3,5,7,9)
(1,4,5,7,9)
(2,4,5,7,9)
(1,3,6,7,9)
(2,3,6,7,9)
(1,4,6,7,9)
(2,4,6,7,9)
(1,3,5,8,9)
(2,3,5,8,9)
⋮
(2,4,6,7,10)
(1,3,5,8,10)
(2,3,5,8,10)
(1,4,5,8,10)
(2,4,5,8,10)
(1,3,6,8,10)
(2,3,6,8,10)
(1,4,6,8,10)
(2,4,6,8,10)
and to collect that result into a matrix
julia> hcat((collect(row) for row in ans)...)
5×32 Array{Int64,2}:
1 2 1 2 1 2 1 2 1 2 1 2 1 … 2 1 2 1 2 1 2 1 2
3 3 4 4 3 3 4 4 3 3 4 4 3 4 3 3 4 4 3 3 4 4
5 5 5 5 6 6 6 6 5 5 5 5 6 6 5 5 5 5 6 6 6 6
7 7 7 7 7 7 7 7 8 8 8 8 8 7 8 8 8 8 8 8 8 8
9 9 9 9 9 9 9 9 9 9 9 9 9 10 10 10 10 10 10 10 10 10
There is a package Iterators.jl. By using it (First you should install it by Pkg.add("Iterators")) you can do the following:
using Iterators
# Walk the Cartesian product of the five pairs and print each tuple.
# `@show` is the printing macro that produces the "p = (...)" lines; with a
# leading "#" the line is only a comment and the loop prints nothing.
for p in product([1,2],[3,4],[5,6],[7,8],[9,10])
    @show p
end
Output:
p = (1,3,5,7,9)
p = (2,3,5,7,9)
p = (1,4,5,7,9)
p = (2,4,5,7,9)
p = (1,3,6,7,9)
p = (2,3,6,7,9)
p = (1,4,6,7,9)
p = (2,4,6,7,9)
p = (1,3,5,8,9)
p = (2,3,5,8,9)
p = (1,4,5,8,9)
p = (2,4,5,8,9)
p = (1,3,6,8,9)
p = (2,3,6,8,9)
p = (1,4,6,8,9)
p = (2,4,6,8,9)
p = (1,3,5,7,10)
p = (2,3,5,7,10)
p = (1,4,5,7,10)
p = (2,4,5,7,10)
p = (1,3,6,7,10)
p = (2,3,6,7,10)
p = (1,4,6,7,10)
p = (2,4,6,7,10)
p = (1,3,5,8,10)
p = (2,3,5,8,10)
p = (1,4,5,8,10)
p = (2,4,5,8,10)
p = (1,3,6,8,10)
p = (2,3,6,8,10)
p = (1,4,6,8,10)
p = (2,4,6,8,10)
EDIT
To get the results as array of arrays or matrix you can do :
# Gather every tuple of the product as a plain vector.
arr = Any[]
for combo in product([1,2],[3,4],[5,6],[7,8],[9,10])
    push!(arr, collect(combo))
end
# arr is now an array of arrays; splat it into hcat to get a matrix with one
# column per combination:
hcat(arr...)
Probably the simplest solution is to simply filter out the unsorted elements; filter(issorted, …) should do the trick. This yields 26 elements, though, so perhaps I'm misunderstanding your intention:
julia> collect(filter(issorted, combinations(ab,5)))
26-element Array{Array{Int64,1},1}:
[1,3,5,7,9]
[1,3,5,7,8]
⋮
I have a vector of numbers below which has a repeating pattern (usually 2, 3, 4, 5, 6 before starting over again, but sometimes one or more will be missing due to holidays, etc.). I want to mark the second occurrence in each of these sets (usually 3, but not always: if, for example, 2 isn't there, it would be 4 that I want marked). Any ideas how to flag what is essentially the 2nd business day of a week?
code example:
test_vector <- c(2, 3, 4, 5, 6, 2, 3, 4, 5, 6, 2, 3, 4, 5, 6, 2, 3, 4, 5, 6,
2, 3, 4, 2, 3, 4, 6, 2, 3, 4, 5, 6, 2, 3, 4, 5, 6, 2, 3, 4, 5,
6, 2, 3, 4, 5, 6, 2, 3, 4, 5, 6, 2, 3, 4, 5, 6, 2, 3, 4, 5, 6,
2, 3, 4, 5, 6, 2, 3, 4, 5, 6, 2, 3, 4, 5, 6, 2, 3, 4, 5, 6, 2,
3, 4, 5, 6, 2, 3, 4, 5, 3, 4, 5, 6, 2, 3, 4, 5, 6, 2, 3, 4, 5,
6, 2, 3, 4, 5, 6, 3, 4, 5, 6, 2, 3, 4, 5, 6, 2, 3, 4, 5, 6, 3,
4, 5, 6, 2, 3, 4, 5, 6, 2, 3, 4, 5, 6, 2, 3, 4, 5, 6, 2, 3, 4,
5, 6, 2, 3, 4, 5, 6, 2, 3, 4, 5, 6, 2, 3, 4, 5, 6, 2, 3, 4, 5,
6, 2, 3, 4, 5, 6, 2, 3, 4, 5, 6, 2, 3, 4, 5, 6, 2, 3, 4, 5, 6,
2, 3, 4, 5, 6, 3, 4, 5, 6, 2)

#' Positions of the second entry of every week
#'
#' A new week is taken to start at the first element and wherever the
#' day number fails to increase (it drops back, e.g. 6 -> 2, or repeats).
#' A gap inside a week (e.g. 2, 4, 5, 6) therefore does not start a new week.
#'
#' @param x Numeric vector of day-of-week numbers in chronological order.
#' @return Integer vector of indices into `x`, one per week that has at least
#'   two entries; one-day weeks contribute nothing. `integer(0)` when `x` has
#'   fewer than two elements.
second_day_index <- function(x) {
  if (length(x) < 2L) {
    return(integer(0))
  }
  week_start <- c(TRUE, diff(x) <= 0)
  # The last element has no follower, so treat it as if a new week came next;
  # this keeps a trailing one-day week from pointing past the end of x.
  next_is_start <- c(week_start[-1L], TRUE)
  which(week_start & !next_is_start) + 1L
}

inds <- second_day_index(test_vector)
test_vector[inds]
#[1] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 3 3 3 4 3 3 4 3 3 3 3 3 3 3 3 3 3 3 3 3 4
Not sure what you want as far as a flag, but this will let you know where that value exists.
# For every distinct value of test_vector, print the position of its second
# occurrence (NA when the value occurs only once).
distinct_vals <- unique(test_vector) # computed once instead of on every pass
# seq_along() rather than 1:length(): the loop is skipped for empty input
# instead of running over c(1, 0).
for (i in seq_along(distinct_vals)) {
  print(paste0(distinct_vals[i], " at position ", which(test_vector == distinct_vals[i])[2]))
}
To see the next sets you would change the [2] to 4 or 6 or 8 or however many sets you have.