Related
Dataset Dput
structure(list(V1 = structure(c(4, 4, 2, 2, 2, 2, 2, 2, 4, 4,
2, 3, 2, 3, 4, 2, 2, 2, 3, 3, 2, 3, 1, 3, 3, 3, 3, 4, 1, 2, 4,
1, 2, 3, 2, 3, 1, 1, 2, 2, 4, 3, 2, 1, 2, 3, 3, 4, 3, 3, 2, 3,
1, 4, 3, 2, 3, 4, 1, 3, 3, 3, 2, 2, 1, 2, 3, 4, 4, 2, 4, 3, 2,
3, 3, 3, 3, 2, 4, 3, 3, 3, 2, 2, 3, 4, 2, 4, 4, 2, 2, 3, 3), format.spss = "F8.0"),
V2 = structure(c(4, 4, 3, 4, 3, 4, 3, 2, 4, 1, 3, 3, 3, 4,
3, 3, 2, 3, 4, 3, 1, 4, 2, 3, 4, 2, 4, 3, 3, 2, 3, 2, 3,
3, 4, 3, 3, 3, 3, 3, 3, 2, 4, 2, 2, 2, 4, 3, 4, 4, 2, 4,
2, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 4, 3, 3, 4, 4, 4, 4, 4,
3, 4, 3, 3, 3, 4, 2, 4, 3, 4, 3, 3, 2, 3, 3, 4, 3, 4, 3,
4, 4, 3), format.spss = "F8.0"), V3 = structure(c(4, 4, 4,
4, 4, 4, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4), format.spss = "F8.0"),
V4 = structure(c(4, 4, 3, 4, 3, 4, 2, 1, 3, 2, 3, 1, 4, 4,
2, 3, 2, 2, 2, 4, 1, 2, 2, 2, 3, 2, 3, 2, 2, 1, 3, 1, 1,
2, 4, 1, 1, 2, 3, 2, 2, 1, 1, 1, 3, 2, 4, 3, 3, 3, 3, 3,
3, 4, 3, 1, 4, 3, 4, 3, 2, 3, 2, 1, 4, 1, 4, 1, 2, 4, 4,
4, 3, 3, 3, 2, 2, 1, 4, 3, 2, 3, 2, 1, 3, 4, 1, 2, 4, 3,
4, 2, 2), format.spss = "F8.0"), V5 = structure(c(3, 3, 3,
4, 3, 4, 3, 1, 1, 1, 1, 2, 1, 2, 2, 2, 1, 2, 2, 2, 3, 2,
2, 2, 2, 4, 2, 3, 2, 3, 4, 1, 4, 2, 3, 3, 2, 2, 3, 2, 2,
3, 3, 2, 3, 3, 3, 2, 2, 2, 3, 2, 3, 3, 2, 2, 3, 3, 2, 3,
2, 2, 3, 3, 3, 2, 3, 3, 3, 4, 3, 2, 3, 3, 3, 3, 3, 3, 4,
3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 4, 3, 3), format.spss = "F8.0"),
V6 = structure(c(4, 4, 3, 4, 3, 4, 4, 1, 3, 3, 3, 3, 2, 3,
4, 2, 4, 3, 3, 3, 3, 4, 4, 3, 3, 3, 4, 4, 4, 3, 4, 4, 3,
3, 3, 4, 2, 2, 3, 3, 3, 4, 2, 4, 3, 4, 4, 4, 3, 4, 2, 4,
3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 3, 1, 4, 4, 4, 4, 4, 4,
4, 3, 4, 4, 4, 4, 2, 4, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3,
4, 4, 4), format.spss = "F8.0"), V7 = structure(c(4, 4, 2,
4, 2, 4, 4, 3, 3, 3, 2, 2, 4, 4, 3, 3, 1, 4, 3, 3, 1, 2,
4, 3, 4, 2, 4, 4, 3, 3, 2, 2, 3, 2, 4, 3, 3, 3, 3, 3, 3,
1, 4, 3, 2, 2, 4, 3, 4, 4, 2, 4, 2, 3, 4, 3, 3, 3, 4, 3,
4, 4, 3, 4, 4, 3, 4, 4, 4, 4, 3, 4, 4, 4, 3, 3, 4, 3, 4,
3, 3, 3, 3, 2, 2, 4, 4, 4, 4, 2, 4, 4, 3), format.spss = "F8.0"),
V8 = structure(c(4, 4, 2, 1, 2, 1, 1, 1, 3, 3, 2, 3, 2, 3,
4, 2, 2, 2, 3, 3, 2, 3, 1, 3, 3, 3, 3, 4, 1, 2, 4, 1, 2,
3, 2, 3, 1, 1, 2, 2, 3, 1, 1, 1, 2, 3, 3, 4, 3, 3, 2, 3,
1, 3, 4, 2, 3, 4, 1, 3, 3, 3, 2, 2, 1, 2, 3, 4, 4, 2, 4,
3, 4, 4, 4, 4, 3, 2, 4, 3, 3, 3, 2, 2, 3, 4, 2, 4, 4, 2,
1, 3, 4), format.spss = "F8.0"), V9 = structure(c(4, 4, 4,
4, 4, 4, 4, 4, 3, 3, 2, 3, 3, 3, 3, 2, 3, 3, 2, 3, 4, 4,
4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 4, 3, 2, 4, 3, 4,
4, 4, 4, 4, 4, 3, 3, 3, 3, 4, 4, 4, 4, 4, 3, 4, 3, 2, 4,
3, 3, 4, 4, 4, 3, 4, 4, 4, 4, 4, 3, 4, 3, 4, 3, 4, 4, 4,
4, 3, 4, 4, 4, 4, 4, 3, 2, 4, 4, 4, 4, 4), format.spss = "F8.0"),
V10 = structure(c(4, 4, 2, 4, 2, 4, 3, 2, 3, 3, 3, 2, 4,
4, 2, 2, 1, 3, 4, 4, 1, 4, 2, 3, 3, 2, 4, 3, 2, 3, 3, 1,
3, 2, 4, 3, 2, 3, 3, 3, 3, 1, 2, 4, 2, 3, 4, 4, 3, 3, 2,
4, 2, 4, 3, 3, 4, 3, 4, 3, 4, 4, 4, 1, 4, 3, 3, 4, 3, 4,
4, 3, 3, 3, 3, 3, 4, 1, 4, 3, 3, 3, 3, 2, 3, 4, 4, 2, 4,
2, 4, 4, 3), format.spss = "F8.0"), V11 = structure(c(3,
3, 1, 4, 1, 4, 1, 1, 1, 1, 2, 1, 1, 1, 3, 2, 2, 2, 2, 1,
2, 3, 1, 2, 3, 3, 2, 1, 2, 2, 2, 3, 2, 2, 3, 2, 1, 2, 2,
1, 1, 4, 3, 1, 3, 2, 3, 1, 2, 1, 2, 1, 2, 2, 1, 2, 2, 3,
2, 2, 2, 2, 2, 2, 1, 1, 1, 3, 3, 4, 2, 1, 2, 2, 3, 3, 3,
3, 4, 3, 2, 3, 3, 2, 2, 2, 2, 1, 3, 1, 4, 1, 3), format.spss = "F8.0"),
V12 = structure(c(4, 4, 3, 2, 3, 2, 3, 1, 3, 3, 3, 3, 2,
3, 3, 2, 4, 3, 3, 4, 4, 3, 3, 4, 4, 3, 3, 3, 4, 3, 4, 4,
3, 3, 3, 4, 2, 2, 3, 3, 3, 4, 2, 4, 3, 4, 4, 4, 3, 4, 2,
4, 3, 3, 3, 3, 4, 3, 3, 2, 2, 1, 1, 3, 1, 4, 4, 4, 4, 4,
4, 4, 3, 3, 2, 2, 2, 2, 4, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
3, 2, 3, 4), format.spss = "F8.0")), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -93L))
EFA Before Scree Plot
I have done the work of performing exploratory factor analysis on the data:
# Three-factor exploratory factor analysis of hwk2 with an oblimin rotation.
fa3 <- fa(
  r = hwk2,
  rotate = "oblimin",
  nfactors = 3,
  max.iter = 100,
  n.obs = 93
)
# Auto-print the fitted solution (loadings, variance accounted for,
# factor correlations).
fa3
Which gives me this:
MR1 MR3 MR2 h2 u2 com
V1 0.03 0.87 -0.05 0.77 0.23 1.0
V2 0.75 0.05 0.09 0.63 0.37 1.0
V3 0.13 0.06 0.67 0.53 0.47 1.1
V4 0.50 0.07 0.08 0.31 0.69 1.1
V5 0.03 -0.06 0.88 0.77 0.23 1.0
V6 0.00 0.47 0.32 0.37 0.63 1.8
V7 0.80 -0.08 -0.04 0.60 0.40 1.0
V8 0.05 0.88 -0.03 0.80 0.20 1.0
V9 -0.22 0.02 0.58 0.34 0.66 1.3
V10 0.75 0.10 0.01 0.63 0.37 1.0
V11 0.03 0.00 0.53 0.29 0.71 1.0
V12 -0.24 0.52 0.14 0.28 0.72 1.6
MR1 MR3 MR2
SS loadings 2.18 2.09 2.03
Proportion Var 0.18 0.17 0.17
Cumulative Var 0.18 0.36 0.53
Proportion Explained 0.35 0.33 0.32
Cumulative Proportion 0.35 0.68 1.00
With factor correlations of
MR1 MR3 MR2
MR1 1.00 0.32 0.19
MR3 0.32 1.00 0.15
MR2 0.19 0.15 1.00
Basic Scree
Making a normal scree plot from there is quite simple. I just add this to my script:
# Scree plot of the principal-component eigenvalues only (factor eigenvalues
# are switched off).
# `TRUE`/`FALSE` are spelled out because `T`/`F` are ordinary variables that
# can be reassigned, whereas `TRUE`/`FALSE` are reserved words.
scree(hwk2,
  pc = TRUE,
  factors = FALSE,
  main = "Scree Plot of Eigenvalues"
)
Which creates this:
What I Want
However, I want to graph simulated parallel analysis with it. In Jamovi this is super easy to accomplish:
However, I haven't found an option for this in scree() so far. I have also tried another function, fa.parallel(), but the legend comes out really strange:
# Parallel analysis of hwk2 with simulated data (100 iterations, minres).
# NOTE(review): the `fa` argument is not set in this call, so fa.parallel()
# uses its default; in psych that default is presumably "both", which would
# explain why principal components are drawn next to the factors -- confirm.
fa.parallel(
hwk2,
n.obs = 93,
fm = "minres",
nfactors = 3,
main = "Parallel Analysis Scree Plots",
n.iter = 100,
error.bars = FALSE,
se.bars = FALSE,
SMC = FALSE,
ylabel = NULL,
show.legend = F, # legend switched off in this attempt
sim = TRUE,
quant = .95,
use = "pairwise",
plot = TRUE,
correct = .5
)
I get either this if I remove the legend:
Or I get this annoying one with the legend:
Basically, I just need factor analysis and don't need principal components in the plot, but I can't figure out how to remove it.
Set fa = "fa" to drop the principal components from the plot (see the code below). Note, however, that there are Heywood cases, so the factor analysis results aren't trustworthy.
library(psych)
# Parallel analysis restricted to the factor solution, with the legend shown.
fa.parallel(
  x = hwk2,
  fa = "fa", # factor eigenvalues only, no principal components
  show.legend = TRUE,
  main = "Parallel Analysis Scree Plots",
  ylabel = NULL,
  # estimation settings
  fm = "minres",
  nfactors = 3,
  n.obs = 93,
  SMC = FALSE,
  use = "pairwise",
  correct = .5,
  # simulation settings
  sim = TRUE,
  n.iter = 100,
  quant = .95,
  # plotting settings
  plot = TRUE,
  error.bars = FALSE,
  se.bars = FALSE
)
I've converted a data frame into wide format and now want to compute paired t-tests to obtain p-values. I have managed to do this for each pair of columns individually, but it's a lot more code than I feel is necessary. I'm still very new to R, data and coding generally, and couldn't easily see a solution here on Stack Overflow.
My wide data frame is:
> head(df_wide)
# A tibble: 6 x 21
Assessor `Appearance1 `Appearance2 `Aroma_1 `Aroma_2 `Flavour_1 `Flavour_2
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 10 10 10 10 10 10
2 6 7 7 5 8 4
# ... with 14 more variables
I want to perform a paired T-Test over the attributes, i.e. Appearance1 and Appearance2, Aroma1 and Aroma2, etc. The 14 other variables are all <dbl> and are also attributes to be included as paired columns for the T-Test.
Ideally, the output would be a vector of just the p-values, rather than having all the information. I've managed to do that coding for individual pairs, but I wanted to know if this would be possible to do as part of performing the T-Test over multiple pairs of columns.
Here is the code I have for the first two attributes:
# Paired t-test p-values for the first two attribute pairs, collected into one
# numeric vector. `paired = TRUE` is spelled out because `T` is an ordinary
# variable that can be reassigned.
p_values <- c(
  t.test(df_wide$`Appearance1`, df_wide$`Appearance2`, paired = TRUE)[["p.value"]],
  t.test(df_wide$`Aroma1`, df_wide$`Aroma2`, paired = TRUE)[["p.value"]]
)
This creates the vector I want, but is cumbersome and error-prone. Ideally, I'd be able to perform it over all the pairs at once without needing to use column names.
I do have the original data frame in long format, if it would be easier to do it using that (EDIT: used dput() for the first 20 rows instead of head()):
> dput(df_test[1:20,])
structure(list(Assessor = c(1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10),
Product = c("MC", "MV", "MC", "MV", "MV", "MC", "MC", "MV", "MV", "MC", "MC", "MV", "MC", "MV", "MC", "MV", "MV", "MC", "MV", "MC"),
Appearance = c(10, 10, 6, 7, 9, 6, 7, 8, 9, 8, 10, 8, 6, 6, 9, 8, 8, 8, 9, 9),
Aroma = c(10, 10, 7, 5, 9, 8, 6, 7, 5, 7, 9, 8, 6, 6, 5, 3, 6, 7, 9, 6),
Flavour = c(10, 10, 8, 4, 10, 7, 7, 6, 8, 8, 9, 10, 8, 8, 6, 8, 7, 9, 9, 8),
Texture = c(10, 10, 8, 8, 9, 6, 7, 8, 8, 8, 9, 10, 8, 8, 9, 8, 8, 9, 9, 8),
`JAR Colour` = c(3, 2, 2, 3, 3, 3, 3, 3, 3, 2, 3, 2, 3, 2, 3, 3, 3, 3, 3, 3),
`JAR Strength Chocolate` = c(2, 2, 3, 2, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 3, 3, 2),
`JAR Strength Vanilla` = c(3, 3, 3, 2, 3, 2, 3, 3, 2, 3, 2, 3, 3, 3, 2, 2, 3, 3, 2, 3),
`JAR Sweetness` = c(2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 2, 3, 3, 3, 3, 2, 3, 3, 3, 3),
`JAR Creaminess` = c(3, 3, 3, 3, 3, 1, 3, 2, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3),
`Overall Acceptance` = c(9, 10, 8, 4, 10, 5, 7, 7, 8, 8, 9, 10, 8, 8, 8, 8, 8, 9, 8, 8)),
row.names = c(NA, -20L), class = c("tbl_df", "tbl", "data.frame"))
The Product variable is the one which was used to make the paired columns in the wide format data frame. Thanks in advance.
If I understand correctly, you can run the paired t-test for each attribute directly on the long-format data:
# Long-format example data (first 20 rows of the question's data set):
# assessors 1-10, each with one "MC" row and one "MV" row in `Product`,
# followed by ten numeric attribute columns.
df <- structure(list(Assessor = c(1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10),
Product = c("MC", "MV", "MC", "MV", "MV", "MC", "MC", "MV", "MV", "MC", "MC", "MV", "MC", "MV", "MC", "MV", "MV", "MC", "MV", "MC"),
Appearance = c(10, 10, 6, 7, 9, 6, 7, 8, 9, 8, 10, 8, 6, 6, 9, 8, 8, 8, 9, 9),
Aroma = c(10, 10, 7, 5, 9, 8, 6, 7, 5, 7, 9, 8, 6, 6, 5, 3, 6, 7, 9, 6),
Flavour = c(10, 10, 8, 4, 10, 7, 7, 6, 8, 8, 9, 10, 8, 8, 6, 8, 7, 9, 9, 8),
Texture = c(10, 10, 8, 8, 9, 6, 7, 8, 8, 8, 9, 10, 8, 8, 9, 8, 8, 9, 9, 8),
`JAR Colour` = c(3, 2, 2, 3, 3, 3, 3, 3, 3, 2, 3, 2, 3, 2, 3, 3, 3, 3, 3, 3),
`JAR Strength Chocolate` = c(2, 2, 3, 2, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 3, 3, 2),
`JAR Strength Vanilla` = c(3, 3, 3, 2, 3, 2, 3, 3, 2, 3, 2, 3, 3, 3, 2, 2, 3, 3, 2, 3),
`JAR Sweetness` = c(2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 2, 3, 3, 3, 3, 2, 3, 3, 3, 3),
`JAR Creaminess` = c(3, 3, 3, 3, 3, 1, 3, 2, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3),
`Overall Acceptance` = c(9, 10, 8, 4, 10, 5, 7, 7, 8, 8, 9, 10, 8, 8, 8, 8, 8, 9, 8, 8)),
row.names = c(NA, -20L), class = c("tbl_df", "tbl", "data.frame"))
# Preview the first rows.
head(df)
#> # A tibble: 6 x 12
#> Assessor Product Appearance Aroma Flavour Texture `JAR Colour`
#> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1 MC 10 10 10 10 3
#> 2 1 MV 10 10 10 10 2
#> 3 2 MC 6 7 8 8 2
#> 4 2 MV 7 5 4 8 3
#> 5 3 MV 9 9 10 9 3
#> 6 3 MC 6 8 7 6 3
#> # ... with 5 more variables: JAR Strength Chocolate <dbl>,
#> # JAR Strength Vanilla <dbl>, JAR Sweetness <dbl>, JAR Creaminess <dbl>,
#> # Overall Acceptance <dbl>
library(tidyverse)
# Paired t-test p-value for every attribute column (everything except
# Assessor and Product), returned as a one-row tibble.
# The two samples are passed explicitly as x and y, matched by Assessor:
#  - the formula interface `t.test(y ~ g, paired = TRUE)` is rejected by
#    R >= 4.4.0, and
#  - it paired observations purely by row position within each Product.
# local() keeps the helper objects out of the global environment.
local({
  by_assessor <- df[order(df$Assessor), ]
  # Row positions of the two products (first level vs. second level).
  idx <- split(seq_len(nrow(by_assessor)), by_assessor$Product)
  stopifnot(length(idx) == 2L)
  map_df(
    by_assessor[-c(1:2)],
    ~ t.test(.x[idx[[1]]], .x[idx[[2]]], paired = TRUE)$p.value
  )
})
#> # A tibble: 1 x 10
#> Appearance Aroma Flavour Texture `JAR Colour` `JAR Strength Chocolate`
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 0.496 0.576 1 0.309 0.678 1
#> # ... with 4 more variables: JAR Strength Vanilla <dbl>, JAR Sweetness <dbl>,
#> # JAR Creaminess <dbl>, Overall Acceptance <dbl>
# Base-R variant: named numeric vector with one paired t-test p-value per
# attribute column (everything except Assessor and Product).
#  - vapply() pins the result type to one double per column, unlike sapply().
#  - x and y are passed explicitly, matched by Assessor, because the formula
#    interface `t.test(x ~ g, paired = TRUE)` is rejected by R >= 4.4.0 and
#    paired observations purely by row position within each Product.
# local() keeps the helper objects out of the global environment.
local({
  by_assessor <- df[order(df$Assessor), ]
  # Row positions of the two products (first level vs. second level).
  idx <- split(seq_len(nrow(by_assessor)), by_assessor$Product)
  stopifnot(length(idx) == 2L)
  vapply(
    by_assessor[-c(1:2)],
    function(x) t.test(x[idx[[1]]], x[idx[[2]]], paired = TRUE)$p.value,
    numeric(1)
  )
})
#> Appearance Aroma Flavour
#> 0.4961016 0.5763122 1.0000000
#> Texture JAR Colour JAR Strength Chocolate
#> 0.3092332 0.6783097 1.0000000
#> JAR Strength Vanilla JAR Sweetness JAR Creaminess
#> 0.6783097 1.0000000 0.4433319
#> Overall Acceptance
#> 0.7803523
Created on 2021-06-22 by the reprex package (v2.0.0)
This code generates a data frame just so:
library(tidyverse)
# Three example rank vectors; the object names become the `letter1` labels.
A <- c(7, 4, 3, 12, 6)
B <- c(1, 10, 9, 8, 5)
C <- c(5, 3, 1, 7, 6)
# tibble() replaces the deprecated data_frame(); pivot_longer() replaces the
# superseded gather(). arrange() keeps each letter's ranks together (it is
# stable, so the order within a letter is preserved), which matches the
# layout gather() produced for these columns.
df <- tibble(A, B, C) %>%
  pivot_longer(everything(), names_to = "letter1", values_to = "rank") %>%
  arrange(letter1)
# One row per letter, with that letter's ranks packed into a list-column.
nested <- df %>%
  group_by(letter1) %>%
  nest(ranks = c(rank))
nested
A grouped_df: 3 × 2
letter1 ranks
<chr> <list>
A 7, 4, 3, 12, 6
B 1, 10, 9, 8, 5
C 5, 3, 1, 7, 6
This is the desired data frame:
A tibble: 9 × 4
letter1 letter2 data1 data2
<chr> <chr> <list> <list>
A A 7, 4, 3, 12, 6 7, 4, 3, 12, 6
B A 1, 10, 9, 8, 5 7, 4, 3, 12, 6
C A 5, 3, 1, 7, 6 7, 4, 3, 12, 6
A B 7, 4, 3, 12, 6 1, 10, 9, 8, 5
B B 1, 10, 9, 8, 5 1, 10, 9, 8, 5
C B 5, 3, 1, 7, 6 1, 10, 9, 8, 5
A C 7, 4, 3, 12, 6 5, 3, 1, 7, 6
B C 1, 10, 9, 8, 5 5, 3, 1, 7, 6
C C 5, 3, 1, 7, 6 5, 3, 1, 7, 6
Once this step is solved, I'll run a mutate using data1 and data2 to get value, and then selecting letter1, letter2 and value will give an edgelist. I'm working with about 700 letters and the ranks lists will all be the same size and contain about 20 elements.
I'd expected to be able to use expand or expand.grid, but to no avail. Any tidyverse suggestions will be greatly appreciated.
crossing() can be used to build every pair of row indices, which are then bound together:
library(tidyr)
library(purrr)
library(dplyr)
# Build every (ind1, ind2) pair of row positions in `nested`; for each pair,
# place the two selected rows side by side and stack the results row-wise.
crossing(ind1 = seq_len(nrow(nested)),
ind2 = seq_len(nrow(nested))) %>%
# NOTE(review): both slices carry the same column names, so bind_cols()
# presumably name-repairs them; rename afterwards if the columns must be
# called letter1/letter2/data1/data2 -- confirm.
pmap_dfr(~ bind_cols(nested[..1,], nested[..2,]) )
We can use crossing after renaming the second dataframe.
# Cross `nested` with a copy of itself whose two columns are renamed, so every
# row of the first table is combined with every row of the second.
tidyr::crossing(nested, setNames(nested, c("letter2", "rank2")))
# letter1 ranks letter2 rank2
#1 A 7, 4, 3, 12, 6 A 7, 4, 3, 12, 6
#2 A 7, 4, 3, 12, 6 B 1, 10, 9, 8, 5
#3 A 7, 4, 3, 12, 6 C 5, 3, 1, 7, 6
#4 B 1, 10, 9, 8, 5 A 7, 4, 3, 12, 6
#5 B 1, 10, 9, 8, 5 B 1, 10, 9, 8, 5
#6 B 1, 10, 9, 8, 5 C 5, 3, 1, 7, 6
#7 C 5, 3, 1, 7, 6 A 7, 4, 3, 12, 6
#8 C 5, 3, 1, 7, 6 B 1, 10, 9, 8, 5
#9 C 5, 3, 1, 7, 6 C 5, 3, 1, 7, 6
The same is also valid for expand_grid.
# Same pairing with expand_grid(): `nested` combined with a renamed copy of
# itself.
tidyr::expand_grid(nested, setNames(nested, c("letter2", "rank2")))
I have a data frame with 3 months of data containing individual information. Individual information should stay constant over the whole period; however, in my real data set this is not the case. I would like to check row by row and flag the dates on which something went wrong during data entry.
Here is a sample of my dataset (the real dataset has more variables):
# Example data: 17 dated records for three ids, plus the per-individual
# attribute columns whose values change part-way through each id's records.
input <- data.frame(
  date = c(
    20190218, 20190219, 20190220, 20190221, 20190222, 20190223,
    20190101, 20190103, 20190105, 20190110, 20190112,
    20190218, 20190219, 20190220, 20190221, 20190222, 20190223
  ),
  # 6, 5 and 6 consecutive rows per id, in this order.
  id = rep(
    c("18105265-ab", "18161665-aa", "18502020-aa"),
    times = c(6, 5, 6)
  ),
  size         = c(3, 3, 3, 3, 2, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 1, 1),
  type         = c(4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 4, 4, 4, 4, 2, 2),
  county       = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5),
  member_p10   = c(3, 3, 3, 3, 2, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 1, 1),
  youngest_age = c(5, 5, 5, 5, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7),
  sex          = c(1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1),
  position     = c(5, 5, 5, 5, 5, 5, 4, 4, 4, 0, 0, 3, 3, 3, 3, 0, 0),
  stringsAsFactors = FALSE
)
Is there a way to do this kind of check? I would like to end up with this output:
date id size type county member_p10 youngest_age sex position
1 20190221 18105265-ab 3 4 1 3 5 1 5
2 20190222 18105265-ab 2 4 1 2 7 1 5
3 20190105 18161665-aa 2 4 1 2 7 2 4
4 20190110 18161665-aa 1 2 1 1 7 2 0
5 20190221 18502020-aa 2 4 5 2 7 1 3
6 20190222 18502020-aa 1 2 5 1 7 1 0
I am trying to use the mlogit package to run a rank-ordered logit on my data. I had participants choose their top three out of 24 choices, and then rank them in order of preference. When I try to prepare the data for analysis using the mlogit.data command, I keep getting the following error:
Error in rep(k, Z) : invalid 'times' argument
I am wondering if the error has to do with rank ties. Out of 24 alternatives, participants only ranked 3, leaving the rest blank. I have since replaced the blanks with 4s. My data looks like the following:
head(t)
id ch.1 ch.2 ch.3 ch.4 ch.5 ch.6 ch.7 ch.8 ch.9 ch.10 ch.11 ch.12 ch.13
1 4 4 4 4 4 4 4 4 4 4 4 4 2
2 4 4 4 4 4 2 4 4 4 4 4 3 4
3 4 4 4 4 4 4 4 4 4 4 2 4 4
4 4 4 4 4 4 4 4 1 4 4 4 4 2
5 4 4 4 4 1 4 4 4 4 4 4 4 3
6 3 4 4 4 4 4 4 4 2 4 4 4 4
ch.14 ch.15 ch.16 ch.17 ch.18 ch.19 ch.20 ch.21 ch.22 ch.23 ch.24
4 4 4 4 1 4 4 4 4 3 4
4 4 4 4 4 4 4 1 4 4 4
4 4 4 4 4 4 4 3 4 1 4
4 4 3 4 4 4 4 4 4 4 4
4 4 4 4 4 4 4 4 4 2 4
4 4 4 4 4 4 4 4 4 1 4
gender condition
0 0
1 0
0 1
1 1
0 0
1 1
Code for reproducible example:
# One vector per alternative (ch.1 ... ch.24); each holds 24 values, one per
# respondent. Values 1-3 are the ranks a respondent assigned; 4 is the filler
# used for alternatives the respondent left unranked.
ch.1 <- c(4, 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4)
ch.2 <- c(4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4)
ch.3 <- c(4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4)
ch.4 <- c(4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 3, 4, 2)
ch.5 <- c(4, 4, 4, 4, 1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 4, 2, 4,
4, 4, 4, 4)
ch.6 <- c(4, 2, 4, 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4)
ch.7 <- c(4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4)
ch.8 <- c(4, 4, 4, 1, 4, 4, 3, 4, 3, 4, 4, 3, 4, 2, 4, 4, 4, 4, 4, 4,
2, 4, 1, 4)
ch.9 <- c(4, 4, 4, 4, 4, 2, 4, 4, 4, 4, 4, 1, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4)
ch.10 <- c(4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4)
ch.11 <- c(4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 1, 4,
4, 4, 4, 4)
ch.12 <- c(4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 1, 4, 4, 4, 4, 4,
4, 4, 4, 3)
ch.13 <- c(2, 4, 4, 2, 3, 4, 1, 4, 2, 4, 2, 4, 4, 1, 2, 2, 4, 4, 4, 4,
3, 4, 4, 4)
ch.14 <- c(4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4)
ch.15 <- c(4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4)
ch.16 <- c(4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 4, 3, 4,
4, 4, 4, 4)
ch.17 <- c(4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4)
ch.18 <- c(1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 4, 2,
1, 4, 4, 4)
ch.19 <- c(4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 4, 4,
4, 4, 4, 4)
ch.20 <- c(4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3,
4, 1, 4, 4)
ch.21 <- c(4, 1, 3, 4, 4, 4, 4, 1, 4, 4, 1, 2, 3, 4, 4, 1, 4, 1, 4, 4,
4, 4, 3, 4)
ch.22 <- c(4, 4, 4, 4, 4, 4, 4, 2, 1, 2, 4, 4, 1, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4)
ch.23 <- c(3, 4, 1, 4, 2, 1, 2, 4, 4, 1, 4, 4, 2, 3, 3, 3, 1, 4, 4, 1,
4, 2, 2, 1)
ch.24 <- c(4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4)
# Respondent-level covariates drawn at random. Seeding the generator makes the
# draws repeatable, so the example is actually reproducible; `replace = TRUE`
# avoids the reassignable shorthand `T`.
set.seed(1)
gender <- sample(c(0, 1), 24, replace = TRUE)
condition <- sample(c(0, 1), 24, replace = TRUE)
# One row per respondent: the 24 rank columns followed by the two covariates.
t <- cbind(
  ch.1, ch.2, ch.3, ch.4, ch.5, ch.6, ch.7, ch.8, ch.9, ch.10,
  ch.11, ch.12, ch.13, ch.14, ch.15, ch.16, ch.17, ch.18, ch.19, ch.20,
  ch.21, ch.22, ch.23, ch.24, gender, condition
)
t <- as.data.frame(t)
# This is the call that raises the error quoted below.
# NOTE(review): presumably caused by the tied rank 4 shared by all unranked
# alternatives -- confirm against the mlogit documentation.
G <- mlogit.data(t, shape = "wide", choice = "ch", varying = 1:24, ranked = TRUE)
Error in rep(k, Z) : invalid 'times' argument
Thanks for any insight you can provide, and if mlogit can't handle this data, does anyone have any other suggestions?
letitburn,
I am quite sure that the issue is the ties at the bottom of the ranking. You have probably discovered by now Stata's "rologit" routine for rank-ordered data. It can handle these types of incomplete rankings.