Summarise grouping by varname - r

I'm analysing some data, and I've come a difficulty - I don't know how to summarise my whole dataset using the variable names as the "group".
A sample data:
structure(list(x4 = c(3, 4, 7, 4, 5, 1, 5, 2, 7, 1), x5 = c(2,
4, 4, 4, 5, 3, 6, 1, 7, 1), x6 = c(3, 5, 4, 7, 5, 4, 6, 4, 6,
2), x7 = c(4, 1, 6, 4, 6, 4, 6, 2, 7, 2), x9 = c(5, 5, 4, 5,
6, 3, 7, 5, 6, 1), x10 = c(3, 6, 5, 4, 6, 5, 6, 3, 6, 1), x11 = c(6,
7, 7, 7, 6, 7, 7, 5, 7, 4), x12 = c(6, 7, 6, 7, 6, 4, 6, 6, 7,
5), x14 = c(5, 7, 5, 6, 4, 6, 6, 5, 6, 4), x15 = c(4, 7, 7, 7,
6, 4, 6, 5, 6, 1), x16 = c(4, 7, 7, 7, 6, 5, 7, 3, 6, 4), x17 = c(4,
5, 5, 7, 6, 6, 7, 4, 6, 2), x18 = c(3, 4, 7, 7, 6, 5, 6, 4, 6,
2), x19 = c(5, 7, 5, 7, 6, 6, 6, 3, 6, 1), x22 = c(4, 4, 5, 7,
6, 7, 6, 5, 6, 2), x26 = c(6, 7, 5, 4, 6, 7, 7, 4, 6, 4), x29 = c(4,
7, 2, 7, 6, 4, 7, 3, 6, 1), x33 = c(3, 7, 7, 7, 6, 5, 6, 3, 6,
3), x34 = c(5, 5, 4, 7, 6, 7, 7, 5, 6, 2), x35 = c(4, 4, 7, 7,
5, 7, 6, 4, 6, 2), x36 = c(4, 7, 6, 7, 6, 5, 5, 4, 6, 2), x37 = c(3,
4, 7, 4, 5, 4, 6, 3, 5, 2), x49 = c(4, 7, 7, 7, 6, 5, 5, 6, 6,
3), x50 = c(4, 7, 6, 5, 5, 5, 6, 5, 7, 4)), row.names = c(NA,
-10L), class = "data.frame", .Names = c("x4", "x5", "x6", "x7",
"x9", "x10", "x11", "x12", "x14", "x15", "x16", "x17", "x18",
"x19", "x22", "x26", "x29", "x33", "x34", "x35", "x36", "x37",
"x49", "x50"))
I just want some statistics, like this:
summary <- dados_afc %>%
summarise_all(funs(mean, sd, mode, median))
But the result is a df with one observation and lots of variable. I wanted it to have 5 columns: varname, mean, sd, mode, median, but I'm not sure how to do it. Any tips?

Note: I am not aware of a built-in way to get mode from R. See here for some discussion:
Is there a built-in function for finding the mode?
# From the top answer there:
Mode <- function(x) {
ux <- unique(x)
ux[which.max(tabulate(match(x, ux)))]
}
To treat each column as a group, you can use tidyr::gather to convert your "wide" data into long form, and then dplyr::group_by to create groups with their own summary calculations:
library(tidyverse)
summary <- dados_afc %>%
gather(group, value) %>%
group_by(group) %>%
summarise_all(funs(mean, sd, Mode, median))
> summary
# A tibble: 24 x 5
group mean sd Mode median
<chr> <dbl> <dbl> <dbl> <dbl>
1 x10 4.5 1.72 6 5
2 x11 6.3 1.06 7 7
3 x12 6 0.943 6 6
4 x14 5.4 0.966 6 5.5
5 x15 5.3 1.89 7 6
6 x16 5.6 1.51 7 6
7 x17 5.2 1.55 6 5.5
8 x18 5 1.70 6 5.5
9 x19 5.2 1.87 6 6
10 x22 5.2 1.55 6 5.5

Related

Removing a row based on a condition

my_df <- tibble(
b1 = c(2, 1, 1, 2, 2, 2, 1, 1, 2),
b2 = c(NA, 4, 6, 2, 6, 6, 1, 1, 7),
b3 = c(5, 9, 8, NA, 2, 3, 9, 5, NA),
b4 = c(NA, 6, NA, 10, 12, 8, 3, 6, 2),
b5 = c(2, 12, 1, 7, 8, 5, 5, 6, NA),
b6 = c(9, 2, 4, 6, 7, 6, 6, 7, 9),
b7 = c(1, 3, 7, 7, 4, 2, 2, 9, 5),
b8 = c(NA, 8, 4, 5, 1, 4, 1, 3, 6),
b9 = c(4, 5, NA, 9, 5, 1, 1, 2, NA),
b10 = c(14, 3, NA, 2, 2, 2, 3, NA, 5))
I have a df like this, and would like to tell R to remove all '3' or 'NA' in b10 if b1 = 1. I have tried this with this, but it seems to keep the '3' and 'NA' instead of removing them;
new_df <- my_df %>% filter(is.na(b10) | b10 == 3 | b1==1 & b10 ==NA)
This should do the trick:
library(dplyr)
my_df %>%
filter(!(b1 == 1 & (is.na(b10) | b10 == 3)))
Edit: assuming you want to remove the rows where the conditions meet

Paired T-Test over multiple paired columns (wide data format)

I've converted a data frame into wide format and now want to compute paired t-tests to obtain p-values. I have managed to do this for each pair of columns individually, but it's a lot more code than I feel is necessary. I'm still very new to R, data and coding generally, and couldn't easily see a solution here on Stack Overflow.
My wide data frame is:
> head(df_wide)
# A tibble: 6 x 21
Assessor `Appearance1 `Appearance2 `Aroma_1 `Aroma_2 `Flavour_1 `Flavour_2
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 10 10 10 10 10 10
2 6 7 7 5 8 4
# ... with 14 more variables
I want to perform a paired T-Test over the attributes, i.e. Appearance1 and Appearance2, Aroma1 and Aroma2, etc. The 14 other variables are all <dbl> and are also attributes to be included as paired columns for the T-Test.
Ideally, the output would be a vector of just the p-values, rather than having all the information. I've managed to do that coding for individual pairs, but I wanted to know if this would be possible to do as part of performing the T-Test over multiple pairs of columns.
Here is the code I have for the first two attributes:
p_values <- c(t.test(df_wide$`Appearance1`, df_wide$`Appearance2`, paired = T)[["p.value"]],
t.test(df_wide$`Aroma1`, df_wide$`Aroma2`, paired = T)[["p.value"]])
This creates the vector I want, but is cumbersome and error-prone. Ideally, I'd be able to perform it over all the pairs at once without needing to use column names.
I do have the original data frame in long format, if it would be easier to do it using that (EDIT: used dput() for first 20 rows instead of head():
> dput(df_test[1:20,])
structure(list(Assessor = c(1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10),
Product = c("MC", "MV", "MC", "MV", "MV", "MC", "MC", "MV", "MV", "MC", "MC", "MV", "MC", "MV", "MC", "MV", "MV", "MC", "MV", "MC"),
Appearance = c(10, 10, 6, 7, 9, 6, 7, 8, 9, 8, 10, 8, 6, 6, 9, 8, 8, 8, 9, 9),
Aroma = c(10, 10, 7, 5, 9, 8, 6, 7, 5, 7, 9, 8, 6, 6, 5, 3, 6, 7, 9, 6),
Flavour = c(10, 10, 8, 4, 10, 7, 7, 6, 8, 8, 9, 10, 8, 8, 6, 8, 7, 9, 9, 8),
Texture = c(10, 10, 8, 8, 9, 6, 7, 8, 8, 8, 9, 10, 8, 8, 9, 8, 8, 9, 9, 8),
`JAR Colour` = c(3, 2, 2, 3, 3, 3, 3, 3, 3, 2, 3, 2, 3, 2, 3, 3, 3, 3, 3, 3),
`JAR Strength Chocolate` = c(2, 2, 3, 2, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 3, 3, 2),
`JAR Strength Vanilla` = c(3, 3, 3, 2, 3, 2, 3, 3, 2, 3, 2, 3, 3, 3, 2, 2, 3, 3, 2, 3),
`JAR Sweetness` = c(2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 2, 3, 3, 3, 3, 2, 3, 3, 3, 3),
`JAR Creaminess` = c(3, 3, 3, 3, 3, 1, 3, 2, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3),
`Overall Acceptance` = c(9, 10, 8, 4, 10, 5, 7, 7, 8, 8, 9, 10, 8, 8, 8, 8, 8, 9, 8, 8)),
row.names = c(NA, -20L), class = c("tbl_df", "tbl", "data.frame"))
The Product variable is the one which was used to make the paired columns in the wide format data frame. Thanks in advance.
if I understand correctly
df <- structure(list(Assessor = c(1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10),
Product = c("MC", "MV", "MC", "MV", "MV", "MC", "MC", "MV", "MV", "MC", "MC", "MV", "MC", "MV", "MC", "MV", "MV", "MC", "MV", "MC"),
Appearance = c(10, 10, 6, 7, 9, 6, 7, 8, 9, 8, 10, 8, 6, 6, 9, 8, 8, 8, 9, 9),
Aroma = c(10, 10, 7, 5, 9, 8, 6, 7, 5, 7, 9, 8, 6, 6, 5, 3, 6, 7, 9, 6),
Flavour = c(10, 10, 8, 4, 10, 7, 7, 6, 8, 8, 9, 10, 8, 8, 6, 8, 7, 9, 9, 8),
Texture = c(10, 10, 8, 8, 9, 6, 7, 8, 8, 8, 9, 10, 8, 8, 9, 8, 8, 9, 9, 8),
`JAR Colour` = c(3, 2, 2, 3, 3, 3, 3, 3, 3, 2, 3, 2, 3, 2, 3, 3, 3, 3, 3, 3),
`JAR Strength Chocolate` = c(2, 2, 3, 2, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 3, 3, 2),
`JAR Strength Vanilla` = c(3, 3, 3, 2, 3, 2, 3, 3, 2, 3, 2, 3, 3, 3, 2, 2, 3, 3, 2, 3),
`JAR Sweetness` = c(2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 2, 3, 3, 3, 3, 2, 3, 3, 3, 3),
`JAR Creaminess` = c(3, 3, 3, 3, 3, 1, 3, 2, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3),
`Overall Acceptance` = c(9, 10, 8, 4, 10, 5, 7, 7, 8, 8, 9, 10, 8, 8, 8, 8, 8, 9, 8, 8)),
row.names = c(NA, -20L), class = c("tbl_df", "tbl", "data.frame"))
head(df)
#> # A tibble: 6 x 12
#> Assessor Product Appearance Aroma Flavour Texture `JAR Colour`
#> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1 MC 10 10 10 10 3
#> 2 1 MV 10 10 10 10 2
#> 3 2 MC 6 7 8 8 2
#> 4 2 MV 7 5 4 8 3
#> 5 3 MV 9 9 10 9 3
#> 6 3 MC 6 8 7 6 3
#> # ... with 5 more variables: JAR Strength Chocolate <dbl>,
#> # JAR Strength Vanilla <dbl>, JAR Sweetness <dbl>, JAR Creaminess <dbl>,
#> # Overall Acceptance <dbl>
library(tidyverse)
map_df(df[-c(1:2)], ~t.test(.x ~ df$Product, paired = TRUE)$p.value)
#> # A tibble: 1 x 10
#> Appearance Aroma Flavour Texture `JAR Colour` `JAR Strength Chocolate`
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 0.496 0.576 1 0.309 0.678 1
#> # ... with 4 more variables: JAR Strength Vanilla <dbl>, JAR Sweetness <dbl>,
#> # JAR Creaminess <dbl>, Overall Acceptance <dbl>
sapply(df[-c(1:2)], function(x) t.test(x ~ df$Product, paired = TRUE)$p.value)
#> Appearance Aroma Flavour
#> 0.4961016 0.5763122 1.0000000
#> Texture JAR Colour JAR Strength Chocolate
#> 0.3092332 0.6783097 1.0000000
#> JAR Strength Vanilla JAR Sweetness JAR Creaminess
#> 0.6783097 1.0000000 0.4433319
#> Overall Acceptance
#> 0.7803523
Created on 2021-06-22 by the reprex package (v2.0.0)

Running Omega with Psych library in R

I have five items on a construct when i run alpha on it on i get the following results without any errors
psych::alpha(construct,
na.rm = TRUE,
title = 'myscale',
n.iter = 1000)
Reliability analysis myscale
Call: psych::alpha(x = construct, title = "myscale", na.rm = TRUE,
n.iter = 1000)
raw_alpha std.alpha G6(smc) average_r S/N ase mean sd
0.81 0.81 0.78 0.46 4.3 0.013 2.6 0.89
lower alpha upper 95% confidence boundaries
0.78 0.81 0.84
lower median upper bootstrapped confidence intervals
0.77 0.81 0.84
I have been reading the paper From Alpha to omega: A practical solution to the pervasive problem of internal consistency estimation link
It recommends using the code below
MBESS::ci.reliability(construct, interval.type="bca", B=1000, type = "omega")
$est
[1] 0.8107376
$se
[1] 0.01651936
$ci.lower
[1] 0.7764029
$ci.upper
[1] 0.839944
$conf.level
[1] 0.95
$type
[1] "omega"
$interval.type
[1] "bca bootstrap"
I have been trying to run omega on my sample set using the psych package to keep things consistent in my analysis
psych::omega(m = construct,
nfactors = 1, fm = "pa", n.iter = 1000, p = 0.05,
title = "Omega", plot = FALSE, n.obs = 506)
I get the two error messages
In factor.scores, the correlation matrix is singular, an approximation is used
Omega_h for 1 factor is not meaningful, just omega_t
This warning is occurring because the number of columns are two small for Omega_h. A previous question on SO answers this somewhat
McDonalds omega: warnings in R
The error im having is below
Error in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, :
I am sorry: missing values (NAs) in the correlation matrix do not allow me to continue.
Please drop those variables and try again.
In addition: There were 50 or more warnings (use warnings() to see the first 50)
There are no missing values so im not sure about error number two
The details of my construct are
Q1 Q2 Q3
Min. :0.000 Min. :0.000 Min. :0.000
1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000
Median :3.000 Median :2.000 Median :3.000
Mean :2.597 Mean :2.393 Mean :3.227
3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.:4.000
Max. :6.000 Max. :6.000 Max. :6.000
Q4 Q5
Min. :0.00 Min. :0.000
1st Qu.:1.00 1st Qu.:2.000
Median :2.00 Median :2.000
Mean :2.17 Mean :2.445
3rd Qu.:3.00 3rd Qu.:3.000
Max. :6.00 Max. :6.000
EDIT
Created data with the same properties - 100 entries (Alpha roughly 0.56) but it generates the same error on omega
structure(list(Q1 = c(4, 5, 3, 5, 4, 5, 3, 5, 5, 5, 6,
3, 5, 4, 6, 5, 5, 6, 7, 4, 5, 5, 3, 4, 4, 5, 4, 3, 5, 4, 5, 5,
6, 6, 3, 6, 3, 4, 4, 4, 6, 5, 3, 2, 6, 6, 4, 5, 4, 3, 6, 4, 4,
5, 6, 2, 4, 3, 4, 6, 4, 6, 4, 5, 5, 6, 4, 6, 5, 5, 4, 5, 6, 6,
2, 5, 4, 3, 4, 4, 4, 6, 3, 3, 5, 4, 4, 4, 5, 5, 5, 3, 6, 6, 6,
6, 5, 4, 3, 5), Q2 = c(7, 4, 4, 4, 4, 6, 6, 6, 7, 6, 5,
6, 5, 4, 5, 6, 6, 6, 7, 5, 4, 4, 6, 6, 4, 4, 6, 2, 6, 5, 4, 6,
4, 6, 6, 6, 5, 4, 4, 4, 4, 3, 3, 4, 4, 4, 4, 6, 2, 6, 6, 5, 4,
6, 6, 4, 4, 7, 6, 5, 5, 5, 5, 6, 5, 5, 4, 5, 5, 5, 4, 6, 7, 5,
5, 5, 6, 5, 6, 5, 6, 7, 2, 6, 5, 7, 3, 5, 5, 3, 3, 3, 7, 4, 5,
6, 6, 6, 5, 7), Q3 = c(5, 4, 5, 6, 4, 4, 5, 4, 2, 6, 5,
5, 5, 5, 7, 5, 5, 6, 7, 6, 3, 6, 6, 6, 5, 6, 6, 5, 5, 4, 5, 5,
6, 6, 5, 6, 5, 5, 4, 4, 6, 4, 4, 4, 4, 4, 4, 5, 5, 4, 5, 5, 4,
3, 5, 4, 5, 6, 6, 6, 4, 5, 5, 5, 6, 4, 5, 5, 7, 4, 5, 6, 6, 5,
5, 3, 3, 5, 4, 6, 5, 5, 1, 3, 5, 3, 2, 5, 4, 6, 6, 6, 6, 4, 6,
3, 6, 6, 6, 5), Q4 = c(6, 6, 4, 7, 4, 6, 7, 6, 7, 6, 6,
6, 5, 7, 7, 6, 6, 5, 7, 7, 6, 6, 7, 7, 6, 6, 6, 5, 6, 7, 5, 6,
7, 5, 4, 6, 4, 3, 6, 4, 6, 6, 6, 3, 5, 7, 5, 6, 4, 6, 7, 6, 7,
4, 6, 3, 5, 7, 5, 4, 6, 6, 4, 6, 5, 5, 5, 5, 7, 7, 7, 6, 6, 6,
5, 6, 6, 4, 5, 7, 6, 7, 3, 5, 6, 5, 6, 5, 5, 7, 7, 6, 6, 2, 7,
6, 6, 7, 7, 5)), .Names = c("Q1", "Q2", "Q3",
"Q4"), row.names = c(NA, 100L), class = "data.frame")
Can anyone see where im falling down?
Thank you for your time
so I tried this:
psych::omega(m = construct)
and it worked with this result:
Omega
Call: psych::omega(m = construct)
Alpha: 0.56
G.6: 0.49
Omega Hierarchical: 0.53
Omega H asymptotic: 0.89
Omega Total 0.6
Schmid Leiman Factor loadings greater than 0.2
g F1* F2* F3* h2 u2 p2
Q1 0.41 0.30 0.26 0.74 0.65
Q2 0.37 0.25 0.20 0.80 0.67
Q3 0.50 0.25 0.31 0.69 0.80
Q4 0.64 0.23 0.46 0.54 0.89
With eigenvalues of:
g F1* F2* F3*
0.95 0.15 0.06 0.05
general/max 6.35 max/min = 2.83
mean percent general = 0.75 with sd = 0.11 and cv of 0.15
Explained Common Variance of the general factor = 0.78
The degrees of freedom are -3 and the fit is 0
The number of observations was 100 with Chi Square = 0 with prob < NA
The root mean square of the residuals is 0
The df corrected root mean square of the residuals is NA
Compare this with the adequacy of just a general factor and no group factors
The degrees of freedom for just the general factor are 2 and the fit is 0.01
The number of observations was 100 with Chi Square = 0.62 with prob < 0.73
The root mean square of the residuals is 0.03
The df corrected root mean square of the residuals is 0.05
RMSEA index = 0 and the 90 % confidence intervals are NA 0.14
BIC = -8.59
Measures of factor score adequacy
g F1* F2* F3*
Correlation of scores with factors 0.75 0.37 0.27 0.24
Multiple R square of scores with factors 0.57 0.14 0.07 0.06
Minimum correlation of factor score estimates 0.14 -0.72 -0.86 -0.88
Total, General and Subset omega for each subset
g F1* F2* F3*
Omega total for total scores and subscales 0.60 0.37 0.31 0.46
Omega general for total scores and subscales 0.53 0.25 0.25 0.41
Omega group for total scores and subscales 0.06 0.12 0.06 0.05
I checked the defaults and nfactors = 3 and the n.iter = 1. Then I slowly increased the n.iter, and decreased the n.factor, and it keeps working until n.iter =7, and keeping nfactors as 3
psych::omega(m = construct, n.iter = 7, p = 0.05, nfactors = 3)
with your full dataset you should be able to get higher n.iter

Mark second location in a repeating pattern

I have a vector of numbers below which has a repeating pattern (usually 2, 3, 4, 5, 6 before starting over again, but sometimes one or more will not be in there due to holidays, etc). I want to mark the second occurrence in each of these sets (usually 3 but not always if for example 2 isnt there it would be 4 that I want marked). Any ideas how to flag what essentially is the 2nd business day of a week?
code example:
test_vector <- c(2, 3, 4, 5, 6, 2, 3, 4, 5, 6, 2, 3, 4, 5, 6, 2, 3, 4, 5, 6,
2, 3, 4, 2, 3, 4, 6, 2, 3, 4, 5, 6, 2, 3, 4, 5, 6, 2, 3, 4, 5,
6, 2, 3, 4, 5, 6, 2, 3, 4, 5, 6, 2, 3, 4, 5, 6, 2, 3, 4, 5, 6,
2, 3, 4, 5, 6, 2, 3, 4, 5, 6, 2, 3, 4, 5, 6, 2, 3, 4, 5, 6, 2,
3, 4, 5, 6, 2, 3, 4, 5, 3, 4, 5, 6, 2, 3, 4, 5, 6, 2, 3, 4, 5,
6, 2, 3, 4, 5, 6, 3, 4, 5, 6, 2, 3, 4, 5, 6, 2, 3, 4, 5, 6, 3,
4, 5, 6, 2, 3, 4, 5, 6, 2, 3, 4, 5, 6, 2, 3, 4, 5, 6, 2, 3, 4,
5, 6, 2, 3, 4, 5, 6, 2, 3, 4, 5, 6, 2, 3, 4, 5, 6, 2, 3, 4, 5,
6, 2, 3, 4, 5, 6, 2, 3, 4, 5, 6, 2, 3, 4, 5, 6, 2, 3, 4, 5, 6,
2, 3, 4, 5, 6, 3, 4, 5, 6, 2)
inds <- which(c(TRUE, diff(test_vector) != 1L) & #find start of week
c(TRUE, diff(test_vector[-1]) == 1L, FALSE) #protect against one-day weeks
) + 1L
test_vector[inds]
#[1] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 3 3 3 4 3 3 4 3 3 3 3 3 3 3 3 3 3 3 3 3 4
Not sure what you what as far as a flag, but this will let you know where that value exists.
for(i in 1:length(unique(test_vector))){
print(paste0(unique(test_vector)[i], " at position ", which(test_vector == unique(test_vector)[i])[2]))
}
To see the next sets you would change the [2] to 4 or 6 or 8 or however many sets you have.

Mlogit for Rank-Ordered Logit - Troubleshooting

I am trying to use the mlogit package to run a rank-ordered logit on my data. I had participants choose their top three out of 24 choices, and then rank them in order of preference. When I try to prepare the data for analysis using the mlogit.data command, I keep getting the following error:
Error in rep(k, Z) : invalid 'times' argument
I am wondering if the error has to do with rank ties. Out of 24 alternatives, participants only ranked 3, leaving the rest blank. I have since replaced the blanks with 4s. My data looks like the following:
head(t)
id ch.1 ch.2 ch.3 ch.4 ch.5 ch.6 ch.7 ch.8 ch.9 ch.10 ch.11 ch.12 ch.13
1 4 4 4 4 4 4 4 4 4 4 4 4 2
2 4 4 4 4 4 2 4 4 4 4 4 3 4
3 4 4 4 4 4 4 4 4 4 4 2 4 4
4 4 4 4 4 4 4 4 1 4 4 4 4 2
5 4 4 4 4 1 4 4 4 4 4 4 4 3
6 3 4 4 4 4 4 4 4 2 4 4 4 4
ch.14 ch.15 ch.16 ch.17 ch.18 ch.19 ch.20 ch.21 ch.22 ch.23 ch.24
4 4 4 4 1 4 4 4 4 3 4
4 4 4 4 4 4 4 1 4 4 4
4 4 4 4 4 4 4 3 4 1 4
4 4 3 4 4 4 4 4 4 4 4
4 4 4 4 4 4 4 4 4 2 4
4 4 4 4 4 4 4 4 4 1 4
gender condition
0 0
1 0
0 1
1 1
0 0
1 1
Code for reproducible example:
ch.1 <- c(4, 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4)
ch.2 <- c(4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4)
ch.3 <- c(4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4)
ch.4 <- c(4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 3, 4, 2)
ch.5 <- c(4, 4, 4, 4, 1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 4, 2, 4,
4, 4, 4, 4)
ch.6 <- c(4, 2, 4, 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4)
ch.7 <- c(4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4)
ch.8 <- c(4, 4, 4, 1, 4, 4, 3, 4, 3, 4, 4, 3, 4, 2, 4, 4, 4, 4, 4, 4,
2, 4, 1, 4)
ch.9 <- c(4, 4, 4, 4, 4, 2, 4, 4, 4, 4, 4, 1, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4)
ch.10 <- c(4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4)
ch.11 <- c(4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 1, 4,
4, 4, 4, 4)
ch.12 <- c(4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 1, 4, 4, 4, 4, 4,
4, 4, 4, 3)
ch.13 <- c(2, 4, 4, 2, 3, 4, 1, 4, 2, 4, 2, 4, 4, 1, 2, 2, 4, 4, 4, 4,
3, 4, 4, 4)
ch.14 <- c(4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4)
ch.15 <- c(4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4)
ch.16 <- c(4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 4, 3, 4,
4, 4, 4, 4)
ch.17 <- c(4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4)
ch.18 <- c(1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 4, 2,
1, 4, 4, 4)
ch.19 <- c(4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 4, 4,
4, 4, 4, 4)
ch.20 <- c(4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3,
4, 1, 4, 4)
ch.21 <- c(4, 1, 3, 4, 4, 4, 4, 1, 4, 4, 1, 2, 3, 4, 4, 1, 4, 1, 4, 4,
4, 4, 3, 4)
ch.22 <- c(4, 4, 4, 4, 4, 4, 4, 2, 1, 2, 4, 4, 1, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4)
ch.23 <- c(3, 4, 1, 4, 2, 1, 2, 4, 4, 1, 4, 4, 2, 3, 3, 3, 1, 4, 4, 1,
4, 2, 2, 1)
ch.24 <- c(4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4)
gender <- sample(c(0,1), 24, replace=T)
condition <- sample(c(0,1), 24, replace=T)
t <- cbind(ch.1, ch.2, ch.3, ch.4, ch.5, ch.6, ch.7, ch.8, ch.9, ch.10,
ch.11, ch.12, ch.13, ch.14, ch.15, ch.16, ch.17, ch.18, ch.19, ch.20,
ch.21, ch.22, ch.23, ch.24, gender, condition)
t <- as.data.frame(t)
G <- mlogit.data(t, shape="wide", choice="ch", varying=1:24, ranked=TRUE)
Error in rep(k, Z) : invalid 'times' argument
Thanks for any insight you can provide, and if mlogit can't handle this data, does anyone have any other suggestions?
letitburn,
-- quite sure that the issue are ties at the bottom. You have probably discovered by now Stata's "rologit" routine for rank-ordered data. It can handle these types of incomplete rankings.

Resources