How to specify predictor matrix for stan data block? - r

Dear stackoverflow community. I want to use the variables w1 to w10 as predictor matrix matrix[N, W] weights; in my stan model. I am not certain how to accomplish that.
data frame
(dat <- data.frame(
id = c(1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4),
imput = c(1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5),
A = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
B = c(1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0),
Pass = c(278, 278, 278, 278, 278, 100, 100, 100, 100, 100, 153, 153, 153, 153, 153, 79, 79, 79, 79, 79),
Fail = c(740, 743, 742, 743, 740, 7581, 7581, 7581, 7581, 7581, 1231, 1232, 1235, 1235, 1232, 1731, 1732, 1731, 1731, 1731),
W_1= c(4, 3, 4, 3, 3, 1, 2, 1, 2, 1, 12, 12, 11, 12, 12, 3, 5, 3, 3, 3),
W_2= c(3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 12, 12, 12, 12, 12, 3, 3, 3, 3, 3),
W_3= c(4, 3, 3, 3, 3, 1, 2, 1, 1, 1, 12, 12, 11, 12, 12, 3, 3, 3, 3, 3),
W_4= c(3, 3, 4, 3, 3, 1, 1, 1, 2, 1, 12, 12, 13, 12, 12, 3, 2, 3, 3, 3),
W_5= c(3, 3, 3, 3, 3, 1, 0, 1, 1, 1, 12, 12, 12, 12, 12, 3, 3, 3, 3, 3),
W_6= c(4, 3, 3, 3, 3, 1, 1, 1, 1, 1, 12, 12, 12, 12, 12, 3, 3, 3, 3, 3),
W_7= c(3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 12, 12, 12, 12, 12, 3, 3, 3, 3, 3),
W_8= c(3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 15, 12, 12, 12, 12, 3, 3, 3, 3, 3),
W_9= c(3, 3, 3, 4, 3, 1, 1, 1, 1, 1, 12, 12, 12, 12, 12, 2, 3, 3, 3, 3),
W_10= c(3, 3, 4, 3, 3, 1, 1, 1, 1, 1, 12, 10, 12, 12, 12, 3, 3, 3, 3, 3)
))
creating list
# Pull every model input out of `dat` as a free-standing vector.
N <- nrow(dat)
id <- dat[["id"]]
imput <- dat[["imput"]]
A <- dat[["A"]]
B <- dat[["B"]]
ncases <- dat[["Pass"]]
# Total count per row: passes plus fails.
nn <- dat[["Pass"]] + dat[["Fail"]]

# One vector per weight column W_1 ... W_10.
w_1 <- dat[["W_1"]]
w_2 <- dat[["W_2"]]
w_3 <- dat[["W_3"]]
w_4 <- dat[["W_4"]]
w_5 <- dat[["W_5"]]
w_6 <- dat[["W_6"]]
w_7 <- dat[["W_7"]]
w_8 <- dat[["W_8"]]
w_9 <- dat[["W_9"]]
w_10 <- dat[["W_10"]]

# Named list handed to Stan; `imput` is passed under the name P and each
# weight vector is passed as its own element w1 ... w10.
dat1 <- list(
  N = N,
  ncases = ncases,
  A = A,
  B = B,
  id = id,
  P = imput,
  nn = nn,
  w1 = w_1,
  w2 = w_2,
  w3 = w_3,
  w4 = w_4,
  w5 = w_5,
  w6 = w_6,
  w7 = w_7,
  w8 = w_8,
  w9 = w_9,
  w10 = w_10
)
data block
data{
int N; // number of observations (rows of dat)
int ncases[N]; // dat$Pass in the R code
int A[N]; // dat$A in the R code
int B[N]; // dat$B in the R code
int nn[N]; // dat$Fail + dat$Pass in the R code
int id[N]; // individual id
// W must be a single integer, not an array: it is used below as the column
// dimension of `weights`, and a matrix size has to be a scalar int.
int W; // number of weight columns
int P[N]; // dat$imput, passed from R under the name P
matrix[N, W] weights; // N x W design matrix of weights
}
Thank you in advance for any help.

If W in the data block is actually an int (rather than a vector; i.e., W is the number of columns in weights), then I would expect this to do what you need:
# Data list for the Stan model. The ten weight vectors are bound column-wise
# into a single N x W numeric matrix that matches `matrix[N, W] weights`.
dat1 <- list(
  N = N,
  ncases = ncases, A = A, B = B, id = id, P = imput, nn = nn,
  weights = cbind(w_1, w_2, w_3, w_4, w_5, w_6, w_7, w_8, w_9, w_10)
)
# Derive W from the matrix itself instead of hard-coding 10, so the declared
# column count can never disagree with the data that is actually passed.
dat1$W <- ncol(dat1$weights)

Related

geom_bar(), Y-axis goes way above data value

I am trying to visualize a data frame from a survey. I'm currently trying to plot a barplot with geom_bar(), that takes in "Life Satisfaction" as the y-axis, and "Family Values" as the x-axis. Note that the survey answer for Life Satisfaction is 1(very unsatisfied) to 10(very satisfied).
But for some reason when I try to plot this barplot, the y-axis goes way above 10, and I don't understand why.
This is my code:
# Keep the rows whose B_COUNTRY_ALPHA is "PAK", drop rows with a missing Q49,
# then draw bars of Q49 against Q1 using the values as given.
df1 %>%
  filter(B_COUNTRY_ALPHA == "PAK") %>%
  drop_na(Q49) %>%
  ggplot(mapping = aes(x = Q1, y = Q49, fill = B_COUNTRY_ALPHA)) +
  geom_bar(stat = "identity") +
  labs(
    y = "Life Satisfaction",
    x = "Family Value"
  )
This is the graph that I get when I run it:
This is the first 20 rows of data that I want to work with:
On a side note: I was thinking of finding the mean of the Life Satisfaction data and maybe that will make the plot make sense but I am not sure how to do that
@GregorThomas I followed your instructions and I got this.
structure(list(B_COUNTRY_ALPHA = c("PAK", "PAK", "PAK", "PAK",
"PAK", "PAK", "PAK", "PAK", "PAK", "PAK", "PAK", "PAK", "PAK",
"PAK", "PAK", "PAK", "PAK", "PAK", "PAK", "PAK"), Q49 = c(7,
10, 10, 5, 1, 6, 6, 10, 10, 10, 4, 4, 8, 10, 10, 10, 10, 9, 10,
8), Q1 = c(1, 2, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1), Q2 = c(1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 2, 1,
4, 1, 2, 2, 2), Q3 = c(2, 2, 1, 1, 3, 1, 2, 2, 2, NA, 2, 4, 1,
1, 2, 2, 4, 2, 4, 2), Q4 = c(3, 4, 2, 4, 2, 3, 4, 2, 1, 4, 4,
4, 4, 1, 3, 4, 3, 4, 4, 2), Q5 = c(1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 1, 2, 1, 1, 1, 4, 1, 1, 4), Q6 = c(1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 4), Q57 = c(2, 2, 2, 1, 1,
1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 2, 2, 1), Q106 = c(7, 5,
10, 4, 10, 7, 1, 10, 10, 10, 1, 10, 1, 10, 10, 10, 9, 4, 10,
6), Q107 = c(7, 6, 5, 5, 10, 3, 1, 10, 10, NA, 1, 1, 1, 10, 3,
10, 10, 8, 10, 4), Q108 = c(7, 9, 1, 4, 1, 1, 10, 10, 5, 10,
10, 10, 1, 10, 10, 10, 10, 10, 1, 3), Q109 = c(6, 4, 1, 4, 1,
1, 1, 10, 10, 1, 6, 2, 10, 5, 10, 1, 10, 9, 1, 4), Q110 = c(6,
3, 1, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 10, 1, 10, 3, 1, 3), Q112 =
c(8,
8, 10, 6, 10, 5, 10, 10, 10, 10, NA, 10, 10, 10, 10, 10, 10,
10, 10, 7), Q163 = c(6, 2, 10, 7, 9, 10, 10, 10, 10, NA, 10,
10, 6, 10, 3, NA, 8, 7, NA, 9), Q164 = c(4, 9, 10, 8, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, NA, 8, 10, 10, 10), Q222 = c(2,
1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 4, NA, 1, NA, 2, 3, NA, 3),
Q260 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
1, 1, 0, 1), Q262 = c(33, 21, 60, 18, 60, 50, 45, 29, 62,
46, 35, 40, 30, NA, 45, NA, 30, 50, 36, 34), Q273 = c(1,
6, 1, 6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
Q275 = c(0, 2, 3, 3, 3, 2, 3, 2, 4, 0, 0, 0, 1, NA, 3, NA,
1, 1, 0, 1), Q281 = c(8, 0, 3, 0, 10, 3, 4, 6, 3, 8, 4, 4,
4, 0, 5, 0, 0, 0, 9, 0)), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -20L))
Here's a couple ideas using your sample data:
Use a dodged bar plot:
# Dodged bars: one bar per Q49 answer within each Q1 group, with bar height
# given by the number of rows (geom_bar's default count).
ggplot(
  sample_data,
  aes(x = factor(Q1), fill = factor(Q49))
) +
  geom_bar(position = position_dodge(preserve = "single")) +
  labs(
    fill = "Life Satisfaction",
    x = "Family Value",
    y = "Count of Responses"
  )
Use facets:
# One panel per Q1 value; inside each panel, count the rows for every Q49 answer.
ggplot(
  sample_data,
  aes(x = factor(Q49), fill = factor(Q49))
) +
  geom_bar() +
  facet_wrap(vars(paste("Family Value", Q1))) +
  labs(
    fill = "Life Satisfaction",
    x = "Life Satisfaction",
    y = "Count of Responses"
  )
Use a heat map:
# Heat map of the Q1 / Q49 combinations, drawn with a fixed aspect ratio.
ggplot(
  sample_data,
  aes(x = factor(Q1), y = factor(Q49))
) +
  geom_bin2d() +
  coord_fixed() +
  labs(
    x = "Family Value",
    y = "Life Satisfaction"
  )

What is another way to compute tertiles using tidyverse (or any other packages) in R?

I have WVS 6th wave dataframe. Computed the outgroup trust index (outgroup_index) and I want to divide this vector into 3 groups according to tertiles.
I use base R functions to do that:
# Recode the index into three groups based on its tertiles.
# Tertile cut points: the 0, 1/3, 2/3 and 1 quantiles of the index.
tertiles <- quantile(filtered_df$outgroup_index, probs = (0:3) / 3)
# Bin the index at those cut points. `include.lowest = TRUE` closes the first
# interval on the left so the minimum value is not turned into NA.
# (Spelled TRUE rather than T, because T is an ordinary, reassignable variable.)
filtered_df$index_recoded <- cut(
  filtered_df$outgroup_index,
  breaks = tertiles,
  include.lowest = TRUE
)
But I am wondering about other possible and more neat ways to do it (preferably using dplyr/tidyverse or any other packages)?
Data:
structure(list(V2 = structure(c(643, 643, 643, 643, 643, 643,
643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643,
643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643,
643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643,
643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643,
643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643,
643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643,
643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643,
643, 643, 643, 643), label = "Country/region", format.spss = "F4.0", labels = c(`Not asked in survey` = -4,
Algeria = 12, Azerbaijan = 31, Argentina = 32, Australia = 36,
Armenia = 51, Brazil = 76, Belarus = 112, Chile = 152, China = 156,
`Taiwan ROC` = 158, Colombia = 170, Cyprus = 196, Ecuador = 218,
Estonia = 233, Georgia = 268, Palestine = 275, Germany = 276,
Ghana = 288, Haiti = 332, `Hong Kong SAR` = 344, India = 356,
Iraq = 368, Japan = 392, Kazakhstan = 398, Jordan = 400, `South Korea` = 410,
Kuwait = 414, Kyrgyzstan = 417, Lebanon = 422, Libya = 434, Malaysia = 458,
Mexico = 484, Morocco = 504, Netherlands = 528, `New Zealand` = 554,
Nigeria = 566, Pakistan = 586, Peru = 604, Philippines = 608,
Poland = 616, Qatar = 634, Romania = 642, Russia = 643, Rwanda = 646,
Singapore = 702, Slovenia = 705, `South Africa` = 710, Zimbabwe = 716,
Spain = 724, Sweden = 752, Thailand = 764, `Trinidad and Tobago` = 780,
Tunisia = 788, Turkey = 792, Ukraine = 804, Egypt = 818, `United States` = 840,
Uruguay = 858, Uzbekistan = 860, Yemen = 887), class = c("haven_labelled",
"vctrs_vctr", "double")), V105 = structure(c(4, 3, 3, 4, 3, 4,
4, 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 2, 2, 2, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 2, 2, 2, 1, 1,
2, 4, 2, 2, 2, 1, 2, 1, 4, 2, 1, 4, 2, 3, 3, 2, 3, 2, 3, 2, 3,
2, 2, 3, 3, 3, 3, 3, 3, NA, 3, 3, 4, 2, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 2, 2, 3, 3, 3, 2, 3, NA), label = "Trust: People you meet for the first time (B)", format.spss = "F3.0", labels = c(`SE:Inapplicable ; RU:Inappropriate response; HT: Dropped out` = -5,
`Not asked` = -4, `Not applicable` = -3, `No answer` = -2, `Don<U+00B4>t know` = -1,
`Trust completely` = 1, `Trust somewhat` = 2, `Do not trust very much` = 3,
`Do not trust at all` = 4), class = c("haven_labelled", "vctrs_vctr",
"double")), V106 = structure(c(3, 2, NA, 4, 2, 4, 4, 3, 3, 4,
3, 3, 4, 4, 4, 4, NA, NA, NA, NA, 3, 2, 2, 2, 2, 2, 2, 3, 3,
3, 3, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 2, 2, 2, 1, 1, 2, 2,
2, 2, 1, 1, 2, 1, 4, 2, 1, 4, 2, 3, 3, 2, 2, 2, 3, 2, 3, 2, 2,
NA, 3, NA, 3, 3, 3, 2, 3, 3, 4, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 2, 2, 2, 3, 2, 2, 2, 3), label = "Trust: People of another religion (B)", format.spss = "F3.0", labels = c(`DE,SE:Inapplicable ; RU:Inappropriate response; HT: Dropped` = -5,
`Not asked` = -4, `Not applicable` = -3, `No answer` = -2, `Don<U+00B4>t know` = -1,
`Trust completely` = 1, `Trust somewhat` = 2, `Do not trust very much` = 3,
`Do not trust at all` = 4), class = c("haven_labelled", "vctrs_vctr",
"double")), V107 = structure(c(3, 4, NA, 4, 2, 4, 4, 3, 3, 4,
3, 3, 4, 4, 4, 4, 3, 2, NA, NA, 3, 2, 2, 2, 2, 2, 2, 3, 3, 3,
3, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 1, 2, 2, 2, 2, 1, 1, 2, 1, 2,
2, 1, 1, 2, 1, 4, 2, 1, 3, 2, 3, 2, 2, 2, 2, 3, 2, 3, 2, 2, NA,
3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 3, 2, 3, 2, 2, 2, 3), label = "Trust: People of another nationality (B)", format.spss = "F3.0", labels = c(`DE,SE:Inapplicable ; RU:Inappropriate response; HT: Dropped` = -5,
`Not asked` = -4, `Not applicable` = -3, `No answer` = -2, `Don<U+00B4>t know` = -1,
`Trust completely` = 1, `Trust somewhat` = 2, `Do not trust very much` = 3,
`Do not trust at all` = 4), class = c("haven_labelled", "vctrs_vctr",
"double")), V248 = structure(c(9, 8, 5, 8, 8, 8, 8, 9, 7, 9,
9, 5, 5, 6, 5, 5, 5, 5, 5, 4, 9, 9, 4, 9, 9, 3, 6, 9, 8, 9, 9,
9, NA, 9, 5, 9, 5, 7, 9, 5, 5, 9, 9, 8, 9, 9, 5, 5, 5, 9, 9,
8, 5, 8, 9, 9, 5, 8, 9, 9, 9, 7, 7, 5, 4, 6, 9, 6, 6, 9, 9, 5,
6, 7, 5, 4, 7, 7, 5, 5, 5, 5, 8, 9, 8, 9, 9, 9, 9, 9, 9, 9, 5,
9, 9, 5, 9, 8, 9, 5, 5), label = "Highest educational level attained", format.spss = "F3.0", labels = c(`AU: Inapplicable (No-school education) DE,SE:Inapplicable ;` = -5,
`Not asked` = -4, `Not applicable` = -3, `No answer` = -2, `Don<U+00B4>t know` = -1,
`No formal education` = 1, `Incomplete primary school` = 2, `Complete primary school` = 3,
`Incomplete secondary school: technical/ vocational type` = 4,
`Complete secondary school: technical/ vocational type` = 5,
`Incomplete secondary school: university-preparatory type` = 6,
`Complete secondary school: university-preparatory type` = 7,
`Some university-level education, without degree` = 8, `University - level education, with degree` = 9
), class = c("haven_labelled", "vctrs_vctr", "double")), V59 = structure(c(9,
5, 6, 8, 6, 7, NA, 8, 5, 3, 4, 7, 2, 1, 1, 6, 8, 6, NA, NA, 1,
5, NA, 6, 1, 2, 9, 5, 6, NA, NA, 3, 6, 6, 4, NA, 6, 6, NA, NA,
3, 9, 8, 10, 9, 6, 10, 9, 8, 9, 9, 10, 6, 4, 4, 6, 4, 10, 3,
3, 4, 3, 5, 4, 7, 3, 3, 4, 3, 7, 4, 6, 4, 1, 1, 6, 1, 1, 6, 1,
1, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 7, 3, 1, 5, 6, 7, 2, 4, 5
), label = "Satisfaction with financial situation of household", format.spss = "F3.0", labels = c(`HT: Dropped out survey;DE,SE:Inapplicable ; RU:Inappropriate` = -5,
`Not asked` = -4, `No answer` = -2, `Don<U+00B4>t know` = -1,
Dissatisfied = 1, `2` = 2, `3` = 3, `4` = 4, `5` = 5, `6` = 6,
`7` = 7, `8` = 8, `9` = 9, Satisfied = 10), class = c("haven_labelled",
"vctrs_vctr", "double")), V237 = structure(c(3, 2, 2, 2, NA,
1, 2, 2, 1, 2, 2, 2, 2, 3, 2, 1, 1, 3, 2, 2, NA, 2, 2, 3, 4,
2, 2, 1, NA, 1, 1, 1, NA, NA, NA, 1, NA, 1, 1, NA, 2, 1, 2, 1,
1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3,
2, 3, 2, 1, 2, 3, 2, 2, 2, NA, 2, 2, 4, 2, 2, 2, 1, 1, 2, 1,
2, 3, 2, 2, 1, 2, 2, 2, 3, 3, 2, 3, 2, 2, NA, 3), label = "Family savings during past year", format.spss = "F3.0", labels = c(`DE,SE:Inapplicable ; RU:Inappropriate response; BH: Missing;` = -5,
`Not asked` = -4, `Not applicable` = -3, `No answer` = -2, `Don<U+00B4>t know` = -1,
`Save money` = 1, `Just get by` = 2, `Spent some savings and borrowed money` = 3,
`Spent savings and borrowed money` = 4), class = c("haven_labelled",
"vctrs_vctr", "double")), V105_rec = c(1, 2, 2, 1, 2, 1, 1, 1,
1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 3, 3, 3, 4, 4, 3, 1,
3, 3, 3, 4, 3, 4, 1, 3, 4, 1, 3, 2, 2, 3, 2, 3, 2, 3, 2, 3, 3,
2, 2, 2, 2, 2, 2, NA, 2, 2, 1, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 3, 3, 2, 2, 2, 3, 2, NA), V106_rec = c(2, 3, NA, 1, 3,
1, 1, 2, 2, 1, 2, 2, 1, 1, 1, 1, NA, NA, NA, NA, 2, 3, 3, 3,
3, 3, 3, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 3, 3,
3, 4, 4, 3, 3, 3, 3, 4, 4, 3, 4, 1, 3, 4, 1, 3, 2, 2, 3, 3, 3,
2, 3, 2, 3, 3, NA, 2, NA, 2, 2, 2, 3, 2, 2, 1, 3, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 3, 3, 3, 2), V107_rec = c(2,
1, NA, 1, 3, 1, 1, 2, 2, 1, 2, 2, 1, 1, 1, 1, 2, 3, NA, NA, 2,
3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 4,
3, 3, 3, 3, 4, 4, 3, 4, 3, 3, 4, 4, 3, 4, 1, 3, 4, 2, 3, 2, 3,
3, 3, 3, 2, 3, 2, 3, 3, NA, 2, 3, 2, 2, 2, 3, 2, 2, 2, 3, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 2, 3, 2, 3, 3, 3, 2), outgroup_index = c(1.66666666666667,
2, 2, 1, 2.66666666666667, 1, 1, 1.66666666666667, 1.66666666666667,
1, 1.66666666666667, 2, 1, 1, 1, 1, 1.5, 2.5, 2, 2, 2, 3, 3,
3, 3, 3, 2.66666666666667, 2, 2, 2, 2, 1.33333333333333, 1.33333333333333,
2, 2, 2, 2, 2, 2, 2, 2, 2.66666666666667, 2, 3, 3, 3, 4, 4, 3,
2.66666666666667, 3, 3, 3.66666666666667, 4, 3, 4, 1, 3, 4, 1.33333333333333,
3, 2, 2.33333333333333, 3, 2.66666666666667, 3, 2, 3, 2, 3, 3,
2, 2, 2.5, 2, 2, 2, 3, 2, 2, 1.33333333333333, 3, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 3, 2.66666666666667, 2.66666666666667, 2,
2.66666666666667, 3, 2.66666666666667, 2), V59_rec = structure(c(5,
3, 3, 4, 3, 4, NA, 4, 3, 2, 2, 4, 1, 1, 1, 3, 4, 3, NA, NA, 1,
3, NA, 3, 1, 1, 5, 3, 3, NA, NA, 2, 3, 3, 2, NA, 3, 3, NA, NA,
2, 5, 4, 5, 5, 3, 5, 5, 4, 5, 5, 5, 3, 2, 2, 3, 2, 5, 2, 2, 2,
2, 3, 2, 4, 2, 2, 2, 2, 4, 2, 3, 2, 1, 1, 3, 1, 1, 3, 1, 1, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 4, 2, 1, 3, 3, 4, 1, 2, 3), labels = c(`Not satisfied at all` = 1,
`Rather not satisfied` = 2, `Neither satisfied, nor not satisfied` = 3,
`Rather satisfied` = 4, Satisfied = 5), class = c("haven_labelled",
"vctrs_vctr", "double")), V248_dummy = structure(c(1, 1, 0, 1,
1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1,
0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1,
0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1,
0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0), labels = c(`A university education and higher` = 1,
`No university education` = 0), class = c("haven_labelled", "vctrs_vctr",
"double")), V237_rec = structure(c(3, 2, 2, 2, NA, 1, 2, 2, 1,
2, 2, 2, 2, 3, 2, 1, 1, 3, 2, 2, NA, 2, 2, 3, 3, 2, 2, 1, NA,
1, 1, 1, NA, NA, NA, 1, NA, 1, 1, NA, 2, 1, 2, 1, 1, 1, 1, 1,
1, 1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 2, 3, 2, 1,
2, 3, 2, 2, 2, NA, 2, 2, 3, 2, 2, 2, 1, 1, 2, 1, 2, 3, 2, 2,
1, 2, 2, 2, 3, 3, 2, 3, 2, 2, NA, 3), labels = c(`Save money` = 1,
`Just get by` = 2, `Spent savings and borrowed money` = 3), class = c("haven_labelled",
"vctrs_vctr", "double"))), row.names = c(NA, -101L), class = c("tbl_df",
"tbl", "data.frame"), label = "filelabel")
A bit unintuitive, but ggplot2 has the functionality you are looking for.
# cut_number() bins at quantile break points (groups of roughly equal size),
# which is what tertiles are. cut_interval() would instead produce bins of
# equal width and only matches the quantile cut by coincidence.
filtered_df %>%
  mutate(index_recoded = ggplot2::cut_number(outgroup_index, 3))
And to verify the levels are the same:
# smaller dput would be nice
start <- Data
all(
  {
    # Base R: explicit tertile break points passed to cut().
    filtered_df <- start
    tertiles <- quantile(filtered_df$outgroup_index, c(0:3) / 3)
    filtered_df$index_recoded <- with(
      filtered_df,
      cut(outgroup_index,
        tertiles,
        include.lowest = TRUE
      )
    )
    filtered_df$index_recoded
  } == {
    # ggplot2 helper: same quantile-based binning in one call.
    tv_df <- start
    tv_df %>%
      mutate(index_recoded = ggplot2::cut_number(outgroup_index, 3)) %>%
      pull(index_recoded)
  }
)
[1] TRUE
cut has a simpler syntax if you want to divide the data into fixed intervals.
# Let cut() split the index into 3 intervals and store the resulting factor.
filtered_df[["index_recoded"]] <- cut(filtered_df[["outgroup_index"]], breaks = 3)
You can also use it with labels = FALSE to get 1, 2 and 3 as output.
# Same 3-interval split, but with labels switched off so the result holds
# integer interval codes instead of a factor.
filtered_df[["index_recoded"]] <- cut(
  filtered_df[["outgroup_index"]],
  breaks = 3,
  labels = FALSE
)

Bootstrapping multiple regression error: number of items to replace is not a multiple of replacement length

I want to bootstrap my dataset for multiple regression. Unfortunately I get this error message:
"number of items to replace is not a multiple of replacement length"
I suspect that the factors in my regression formula may be problematic.
What could I do to solve my problem?
My code is as follows (I read Andy Field's Discovering Statistics Using R):
# Bootstrap statistic for boot::boot(): refit `formula` on one resample and
# return its coefficients.
#
# data     Data frame holding every variable used in `formula`.
# indices  Row indices of the current bootstrap resample (supplied by boot).
# formula  Model formula passed on to lm().
#
# Returns a named numeric vector with one entry per coefficient of the model
# on the full `data`, always in the same order and of the same length.
# boot() stores each replicate in a fixed-width row, so a statistic whose
# length varies triggers "number of items to replace is not a multiple of
# replacement length". That happens when a resample misses a factor level:
# lm() drops the unused level and returns fewer coefficients. Coefficients
# that cannot be estimated in a resample are therefore reported as NA.
# NOTE(review): if the *reference* level of a factor is missing from a
# resample, the remaining dummy coefficients are relative to a different
# baseline; consider stratified resampling (boot's `strata`) for rare levels.
BootReg <- function(data, indices, formula) {
  # Coefficient names of the model on the full data (unused levels dropped,
  # exactly as lm() would do).
  full_frame <- stats::model.frame(
    formula,
    data = data,
    drop.unused.levels = TRUE
  )
  full_names <- colnames(
    stats::model.matrix(attr(full_frame, "terms"), full_frame)
  )

  d <- data[indices, , drop = FALSE]
  fit <- lm(formula, data = d)
  estimates <- coef(fit)

  # Align the resample's coefficients to the full-data layout by name.
  aligned <- stats::setNames(rep(NA_real_, length(full_names)), full_names)
  shared <- intersect(names(estimates), full_names)
  aligned[shared] <- estimates[shared]
  aligned
}
# Run 2000 bootstrap replicates of the regression; `formula` is not an
# argument of boot() itself and is forwarded to BootReg.
bootResults <- boot(
  data = mod.reg.data,
  statistic = BootReg,
  R = 2000,
  formula = TICS_Skala1 ~ HSPhoch + HSPhoch * extra.c + psy + sex + age.c
)
psy (psychiatric disease), sex and HSPhoch (high sensory-processing sensitivity) are factors. TICS_Skala1, extra.c and age.c are continuous variables.
my sample data:
> dput(head(mod.reg.data, 20))
structure(list(neo_01 = c(3, 4, 3, 0, 4, 4, 3, 2, 3, 1, 4, 2,
3, 3, 1, 2, 3, 4, 0, 2), neo_03 = c(1, 1, 1, 3, 1, 2, 0, 0, 0,
0, 0, 0, 1, 3, 1, 1, 1, 1, 3, 1), neo_04 = c(2, 4, 3, 0, 4, 3,
4, 3, 2, 3, 3, 3, 3, 4, 2, 4, 3, 4, 3, 3), neo_08 = c(3, 0, 1,
2, 3, 3, 4, 3, 2, 1, 2, 4, 0, 3, 1, 1, 3, 1, 3, 1), neo_12 = c(3,
1, 1, 2, 2, 2, 4, 1, 1, 2, 1, 4, 1, 3, 1, 1, 3, 2, 3, 2), neo_13 = c(3,
2, 2, 4, 3, 3, 3, 2, 2, 1, 2, 3, 0, 3, 1, 0, 2, 3, 0, 2), neo_16 = c(3,
1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 3, 0, 2, 0, 0, 0, 0, 2, 1), neo_17 = c(2,
1, 3, 0, 1, 1, 1, 4, 3, 1, 2, 2, 2, 3, 1, 0, 2, 0, 2, 2), neo_18 = c(2,
3, 4, 0, 4, 3, 4, 3, 3, 1, 3, 2, 4, 2, 3, 4, 3, 4, 2, 2), neo_21 = c(3,
0, 1, 2, 1, 2, 1, 1, 1, 1, 1, 3, 0, 4, 1, 0, 0, 0, 4, 1), neo_26 = c(3,
0, 0, 0, 2, 1, 3, 0, 1, 1, 0, 2, 3, 3, 0, 0, 1, 1, 4, 1), neo_27 = c(3,
3, 4, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 4, 3, 3, 3, 3, 2, 2), TICS_1 = c(3,
0, 3, 2, 2, 1, 3, 3, 1, 2, 0, 4, 2, 3, 2, 3, 4, 1, 3, 2), TICS_2 = c(3,
1, 1, 1, 1, 2, 0, 0, 0, 0, 0, 4, 3, 1, 1, 1, 2, 1, 2, 1), TICS_3 = c(2,
1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 3, 1, 2, 0, 1, 1, 0, 1, 0), TICS_4 = c(2,
0, 2, 0, 1, 2, 1, 3, 0, 0, 0, 4, 1, 2, 1, 2, 1, 1, 2, 2), TICS_5 = c(2,
3, 2, 1, 2, 2, 2, 2, 0, 2, 1, 2, 2, 2, 2, 1, 1, 1, 2, 1), TICS_6 = c(3,
2, 2, 4, 2, 2, 1, 3, 1, 1, 1, 2, 2, 2, 2, 1, 1, 2, 1, 2), TICS_7 = c(3,
3, 2, 2, 2, 2, 0, 3, 1, 2, 1, 4, 2, 0, 2, 1, 4, 1, 0, 1), TICS_8 =c(NA,
NA, NA, NA, NA, NA, NA, NA, 1, 1, 0, 4, 3, 1, 1, 3, 3, 2, 1,
2), TICS_9 = c(NA, NA, NA, NA, NA, NA, NA, NA, 0, 3, 2, 2, 1,
3, 0, 1, 3, 1, 1, 2), TICS_10 = c(2, 2, 0, 0, 2, 3, 0, 2, 1,
1, 2, 2, 1, 0, 0, 1, 1, 2, 2, 1), TICS_11 = c(1, 2, 1, 0, 1,
1, 0, 0, 0, 0, 2, 4, 1, 0, 0, 0, 0, 1, 1, 0), TICS_12 = c(2,
2, 1, 0, 1, 1, 1, 3, 1, 1, 1, 4, 2, 2, 2, 3, 3, 1, 2, 3), TICS_13=
c(1, 1, 3, 0, 2, 3, 2, 1, 1, 2, 1, 2, 2, 3, 2, 2, 1, 2, 2, 2),
TICS_14= c(4, 1, 1, 0, 1, 1, 3, 4, 0, 2, 0, 4, 2, 3, 0, 1, 3, 1, 1,
1), TICS_15= c(3, 1, 1, 3, 0, 2, 0, 2, 0, 2, 1, 2, 0, 1, 1, 1, 0, 0,
0, 1), ICS_16= c(4, 2, 1, 3, 3, 2, 1, 2, 1, 1, 1, 3, 1, 3, 1, 2, 3,
1, 2, 1), TICS_17= c(3, 0, 2, 2, 1, 2, 2, 3, 0, 1, 1, 2, 1, 2, 2, 3,
1, 1, 1, 2), TICS_18= c(3, 0, 1, 2, 0, 1, 1, 0, 0, 1, 0, 4, 2, 2, 0,
0, 1, 0, 2, 0), TICS_19= c(4, 2, 2, 2, 2, 2, 0, 2, 1, 2, 1, 4, 3, 2,
1, 1, 1, 0, 1, 2), TICS_20= c(2, 0, 2, 0, 0, 0, 1, 0, 1, 1, 0, 4, 1,
1, 0, 0, 1, 0, 2, 0), TICS_21= c(2, 1, 1, 0, 2, 3, 0, 1, 0, 1, 3, 2,
2, 1, 2, 1, 1, 1, 3, 0), TICS_22= c(3, 0, 1, 2, 2, 3, 1, 4, 0, 1, 1,
2, 3, 1, 1, 2, 3, 2, 0, 3), TICS_24= c(2, 0, 0, 1, 0, 0, 2, 0, 1, 1,
0, 2, 0, 0, 0, 1, 1, 0, 0, 1), TICS_25= c(4, 0, 1, 2, 2, 2, 4, 2, 1,
1, 0, 3, 0, 2, 0, 1, 2, 1, 2, 1), TICS_26= c(3, 0, 2, 2, 0, 1, 1, 0,
0, 1, 0, 2, 0, 2, 0, 0, 0, 0, 0, 1), TICS_27= c(3,
1, 4, 2, 3, 3, 4, 4, 0, 1, 0, 3, 2, 3, 2, 3, 2, 2, 4, 3), TICS_28=
c(3, 2, 2, 1, 1, 2, 1, 2, 1, 1, 0, 4, 1, 2, 1, 0, 1, 0, 0, 2),
TICS_29= c(2, 0, 1, 0, 2, 2, 1, 0, 1, 0, 0, 4, 1, 1, 0, 1, 0, 0, 1,
1), TICS_30= c(2, 1, 3, 1, 2, 2, 1, 0, 1, 1, 1, 3, 2, 0, 1, 0, 1, 2,
2, 2), TICS_31= c(2, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 3, 2, 1, 0, 0, 1,
0, 2, 1), TICS_32= c(4, 1, 1, 0, 1, 2, 1, 4, 0, 3, 0, 3, 3, 2, 1, 2,
2, 2, 3, 3), TICS_33= c(2,
1, 0, 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 2, 0, 0, 0, 1, 1, 1), TICS_34=
c(1, 3, 0, 0, 2, 1, 1, 1, 0, 0, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0),
TICS_35= c(1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 2, 0, 1, 0, 1, 1, 0, 4,
1), TICS_36= c(4, 1, 2, 3, 3, 2, 4, 1, 0, 1, 2, 3, 1, 3, 0, 1, 1, 0,
2, 1), TICS_37= c(1, 1, 2, 0, 2, 3, 3, 0, 1, 2, 1, 2, 1, 0, 2, 2, 1,
1, 2, 1), TICS_38= c(3, 0, 3, 1, 2, 2, 2, 3, 0, 2, 0, 4, 0, 2, 1, 2,
2, 1, 1, 2), TICS_39= c(1, 1, 2, 2, 3, 1, 1, 2, 1, 1, 1, 4, 1, 1, 1,
1, 3, 0, 0, 3), TICS_40= c(2, 0, 2, 0, 3, 2, 1, 2, 0, 0, 0, 3, 2, 2,
0, 1, 2, 0, 0, 1), TICS_41= c(2, 2, 0, 0, 2, 3, 1, 1, 0, 1, 3, 1, 2,
0, 1, 0, 0, 1, 2, 0), TICS_42= c(1, 2, 0, 0, 2, 1, 0, 0, 0, 1, 1, 2,
1, 1, 1, 0, 0, 0, 0, 0), TICS_43= c(4,
1, 1, 2, 2, 3, 3, 3, 0, 2, 1, 4, 3, 2, 1, 1, 3, 1, 2, 3), TICS_44=
c(3, 0, 2, 1, 2, 2, 3, 3, 0, 1, 0, 4, 1, 3, 0, 2, 2, 1, 3, 1),
TICS_45= c(2,
0, 1, 2, 0, 1, 0, 2, 0, 1, 0, 2, 0, 2, 0, 0, 0, 0, 0, 1), TICS_46=
c(2, 1, 0, 1, 2, 2, 1, 0, 0, 3, 1, 4, 3, 1, 1, 0, 1, 1, 2, 1),
TICS_47= c(3,
1, 2, 1, 2, 2, 1, 1, 1, 2, 0, 3, 1, 2, 1, 2, 1, 1, 4, 1), TICS_48=
c(1,
2, 3, 1, 2, 3, 1, 1, 0, 2, 2, 4, 2, 3, 2, 2, 1, 0, 2, 0), TICS_49=
c(1,
3, 2, 2, 1, 2, 2, 1, 0, 1, 1, 4, 3, 0, 1, 2, 4, 1, 0, 3), TICS_50=
c(3,
0, 3, 1, 1, 2, 4, 3, 0, 2, 0, 4, 2, 3, 2, 2, 2, 2, 2, 3), TICS_51=
c(1,
2, 0, 0, 2, 1, 0, 0, 0, 0, 1, 2, 1, 0, 1, 0, 0, 0, 0, 0), TICS_52=
c(2,
1, 3, 0, 1, 1, 1, 1, 0, 1, 0, 2, 0, 3, 0, 0, 0, 0, 0, 1), TICS_53=
c(2,
2, 2, 0, 2, 3, 1, 1, 0, 2, 2, 3, 2, 2, 2, 1, 1, 1, 2, 1), TICS_54=
c(3,
0, 3, 2, 2, 2, 3, 3, 1, 2, 0, 4, 0, 2, 0, 2, 2, 0, 2, 1), TICS_55=
c(2,
0, 0, 1, 0, 1, 2, 0, 0, 1, 0, 4, 0, 1, 0, 1, 1, 0, 2, 0), TICS_56=
c(4,
3, 1, 0, 2, 0, 0, 0, 1, 0, 1, 2, 1, 1, 1, 0, 0, 0, 2, 0), TICS_57=
c(2,
1, 1, 0, 2, 1, 0, 0, 1, 1, 1, 4, 3, 0, 0, 1, 1, 0, 0, 2), HSPS_1 =
c(3,
4, 3, 3, 4, 2, 4, 2, 4, 2, 3, 4, 2, 2, 4, 2, 3, 3, 5, 2), HSPS_2 =
c(4,
4, 3, 5, 5, 3, 2, 4, 5, 5, 3, 4, 3, 4, 4, 2, 4, 3, 4, 3), HSPS_3 =
c(4,
4, 4, 3, 3, 4, 3, 3, 3, 3, 3, 5, 3, 4, 5, 3, 3, 3, 4, 2), HSPS_4 =
c(4,
2, 1, 4, 2, 3, 5, 3, 5, 2, 3, 3, 3, 4, 3, 3, 4, 2, 5, 2), HSPS_5 =
c(2,
2, 2, 4, 3, 3, 3, 1, 4, 3, 3, 4, 3, 2, 4, 3, 4, 3, 5, 1), HSPS_6 =
c(4,
3, 1, 3, 4, 3, 3, 3, 3, 2, 1, 1, 1, 3, 5, 3, 3, 1, 1, 2), HSPS_7 =
c(4,
3, 1, 3, 4, 2, 3, 1, 4, 3, 2, 4, 1, 1, 5, 3, 3, 1, 5, 1), HSPS_8 =
c(4,
3, 5, 5, 4, 5, 5, 3, 4, 4, 3, 3, 2, 4, 4, 3, 4, 3, 3, 3), HSPS_9 =
c(3,
2, 2, 5, 3, 3, 4, 1, 5, 2, 2, 4, 1, 2, 4, 4, 3, 1, 5, 2), HSPS_10=
c(4,
4, 5, 4, 4, 4, 3, 1, 4, 3, 3, 4, 2, 1, 5, 3, 4, 4, 3, 2), HSPS_11=
c(3,
2, 2, 3, 2, 2, 3, 1, 3, 2, 4, 5, 1, 3, 3, 3, 3, 2, 3, 2), HSPS_12=
c(4,
4, 5, 5, 4, 5, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 4, 4, 5, 4), HSPS_13=
c(3,
2, 3, 2, 2, 2, 5, 2, 3, 2, 3, 4, 3, 3, 3, 3, 4, 2, 5, 2), HSPS_14=
c(3,
2, 2, 3, 3, 3, 5, 3, 3, 2, 3, 3, 2, 3, 2, 3, 3, 2, 4, 2), HSPS_15=
c(4,
4, 2, 3, 4, 3, 3, 3, 4, 2, 3, 3, 5, 2, 4, 2, 3, 3, 3, 2), HSPS_16=
c(2,
2, 1, 5, 2, 3, 2, 2, 3, 3, 3, 5, 2, 3, 3, 3, 2, 2, 5, 2), HSPS_17=
c(4,
3, 4, 5, 3, 4, 4, 2, 4, 3, 5, 4, 4, 4, 5, 4, 5, 2, 5, 4), HSPS_18=
c(2,
2, 1, 2, 1, 2, 2, 1, 3, 2, 2, 5, 2, 1, 4, 3, 2, 1, 5, 1), HSPS_19=
c(3,
2, 2, 4, 2, 2, 3, 1, 4, 2, 2, 4, 1, 1, 4, 3, 2, 2, 5, 2), HSPS_20=
c(4,
4, 4, 3, 4, 3, 5, 3, 3, 3, 4, 3, 3, 4, 4, 3, 5, 3, 5, 2), HSPS_21=
c(3,
3, 4, 5, 3, 3, 5, 2, 4, 2, 3, 5, 4, 4, 3, 2, 3, 2, 5, 2), HSPS_22=
c(3,
5, 5, 4, 5, 4, 3, 2, 4, 3, 3, 5, 3, 2, 4, 2, 4, 3, 5, 2), HSPS_23=
c(2,
2, 1, 4, 2, 3, 4, 3, 3, 2, 2, 5, 3, 3, 3, 3, 3, 2, 5, 3), HSPS_24=
c(3,
2, 2, 3, 3, 3, 3, 2, 4, 2, 3, 5, 4, 2, 4, 4, 4, 3, 4, 2), HSPS_25=
c(3,
2, 2, 5, 3, 3, 5, 1, 4, 2, 3, 5, 3, 2, 4, 3, 3, 2, 5, 2), HSPS_26=
c(2,
1, 1, 3, 3, 3, 3, 2, 3, 2, 2, 5, 2, 2, 3, 3, 3, 2, 5, 2), HSPS_27=
c(2,
2, 1, 4, 3, 2, 3, 4, 3, 1, 4, 1, 1, 3, 4, 2, 3, 2, 5, 3), sex =
structure(c(2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L), .Label = c("m", "w", "d"), class = "factor"), Bildung =
structure(c(6L,
5L, 5L, 6L, 6L, 6L, 5L, 6L, 5L, 6L, 6L, 4L, 6L, 5L, 5L, 6L, 6L,
5L, 5L, 6L), .Label = c("kein", "Haupt", "mittlereR", "Fachabi",
"Abi", "Studium"), class = "factor"), job = structure(c(6L, 2L,
2L, 2L, 2L, 6L, 2L, 6L, 5L, 2L, 2L, 1L, 6L, 2L, 2L, 2L, 6L, 2L,
2L, 6L), .Label = c("hausl", "Student", "Azubi", "Suchend", "Rente",
"berufstaetig"), class = "factor"), age = c(23, 24, 21, 70, 25,
29, 22, 25, 57, 24, 25, 30, 31, 20, 28, 27, 26, 21, 24, 53),
VPN = 1:20, consent = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label =
c("ja",
"nein"), class = "factor"), psy = c(0, 0, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0), HSPS = c(86, 75,
69, 102, 85, 82, 97, 59, 100, 68, 80, 106, 68, 73, 105, 79,
91, 63, 119, 59), neuro = c(16, 3, 4, 10, 10, 11, 12, 5,
5, 5, 5, 16, 5, 18, 4, 3, 8, 5, 19, 7), extra = c(15, 17,
19, 7, 19, 17, 18, 17, 16, 10, 17, 14, 15, 19, 11, 13, 16,
18, 9, 13), TICS_Skala1 = c(23, 1, 22, 11, 14, 16, 22, 25,
2, 11, 1, 29, 9, 20, 10, 19, 16, 9, 18, 16), TICS_Skala2 = c(14,
12, 11, 9, 11, 10, 4, 10, 5, 8, 5, 24, 13, 5, 6, 6, 14, 2,
1, 13), TICS_Skala3 = c(21, 6, 10, 5, 12, 14, 11, 20, 3,
11, 4, 27, 20, 13, 7, 13, 20, 11, 11, 18), TICS_Skala4 = c(13,
14, 13, 2, 16, 23, 10, 9, 3, 13, 15, 18, 14, 11, 13, 10,
7, 9, 17, 6), TICS_Skala5 = c(12, 2, 6, 5, 3, 5, 8, 3, 4,
6, 0, 18, 3, 7, 1, 6, 6, 1, 13, 3), TICS_Skala6 = c(10, 2,
3, 4, 4, 6, 3, 0, 0, 5, 2, 15, 10, 5, 2, 1, 5, 2, 8, 3),
TICS_Skala7 = c(15, 5, 9, 13, 4, 8, 4, 9, 1, 6, 2, 11, 2,
12, 3, 2, 1, 3, 2, 7), TICS_Skala8 = c(8, 10, 3, 0, 11, 7,
2, 1, 2, 2, 7, 20, 7, 2, 2, 2, 1, 1, 2, 3), TICS_Skala9 = c(12,
3, 4, 8, 8, 6, 9, 5, 2, 6, 5, 11, 3, 11, 1, 5, 9, 3, 7, 5
), TICS_Skala10 = c(32, 5, 18, 16, 19, 18, 21, 16, 5, 17,
7, 39, 12, 24, 3, 15, 20, 6, 25, 14), neuro.c = c(6.08921933085502,
-6.91078066914498, -5.91078066914498, 0.089219330855018,
0.089219330855018, 1.08921933085502, 2.08921933085502,
-4.91078066914498,
-4.91078066914498, -4.91078066914498, -4.91078066914498,
6.08921933085502, -4.91078066914498, 8.08921933085502,
-5.91078066914498,
-6.91078066914498, -1.91078066914498, -4.91078066914498,
9.08921933085502, -2.91078066914498), extra.c = c(5.21003717472119,
7.21003717472119, 9.21003717472119, -2.78996282527881,
9.21003717472119,
7.21003717472119, 8.21003717472119, 7.21003717472119,
6.21003717472119,
0.21003717472119, 7.21003717472119, 4.21003717472119,
5.21003717472119,
9.21003717472119, 1.21003717472119, 3.21003717472119,
6.21003717472119,
8.21003717472119, -0.78996282527881, 3.21003717472119), age.c =
c(-15.4460966542751,
-14.4460966542751, -17.4460966542751, 31.5539033457249,
-13.4460966542751,
-9.4460966542751, -16.4460966542751, -13.4460966542751,
18.5539033457249,
-14.4460966542751, -13.4460966542751, -8.4460966542751,
-7.4460966542751,
-18.4460966542751, -10.4460966542751, -11.4460966542751,
-12.4460966542751, -17.4460966542751, -14.4460966542751,
14.5539033457249), HSP.c = c(-1.92936802973978, -12.9293680297398,
-18.9293680297398, 14.0706319702602, -2.92936802973978,
-5.92936802973978,
9.07063197026022, -28.9293680297398, 12.0706319702602,
-19.9293680297398,
-7.92936802973978, 18.0706319702602, -19.9293680297398,
-14.9293680297398,
17.0706319702602, -8.92936802973978, 3.07063197026022,
-24.9293680297398,
31.0706319702602, -28.9293680297398), HSPhoch = c(1, 0, 0,
1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0)), row.names =
c(NA, 20L), class = "data.frame")

How to efficiently specify a large predictor matrix for stan data block

I would appreciate any help to create a large predictor matrix for stan data block.
I want to use variables w_1 to w_K from the data below as predictor "matrix" real<lower=0> weights[N, W]; in my model. K=W is the number of variables weights (columns of weights), N is the number of observation (rows of weights), so K and N are int.
my current approach below works for a few columns (e.g., K=10) but I have more, K>100 columns, therefore, given the data below, I need a function that provides an efficient and scalable way to do this:
#for the desired data block
# Stan data list with the ten weight vectors bound column-wise by hand.
dat1 <- list(
  N = N,
  ncases = ncases,
  A = A,
  B = B,
  id = id,
  P = imput,
  nn = nn,
  W = 10,
  weights = cbind(w_1, w_2, w_3, w_4, w_5, w_6, w_7, w_8, w_9, w_10)
)
I explored compose_data from tidybayes, but I fail to see how I could use it to build the desired data block. Any help would therefore be much appreciated.
#sample data
dat <- data.frame(
id = c(1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4),
imput = c(1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5),
A = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
B = c(1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0),
Pass = c(278, 278, 278, 278, 278, 100, 100, 100, 100, 100, 153, 153, 153, 153, 153, 79, 79, 79, 79, 79),
Fail = c(740, 743, 742, 743, 740, 7581, 7581, 7581, 7581, 7581, 1231, 1232, 1235, 1235, 1232, 1731, 1732, 1731, 1731, 1731),
W_1= c(4, 3, 4, 3, 3, 1, 2, 1, 2, 1, 12, 12, 11, 12, 12, 3, 5, 3, 3, 3),
W_2= c(3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 12, 12, 12, 12, 12, 3, 3, 3, 3, 3),
W_3= c(4, 3, 3, 3, 3, 1, 2, 1, 1, 1, 12, 12, 11, 12, 12, 3, 3, 3, 3, 3),
W_4= c(3, 3, 4, 3, 3, 1, 1, 1, 2, 1, 12, 12, 13, 12, 12, 3, 2, 3, 3, 3),
W_5= c(3, 3, 3, 3, 3, 1, 0, 1, 1, 1, 12, 12, 12, 12, 12, 3, 3, 3, 3, 3),
W_6= c(4, 3, 3, 3, 3, 1, 1, 1, 1, 1, 12, 12, 12, 12, 12, 3, 3, 3, 3, 3),
W_7= c(3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 12, 12, 12, 12, 12, 3, 3, 3, 3, 3),
W_8= c(3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 15, 12, 12, 12, 12, 3, 3, 3, 3, 3),
W_9= c(3, 3, 3, 4, 3, 1, 1, 1, 1, 1, 12, 12, 12, 12, 12, 2, 3, 3, 3, 3),
W_10= c(3, 3, 4, 3, 3, 1, 1, 1, 1, 1, 12, 10, 12, 12, 12, 3, 3, 3, 3, 3)
)
#my current approach
# Pull every model input out of `dat` as a free-standing vector.
N <- nrow(dat)
id <- dat[["id"]]
imput <- dat[["imput"]]
A <- dat[["A"]]
B <- dat[["B"]]
ncases <- dat[["Pass"]]
# Total count per row: passes plus fails.
nn <- dat[["Pass"]] + dat[["Fail"]]

# One vector per weight column W_1 ... W_10.
w_1 <- dat[["W_1"]]
w_2 <- dat[["W_2"]]
w_3 <- dat[["W_3"]]
w_4 <- dat[["W_4"]]
w_5 <- dat[["W_5"]]
w_6 <- dat[["W_6"]]
w_7 <- dat[["W_7"]]
w_8 <- dat[["W_8"]]
w_9 <- dat[["W_9"]]
w_10 <- dat[["W_10"]]

# Data list for the current data block, built from the whole data frame.
dat_list <- compose_data(dat, .n_name = n_prefix("N"))

# Data list for the desired data block: weights bound by hand into a matrix.
dat1 <- list(
  N = N,
  ncases = ncases,
  A = A,
  B = B,
  id = id,
  P = imput,
  nn = nn,
  W = 10,
  weights = cbind(w_1, w_2, w_3, w_4, w_5, w_6, w_7, w_8, w_9, w_10)
)
#current data block
// Current data block: every weight variable is declared as its own
// length-N array, so the block grows by one line per weight column.
data{
int N; // number of observations
// The R setup code fills the next five from dat: ncases <- dat$Pass,
// A <- dat$A, B <- dat$B, nn <- dat$Fail + dat$Pass, id <- dat$id.
int ncases[N];
int A[N];
int B[N];
int nn[N];
int id[N];
// One non-negative real array per weight column.
real<lower=0> w_1[N]; // weight variable w_1
real<lower=0> w_2[N]; // weight variable w_2
real<lower=0> w_3[N]; // weight variable w_3
real<lower=0> w_4[N]; // weight variable w_4
real<lower=0> w_5[N]; // weight variable w_5
real<lower=0> w_6[N]; // weight variable w_6
real<lower=0> w_7[N]; // weight variable w_7
real<lower=0> w_8[N]; // weight variable w_8
real<lower=0> w_9[N]; // weight variable w_9
real<lower=0> w_10[N]; // weight variable w_10
}
#desired data block
// Desired data block: all weight columns arrive as one N x W array.
data{
int N; // number of observations
// W has to be declared before it is used as a dimension of `weights`;
// the R data list supplies it as the number of weight columns.
int W; // number of weight columns
int ncases[N]; // ncases <- dat$Pass in the R code
int A[N]; // A <- dat$A in the R code
int B[N]; // B <- dat$B in the R code
int nn[N]; // nn <- dat$Fail + dat$Pass in the R code
int id[N]; // id <- dat$id in the R code
real<lower=0> weights[N, W]; // N by W block of non-negative weights
}
This question has also been posted here. Thanks in advance for any help.
If all the predictor columns in dat start with W_, then I think this should do the trick:
# Select every column of `dat` whose name starts with "W_" and convert the
# selection to a matrix; its column count becomes W in the data list.
w.matrix <- as.matrix(dat[, grepl("^W_", names(dat))])
dat1 <- list(
  N = N,
  ncases = ncases,
  A = A,
  B = B,
  id = id,
  P = imput,
  nn = nn,
  W = ncol(w.matrix),
  weights = w.matrix
)

Select Input in ggplot Shiny dashboard - Error: object not found

I am trying to create ggplot output using R Markdown Shiny Document. I need it to plot data based on the selection in a dropdown menu. My code:
# Combine `df` with the columns of `out`, then render the result as a
# pivot table.
df <- data.frame(df, out)
renderRpivotTable({
  rpivotTable(
    data = df,
    rows = c("organisationunitname", "X2"),
    cols = "X1",
    vals = "value",
    aggregatorName = "List Unique Values",
    inclusions = list(organisationunitname = list("All OUs")),
    rendererName = "Lab Table",
    width = "100%",
    height = "500px"
  )
})
# Organisation-unit codes offered as choices in the dropdown.
orgunit <- c(
  "Cy3L", "Yieu", "j9ao", "H3LY", "U3nd", "qU1l",
  "jXVh", "dXHb", "tCq8", "Blee", "5jra", "qO2V",
  "Qa9J", "2XIy", "MJpY", "tNKa", "UorU", "7pZt",
  "Mxsz", "WCkd", "BiDp", "Zw8w", "0J7c", "9YtI",
  "TAkB", "py3Q", "RdQt", "Yhv1", "PB0X", "H3L4",
  "INY7", "DpTW", "3zXP", "OqpO", "tiZU", "5wnz"
)
# Dropdown whose current selection is exposed to the server code as input$OU.
inputPanel(
  selectInput(
    "OU",
    label = "Select OU:",
    choices = orgunit,
    selected = "All OUs"
  )
)
# Bar chart of `value` against `X1`, with one facet row per `X2`, drawn from
# the subset of `df` selected through input$OU.
renderPlot({
# NOTE(review): `organisationunitname` appears as a bare name inside `[`, so R
# looks it up as a variable rather than as a column of `df`. The index also has
# no comma, so on a data frame it would pick columns rather than rows.
df1=reactive({return(df[organisationunitname %in% as.character(input$OU)])})
# `df1` is a reactive expression here, hence the call `df1()`.
ggplot(data = df1(),aes(x=X1,y=value))+geom_bar(stat = "identity")+facet_grid(X2~.)
})
It gives me this error: object 'organisationunitname' not found
Error Message
My data:
structure(list(country = c("Cy3L", "Yieu", "j9ao", "H3LY", "U3nd",
"qU1l", "jXVh", "dXHb", "tCq8", "Blee", "5jra", "qO2V", "Qa9J",
"2XIy", "MJpY", "tNKa", "UorU", "7pZt", "Mxsz", "WCkd", "BiDp",
"Zw8w", "0J7c", "9YtI", "TAkB", "py3Q", "RdQt", "Yhv1", "PB0X",
"H3L4", "INY7", "DpTW", "3zXP", "OqpO", "tiZU", "5wnz"), cd4_perform_result = structure(c(24L,
6L, 7L, 1L, 1L, 1L, 5L, 3L, 2L, 1L, 10L, 1L, 2L, 8L, 1L, 2L,
17L, 1L, 1L, 23L, 12L, 1L, 14L, 11L, 18L, 1L, 21L, 16L, 1L, 22L,
19L, 4L, 1L, 15L, 20L, 9L), .Label = c("0", "1", "11", "125",
"130", "14", "15", "194", "24", "261", "27", "31", "3442", "370",
"4", "5", "51", "567", "577", "73", "76", "79", "796", "9", "end"
), class = "factor"), cd4_participate_result = c(1, 8, 8, 1,
1, 1, 5, 3, 2, 1, 7, 1, 2, 9, 1, 2, 17, 1, 1, 18, 12, 1, 4, 15,
14, 1, 20, 16, 1, 21, 10, 6, 1, 19, 13, 3), cd4_pass_result = c(1,
4, 19, 1, 1, 1, 5, 3, 2, 1, 21, 1, 2, 20, 1, 2, 13, 1, 1, 14,
6, 1, 11, 12, 10, 1, 18, 2, 1, 16, 7, 17, 1, 15, 9, 3), eid_perform_result = c(2,
1, 7, 1, 1, 1, 1, 9, 1, 1, 8, 1, 2, 3, 5, 2, 5, 1, 1, 10, 5,
1, 4, 2, 11, 1, 5, 1, 1, 5, 9, 2, 1, 1, 9, 5), eid_participate_result = c(2,
1, 5, 1, 1, 1, 1, 8, 1, 1, 7, 1, 2, 10, 5, 2, 5, 1, 1, 4, 2,
1, 10, 2, 9, 1, 5, 1, 1, 5, 7, 2, 1, 1, 6, 5), eid_pass_result = c(2,
1, 5, 1, 1, 1, 1, 7, 1, 1, 6, 1, 2, 10, 1, 2, 5, 1, 1, 4, 2,
1, 9, 2, 8, 1, 5, 1, 1, 5, 6, 2, 1, 1, 5, 5), vl_perform_result = c(2,
1, 3, 1, 1, 1, 1, 9, 1, 1, 10, 1, 2, 11, 5, 2, 5, 1, 1, 6, 5,
1, 8, 7, 6, 1, 12, 1, 1, 5, 9, 2, 1, 1, 8, 5), vl_participate_result = c(2,
1, 7, 1, 1, 1, 1, 7, 1, 1, 8, 1, 2, 8, 4, 2, 4, 1, 1, 5, 2, 1,
4, 6, 3, 1, 9, 1, 1, 4, 7, 2, 1, 1, 6, 1), vl_pass_result = c(2,
1, 7, 1, 1, 1, 1, 7, 1, 1, 9, 1, 2, 8, 1, 2, 5, 1, 1, 4, 2, 1,
2, 6, 3, 1, 11, 1, 1, 5, 7, 2, 1, 1, 5, 1), hiv_perform_result = c(19,
29, 14, 1, 1, 1, 26, 21, 10, 1, 6, 11, 9, 7, 20, 27, 8, 15, 1,
28, 12, 1, 25, 18, 24, 1, 22, 5, 1, 23, 17, 16, 1, 2, 3, 4),
hiv_participate_result = c(19, 28, 14, 1, 1, 1, 22, 20, 4,
1, 16, 9, 10, 3, 12, 27, 5, 1, 1, 21, 6, 1, 24, 18, 13, 1,
25, 8, 1, 23, 15, 17, 1, 2, 26, 7), hiv_pass_result = c(20,
28, 14, 1, 1, 1, 18, 22, 7, 1, 17, 27, 11, 2, 24, 26, 10,
1, 1, 15, 4, 1, 21, 19, 12, 1, 23, 8, 1, 16, 13, 9, 1, 3,
25, 6), tbafb_perform_result = c(9, 1, 8, 1, 1, 1, 1, 7,
1, 1, 6, 1, 21, 5, 1, 2, 12, 1, 1, 15, 13, 1, 17, 11, 20,
1, 10, 1, 1, 14, 16, 4, 1, 18, 3, 1), tbafb_participate_result = c(1,
1, 18, 1, 1, 1, 1, 5, 1, 1, 12, 1, 19, 11, 1, 2, 6, 1, 1,
13, 7, 1, 10, 9, 14, 1, 8, 1, 1, 16, 15, 4, 1, 18, 3, 1),
tbafb_pass_result = c(1, 1, 19, 1, 1, 1, 1, 6, 1, 1, 13,
1, 20, 11, 1, 2, 4, 1, 1, 15, 5, 1, 7, 10, 12, 1, 8, 1, 1,
16, 9, 3, 1, 14, 18, 1), tbculture_perform_result = c(3,
1, 2, 1, 1, 1, 1, 1, 1, 1, 6, 1, 3, 8, 1, 2, 2, 1, 1, 7,
3, 1, 5, 4, 7, 1, 5, 1, 1, 3, 6, 6, 1, 3, 3, 1), tbculture_participate_result = c(1,
1, 2, 1, 1, 1, 1, 1, 1, 1, 6, 1, 4, 9, 1, 2, 2, 1, 1, 8,
2, 1, 7, 5, 7, 1, 1, 1, 1, 4, 4, 6, 1, 4, 4, 1), tbculture_pass_result = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1, 4, 8, 1, 2, 2, 1, 1, 9,
2, 1, 7, 5, 6, 1, 1, 1, 1, 4, 4, 7, 1, 4, 4, 1), tbxpert_perform_result = c(1,
1, 4, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 17, 1, 8, 3, 1, 1, 5,
9, 1, 16, 7, 13, 1, 4, 1, 1, 12, 11, 1, 1, 6, 14, 10), tbxpert_participate_result = c(1,
1, 5, 1, 1, 1, 1, 1, 1, 1, 16, 1, 1, 4, 1, 12, 3, 1, 1, 2,
7, 1, 17, 9, 11, 1, 1, 1, 1, 14, 10, 1, 1, 6, 8, 13), tbxpert_pass_result = c(1,
1, 2, 1, 1, 1, 1, 1, 1, 1, 13, 1, 1, 4, 1, 9, 3, 1, 1, 15,
6, 1, 14, 8, 8, 1, 1, 1, 1, 12, 6, 1, 1, 5, 7, 10)), .Names = c("organisationunitname",
"cd4_perform_result", "cd4_participate_result", "cd4_pass_result",
"eid_perform_result", "eid_participate_result", "eid_pass_result",
"vl_perform_result", "vl_participate_result", "vl_pass_result",
"hiv_perform_result", "hiv_participate_result", "hiv_pass_result",
"tbafb_perform_result", "tbafb_participate_result", "tbafb_pass_result",
"tbculture_perform_result", "tbculture_participate_result", "tbculture_pass_result",
"tbxpert_perform_result", "tbxpert_participate_result", "tbxpert_pass_result"
), row.names = c(NA, 36L), class = "data.frame")
I am not sure why it's not reading the "organisationunitname" column. Please help.
I think your error is this line:
df1=reactive({return(df[organisationunitname %in% as.character(input$OU)])})
Change it to:
# Keep the rows whose organisationunitname matches the selected OU: reference
# the column through `df$` and keep the trailing comma so the logical vector
# indexes rows. `df1` is a plain data frame, so use `data = df1` in ggplot().
df1 <- df[df$organisationunitname %in% as.character(input$OU), ]
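Your original subset also had the wrong number of dimensions: to filter the rows of a data frame, the logical index must be followed by a comma (`df[rows, ]`). In addition, `reactive` is not required here because the expression is already evaluated inside a reactive context, `renderPlot`; without it, `df1` is an ordinary data frame, so pass it to ggplot as `data = df1` rather than `df1()`.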

Resources