IPTW with multiple imputations: ordinal regression analysis? - r
I am currently dealing with multiple imputations in the context of IPTW analyses. Specifically, I have proceeded as follows: for the multiple imputations I have used the mice package, then performed the weighted analyses with the survey package and then combined the results of the outcome models using the mitools package to form the final estimates.
So far, so good. For my two binary outcome variables (dead/alive & functionally dependent [mrs_0to2==no]/independent[mrs_0to2==yes]) this works fine using binary logistic regression (via svyglm() function); however, for my last ordinal outcome variable I cannot find a corresponding function in the package that would allow me to do an ordinal weighted regression with imputed data.
I figured if anyone knows, it would be here...
The code I used is:
#Load data
library(tidyverse)
library(MASS)
currentDataset <- read_csv("Data/df.csv")
#Impute missing variables
library(ipw)
library(mice)
library(mitools)
formulas <- make.formulas(currentDataset)
formulas$age =age ~ time + mrs + nihss + srr + drr + hypert + dm + afib + cbf + mism + ica + m1 + m2 + rtpa + glc + mt
formulas$time =time ~ age + mrs + nihss + srr + drr + hypert + dm + afib + cbf + mism + ica + m1 + m2 + rtpa + glc + mt
formulas$mrs =mrs ~ age + time + nihss + srr + drr + hypert + dm + afib + cbf + mism + ica + m1 + m2 + rtpa + glc + mt
formulas$nihss =nihss ~ age + time + mrs + srr + drr + hypert + dm + afib + cbf + mism + ica + m1 + m2 + rtpa + glc + mt
formulas$srr =srr ~ age + time + mrs + nihss + drr + hypert + dm + afib + cbf + mism + ica + m1 + m2 + rtpa + glc + mt
formulas$drr =drr ~ age + time + mrs + nihss + srr + hypert + dm + afib + cbf + mism + ica + m1 + m2 + rtpa + glc + mt
formulas$hypert =hypert ~ age + time + mrs + nihss + srr + drr + dm + afib + cbf + mism + ica + m1 + m2 + rtpa + glc + mt
formulas$dm =dm ~ age + time + mrs + nihss + srr + drr + hypert + afib + cbf + mism + ica + m1 + m2 + rtpa + glc + mt
formulas$afib =afib ~ age + time + mrs + nihss + srr + drr + hypert + dm + cbf + mism + ica + m1 + m2 + rtpa + glc + mt
formulas$cbf =cbf ~ age + time + mrs + nihss + srr + drr + hypert + dm + afib + mism + ica + m1 + m2 + rtpa + glc + mt
formulas$mism =mism ~ age + time + mrs + nihss + srr + drr + hypert + dm + afib + cbf + ica + m1 + m2 + rtpa + glc + mt
formulas$ica =ica ~ age + time + mrs + nihss + srr + drr + hypert + dm + afib + cbf + mism + m1 + m2 + rtpa + glc + mt
formulas$m1 =m1 ~ age + time + mrs + nihss + srr + drr + hypert + dm + afib + cbf + mism + ica + m2 + rtpa + glc + mt
formulas$m2 =m2 ~ age + time + mrs + nihss + srr + drr + hypert + dm + afib + cbf + mism + ica + m1 + rtpa + glc + mt
formulas$rtpa =rtpa ~ age + time + mrs + nihss + srr + drr + hypert + dm + afib + cbf + mism + ica + m1 + m2 + glc + mt
formulas$glc =glc ~ age + time + mrs + nihss + srr + drr + hypert + dm + afib + cbf + mism + ica + m1 + m2 + rtpa + mt
formulas$evt =mt ~ age + time + mrs + nihss + srr + drr + hypert + dm + afib + cbf + mism + ica + m1 + m2 + rtpa + glc
meth <- make.method(currentDataset)
meth["perf_type"] <- ""
meth["a1"] <- ""
meth["mrs_ord"] <- ""
meth["mrs_0to2"] <- ""
meth["death"] <- ""
meth["group1"] <- ""
imputedDataset <- parlmice(currentDataset,
method = meth,
formulas = formulas,
m = 5,
n.core = 1,
n.imp.core = 5)
plot(imputedDataset)
currentDataset <- complete(imputedDataset, action = "long", include = TRUE)
#Calculate IPTW
split_imp <- currentDataset$.imp
mi_dataList <- split(currentDataset, split_imp)
for(i in 2:length(mi_dataList)) {
weight <- ipwpoint(exposure = evt, family = "binomial", link = "logit",
numerator =~ age+time+mrs+nihss+srr+drr+hypert+dm+afib+cbf+mism+ica+m1+m2+rtpa+glc,
denominator =~ age+time+mrs+nihss+srr+drr+hypert+dm+afib+cbf+mism+ica+m1+m2+rtpa+glc,
trunc = 0.01, data = as.data.frame(mi_dataList[[i]]))
mi_dataList[[i]]$.ipw0 = weight$weights.trunc
mi_dataList[[i]]$.final_weight <- mi_dataList[[i]]$.ipw0
}
mi_dataList[[1]]$.ipw0 <- NA
mi_dataList[[1]]$.final_weight <- NA
currentDataset <- unsplit(mi_dataList, split_imp)
I have now been able to estimate a dichotomous outcome parameter as follows (e.g. death):
#### Weighted analysis: death
library(mice)
library(mitools)
library(survey)
mi_dataList <- currentDataset[currentDataset$.imp != 0,]
mi_dataList <- split(mi_dataList, mi_dataList$.imp)
mi_dataList <- imputationList(mi_dataList)
clus <- svydesign(id =~ 1, weights =~ .final_weight, data = mi_dataList)
res <- with(clus, svyglm(death ~ mt, family = binomial))
summary(MIcombine(res), alpha = 0.05,
logeffect = TRUE)
res1 <- res[[1]]
car::infIndexPlot(res1)
Where I get stuck is in the ordinal analysis. I have tried it with the polr function from MASS and the ordinal package...
library(ordinal)
mi_dataList <- currentDataset[currentDataset$.imp != 0,]
mi_dataList <- split(mi_dataList, mi_dataList$.imp)
mi_dataList <- imputationList(mi_dataList)
clus <- svydesign(id =~ 1, weights =~ .final_weight, data = mi_dataList)
res <- with(clus, polr(as.factor(mrs_ord) ~ mt))
summary(MIcombine(res), alpha = 0.05,
logeffect = TRUE)
res1 <- res[[1]]
car::infIndexPlot(res1)
Related
Problem with Lavaan not computing standard errors, the information matrix could not be converted
I am trying to run a CFA in R. The code looks like this: item.model1 <- ' Reflective =~ IES_EFPR_3 + IES_EFPR_10 + IES_EFPR_16 + IES_EFPR_17 + IES_EFPR_23 + IES_EFPR_24 + IES_EFPR_25 + IES_EFPR_26 + IES_RHSC_11 + IES_RHSC_12 + IES_RHSC_13 + IES_RHSC_35 + IES_RHSC_36 + IES_RHSC_37 + IES_BFCC_31 + IES_BFCC_32 + IES_BFCC_33 + SREBQ_A + SREBQ_B + SREBQ_C + SREBQ_D + SREBQ_E Reactive =~ BES_1 + BES_2 + BES_3 + BES_4 + BES_5 + BES_6 + BES_7 + BES_8 + BES_9 + BES_10 + BES_11 + BES_12 + BES_13 + BES_14 + BES_15 + BES_16 + PFS_1 + PFS_2 + PFS_3 + PFS_4 + PFS_5 + PFS_6 + PFS_7 + PFS_8 + PFS_9 + PFS_10 + PFS_11 + PFS_12 + PFS_13 + PFS_14 + PFS_15 + AEBQ_153 + AEBQ_155 + AEBQ_154 + AEBQ_156 + AEBQ_157 + AEBQ_146 + AEBQ_145 + AEBQ_144 + AEBQ_147 + AEBQ_148 + AEBQ_149 + AEBQ_150 + AEBQ_151 + AEBQ_152 + DEBQ_11 + DEBQ_12 + DEBQ_13 + DEBQ_14 + DEBQ_15 + DEBQ_16 + DEBQ_17 + DEBQ_18 + DEBQ_19 + DEBQ_20 + TFEQ_D_16 + TFEQ_D_25 + TFEQ_D_31 + TFEQ_D_1 + TFEQ_D_2 + TFEQ_D_7 + TFEQ_D_9 + TFEQ_D_11 + TFEQ_D_13 + TFEQ_D_15 + TFEQ_D_20 + TFEQ_D_27 + TFEQ_D_36 + TFEQ_D_45 + TFEQ_D_49 + TFEQ_D_51 + TFEQ_H_3 + TFEQ_H_5 + TFEQ_H_8 + TFEQ_H_12 + TFEQ_H_17 + TFEQ_H_19 + TFEQ_H_22 + TFEQ_H_24 + TFEQ_H_26 + TFEQ_H_29 + TFEQ_H_34 + TFEQ_H_39 + TFEQ_H_41 + TFEQ_H_47 + PNEES_1 + PNEES_2 + PNEES_4 + PNEES_6 + PNEES_7 + PNEES_8 + PNEES_11 + PNEES_12 + PNEES_13 + PNEES_15 + PNEES_16 + PNEES_18 IES.EFPR =~ IES_EFPR_3 + IES_EFPR_10 + IES_EFPR_16 + IES_EFPR_17 + IES_EFPR_23 + IES_EFPR_24 + IES_EFPR_25 + IES_EFPR_26 IES.RHSC =~ IES_RHSC_11 + IES_RHSC_12 + IES_RHSC_13 + IES_RHSC_35 + IES_RHSC_36 + IES_RHSC_37 IES.BFCC =~ IES_BFCC_31 + IES_BFCC_32 + IES_BFCC_33 SREBQ. =~ SREBQ_A + SREBQ_B + SREBQ_C + SREBQ_D + SREBQ_E BES. =~ BES_1 + BES_2 + BES_3 + BES_4 + BES_5 + BES_6 + BES_7 + BES_8 + BES_9 + BES_10 + BES_11 + BES_12 + BES_13 + BES_14 + BES_15 + BES_16 PFS. =~ PFS_1 + PFS_2 + PFS_3 + PFS_4 + PFS_5 + PFS_6 + PFS_7 + PFS_8 + PFS_9 + PFS_10 + PFS_11 + PFS_12 + PFS_13 + PFS_14 + PFS_15 AEBQ.EOE =~ AEBQ_153 + AEBQ_155 + AEBQ_154 + AEBQ_156 + AEBQ_157 AEBQ.H =~ AEBQ_146 + AEBQ_145 + AEBQ_144 + AEBQ_147 + AEBQ_148 AEBQ.FR =~ AEBQ_149 + AEBQ_150 + AEBQ_151 + AEBQ_152 DEBQ.EX =~ DEBQ_11 + DEBQ_12 + DEBQ_13 + DEBQ_14 + DEBQ_15 + DEBQ_16 + DEBQ_17 + DEBQ_18 + DEBQ_19 + DEBQ_20 TFEQ.D =~ TFEQ_D_16 + TFEQ_D_25 + TFEQ_D_31 + TFEQ_D_1 + TFEQ_D_2 + TFEQ_D_7 + TFEQ_D_9 + TFEQ_D_11 + TFEQ_D_13 + TFEQ_D_15 + TFEQ_D_20 + TFEQ_D_27 + TFEQ_D_36 + TFEQ_D_45 + TFEQ_D_49 + TFEQ_D_51 TFEQ.H =~ TFEQ_H_3 + TFEQ_H_5 + TFEQ_H_8 + TFEQ_H_12 + TFEQ_H_17 + TFEQ_H_19 + TFEQ_H_22 + TFEQ_H_24 + TFEQ_H_26 + TFEQ_H_29 + TFEQ_H_34 + TFEQ_H_39 + TFEQ_H_41 + TFEQ_H_47 PNEES.N =~ PNEES_1 + PNEES_2 + PNEES_4 + PNEES_6 + PNEES_7 + PNEES_8 + PNEES_11 + PNEES_12 + PNEES_13 + PNEES_15 + PNEES_16 + PNEES_18 ' ### calculate model item.cfa.1 <- cfa(item.model1, data = item.dat, missing="pairwise", std.lv = TRUE, ordered =ALL) summary(item.cfa.1, fit.measures=TRUE, standardized=TRUE) When I run the code I get this error message: In lav_model_vcov(lavmodel = lavmodel, lavsamplestats = lavsamplestats, : lavaan WARNING: Could not compute standard errors! The information matrix could not be inverted. This may be a symptom that the model is not identified. I understand this could be because my model is not identified. However, when I check the df's it says there are 7021 df's. I am also not sure how to test my model to see if it under identified. Any advice would be very helpful.
ggplot2 mean symbol in boxplots
I have the following data: GENDER Addressee_gender_and_age likelihood Female F20 4 Female F20 5 Male F20 3 Female F20 3 Female F20 4 Male F20 1 I am interested in getting the boxplot p = ggplot(data = melteddata, aes(x=Addressee_gender_and_age, y=likelihood)) + ggtitle("Distribution of the likelihood of complaining by gender") + theme(plot.title = element_text(hjust = 0.5)) + geom_boxplot(aes(fill=GENDER)) p + facet_wrap( ~ Addressee_gender_and_age, scales="free") + stat_summary(fun=mean, colour="darkred", geom="point", shape=18, size=3,show_guide = FALSE) The problem is the mean symbol is given for the entire wrap as follows:
The problem is that you have the wrong value set as the x axis. I first create a reproducible example (https://stackoverflow.com/help/minimal-reproducible-example), and then change aes(x=age, y=x) to aes(x=gender, y=x) In your example it would be GENDER instead of Addressee_gender_and_age Test<-data.frame(x=rnorm(40),age=rep(c(10,20,30,40,50),8), gender=rep(c("Male","Female"),20)) library(ggplot2) ggplot(data = Test, aes(x=age, y=x)) + geom_boxplot(aes(fill=gender))+ facet_wrap( ~ age, scales="free") + stat_summary(fun=mean, colour="darkred", geom="point", shape=18, size=3,show_guide = FALSE) ggplot(data = Test, aes(x=gender, y=x)) + geom_boxplot(aes(fill=gender))+ facet_wrap( ~ age, scales="free") + stat_summary(fun=mean, colour="darkred", geom="point", shape=18, size=3,show_guide = FALSE)
Why glm make an input error on this function
I'm trying to run a glm in R but it results me with an error I can't figure it out how to solve: > GLM.3 <- glm(log(Total_Pass + 1) ~ Total_Pass + Total_Buzz + dm_plant + dm_cdeagua + dm_cultivo + dm_humed + dm_bnativ + dm_snaspe + Cultivos + BosqNat + Plantac + Pastizal + Matorral + Humedal + C_agua + Sup_imper + Tie_desnud + hielo + alt_media + pend_media + Temp_media + PP_media + CA _100 + PLAND _100 + PD _100 + ED _100 + AREA_MN _100 + ENN_MN_100 + CA _210 + PLAND _210 + PD _210 + ED _210 + AREA_MN _210 + ENN_MN_210 + CA _600 + PLAND _600 + PD _600 + ED _600 + AREA_MN _600 + ENN_MN_600 + SHDI + SIDI + MSIDI + SHEI + SIEI + MSIEI, family=gaussian(identity), data=bats_araucania_500) Error: unexpected input in "Total_Pass + Total_Buzz + dm_plant + dm_cdeagua + dm_cultivo + dm_humed + dm_bnativ + dm_snaspe + Cultivos + BosqNat + Plantac + Pastizal + Matorral + Humedal + C_agua + Sup_imper + Tie_desnud" Any help is useful
R can not handle column names with space: CA _210. Try to wrap these columns between two ` (backticks) or rename your columns without spaces. FYI : If you are using all columns as predictors, you can write your code this way: glm(log(y+1) ~ . , nextargs...)
Paste not working for long strings? [closed]
Closed. This question is not reproducible or was caused by typos. It is not currently accepting answers. This question was caused by a typo or a problem that can no longer be reproduced. While similar questions may be on-topic here, this one was resolved in a way less likely to help future readers. Closed 5 years ago. Improve this question I cannot for the life of me figure out why paste with collapse="\n" won't work for me, for just this line (it works in other parts of the code). Perhaps a character limit with the paste function? (I have trimmed leading and lagging white space) Below you will notice that paste does not in fact insert \n between the two long strings: > MM [1] "F1_all =~ target\nF2_all =~ target\nF3_all =~ target\nF4_all =~ target\nF5_all =~ target\nF6_all =~ target" > regsflat [1] "F1_all ~ 1*F1_0351 + 1*F1_0354 + 1*F1_0414 + 1*F1_0415 + 1*F1_0420 + 1*F1_0430 + 1*F1_0464 + 1*F1_0484 + 1*F1_0488 + 1*F1_0496 + 1*F1_0508 + 1*F1_0517 + 1*F1_0527 + 1*F1_0592 + 1*F1_0593 + 1*F1_0596 + 1*F1_0609 + 1*F1_0640 + 1*F1_0646 + 1*F1_0647 + 1*F1_0683 + 1*F1_0686 + 1*F1_0691 + 1*F1_0696 + 1*F1_0713 + 1*F1_0715 + 1*F1_0717 + 1*F1_0757 + 1*F1_0759 + 1*F1_0764 + 1*F1_0765 + 1*F1_0771 + 1*F1_0772 + 1*F1_0775 + 1*F1_0776 + 1*F1_0778 + 1*F1_0781 + 1*F1_0793 + 1*F1_0796 + 1*F1_0797 + 1*F1_0799 + 1*F1_0842 + 1*F1_0843 + 1*F1_0845 + 1*F1_0865 + 1*F1_0879 + 1*F1_0895 + 1*F1_0936 + 1*F1_1544 + 1*F1_1545 + 1*F1_1802 + 1*F1_1803 + 1*F1_1804 + 1*F1_1805 + 1*F1_1806 + 1*F1_1807 + 1*F1_1809 + 1*F1_1815 + 1*F1_2261 + 1*F1_2262 + 1*F1_2353 + 1*F1_2354 + 1*F1_2435 + 1*F1_BBRM1WA + 1*F1_BBRM2WA + 1*F1_BUSINESSBANKWA + 1*F1_CBWACENTRAL + 1*F1_CBWASOUTH + 1*F1_R&R-WESTCOAST\nF2_all ~ 1*F2_0351 + 1*F2_0354 + 1*F2_0414 + 1*F2_0415 + 1*F2_0420 + 1*F2_0430 + 1*F2_0464 + 1*F2_0484 + 1*F2_0488 + 1*F2_0496 + 1*F2_0508 + 1*F2_0517 + 1*F2_0527 + 1*F2_0592 + 1*F2_0593 + 1*F2_0596 + 1*F2_0609 + 1*F2_0640 + 1*F2_0646 + 1*F2_0647 + 1*F2_0683 + 1*F2_0686 + 1*F2_0691 + 1*F2_0696 + 1*F2_0713 + 1*F2_0715 + 1*F2_0717 + 1*F2_0757 + 1*F2_0759 + 1*F2_0764 + 1*F2_0765 + 1*F2_0771 + 1*F2_0772 + 1*F2_0775 + 1*F2_0776 + 1*F2_0778 + 1*F2_0781 + 1*F2_0793 + 1*F2_0796 + 1*F2_0797 + 1*F2_0799 + 1*F2_0842 + 1*F2_0843 + 1*F2_0845 + 1*F2_0865 + 1*F2_0879 + 1*F2_0895 + 1*F2_0936 + 1*F2_1544 + 1*F2_1545 + 1*F2_1802 + 1*F2_1803 + 1*F2_1804 + 1*F2_1805 + 1*F2_1806 + 1*F2_1807 + 1*F2_1809 + 1*F2_1815 + 1*F2_2261 + 1*F2_2262 + 1*F2_2353 + 1*F2_2354 + 1*F2_2435 + 1*F2_BBRM1WA + 1*F2_BBRM2WA + 1*F2_BUSINESSBANKWA + 1*F2_CBWACENTRAL + 1*F2_CBWASOUTH + 1*F2_R&R-WESTCOAST\nF3_all ~ 1*F3_0351 + 1*F3_0354 + 1*F3_0414 + 1*F3_0415 + 1*F3_0420 + 1*F3_0430 + 1*F3_0464 + 1*F3_0484 + 1*F3_0488 + 1*F3_0496 + 1*F3_0508 + 1*F3_0517 + 1*F3_0527 + 1*F3_0592 + 1*F3_0593 + 1*F3_0596 + 1*F3_0609 + 1*F3_0640 + 1*F3_0646 + 1*F3_0647 + 1*F3_0683 + 1*F3_0686 + 1*F3_0691 + 1*F3_0696 + 1*F3_0713 + 1*F3_0715 + 1*F3_0717 + 1*F3_0757 + 1*F3_0759 + 1*F3_0764 + 1*F3_0765 + 1*F3_0771 + 1*F3_0772 + 1*F3_0775 + 1*F3_0776 + 1*F3_0778 + 1*F3_0781 + 1*F3_0793 + 1*F3_0796 + 1*F3_0797 + 1*F3_0799 + 1*F3_0842 + 1*F3_0843 + 1*F3_0845 + 1*F3_0865 + 1*F3_0879 + 1*F3_0895 + 1*F3_0936 + 1*F3_1544 + 1*F3_1545 + 1*F3_1802 + 1*F3_1803 + 1*F3_1804 + 1*F3_1805 + 1*F3_1806 + 1*F3_1807 + 1*F3_1809 + 1*F3_1815 + 1*F3_2261 + 1*F3_2262 + 1*F3_2353 + 1*F3_2354 + 1*F3_2435 + 1*F3_BBRM1WA + 1*F3_BBRM2WA + 1*F3_BUSINESSBANKWA + 1*F3_CBWACENTRAL + 1*F3_CBWASOUTH + 1*F3_R&R-WESTCOAST\nF4_all ~ 1*F4_0351 + 1*F4_0354 + 1*F4_0414 + 1*F4_0415 + 1*F4_0420 + 1*F4_0430 + 1*F4_0464 + 1*F4_0484 + 1*F4_0488 + 1*F4_0496 + 1*F4_0508 + 1*F4_0517 + 1*F4_0527 + 1*F4_0592 + 1*F4_0593 + 1*F4_0596 + 1*F4_0609 + 1*F4_0640 + 1*F4_0646 + 1*F4_0647 + 1*F4_0683 + 1*F4_0686 + 1*F4_0691 + 1*F4_0696 + 1*F4_0713 + 1*F4_0715 + 1*F4_0717 + 1*F4_0757 + 1*F4_0759 + 1*F4_0764 + 1*F4_0765 + 1*F4_0771 + 1*F4_0772 + 1*F4_0775 + 1*F4_0776 + 1*F4_0778 + 1*F4_0781 + 1*F4_0793 + 1*F4_0796 + 1*F4_0797 + 1*F4_0799 + 1*F4_0842 + 1*F4_0843 + 1*F4_0845 + 1*F4_0865 + 1*F4_0879 + 1*F4_0895 + 1*F4_0936 + 1*F4_1544 + 1*F4_1545 + 1*F4_1802 + 1*F4_1803 + 1*F4_1804 + 1*F4_1805 + 1*F4_1806 + 1*F4_1807 + 1*F4_1809 + 1*F4_1815 + 1*F4_2261 + 1*F4_2262 + 1*F4_2353 + 1*F4_2354 + 1*F4_2435 + 1*F4_BBRM1WA + 1*F4_BBRM2WA + 1*F4_BUSINESSBANKWA + 1*F4_CBWACENTRAL + 1*F4_CBWASOUTH + 1*F4_R&R-WESTCOAST\nF5_all ~ 1*F5_0351 + 1*F5_0354 + 1*F5_0414 + 1*F5_0415 + 1*F5_0420 + 1*F5_0430 + 1*F5_0464 + 1*F5_0484 + 1*F5_0488 + 1*F5_0496 + 1*F5_0508 + 1*F5_0517 + 1*F5_0527 + 1*F5_0592 + 1*F5_0593 + 1*F5_0596 + 1*F5_0609 + 1*F5_0640 + 1*F5_0646 + 1*F5_0647 + 1*F5_0683 + 1*F5_0686 + 1*F5_0691 + 1*F5_0696 + 1*F5_0713 + 1*F5_0715 + 1*F5_0717 + 1*F5_0757 + 1*F5_0759 + 1*F5_0764 + 1*F5_0765 + 1*F5_0771 + 1*F5_0772 + 1*F5_0775 + 1*F5_0776 + 1*F5_0778 + 1*F5_0781 + 1*F5_0793 + 1*F5_0796 + 1*F5_0797 + 1*F5_0799 + 1*F5_0842 + 1*F5_0843 + 1*F5_0845 + 1*F5_0865 + 1*F5_0879 + 1*F5_0895 + 1*F5_0936 + 1*F5_1544 + 1*F5_1545 + 1*F5_1802 + 1*F5_1803 + 1*F5_1804 + 1*F5_1805 + 1*F5_1806 + 1*F5_1807 + 1*F5_1809 + 1*F5_1815 + 1*F5_2261 + 1*F5_2262 + 1*F5_2353 + 1*F5_2354 + 1*F5_2435 + 1*F5_BBRM1WA + 1*F5_BBRM2WA + 1*F5_BUSINESSBANKWA + 1*F5_CBWACENTRAL + 1*F5_CBWASOUTH + 1*F5_R&R-WESTCOAST\nF6_all ~ 1*F6_0351 + 1*F6_0354 + 1*F6_0414 + 1*F6_0415 + 1*F6_0420 + 1*F6_0430 + 1*F6_0464 + 1*F6_0484 + 1*F6_0488 + 1*F6_0496 + 1*F6_0508 + 1*F6_0517 + 1*F6_0527 + 1*F6_0592 + 1*F6_0593 + 1*F6_0596 + 1*F6_0609 + 1*F6_0640 + 1*F6_0646 + 1*F6_0647 + 1*F6_0683 + 1*F6_0686 + 1*F6_0691 + 1*F6_0696 + 1*F6_0713 + 1*F6_0715 + 1*F6_0717 + 1*F6_0757 + 1*F6_0759 + 1*F6_0764 + 1*F6_0765 + 1*F6_0771 + 1*F6_0772 + 1*F6_0775 + 1*F6_0776 + 1*F6_0778 + 1*F6_0781 + 1*F6_0793 + 1*F6_0796 + 1*F6_0797 + 1*F6_0799 + 1*F6_0842 + 1*F6_0843 + 1*F6_0845 + 1*F6_0865 + 1*F6_0879 + 1*F6_0895 + 1*F6_0936 + 1*F6_1544 + 1*F6_1545 + 1*F6_1802 + 1*F6_1803 + 1*F6_1804 + 1*F6_1805 + 1*F6_1806 + 1*F6_1807 + 1*F6_1809 + 1*F6_1815 + 1*F6_2261 + 1*F6_2262 + 1*F6_2353 + 1*F6_2354 + 1*F6_2435 + 1*F6_BBRM1WA + 1*F6_BBRM2WA + 1*F6_BUSINESSBANKWA + 1*F6_CBWACENTRAL + 1*F6_CBWASOUTH + 1*F6_R&R-WESTCOAST" > paste(MM, regsflat, collapse="\n") [1] "F1_all =~ target\nF2_all =~ target\nF3_all =~ target\nF4_all =~ target\nF5_all =~ target\nF6_all =~ target F1_all ~ 1*F1_0351 + 1*F1_0354 + 1*F1_0414 + 1*F1_0415 + 1*F1_0420 + 1*F1_0430 + 1*F1_0464 + 1*F1_0484 + 1*F1_0488 + 1*F1_0496 + 1*F1_0508 + 1*F1_0517 + 1*F1_0527 + 1*F1_0592 + 1*F1_0593 + 1*F1_0596 + 1*F1_0609 + 1*F1_0640 + 1*F1_0646 + 1*F1_0647 + 1*F1_0683 + 1*F1_0686 + 1*F1_0691 + 1*F1_0696 + 1*F1_0713 + 1*F1_0715 + 1*F1_0717 + 1*F1_0757 + 1*F1_0759 + 1*F1_0764 + 1*F1_0765 + 1*F1_0771 + 1*F1_0772 + 1*F1_0775 + 1*F1_0776 + 1*F1_0778 + 1*F1_0781 + 1*F1_0793 + 1*F1_0796 + 1*F1_0797 + 1*F1_0799 + 1*F1_0842 + 1*F1_0843 + 1*F1_0845 + 1*F1_0865 + 1*F1_0879 + 1*F1_0895 + 1*F1_0936 + 1*F1_1544 + 1*F1_1545 + 1*F1_1802 + 1*F1_1803 + 1*F1_1804 + 1*F1_1805 + 1*F1_1806 + 1*F1_1807 + 1*F1_1809 + 1*F1_1815 + 1*F1_2261 + 1*F1_2262 + 1*F1_2353 + 1*F1_2354 + 1*F1_2435 + 1*F1_BBRM1WA + 1*F1_BBRM2WA + 1*F1_BUSINESSBANKWA + 1*F1_CBWACENTRAL + 1*F1_CBWASOUTH + 1*F1_R&R-WESTCOAST\nF2_all ~ 1*F2_0351 + 1*F2_0354 + 1*F2_0414 + 1*F2_0415 + 1*F2_0420 + 1*F2_0430 + 1*F2_0464 + 1*F2_0484 + 1*F2_0488 + 1*F2_0496 + 1*F2_0508 + 1*F2_0517 + 1*F2_0527 + 1*F2_0592 + 1*F2_0593 + 1*F2_0596 + 1*F2_0609 + 1*F2_0640 + 1*F2_0646 + 1*F2_0647 + 1*F2_0683 + 1*F2_0686 + 1*F2_0691 + 1*F2_0696 + 1*F2_0713 + 1*F2_0715 + 1*F2_0717 + 1*F2_0757 + 1*F2_0759 + 1*F2_0764 + 1*F2_0765 + 1*F2_0771 + 1*F2_0772 + 1*F2_0775 + 1*F2_0776 + 1*F2_0778 + 1*F2_0781 + 1*F2_0793 + 1*F2_0796 + 1*F2_0797 + 1*F2_0799 + 1*F2_0842 + 1*F2_0843 + 1*F2_0845 + 1*F2_0865 + 1*F2_0879 + 1*F2_0895 + 1*F2_0936 + 1*F2_1544 + 1*F2_1545 + 1*F2_1802 + 1*F2_1803 + 1*F2_1804 + 1*F2_1805 + 1*F2_1806 + 1*F2_1807 + 1*F2_1809 + 1*F2_1815 + 1*F2_2261 + 1*F2_2262 + 1*F2_2353 + 1*F2_2354 + 1*F2_2435 + 1*F2_BBRM1WA + 1*F2_BBRM2WA + 1*F2_BUSINESSBANKWA + 1*F2_CBWACENTRAL + 1*F2_CBWASOUTH + 1*F2_R&R-WESTCOAST\nF3_all ~ 1*F3_0351 + 1*F3_0354 + 1*F3_0414 + 1*F3_0415 + 1*F3_0420 + 1*F3_0430 + 1*F3_0464 + 1*F3_0484 + 1*F3_0488 + 1*F3_0496 + 1*F3_0508 + 1*F3_0517 + 1*F3_0527 + 1*F3_0592 + 1*F3_0593 + 1*F3_0596 + 1*F3_0609 + 1*F3_0640 + 1*F3_0646 + 1*F3_0647 + 1*F3_0683 + 1*F3_0686 + 1*F3_0691 + 1*F3_0696 + 1*F3_0713 + 1*F3_0715 + 1*F3_0717 + 1*F3_0757 + 1*F3_0759 + 1*F3_0764 + 1*F3_0765 + 1*F3_0771 + 1*F3_0772 + 1*F3_0775 + 1*F3_0776 + 1*F3_0778 + 1*F3_0781 + 1*F3_0793 + 1*F3_0796 + 1*F3_0797 + 1*F3_0799 + 1*F3_0842 + 1*F3_0843 + 1*F3_0845 + 1*F3_0865 + 1*F3_0879 + 1*F3_0895 + 1*F3_0936 + 1*F3_1544 + 1*F3_1545 + 1*F3_1802 + 1*F3_1803 + 1*F3_1804 + 1*F3_1805 + 1*F3_1806 + 1*F3_1807 + 1*F3_1809 + 1*F3_1815 + 1*F3_2261 + 1*F3_2262 + 1*F3_2353 + 1*F3_2354 + 1*F3_2435 + 1*F3_BBRM1WA + 1*F3_BBRM2WA + 1*F3_BUSINESSBANKWA + 1*F3_CBWACENTRAL + 1*F3_CBWASOUTH + 1*F3_R&R-WESTCOAST\nF4_all ~ 1*F4_0351 + 1*F4_0354 + 1*F4_0414 + 1*F4_0415 + 1*F4_0420 + 1*F4_0430 + 1*F4_0464 + 1*F4_0484 + 1*F4_0488 + 1*F4_0496 + 1*F4_0508 + 1*F4_0517 + 1*F4_0527 + 1*F4_0592 + 1*F4_0593 + 1*F4_0596 + 1*F4_0609 + 1*F4_0640 + 1*F4_0646 + 1*F4_0647 + 1*F4_0683 + 1*F4_0686 + 1*F4_0691 + 1*F4_0696 + 1*F4_0713 + 1*F4_0715 + 1*F4_0717 + 1*F4_0757 + 1*F4_0759 + 1*F4_0764 + 1*F4_0765 + 1*F4_0771 + 1*F4_0772 + 1*F4_0775 + 1*F4_0776 + 1*F4_0778 + 1*F4_0781 + 1*F4_0793 + 1*F4_0796 + 1*F4_0797 + 1*F4_0799 + 1*F4_0842 + 1*F4_0843 + 1*F4_0845 + 1*F4_0865 + 1*F4_0879 + 1*F4_0895 + 1*F4_0936 + 1*F4_1544 + 1*F4_1545 + 1*F4_1802 + 1*F4_1803 + 1*F4_1804 + 1*F4_1805 + 1*F4_1806 + 1*F4_1807 + 1*F4_1809 + 1*F4_1815 + 1*F4_2261 + 1*F4_2262 + 1*F4_2353 + 1*F4_2354 + 1*F4_2435 + 1*F4_BBRM1WA + 1*F4_BBRM2WA + 1*F4_BUSINESSBANKWA + 1*F4_CBWACENTRAL + 1*F4_CBWASOUTH + 1*F4_R&R-WESTCOAST\nF5_all ~ 1*F5_0351 + 1*F5_0354 + 1*F5_0414 + 1*F5_0415 + 1*F5_0420 + 1*F5_0430 + 1*F5_0464 + 1*F5_0484 + 1*F5_0488 + 1*F5_0496 + 1*F5_0508 + 1*F5_0517 + 1*F5_0527 + 1*F5_0592 + 1*F5_0593 + 1*F5_0596 + 1*F5_0609 + 1*F5_0640 + 1*F5_0646 + 1*F5_0647 + 1*F5_0683 + 1*F5_0686 + 1*F5_0691 + 1*F5_0696 + 1*F5_0713 + 1*F5_0715 + 1*F5_0717 + 1*F5_0757 + 1*F5_0759 + 1*F5_0764 + 1*F5_0765 + 1*F5_0771 + 1*F5_0772 + 1*F5_0775 + 1*F5_0776 + 1*F5_0778 + 1*F5_0781 + 1*F5_0793 + 1*F5_0796 + 1*F5_0797 + 1*F5_0799 + 1*F5_0842 + 1*F5_0843 + 1*F5_0845 + 1*F5_0865 + 1*F5_0879 + 1*F5_0895 + 1*F5_0936 + 1*F5_1544 + 1*F5_1545 + 1*F5_1802 + 1*F5_1803 + 1*F5_1804 + 1*F5_1805 + 1*F5_1806 + 1*F5_1807 + 1*F5_1809 + 1*F5_1815 + 1*F5_2261 + 1*F5_2262 + 1*F5_2353 + 1*F5_2354 + 1*F5_2435 + 1*F5_BBRM1WA + 1*F5_BBRM2WA + 1*F5_BUSINESSBANKWA + 1*F5_CBWACENTRAL + 1*F5_CBWASOUTH + 1*F5_R&R-WESTCOAST\nF6_all ~ 1*F6_0351 + 1*F6_0354 + 1*F6_0414 + 1*F6_0415 + 1*F6_0420 + 1*F6_0430 + 1*F6_0464 + 1*F6_0484 + 1*F6_0488 + 1*F6_0496 + 1*F6_0508 + 1*F6_0517 + 1*F6_0527 + 1*F6_0592 + 1*F6_0593 + 1*F6_0596 + 1*F6_0609 + 1*F6_0640 + 1*F6_0646 + 1*F6_0647 + 1*F6_0683 + 1*F6_0686 + 1*F6_0691 + 1*F6_0696 + 1*F6_0713 + 1*F6_0715 + 1*F6_0717 + 1*F6_0757 + 1*F6_0759 + 1*F6_0764 + 1*F6_0765 + 1*F6_0771 + 1*F6_0772 + 1*F6_0775 + 1*F6_0776 + 1*F6_0778 + 1*F6_0781 + 1*F6_0793 + 1*F6_0796 + 1*F6_0797 + 1*F6_0799 + 1*F6_0842 + 1*F6_0843 + 1*F6_0845 + 1*F6_0865 + 1*F6_0879 + 1*F6_0895 + 1*F6_0936 + 1*F6_1544 + 1*F6_1545 + 1*F6_1802 + 1*F6_1803 + 1*F6_1804 + 1*F6_1805 + 1*F6_1806 + 1*F6_1807 + 1*F6_1809 + 1*F6_1815 + 1*F6_2261 + 1*F6_2262 + 1*F6_2353 + 1*F6_2354 + 1*F6_2435 + 1*F6_BBRM1WA + 1*F6_BBRM2WA + 1*F6_BUSINESSBANKWA + 1*F6_CBWACENTRAL + 1*F6_CBWASOUTH + 1*F6_R&R-WESTCOAST" >
Try this: paste(MM, regsflat, sep="\n")
Random Forest in R (multi-label-classification)
I'm fairly new to R, trying to implement Random Forest algorithm. My training and test set have 60 features in the format: Train: feature1,feature2 .. feature60,Label Test: FileName,feature1,feature2 ... feature60 Train-sample mov-mov,or-or,push-push,or-mov,sub-sub,mov-or,sub-mov,xor-or,call-sub,mul-imul,mov-push,push-mov,push-call,or-jz,mov-mul,cmp-or,mov-sub,sub-or,or-sub,or-push,jnz-or,jmp-sub,or-in,mov-call,retn-sub,mul-mul,or-jmp,imul-mul,pop-pop,nop-nop,nop-mul,sub-push,imul-mov,test-or,mul-mov,lea-push,std-mov,in-call,or-call,mov-std,mov-cmp,std-mul,call-or,jz-mov,push-or,pop-retn,add-mov,mov-add,mov-xor,in-inc,mov-pop,in-or,in-push,push-lea,lea-mov,mov-lea,sub-add,std-std,sub-cmp,or-cmp,Label 687,1346,1390,1337,750,2770,1518,418,1523,0,441,532,612,512,0,411,354,310,412,495,134,236,318,237,226,0,0,0,200,0,0,386,39,365,0,0,0,125,528,0,125,0,41,260,169,143,149,61,89,0,127,126,107,44,45,40,79,0,273,157,9 812,873,83,533,88,484,264,106,199,0,188,137,128,51,38,92,131,102,52,58,37,26,428,95,107,0,34,0,58,0,0,39,0,26,0,27,0,152,152,0,45,0,124,0,0,73,84,88,22,23,59,319,105,56,86,47,0,0,43,41,2 Test-sample FileName,mov-mov,or-or,push-push,or-mov,sub-sub,mov-or,xor-or,sub-mov,call-sub,mul-imul,push-mov,mov-push,push-call,mov-mul,or-jz,cmp-or,mov-sub,sub-or,or-sub,or-push,jmp-sub,jnz-or,or-in,mul-mul,or-jmp,mov-call,retn-sub,imul-mul,nop-mul,pop-pop,nop-nop,imul-mov,sub-push,mul-mov,test-or,lea-push,std-mov,or-call,mov-std,in-call,std-mul,mov-cmp,call-or,push-or,jz-mov,pop-retn,in-or,add-mov,mov-add,in-inc,mov-xor,in-push,push-lea,mov-pop,lea-mov,mov-lea,mov-nop,or-cmp,sub-add,sub-cmp Ig2DB5tSiEy1cJvV0zdw,166,360,291,194,41,201,62,61,41,18,85,56,121,18,15,0,57,131,113,123,0,9,54,0,0,18,15,0,0,15,0,8,25,0,0,11,0,70,0,43,0,0,63,37,0,14,51,43,56,36,26,0,20,14,17,14,0,9,18,0 k4HCwy5WRFXczJU6eQdT,3,88,106,23,104,0,12,43,59,0,65,87,99,0,2,2,47,22,4,53,1,5,0,0,0,0,46,0,0,0,0,0,4,0,0,6,0,44,0,21,0,0,0,0,0,0,0,2,1,1,3,0,1,2,9,2,0,0,44,2 So what I have so far in R is this, library(randomForest); dat <- read.csv("train-sample.csv", sep=",", h=T); test <- read.csv("test-sample.csv", sep=",", h=T); attach(dat); #If I do this, I get Error: unexpected 'in' ... rfmodel = randomForest (Label ~ mov-mov + or-or + push-push + or-mov + sub-sub + mov-or + sub-mov + xor-or + call-sub + mul-imul + mov-push + push-mov + push-call + or-jz + mov-mul + cmp-or + mov-sub + sub-or + or-sub + or-push + jnz-or + jmp-sub + or-in + mov-call + retn-sub + mul-mul + or-jmp + imul-mul + pop-pop + nop-nop + nop-mul + sub-push + imul-mov + test-or + mul-mov + lea-push + std-mov + in-call + or-call + mov-std + mov-cmp + std-mul + call-or + jz-mov + push-or + pop-retn + add-mov + mov-add + mov-xor + in-inc + mov-pop + in-or + in-push + push-lea + lea-mov + mov-lea + sub-add + std-std + sub-cmp + or-cmp, data=dat); #If I do this, I get Error in terms.formula(formula, data = data) : invalid model formula in ExtractVars rfmodel = randomForest (Label ~ 'mov-mov' + 'or-or' + 'push-push' + or-mov + sub-sub + mov-or + sub-mov + xor-or + call-sub + mul-imul + mov-push + push-mov + push-call + or-jz + mov-mul + cmp-or + mov-sub + sub-or + or-sub + or-push + jnz-or + jmp-sub + 'or-in' + mov-call + retn-sub + mul-mul + or-jmp + imul-mul + pop-pop + nop-nop + nop-mul + sub-push + imul-mov + test-or + mul-mov + lea-push + 'std-mov' + 'in-call' + 'or-call' + 'mov-std' + 'mov-cmp' + 'std-mul' + 'call-or' + 'jz-mov' + 'push-or' + 'pop-retn' + 'add-mov' + 'mov-add' + 'mov-xor' + 'in-inc' + 'mov-pop' + 'in-or' + 'in-push' + 'push-lea' + 'lea-mov' + 'mov-lea' + 'sub-add' + 'std-std' + 'sub-cmp' + 'or-cmp', data=dat); #I even tried this and got Error in na.fail.default(list(Label = c(9L, 2L, 9L, 1L, 8L, 6L, 2L, 2L, : missing values in object rfmodel <- randomForest(Label~., dat); So I'm kinda stuck. I want to end up using something like, predicted <- predict(rfmodel, test, type="response"); prop.table(table(test$FileName, predicted),1); To get an output in form of: FileName, Label1, Label2, Label3 .. Label9 name1, 0.98, 0, 0.02, 0, 0 .. 0 (basically the fileName with probabilities of each label) Any help is appreciated. Thank you.