I have a dataframe with 10 columns like this:
df = structure(list(X1 = c(-0.158841494166799, 1.74997712540787,
-0.603638753496694, -0.253379995687274, -1.13536828104642, -2.72698649676692,
0.0243826193956672, 3.21776393858788, -2.3633921387719, 0.0305028420399468,
-4.26940546325382, 1.55584592541131, -1.05118869595721, -2.84985861365441,
0.729293004856561, -3.80058253179317, 1.31191742952459, 2.33589025288871,
-0.315014740930705, 2.92302109498542, 2.26246357678861, 1.37039290089958,
0.0582396621450368, -0.902295561314538, 2.14889801735908, 0.391493314141552,
2.33280118206325, -0.0490864536654898, 0.068965431468872, 1.24553936025063
), X2 = c(0.428917030889186, 1.38179284926331, 2.83063848525283,
-0.926689840680047, -1.3903457418351, 0.708618895316463, -0.750232095654876,
0.894660121671367, 0.124277732759992, -1.98558522788821, 2.34165530946135,
2.85945910959032, 2.36563560772223, 0.0185741299096399, -2.48859548768527,
-1.26592501904523, -0.790849261930494, -0.828149152178906, 3.2014616981455,
-0.15442363719638, -0.516775154465199, -0.176649448869891, -1.41825025459806,
2.17806157731774, -0.44973481848562, 2.24655423722927, 2.47628395430464,
1.91613790409017, -0.393928617864301, -0.148584327095393), X3 = c(-0.430287082709084,
-0.625327045828844, -0.514958706302276, 0.751266821352889, 1.18006572060265,
-0.802136052260364, 0, 0.105706401868772, 0.442778243504233,
0.905950590121364, -1.26899829497004, 0.745801518841586, -0.978033251950966,
0.113286791573796, 1.20968420311602, -0.635010679093886, 0.858274389205649,
-1.009308286611, 0, 0.167623720006668, -0.720624475890533, -0.443172067136528,
-0.0181650919153045, -0.818017257660172, 1.18137870264753, -0.0566121497554404,
0.572314218120067, -1.01361737919216, 0.637155618813563, 0.00507063594816648
), X4 = c(1.05105923858325, -0.808507106602501, 1.01063388325313,
-0.363828197971125, -0.889357817262751, 0.0808507106602501, 0,
0.4851042639615, 0.687231040612125, -1.53616350254475, 2.02126776650625,
-1.09148459391338, -0.929783172592876, 0.323402842641, 0.768081751272376,
0.444678908631375, -1.53616350254475, 0.970208527923, 0, 0.646805685282001,
-1.01063388325313, 0.40425355330125, -0.323402842641, -0.202126776650625,
1.09148459391338, -0.970208527923, -0.606380329951876, 0, 0.606380329951876,
0.161701421320499), X5 = c(-1.68622583629267, -2.11651291900176,
-2.7418399648306, -3.25679867113288, -2.50553184977999, -1.32546612917734,
-2.12760218143771, -2.12760218143771, -2.02189577956894, -1.5791175360647,
-0.67316694594334, -1.94216524091338, -1.19636372207179, -2.17439697402276,
-2.06111018244896, -0.85142597933294, -1.48643665842683, -0.628162269221177,
-1.63747055583218, -1.63747055583218, -1.46984683582551, -2.19047131171604,
-2.63364337885257, -2.65180847076788, -3.46982572842805, -2.28844702578052,
-2.34505917553596, -1.77274495741589, -2.78636233660805, -2.14920671779449
), X6 = c(-3.52265292326337, -2.47159368468012, -3.28010079128262,
-2.26946690802949, -2.63329510600062, -3.52265292326337, -3.44180221260312,
-3.44180221260312, -2.95669794864162, -2.26946690802949, -3.80563041057424,
-1.78436264406799, -2.87584723798137, -3.80563041057424, -3.48222756793324,
-2.71414581666087, -2.26946690802949, -3.80563041057424, -2.83542188265124,
-2.83542188265124, -2.18861619736924, -3.19925008062237, -2.79499652732112,
-3.11839936996212, -3.32052614661274, -2.22904155269937, -3.19925008062237,
-3.80563041057424, -3.80563041057424, -3.19925008062237), X7 = c(1,
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 0, 1, 0, 0), X8 = c(0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0),
X9 = c(1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0), X10 = c(3.2936,
3.283, 3.264, 3.3162, 3.31, 3.3485, 3.3017, 3.2927, 3.3186,
3.3004, 3.3043, 3.0945, 2.7868, 2.7635, 2.749, 2.5605, 2.5571,
2.2133, 2.0783, 2.0981, 2.0209, 2.0148, 1.973, 2.0567, 2.0214,
2.0315, 2.0057, 2.078, 2.0152, 2.0277)), row.names = c(NA,
30L), class = "data.frame")
What I want is a much faster implementation of the following code:
library(vars)      # VAR()
library(car)       # linearHypothesis()
library(sandwich)  # vcovHAC()
p = 3
instru1 = df$X7[1:(30-p)]
instru2 = df$X8[1:(30-p)]
instru3 = df$X9[1:(30-p)]
var <- VAR(df[, c(1:4,10)], p, type = "const")
# varresult[[1]] is the equation for the first variable that appears in the VAR
summary(lm(var$varresult[[1]]$residuals ~ instru1))
linearHypothesis(lm(var$varresult[[1]]$residuals ~ instru1), "instru1 = 0", test = "F", vcov = vcovHAC, type = "HC1")
I wrote this code only for instru1 and for one column of interest (column 10 in df). I would like to do the same exercise for instru1, instru2 and instru3 and, in addition to
VAR(df[, c(1:4,10)], p, type = "const")
for
VAR(df[, c(1:4,5)], p, type = "const")
and
VAR(df[, c(1:4,6)], p, type = "const").
In other words, I would like to avoid doing:
p = 3
instru1 = df$X7[1:(30-p)]
instru2 = df$X8[1:(30-p)]
instru3 = df$X9[1:(30-p)]
var1 <- VAR(df[, c(1:4,5)], p, type = "const")
summary(lm(var1$varresult[[1]]$residuals ~ instru1))  # residuals of the first equation
linearHypothesis(lm(var1$varresult[[1]]$residuals ~ instru1), "instru1 = 0", test = "F", vcov = vcovHAC, type = "HC1")
var2 <- VAR(df[, c(1:4,6)], p, type = "const")
summary(lm(var2$varresult[[1]]$residuals ~ instru1))
linearHypothesis(lm(var2$varresult[[1]]$residuals ~ instru1), "instru1 = 0", test = "F", vcov = vcovHAC, type = "HC1")
# and then the same but plugging instru2 and instru3 in place of instru1. Is it possible to have all in one loop?
Ideally I would like to have everything in one loop but, if that is not feasible, it is also fine to have code for the VAR part only; I will then update the 'instru' bit manually.
Can anyone help me?
Thanks a lot!
I believe that the best way is to write a function generalizing the VAR/linearHypothesis code, and call that function with the required arguments.
Step 1: the function
library(vars)
library(car)
library(sandwich)  # for vcovHAC
customVAR <- function(DF, Select, Regr, p, type = "const"){
n <- nrow(DF)
instru <- DF[[Regr]][seq.int(n - p)]
var_fit <- VAR(DF[, Select], p = p, type = type)
r <- resid(var_fit$varresult[[1]])  # residuals of the first equation in the VAR
lm_fit <- lm(r ~ instru)
smry <- summary(lm_fit)
lh <- linearHypothesis(lm_fit, "instru = 0", test = "F", vcov = vcovHAC, type = "HC1")
list(Summary = smry, linearHyp = lh)
}
Step 2: test it for one model
customVAR(df, Select = c(1:4, 10), Regr = 'X7', p = 3)
Step 3: fit several models, varying the regressors
Regr_vec <- paste0('X', 7:9)
var_list <- lapply(Regr_vec, function(R){
customVAR(df, Select = c(1:4, 10), Regr = R, p = 3)
})
var_list[[2]]$Summary
var_list[[2]]$linearHyp
Step 4: vary the responses
Select_list <- list(c(1:4, 10), c(1:4, 5), c(1:4, 6))
var_list2 <- lapply(Select_list, function(S){
customVAR(df, Select = S, Regr = 'X7', p = 3)
})
var_list2[[3]]$Summary
var_list2[[3]]$linearHyp
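If you really want everything in one go, you can loop over both the instruments and the column selections at once, for example with a nested lapply. A sketch building on customVAR above (the list names are only there for convenient access):
Regr_vec <- paste0('X', 7:9)
Select_list <- list(c(1:4, 10), c(1:4, 5), c(1:4, 6))
all_fits <- lapply(setNames(Regr_vec, Regr_vec), function(R){
  lapply(setNames(Select_list, paste0("col", c(10, 5, 6))), function(S){
    customVAR(df, Select = S, Regr = R, p = 3)
  })
})
# e.g. the hypothesis test for instrument X8 and the VAR that includes column 5
all_fits$X8$col5$linearHyp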
Hi Rollo/Arma 91, hopefully this answers your question (note this is untested, as the packages used are not specified; vars, car and sandwich are assumed below).
# Packages (assumed: vars for VAR, car for linearHypothesis, sandwich for vcovHAC):
library(vars)
library(car)
library(sandwich)
# Constant:
p = 3
# Instruments (columns X7, X8, X9):
instru_col_seq <- 7:9
instru_list <- setNames(lapply(instru_col_seq, function(i){
  df[1:(nrow(df) - p), i]
}), paste0("instru", seq_along(instru_col_seq)))
# Var Setup (the extra column added to columns 1:4 in each VAR):
var_col_seq <- c(10, 5, 6)
var_list <- vector("list", length(var_col_seq))
var_lm_summary_list <- var_list
var_lh_list <- var_list
# Var computations:
var_list <- lapply(var_col_seq, function(j){
  VAR(df[, c(1:4, j)], p, type = "const")
})
# Var lm (residuals of the first equation regressed on the first instrument):
var_lm_summary_list <- lapply(seq_along(var_list), function(k){
  instru1 <- instru_list[["instru1"]]
  summary(lm(var_list[[k]]$varresult[[1]]$residuals ~ instru1))
})
# Var lh:
var_lh_list <- lapply(seq_along(var_list), function(l){
  instru1 <- instru_list[["instru1"]]
  linearHypothesis(lm(var_list[[l]]$varresult[[1]]$residuals ~ instru1),
                   "instru1 = 0", test = "F", vcov = vcovHAC, type = "HC1")
})
Data: df as dput in the question above.
I tackled a similar problem recently.
What helped me was using the assign() function inside a for loop.
example:
for (i in 1:3){
  assign(x = paste0("variable", i), value = i*10)
}
This automates the manual assignment of values like:
variable1 = 10
variable2 = 20
variable3 = 30
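Applied to your case, the same idea could look like this (a sketch that just creates instru1, instru2 and instru3 from columns X7, X8 and X9 of df):
p <- 3
instru_cols <- c("X7", "X8", "X9")
for (i in seq_along(instru_cols)) {
  # creates instru1, instru2, instru3 in the global environment
  assign(paste0("instru", i), df[[instru_cols[i]]][1:(nrow(df) - p)])
}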
Related
In order to reduce the number of constructs, I would like to aggregate the following data frame, which contains only binary variables corresponding to "yes/no" answers (first 10 rows shown). The original data frame contains 169 rows.
outcome <-
structure(list(Q9_Automazione.processi = c(0, 0, 0, 0, 0, 0,
1, 1, 1, 0), Q9_Velocita.Prod = c(1, 0, 0, 1, 0, 0, 1, 1, 1,
0), Q9_Flessibilita.Prod = c(0, 0, 0, 1, 0, 0, 1, 1, 0, 1), Q9_Controllo.processi = c(0,
0, 0, 1, 0, 0, 1, 1, 0, 0), Q9_Effic.Magazzino = c(0, 0, 0, 1,
0, 0, 0, 0, 0, 0), Q9_Riduz.Costi = c(0, 1, 0, 0, 0, 0, 0, 0,
0, 1), Q9_Miglior.Sicurezza = c(0, 0, 0, 0, 0, 0, 1, 0, 1, 1),
Q9_Connett.Interna = c(0, 0, 0, 0, 0, 0, 0, 1, 1, 0), Q9_Connett.Esterna = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0), Q9_Virtualizzazione = c(0, 0,
0, 0, 0, 0, 0, 0, 0, 0), Q9_Innov.Prod = c(0, 0, 0, 0, 0,
1, 0, 0, 0, 1), Q9_Person.Prod = c(0, 1, 0, 1, 0, 1, 0, 0,
0, 1), Q9_Nuovi.Mercati = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
Q9_Nuovi.BM = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), Q9_Perform.Energ = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0), Q9_Perform.SostAmb = c(0, 0,
0, 0, 0, 0, 0, 0, 0, 0)), row.names = c(NA, 10L), class = "data.frame")
I have tried performing factor analysis via the tetrachoric method on the obtained correlation matrix (the value returned by the KMO function turns out to be inadequate), both directly on the data frame and then using tetrachoric correlations in the fa function (using cor = "tet" I get a negative Tucker-Lewis Index).
I have been reading up on this but cannot find a methodology that is adequate and whose correctness I am sure of.
So basically what I would like to achieve is to aggregate similar constructs, e.g., assess whether column 5 has value 1 (i.e., "yes") almost always when column 11 has value 1, and then aggregate.
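For instance, the kind of check I have in mind could look like this (a rough sketch in base R, not part of my actual analysis):
# Share of rows where column 5 is 1 among the rows where column 11 is 1
# (a crude measure of co-occurrence; NaN if column 11 is never 1)
co_occur <- function(a, b) mean(a[b == 1] == 1)
co_occur(outcome[[5]], outcome[[11]])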
Here is the code that I tried to use:
library(psych)
library(corrplot)
library(ggcorrplot)
library(magrittr)  # for the %>% pipe
tet <- tetrachoric(outcome)
corrplot(tet$rho, "ellipse", tl.cex = 0.75, tl.col = "black")
corr_matrix <- tet$rho  # the correlation matrix used below (the tetrachoric one)
par(mfrow = c(1,2))
corr_matrix %>%
ggcorrplot(show.diag = F,
type="lower",
lab=TRUE,
lab_size=2)
KMO(corr_matrix)
cortest.bartlett(corr_matrix)
fa.parallel(corr_matrix, fm = "ml")
factor <- fa(corr_matrix, nfactors = 3, rotate = "oblimin", fm = "ml")
print(factor, cut = 0.3, digits = 3)
# -------- Pearson --------
cor(outcome, method = 'pearson', use = "pairwise.complete.obs") %>%
ggcorrplot(show.diag = F,
type="lower",
lab=TRUE,
lab_size=2)
KMO(outcome)
cortest.bartlett(outcome)
fa.parallel(outcome)
factor1 <- fa(outcome, nfactors = 3, rotate = "oblimin", cor = "tet", fm = "ml")
print(factor1, cut = 0.3, digits = 3)
I have been creating a bar plot for the results of a sentiment analysis model in R. The data consists of very confidential customer feedback, which is fed into a sentiment analysis model to generate outputs. My job is to generate a chart for each zone/sub-zone combination: for example, zone = delhi has sub-zones like eastdelhi, westdelhi, northdelhi and southdelhi, so I want a chart for the combination zone = delhi and sub-zone = eastdelhi, saved to a jpeg file. I have written a for loop to do so, but for some reason it isn't working. This is the code:
#Set locales
rm(list = ls())
Sys.setlocale(category = "LC_ALL",locale = "English")
#Load libraries
LoadLibraries <- c("openxlsx",
"dplyr",
"tidyr",
"plotly",
"RColorBrewer",
"shiny",
"officer",
"parallel",
"dplyr",
"tidyr",
"magrittr",
"knitr")
lapply(LoadLibraries, require, character.only = TRUE)
path = "C:/Users/R_Visual/Data/visual_data.xlsx"
input_data <- read.xlsx(path)
name <- names(input_data[,1:10])
#Filtering the zones and circles
for (i in 1:length(unique(Zone.Final))){
for (j in 1:length(unique(Circle.Final))){
fileName = 'C:/Users/R_Visual/'+ str(i) + str(j) + '.jpeg'
jpeg(fileName, width = 900, height = 450)
df <- input_data %>%
filter(input_data$Zone.Final[i])
df <- df %>%
filter(df$Circle.Final[j])
color <- c("#ca2f27","#f56d43","#f8c38a","#fde08b","#d9ef8b","#a7d86f","#67bd64","#1a984f","#D3D3D3","#A9A9A9")
plot <- barplot(sort(colSums(input_data[, 1:10])),
main = paste("Sentiment Analysis for Zone",df$Zone.Final[i]," and Circle",df$Circle.Final[j], sep = ""),
xlab = "Sentiments",
ylab = "Count",
horiz = FALSE,
names = name,
col = color,
border = FALSE,
legend = TRUE,
beside = TRUE,
legend.text = name,
args.legend = list(bty = "n", x = "topleft",ncol = 1, cex = 0.8, y.intersp = 0.8, x.intersp = 0.25, horiz = F, xpd = TRUE, inset = c(0,0)))
dev.off()
}
}
EDIT:
This is the sample of input_data
> dput(input_data)
structure(list(anger = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), anticipation = c(1,
0, 0, 0, 0, 0, 1, 0, 0, 0), disgust = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0), fear = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), joy = c(0,
0, 0, 0, 0, 0, 1, 0, 0, 0), sadness = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0), surprise = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), trust = c(0,
0, 1, 1, 1, 0, 2, 0, 0, 0), negative = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0), positive = c(1, 0, 0, 0, 1, 1, 2, 1, 0, 1), Zone.Final = c("Delhi",
"Lucknow", "Durgapur", "Lucknow", "Mumbai", "Bhopal", "Chandigarh",
"Chandigarh", "Gurugram", "Chandigarh"), Circle.Final = c("Noida",
"Gorakhpur", "Murshidabad", "Gorakhpur", "Mumbai City", "Bhopal",
"Chandigarh", "Panchkula", "Hisar", "Karnal")), row.names = c(NA,
10L), class = "data.frame")
If anyone could help me with the code, it would be of great help.
You can try creating a list combining the zone and subzone:
#Data
input_data <- structure(list(anger = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), anticipation = c(1,
0, 0, 0, 0, 0, 1, 0, 0, 0), disgust = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0), fear = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), joy = c(0,
0, 0, 0, 0, 0, 1, 0, 0, 0), sadness = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0), surprise = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), trust = c(0,
0, 1, 1, 1, 0, 2, 0, 0, 0), negative = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0), positive = c(1, 0, 0, 0, 1, 1, 2, 1, 0, 1), Zone.Final = c("Delhi",
"Lucknow", "Durgapur", "Lucknow", "Mumbai", "Bhopal", "Chandigarh",
"Chandigarh", "Gurugram", "Chandigarh"), Circle.Final = c("Noida",
"Gorakhpur", "Murshidabad", "Gorakhpur", "Mumbai City", "Bhopal",
"Chandigarh", "Panchkula", "Hisar", "Karnal")), row.names = c(NA,
10L), class = "data.frame")
#Code
#First create and global id to combine zone and subzone
df <- input_data
df$id <- paste(df$Zone.Final,df$Circle.Final,sep = '-')
#Split
List <- split(df,df$id)
#Plot
color <- c("#ca2f27","#f56d43","#f8c38a","#fde08b","#d9ef8b","#a7d86f","#67bd64","#1a984f","#D3D3D3","#A9A9A9")
#Plot names
vnames <- paste0(names(List),'.jpeg')
#Loop
for(i in 1:length(List))
{
name <- names(List[[i]][, 1:10])
#Plot
jpeg(filename = vnames[i], width = 900, height = 450)
barplot(sort(colSums(List[[i]][, 1:10])),
main = paste("Sentiment Analysis for Zone ",
unique(List[[i]]$Zone.Final),
" and Circle ",unique(List[[i]]$Circle.Final), sep = ""),
xlab = "Sentiments",
ylab = "Count",
horiz = FALSE,
names = name,
col = color,
border = FALSE,
legend = TRUE,
beside = TRUE,
legend.text = name,
args.legend = list(bty = "n", x = "topleft",ncol = 1,
cex = 0.8, y.intersp = 0.8, x.intersp = 0.25,
horiz = F, xpd = TRUE, inset = c(0,0)))
dev.off()
}
That will create the plots. Of course, you can prepend a directory path to vnames to save the plots in a specific folder.
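For example (a sketch; the folder name is just a placeholder and must already exist):
out_dir <- "C:/Users/R_Visual/plots"   # hypothetical output folder
vnames <- file.path(out_dir, paste0(names(List), '.jpeg'))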
For an assignment, I am applying mixture modeling with the mixtools package in R. When I try to figure out the optimal number of components with a bootstrap, I get the following error:
Error in boot.comp(y, x, N = NULL, max.comp = 2, B = 5, sig = 0.05, arbmean = TRUE, :
Number of trials must be specified!
I found out that I have to supply an N: an n-vector of the number of trials for the logistic regression type logisregmix. If NULL, then N is an n-vector of 1s for binary logistic regression.
But I don't know what N should actually be in my case to make the bootstrap work.
Link to the data:
https://www.kaggle.com/blastchar/telco-customer-churn
My code:
data <- read.csv("Desktop/WA_Fn-UseC_-Telco-Customer-Churn.csv", stringsAsFactors = FALSE,
na.strings = c("NA", "N/A", "Unknown*", "NULL", ".P"))
data <- droplevels(na.omit(data))
data <- data[c(1:5032),]
testdf <- data[c(5033:7032),]
data <- subset(data, select = -customerID)
set.seed(100)
library(plyr)
library(mixtools)
data$Churn <- revalue(data$Churn, c("Yes"=1, "No"=0))
y <- as.numeric(data$Churn)
x <- model.matrix(Churn ~ . , data = data)
x <- x[, -1] #remove intercept
x <-x[,-c(7, 11, 13, 15, 17, 19, 21)] #multicollinearity
a <- boot.comp(y, x, N = NULL, max.comp = 2, B = 100,
sig = 0.05, arbmean = TRUE, arbvar = TRUE,
mix.type = "logisregmix", hist = TRUE)
Below is more information about my predictors:
dput(x[1:4,])
structure(c(0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
34, 2, 45, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0,
0, 0, 1, 1, 0, 29.85, 56.95, 53.85, 42.3, 29.85, 1889.5, 108.15,
1840.75), .Dim = c(4L, 23L), .Dimnames = list(c("1", "2", "3",
"4"), c("genderMale", "SeniorCitizen", "PartnerYes", "DependentsYes",
"tenure", "PhoneServiceYes", "MultipleLinesYes", "InternetServiceFiber optic",
"InternetServiceNo", "OnlineSecurityYes", "OnlineBackupYes",
"DeviceProtectionYes", "TechSupportYes", "StreamingTVYes", "StreamingMoviesYes",
"ContractOne year", "ContractTwo year", "PaperlessBillingYes",
"PaymentMethodCredit card (automatic)", "PaymentMethodElectronic check",
"PaymentMethodMailed check", "MonthlyCharges", "TotalCharges"
)))
My response variable is binary
I hope you guys can help me out!
Looking in the source code of mixtools::boot.comp, which is scary as it is over 800 lines long and in serious need of refactoring, the offending lines are:
if (mix.type == "logisregmix") {
if (is.null(N))
stop("Number of trials must be specified!")
Despite what the documentation says, N must be specified.
Try to set it to a vector of 1s: N = rep(1, length(y)) or N = rep(1, nrow(x))
In fact, if you look in mixtools::logisregmixEM, the internal function called by boot.comp, you'll see how N is set if NULL:
n <- length(y)
if (is.null(N)) {
N = rep(1, n)
}
Too bad this is never reached if N is NULL since it stops with an error before. This is a bug.
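In other words, something along these lines should work (a sketch reusing the y and x built in the question):
# Supply N explicitly: one trial per observation, i.e. plain binary logistic regression
a <- boot.comp(y, x, N = rep(1, length(y)), max.comp = 2, B = 100,
               sig = 0.05, arbmean = TRUE, arbvar = TRUE,
               mix.type = "logisregmix", hist = TRUE)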
I have computed the covariance matrix of my data set Usedata. I have also obtained the diagonal matrix of its eigenvalues as output from R and saved it as a csv.
I cannot see each variable's name at the top of the csv file; instead, it just shows "X1", "X2", etc. I want to see the variable name at the top of each column so I can see which variable has the biggest eigenvalue.
My code:
Usedata <- structure(list(X1 = c(1, 0, 0, 0.244012404, 0, 0, 6, 0, 0, 0,
0, 0), X2 = c(52.72564729, 2, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0),
X3 = c(0, 0, 3, 0, 0.142522511, 0, 0, 0, 8, 0, 0, 0), X4 = c(0,
0.341103073, 0, 4, 0, 0, 0, 0, 0, 9, 0, 0), X5 = c(0, 0,
0, 0, 5, 0.091644475, 0, 0, 0, 0, 10, 0)), .Names = c("X1",
"X2", "X3", "X4", "X5"), class = "data.frame",
row.names = c(NA, -12L))
smallcov <- cov(Usedata)
lam <- eigen(smallcov)$values
LamM <-diag(lam)
diagresult <- data.frame(LamM)
write.csv(diagresult, file = "myoutput.csv")
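A minimal sketch of one way to get labelled columns, assuming you simply want the eigenvalues tagged with the original variable names (strictly speaking, each eigenvalue belongs to a principal component rather than to a single original variable):
# Label the diagonal eigenvalue matrix with the variable names before writing it out
LamM <- diag(lam)
dimnames(LamM) <- list(colnames(smallcov), colnames(smallcov))
write.csv(data.frame(LamM, check.names = FALSE), file = "myoutput.csv")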
I have a data frame that looks something like this:
> dput(tes)
structure(list(path = structure(1:6, .Label = c("1893-chicago-fair",
"1960s-afghanistan", "1970s-iran", "1980s-new-york", "20-bizarre-vintage-ads",
"20-bizarre-vintage-ads?utm_campaign=6678&utm_medium=rpages&utm_source=Facebook&utm_term=1e8e704f7b587515c72e6cf7895d55fd110b652c480d98c1440f0a7acba5fb0e",
"20-photos-segregation-america-show-far-weve-come-much-farther-go",
"7-bizarre-cultural-practices", "7-creepy-abandoned-cities?utm_campaign=6928&utm_medium=rpages&utm_source=Facebook&utm_term=4015a7368b588ff09694c96ba720c58f4e7f41a05b4181908b582bae682bef5e",
"a-brief-history-of-hippies", "abandoned-photographs", "albert-kahn",
"amazing-facts", "american-bison-extinction-1800s", "american-english-vs-british-english",
"andre-the-giant-photos", "andre-the-giant-photos??utm_source=facebook&sr_source=lift_facebook&utm_campaign=simplereach_andre-the-giant-photos&utm_medium=social",
"andre-the-giant-photos?grvVariant=d27feef0bfad84d60f335d3a8d241d9e",
"andre-the-giant-photos?grvVariant=d27feef0bfad84d60f335d3a8d241d9e&utm_campaign=gravityus2_142deb68f67fb1a99e7b80250fecc932&utm_medium=referral&utm_source=gravity",
"andre-the-giant-photos?grvVariant=d27feef0bfad84d60f335d3a8d241d9e&utm_campaign=gravityus2_16d63cf07ecf656f602b2d6b209344f7&utm_medium=referral&utm_source=gravity",
"andre-the-giant-photos?grvVariant=d27feef0bfad84d60f335d3a8d241d9e&utm_campaign=gravityus2_713050ecffc51540af02b2246ddf57dd&utm_medium=referral&utm_source=gravity",
"andre-the-giant-photos?grvVariant=d27feef0bfad84d60f335d3a8d241d9e&utm_campaign=gravityus2_c5bb3bc5e9408e0ad52ec9e787bd8654&utm_medium=referral&utm_source=gravity",
"andre-the-giant-photos?sr_source=lift_facebook&utm_campaign=simplereach_andre-the-giant-photos&utm_medium=social&utm_source=facebook",
"astounding-aerial-photography", "astounding-aerial-photography?utm_campaign=7002&utm_medium=rpages&utm_source=Facebook&utm_term=38e9e903d9ba59106d8b4d19be593f3de7ff8b91b12eafa03f2e382228f7b0d1",
"august-landmesser", "ben-franklin", "best-all-that-is-interesting-articles",
"bigfoot-facts", "celebrity-school-photos?grvVariant=82c0ce57a33dfd0209bdefc878665de0&utm_campaign=gravityus2_bc8646aefd6d0a16af03d7caf248f226&utm_medium=referral&utm_source=gravity",
"coolest-mushrooms?utm_campaign=taboolaINTL&utm_medium=referral&utm_source=taboola",
"craziest-ways-drugs-smuggled", "creepy-halloween-costumes",
"danakil-depression", "dark-john-lennon-quotes", "david-bowie-quotes",
"days-in-groundhog-day", "death-photos", "death-photos?utm_campaign=taboolaINTL&utm_medium=referral&utm_source=taboola",
"dr-seuss-quotes", "dream-chaser-spacecraft", "dust-bowl", "earth-two-planets",
"eixample-barcelona", "email-to-space", "evil-science-experiments",
"famous-incest", "famous-spies", "fun-facts-trivia", "golden-age-air-travel?utm_campaign=taboolaINTL&utm_medium=referral&utm_source=taboola",
"gross-foods", "gross-foods?utm_campaign=6928&utm_medium=rpages&utm_source=Facebook&utm_term=106965c54919c24bf37356500ec50f0709b1de621d6950bb4c5d48759ea3677e",
"gross-foods?utm_campaign=6928&utm_medium=rpages&utm_source=Facebook&utm_term=184e0ee39e66af82f9b124b904f6e07964b211e902cb0dc00c28771ff46163a2",
"gross-foods?utm_campaign=6928&utm_medium=rpages&utm_source=Facebook&utm_term=1a0ddea7bed770d5473c45e9f8d81dfd0c4fdd232f207c6b88b53c41ff220c59",
"gross-foods?utm_campaign=6928&utm_medium=rpages&utm_source=Facebook&utm_term=538659f1fc53f28d2c87b93ac73973681c1a46a04954964ab6c52ed1ab09b33a",
"gross-foods?utm_campaign=6928&utm_medium=rpages&utm_source=Facebook&utm_term=87caf0acb91ae2b202f1b00ad9eaad3fef20bbfb23405b9047fb2b5a5462ab9c",
"gross-foods?utm_campaign=6928&utm_medium=rpages&utm_source=Facebook&utm_term=91eae42c8fc9568103d46e0b6b6ec08fc34fd68b2e1918ffe2333ec73035c95a",
"gross-foods?utm_campaign=6928&utm_medium=rpages&utm_source=Facebook&utm_term=a72946874b2003a8e40635c6cf10c851d4e1c0ed45e645d69663214239550602",
"gross-foods?utm_campaign=6928&utm_medium=rpages&utm_source=Facebook&utm_term=ab594f0a1be002c8c3db297e8d33b04678af40e6a6469ac815884ae0a014b3a3",
"gross-foods?utm_campaign=6928&utm_medium=rpages&utm_source=Facebook&utm_term=fb1e333dd58cb7bb9251ec52290aae21771149f73e083440047068a69aaeae09",
"hilarious-insults", "hippie-communes", "hippie-communes?grvVariant=fda07538efb1c25617f7cc3d09c37c79",
"hippie-communes?grvVariant=fda07538efb1c25617f7cc3d09c37c79&utm_campaign=gravityus2_e3cd42d4745768460dab4694a972fd82&utm_medium=referral&utm_source=gravity",
"hippie-communes?pp=0", "history-of-the-vibrator", "history-of-the-vibrator?utm_campaign=whfbpd&utm_medium=social&utm_source=facebook",
"homosexuality-norm", "hunger-games-facts?utm_campaign=6905&utm_medium=rpages&utm_source=Facebook&utm_term=1a9e42ac8abb6ffa90bf0542206505e74d3df12114a2c4445527fb2b88ef8880",
"influential-photographs", "ingeniously-creative-ads", "insane-cults",
"insane-rulers", "inspirational-quotes", "inspirational-quotes?utm_medium=referral&utm_source=taboolainternal",
"interesting-facts-about-the-world", "interesting-quotes", "krokodil",
"making-a-murderer-theories", "maya-angelou-greatest-quotes",
"medieval-torture-devices", "milky-way-colorado", "montreal-metro",
"most-popular-female-names-in-america", "neil-degrasse-tyson-tweets",
"new-york-city-cinemagraphs", "new-york-subways-1980s", "north-korea-photographs",
"north-korea-photographs?utm_campaign=taboolaINTL&utm_medium=referral&utm_source=taboola",
"north-korea-photographs?utm_medium=referral&utm_source=taboolainternal",
"obama-aging", "pablo-escobar", "pablo-escobar??utm_source=facebook",
"pablo-escobar??utm_source=facebook&sr_source=lift_facebook&utm_campaign=simplereach_pablo-escobar&utm_medium=social",
"pablo-escobar?utm_campaign=whfbpd&utm_medium=social&utm_source=facebook",
"panda-facts", "photo-of-the-day-nasa-releases-crystal-clear-image-of-pluto",
"pollution-in-china-photographs", "pollution-in-china-photographs?utm_campaign=3434&utm_medium=rpages&utm_source=Facebook&utm_term=1a0ddea7bed770d5473c45e9f8d81dfd0c4fdd232f207c6b88b53c41ff220c59",
"pollution-in-china-photographs?utm_campaign=3434&utm_medium=rpages&utm_source=Facebook&utm_term=e28a76c1572c36c3a13965e52b4b2ea10518eb9f9c79c4bc84cfb85db16be81e",
"pollution-in-china-photographs?utm_campaign=6806&utm_medium=rpages&utm_source=Facebook&utm_term=1a0ddea7bed770d5473c45e9f8d81dfd0c4fdd232f207c6b88b53c41ff220c59",
"pollution-in-china-photographs?utm_campaign=7048&utm_medium=rpages&utm_source=Facebook&utm_term=2ef4bd7b6cd587601d6eeb35925282a1ed095ebbd4e9e4c0337ef868c7de7a0b",
"pollution-in-china-photographs?utm_campaign=7458&utm_medium=rpages&utm_source=Facebook&utm_term=b9e79a51cd4daf4c3ec02accce75b3e1fc9a22cb3133460c9c32a4f2f9cdb68c",
"powerful-photos-of-2014", "real-x-files", "romanovs-last-days",
"science-of-human-decay", "scientific-discoveries-2015", "scully-effect",
"serial-killer-quotes", "shah-iran", "six-of-the-craziest-gods-in-mythology",
"space-facts", "sun-facts", "sunken-cities", "sunken-ships",
"super-bowl-i-facts", "superhero-movies", "surreal-places", "syrian-civil-war-photographs",
"the-five-greatest-mysteries-of-human-history", "the-four-most-important-battles-of-ancient-greece",
"the-most-colorful-cities-in-the-world", "titanic-facts", "titanic-facts?utm_campaign=6385&utm_medium=rpages&utm_source=Facebook&utm_term=f5905e878216d14e20457ee3265caf6c10022d9545609edfb9a3cb0642c1a310",
"titanic-facts?utm_campaign=6899&utm_medium=rpages&utm_source=Facebook&utm_term=b9e79a51cd4daf4c3ec02accce75b3e1fc9a22cb3133460c9c32a4f2f9cdb68c",
"titanic-facts?utm_campaign=6928&utm_medium=rpages&utm_source=Facebook&utm_term=106965c54919c24bf37356500ec50f0709b1de621d6950bb4c5d48759ea3677e",
"titanic-facts?utm_campaign=6928&utm_medium=rpages&utm_source=Facebook&utm_term=538659f1fc53f28d2c87b93ac73973681c1a46a04954964ab6c52ed1ab09b33a",
"titanic-facts?utm_campaign=6928&utm_medium=rpages&utm_source=Facebook&utm_term=91eae42c8fc9568103d46e0b6b6ec08fc34fd68b2e1918ffe2333ec73035c95a",
"titanic-facts?utm_campaign=6928&utm_medium=rpages&utm_source=Facebook&utm_term=ab594f0a1be002c8c3db297e8d33b04678af40e6a6469ac815884ae0a014b3a3",
"titanic-facts?utm_campaign=6928&utm_medium=rpages&utm_source=Facebook&utm_term=d1864657a05e5b716bb5cb16a29f068a55652eb39fb669ea9c22a6486198f227",
"titanic-facts?utm_campaign=7292&utm_medium=rpages&utm_source=Facebook&utm_term=f5905e878216d14e20457ee3265caf6c10022d9545609edfb9a3cb0642c1a310",
"us-veterans-portraits", "vintage-disneyland", "wall-street-early-20th-century",
"what-we-love-this-week-the-incredible-last-words-of-famous-historical-figures",
"woodstock-photos", "zombie-proof-house"), class = "factor"),
`0089` = c(0, 0, 0, 0, 0, 1), `0096` = c(0, 0, 0, 0, 0, 0
), `02` = c(0, 0, 0, 0, 0, 0), `0215` = c(0, 0, 0, 0, 0,
0), `0225` = c(0, 0, 0, 0, 0, 0), `0252` = c(0, 0, 0, 0,
0, 0), `0271` = c(0, 0, 0, 0, 0, 0), `0272` = c(0, 0, 0,
0, 0, 0), `03` = c(0, 0, 0, 0, 1, 1)), .Names = c("path",
"0089", "0096", "02", "0215", "0225", "0252", "0271", "0272",
"03"), row.names = c(NA, 6L), class = "data.frame")
and I need to apply the min(x, 1) function so that it scans each value in the data frame (except the first column, which is not numeric) and returns min(x, 1). That way I have only zeros and ones.
I have tried:
f <- function(x) min(1,x)
res1<-do.call(f,tes[,2:ncol(tes)])
but that does not output the right result.
Any help appreciated.
We can use pmin
tes[,-1] <- pmin(1, as.matrix(tes[,-1]))
Or if we need only binary values
tes[,-1] <- +(!!tes[,-1])
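Both lines do the same thing here: !! turns each value into a logical that is TRUE for anything non-zero, and the leading + converts the logicals back to 0/1 integers. A quick check on a toy vector (a sketch):
x <- c(0, 1, 2, 5)
pmin(1, x)   # 0 1 1 1
+(!!x)       # 0 1 1 1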