Loop for binom.test() in R

I have a dataset of successes, probabilities, and sample sizes that I am running binomial tests on.
Here is a sample of the data (note that the actual dataset has me run >100 binomial tests):
km    n_1   prey_pred p0_prey_pred
<fct> <dbl>     <int>        <dbl>
80      93        12       0.119
81    1541       103       0.0793
83     316         5       0.0364
84     721        44       0.0796
89     866        58       0.131
I normally run this (example for the first row):
n = 93
p0 = 0.119
successes = 12
binom.test(successes, n, p0, "two.sided")
Exact binomial test
data: 12 and 93
number of successes = 12, number of trials = 93, p-value = 0.74822
alternative hypothesis: true probability of success is not equal to 0.119
95 percent confidence interval:
0.068487201 0.214548325
sample estimates:
probability of success
0.12903226
Is there a way to systematically run a binomial test on each row of the data and then store all the output (p-value, confidence interval, probability of success) as separate columns?
I've tried the solution proposed here, but I am clearly missing something.

Using apply:
res <- t(`colnames<-`(apply(dat, 1, FUN = function(x) {
  rr <- binom.test(x[3], x[2], x[4], "two.sided")
  with(rr, c(x, "2.5%" = conf.int[1], estimate = unname(estimate),
             "97.5%" = conf.int[2], p.value = unname(p.value)))
}), dat$km))
res
# km n_1 prey_pred p0_prey_pred 2.5% estimate 97.5% p.value
# 80 80 93 12 0.1190 0.068487201 0.12903226 0.21454832 7.482160e-01
# 81 81 1541 103 0.0793 0.054881013 0.06683971 0.08047927 7.307921e-02
# 83 83 316 5 0.0364 0.005157062 0.01582278 0.03653685 4.960168e-02
# 84 84 721 44 0.0796 0.044688325 0.06102635 0.08106220 7.311463e-02
# 89 89 866 58 0.1310 0.051245893 0.06697460 0.08572304 1.656621e-09
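Note that apply() runs the data frame through as.matrix() first, so if km really is a factor (as the <fct> header in the question suggests), every value would be coerced to character and binom.test() would fail. A sketch of an alternative that avoids the coercion by iterating over the columns with Map() (column names assumed from the question's data):
out <- Map(function(x, n, p) {
  rr <- binom.test(x, n, p, "two.sided")
  c("2.5%" = rr$conf.int[1], estimate = unname(rr$estimate),
    "97.5%" = rr$conf.int[2], p.value = rr$p.value)
}, dat$prey_pred, dat$n_1, dat$p0_prey_pred)
res <- cbind(dat, do.call(rbind, out))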
Edit
If you have multiple column sets in wide format (and for some reason want to stay there):
dat2 <- `colnames<-`(cbind(dat, dat[-1]),
                     c("km", "n_1.1", "prey_pred.1", "p0_prey_pred.1",
                       "n_1.2", "prey_pred.2", "p0_prey_pred.2"))
dat2[1:3,]
# km n_1.1 prey_pred.1 p0_prey_pred.1 n_1.2 prey_pred.2 p0_prey_pred.2
# 1 80 93 12 0.1190 93 12 0.1190
# 2 81 1541 103 0.0793 1541 103 0.0793
# 3 83 316 5 0.0364 316 5 0.0364
you may do:
res2 <- t(`colnames<-`(apply(dat2, 1, FUN = function(x) {
  rr1 <- binom.test(x[3], x[2], x[4], "two.sided")
  rr2 <- binom.test(x[6], x[5], x[7], "two.sided")
  rrr1 <- with(rr1, c("2.5%.1" = conf.int[1], estimate.1 = unname(estimate),
                      "97.5%.1" = conf.int[2], p.value.1 = unname(p.value)))
  rrr2 <- with(rr2, c("2.5%.2" = conf.int[1], estimate.2 = unname(estimate),
                      "97.5%.2" = conf.int[2], p.value.2 = unname(p.value)))
  c(x, rrr1, rrr2)
}), dat2$km))
res2
# km n_1.1 prey_pred.1 p0_prey_pred.1 n_1.2 prey_pred.2 p0_prey_pred.2 2.5%.1
# 80 80 93 12 0.1190 93 12 0.1190 0.068487201
# 81 81 1541 103 0.0793 1541 103 0.0793 0.054881013
# 83 83 316 5 0.0364 316 5 0.0364 0.005157062
# 84 84 721 44 0.0796 721 44 0.0796 0.044688325
# 89 89 866 58 0.1310 866 58 0.1310 0.051245893
# estimate.1 97.5%.1 p.value.1 2.5%.2 estimate.2 97.5%.2 p.value.2
# 80 0.12903226 0.21454832 7.482160e-01 0.068487201 0.12903226 0.21454832 7.482160e-01
# 81 0.06683971 0.08047927 7.307921e-02 0.054881013 0.06683971 0.08047927 7.307921e-02
# 83 0.01582278 0.03653685 4.960168e-02 0.005157062 0.01582278 0.03653685 4.960168e-02
# 84 0.06102635 0.08106220 7.311463e-02 0.044688325 0.06102635 0.08106220 7.311463e-02
# 89 0.06697460 0.08572304 1.656621e-09 0.051245893 0.06697460 0.08572304 1.656621e-09
One could write this in a more nested fashion, but I recommend keeping things simple, so that others (and probably your future self) can later understand more easily what's going on.
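If long format is acceptable, you can also stack the two column sets and reuse the single-set solution from above instead of duplicating the binom.test() calls; a minimal sketch, assuming dat2 as constructed above:
cols <- c("km", "n_1", "prey_pred", "p0_prey_pred")
long <- rbind(setNames(dat2[, c(1, 2:4)], cols),
              setNames(dat2[, c(1, 5:7)], cols))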
Data:
dat <- read.table(text="km n_1 prey_pred p0_prey_pred
80 93 12 0.119
81 1541 103 0.0793
83 316 5 0.0364
84 721 44 0.0796
89 866 58 0.131 ", header=TRUE)

You can define a function for this as suggested in the comments:
my_binom <- function(x, n, p){
  res <- binom.test(x, n, p)
  out <- data.frame(res$p.value, res$conf.int[1], res$conf.int[2], res$estimate)
  names(out) <- c("p", "lower_ci", "upper_ci", "p_success")
  rownames(out) <- NULL
  return(out)
}
Then you can apply it to each row:
do.call("rbind.data.frame", apply(dat, 1, function(row_i){
  # dat as defined in the Data block of the previous answer
  my_binom(x = row_i["prey_pred"], n = row_i["n_1"], p = row_i["p0_prey_pred"])
}))
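To keep the inputs next to the test results, you can bind the output back onto the original data; a minimal sketch, again assuming the dat defined in the Data block of the previous answer:
res <- cbind(dat, do.call("rbind.data.frame", apply(dat, 1, function(row_i){
  my_binom(x = row_i["prey_pred"], n = row_i["n_1"], p = row_i["p0_prey_pred"])
})))
res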

Related

Using Linear Regression and Model Selection Techniques to Predict Y based on Xs in R

I attempted to use linear regression and feature selection to predict a target variable (Y) from a set of predictors (X1, X2, X3, X4, X5, X6, X7, and X8). I started with a full model including all predictors, then used stepwise regression (backward, forward, and both directions) to select the most relevant variables. I compared the models using AIC, BIC, and root mean squared error (RMSE) to determine the best one for my data. Finally, I used the best model to predict Y for a specific set of predictor values and compared it to the actual value to assess accuracy. However, I ran into a problem: the value of Y in the 39th semester is missing, so I couldn't evaluate the prediction results.
# Dataset: Classeur2.xlsx
setwd("D:/third year/alaoui")
# load
library(readxl)
data2 <- read_excel("D:/third year/alaoui/tpnote/Classeur2.xlsx")
data <- data2[-c(39),]
View(data)
# Descriptive analysis
summary(data)
str(data)
# Correlation analysis
#install.packages("psych")
library(psych)
# check that all values are numeric; if not, convert them
num_cols <- sapply(data, is.numeric)
data[, !num_cols] <- lapply(data[, !num_cols], as.numeric)
#
matrice_correlation <- cor(data[,c("Y","X1","X2","X4","X5...5","X5...6","X6","X7","X8")])
KMO(matrice_correlation)
cortest.bartlett(matrice_correlation, n = nrow(data))
# Principal component analysis (PCA)
library("FactoMineR")
library("factoextra")
library("corrplot")
p=PCA(data,graph=FALSE)
p
pca=PCA(data,ncp=2)
print(pca)
eig.val <- get_eigenvalue(pca)
eig.val
fviz_eig(pca)
fviz_pca_var(pca, col.var = "contrib", gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"))
# Linear regression
model <- lm(Y ~ X1 + X2 + X4 + X5...5 + X5...6 + X6 + X7 + X8, data = data)
summary(model)
# Checking the linear regression assumptions
#1. Linearity
par(mfrow = c(2, 2))
plot(model)
#2. Homoscedasticity
library(car)
ncvTest(model)
#3. Normality of residuals
library(lmtest)
library(tseries)
residuals <- resid(model)
qqnorm(residuals)
qqline(residuals)
shapiro.test(residuals)
#4. Independence of residuals
plot(residuals ~ fitted(model))
durbinWatsonTest(model)
# Variable selection
# Fit the full model
full_model <- lm(Y ~ X1 + X2 + X4 + X5...5 + X5...6 + X6 + X7 + X8, data = data)
# Fit the null model (constant only)
null_model <- lm(Y ~ 1, data = data)
# Perform backward stepwise selection
backward_model <- step(full_model, direction = "backward")
# Perform forward stepwise selection
forward_model <- step(null_model, scope = list(lower = null_model, upper = full_model), direction = "forward")
# Perform both stepwise selection
both_model <- step(null_model, scope = list(upper = full_model), direction = "both")
# Compare AIC, BIC and RMSE for each model
AIC_full <- AIC(full_model)
AIC_backward <- AIC(backward_model)
AIC_forward <- AIC(forward_model)
AIC_both <- AIC(both_model)
BIC_full <- BIC(full_model)
BIC_backward <- BIC(backward_model)
BIC_forward <- BIC(forward_model)
BIC_both <- BIC(both_model)
RMSE_full <- sqrt(mean((resid(full_model))^2))
RMSE_backward <- sqrt(mean((resid(backward_model))^2))
RMSE_forward <- sqrt(mean((resid(forward_model))^2))
RMSE_both <- sqrt(mean((resid(both_model))^2))
#Print the model selection criteria for each model
cat("Full model:")
cat("\tAIC:", AIC_full, "\tBIC:", BIC_full, "\tRMSE:", RMSE_full, "\n")
cat("Backward model:")
cat("\tAIC:", AIC_backward, "\tBIC:", BIC_backward, "\tRMSE:", RMSE_backward, "\n")
cat("Forward model:")
cat("\tAIC:", AIC_forward, "\tBIC:", BIC_forward, "\tRMSE:", RMSE_forward, "\n")
cat("Both model:")
cat("\tAIC:", AIC_both, "\tBIC:", BIC_both, "\tRMSE:", RMSE_both, "\n")
#Select the model with the lowest AIC
model_names <- c("Full Model", "Backward Model", "Forward Model", "Both Model")
best_model <- model_names[which.min(c(AIC_full, AIC_backward, AIC_forward, AIC_both))]
print(best_model)
# predict the value of Y in the 39th semester
predicted_Y <- predict(backward_model, newdata = data.frame(X1 = 500, X2 = 100, X4 = 83, X5...5 = 30, X5...6= 50, X6 = 90, X7 = 300, X8 = 200))
print(predicted_Y)
# to make sure that it's correct
#Calculate mean squared error
MSE <- mean((predicted_Y - data$Y[39])^2)
#Calculate root mean squared error
RMSE <- sqrt(MSE)
#Calculate R-squared value
R_squared <- summary(backward_model)$r.squared
#Print the results
print(paste("Predicted value of Y:", predicted_Y))
print(paste("Mean Squared Error:", MSE))
print(paste("Root Mean Squared Error:", RMSE))
print(paste("R-Squared value:", R_squared))
#Compare the predicted value with the actual value
print(paste("Actual value of Y:", data$Y[39]))
print(paste("Difference:", abs(predicted_Y - data$Y[39])))
#Plot the model
par(xpd = TRUE)
plot(backward_model,which=1)
abline(backward_model,col="blue")
#Plot the residuals
plot(backward_model, which=2)
#Normality test on residuals
shapiro.test(residuals(backward_model))
#Homoscedasticity test on residuals
ncvTest(backward_model)
#Autocorrelation test on residuals (Durbin-Watson)
dwtest(backward_model)
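A side note on the missing 39th value: since row 39 was dropped from data, data$Y[39] is simply NA, so the MSE/RMSE comparison at the end cannot work; there is no observed Y to score against. You can still produce a prediction (with an interval) from that row's X values; a minimal sketch, assuming data2 and backward_model from above and numeric predictor columns:
predicted_Y39 <- predict(backward_model, newdata = data2[39, ],
                         interval = "prediction")
print(predicted_Y39)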
This is my file.csv:
X4 X5 X5 X6 X7 X8 Y
56 12 50 77 229 98 5540
59 9 17 89 177 225 5439
57 29 89 51 166 263 4290
58 13 107 40 258 321 5502
59 13 143 52 209 407 4872
60 11 61 21 180 247 4708
60 25 -30 40 213 328 4627
60 21 -45 32 201 298 4110
63 8 -28 12 176 218 4123
62 11 76 68 175 410 4842
65 22 144 52 253 93 5741
65 24 113 77 208 307 5094
64 14 128 96 195 107 5383
66 15 10 48 154 305 4888
67 22 -25 27 181 60 4033
67 23 117 73 220 239 4942
66 13 120 62 235 141 5313
68 8 122 25 258 291 5140
69 27 71 74 196 414 5397
71 18 4 63 279 206 5149
69 8 47 29 207 80 5151
70 10 8 91 213 429 4989
73 27 128 74 296 273 5927
73 16 -50 16 245 309 4704
73 32 100 43 276 280 5366
75 20 -40 41 211 315 4630
73 15 68 93 283 212 5712
74 11 88 83 218 118 5095
74 27 27 75 307 345 6124
77 20 59 88 211 141 4787
79 35 142 74 270 83 5036
77 23 126 21 328 398 5288
78 36 30 26 258 124 4647
78 22 18 95 233 118 5316
81 20 42 93 324 161 6180
80 16 -22 50 267 405 4801
81 35 148 83 257 111 5512
82 27 -18 91 267 170 5272
83 30 50 90 300 200 .
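As an aside, if this file is read with base R, the '.' in the last Y value must be declared as the missing-value marker, or Y will be imported as character; a sketch (file name taken from the question; the file is whitespace-separated despite the .csv extension):
data2 <- read.table("file.csv", header = TRUE, na.strings = ".")
# the duplicated X5 header becomes X5 and X5.1 via check.names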

Writing a function to compare differences of a series of numeric variables

I am working on a problem set and absolutely cannot figure this one out. I think I've fried my brain to the point where it doesn't even make sense anymore.
Here is a look at the data ...
sex age chol tg ht wt sbp dbp vldl hdl ldl bmi
<chr> <int> <int> <int> <dbl> <dbl> <int> <int> <int> <int> <int> <dbl>
1 M 60 137 50 68.2 112. 110 70 10 53 74 2.40
2 M 26 154 202 82.8 185. 88 64 34 31 92 2.70
3 M 33 198 108 64.2 147 120 80 22 34 132 3.56
4 F 27 154 47 63.2 129 110 76 9 57 88 3.22
5 M 36 212 79 67.5 176. 130 100 16 37 159 3.87
6 F 31 197 90 64.5 121 122 78 18 58 111 2.91
7 M 28 178 163 66.5 167 118 68 19 30 135 3.78
8 F 28 146 60 63 105. 120 80 12 46 88 2.64
9 F 25 231 165 64 126 130 72 23 70 137 3.08
10 M 22 163 30 68.8 173 112 70 6 50 107 3.66
# … with 182 more rows
I must write a function, myTtest, to perform the following task:
Perform two-sample t-tests to compare the differences of a series of numeric variables between the levels of a classification variable.
The first argument, dat, is a data frame
The second argument, classVar, is a character vector of length 1. It is the name of the classification variable, such as 'sex.'
The third argument, numVar, is a character vector that contains the name of the numeric variables, such as c("age", "chol", "tg"). This means I need to perform three t-tests to compare the difference of those between males and females.
The function should return a data frame with the following variables: Varname, F.mean, M.mean, t (for t-statistics), df (for degrees of freedom), and p (for p-value).
I should be able to run this ...
myTtest(dat = chol, classVar = "sex", numVar = c("age", "chol", "tg"))
... and then get the data frame to appear.
Any help is greatly appreciated. I am pulling my hair out over this one! As well, as noted in my comment below, this has to be done without Tidyverse ... which is why I'm having so much trouble to begin with.
The intuition for this solution is that you can loop over your dependent variables, and call t.test() in each loop. Then save the results from each DV and stack them together in one big data frame.
I'll leave out some bits for you to fill in, but here's the gist:
First, some example data:
set.seed(123)
n <- 20
grp <- sample(c("m", "f"), n, replace = TRUE)
df <- data.frame(grp = grp, age = rnorm(n), chol = rnorm(n), tg = rnorm(n))
df
grp age chol tg
1 m 1.2240818 0.42646422 0.25331851
2 m 0.3598138 -0.29507148 -0.02854676
3 m 0.4007715 0.89512566 -0.04287046
4 f 0.1106827 0.87813349 1.36860228
5 m -0.5558411 0.82158108 -0.22577099
6 f 1.7869131 0.68864025 1.51647060
7 f 0.4978505 0.55391765 -1.54875280
8 f -1.9666172 -0.06191171 0.58461375
9 m 0.7013559 -0.30596266 0.12385424
10 m -0.4727914 -0.38047100 0.21594157
Now make a container that each of the model outputs will go into:
fits_df <- data.frame()
Loop over each DV and append the model output to fits_df each time with rbind:
for (dv in c("age", "chol", "tg")) {
  frml <- as.formula(paste0(dv, " ~ grp"))  # make a model formula: dv ~ grp
  fit <- t.test(frml, alternative = "two.sided", data = df)  # perform the t-test
  # hint: use str(fit) to figure out how to pull out each value you care about
  fit_df <- data.frame(
    dv = dv,
    f_mean = xxx,
    m_mean = xxx,
    t = xxx,
    df = xxx,
    p = xxx
  )
  fits_df <- rbind(fits_df, fit_df)
}
Your output will look like this:
fits_df
dv f_mean m_mean t df p
1 age -0.18558068 -0.04446755 -0.297 15.679 0.7704954
2 chol 0.07731514 0.22158672 -0.375 17.828 0.7119400
3 tg 0.09349567 0.23693052 -0.345 14.284 0.7352112
One note: When you're pulling out values from fit, you may get odd row names in your output data frame. This is due to the names property of the various fit attributes. You can get rid of these by using as.numeric() or as.character() wrappers around the values you pull from fit (for example, fit$statistic can be cleaned up with as.character(round(fit$statistic, 3))).
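For example, the values the loop needs can be pulled out of the htest object returned by t.test() like this (a sketch; fit as in the loop above, with group levels f and m):
fit <- t.test(age ~ grp, data = df)
as.numeric(fit$estimate[1])  # mean in group f
as.numeric(fit$estimate[2])  # mean in group m
as.numeric(fit$statistic)    # t statistic
as.numeric(fit$parameter)    # (Welch) degrees of freedom
fit$p.value                  # p-value, already a plain number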

R data.table divide set of columns and flag using any

I am working on a data set which is large and has many columns. I am using data.table to speed up the calculations. However, at certain points I am not sure how to proceed in data.table, so I convert my data.table back to a data.frame and do the calculation there. This slows the process down. It would help a lot to have suggestions on how I can write the steps below in data.table. Here is a snapshot of my code on dummy data:
library(data.table)
#### set the seed value
set.seed(9901)
#### create the sample variables for creating the data
p01 <- sample(1:100,1000,replace = T)
p02 <- sample(1:100,1000,replace = T)
p03 <- sample(1:100,1000,replace = T)
p04 <- sample(1:100,1000,replace = T)
p05 <- sample(1:100,1000,replace = T)
p06 <- sample(1:100,1000,replace = T)
p07 <- sample(1:100,1000,replace = T)
#### create the data.table
data <- data.table(cbind(p01,p02,p03,p04,p05,p06,p07))
###user input for last column
lcol <- 6
###calculate start column as last - 3
scol <- lcol-3
###calculate average for scol:lcol
data <- data[,avg:= apply(.SD,1,mean,na.rm=T),.SDcols=scol:lcol]
###converting to data.frame since I do not know the data.table solution
data <- as.data.frame(data)
###calculate the trend in percentage
data$t01 <- data[,lcol-00]/data[,"avg"]-1
data$t02 <- data[,lcol-01]/data[,"avg"]-1
data$t03 <- data[,lcol-02]/data[,"avg"]-1
data$t04 <- data[,lcol-03]/data[,"avg"]-1
data$t05 <- data[,lcol-04]/data[,"avg"]-1
###converting back to data.table
data <- as.data.table(data)
###calculate the min and max for the trend
data1 <- data[, `:=` (trend_min = apply(.SD, 1, min, na.rm=T),
                      trend_max = apply(.SD, 1, max, na.rm=T)), .SDcols=c(scol:lcol)]
###calculate flag if any of t04 OR t05 is an outlier for min and max values. This would be many columns in actual data
data1$flag1 <- ifelse(data1$t04 < data1$trend_min | data1$t04 > data1$trend_max,1,0)
data1$flag2 <- ifelse(data1$t05 < data1$trend_min | data1$t05 > data1$trend_max,1,0)
data1$flag <- ifelse(data1$flag1 == 1 | data1$flag2 == 1,1,0)
So basically, how can I:
- calculate the trend percentages based on a user-supplied column index (note it is not a simple division but a percentage change), and
- create the flag variable? I think I need to use the any() function, but I am not sure how.
Some steps can be made more efficient: instead of using apply with MARGIN = 1, the mean, min, and max can be replaced with rowMeans, pmin, and pmax.
library(data.table)
data[, avg := rowMeans(.SD, na.rm = TRUE), .SDcols = scol:lcol]
data[, sprintf('t%02d', 1:5) := lapply(.SD, function(x) x/avg - 1),
     .SDcols = patterns("^p0[1-5]")]
data[, `:=` (trend_min = do.call(pmin, c(.SD, na.rm = TRUE)),
             trend_max = do.call(pmax, c(.SD, na.rm = TRUE))), .SDcols = scol:lcol]
data
# p01 p02 p03 p04 p05 p06 p07 avg t01 t02 t03 t04 t05 trend_min trend_max
# 1: 35 53 22 82 100 59 69 65.75 -0.46768061 -0.19391635 -0.6653992 0.24714829 0.5209125 22 100
# 2: 78 75 15 65 70 69 66 54.75 0.42465753 0.36986301 -0.7260274 0.18721461 0.2785388 15 70
# 3: 15 45 27 61 63 75 99 56.50 -0.73451327 -0.20353982 -0.5221239 0.07964602 0.1150442 27 75
# 4: 41 80 13 22 63 84 17 45.50 -0.09890110 0.75824176 -0.7142857 -0.51648352 0.3846154 13 84
# 5: 53 9 75 47 25 75 66 55.50 -0.04504505 -0.83783784 0.3513514 -0.15315315 -0.5495495 25 75
# ---
# 996: 33 75 9 61 74 55 57 49.75 -0.33668342 0.50753769 -0.8190955 0.22613065 0.4874372 9 74
# 997: 24 68 74 11 43 75 37 50.75 -0.52709360 0.33990148 0.4581281 -0.78325123 -0.1527094 11 75
# 998: 62 78 82 97 56 50 74 71.25 -0.12982456 0.09473684 0.1508772 0.36140351 -0.2140351 50 97
# 999: 70 88 93 4 39 75 93 52.75 0.32701422 0.66824645 0.7630332 -0.92417062 -0.2606635 4 93
#1000: 20 50 99 94 62 66 98 80.25 -0.75077882 -0.37694704 0.2336449 0.17133956 -0.2274143 62 99
and then create the 'flag'
data[, flag := +(Reduce(`|`, lapply(.SD, function(x)
  x < trend_min | x > trend_max))), .SDcols = t04:t05]
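The +(...) wrapper simply converts the logical vector produced by Reduce(`|`, ...) into the 0/1 integer flag, and Reduce with | acts as an elementwise "any" across the columns; a small illustration:
+c(TRUE, FALSE, NA)                    # 1 0 NA
Reduce(`|`, list(c(TRUE, FALSE),
                 c(FALSE, FALSE)))     # TRUE FALSE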

How to resample and remodel n times by vectorization?

Here's my for-loop version of resampling and refitting the model:
B <- 999
n <- nrow(butterfly)
estMat <- matrix(NA, B+1, 2)
estMat[B+1,] <- model$coef
for (i in 1:B) {
  resample <- butterfly[sample(1:n, n, replace = TRUE),]
  re.model <- lm(Hk ~ inv.alt, resample)
  estMat[i,] <- re.model$coef
}
I tried to avoid the for loop:
B <- 999
n <- nrow(butterfly)
resample <- replicate(B, butterfly[sample(1:n, replace = TRUE),], simplify = FALSE)
re.model <- lapply(resample, lm, formula = Hk ~ inv.alt)
re.model.coef <- sapply(re.model,coef)
estMat <- cbind(re.model.coef, model$coef)
It worked but didn't improve efficiency. Is there any approach where I can use vectorization?
Sorry, I'm not quite familiar with Stack Overflow. Here's the dataset butterfly:
colony alt precip max.temp min.temp Hk
pd+ss 0.5 58 97 16 98
sb 0.8 20 92 32 36
wsb 0.57 28 98 26 72
jrc+jrh 0.55 28 98 26 67
sj 0.38 15 99 28 82
cr 0.93 21 99 28 72
mi 0.48 24 101 27 65
uo+lo 0.63 10 101 27 1
dp 1.5 19 99 23 40
pz 1.75 22 101 27 39
mc 2 58 100 18 9
hh 4.2 36 95 13 19
if 2.5 34 102 16 42
af 2 21 105 20 37
sl 6.5 40 83 0 16
gh 7.85 42 84 5 4
ep 8.95 57 79 -7 1
gl 10.5 50 81 -12 4
(Assuming butterfly$inv.alt <- 1/butterfly$alt)
You get the error because resample is not a list of resampled data.frames, which you can obtain with:
resample <- replicate(B, butterfly[sample(1:n, replace = TRUE),], simplify = FALSE)
Then the following should work:
re.model <- lapply(resample, lm, formula = Hk ~ inv.alt)
To extract coefficients from a list of models, re.model$coef does not work. The correct paths to the coefficients are re.model[[1]]$coef, re.model[[2]]$coef, and so on. You can get all of them with the following code:
re.model.coef <- sapply(re.model, coef)
Then you can combine it with the observed coefficients:
estMat <- cbind(re.model.coef, model$coef)
In fact, you can put all of them into replicate:
re.model.coef <- replicate(B, {
  bf.rs <- butterfly[sample(1:n, replace = TRUE),]
  coef(lm(formula = Hk ~ inv.alt, data = bf.rs))
})
estMat <- cbind(re.model.coef, model$coef)
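For comparison, the same bootstrap is often written with the boot package, which runs the resampling loop internally; it won't vectorize away the B model fits either, but it is concise. A sketch, assuming butterfly, inv.alt, and model as above:
library(boot)
boot_coef <- function(d, i) coef(lm(Hk ~ inv.alt, data = d[i, ]))
bt <- boot(butterfly, boot_coef, R = 999)
estMat <- rbind(bt$t, bt$t0)  # B resampled coefficient pairs, then the original fit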

Loop Linear Regression

As a beginner in R, I have a (probably) simple question.
I have a linear regression with this specification:
X1_t = X1_{t-h} + X2_{t-h}
for h equal to 1, 2, 3, 4, and 5.
For example, when h = 1 I run this code:
Modelo11 <- dynlm(X1 ~ L(X1,1) + L(X2, 1)-1, data = GDP)
It's a simple regression.
I want to implement a function that gives me the five linear regressions (h = 1, 2, 3, 4, and 5), with and without HAC (heteroskedasticity and autocorrelation consistent) covariance estimation.
I did this, and it didn't work:
for(h in 1:5){
  Modelo1[h] <- dynlm(GDPTrimestralemT ~ L(SpreademT,h) + L(GDPTrimestralemT, h)-1, data = MatrizDadosUS)
  coeftest(Modelo1[h], df = Inf, vcov = parzenHAC)
  return(list(summary(Modelo1[h])))
}
One of the error messages is:
number of items to replace is not a multiple of replacement length
This is my data.frame:
GDP <- data.frame(data )
GDP
X1 X2
1 0.542952690 0.226341364
2 0.102328393 0.743360185
3 0.166345969 0.186533485
4 1.406733422 1.392420181
5 -0.469811005 -0.114609464
6 -0.509268267 0.687555461
7 1.470439930 0.298655018
8 1.046456428 -1.056387597
9 -0.492462197 -0.530284962
10 -0.516065519 0.645957530
11 0.624638996 1.044731264
12 0.213616470 -1.652979785
13 0.669747432 1.398602289
14 0.552089131 -0.821013792
15 0.452715216 1.420094663
16 -0.892063248 -1.436600779
17 1.429284965 0.559738610
18 0.853740565 -0.898976767
19 0.741864168 1.352012831
20 0.171494650 1.704764705
21 0.422326351 -0.267064235
22 -1.261643503 -2.090694608
23 -1.321086283 -0.273954212
24 0.365226000 1.965167113
25 -0.080888690 -0.594498893
26 -0.183293801 -0.483053404
27 -1.033792032 0.586491772
28 0.718322432 1.776210145
29 -2.822693790 -0.731509917
30 -1.251740437 -1.918124078
31 1.184256949 -0.016548037
32 2.255202675 0.303438286
33 -0.930446147 0.803126180
34 -1.691383225 -0.157839283
35 -1.081643279 -0.006652717
36 1.034162006 -1.970063305
37 -0.716827488 0.306792930
38 0.098471514 0.338333164
39 0.343536547 0.389775011
40 1.442117465 -0.668885360
41 0.095131066 -0.298356861
42 0.222524607 0.291485267
43 -0.499969717 1.308312472
44 0.588162304 0.026539575
45 0.581215173 0.167710855
46 0.629343124 -0.052835206
47 0.811618963 0.716913172
48 1.463610069 -0.356369304
49 -2.000576321 1.226446201
50 1.278233553 0.313606888
51 -0.700373666 0.770273988
52 -1.206455648 0.344628878
53 0.024602262 1.001621886
54 0.858933385 -0.865771777
55 -1.592291995 -0.384908852
56 -0.833758365 -1.184682199
57 -0.281305858 2.070391729
58 -0.122848757 -0.308397782
59 -0.661013984 1.590741535
60 1.887869805 -1.240283364
61 -0.313677463 -1.393252994
62 1.142864110 -1.150916732
63 -0.633380499 -0.223923970
64 -0.158729527 -1.245647224
65 0.928619010 -1.050636078
66 0.424317087 0.593892028
67 1.108704956 -1.792833100
68 -1.338231248 1.138684394
69 -0.647492569 0.181495183
70 0.295906675 -0.101823172
71 -0.079827607 0.825158278
72 0.050353111 -0.448453121
73 0.129068772 0.205619797
74 -0.221450137 0.051349511
75 -1.300967949 1.639063824
76 -0.861963677 1.273104220
77 -1.691001610 0.746514122
78 0.365888734 -0.055308006
79 1.297349754 1.146102001
80 -0.652382297 -1.095031447
81 0.165682952 -0.012926971
82 0.127996446 0.510673745
83 0.338743162 -3.141650682
84 -0.266916587 -2.483389321
85 0.148135154 -1.239997153
86 1.256591385 0.051984536
87 -0.646281986 0.468210275
88 0.180472423 0.393014848
89 0.231892902 -0.545305005
90 -0.709986273 0.104969765
91 1.231712844 -1.703489840
92 0.435378714 0.876505107
93 -1.880394798 -0.885893722
94 1.083580732 0.117560662
95 -0.499072654 -1.039222894
96 1.850756855 -1.308752222
97 1.653952857 0.440405804
98 -1.057618294 -1.611779530
99 -0.021821282 -0.807071503
100 0.682923562 -2.358596342
101 -1.132293845 -1.488806929
102 0.319237353 0.706203968
103 -2.393105781 -1.562111727
104 0.188653972 -0.637073832
105 0.667003685 0.047694037
106 -0.534018861 1.366826933
107 -2.240330371 -0.071797320
108 -0.220633546 1.612879694
109 -0.022442941 1.172582601
110 -1.542418139 0.635161458
111 -0.684128812 -0.334973482
112 0.688849615 0.056557966
113 0.848602803 0.785297518
114 -0.874157558 -0.434518305
115 -0.404999060 -0.078893114
116 0.735896917 1.637873669
117 -0.174398836 0.542952690
118 0.222418628 0.102328393
119 0.419461884 0.166345969
120 -0.042602368 1.406733422
121 2.135670836 -0.469811005
122 1.197644287 -0.509268267
123 0.395951293 1.470439930
124 0.141327444 1.046456428
125 0.691575897 -0.492462197
126 -0.490708151 -0.516065519
127 -0.358903359 0.624638996
128 -0.227550909 0.213616470
129 -0.766692832 0.669747432
130 -0.001690915 0.552089131
131 -1.786701123 0.452715216
132 -1.251495762 -0.892063248
133 1.123462446 1.429284965
134 0.237862653 0.853740565
Thanks.
Your variable Modelo1 is a vector, which cannot store lm objects. When Modelo1 is a list, it should work:
library(dynlm)
df <- data.frame(rnorm(50), rnorm(50))
names(df) <- c("a", "b")
c <- list()
for(h in 1:5){
  c[[h]] <- dynlm(a ~ L(a,h) + L(b, h)-1, data = df)
}
To get the summary you have to access the single list elements. For example:
summary(c[[1]])
Edit, in response to Richard Scriven's comment:
The most efficient way to get all the summaries would be:
lapply(c, summary)
This applies the summary function to each element of the list and returns a list with the results.
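To also produce the HAC-based inference the question asks for, the same lapply pattern works on the list of models; a sketch using sandwich::vcovHAC in place of the question's parzenHAC (which is assumed to be a user-supplied kernel HAC estimator):
library(lmtest)
library(sandwich)
hac <- lapply(c, function(m) coeftest(m, df = Inf, vcov. = vcovHAC(m)))
hac[[1]]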
