A lot of variables in lm regression and large list - r

My task is to do some researches in dummy variables. Here is a R code:
parameters_estimation2 <- function(n)
{
age <- sample(20:40, n, replace=TRUE)
male <- sample(0:1, n, replace=TRUE)
education <- sample(0:6, n, replace=TRUE)
experience <- floor(rexp(n, 0.2))
for(i in 1:n)
{if(experience[i]>15) {
experience[i] <- floor(rexp(1, 0.2))
if(experience[i]>15) { i <- i-1 }
}}
sqexperience <- experience*experience
e <- rnorm(n, 0, 4)
B0 <- -200; B1 <- 15; B2 <- 100; B3 <-10; B4 <- 5; B5 <-20;
wage <- B0 + B1*age + B2*male + B3*education+ B4*experience+ B5*sqexperience+e
#Dummy making
expe1 <- c(rep(0,n)); expe2 <- c(rep(0,n)); expe3 <- c(rep(0,n)); expe4 <- c(rep(0,n));
expe5 <- c(rep(0,n)); expe6 <- c(rep(0,n)); expe7 <- c(rep(0,n)); expe8 <- c(rep(0,n));
expe9 <- c(rep(0,n)); expe10 <- c(rep(0,n)); expe11 <- c(rep(0,n)); expe12 <- c(rep(0,n));
expe13 <- c(rep(0,n)); expe14 <- c(rep(0,n)); expe15 <- c(rep(0,n));
for(i in 1:n)
{
if(experience[i]==1) { expe1[i] <-1
} else if(experience[i]==2) { expe2[i] <-1
} else if(experience[i]==3) { expe3[i] <-1
} else if(experience[i]==4) { expe4[i] <-1
} else if(experience[i]==5) { expe5[i] <-1
} else if(experience[i]==6) { expe6[i] <-1
} else if(experience[i]==7) { expe7[i] <-1
} else if(experience[i]==8) { expe8[i] <-1
} else if(experience[i]==9) { expe9[i] <-1
} else if(experience[i]==10) { expe10[i] <-1
} else if(experience[i]==11) { expe11[i] <-1
} else if(experience[i]==12) { expe12[i] <-1
} else if(experience[i]==13) { expe13[i] <-1
} else if(experience[i]==14) { expe14[i] <-1
} else if(experience[i]==15) { expe15[i] <-1
}}
regression<-lm(wage~age+male+education+expe1+expe2+expe3+expe4+expe5+expe6+expe7+expe8+expe9+expe10+expe11+expe12+expe13+expe14+expe15)
return(summary(regression)$coefficients[,"Estimate"])
}
times <- 1000
size <- rep(200, times)
koeficientai1 <-mapply(parameters_estimation2, size)
blah <- as.data.table(koeficientai1)
beta0sample200d <- mean(koeficientai1[,"(Intercept)"])
And the problem is that in last line I get:
Error in koeficientai1[, "(Intercept)"] : incorrect number of dimensions
I think the problem is that koeficientai1 is large list. But then I'm trying another lm regression with just 5 variables, the code is working and I get simple data frame.

Try replacing the last line by
beta0sample200d <- mean(sapply(koeficientai1, function(x) x["(Intercept)"]))
koeficientai1 is a list, but you try to access it as a data.frame, hence the error message.
sapply extracts the element named (Intercept) from each list element in koeficientai1(in your case each list element is a named vector) and returns a vector that contains the results.

Related

function generating NA in R

I need help, my function not work correctly when i try make any sum with the results.
have a lot of NA values and I don't know why.
Craps <- function(jogadas){
for (i in 1:jogadas){
comeOut <- sample(1:6,1)+ sample(1:6,1)
if(comeOut %in% c(2,3,12)){
result <- F #False
}else if(comeOut %in% c(7,11)){
}
else{
dados <- sample(1:6,1) + sample(1:6,1)
if(dados == 7){
result <- F
}else {
result <- T
}
while (!(dados %in% c(7,comeOut))){
dados <- sample(1:6,1)+ sample(1:6,1)
}
if(dados == 7)
result <- F
else result <- T
}
print(result)
#probability
prob<-NULL
prob[i] <- result
prob2<-sum(prob)/jogadas
print(prob2)
}
}
Craps(1000)
You put prob=NULL inside your loop, so it will become NULL at each iteration of the loop, just create prob before the loop. Also you forgot one line as noticed in the comments :
Craps <- function(jogadas){
prob<-NULL
for (i in 1:jogadas){
comeOut <- sample(1:6,1)+ sample(1:6,1)
if(comeOut %in% c(2,3,12)){
result <- F #False
}else if(comeOut %in% c(7,11)){
result <- T
}
else{
dados <- sample(1:6,1) + sample(1:6,1)
if(dados == 7){
result <- F
}else {
result <- T
}
while (!(dados %in% c(7,comeOut))){
dados <- sample(1:6,1)+ sample(1:6,1)
}
if(dados == 7)
result <- F
else result <- T
}
print(result)
#probability
prob[i] <- result
prob2<-sum(prob)/jogadas
print(prob2)
}
}

For optimx() with method of bobyqa, how to set the initial value

I was trying to maximize my Likelihood with the R package 'optimx'. Here is my code. With the initial value (5,5) and (1,1), I got different Maximized likelihood. I also have tried different method like 'Nelder=Mead', but the estimated log likelihood are different under different methods...
library('optimx')
n=225
X = matrix(runif(225),ncol=1)
e2 = matrix(runif(225,0,2),ncol=1)
set.seed(123)
This is the function to generate some data I will use
get_mls_basis<- function(p){
depth <- ceiling(runif(1)*p)
knot <- matrix(rep(0,depth+1),ncol=1)
lr <- runif(1) > 0.5
x <- matrix(rep(0,n),ncol=1)
not_finished <- 1
while (not_finished == 1) {
data_indx = ceiling(runif(1)*n)
var = matrix(rep(0,depth),ncol=1)
for (j in 1:depth) {
not_ok <- 1
while (not_ok == 1) {
ind <- ceiling(runif(1)*p)
if (!is.element (ind,var[1:j]))
{
var[j] <- ind
not_ok <- 0
}
}
}
x_v <- as.matrix(X[data_indx, var])
knot[1:depth] <- rgamma(depth,1,1)
knot[1:depth] <- knot[1:depth] / sqrt(sum(knot^2))
knot[depth+1] <- -x_v %*% knot[1:depth]
ones <- matrix(rep(1,n),ncol=1)
temp <- as.matrix(cbind(X[,var], ones)) %*% knot
if (lr == 0) {
for (i in 1:n)
{
temp[i] <- max(0,temp[i])
}
}
else {
for (i in 1:n)
{
temp[i] <- min(0,temp[i])
}
}
x <- temp
not_finished <- all(x==0)
}
mx <- mean(x)
stx <- sd(x)
x <- (x-mx)/stx
x
}
This is my log likelihood
Lik1<-function(theta, basis){
theta0=theta[1]
theta1=theta[2]
L=-n/2*log(theta0)-sum(basis/2)*log(theta1)-0.5/theta0*sum(e2/theta1^basis)
return(L)
}
basis1=get_mls_basis(1)
Here I used 5 as initial value
optimx(par=c(5,5), Lik1,
basis=basis1,method='bobyqa',control = list(maximize=TRUE))

Getting rid of repeated point in plot

I am simulating traders and their actions and am trying to get rid of repeated points in the plot statement. What is the best way to do this? In other words, for ninterval >1 I keep getting a plot where points are repeated.
f1 <- function(n,m,priceinitial,delta,mean, sd, ninterval){
traders <- vector(mode="character", length=n)
traderscurrent <- vector(mode="character", length=n)
price <- vector(mode="numeric")
pricecurrent <- vector(mode="numeric")
for(nint in 1:ninterval)
{
L = floor(rnorm(1,mean,sd))
print(L)
x3 <- runif(2,0,1)
v <- c(0, min(x3), max(x3))
for(i in 1:n)
{
traders[i] <- runif(1,0,1)
if(findInterval(traders[i],v) == sample(c(1,3),1))
{
traders[i] <- "B"
}
else if(findInterval(traders[i],v) == 2)
{
traders[i] <- "N"
}
else {
traders[i] <- "S"
}
}
print(table(traders))
for(step in 1:L)
{
for(i in 1:n)
{
b <- sample(traders[-i], m)
print(b)
table(b)
traderscurrent[i] <- sample(b,1)
}
print(table(traderscurrent))
pricecurrent[step] = priceinitial+length(which(traderscurrent == "B"))*delta-length(which(traderscurrent == "S"))*delta
priceinitial = pricecurrent[step]
traders <- traderscurrent
#print(nint)
#print(step)
}
price <- c(price,pricecurrent)
price <- price[-L]
}
print(price)
plot(price)
}
The call to generate the plot is:
f1(10,2,100,5,10,1,5)
The last three dots don't make sense.
I think this is what you wanted, it's kind of hard to tell though without code comments or more context.
When you did c(price, pricecurrent) you did add a repeated point, for instance in my experiment price got set to 90 on step 1 then pricecurrent was 90 80 on step 2 and the result was 90 90 80.
It looked like you were trying to fix this in the next line, but I'm not sure that it made sense to use -L, which was 9 in my experiment and thus had no effect on the repeated point. I think you want to use step there, but again I have no context so I can't be sure of the use case-specific logic.
f1(10,2,100,5,10,1,5)
f1 <- function(n=10,m=2,priceinitial=100,delta=5,mean=10, sd=1, ninterval=5){
cat("I need to comment my code")
traders <- vector(mode="character", length=n)
traderscurrent <- vector(mode="character", length=n)
price <- vector(mode="numeric")
pricecurrent <- vector(mode="numeric")
for(nint in 1:ninterval)
{
L = floor(rnorm(1,mean,sd))
print(L)
x3 <- runif(2,0,1)
v <- c(0, min(x3), max(x3))
for(i in 1:n)
{
traders[i] <- runif(1,0,1)
if(findInterval(traders[i],v) == sample(c(1,3),1))
{
traders[i] <- "B"
}
else if(findInterval(traders[i],v) == 2)
{
traders[i] <- "N"
}
else {
traders[i] <- "S"
}
}
print(table(traders))
for(step in 1:L)
{
for(i in 1:n)
{
b <- sample(traders[-i], m)
print(b)
table(b)
traderscurrent[i] <- sample(b,1)
}
print(table(traderscurrent))
pricecurrent[step] = priceinitial+length(which(traderscurrent == "B"))*delta-length(which(traderscurrent == "S"))*delta
priceinitial = pricecurrent[step]
traders <- traderscurrent
#print(nint)
#print(step)
}
price <- c(price,pricecurrent)
price <- price[-step]
}
print(price)
plot(price)
}
f()

Function inputting all values into a matrix

I am having trouble with my function. When I call the function, it only seems to have looped through first value in my for loop and does not continue to fill my matrix. Here is the code. The output should be a matrix filled with 1's.
binfunction <- function(y) { #Set up a function that takes a vector input and puts the elements into bins
L <- length(y)
x <- c(0, cumsum(y))
U <- runif(1, min = 0 , max = sum(y))
for(i in 1:L) {
if(x[i] <= U && x[i+1] > U){
return(i)
}
}
}
randomgraph <- function(n, beta) {
mat <- matrix(0,n,n)
mat[1,2] <- 1
mat[2,1] <- 1
for(i in 3:n) { #Loop that fills matrix
degvect <- colSums(mat[ , (1:(i-1))])
degvect <- degvect^(beta)
j <- binfunction(degvect)
mat[i,j] <- 1
mat[j,i] <- 1
return(mat)
}
}

How can I write the return values of a function into a matrix in R?

I want to write the values returned by funcStdErrMle and funcStdErrMome into a matrix to show the results for different values of n and theta. When I try doing this, it shows the following error:
**Error in `[<-`(`*tmp*`, i, j, value = 0.419321467165103) :
subscript out of bounds**
Here is my code:
n <- c(5,10,30,100)
theta <- c(1,2,4)
funcPop_Mean<-function(theta)
{
pop_mean<-mean(runif(5000,0,theta))
return(pop_mean)
}
funcMLE<-function(n,theta)
{
run1 <- runif(n,0,theta)
mle_est <- max(run1)
return (mle_est)
}
funcMOME<-function(n,theta)
{
run2 <- runif(n,0,theta)
avg<-mean(run2)
mome_est <- 2*avg
return (mome_est)
}
funcStdErrMome<-function(n,theta)
{
diff1 <- funcPop_Mean(theta)-funcMOME(n,theta)
se_mome <- mean(diff1**2)
return (se_mome)
}
funcStdErrMle<-function(n,theta)
{
diff2 <- funcPop_Mean(theta)-funcMLE(n,theta)
se_mle <- mean(diff2**2)
return (se_mle)
}
MOME_res <- matrix(nrow=3,ncol=4)
MLE_res <- matrix(nrow=3,ncol=4)
for(i in theta)
{
for(j in n)
{
MOME_res[i,j] <- funcStdErrMome(i,j)
}
}
for(i in theta)
{
for(j in n)
{
MLE_res[i,j] <- funcStdErrMle(i,j)
}
}
Does changing the for loops to the following arrive at what you need?
for(i in 1:length(theta)) {
for(j in 1:length(n)) {
MOME_res[i, j] <- funcStdErrMome(theta[i], n[j])
}
}
for(i in 1:length(theta)) {
for(j in 1:length(n)) {
MLE_res[i, j] <- funcStdErrMle(theta[i], n[j])
}
}

Resources