I've been trying to implement backpropagation in R, but I've been getting some strange results. It appears that after 1000 iterations of backprop, the program predicts 1 for all values. I was hoping it was a problem in the test function, but testing on smaller numbers of iterations shows that 0 is predicted as an output value in some instances. It seems that somewhere in iterating through the dataset, the weight updates tend toward increasing, when they should tend toward reducing error.
I apologize that the code is difficult to read in spots. I'm working on this with a partner and I dislike the way that he names variables. It's also not as fully commented as I'd like. Any help is appreciated
# initialize a global output vector and a global vector of data frames
createNeuralNet <- function(numberOfInputNodes,hiddenLayers,nodesInHiddenLayer){
L <<- initializeWeightDataFrames(numberOfInputNodes,nodesInHiddenLayer,hiddenLayers)
# print(L)
OutputList <<- initializeOutputVectors(hiddenLayers)
}
# creates a list of weight data frames
# each weight data frame uses the row as an index of the "tail" for a connection
# the "head" of the connection (where the arrow points) is in the column index
# the value in the weight data frame is the weight of that connection
# the last row is the weight between the bias and a particular node
initializeWeightDataFrames <- function(numberOfInputNodes, nodesPerHiddenLayer, numberOfHiddenLayers) {
weights <- vector("list", numberOfHiddenLayers + 1)
# this code simply creates empty data frames of the proper size so that they may
first <- read.csv(text=generateColumnNamesCSV(nodesPerHiddenLayer))
middle <- read.csv(text=generateColumnNamesCSV(nodesPerHiddenLayer))
# assume binary classifier, so output layer has 1 node
last <- read.csv(text=generateColumnNamesCSV(1))
first <- assignWeights(first, numberOfInputNodes + 1)
weights[[1]] <- first
# assign random weights to each row
if (numberOfHiddenLayers != 1) {
for (i in 1:numberOfHiddenLayers - 1) {
middle <- assignWeights(middle, nodesPerHiddenLayer + 1)
weights[[i+1]] <- middle
}
}
last <- assignWeights(last, nodesPerHiddenLayer + 1)
weights[[length(weights)]] <- last
return(weights)
}
# generate a comma-separated string of column names c1 thru cn for creating arbitrary size data frame
generateColumnNamesCSV <- function(n) {
namesCSV <- ""
if (n==1) {
return("c1")
}
for (i in 1:(n-1)) {
namesCSV <- paste0(namesCSV, "c", i, ",")
}
namesCSV <- paste0(namesCSV, "c", n)
return(namesCSV)
}
assignWeights <- function(weightDF, numRows) {
modifiedweightDF <- weightDF
for (rowNum in 1:numRows) {
# creates a bunch of random numbers from -1 to 1, used to populate a row
rowVector <- runif(length(weightDF))
for (i in 1:length(rowVector)) {
sign <- (-1)^round(runif(1))
rowVector[i] <- sign * rowVector[i]
}
modifiedweightDF[rowNum,] <- rowVector
}
return(modifiedweightDF)
}
# create an empty list of the right size, will hold vectors of node outputs in the future
initializeOutputVectors <- function(numberOfHiddenLayers) {
numberOfLayers <- numberOfHiddenLayers + 1
outputVectors <- vector("list", numberOfLayers)
return(outputVectors)
}
# this is the main loop that does feed-forward and back prop
trainNeuralNet <- function(trainingData,target,iterations){
count <- 0
# iterations is a constant for how many times the dataset should be iterated through
while(count<iterations){
print(count)
for(row in 1:nrow(trainingData)) { # for each row in the data set
#Feed Forward
# instance is the current row that's being looked at
instance <- trainingData[row,]
# print(instance)
for (l in 1:length(L)) { # for each weight data frame
# w is the current weights
w <- L[[l]]
#print(w)
Output <- rep(NA, length(w))
if (l!=1) {
# x is the values in the previous layer
# can't access the previous layer if you're on the first layer
x <- OutputList[[l-1]]
#print(x)
}
for (j in 1:ncol(w)) { # for each node j in the "head" layer
s <- 0
for (i in 1:(nrow(w)-1)) {
# calculate the weighted sum s of connection weights and node values
# this is used to calculate a node in the next layer
# check the instance if on the first layer
if (l==1) {
# print(i)
# print(instance[1,i])
# i+1 skips over the target column
s <- s + instance[1,i+1]*w[i,j]
# print(s)
# if the layer is 2 or more
}else{
# print(i)
#print(j)
# print(w)
# print(w[i,j])
s <- s + x[i]*w[i,j] # weighted sum
# sigmoid activation function value for node j
}
}
#print(s)
s <- s + w[nrow(w),j] # add weighted bias
# print("s")
# print(s)
# print("sigmoid s")
# print(sigmoid(s))
Output[j] <- sigmoid(s)
}
OutputList[[l]] <- Output
}
# print(OutputList)
# print("w")
# print(L)
# print("BAck prop Time")
#Back Propagation
out <- OutputList[length(OutputList)]
#print(OutputList)
outputError <- rep(NA, length(w))
outputErrorPresent <- rep(NA, length(w))
outputError[1] <- out[[1]]*(1-out[[1]])*(out[[1]]-target[row])
for (h in (length(L)):1) { # for each weight matrix in hidden area h (going backwards)
hiddenOutput <- OutputList[h]
#print("hiddenOutput")
#print(h)
if (row==1||row==2) {
# print("h")
# print(h)
# print("output error Present")
# print(outputErrorPresent)
}
if (h!=(length(L))) {
outputError <- outputErrorPresent
}
w <- L[[h]]
for (j in 1:(nrow(w))) { # for each node j in hidden layer h
#print("length w")
#print(length(w))
if (row==1||row==2) {
# print("j")
# print(j)
}
errSum <- 0
nextLayerNodes <- L[[h]]
# print(nextLayerNodes)
#print(class(nextLayerNodes))
for (k in 1:ncol(nextLayerNodes)) {
errSum <- errSum + outputError[k]*nextLayerNodes[j,k]
}
m <- 0
if (h == 1) {
m <- as.numeric(instance)
m <- m[-1]
} else {
m <- OutputList[h-1][[1]]
}
deltaWeight <- 0
for (k in 1:ncol(nextLayerNodes)) {
hiddenNodeError <- hiddenOutput[[1]][k]*(1- hiddenOutput[[1]][k])*errSum
if (j == nrow(w)) {
deltaWeight <- learningRate*hiddenNodeError
} else {
deltaWeight <- learningRate*hiddenNodeError*m[j]
}
# print(deltaWeight)
w[j,k] <- w[j,k] + deltaWeight
}
if (j != nrow(w)) {
outputErrorPresent[j] <- hiddenNodeError
}
}
L[[h]] <<- w
}
# print(OutputList)
}
count <- count +1
# print(L)
#calculate global error
}
########################repeat
# print("w")
}
sigmoid <- function(s){
sig <- 1/(1+exp(-s))
return(sig)
}
testNeuralNetwork <- function(testingData,testTarget){
correctCount <- 0
# run the same code as feed forward
# this time run it on testing examples and compare the outputs
for(row in 1:nrow(testingData)) { # for each test instance
#Feed Forward
instance <- testingData[row,]
#print(instance)
for (l in 1:length(L)) { # for each layer l
w <- L[[l]]
#print(w)
Output <- rep(NA, length(w))
if (l!=1) {
x <- OutputList[[l-1]]
#print(x)
}
for (j in 1:ncol(w)) { # for each node j in layer l
s <- 0
for (i in 1:(nrow(w)-1)) {
if (l==1) {
# i+1 skips over the target column
s <- s + instance[1,i+1]*w[i,j]
# print(s)
}else{
# print(i)
#print(j)
# print(w)
# print(w[i,j])
s <- s + x[i]*w[i,j] # weighted sum
# sigmoid activation function value for node j
}
}
#print(s)
s <- s + w[nrow(w),j] # add weighted bias
Output[j] <- sigmoid(s)
#print(sigmoid(s))
}
OutputList[[l]] <- Output
}
# print(OutputList)
outputVal <- threshold(OutputList[[length(OutputList)]])
if (outputVal==testTarget[row]) {
print(paste0(" ", outputVal, " Correct!"))
correctCount <- correctCount + 1
}else{
print(paste0(" ", outputVal, " Wrong."))
}
#print()
#print(paste0("s2 ",str))
}
}
# convert real-valued output to a binary classification
threshold <- function(value){
if (value>=0.5) {
return(1)
}else{
return(0)
}
}
# this modifies df by removing 30 random rows
# this means that the same df will be changed permanently, so be careful of that
# it also returns the 30 random rows as a test set
makeTestSet <- function(df, size) {
len <- 1:length(df[,1])
randRows <- sample(len, size, replace=F)
return(randRows)
}
Data <- read.csv(file = "Downloads/numericHouse-votes-84.csv", head = TRUE, sep = ",")
learningRate <<- 0.1
# assume that the first column of the data is the column that is to be predicted
# thus the number of inputs is 1 less than the number of columnns
numberOfInputNodes <- ncol(Data) - 1
randRows <- makeTestSet(Data,30) #change this to 30
testData <- Data[randRows,]
trainingData <- Data[-randRows,]
testTarget <- testData[,1]
#trainingData <- Data[,1:numberOfInputNodes]
trainingTarget <- trainingData[,1]
createNeuralNet(numberOfInputNodes,1,numberOfInputNodes)
iterations <- 100
trainNeuralNet(trainingData,trainingTarget,iterations)
testNeuralNetwork(testData,testTarget)
L
Related
I was trying to maximize my Likelihood with the R package 'optimx'. Here is my code. With the initial value (5,5) and (1,1), I got different Maximized likelihood. I also have tried different method like 'Nelder=Mead', but the estimated log likelihood are different under different methods...
library('optimx')
n=225
X = matrix(runif(225),ncol=1)
e2 = matrix(runif(225,0,2),ncol=1)
set.seed(123)
This is the function to generate some data I will use
get_mls_basis<- function(p){
depth <- ceiling(runif(1)*p)
knot <- matrix(rep(0,depth+1),ncol=1)
lr <- runif(1) > 0.5
x <- matrix(rep(0,n),ncol=1)
not_finished <- 1
while (not_finished == 1) {
data_indx = ceiling(runif(1)*n)
var = matrix(rep(0,depth),ncol=1)
for (j in 1:depth) {
not_ok <- 1
while (not_ok == 1) {
ind <- ceiling(runif(1)*p)
if (!is.element (ind,var[1:j]))
{
var[j] <- ind
not_ok <- 0
}
}
}
x_v <- as.matrix(X[data_indx, var])
knot[1:depth] <- rgamma(depth,1,1)
knot[1:depth] <- knot[1:depth] / sqrt(sum(knot^2))
knot[depth+1] <- -x_v %*% knot[1:depth]
ones <- matrix(rep(1,n),ncol=1)
temp <- as.matrix(cbind(X[,var], ones)) %*% knot
if (lr == 0) {
for (i in 1:n)
{
temp[i] <- max(0,temp[i])
}
}
else {
for (i in 1:n)
{
temp[i] <- min(0,temp[i])
}
}
x <- temp
not_finished <- all(x==0)
}
mx <- mean(x)
stx <- sd(x)
x <- (x-mx)/stx
x
}
This is my log likelihood
Lik1<-function(theta, basis){
theta0=theta[1]
theta1=theta[2]
L=-n/2*log(theta0)-sum(basis/2)*log(theta1)-0.5/theta0*sum(e2/theta1^basis)
return(L)
}
basis1=get_mls_basis(1)
Here I used 5 as initial value
optimx(par=c(5,5), Lik1,
basis=basis1,method='bobyqa',control = list(maximize=TRUE))
I have tried to improve my previous code so that I can incorporate conditional probability.
Source Code
states <- c(1, 2)
alpha <- c(1, 1)/2
mat <- matrix(c(0.5, 0.5,
0, 1), nrow = 2, ncol = 2, byrow = TRUE)
# this function calculates the next state, if present state is given.
# X = present states
# pMat = probability matrix
nextX <- function(X, pMat)
{
#set.seed(1)
probVec <- vector() # initialize vector
if(X == states[1]) # if the present state is 1
{
probVec <- pMat[1,] # take the 1st row
}
if(X==states[2]) # if the prsent state is 2
{
probVec <- pMat[2,] # take the 2nd row
}
return(sample(states, 1, replace=TRUE, prob=probVec)) # calculate the next state
}
# this function simulates 5 steps
steps <- function(alpha1, mat1, n1)
{
vec <- vector(mode="numeric", length = n1+1) # initialize an empty vector
X <- sample(states, 1, replace=TRUE, prob=alpha1) # initial state
vec[1] <- X
for (i in 2:(n1+1))
{
X <- nextX(X, mat1)
vec[i] <- X
}
return (vec)
}
# this function repeats the simulation n1 times.
# steps(alpha1=alpha, mat1=mat, n1=5)
simulate <- function(alpha1, mat1, n1)
{
mattt <- matrix(nrow=n1, ncol=6, byrow=T);
for (i in 1:(n1))
{
temp <- steps(alpha1, mat1, 5)
mattt[i,] <- temp
}
return (mattt)
}
Execution
I created this function so that it can handle any conditional probability:
prob <- function(simMat, fromStep, toStep, fromState, toState)
{
mean(simMat[toStep+1, simMat[fromStep+1, ]==fromState]==toState)
}
sim <- simulate(alpha, mat, 10)
p <- prob(sim, 0,1,1,1) # P(X1=1|X0=1)
p
Output
NaN
Why is this source code giving NaN?
How can I correct it?
I didn't inspect the rest of your code, but it seems that only prob has a mistake; you are mixing up rows with columns and instead it should be
prob <- function(simMat, fromStep, toStep, fromState, toState)
mean(simMat[simMat[, fromStep + 1] == fromState, toStep + 1] == toState)
Then NaN still remains a valid possibility for the following reason. We are looking at a conditional probability P(X1=1|X0=1) which, by definition, is well defined only when P(X0=1)>0. The same holds with sample estimates: if there are no cases where X0=1, then the "denominator" in the mean inside of prob is zero. Thus, it cannot and should not be fixed (i.e., returning 0 in those cases would be wrong).
So I´m trying to run the fuction below hoping to get 224 vectors in the output, but only get one and I have no idea why.
ee <- 0.95
td <- 480
tt <- c(60,10,14,143,60)
tt <- as.data.frame(tt)
r <- vector()
m <- function(d)
{
n <- length(tt)
c <- nrow(d)
for (j in 1:c)
{
for (i in 1:n)
{
r[i] <- tt[i]/(td*ee/d[j,])
}
return(r)
}
#where d is a data frame of 224 obs. of 1 variable
and the output i´m getting is
[[1]]
[1] 1026.3158 171.0526 239.4737 2446.0526 1026.3158
The problem comes from the fact that your function returns only the last r vector that is computed, due to where return is placed within your loop.
One way to do this is to store the results in a list:
r <- vector()
m_bis <- function(d) {
res <- list() # store all the vectors here
n <- length(tt)
c <- nrow(d)
for (j in 1:c) {
for (i in 1:n) {
r[i] <- tt[i] / (td * ee / d[j,])
}
res[j] <- r
}
return(res)
}
That should yield something like this:
m_bis(as.data.frame(mtcars$mpg))
> [[1]]
[1] 2.7631579 0.4605263 0.6447368 6.5855263 2.7631579
...
[[32]]
[1] 2.8157895 0.4692982 0.6570175 6.7109649 2.8157895
outer(as.vector(tt[,1]), as.vector(d[,1]), function(x,y){x*y/(td*ee)})
Use vectorization to accelerate the computation.
Im trying to create my very own first project in R but have hit a roadblock.
I have a data frame such as below where every row represents dataset of a financial option.
type <- c("C", "C")
marketV <- c(1.1166, 1.911)
S <- c(20, 60)
K <- c(20, 56)
T <- c(0.333, 0.5)
df <- data.frame(type, marketV, S, K, T)
I made a user defined function to take this data frame as an input and works great when the data frame is one row long. However, I'm not sure how to have my function iterate through all the data frame rows and produce a result for all of them.
I'm new to R so I'm unsure whether I should be running a 'for' loop around or playing around with lapply, or if theres a simple syntax answer. I simply want the function to take the df as input, but repeat its calculation for n row, and produce n results. Thank you for the help in advance.
My current function code for a df with 1 row below as reference:
This is a corrected version of your program:
df <- data.frame(type=c("C", "C"), marketV=c(1.1166, 1.911), S=c(20, 60), K=c(20, 56), T=c(0.333, 0.5))
IV <- function(df) {
# check if df has more then 1 row:
if (nrow(df)>1) { message("!! nrow(df)>1 !!"); return(NA) }
# Initializing of variables
r <- 0
sigma <- 0.3
sigma_down <- 0.001
sigma_up <- 1
count <- 0
type <- df$type; marketV <- df$marketV; S <- df$S; K <- df$K; T <- df$T
d1 <- (log(S/K) + (sigma^2/2)*T)/(sigma*sqrt(T))
d2 <- (log(S/K) - (sigma^2/2)*T)/(sigma*sqrt(T))
if(type=="C") {
V <- exp(-r*T)*(S*pnorm(d1) - K*pnorm(d2))
} else {
V <- exp(-r*T)*(K*pnorm(-d2) - S*pnorm(-d1)) }
difference <- V - marketV
# Root finding of sigma by Bisection method
while(abs(difference)>0.001 && count<1000) {
if(difference < 0) {
sigma_down <- sigma
sigma <- (sigma_up + sigma)/2
} else {
sigma_up <- sigma
sigma <- (sigma_down + sigma)/2
}
d1 <- (log(S/K) + (sigma^2/2)*T)/(sigma*sqrt(T))
d2 <- d1 - sigma*sqrt(T)
if(type=="C") {
V <- exp(-r*T)*(S*pnorm(d1) - K*pnorm(d2))
} else {
V <- exp(-r*T)*(K*pnorm(-d2) - S*pnorm(-d1)) }
difference <- V - marketV
count <- count + 1
}
if(count == 1000){
return(NA) # If sigma to satisfy Black76 price cannot be found
} else{
return(sigma)
}
}
sapply(split(df, seq(nrow(df))), IV)
The main thing is to run row by row through the dataframe. This is done by
sapply(split(df, seq(nrow(df))), IV)
In your original function are many errors: the biggest is accessing to S, K and so on. You might thinking taking the values from the dataframe df. But in fact you were taking the values from the workspace! I corrected this by redefining:
type <- df$type; marketV <- df$marketV; S <- df$S; K <- df$K; T <- df$T
I inserted a test for the number of rows in df, so you will get:
> IV(df)
!! nrow(df)>1 !!
[1] NA
Here is a cleaned up version of your program:
df <- data.frame(type=c("C", "C"), marketV=c(1.1166, 1.911), S=c(20, 60), K=c(20, 56), T=c(0.333, 0.5))
IV2 <- function(type, marketV, S, K, T) {
r <- 0; sigma <- 0.3
sigma_down <- 0.001; sigma_up <- 1
count <- 0
if(type=="C") {
f.sig <- function(sigma) {
d1 <- (log(S/K) + (sigma^2/2)*T)/(sigma*sqrt(T))
d2 <- d1 - sigma*sqrt(T)
exp(-r*T)*(S*pnorm(d1) - K*pnorm(d2)) - marketV
}
} else {
f.sig <- function(sigma) {
d1 <- (log(S/K) + (sigma^2/2)*T)/(sigma*sqrt(T))
d2 <- d1 - sigma*sqrt(T)
exp(-r*T)*(K*pnorm(-d2) - S*pnorm(-d1)) - marketV
}
}
ifelse(f.sig(sigma_down)*f.sig(sigma_up) < 0, uniroot(f.sig, c(sigma_down,sigma_up))$root, NA) # sigma
}
sapply(split(df, seq(nrow(df))), do.call, what="IV2")
I have a glm based on data A and I'd like to score data B to do validation, but some records in B have missing data.
Instead of these ending up without a score (na.omit) or being removed (na.exclude) I'd like them to end up with an outputted prediction that uses the model to determine a value based only on the data with values.
A reproducible example...
data(mtcars)
model<-glm(mpg~.,data=mtcars)
mtcarsNA<-mtcars
NAins <- NAinsert <- function(df, prop = .1){
n <- nrow(df)
m <- ncol(df)
num.to.na <- ceiling(prop*n*m)
id <- sample(0:(m*n-1), num.to.na, replace = FALSE)
rows <- id %/% m + 1
cols <- id %% m + 1
sapply(seq(num.to.na), function(x){
df[rows[x], cols[x]] <<- NA
}
)
return(df)
}
mtcarsNA<-NAins(mtcarsNA,.4)
mtcarsNA$mpg<-mtcars$mpg
predict(model,newdata=mtcarsNA,type="response")
Where I need the last line to return a result (non-NA) for all records. Can you point me in the direction of the code needed?
Based on the conversation in the comments, you want to replace NA values with zero before predicting. This seems dangerous/dubious to me -- use at your own risk.
naZero <- function(x) { x[is.na(x)] <- 0; x }
mtcarszero <- lapply(mtcarsNA,naZero)
predict(model,newdata=mtcarszero,type="response")
should be what you want.
For categorical variables, if you are using default treatment contrasts, then I think the consistent thing to do is something like this:
naZero <- function(x) { if (is.numeric(x)) {
repVal <- 0
} else {
if (is.factor(x)) {
repVal <- levels(x)[1]
} else stop("uh-oh")
}
x[is.na(x)] <- repVal
x }