### by Chuck Anderson, for CS545 ### http://www.cs.colostate.edu/~anderson/cs545 ### You may use, but please credit the source. source("gradientDescents.R") source("mlUtilities.R") source("drawNNet.R") makeNN <- function(X,T,nh, lambda=0, xPrecision=1e-8,fPrecision=1e-8, nIterations=10000) { force(X); force(T); force(nh) # may not need all of these standardizeF <- makeStandardizeF(X) X <- standardizeF(X) ni <- ncol(X) no <- ncol(T) V <- matrix(0.1*(runif((ni+1)*nh)-0.5), ni+1,nh) W <- matrix(0.1*(runif((nh+1)*no)-0.5), nh+1,no) X1 <- cbind(1,X) pack <- function(v,w) { matrix(c(v,w)) } unpack <- function(allw) { list(V = matrix(allw[1:((ni+1)*nh)],ni+1,nh), W = matrix(allw[-(1:((ni+1)*nh))],nh+1,no)) } errorTrace <- NULL nTrace <- 0 sqErrorF <- function(weights) { r <- unpack(weights) V <- r[[1]] W <- r[[2]] Z <- tanh(X1 %*% V) Y <- cbind(1,Z) %*% W Vnotfirst <- matrix(V[-1,,drop=FALSE]) sqerror <- mean((Y - T)^2) + lambda * t(Vnotfirst) %*% Vnotfirst 0.5*sqerror } gradF <- function(weights) { r <- unpack(weights) V <- r[[1]] W <- r[[2]] Z <- tanh(X1 %*% V) Y <- cbind(1,Z) %*% W error <- Y - T NK <- nrow(X1) * ncol(T) dV <- t(X1) %*% (error %*% t(W[-1,,drop=FALSE]) * (1-Z^2)) / NK + lambda * rbind(0,V[-1,,drop=FALSE]) dW <- t(cbind(1,Z)) %*% error / NK pack(dV,dW) } scgresult <- scg(pack(V,W),sqErrorF,gradF, xPrecision = xPrecision, fPrecision = fPrecision, nIterations = nIterations) cat("SCG stopped due to limit on ",scgresult$reason,"\n") r <- unpack(scgresult$x) V <- r[[1]] W <- r[[2]] list(standardizeF=standardizeF, V=V, W=W, lambda=lambda) } useNN <- function(nnet,X) { X <- nnet$standardizeF(X) X1 <- cbind(1,X) Z <- tanh(X1 %*% nnet$V) Y <- cbind(1,Z) %*% nnet$W Y }