Basic neural network
// input I[i] = any real numbers ("doubles" in C++)
// hidden layer y[j] = sigmoid output, continuous 0 to 1
// network output y[k] = sigmoid output, continuous 0 to 1
// correct output O[k] = continuous 0 to 1
// assumes throughout that all i are linked to all j, and that all j are linked to all k
// if we want some units NOT to be connected, we will need to introduce:
//   bool connected [ TOTAL ] [ TOTAL ];
// initialise it, and then keep checking:
//   if ( connected[i][j] ) ...
// but we don't really need to do this,
// since we can LEARN a weight of 0 on the link instead
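The listing relies on a few constants and helpers that are defined elsewhere in the notes (NOINPUT, NOHIDDEN, NOOUTPUT, RATE, C, buf, double_square, float_randomAtoB). One plausible, purely illustrative set of definitions that makes the code below compile:

#include <iostream>
#include <cmath>      // exp()
#include <cstdio>     // sprintf()
#include <cstdlib>    // rand()
using namespace std;

const int NOINPUT  = 3;      // illustrative layer sizes only
const int NOHIDDEN = 4;
const int NOOUTPUT = 2;
const double RATE = 0.3;     // learning rate, used in backpropagate()
const double C    = 0.5;     // initial weights drawn uniformly from [-C,C]
char buf [ 100 ];            // shared formatting buffer used by report()

double double_square ( double x ) { return x * x; }

double float_randomAtoB ( double A, double B )   // uniform random double in [A,B]
{
 return A + ( (double) rand() / RAND_MAX ) * ( B - A );
}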
double sigmoid ( double x )   // squashes any real x into the range (0,1)
{
 return 1.0 / ( 1.0 + exp(-x) );
}
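One property of the sigmoid is worth noting now, because backpropagate() below relies on it: its derivative can be written purely in terms of the output already computed on the forward pass. The helper below is just a sketch to make the identity concrete; the listing itself inlines the expression y*(1-y).

// if y = sigmoid(x), then dy/dx = y * (1 - y)
double sigmoid_derivative ( double y )   // note: argument is y = sigmoid(x), not x
{
 return y * ( 1.0 - y );
}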
const int TOTAL = NOINPUT+NOHIDDEN+NOOUTPUT;
// all units have unique ids - so there is no ambiguity about which unit we refer to:
const int loi = 0;
const int hii = NOINPUT-1;
const int loj = NOINPUT;
const int hij = NOINPUT+NOHIDDEN-1;
const int lok = NOINPUT+NOHIDDEN;
const int hik = NOINPUT+NOHIDDEN+NOOUTPUT-1;
#define for_i for ( i=loi; i<=hii; i++ )   // loop over the input units
#define for_j for ( j=loj; j<=hij; j++ )   // loop over the hidden units
#define for_k for ( k=lok; k<=hik; k++ )   // loop over the output units
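As a worked example, with the illustrative sizes above (NOINPUT=3, NOHIDDEN=4, NOOUTPUT=2) the unit ids come out as:

// i runs 0..2   (loi=0, hii=2)   - input units
// j runs 3..6   (loj=3, hij=6)   - hidden units
// k runs 7..8   (lok=7, hik=8)   - output units
// TOTAL = 9, so an array of size TOTAL has one slot per unit, whatever its layer
// (w[TOTAL][TOTAL] has 81 slots, but only the 3*4 + 4*2 = 20 on real links get used)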
class NeuralNetwork
{
 public:
  int i,j,k;                      // loop counters used by the for_i/for_j/for_k macros
  long visits;                    // how many I/O exemplars we have seen
  double I [ TOTAL ];             // input
  double y [ TOTAL ];             // unit outputs
  double O [ TOTAL ];             // correct output
  double w [ TOTAL ] [ TOTAL ];   // w[i][j] = weight on the link from unit i to unit j
  double wt [ TOTAL ];            // bias weights wt[i]
  double dx [ TOTAL ];            // dE/dx[i]
  double dy [ TOTAL ];            // dE/dy[i]

  void init();  void forwardpass();  void backpropagate();
  void report ( ostream& stream );  void print ( ostream& stream );
  void learn ( int CEILING );  void exploit();
  void newIO();                     // application-specific - supplies the next I/O pair
  void reportIO ( ostream& stream );
};
How input is passed forward through the network:
void NeuralNetwork :: forwardpass()
{
double x; // temporary variable - the weighted-sum input of the unit currently being computed
//----- forwardpass I[i] -> y[j] ------------------------------------------------
for_j
{
x = 0;
for_i
x = x + ( I[i] * w[i][j] );
y[j] = sigmoid ( x - wt[j] );
}
//----- forwardpass y[j] -> y[k] ------------------------------------------------
for_k
{
x = 0;
for_j
x = x + ( y[j] * w[j][k] );
y[k] = sigmoid ( x - wt[k] );
}
}
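In equation form, the two loops just computed are:

// y[j] = sigmoid ( SUM_i ( I[i] * w[i][j] ) - wt[j] )
// y[k] = sigmoid ( SUM_j ( y[j] * w[j][k] ) - wt[k] )
// i.e. the bias weight wt acts as a threshold subtracted from the weighted sum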
void NeuralNetwork :: report ( ostream& stream ) // report on the forwardpass we just did
{
stream << "I[i] ";
for_i
{ sprintf ( buf, "%.2f", I[i] ); stream << buf << " "; }
stream << "\n";
stream << "y[j] ";
for_j
{ sprintf ( buf, "%.2f", y[j] ); stream << buf << " "; }
stream << "\n";
stream << "y[k] ";
for_k
{ sprintf ( buf, "%.2f", y[k] ); stream << buf << " "; }
stream << "\n";
stream << "O[k] ";
for_k
{ sprintf ( buf, "%.2f", O[k] ); stream << buf << " "; }
stream << "\n";
double E = 0;
for_k
E = E + double_square(y[k] - O[k]);
E = E/2;
sprintf ( buf, "%.3f", E );
stream << "E " << buf << "\n";
}
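The E just printed is the standard sum-of-squares error over the output units:

// E = 1/2 * SUM_k ( y[k] - O[k] )^2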
void NeuralNetwork :: print ( ostream& stream )
// graphic drawing of network, with bias/thresholds bracketed
{
// many ways of doing this
}
// learning is going to repeatedly nudge each w up and down (w++ and w--)
// so you might think we should start with all w=0
// (giving all y[k]=0.5 - halfway between the possible outputs 0 and 1)
// in fact, if all w start the same, they tend to march in step together
// we need *asymmetry* if we want units to specialise (form a representation scheme)
// so it is best to start with diverse w
//
// also, large positive or negative w push the sigmoid into its flat regions,
// where the derivative y*(1-y) is near 0 -> slow learning
// so start with small absolute w -> fast learning
double initw()   // random initial weight, uniform in [-C,C]
{
 return float_randomAtoB ( -C, C );
}
void NeuralNetwork :: init()
{
visits = 0;
for_i
for_j
w[i][j] = initw();
for_j
for_k
w[j][k] = initw();
for_j
wt[j] = initw();
for_k
wt[k] = initw();
}
void NeuralNetwork :: backpropagate()
{
double dw; // temporary variable - dE/dw for the weight currently being updated
//----- backpropagate O[k] -> dy[k] -> dx[k] -> w[j][k],wt[k] ---------------------------------
for_k
{
dy[k] = y[k] - O[k];
dx[k] = ( dy[k] ) * y[k] * (1-y[k]);
}
//----- backpropagate dx[k],w[j][k] -> dy[j] -> dx[j] -> w[i][j],wt[j] ------------------------
//----- use OLD w values here (that's what the equations refer to) .. -------------------------
for_j
{
double t = 0;
for_k
t = t + ( dx[k] * w[j][k] );
dy[j] = t;
dx[j] = ( dy[j] ) * y[j] * (1-y[j]);
}
//----- .. do all w changes together at end ---------------------------------------------------
for_j
for_k
{
dw = dx[k] * y[j];
w[j][k] = w[j][k] - ( RATE * dw );
}
for_i
for_j
{
dw = dx[j] * I[i];
w[i][j] = w[i][j] - ( RATE * dw );
}
for_k
{
dw = dx[k] * (-1);
wt[k] = wt[k] - ( RATE * dw );
}
for_j
{
dw = dx[j] * (-1);
wt[j] = wt[j] - ( RATE * dw );
}
}
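For reference, the chain rule that the code above implements, in the code's own notation:

// dy[k] = dE/dy[k] = y[k] - O[k]                  (from E = 1/2 * SUM_k (y[k]-O[k])^2)
// dx[k] = dE/dx[k] = dy[k] * y[k] * (1 - y[k])    (sigmoid derivative, see above)
// dy[j] = dE/dy[j] = SUM_k ( dx[k] * w[j][k] )    (unit j feeds every output unit k)
// dx[j] = dE/dx[j] = dy[j] * y[j] * (1 - y[j])
// dE/dw[j][k] = dx[k] * y[j]        dE/dw[i][j] = dx[j] * I[i]
// dE/dwt[k]   = dx[k] * (-1)        dE/dwt[j]   = dx[j] * (-1)
// and every parameter takes a small step downhill:  w = w - RATE * dE/dw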
Ways of using the network:
void NeuralNetwork :: learn ( int CEILING )
{
 for ( int c=1; c<=CEILING; c++ )
 {
  newIO();
  // new I/O pair
  // put I into I[i]
  // put O into O[k]
  forwardpass();
  backpropagate();
  visits++;   // count exemplars seen
 }
}
// note the weights are adjusted after every single exemplar ("online" learning),
// rather than after summing the gradient over the whole training set
void NeuralNetwork :: exploit()   // run the trained network (no learning) and report what it does
{
for ( int c=1; c<=30; c++ )
{
newIO();
forwardpass();
reportIO ( cout );
}
}
NeuralNetwork net;
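Putting it together: newIO() and reportIO() are application-specific and would be defined per experiment; the stubs below and the training ceiling of 10000 are purely illustrative.

void NeuralNetwork :: newIO()      // illustrative stub - a real experiment supplies its own
{
 for_i I[i] = float_randomAtoB ( 0, 1 );   // some random input
 for_k O[k] = 0.5;                         // and a dummy "correct" output
}

void NeuralNetwork :: reportIO ( ostream& stream )   // illustrative stub
{
 report ( stream );   // just reuse the full report
}

int main()
{
 net.init();            // start with small, diverse random weights
 net.learn ( 10000 );   // repeated newIO / forwardpass / backpropagate
 net.exploit();         // then watch the trained network at work
 return 0;
}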