Sample code - How to implement vectors and the state-space as a lookup table

A vector of integers (e.g. for state, or action)


// Loop header: runs the statement (or block) that follows for i = 1, 2, .., n.
// The macro expands in place, so a variable or member called `n` must be
// visible wherever it is used; `i` is declared by the loop itself.
#define forall_i                for ( int i = 1; i <= n; ++i )



// A dynamically sized vector of ints, addressed with 1-based indices (1..n).
//
// Storage is created by alloc(), not by the constructor: a freshly
// constructed vector is empty (n == 0).  The object owns its buffer, so
// copies are deep and the buffer is released when the vector is destroyed.
class intvector                  
{
 int *secret;                   // owned storage, 0..(n-1) indexing internally

public:

 int n;                         // no. of components (0 until alloc is called).
                                // public because other code reads it directly
                                // (e.g. carg.n); treat it as read-only.

 intvector() : secret ( 0 ), n ( 0 )
 {
 }

 intvector ( const intvector& other ) : secret ( 0 ), n ( 0 )
 // deep copy, so a vector passed by value never shares storage
 // with the original.
 {
  alloc ( other.n );
  for ( int i = 1; i <= n; i++ )
   (*this)[i] = other[i];
 }

 ~intvector()
 {
  delete [] secret;
 }

 void alloc ( int n )
 // (re)allocate room for n components, all set to 0.
 // previous contents are discarded; a negative n is treated as 0.
 {
  if ( n < 0 )
   n = 0;
  int *fresh = new int [ n ]();		// 0..(n-1) indexing, zero-filled
  delete [] secret;			// release the old buffer only once the new one exists
  secret = fresh;
  this->n = n;				// the parameter shadows the member
 }

 int& operator[] ( int i ) 
 // all access to secret is via this (with 1..n indexing).
 // return reference so can do assignments.
 // i is not range-checked.
 {
  return secret[i-1];
 }

 const int& operator[] ( int i ) const
 // read-only access for const vectors (same 1..n indexing, unchecked).
 {
  return secret[i-1];
 }

 intvector& operator= ( const intvector& other )	
 // define assignment vector = vector
 // the left-hand side is resized first if the lengths differ.
 { 
  if ( this != &other )
  {
   if ( n != other.n )
    alloc ( other.n );
   for ( int i = 1; i <= n; i++ )
    (*this)[i] = other[i];
  }
  return *this;         
 } 
};

A vector of real numbers (e.g. to store the Q(x,a) values)


// A dynamically sized vector of floats, addressed with 1-based indices (1..n).
//
// Storage is created by alloc(), not by the constructor: a freshly
// constructed vector is empty (n == 0).  The object owns its buffer, so
// copies are deep and the buffer is released when the vector is destroyed.
class floatvector               
{
 float *secret;                 // owned storage, 0..(n-1) indexing internally

public:

 int n;                         // no. of components (0 until alloc is called);
                                // treat it as read-only.

 floatvector() : secret ( 0 ), n ( 0 )
 {
 }

 floatvector ( const floatvector& other ) : secret ( 0 ), n ( 0 )
 // deep copy, so a vector passed by value never shares storage
 // with the original.
 {
  alloc ( other.n );
  for ( int i = 1; i <= n; i++ )
   (*this)[i] = other[i];
 }

 ~floatvector()
 {
  delete [] secret;
 }

 void alloc ( int n )
 // (re)allocate room for n components, all set to 0.
 // previous contents are discarded; a negative n is treated as 0.
 {
  if ( n < 0 )
   n = 0;
  float *fresh = new float [ n ]();	// 0..(n-1) indexing, zero-filled
  delete [] secret;			// release the old buffer only once the new one exists
  secret = fresh;
  this->n = n;				// the parameter shadows the member
 }

 float& operator[] ( int i ) 
 // all access to secret is via this (with 1..n indexing).
 // return reference so can do assignments.
 // i is not range-checked.
 {
  return secret[i-1];
 }

 const float& operator[] ( int i ) const
 // read-only access for const vectors (same 1..n indexing, unchecked).
 {
  return secret[i-1];
 }

 floatvector& operator= ( const floatvector& other )
 // define assignment vector = vector
 // the left-hand side is resized first if the lengths differ.
 { 
  if ( this != &other )
  {
   if ( n != other.n )
    alloc ( other.n );
   for ( int i = 1; i <= n; i++ )
    (*this)[i] = other[i];
  }
  return *this;         
 } 
};

If our vector components take only a finite number of possible values (e.g. state or action vectors), then we can enumerate them:


// an enumerablevector x = (x1..xn) comes with
//  an associated cvector c = (c1..cn) 
//  which defines its limits:
// for each i
//  xi should be in range 0..(ci-1)

// now can work out things such as no. of possible vectors
// and can give them unique ID numbers (for use in lookup table)


// cvector: an intvector used to hold the limits (c1..cn) of an
// enumerable vector.
typedef intvector cvector;      


// An intvector x = (x1..xn) whose components each take a finite number of
// values, so that every possible vector can be given a unique ID number.
//
// The limits are held in c = (c1..cn): xi should be in range 0..(ci-1).
// IDs run over 0..(no-1) and treat x as a mixed-radix number in which xn
// is the least significant digit.
class enumerablevector : public intvector
{  
public:
 cvector        c;              // per-component limits
 int            no;             // no. of possible vectors (0 until allocvector is called)

 enumerablevector() : no ( 0 )
 {
 }
               
 void allocvector ( cvector carg )
 // size this vector and its limits from carg, then work out `no`.
 // the product c1*..*cn is not checked for int overflow.
 {
  c.alloc ( carg.n ); c = carg;
    alloc ( c.n );

 // no. of possible vectors = c1*c2*..*cn 
  no = 1;
  forall_i
   no = no * c[i];                     
 }


 int id()                       // vector2id 
 // vectors have unique IDs  0..(no-1)
 // walks from the last component to the first; p is the place value
 // of the current component (the product of the limits to its right).
 {
  int total = 0;
  int p = 1;
  for ( int i=n; i>=1; i-- )
  {
   int xi = (*this)[i];
   total = total + (xi*p);
   p = p*c[i];
  }
  return total;
 }


 void from ( int totalarg )          // id2vector
 // set the components so that id() returns totalarg.
 // totalarg should be in range 0..(no-1); this is not checked.
 {
  if ( no <= 0 )
   return;                      // no valid vectors (e.g. some ci is 0):
                                // p / c[i] below would divide by zero

  int total = totalarg;
  int p = no;
  forall_i
  {
   p = p / c[i];                // place value of component i
   (*this)[i] = total / p;
   total = total % p;
  }
 }


 void testEnumeration()         // handy routine to test that enumeration scheme works
 // checks that from(i) followed by id() gives back i for every ID,
 // printing a message for each failure.
 // overwrites the current contents of this vector.
 {
  for ( int i=0; i<=(no-1); i++ )
  {
   from(i);
   int j = id();
   if ( i != j )
   {
    std::cout << "Error: enumeration failed \n";
   }
  }
 }
};



// states and actions are both enumerable vectors; the two names only
// record which role a given vector plays.
typedef enumerablevector state;         
typedef enumerablevector action;   


// global variables cf and df define state and action
// (presumably cf holds the limits of the state vector and df those of
// the action vector - TODO confirm against the problem world).
// these are defined in the actual problem world:

cvector cf;
cvector df;

The Q(x,a) space is just a vector of real numbers, indexed by (x,a). The enumeration of states and actions allows us to access this vector uniquely:


// Lookup table of Q(x,a) values: a floatvector with one entry for every
// (state, action) pair, addressed through the ID numbers of x and a.
class StateActionSpace : public floatvector 
{
 state xf;                      // these define the space
 action af;

public:
 void allocvector ( cvector carg, cvector darg )
 // carg = limits of the state vector, darg = limits of the action vector.
 // allocates xf.no * af.no entries, one per (state, action) pair.
 {
  xf.allocvector ( carg );
  af.allocvector ( darg );
  alloc ( xf.no * af.no );
 }

 float& at ( state x, action a )
 // reference to the entry for (x,a), so it can be read or assigned.
 // entries are laid out action-major: all states for action ID 0, then
 // all states for action ID 1, and so on.
 // x and a should have been allocated with the same limits as were given
 // to allocvector; this is not checked, and the index is not range-checked.
 {
  int id = ( a.id() * x.no ) + x.id();
  return (*this) [ id+1 ];      // +1 because floatvector uses 1..n indexing
 }
};