Sample code - How to implement vectors and the state-space as a lookup table
A vector of integers (e.g. for state, or action)
// Loop header that runs i over 1..n (matching the 1-based indexing used by the vector classes).
// It expands in place, so an int named n must be in scope wherever it is used,
// and the statement (or braced block) that follows becomes the loop body.
#define forall_i for ( int i=1; i<=n; i++ )
// A vector of ints with 1..n indexing (e.g. a state or an action).
//
// Storage is obtained with alloc() and is not released by this class.
// The implicit copy constructor copies the pointer, so a copy of an
// intvector refers to the same storage as the original.
// NOTE(review): adding a destructor that frees `secret` would double-free
// through such copies; a deep-copy constructor would have to come first.
class intvector
{
    int *secret;    // underlying array, indexed 0..(n-1); only touched via operator[]
public:
    int n;          // number of components; 0 until alloc() has been called

    // Start empty so that n and secret never hold indeterminate values.
    intvector() : secret ( 0 ), n ( 0 ) {}

    // Allocate room for narg components (a negative request is treated as 0)
    // and remember the size in n. The components themselves are left unset.
    // Any storage from an earlier alloc() is not freed here (see class note).
    void alloc ( int narg )
    {
        n = ( narg > 0 ) ? narg : 0;
        secret = new int [ n ];     // 0..(n-1) indexing
    }

    // All access to secret is via this (with 1..n indexing).
    // Returns a reference so that v[i] can be assigned to.
    // No bounds check: i must be in 1..n.
    int& operator[] ( int i )
    {
        return secret[i-1];
    }

    // Read-only access for const vectors (same 1..n indexing, no bounds check).
    const int& operator[] ( int i ) const
    {
        return secret[i-1];
    }

    // Assignment vector = vector: copies components into the storage this
    // vector already has; it does not allocate or resize. Only the first
    // min(n, other.n) components are copied, so a size mismatch cannot run
    // past the end of either array.
    intvector& operator= ( const intvector& other )
    {
        int count = ( n < other.n ) ? n : other.n;
        for ( int i=1; i<=count; i++ )
            (*this)[i] = other[i];
        return *this;
    }
};
A vector of real numbers (e.g. to store the Q(x,a) values)
// A vector of floats with 1..n indexing (e.g. to store the Q(x,a) values).
//
// Storage is obtained with alloc() and is not released by this class.
// The implicit copy constructor copies the pointer, so a copy of a
// floatvector refers to the same storage as the original.
// NOTE(review): adding a destructor that frees `secret` would double-free
// through such copies; a deep-copy constructor would have to come first.
class floatvector
{
    float *secret;  // underlying array, indexed 0..(n-1); only touched via operator[]
public:
    int n;          // number of components; 0 until alloc() has been called

    // Start empty so that n and secret never hold indeterminate values.
    floatvector() : secret ( 0 ), n ( 0 ) {}

    // Allocate room for narg components (a negative request is treated as 0)
    // and remember the size in n. The components themselves are left unset.
    // Any storage from an earlier alloc() is not freed here (see class note).
    void alloc ( int narg )
    {
        n = ( narg > 0 ) ? narg : 0;
        secret = new float [ n ];   // 0..(n-1) indexing
    }

    // All access to secret is via this (with 1..n indexing).
    // Returns a reference so that v[i] can be assigned to.
    // No bounds check: i must be in 1..n.
    float& operator[] ( int i )
    {
        return secret[i-1];
    }

    // Read-only access for const vectors (same 1..n indexing, no bounds check).
    const float& operator[] ( int i ) const
    {
        return secret[i-1];
    }

    // Assignment vector = vector: copies components into the storage this
    // vector already has; it does not allocate or resize. Only the first
    // min(n, other.n) components are copied, so a size mismatch cannot run
    // past the end of either array.
    floatvector& operator= ( const floatvector& other )
    {
        int count = ( n < other.n ) ? n : other.n;
        for ( int i=1; i<=count; i++ )
            (*this)[i] = other[i];
        return *this;
    }
};
If our vector components take only a finite number of possible values
(e.g. state or action vectors),
then we can enumerate them:
// An enumerablevector x = (x1..xn) comes with
// an associated cvector c = (c1..cn)
// which defines its limits:
//   for each i, xi should be in the range 0..(ci-1).
// From c we can work out things such as the no. of possible vectors
// (the product c1*c2*..*cn), and we can give each vector a unique
// ID number (for use as an index into a lookup table).
// A cvector is just an intvector holding those per-component limits.
typedef intvector cvector;
// An intvector whose components each take only finitely many values.
// Component i is expected to lie in 0..(c[i]-1), which lets every possible
// vector be mapped to a unique ID in 0..(no-1) and back again.
// Relies on the base class providing the component count n and 1..n indexing.
class enumerablevector : public intvector
{
public:
    cvector c;  // per-component limits: component i ranges over 0..(c[i]-1)
    int no;     // no. of possible vectors; 0 until allocvector() has been called

    enumerablevector() : no ( 0 ) {}

    // Size this vector from the limits in carg: copy the limits into c,
    // allocate one component per limit, and compute no.
    void allocvector ( cvector carg )
    {
        c.alloc ( carg.n ); c = carg;
        alloc ( c.n );

        // no. of possible vectors = c1*c2*..*cn
        no = 1;
        for ( int i=1; i<=n; i++ )
            no = no * c[i];
    }

    // vector2id: returns this vector's ID.
    // Vectors have unique IDs 0..(no-1) as long as every component is within
    // its limit. The last component is the least significant "digit": each
    // component is weighted by the product of the limits to its right.
    int id()
    {
        int total = 0;
        int p = 1;      // weight of component i = c[i+1]*..*c[n]
        for ( int i=n; i>=1; i-- )
        {
            int xi = (*this)[i];
            total = total + (xi*p);
            p = p*c[i];
        }
        return total;
    }

    // id2vector: overwrite this vector with the one whose ID is totalarg
    // (the inverse of id()). totalarg is expected to be in 0..(no-1).
    void from ( int totalarg )
    {
        // No vectors exist when a limit is 0 (or allocvector() has not run);
        // bail out rather than divide by zero below.
        if ( no <= 0 )
            return;

        int total = totalarg;
        int p = no;
        for ( int i=1; i<=n; i++ )
        {
            p = p / c[i];               // weight of component i
            (*this)[i] = total / p;
            total = total % p;
        }
    }

    // Handy routine to test that the enumeration scheme works: every ID must
    // survive a round trip through from() and id(). Prints a message for each
    // ID that does not. Note that this overwrites the vector's components.
    void testEnumeration()
    {
        for ( int i=0; i<=(no-1); i++ )
        {
            from(i);
            int j = id();
            if ( i != j )
            {
                cout << "Error: enumeration failed \n";
            }
        }
    }
};
// States and actions are both enumerable vectors; the two names exist
// so that code can say which of the two it means.
typedef enumerablevector state;
typedef enumerablevector action;
// Global variables cf and df define state and action
// (presumably cf holds the limits for states and df those for actions - confirm against the problem world).
// Their contents are defined in the actual problem world:
cvector cf;
cvector df;
The Q(x,a) space is just a vector of real numbers, indexed by (x,a).
The enumeration of states and actions allows us to access this vector uniquely:
// The Q(x,a) lookup table: a floatvector with one entry per (state, action)
// pair, addressed through the ID numbers of the state and the action.
class StateActionSpace : public floatvector
{
    state xf;       // these define the space:
    action af;      // xf.no states times af.no actions
public:
    // Build the space from the state limits carg and the action limits darg,
    // and allocate one float per (state, action) pair.
    // The entries themselves are not initialised here.
    void allocvector ( cvector carg, cvector darg )
    {
        xf.allocvector ( carg );
        af.allocvector ( darg );
        alloc ( xf.no * af.no );
    }

    // Returns a reference to the entry for (x,a), so it can be read or assigned.
    // Entries are laid out action-major: all states for action 0, then all
    // states for action 1, and so on. The layout uses x.no from the argument,
    // so x and a are expected to have been allocated with the same limits
    // that were given to allocvector().
    float& at ( state x, action a )
    {
        int id = ( a.id() * x.no ) + x.id();    // 0-based position in the table
        return (*this) [ id+1 ];                // floatvector indexing is 1-based
    }
};