Sample implementation - the HouseRobot problem

For an overview of this problem see here,
and for a detailed explanation of the state-space in this problem see here.


// Declares the layout of the 8-component state vector.
// Judging by the ranges noted below (2 for a 0..1 Boolean, 3 for 0..2),
// each cf[i] looks like the number of distinct values component i can
// take -- TODO confirm against the definition of cf.
// The slots are in the same order as s[1]..s[8] in HouseRobot::observe().
defineState()
{
 cf.alloc(8);

 cf[1] = 10;    // dirt
 cf[2] = 2;     // full: Boolean, 0..1

 cf[3] = 10;    // plug

 cf[4] = 10;    // wall

 cf[5] = 10;    // human
 cf[6] = 3;     // classification: 0..2

 cf[7] = 10;    // smoke
 cf[8] = 2;     // wallinway: Boolean, 0..1
}



// Declares the layout of the action vector: one component only.
// Its size of 9 goes with the action range 0..8 noted below.
defineAction()
{
 df.alloc(1);

 df[1] = 9;     // actions 0..8
}

Various Agents that have reward functions in this world:


// Agent that is rewarded for picking up dirt (if not full).
class Adirt : public Agent
{
public:
 // Reward for the transition from state x to state y.
 // Returns r[1] when component 1 (dirt) was not 8 in x but is 8 in y
 // and component 2 (full) is zero in y; returns 0 in every other case.
 // NOTE(review): 8 is presumably the dirt reading that means the robot
 // has reached the dirt -- confirm against house.directionDirt().
 float reward ( state x, state y )
 {
  if ( (x[1] != 8) && (y[1] == 8) )
  {
   // Only counts as a pick-up when the robot is not full.
   if ( ! y[2] ) return r[1];
  }
  return 0;
 }
};


// Agent that is rewarded for arriving at the plug.
class Aplug : public Agent
{
public:
 // Reward for the transition from state x to state y.
 // Returns r[1] when component 3 (plug) was not 8 in x but is 8 in y;
 // returns 0 otherwise.
 // NOTE(review): 8 is presumably the plug reading that means the robot
 // is at the plug -- confirm against house.directionPlug().
 float reward ( state x, state y )
 {
  const bool arrived = (x[3] != 8) && (y[3] == 8);
  if ( arrived ) return r[1];
  return 0;
 }
};

// etc.

The HouseRobot is a Creature in this world, containing Agents like the above:


class HouseRobot : public Creature
{
               observe();
               execute(action);
};


// Reads the robot's current perception into the state vector s[1]..s[8].
// s[1], s[3], s[4], s[5] and s[7] take the house's direction readings for
// dirt, plug, wall, human and smoke; s[2], s[6] and s[8] take full,
// classification and house.wallinway.
// The slot order matches the component sizes set up in defineState().
// NOTE(review): full and classification are presumably robot members --
// their declarations are not visible here.
HouseRobot :: observe()
{
 s[1] = house.directionDirt();
 s[2] = full;
 s[3] = house.directionPlug();
 s[4] = house.directionWall();
 s[5] = house.directionHuman();         
 s[6] = classification;
 s[7] = house.directionSmoke();
 s[8] = house.wallinway;
}

// Performs action a by passing its first component to house.move().
// defineAction() allocates a single action component, so a[1] is the
// whole action.
HouseRobot :: execute ( action a )
{
 house.move ( a[1] );
}


// Interact with the world multiple times: call house.randomise() once,
// then call interact(mode) NOSTEPS times (not at all if NOSTEPS <= 0).
HouseRobot :: multiple ( int mode, long int NOSTEPS )
{
 house.randomise();

 long int remaining = NOSTEPS;
 while ( remaining > 0 )
 {
  interact ( mode );
  remaining--;
 }
}

The main() function:


// Interact with the world many times to learn, then exploit:
// after creature.resetQ(), call creature.multiple() for CHILDHOODSTEPS
// steps in _learnQ mode, then for TESTSTEPS steps in _exploit mode.
// main is given its explicit int return type, which C++ requires;
// reaching the end of main returns 0.

int main()
{
 creature.resetQ();
 creature.multiple ( _learnQ, CHILDHOODSTEPS );
 creature.multiple ( _exploit,   TESTSTEPS );
}