// Declare the layout of the state vector: 8 components, with cf[i]
// holding the number of distinct values component i can take (a
// component with cf[i] == n ranges over 0..n-1).  The component order
// matches the s[1]..s[8] assignments in HouseRobot::observe().
// Returns nothing, hence the explicit void return type.
void defineState()
{
    cf.alloc(8);

    cf[1] = 10;   // dirt
    cf[2] = 2;    // Boolean full 0..1
    cf[3] = 10;   // plug
    cf[4] = 10;   // wall
    cf[5] = 10;   // human
    cf[6] = 3;    // classification 0..2
    cf[7] = 10;   // smoke
    cf[8] = 2;    // Boolean wallinway 0..1
}
// Declare the layout of the action vector: a single component that can
// take 9 distinct values.
// Returns nothing, hence the explicit void return type.
void defineAction()
{
    df.alloc(1);

    df[1] = 9;    // actions 0..8
}
// Agent concerned with dirt.
// reward(x, y) pays r[1] for a transition from state x to state y in
// which component 1 (dirt) changes from "not 8" to 8 while component 2
// (full) is false in y; every other transition pays 0.
// NOTE(review): 8 presumably encodes "at the dirt" -- confirm against
// house.directionDirt().
class Adirt : public Agent
{
public:
    float reward ( state x, state y )
    {
        // Dirt component has just become 8 on this step.
        const bool reachedDirt = ( x[1] != 8 ) && ( y[1] == 8 );

        if ( reachedDirt && ! y[2] ) return r[1];
        return 0;
    }
};
// Adirt (above) is rewarded for picking up dirt (if not full)
// Agent concerned with the plug.
// reward(x, y) pays r[1] for a transition from state x to state y in
// which component 3 (plug) changes from "not 8" to 8; every other
// transition pays 0.
// NOTE(review): 8 presumably encodes "at the plug" -- confirm against
// house.directionPlug().
class Aplug : public Agent
{
public:
    float reward ( state x, state y )
    {
        // Plug component has just become 8 on this step.
        const bool reachedPlug = ( x[3] != 8 ) && ( y[3] == 8 );

        if ( reachedPlug ) return r[1];
        return 0;
    }
};
// Aplug (above) is rewarded for arriving at the plug
// etc. (further agent classes follow the same pattern)
class HouseRobot : public Creature
{
observe();
execute(action);
};
HouseRobot :: observe()
{
s[1] = house.directionDirt();
s[2] = full;
s[3] = house.directionPlug();
s[4] = house.directionWall();
s[5] = house.directionHuman();
s[6] = classification;
s[7] = house.directionSmoke();
s[8] = house.wallinway;
}
HouseRobot :: execute ( action a )
{
house.move ( a[1] );
}
HouseRobot :: multiple ( int mode, long int NOSTEPS )
// interact with the world multiple times
{
house.randomise();
for ( long int step=1; step<=NOSTEPS; step++ )
{
interact ( mode );
}
}
// interact with the world many times to learn,
// then exploit
// Program entry point: reset the creature, run CHILDHOODSTEPS steps in
// _learnQ mode, then run TESTSTEPS steps in _exploit mode.
// NOTE(review): resetQ() presumably clears previously learned values --
// confirm against its definition.
int main()
{
    creature.resetQ();

    // Learning phase.
    creature.multiple ( _learnQ, CHILDHOODSTEPS );

    // Exploitation phase.
    creature.multiple ( _exploit, TESTSTEPS );

    return 0;
}