School of Computing. Dublin City University.
Online coding site: Ancient Brain
coders JavaScript worlds
defineState() { cf.alloc(8); cf[1] = 10; cf[2] = 2; // dirt Boolean full 0..1 cf[3] = 10; // plug cf[4] = 10; // wall cf[5] = 10; cf[6] = 3; // human classification 0..2 cf[7] = 10; cf[8] = 2; // smoke Boolean wallinway 0..1 } defineAction() { df.alloc(1); df[1] = 9; // actions 0..8 }
// Agent rewarded for picking up dirt (if not full).
class Adirt : public Agent
{
public:
    // Returns r[1] when state variable 1 (dirt direction, per
    // HouseRobot::observe()) changes from a value other than 8 in x to
    // exactly 8 in y, and y[2] (the "full" flag) is not set; otherwise 0.
    // NOTE(review): x and y look like the states before and after a step,
    // and 8 looks like the "arrived at target" direction value -- confirm.
    float reward ( state x, state y )
    {
        if ( (x[1] != 8) && (y[1] == 8) && ! y[2] )
        {
            return r[1];
        }
        return 0;
    }
};

// Agent rewarded for arriving at plug.
class Aplug : public Agent
{
public:
    // Returns r[1] when state variable 3 (plug direction, per
    // HouseRobot::observe()) changes from a value other than 8 in x to
    // exactly 8 in y; otherwise 0.
    float reward ( state x, state y )
    {
        if ( (x[3] != 8) && (y[3] == 8) )
        {
            return r[1];
        }
        return 0;
    }
};

// etc.
class HouseRobot : public Creature { observe(); execute(action); }; HouseRobot :: observe() { s[1] = house.directionDirt(); s[2] = full; s[3] = house.directionPlug(); s[4] = house.directionWall(); s[5] = house.directionHuman(); s[6] = classification; s[7] = house.directionSmoke(); s[8] = house.wallinway; } HouseRobot :: execute ( action a ) { house.move ( a[1] ); } HouseRobot :: multiple ( int mode, long int NOSTEPS ) // interact with the world multiple times { house.randomise(); for ( long int step=1; step<=NOSTEPS; step++ ) { interact ( mode ); } }
// interact with the world many times to learn, // then exploit main() { creature.resetQ(); creature.multiple ( _learnQ, CHILDHOODSTEPS ); creature.multiple ( _exploit, TESTSTEPS ); }