3/3/2020 1
2
Data structures
Global constants
// Compile-time configuration for the ASE/ACE network.
// Each directive sits on its own line: when they share one line, the first
// "//" comment hides every #define that follows it.
#define MAXINP      256     // Maximum number of input units
#define ASE_ETA0    10.0    // ASE default learning rate
#define ASE_DECAY0  0.85    // ASE default eligibility decay
#define ACE_ETA0    0.5     // ACE default learning rate
#define ACE_DECAY0  0.4     // ACE default eligibility decay
#define GAMMA0      0.95    // default prediction discount
y wa wc r r*
ASE ACE
System System Critic Critic x1 xn state vector
DECODER
3
Global variables
// File-scope state of the ASE/ACE network.
// One declaration per line: when they share one line, the first "//" comment
// hides every declaration that follows it.
static float wa[MAXINP];     // ASE weights
static float wc[MAXINP];     // ACE weights
static float x[MAXINP];      // input vector
static float eligi[MAXINP];  // ASE eligibility vector
static float trace[MAXINP];  // ACE trace vector
static float ase_eta;        // ASE learning rate
static float ase_decay;      // ASE eligibility decay
static float ace_eta;        // ACE learning rate
static float ace_decay;      // ACE eligibility decay (was misspelled "ace_deacay")
static float discount;       // ACE prediction discount
static int inputs;           // number of input units
static int r;                // primary reinforcement
// NOTE(review): s is declared int; confirm an integer is really intended for
// the secondary reinforcement before relying on it.
static int s;                // ACE output (sec. reinf.)
static int y;                // ASE output
4
Basic functions
init_net ase_output
box y
frand frand min
r
sign sign
x y
Auxiliary functions update_ase_weights update_ase_weights max clear_traces clear_traces ace_output ace_output
box p
update_ace_weights ase_trace_decay ace_trace_decay
n
5
// Return a pseudo-random float between *xmin and *xmax (both ends reachable),
// obtained by scaling rand() / RAND_MAX onto that interval.
//
// xmin, xmax: pointers to the lower and upper bounds; both must be non-NULL.
//
// The bounds are read through the pointers. Doing arithmetic on the pointers
// themselves (as the previous body did) is not valid C.
// NOTE(review): the signature takes pointers; confirm that callers pass
// addresses rather than plain float values.
float frand(float *xmin, float *xmax)
{
    float lo = *xmin;
    float range = *xmax - lo;

    return (lo + range * (float)rand() / RAND_MAX);
}
Auxiliary functions
// Two-valued sign: returns 1 when x is strictly positive, -1 otherwise.
// Zero (and NaN, for which "x > 0" is false) therefore map to -1.
int sign(float x)
{
    if (x > 0)
        return 1;
    return -1;   // ASCII minus; the typographic hyphen used before does not compile
}
6
Initialize network
void init_net(int in) { int i; inputs = in; for (i=0; i<inputs; i++) wa[i] = wc[i] = 0.0; ase_eta = ASE_ETA0; ace_eta = ACE_ETA0; ase_decay = ASE_DECAY0; ace_decay = ACE_DECAY0; discount = GAMMA0; }