Data structures wa y x 1 D ASE System System E C r* O r - - PDF document

data structures
SMART_READER_LITE
LIVE PREVIEW

Data structures wa y x 1 D ASE System System E C r* O r - - PDF document

3/3/2020 Data structures wa y x 1 D ASE System System E C r* O r state D Critic Critic E ACE vector x n R wc Global constants #define MAXINP 256 // Maximum number of input units #define ASE_ETA0 10.0 // ASE default


slide-1
SLIDE 1

3/3/2020 1

2

Data structures

Global constants

/* Global constants: array capacity and default learning parameters.
 * Each macro is on its own line so that the trailing // comment of one
 * definition cannot swallow the definitions that follow it. */
#define MAXINP      256     // Maximum number of input units
#define ASE_ETA0    10.0    // ASE default learning rate
#define ASE_DECAY0  0.85    // ASE default eligibility decay
#define ACE_ETA0    0.5     // ACE default learning rate
#define ACE_DECAY0  0.4     // ACE default eligibility decay
#define GAMMA0      0.95    // default prediction discount

y wa wc r r*

ASE ACE

System System Critic Critic x1 xn state vector

D E C O D E R

3

Global variables

/* Global state of the ASE/ACE network. All arrays hold MAXINP elements;
 * only the first `inputs` entries are used by the loops in this file. */
static float wa[MAXINP];     // ASE weights
static float wc[MAXINP];     // ACE weights
static float x[MAXINP];      // input vector
static float eligi[MAXINP];  // ASE eligibility vector
static float trace[MAXINP];  // ACE trace vector
static float ase_eta;        // ASE learning rate
static float ase_decay;      // ASE eligibility decay
static float ace_eta;        // ACE learning rate
static float ace_decay;      // ACE eligibility decay (was misspelled ace_deacay)
static float discount;       // ACE prediction discount
static int inputs;           // number of input units
static int r;                // primary reinforcement
static int s;                // ACE output (sec. reinf.)
static int y;                // ASE output

4

Basic functions

init_net init_net ase_output ase_output

box y

frand frand min

r

sign sign

x y

Auxiliary functions update_ase_weights update_ase_weights max clear_traces clear_traces ace_output ace_output

box p

update_ace_weights update_ace_weights ase_trace_decay ase_trace_decay ace_trace_decay ace_trace_decay

n

5

/*
 * Return a pseudo-random value scaled linearly from rand() into the
 * interval [xmin, xmax].
 *
 * The bounds are taken by value: the arithmetic below and the caller
 * (frand(-0.5, 0.5)) both treat them as numbers, so the original pointer
 * parameters could not have compiled.
 */
float frand(float xmin, float xmax)
{
    float range = xmax - xmin;

    return xmin + range * (float)rand() / RAND_MAX;
}

Auxiliary functions

/*
 * Return 1 when x is strictly positive and -1 otherwise.
 * Note that zero maps to -1, not 0.
 */
int sign(float x)
{
    return (x > 0) ? 1 : -1;
}

6

Initialize network

void init_net(int in) { int i; inputs = in; for (i=0; i<inputs; i++) wa[i] = wc[i] = 0.0; ase_eta = ASE_ETA0; ace_eta = ACE_ETA0; ase_decay = ASE_DECAY0; ace_decay = ACE_DECAY0; discount = GAMMA0; }

slide-2
SLIDE 2

3/3/2020 2

7

/*
 * Compute the ASE output for active input x.
 *
 * The weight wa[x] is perturbed with noise drawn by frand(-0.5, 0.5) and
 * the result is passed through sign(), so the returned value is +1 or -1
 * (converted to float by the return type).
 */
float ase_output(int x)
{
    float net;
    int out;    // local result; named `out` so it does not shadow the global y

    net = wa[x] + frand(-0.5, 0.5);
    out = sign(net);
    return out;
}

Compute outputs

/*
 * Compute the ACE output for active input x: the ACE weight stored at
 * index x is returned unchanged.
 */
float ace_output(int x)
{
    float prediction = wc[x];

    return prediction;
}

8

/*
 * Apply one learning step to both weight vectors.
 *
 * For each of the `inputs` units the ASE weight grows by
 * r * ase_eta * eligi[k] and the ACE weight by r * ace_eta * trace[k].
 * The parameter r hides the file-level variable of the same name.
 */
void update_weights(float r)
{
    int k = 0;

    while (k < inputs) {
        wa[k] += r * ase_eta * eligi[k];
        wc[k] += r * ace_eta * trace[k];
        k++;
    }
}

Update weights / traces

void update_traces(int box, int y) { eligi[box] += (1.0 ‐ ase_decay) * y; trace[box] += (1.0 ‐ ase_decay); }

9

Trace decay

/*
 * Reset the ASE eligibility and the ACE trace of every input unit to zero.
 */
void clear_traces()
{
    int k = 0;

    while (k < inputs) {
        trace[k] = 0.0;
        eligi[k] = 0.0;
        k++;
    }
}

/*
 * Decay both trace vectors by one step: each eligibility is scaled by
 * ase_decay and each ACE trace by ace_decay.
 */
void decay_traces()
{
    int k = 0;

    while (k < inputs) {
        eligi[k] *= ase_decay;
        trace[k] *= ace_decay;
        k++;
    }
}

10

Cart-pole model

     

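$$\ddot{\theta} = \frac{(M_c + M_p)\, g \sin\theta \;-\; \cos\theta \left( F + M_p L\, \dot{\theta}^2 \sin\theta \right)}{\tfrac{4}{3}\,(M_c + M_p)\, L \;-\; M_p L \cos^2\theta}$$

$$\ddot{x} = \frac{F + M_p L \left( \dot{\theta}^2 \sin\theta - \ddot{\theta} \cos\theta \right)}{M_c + M_p}$$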

         F  

.

x x

.

Mc = cart mass; Mp = pole mass; L = pole length; F = applied force

11

/* Cart-pole model parameters.
 * NOTE(review): the slides name Mc, Mp, L and F but give no numeric values,
 * no integration step and no failure limits. The defaults below are the
 * values of the classic Barto-Sutton-Anderson pole-balancing benchmark and
 * are only ASSUMED here -- confirm them, or define the macros beforehand to
 * override. */
#ifndef CP_GRAVITY
#define CP_GRAVITY 9.8f      // gravitational acceleration (assumed)
#endif
#ifndef CP_MC
#define CP_MC      1.0f      // cart mass Mc (assumed)
#endif
#ifndef CP_MP
#define CP_MP      0.1f      // pole mass Mp (assumed)
#endif
#ifndef CP_LEN
#define CP_LEN     0.5f      // pole length L as used in the equations (assumed)
#endif
#ifndef CP_DT
#define CP_DT      0.02f     // integration step (assumed)
#endif
#ifndef CP_XMAX
#define CP_XMAX    2.4f      // |x| beyond which the trial fails (assumed)
#endif
#ifndef CP_TMAX
#define CP_TMAX    0.20944f  // |theta| beyond which the trial fails, 12*PI/180 (assumed)
#endif

/*
 * Advance the cart-pole state by one Euler step under the applied force.
 *
 * force - force applied to the cart
 * s     - system state, updated in place
 *
 * Returns non-zero when the new state is outside the CP_XMAX / CP_TMAX
 * limits (the caller uses the result as its failure flag), 0 otherwise.
 *
 * Changes with respect to the slide version: the state is accessed through
 * the pointer (s->), the members are named as in the STATE typedef (x, v),
 * dt is initialised, the two accelerations are written out from the
 * cart-pole model equations, and a value is actually returned.
 */
int compute_state(float force, STATE *s)
{
    float ct, st;           // cos, sin of the pole angle
    float x_acc, t_acc;     // linear & angular acceleration
    float dt = CP_DT;       // integration step
    float mtot = CP_MC + CP_MP;
    float w2;               // squared angular velocity

    ct = cos(s->theta);
    st = sin(s->theta);
    w2 = s->omega * s->omega;

    t_acc = (mtot * CP_GRAVITY * st - ct * (force + CP_MP * CP_LEN * w2 * st))
          / ((4.0f / 3.0f) * mtot * CP_LEN - CP_MP * CP_LEN * ct * ct);
    x_acc = (force + CP_MP * CP_LEN * (w2 * st - t_acc * ct)) / mtot;

    // Positions are advanced with the old velocities, as in the original order.
    s->x     += s->v * dt;
    s->v     += x_acc * dt;
    s->theta += s->omega * dt;
    s->omega += t_acc * dt;

    return (s->x < -CP_XMAX || s->x > CP_XMAX ||
            s->theta < -CP_TMAX || s->theta > CP_TMAX);
}

Compute state

.. ..

12

/* System state of the cart-pole. */
typedef struct {
    float x;        // cart position
    float v;        // cart speed
    float theta;    // pole angle
    float omega;    // pole angular velocity
} STATE;

System state

x v  

XL

  • XL

VL

  • VL

T1

  • T1

T6

  • T6

W50

  • W50

3 3 6 3 162

slide-3
SLIDE 3

3/3/2020 3

1 2 3 4 5 6 7 8

13

Decode state

x v

  • XL

XL

  • VL

VL

14

Decode state

int decode_state(STATUS s) { int box; if (s.x < ‐XL) box = 0; else if (s.x < XL) box = 1; else box = 2; if (s.v < ‐VL) ; else if (s.v < VL) box += 3; else box += 6;

#define XL 0.8 // ASE default learning rate #define VL 0.5 // ASE default eligibility decay #define T1 0.01745 // PI/180 #define T6 0.10472 // 6*PI/180 #define W50 0.87266 // 50*PI/180

15

Decode state

if (s.theta < ‐T6) ; else if (s.theta < ‐T1) box += 9; else if (s.theta < 0) box += 18; else if (s.theta < T1) box += 27; else if (s.theta < T6) box += 36; else box += 45; if (s.omega < ‐W50) ; else if (s.omega < W50) box += 54; else box += 108; return box; }

16

Decode state

/*
 * Region index of the cart position: 0 below -XL, 1 inside [-XL, XL),
 * 2 otherwise.
 */
int decode_x(float x)
{
    if (x < -XL) return 0;
    if (x < XL)  return 1;
    return 2;
}

/*
 * Region index of the cart speed: 0 below -VL, 1 inside [-VL, VL),
 * 2 otherwise.
 */
int decode_v(float v)
{
    if (v < -VL) return 0;
    if (v < VL)  return 1;
    return 2;
}

A more structured way to determine the box number is to decode each state variable and then combine them:

17

Decode state

/*
 * Region index of the pole angle, 0..5, using the boundaries
 * -T6, -T1, 0, T1 and T6 in increasing order.
 */
int decode_t(float t)
{
    if (t < -T6) return 0;
    if (t < -T1) return 1;
    if (t < 0)   return 2;
    if (t < T1)  return 3;
    if (t < T6)  return 4;
    return 5;
}

/*
 * Region index of the pole angular velocity: 0 below -W50,
 * 1 inside [-W50, W50), 2 otherwise.
 */
int decode_w(float w)
{
    if (w < -W50) return 0;
    if (w < W50)  return 1;
    return 2;
}

18

Decode state

The overall box number can be computed as:

/* Number of regions per state variable; their product is the box count. */
#define NBX 3   // number of boxes for position x
#define NBV 3   // number of boxes for speed v
#define NBT 6   // number of boxes for theta
#define NBW 3   // number of boxes for omega

int decode_state(STATUS s) { int box; box = box_x(s.pos) + box_v(s.speed) * NBX + box_t(s.theta) * NBX*NBV + box_w(s.omega) * NBX*BBV*NBT; return box; }

slide-4
SLIDE 4

3/3/2020 4

19

int main() { long duration; // # steps pole balanced long failures; // # failures long total_steps; // total # of steps int y; // ASE output int box; // decoded state region int fail;// failure flag float force; // applied force to the cart STATE s; // system state init_net(NBOXES); clear_traces(); s = set_state(0,0,0,0); box = decode_state(s); duration = 0; total_steps = 0;

Learning cycle

20

while (duration < MAX_ITE) { duration++; y = ase_output(box); update_traces(box, y); force = FORCE*y; fail = compute_state(force, &s); box = decode_state(s); update_weights(box, fail); if (fail) { clear_traces(); s = set_state(0,0,0,0); box = decode_state(s); duration = 0; failures++; } } }

Learning cycle