SLIDE 1
Outline
- Problem statement
- Option Critic
- Double Actor Critic
DAC: The Double Actor-Critic Architecture for Learning Options - PowerPoint PPT Presentation
DAC: The Double Actor-Critic Architecture for Learning Options. NeurIPS 2019. Shangtong Zhang, Shimon Whiteson. Presenter: Ehsan Mehralian, March 17, 2020. Outline: Problem statement; Option Critic; Double Actor Critic. Problem statement
3
4
5
θ
θ
∞
t=1
$d^{\pi}(s) = \sum_{t=0}^{\infty} \gamma^{t} \Pr(s_t = s \mid s_0, \pi)$
<latexit sha1_base64="FD9wRK489dXj4FzHPCVtNLhkGI=">ADcXicbVLbtNAFJ04PEp4pYUNQqCrRhFOMZFdQLCJVEBIbJBSIG2lTGKNJ+NkVL+YGVdErtf8Hzt+g0/wNhxm5D0SpbOnPs69/p6ScClsu3fNaN+7fqNm1u3Grfv3L13v7m9cyTjVFA2oHEQixOPSBbwiA0UVwE7SQjoRewY+/0Q+E/PmNC8j6puYJG4VkGnGfU6I05W7XfrY/A2bfU34GOPtqvbO+mNIiHQv6pnx2XsK+a5uyAxaekjAkgPNGyd8vAc9IGKQ/LDzbCaMUVyLGYxmNp7yXTgijDQLzXzvOxjXnAkBSx5CMkywCzYTj7EMg3dTPWcfJxhHvlqni9kjDP1wsmFq86la1uw2nKk9fmC0AwnRChOAihk5SvPqkPyvISJrp4wnM9ZUnoGdfyE15uZbPG4UWm9jaWZS5Ka+X2UjlU0hX0hSld1ZOX4jtus2V37dJgEzgVaKHK+m7zF57ENA1ZpGhApBw6dqJGWSGPBixv4FSyhNBTMmVDSMSMjnKyovJoa2ZCfix0F+koGRXMzISjkPR1Z/Cm57ivIq3zDVPlvRxmPklSxiC4a+WkAKobi/GDCBaMqmGtAqOBaK9AZ0ctW+kgbegnO+sib4Gi/67zsvj581Tp4X61jCz1Gu8hEDnqDtAn1EcDRGt/jIfGE+Op8bf+qA713UWoUatyHqD/rP78H3fwF8A=</latexit>6
7
∞
t=0
8
∞
t=0
a
9
∞
t=0
a
s0
10
∞
t=0
a
s0
11
12
s,ω
a
$\mu_{\Omega}(s, \omega \mid s_0, \omega_0) = \sum_{t=0}^{\infty} \gamma^{t} P(s_t = s, \omega_t = \omega \mid s_0, \omega_0)$
<latexit sha1_base64="IltlN+mtDiZKwP18kd38cjPiVy4=">AHLniclVLb9NAEHYLCSW8Chy5jKi2DRUNg/BJVJ5SXBApEDaStnU2ribdFW/8K5bKnd/ERf+ChyQACGu/AzGdpw4aBgKcrs7Hwz3y7Y/dDlwtpml8XFs+crVTPLZ2vXbh46fKV5avXNkUQRw7rOIEbRNt9KpjLfdaRXLpsO4wY9fou2+rvP0n3tw5YJHjgv5VHIet5dOjzAXeoRJd9tfK0/hIexfzAyDJm+aj5mtdNKnRhLYuGseZ2bZNXRjQJEPqeRSIqtVJyHduQtoNCQefW8nRO4xSRWJ9gLQcXfsMWBOGOBK7vX7yTOV+mgMRHAPwkmAnoN1SUi9uxEtiy1kxDuD+SRymnsJPK2pSJbHgvbEK5ZA/5DSLqJCSkeTUhZSWKi1HRVqQpRewi8lDrDLzIE9zuBDnqlyMsdGgcTdWn2Sp8iN1M0JdRhxl9COdGHLlhizRzQJPDbE2twH8io1a3X9BXaVuVXR43jVx/rjtdFCPHap58jmiB/+HyDh3MxKHdpmRq58ALOQPGV3TgMF/wj9q5bK2OfhtomybxTJUkEOxyLQMvWijtLpsTA27A6GFjuphrkHJi7MEmX6wioUVzBjJhpqcks7eoEQDcwytcQMugW3pyWDkjZIt2EYZfaNlP7qKYjNMgCLIplcl0KTQiRZ9GPL+aJgREkVjBqnytAp7tQO8njDsOzk0JZY5jD3qL+3UcByNqN2kpkG8kTGfw5WUpysIl5cvhq5d+r2/Hny5l+cOaNoJ53s3qWj+A8F54/oZELTAcUYMTm7FJFLeAxTqezlFXPNzB4aVgjY0UbPW17+TPZDZzY750XCpE1zJD2UvShyXqRqJBQups0+HrIumTz0mekn2uldQR8uDIf76EzFtGJNQT4sjrY2Q65WJ2L3XO2+vGcvCwl3A/jCXznbzQIHZBpB+O2CXR8yR7hEa1Ik4cgVnj+J5SfzC1FAEa7blk8bmnTXr7tr9jXsr649HcixpN7Sbmq5Z2gNtXutbWO5lQ+VD5VvlW+Vz9Wv1R/VH/moYsLI8x1beqp/voNlCZozA=</latexit>13
s0,ω
14
15
16
17
18
19
20
M L
<latexit sha1_base64="eD0jDCfI5TeKMRUwdHCvFE6w=">ALSHiclVZb+NEFPaW28bLpQuPvBxRbFpGtlcBFopaBeEFJCrtmTXSlOrInjJGbjxHgmLZXrn8cLj7zxG3jhAYR48yMx3ZSb7tYinPmzLl85szM57Ey5Ay/r93t5r7/x5lv3G/qDt959739h+f0/Um8YOBv16uk+cTQoNluAoGLGTL4HmcBCSaLINnkxf8PlnF0FCw/XqKbuKg1FE5qtwFvqEocp72Bg3j8ENftqEF+Cm/faT9g8GbROzDacGbV0L8dSzDGpC252TKCLgZnrTjcPx9AFkszdiPzspS5bBIxkbrJYg4GzhcaEGjPAEVtMJum3GdeRDbg0jCAuDQyuNbOhSzeRl7KunY1TN1zN2FUmYxTdmRniceuqWe1oZpyhPhmCfFTNyYJC8kSOKysMsyTdEGEpzDF4HGYZVCgTXu+MehYOVmjDPlibN6s4yjYiN0q4QOXYGp4lBPdalBXr0dtdRMfc4QrcEy7qTeM7rEqoM1VjMZpg/mJsdtEfqzSkZzvHh/8XCFiKItWlZwlw1QXYdZEhzUFKPwJ6g/tTKCX5p6FtJ+pYJyQy4IEUoWu8mQGuabmTdAUzXDOZQaKFUYJRH8wiGoFhTIaCsru3RgKA/awihbQ4xg2HC0TRlUuEG4LdOsom9x+Id3eJxXHTApgpG8KE4USUzV47F6UtCiwgpaFaGEN/e7swJpb5q2l156DNcSk12exnKTaLJy0l3CpCBzP+5sVK1spkbaqtIbVb3fPynVfODVb0UsHou/EVnyFjPV7tNyifIeiDS0Xj3tIDq9hK9SdxBSEY4gj1b717LRKsHYF7G6CWzsT6nM/2elwcZDf4Be1eDV2DilZL15PE4deRegSbp9HolLIq2eWEIjko8POY8n8rDzNzF3Pin+aS8FK4hNqQgc/AzyfL9ASPv2MTvrKgErRX+KGoPLnI76oeOox7pQsPys37/Eg84a8+ko0SvjqdTrvPRWGs3HvUQE1CecLRpJkfQkqp95UCKEWYlfhSHuZMOECx4J/eMcm8poTJ5yEazbxyFtNARNDSIHAJPwx8PldLcDwl3MbIufVETkKkSMRObWIHGyS7zlX4YoVsy9dFkwVeSWDpz53fUQyzGFXLlt7hTmjR30NwpzJ28tXZSQ9Fz+XJXFA5XOMWnC0YoL1bcjRMaMOjv9H32CPpbFA+tj2qeuL9XBwOptoIJSQdU3r7B1bHEg/cFOxcONDy59Tb/82drv1NFKyYvySUDm0rZqOU72V/GWS6u6FBTPwXZB4MUVyRKCjVHwIZtBEDfbOsEf0im0VY+URJReRO05Dp7hxX1s0N2z25SgNV/GBStfJptlsDWwL8qYRom2KXLKxSIn4SIFfwFwQOL4benjiTYuyXfFM4/6difdj4/+zg8dc5Hfe1D7WPNEOztS+0x1pPO9UGmt/4pfFH46/G3/qv+p/6P/q/0nTvXu7zgb1PNj7D1BExf8=</latexit>22
23
24
25
CartPole = (balance, balance_sparse); Reacher = (easy, hard); Cheetah = (run, backward); Fish = (upright, downleft); Walker1 = (squat, stand); Walker2 = (walk, backward)
26
27