From 92a744d2481c9a6511f4db685e2fc4d115d3e688 Mon Sep 17 00:00:00 2001 From: MichaelKonobeev Date: Wed, 11 Sep 2019 04:02:45 +0500 Subject: [PATCH] Fix formulas in PPO hw (#293) --- week09_policy_II/ppo.ipynb | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/week09_policy_II/ppo.ipynb b/week09_policy_II/ppo.ipynb index 396e678cf..7e3bc7bb6 100644 --- a/week09_policy_II/ppo.ipynb +++ b/week09_policy_II/ppo.ipynb @@ -400,27 +400,27 @@ "modifies the typical policy gradient loss in the following way:\n", "\n", "$$\n", - "L_{\\pi} = \\frac{1}{T-1}\\sum_{l=0}^{T-1}\n", + "L_{\\pi} = \\frac{1}{T}\\sum_{l=0}^{T-1}\n", "\\frac{\\pi_\\theta(a_{t+l}|s_{t+l})}{\\pi_\\theta^{\\text{old}}(a_{t+l}|s_{t+l})}\n", "A^{\\mathrm{GAE}(\\gamma,\\lambda)}_{t+l}\\\\\n", - "L_{\\pi}^{\\text{clipped}} = \\frac{1}{T-1}\\sum_{l=0}^{T-1}\\mathrm{clip}\\left(\n", + "L_{\\pi}^{\\text{clipped}} = \\frac{1}{T}\\sum_{l=0}^{T-1}\\mathrm{clip}\\left(\n", "\\frac{\\pi_\\theta(a_{t+l}|s_{t+l})}{\\pi_{\\theta^{\\text{old}}}(a_{t+l}|s_{t+l})}\n", "\\cdot A^{\\mathrm{GAE(\\gamma, \\lambda)}}_{t+l},\n", "1 - \\text{cliprange}, 1 + \\text{cliprange}\\right)\\\\\n", - "L_{\\text{policy}} = \\max\\left(L_\\pi, L_{\\pi}^{\\text{clipped}}\\right).\n", + "L_{\\text{policy}} = -\\min\\left(L_\\pi, L_{\\pi}^{\\text{clipped}}\\right).\n", "$$\n", "\n", "Additionally, the value loss is modified in the following way:\n", "\n", "$$\n", - "L_V = \\frac{1}{T-1}\\sum_{l=0}^{T-1}(V_\\theta(s_{t+l}) - \\hat{V}(s_{t+l}))^2\\\\\n", - "L_{V}^{\\text{clipped}} = \\frac{1}{T-1}\\sum_{l=0}^{T-1}\n", + "L_V = \\frac{1}{T}\\sum_{l=0}^{T-1}(V_\\theta(s_{t+l}) - \\hat{V}(s_{t+l}))^2\\\\\n", + "L_{V}^{\\text{clipped}} = \\frac{1}{T}\\sum_{l=0}^{T-1}\n", "V_{\\theta^{\\text{old}}}(s_{t+l}) +\n", "\\text{clip}\\left(\n", "V_\\theta(s_{t+l}) - V_{\\theta^\\text{old}}(s_{t+l}),\n", "-\\text{cliprange}, \\text{cliprange}\n", "\\right)\\\\\n", - "L_{\\text{value}} = \\max\\left(L_V, 
L_V^{\\text{clipped}}\\right).\n", + "L_{\\text{value}} = \\max\\left(L_V, L_V^{\\text{clipped}}\\right).\n", "$$" ] },