start_training.py

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#!/usr/bin/env python\n",
    "\n",
    "'''\n",
    "    Training code made by Ricardo Tellez <rtellez@theconstructsim.com>\n",
    "    Based on many other examples around Internet\n",
    "    Visit our website at www.theconstruct.ai\n",
    "'''\n",
    "import gym\n",
    "import time\n",
    "import numpy\n",
    "import random\n",
    "import time\n",
    "import qlearn\n",
    "from gym import wrappers\n",
    "\n",
    "# ROS packages required\n",
    "import rospy\n",
    "import rospkg\n",
    "\n",
    "# import our training environment\n",
    "import codrone_env\n",
    "\n",
    "\n",
    "if __name__ == '__main__':\n",
    "    \n",
    "    rospy.init_node('drone_gym', anonymous=True)\n",
    "\n",
    "    # Create the Gym environment\n",
    "    env = gym.make('CodroneEnv-v0')\n",
    "    rospy.loginfo ( \"Gym environment done\")\n",
    "        \n",
    "    # Set the logging system\n",
    "    rospack = rospkg.RosPack()\n",
    "    pkg_path = rospack.get_path('codrone_training')\n",
    "    outdir = pkg_path + '/training_results'\n",
    "    env = wrappers.Monitor(env, outdir, force=True) \n",
    "    rospy.loginfo ( \"Monitor Wrapper started\")\n",
    "\n",
    "    last_time_steps = numpy.ndarray(0)\n",
    "\n",
    "    # Loads parameters from the ROS param server\n",
    "    # Parameters are stored in a yaml file inside the config directory\n",
    "    # They are loaded at runtime by the launch file\n",
    "    Alpha = rospy.get_param(\"/alpha\")\n",
    "    Epsilon = rospy.get_param(\"/epsilon\")\n",
    "    Gamma = rospy.get_param(\"/gamma\")\n",
    "    epsilon_discount = rospy.get_param(\"/epsilon_discount\")\n",
    "    nepisodes = rospy.get_param(\"/nepisodes\")\n",
    "    nsteps = rospy.get_param(\"/nsteps\")\n",
    "\n",
    "    # Initialises the algorithm that we are going to use for learning\n",
    "    qlearn = qlearn.QLearn(actions=range(env.action_space.n),\n",
    "                    alpha=Alpha, gamma=Gamma, epsilon=Epsilon)\n",
    "    initial_epsilon = qlearn.epsilon\n",
    "\n",
    "    start_time = time.time()\n",
    "    highest_reward = 0\n",
    "\n",
    "    # Starts the main training loop: the one about the episodes to do\n",
    "    for x in range(nepisodes):\n",
    "        rospy.loginfo (\"STARTING Episode #\"+str(x))\n",
    "        \n",
    "        cumulated_reward = 0  \n",
    "        done = False\n",
    "        if qlearn.epsilon > 0.05:\n",
    "            qlearn.epsilon *= epsilon_discount\n",
    "        \n",
    "        # Initialize the environment and get first state of the robot\n",
    "        observation = env.reset()\n",
    "        state = ''.join(map(str, observation))\n",
    "        \n",
    "        # Show on screen the actual situation of the robot\n",
    "        env.render()\n",
    "        \n",
    "        # for each episode, we test the robot for nsteps\n",
    "        for i in range(nsteps):\n",
    "\n",
    "            # Pick an action based on the current state\n",
    "            action = qlearn.chooseAction(state)\n",
    "            \n",
    "            # Execute the action in the environment and get feedback\n",
    "            observation, reward, done, info = env.step(action)\n",
    "            cumulated_reward += reward\n",
    "            if highest_reward < cumulated_reward:\n",
    "                highest_reward = cumulated_reward\n",
    "\n",
    "            nextState = ''.join(map(str, observation))\n",
    "\n",
    "            # Make the algorithm learn based on the results\n",
    "            qlearn.learn(state, action, reward, nextState)\n",
    "\n",
    "            if not(done):\n",
    "                state = nextState\n",
    "            else:\n",
    "                rospy.loginfo (\"DONE\")\n",
    "                last_time_steps = numpy.append(last_time_steps, [int(i + 1)])\n",
    "                break \n",
    "\n",
    "        m, s = divmod(int(time.time() - start_time), 60)\n",
    "        h, m = divmod(m, 60)\n",
    "        rospy.loginfo ( (\"EP: \"+str(x+1)+\" - [alpha: \"+str(round(qlearn.alpha,2))+\" - gamma: \"+str(round(qlearn.gamma,2))+\" - epsilon: \"+str(round(qlearn.epsilon,2))+\"] - Reward: \"+str(cumulated_reward)+\"     Time: %d:%02d:%02d\" % (h, m, s)))\n",
    "\n",
    "    \n",
    "    rospy.loginfo ( (\"\\n|\"+str(nepisodes)+\"|\"+str(qlearn.alpha)+\"|\"+str(qlearn.gamma)+\"|\"+str(initial_epsilon)+\"*\"+str(epsilon_discount)+\"|\"+str(highest_reward)+\"| PICTURE |\"))\n",
    "\n",
    "    l = last_time_steps.tolist()\n",
    "    l.sort()\n",
    "\n",
    "    #print(\"Parameters: a=\"+str)\n",
    "    rospy.loginfo(\"Overall score: {:0.2f}\".format(last_time_steps.mean()))\n",
    "    rospy.loginfo(\"Best 100 score: {:0.2f}\".format(reduce(lambda x, y: x + y, l[-100:]) / len(l[-100:])))\n",
    "\n",
    "    env.close()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}