#!/usr/bin/env python

'''
    Training code made by Ricardo Tellez <[email protected]>
    Based on many other examples around the Internet
    Visit our website at www.theconstruct.ai
'''
"import gym\n",
"import time\n",
"import numpy\n",
"import random\n",
"import time\n",
"import qlearn\n",
"from gym import wrappers\n",
"\n",
"# ROS packages required\n",
"import rospy\n",
"import rospkg\n",
"\n",
"# import our training environment\n",
"import codrone_env\n",
"\n",
"\n",
"if __name__ == '__main__':\n",
" \n",
" rospy.init_node('drone_gym', anonymous=True)\n",
"\n",
" # Create the Gym environment\n",
" env = gym.make('CodroneEnv-v0')\n",
" rospy.loginfo ( \"Gym environment done\")\n",
" \n",
" # Set the logging system\n",
" rospack = rospkg.RosPack()\n",
" pkg_path = rospack.get_path('codrone_training')\n",
" outdir = pkg_path + '/training_results'\n",
" env = wrappers.Monitor(env, outdir, force=True) \n",
" rospy.loginfo ( \"Monitor Wrapper started\")\n",
"\n",
" last_time_steps = numpy.ndarray(0)\n",
"\n",
" # Loads parameters from the ROS param server\n",
" # Parameters are stored in a yaml file inside the config directory\n",
" # They are loaded at runtime by the launch file\n",
" Alpha = rospy.get_param(\"/alpha\")\n",
" Epsilon = rospy.get_param(\"/epsilon\")\n",
" Gamma = rospy.get_param(\"/gamma\")\n",
" epsilon_discount = rospy.get_param(\"/epsilon_discount\")\n",
" nepisodes = rospy.get_param(\"/nepisodes\")\n",
" nsteps = rospy.get_param(\"/nsteps\")\n",
"\n",
" # Initialises the algorithm that we are going to use for learning\n",
" qlearn = qlearn.QLearn(actions=range(env.action_space.n),\n",
" alpha=Alpha, gamma=Gamma, epsilon=Epsilon)\n",
" initial_epsilon = qlearn.epsilon\n",
"\n",
" start_time = time.time()\n",
" highest_reward = 0\n",
"\n",
" # Starts the main training loop: the one about the episodes to do\n",
" for x in range(nepisodes):\n",
" rospy.loginfo (\"STARTING Episode #\"+str(x))\n",
" \n",
" cumulated_reward = 0 \n",
" done = False\n",
" if qlearn.epsilon > 0.05:\n",
" qlearn.epsilon *= epsilon_discount\n",
" \n",
" # Initialize the environment and get first state of the robot\n",
" observation = env.reset()\n",
" state = ''.join(map(str, observation))\n",
" \n",
" # Show on screen the actual situation of the robot\n",
" env.render()\n",
" \n",
" # for each episode, we test the robot for nsteps\n",
" for i in range(nsteps):\n",
"\n",
" # Pick an action based on the current state\n",
" action = qlearn.chooseAction(state)\n",
" \n",
" # Execute the action in the environment and get feedback\n",
" observation, reward, done, info = env.step(action)\n",
" cumulated_reward += reward\n",
" if highest_reward < cumulated_reward:\n",
" highest_reward = cumulated_reward\n",
"\n",
" nextState = ''.join(map(str, observation))\n",
"\n",
" # Make the algorithm learn based on the results\n",
" qlearn.learn(state, action, reward, nextState)\n",
"\n",
" if not(done):\n",
" state = nextState\n",
" else:\n",
" rospy.loginfo (\"DONE\")\n",
" last_time_steps = numpy.append(last_time_steps, [int(i + 1)])\n",
" break \n",
"\n",
" m, s = divmod(int(time.time() - start_time), 60)\n",
" h, m = divmod(m, 60)\n",
" rospy.loginfo ( (\"EP: \"+str(x+1)+\" - [alpha: \"+str(round(qlearn.alpha,2))+\" - gamma: \"+str(round(qlearn.gamma,2))+\" - epsilon: \"+str(round(qlearn.epsilon,2))+\"] - Reward: \"+str(cumulated_reward)+\" Time: %d:%02d:%02d\" % (h, m, s)))\n",
"\n",
" \n",
" rospy.loginfo ( (\"\\n|\"+str(nepisodes)+\"|\"+str(qlearn.alpha)+\"|\"+str(qlearn.gamma)+\"|\"+str(initial_epsilon)+\"*\"+str(epsilon_discount)+\"|\"+str(highest_reward)+\"| PICTURE |\"))\n",
"\n",
" l = last_time_steps.tolist()\n",
" l.sort()\n",
"\n",
" #print(\"Parameters: a=\"+str)\n",
" rospy.loginfo(\"Overall score: {:0.2f}\".format(last_time_steps.mean()))\n",
" rospy.loginfo(\"Best 100 score: {:0.2f}\".format(reduce(lambda x, y: x + y, l[-100:]) / len(l[-100:])))\n",
"\n",
" env.close()"