-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPolicy.py
102 lines (83 loc) · 2.69 KB
/
Policy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import numpy as np
class UniversalPID:
    """Discrete PID controller driven by caller-supplied timestamps.

    Each call to :meth:`signal` returns
    ``P*error + I*accumulated_error + D*error_rate``, where the accumulated
    error and the error rate are estimated from successive calls.
    """

    def __init__(self, P, I, D, t, name):
        """Store the gains and reset the controller state.

        Args:
            P: Gain multiplied with the current error.
            I: Gain multiplied with the time-accumulated error.
            D: Gain multiplied with the error's rate of change.
            t: Initial timestamp; the first ``signal`` call measures its
                time step relative to this value.
            name: Label stored on the instance; the controller itself does
                not use it.
        """
        self._name = name
        self._P, self._I, self._D = P, I, D
        # Both timestamps start at ``t``; only ``_t`` is advanced by signal().
        self._t = self._tOld = t
        self._errorOld = 0.0
        self._errorAccumulation = 0.0
        self._errorDerivative = 0.0

    def signal(self, error, t):
        """Advance the controller to time ``t`` and return the control output.

        Args:
            error: Current error value fed to the controller.
            t: Current timestamp, compared with the timestamp of the
                previous call (or the constructor's ``t`` on the first call).

        Returns:
            ``P*error + I*accumulated_error + D*error_rate``. When the time
            step is not positive the rate term is zero and the accumulated
            error is left unchanged.
        """
        elapsed = t - self._t
        self._t = t

        if elapsed > 0:
            rate = (error - self._errorOld) / elapsed
            self._errorAccumulation += elapsed * error
        else:
            # No time has passed (or time went backwards): skip both the
            # rate estimate and the accumulation for this sample.
            rate = 0.0

        self._errorDerivative = rate
        self._errorOld = error

        proportional = self._P * error
        integral = self._I * self._errorAccumulation
        derivative = self._D * rate
        return proportional + integral + derivative
class Policy_PID:
    """PID policy that drives the first state component toward a goal.

    The action is the PID output for ``(goal - state)[0]``, limited to the
    range ``[-1, 1]``.
    """

    def __init__(self, p, i, d, goal, t, name):
        """Create the policy and its underlying PID controller.

        Args:
            p: Proportional gain passed to ``UniversalPID``.
            i: Integral gain passed to ``UniversalPID``.
            d: Derivative gain passed to ``UniversalPID``.
            goal: Target value(s); a scalar or a sequence compatible with
                the states later passed to ``getAction``.
            t: Initial timestamp for the controller.
            name: Label stored on the policy and on the controller.
        """
        self._name = name
        self._goal = goal
        self._pid = UniversalPID(p, i, d, t, name)

    def setGoal(self, goal):
        """Replace the target used by subsequent ``getAction`` calls."""
        self._goal = goal

    def getAction(self, state, t):
        """Return the clamped PID action for ``state`` at time ``t``.

        Args:
            state: Current state; a scalar or a sequence whose first
                component is the controlled quantity.
            t: Current timestamp forwarded to the PID controller.

        Returns:
            The controller output, replaced by ``1`` when it exceeds 1 and
            by ``-1`` when it is below -1.
        """
        # atleast_1d promotes plain scalars to length-1 arrays so that
        # indexing the first component works for scalar goal/state too
        # (a 0-d difference would otherwise raise IndexError).
        error = np.atleast_1d(self._goal) - np.atleast_1d(state)
        signal = self._pid.signal(error[0], t)
        if signal > 1:
            return 1
        if signal < -1:
            return -1
        return signal
class Policy_NoAction:
    """Policy that ignores its inputs and always outputs a zero action."""

    def __init__(self, name="NoAction"):
        """Remember the policy's label.

        Args:
            name: Label stored on the instance.
        """
        self._name = name

    def setGoal(self, goal):
        """Accept a goal and discard it; this policy has no target."""

    def getAction(self, state, t):
        """Return ``0`` regardless of ``state`` and ``t``."""
        return 0
class Policy_RandomlyGeneratedPID:
    """PID policy whose gains are drawn at random when it is constructed.

    The action is the PID output for ``(goal - state)[0]``, limited to the
    range ``[-1, 1]``.
    """

    def __init__(self, goal, t, name):
        """Draw random gains and build the underlying PID controller.

        Args:
            goal: Target value(s); a scalar or a sequence compatible with
                the states later passed to ``getAction``.
            t: Initial timestamp for the controller.
            name: Label stored on the policy and on the controller.
        """
        self._name = name
        self._goal = goal
        # The draw order (p, i, d) is kept fixed so that seeding NumPy's
        # global RNG reproduces the same gains.
        p = 0.01 + np.random.rand() * 0.02   # roughly 0.01 .. 0.03
        i = np.random.rand() * 0.001         # roughly 0 .. 0.001
        d = 0.005 + np.random.rand() * 0.01  # roughly 0.005 .. 0.015
        self._pid = UniversalPID(p, i, d, t, name)

    def setGoal(self, goal):
        """Replace the target used by subsequent ``getAction`` calls."""
        self._goal = goal

    def getAction(self, state, t):
        """Return the clamped PID action for ``state`` at time ``t``.

        Args:
            state: Current state; a scalar or a sequence whose first
                component is the controlled quantity.
            t: Current timestamp forwarded to the PID controller.

        Returns:
            The controller output, replaced by ``1`` when it exceeds 1 and
            by ``-1`` when it is below -1.
        """
        # atleast_1d promotes plain scalars to length-1 arrays so that
        # indexing the first component works for scalar goal/state too
        # (a 0-d difference would otherwise raise IndexError).
        error = np.atleast_1d(self._goal) - np.atleast_1d(state)
        signal = self._pid.signal(error[0], t)
        if signal > 1:
            return 1
        if signal < -1:
            return -1
        return signal
class Policy_RandomLazyPID:
    """Randomly-tuned PID policy that sometimes outputs no action.

    Gains are drawn at random on construction. On each ``getAction`` call the
    PID controller is updated, and then the policy either returns the clamped
    PID output or, when a fresh uniform draw does not exceed
    ``laziness_probability``, returns ``0`` instead.
    """

    def __init__(self, goal, t, name, laziness_probability=0.2):
        """Draw random gains and build the underlying PID controller.

        Args:
            goal: Target value(s); a scalar or a sequence compatible with
                the states later passed to ``getAction``.
            t: Initial timestamp for the controller.
            name: Label stored on the policy and on the controller.
            laziness_probability: Threshold compared against a uniform
                random draw on every ``getAction`` call; the policy returns
                ``0`` whenever the draw is not greater than this value.
        """
        self._name = name
        self._goal = goal
        # The draw order (p, i, d) is kept fixed so that seeding NumPy's
        # global RNG reproduces the same gains.
        p = 0.01 + np.random.rand() * 0.02   # roughly 0.01 .. 0.03
        i = np.random.rand() * 0.001         # roughly 0 .. 0.001
        d = 0.005 + np.random.rand() * 0.01  # roughly 0.005 .. 0.015
        self._laziness_probability = laziness_probability
        self._pid = UniversalPID(p, i, d, t, name)

    @property
    def _laziness_probabiliy(self):
        """Alias for the attribute's original, misspelled name."""
        return self._laziness_probability

    @_laziness_probabiliy.setter
    def _laziness_probabiliy(self, value):
        self._laziness_probability = value

    def setGoal(self, goal):
        """Replace the target used by subsequent ``getAction`` calls."""
        self._goal = goal

    def getAction(self, state, t):
        """Return the clamped PID action, or ``0`` when the policy is lazy.

        Args:
            state: Current state; a scalar or a sequence whose first
                component is the controlled quantity.
            t: Current timestamp forwarded to the PID controller.

        Returns:
            ``0`` when the random draw does not exceed the laziness
            threshold; otherwise the controller output, replaced by ``1``
            when it exceeds 1 and by ``-1`` when it is below -1.
        """
        # atleast_1d promotes plain scalars to length-1 arrays so that
        # indexing the first component works for scalar goal/state too
        # (a 0-d difference would otherwise raise IndexError).
        error = np.atleast_1d(self._goal) - np.atleast_1d(state)
        # The controller is stepped before the laziness draw, so its internal
        # state keeps tracking the error even on steps where 0 is returned.
        signal = self._pid.signal(error[0], t)
        acts = np.random.rand() > self._laziness_probability
        if not acts:
            return 0
        if signal > 1:
            return 1
        if signal < -1:
            return -1
        return signal