-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbell_test.py
44 lines (36 loc) · 1.37 KB
/
bell_test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import numpy as np
from numpy import ndarray
class Bellman(object):
    """Container for a Bellman-equation setup.

    Holds the list of states and the discount factor used when
    propagating utilities between states.
    """

    def __init__(self, states: list, gamma: float = 0.5):
        """Keep the state list and the discount factor `gamma` (default 0.5)."""
        self.states = states
        self.gamma = gamma
class Action(object):
    """One action edge: a transition probability plus an associated value.

    NOTE(review): the ``utility`` argument is stored under the attribute
    name ``destination`` and annotated as ``int`` — this looks like a
    naming mismatch; confirm the intended meaning with callers before
    renaming either side (both are external interface).
    """

    def __init__(self, probability, utility):
        """Record the probability and the value passed as `utility`."""
        self.probability: float = probability
        self.destination: int = utility
class State(object):
    """A state that keeps the first half of its actions/rewards and packs
    the remaining half into a successor description.

    Construction immediately calls ``set_consecutive``, which mutates
    ``self.reward`` and ``self.action_space`` (truncating them to the
    first half) and stores the remainder in ``self.next_state``.
    """

    def __init__(self, reward, utility, action_space):
        # reward: per-action rewards, parallel to action_space
        # utility: scalar utility carried over unchanged to the successor
        self.reward = reward
        self.utility = utility
        self.action_space = action_space
        # Fixed annotation: set_consecutive() returns a nested list
        # ([[rewards, utility, actions]]), never a State instance,
        # so the original `State` annotation was wrong.
        self.next_state: list = self.set_consecutive()

    def set_consecutive(self):
        """Split actions/rewards in half; keep the first part, return the rest.

        Returns a single-element list ``[[rest_rewards, utility, rest_actions]]``
        describing the consecutive (successor) state.
        """
        n = len(self.action_space)
        # NOTE(review): round() applies banker's rounding, so odd lengths
        # split inconsistently (n=3 keeps 2, n=5 keeps 2). Preserved as-is;
        # confirm whether n // 2 or (n + 1) // 2 was intended.
        split = round(n / 2)
        # Slicing replaces the original index loop (also fixes the
        # misspelled local `consectutive_actions`).
        rest_rewards = self.reward[split:n]
        rest_actions = self.action_space[split:n]
        self.reward = self.reward[:split]
        self.action_space = self.action_space[:split]
        return [[rest_rewards, self.utility, rest_actions]]
# data splitting plan:
# 70% training, 15% test, 15% validation