Skip to content

Commit

Permalink
belief aggregation, stopping
Browse files Browse the repository at this point in the history
  • Loading branch information
Limmen committed Jan 8, 2025
1 parent edae518 commit 60c931f
Show file tree
Hide file tree
Showing 15 changed files with 236 additions and 88 deletions.
2 changes: 1 addition & 1 deletion examples/manual_play/intrusion_recovery_pomdp.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
actions=IntrusionRecoveryPomdpUtil.action_space(),
observations=IntrusionRecoveryPomdpUtil.observation_space(num_observations=num_observations),
cost_tensor=cost_tensor, observation_tensor=observation_tensor, transition_tensor=transition_tensor,
b1=IntrusionRecoveryPomdpUtil.initial_belief(p_a=p_a), T=BTR,
b1=IntrusionRecoveryPomdpUtil.initial_belief(), T=BTR,
simulation_env_name=simulation_name, gym_env_name="csle-tolerance-intrusion-recovery-pomdp-v1"
)
env = gym.make("csle-tolerance-intrusion-recovery-pomdp-v1",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
actions=IntrusionRecoveryPomdpUtil.action_space(),
observations=IntrusionRecoveryPomdpUtil.observation_space(num_observations=num_observations),
cost_tensor=cost_tensor, observation_tensor=observation_tensor, transition_tensor=transition_tensor,
b1=IntrusionRecoveryPomdpUtil.initial_belief(p_a=p_a), T=BTR,
b1=IntrusionRecoveryPomdpUtil.initial_belief(), T=BTR,
simulation_env_name=simulation_name, gym_env_name="csle-tolerance-intrusion-recovery-pomdp-v1"
)
simulation_env_config.simulation_env_input_config = input_config
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
actions=IntrusionRecoveryPomdpUtil.action_space(),
observations=IntrusionRecoveryPomdpUtil.observation_space(num_observations=num_observations),
cost_tensor=cost_tensor, observation_tensor=observation_tensor, transition_tensor=transition_tensor,
b1=IntrusionRecoveryPomdpUtil.initial_belief(p_a=p_a), T=BTR,
b1=IntrusionRecoveryPomdpUtil.initial_belief(), T=BTR,
simulation_env_name=simulation_name, gym_env_name="csle-tolerance-intrusion-recovery-pomdp-v1"
)
simulation_env_config.simulation_env_input_config = input_config
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
actions=IntrusionRecoveryPomdpUtil.action_space(),
observations=IntrusionRecoveryPomdpUtil.observation_space(num_observations=num_observations),
cost_tensor=cost_tensor, observation_tensor=observation_tensor, transition_tensor=transition_tensor,
b1=IntrusionRecoveryPomdpUtil.initial_belief(p_a=p_a), T=BTR,
b1=IntrusionRecoveryPomdpUtil.initial_belief(), T=BTR,
simulation_env_name=simulation_name, gym_env_name="csle-tolerance-intrusion-recovery-pomdp-v1"
)
simulation_env_config.simulation_env_input_config = input_config
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
actions=IntrusionRecoveryPomdpUtil.action_space(),
observations=IntrusionRecoveryPomdpUtil.observation_space(num_observations=num_observations),
cost_tensor=cost_tensor, observation_tensor=observation_tensor, transition_tensor=transition_tensor,
b1=IntrusionRecoveryPomdpUtil.initial_belief(p_a=p_a), T=BTR,
b1=IntrusionRecoveryPomdpUtil.initial_belief(), T=BTR,
simulation_env_name=simulation_name, gym_env_name="csle-tolerance-intrusion-recovery-pomdp-v1"
)
pomdp_solve_file_str = IntrusionRecoveryPomdpUtil.pomdp_solver_file(config=config)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
actions=IntrusionRecoveryPomdpUtil.action_space(),
observations=IntrusionRecoveryPomdpUtil.observation_space(num_observations=num_observations),
cost_tensor=cost_tensor, observation_tensor=observation_tensor, transition_tensor=transition_tensor,
b1=IntrusionRecoveryPomdpUtil.initial_belief(p_a=p_a), T=BTR,
b1=IntrusionRecoveryPomdpUtil.initial_belief(), T=BTR,
simulation_env_name=simulation_name, gym_env_name="csle-tolerance-intrusion-recovery-pomdp-v1"
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
actions=IntrusionRecoveryPomdpUtil.action_space(),
observations=IntrusionRecoveryPomdpUtil.observation_space(num_observations=num_observations),
cost_tensor=cost_tensor, observation_tensor=observation_tensor, transition_tensor=transition_tensor,
b1=IntrusionRecoveryPomdpUtil.initial_belief(p_a=p_a), T=BTR,
b1=IntrusionRecoveryPomdpUtil.initial_belief(), T=BTR,
simulation_env_name=simulation_name, gym_env_name="csle-tolerance-intrusion-recovery-pomdp-v1"
)
input_config.max_horizon = 30
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
actions=IntrusionRecoveryPomdpUtil.action_space(),
observations=IntrusionRecoveryPomdpUtil.observation_space(num_observations=num_observations),
cost_tensor=cost_tensor, observation_tensor=observation_tensor, transition_tensor=transition_tensor,
b1=IntrusionRecoveryPomdpUtil.initial_belief(p_a=p_a), T=BTR,
b1=IntrusionRecoveryPomdpUtil.initial_belief(), T=BTR,
simulation_env_name=simulation_name, gym_env_name="csle-tolerance-intrusion-recovery-pomdp-v1"
)
simulation_env_config.simulation_env_input_config = input_config
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
actions=IntrusionRecoveryPomdpUtil.action_space(),
observations=IntrusionRecoveryPomdpUtil.observation_space(num_observations=num_observations),
cost_tensor=cost_tensor, observation_tensor=observation_tensor, transition_tensor=transition_tensor,
b1=IntrusionRecoveryPomdpUtil.initial_belief(p_a=p_a), T=BTR,
b1=IntrusionRecoveryPomdpUtil.initial_belief(), T=BTR,
simulation_env_name=simulation_name, gym_env_name="csle-tolerance-intrusion-recovery-pomdp-v1"
)
simulation_env_config.simulation_env_input_config = input_config
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -262,8 +262,7 @@ def default_initial_state_distribution_config(p_a: float) -> InitialStateDistrib
:return: the default initial state distribution configuration
"""
initial_state_distribution_config = InitialStateDistributionConfig(
initial_state_distribution=IntrusionRecoveryPomdpUtil.initial_belief(p_a=p_a)
)
initial_state_distribution=IntrusionRecoveryPomdpUtil.initial_belief())
return initial_state_distribution_config


Expand Down Expand Up @@ -301,7 +300,7 @@ def default_input_config(eta: float, p_a: float, p_c_1: float, p_c_2: float, p_u
actions=IntrusionRecoveryPomdpUtil.action_space(),
observations=IntrusionRecoveryPomdpUtil.observation_space(num_observations=num_observations),
cost_tensor=cost_tensor, observation_tensor=observation_tensor, transition_tensor=transition_tensor,
b1=IntrusionRecoveryPomdpUtil.initial_belief(p_a=p_a), T=BTR,
b1=IntrusionRecoveryPomdpUtil.initial_belief(), T=BTR,
simulation_env_name=simulation_env_name, gym_env_name="csle-tolerance-intrusion-recovery-pomdp-v1"
)
return config
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def state_space() -> List[int]:
return [0, 1, 2]

@staticmethod
def initial_belief(p_a: float) -> List[float]:
def initial_belief() -> List[float]:
"""
Gets the initial belief state of the POMDP
Expand Down Expand Up @@ -72,9 +72,7 @@ def cost_function(s: int, a: int, eta: float, negate: bool = False) -> float:
return cost

@staticmethod
def cost_tensor(
eta: float, states: List[int], actions: List[int], negate: bool = False
) -> List[List[float]]:
def cost_tensor(eta: float, states: List[int], actions: List[int], negate: bool = False) -> List[List[float]]:
"""
Creates a |A|x|S| tensor with the costs (or rewards) of the POMDP
Expand Down Expand Up @@ -119,9 +117,7 @@ def observation_function(s: int, o: int, num_observations: int) -> float:
return 0.0

@staticmethod
def observation_tensor(
states: List[int], observations: List[int]
) -> List[List[float]]:
def observation_tensor(states: List[int], observations: List[int]) -> List[List[float]]:
"""
Creates a |S|x|O| tensor with the observation probabilities
Expand All @@ -143,9 +139,7 @@ def observation_tensor(
return observation_tensor

@staticmethod
def transition_function(
s: int, s_prime: int, a: int, p_a: float, p_c_1: float, p_u: float, p_c_2: float
) -> float:
def transition_function(s: int, s_prime: int, a: int, p_a: float, p_c_1: float, p_u: float, p_c_2: float) -> float:
"""
The transition function of the POMDP
Expand Down Expand Up @@ -184,9 +178,7 @@ def transition_function(
return 0

@staticmethod
def transition_function_game(
s: int, s_prime: int, a1: int, a2: int, p_a: float, p_c_1: float
) -> float:
def transition_function_game(s: int, s_prime: int, a1: int, a2: int, p_a: float, p_c_1: float) -> float:
"""
The transition function of the POSG
Expand Down Expand Up @@ -216,14 +208,8 @@ def transition_function_game(
return 0

@staticmethod
def transition_tensor(
states: List[int],
actions: List[int],
p_a: float,
p_c_1: float,
p_c_2: float,
p_u: float,
) -> List[List[List[float]]]:
def transition_tensor(states: List[int], actions: List[int], p_a: float, p_c_1: float, p_c_2: float, p_u: float) \
-> List[List[List[float]]]:
"""
Creates a |A|x|S|x|S| tensor with the transition probabilities of the POMDP
Expand Down Expand Up @@ -258,13 +244,8 @@ def transition_tensor(
return transition_tensor

@staticmethod
def transition_tensor_game(
states: List[int],
defender_actions: List[int],
attacker_actions: List[int],
p_a: float,
p_c_1: float,
) -> List[List[List[List[float]]]]:
def transition_tensor_game(states: List[int], defender_actions: List[int], attacker_actions: List[int], p_a: float,
p_c_1: float) -> List[List[List[List[float]]]]:
"""
Creates a |A|x|A|x|S|x|S| tensor with the transition probabilities of the POSG
Expand Down Expand Up @@ -304,9 +285,7 @@ def sample_initial_state(b1: List[float]) -> int:
return int(np.random.choice(np.arange(0, len(b1)), p=b1))

@staticmethod
def sample_next_observation(
observation_tensor: List[List[float]], s_prime: int, observations: List[int]
) -> int:
def sample_next_observation(observation_tensor: List[List[float]], s_prime: int, observations: List[int]) -> int:
"""
Samples the next observation
Expand All @@ -322,9 +301,7 @@ def sample_next_observation(
return int(o)

@staticmethod
def sample_next_state_game(
transition_tensor: List[List[List[List[float]]]], s: int, a1: int, a2: int
) -> int:
def sample_next_state_game(transition_tensor: List[List[List[List[float]]]], s: int, a1: int, a2: int) -> int:
"""
Samples the next state
Expand All @@ -341,16 +318,8 @@ def sample_next_state_game(
return int(s_prime)

@staticmethod
def bayes_filter(
s_prime: int,
o: int,
a: int,
b: List[float],
states: List[int],
observations: List[int],
observation_tensor: List[List[float]],
transition_tensor: List[List[List[float]]],
) -> float:
def bayes_filter(s_prime: int, o: int, a: int, b: List[float], states: List[int], observations: List[int],
observation_tensor: List[List[float]], transition_tensor: List[List[List[float]]]) -> float:
"""
A Bayesian filter to compute b[s_prime] of the POMDP
Expand Down Expand Up @@ -386,14 +355,8 @@ def bayes_filter(
return b_prime_s_prime

@staticmethod
def p_o_given_b_a1_a2(
o: int,
b: List[float],
a: int,
states: List[int],
transition_tensor: List[List[List[float]]],
observation_tensor: List[List[float]],
) -> float:
def p_o_given_b_a1_a2(o: int, b: List[float], a: int, states: List[int], transition_tensor: List[List[List[float]]],
observation_tensor: List[List[float]]) -> float:
"""
Computes P[o|a,b] of the POMDP
Expand All @@ -417,15 +380,8 @@ def p_o_given_b_a1_a2(
return prob

@staticmethod
def next_belief(
o: int,
a: int,
b: List[float],
states: List[int],
observations: List[int],
observation_tensor: List[List[float]],
transition_tensor: List[List[List[float]]],
) -> List[float]:
def next_belief(o: int, a: int, b: List[float], states: List[int], observations: List[int],
observation_tensor: List[List[float]], transition_tensor: List[List[List[float]]]) -> List[float]:
"""
Computes the next belief using a Bayesian filter
Expand All @@ -441,15 +397,8 @@ def next_belief(
b_prime = [0.0] * len(states)
for s_prime in states:
b_prime[s_prime] = IntrusionRecoveryPomdpUtil.bayes_filter(
s_prime=s_prime,
o=o,
a=a,
b=b,
states=states,
observations=observations,
transition_tensor=transition_tensor,
observation_tensor=observation_tensor,
)
s_prime=s_prime, o=o, a=a, b=b, states=states, observations=observations,
transition_tensor=transition_tensor, observation_tensor=observation_tensor)
if round(sum(b_prime), 2) != 1:
print(f"error, b_prime:{b_prime}, o:{o}, a:{a}, b:{b}")
assert round(sum(b_prime), 2) == 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ def test_from_json_file(self, mocker: pytest_mock.MockFixture) -> None:
cost_tensor=cost_tensor,
observation_tensor=observation_tensor,
transition_tensor=transition_tensor,
b1=IntrusionRecoveryPomdpUtil.initial_belief(p_a=p_a),
b1=IntrusionRecoveryPomdpUtil.initial_belief(),
T=int(BTR),
simulation_env_name=simulation_name,
gym_env_name="csle-tolerance-intrusion-recovery-pomdp-v1",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ def test_from_json_file(self, mocker: pytest_mock.MockFixture) -> None:
cost_tensor=cost_tensor,
observation_tensor=observation_tensor,
transition_tensor=transition_tensor,
b1=IntrusionRecoveryPomdpUtil.initial_belief(p_a=p_a),
b1=IntrusionRecoveryPomdpUtil.initial_belief(),
T=BTR,
simulation_env_name=simulation_name,
gym_env_name="csle-tolerance-intrusion-recovery-pomdp-v1",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def test_initial_belief(self) -> None:
:return: None
"""
assert sum(IntrusionRecoveryPomdpUtil.initial_belief(p_a=0.5)) == 1
assert sum(IntrusionRecoveryPomdpUtil.initial_belief()) == 1

def test_action_space(self) -> None:
"""
Expand Down
Loading

0 comments on commit 60c931f

Please sign in to comment.