Merge pull request #8 from patham9/rejection_sampling

Non-deterministically prefer hypotheses which have higher truth expectation
patham9 · Jan 25, 2024 · c093b93
2 parents df980a7 + 0ea878a
commit c093b93
Show file tree

Hide file tree

Showing 2 changed files with 13 additions and 5 deletions.
diff --git a/hypothesis.py b/hypothesis.py
@@ -23,7 +23,9 @@
  * """
 
 import sys
+from copy import deepcopy
 from prettyprint import *
+import random
 
 #Register operations in case Euclidean-space operation alignment assumptions should be exploited, which helps data efficiency
 def Hypothesis_UseMovementOpAssumptions(leftOp, rightOp, upOp, downOp, DisableOpSymmetryAssumptionFlag):
@@ -95,12 +97,18 @@ def Hypothesis_ValidCondition(cond):  #restrict to neighbours (CA assumption)
     return False
 
 #We exclude rules which have more negative evidence than positive, and choose the highest truth-exp ones whenever a different outcome would be predicted for the same conditions
-def Hypothesis_BestSelection(rules, rulesExcluded, RuleEvidence, rulesin):
+def Hypothesis_BestSelection(rules, rulesExcluded, RuleEvidence):
+    rulesin = deepcopy(rules)
     for i, rule1 in enumerate(rulesin):
-        if Hypothesis_TruthExpectation(Hypothesis_TruthValue(RuleEvidence[rule1])) <= 0.5: #exclude rules which are not better than exp (only 0.5+ makes sense here)
+        #if Hypothesis_TruthExpectation(Hypothesis_TruthValue(RuleEvidence[rule1])) <= 0.5: #exclude rules which are not better than exp (only 0.5+ makes sense here)
+        if Hypothesis_TruthExpectation(Hypothesis_TruthValue(RuleEvidence[rule1])) <= 0.5 or \
+           random.random()*random.random() > Hypothesis_TruthExpectation(Hypothesis_TruthValue(RuleEvidence[rule1])) or \
+           RuleEvidence[rule1][1] > 0:
             if rule1 in rules:
                 rulesExcluded.add(rule1)
                 rules.remove(rule1)
+    rulesin = deepcopy(rules)
+    for i, rule1 in enumerate(rulesin):
         for j, rule2 in enumerate(rulesin):
             if i != j:  #exclude rules of same precondition which are worse by truth value
                 if rule1[0] == rule2[0]:

diff --git a/nace.py b/nace.py
@@ -41,7 +41,7 @@ def NACE_Cycle(Time, FocusSet, RuleEvidence, loc, observed_world, rulesin, negru
     rulesExcluded = set([])
     rules = deepcopy(rulesin)
     observed_world = World_FieldOfView(Time, loc, observed_world, oldworld)
-    Hypothesis_BestSelection(rules, rulesExcluded, RuleEvidence, rulesin)
+    Hypothesis_BestSelection(rules, rulesExcluded, RuleEvidence)
     if "manual" not in sys.argv:
         favoured_actions, airis_score, favoured_actions_for_revisit, oldest_age = _Plan(Time, observed_world, rules, actions, customGoal = World_CupIsOnTable)
     else:
@@ -52,7 +52,7 @@ def NACE_Cycle(Time, FocusSet, RuleEvidence, loc, observed_world, rulesin, negru
     print("\033[1;1H\033[2J")
     if "manual" not in sys.argv:
         exploit_babble = random.random() > 1.0 #babbling when wanting to achieve something or curious about something, and babbling when exploring:
-        explore_babble = random.random() > (0.9 if "DisableOpSymmetryAssumption" in sys.argv else 1.0) #since it might not know yet about all ops, exploring then can be limited
+        explore_babble = random.random() > 1.0 #since it might not know yet about all ops, exploring then can be limited
         plan = []
         if airis_score >= 1.0 or exploit_babble or len(favoured_actions) == 0:
             if not exploit_babble and not explore_babble and oldest_age > 0.0 and airis_score == 1.0 and len(favoured_actions_for_revisit) != 0:
@@ -266,7 +266,7 @@ def _Observe(Time, FocusSet, RuleEvidence, oldworld, action, newworld, oldrules,
             scores, highscore, rule = positionscores[(y,x)]
             #for rule in oldrules:
             if _RuleApplicable(scores, highscore, highesthighscore, rule):
-                if rule[1][2] != newworld[BOARD][y][x] and rule in scores and scores[rule] == highesthighscore:
+                if rule[1][2] != newworld[BOARD][y][x]:
                     (precondition, consequence) = rule
                     action_score_and_preconditions = list(precondition)
                     values = action_score_and_preconditions[1]