diff --git a/clean_pufferl.py b/clean_pufferl.py
index 4bf6f1b4..b9ede3b8 100644
--- a/clean_pufferl.py
+++ b/clean_pufferl.py
@@ -119,7 +119,7 @@ def evaluate(data):
     with profile.eval_misc:
         value = value.flatten()
         actions = actions.cpu().numpy()
-        mask = torch.as_tensor(mask)# * policy.mask)
+        mask = torch.as_tensor(mask)
         o = o if config.cpu_offload else o_device
         experience.store(o, value, actions, logprob, r, d, env_id, mask)

@@ -411,7 +411,7 @@ def __init__(self, batch_size, bptt_horizon, minibatch_size, hidden_size,
         self.dones=torch.zeros(batch_size, pin_memory=pin)
         self.truncateds=torch.zeros(batch_size, pin_memory=pin)
         self.values=torch.zeros(batch_size, pin_memory=pin)
-        self.e3b_inv = 1*torch.eye(hidden_size).repeat(lstm_total_agents, 1, 1).to(device)
+        self.e3b_inv = 10*torch.eye(hidden_size).repeat(lstm_total_agents, 1, 1).to(device)

         self.actions_np = np.asarray(self.actions)
         self.logprobs_np = np.asarray(self.logprobs)
diff --git a/pufferlib/models.py b/pufferlib/models.py
index 5d9e8b29..5ff66239 100644
--- a/pufferlib/models.py
+++ b/pufferlib/models.py
@@ -86,16 +86,16 @@ def decode_actions(self, hidden, lookup, concat=True, e3b=None):
             batch = hidden.shape[0]
             return probs, value

-        intrinsic_reward = None
+        b = None
         if e3b is not None:
             phi = hidden.detach()
-            intrinsic_reward = (phi.unsqueeze(1) @ e3b @ phi.unsqueeze(2))
-            e3b = 0.95*e3b - (phi.unsqueeze(2) @ phi.unsqueeze(1))/(1 + intrinsic_reward)
-            intrinsic_reward = intrinsic_reward.squeeze()
-            intrinsic_reward = 0.1*torch.clamp(intrinsic_reward, -1, 1)
+            u = phi.unsqueeze(1) @ e3b
+            b = u @ phi.unsqueeze(2)
+            e3b = 0.99*e3b - (u.mT @ u) / (1 + b)
+            b = b.squeeze()

         actions = self.decoder(hidden)
-        return actions, value, e3b, intrinsic_reward
+        return actions, value, e3b, b

 class LSTMWrapper(nn.Module):
     def __init__(self, env, policy, input_size=128, hidden_size=128, num_layers=1):
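
Note on the models.py hunk: the rewritten update computes u = phi^T @ e3b once and reuses it, so the subtracted term becomes (C^-1 phi)(phi^T C^-1)/(1 + b), which is the Sherman-Morrison form for updating an inverse covariance (the old code subtracted phi phi^T / (1 + b) instead). The decay also moves from 0.95 to 0.99, the 0.1 scale and clamp on the bonus are dropped, and clean_pufferl.py now initializes e3b_inv to 10*I. A minimal standalone sketch of the new update, with assumed shapes (B agents, H hidden features) and random phi purely for illustration:

    import torch

    B, H = 4, 8
    e3b = 10 * torch.eye(H).repeat(B, 1, 1)  # per-agent inverse covariance, 10*I as in the new init
    phi = torch.randn(B, H)                  # stands in for the detached hidden features

    u = phi.unsqueeze(1) @ e3b               # (B, 1, H): phi^T C^-1, computed once and reused
    b = u @ phi.unsqueeze(2)                 # (B, 1, 1): elliptic bonus phi^T C^-1 phi
    e3b = 0.99 * e3b - (u.mT @ u) / (1 + b)  # decayed Sherman-Morrison rank-1 update;
                                             # u.mT @ u == (C^-1 phi)(phi^T C^-1) because
                                             # e3b starts symmetric and stays symmetric
    b = b.squeeze()                          # (B,): raw per-agent bonus, no longer scaled/clamped here

Since e3b is symmetric, u.mT (torch's batched transpose of the last two dims) turns the (B, 1, H) row into the (B, H, 1) column C^-1 phi, so the outer product matches the exact Sherman-Morrison numerator without a second e3b matmul.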