
Fix gpt example attention mask #3240

Merged 8 commits on Sep 26, 2022
Changes from 4 commits
6 changes: 1 addition & 5 deletions model_zoo/gpt/dataset.py
```diff
@@ -442,17 +442,13 @@ def _construct_sample(self, tokens):
         labels = tokens[1:]
         tokens = tokens[:-1]
         seq_length = len(tokens)
-        # Attention mask for the attention calulate
-        attention_mask = np.tri(seq_length, seq_length).reshape(
-            (1, seq_length, seq_length))

         # The pad and eos tokens do not contribute the loss
         loss_mask = np.ones(seq_length, dtype="float32")
         loss_mask[np.where(np.array(tokens) == self.eos_id)] = 0.0
         position_ids = np.arange(0, seq_length, dtype="int64")

-        attention_mask = (attention_mask - 1.0) * 1e9
-        attention_mask = attention_mask.astype("float32")
+        attention_mask = loss_mask
```
Collaborator:

This still seems not quite right: attention_mask = loss_mask masks out the eos token, but many downstream tasks rely on eos to mark the generation and end of certain tokens.
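A hypothetical NumPy illustration of this concern (the eos_id value and token sequence are made up; only the loss_mask construction mirrors the diff): reusing loss_mask as the attention mask zeroes the eos positions, so the model could no longer attend to eos tokens.

```python
import numpy as np

# Hypothetical values for illustration only: eos_id and the tokens are
# invented; the masking logic follows the loss_mask lines in the diff.
eos_id = 2
tokens = np.array([5, 7, 2, 9, 2], dtype="int64")

loss_mask = np.ones(len(tokens), dtype="float32")
loss_mask[np.where(tokens == eos_id)] = 0.0

print(loss_mask)  # [1. 1. 0. 1. 0.] -> eos positions would receive no attention
```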

Member Author:

After discussing offline: this example does not apply padding, so an all-ones attention mask is sufficient.

```diff
         labels = np.array(labels, dtype="int64")
         return [tokens, loss_mask, attention_mask, position_ids, labels]
```
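For reference, a minimal NumPy-only sketch (not the repo's actual model code) of what the deleted lines computed versus the all-ones mask the author's comment above describes:

```python
import numpy as np

seq_length = 4  # toy value for illustration

# Old code: dense lower-triangular causal mask, converted into an additive
# mask where disallowed (future) positions become -1e9 before the softmax.
causal = np.tri(seq_length, seq_length).reshape((1, seq_length, seq_length))
additive = ((causal - 1.0) * 1e9).astype("float32")
# additive[0, i, j] == 0.0 for j <= i and -1e9 for j > i.

# New approach per the discussion: the example applies no padding, so a
# per-token mask of all ones suffices; causal masking is left to the model.
attention_mask = np.ones(seq_length, dtype="float32")
```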
