diff --git a/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/test_modeling_tf_{{cookiecutter.lowercase_modelname}}.py b/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/test_modeling_tf_{{cookiecutter.lowercase_modelname}}.py
index 16b31500dd6c..57fd95dd3ff6 100644
--- a/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/test_modeling_tf_{{cookiecutter.lowercase_modelname}}.py
+++ b/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/test_modeling_tf_{{cookiecutter.lowercase_modelname}}.py
@@ -21,7 +21,7 @@
 from transformers.testing_utils import require_tf, slow
 
 from ..test_configuration_common import ConfigTester
-from ..test_modeling_tf_common import TFModelTesterMixin, floats_tensor, ids_tensor
+from ..test_modeling_tf_common import TFModelTesterMixin, floats_tensor, ids_tensor, random_attention_mask
 
 
 if is_tf_available():
@@ -92,7 +92,7 @@ def prepare_config_and_inputs(self):
 
         input_mask = None
         if self.use_input_mask:
-            input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)
+            input_mask = random_attention_mask([self.batch_size, self.seq_length])
 
         token_type_ids = None
         if self.use_token_type_ids:
diff --git a/tests/albert/test_modeling_tf_albert.py b/tests/albert/test_modeling_tf_albert.py
index 59815561c056..7eacc1f32a47 100644
--- a/tests/albert/test_modeling_tf_albert.py
+++ b/tests/albert/test_modeling_tf_albert.py
@@ -21,7 +21,7 @@
 from transformers.testing_utils import require_tf, slow
 
 from ..test_configuration_common import ConfigTester
-from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor
+from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor, random_attention_mask
 
 
 if is_tf_available():
@@ -96,7 +96,7 @@ def prepare_config_and_inputs(self):
 
         input_mask = None
         if self.use_input_mask:
-            input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)
+            input_mask = random_attention_mask([self.batch_size, self.seq_length])
 
         token_type_ids = None
         if self.use_token_type_ids:
diff --git a/tests/bert/test_modeling_tf_bert.py b/tests/bert/test_modeling_tf_bert.py
index 611268337ffd..8c709e093801 100644
--- a/tests/bert/test_modeling_tf_bert.py
+++ b/tests/bert/test_modeling_tf_bert.py
@@ -21,7 +21,7 @@
 from transformers.testing_utils import require_tf, slow
 
 from ..test_configuration_common import ConfigTester
-from ..test_modeling_tf_common import TFModelTesterMixin, floats_tensor, ids_tensor
+from ..test_modeling_tf_common import TFModelTesterMixin, floats_tensor, ids_tensor, random_attention_mask
 from ..utils.test_modeling_tf_core import TFCoreModelTesterMixin
 
 
@@ -96,7 +96,7 @@ def prepare_config_and_inputs(self):
 
         input_mask = None
         if self.use_input_mask:
-            input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)
+            input_mask = random_attention_mask([self.batch_size, self.seq_length])
 
         token_type_ids = None
         if self.use_token_type_ids:
diff --git a/tests/clip/test_modeling_tf_clip.py b/tests/clip/test_modeling_tf_clip.py
index 02e289cd5b2a..d3c3cb9f5033 100644
--- a/tests/clip/test_modeling_tf_clip.py
+++ b/tests/clip/test_modeling_tf_clip.py
@@ -301,6 +301,12 @@ def prepare_config_and_inputs(self):
         input_mask = None
         if self.use_input_mask:
             input_mask = random_attention_mask([self.batch_size, self.seq_length])
+            # make sure the first token has attention mask `1` to ensure that, after combining the causal mask, there
+            # is still at least one token being attended to for each batch.
+            # TODO: Change `random_attention_mask` in PT/TF/Flax common test file, after a discussion with the team.
+            input_mask = tf.concat(
+                [tf.ones_like(input_mask[:, :1], dtype=input_mask.dtype), input_mask[:, 1:]], axis=-1
+            )
 
         config = self.get_config()
 
diff --git a/tests/convbert/test_modeling_tf_convbert.py b/tests/convbert/test_modeling_tf_convbert.py
index ff4cbb1aa974..e2d68876263a 100644
--- a/tests/convbert/test_modeling_tf_convbert.py
+++ b/tests/convbert/test_modeling_tf_convbert.py
@@ -20,7 +20,7 @@
 from transformers.testing_utils import require_tf, slow
 
 from ..test_configuration_common import ConfigTester
-from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor
+from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor, random_attention_mask
 
 
 if is_tf_available():
@@ -94,7 +94,7 @@ def prepare_config_and_inputs(self):
 
         input_mask = None
         if self.use_input_mask:
-            input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)
+            input_mask = random_attention_mask([self.batch_size, self.seq_length])
 
         token_type_ids = None
         if self.use_token_type_ids:
diff --git a/tests/ctrl/test_modeling_tf_ctrl.py b/tests/ctrl/test_modeling_tf_ctrl.py
index 65b984b51c9a..d17a97a3ad83 100644
--- a/tests/ctrl/test_modeling_tf_ctrl.py
+++ b/tests/ctrl/test_modeling_tf_ctrl.py
@@ -20,7 +20,7 @@
 from transformers.testing_utils import require_tf, slow
 
 from ..test_configuration_common import ConfigTester
-from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor
+from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor, random_attention_mask
 
 
 if is_tf_available():
@@ -69,7 +69,7 @@ def prepare_config_and_inputs(self):
 
         input_mask = None
         if self.use_input_mask:
-            input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)
+            input_mask = random_attention_mask([self.batch_size, self.seq_length])
 
         token_type_ids = None
         if self.use_token_type_ids:
diff --git a/tests/deberta/test_modeling_tf_deberta.py b/tests/deberta/test_modeling_tf_deberta.py
index 581f6f02f470..7e2a3c3110ee 100644
--- a/tests/deberta/test_modeling_tf_deberta.py
+++ b/tests/deberta/test_modeling_tf_deberta.py
@@ -20,7 +20,7 @@
 from transformers.testing_utils import require_tf, slow
 
 from ..test_configuration_common import ConfigTester
-from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor
+from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor, random_attention_mask
 
 
 if is_tf_available():
@@ -92,7 +92,7 @@ def prepare_config_and_inputs(self):
 
         input_mask = None
         if self.use_input_mask:
-            input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)
+            input_mask = random_attention_mask([self.batch_size, self.seq_length])
 
         token_type_ids = None
         if self.use_token_type_ids:
diff --git a/tests/deberta_v2/test_modeling_tf_deberta_v2.py b/tests/deberta_v2/test_modeling_tf_deberta_v2.py
index 391afee59784..4fd967c2fa6e 100644
--- a/tests/deberta_v2/test_modeling_tf_deberta_v2.py
+++ b/tests/deberta_v2/test_modeling_tf_deberta_v2.py
@@ -20,7 +20,7 @@
 from transformers.testing_utils import require_tf, slow
 
 from ..test_configuration_common import ConfigTester
-from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor
+from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor, random_attention_mask
 
 
 if is_tf_available():
@@ -95,7 +95,7 @@ def prepare_config_and_inputs(self):
 
         input_mask = None
         if self.use_input_mask:
-            input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)
+            input_mask = random_attention_mask([self.batch_size, self.seq_length])
 
         token_type_ids = None
         if self.use_token_type_ids:
diff --git a/tests/distilbert/test_modeling_tf_distilbert.py b/tests/distilbert/test_modeling_tf_distilbert.py
index 7a146e9c3bf8..5266723f1f86 100644
--- a/tests/distilbert/test_modeling_tf_distilbert.py
+++ b/tests/distilbert/test_modeling_tf_distilbert.py
@@ -20,7 +20,7 @@
 from transformers.testing_utils import require_tf, slow
 
 from ..test_configuration_common import ConfigTester
-from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor
+from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor, random_attention_mask
 
 
 if is_tf_available():
@@ -70,7 +70,7 @@ def prepare_config_and_inputs(self):
 
         input_mask = None
         if self.use_input_mask:
-            input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)
+            input_mask = random_attention_mask([self.batch_size, self.seq_length])
 
         sequence_labels = None
         token_labels = None
diff --git a/tests/dpr/test_modeling_tf_dpr.py b/tests/dpr/test_modeling_tf_dpr.py
index 7a48a2254e10..ffce36efc3a6 100644
--- a/tests/dpr/test_modeling_tf_dpr.py
+++ b/tests/dpr/test_modeling_tf_dpr.py
@@ -19,7 +19,7 @@
 from transformers.testing_utils import require_tf, slow
 
 from ..test_configuration_common import ConfigTester
-from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor
+from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor, random_attention_mask
 
 
 if is_tf_available():
@@ -94,9 +94,8 @@ def prepare_config_and_inputs(self):
 
         input_mask = None
         if self.use_input_mask:
-            input_mask = ids_tensor(
-                [self.batch_size, self.seq_length], vocab_size=2
-            )  # follow test_modeling_tf_ctrl.py
+            # follow test_modeling_tf_ctrl.py
+            input_mask = random_attention_mask([self.batch_size, self.seq_length])
 
         token_type_ids = None
         if self.use_token_type_ids:
diff --git a/tests/electra/test_modeling_tf_electra.py b/tests/electra/test_modeling_tf_electra.py
index 4593ecff6100..ff2acd37e69f 100644
--- a/tests/electra/test_modeling_tf_electra.py
+++ b/tests/electra/test_modeling_tf_electra.py
@@ -20,7 +20,7 @@
 from transformers.testing_utils import require_tf, slow
 
 from ..test_configuration_common import ConfigTester
-from ..test_modeling_tf_common import TFModelTesterMixin, floats_tensor, ids_tensor
+from ..test_modeling_tf_common import TFModelTesterMixin, floats_tensor, ids_tensor, random_attention_mask
 
 
 if is_tf_available():
@@ -71,7 +71,7 @@ def prepare_config_and_inputs(self):
 
         input_mask = None
         if self.use_input_mask:
-            input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)
+            input_mask = random_attention_mask([self.batch_size, self.seq_length])
 
         token_type_ids = None
         if self.use_token_type_ids:
diff --git a/tests/flaubert/test_modeling_tf_flaubert.py b/tests/flaubert/test_modeling_tf_flaubert.py
index 62503bac2861..86bcd6ea6484 100644
--- a/tests/flaubert/test_modeling_tf_flaubert.py
+++ b/tests/flaubert/test_modeling_tf_flaubert.py
@@ -19,7 +19,7 @@
 from transformers.testing_utils import require_sentencepiece, require_tf, require_tokenizers, slow
 
 from ..test_configuration_common import ConfigTester
-from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor
+from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor, random_attention_mask
 
 
 if is_tf_available():
@@ -75,7 +75,7 @@ def __init__(
 
     def prepare_config_and_inputs(self):
         input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
-        input_mask = ids_tensor([self.batch_size, self.seq_length], 2, dtype=tf.float32)
+        input_mask = random_attention_mask([self.batch_size, self.seq_length], dtype=tf.float32)
 
         input_lengths = None
         if self.use_input_lengths:
diff --git a/tests/funnel/test_modeling_tf_funnel.py b/tests/funnel/test_modeling_tf_funnel.py
index 6105f9ab8035..c3ae3788d61e 100644
--- a/tests/funnel/test_modeling_tf_funnel.py
+++ b/tests/funnel/test_modeling_tf_funnel.py
@@ -20,7 +20,7 @@
 from transformers.testing_utils import require_tf
 
 from ..test_configuration_common import ConfigTester
-from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor
+from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor, random_attention_mask
 
 
 if is_tf_available():
@@ -111,7 +111,7 @@ def prepare_config_and_inputs(self):
 
         input_mask = None
         if self.use_input_mask:
-            input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)
+            input_mask = random_attention_mask([self.batch_size, self.seq_length])
 
         token_type_ids = None
         if self.use_token_type_ids:
diff --git a/tests/gpt2/test_modeling_tf_gpt2.py b/tests/gpt2/test_modeling_tf_gpt2.py
index f94387509e6a..d6470c0d1526 100644
--- a/tests/gpt2/test_modeling_tf_gpt2.py
+++ b/tests/gpt2/test_modeling_tf_gpt2.py
@@ -19,7 +19,7 @@
 from transformers.testing_utils import require_tf, slow
 
 from ..test_configuration_common import ConfigTester
-from ..test_modeling_tf_common import TFModelTesterMixin, floats_tensor, ids_tensor
+from ..test_modeling_tf_common import TFModelTesterMixin, floats_tensor, ids_tensor, random_attention_mask
 from ..utils.test_modeling_tf_core import TFCoreModelTesterMixin
 
 
@@ -74,7 +74,7 @@ def prepare_config_and_inputs(self):
 
         input_mask = None
         if self.use_input_mask:
-            input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)
+            input_mask = random_attention_mask([self.batch_size, self.seq_length])
 
         token_type_ids = None
         if self.use_token_type_ids:
diff --git a/tests/gptj/test_modeling_tf_gptj.py b/tests/gptj/test_modeling_tf_gptj.py
index 32ce3f8564b0..63feffb8c62e 100644
--- a/tests/gptj/test_modeling_tf_gptj.py
+++ b/tests/gptj/test_modeling_tf_gptj.py
@@ -20,7 +20,7 @@
 from transformers.testing_utils import require_tf, slow, tooslow
 
 from ..test_configuration_common import ConfigTester
-from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor
+from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor, random_attention_mask
 from ..utils.test_modeling_tf_core import TFCoreModelTesterMixin
 
 
@@ -70,7 +70,7 @@ def prepare_config_and_inputs(self):
 
         input_mask = None
         if self.use_input_mask:
-            input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)
+            input_mask = random_attention_mask([self.batch_size, self.seq_length])
 
         token_type_ids = None
         if self.use_token_type_ids:
diff --git a/tests/layoutlm/test_modeling_tf_layoutlm.py b/tests/layoutlm/test_modeling_tf_layoutlm.py
index f60d0c6f91d5..90e2b4fcf169 100644
--- a/tests/layoutlm/test_modeling_tf_layoutlm.py
+++ b/tests/layoutlm/test_modeling_tf_layoutlm.py
@@ -21,7 +21,7 @@
 from transformers.testing_utils import require_tf, slow
 
 from ..test_configuration_common import ConfigTester
-from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor
+from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor, random_attention_mask
 
 
 if is_tf_available():
@@ -107,7 +107,7 @@ def prepare_config_and_inputs(self):
 
         input_mask = None
         if self.use_input_mask:
-            input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)
+            input_mask = random_attention_mask([self.batch_size, self.seq_length])
 
         token_type_ids = None
         if self.use_token_type_ids:
diff --git a/tests/longformer/test_modeling_tf_longformer.py b/tests/longformer/test_modeling_tf_longformer.py
index 37c1ce534953..6bfa708912dd 100644
--- a/tests/longformer/test_modeling_tf_longformer.py
+++ b/tests/longformer/test_modeling_tf_longformer.py
@@ -20,7 +20,7 @@
 from transformers.testing_utils import require_sentencepiece, require_tf, require_tokenizers, slow
 
 from ..test_configuration_common import ConfigTester
-from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor
+from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor, random_attention_mask
 
 
 if is_tf_available():
@@ -79,7 +79,7 @@ def prepare_config_and_inputs(self):
 
         input_mask = None
         if self.use_input_mask:
-            input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)
+            input_mask = random_attention_mask([self.batch_size, self.seq_length])
 
         token_type_ids = None
         if self.use_token_type_ids:
diff --git a/tests/lxmert/test_modeling_tf_lxmert.py b/tests/lxmert/test_modeling_tf_lxmert.py
index 8d91d249d90b..63ec44a1ad90 100644
--- a/tests/lxmert/test_modeling_tf_lxmert.py
+++ b/tests/lxmert/test_modeling_tf_lxmert.py
@@ -23,7 +23,7 @@
 from transformers.testing_utils import require_tf, slow
 
 from ..test_configuration_common import ConfigTester
-from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor
+from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor, random_attention_mask
 
 
 if is_tf_available():
@@ -124,7 +124,7 @@ def prepare_config_and_inputs(self):
 
         input_mask = None
         if self.use_lang_mask:
-            input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)
+            input_mask = random_attention_mask([self.batch_size, self.seq_length])
         token_type_ids = None
         if self.use_token_type_ids:
             token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)
diff --git a/tests/mobilebert/test_modeling_tf_mobilebert.py b/tests/mobilebert/test_modeling_tf_mobilebert.py
index 4cbfcefee874..c0ddf043562f 100644
--- a/tests/mobilebert/test_modeling_tf_mobilebert.py
+++ b/tests/mobilebert/test_modeling_tf_mobilebert.py
@@ -20,7 +20,7 @@
 from transformers.testing_utils import require_tf, slow
 
 from ..test_configuration_common import ConfigTester
-from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor
+from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor, random_attention_mask
 
 
 if is_tf_available():
@@ -114,7 +114,7 @@ def prepare_config_and_inputs(self):
 
         input_mask = None
         if self.use_input_mask:
-            input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)
+            input_mask = random_attention_mask([self.batch_size, self.seq_length])
 
         token_type_ids = None
         if self.use_token_type_ids:
diff --git a/tests/mpnet/test_modeling_tf_mpnet.py b/tests/mpnet/test_modeling_tf_mpnet.py
index 23448610cc21..f9f9e2d51201 100644
--- a/tests/mpnet/test_modeling_tf_mpnet.py
+++ b/tests/mpnet/test_modeling_tf_mpnet.py
@@ -20,7 +20,7 @@
 from transformers.testing_utils import require_tf, slow
 
 from ..test_configuration_common import ConfigTester
-from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor
+from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor, random_attention_mask
 
 
 if is_tf_available():
@@ -90,7 +90,7 @@ def prepare_config_and_inputs(self):
 
         input_mask = None
         if self.use_input_mask:
-            input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)
+            input_mask = random_attention_mask([self.batch_size, self.seq_length])
 
         sequence_labels = None
         token_labels = None
diff --git a/tests/openai/test_modeling_tf_openai.py b/tests/openai/test_modeling_tf_openai.py
index 227689df59aa..f74a85ee60d6 100644
--- a/tests/openai/test_modeling_tf_openai.py
+++ b/tests/openai/test_modeling_tf_openai.py
@@ -20,7 +20,7 @@
 from transformers.testing_utils import require_tf, slow
 
 from ..test_configuration_common import ConfigTester
-from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor
+from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor, random_attention_mask
 
 
 if is_tf_available():
@@ -70,7 +70,7 @@ def prepare_config_and_inputs(self):
 
         input_mask = None
         if self.use_input_mask:
-            input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)
+            input_mask = random_attention_mask([self.batch_size, self.seq_length])
 
         token_type_ids = None
         if self.use_token_type_ids:
diff --git a/tests/rembert/test_modeling_tf_rembert.py b/tests/rembert/test_modeling_tf_rembert.py
index f8f17f30a9dd..d5d52062e8c9 100644
--- a/tests/rembert/test_modeling_tf_rembert.py
+++ b/tests/rembert/test_modeling_tf_rembert.py
@@ -20,7 +20,7 @@
 from transformers.testing_utils import require_tf, slow
 
 from ..test_configuration_common import ConfigTester
-from ..test_modeling_tf_common import TFModelTesterMixin, floats_tensor, ids_tensor
+from ..test_modeling_tf_common import TFModelTesterMixin, floats_tensor, ids_tensor, random_attention_mask
 
 
 if is_tf_available():
@@ -95,7 +95,7 @@ def prepare_config_and_inputs(self):
 
         input_mask = None
         if self.use_input_mask:
-            input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)
+            input_mask = random_attention_mask([self.batch_size, self.seq_length])
 
         token_type_ids = None
         if self.use_token_type_ids:
diff --git a/tests/roberta/test_modeling_tf_roberta.py b/tests/roberta/test_modeling_tf_roberta.py
index fa947d64f081..9771673d8748 100644
--- a/tests/roberta/test_modeling_tf_roberta.py
+++ b/tests/roberta/test_modeling_tf_roberta.py
@@ -20,7 +20,7 @@
 from transformers.testing_utils import require_sentencepiece, require_tf, require_tokenizers, slow
 
 from ..test_configuration_common import ConfigTester
-from ..test_modeling_tf_common import TFModelTesterMixin, floats_tensor, ids_tensor
+from ..test_modeling_tf_common import TFModelTesterMixin, floats_tensor, ids_tensor, random_attention_mask
 
 
 if is_tf_available():
@@ -72,7 +72,7 @@ def prepare_config_and_inputs(self):
 
         input_mask = None
         if self.use_input_mask:
-            input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)
+            input_mask = random_attention_mask([self.batch_size, self.seq_length])
 
         token_type_ids = None
         if self.use_token_type_ids:
diff --git a/tests/roformer/test_modeling_tf_roformer.py b/tests/roformer/test_modeling_tf_roformer.py
index 1f26f7e2adc6..9a23ca3b83d2 100644
--- a/tests/roformer/test_modeling_tf_roformer.py
+++ b/tests/roformer/test_modeling_tf_roformer.py
@@ -20,7 +20,7 @@
 from transformers.testing_utils import require_tf, slow
 
 from ..test_configuration_common import ConfigTester
-from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor
+from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor, random_attention_mask
 
 
 if is_tf_available():
@@ -95,7 +95,7 @@ def prepare_config_and_inputs(self):
 
         input_mask = None
         if self.use_input_mask:
-            input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)
+            input_mask = random_attention_mask([self.batch_size, self.seq_length])
 
         token_type_ids = None
         if self.use_token_type_ids:
diff --git a/tests/t5/test_modeling_tf_t5.py b/tests/t5/test_modeling_tf_t5.py
index a2ea255faca5..c6585f83b18e 100644
--- a/tests/t5/test_modeling_tf_t5.py
+++ b/tests/t5/test_modeling_tf_t5.py
@@ -20,7 +20,7 @@
 from transformers.utils import cached_property
 
 from ..test_configuration_common import ConfigTester
-from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor
+from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor, random_attention_mask
 
 
 if is_tf_available():
@@ -58,7 +58,7 @@ def prepare_config_and_inputs(self):
 
         input_mask = None
         if self.use_input_mask:
-            input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)
+            input_mask = random_attention_mask([self.batch_size, self.seq_length])
 
         token_labels = None
         if self.use_labels:
diff --git a/tests/tapas/test_modeling_tf_tapas.py b/tests/tapas/test_modeling_tf_tapas.py
index 936273a6ca30..9e3cb63f70b5 100644
--- a/tests/tapas/test_modeling_tf_tapas.py
+++ b/tests/tapas/test_modeling_tf_tapas.py
@@ -38,7 +38,7 @@
 from transformers.utils import cached_property
 
 from ..test_configuration_common import ConfigTester
-from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor
+from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor, random_attention_mask
 
 
 if is_tf_available():
@@ -158,7 +158,7 @@ def prepare_config_and_inputs(self):
 
         input_mask = None
         if self.use_input_mask:
-            input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)
+            input_mask = random_attention_mask([self.batch_size, self.seq_length])
 
         token_type_ids = []
         for type_vocab_size in self.type_vocab_sizes:
diff --git a/tests/test_modeling_tf_common.py b/tests/test_modeling_tf_common.py
index 3d2f7976cf6c..9473a50f53aa 100644
--- a/tests/test_modeling_tf_common.py
+++ b/tests/test_modeling_tf_common.py
@@ -1440,7 +1440,7 @@ def ids_tensor(shape, vocab_size, rng=None, name=None, dtype=None):
 def random_attention_mask(shape, rng=None, name=None, dtype=None):
     attn_mask = ids_tensor(shape, vocab_size=2, rng=None, name=None, dtype=dtype)
     # make sure that at least one token is attended to for each batch
-    attn_mask = tf.concat([tf.constant(value=1, shape=(shape[0], 1), dtype=dtype), attn_mask[:, 1:]], axis=1)
+    attn_mask = tf.concat([attn_mask[:, :-1], tf.ones_like(attn_mask[:, -1:], dtype=dtype)], axis=-1)
     return attn_mask
 
 
diff --git a/tests/xlm/test_modeling_tf_xlm.py b/tests/xlm/test_modeling_tf_xlm.py
index 5fc4d2413f9e..412a8430ad6d 100644
--- a/tests/xlm/test_modeling_tf_xlm.py
+++ b/tests/xlm/test_modeling_tf_xlm.py
@@ -20,7 +20,7 @@
 from transformers.testing_utils import require_tf, slow
 
 from ..test_configuration_common import ConfigTester
-from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor
+from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor, random_attention_mask
 
 
 if is_tf_available():
@@ -75,7 +75,7 @@ def __init__(
 
     def prepare_config_and_inputs(self):
         input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
-        input_mask = ids_tensor([self.batch_size, self.seq_length], 2, dtype=tf.float32)
+        input_mask = random_attention_mask([self.batch_size, self.seq_length], dtype=tf.float32)
 
         input_lengths = None
         if self.use_input_lengths:
diff --git a/tests/xlnet/test_modeling_tf_xlnet.py b/tests/xlnet/test_modeling_tf_xlnet.py
index 4b92581a0efc..8cf4ca2099bd 100644
--- a/tests/xlnet/test_modeling_tf_xlnet.py
+++ b/tests/xlnet/test_modeling_tf_xlnet.py
@@ -22,7 +22,7 @@
 from transformers.testing_utils import require_tf, slow
 
 from ..test_configuration_common import ConfigTester
-from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor
+from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor, random_attention_mask
 
 
 if is_tf_available():
@@ -75,7 +75,7 @@ def prepare_config_and_inputs(self):
         input_ids_1 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
         input_ids_2 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
         segment_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)
-        input_mask = ids_tensor([self.batch_size, self.seq_length], 2, dtype=tf.float32)
+        input_mask = random_attention_mask([self.batch_size, self.seq_length], dtype=tf.float32)
 
         input_ids_q = ids_tensor([self.batch_size, self.seq_length + 1], self.vocab_size)
         perm_mask = tf.zeros((self.batch_size, self.seq_length + 1, self.seq_length), dtype=tf.float32)
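For reference, a minimal standalone sketch of the new masking behaviour, not part of the diff; `random_attention_mask_sketch` is a hypothetical stand-in for the test suite's `ids_tensor`-based helper. After this change the helper guarantees the last token of every row is attended to, and testers that additionally need the first token attended (e.g. the TF CLIP tester above, which combines the padding mask with a causal mask) force it explicitly:

import tensorflow as tf

def random_attention_mask_sketch(shape, dtype=tf.int32):
    # random 0/1 mask, standing in for ids_tensor(shape, vocab_size=2)
    mask = tf.random.uniform(shape, minval=0, maxval=2, dtype=dtype)
    # new behaviour: the last token of every row is always attended to
    return tf.concat([mask[:, :-1], tf.ones_like(mask[:, -1:], dtype=dtype)], axis=-1)

mask = random_attention_mask_sketch([2, 5])
# CLIP-style follow-up: also force the first token to 1, so that combining the padding
# mask with a causal mask still leaves at least one attended token per row
mask = tf.concat([tf.ones_like(mask[:, :1], dtype=mask.dtype), mask[:, 1:]], axis=-1)
print(mask)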