PaddlePaddle · FrostML · Sep 23, 2022 · Sep 22, 2022 · Sep 23, 2022 · Sep 23, 2022
diff --git a/paddlenlp/transformers/unimo/modeling.py b/paddlenlp/transformers/unimo/modeling.py
@@ -151,16 +151,39 @@ def __init__(self,
                  hidden_size=768,
                  hidden_dropout_prob=0.1,
                  max_position_embeddings=512,
-                 type_vocab_size=4):
+                 type_vocab_size=4,
+                 pad_token_id=None):
         super(UNIMOEmbeddings, self).__init__()
         self.word_embeddings = nn.Embedding(vocab_size, hidden_size)
         self.position_embeddings = nn.Embedding(max_position_embeddings,
                                                 hidden_size)
         self.token_type_embeddings = nn.Embedding(type_vocab_size, hidden_size)
+        self.pad_token_id = pad_token_id
 
-    def forward(self, input_ids, token_type_ids, position_ids):
+    def forward(self, input_ids, token_type_ids=None, position_ids=None):
         input_embedings = self.word_embeddings(input_ids)
+
+        if position_ids is None:
+            if self.pad_token_id is None:
+                position_ids = paddle.expand_as(
+                    paddle.arange(end=paddle.shape(input_ids)[1],
+                                  dtype="int64"), input_ids)
+            else:
+                num_pad = paddle.sum(
+                    (input_ids == self.pad_token_id).astype("float32"),
+                    axis=-1,
+                    keepdim=True)
+                position_ids = F.relu(
+                    paddle.expand_as(
+                        paddle.arange(end=paddle.shape(input_ids)[1],
+                                      dtype="float32"), input_ids) -
+                    num_pad).astype("int64")
+            position_ids.stop_gradient = True
         position_embeddings = self.position_embeddings(position_ids)
+
+        if token_type_ids is None:
+            token_type_ids = paddle.zeros_like(input_ids, dtype="int64")
+            token_type_ids.stop_gradient = True
         token_type_embeddings = self.token_type_embeddings(token_type_ids)
 
         embeddings = input_embedings + position_embeddings + token_type_embeddings
@@ -274,7 +297,7 @@ def __init__(
         self.embeddings = UNIMOEmbeddings(vocab_size, hidden_size,
                                           hidden_dropout_prob,
                                           max_position_embeddings,
-                                          type_vocab_size)
+                                          type_vocab_size, self.pad_token_id)
         encoder_layer = nn.TransformerEncoderLayer(
             hidden_size,
             num_attention_heads,
@@ -294,11 +317,17 @@ def __init__(
 
         self.apply(self.init_weights)
 
+    def get_input_embeddings(self):
+        return self.embeddings.word_embeddings
+
+    def set_input_embeddings(self, value):
+        self.embeddings.word_embeddings = value
+
     def forward(self,
                 input_ids,
-                token_type_ids,
-                position_ids,
-                attention_mask,
+                token_type_ids=None,
+                position_ids=None,
+                attention_mask=None,
                 use_cache=False,
                 cache=None):
         r"""
@@ -364,6 +393,10 @@ def forward(self,
                 inputs = tokenizer.gen_encode("Welcome to use PaddlePaddle and PaddleNLP!", return_tensors=True)
                 outputs = model(**inputs)
         """
+        if attention_mask is None:
+            attention_mask = ((input_ids == self.pad_token_id).astype(
+                paddle.get_default_dtype()) * -1e4).unsqueeze([1, 2])
+            attention_mask.stop_gradient = True
 
         embedding_output = self.embeddings(input_ids, token_type_ids,
                                            position_ids)
@@ -435,9 +468,9 @@ def __init__(self, unimo):
 
     def forward(self,
                 input_ids,
-                token_type_ids,
-                position_ids,
-                attention_mask,
+                token_type_ids=None,
+                position_ids=None,
+                attention_mask=None,
                 masked_positions=None,
                 use_cache=False,
                 cache=None):
@@ -527,18 +560,48 @@ def adjust_logits_during_generation(self, logits):
 
     def prepare_inputs_for_generation(self,
                                       input_ids,
-                                      token_type_ids,
-                                      position_ids,
-                                      attention_mask,
+                                      token_type_ids=None,
+                                      position_ids=None,
+                                      attention_mask=None,
                                       use_cache=False,
                                       cache=None,
                                       **kwargs):
+
+        if position_ids is None:
+            if self.pad_token_id is None:
+                position_ids = paddle.expand_as(
+                    paddle.arange(end=paddle.shape(input_ids)[1],
+                                  dtype="int64"), input_ids)
+            else:
+                num_pad = paddle.sum(
+                    (input_ids == self.pad_token_id).astype("float32"),
+                    axis=-1,
+                    keepdim=True)
+                position_ids = F.relu(
+                    paddle.expand_as(
+                        paddle.arange(end=paddle.shape(input_ids)[1],
+                                      dtype="float32"), input_ids) -
+                    num_pad).astype("int64")
+            position_ids.stop_gradient = True
+
+        if token_type_ids is None:
+            token_type_ids = paddle.zeros_like(input_ids, dtype="int64")
+            token_type_ids.stop_gradient = True
+
+        if attention_mask is None:
+            attention_mask = ((input_ids == self.pad_token_id).astype(
+                paddle.get_default_dtype()) * -1e4).unsqueeze([1, 2])
+            attention_mask.stop_gradient = True
+
         # only last token for inputs_ids if cache is defined in kwargs
         if cache is not None:
             input_ids = input_ids[:, -1].unsqueeze(-1)
-            token_type_ids = token_type_ids[:, -1].unsqueeze(-1)
-            position_ids = position_ids[:, -1].unsqueeze(-1)
-            attention_mask = attention_mask[:, :, -1, :].unsqueeze(2)
+            if token_type_ids is not None:
+                token_type_ids = token_type_ids[:, -1].unsqueeze(-1)
+            if position_ids is not None:
+                position_ids = position_ids[:, -1].unsqueeze(-1)
+        if attention_mask is not None:
+            attention_mask = attention_mask[:, :, -1:, :]
 
         return {
             "input_ids": input_ids,

diff --git a/paddlenlp/transformers/unimo/tokenizer.py b/paddlenlp/transformers/unimo/tokenizer.py
@@ -162,6 +162,13 @@ def load_vocabulary(filepath,
                                 **kwargs)
         return vocab
 
+    def get_vocab(self):
+        vocab = {
+            self.convert_ids_to_tokens(i): i
+            for i in range(self.vocab_size)
+        }
+        return vocab
+
     def _tokenize(self, text):
         r"""
         End-to-end tokenization for UNIMO models.

diff --git a/tests/fixtures/vocab.zh.unimo.txt b/tests/fixtures/vocab.zh.unimo.txt
@@ -0,0 +1,18 @@
+[UNK]	0
+[SEP]	1
+[PAD]	2
+[CLS]	3
+[MASK]	4
+欢	5
+迎	6
+使	7
+用	8
+百	9
+度	10
+飞	11
+桨	12
+深	13
+学	14
+习	15
+框	16
+架	17
diff --git a/tests/transformers/unimo/__init__.py b/tests/transformers/unimo/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.