diff --git a/paddlespeech/s2t/exps/deepspeech2/model.py b/paddlespeech/s2t/exps/deepspeech2/model.py index 7ab8cf853ad..d007a9e3964 100644 --- a/paddlespeech/s2t/exps/deepspeech2/model.py +++ b/paddlespeech/s2t/exps/deepspeech2/model.py @@ -27,7 +27,6 @@ from paddlespeech.s2t.io.dataloader import BatchDataLoader from paddlespeech.s2t.models.ds2 import DeepSpeech2InferModel from paddlespeech.s2t.models.ds2 import DeepSpeech2Model -from paddlespeech.s2t.training.gradclip import ClipGradByGlobalNormWithLog from paddlespeech.s2t.training.reporter import report from paddlespeech.s2t.training.timer import Timer from paddlespeech.s2t.training.trainer import Trainer @@ -148,7 +147,7 @@ def setup_model(self): if not self.train: return - grad_clip = ClipGradByGlobalNormWithLog(config.global_grad_clip) + grad_clip = paddle.nn.ClipGradByGlobalNorm(config.global_grad_clip) lr_scheduler = paddle.optimizer.lr.ExponentialDecay( learning_rate=config.lr, gamma=config.lr_decay, verbose=True) optimizer = paddle.optimizer.Adam( diff --git a/paddlespeech/s2t/training/gradclip.py b/paddlespeech/s2t/training/gradclip.py deleted file mode 100644 index 06587c749b5..00000000000 --- a/paddlespeech/s2t/training/gradclip.py +++ /dev/null @@ -1,86 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import paddle -from paddle.fluid import core -from paddle.fluid import layers -from paddle.fluid.dygraph import base as imperative_base - -from paddlespeech.s2t.utils.log import Log - -__all__ = ["ClipGradByGlobalNormWithLog"] - -logger = Log(__name__).getlog() - - -class ClipGradByGlobalNormWithLog(paddle.nn.ClipGradByGlobalNorm): - def __init__(self, clip_norm): - super().__init__(clip_norm) - - def __repr__(self): - return f"{self.__class__.__name__}(global_clip_norm={self.clip_norm})" - - @imperative_base.no_grad - def _dygraph_clip(self, params_grads): - params_and_grads = [] - sum_square_list = [] - for i, (p, g) in enumerate(params_grads): - if g is None: - continue - if getattr(p, 'need_clip', True) is False: - continue - merge_grad = g - if g.type == core.VarDesc.VarType.SELECTED_ROWS: - merge_grad = layers.merge_selected_rows(g) - merge_grad = layers.get_tensor_from_selected_rows(merge_grad) - square = paddle.square(merge_grad) - sum_square = paddle.sum(square) - sum_square_list.append(sum_square) - - # debug log, not dump all since slow down train process - if i < 10: - logger.debug( - f"Grad Before Clip: {p.name}: {float(sum_square.sqrt()) }") - - # all parameters have been filterd out - if len(sum_square_list) == 0: - return params_grads - - global_norm_var = paddle.concat(sum_square_list) - global_norm_var = paddle.sum(global_norm_var) - global_norm_var = paddle.sqrt(global_norm_var) - - # debug log - logger.debug(f"Grad Global Norm: {float(global_norm_var)}!!!!") - - max_global_norm = paddle.full( - shape=[1], dtype=global_norm_var.dtype, fill_value=self.clip_norm) - clip_var = paddle.divide( - x=max_global_norm, - y=paddle.maximum(x=global_norm_var, y=max_global_norm)) - for i, (p, g) in enumerate(params_grads): - if g is None: - continue - if getattr(p, 'need_clip', True) is False: - params_and_grads.append((p, g)) - continue - new_grad = paddle.multiply(x=g, y=clip_var) - params_and_grads.append((p, new_grad)) - - # debug log, not dump all since slow down train process - if i < 10: - logger.debug( - f"Grad After Clip: {p.name}: {float(new_grad.square().sum().sqrt())}" - ) - - return params_and_grads diff --git a/paddlespeech/s2t/training/optimizer/__init__.py b/paddlespeech/s2t/training/optimizer/__init__.py index aafdc5b6a3f..0f998ddeca5 100644 --- a/paddlespeech/s2t/training/optimizer/__init__.py +++ b/paddlespeech/s2t/training/optimizer/__init__.py @@ -19,7 +19,7 @@ import paddle from paddle.optimizer import Optimizer from paddle.regularizer import L2Decay -from paddlespeech.s2t.training.gradclip import ClipGradByGlobalNormWithLog + from paddlespeech.s2t.utils.dynamic_import import dynamic_import from paddlespeech.s2t.utils.dynamic_import import instance_class from paddlespeech.s2t.utils.log import Log @@ -100,7 +100,7 @@ def from_args(cls, name: str, args: Dict[Text, Any]): assert "parameters" in args, "parameters not in args." assert "learning_rate" in args, "learning_rate not in args." - grad_clip = ClipGradByGlobalNormWithLog( + grad_clip = paddle.nn.ClipGradByGlobalNorm( args['grad_clip']) if "grad_clip" in args else None weight_decay = L2Decay( args['weight_decay']) if "weight_decay" in args else None diff --git a/tests/unit/tts/test_ssml.py b/tests/unit/tts/test_ssml.py index 2c24018377a..4c3e9d538a1 100644 --- a/tests/unit/tts/test_ssml.py +++ b/tests/unit/tts/test_ssml.py @@ -72,3 +72,12 @@ for i, sub in enumerate(outs): print(i, sub) print() + + import json + import xmltodict + text = "我们的声学模型使用了 Fast Speech Two。前浪在沙滩上,沙滩上倒了一堆。 想象干干的树干了, 里面有个干尸,不知是被谁死的。" + ssml = xmltodict.parse(text) + print(json.dumps(ssml)) + print(ssml['speak'].keys()) + print(ssml['speak']['#text']) + print(ssml['speak']['say-as'])