[CodeStyle][Typos][N-11] Fix typos (Normlized, normlized) #70263

Merged · 1 commit · Dec 17, 2024
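Summary (paraphrased from the diff below): this change removes the normlized / Normlized allowlist entries from _typos.toml and corrects the misspelled identifiers that relied on them (normlized_axis, un_normlized_axis, normlized_shape_, normlized_numel_, GetNormlizedNumel) to the proper "normalized" spelling in the two affected headers.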
2 changes: 0 additions & 2 deletions _typos.toml
@@ -346,8 +346,6 @@ neigbhors = 'neigbhors'
 Neigbors = 'Neigbors'
 neighor = 'neighor'
 netwrok = 'netwrok'
-normlized = 'normlized'
-Normlized = 'Normlized'
 normlize = 'normlize'
 noraml = 'noraml'
 numer = 'numer'
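Note: in the typos checker's configuration, an entry such as normlized = 'normlized' maps the misspelling to itself, which tells the checker to accept it. Removing the two entries above re-enables detection of normlized/Normlized, so the remaining occurrences in the code base (the headers below) are fixed in the same commit to keep CI green.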
18 changes: 9 additions & 9 deletions paddle/fluid/primitive/decomp_rule/decomp_vjp/details.h
@@ -834,16 +834,16 @@ void layer_norm_grad(const Tensor& x,
   auto bias_ptr = bias.get_ptr();
   LayerNormDecompHelper decomp_help(x, scale, bias, begin_norm_axis);
 
-  std::vector<int64_t> normlized_axis;
+  std::vector<int64_t> normalized_axis;
   std::vector<int64_t> mean_var_new_shape(mean.dims().size(), 0);
   for (int i = begin_norm_axis; i < x_dims.size(); ++i) {
     mean_var_new_shape.push_back(1);
-    normlized_axis.push_back(i);
+    normalized_axis.push_back(i);
   }
 
-  std::vector<int64_t> un_normlized_axis;
+  std::vector<int64_t> un_normalized_axis;
   for (int i = 0; i < begin_norm_axis; ++i) {
-    un_normlized_axis.push_back(i);
+    un_normalized_axis.push_back(i);
   }
 
   auto mean_ = reshape<T>(mean, mean_var_new_shape);
@@ -875,14 +875,14 @@ void layer_norm_grad(const Tensor& x,
   }
 
   auto dx_end = sqrt_var_1 * out_grad_scale;
-  auto d_mean = dx_end.sum(normlized_axis, x_cast.dtype(), true);  // M,1
+  auto d_mean = dx_end.sum(normalized_axis, x_cast.dtype(), true);  // M,1
 
   auto d_std_1 = (tmp * x_sub_mean * out_grad_scale)
-                     .sum(normlized_axis, x_cast.dtype(), true);  // M,1
+                     .sum(normalized_axis, x_cast.dtype(), true);  // M,1
   auto d_std = d_std_1 * x_sub_mean_mul_sqrt_var_1;  // M,1 * M,N = M,N
 
   auto d_mean_d_std =
-      (d_mean + d_std) / decomp_help.GetNormlizedNumel<T>(d_std);
+      (d_mean + d_std) / decomp_help.GetNormalizedNumel<T>(d_std);
 
   auto x_grad_tmp = dx_end - d_mean_d_std;
   x_grad_tmp = ConverToOrig<T>(x_grad_tmp, x.dtype());
@@ -893,7 +893,7 @@ void layer_norm_grad(const Tensor& x,
   if (scale_grad) {
     if (scale_ptr) {
       auto scale_grad_tmp = (x_sub_mean_mul_sqrt_var_1 * out_grad_cast)
-                                .sum(un_normlized_axis, x_cast.dtype(), true);
+                                .sum(un_normalized_axis, x_cast.dtype(), true);
       scale_grad_tmp = reshape<T>(scale_grad_tmp, {-1});
       scale_grad_tmp = ConverToOrig<T>(scale_grad_tmp, scale_ptr->dtype());
 
@@ -906,7 +906,7 @@ void layer_norm_grad(const Tensor& x,
   if (bias_grad) {
     if (bias_ptr) {
       auto bias_grad_tmp =
-          out_grad_cast.sum(un_normlized_axis, x_cast.dtype(), true);
+          out_grad_cast.sum(un_normalized_axis, x_cast.dtype(), true);
       bias_grad_tmp = reshape<T>(bias_grad_tmp, {-1});
       bias_grad_tmp = ConverToOrig<T>(bias_grad_tmp, bias_ptr->dtype());
 
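For orientation, the variables renamed above implement the standard layer normalization backward pass. The following is a sketch of the textbook formulas, not a line-by-line transcription of the code: write mu and sigma for the mean and standard deviation over the normalized axes, x_hat = (x - mu) / sigma, g = gamma * dy (elementwise), and N for the number of normalized elements (the value GetNormalizedNumel supplies). Then

\frac{\partial L}{\partial x} = \frac{1}{\sigma}\Big( g - \frac{1}{N}\sum_{\text{norm}} g - \hat{x} \odot \frac{1}{N}\sum_{\text{norm}} g \odot \hat{x} \Big), \qquad
\frac{\partial L}{\partial \gamma} = \sum_{\text{un-norm}} dy \odot \hat{x}, \qquad
\frac{\partial L}{\partial \beta} = \sum_{\text{un-norm}} dy.

The two sums over the normalized axes correspond to the .sum(normalized_axis, ...) reductions (d_mean and d_std), the division by N is GetNormalizedNumel, and the scale and bias gradients reduce over un_normalized_axis instead.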
24 changes: 12 additions & 12 deletions paddle/fluid/primitive/decomp_utils/decomp_utils.h
@@ -322,22 +322,22 @@ class LayerNormDecompHelper {
     for (int i = begin_norm_axis; i < x_rank_; ++i) {
       if (x_dims[i] < 0) {
         static_norm_shape_ = false;
-        normlized_numel_ = -1;
+        normalized_numel_ = -1;
         break;
       }
 
-      normlized_shape_.push_back(x_dims[i]);
+      normalized_shape_.push_back(x_dims[i]);
 
-      normlized_numel_ *= x_dims[i];
+      normalized_numel_ *= x_dims[i];
     }
 
     if (!static_norm_shape_) {
       // try get static norm numel from sacle for bias
-      normlized_numel_ = -1;
+      normalized_numel_ = -1;
       if (scale.get_ptr()) {
-        normlized_numel_ = scale->dims()[0];
+        normalized_numel_ = scale->dims()[0];
       } else if (bias.get_ptr()) {
-        normlized_numel_ = bias->dims()[0];
+        normalized_numel_ = bias->dims()[0];
       }
     }
   }
@@ -349,17 +349,17 @@ class LayerNormDecompHelper {
     }
 
     if (static_norm_shape_) {
-      return reshape<T>(s, normlized_shape_);
+      return reshape<T>(s, normalized_shape_);
     } else {
       return backend::reshape<T>(
           s, get_slice_vec<T>(shape64<T>(x), begin_norm_axis_, x_rank_));
     }
   }
 
   template <typename T>
-  Tensor GetNormlizedNumel(const Tensor& x) {
-    if (normlized_numel_ != -1) {
-      return full_scalar<T>(normlized_numel_, x.dtype());
+  Tensor GetNormalizedNumel(const Tensor& x) {
+    if (normalized_numel_ != -1) {
+      return full_scalar<T>(normalized_numel_, x.dtype());
     } else {
       auto x_shape = shape64<T>(x);
       auto numel = get_slice<T>(x_shape, begin_norm_axis_);
@@ -372,11 +372,11 @@ class LayerNormDecompHelper {
   }
 
  private:
-  std::vector<int64_t> normlized_shape_;
+  std::vector<int64_t> normalized_shape_;
   bool scale_need_reshape_;
   bool static_norm_shape_;
   int64_t x_rank_;
-  int64_t normlized_numel_{1};
+  int64_t normalized_numel_{1};
   int begin_norm_axis_;
 };
 
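The class being renamed here caches the product of the normalized dimensions when they are statically known, and falls back to -1 (resolved later from the runtime shape, or from the scale/bias length) otherwise. A minimal standalone sketch of that static path, using a hypothetical free function in place of the member logic:

    #include <cstdint>
    #include <vector>

    // Hypothetical stand-in for the constructor logic of LayerNormDecompHelper:
    // returns the product of the dimensions normalized over, or -1 when any of
    // them is dynamic (negative) and must be resolved at runtime instead.
    int64_t StaticNormalizedNumel(const std::vector<int64_t>& x_dims,
                                  int begin_norm_axis) {
      int64_t numel = 1;
      for (size_t i = static_cast<size_t>(begin_norm_axis); i < x_dims.size();
           ++i) {
        if (x_dims[i] < 0) {
          return -1;  // dynamic dimension: defer to the runtime shape
        }
        numel *= x_dims[i];
      }
      return numel;
    }

    // Example: StaticNormalizedNumel({8, 16, 32}, 1) == 512
    //          StaticNormalizedNumel({8, -1, 32}, 1) == -1

GetNormalizedNumel then only has to materialize the cached value as a scalar tensor, taking the runtime shape64 slice path when the cache holds -1.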