From 3f3041648424a068885212bc0a006c25a0cf100e Mon Sep 17 00:00:00 2001
From: yinfan98 <1106310035@qq.com>
Date: Sat, 21 Dec 2024 14:52:56 +0000
Subject: [PATCH 1/2] Fix typos starting with "m" and remove their _typos.toml entries

---
 _typos.toml                                   | 30 -------------
 .../group_schedule/search/config_searcher.cc  |  4 +-
 .../group_schedule/search/config_searcher.h   |  2 +-
 paddle/cinn/ir/ir_base.cc                     | 14 +++----
 paddle/cinn/ir/ir_base.h                      |  2 +-
 paddle/cinn/ir/op/ir_operators.cc             | 13 +++---
 paddle/common/ddim.h                          |  2 +-
 .../distributed/ps/service/heter_server.cc    |  2 +-
 .../ps/table/common_graph_table.cc            | 22 +++++-----
 .../ir/embedding_fc_lstm_fuse_pass.cc         |  2 +-
 .../interpreter/dependency_builder.cc         | 12 +++---
 .../framework/new_executor/pir_interpreter.cc |  6 +--
 .../new_executor/program_interpreter.cc       |  6 +--
 .../new_executor/workqueue/events_waiter.h    |  2 +-
 paddle/fluid/framework/operator.cc            |  4 +-
 paddle/fluid/framework/var_desc.h             |  2 +-
 paddle/fluid/imperative/prepared_operator.cc  |  2 +-
 .../fluid/inference/api/paddle_pass_builder.h |  2 +-
 .../onednn/conv2d_transpose_bn_fuse_pass.cc   |  4 +-
 .../phi/core/memory/allocation/memory_block.h |  4 +-
 .../spmd_rules/default_data_parallel.cc       | 16 +++----
 paddle/phi/infermeta/spmd_rules/replicated.cc | 42 +++++++++----------
 .../kernels/cpu/overlap_add_grad_kernel.cc    |  2 +-
 .../gpu/fused_dropout_add_grad_kernel.cu      |  4 +-
 paddle/phi/kernels/gpu/assign_pos_kernel.cu   |  4 +-
 .../kernels/gpu/c_embedding_grad_kernel.cu    |  4 +-
 paddle/phi/kernels/gpu/c_embedding_kernel.cu  |  4 +-
 ..._softmax_with_cross_entropy_grad_kernel.cu |  4 +-
 .../c_softmax_with_cross_entropy_kernel.cu    |  4 +-
 paddle/phi/kernels/gpu/c_split_kernel.cu      |  4 +-
 .../kernels/gpu/class_center_sample_kernel.cu |  4 +-
 .../gpu/collect_fpn_proposals_kernel.cu       |  4 +-
 .../gpu/distribute_fpn_proposals_kernel.cu    |  4 +-
 paddle/phi/kernels/gpu/nll_loss.h             |  4 +-
 .../kernels/gpu/overlap_add_grad_kernel.cu    |  2 +-
 .../gpu/prune_gate_by_capacity_kernel.cu      |  4 +-
 .../phi/kernels/gpu/roi_align_grad_kernel.cu  |  4 +-
 paddle/phi/kernels/gpu/roi_align_kernel.cu    |  4 +-
 .../phi/kernels/gpu/roi_pool_grad_kernel.cu   |  4 +-
 paddle/phi/kernels/gpu/roi_pool_kernel.cu     |  4 +-
 .../gpu/sigmoid_cross_entropy_with_logits.h   |  4 +-
 paddle/phi/kernels/impl/frame_kernel_impl.h   |  2 +-
 .../kernels/impl/margin_cross_entropy.cu.h    |  4 +-
 paddle/phi/kernels/kps/elementwise_kernel.cu  |  2 +-
 .../legacy/cpu/fused_elementwise_kernel.cc    |  2 +-
 ...kernel.h => elementwise_multiply_kernel.h} |  0
 .../elementwise_multiply_kernel.h             |  2 +-
 .../sparse/gpu/fused_attention_grad_kernel.cu |  2 +-
 .../distributed/auto_parallel/moe_utils.py    |  2 +-
 python/paddle/distributed/auto_tuner/utils.py |  2 +-
 .../paddle/jit/dy2static/convert_operators.py |  4 +-
 .../utils/cpp_extension/cpp_extension.py      |  2 +-
 python/paddle/vision/transforms/transforms.py |  2 +-
 .../semi_auto_parallel_for_replicated_spmd.py |  2 +-
 test/ir/pir/cinn/test_anchor_fusion.py        |  2 +-
 test/legacy_test/test_case.py                 |  2 +-
 test/legacy_test/test_matmul_fp8_op.py        |  2 +-
 test/legacy_test/test_max_min_amax_amin_op.py |  2 +-
 .../legacy_test/test_tensor_type_promotion.py |  4 +-
 test/legacy_test/test_variable.py             | 10 ++---
 test/ps/__init__.py                           |  2 +-
 ...t2_int8_image_classification_comparison.py |  4 +-
 .../quant2_int8_nlp_comparison.py             |  2 +-
 ...nt_int8_image_classification_comparison.py |  2 +-
 tools/coverage/gcda_clean.py                  |  2 +-
 tools/gen_pybind11_stub.py                    |  2 +-
 66 files changed, 151 insertions(+), 180 deletions(-)
 rename paddle/phi/kernels/legacy/{elementwise_multipy_kernel.h => elementwise_multiply_kernel.h} (100%)

diff --git a/_typos.toml b/_typos.toml
index 045134f1dc197a..8b477d563113f6 100644
--- a/_typos.toml
+++ b/_typos.toml
@@ -282,36 +282,6 @@ localy = 'localy'
 Localy = 'Localy'
 logarithmical = 'logarithmical'
 Loggin = 'Loggin'
-macor = 'macor'
-mantain = 'mantain'
-mak = 'mak'
-manualy = 'manualy'
-mannualy = 'mannualy'
-maping = 'maping'
-Maxinum = 'Maxinum'
-maxmium = 'maxmium'
-menas = 'menas'
-momory = 'momory'
-Meatadata = 'Meatadata'
-miliseconds = 'miliseconds'
-minimun = 'minimun'
-Minium = 'Minium'
-minumum = 'minumum'
-modication = 'modication'
-modifed = 'modifed'
-modifing = 'modifing'
-modifty = 'modifty'
-moduels = 'moduels'
-Modul = 'Modul'
-Morevoer = 'Morevoer'
-mutiple = 'mutiple'
-mutiply = 'mutiply'
-Mutiply = 'Mutiply'
-multipy = 'multipy'
-Multiplie = 'Multiplie'
-Muti = 'Muti'
-muti = 'muti'
-mutexs = 'mutexs'
 occured = 'occured'
 Ocurred = 'Ocurred'
 occures = 'occures'
diff --git a/paddle/cinn/ir/group_schedule/search/config_searcher.cc b/paddle/cinn/ir/group_schedule/search/config_searcher.cc
index bd3ed6be17a636..aa3f0f6210336b 100644
--- a/paddle/cinn/ir/group_schedule/search/config_searcher.cc
+++ b/paddle/cinn/ir/group_schedule/search/config_searcher.cc
@@ -208,7 +208,7 @@ ScheduleConfigSearcher::ScheduleConfigSearcher(
       constraints_(constraints) {}
 
 std::pair<ScoreType, CandidateType> ScheduleConfigSearcher::Search(
-    bool is_search_minimun) {
+    bool is_search_minimum) {
   VLOG(6) << "Start Search...";
   CandidateGenerator candidate_generator(candidate_range_, constraints_);
   std::vector<CandidateType> candidates = candidate_generator.Candidates();
@@ -222,7 +222,7 @@ std::pair<ScoreType, CandidateType> ScheduleConfigSearcher::Search(
     VLOG(6) << "Score = " << score;
     records_[score] = candidate;
   }
-  return is_search_minimun ? *records_.begin() : *(records_.end()--);
+  return is_search_minimum ? *records_.begin() : *(records_.end()--);
 }
 
 }  // namespace search
diff --git a/paddle/cinn/ir/group_schedule/search/config_searcher.h b/paddle/cinn/ir/group_schedule/search/config_searcher.h
index a176676aa9b281..72e3be65b114d0 100644
--- a/paddle/cinn/ir/group_schedule/search/config_searcher.h
+++ b/paddle/cinn/ir/group_schedule/search/config_searcher.h
@@ -87,7 +87,7 @@ class ScheduleConfigSearcher {
       const std::vector<std::pair<int, int>>& candidate_range,
       const std::vector<ConstraintFunc>& constraints = {});
 
-  std::pair<ScoreType, CandidateType> Search(bool is_search_minimun = true);
+  std::pair<ScoreType, CandidateType> Search(bool is_search_minimum = true);
 
  private:
   std::vector<std::unique_ptr<BaseObjectiveFunc>> objective_funcs_;
diff --git a/paddle/cinn/ir/ir_base.cc b/paddle/cinn/ir/ir_base.cc
index 99671977993fa8..43c24bc1edcb66 100644
--- a/paddle/cinn/ir/ir_base.cc
+++ b/paddle/cinn/ir/ir_base.cc
@@ -404,14 +404,14 @@ const IndexExpr IndexExpr::operand(int32_t i) const {
   return get()->operand(i).as_index();
 }
 
-int64_t IndexExpr::GetLargestMutiplyPart() const {
+int64_t IndexExpr::GetLargestMultiplyPart() const {
   switch (node_type()) {
     case cinn::ir::IrNodeTy::_Var_:
       return 1;
     case cinn::ir::IrNodeTy::Div: {
       if (operand(1).type().is_index_type()) {
-        int64_t lhsDiv = operand(0).GetLargestMutiplyPart();
-        int64_t rhsDiv = operand(1).GetLargestMutiplyPart();
+        int64_t lhsDiv = operand(0).GetLargestMultiplyPart();
+        int64_t rhsDiv = operand(1).GetLargestMultiplyPart();
         if (lhsDiv % rhsDiv == 0) return std::abs(lhsDiv / rhsDiv);
       }
       return 1;
@@ -421,13 +421,13 @@ int64_t IndexExpr::GetLargestMutiplyPart() const {
       return std::abs(int_imm->value);
     }
     case cinn::ir::IrNodeTy::Mul: {
-      return operand(0).GetLargestMutiplyPart() *
-             operand(1).GetLargestMutiplyPart();
+      return operand(0).GetLargestMultiplyPart() *
+             operand(1).GetLargestMultiplyPart();
     }
     case cinn::ir::IrNodeTy::Add:
     case cinn::ir::IrNodeTy::Mod: {
-      return std::gcd(operand(0).GetLargestMutiplyPart(),
-                      operand(1).GetLargestMutiplyPart());
+      return std::gcd(operand(0).GetLargestMultiplyPart(),
+                      operand(1).GetLargestMultiplyPart());
     }
   }
   PADDLE_THROW(::common::errors::Unimplemented("Unsupported type of expr: %s",
diff --git a/paddle/cinn/ir/ir_base.h b/paddle/cinn/ir/ir_base.h
index e7d46d82766015..5e269de04de907 100644
--- a/paddle/cinn/ir/ir_base.h
+++ b/paddle/cinn/ir/ir_base.h
@@ -499,7 +499,7 @@ struct IndexExpr : public IrNodeRef {
 
   Type type() const { return p_->type(); }
 
-  int64_t GetLargestMutiplyPart() const;
+  int64_t GetLargestMultiplyPart() const;
 
   IndexExpr Normalize() const;
 
diff --git a/paddle/cinn/ir/op/ir_operators.cc b/paddle/cinn/ir/op/ir_operators.cc
index de5a8974de2db9..1faa8a8cc97bea 100644
--- a/paddle/cinn/ir/op/ir_operators.cc
+++ b/paddle/cinn/ir/op/ir_operators.cc
@@ -509,8 +509,8 @@ static IndexExpr SimplifyDiv(const IndexExpr &lhs, const IndexExpr &rhs) {
 
     // (expr1 * c1 * c2 + expr2 * c1 * c3) / c1 ===> expr1 * c2 + expr2 * c3.
     if (lhsAdd) {
-      int64_t llhsFactor = lhsAdd->a().as_index().GetLargestMutiplyPart();
-      int64_t lrhsFactor = lhsAdd->b().as_index().GetLargestMutiplyPart();
+      int64_t llhsFactor = lhsAdd->a().as_index().GetLargestMultiplyPart();
+      int64_t lrhsFactor = lhsAdd->b().as_index().GetLargestMultiplyPart();
       if (llhsFactor % rhsConst->value == 0 &&
           lrhsFactor % rhsConst->value == 0) {
         return lhsAdd->a().as_index() / rhsConst->value +
@@ -565,8 +565,8 @@ static IndexExpr SimplifyMod(const IndexExpr &lhs, const IndexExpr &rhs) {
 
     // (expr1 * c1 * c2+ expr2 * c3) % c1 ===> expr2 * c3 % c1.
     if (lhsAdd) {
-      int64_t llhsFactor = lhsAdd->a().as_index().GetLargestMutiplyPart();
-      int64_t lrhsFactor = lhsAdd->b().as_index().GetLargestMutiplyPart();
+      int64_t llhsFactor = lhsAdd->a().as_index().GetLargestMultiplyPart();
+      int64_t lrhsFactor = lhsAdd->b().as_index().GetLargestMultiplyPart();
       if (llhsFactor % rhsConst->value == 0)
         return lhsAdd->b().as_index() % rhsConst->value;
       if (lrhsFactor % rhsConst->value == 0)
@@ -574,11 +574,12 @@ static IndexExpr SimplifyMod(const IndexExpr &lhs, const IndexExpr &rhs) {
     }
 
     // expr1 * (c1 * c2) % c1 ===> 0.
-    if (lhs.GetLargestMutiplyPart() % rhsConst->value == 0) return IndexExpr(0);
+    if (lhs.GetLargestMultiplyPart() % rhsConst->value == 0)
+      return IndexExpr(0);
 
     // expr1 % (c1 * c2) % c1 ===> expr1 % c1.
     if (lhsMod) {
-      int64_t llhsFactor = lhsMod->b().as_index().GetLargestMutiplyPart();
+      int64_t llhsFactor = lhsMod->b().as_index().GetLargestMultiplyPart();
       if (llhsFactor % rhsConst->value == 0)
         return lhsMod->a().as_index() % rhsConst->value;
     }
diff --git a/paddle/common/ddim.h b/paddle/common/ddim.h
index 5af0ae660c1b78..f0d301203a70fa 100644
--- a/paddle/common/ddim.h
+++ b/paddle/common/ddim.h
@@ -217,7 +217,7 @@ TEST_API std::ostream& operator<<(std::ostream&, const DDim&);
 
 /**
  * \brief Flatten dim to 3d
- * e.g., DDim d = mak_ddim({1, 2, 3, 4, 5, 6})
+ * e.g., DDim d = make_ddim({1, 2, 3, 4, 5, 6})
  *       flatten_to_3d(d, 2, 4); ===> {1*2, 3*4, 5*6} ===> {2, 12, 30}
  */
 TEST_API DDim flatten_to_3d(const DDim& src,
diff --git a/paddle/fluid/distributed/ps/service/heter_server.cc b/paddle/fluid/distributed/ps/service/heter_server.cc
index 525ea32128100f..b83163570a1f2f 100644
--- a/paddle/fluid/distributed/ps/service/heter_server.cc
+++ b/paddle/fluid/distributed/ps/service/heter_server.cc
@@ -112,7 +112,7 @@ int SendAndRecvVariableHandler::SaveInSwitchWithShard(
   VLOG(4) << "entering SaveInSwitchWithShard";
   int32_t group_id = request->group_id();
   if (group_id >= FLAGS_heter_world_size) {
-    LOG(ERROR) << "group id exceed maxmium";
+    LOG(ERROR) << "group id exceed maximum";
   }
   auto& local_shard = _local_shards[group_id];
   auto& request_io_buffer = cntl->request_attachment();
diff --git a/paddle/fluid/distributed/ps/table/common_graph_table.cc b/paddle/fluid/distributed/ps/table/common_graph_table.cc
index 4fe05675838324..625102394daa8f 100644
--- a/paddle/fluid/distributed/ps/table/common_graph_table.cc
+++ b/paddle/fluid/distributed/ps/table/common_graph_table.cc
@@ -368,11 +368,11 @@ paddle::framework::GpuPsCommRankFea GraphTable::make_gpu_ps_rank_fea(
   // 遍历 rank_nodes[i][shard_num]，分8份，分配到 res
   std::vector<std::future<size_t>> tasks;
 
-  auto mutexs = new std::mutex[shard_num_per_server];
+  auto mutexes = new std::mutex[shard_num_per_server];
   for (int i = 0; i < node_num_; i++) {
     for (size_t shard_id = 0; shard_id < shard_num_per_server; shard_id++) {
       tasks.push_back(_cpu_worker_pool[gpu_id]->enqueue(
-          [i, gpu_id, shard_id, &rank_nodes, &node_num_vec, &mutexs]()
+          [i, gpu_id, shard_id, &rank_nodes, &node_num_vec, &mutexes]()
               -> size_t {
             auto &rank_node = rank_nodes[i][shard_id];
             size_t start = 0;
@@ -381,9 +381,9 @@ paddle::framework::GpuPsCommRankFea GraphTable::make_gpu_ps_rank_fea(
                 start++;
               }
             }
-            mutexs[shard_id].lock();
+            mutexes[shard_id].lock();
             node_num_vec[shard_id] += start;
-            mutexs[shard_id].unlock();
+            mutexes[shard_id].unlock();
             return start;
           }));
     }
@@ -3150,12 +3150,12 @@ class MergeShardVector {
     _slice_num = slice_num;
     _shard_keys = output;
     _shard_keys->resize(slice_num);
-    _mutexs = new std::mutex[slice_num];
+    _mutexes = new std::mutex[slice_num];
   }
   ~MergeShardVector() {
-    if (_mutexs != nullptr) {
-      delete[] _mutexs;
-      _mutexs = nullptr;
+    if (_mutexes != nullptr) {
+      delete[] _mutexes;
+      _mutexes = nullptr;
     }
   }
   // merge shard keys
@@ -3165,15 +3165,15 @@ class MergeShardVector {
       auto &dest = (*_shard_keys)[shard_id];
       auto &src = shard_keys[shard_id];
 
-      _mutexs[shard_id].lock();
+      _mutexes[shard_id].lock();
       dest.insert(dest.end(), src.begin(), src.end());
-      _mutexs[shard_id].unlock();
+      _mutexes[shard_id].unlock();
     }
   }
 
  private:
   int _slice_num = 0;
-  std::mutex *_mutexs = nullptr;
+  std::mutex *_mutexes = nullptr;
   std::vector<std::vector<uint64_t>> *_shard_keys;
 };
 
diff --git a/paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.cc b/paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.cc
index ae028086bcce88..d0805c74e94715 100644
--- a/paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.cc
@@ -93,7 +93,7 @@ static int BuildFusion(Graph* graph,
     embeddings_tensor->Resize(
         {embedding_tensor.dims()[0], weightx_tensor.dims()[1]});
 
-    // Multiplie embeddings via WeightsX and add bias
+    // Multiply embeddings via WeightsX and add bias
     auto embedding_data = embedding_tensor.data<float>();
     auto weightx_data = weightx_tensor.data<float>();
     auto embeddings_data =
diff --git a/paddle/fluid/framework/new_executor/interpreter/dependency_builder.cc b/paddle/fluid/framework/new_executor/interpreter/dependency_builder.cc
index ce11504ef0a3bb..9807b237f41df2 100644
--- a/paddle/fluid/framework/new_executor/interpreter/dependency_builder.cc
+++ b/paddle/fluid/framework/new_executor/interpreter/dependency_builder.cc
@@ -532,7 +532,7 @@ void DependencyBuilder::ShrinkDownstreamMap() {
       continue;
     }
 
-    std::set<size_t> minumum_nexts;
+    std::set<size_t> minimum_nexts;
     for (size_t item : op_downstream_map_->at(i)) {
       bool not_after_any = true;
       // find the op that is not executed after any
@@ -546,12 +546,12 @@ void DependencyBuilder::ShrinkDownstreamMap() {
       }
       if (not_after_any) {
         VLOG(8) << "downstream op of " << i << ": " << item;
-        minumum_nexts.insert(item);
+        minimum_nexts.insert(item);
       }
     }
     // NOTE(Ruibiao): op_happens_before will not be changed when shrink
     // downstream map
-    (*op_downstream_map_)[i] = minumum_nexts;
+    (*op_downstream_map_)[i] = minimum_nexts;
   }
   VLOG(8) << "Finish shrink downstream map";
   VLOG(8) << "downstream count: " << CountDownstreamMap(*op_downstream_map_);
@@ -1008,7 +1008,7 @@ void DependencyBuilderSimplify::ShrinkDownstreamMap() {
       continue;
     }
 
-    std::set<size_t> minumum_nexts;
+    std::set<size_t> minimum_nexts;
     for (size_t item : op_downstream_map_.at(i)) {
       bool not_after_any = true;
       // find the op that is not executed  any
@@ -1022,12 +1022,12 @@ void DependencyBuilderSimplify::ShrinkDownstreamMap() {
       }
       if (not_after_any) {
         VLOG(8) << "downstream op of " << i << ": " << item;
-        minumum_nexts.insert(item);
+        minimum_nexts.insert(item);
       }
     }
     // NOTE(Ruibiao): op_happens_before will not be changed when shrink
     // downstream map
-    op_downstream_map_.at(i) = minumum_nexts;
+    op_downstream_map_.at(i) = minimum_nexts;
   }
   VLOG(8) << "Finish shrink downstream map";
   VLOG(8) << "downstream count: " << CountDownstreamMap(op_downstream_map_);
diff --git a/paddle/fluid/framework/new_executor/pir_interpreter.cc b/paddle/fluid/framework/new_executor/pir_interpreter.cc
index 65928aaa78fc9d..cae032233dd39a 100644
--- a/paddle/fluid/framework/new_executor/pir_interpreter.cc
+++ b/paddle/fluid/framework/new_executor/pir_interpreter.cc
@@ -1413,7 +1413,7 @@ void PirInterpreter::CalculateLastLiveOps() {
   }
   VLOG(4) << "var_ref_count_.size() : " << var_ref_count_.size();
   for (size_t i = 0; i < last_live_ops_.size(); ++i) {
-    std::set<size_t> minumum_last_live_ops;
+    std::set<size_t> minimum_last_live_ops;
     for (size_t item : last_live_ops_[i]) {
       bool not_before_any = true;
       // find the op that is not executed before any
@@ -1429,11 +1429,11 @@ void PirInterpreter::CalculateLastLiveOps() {
         VLOG(6) << "last live op of var " << i << " "
                 << value_exe_info_->GetNameById(static_cast<int>(i)) << " : "
                 << item << " " << vec_instruction_base_[item]->Name();
-        minumum_last_live_ops.insert(item);
+        minimum_last_live_ops.insert(item);
         vec_instruction_base_[item]->AddGCCheckVar(i);
       }
     }
-    last_live_ops_[i] = minumum_last_live_ops;
+    last_live_ops_[i] = minimum_last_live_ops;
     var_ref_count_[i] = static_cast<int>(last_live_ops_[i].size());
   }
   VLOG(4) << "shrink the last_live_ops list for all vars in skip_gc_vars";
diff --git a/paddle/fluid/framework/new_executor/program_interpreter.cc b/paddle/fluid/framework/new_executor/program_interpreter.cc
index f83efd767a5d5a..7ce7a68d5c99dd 100644
--- a/paddle/fluid/framework/new_executor/program_interpreter.cc
+++ b/paddle/fluid/framework/new_executor/program_interpreter.cc
@@ -842,7 +842,7 @@ void ProgramInterpreter::Convert(
   // in this case, a is the input of op1 and op2, we only need to check
   // a after op2, because op2 always uses a after op1.
   for (size_t i = 0; i < last_live_ops_.size(); ++i) {
-    std::set<size_t> minumum_last_live_ops;
+    std::set<size_t> minimum_last_live_ops;
     for (size_t item : last_live_ops_[i]) {
       bool not_before_any = true;
       // find the op that is not executed before any
@@ -858,14 +858,14 @@ void ProgramInterpreter::Convert(
         VLOG(8) << "last live op of var " << i << " "
                 << var_scope_.GetNameById(static_cast<int>(i)) << " : " << item
                 << " " << vec_instruction_[item].OpBase()->Type();
-        minumum_last_live_ops.insert(item);
+        minimum_last_live_ops.insert(item);
         if (!(var_scope_.VarDesc(static_cast<int>(i)) &&
               var_scope_.VarDesc(static_cast<int>(i))->Persistable())) {
           vec_instruction_[item].AddGCCheckVar(i);
         }
       }
     }
-    last_live_ops_[i] = minumum_last_live_ops;
+    last_live_ops_[i] = minimum_last_live_ops;
     vec_meta_info[i].var_ref_count_ =
         static_cast<int>(last_live_ops_[i].size());
   }
diff --git a/paddle/fluid/framework/new_executor/workqueue/events_waiter.h b/paddle/fluid/framework/new_executor/workqueue/events_waiter.h
index a1c2f506b85d6a..0ab47375f996da 100644
--- a/paddle/fluid/framework/new_executor/workqueue/events_waiter.h
+++ b/paddle/fluid/framework/new_executor/workqueue/events_waiter.h
@@ -29,7 +29,7 @@ namespace framework {
 
 // A multiplexing waiter, be able to wait multiple kinds of events
 // simultaneously.
-// Muti-Producer single-consumer single-slot message-queue.
+// Multi-Producer single-consumer single-slot message-queue.
 class EventsWaiter {
  public:
   using EventId = std::size_t;
diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc
index 19391938ec6c45..3bab6c0389c640 100644
--- a/paddle/fluid/framework/operator.cc
+++ b/paddle/fluid/framework/operator.cc
@@ -1815,7 +1815,7 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
         if (is_xpu_kp_support) {
           auto expected_kernel_key_library_type = kernel_type_->library_type_;
           kernel_type_->library_type_ = LibraryType::kKP;
-          VLOG(3) << "modifing XPU KP kernel in static graph: "
+          VLOG(3) << "modifying XPU KP kernel in static graph: "
                   << phi_kernel_name
                   << ", using_kernel_key:" << *kernel_type_.get();
           auto try_phi_kernel_key =
@@ -1899,7 +1899,7 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
         if (is_xpu_kp_support) {
           auto expected_kernel_key_library_type = kernel_type_->library_type_;
           kernel_type_->library_type_ = LibraryType::kKP;
-          VLOG(3) << "modifing XPU KP kernel in static graph: "
+          VLOG(3) << "modifying XPU KP kernel in static graph: "
                   << phi_kernel_name
                   << ", using_kernel_key:" << *kernel_type_.get();
           auto try_phi_kernel_key =
diff --git a/paddle/fluid/framework/var_desc.h b/paddle/fluid/framework/var_desc.h
index 13af5f76d57664..639f98c0db848e 100644
--- a/paddle/fluid/framework/var_desc.h
+++ b/paddle/fluid/framework/var_desc.h
@@ -222,7 +222,7 @@ class TEST_API VarDesc {
   proto::VarType::TensorDesc *mutable_tensor_desc();
   std::vector<proto::VarType::TensorDesc *> mutable_tensor_descs();
 
-  // Is it really needed? Or just mantain a ptr from the block?
+  // Is it really needed? Or just maintain a ptr from the block?
   proto::VarDesc desc_;
   AttributeMap attrs_;
 
diff --git a/paddle/fluid/imperative/prepared_operator.cc b/paddle/fluid/imperative/prepared_operator.cc
index 09b3a14133db1b..0e4c21094fe127 100644
--- a/paddle/fluid/imperative/prepared_operator.cc
+++ b/paddle/fluid/imperative/prepared_operator.cc
@@ -271,7 +271,7 @@ PreparedOp PrepareImpl(
       if (is_xpu_kp_support) {
         auto expected_kernel_key_backend = expected_kernel_key.backend();
         expected_kernel_key.set_backend(phi::Backend::KPS);
-        VLOG(3) << "modifing XPU KP kernel: " << phi_kernel_name
+        VLOG(3) << "modifying XPU KP kernel: " << phi_kernel_name
                 << ", using_kernel_key:" << expected_kernel_key;
 
         if (!phi_kernel_factory.HasKernel(phi_kernel_name,
diff --git a/paddle/fluid/inference/api/paddle_pass_builder.h b/paddle/fluid/inference/api/paddle_pass_builder.h
index 8c2d903e7fb239..acb612052862dd 100644
--- a/paddle/fluid/inference/api/paddle_pass_builder.h
+++ b/paddle/fluid/inference/api/paddle_pass_builder.h
@@ -102,7 +102,7 @@ class PD_INFER_DECL PaddlePassBuilder {
   std::vector<std::string> AnalysisPasses() const {
     auto passes = analysis_passes_;
     // To make sure the ir_graph_to_program should be the last pass so any
-    // modication of IR will persist to the program.
+    // modification of IR will persist to the program.
     passes.push_back("ir_graph_to_program_pass");
     return passes;
   }
diff --git a/paddle/fluid/pir/transforms/onednn/conv2d_transpose_bn_fuse_pass.cc b/paddle/fluid/pir/transforms/onednn/conv2d_transpose_bn_fuse_pass.cc
index 2b5599e0991eec..23a1969b12cb48 100644
--- a/paddle/fluid/pir/transforms/onednn/conv2d_transpose_bn_fuse_pass.cc
+++ b/paddle/fluid/pir/transforms/onednn/conv2d_transpose_bn_fuse_pass.cc
@@ -137,7 +137,7 @@ class Conv2dTransposeBnOneDNNFusePattern : public paddle::drr::DrrPatternBase {
     //--- deal with filter ---
 
     // ConvTranpose weight is gIOHW, conv is gOIHW
-    // We transpose IOHW to IOHW first, then multipy scale, and transpose it to
+    // We transpose IOHW to IOHW first, then multiply scale, and transpose it to
     // IOHW again
     const auto &new_conv2d_filter_shape = res.ComputeAttr(
         [](const paddle::drr::MatchContext &match_ctx) -> std::vector<int> {
@@ -329,7 +329,7 @@ class Conv2dTransposeEltwiseBnOneDNNFusePattern
     //--- deal with filter ---
 
     // ConvTranpose weight is gIOHW, conv is gOIHW
-    // We transpose IOHW to IOHW first, then multipy scale, and transpose it to
+    // We transpose IOHW to IOHW first, then multiply scale, and transpose it to
     // IOHW again
     const auto &new_conv2d_filter_shape = res.ComputeAttr(
         [](const paddle::drr::MatchContext &match_ctx) -> std::vector<int> {
diff --git a/paddle/phi/core/memory/allocation/memory_block.h b/paddle/phi/core/memory/allocation/memory_block.h
index 631fca44f5157b..1cff33dfc45c7b 100644
--- a/paddle/phi/core/memory/allocation/memory_block.h
+++ b/paddle/phi/core/memory/allocation/memory_block.h
@@ -37,7 +37,7 @@ struct MemoryBlock {
   // init saves the MemoryBlock::Desc of the memory block in a MetadataCache.
   // If it is a CPU memory block, the MetadataCache writes the
   // MemoryBlock::Desc to the beginning of the block; or, if it is a GPU memory
-  // block, the MetadataCache writes the Meatadata to a std::map in
+  // block, the MetadataCache writes the Metadata to a std::map in
   // the CPU.
   void Init(MetadataCache* cache,
             Type t,
@@ -123,7 +123,7 @@ class MetadataCache {
   // Returns the MemoryBlock::Desc for a memory block.  When MetadataCache is
   // used to manage CPU memory, the MemoryBlock::Desc resides at the beginning
   // of the memory block; when used to manage GPU memory, the
-  // Meatadata resides in CPU memory indexed by cache_.
+  // Metadata resides in CPU memory indexed by cache_.
   MemoryBlock::Desc* LoadDesc(MemoryBlock* memory_block);
 
   // Saves the MemoryBlock::Desc of a memory block into the cache.  For CPU
diff --git a/paddle/phi/infermeta/spmd_rules/default_data_parallel.cc b/paddle/phi/infermeta/spmd_rules/default_data_parallel.cc
index e65fb33615ae69..880be6329db28e 100644
--- a/paddle/phi/infermeta/spmd_rules/default_data_parallel.cc
+++ b/paddle/phi/infermeta/spmd_rules/default_data_parallel.cc
@@ -59,9 +59,9 @@ SpmdInfo DefaultDataParallelInferSpmd(
     int ndim = outs[i]->dims().size();
     TensorDistAttr dist_attr_dst =
         CopyTensorDistAttrForOutput(ins[0]->dist_attr());
-    std::vector<int64_t> dst_dims_maping =
+    std::vector<int64_t> dst_dims_mapping =
         GetDefaultDataParallelDimsMapping(batch_axis_dim, ndim);
-    dist_attr_dst.set_dims_mapping(dst_dims_maping);
+    dist_attr_dst.set_dims_mapping(dst_dims_mapping);
     output_dist_attrs.emplace_back(dist_attr_dst);
   }
 
@@ -71,9 +71,9 @@ SpmdInfo DefaultDataParallelInferSpmd(
     int ndim = ins[i]->dims().size();
     TensorDistAttr dist_attr_dst =
         CopyTensorDistAttrForOutput(ins[i]->dist_attr());
-    std::vector<int64_t> dst_dims_maping =
+    std::vector<int64_t> dst_dims_mapping =
         GetDefaultDataParallelDimsMapping(batch_axis_dim, ndim);
-    dist_attr_dst.set_dims_mapping(dst_dims_maping);
+    dist_attr_dst.set_dims_mapping(dst_dims_mapping);
     dst_input_dist_attrs.emplace_back(dist_attr_dst);
   }
 
@@ -122,9 +122,9 @@ SpmdInfo DefaultDataParallelInferSpmdReverse(
     int ndim = outs[i]->dims().size();
     TensorDistAttr dist_attr_dst =
         CopyTensorDistAttrForOutput(outs[i]->dist_attr());
-    std::vector<int64_t> dst_dims_maping =
+    std::vector<int64_t> dst_dims_mapping =
         GetDefaultDataParallelDimsMapping(batch_axis_dim, ndim);
-    dist_attr_dst.set_dims_mapping(dst_dims_maping);
+    dist_attr_dst.set_dims_mapping(dst_dims_mapping);
     output_dist_attrs.emplace_back(dist_attr_dst);
   }
 
@@ -134,9 +134,9 @@ SpmdInfo DefaultDataParallelInferSpmdReverse(
     int ndim = ins[i]->dims().size();
     TensorDistAttr dist_attr_dst =
         CopyTensorDistAttrForOutput(ins[i]->dist_attr());
-    std::vector<int64_t> dst_dims_maping =
+    std::vector<int64_t> dst_dims_mapping =
         GetDefaultDataParallelDimsMapping(batch_axis_dim, ndim);
-    dist_attr_dst.set_dims_mapping(dst_dims_maping);
+    dist_attr_dst.set_dims_mapping(dst_dims_mapping);
     dst_input_dist_attrs.emplace_back(dist_attr_dst);
   }
 
diff --git a/paddle/phi/infermeta/spmd_rules/replicated.cc b/paddle/phi/infermeta/spmd_rules/replicated.cc
index 390117862e04e1..3134b428dd5216 100644
--- a/paddle/phi/infermeta/spmd_rules/replicated.cc
+++ b/paddle/phi/infermeta/spmd_rules/replicated.cc
@@ -46,8 +46,8 @@ SpmdInfo ReplicatedInferSpmd(const std::vector<const DistMetaTensor*>& ins,
     int ndim = outs[i]->dims().size();
     TensorDistAttr dist_attr_dst =
         CopyTensorDistAttrForOutput(ins[0]->dist_attr());
-    std::vector<int64_t> dst_dims_maping = GetReplicatedDimsMapping(ndim);
-    dist_attr_dst.set_dims_mapping(dst_dims_maping);
+    std::vector<int64_t> dst_dims_mapping = GetReplicatedDimsMapping(ndim);
+    dist_attr_dst.set_dims_mapping(dst_dims_mapping);
     output_dist_attrs.emplace_back(dist_attr_dst);
   }
 
@@ -61,8 +61,8 @@ SpmdInfo ReplicatedInferSpmd(const std::vector<const DistMetaTensor*>& ins,
     }
     TensorDistAttr dist_attr_dst =
         CopyTensorDistAttrForOutput(ins[i]->dist_attr());
-    std::vector<int64_t> dst_dims_maping = GetReplicatedDimsMapping(ndim);
-    dist_attr_dst.set_dims_mapping(dst_dims_maping);
+    std::vector<int64_t> dst_dims_mapping = GetReplicatedDimsMapping(ndim);
+    dist_attr_dst.set_dims_mapping(dst_dims_mapping);
     dst_input_dist_attrs.emplace_back(dist_attr_dst);
   }
 
@@ -103,8 +103,8 @@ SpmdInfo ReplicatedInferSpmdReverse(
     int ndim = outs[i]->dims().size();
     TensorDistAttr dist_attr_dst =
         CopyTensorDistAttrForOutput(outs[i]->dist_attr());
-    std::vector<int64_t> dst_dims_maping = GetReplicatedDimsMapping(ndim);
-    dist_attr_dst.set_dims_mapping(dst_dims_maping);
+    std::vector<int64_t> dst_dims_mapping = GetReplicatedDimsMapping(ndim);
+    dist_attr_dst.set_dims_mapping(dst_dims_mapping);
     output_dist_attrs.emplace_back(dist_attr_dst);
   }
 
@@ -114,8 +114,8 @@ SpmdInfo ReplicatedInferSpmdReverse(
     int ndim = ins[i]->dims().size();
     TensorDistAttr dist_attr_dst =
         CopyTensorDistAttrForOutput(ins[i]->dist_attr());
-    std::vector<int64_t> dst_dims_maping = GetReplicatedDimsMapping(ndim);
-    dist_attr_dst.set_dims_mapping(dst_dims_maping);
+    std::vector<int64_t> dst_dims_mapping = GetReplicatedDimsMapping(ndim);
+    dist_attr_dst.set_dims_mapping(dst_dims_mapping);
     dst_input_dist_attrs.emplace_back(dist_attr_dst);
   }
 
@@ -148,19 +148,19 @@ SpmdInfo ReplicatedInferDynamic(
   int64_t ninputs = static_cast<int64_t>(inputs.size());
   SpmdInfo spmd_info;
 
-  auto build_tensor_dist_attr =
-      [&nonnull_inputs](const DistMetaTensor& dist_meta_tensor) {
-        int ndim = dist_meta_tensor.dims().size();
-        TensorDistAttr dist_attr_dst =
-            CopyTensorDistAttrForOutput(dist_meta_tensor.dist_attr());
-        // `ndim == -1` means input is nullptr
-        if (ndim >= 0) {
-          std::vector<int64_t> dst_dims_maping = GetReplicatedDimsMapping(ndim);
-          dist_attr_dst.set_dims_mapping(dst_dims_maping);
-          nonnull_inputs.push_back(&dist_meta_tensor);
-        }
-        return dist_attr_dst;
-      };
+  auto build_tensor_dist_attr = [&nonnull_inputs](
+                                    const DistMetaTensor& dist_meta_tensor) {
+    int ndim = dist_meta_tensor.dims().size();
+    TensorDistAttr dist_attr_dst =
+        CopyTensorDistAttrForOutput(dist_meta_tensor.dist_attr());
+    // `ndim == -1` means input is nullptr
+    if (ndim >= 0) {
+      std::vector<int64_t> dst_dims_mapping = GetReplicatedDimsMapping(ndim);
+      dist_attr_dst.set_dims_mapping(dst_dims_mapping);
+      nonnull_inputs.push_back(&dist_meta_tensor);
+    }
+    return dist_attr_dst;
+  };
 
   for (int64_t i = 0; i < ninputs; i++) {
     if (paddle::holds_alternative<const DistMetaTensor*>(inputs[i])) {
diff --git a/paddle/phi/kernels/cpu/overlap_add_grad_kernel.cc b/paddle/phi/kernels/cpu/overlap_add_grad_kernel.cc
index 16415533df18f7..eb24ee1bdd6ae7 100644
--- a/paddle/phi/kernels/cpu/overlap_add_grad_kernel.cc
+++ b/paddle/phi/kernels/cpu/overlap_add_grad_kernel.cc
@@ -41,7 +41,7 @@ void OverlapAddGradKernel(const Context& dev_ctx,
                   : out_grad.dims()[static_cast<int>(out_grad_rank) - 1]);
 
   // When the number of input dims is larger than 2, it needs to copy
-  // from x to resize input into 2d and output into 3d. Morevoer, output
+  // from x to resize input into 2d and output into 3d. Moreover, output
   // dims will be restored at the last step.
   DenseTensor out_grad_(out_grad.type());
   out_grad_ = out_grad;
diff --git a/paddle/phi/kernels/fusion/gpu/fused_dropout_add_grad_kernel.cu b/paddle/phi/kernels/fusion/gpu/fused_dropout_add_grad_kernel.cu
index 90a511a46243e1..f185a28dac46b6 100644
--- a/paddle/phi/kernels/fusion/gpu/fused_dropout_add_grad_kernel.cu
+++ b/paddle/phi/kernels/fusion/gpu/fused_dropout_add_grad_kernel.cu
@@ -24,11 +24,11 @@
 #include "paddle/phi/kernels/funcs/dropout_impl.cu.h"
 
 static constexpr int kNumCUDAThreads = 512;
-static constexpr int kNumMaxinumNumBlocks = 4096;
+static constexpr int kNumMaximumNumBlocks = 4096;
 
 static inline int NumBlocks(const int N) {
   return std::min((N + kNumCUDAThreads - 1) / kNumCUDAThreads,
-                  kNumMaxinumNumBlocks);
+                  kNumMaximumNumBlocks);
 }
 
 namespace phi {
diff --git a/paddle/phi/kernels/gpu/assign_pos_kernel.cu b/paddle/phi/kernels/gpu/assign_pos_kernel.cu
index 891a18fc413155..bcb4283e953df8 100644
--- a/paddle/phi/kernels/gpu/assign_pos_kernel.cu
+++ b/paddle/phi/kernels/gpu/assign_pos_kernel.cu
@@ -21,11 +21,11 @@
 namespace phi {
 
 static constexpr int kNumCUDAThreads = 512;
-static constexpr int kNumMaxinumNumBlocks = 4096;
+static constexpr int kNumMaximumNumBlocks = 4096;
 
 static inline int NumBlocks(const int N) {
   return std::min((N + kNumCUDAThreads - 1) / kNumCUDAThreads,
-                  kNumMaxinumNumBlocks);
+                  kNumMaximumNumBlocks);
 }
 
 template <typename T>
diff --git a/paddle/phi/kernels/gpu/c_embedding_grad_kernel.cu b/paddle/phi/kernels/gpu/c_embedding_grad_kernel.cu
index ee50761ebbad7c..c8df1091b67eb5 100644
--- a/paddle/phi/kernels/gpu/c_embedding_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/c_embedding_grad_kernel.cu
@@ -28,11 +28,11 @@ COMMON_DECLARE_int64(embedding_deterministic);
 namespace phi {
 
 static constexpr int kNumCUDAThreads = 512;
-static constexpr int kNumMaxinumNumBlocks = 4096;
+static constexpr int kNumMaximumNumBlocks = 4096;
 
 static inline int NumBlocks(const int N) {
   return std::min((N + kNumCUDAThreads - 1) / kNumCUDAThreads,
-                  kNumMaxinumNumBlocks);
+                  kNumMaximumNumBlocks);
 }
 
 template <typename T, typename IndexT>
diff --git a/paddle/phi/kernels/gpu/c_embedding_kernel.cu b/paddle/phi/kernels/gpu/c_embedding_kernel.cu
index 57da8b801e6126..f73db3e02540fe 100644
--- a/paddle/phi/kernels/gpu/c_embedding_kernel.cu
+++ b/paddle/phi/kernels/gpu/c_embedding_kernel.cu
@@ -20,11 +20,11 @@
 namespace phi {
 
 static constexpr int kNumCUDAThreads = 512;
-static constexpr int kNumMaxinumNumBlocks = 4096;
+static constexpr int kNumMaximumNumBlocks = 4096;
 
 static inline int NumBlocks(const int N) {
   return std::min((N + kNumCUDAThreads - 1) / kNumCUDAThreads,
-                  kNumMaxinumNumBlocks);
+                  kNumMaximumNumBlocks);
 }
 
 template <typename T, typename IndexT>
diff --git a/paddle/phi/kernels/gpu/c_softmax_with_cross_entropy_grad_kernel.cu b/paddle/phi/kernels/gpu/c_softmax_with_cross_entropy_grad_kernel.cu
index 26380df9814059..5ac4c3784f45f2 100644
--- a/paddle/phi/kernels/gpu/c_softmax_with_cross_entropy_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/c_softmax_with_cross_entropy_grad_kernel.cu
@@ -27,11 +27,11 @@
 namespace phi {
 
 static constexpr int kNumCUDAThreads = 512;
-static constexpr int64_t kNumMaxinumNumBlocks = 4096;
+static constexpr int64_t kNumMaximumNumBlocks = 4096;
 
 static inline int64_t NumBlocks(const int64_t N) {
   return std::min((N + kNumCUDAThreads - 1) / kNumCUDAThreads,
-                  kNumMaxinumNumBlocks);
+                  kNumMaximumNumBlocks);
 }
 
 template <typename T, typename IndexT>
diff --git a/paddle/phi/kernels/gpu/c_softmax_with_cross_entropy_kernel.cu b/paddle/phi/kernels/gpu/c_softmax_with_cross_entropy_kernel.cu
index da4b033a89e45f..8c64272165cafe 100644
--- a/paddle/phi/kernels/gpu/c_softmax_with_cross_entropy_kernel.cu
+++ b/paddle/phi/kernels/gpu/c_softmax_with_cross_entropy_kernel.cu
@@ -44,11 +44,11 @@ struct CSoftmaxWithCrossEntropyFunctor {
 };
 
 static constexpr int kNumCUDAThreads = 512;
-static constexpr int64_t kNumMaxinumNumBlocks = 4096;
+static constexpr int64_t kNumMaximumNumBlocks = 4096;
 
 static inline int64_t NumBlocks(const int64_t N) {
   return std::min((N + kNumCUDAThreads - 1) / kNumCUDAThreads,
-                  kNumMaxinumNumBlocks);
+                  kNumMaximumNumBlocks);
 }
 
 template <typename T, typename IndexT>
diff --git a/paddle/phi/kernels/gpu/c_split_kernel.cu b/paddle/phi/kernels/gpu/c_split_kernel.cu
index b9796e784d04f9..8ec8aefa670693 100644
--- a/paddle/phi/kernels/gpu/c_split_kernel.cu
+++ b/paddle/phi/kernels/gpu/c_split_kernel.cu
@@ -19,11 +19,11 @@
 namespace phi {
 
 static constexpr int64_t kNumCUDAThreads = 512;
-static constexpr int64_t kNumMaxinumNumBlocks = 4096;
+static constexpr int64_t kNumMaximumNumBlocks = 4096;
 
 static inline int64_t NumBlocks(const int64_t N) {
   return std::min((N + kNumCUDAThreads - 1) / kNumCUDAThreads,
-                  kNumMaxinumNumBlocks);
+                  kNumMaximumNumBlocks);
 }
 
 template <typename T>
diff --git a/paddle/phi/kernels/gpu/class_center_sample_kernel.cu b/paddle/phi/kernels/gpu/class_center_sample_kernel.cu
index dd9ac32a6196f2..b817bebcc3cd4f 100644
--- a/paddle/phi/kernels/gpu/class_center_sample_kernel.cu
+++ b/paddle/phi/kernels/gpu/class_center_sample_kernel.cu
@@ -48,11 +48,11 @@ namespace phi {
        i += step)
 
 static constexpr int kNumCUDAThreads = 512;
-static constexpr int kNumMaxinumNumBlocks = 4096;
+static constexpr int kNumMaximumNumBlocks = 4096;
 
 inline int32_t NumBlocks(const int32_t n) {
   return std::min((n + kNumCUDAThreads - 1) / kNumCUDAThreads,
-                  kNumMaxinumNumBlocks);
+                  kNumMaximumNumBlocks);
 }
 
 template <typename T>
diff --git a/paddle/phi/kernels/gpu/collect_fpn_proposals_kernel.cu b/paddle/phi/kernels/gpu/collect_fpn_proposals_kernel.cu
index 2c3c3cdb550a8f..f75694421d6a88 100644
--- a/paddle/phi/kernels/gpu/collect_fpn_proposals_kernel.cu
+++ b/paddle/phi/kernels/gpu/collect_fpn_proposals_kernel.cu
@@ -36,13 +36,13 @@ namespace cub = hipcub;
 namespace phi {
 
 static constexpr int kNumCUDAThreads = 64;
-static constexpr int kNumMaxinumNumBlocks = 4096;
+static constexpr int kNumMaximumNumBlocks = 4096;
 
 const int kBBoxSize = 4;
 
 static inline int NumBlocks(const int N) {
   return std::min((N + kNumCUDAThreads - 1) / kNumCUDAThreads,
-                  kNumMaxinumNumBlocks);
+                  kNumMaximumNumBlocks);
 }
 
 static __global__ void GetLengthLoD(const int nthreads,
diff --git a/paddle/phi/kernels/gpu/distribute_fpn_proposals_kernel.cu b/paddle/phi/kernels/gpu/distribute_fpn_proposals_kernel.cu
index 95b70da2ff4830..6370b41b53c5e3 100644
--- a/paddle/phi/kernels/gpu/distribute_fpn_proposals_kernel.cu
+++ b/paddle/phi/kernels/gpu/distribute_fpn_proposals_kernel.cu
@@ -36,13 +36,13 @@ namespace cub = hipcub;
 namespace phi {
 
 static constexpr int kNumCUDAThreads = 64;
-static constexpr int kNumMaxinumNumBlocks = 4096;
+static constexpr int kNumMaximumNumBlocks = 4096;
 
 int const BBoxSize = 4;
 
 static inline int NumBlocks(const int N) {
   return std::min((N + kNumCUDAThreads - 1) / kNumCUDAThreads,
-                  kNumMaxinumNumBlocks);
+                  kNumMaximumNumBlocks);
 }
 
 template <class T>
diff --git a/paddle/phi/kernels/gpu/nll_loss.h b/paddle/phi/kernels/gpu/nll_loss.h
index 648b69b45253c1..badb5a6a74a44f 100644
--- a/paddle/phi/kernels/gpu/nll_loss.h
+++ b/paddle/phi/kernels/gpu/nll_loss.h
@@ -26,11 +26,11 @@
 
 namespace phi {
 static constexpr int kNumCUDAThreads = 512;
-static constexpr int kNumMaxinumNumBlocks = 4096;
+static constexpr int kNumMaximumNumBlocks = 4096;
 static const int NTHREADS = 32;
 static inline int NumBlocks(const int N) {
   return std::min((N + kNumCUDAThreads - 1) / kNumCUDAThreads,
-                  kNumMaxinumNumBlocks);
+                  kNumMaximumNumBlocks);
 }
 
 template <typename T>
diff --git a/paddle/phi/kernels/gpu/overlap_add_grad_kernel.cu b/paddle/phi/kernels/gpu/overlap_add_grad_kernel.cu
index 337620a556db5e..b9b16560adde46 100644
--- a/paddle/phi/kernels/gpu/overlap_add_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/overlap_add_grad_kernel.cu
@@ -38,7 +38,7 @@ void OverlapAddGradKernel(const Context& dev_ctx,
       (axis == 0) ? out_grad.dims()[0] : out_grad.dims()[out_grad_rank - 1];
 
   // When the number of input dims is larger than 2, it needs to copy
-  // from x to resize input into 2d and output into 3d. Morevoer, output
+  // from x to resize input into 2d and output into 3d. Moreover, output
   // dims will be restored at the last step.
   DenseTensor out_grad_(out_grad.type());
   out_grad_ = out_grad;
diff --git a/paddle/phi/kernels/gpu/prune_gate_by_capacity_kernel.cu b/paddle/phi/kernels/gpu/prune_gate_by_capacity_kernel.cu
index 24614d50446d01..28f48abaf98ec1 100644
--- a/paddle/phi/kernels/gpu/prune_gate_by_capacity_kernel.cu
+++ b/paddle/phi/kernels/gpu/prune_gate_by_capacity_kernel.cu
@@ -22,11 +22,11 @@
 namespace phi {
 
 static constexpr int kNumCUDAThreads = 512;
-static constexpr int kNumMaxinumNumBlocks = 4096;
+static constexpr int kMaximumNumBlocks = 4096;
 
 static inline int NumBlocks(const int N) {
   return std::min((N + kNumCUDAThreads - 1) / kNumCUDAThreads,
-                  kNumMaxinumNumBlocks);
+                  kMaximumNumBlocks);
 }
 
 template <typename T1, typename T2>
diff --git a/paddle/phi/kernels/gpu/roi_align_grad_kernel.cu b/paddle/phi/kernels/gpu/roi_align_grad_kernel.cu
index f388ede1fe73ca..fe6826efe3ddd7 100644
--- a/paddle/phi/kernels/gpu/roi_align_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/roi_align_grad_kernel.cu
@@ -26,12 +26,12 @@
 namespace phi {
 
 static constexpr int kNumCUDAThreads = 512;
-static constexpr int kNumMaxinumNumBlocks = 4096;
+static constexpr int kNumMaximumNumBlocks = 4096;
 static constexpr int kROISize = 4;
 
 static inline int NumBlocks(const int N) {
   return std::min((N + kNumCUDAThreads - 1) / kNumCUDAThreads,
-                  kNumMaxinumNumBlocks);
+                  kNumMaximumNumBlocks);
 }
 
 template <class T>
diff --git a/paddle/phi/kernels/gpu/roi_align_kernel.cu b/paddle/phi/kernels/gpu/roi_align_kernel.cu
index b2de0d83f8917d..f3bf4062dd4047 100644
--- a/paddle/phi/kernels/gpu/roi_align_kernel.cu
+++ b/paddle/phi/kernels/gpu/roi_align_kernel.cu
@@ -23,12 +23,12 @@
 namespace phi {
 
 static constexpr int kNumCUDAThreads = 512;
-static constexpr int kNumMaxinumNumBlocks = 4096;
+static constexpr int kNumMaximumNumBlocks = 4096;
 static constexpr int kROISize = 4;
 
 static inline int NumBlocks(const int N) {
   return std::min((N + kNumCUDAThreads - 1) / kNumCUDAThreads,
-                  kNumMaxinumNumBlocks);
+                  kNumMaximumNumBlocks);
 }
 
 template <class T>
diff --git a/paddle/phi/kernels/gpu/roi_pool_grad_kernel.cu b/paddle/phi/kernels/gpu/roi_pool_grad_kernel.cu
index b3ec72a5032eca..496f8ad103cba2 100644
--- a/paddle/phi/kernels/gpu/roi_pool_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/roi_pool_grad_kernel.cu
@@ -25,11 +25,11 @@
 namespace phi {
 
 static constexpr int kNumCUDAThreads = 512;
-static constexpr int kNumMaxinumNumBlocks = 4096;
+static constexpr int kNumMaximumNumBlocks = 4096;
 
 static inline int NumBlocks(const int N) {
   return std::min((N + kNumCUDAThreads - 1) / kNumCUDAThreads,
-                  kNumMaxinumNumBlocks);
+                  kNumMaximumNumBlocks);
 }
 
 template <typename T>
diff --git a/paddle/phi/kernels/gpu/roi_pool_kernel.cu b/paddle/phi/kernels/gpu/roi_pool_kernel.cu
index 968fea06ce2a4b..b2dde0075d8f22 100644
--- a/paddle/phi/kernels/gpu/roi_pool_kernel.cu
+++ b/paddle/phi/kernels/gpu/roi_pool_kernel.cu
@@ -23,11 +23,11 @@
 namespace phi {
 
 static constexpr int kNumCUDAThreads = 512;
-static constexpr int kNumMaxinumNumBlocks = 4096;
+static constexpr int kNumMaximumNumBlocks = 4096;
 
 static inline int NumBlocks(const int N) {
   return std::min((N + kNumCUDAThreads - 1) / kNumCUDAThreads,
-                  kNumMaxinumNumBlocks);
+                  kNumMaximumNumBlocks);
 }
 
 template <typename T>
diff --git a/paddle/phi/kernels/gpu/sigmoid_cross_entropy_with_logits.h b/paddle/phi/kernels/gpu/sigmoid_cross_entropy_with_logits.h
index 2baa96d2a51600..7de11ad7463593 100644
--- a/paddle/phi/kernels/gpu/sigmoid_cross_entropy_with_logits.h
+++ b/paddle/phi/kernels/gpu/sigmoid_cross_entropy_with_logits.h
@@ -36,11 +36,11 @@ namespace cub = hipcub;
 namespace phi {
 
 static constexpr int kNumCUDAThreads = 512;
-static constexpr int kNumMaxinumNumBlocks = 4096;
+static constexpr int kNumMaximumNumBlocks = 4096;
 
 static inline int NumBlocks(const int N) {
   return std::min((N + kNumCUDAThreads - 1) / kNumCUDAThreads,
-                  kNumMaxinumNumBlocks);
+                  kNumMaximumNumBlocks);
 }
 
 template <typename T>
diff --git a/paddle/phi/kernels/impl/frame_kernel_impl.h b/paddle/phi/kernels/impl/frame_kernel_impl.h
index fa0c5658efe550..fb75fa30a66658 100644
--- a/paddle/phi/kernels/impl/frame_kernel_impl.h
+++ b/paddle/phi/kernels/impl/frame_kernel_impl.h
@@ -31,7 +31,7 @@ void FrameKernel(const Context& dev_ctx,
   const int n_frames = (axis == 0) ? out->dims()[0] : out->dims()[out_rank - 1];
   const int seq_length = (axis == 0) ? x.dims()[0] : x.dims()[x_rank - 1];
   // When the number of input dims is larger than 2, it needs to copy
-  // from x to resize input into 2d and output into 3d. Morevoer, output
+  // from x to resize input into 2d and output into 3d. Moreover, output
   // dims will be restored at the last step.
   DenseTensor x_tmp = x;
 
diff --git a/paddle/phi/kernels/impl/margin_cross_entropy.cu.h b/paddle/phi/kernels/impl/margin_cross_entropy.cu.h
index ed861cde093223..e9590e05d84531 100644
--- a/paddle/phi/kernels/impl/margin_cross_entropy.cu.h
+++ b/paddle/phi/kernels/impl/margin_cross_entropy.cu.h
@@ -48,11 +48,11 @@ namespace cub = hipcub;
 namespace phi {
 
 static constexpr int kNumCUDAThreads = 512;
-static constexpr int kNumMaxinumNumBlocks = 4096;
+static constexpr int kNumMaximumNumBlocks = 4096;
 
 static inline int NumBlocks(const int N) {
   return std::min((N + kNumCUDAThreads - 1) / kNumCUDAThreads,
-                  kNumMaxinumNumBlocks);
+                  kNumMaximumNumBlocks);
 }
 
 template <typename T, typename Context>
diff --git a/paddle/phi/kernels/kps/elementwise_kernel.cu b/paddle/phi/kernels/kps/elementwise_kernel.cu
index 183bcac10fc9ed..13b4f44df1c19d 100644
--- a/paddle/phi/kernels/kps/elementwise_kernel.cu
+++ b/paddle/phi/kernels/kps/elementwise_kernel.cu
@@ -22,7 +22,7 @@
 #include "paddle/phi/kernels/legacy/elementwise_add_kernel.h"
 #include "paddle/phi/kernels/legacy/elementwise_divide_kernel.h"
 #include "paddle/phi/kernels/legacy/elementwise_kernel.h"
-#include "paddle/phi/kernels/legacy/elementwise_multipy_kernel.h"
+#include "paddle/phi/kernels/legacy/elementwise_multiply_kernel.h"
 #include "paddle/phi/kernels/legacy/elementwise_subtract_kernel.h"
 
 namespace phi {
diff --git a/paddle/phi/kernels/legacy/cpu/fused_elementwise_kernel.cc b/paddle/phi/kernels/legacy/cpu/fused_elementwise_kernel.cc
index ec640c2257c3f8..393ff0889c380d 100644
--- a/paddle/phi/kernels/legacy/cpu/fused_elementwise_kernel.cc
+++ b/paddle/phi/kernels/legacy/cpu/fused_elementwise_kernel.cc
@@ -20,7 +20,7 @@
 #include "paddle/phi/kernels/impl/elementwise_kernel_impl.h"
 #include "paddle/phi/kernels/legacy/elementwise_add_kernel.h"
 #include "paddle/phi/kernels/legacy/elementwise_divide_kernel.h"
-#include "paddle/phi/kernels/legacy/elementwise_multipy_kernel.h"
+#include "paddle/phi/kernels/legacy/elementwise_multiply_kernel.h"
 #include "paddle/phi/kernels/legacy/elementwise_subtract_kernel.h"
 
 namespace phi {
diff --git a/paddle/phi/kernels/legacy/elementwise_multipy_kernel.h b/paddle/phi/kernels/legacy/elementwise_multiply_kernel.h
similarity index 100%
rename from paddle/phi/kernels/legacy/elementwise_multipy_kernel.h
rename to paddle/phi/kernels/legacy/elementwise_multiply_kernel.h
diff --git a/paddle/phi/kernels/selected_rows/elementwise_multiply_kernel.h b/paddle/phi/kernels/selected_rows/elementwise_multiply_kernel.h
index e48ff7d8cc6277..9e0a991b26a058 100644
--- a/paddle/phi/kernels/selected_rows/elementwise_multiply_kernel.h
+++ b/paddle/phi/kernels/selected_rows/elementwise_multiply_kernel.h
@@ -16,7 +16,7 @@ limitations under the License. */
 
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/core/selected_rows.h"
-#include "paddle/phi/kernels/legacy/elementwise_multipy_kernel.h"
+#include "paddle/phi/kernels/legacy/elementwise_multiply_kernel.h"
 
 namespace phi {
 namespace sr {
diff --git a/paddle/phi/kernels/sparse/gpu/fused_attention_grad_kernel.cu b/paddle/phi/kernels/sparse/gpu/fused_attention_grad_kernel.cu
index 91ac5f1a1509b6..fe780b4e1c4179 100644
--- a/paddle/phi/kernels/sparse/gpu/fused_attention_grad_kernel.cu
+++ b/paddle/phi/kernels/sparse/gpu/fused_attention_grad_kernel.cu
@@ -71,7 +71,7 @@ void FusedAttentionCsrGradKernel(const Context& dev_ctx,
   MatmulCsrDenseGradKernel<T, Context>(
       dev_ctx, softmax, value, dout, &dsoftmax, dvalue);
 
-  /* Step2: Calculate grad of sdd_result, manualy not reuse */
+  /* Step2: Calculate grad of sdd_result manually, without reuse */
   SparseCsrTensor d_sdd_result;
   EmptyLikeCsrKernel<T, Context>(dev_ctx, dsoftmax, &d_sdd_result);
   auto q_dim = query.dims();
diff --git a/python/paddle/distributed/auto_parallel/moe_utils.py b/python/paddle/distributed/auto_parallel/moe_utils.py
index b6ac80231b4d0a..9a1957be3ea9a0 100644
--- a/python/paddle/distributed/auto_parallel/moe_utils.py
+++ b/python/paddle/distributed/auto_parallel/moe_utils.py
@@ -261,7 +261,7 @@ def _dist_reshape(
 ):
     """
     Reshape the local tensors of the dist tensor on each rank,
-    and mannualy set the process_mesh and placements of the output.
+    and manually set the process_mesh and placements of the output.
     """
     tgt_global_shape = infer_positive_shape(dist_tensor.shape, global_shape)
     tgt_local_shape = _cal_local_shape(tgt_global_shape, mesh, placements)
diff --git a/python/paddle/distributed/auto_tuner/utils.py b/python/paddle/distributed/auto_tuner/utils.py
index eca9b48f287955..d60e3b34fc2153 100644
--- a/python/paddle/distributed/auto_tuner/utils.py
+++ b/python/paddle/distributed/auto_tuner/utils.py
@@ -439,7 +439,7 @@ def search_all(tuner_cfg):
                         i += 1
 
                     if tuner_cfg.get("schedule_mode") != "performance":
-                        # momory sort
+                        # memory sort
                         for rr_dim_cfg in rr_dim_cfgs:
                             cfg = (
                                 list(valid_degree)
diff --git a/python/paddle/jit/dy2static/convert_operators.py b/python/paddle/jit/dy2static/convert_operators.py
index 82a523f624b20e..9b18b3b4bf42c3 100644
--- a/python/paddle/jit/dy2static/convert_operators.py
+++ b/python/paddle/jit/dy2static/convert_operators.py
@@ -506,11 +506,11 @@ def new_false_fn():
             "Unsupported return type of true_fn and false_fn in cond", str(e)
         ):
             raise Dygraph2StaticException(
-                f"Your if/else have different return type. TODO: add link to modifty. {e}"
+                f"Your if/else have different return type. TODO: add link to modify. {e}"
             )
         if re.search("Incompatible return values of", str(e)):
             raise Dygraph2StaticException(
-                f"Your if/else have different number of return value. TODO: add link to modifty. {e}"
+                f"Your if/else have different number of return value. TODO: add link to modify. {e}"
             )
         raise e
     get_args = lambda: helper.get(union_name)
diff --git a/python/paddle/utils/cpp_extension/cpp_extension.py b/python/paddle/utils/cpp_extension/cpp_extension.py
index da0bc9d5481b17..2a99fae8adfb93 100644
--- a/python/paddle/utils/cpp_extension/cpp_extension.py
+++ b/python/paddle/utils/cpp_extension/cpp_extension.py
@@ -659,7 +659,7 @@ def win_custom_spawn(cmd):
                 elif isinstance(self.cflags, list):
                     cflags = MSVC_COMPILE_FLAGS + self.cflags
                     cmd += cflags
-                # Append this macor only when jointly compiling .cc with .cu
+                # Append this macro only when jointly compiling .cc with .cu
                 if not is_cuda_file(src) and self.contain_cuda_file:
                     cmd.append('-DPADDLE_WITH_CUDA')
 
diff --git a/python/paddle/vision/transforms/transforms.py b/python/paddle/vision/transforms/transforms.py
index 0584f836d43ffc..31af62bd8c2488 100644
--- a/python/paddle/vision/transforms/transforms.py
+++ b/python/paddle/vision/transforms/transforms.py
@@ -1458,7 +1458,7 @@ class Pad(BaseTransform[_InputT, _RetT]):
             ``reflect`` means pads with reflection of image (without repeating the last value on the edge)
             padding ``[1, 2, 3, 4]`` with 2 elements on both sides in reflect mode
             will result in ``[3, 2, 1, 2, 3, 4, 3, 2]``.
-            ``symmetric`` menas pads with reflection of image (repeating the last value on the edge)
+            ``symmetric`` means pads with reflection of image (repeating the last value on the edge)
             padding ``[1, 2, 3, 4]`` with 2 elements on both sides in symmetric mode
             will result in ``[2, 1, 1, 2, 3, 4, 4, 3]``.
         keys (list[str]|tuple[str], optional): Same as ``BaseTransform``. Default: None.
diff --git a/test/auto_parallel/semi_auto_parallel_for_replicated_spmd.py b/test/auto_parallel/semi_auto_parallel_for_replicated_spmd.py
index 7a0ba9bf07899b..c05d1029d4a865 100644
--- a/test/auto_parallel/semi_auto_parallel_for_replicated_spmd.py
+++ b/test/auto_parallel/semi_auto_parallel_for_replicated_spmd.py
@@ -173,7 +173,7 @@ def test_adamax(self):
         self.check_tensor_eq(local_inf_norm_out, dist_inf_norm_out)
         self.check_tensor_eq(local_master_param_out, dist_master_param_out)
 
-    # mutiple operators
+    # multiple operators
     def test_mse_loss(self):
         x = np.random.random(size=[4, 4]).astype(self._dtype)
         y = np.random.random(size=[4]).astype(self._dtype)
diff --git a/test/ir/pir/cinn/test_anchor_fusion.py b/test/ir/pir/cinn/test_anchor_fusion.py
index a7cff47696998c..80a8300d4ce596 100644
--- a/test/ir/pir/cinn/test_anchor_fusion.py
+++ b/test/ir/pir/cinn/test_anchor_fusion.py
@@ -108,7 +108,7 @@ def init():
             x = paddle.ones((16, 32, 64, 128))
             return (x,)
 
-        # This case can't be fused to one kernel because muti-downstream
+        # This case can't be fused to one kernel because multi-downstream
         # transpose op will sink currently.
         self.check_accuracy_and_kernel_num(init, func)
 
diff --git a/test/legacy_test/test_case.py b/test/legacy_test/test_case.py
index b6c7d6c2d14712..1708fd6ce45b58 100644
--- a/test/legacy_test/test_case.py
+++ b/test/legacy_test/test_case.py
@@ -605,7 +605,7 @@ def type_error_default():
 
 
 # when optimizer in case
-class TestMutiTask(unittest.TestCase):
+class TestMultiTask(unittest.TestCase):
 
     def test_optimizer_in_case(self):
         BATCH_SIZE = 1
diff --git a/test/legacy_test/test_matmul_fp8_op.py b/test/legacy_test/test_matmul_fp8_op.py
index 09e0b417f67115..ad09ba17bd4ec9 100644
--- a/test/legacy_test/test_matmul_fp8_op.py
+++ b/test/legacy_test/test_matmul_fp8_op.py
@@ -51,7 +51,7 @@ def check_fp8_support() -> bool:
 
 def _to_fp8_saturated(x: paddle.Tensor, float8_dtype) -> paddle.Tensor:
     # The default behavior in Paddle for casting to `float8_e4m3fn`
-    # and `e5m2` is to not saturate. So we saturate here manualy.
+    # and `e5m2` is to not saturate. So we saturate here manually.
     if float8_dtype == paddle.float8_e4m3fn:
         x = x.clip(min=-1 * E4M3_MAX_POS, max=E4M3_MAX_POS)
     else:
diff --git a/test/legacy_test/test_max_min_amax_amin_op.py b/test/legacy_test/test_max_min_amax_amin_op.py
index ca6b83f93cb99c..335a2c3334ae8d 100644
--- a/test/legacy_test/test_max_min_amax_amin_op.py
+++ b/test/legacy_test/test_max_min_amax_amin_op.py
@@ -200,7 +200,7 @@ def init_case(self):
         self.keepdim = False
 
 
-# test input grad when out is operated like mutiply
+# test input grad when out is operated like multiply
 class TestMaxMinAmaxAminAPI7(TestMaxMinAmaxAminAPI):
     def init_case(self):
         self.x_np = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]]).astype(
diff --git a/test/legacy_test/test_tensor_type_promotion.py b/test/legacy_test/test_tensor_type_promotion.py
index 0fe923a6af0418..97eb9385d8cb42 100644
--- a/test/legacy_test/test_tensor_type_promotion.py
+++ b/test/legacy_test/test_tensor_type_promotion.py
@@ -3299,7 +3299,7 @@ def run_api(self):
 create_test_case(TestAPIMaximumInStatic, 'float32', 'float64', 'float64')
 
 
-class TestAPIMiniumInStatic(TestOperatorOverloadAddInStatic):
+class TestAPIMinimumInStatic(TestOperatorOverloadAddInStatic):
     def run_api(self):
         prog = paddle.static.Program()
         with paddle.static.program_guard(prog):
@@ -3312,7 +3312,7 @@ def run_api(self):
         return res
 
 
-create_test_case(TestAPIMiniumInStatic, 'float32', 'float64', 'float64')
+create_test_case(TestAPIMinimumInStatic, 'float32', 'float64', 'float64')
 
 
 class TestAPINextAfterInStatic(TestOperatorOverloadAddInStatic):
diff --git a/test/legacy_test/test_variable.py b/test/legacy_test/test_variable.py
index ef4d1b5dec1da1..e51200d4653579 100644
--- a/test/legacy_test/test_variable.py
+++ b/test/legacy_test/test_variable.py
@@ -578,7 +578,7 @@ def test_dygraph_list_index(self):
             array = array[0]
             index = index[0]
 
-    def test_static_graph_list_index_muti_dim(self):
+    def test_static_graph_list_index_multi_dim(self):
         paddle.enable_static()
         inps_shape = [3, 4, 5]
         array = np.arange(self.numel(inps_shape), dtype='float32').reshape(
@@ -643,7 +643,7 @@ def test_static_graph_list_index_muti_dim(self):
                 err_msg=f'\n numpy:{y2},\n paddle:{getitem_pp[0]}',
             )
 
-    def test_dygraph_list_index_muti_dim(self):
+    def test_dygraph_list_index_multi_dim(self):
         paddle.disable_static()
         inps_shape = [3, 4, 5]
         array = np.arange(self.numel(inps_shape), dtype='float32').reshape(
@@ -878,7 +878,7 @@ def test_static_graph_setitem_bool_scalar_index(self):
         with paddle.static.program_guard(program):
             self.run_setitem_list_index(array, index, value_np)
 
-    def test_static_graph_tensor_index_setitem_muti_dim(self):
+    def test_static_graph_tensor_index_setitem_multi_dim(self):
         paddle.enable_static()
         inps_shape = [3, 4, 5, 4]
         array = np.arange(self.numel(inps_shape), dtype='float32').reshape(
@@ -970,7 +970,7 @@ def test_static_graph_tensor_index_setitem_muti_dim(self):
             index1 = index1[0]
             index2 = index2[0]
 
-    def test_static_graph_array_index_muti_dim(self):
+    def test_static_graph_array_index_multi_dim(self):
         paddle.enable_static()
         inps_shape = [3, 4, 5, 4]
         array = np.arange(self.numel(inps_shape), dtype='float32').reshape(
@@ -1053,7 +1053,7 @@ def test_static_graph_array_index_muti_dim(self):
             index1 = index1[0]
             index2 = index2[0]
 
-    def test_dygraph_array_index_muti_dim(self):
+    def test_dygraph_array_index_multi_dim(self):
         paddle.disable_static()
         inps_shape = [3, 4, 5, 4]
         array = np.arange(self.numel(inps_shape), dtype='float32').reshape(
diff --git a/test/ps/__init__.py b/test/ps/__init__.py
index 5a5bd1e0048c4b..c46b094697dd36 100644
--- a/test/ps/__init__.py
+++ b/test/ps/__init__.py
@@ -13,5 +13,5 @@
 # limitations under the License.p
 
 # Note: On Windows, import form subdirectories such as dirA()->dirB(), current directory
-# will still be dirA(), But is should be dirB(). So it will ModulNotFoundError
+# will still be dirA(), but it should be dirB(). So it will raise ModuleNotFoundError
 # please refer to https://stackoverflow.com/questions/8953844/import-module-from-subfolder
diff --git a/test/quantization/quant2_int8_image_classification_comparison.py b/test/quantization/quant2_int8_image_classification_comparison.py
index fac217637d54b7..7f6666c7b6a90d 100644
--- a/test/quantization/quant2_int8_image_classification_comparison.py
+++ b/test/quantization/quant2_int8_image_classification_comparison.py
@@ -262,7 +262,7 @@ def _predict(
                         },
                         fetch_list=fetch_targets,
                     )
-                    batch_time = (time.time() - start) * 1000  # in miliseconds
+                    batch_time = (time.time() - start) * 1000  # in milliseconds
                     batch_acc1, batch_acc5 = out[1], out[2]
                     outputs.append(batch_acc1)
                 else:
@@ -273,7 +273,7 @@ def _predict(
                         feed={feed_target_names[0]: images},
                         fetch_list=fetch_targets,
                     )
-                    batch_time = (time.time() - start) * 1000  # in miliseconds
+                    batch_time = (time.time() - start) * 1000  # in milliseconds
                     outputs.append(out[0])
                     # Calculate accuracy result
                     batch_acc1, batch_acc5 = self._get_batch_accuracy(
diff --git a/test/quantization/quant2_int8_nlp_comparison.py b/test/quantization/quant2_int8_nlp_comparison.py
index 985fb62f1d11e9..032f8c49a44c9c 100644
--- a/test/quantization/quant2_int8_nlp_comparison.py
+++ b/test/quantization/quant2_int8_nlp_comparison.py
@@ -211,7 +211,7 @@ def _predict(
 
             start = time.time()
             predictor.run()
-            batch_time = (time.time() - start) * 1000  # in miliseconds
+            batch_time = (time.time() - start) * 1000  # in milliseconds
 
             out = []
             out = predictor.get_output_handle(output_names[0]).copy_to_cpu()
diff --git a/test/quantization/quant_int8_image_classification_comparison.py b/test/quantization/quant_int8_image_classification_comparison.py
index 4cfb3bdf798659..f0944eb34b3afe 100644
--- a/test/quantization/quant_int8_image_classification_comparison.py
+++ b/test/quantization/quant_int8_image_classification_comparison.py
@@ -224,7 +224,7 @@ def _predict(
                     feed={feed_target_names[0]: images},
                     fetch_list=fetch_targets,
                 )
-                batch_time = (time.time() - start) * 1000  # in miliseconds
+                batch_time = (time.time() - start) * 1000  # in milliseconds
                 outputs.append(out[0])
                 batch_acc1, batch_acc5 = self._get_batch_accuracy(
                     out[0], labels
diff --git a/tools/coverage/gcda_clean.py b/tools/coverage/gcda_clean.py
index 2abba39636d076..4b8c433f76f5d8 100644
--- a/tools/coverage/gcda_clean.py
+++ b/tools/coverage/gcda_clean.py
@@ -85,7 +85,7 @@ def clean(pull_id):
 
                 # convert paddle/fluid/imperative/CMakeFiles/layer.dir/layer.cc.gcda
                 # to paddle/fluid/imperative/layer.cc.gcda
-                # modifed to make it more robust
+                # modified to make it more robust
                 # covert /paddle/build/paddle/phi/backends/CMakeFiles/phi_backends.dir/gpu/cuda/cuda_info.cc.gcda
                 # to /paddle/build/paddle/phi/backends/gpu/cuda/cuda_info.cc.gcda
                 trimmed_tmp = []
diff --git a/tools/gen_pybind11_stub.py b/tools/gen_pybind11_stub.py
index 3e8dfe839f14cd..4cfcc19640fff5 100644
--- a/tools/gen_pybind11_stub.py
+++ b/tools/gen_pybind11_stub.py
@@ -399,7 +399,7 @@ def post_process(output_dir: str):
             replace_bad_attr(filename)
             check_remove_syntax_error(filename)
 
-            # insert moduels if necessary
+            # insert modules if necessary
             insert_import_modules(filename)
 
 

From d9994aec585b997f1caaa4101a8e5b9a43076029 Mon Sep 17 00:00:00 2001
From: yinfan98 <1106310035@qq.com>
Date: Sat, 21 Dec 2024 23:02:15 +0800
Subject: [PATCH 2/2] Update prune_gate_by_capacity_kernel.cu

---
 paddle/phi/kernels/gpu/prune_gate_by_capacity_kernel.cu | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/paddle/phi/kernels/gpu/prune_gate_by_capacity_kernel.cu b/paddle/phi/kernels/gpu/prune_gate_by_capacity_kernel.cu
index 28f48abaf98ec1..16d2a67665f67c 100644
--- a/paddle/phi/kernels/gpu/prune_gate_by_capacity_kernel.cu
+++ b/paddle/phi/kernels/gpu/prune_gate_by_capacity_kernel.cu
@@ -22,11 +22,11 @@
 namespace phi {
 
 static constexpr int kNumCUDAThreads = 512;
-static constexpr int kMaximumNumBlocks = 4096;
+static constexpr int kNumMaximumNumBlocks = 4096;
 
 static inline int NumBlocks(const int N) {
   return std::min((N + kNumCUDAThreads - 1) / kNumCUDAThreads,
-                  kMaximumNumBlocks);
+                  kNumMaximumNumBlocks);
 }
 
 template <typename T1, typename T2>