From 3f3041648424a068885212bc0a006c25a0cf100e Mon Sep 17 00:00:00 2001 From: yinfan98 <1106310035@qq.com> Date: Sat, 21 Dec 2024 14:52:56 +0000 Subject: [PATCH 1/2] fix_typo --- _typos.toml | 30 ------------- .../group_schedule/search/config_searcher.cc | 4 +- .../group_schedule/search/config_searcher.h | 2 +- paddle/cinn/ir/ir_base.cc | 14 +++---- paddle/cinn/ir/ir_base.h | 2 +- paddle/cinn/ir/op/ir_operators.cc | 13 +++--- paddle/common/ddim.h | 2 +- .../distributed/ps/service/heter_server.cc | 2 +- .../ps/table/common_graph_table.cc | 22 +++++----- .../ir/embedding_fc_lstm_fuse_pass.cc | 2 +- .../interpreter/dependency_builder.cc | 12 +++--- .../framework/new_executor/pir_interpreter.cc | 6 +-- .../new_executor/program_interpreter.cc | 6 +-- .../new_executor/workqueue/events_waiter.h | 2 +- paddle/fluid/framework/operator.cc | 4 +- paddle/fluid/framework/var_desc.h | 2 +- paddle/fluid/imperative/prepared_operator.cc | 2 +- .../fluid/inference/api/paddle_pass_builder.h | 2 +- .../onednn/conv2d_transpose_bn_fuse_pass.cc | 4 +- .../phi/core/memory/allocation/memory_block.h | 4 +- .../spmd_rules/default_data_parallel.cc | 16 +++---- paddle/phi/infermeta/spmd_rules/replicated.cc | 42 +++++++++---------- .../kernels/cpu/overlap_add_grad_kernel.cc | 2 +- .../gpu/fused_dropout_add_grad_kernel.cu | 4 +- paddle/phi/kernels/gpu/assign_pos_kernel.cu | 4 +- .../kernels/gpu/c_embedding_grad_kernel.cu | 4 +- paddle/phi/kernels/gpu/c_embedding_kernel.cu | 4 +- ..._softmax_with_cross_entropy_grad_kernel.cu | 4 +- .../c_softmax_with_cross_entropy_kernel.cu | 4 +- paddle/phi/kernels/gpu/c_split_kernel.cu | 4 +- .../kernels/gpu/class_center_sample_kernel.cu | 4 +- .../gpu/collect_fpn_proposals_kernel.cu | 4 +- .../gpu/distribute_fpn_proposals_kernel.cu | 4 +- paddle/phi/kernels/gpu/nll_loss.h | 4 +- .../kernels/gpu/overlap_add_grad_kernel.cu | 2 +- .../gpu/prune_gate_by_capacity_kernel.cu | 4 +- .../phi/kernels/gpu/roi_align_grad_kernel.cu | 4 +- paddle/phi/kernels/gpu/roi_align_kernel.cu | 4 +- .../phi/kernels/gpu/roi_pool_grad_kernel.cu | 4 +- paddle/phi/kernels/gpu/roi_pool_kernel.cu | 4 +- .../gpu/sigmoid_cross_entropy_with_logits.h | 4 +- paddle/phi/kernels/impl/frame_kernel_impl.h | 2 +- .../kernels/impl/margin_cross_entropy.cu.h | 4 +- paddle/phi/kernels/kps/elementwise_kernel.cu | 2 +- .../legacy/cpu/fused_elementwise_kernel.cc | 2 +- ...kernel.h => elementwise_multiply_kernel.h} | 0 .../elementwise_multiply_kernel.h | 2 +- .../sparse/gpu/fused_attention_grad_kernel.cu | 2 +- .../distributed/auto_parallel/moe_utils.py | 2 +- python/paddle/distributed/auto_tuner/utils.py | 2 +- .../paddle/jit/dy2static/convert_operators.py | 4 +- .../utils/cpp_extension/cpp_extension.py | 2 +- python/paddle/vision/transforms/transforms.py | 2 +- .../semi_auto_parallel_for_replicated_spmd.py | 2 +- test/ir/pir/cinn/test_anchor_fusion.py | 2 +- test/legacy_test/test_case.py | 2 +- test/legacy_test/test_matmul_fp8_op.py | 2 +- test/legacy_test/test_max_min_amax_amin_op.py | 2 +- .../legacy_test/test_tensor_type_promotion.py | 4 +- test/legacy_test/test_variable.py | 10 ++--- test/ps/__init__.py | 2 +- ...t2_int8_image_classification_comparison.py | 4 +- .../quant2_int8_nlp_comparison.py | 2 +- ...nt_int8_image_classification_comparison.py | 2 +- tools/coverage/gcda_clean.py | 2 +- tools/gen_pybind11_stub.py | 2 +- 66 files changed, 151 insertions(+), 180 deletions(-) rename paddle/phi/kernels/legacy/{elementwise_multipy_kernel.h => elementwise_multiply_kernel.h} (100%) diff --git a/_typos.toml b/_typos.toml index 045134f1dc197a..8b477d563113f6 100644 --- a/_typos.toml +++ b/_typos.toml @@ -282,36 +282,6 @@ localy = 'localy' Localy = 'Localy' logarithmical = 'logarithmical' Loggin = 'Loggin' -macor = 'macor' -mantain = 'mantain' -mak = 'mak' -manualy = 'manualy' -mannualy = 'mannualy' -maping = 'maping' -Maxinum = 'Maxinum' -maxmium = 'maxmium' -menas = 'menas' -momory = 'momory' -Meatadata = 'Meatadata' -miliseconds = 'miliseconds' -minimun = 'minimun' -Minium = 'Minium' -minumum = 'minumum' -modication = 'modication' -modifed = 'modifed' -modifing = 'modifing' -modifty = 'modifty' -moduels = 'moduels' -Modul = 'Modul' -Morevoer = 'Morevoer' -mutiple = 'mutiple' -mutiply = 'mutiply' -Mutiply = 'Mutiply' -multipy = 'multipy' -Multiplie = 'Multiplie' -Muti = 'Muti' -muti = 'muti' -mutexs = 'mutexs' occured = 'occured' Ocurred = 'Ocurred' occures = 'occures' diff --git a/paddle/cinn/ir/group_schedule/search/config_searcher.cc b/paddle/cinn/ir/group_schedule/search/config_searcher.cc index bd3ed6be17a636..aa3f0f6210336b 100644 --- a/paddle/cinn/ir/group_schedule/search/config_searcher.cc +++ b/paddle/cinn/ir/group_schedule/search/config_searcher.cc @@ -208,7 +208,7 @@ ScheduleConfigSearcher::ScheduleConfigSearcher( constraints_(constraints) {} std::pair ScheduleConfigSearcher::Search( - bool is_search_minimun) { + bool is_search_minimum) { VLOG(6) << "Start Search..."; CandidateGenerator candidate_generator(candidate_range_, constraints_); std::vector candidates = candidate_generator.Candidates(); @@ -222,7 +222,7 @@ std::pair ScheduleConfigSearcher::Search( VLOG(6) << "Score = " << score; records_[score] = candidate; } - return is_search_minimun ? *records_.begin() : *(records_.end()--); + return is_search_minimum ? *records_.begin() : *(records_.end()--); } } // namespace search diff --git a/paddle/cinn/ir/group_schedule/search/config_searcher.h b/paddle/cinn/ir/group_schedule/search/config_searcher.h index a176676aa9b281..72e3be65b114d0 100644 --- a/paddle/cinn/ir/group_schedule/search/config_searcher.h +++ b/paddle/cinn/ir/group_schedule/search/config_searcher.h @@ -87,7 +87,7 @@ class ScheduleConfigSearcher { const std::vector>& candidate_range, const std::vector& constraints = {}); - std::pair Search(bool is_search_minimun = true); + std::pair Search(bool is_search_minimum = true); private: std::vector> objective_funcs_; diff --git a/paddle/cinn/ir/ir_base.cc b/paddle/cinn/ir/ir_base.cc index 99671977993fa8..43c24bc1edcb66 100644 --- a/paddle/cinn/ir/ir_base.cc +++ b/paddle/cinn/ir/ir_base.cc @@ -404,14 +404,14 @@ const IndexExpr IndexExpr::operand(int32_t i) const { return get()->operand(i).as_index(); } -int64_t IndexExpr::GetLargestMutiplyPart() const { +int64_t IndexExpr::GetLargestMultiplyPart() const { switch (node_type()) { case cinn::ir::IrNodeTy::_Var_: return 1; case cinn::ir::IrNodeTy::Div: { if (operand(1).type().is_index_type()) { - int64_t lhsDiv = operand(0).GetLargestMutiplyPart(); - int64_t rhsDiv = operand(1).GetLargestMutiplyPart(); + int64_t lhsDiv = operand(0).GetLargestMultiplyPart(); + int64_t rhsDiv = operand(1).GetLargestMultiplyPart(); if (lhsDiv % rhsDiv == 0) return std::abs(lhsDiv / rhsDiv); } return 1; @@ -421,13 +421,13 @@ int64_t IndexExpr::GetLargestMutiplyPart() const { return std::abs(int_imm->value); } case cinn::ir::IrNodeTy::Mul: { - return operand(0).GetLargestMutiplyPart() * - operand(1).GetLargestMutiplyPart(); + return operand(0).GetLargestMultiplyPart() * + operand(1).GetLargestMultiplyPart(); } case cinn::ir::IrNodeTy::Add: case cinn::ir::IrNodeTy::Mod: { - return std::gcd(operand(0).GetLargestMutiplyPart(), - operand(1).GetLargestMutiplyPart()); + return std::gcd(operand(0).GetLargestMultiplyPart(), + operand(1).GetLargestMultiplyPart()); } } PADDLE_THROW(::common::errors::Unimplemented("Unsupported type of expr: %s", diff --git a/paddle/cinn/ir/ir_base.h b/paddle/cinn/ir/ir_base.h index e7d46d82766015..5e269de04de907 100644 --- a/paddle/cinn/ir/ir_base.h +++ b/paddle/cinn/ir/ir_base.h @@ -499,7 +499,7 @@ struct IndexExpr : public IrNodeRef { Type type() const { return p_->type(); } - int64_t GetLargestMutiplyPart() const; + int64_t GetLargestMultiplyPart() const; IndexExpr Normalize() const; diff --git a/paddle/cinn/ir/op/ir_operators.cc b/paddle/cinn/ir/op/ir_operators.cc index de5a8974de2db9..1faa8a8cc97bea 100644 --- a/paddle/cinn/ir/op/ir_operators.cc +++ b/paddle/cinn/ir/op/ir_operators.cc @@ -509,8 +509,8 @@ static IndexExpr SimplifyDiv(const IndexExpr &lhs, const IndexExpr &rhs) { // (expr1 * c1 * c2 + expr2 * c1 * c3) / c1 ===> expr1 * c2 + expr2 * c3. if (lhsAdd) { - int64_t llhsFactor = lhsAdd->a().as_index().GetLargestMutiplyPart(); - int64_t lrhsFactor = lhsAdd->b().as_index().GetLargestMutiplyPart(); + int64_t llhsFactor = lhsAdd->a().as_index().GetLargestMultiplyPart(); + int64_t lrhsFactor = lhsAdd->b().as_index().GetLargestMultiplyPart(); if (llhsFactor % rhsConst->value == 0 && lrhsFactor % rhsConst->value == 0) { return lhsAdd->a().as_index() / rhsConst->value + @@ -565,8 +565,8 @@ static IndexExpr SimplifyMod(const IndexExpr &lhs, const IndexExpr &rhs) { // (expr1 * c1 * c2+ expr2 * c3) % c1 ===> expr2 * c3 % c1. if (lhsAdd) { - int64_t llhsFactor = lhsAdd->a().as_index().GetLargestMutiplyPart(); - int64_t lrhsFactor = lhsAdd->b().as_index().GetLargestMutiplyPart(); + int64_t llhsFactor = lhsAdd->a().as_index().GetLargestMultiplyPart(); + int64_t lrhsFactor = lhsAdd->b().as_index().GetLargestMultiplyPart(); if (llhsFactor % rhsConst->value == 0) return lhsAdd->b().as_index() % rhsConst->value; if (lrhsFactor % rhsConst->value == 0) @@ -574,11 +574,12 @@ static IndexExpr SimplifyMod(const IndexExpr &lhs, const IndexExpr &rhs) { } // expr1 * (c1 * c2) % c1 ===> 0. - if (lhs.GetLargestMutiplyPart() % rhsConst->value == 0) return IndexExpr(0); + if (lhs.GetLargestMultiplyPart() % rhsConst->value == 0) + return IndexExpr(0); // expr1 % (c1 * c2) % c1 ===> expr1 % c1. if (lhsMod) { - int64_t llhsFactor = lhsMod->b().as_index().GetLargestMutiplyPart(); + int64_t llhsFactor = lhsMod->b().as_index().GetLargestMultiplyPart(); if (llhsFactor % rhsConst->value == 0) return lhsMod->a().as_index() % rhsConst->value; } diff --git a/paddle/common/ddim.h b/paddle/common/ddim.h index 5af0ae660c1b78..f0d301203a70fa 100644 --- a/paddle/common/ddim.h +++ b/paddle/common/ddim.h @@ -217,7 +217,7 @@ TEST_API std::ostream& operator<<(std::ostream&, const DDim&); /** * \brief Flatten dim to 3d - * e.g., DDim d = mak_ddim({1, 2, 3, 4, 5, 6}) + * e.g., DDim d = make_ddim({1, 2, 3, 4, 5, 6}) * flatten_to_3d(d, 2, 4); ===> {1*2, 3*4, 5*6} ===> {2, 12, 30} */ TEST_API DDim flatten_to_3d(const DDim& src, diff --git a/paddle/fluid/distributed/ps/service/heter_server.cc b/paddle/fluid/distributed/ps/service/heter_server.cc index 525ea32128100f..b83163570a1f2f 100644 --- a/paddle/fluid/distributed/ps/service/heter_server.cc +++ b/paddle/fluid/distributed/ps/service/heter_server.cc @@ -112,7 +112,7 @@ int SendAndRecvVariableHandler::SaveInSwitchWithShard( VLOG(4) << "entering SaveInSwitchWithShard"; int32_t group_id = request->group_id(); if (group_id >= FLAGS_heter_world_size) { - LOG(ERROR) << "group id exceed maxmium"; + LOG(ERROR) << "group id exceed maximum"; } auto& local_shard = _local_shards[group_id]; auto& request_io_buffer = cntl->request_attachment(); diff --git a/paddle/fluid/distributed/ps/table/common_graph_table.cc b/paddle/fluid/distributed/ps/table/common_graph_table.cc index 4fe05675838324..625102394daa8f 100644 --- a/paddle/fluid/distributed/ps/table/common_graph_table.cc +++ b/paddle/fluid/distributed/ps/table/common_graph_table.cc @@ -368,11 +368,11 @@ paddle::framework::GpuPsCommRankFea GraphTable::make_gpu_ps_rank_fea( // 遍历 rank_nodes[i][shard_num],分8份,分配到 res std::vector> tasks; - auto mutexs = new std::mutex[shard_num_per_server]; + auto mutexes = new std::mutex[shard_num_per_server]; for (int i = 0; i < node_num_; i++) { for (size_t shard_id = 0; shard_id < shard_num_per_server; shard_id++) { tasks.push_back(_cpu_worker_pool[gpu_id]->enqueue( - [i, gpu_id, shard_id, &rank_nodes, &node_num_vec, &mutexs]() + [i, gpu_id, shard_id, &rank_nodes, &node_num_vec, &mutexes]() -> size_t { auto &rank_node = rank_nodes[i][shard_id]; size_t start = 0; @@ -381,9 +381,9 @@ paddle::framework::GpuPsCommRankFea GraphTable::make_gpu_ps_rank_fea( start++; } } - mutexs[shard_id].lock(); + mutexes[shard_id].lock(); node_num_vec[shard_id] += start; - mutexs[shard_id].unlock(); + mutexes[shard_id].unlock(); return start; })); } @@ -3150,12 +3150,12 @@ class MergeShardVector { _slice_num = slice_num; _shard_keys = output; _shard_keys->resize(slice_num); - _mutexs = new std::mutex[slice_num]; + _mutexes = new std::mutex[slice_num]; } ~MergeShardVector() { - if (_mutexs != nullptr) { - delete[] _mutexs; - _mutexs = nullptr; + if (_mutexes != nullptr) { + delete[] _mutexes; + _mutexes = nullptr; } } // merge shard keys @@ -3165,15 +3165,15 @@ class MergeShardVector { auto &dest = (*_shard_keys)[shard_id]; auto &src = shard_keys[shard_id]; - _mutexs[shard_id].lock(); + _mutexes[shard_id].lock(); dest.insert(dest.end(), src.begin(), src.end()); - _mutexs[shard_id].unlock(); + _mutexes[shard_id].unlock(); } } private: int _slice_num = 0; - std::mutex *_mutexs = nullptr; + std::mutex *_mutexes = nullptr; std::vector> *_shard_keys; }; diff --git a/paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.cc b/paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.cc index ae028086bcce88..d0805c74e94715 100644 --- a/paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.cc +++ b/paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.cc @@ -93,7 +93,7 @@ static int BuildFusion(Graph* graph, embeddings_tensor->Resize( {embedding_tensor.dims()[0], weightx_tensor.dims()[1]}); - // Multiplie embeddings via WeightsX and add bias + // Multiply embeddings via WeightsX and add bias auto embedding_data = embedding_tensor.data(); auto weightx_data = weightx_tensor.data(); auto embeddings_data = diff --git a/paddle/fluid/framework/new_executor/interpreter/dependency_builder.cc b/paddle/fluid/framework/new_executor/interpreter/dependency_builder.cc index ce11504ef0a3bb..9807b237f41df2 100644 --- a/paddle/fluid/framework/new_executor/interpreter/dependency_builder.cc +++ b/paddle/fluid/framework/new_executor/interpreter/dependency_builder.cc @@ -532,7 +532,7 @@ void DependencyBuilder::ShrinkDownstreamMap() { continue; } - std::set minumum_nexts; + std::set minimum_nexts; for (size_t item : op_downstream_map_->at(i)) { bool not_after_any = true; // find the op that is not executed after any @@ -546,12 +546,12 @@ void DependencyBuilder::ShrinkDownstreamMap() { } if (not_after_any) { VLOG(8) << "downstream op of " << i << ": " << item; - minumum_nexts.insert(item); + minimum_nexts.insert(item); } } // NOTE(Ruibiao): op_happens_before will not be changed when shrink // downstream map - (*op_downstream_map_)[i] = minumum_nexts; + (*op_downstream_map_)[i] = minimum_nexts; } VLOG(8) << "Finish shrink downstream map"; VLOG(8) << "downstream count: " << CountDownstreamMap(*op_downstream_map_); @@ -1008,7 +1008,7 @@ void DependencyBuilderSimplify::ShrinkDownstreamMap() { continue; } - std::set minumum_nexts; + std::set minimum_nexts; for (size_t item : op_downstream_map_.at(i)) { bool not_after_any = true; // find the op that is not executed any @@ -1022,12 +1022,12 @@ void DependencyBuilderSimplify::ShrinkDownstreamMap() { } if (not_after_any) { VLOG(8) << "downstream op of " << i << ": " << item; - minumum_nexts.insert(item); + minimum_nexts.insert(item); } } // NOTE(Ruibiao): op_happens_before will not be changed when shrink // downstream map - op_downstream_map_.at(i) = minumum_nexts; + op_downstream_map_.at(i) = minimum_nexts; } VLOG(8) << "Finish shrink downstream map"; VLOG(8) << "downstream count: " << CountDownstreamMap(op_downstream_map_); diff --git a/paddle/fluid/framework/new_executor/pir_interpreter.cc b/paddle/fluid/framework/new_executor/pir_interpreter.cc index 65928aaa78fc9d..cae032233dd39a 100644 --- a/paddle/fluid/framework/new_executor/pir_interpreter.cc +++ b/paddle/fluid/framework/new_executor/pir_interpreter.cc @@ -1413,7 +1413,7 @@ void PirInterpreter::CalculateLastLiveOps() { } VLOG(4) << "var_ref_count_.size() : " << var_ref_count_.size(); for (size_t i = 0; i < last_live_ops_.size(); ++i) { - std::set minumum_last_live_ops; + std::set minimum_last_live_ops; for (size_t item : last_live_ops_[i]) { bool not_before_any = true; // find the op that is not executed before any @@ -1429,11 +1429,11 @@ void PirInterpreter::CalculateLastLiveOps() { VLOG(6) << "last live op of var " << i << " " << value_exe_info_->GetNameById(static_cast(i)) << " : " << item << " " << vec_instruction_base_[item]->Name(); - minumum_last_live_ops.insert(item); + minimum_last_live_ops.insert(item); vec_instruction_base_[item]->AddGCCheckVar(i); } } - last_live_ops_[i] = minumum_last_live_ops; + last_live_ops_[i] = minimum_last_live_ops; var_ref_count_[i] = static_cast(last_live_ops_[i].size()); } VLOG(4) << "shrink the last_live_ops list for all vars in skip_gc_vars"; diff --git a/paddle/fluid/framework/new_executor/program_interpreter.cc b/paddle/fluid/framework/new_executor/program_interpreter.cc index f83efd767a5d5a..7ce7a68d5c99dd 100644 --- a/paddle/fluid/framework/new_executor/program_interpreter.cc +++ b/paddle/fluid/framework/new_executor/program_interpreter.cc @@ -842,7 +842,7 @@ void ProgramInterpreter::Convert( // in this case, a is the input of op1 and op2, we only need to check // a after op2, because op2 always uses a after op1. for (size_t i = 0; i < last_live_ops_.size(); ++i) { - std::set minumum_last_live_ops; + std::set minimum_last_live_ops; for (size_t item : last_live_ops_[i]) { bool not_before_any = true; // find the op that is not executed before any @@ -858,14 +858,14 @@ void ProgramInterpreter::Convert( VLOG(8) << "last live op of var " << i << " " << var_scope_.GetNameById(static_cast(i)) << " : " << item << " " << vec_instruction_[item].OpBase()->Type(); - minumum_last_live_ops.insert(item); + minimum_last_live_ops.insert(item); if (!(var_scope_.VarDesc(static_cast(i)) && var_scope_.VarDesc(static_cast(i))->Persistable())) { vec_instruction_[item].AddGCCheckVar(i); } } } - last_live_ops_[i] = minumum_last_live_ops; + last_live_ops_[i] = minimum_last_live_ops; vec_meta_info[i].var_ref_count_ = static_cast(last_live_ops_[i].size()); } diff --git a/paddle/fluid/framework/new_executor/workqueue/events_waiter.h b/paddle/fluid/framework/new_executor/workqueue/events_waiter.h index a1c2f506b85d6a..0ab47375f996da 100644 --- a/paddle/fluid/framework/new_executor/workqueue/events_waiter.h +++ b/paddle/fluid/framework/new_executor/workqueue/events_waiter.h @@ -29,7 +29,7 @@ namespace framework { // A multiplexing waiter, be able to wait multiple kinds of events // simultaneously. -// Muti-Producer single-consumer single-slot message-queue. +// Multi-Producer single-consumer single-slot message-queue. class EventsWaiter { public: using EventId = std::size_t; diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index 19391938ec6c45..3bab6c0389c640 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -1815,7 +1815,7 @@ void OperatorWithKernel::RunImpl(const Scope& scope, if (is_xpu_kp_support) { auto expected_kernel_key_library_type = kernel_type_->library_type_; kernel_type_->library_type_ = LibraryType::kKP; - VLOG(3) << "modifing XPU KP kernel in static graph: " + VLOG(3) << "modifying XPU KP kernel in static graph: " << phi_kernel_name << ", using_kernel_key:" << *kernel_type_.get(); auto try_phi_kernel_key = @@ -1899,7 +1899,7 @@ void OperatorWithKernel::RunImpl(const Scope& scope, if (is_xpu_kp_support) { auto expected_kernel_key_library_type = kernel_type_->library_type_; kernel_type_->library_type_ = LibraryType::kKP; - VLOG(3) << "modifing XPU KP kernel in static graph: " + VLOG(3) << "modifying XPU KP kernel in static graph: " << phi_kernel_name << ", using_kernel_key:" << *kernel_type_.get(); auto try_phi_kernel_key = diff --git a/paddle/fluid/framework/var_desc.h b/paddle/fluid/framework/var_desc.h index 13af5f76d57664..639f98c0db848e 100644 --- a/paddle/fluid/framework/var_desc.h +++ b/paddle/fluid/framework/var_desc.h @@ -222,7 +222,7 @@ class TEST_API VarDesc { proto::VarType::TensorDesc *mutable_tensor_desc(); std::vector mutable_tensor_descs(); - // Is it really needed? Or just mantain a ptr from the block? + // Is it really needed? Or just maintain a ptr from the block? proto::VarDesc desc_; AttributeMap attrs_; diff --git a/paddle/fluid/imperative/prepared_operator.cc b/paddle/fluid/imperative/prepared_operator.cc index 09b3a14133db1b..0e4c21094fe127 100644 --- a/paddle/fluid/imperative/prepared_operator.cc +++ b/paddle/fluid/imperative/prepared_operator.cc @@ -271,7 +271,7 @@ PreparedOp PrepareImpl( if (is_xpu_kp_support) { auto expected_kernel_key_backend = expected_kernel_key.backend(); expected_kernel_key.set_backend(phi::Backend::KPS); - VLOG(3) << "modifing XPU KP kernel: " << phi_kernel_name + VLOG(3) << "modifying XPU KP kernel: " << phi_kernel_name << ", using_kernel_key:" << expected_kernel_key; if (!phi_kernel_factory.HasKernel(phi_kernel_name, diff --git a/paddle/fluid/inference/api/paddle_pass_builder.h b/paddle/fluid/inference/api/paddle_pass_builder.h index 8c2d903e7fb239..acb612052862dd 100644 --- a/paddle/fluid/inference/api/paddle_pass_builder.h +++ b/paddle/fluid/inference/api/paddle_pass_builder.h @@ -102,7 +102,7 @@ class PD_INFER_DECL PaddlePassBuilder { std::vector AnalysisPasses() const { auto passes = analysis_passes_; // To make sure the ir_graph_to_program should be the last pass so any - // modication of IR will persist to the program. + // modification of IR will persist to the program. passes.push_back("ir_graph_to_program_pass"); return passes; } diff --git a/paddle/fluid/pir/transforms/onednn/conv2d_transpose_bn_fuse_pass.cc b/paddle/fluid/pir/transforms/onednn/conv2d_transpose_bn_fuse_pass.cc index 2b5599e0991eec..23a1969b12cb48 100644 --- a/paddle/fluid/pir/transforms/onednn/conv2d_transpose_bn_fuse_pass.cc +++ b/paddle/fluid/pir/transforms/onednn/conv2d_transpose_bn_fuse_pass.cc @@ -137,7 +137,7 @@ class Conv2dTransposeBnOneDNNFusePattern : public paddle::drr::DrrPatternBase { //--- deal with filter --- // ConvTranpose weight is gIOHW, conv is gOIHW - // We transpose IOHW to IOHW first, then multipy scale, and transpose it to + // We transpose IOHW to IOHW first, then multiply scale, and transpose it to // IOHW again const auto &new_conv2d_filter_shape = res.ComputeAttr( [](const paddle::drr::MatchContext &match_ctx) -> std::vector { @@ -329,7 +329,7 @@ class Conv2dTransposeEltwiseBnOneDNNFusePattern //--- deal with filter --- // ConvTranpose weight is gIOHW, conv is gOIHW - // We transpose IOHW to IOHW first, then multipy scale, and transpose it to + // We transpose IOHW to IOHW first, then multiply scale, and transpose it to // IOHW again const auto &new_conv2d_filter_shape = res.ComputeAttr( [](const paddle::drr::MatchContext &match_ctx) -> std::vector { diff --git a/paddle/phi/core/memory/allocation/memory_block.h b/paddle/phi/core/memory/allocation/memory_block.h index 631fca44f5157b..1cff33dfc45c7b 100644 --- a/paddle/phi/core/memory/allocation/memory_block.h +++ b/paddle/phi/core/memory/allocation/memory_block.h @@ -37,7 +37,7 @@ struct MemoryBlock { // init saves the MemoryBlock::Desc of the memory block in a MetadataCache. // If it is a CPU memory block, the MetadataCache writes the // MemoryBlock::Desc to the beginning of the block; or, if it is a GPU memory - // block, the MetadataCache writes the Meatadata to a std::map in + // block, the MetadataCache writes the Metadata to a std::map in // the CPU. void Init(MetadataCache* cache, Type t, @@ -123,7 +123,7 @@ class MetadataCache { // Returns the MemoryBlock::Desc for a memory block. When MetadataCache is // used to manage CPU memory, the MemoryBlock::Desc resides at the beginning // of the memory block; when used to manage GPU memory, the - // Meatadata resides in CPU memory indexed by cache_. + // Metadata resides in CPU memory indexed by cache_. MemoryBlock::Desc* LoadDesc(MemoryBlock* memory_block); // Saves the MemoryBlock::Desc of a memory block into the cache. For CPU diff --git a/paddle/phi/infermeta/spmd_rules/default_data_parallel.cc b/paddle/phi/infermeta/spmd_rules/default_data_parallel.cc index e65fb33615ae69..880be6329db28e 100644 --- a/paddle/phi/infermeta/spmd_rules/default_data_parallel.cc +++ b/paddle/phi/infermeta/spmd_rules/default_data_parallel.cc @@ -59,9 +59,9 @@ SpmdInfo DefaultDataParallelInferSpmd( int ndim = outs[i]->dims().size(); TensorDistAttr dist_attr_dst = CopyTensorDistAttrForOutput(ins[0]->dist_attr()); - std::vector dst_dims_maping = + std::vector dst_dims_mapping = GetDefaultDataParallelDimsMapping(batch_axis_dim, ndim); - dist_attr_dst.set_dims_mapping(dst_dims_maping); + dist_attr_dst.set_dims_mapping(dst_dims_mapping); output_dist_attrs.emplace_back(dist_attr_dst); } @@ -71,9 +71,9 @@ SpmdInfo DefaultDataParallelInferSpmd( int ndim = ins[i]->dims().size(); TensorDistAttr dist_attr_dst = CopyTensorDistAttrForOutput(ins[i]->dist_attr()); - std::vector dst_dims_maping = + std::vector dst_dims_mapping = GetDefaultDataParallelDimsMapping(batch_axis_dim, ndim); - dist_attr_dst.set_dims_mapping(dst_dims_maping); + dist_attr_dst.set_dims_mapping(dst_dims_mapping); dst_input_dist_attrs.emplace_back(dist_attr_dst); } @@ -122,9 +122,9 @@ SpmdInfo DefaultDataParallelInferSpmdReverse( int ndim = outs[i]->dims().size(); TensorDistAttr dist_attr_dst = CopyTensorDistAttrForOutput(outs[i]->dist_attr()); - std::vector dst_dims_maping = + std::vector dst_dims_mapping = GetDefaultDataParallelDimsMapping(batch_axis_dim, ndim); - dist_attr_dst.set_dims_mapping(dst_dims_maping); + dist_attr_dst.set_dims_mapping(dst_dims_mapping); output_dist_attrs.emplace_back(dist_attr_dst); } @@ -134,9 +134,9 @@ SpmdInfo DefaultDataParallelInferSpmdReverse( int ndim = ins[i]->dims().size(); TensorDistAttr dist_attr_dst = CopyTensorDistAttrForOutput(ins[i]->dist_attr()); - std::vector dst_dims_maping = + std::vector dst_dims_mapping = GetDefaultDataParallelDimsMapping(batch_axis_dim, ndim); - dist_attr_dst.set_dims_mapping(dst_dims_maping); + dist_attr_dst.set_dims_mapping(dst_dims_mapping); dst_input_dist_attrs.emplace_back(dist_attr_dst); } diff --git a/paddle/phi/infermeta/spmd_rules/replicated.cc b/paddle/phi/infermeta/spmd_rules/replicated.cc index 390117862e04e1..3134b428dd5216 100644 --- a/paddle/phi/infermeta/spmd_rules/replicated.cc +++ b/paddle/phi/infermeta/spmd_rules/replicated.cc @@ -46,8 +46,8 @@ SpmdInfo ReplicatedInferSpmd(const std::vector& ins, int ndim = outs[i]->dims().size(); TensorDistAttr dist_attr_dst = CopyTensorDistAttrForOutput(ins[0]->dist_attr()); - std::vector dst_dims_maping = GetReplicatedDimsMapping(ndim); - dist_attr_dst.set_dims_mapping(dst_dims_maping); + std::vector dst_dims_mapping = GetReplicatedDimsMapping(ndim); + dist_attr_dst.set_dims_mapping(dst_dims_mapping); output_dist_attrs.emplace_back(dist_attr_dst); } @@ -61,8 +61,8 @@ SpmdInfo ReplicatedInferSpmd(const std::vector& ins, } TensorDistAttr dist_attr_dst = CopyTensorDistAttrForOutput(ins[i]->dist_attr()); - std::vector dst_dims_maping = GetReplicatedDimsMapping(ndim); - dist_attr_dst.set_dims_mapping(dst_dims_maping); + std::vector dst_dims_mapping = GetReplicatedDimsMapping(ndim); + dist_attr_dst.set_dims_mapping(dst_dims_mapping); dst_input_dist_attrs.emplace_back(dist_attr_dst); } @@ -103,8 +103,8 @@ SpmdInfo ReplicatedInferSpmdReverse( int ndim = outs[i]->dims().size(); TensorDistAttr dist_attr_dst = CopyTensorDistAttrForOutput(outs[i]->dist_attr()); - std::vector dst_dims_maping = GetReplicatedDimsMapping(ndim); - dist_attr_dst.set_dims_mapping(dst_dims_maping); + std::vector dst_dims_mapping = GetReplicatedDimsMapping(ndim); + dist_attr_dst.set_dims_mapping(dst_dims_mapping); output_dist_attrs.emplace_back(dist_attr_dst); } @@ -114,8 +114,8 @@ SpmdInfo ReplicatedInferSpmdReverse( int ndim = ins[i]->dims().size(); TensorDistAttr dist_attr_dst = CopyTensorDistAttrForOutput(ins[i]->dist_attr()); - std::vector dst_dims_maping = GetReplicatedDimsMapping(ndim); - dist_attr_dst.set_dims_mapping(dst_dims_maping); + std::vector dst_dims_mapping = GetReplicatedDimsMapping(ndim); + dist_attr_dst.set_dims_mapping(dst_dims_mapping); dst_input_dist_attrs.emplace_back(dist_attr_dst); } @@ -148,19 +148,19 @@ SpmdInfo ReplicatedInferDynamic( int64_t ninputs = static_cast(inputs.size()); SpmdInfo spmd_info; - auto build_tensor_dist_attr = - [&nonnull_inputs](const DistMetaTensor& dist_meta_tensor) { - int ndim = dist_meta_tensor.dims().size(); - TensorDistAttr dist_attr_dst = - CopyTensorDistAttrForOutput(dist_meta_tensor.dist_attr()); - // `ndim == -1` means input is nullptr - if (ndim >= 0) { - std::vector dst_dims_maping = GetReplicatedDimsMapping(ndim); - dist_attr_dst.set_dims_mapping(dst_dims_maping); - nonnull_inputs.push_back(&dist_meta_tensor); - } - return dist_attr_dst; - }; + auto build_tensor_dist_attr = [&nonnull_inputs]( + const DistMetaTensor& dist_meta_tensor) { + int ndim = dist_meta_tensor.dims().size(); + TensorDistAttr dist_attr_dst = + CopyTensorDistAttrForOutput(dist_meta_tensor.dist_attr()); + // `ndim == -1` means input is nullptr + if (ndim >= 0) { + std::vector dst_dims_mapping = GetReplicatedDimsMapping(ndim); + dist_attr_dst.set_dims_mapping(dst_dims_mapping); + nonnull_inputs.push_back(&dist_meta_tensor); + } + return dist_attr_dst; + }; for (int64_t i = 0; i < ninputs; i++) { if (paddle::holds_alternative(inputs[i])) { diff --git a/paddle/phi/kernels/cpu/overlap_add_grad_kernel.cc b/paddle/phi/kernels/cpu/overlap_add_grad_kernel.cc index 16415533df18f7..eb24ee1bdd6ae7 100644 --- a/paddle/phi/kernels/cpu/overlap_add_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/overlap_add_grad_kernel.cc @@ -41,7 +41,7 @@ void OverlapAddGradKernel(const Context& dev_ctx, : out_grad.dims()[static_cast(out_grad_rank) - 1]); // When the number of input dims is larger than 2, it needs to copy - // from x to resize input into 2d and output into 3d. Morevoer, output + // from x to resize input into 2d and output into 3d. Moreover, output // dims will be restored at the last step. DenseTensor out_grad_(out_grad.type()); out_grad_ = out_grad; diff --git a/paddle/phi/kernels/fusion/gpu/fused_dropout_add_grad_kernel.cu b/paddle/phi/kernels/fusion/gpu/fused_dropout_add_grad_kernel.cu index 90a511a46243e1..f185a28dac46b6 100644 --- a/paddle/phi/kernels/fusion/gpu/fused_dropout_add_grad_kernel.cu +++ b/paddle/phi/kernels/fusion/gpu/fused_dropout_add_grad_kernel.cu @@ -24,11 +24,11 @@ #include "paddle/phi/kernels/funcs/dropout_impl.cu.h" static constexpr int kNumCUDAThreads = 512; -static constexpr int kNumMaxinumNumBlocks = 4096; +static constexpr int kNumMaximumNumBlocks = 4096; static inline int NumBlocks(const int N) { return std::min((N + kNumCUDAThreads - 1) / kNumCUDAThreads, - kNumMaxinumNumBlocks); + kNumMaximumNumBlocks); } namespace phi { diff --git a/paddle/phi/kernels/gpu/assign_pos_kernel.cu b/paddle/phi/kernels/gpu/assign_pos_kernel.cu index 891a18fc413155..bcb4283e953df8 100644 --- a/paddle/phi/kernels/gpu/assign_pos_kernel.cu +++ b/paddle/phi/kernels/gpu/assign_pos_kernel.cu @@ -21,11 +21,11 @@ namespace phi { static constexpr int kNumCUDAThreads = 512; -static constexpr int kNumMaxinumNumBlocks = 4096; +static constexpr int kNumMaximumNumBlocks = 4096; static inline int NumBlocks(const int N) { return std::min((N + kNumCUDAThreads - 1) / kNumCUDAThreads, - kNumMaxinumNumBlocks); + kNumMaximumNumBlocks); } template diff --git a/paddle/phi/kernels/gpu/c_embedding_grad_kernel.cu b/paddle/phi/kernels/gpu/c_embedding_grad_kernel.cu index ee50761ebbad7c..c8df1091b67eb5 100644 --- a/paddle/phi/kernels/gpu/c_embedding_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/c_embedding_grad_kernel.cu @@ -28,11 +28,11 @@ COMMON_DECLARE_int64(embedding_deterministic); namespace phi { static constexpr int kNumCUDAThreads = 512; -static constexpr int kNumMaxinumNumBlocks = 4096; +static constexpr int kNumMaximumNumBlocks = 4096; static inline int NumBlocks(const int N) { return std::min((N + kNumCUDAThreads - 1) / kNumCUDAThreads, - kNumMaxinumNumBlocks); + kNumMaximumNumBlocks); } template diff --git a/paddle/phi/kernels/gpu/c_embedding_kernel.cu b/paddle/phi/kernels/gpu/c_embedding_kernel.cu index 57da8b801e6126..f73db3e02540fe 100644 --- a/paddle/phi/kernels/gpu/c_embedding_kernel.cu +++ b/paddle/phi/kernels/gpu/c_embedding_kernel.cu @@ -20,11 +20,11 @@ namespace phi { static constexpr int kNumCUDAThreads = 512; -static constexpr int kNumMaxinumNumBlocks = 4096; +static constexpr int kNumMaximumNumBlocks = 4096; static inline int NumBlocks(const int N) { return std::min((N + kNumCUDAThreads - 1) / kNumCUDAThreads, - kNumMaxinumNumBlocks); + kNumMaximumNumBlocks); } template diff --git a/paddle/phi/kernels/gpu/c_softmax_with_cross_entropy_grad_kernel.cu b/paddle/phi/kernels/gpu/c_softmax_with_cross_entropy_grad_kernel.cu index 26380df9814059..5ac4c3784f45f2 100644 --- a/paddle/phi/kernels/gpu/c_softmax_with_cross_entropy_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/c_softmax_with_cross_entropy_grad_kernel.cu @@ -27,11 +27,11 @@ namespace phi { static constexpr int kNumCUDAThreads = 512; -static constexpr int64_t kNumMaxinumNumBlocks = 4096; +static constexpr int64_t kNumMaximumNumBlocks = 4096; static inline int64_t NumBlocks(const int64_t N) { return std::min((N + kNumCUDAThreads - 1) / kNumCUDAThreads, - kNumMaxinumNumBlocks); + kNumMaximumNumBlocks); } template diff --git a/paddle/phi/kernels/gpu/c_softmax_with_cross_entropy_kernel.cu b/paddle/phi/kernels/gpu/c_softmax_with_cross_entropy_kernel.cu index da4b033a89e45f..8c64272165cafe 100644 --- a/paddle/phi/kernels/gpu/c_softmax_with_cross_entropy_kernel.cu +++ b/paddle/phi/kernels/gpu/c_softmax_with_cross_entropy_kernel.cu @@ -44,11 +44,11 @@ struct CSoftmaxWithCrossEntropyFunctor { }; static constexpr int kNumCUDAThreads = 512; -static constexpr int64_t kNumMaxinumNumBlocks = 4096; +static constexpr int64_t kNumMaximumNumBlocks = 4096; static inline int64_t NumBlocks(const int64_t N) { return std::min((N + kNumCUDAThreads - 1) / kNumCUDAThreads, - kNumMaxinumNumBlocks); + kNumMaximumNumBlocks); } template diff --git a/paddle/phi/kernels/gpu/c_split_kernel.cu b/paddle/phi/kernels/gpu/c_split_kernel.cu index b9796e784d04f9..8ec8aefa670693 100644 --- a/paddle/phi/kernels/gpu/c_split_kernel.cu +++ b/paddle/phi/kernels/gpu/c_split_kernel.cu @@ -19,11 +19,11 @@ namespace phi { static constexpr int64_t kNumCUDAThreads = 512; -static constexpr int64_t kNumMaxinumNumBlocks = 4096; +static constexpr int64_t kNumMaximumNumBlocks = 4096; static inline int64_t NumBlocks(const int64_t N) { return std::min((N + kNumCUDAThreads - 1) / kNumCUDAThreads, - kNumMaxinumNumBlocks); + kNumMaximumNumBlocks); } template diff --git a/paddle/phi/kernels/gpu/class_center_sample_kernel.cu b/paddle/phi/kernels/gpu/class_center_sample_kernel.cu index dd9ac32a6196f2..b817bebcc3cd4f 100644 --- a/paddle/phi/kernels/gpu/class_center_sample_kernel.cu +++ b/paddle/phi/kernels/gpu/class_center_sample_kernel.cu @@ -48,11 +48,11 @@ namespace phi { i += step) static constexpr int kNumCUDAThreads = 512; -static constexpr int kNumMaxinumNumBlocks = 4096; +static constexpr int kNumMaximumNumBlocks = 4096; inline int32_t NumBlocks(const int32_t n) { return std::min((n + kNumCUDAThreads - 1) / kNumCUDAThreads, - kNumMaxinumNumBlocks); + kNumMaximumNumBlocks); } template diff --git a/paddle/phi/kernels/gpu/collect_fpn_proposals_kernel.cu b/paddle/phi/kernels/gpu/collect_fpn_proposals_kernel.cu index 2c3c3cdb550a8f..f75694421d6a88 100644 --- a/paddle/phi/kernels/gpu/collect_fpn_proposals_kernel.cu +++ b/paddle/phi/kernels/gpu/collect_fpn_proposals_kernel.cu @@ -36,13 +36,13 @@ namespace cub = hipcub; namespace phi { static constexpr int kNumCUDAThreads = 64; -static constexpr int kNumMaxinumNumBlocks = 4096; +static constexpr int kNumMaximumNumBlocks = 4096; const int kBBoxSize = 4; static inline int NumBlocks(const int N) { return std::min((N + kNumCUDAThreads - 1) / kNumCUDAThreads, - kNumMaxinumNumBlocks); + kNumMaximumNumBlocks); } static __global__ void GetLengthLoD(const int nthreads, diff --git a/paddle/phi/kernels/gpu/distribute_fpn_proposals_kernel.cu b/paddle/phi/kernels/gpu/distribute_fpn_proposals_kernel.cu index 95b70da2ff4830..6370b41b53c5e3 100644 --- a/paddle/phi/kernels/gpu/distribute_fpn_proposals_kernel.cu +++ b/paddle/phi/kernels/gpu/distribute_fpn_proposals_kernel.cu @@ -36,13 +36,13 @@ namespace cub = hipcub; namespace phi { static constexpr int kNumCUDAThreads = 64; -static constexpr int kNumMaxinumNumBlocks = 4096; +static constexpr int kNumMaximumNumBlocks = 4096; int const BBoxSize = 4; static inline int NumBlocks(const int N) { return std::min((N + kNumCUDAThreads - 1) / kNumCUDAThreads, - kNumMaxinumNumBlocks); + kNumMaximumNumBlocks); } template diff --git a/paddle/phi/kernels/gpu/nll_loss.h b/paddle/phi/kernels/gpu/nll_loss.h index 648b69b45253c1..badb5a6a74a44f 100644 --- a/paddle/phi/kernels/gpu/nll_loss.h +++ b/paddle/phi/kernels/gpu/nll_loss.h @@ -26,11 +26,11 @@ namespace phi { static constexpr int kNumCUDAThreads = 512; -static constexpr int kNumMaxinumNumBlocks = 4096; +static constexpr int kNumMaximumNumBlocks = 4096; static const int NTHREADS = 32; static inline int NumBlocks(const int N) { return std::min((N + kNumCUDAThreads - 1) / kNumCUDAThreads, - kNumMaxinumNumBlocks); + kNumMaximumNumBlocks); } template diff --git a/paddle/phi/kernels/gpu/overlap_add_grad_kernel.cu b/paddle/phi/kernels/gpu/overlap_add_grad_kernel.cu index 337620a556db5e..b9b16560adde46 100644 --- a/paddle/phi/kernels/gpu/overlap_add_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/overlap_add_grad_kernel.cu @@ -38,7 +38,7 @@ void OverlapAddGradKernel(const Context& dev_ctx, (axis == 0) ? out_grad.dims()[0] : out_grad.dims()[out_grad_rank - 1]; // When the number of input dims is larger than 2, it needs to copy - // from x to resize input into 2d and output into 3d. Morevoer, output + // from x to resize input into 2d and output into 3d. Moreover, output // dims will be restored at the last step. DenseTensor out_grad_(out_grad.type()); out_grad_ = out_grad; diff --git a/paddle/phi/kernels/gpu/prune_gate_by_capacity_kernel.cu b/paddle/phi/kernels/gpu/prune_gate_by_capacity_kernel.cu index 24614d50446d01..28f48abaf98ec1 100644 --- a/paddle/phi/kernels/gpu/prune_gate_by_capacity_kernel.cu +++ b/paddle/phi/kernels/gpu/prune_gate_by_capacity_kernel.cu @@ -22,11 +22,11 @@ namespace phi { static constexpr int kNumCUDAThreads = 512; -static constexpr int kNumMaxinumNumBlocks = 4096; +static constexpr int kMaximumNumBlocks = 4096; static inline int NumBlocks(const int N) { return std::min((N + kNumCUDAThreads - 1) / kNumCUDAThreads, - kNumMaxinumNumBlocks); + kMaximumNumBlocks); } template diff --git a/paddle/phi/kernels/gpu/roi_align_grad_kernel.cu b/paddle/phi/kernels/gpu/roi_align_grad_kernel.cu index f388ede1fe73ca..fe6826efe3ddd7 100644 --- a/paddle/phi/kernels/gpu/roi_align_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/roi_align_grad_kernel.cu @@ -26,12 +26,12 @@ namespace phi { static constexpr int kNumCUDAThreads = 512; -static constexpr int kNumMaxinumNumBlocks = 4096; +static constexpr int kNumMaximumNumBlocks = 4096; static constexpr int kROISize = 4; static inline int NumBlocks(const int N) { return std::min((N + kNumCUDAThreads - 1) / kNumCUDAThreads, - kNumMaxinumNumBlocks); + kNumMaximumNumBlocks); } template diff --git a/paddle/phi/kernels/gpu/roi_align_kernel.cu b/paddle/phi/kernels/gpu/roi_align_kernel.cu index b2de0d83f8917d..f3bf4062dd4047 100644 --- a/paddle/phi/kernels/gpu/roi_align_kernel.cu +++ b/paddle/phi/kernels/gpu/roi_align_kernel.cu @@ -23,12 +23,12 @@ namespace phi { static constexpr int kNumCUDAThreads = 512; -static constexpr int kNumMaxinumNumBlocks = 4096; +static constexpr int kNumMaximumNumBlocks = 4096; static constexpr int kROISize = 4; static inline int NumBlocks(const int N) { return std::min((N + kNumCUDAThreads - 1) / kNumCUDAThreads, - kNumMaxinumNumBlocks); + kNumMaximumNumBlocks); } template diff --git a/paddle/phi/kernels/gpu/roi_pool_grad_kernel.cu b/paddle/phi/kernels/gpu/roi_pool_grad_kernel.cu index b3ec72a5032eca..496f8ad103cba2 100644 --- a/paddle/phi/kernels/gpu/roi_pool_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/roi_pool_grad_kernel.cu @@ -25,11 +25,11 @@ namespace phi { static constexpr int kNumCUDAThreads = 512; -static constexpr int kNumMaxinumNumBlocks = 4096; +static constexpr int kNumMaximumNumBlocks = 4096; static inline int NumBlocks(const int N) { return std::min((N + kNumCUDAThreads - 1) / kNumCUDAThreads, - kNumMaxinumNumBlocks); + kNumMaximumNumBlocks); } template diff --git a/paddle/phi/kernels/gpu/roi_pool_kernel.cu b/paddle/phi/kernels/gpu/roi_pool_kernel.cu index 968fea06ce2a4b..b2dde0075d8f22 100644 --- a/paddle/phi/kernels/gpu/roi_pool_kernel.cu +++ b/paddle/phi/kernels/gpu/roi_pool_kernel.cu @@ -23,11 +23,11 @@ namespace phi { static constexpr int kNumCUDAThreads = 512; -static constexpr int kNumMaxinumNumBlocks = 4096; +static constexpr int kNumMaximumNumBlocks = 4096; static inline int NumBlocks(const int N) { return std::min((N + kNumCUDAThreads - 1) / kNumCUDAThreads, - kNumMaxinumNumBlocks); + kNumMaximumNumBlocks); } template diff --git a/paddle/phi/kernels/gpu/sigmoid_cross_entropy_with_logits.h b/paddle/phi/kernels/gpu/sigmoid_cross_entropy_with_logits.h index 2baa96d2a51600..7de11ad7463593 100644 --- a/paddle/phi/kernels/gpu/sigmoid_cross_entropy_with_logits.h +++ b/paddle/phi/kernels/gpu/sigmoid_cross_entropy_with_logits.h @@ -36,11 +36,11 @@ namespace cub = hipcub; namespace phi { static constexpr int kNumCUDAThreads = 512; -static constexpr int kNumMaxinumNumBlocks = 4096; +static constexpr int kNumMaximumNumBlocks = 4096; static inline int NumBlocks(const int N) { return std::min((N + kNumCUDAThreads - 1) / kNumCUDAThreads, - kNumMaxinumNumBlocks); + kNumMaximumNumBlocks); } template diff --git a/paddle/phi/kernels/impl/frame_kernel_impl.h b/paddle/phi/kernels/impl/frame_kernel_impl.h index fa0c5658efe550..fb75fa30a66658 100644 --- a/paddle/phi/kernels/impl/frame_kernel_impl.h +++ b/paddle/phi/kernels/impl/frame_kernel_impl.h @@ -31,7 +31,7 @@ void FrameKernel(const Context& dev_ctx, const int n_frames = (axis == 0) ? out->dims()[0] : out->dims()[out_rank - 1]; const int seq_length = (axis == 0) ? x.dims()[0] : x.dims()[x_rank - 1]; // When the number of input dims is larger than 2, it needs to copy - // from x to resize input into 2d and output into 3d. Morevoer, output + // from x to resize input into 2d and output into 3d. Moreover, output // dims will be restored at the last step. DenseTensor x_tmp = x; diff --git a/paddle/phi/kernels/impl/margin_cross_entropy.cu.h b/paddle/phi/kernels/impl/margin_cross_entropy.cu.h index ed861cde093223..e9590e05d84531 100644 --- a/paddle/phi/kernels/impl/margin_cross_entropy.cu.h +++ b/paddle/phi/kernels/impl/margin_cross_entropy.cu.h @@ -48,11 +48,11 @@ namespace cub = hipcub; namespace phi { static constexpr int kNumCUDAThreads = 512; -static constexpr int kNumMaxinumNumBlocks = 4096; +static constexpr int kNumMaximumNumBlocks = 4096; static inline int NumBlocks(const int N) { return std::min((N + kNumCUDAThreads - 1) / kNumCUDAThreads, - kNumMaxinumNumBlocks); + kNumMaximumNumBlocks); } template diff --git a/paddle/phi/kernels/kps/elementwise_kernel.cu b/paddle/phi/kernels/kps/elementwise_kernel.cu index 183bcac10fc9ed..13b4f44df1c19d 100644 --- a/paddle/phi/kernels/kps/elementwise_kernel.cu +++ b/paddle/phi/kernels/kps/elementwise_kernel.cu @@ -22,7 +22,7 @@ #include "paddle/phi/kernels/legacy/elementwise_add_kernel.h" #include "paddle/phi/kernels/legacy/elementwise_divide_kernel.h" #include "paddle/phi/kernels/legacy/elementwise_kernel.h" -#include "paddle/phi/kernels/legacy/elementwise_multipy_kernel.h" +#include "paddle/phi/kernels/legacy/elementwise_multiply_kernel.h" #include "paddle/phi/kernels/legacy/elementwise_subtract_kernel.h" namespace phi { diff --git a/paddle/phi/kernels/legacy/cpu/fused_elementwise_kernel.cc b/paddle/phi/kernels/legacy/cpu/fused_elementwise_kernel.cc index ec640c2257c3f8..393ff0889c380d 100644 --- a/paddle/phi/kernels/legacy/cpu/fused_elementwise_kernel.cc +++ b/paddle/phi/kernels/legacy/cpu/fused_elementwise_kernel.cc @@ -20,7 +20,7 @@ #include "paddle/phi/kernels/impl/elementwise_kernel_impl.h" #include "paddle/phi/kernels/legacy/elementwise_add_kernel.h" #include "paddle/phi/kernels/legacy/elementwise_divide_kernel.h" -#include "paddle/phi/kernels/legacy/elementwise_multipy_kernel.h" +#include "paddle/phi/kernels/legacy/elementwise_multiply_kernel.h" #include "paddle/phi/kernels/legacy/elementwise_subtract_kernel.h" namespace phi { diff --git a/paddle/phi/kernels/legacy/elementwise_multipy_kernel.h b/paddle/phi/kernels/legacy/elementwise_multiply_kernel.h similarity index 100% rename from paddle/phi/kernels/legacy/elementwise_multipy_kernel.h rename to paddle/phi/kernels/legacy/elementwise_multiply_kernel.h diff --git a/paddle/phi/kernels/selected_rows/elementwise_multiply_kernel.h b/paddle/phi/kernels/selected_rows/elementwise_multiply_kernel.h index e48ff7d8cc6277..9e0a991b26a058 100644 --- a/paddle/phi/kernels/selected_rows/elementwise_multiply_kernel.h +++ b/paddle/phi/kernels/selected_rows/elementwise_multiply_kernel.h @@ -16,7 +16,7 @@ limitations under the License. */ #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/selected_rows.h" -#include "paddle/phi/kernels/legacy/elementwise_multipy_kernel.h" +#include "paddle/phi/kernels/legacy/elementwise_multiply_kernel.h" namespace phi { namespace sr { diff --git a/paddle/phi/kernels/sparse/gpu/fused_attention_grad_kernel.cu b/paddle/phi/kernels/sparse/gpu/fused_attention_grad_kernel.cu index 91ac5f1a1509b6..fe780b4e1c4179 100644 --- a/paddle/phi/kernels/sparse/gpu/fused_attention_grad_kernel.cu +++ b/paddle/phi/kernels/sparse/gpu/fused_attention_grad_kernel.cu @@ -71,7 +71,7 @@ void FusedAttentionCsrGradKernel(const Context& dev_ctx, MatmulCsrDenseGradKernel( dev_ctx, softmax, value, dout, &dsoftmax, dvalue); - /* Step2: Calculate grad of sdd_result, manualy not reuse */ + /* Step2: Calculate grad of sdd_result, manually not reuse */ SparseCsrTensor d_sdd_result; EmptyLikeCsrKernel(dev_ctx, dsoftmax, &d_sdd_result); auto q_dim = query.dims(); diff --git a/python/paddle/distributed/auto_parallel/moe_utils.py b/python/paddle/distributed/auto_parallel/moe_utils.py index b6ac80231b4d0a..9a1957be3ea9a0 100644 --- a/python/paddle/distributed/auto_parallel/moe_utils.py +++ b/python/paddle/distributed/auto_parallel/moe_utils.py @@ -261,7 +261,7 @@ def _dist_reshape( ): """ Reshape the local tensors of the dist tensor on each rank, - and mannualy set the process_mesh and placements of the output. + and manually set the process_mesh and placements of the output. """ tgt_global_shape = infer_positive_shape(dist_tensor.shape, global_shape) tgt_local_shape = _cal_local_shape(tgt_global_shape, mesh, placements) diff --git a/python/paddle/distributed/auto_tuner/utils.py b/python/paddle/distributed/auto_tuner/utils.py index eca9b48f287955..d60e3b34fc2153 100644 --- a/python/paddle/distributed/auto_tuner/utils.py +++ b/python/paddle/distributed/auto_tuner/utils.py @@ -439,7 +439,7 @@ def search_all(tuner_cfg): i += 1 if tuner_cfg.get("schedule_mode") != "performance": - # momory sort + # memory sort for rr_dim_cfg in rr_dim_cfgs: cfg = ( list(valid_degree) diff --git a/python/paddle/jit/dy2static/convert_operators.py b/python/paddle/jit/dy2static/convert_operators.py index 82a523f624b20e..9b18b3b4bf42c3 100644 --- a/python/paddle/jit/dy2static/convert_operators.py +++ b/python/paddle/jit/dy2static/convert_operators.py @@ -506,11 +506,11 @@ def new_false_fn(): "Unsupported return type of true_fn and false_fn in cond", str(e) ): raise Dygraph2StaticException( - f"Your if/else have different return type. TODO: add link to modifty. {e}" + f"Your if/else have different return type. TODO: add link to modify. {e}" ) if re.search("Incompatible return values of", str(e)): raise Dygraph2StaticException( - f"Your if/else have different number of return value. TODO: add link to modifty. {e}" + f"Your if/else have different number of return value. TODO: add link to modify. {e}" ) raise e get_args = lambda: helper.get(union_name) diff --git a/python/paddle/utils/cpp_extension/cpp_extension.py b/python/paddle/utils/cpp_extension/cpp_extension.py index da0bc9d5481b17..2a99fae8adfb93 100644 --- a/python/paddle/utils/cpp_extension/cpp_extension.py +++ b/python/paddle/utils/cpp_extension/cpp_extension.py @@ -659,7 +659,7 @@ def win_custom_spawn(cmd): elif isinstance(self.cflags, list): cflags = MSVC_COMPILE_FLAGS + self.cflags cmd += cflags - # Append this macor only when jointly compiling .cc with .cu + # Append this macro only when jointly compiling .cc with .cu if not is_cuda_file(src) and self.contain_cuda_file: cmd.append('-DPADDLE_WITH_CUDA') diff --git a/python/paddle/vision/transforms/transforms.py b/python/paddle/vision/transforms/transforms.py index 0584f836d43ffc..31af62bd8c2488 100644 --- a/python/paddle/vision/transforms/transforms.py +++ b/python/paddle/vision/transforms/transforms.py @@ -1458,7 +1458,7 @@ class Pad(BaseTransform[_InputT, _RetT]): ``reflect`` means pads with reflection of image (without repeating the last value on the edge) padding ``[1, 2, 3, 4]`` with 2 elements on both sides in reflect mode will result in ``[3, 2, 1, 2, 3, 4, 3, 2]``. - ``symmetric`` menas pads with reflection of image (repeating the last value on the edge) + ``symmetric`` means pads with reflection of image (repeating the last value on the edge) padding ``[1, 2, 3, 4]`` with 2 elements on both sides in symmetric mode will result in ``[2, 1, 1, 2, 3, 4, 4, 3]``. keys (list[str]|tuple[str], optional): Same as ``BaseTransform``. Default: None. diff --git a/test/auto_parallel/semi_auto_parallel_for_replicated_spmd.py b/test/auto_parallel/semi_auto_parallel_for_replicated_spmd.py index 7a0ba9bf07899b..c05d1029d4a865 100644 --- a/test/auto_parallel/semi_auto_parallel_for_replicated_spmd.py +++ b/test/auto_parallel/semi_auto_parallel_for_replicated_spmd.py @@ -173,7 +173,7 @@ def test_adamax(self): self.check_tensor_eq(local_inf_norm_out, dist_inf_norm_out) self.check_tensor_eq(local_master_param_out, dist_master_param_out) - # mutiple operators + # multiple operators def test_mse_loss(self): x = np.random.random(size=[4, 4]).astype(self._dtype) y = np.random.random(size=[4]).astype(self._dtype) diff --git a/test/ir/pir/cinn/test_anchor_fusion.py b/test/ir/pir/cinn/test_anchor_fusion.py index a7cff47696998c..80a8300d4ce596 100644 --- a/test/ir/pir/cinn/test_anchor_fusion.py +++ b/test/ir/pir/cinn/test_anchor_fusion.py @@ -108,7 +108,7 @@ def init(): x = paddle.ones((16, 32, 64, 128)) return (x,) - # This case can't be fused to one kernel because muti-downstream + # This case can't be fused to one kernel because multi-downstream # transpose op will sink currently. self.check_accuracy_and_kernel_num(init, func) diff --git a/test/legacy_test/test_case.py b/test/legacy_test/test_case.py index b6c7d6c2d14712..1708fd6ce45b58 100644 --- a/test/legacy_test/test_case.py +++ b/test/legacy_test/test_case.py @@ -605,7 +605,7 @@ def type_error_default(): # when optimizer in case -class TestMutiTask(unittest.TestCase): +class TestMultiTask(unittest.TestCase): def test_optimizer_in_case(self): BATCH_SIZE = 1 diff --git a/test/legacy_test/test_matmul_fp8_op.py b/test/legacy_test/test_matmul_fp8_op.py index 09e0b417f67115..ad09ba17bd4ec9 100644 --- a/test/legacy_test/test_matmul_fp8_op.py +++ b/test/legacy_test/test_matmul_fp8_op.py @@ -51,7 +51,7 @@ def check_fp8_support() -> bool: def _to_fp8_saturated(x: paddle.Tensor, float8_dtype) -> paddle.Tensor: # The default behavior in Paddle for casting to `float8_e4m3fn` - # and `e5m2` is to not saturate. So we saturate here manualy. + # and `e5m2` is to not saturate. So we saturate here manually. if float8_dtype == paddle.float8_e4m3fn: x = x.clip(min=-1 * E4M3_MAX_POS, max=E4M3_MAX_POS) else: diff --git a/test/legacy_test/test_max_min_amax_amin_op.py b/test/legacy_test/test_max_min_amax_amin_op.py index ca6b83f93cb99c..335a2c3334ae8d 100644 --- a/test/legacy_test/test_max_min_amax_amin_op.py +++ b/test/legacy_test/test_max_min_amax_amin_op.py @@ -200,7 +200,7 @@ def init_case(self): self.keepdim = False -# test input grad when out is operated like mutiply +# test input grad when out is operated like multiply class TestMaxMinAmaxAminAPI7(TestMaxMinAmaxAminAPI): def init_case(self): self.x_np = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]]).astype( diff --git a/test/legacy_test/test_tensor_type_promotion.py b/test/legacy_test/test_tensor_type_promotion.py index 0fe923a6af0418..97eb9385d8cb42 100644 --- a/test/legacy_test/test_tensor_type_promotion.py +++ b/test/legacy_test/test_tensor_type_promotion.py @@ -3299,7 +3299,7 @@ def run_api(self): create_test_case(TestAPIMaximumInStatic, 'float32', 'float64', 'float64') -class TestAPIMiniumInStatic(TestOperatorOverloadAddInStatic): +class TestAPIMinimumInStatic(TestOperatorOverloadAddInStatic): def run_api(self): prog = paddle.static.Program() with paddle.static.program_guard(prog): @@ -3312,7 +3312,7 @@ def run_api(self): return res -create_test_case(TestAPIMiniumInStatic, 'float32', 'float64', 'float64') +create_test_case(TestAPIMinimumInStatic, 'float32', 'float64', 'float64') class TestAPINextAfterInStatic(TestOperatorOverloadAddInStatic): diff --git a/test/legacy_test/test_variable.py b/test/legacy_test/test_variable.py index ef4d1b5dec1da1..e51200d4653579 100644 --- a/test/legacy_test/test_variable.py +++ b/test/legacy_test/test_variable.py @@ -578,7 +578,7 @@ def test_dygraph_list_index(self): array = array[0] index = index[0] - def test_static_graph_list_index_muti_dim(self): + def test_static_graph_list_index_multi_dim(self): paddle.enable_static() inps_shape = [3, 4, 5] array = np.arange(self.numel(inps_shape), dtype='float32').reshape( @@ -643,7 +643,7 @@ def test_static_graph_list_index_muti_dim(self): err_msg=f'\n numpy:{y2},\n paddle:{getitem_pp[0]}', ) - def test_dygraph_list_index_muti_dim(self): + def test_dygraph_list_index_multi_dim(self): paddle.disable_static() inps_shape = [3, 4, 5] array = np.arange(self.numel(inps_shape), dtype='float32').reshape( @@ -878,7 +878,7 @@ def test_static_graph_setitem_bool_scalar_index(self): with paddle.static.program_guard(program): self.run_setitem_list_index(array, index, value_np) - def test_static_graph_tensor_index_setitem_muti_dim(self): + def test_static_graph_tensor_index_setitem_multi_dim(self): paddle.enable_static() inps_shape = [3, 4, 5, 4] array = np.arange(self.numel(inps_shape), dtype='float32').reshape( @@ -970,7 +970,7 @@ def test_static_graph_tensor_index_setitem_muti_dim(self): index1 = index1[0] index2 = index2[0] - def test_static_graph_array_index_muti_dim(self): + def test_static_graph_array_index_multi_dim(self): paddle.enable_static() inps_shape = [3, 4, 5, 4] array = np.arange(self.numel(inps_shape), dtype='float32').reshape( @@ -1053,7 +1053,7 @@ def test_static_graph_array_index_muti_dim(self): index1 = index1[0] index2 = index2[0] - def test_dygraph_array_index_muti_dim(self): + def test_dygraph_array_index_multi_dim(self): paddle.disable_static() inps_shape = [3, 4, 5, 4] array = np.arange(self.numel(inps_shape), dtype='float32').reshape( diff --git a/test/ps/__init__.py b/test/ps/__init__.py index 5a5bd1e0048c4b..c46b094697dd36 100644 --- a/test/ps/__init__.py +++ b/test/ps/__init__.py @@ -13,5 +13,5 @@ # limitations under the License.p # Note: On Windows, import form subdirectories such as dirA()->dirB(), current directory -# will still be dirA(), But is should be dirB(). So it will ModulNotFoundError +# will still be dirA(), But is should be dirB(). So it will ModuleNotFoundError # please refer to https://stackoverflow.com/questions/8953844/import-module-from-subfolder diff --git a/test/quantization/quant2_int8_image_classification_comparison.py b/test/quantization/quant2_int8_image_classification_comparison.py index fac217637d54b7..7f6666c7b6a90d 100644 --- a/test/quantization/quant2_int8_image_classification_comparison.py +++ b/test/quantization/quant2_int8_image_classification_comparison.py @@ -262,7 +262,7 @@ def _predict( }, fetch_list=fetch_targets, ) - batch_time = (time.time() - start) * 1000 # in miliseconds + batch_time = (time.time() - start) * 1000 # in milliseconds batch_acc1, batch_acc5 = out[1], out[2] outputs.append(batch_acc1) else: @@ -273,7 +273,7 @@ def _predict( feed={feed_target_names[0]: images}, fetch_list=fetch_targets, ) - batch_time = (time.time() - start) * 1000 # in miliseconds + batch_time = (time.time() - start) * 1000 # in milliseconds outputs.append(out[0]) # Calculate accuracy result batch_acc1, batch_acc5 = self._get_batch_accuracy( diff --git a/test/quantization/quant2_int8_nlp_comparison.py b/test/quantization/quant2_int8_nlp_comparison.py index 985fb62f1d11e9..032f8c49a44c9c 100644 --- a/test/quantization/quant2_int8_nlp_comparison.py +++ b/test/quantization/quant2_int8_nlp_comparison.py @@ -211,7 +211,7 @@ def _predict( start = time.time() predictor.run() - batch_time = (time.time() - start) * 1000 # in miliseconds + batch_time = (time.time() - start) * 1000 # in milliseconds out = [] out = predictor.get_output_handle(output_names[0]).copy_to_cpu() diff --git a/test/quantization/quant_int8_image_classification_comparison.py b/test/quantization/quant_int8_image_classification_comparison.py index 4cfb3bdf798659..f0944eb34b3afe 100644 --- a/test/quantization/quant_int8_image_classification_comparison.py +++ b/test/quantization/quant_int8_image_classification_comparison.py @@ -224,7 +224,7 @@ def _predict( feed={feed_target_names[0]: images}, fetch_list=fetch_targets, ) - batch_time = (time.time() - start) * 1000 # in miliseconds + batch_time = (time.time() - start) * 1000 # in milliseconds outputs.append(out[0]) batch_acc1, batch_acc5 = self._get_batch_accuracy( out[0], labels diff --git a/tools/coverage/gcda_clean.py b/tools/coverage/gcda_clean.py index 2abba39636d076..4b8c433f76f5d8 100644 --- a/tools/coverage/gcda_clean.py +++ b/tools/coverage/gcda_clean.py @@ -85,7 +85,7 @@ def clean(pull_id): # convert paddle/fluid/imperative/CMakeFiles/layer.dir/layer.cc.gcda # to paddle/fluid/imperative/layer.cc.gcda - # modifed to make it more robust + # modified to make it more robust # covert /paddle/build/paddle/phi/backends/CMakeFiles/phi_backends.dir/gpu/cuda/cuda_info.cc.gcda # to /paddle/build/paddle/phi/backends/gpu/cuda/cuda_info.cc.gcda trimmed_tmp = [] diff --git a/tools/gen_pybind11_stub.py b/tools/gen_pybind11_stub.py index 3e8dfe839f14cd..4cfcc19640fff5 100644 --- a/tools/gen_pybind11_stub.py +++ b/tools/gen_pybind11_stub.py @@ -399,7 +399,7 @@ def post_process(output_dir: str): replace_bad_attr(filename) check_remove_syntax_error(filename) - # insert moduels if necessary + # insert modules if necessary insert_import_modules(filename) From d9994aec585b997f1caaa4101a8e5b9a43076029 Mon Sep 17 00:00:00 2001 From: yinfan98 <1106310035@qq.com> Date: Sat, 21 Dec 2024 23:02:15 +0800 Subject: [PATCH 2/2] Update prune_gate_by_capacity_kernel.cu --- paddle/phi/kernels/gpu/prune_gate_by_capacity_kernel.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/phi/kernels/gpu/prune_gate_by_capacity_kernel.cu b/paddle/phi/kernels/gpu/prune_gate_by_capacity_kernel.cu index 28f48abaf98ec1..16d2a67665f67c 100644 --- a/paddle/phi/kernels/gpu/prune_gate_by_capacity_kernel.cu +++ b/paddle/phi/kernels/gpu/prune_gate_by_capacity_kernel.cu @@ -22,11 +22,11 @@ namespace phi { static constexpr int kNumCUDAThreads = 512; -static constexpr int kMaximumNumBlocks = 4096; +static constexpr int kNumMaximumNumBlocks = 4096; static inline int NumBlocks(const int N) { return std::min((N + kNumCUDAThreads - 1) / kNumCUDAThreads, - kMaximumNumBlocks); + kNumMaximumNumBlocks); } template