diff --git a/paddle/phi/api/yaml/generator/api_base.py b/paddle/phi/api/yaml/generator/api_base.py index 86d79f6543efb1..471fc277eb46c0 100644 --- a/paddle/phi/api/yaml/generator/api_base.py +++ b/paddle/phi/api/yaml/generator/api_base.py @@ -899,41 +899,12 @@ def gene_input(self, kernel_tensor_type=None, code_indent=''): return input_name_tensor_map, input_tensor_code - def get_kernel_args(self, kernel_tensor_type=None, code_indent=''): - dense_input_trans_map = { - 'const Tensor&': 'const phi::DenseTensor&', - 'const std::vector&': 'const std::vector&', - 'const paddle::optional': 'paddle::optional', - 'const paddle::optional&': 'const paddle::optional&', - 'const paddle::optional>&': 'const paddle::optional>&', - } - dense_out_trans_map = { - 'Tensor': 'phi::DenseTensor*', - 'std::vector': 'std::vector', - } - sr_input_trans_map = { - 'const Tensor&': 'const phi::SelectedRows&', - 'const paddle::optional&': 'const paddle::optional&', - } - sr_out_trans_map = {'Tensor': 'phi::SelectedRows*'} - input_names = self.inputs['names'] - input_infos = self.inputs['input_info'] - kernel_args_type_list = ['const phi::DeviceContext&'] - - attr_names = self.attrs['names'] - kernel_param = self.kernel['param'] - if kernel_param is None: - kernel_param = input_names + attr_names - - input_name_tensor_map, input_tensor_code = self.gene_input( - kernel_tensor_type, code_indent - ) - - input_tensor_code = ( - input_tensor_code - + f""" + def generate_record_op_info_supplement( + self, input_name_tensor_map, code_indent='', in_auto_parallel=False + ): + record_op_info_supplement_str = f""" {code_indent} if(phi::RecordOpInfoSupplement::IsEnabled()){{""" - ) + single_tensor_names = [] list_tensor_names = [] for input_name, input_tensors in input_name_tensor_map.items(): @@ -946,8 +917,8 @@ def get_kernel_args(self, kernel_tensor_type=None, code_indent=''): else: list_tensor_names.append(input_name) if not single_tensor_names: - input_tensor_code = ( - input_tensor_code + record_op_info_supplement_str = ( + record_op_info_supplement_str + f""" {code_indent} std::vector>> input_shapes;""" ) @@ -955,96 +926,99 @@ def get_kernel_args(self, kernel_tensor_type=None, code_indent=''): for input_name in single_tensor_names: if input_name in self.optional_vars: input_tensors = input_name_tensor_map[input_name] - input_tensor_code = ( - input_tensor_code + record_op_info_supplement_str = ( + record_op_info_supplement_str + f""" {code_indent} std::vector {input_name}_record_shapes;""" ) for input_tensor, _ in input_tensors: - input_tensor_code = ( - input_tensor_code + record_op_info_supplement_str = ( + record_op_info_supplement_str + f""" {code_indent} if({input_tensor}){{ {code_indent} {input_name}_record_shapes.push_back((*{input_tensor}).dims()); {code_indent} }}""" ) - input_tensor_code = ( - input_tensor_code + record_op_info_supplement_str = ( + record_op_info_supplement_str + f""" {code_indent} std::vector>> input_shapes{{""" ) for input_name in single_tensor_names[:-1]: if input_name in self.optional_vars: - input_tensor_code = ( - input_tensor_code + record_op_info_supplement_str = ( + record_op_info_supplement_str + f""" {code_indent} {{"{input_name}", {input_name}_record_shapes}},""" ) else: - input_tensor_code = ( - input_tensor_code + record_op_info_supplement_str = ( + record_op_info_supplement_str + f""" {code_indent} {{"{input_name}", {{""" ) input_tensors = input_name_tensor_map[input_name] for input_tensor, _ in input_tensors[:-1]: - input_tensor_code = ( - input_tensor_code + record_op_info_supplement_str = ( + record_op_info_supplement_str + f""" {code_indent} (*{input_tensor}).dims(),""" ) - input_tensor_code = ( - input_tensor_code + record_op_info_supplement_str = ( + record_op_info_supplement_str + f""" {code_indent} (*{input_tensors[-1][0]}).dims()}}}},""" ) if single_tensor_names[-1] in self.optional_vars: - input_tensor_code = ( - input_tensor_code + record_op_info_supplement_str = ( + record_op_info_supplement_str + f""" {code_indent} {{"{single_tensor_names[-1]}", {code_indent} {single_tensor_names[-1]}_record_shapes}}}};""" ) else: - input_tensor_code = ( - input_tensor_code + record_op_info_supplement_str = ( + record_op_info_supplement_str + f""" {code_indent} {{"{single_tensor_names[-1]}", {{""" ) input_tensors = input_name_tensor_map[single_tensor_names[-1]] for input_tensor, _ in input_tensors[:-1]: - input_tensor_code = ( - input_tensor_code + record_op_info_supplement_str = ( + record_op_info_supplement_str + f""" {code_indent} (*{input_tensor}).dims(),""" ) - input_tensor_code = ( - input_tensor_code + record_op_info_supplement_str = ( + record_op_info_supplement_str + f""" {code_indent} (*{input_tensors[-1][0]}).dims()}}}}}};""" ) if list_tensor_names: - input_tensor_code = ( - input_tensor_code + record_op_info_supplement_str = ( + record_op_info_supplement_str + f""" {code_indent} std::vector ddims_vec;""" ) for input_name in list_tensor_names: - input_tensor_code = ( - input_tensor_code + record_op_info_supplement_str = ( + record_op_info_supplement_str + f""" {code_indent} ddims_vec.clear();""" ) for input_tensor, is_vector in input_name_tensor_map[input_name]: if is_vector: input_tensor_truncate = input_tensor[:-4] - if input_name in self.inplace_map.values(): + if ( + input_name in self.inplace_map.values() + or in_auto_parallel + ): input_tensor_truncate = input_tensor if input_name in self.optional_vars: - input_tensor_code = ( - input_tensor_code + record_op_info_supplement_str = ( + record_op_info_supplement_str + f""" {code_indent} if ({input_tensor_truncate}){{ {code_indent} ddims_vec.reserve({input_tensor_truncate}->size()); @@ -1054,8 +1028,8 @@ def get_kernel_args(self, kernel_tensor_type=None, code_indent=''): {code_indent} }}""" ) else: - input_tensor_code = ( - input_tensor_code + record_op_info_supplement_str = ( + record_op_info_supplement_str + f""" {code_indent} ddims_vec.reserve({input_tensor_truncate}.size()); {code_indent} for (size_t i = 0; i < {input_tensor_truncate}.size(); ++i) {{ @@ -1063,30 +1037,30 @@ def get_kernel_args(self, kernel_tensor_type=None, code_indent=''): {code_indent} }}""" ) else: - input_tensor_code = ( - input_tensor_code + record_op_info_supplement_str = ( + record_op_info_supplement_str + f""" ddims_vec.emplace_back((*{input_tensor}).dims()); {code_indent} """ ) - input_tensor_code = ( - input_tensor_code + record_op_info_supplement_str = ( + record_op_info_supplement_str + f""" {code_indent} input_shapes.emplace_back("{input_name}", ddims_vec);""" ) - input_tensor_code += f""" + record_op_info_supplement_str += f""" {code_indent} phi::AttributeMap attrs;""" for attr_name in self.attrs['names']: if 'IntArray' in self.attrs['attr_info'][attr_name][0]: - input_tensor_code += f""" + record_op_info_supplement_str += f""" {code_indent} attrs["{attr_name}"] = {attr_name}.GetData();""" elif 'vector' in self.attrs['attr_info'][attr_name][0]: - input_tensor_code += f""" + record_op_info_supplement_str += f""" {code_indent} attrs["{attr_name}"] = "";""" # TODO(kuizhiqing) elif 'Scalar' in self.attrs['attr_info'][attr_name][0]: - input_tensor_code += f""" + record_op_info_supplement_str += f""" {code_indent} switch ({attr_name}.dtype()) {{ {code_indent} case DataType::FLOAT32: {code_indent} attrs["{attr_name}"] = static_cast({attr_name}.to()); @@ -1136,15 +1110,54 @@ def get_kernel_args(self, kernel_tensor_type=None, code_indent=''): elif 'Place' in self.attrs['attr_info'][attr_name][0]: pass # no need else: - input_tensor_code += f""" + record_op_info_supplement_str += f""" {code_indent} attrs["{attr_name}"] = {attr_name};""" - input_tensor_code = ( - input_tensor_code + record_op_info_supplement_str = ( + record_op_info_supplement_str + f""" {code_indent} phi::RecordOpInfoSupplement("{self.api}", input_shapes, attrs); {code_indent} }}""" ) + return record_op_info_supplement_str + + def get_kernel_args(self, kernel_tensor_type=None, code_indent=''): + dense_input_trans_map = { + 'const Tensor&': 'const phi::DenseTensor&', + 'const std::vector&': 'const std::vector&', + 'const paddle::optional': 'paddle::optional', + 'const paddle::optional&': 'const paddle::optional&', + 'const paddle::optional>&': 'const paddle::optional>&', + } + dense_out_trans_map = { + 'Tensor': 'phi::DenseTensor*', + 'std::vector': 'std::vector', + } + sr_input_trans_map = { + 'const Tensor&': 'const phi::SelectedRows&', + 'const paddle::optional&': 'const paddle::optional&', + } + sr_out_trans_map = {'Tensor': 'phi::SelectedRows*'} + input_names = self.inputs['names'] + input_infos = self.inputs['input_info'] + kernel_args_type_list = ['const phi::DeviceContext&'] + + attr_names = self.attrs['names'] + kernel_param = self.kernel['param'] + if kernel_param is None: + kernel_param = input_names + attr_names + + input_name_tensor_map, input_tensor_code = self.gene_input( + kernel_tensor_type, code_indent + ) + + input_tensor_code = ( + input_tensor_code + + self.generate_record_op_info_supplement( + input_name_tensor_map, code_indent + ) + ) + kernel_args = ["*dev_ctx"] for param in kernel_param: if param in input_names: diff --git a/paddle/phi/api/yaml/generator/dist_api_gen.py b/paddle/phi/api/yaml/generator/dist_api_gen.py index 431a98d829ae6b..9442dd6f1d3353 100644 --- a/paddle/phi/api/yaml/generator/dist_api_gen.py +++ b/paddle/phi/api/yaml/generator/dist_api_gen.py @@ -13,6 +13,7 @@ # limitations under the License. import argparse +import collections import re import yaml @@ -52,6 +53,7 @@ // 4. Select Kernel{} // 5. Reshard Input{}\n // 6. PrepareData (DataTransform & Prepare Dense Input){} + // RecordOpInfoSupplement{} // 7. Infer Local DenseTensor Meta{} // 8. DenseTensor Kernel Call{} }}\n @@ -339,9 +341,16 @@ TUPLE_OUTPUT_NAME_TEMPLATE = """ """ KERNEL_CALL_TEMPLATE = """ + phi::RecordEvent* kernel_record_event = nullptr; + if(phi::RecordEvent::IsEnabled()){{ + kernel_record_event = new phi::RecordEvent(\"{} dist compute\", phi::TracerEventType::OperatorInner, 1); + }} using kernel_signature = {}; auto* kernel_fn = kernel.GetVariadicKernelFn(); (*kernel_fn)({}, {}); + if(kernel_record_event != nullptr){{ + delete kernel_record_event; + }} """ # TODO(GhostScreaming): Some operators generate shape info in runtime, @@ -1058,10 +1067,7 @@ def generate_reshard_input_code(self) -> str: return input_reshard_code - def generate_single_dense_input( - self, - input_name, - ): + def generate_single_dense_input(self, input_name, input_name_tensor_map): input_tensor_code = "" trans_flag = self.gene_trans_flag(input_name) input_names = self.inputs['names'] @@ -1082,13 +1088,11 @@ def generate_single_dense_input( idx=kernel_param.index(input_name), trans_flag=trans_flag, ) + input_name_tensor_map[input_name].append((f'input_{input_name}', False)) return input_tensor_code - def generate_vector_dense_input( - self, - input_name, - ): + def generate_vector_dense_input(self, input_name, input_name_tensor_map): input_tensor_code = "" trans_flag = self.gene_trans_flag(input_name) input_names = self.inputs['names'] @@ -1101,12 +1105,14 @@ def generate_vector_dense_input( idx=kernel_param.index(input_name), trans_flag=trans_flag, ) + input_name_tensor_map[input_name].append( + (f'dense_input_{input_name}_vec', True) + ) return input_tensor_code def generate_optional_single_dense_input( - self, - input_name, + self, input_name, input_name_tensor_map ): input_tensor_code = "" trans_flag = self.gene_trans_flag(input_name) @@ -1130,12 +1136,12 @@ def generate_optional_single_dense_input( trans_flag=trans_flag, ) ) + input_name_tensor_map[input_name].append((f'input_{input_name}', False)) return input_tensor_code def generate_optional_vector_dense_input( - self, - input_name, + self, input_name, input_name_tensor_map ): input_tensor_code = "" trans_flag = self.gene_trans_flag(input_name) @@ -1150,6 +1156,8 @@ def generate_optional_vector_dense_input( trans_flag=trans_flag, ) + input_name_tensor_map[input_name].append((f'input_{input_name}', True)) + return input_tensor_code def generate_prepare_data_code(self) -> str: @@ -1158,6 +1166,7 @@ def generate_prepare_data_code(self) -> str: kernel_param = self.kernel['param'] if kernel_param is None: kernel_param = input_names + attr_names + input_name_tensor_map = collections.defaultdict(list) input_tensor_code = "" for i, input_name in enumerate(input_names): # set input code @@ -1168,7 +1177,7 @@ def generate_prepare_data_code(self) -> str: if api_tensor_type in self.gene_dist_input_func.keys(): input_tensor_code += self.gene_dist_input_func[ api_tensor_type - ][phi_tensor_type](input_name) + ][phi_tensor_type](input_name, input_name_tensor_map) else: # do nothing pass @@ -1200,7 +1209,7 @@ def generate_prepare_data_code(self) -> str: ) ) - return input_tensor_code + return input_tensor_code, input_name_tensor_map def generate_infer_meta_code(self) -> str: input_names = self.inputs['names'] @@ -1351,6 +1360,7 @@ def generate_kernel_call_code(self) -> str: kernel_signature = "void(*)(" + ", ".join(kernel_args_type_list) + ")" result = KERNEL_CALL_TEMPLATE.format( + self.api, kernel_signature, ", ".join(input_args), ", ".join(self.dense_output_args), @@ -1397,17 +1407,38 @@ def generate_auto_paralel_branch(self) -> str: # if no tensor input, do not genetate auto parallel branch if len(self.inputs['names']) == 0: return "" + + infer_spmd_code = self.generate_infer_spmd_code() + output_creation_code = self.generate_output_creation_code() + infer_global_shape_code = self.generate_infer_global_shape_code() + kernel_selection_code = self.generate_kernel_selection_code() + reshard_input_code = self.generate_reshard_input_code() + ( + prepare_data_code, + input_name_tensor_map, + ) = self.generate_prepare_data_code() + record_op_info_supplement_code = ( + self.generate_record_op_info_supplement( + input_name_tensor_map, ' ', True + ) + ) + infer_meta_code = self.generate_infer_meta_code() + kernel_call_code = self.generate_kernel_call_code() + output_dist_attr_setting = self.generate_output_dist_attr_setting() + return_code = self.generate_return_code() + return MAIN_DIST_BRANCH_TEMPLATE.format( - self.generate_infer_spmd_code(), - self.generate_output_creation_code(), - self.generate_infer_global_shape_code(), - self.generate_kernel_selection_code(), - self.generate_reshard_input_code(), - self.generate_prepare_data_code(), - self.generate_infer_meta_code(), - self.generate_kernel_call_code(), - self.generate_output_dist_attr_setting(), - self.generate_return_code(), + infer_spmd_code, + output_creation_code, + infer_global_shape_code, + kernel_selection_code, + reshard_input_code, + prepare_data_code, + record_op_info_supplement_code, + infer_meta_code, + kernel_call_code, + output_dist_attr_setting, + return_code, ) def check_argument_whether_support_auto_parallel(self): diff --git a/paddle/phi/api/yaml/generator/dist_bw_api_gen.py b/paddle/phi/api/yaml/generator/dist_bw_api_gen.py index d1b71de591cc73..e10725fcd31cb1 100644 --- a/paddle/phi/api/yaml/generator/dist_bw_api_gen.py +++ b/paddle/phi/api/yaml/generator/dist_bw_api_gen.py @@ -33,6 +33,8 @@ // 5. Select Kernel{} // 6. Reshard Input{}\n // 7. PrepareData (DataTransform & Prepare Dense Input){} + // RecordOpInfoSupplement + {} // 8. Infer Local DenseTensor Meta{} // 9. DenseTensor Kernel Call{} }} @@ -291,18 +293,39 @@ def generate_auto_paralel_branch(self) -> str: # if no tensor input, do not genetate auto parallel branch if len(self.inputs['names']) == 0: return "" + infer_spmd_code = self.generate_infer_spmd_code() + output_creation_code = self.generate_output_creation_code() + infer_global_shape_code = self.generate_infer_global_shape_code() + output_dist_attr_setting = self.generate_output_dist_attr_setting() + kernel_selection_code = self.generate_kernel_selection_code() + reshard_input_code = self.generate_reshard_input_code() + ( + prepare_data_code, + input_name_tensor_map, + ) = self.generate_prepare_data_code() + record_op_info_supplement_code = ( + self.generate_record_op_info_supplement( + input_name_tensor_map, ' ', True + ) + ) + infer_meta_code = self.generate_infer_meta_code() + kernel_call_code = self.generate_kernel_call_code() + reshard_output_code = self.generate_reshard_output_code() + return_code = self.generate_return_code() + return MAIN_DIST_BRANCH_TEMPLATE.format( - self.generate_infer_spmd_code(), - self.generate_output_creation_code(), - self.generate_infer_global_shape_code(), - self.generate_output_dist_attr_setting(), - self.generate_kernel_selection_code(), - self.generate_reshard_input_code(), - self.generate_prepare_data_code(), - self.generate_infer_meta_code(), - self.generate_kernel_call_code(), - self.generate_reshard_output_code(), - self.generate_return_code(), + infer_spmd_code, + output_creation_code, + infer_global_shape_code, + output_dist_attr_setting, + kernel_selection_code, + reshard_input_code, + prepare_data_code, + record_op_info_supplement_code, + infer_meta_code, + kernel_call_code, + reshard_output_code, + return_code, )