Commit

update
feifei-111 committed Nov 16, 2023
1 parent a041eb0 commit fbc5064
Showing 1 changed file with 40 additions and 32 deletions.
python/paddle/jit/dy2static/partial_program.py (72 changes: 40 additions & 32 deletions)
@@ -223,10 +223,49 @@ def __init__(
        for var in self._inputs:
            if isinstance(var, framework.Variable):
                self._in_var_names.append(var.desc.name())

        self._out_var_descs = [
            self._outputs[var_id].desc for var_id in self._outputs.var_ids
        ]

        self._attrs = [
            'is_test',
            not self.training,
            'program_id',
            self.program_id,
        ]

        if self.training:
            # NOTE: In the case of higher-order gradient, the names of the parameter grads may be like
            # `grad/grad/grad/linear_0.w_0@GRAD` instead of simply `linear_0.w_0@GRAD`, so we get
            # the correct names of the parameter grads from program. And out grads are similar to above.
            self._attrs.extend(
                (
                    'param_grad_names',
                    self._grad_var_names.get('param', []),
                    'out_grad_names',
                    self._grad_var_names.get('out', []),
                    'x_grad_names',
                    self._grad_var_names.get('x', []),
                )
            )
        if self._cuda_graph_capture_mode:
            self._attrs.extend(
                (
                    'cuda_graph_capture_mode',
                    self._cuda_graph_capture_mode,
                    'cuda_graph_pool_id',
                    self._cuda_graph_pool_id,
                )
            )

        self._attrs.extend(
            [
                "x_names",
                self._in_var_names,
            ]
        )

    def __call__(self, inputs):
        """
        Execute static graph by Interpreter and Return dynamic Tensors.
@@ -237,7 +276,6 @@ def __call__(self, inputs):
        out_vars = self._prepare_outputs()
        self._cast_fp16_if_pure_fp16(in_vars)
        attrs = self._prepare_attributes()
        attrs.extend(["x_names", in_var_names])

        self._sync_lr_value_with_scheduler()

@@ -267,8 +305,6 @@ def sot_call(self, inputs):
        out_vars = self._prepare_outputs()
        self._cast_fp16_if_pure_fp16(inputs)
        attrs = self._prepare_attributes()
        attrs.extend(["x_names", self._in_var_names])
        self._sync_lr_value_with_scheduler()

        _legacy_C_ops.run_program(
            self._valid_vars(inputs),
@@ -770,35 +806,7 @@ def _prepare_attributes(self):
            self.forward_program.desc.block(0),
            'backward_global_block',
            self.backward_program.desc.block(0),
            'is_test',
            not self.training,
            'program_id',
            self.program_id,
        ]

        if self.training:
            # NOTE: In the case of higher-order gradient, the names of the parameter grads may be like
            # `grad/grad/grad/linear_0.w_0@GRAD` instead of simply `linear_0.w_0@GRAD`, so we get
            # the correct names of the parameter grads from program. And out grads are similar to above.
            attrs.extend(
                (
                    'param_grad_names',
                    self._grad_var_names.get('param', []),
                    'out_grad_names',
                    self._grad_var_names.get('out', []),
                    'x_grad_names',
                    self._grad_var_names.get('x', []),
                )
            )
        if self._cuda_graph_capture_mode:
            attrs.extend(
                (
                    'cuda_graph_capture_mode',
                    self._cuda_graph_capture_mode,
                    'cuda_graph_pool_id',
                    self._cuda_graph_pool_id,
                )
            )
        ] + self._attrs
        return attrs

    @switch_to_static_graph
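For context, the change above follows a common caching pattern: the key/value attribute pairs passed to _legacy_C_ops.run_program that do not vary between calls ('is_test', 'program_id', the grad-name lists, the CUDA-graph settings, and 'x_names') are now assembled once in __init__ and stored on the layer as self._attrs, while _prepare_attributes only builds the per-call block entries and concatenates the cached tail. Below is a minimal, self-contained sketch of that pattern; CachedAttrsLayer and its attribute names are illustrative assumptions, not Paddle APIs.

class CachedAttrsLayer:
    """Illustrative only: cache call-invariant attrs once, reuse them per call."""

    def __init__(self, program_id, training=True):
        self.program_id = program_id
        self.training = training
        # Built once in the constructor; every call reuses this flat
        # name/value list instead of rebuilding it.
        self._attrs = [
            'is_test',
            not self.training,
            'program_id',
            self.program_id,
        ]

    def _prepare_attributes(self, forward_block, backward_block):
        # Only the per-call pieces are assembled here; the cached tail
        # is appended unchanged.
        return [
            'forward_global_block',
            forward_block,
            'backward_global_block',
            backward_block,
        ] + self._attrs


layer = CachedAttrsLayer(program_id=0, training=False)
print(layer._prepare_attributes('fwd_block', 'bwd_block'))
# ['forward_global_block', 'fwd_block', 'backward_global_block', 'bwd_block',
#  'is_test', True, 'program_id', 0]

Splitting the attribute list this way avoids rebuilding the invariant entries on every forward call while keeping the final interleaved name/value layout unchanged.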
