[Semi AutoParall] Support Partial Semantic I #55508

Merged: 84 commits merged on Aug 4, 2023. The diff below shows changes from 80 commits.

Commits
a47bf99  base rule (JZ-LIANG, May 16, 2023)
21b5a75  add sharidng merge (JZ-LIANG, May 18, 2023)
f7e39d7  add sharidng axis merge (JZ-LIANG, May 19, 2023)
c92992d  define unified data class for inferencing dist_attr (pkuzyc, May 18, 2023)
42a7b77  test wrap DistTensorSpec in dygraph mode (pkuzyc, May 19, 2023)
180edcc  matmul main logic done (JZ-LIANG, May 23, 2023)
ecbb1ae  Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into… (JZ-LIANG, May 23, 2023)
f314b56  Merge remote-tracking branch 'zyc/develop' into semi-auto/rule-base (JZ-LIANG, May 23, 2023)
46153c7  shape int64 (JZ-LIANG, May 23, 2023)
1dcb80e  common cc (JZ-LIANG, May 23, 2023)
198bc1f  define unified data class for inferencing dist_attr (pkuzyc, May 18, 2023)
09d82a5  test wrap DistTensorSpec in dygraph mode (pkuzyc, May 19, 2023)
c3ea2a6  define python api and wrap function in static mode for DistTensorSpec (pkuzyc, May 23, 2023)
4cd1a2c  revise syntax (JZ-LIANG, May 24, 2023)
3631f06  Merge remote-tracking branch 'zyc/develop' into semi-auto/rule-base (JZ-LIANG, May 24, 2023)
ed0c31e  map bugfix (JZ-LIANG, May 29, 2023)
701d3fa  broadcast func (JZ-LIANG, May 29, 2023)
c1545a4  compile 1 (JZ-LIANG, May 29, 2023)
3ca2b73  add unitest (JZ-LIANG, May 31, 2023)
747de08  add registry (JZ-LIANG, Jun 6, 2023)
968ce61  Merge branch 'semi-auto/rule-base' of https://github.com/JZ-LIANG/Pad… (JZ-LIANG, Jun 6, 2023)
7be672d  update unitest (JZ-LIANG, Jun 6, 2023)
3389b7e  bugfix (JZ-LIANG, Jun 6, 2023)
3882a2c  bugfix (JZ-LIANG, Jun 6, 2023)
3719a5a  add pybind (JZ-LIANG, Jun 6, 2023)
73f49a8  bugfix (JZ-LIANG, Jun 6, 2023)
aced5ea  bugfix macro gloabl name space (JZ-LIANG, Jun 6, 2023)
ef92dc4  bugfix macro gloabl name space (JZ-LIANG, Jun 6, 2023)
adcb470  segment fault (JZ-LIANG, Jun 8, 2023)
43df148  pybind (JZ-LIANG, Jun 8, 2023)
27803af  pybind test (JZ-LIANG, Jun 8, 2023)
5612da9  pybind bugfixed1 (JZ-LIANG, Jun 14, 2023)
f9bd281  pybind bugfixed2 (JZ-LIANG, Jun 14, 2023)
18f8d29  pybind unitest (JZ-LIANG, Jun 14, 2023)
2628043  Merge remote-tracking branch 'upstream/develop' into semi-auto/rule-base (JZ-LIANG, Jun 16, 2023)
68a512a  merge dev (JZ-LIANG, Jun 16, 2023)
f2b2edb  merge dev (JZ-LIANG, Jun 16, 2023)
132558a  merge dev (JZ-LIANG, Jun 16, 2023)
f3bc740  fixed cmake conflict (JZ-LIANG, Jun 16, 2023)
c11cdd2  fixed cmake conflict (JZ-LIANG, Jun 16, 2023)
491bf65  rename get method (JZ-LIANG, Jun 20, 2023)
041abd4  revise inferforward output type (JZ-LIANG, Jun 20, 2023)
60c90d3  revise comment (JZ-LIANG, Jun 20, 2023)
d5d7557  replicated rule (JZ-LIANG, Jun 21, 2023)
44e9404  replicated rule 2 (JZ-LIANG, Jun 21, 2023)
7657ee5  revert bug deps (JZ-LIANG, Jun 27, 2023)
223f960  Merge branch 'semi-auto/revert-phi-dep' into semi-auto/replicated-rule (JZ-LIANG, Jun 27, 2023)
5dc1be3  add rule (JZ-LIANG, Jun 28, 2023)
3ce0e74  add unitest (JZ-LIANG, Jun 28, 2023)
80f2a03  add rule (JZ-LIANG, Jun 29, 2023)
062970d  add unitest (JZ-LIANG, Jul 6, 2023)
0cb4a9c  move ut of auto_parallel (zhiqiu, Jul 6, 2023)
ab67ce1  fix ut (zhiqiu, Jul 7, 2023)
f9675bd  Merge remote-tracking branch 'upstream/develop' into semi-auto/embedd… (JZ-LIANG, Jul 7, 2023)
7e31dea  Merge branch 'dev/mv_ut' of https://github.com/zhiqiu/Paddle into sem… (JZ-LIANG, Jul 7, 2023)
694b310  Merge branch 'semi-auto/embedding-rule' into semi-auto/softmax-rule (JZ-LIANG, Jul 7, 2023)
2d4e938  bugfix (JZ-LIANG, Jul 7, 2023)
f45eca8  bugfix (JZ-LIANG, Jul 7, 2023)
e9b4ddc  bugfix (JZ-LIANG, Jul 7, 2023)
43a4373  bugfix (JZ-LIANG, Jul 7, 2023)
ad31f1b  bugfix (JZ-LIANG, Jul 7, 2023)
9ca9969  bugfix (JZ-LIANG, Jul 7, 2023)
dfad99d  bugfix (JZ-LIANG, Jul 7, 2023)
fc3dfe6  Merge remote-tracking branch 'upstream/develop' into semi-auto/embedd… (JZ-LIANG, Jul 10, 2023)
def09f0  Merge branch 'semi-auto/embedding-rule' into semi-auto/softmax-rule (JZ-LIANG, Jul 10, 2023)
934cc61  resolute input sharding conflict maybe (JZ-LIANG, Jul 11, 2023)
daf098a  Merge branch 'semi-auto/embedding-rule' into semi-auto/softmax-rule (JZ-LIANG, Jul 11, 2023)
99a10f4  fixed comment (JZ-LIANG, Jul 12, 2023)
49257b9  Merge remote-tracking branch 'upstream/develop' into semi-auto/entrop… (JZ-LIANG, Jul 12, 2023)
fcf2ccb  add rule (JZ-LIANG, Jul 12, 2023)
4d2a854  Merge remote-tracking branch 'upstream/develop' into semi-auto/entrop… (JZ-LIANG, Jul 13, 2023)
6f7199a  add unitest (JZ-LIANG, Jul 13, 2023)
7c69300  Merge remote-tracking branch 'upstream/develop' into semi-auto/entrop… (JZ-LIANG, Jul 17, 2023)
b785790  add partial for distattr (JZ-LIANG, Jul 18, 2023)
35b0446  bugfix (JZ-LIANG, Jul 19, 2023)
ef7a4d6  Merge remote-tracking branch 'upstream/develop' into semi-auto/partial-I (JZ-LIANG, Jul 20, 2023)
d36137d  Merge remote-tracking branch 'upstream/develop' into semi-auto/partial-I (JZ-LIANG, Jul 24, 2023)
710a494  set --> map (JZ-LIANG, Jul 24, 2023)
a284d8b  pybind & unitest (JZ-LIANG, Jul 25, 2023)
052f0df  internal api (JZ-LIANG, Jul 25, 2023)
e2c13e9  partial status to map (JZ-LIANG, Jul 26, 2023)
f73bc2c  bugfix (JZ-LIANG, Jul 26, 2023)
ee903a8  update unitest (JZ-LIANG, Jul 31, 2023)
17c54d1  bugfix for partial set (JZ-LIANG, Aug 2, 2023)
@@ -160,6 +160,7 @@ MatmulSPMDRule::InferForward(const std::vector<DistTensorSpec>& input_specs,
// Step2.3.1 Output Partial
std::vector<int64_t> partial_on_dims =
ResoluteOutputPartialDimension(axis_to_dim_map, out_axes);
+ output_dist_attr_dst.set_partial_status(partial_on_dims);

// Step2.3.2 handle input tensor partial (TODO)
VLOG(4) << "MatmulSPMDRule InferForward: "
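To see how a contracted axis becomes a partial mesh dim, here is a minimal, self-contained sketch; the helper name and the einsum-style notation are illustrative stand-ins for ResoluteOutputPartialDimension, not Paddle code. In "mk,kn->mn", the contracted axis k is mapped to a mesh dim but absent from the output axes, so each rank computes only a partial sum and the output is marked partial on that mesh dim.

#include <cstdint>
#include <iostream>
#include <map>
#include <string>
#include <vector>

// Any axis that is mapped to a mesh dim but missing from the output
// notation leaves the output partial on that mesh dim.
std::vector<int64_t> ResolutePartialSketch(
    const std::map<std::string, int64_t>& axis_to_dim_map,
    const std::string& out_axes) {
  std::vector<int64_t> partial_dims;
  for (const auto& kv : axis_to_dim_map) {
    if (kv.second != -1 && out_axes.find(kv.first) == std::string::npos) {
      partial_dims.push_back(kv.second);
    }
  }
  return partial_dims;
}

int main() {
  // "mk,kn->mn": the contracted axis k is sharded on mesh dim 0,
  // while m and n are replicated (-1).
  std::map<std::string, int64_t> axis_to_dim_map = {
      {"m", -1}, {"k", 0}, {"n", -1}};
  for (int64_t d : ResolutePartialSketch(axis_to_dim_map, "mn")) {
    std::cout << "output is partial(SUM) on mesh dim " << d << "\n";
  }
  return 0;  // prints: output is partial(SUM) on mesh dim 0
}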
@@ -88,13 +88,15 @@ ReductionSPMDRule::InferForward(const std::vector<DistTensorSpec>& input_specs,
CopyTensorDistAttrForOutput(input_specs[0].dist_attr());
output_dist_attr.set_dims_mapping(output_dims_mapping);

- std::vector<TensorDistAttr> output_dist_attrs;
- output_dist_attrs.emplace_back(output_dist_attr);

// step2.4: handle partial
// Step2.4.1 Output Partial
std::vector<int64_t> partial_on_dims =
ResoluteOutputPartialDimension(axis_to_dim_map, output_axes);
+ output_dist_attr.set_partial_status(
+ partial_on_dims /*, handle reduce_type in future */);

+ std::vector<TensorDistAttr> output_dist_attrs;
+ output_dist_attrs.emplace_back(output_dist_attr);

// Step2.4.2 handle input tensor partial (TODO)
// If the op is a linear op, i.e. `linearity` is true, it supports
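The reordering in this hunk is the substantive change: emplace_back stores a copy, so a partial status set after the copy never reaches the stored output attribute. A minimal sketch of the pitfall, using a toy struct rather than Paddle's TensorDistAttr:

#include <cassert>
#include <vector>

struct ToyAttr {
  bool partial = false;
};

int main() {
  ToyAttr attr;
  std::vector<ToyAttr> outputs;
  outputs.emplace_back(attr);   // stores a copy of attr
  attr.partial = true;          // too late: the stored copy is unchanged
  assert(!outputs[0].partial);  // hence set_partial_status must run first
  return 0;
}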
6 changes: 5 additions & 1 deletion paddle/fluid/pybind/auto_parallel_py.cc
@@ -285,7 +285,11 @@ void BindAutoParallel(py::module *m) {
return TensorDistAttr(self);
},
py::arg("memo"))
.def("__str__", &TensorDistAttr::to_string);
.def("__str__", &TensorDistAttr::to_string)
.def("_is_partial", &TensorDistAttr::is_partial)
.def("_partial_dims", &TensorDistAttr::partial_dims)
.def("_clean_partial_dims", &TensorDistAttr::clean_partial_dims)
.def("_clean_partial_status", &TensorDistAttr::clean_partial_status);

py::class_<SPMDRuleBase>(*m, "SPMDRuleBase")
.def("infer_forward", &SPMDRuleBase::InferForward)
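For readers unfamiliar with the pattern above, the following is a hypothetical, self-contained pybind11 sketch (toy class and module name, not part of Paddle's build) showing how chained .def calls expose const member functions under underscore-prefixed, private-by-convention names:

#include <pybind11/pybind11.h>
#include <pybind11/stl.h>  // converts std::vector to a Python list

#include <cstdint>
#include <map>
#include <vector>

namespace py = pybind11;

struct ToyDistAttr {
  std::map<int64_t, int> partial_status;
  bool is_partial() const { return !partial_status.empty(); }
  std::vector<int64_t> partial_dims() const {
    std::vector<int64_t> dims;
    for (const auto& kv : partial_status) dims.push_back(kv.first);
    return dims;
  }
};

PYBIND11_MODULE(toy_auto_parallel, m) {
  py::class_<ToyDistAttr>(m, "ToyDistAttr")
      .def(py::init<>())
      .def("_is_partial", &ToyDistAttr::is_partial)
      .def("_partial_dims", &ToyDistAttr::partial_dims);
}

After building such a module, Python code could call toy_auto_parallel.ToyDistAttr()._is_partial(), mirroring the _is_partial and _partial_dims bindings added to TensorDistAttr here.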
93 changes: 92 additions & 1 deletion paddle/phi/core/distributed/auto_parallel/dist_attr.cc
@@ -24,6 +24,7 @@ namespace phi {
namespace distributed {
namespace auto_parallel {

// Partial is not allowed to be annotated by users for now.
std::vector<std::string> TensorDistAttr::fields_{
"process_mesh", "dims_mapping", "batch_dim", "dynamic_dims"};

@@ -44,6 +45,7 @@ TensorDistAttr& TensorDistAttr::operator=(const TensorDistAttr& dist_attr) {
std::swap(this->batch_dim_, tmp.batch_dim_);
std::swap(this->dynamic_dims_, tmp.dynamic_dims_);
std::swap(this->annotated_, tmp.annotated_);
std::swap(this->partial_status_, tmp.partial_status_);
return *this;
}

@@ -53,6 +55,7 @@ void TensorDistAttr::copy_from(const TensorDistAttr& dist_attr) {
set_batch_dim(dist_attr.batch_dim());
set_dynamic_dims(dist_attr.dynamic_dims());
set_annotated(dist_attr.annotated());
set_partial_status(dist_attr.partial_status());
}

void TensorDistAttr::set_process_mesh(const ProcessMesh& process_mesh) {
@@ -77,6 +80,46 @@ void TensorDistAttr::set_annotated(
annotated_ = annotated;
}

const std::vector<int64_t> TensorDistAttr::partial_dims() const {
std::vector<int64_t> keys;
keys.reserve(partial_status_.size());
for (auto& kv : partial_status_) {
keys.push_back(kv.first);
}
return keys;
}

void TensorDistAttr::set_partial_status(
const paddle::flat_hash_map<int64_t, _Partial_>& partial_status) {
partial_status_ = partial_status;
}

void TensorDistAttr::set_partial_status(const std::vector<int64_t>& dims,
const ReduceType& type) {
for (const auto& dim : dims) {
if (partial_status_.count(dim) != 0) {
PADDLE_THROW(phi::errors::InvalidArgument(
"Trying to Set dim %d as Partial which is already a Partial dim.",
dim));
}
_Partial_ partial = {dim, type};
partial_status_.emplace(dim, partial);
}
}

void TensorDistAttr::clean_partial_status() { partial_status_.clear(); }

void TensorDistAttr::clean_partial_dims(const std::vector<int64_t>& dims) {
for (const auto& dim : dims) {
if (partial_status_.count(dim) == 0) {
PADDLE_THROW(phi::errors::InvalidArgument(
"Trying to clean Partial on dim %d but it is not Partial.", dim));
} else {
partial_status_.erase(dim);
}
}
}
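To make the contract of set_partial_status and clean_partial_dims concrete, here is a self-contained toy that mirrors (but does not reuse) the Paddle class: double-setting a partial dim and cleaning a non-partial dim are both rejected.

#include <cassert>
#include <cstdint>
#include <map>
#include <stdexcept>
#include <vector>

class ToyDistAttr {
 public:
  void set_partial_dims(const std::vector<int64_t>& dims) {
    for (int64_t d : dims) {
      if (status_.count(d) != 0)
        throw std::invalid_argument("dim already partial");
      status_[d] = 0;  // stand-in for ReduceType::SUM
    }
  }
  void clean_partial_dims(const std::vector<int64_t>& dims) {
    for (int64_t d : dims) {
      if (status_.count(d) == 0)
        throw std::invalid_argument("dim is not partial");
      status_.erase(d);
    }
  }
  bool is_partial() const { return !status_.empty(); }

 private:
  std::map<int64_t, int> status_;  // mesh dim -> reduce type
};

int main() {
  ToyDistAttr attr;
  attr.set_partial_dims({0});
  assert(attr.is_partial());
  bool rejected = false;
  try {
    attr.set_partial_dims({0});  // double-set must be rejected
  } catch (const std::invalid_argument&) {
    rejected = true;
  }
  assert(rejected);
  attr.clean_partial_dims({0});
  assert(!attr.is_partial());
  return 0;
}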

void TensorDistAttr::set_default_dims_mapping(
const std::vector<int64_t>& tensor_shape) {
if (!tensor_shape.empty()) {
@@ -178,6 +221,21 @@ bool TensorDistAttr::verify_annotated(
return true;
}

bool TensorDistAttr::verify_partial_status() const {
VLOG(4) << "[TensorDistAttr verify_partial_status] "
<< partial_status_string();
for (auto& itr : partial_status_) {
if (itr.second.dim_ < 0 || itr.second.dim_ >= process_mesh_.ndim()) {
return false;
}
if (itr.second.type_ < ReduceType::SUM ||
itr.second.type_ > ReduceType::ALL) {
return false;
}
}
return true;
}

bool TensorDistAttr::verify(const std::vector<int64_t>& tensor_shape) const {
if (!verify_process_mesh(process_mesh_)) {
return false;
@@ -194,6 +252,9 @@ bool TensorDistAttr::verify(const std::vector<int64_t>& tensor_shape) const {
if (!verify_annotated(annotated_)) {
return false;
}
if (!verify_partial_status()) {
return false;
}
return true;
}

@@ -203,7 +264,8 @@ std::string TensorDistAttr::to_string() const {
dist_str += "dims_mappings: [" + str_join(dims_mapping_) + "], ";
dist_str += "batch_dim: " + std::to_string(batch_dim_) + ", ";
dist_str += "dynamic_dims: [" + str_join(dynamic_dims_) + "], ";
dist_str += "annotated: [" + str_join(annotated_) + "]}";
dist_str += "annotated: [" + str_join(annotated_) + "], ";
dist_str += "partial: " + partial_status_string() + ".}";
return dist_str;
}

@@ -254,6 +316,16 @@ void TensorDistAttr::parse_from_string(const std::string& data) {
from_proto(proto);
}

bool operator==(const _Partial_& lhs, const _Partial_& rhs) {
if (lhs.dim_ != rhs.dim_) {
return false;
}
if (lhs.type_ != rhs.type_) {
return false;
}
return true;
}

bool operator==(const TensorDistAttr& lhs, const TensorDistAttr& rhs) {
if (lhs.process_mesh() != rhs.process_mesh()) {
return false;
@@ -267,9 +339,28 @@ bool operator==(const TensorDistAttr& lhs, const TensorDistAttr& rhs) {
if (lhs.dynamic_dims() != rhs.dynamic_dims()) {
return false;
}
if (lhs.partial_status() != rhs.partial_status()) {
return false;
}
return true;
}

std::string TensorDistAttr::partial_status_string() const {
std::string partial_status_str = "[";
for (auto& itr : partial_status_) {
partial_status_str += itr.second.to_string() + ", ";
}
partial_status_str += "]";
return partial_status_str;
}

std::string _Partial_::to_string() const {
return "Partial(dims:" + std::to_string(dim_) + ", " +
ReduceTypeStrings[static_cast<int>(type_)] + ")";
}

} // namespace auto_parallel
} // namespace distributed
} // namespace phi
61 changes: 60 additions & 1 deletion paddle/phi/core/distributed/auto_parallel/dist_attr.h
@@ -25,20 +25,40 @@ limitations under the License. */
#include "paddle/phi/core/distributed/auto_parallel/process_mesh.h"
#include "paddle/phi/core/distributed/auto_parallel/utils.h"
#include "paddle/phi/core/enforce.h"
#include "paddle/utils/flat_hash_map.h"

namespace phi {
namespace distributed {
namespace auto_parallel {

constexpr const char* kDefault = "default";

enum class ReduceType : std::uint8_t {
SUM = 0,
AVG,
MAX,
MIN,
PRODUCT,
ANY,
ALL
[Inline review comment from a Contributor: "ALL means?"]
};
constexpr const char* ReduceTypeStrings[] = {
"SUM", "AVG", "MAX", "MIN", "PRODUCT", "ANY", "ALL"};

struct _Partial_ {
std::string to_string() const;

int64_t dim_{-1};
ReduceType type_{ReduceType::SUM};
};
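For intuition about the semantic itself: Partial(SUM) on a mesh dim means each rank along that dim holds one addend of the true tensor value, and the full tensor is recovered by applying the reduce type across ranks. A toy illustration, assuming a hypothetical 2-rank mesh dim (not Paddle code):

#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  // Two ranks each hold a partial result of, e.g., a row-parallel matmul.
  std::vector<double> rank0 = {1.0, 2.0};
  std::vector<double> rank1 = {3.0, 4.0};
  std::vector<double> full(rank0.size());
  for (std::size_t i = 0; i < full.size(); ++i) {
    full[i] = rank0[i] + rank1[i];  // what an allreduce(SUM) would compute
  }
  std::cout << full[0] << ", " << full[1] << "\n";  // prints: 4, 6
  return 0;
}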

class TensorDistAttr {
public:
TensorDistAttr() = default;

explicit TensorDistAttr(const std::vector<int64_t>& tensor_shape);

- TensorDistAttr(const TensorDistAttr& tensor);
+ TensorDistAttr(const TensorDistAttr& dist_attr);

TensorDistAttr& operator=(const TensorDistAttr& dist_attr);

@@ -52,6 +72,29 @@ class TensorDistAttr {

void set_dims_mapping(const std::vector<int64_t>& dims_mapping);

// true if tensor is partial on any mesh dim.
bool is_partial() const { return !partial_status_.empty(); }

// returns the mesh dims on which this tensor is partial
const std::vector<int64_t> partial_dims() const;

const paddle::flat_hash_map<int64_t, _Partial_>& partial_status() const {
return partial_status_;
}

// set the whole partial status map at once
void set_partial_status(
const paddle::flat_hash_map<int64_t, _Partial_>& partial_status);

// mark each of the given mesh dims as partial
void set_partial_status(const std::vector<int64_t>& dims,
const ReduceType& type = ReduceType::SUM);
// clean all partial status
void clean_partial_status();

// clean partial status on the given mesh dims
void clean_partial_dims(const std::vector<int64_t>& dims);

void set_default_dims_mapping(const std::vector<int64_t>& tensor_shape);

int64_t batch_dim() const { return batch_dim_; }
@@ -89,11 +132,17 @@

bool verify_annotated(const std::map<std::string, bool>& annotated) const;

bool verify_partial_status() const;

bool verify(const std::vector<int64_t>& tensor_shape) const;

// TensorDistAttr from_string(const std::string& dist_str);
std::string to_string() const;
std::string partial_status_string() const;

// In partial-support stage I, partial is always a runtime attribute, so
// there is no need to serialize it. Partial serialization will be
// supported in stage II.
void from_proto(const TensorDistAttrProto& proto);

TensorDistAttrProto to_proto() const;
@@ -109,6 +158,10 @@
int64_t batch_dim_{0};
std::vector<bool> dynamic_dims_;
std::map<std::string, bool> annotated_;
// The partial map is expected to be small (fewer entries than the mesh
// size), and iteration (copy and comparison) is more frequent than random
// element access. <key: dim on mesh, value: partial object>
paddle::flat_hash_map<int64_t, _Partial_> partial_status_;
[Inline review comment from a Contributor:]
Why use a map structure here? If the "dim_" in Partial indicates the mesh dim, it seems unnecessary to store another mesh dim. In addition, if one tensor has only one reduce type, is it better to use a data structure like:
Partial {
  vector<int64_t> mesh_dims;
  ReduceType type_;
}

[Reply from the Contributor Author:]
Correct! At first I thought we would use a set for partial_status_, which is why I built the Partial struct. Later I found that a map works better for partial_status_, since in most use cases we use the dim as the key to retrieve the Partial entry.

};
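A small sketch of the dim-keyed retrieval pattern mentioned in the reply above, with std::unordered_map standing in for paddle::flat_hash_map and a hypothetical consumer that checks whether a given mesh dim is partial:

#include <cstdint>
#include <iostream>
#include <unordered_map>

enum class ToyReduceType { SUM, AVG };

int main() {
  // key: mesh dim, value: reduce type (stand-in for _Partial_)
  std::unordered_map<int64_t, ToyReduceType> partial_status = {
      {0, ToyReduceType::SUM}};
  // Rules typically ask "is mesh dim d partial?" by key lookup:
  int64_t d = 0;
  if (partial_status.count(d) != 0) {
    std::cout << "mesh dim " << d << " is partial; schedule a reduction\n";
  }
  return 0;
}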

inline std::ostream& operator<<(std::ostream& os, const TensorDistAttr& obj) {
@@ -122,6 +175,12 @@ inline bool operator!=(const TensorDistAttr& lhs, const TensorDistAttr& rhs) {
return !operator==(lhs, rhs);
}

bool operator==(const _Partial_& lhs, const _Partial_& rhs);

inline bool operator!=(const _Partial_& lhs, const _Partial_& rhs) {
return !operator==(lhs, rhs);
}

} // namespace auto_parallel
} // namespace distributed
} // namespace phi