PaddlePaddle · wanghuancoder · Oct 30, 2023 · Oct 27, 2023 · Oct 27, 2023 · Oct 27, 2023
diff --git a/paddle/phi/api/lib/tensor_copy.cc b/paddle/phi/api/lib/tensor_copy.cc
@@ -24,7 +24,11 @@ limitations under the License. */
 #include "paddle/phi/core/meta_tensor.h"
 #include "paddle/phi/core/tensor_utils.h"
 #include "paddle/phi/infermeta/unary.h"
-
+#ifdef PADDLE_WITH_DISTRIBUTE
+#include "paddle/phi/api/lib/data_transform.h"
+#include "paddle/phi/core/distributed/auto_parallel/reshard_utils.h"
+#include "paddle/phi/infermeta/spmd_rules/rules.h"
+#endif
 namespace paddle {
 namespace experimental {
 
@@ -40,7 +44,45 @@ void copy(const Tensor& src, const Place& place, bool blocking, Tensor* dst) {
   auto& pool = paddle::experimental::DeviceContextPool::Instance();
   auto* dev_ctx = pool.GetMutable(
       target_place.GetType() == place.GetType() ? place : target_place);
+#ifdef PADDLE_WITH_DISTRIBUTE
+  bool run_auto_parallel = AllInputsAreDistTensor(src);
+  bool rank_is_in_current_mesh = false;
+  if (run_auto_parallel) {
+    auto mesh =
+        std::static_pointer_cast<phi::distributed::DistTensor>(src.impl())
+            ->dist_attr()
+            .process_mesh();
+    rank_is_in_current_mesh = phi::distributed::IsCurRankInMesh(mesh);
+
+    auto meta_dist_input_x = MakeDistMetaTensor(*src.impl());
+
+    auto dist_out = SetKernelDistOutput(dst, meta_dist_input_x.dist_attr());
+    auto dense_out = dist_out->unsafe_mutable_value();
+    if (!rank_is_in_current_mesh) {
+      *dense_out =
+          phi::DenseTensor(std::make_shared<phi::Allocation>(
+                               nullptr, 0, phi::distributed::GetDefaultPlace()),
+                           phi::DenseTensorMeta());
+    }
+
+    phi::MetaTensor meta_dist_out(dist_out);
+    phi::UnchangedInferMeta(MakeMetaTensor(*(src.impl())), &meta_dist_out);
+
+    if (rank_is_in_current_mesh) {
+      auto dist_input_x =
+          static_cast<phi::distributed::DistTensor*>(src.impl().get());
+
+      auto input_x = &dist_input_x->value();
+
+      phi::MetaTensor meta_dense_out(dense_out);
+      phi::UnchangedInferMeta(MakeMetaTensor(*input_x), &meta_dense_out);
 
+      phi::Copy(*dev_ctx, *input_x, place, blocking, dense_out);
+    }
+    VLOG(6) << "copy finished. ";
+    return;
+  }
+#endif
   auto dense_x = TensorToDenseTensor(src);
 
   auto kernel_out = SetKernelOutput(dst);

diff --git a/test/auto_parallel/CMakeLists.txt b/test/auto_parallel/CMakeLists.txt
@@ -160,6 +160,8 @@ if(WITH_DISTRIBUTE AND WITH_GPU)
   set_tests_properties(test_fuse_adamw_pass PROPERTIES TIMEOUT 20)
   py_test_modules(test_rule_based_tuner_o2 MODULES test_rule_based_tuner_o2)
   set_tests_properties(test_rule_based_tuner_o2 PROPERTIES TIMEOUT 50)
+  py_test_modules(test_semi_auto_parallel_functional_in_single_card MODULES
+                  test_semi_auto_parallel_functional_in_single_card)
   # End of unittests WITH single card and timeout
 
   # NOTE(zyl): unittests WITH single card and WITHOUT timeout

diff --git a/test/auto_parallel/test_semi_auto_parallel_functional_in_single_card.py b/test/auto_parallel/test_semi_auto_parallel_functional_in_single_card.py
@@ -0,0 +1,33 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+import paddle
+import paddle.distributed as dist
+
+
+class TestSemiAutoParallelFunctionalInSingleCard(unittest.TestCase):  # smoke tests: no value assertions
+    def test_tensor_copy_to(self):  # passes as long as _copy_to on a dist tensor does not raise
+        mesh = dist.ProcessMesh([0, 1], dim_names=["x"])  # process ids 0 and 1 on a single mesh axis "x"
+        dense_tensor = paddle.randn([10, 20])  # random input; its values are never checked
+        dist_tensor = dist.shard_tensor(
+            dense_tensor,
+            dist_attr=dist.DistAttr(mesh=mesh, sharding_specs=[None, None]),  # presumably: no dim sharded -- confirm
+        )
+        dist_tensor._copy_to(paddle.CPUPlace(), True)  # result discarded; True is presumably `blocking` -- confirm
+
+
+if __name__ == "__main__":
+    unittest.main()