From b878b96bf2ce022de2cf14a0eff470edde48c7d7 Mon Sep 17 00:00:00 2001
From: "Deng, Daisy" <daisy.deng@intel.com>
Date: Tue, 25 Feb 2025 09:06:24 +0000
Subject: [PATCH 1/5] enabled fsdp distributed test

---
 test/xpu/run_distributed.py | 32 ++++++++++---
 test/xpu/skip_list_dist.py  | 90 +++++++++++++++++++++++++++++++++++++
 2 files changed, 115 insertions(+), 7 deletions(-)
 create mode 100644 test/xpu/skip_list_dist.py

diff --git a/test/xpu/run_distributed.py b/test/xpu/run_distributed.py
index bed342d74..13b38a5d8 100644
--- a/test/xpu/run_distributed.py
+++ b/test/xpu/run_distributed.py
@@ -2,22 +2,40 @@
 import subprocess
 import sys
 
+from skip_list_dist import skip_dict
+from xpu_test_utils import launch_test
 
+res = 0
+res2 = 0
+fail_test = []
+
+# run python test
 def run(test_command):
     result = subprocess.run(test_command, capture_output=True, text=True)
     print(result.stdout)
     print(result.stderr)
     if "FAILED" in result.stdout or "FAILED" in result.stderr:
-        return 0
-    else:
-        return 1
-
+        fail_test.append(" ".join(test_command))
+    return result.returncode 
 
-res = 0
 test_command = ["python", "distributed/test_c10d_ops_xccl.py"]
 res += run(test_command)
 test_command = ["python", "distributed/test_c10d_xccl.py"]
 res += run(test_command)
 
-exit_code = os.WEXITSTATUS(res)
-sys.exit(exit_code)
+# run pytest with skiplist
+for key in skip_dict:
+    skip_list = skip_dict[key]
+    fail = launch_test(key, skip_list)
+    res2 += fail
+    if fail:
+        fail_test.append(key)
+
+if fail_test:
+    print(",".join(fail_test) + " have failures")
+
+exit_code = os.WEXITSTATUS(res2)
+if exit_code == 0:
+    sys.exit(res)
+else:
+    sys.exit(exit_code)
diff --git a/test/xpu/skip_list_dist.py b/test/xpu/skip_list_dist.py
new file mode 100644
index 000000000..7eba27035
--- /dev/null
+++ b/test/xpu/skip_list_dist.py
@@ -0,0 +1,90 @@
+skip_dict = {
+    "../../../../test/distributed/fsdp/test_fsdp_checkpoint.py": (
+        "test_checkpoint_fsdp_wrapping_cpu_offload0_offload_activations_False_use_orig_params_False",
+        "test_checkpoint_fsdp_wrapping_cpu_offload1_offload_activations_False_use_orig_params_False",
+        "test_checkpoint_fsdp_wrapping_cpu_offload1_offload_activations_True_use_orig_params_False",
+        "test_checkpoint_submodule_use_reentrant_False_xpu",
+    ),
+    "../../../../test/distributed/fsdp/test_fsdp_apply.py": None,
+    "../../../../test/distributed/fsdp/test_fsdp_clip_grad_norm.py": (
+        "test_ddp_parity_xpu",
+    ),
+    "../../../../test/distributed/fsdp/test_fsdp_common.py": None,
+    "../../../../test/distributed/fsdp/test_fsdp_core.py": (
+        "test_delayed_optim_step_offload_false_no_shard_xpu",
+        "test_delayed_optim_step_offload_false_none_xpu",
+        "test_delayed_optim_step_offload_false_shard_grad_op_xpu",
+        "test_delayed_optim_step_offload_true_none_xpu",
+        "test_delayed_optim_step_offload_true_shard_grad_op_xpu",
+        "test_delayed_reduce_scatter_offload_false_no_shard_xpu",
+        "test_delayed_reduce_scatter_offload_false_none_xpu",
+        "test_delayed_reduce_scatter_offload_false_shard_grad_op_xpu",
+        "test_delayed_reduce_scatter_offload_true_none_xpu",
+        "test_delayed_reduce_scatter_offload_true_shard_grad_op_xpu",
+        "test_mixture_of_experts_offload_false_no_shard_xpu",
+        "test_mixture_of_experts_offload_false_none_xpu",
+        "test_mixture_of_experts_offload_false_shard_grad_op_xpu",
+        "test_mixture_of_experts_offload_true_none_xpu",
+        "test_mixture_of_experts_offload_true_shard_grad_op_xpu",
+        "test_mixture_of_experts_with_delay_before_free_offload_false_no_shard_xpu",
+        "test_mixture_of_experts_with_delay_before_free_offload_false_none_xpu",
+        "test_mixture_of_experts_with_delay_before_free_offload_false_shard_grad_op_xpu",
+        "test_mixture_of_experts_with_delay_before_free_offload_true_none_xpu",
+        "test_mixture_of_experts_with_delay_before_free_offload_true_shard_grad_op_xpu",
+        "test_nested_always_wrap_model_offload_false_no_shard_xpu",
+        "test_nested_always_wrap_model_offload_false_none_xpu",
+        "test_nested_always_wrap_model_offload_false_shard_grad_op_xpu",
+        "test_nested_always_wrap_model_offload_true_none_xpu",
+        "test_nested_always_wrap_model_offload_true_shard_grad_op_xpu",
+        "test_nested_wrapped_model_offload_false_no_shard_xpu",
+        "test_nested_wrapped_model_offload_false_none_xpu",
+        "test_nested_wrapped_model_offload_false_shard_grad_op_xpu",
+        "test_nested_wrapped_model_offload_true_none_xpu",
+        "test_nested_wrapped_model_offload_true_shard_grad_op_xpu",
+        "test_transformer_offload_false_no_shard_xpu",
+        "test_transformer_offload_false_none_xpu",
+        "test_transformer_offload_false_shard_grad_op_xpu",
+        "test_transformer_offload_true_none_xpu",
+        "test_transformer_offload_true_shard_grad_op_xpu",
+    ),
+    "../../../../test/distributed/fsdp/test_fsdp_dtensor_state_dict.py": (
+        "test_dtensor_sharded_model_load_state_dict_offload_to_cpu_False_is_even_sharded_model_False_xpu",
+        "test_dtensor_sharded_model_load_state_dict_offload_to_cpu_False_is_even_sharded_model_True_xpu",
+        "test_dtensor_sharded_model_load_state_dict_offload_to_cpu_True_is_even_sharded_model_False_xpu",
+        "test_dtensor_sharded_model_load_state_dict_offload_to_cpu_True_is_even_sharded_model_True_xpu",
+        "test_dtensor_sharded_optim_load_state_dict_offload_to_cpu_False_is_even_sharded_model_False_xpu",
+        "test_dtensor_sharded_optim_load_state_dict_offload_to_cpu_False_is_even_sharded_model_True_xpu",
+        "test_dtensor_sharded_optim_load_state_dict_offload_to_cpu_True_is_even_sharded_model_False_xpu",
+        "test_dtensor_sharded_optim_load_state_dict_offload_to_cpu_True_is_even_sharded_model_True_xpu",
+        "test_dtensor_sharded_tensor_state_dict_identical_offload_to_cpu_False_is_even_sharded_model_False_xpu",
+        "test_dtensor_sharded_tensor_state_dict_identical_offload_to_cpu_False_is_even_sharded_model_True_xpu",
+        "test_dtensor_sharded_tensor_state_dict_identical_offload_to_cpu_True_is_even_sharded_model_False_xpu",
+        "test_dtensor_sharded_tensor_state_dict_identical_offload_to_cpu_True_is_even_sharded_model_True_xpu",
+        "test_fsdp_init_with_device_mesh_is_even_sharded_model_False_xpu",
+        "test_fsdp_init_with_device_mesh_is_even_sharded_model_True_xpu",
+        "test_raises_warning_or_errors_xpu",
+        ),
+    "../../../../test/distributed/fsdp/test_fsdp_exec_order.py": None,
+    "../../../../test/distributed/fsdp/test_fsdp_fine_tune.py": (
+            "test_parity_with_non_frozen_fsdp_xpu",
+            "test_parity_with_ddp_xpu",
+        ),
+    "../../../../test/distributed/fsdp/test_fsdp_fx.py": None,
+    "../../../../test/distributed/fsdp/test_fsdp_input.py": None,
+    "../../../../test/distributed/fsdp/test_fsdp_multiple_forward.py": None,
+    "../../../../test/distributed/fsdp/test_fsdp_multiple_wrapping.py": None,
+    "../../../../test/distributed/fsdp/test_fsdp_pure_fp16.py": None,
+    "../../../../test/distributed/fsdp/test_fsdp_traversal.py": None,
+    "../../../../test/distributed/fsdp/test_fsdp_uneven.py": None,
+    "../../../../test/distributed/fsdp/test_hsdp_dtensor_state_dict.py": (
+        "test_dtensor_sharded_model_load_state_dict_offload_to_cpu_False_xpu",
+        "test_dtensor_sharded_model_load_state_dict_offload_to_cpu_True_xpu",
+        "test_dtensor_sharded_optim_load_state_dict_offload_to_cpu_False_xpu",
+        "test_dtensor_sharded_optim_load_state_dict_offload_to_cpu_True_xpu",
+        "test_dtensor_sharded_tensor_state_dict_identical_offload_to_cpu_False_xpu",
+        "test_dtensor_sharded_tensor_state_dict_identical_offload_to_cpu_True_xpu",
+        "test_hsdp_init_with_device_mesh_xpu",
+        "test_root_module_is_not_FSDP_xpu",
+        ),
+    "../../../../test/distributed/fsdp/test_utils.py": None,
+}

From b346bcaca2d6edbc66ef7cad3324262a9f6cb250 Mon Sep 17 00:00:00 2001
From: "Deng, Daisy" <daisy.deng@intel.com>
Date: Wed, 26 Feb 2025 13:44:53 +0000
Subject: [PATCH 2/5] update pytorch branch for testing only

---
 .github/workflows/_linux_build.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/_linux_build.yml b/.github/workflows/_linux_build.yml
index bd25a53b0..1c29164e9 100644
--- a/.github/workflows/_linux_build.yml
+++ b/.github/workflows/_linux_build.yml
@@ -70,6 +70,7 @@ jobs:
           cd ../ && rm -rf pytorch
           pip install requests
           git clone https://github.com/pytorch/pytorch pytorch
+          git chekcout cherry/enable-xccl-fsdp-tests
           if [ "${{ inputs.pytorch }}" != "nightly_wheel" ]; then
             cd pytorch && git checkout $(echo ${{ inputs.pytorch }})
             # apply PRs for stock pytorch

From fc0ed5ecc2f10fd3ae781629d1860630363161bd Mon Sep 17 00:00:00 2001
From: daisyden <daisyden@intel.com>
Date: Thu, 27 Feb 2025 02:41:53 +0000
Subject: [PATCH 3/5] fix lint issues

---
 test/xpu/run_distributed.py |  4 +++-
 test/xpu/skip_list_dist.py  | 10 +++++-----
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/test/xpu/run_distributed.py b/test/xpu/run_distributed.py
index 13b38a5d8..0fa5bb337 100644
--- a/test/xpu/run_distributed.py
+++ b/test/xpu/run_distributed.py
@@ -9,6 +9,7 @@
 res2 = 0
 fail_test = []
 
+
 # run python test
 def run(test_command):
     result = subprocess.run(test_command, capture_output=True, text=True)
@@ -16,7 +17,8 @@ def run(test_command):
     print(result.stderr)
     if "FAILED" in result.stdout or "FAILED" in result.stderr:
         fail_test.append(" ".join(test_command))
-    return result.returncode 
+    return result.returncode
+
 
 test_command = ["python", "distributed/test_c10d_ops_xccl.py"]
 res += run(test_command)
diff --git a/test/xpu/skip_list_dist.py b/test/xpu/skip_list_dist.py
index 7eba27035..992a80e8f 100644
--- a/test/xpu/skip_list_dist.py
+++ b/test/xpu/skip_list_dist.py
@@ -63,12 +63,12 @@
         "test_fsdp_init_with_device_mesh_is_even_sharded_model_False_xpu",
         "test_fsdp_init_with_device_mesh_is_even_sharded_model_True_xpu",
         "test_raises_warning_or_errors_xpu",
-        ),
+    ),
     "../../../../test/distributed/fsdp/test_fsdp_exec_order.py": None,
     "../../../../test/distributed/fsdp/test_fsdp_fine_tune.py": (
-            "test_parity_with_non_frozen_fsdp_xpu",
-            "test_parity_with_ddp_xpu",
-        ),
+        "test_parity_with_non_frozen_fsdp_xpu",
+        "test_parity_with_ddp_xpu",
+    ),
     "../../../../test/distributed/fsdp/test_fsdp_fx.py": None,
     "../../../../test/distributed/fsdp/test_fsdp_input.py": None,
     "../../../../test/distributed/fsdp/test_fsdp_multiple_forward.py": None,
@@ -85,6 +85,6 @@
         "test_dtensor_sharded_tensor_state_dict_identical_offload_to_cpu_True_xpu",
         "test_hsdp_init_with_device_mesh_xpu",
         "test_root_module_is_not_FSDP_xpu",
-        ),
+    ),
     "../../../../test/distributed/fsdp/test_utils.py": None,
 }

From d5c5a6e42a3c5e790e1a4f3c7c33063cb9b008dd Mon Sep 17 00:00:00 2001
From: daisyden <daisyden@intel.com>
Date: Thu, 27 Feb 2025 08:06:39 +0000
Subject: [PATCH 4/5] fix pytorch branch

---
 .github/workflows/_linux_build.yml | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/_linux_build.yml b/.github/workflows/_linux_build.yml
index 1c29164e9..c01a9544d 100644
--- a/.github/workflows/_linux_build.yml
+++ b/.github/workflows/_linux_build.yml
@@ -69,10 +69,9 @@ jobs:
           source activate xpu_build
           cd ../ && rm -rf pytorch
           pip install requests
-          git clone https://github.com/pytorch/pytorch pytorch
-          git chekcout cherry/enable-xccl-fsdp-tests
+          git clone https://github.com/zhangxiaoli73/pytorch.git pytorch
           if [ "${{ inputs.pytorch }}" != "nightly_wheel" ]; then
-            cd pytorch && git checkout $(echo ${{ inputs.pytorch }})
+            cd pytorch && git checkout cherry/enable-xccl-fsdp-tests
             # apply PRs for stock pytorch
             python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py
             git status && git show -s

From 3623ae64401b12cf22b75cc3e16f7d00d75a1cab Mon Sep 17 00:00:00 2001
From: daisyden <daisyden@intel.com>
Date: Thu, 27 Feb 2025 08:16:24 +0000
Subject: [PATCH 5/5] fix pytorch branch for testing only

---
 .github/workflows/_linux_ut.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml
index 22d2834ad..40c40c463 100644
--- a/.github/workflows/_linux_ut.yml
+++ b/.github/workflows/_linux_ut.yml
@@ -316,9 +316,9 @@ jobs:
           source activate xpu_op_${ZE_AFFINITY_MASK}
           cd ../ && rm -rf pytorch
           pip install requests
-          git clone https://github.com/pytorch/pytorch pytorch
+          git clone https://github.com/zhangxiaoli73/pytorch.git pytorch
           if [ "${{ inputs.pytorch }}" != "nightly_wheel" ]; then
-            cd pytorch && git checkout $(echo ${{ inputs.pytorch }})
+            cd pytorch && git checkout cherry/enable-xccl-fsdp-tests
             # apply PRs for stock pytorch
             python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py
             git status && git show -s