[SPARK-50507][PYTHON][TESTS] Group pandas function related tests

### What changes were proposed in this pull request?
Group the pandas-function-related tests.

### Why are the changes needed?
These tests were already grouped on the classic side; this change also groups the parity tests, for consistency.

### Does this PR introduce _any_ user-facing change?
No, test-only.

### How was this patch tested?
Updated CI.

### Was this patch authored or co-authored using generative AI tooling?
No.

Closes #49074 from zhengruifeng/group_connect_test_pandas.

Authored-by: Ruifeng Zheng <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
apache · Dec 6, 2024 · b1c118f · b1c118f
1 parent ecd1911
commit b1c118f
Show file tree

Hide file tree

Showing 11 changed files with 34 additions and 18 deletions.
diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py
@@ -534,8 +534,8 @@ def __hash__(self):
         "pyspark.sql.tests.pandas.test_pandas_udf_typehints",
         "pyspark.sql.tests.pandas.test_pandas_udf_typehints_with_future_annotations",
         "pyspark.sql.tests.pandas.test_pandas_udf_window",
+        "pyspark.sql.tests.pandas.test_pandas_sqlmetrics",
         "pyspark.sql.tests.pandas.test_converter",
-        "pyspark.sql.tests.test_pandas_sqlmetrics",
         "pyspark.sql.tests.test_python_datasource",
         "pyspark.sql.tests.test_python_streaming_datasource",
         "pyspark.sql.tests.test_readwriter",
@@ -1054,11 +1054,7 @@ def __hash__(self):
         "pyspark.sql.tests.connect.test_parity_memory_profiler",
         "pyspark.sql.tests.connect.test_parity_udtf",
         "pyspark.sql.tests.connect.test_parity_tvf",
-        "pyspark.sql.tests.connect.test_parity_pandas_udf",
-        "pyspark.sql.tests.connect.test_parity_pandas_map",
         "pyspark.sql.tests.connect.test_parity_arrow_map",
-        "pyspark.sql.tests.connect.test_parity_pandas_grouped_map",
-        "pyspark.sql.tests.connect.test_parity_pandas_cogrouped_map",
         "pyspark.sql.tests.connect.test_parity_arrow_grouped_map",
         "pyspark.sql.tests.connect.test_parity_arrow_cogrouped_map",
         "pyspark.sql.tests.connect.test_parity_python_datasource",
@@ -1074,13 +1070,17 @@ def __hash__(self):
         "pyspark.sql.tests.connect.streaming.test_parity_listener",
         "pyspark.sql.tests.connect.streaming.test_parity_foreach",
         "pyspark.sql.tests.connect.streaming.test_parity_foreach_batch",
-        "pyspark.sql.tests.connect.test_parity_pandas_grouped_map_with_state",
-        "pyspark.sql.tests.connect.test_parity_pandas_udf_scalar",
-        "pyspark.sql.tests.connect.test_parity_pandas_udf_grouped_agg",
-        "pyspark.sql.tests.connect.test_parity_pandas_udf_window",
         "pyspark.sql.tests.connect.test_resources",
         "pyspark.sql.tests.connect.shell.test_progress",
         "pyspark.sql.tests.connect.test_df_debug",
+        "pyspark.sql.tests.connect.pandas.test_parity_pandas_map",
+        "pyspark.sql.tests.connect.pandas.test_parity_pandas_grouped_map",
+        "pyspark.sql.tests.connect.pandas.test_parity_pandas_grouped_map_with_state",
+        "pyspark.sql.tests.connect.pandas.test_parity_pandas_cogrouped_map",
+        "pyspark.sql.tests.connect.pandas.test_parity_pandas_udf",
+        "pyspark.sql.tests.connect.pandas.test_parity_pandas_udf_scalar",
+        "pyspark.sql.tests.connect.pandas.test_parity_pandas_udf_grouped_agg",
+        "pyspark.sql.tests.connect.pandas.test_parity_pandas_udf_window",
     ],
     excluded_python_implementations=[
         "PyPy"  # Skip these tests under PyPy since they require numpy, pandas, and pyarrow and

diff --git a/python/pyspark/sql/tests/connect/pandas/__init__.py b/python/pyspark/sql/tests/connect/pandas/__init__.py
@@ -0,0 +1,16 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
diff --git a/...nnect/test_parity_pandas_cogrouped_map.py → ...andas/test_parity_pandas_cogrouped_map.py b/...nnect/test_parity_pandas_cogrouped_map.py → ...andas/test_parity_pandas_cogrouped_map.py
@@ -28,7 +28,7 @@ class CogroupedApplyInPandasTests(
 
 
 if __name__ == "__main__":
-    from pyspark.sql.tests.connect.test_parity_pandas_cogrouped_map import *  # noqa: F401
+    from pyspark.sql.tests.connect.pandas.test_parity_pandas_cogrouped_map import *  # noqa: F401
 
     try:
         import xmlrunner

diff --git a/...connect/test_parity_pandas_grouped_map.py → .../pandas/test_parity_pandas_grouped_map.py b/...connect/test_parity_pandas_grouped_map.py → .../pandas/test_parity_pandas_grouped_map.py
@@ -28,7 +28,7 @@ def test_supported_types(self):
 
 
 if __name__ == "__main__":
-    from pyspark.sql.tests.connect.test_parity_pandas_grouped_map import *  # noqa: F401
+    from pyspark.sql.tests.connect.pandas.test_parity_pandas_grouped_map import *  # noqa: F401
 
     try:
         import xmlrunner

diff --git a/...t_parity_pandas_grouped_map_with_state.py → ...t_parity_pandas_grouped_map_with_state.py b/...t_parity_pandas_grouped_map_with_state.py → ...t_parity_pandas_grouped_map_with_state.py
@@ -29,7 +29,7 @@ class GroupedApplyInPandasWithStateTests(
 
 
 if __name__ == "__main__":
-    from pyspark.sql.tests.connect.test_parity_pandas_grouped_map_with_state import *  # noqa: F401
+    from pyspark.sql.tests.connect.pandas.test_parity_pandas_grouped_map_with_state import *  # noqa: F401,E501
 
     try:
         import xmlrunner

diff --git a/...l/tests/connect/test_parity_pandas_map.py → .../connect/pandas/test_parity_pandas_map.py b/...l/tests/connect/test_parity_pandas_map.py → .../connect/pandas/test_parity_pandas_map.py
@@ -28,7 +28,7 @@ class MapInPandasParityTests(
 
 if __name__ == "__main__":
     import unittest
-    from pyspark.sql.tests.connect.test_parity_pandas_map import *  # noqa: F401
+    from pyspark.sql.tests.connect.pandas.test_parity_pandas_map import *  # noqa: F401
 
     try:
         import xmlrunner  # type: ignore[import]

diff --git a/...l/tests/connect/test_parity_pandas_udf.py → .../connect/pandas/test_parity_pandas_udf.py b/...l/tests/connect/test_parity_pandas_udf.py → .../connect/pandas/test_parity_pandas_udf.py
@@ -25,7 +25,7 @@ class PandasUDFParityTests(PandasUDFTestsMixin, ReusedConnectTestCase):
 
 if __name__ == "__main__":
     import unittest
-    from pyspark.sql.tests.connect.test_parity_pandas_udf import *  # noqa: F401
+    from pyspark.sql.tests.connect.pandas.test_parity_pandas_udf import *  # noqa: F401
 
     try:
         import xmlrunner  # type: ignore[import]

diff --git a/...ect/test_parity_pandas_udf_grouped_agg.py → ...das/test_parity_pandas_udf_grouped_agg.py b/...ect/test_parity_pandas_udf_grouped_agg.py → ...das/test_parity_pandas_udf_grouped_agg.py
@@ -28,7 +28,7 @@ class PandasUDFGroupedAggParityTests(
 
 
 if __name__ == "__main__":
-    from pyspark.sql.tests.connect.test_parity_pandas_udf_grouped_agg import *  # noqa: F401
+    from pyspark.sql.tests.connect.pandas.test_parity_pandas_udf_grouped_agg import *  # noqa: F401
 
     try:
         import xmlrunner  # type: ignore[import]

diff --git a/.../connect/test_parity_pandas_udf_scalar.py → ...t/pandas/test_parity_pandas_udf_scalar.py b/.../connect/test_parity_pandas_udf_scalar.py → ...t/pandas/test_parity_pandas_udf_scalar.py
@@ -24,7 +24,7 @@ class PandasUDFScalarParityTests(ScalarPandasUDFTestsMixin, ReusedConnectTestCas
 
 
 if __name__ == "__main__":
-    from pyspark.sql.tests.connect.test_parity_pandas_udf_scalar import *  # noqa: F401
+    from pyspark.sql.tests.connect.pandas.test_parity_pandas_udf_scalar import *  # noqa: F401
 
     try:
         import xmlrunner  # type: ignore[import]

diff --git a/.../connect/test_parity_pandas_udf_window.py → ...t/pandas/test_parity_pandas_udf_window.py b/.../connect/test_parity_pandas_udf_window.py → ...t/pandas/test_parity_pandas_udf_window.py
@@ -28,7 +28,7 @@ class PandasUDFWindowParityTests(
 
 
 if __name__ == "__main__":
-    from pyspark.sql.tests.connect.test_parity_pandas_udf_window import *  # noqa: F401
+    from pyspark.sql.tests.connect.pandas.test_parity_pandas_udf_window import *  # noqa: F401
 
     try:
         import xmlrunner  # type: ignore[import]

diff --git a/...spark/sql/tests/test_pandas_sqlmetrics.py → ...ql/tests/pandas/test_pandas_sqlmetrics.py b/...spark/sql/tests/test_pandas_sqlmetrics.py → ...ql/tests/pandas/test_pandas_sqlmetrics.py
@@ -57,7 +57,7 @@ def test_pandas(col1):
 
 
 if __name__ == "__main__":
-    from pyspark.sql.tests.test_pandas_sqlmetrics import *  # noqa: F401
+    from pyspark.sql.tests.pandas.test_pandas_sqlmetrics import *  # noqa: F401
 
     try:
         import xmlrunner