diff --git a/dev/create-release/spark-rm/Dockerfile b/dev/create-release/spark-rm/Dockerfile
index ca9e10bebfc53..aadb2e2da11cf 100644
--- a/dev/create-release/spark-rm/Dockerfile
+++ b/dev/create-release/spark-rm/Dockerfile
@@ -125,7 +125,7 @@ RUN python3.9 -m pip install --force $BASIC_PIP_PKGS unittest-xml-reporting $CON
 # See 'docutils<0.18.0' in SPARK-39421
 RUN python3.9 -m pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe 'pyzmq<24.0.0' \
     ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly>=4.8' 'docutils<0.18.0' \
-    'flake8==3.9.0' 'mypy==1.8.0' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'black==23.9.1' \
+    'flake8==3.9.0' 'mypy==1.8.0' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'black==23.9.1' 'ruff==0.5.0' \
     'pandas-stubs==1.2.0.53' 'grpcio==1.62.0' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \
     'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5'
 RUN python3.9 -m pip list
diff --git a/dev/lint-python b/dev/lint-python
index b8703310bc4b6..4accd882b18ee 100755
--- a/dev/lint-python
+++ b/dev/lint-python
@@ -21,6 +21,8 @@ MINIMUM_FLAKE8="3.9.0"
 MINIMUM_MYPY="1.8.0"
 MYPY_BUILD="mypy"
 PYTEST_BUILD="pytest"
+RUFF_BUILD="ruff"
+MINIMUM_RUFF="0.2.0"
 
 PYTHON_EXECUTABLE="${PYTHON_EXECUTABLE:-python3}"
 
@@ -61,6 +63,9 @@ while (( "$#" )); do
     --mypy-data)
       MYPY_DATA_TEST=true
       ;;
+    --ruff)
+      RUFF_TEST=true
+      ;;
     *)
       echo "Error: $1 is not supported"
       exit_with_usage
@@ -69,7 +74,7 @@ while (( "$#" )); do
   shift
 done
 
-if [[ -z "$COMPILE_TEST$BLACK_TEST$PYSPARK_CUSTOM_ERRORS_CHECK_TEST$FLAKE8_TEST$MYPY_TEST$MYPY_EXAMPLES_TEST$MYPY_DATA_TEST" ]]; then
+if [[ -z "$COMPILE_TEST$BLACK_TEST$PYSPARK_CUSTOM_ERRORS_CHECK_TEST$FLAKE8_TEST$MYPY_TEST$MYPY_EXAMPLES_TEST$MYPY_DATA_TEST$RUFF_TEST" ]]; then
   COMPILE_TEST=true
   BLACK_TEST=true
   PYSPARK_CUSTOM_ERRORS_CHECK_TEST=true
@@ -77,6 +82,7 @@ if [[ -z "$COMPILE_TEST$BLACK_TEST$PYSPARK_CUSTOM_ERRORS_CHECK_TEST$FLAKE8_TEST$
   MYPY_TEST=true
   MYPY_EXAMPLES_TEST=true
   MYPY_DATA_TEST=true
+  RUFF_TEST=true
 fi
 
 function satisfies_min_version {
@@ -204,6 +210,34 @@ function mypy_examples_test {
     fi
 }
 
+function ruff_test {
+    if ! hash "$RUFF_BUILD" 2> /dev/null; then
+        echo "The $RUFF_BUILD command was not found. Skipping for now."
+        return
+    fi
+
+    _RUFF_VERSION=($($RUFF_BUILD --version))
+    RUFF_VERSION="${_RUFF_VERSION[1]}"
+    EXPECTED_RUFF="$(satisfies_min_version $RUFF_VERSION $MINIMUM_RUFF)"
+
+    if [[ "$EXPECTED_RUFF" == "False" ]]; then
+        echo "The minimum ruff version needs to be $MINIMUM_RUFF. Your current version is $RUFF_VERSION. Skipping for now."
+        return
+    fi
+
+    RUFF_REPORT=$( $RUFF_BUILD check python/ --config dev/ruff.toml )
+    RUFF_STATUS=$?
+    if [ "$RUFF_STATUS" -ne 0 ]; then
+        echo "ruff checks failed:"
+        echo "$RUFF_REPORT"
+        echo "$RUFF_STATUS"
+        exit "$RUFF_STATUS"
+    else
+        echo "ruff checks passed."
+        echo
+    fi
+}
+
 function mypy_test {
 
     if ! hash "$MYPY_BUILD" 2> /dev/null; then
@@ -339,6 +373,10 @@ if [[ "$MYPY_TEST" == "true" ]] || [[ "$MYPY_EXAMPLES_TEST" == "true" ]] || [[ "
     mypy_test
 fi
 
+if [[ "$RUFF_TEST" == "true" ]]; then
+    ruff_test
+fi
+
 echo
 echo "all lint-python tests passed!"
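Note on the lint-python change above: because RUFF_TEST joins the default set, ruff now runs as part of the full dev/lint-python suite, and the new --ruff flag runs it in isolation. A minimal way to exercise the check locally, assuming ruff is installed and on PATH (e.g. pip install 'ruff==0.5.0', matching dev/requirements.txt below):

    dev/lint-python --ruff                        # only the new ruff check, via the lint driver
    ruff check python/ --config dev/ruff.toml     # the exact command ruff_test runs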
diff --git a/dev/requirements.txt b/dev/requirements.txt
index 88883a963950e..05efb5ba812b3 100644
--- a/dev/requirements.txt
+++ b/dev/requirements.txt
@@ -24,6 +24,7 @@ coverage
 mypy==1.8.0
 pytest-mypy-plugins==1.9.3
 flake8==3.9.0
+ruff==0.5.0
 
 # See SPARK-38680.
 pandas-stubs<1.2.0.54
diff --git a/dev/ruff.toml b/dev/ruff.toml
new file mode 100644
index 0000000000000..cb8ef5371defd
--- /dev/null
+++ b/dev/ruff.toml
@@ -0,0 +1,8 @@
+line-length = 100
+
+target-version = "py39"
+
+[lint]
+select = [
+    "NPY201", # Numpy 2.0 compatibility checks
+]
diff --git a/python/pyspark/ml/linalg/__init__.py b/python/pyspark/ml/linalg/__init__.py
index d470f8b8b5c46..cedd3b04564ec 100644
--- a/python/pyspark/ml/linalg/__init__.py
+++ b/python/pyspark/ml/linalg/__init__.py
@@ -706,12 +706,12 @@ def dot(self, other: Iterable[float]) -> np.float64:
 
         elif isinstance(other, SparseVector):
             # Find out common indices.
-            self_cmind = np.in1d(self.indices, other.indices, assume_unique=True)
+            self_cmind = np.isin(self.indices, other.indices, assume_unique=True)
             self_values = self.values[self_cmind]
             if self_values.size == 0:
                 return np.float64(0.0)
             else:
-                other_cmind = np.in1d(other.indices, self.indices, assume_unique=True)
+                other_cmind = np.isin(other.indices, self.indices, assume_unique=True)
                 return np.dot(self_values, other.values[other_cmind])
 
         else:
diff --git a/python/pyspark/mllib/linalg/__init__.py b/python/pyspark/mllib/linalg/__init__.py
index 02cef36c11c46..40f0255a91bbe 100644
--- a/python/pyspark/mllib/linalg/__init__.py
+++ b/python/pyspark/mllib/linalg/__init__.py
@@ -813,12 +813,12 @@ def dot(self, other: Iterable[float]) -> np.float64:
 
         elif isinstance(other, SparseVector):
             # Find out common indices.
-            self_cmind = np.in1d(self.indices, other.indices, assume_unique=True)
+            self_cmind = np.isin(self.indices, other.indices, assume_unique=True)
             self_values = self.values[self_cmind]
             if self_values.size == 0:
                 return np.float64(0.0)
             else:
-                other_cmind = np.in1d(other.indices, self.indices, assume_unique=True)
+                other_cmind = np.isin(other.indices, self.indices, assume_unique=True)
                 return np.dot(self_values, other.values[other_cmind])
 
         else:
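Note on the np.in1d -> np.isin changes above: NumPy documents np.isin as the replacement for the deprecated np.in1d, and ruff's NPY201 rule flags the old name when checking NumPy 2.0 compatibility. A minimal sketch (hypothetical index values, not taken from the Spark sources) showing the replacement is a drop-in for the 1-D, unique index arrays used in SparseVector.dot:

    import numpy as np

    # Sorted, unique index arrays of two sparse vectors, as in SparseVector.dot.
    a = np.array([0, 3, 7])
    b = np.array([3, 5, 7, 9])

    # For 1-D inputs, np.isin returns the same boolean mask np.in1d did;
    # assume_unique=True skips the deduplication step, which is safe here
    # because sparse-vector indices are unique by construction.
    mask = np.isin(a, b, assume_unique=True)
    print(mask)     # [False  True  True]
    print(a[mask])  # [3 7]

The mask selects the values at indices common to both vectors, so the subsequent np.dot multiplies only the overlapping entries; the rename does not change that behavior.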