Skip to content

Commit

Permalink
check compatibility with NumPy 2.0 via ruff
Browse files Browse the repository at this point in the history
  • Loading branch information
codesorcery committed Jul 4, 2024
1 parent d5dc223 commit 4c6fe1f
Show file tree
Hide file tree
Showing 6 changed files with 53 additions and 6 deletions.
2 changes: 1 addition & 1 deletion dev/create-release/spark-rm/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ RUN python3.9 -m pip install --force $BASIC_PIP_PKGS unittest-xml-reporting $CON
# See 'docutils<0.18.0' in SPARK-39421
RUN python3.9 -m pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe 'pyzmq<24.0.0' \
ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly>=4.8' 'docutils<0.18.0' \
'flake8==3.9.0' 'mypy==1.8.0' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'black==23.9.1' \
'flake8==3.9.0' 'mypy==1.8.0' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'black==23.9.1' 'ruff==0.5.0' \
'pandas-stubs==1.2.0.53' 'grpcio==1.62.0' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \
'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5'
RUN python3.9 -m pip list
Expand Down
40 changes: 39 additions & 1 deletion dev/lint-python
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ MINIMUM_FLAKE8="3.9.0"
MINIMUM_MYPY="1.8.0"
MYPY_BUILD="mypy"
PYTEST_BUILD="pytest"
RUFF_BUILD="ruff"
MINIMUM_RUFF="0.2.0"

PYTHON_EXECUTABLE="${PYTHON_EXECUTABLE:-python3}"

Expand Down Expand Up @@ -61,6 +63,9 @@ while (( "$#" )); do
--mypy-data)
MYPY_DATA_TEST=true
;;
--ruff)
RUFF_TEST=true
;;
*)
echo "Error: $1 is not supported"
exit_with_usage
Expand All @@ -69,14 +74,15 @@ while (( "$#" )); do
shift
done

if [[ -z "$COMPILE_TEST$BLACK_TEST$PYSPARK_CUSTOM_ERRORS_CHECK_TEST$FLAKE8_TEST$MYPY_TEST$MYPY_EXAMPLES_TEST$MYPY_DATA_TEST" ]]; then
if [[ -z "$COMPILE_TEST$BLACK_TEST$PYSPARK_CUSTOM_ERRORS_CHECK_TEST$FLAKE8_TEST$MYPY_TEST$MYPY_EXAMPLES_TEST$MYPY_DATA_TEST$RUFF_TEST" ]]; then
COMPILE_TEST=true
BLACK_TEST=true
PYSPARK_CUSTOM_ERRORS_CHECK_TEST=true
FLAKE8_TEST=true
MYPY_TEST=true
MYPY_EXAMPLES_TEST=true
MYPY_DATA_TEST=true
RUFF_TEST=true
fi

function satisfies_min_version {
Expand Down Expand Up @@ -204,6 +210,34 @@ function mypy_examples_test {
fi
}

# Run ruff over python/ using the rule set in dev/ruff.toml (currently only
# the NumPy 2.0 compatibility checks, NPY201). Soft-skips when ruff is
# missing or older than $MINIMUM_RUFF; on lint failures it prints the report
# and exits the whole script with ruff's non-zero status.
# NOTE: bash function variables are script-global — this sets RUFF_VERSION,
# EXPECTED_RUFF, RUFF_REPORT and RUFF_STATUS in the caller's scope.
function ruff_test {
    # ruff not installed: skip rather than fail, matching the other lint
    # helpers in this script (e.g. the mypy/flake8 checks).
    if ! hash "$RUFF_BUILD" 2> /dev/null; then
        echo "The $RUFF_BUILD command was not found. Skipping for now."
        return
    fi

    # `ruff --version` prints e.g. "ruff 0.5.0"; word-splitting into an
    # array makes element [1] the bare version number.
    _RUFF_VERSION=($($RUFF_BUILD --version))
    RUFF_VERSION="${_RUFF_VERSION[1]}"
    # satisfies_min_version is defined earlier in this script; it echoes
    # the string "True"/"False" — presumably a Python-based compare.
    EXPECTED_RUFF="$(satisfies_min_version $RUFF_VERSION $MINIMUM_RUFF)"

    if [[ "$EXPECTED_RUFF" == "False" ]]; then
        echo "The minimum ruff version needs to be $MINIMUM_RUFF. Your current version is $RUFF_VERSION. Skipping for now."
        return
    fi

    # Capture the report so it is only shown on failure; $? must be read
    # immediately after the command substitution.
    RUFF_REPORT=$( $RUFF_BUILD check python/ --config dev/ruff.toml )
    RUFF_STATUS=$?
    if [ "$RUFF_STATUS" -ne 0 ]; then
        echo "ruff checks failed:"
        echo "$RUFF_REPORT"
        echo "$RUFF_STATUS"
        # Abort the whole lint run, propagating ruff's exit code.
        exit "$RUFF_STATUS"
    else
        echo "ruff checks passed."
        echo
    fi
}


function mypy_test {
if ! hash "$MYPY_BUILD" 2> /dev/null; then
Expand Down Expand Up @@ -339,6 +373,10 @@ if [[ "$MYPY_TEST" == "true" ]] || [[ "$MYPY_EXAMPLES_TEST" == "true" ]] || [[ "
mypy_test
fi

if [[ "$RUFF_TEST" == "true" ]]; then
ruff_test
fi

echo
echo "all lint-python tests passed!"

Expand Down
1 change: 1 addition & 0 deletions dev/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ coverage
mypy==1.8.0
pytest-mypy-plugins==1.9.3
flake8==3.9.0
ruff==0.5.0
# See SPARK-38680.
pandas-stubs<1.2.0.54

Expand Down
8 changes: 8 additions & 0 deletions dev/ruff.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Ruff configuration used by dev/lint-python (`ruff check python/ --config dev/ruff.toml`).

# Maximum line length ruff assumes when checking; presumably matches the
# project's existing Python line-length limit — confirm against the
# black/flake8 configs.
line-length = 100

# Lowest Python version whose syntax ruff should assume is available.
target-version = "py39"

[lint]
# Only the NumPy 2.0 compatibility rules are enabled for now
# (flags removed/renamed NumPy APIs such as np.in1d -> np.isin).
select = [
  "NPY201", # NumPy 2.0 compatibility checks
]
4 changes: 2 additions & 2 deletions python/pyspark/ml/linalg/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -706,12 +706,12 @@ def dot(self, other: Iterable[float]) -> np.float64:

elif isinstance(other, SparseVector):
# Find out common indices.
self_cmind = np.in1d(self.indices, other.indices, assume_unique=True)
self_cmind = np.isin(self.indices, other.indices, assume_unique=True)
self_values = self.values[self_cmind]
if self_values.size == 0:
return np.float64(0.0)
else:
other_cmind = np.in1d(other.indices, self.indices, assume_unique=True)
other_cmind = np.isin(other.indices, self.indices, assume_unique=True)
return np.dot(self_values, other.values[other_cmind])

else:
Expand Down
4 changes: 2 additions & 2 deletions python/pyspark/mllib/linalg/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -813,12 +813,12 @@ def dot(self, other: Iterable[float]) -> np.float64:

elif isinstance(other, SparseVector):
# Find out common indices.
self_cmind = np.in1d(self.indices, other.indices, assume_unique=True)
self_cmind = np.isin(self.indices, other.indices, assume_unique=True)
self_values = self.values[self_cmind]
if self_values.size == 0:
return np.float64(0.0)
else:
other_cmind = np.in1d(other.indices, self.indices, assume_unique=True)
other_cmind = np.isin(other.indices, self.indices, assume_unique=True)
return np.dot(self_values, other.values[other_cmind])

else:
Expand Down

0 comments on commit 4c6fe1f

Please sign in to comment.