Skip to content

Commit

Permalink
Merge branch 'branch-23.02' into make-proxy-tests-async
Browse files Browse the repository at this point in the history
  • Loading branch information
pentschev authored Jan 16, 2023
2 parents 4ba0d30 + 1149257 commit d71a5b9
Show file tree
Hide file tree
Showing 25 changed files with 844 additions and 167 deletions.
3 changes: 2 additions & 1 deletion .github/CODEOWNERS
Validating CODEOWNERS rules …
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@
dask_cuda/ @rapidsai/daskcuda-python-codeowners

#build/ops code owners
.github/ @rapidsai/ops-codeowners
.github/ @rapidsai/ops-codeowners
ci/ @rapidsai/ops-codeowners
conda/ @rapidsai/ops-codeowners
**/Dockerfile @rapidsai/ops-codeowners
**/.dockerignore @rapidsai/ops-codeowners
dependencies.yaml @rapidsai/ops-codeowners
2 changes: 1 addition & 1 deletion .github/ops-bot.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@ auto_merger: true
branch_checker: true
label_checker: true
release_drafter: true
external_contributors: false
copy_prs: true
recently_updated: true
63 changes: 63 additions & 0 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
name: build

on:
push:
branches:
- "branch-*"
tags:
- v[0-9][0-9].[0-9][0-9].[0-9][0-9]
workflow_dispatch:
inputs:
branch:
required: true
type: string
date:
required: true
type: string
sha:
required: true
type: string
build_type:
type: string
default: nightly

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

jobs:
conda-python-build:
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@cuda-118
with:
build_type: ${{ inputs.build_type || 'branch' }}
branch: ${{ inputs.branch }}
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
upload-conda:
needs: [conda-python-build]
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/conda-upload-packages.yaml@cuda-118
with:
build_type: ${{ inputs.build_type || 'branch' }}
branch: ${{ inputs.branch }}
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
wheel-build:
runs-on: ubuntu-latest
container:
image: rapidsai/ci:latest
defaults:
run:
shell: bash
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Build wheel
run: ci/build_python_pypi.sh
- name: Publish distribution 📦 to PyPI
if: inputs.build_type == 'nightly'
uses: pypa/gh-action-pypi-publish@release/v1
with:
password: ${{ secrets.RAPIDSAI_PYPI_TOKEN }}
52 changes: 52 additions & 0 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
name: pr

on:
push:
branches:
- "pull-request/[0-9]+"

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

jobs:
pr-builder:
needs:
- checks
- conda-python-build
- conda-python-tests
- wheel-build
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@cuda-118
checks:
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@cuda-118
conda-python-build:
needs: checks
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@cuda-118
with:
build_type: pull-request
conda-python-tests:
needs: conda-python-build
secrets: inherit
# TODO: Switch this testing branch to "cuda-118" after `cudf` `3.10` builds are out.
# There is a circular testing dependency between `dask-cuda` and `cudf` right now, which
# prevents us from running `3.10` tests for `dask-cuda` until `3.10` `cudf` packages are published.
uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@main
with:
build_type: pull-request
wheel-build:
needs: checks
runs-on: ubuntu-latest
container:
image: rapidsai/ci:latest
defaults:
run:
shell: bash
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Build wheel
run: ci/build_python_pypi.sh
24 changes: 24 additions & 0 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
name: test

on:
workflow_dispatch:
inputs:
branch:
required: true
type: string
date:
required: true
type: string
sha:
required: true
type: string

jobs:
conda-python-tests:
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@cuda-118
with:
build_type: nightly
branch: ${{ inputs.branch }}
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
17 changes: 17 additions & 0 deletions ci/build_python.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/bin/bash
# Copyright (c) 2022, NVIDIA CORPORATION.

set -euo pipefail

source rapids-env-update

export CMAKE_GENERATOR=Ninja

rapids-print-env

rapids-logger "Begin py build"

rapids-mamba-retry mambabuild \
conda/recipes/dask-cuda

rapids-upload-conda-to-s3 python
18 changes: 18 additions & 0 deletions ci/build_python_pypi.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/bin/bash


python -m pip install build --user

# While conda provides these during conda-build, they are also necessary during
# the setup.py build for PyPI
export GIT_DESCRIBE_TAG=$(git describe --abbrev=0 --tags)
export GIT_DESCRIBE_NUMBER=$(git rev-list ${GIT_DESCRIBE_TAG}..HEAD --count)

# Compute/export VERSION_SUFFIX
source rapids-env-update

python -m build \
--sdist \
--wheel \
--outdir dist/ \
.
18 changes: 18 additions & 0 deletions ci/check_style.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/bin/bash
# Copyright (c) 2020-2022, NVIDIA CORPORATION.

set -euo pipefail

rapids-logger "Create checks conda environment"
. /opt/conda/etc/profile.d/conda.sh

rapids-dependency-file-generator \
--output conda \
--file_key checks \
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml

rapids-mamba-retry env create --force -f env.yaml -n checks
conda activate checks

# Run pre-commit checks
pre-commit run --hook-stage manual --all-files --show-diff-on-failure
2 changes: 1 addition & 1 deletion ci/gpu/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ cd "$WORKSPACE"
export GIT_DESCRIBE_TAG=`git describe --tags`
export MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'`
export UCX_PATH=$CONDA_PREFIX
export UCXPY_VERSION=0.30.*
export UCXPY_VERSION=0.30
unset GIT_DESCRIBE_TAG

# Enable NumPy's __array_function__ protocol (needed for NumPy 1.16.x,
Expand Down
7 changes: 6 additions & 1 deletion ci/release/update-version.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ CURRENT_SHORT_TAG=${CURRENT_MAJOR}.${CURRENT_MINOR}
NEXT_MAJOR=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[1]}')
NEXT_MINOR=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[2]}')
NEXT_SHORT_TAG=${NEXT_MAJOR}.${NEXT_MINOR}
NEXT_UCXPY_VERSION="$(curl -s https://version.gpuci.io/rapids/${NEXT_SHORT_TAG}).*"
NEXT_UCXPY_VERSION="$(curl -s https://version.gpuci.io/rapids/${NEXT_SHORT_TAG})"

echo "Preparing release $CURRENT_TAG => $NEXT_FULL_TAG"

Expand All @@ -33,3 +33,8 @@ function sed_runner() {

# Update UCX-Py version
sed_runner "s/export UCXPY_VERSION=.*/export UCXPY_VERSION="${NEXT_UCXPY_VERSION}"/g" ci/gpu/build.sh

# Bump cudf and dask-cudf testing dependencies
sed_runner "s/cudf=.*/cudf=${NEXT_SHORT_TAG}/g" dependencies.yaml
sed_runner "s/dask-cudf=.*/dask-cudf=${NEXT_SHORT_TAG}/g" dependencies.yaml
sed_runner "s/ucx-py=.*/ucx-py=${NEXT_UCXPY_VERSION}/g" dependencies.yaml
89 changes: 89 additions & 0 deletions ci/test_python.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
#!/bin/bash
# Copyright (c) 2022, NVIDIA CORPORATION.

set -euo pipefail

. /opt/conda/etc/profile.d/conda.sh

rapids-logger "Generate Python testing dependencies"
rapids-dependency-file-generator \
--output conda \
--file_key test_python \
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml

rapids-mamba-retry env create --force -f env.yaml -n test

# Temporarily allow unbound variables for conda activation.
set +u
conda activate test
set -u

rapids-logger "Downloading artifacts from previous jobs"
PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python)

RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"}
RAPIDS_COVERAGE_DIR=${RAPIDS_COVERAGE_DIR:-"${PWD}/coverage-results"}
mkdir -p "${RAPIDS_TESTS_DIR}" "${RAPIDS_COVERAGE_DIR}"
SUITEERROR=0

rapids-print-env

rapids-mamba-retry install \
-c "${PYTHON_CHANNEL}" \
dask-cuda

rapids-logger "Check GPU usage"
nvidia-smi

set +e

rapids-logger "pytest dask-cuda"
pushd dask_cuda
DASK_CUDA_TEST_SINGLE_GPU=1 \
UCXPY_IFNAME=eth0 \
UCX_WARN_UNUSED_ENV_VARS=n \
UCX_MEMTYPE_CACHE=n \
timeout 30m pytest \
-vv \
--capture=no \
--cache-clear \
--junitxml="${RAPIDS_TESTS_DIR}/junit-dask-cuda.xml" \
--cov-config=../pyproject.toml \
--cov=dask_cuda \
--cov-report=xml:"${RAPIDS_COVERAGE_DIR}/dask-cuda-coverage.xml" \
--cov-report=term \
tests
exitcode=$?

if (( ${exitcode} != 0 )); then
SUITEERROR=${exitcode}
echo "FAILED: 1 or more tests in dask-cuda"
fi
popd

rapids-logger "Run local benchmark"
python dask_cuda/benchmarks/local_cudf_shuffle.py \
--partition-size="1 KiB" \
-d 0 \
--runs 1 \
--backend dask
exitcode=$?

if (( ${exitcode} != 0 )); then
SUITEERROR=${exitcode}
echo "FAILED: Local benchmark with dask comms"
fi

python dask_cuda/benchmarks/local_cudf_shuffle.py \
--partition-size="1 KiB" \
-d 0 \
--runs 1 \
--backend explicit-comms
exitcode=$?

if (( ${exitcode} != 0 )); then
SUITEERROR=${exitcode}
echo "FAILED: Local benchmark with explicit comms"
fi

exit ${SUITEERROR}
7 changes: 3 additions & 4 deletions conda/recipes/dask-cuda/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
# Copyright (c) 2019-2021, NVIDIA CORPORATION.
# Copyright (c) 2019-2022, NVIDIA CORPORATION.

# Usage:
# conda build -c conda-forge .
{% set data = load_file_data("pyproject.toml") %}

{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %}
{% set number = environ.get('GIT_DESCRIBE_NUMBER', 0) %}
{% set py_version = environ.get('CONDA_PY', 36) %}
{% set py_version = environ['CONDA_PY'] %}
{% set git_hash = environ.get('GIT_DESCRIBE_HASH', '') %}

package:
Expand Down Expand Up @@ -42,9 +42,8 @@ test:
imports:
- dask_cuda


about:
home: http://rapids.ai/
home: https://rapids.ai/
license: Apache-2.0
license_file: ../../../LICENSE
summary: dask-cuda library
12 changes: 9 additions & 3 deletions dask_cuda/benchmarks/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,8 @@ class Config(NamedTuple):
def run_benchmark(client: Client, args: Namespace, config: Config):
"""Run a benchmark a specified number of times
If ``args.profile`` is set, the final run is profiled."""
If ``args.profile`` is set, the final run is profiled.
"""
results = []
for _ in range(max(1, args.runs) - 1):
res = config.bench_once(client, args, write_profile=None)
Expand All @@ -110,14 +111,18 @@ def gather_bench_results(client: Client, args: Namespace, config: Config):
def run(client: Client, args: Namespace, config: Config):
"""Run the full benchmark on the cluster
Waits for the cluster, sets up memory pools, prints and saves results"""
Waits for the cluster, sets up memory pools, prints and saves results
"""

wait_for_cluster(client, shutdown_on_failure=True)
assert len(client.scheduler_info()["workers"]) > 0
setup_memory_pools(
client,
args.type == "gpu",
args.rmm_pool_size,
args.disable_rmm_pool,
args.rmm_log_directory,
args.enable_rmm_statistics,
)
address_to_index, results, message_data = gather_bench_results(client, args, config)
p2p_bw = peer_to_peer_bandwidths(message_data, address_to_index)
Expand Down Expand Up @@ -156,7 +161,8 @@ def run_client_from_existing_scheduler(args: Namespace, config: Config):
def run_create_client(args: Namespace, config: Config):
"""Create a client + cluster and run
Shuts down the cluster at the end of the benchmark"""
Shuts down the cluster at the end of the benchmark
"""
cluster_options = get_cluster_options(args)
Cluster = cluster_options["class"]
cluster_args = cluster_options["args"]
Expand Down
Loading

0 comments on commit d71a5b9

Please sign in to comment.